1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 31 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 33 34 static int link_free_space(struct btrfs_free_space_ctl *ctl, 35 struct btrfs_free_space *info); 36 37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 38 struct btrfs_path *path, 39 u64 offset) 40 { 41 struct btrfs_key key; 42 struct btrfs_key location; 43 struct btrfs_disk_key disk_key; 44 struct btrfs_free_space_header *header; 45 struct extent_buffer *leaf; 46 struct inode *inode = NULL; 47 int ret; 48 49 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 50 key.offset = offset; 51 key.type = 0; 52 53 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 54 if (ret < 0) 55 return ERR_PTR(ret); 56 if (ret > 0) { 57 btrfs_release_path(path); 58 return ERR_PTR(-ENOENT); 59 } 60 61 leaf = path->nodes[0]; 62 header = btrfs_item_ptr(leaf, path->slots[0], 63 struct btrfs_free_space_header); 64 btrfs_free_space_key(leaf, header, &disk_key); 65 btrfs_disk_key_to_cpu(&location, &disk_key); 66 btrfs_release_path(path); 67 68 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 69 if (!inode) 70 return ERR_PTR(-ENOENT); 71 if (IS_ERR(inode)) 72 return inode; 73 if (is_bad_inode(inode)) { 74 iput(inode); 75 return ERR_PTR(-ENOENT); 76 } 77 78 inode->i_mapping->flags &= ~__GFP_FS; 79 80 return inode; 81 } 82 83 struct inode *lookup_free_space_inode(struct btrfs_root *root, 84 struct btrfs_block_group_cache 85 *block_group, struct btrfs_path *path) 86 { 87 struct inode *inode = NULL; 88 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 89 90 spin_lock(&block_group->lock); 91 if (block_group->inode) 92 inode = igrab(block_group->inode); 93 spin_unlock(&block_group->lock); 94 if (inode) 95 return inode; 96 97 inode = __lookup_free_space_inode(root, path, 98 block_group->key.objectid); 99 if (IS_ERR(inode)) 100 return inode; 101 102 spin_lock(&block_group->lock); 103 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 104 printk(KERN_INFO "Old style space inode found, converting.\n"); 105 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 106 BTRFS_INODE_NODATACOW; 107 block_group->disk_cache_state = BTRFS_DC_CLEAR; 108 } 109 110 if (!block_group->iref) { 111 block_group->inode = igrab(inode); 112 block_group->iref = 1; 113 } 114 spin_unlock(&block_group->lock); 115 116 return inode; 117 } 118 119 int __create_free_space_inode(struct btrfs_root *root, 120 struct btrfs_trans_handle *trans, 121 struct btrfs_path *path, u64 ino, u64 offset) 122 { 123 
struct btrfs_key key; 124 struct btrfs_disk_key disk_key; 125 struct btrfs_free_space_header *header; 126 struct btrfs_inode_item *inode_item; 127 struct extent_buffer *leaf; 128 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 129 int ret; 130 131 ret = btrfs_insert_empty_inode(trans, root, path, ino); 132 if (ret) 133 return ret; 134 135 /* We inline crc's for the free disk space cache */ 136 if (ino != BTRFS_FREE_INO_OBJECTID) 137 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 138 139 leaf = path->nodes[0]; 140 inode_item = btrfs_item_ptr(leaf, path->slots[0], 141 struct btrfs_inode_item); 142 btrfs_item_key(leaf, &disk_key, path->slots[0]); 143 memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 144 sizeof(*inode_item)); 145 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 146 btrfs_set_inode_size(leaf, inode_item, 0); 147 btrfs_set_inode_nbytes(leaf, inode_item, 0); 148 btrfs_set_inode_uid(leaf, inode_item, 0); 149 btrfs_set_inode_gid(leaf, inode_item, 0); 150 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 151 btrfs_set_inode_flags(leaf, inode_item, flags); 152 btrfs_set_inode_nlink(leaf, inode_item, 1); 153 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 154 btrfs_set_inode_block_group(leaf, inode_item, offset); 155 btrfs_mark_buffer_dirty(leaf); 156 btrfs_release_path(path); 157 158 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 159 key.offset = offset; 160 key.type = 0; 161 162 ret = btrfs_insert_empty_item(trans, root, path, &key, 163 sizeof(struct btrfs_free_space_header)); 164 if (ret < 0) { 165 btrfs_release_path(path); 166 return ret; 167 } 168 leaf = path->nodes[0]; 169 header = btrfs_item_ptr(leaf, path->slots[0], 170 struct btrfs_free_space_header); 171 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 172 btrfs_set_free_space_key(leaf, header, &disk_key); 173 btrfs_mark_buffer_dirty(leaf); 174 btrfs_release_path(path); 175 176 return 0; 177 } 178 179 int create_free_space_inode(struct btrfs_root *root, 180 struct btrfs_trans_handle *trans, 181 struct btrfs_block_group_cache *block_group, 182 struct btrfs_path *path) 183 { 184 int ret; 185 u64 ino; 186 187 ret = btrfs_find_free_objectid(root, &ino); 188 if (ret < 0) 189 return ret; 190 191 return __create_free_space_inode(root, trans, path, ino, 192 block_group->key.objectid); 193 } 194 195 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 196 struct btrfs_trans_handle *trans, 197 struct btrfs_path *path, 198 struct inode *inode) 199 { 200 struct btrfs_block_rsv *rsv; 201 u64 needed_bytes; 202 loff_t oldsize; 203 int ret = 0; 204 205 rsv = trans->block_rsv; 206 trans->block_rsv = &root->fs_info->global_block_rsv; 207 208 /* 1 for slack space, 1 for updating the inode */ 209 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 210 btrfs_calc_trans_metadata_size(root, 1); 211 212 spin_lock(&trans->block_rsv->lock); 213 if (trans->block_rsv->reserved < needed_bytes) { 214 spin_unlock(&trans->block_rsv->lock); 215 trans->block_rsv = rsv; 216 return -ENOSPC; 217 } 218 spin_unlock(&trans->block_rsv->lock); 219 220 oldsize = i_size_read(inode); 221 btrfs_i_size_write(inode, 0); 222 truncate_pagecache(inode, oldsize, 0); 223 224 /* 225 * We don't need an orphan item because truncating the free space cache 226 * will never be split across transactions. 
227 */ 228 ret = btrfs_truncate_inode_items(trans, root, inode, 229 0, BTRFS_EXTENT_DATA_KEY); 230 231 if (ret) { 232 trans->block_rsv = rsv; 233 btrfs_abort_transaction(trans, root, ret); 234 return ret; 235 } 236 237 ret = btrfs_update_inode(trans, root, inode); 238 if (ret) 239 btrfs_abort_transaction(trans, root, ret); 240 trans->block_rsv = rsv; 241 242 return ret; 243 } 244 245 static int readahead_cache(struct inode *inode) 246 { 247 struct file_ra_state *ra; 248 unsigned long last_index; 249 250 ra = kzalloc(sizeof(*ra), GFP_NOFS); 251 if (!ra) 252 return -ENOMEM; 253 254 file_ra_state_init(ra, inode->i_mapping); 255 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 256 257 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 258 259 kfree(ra); 260 261 return 0; 262 } 263 264 struct io_ctl { 265 void *cur, *orig; 266 struct page *page; 267 struct page **pages; 268 struct btrfs_root *root; 269 unsigned long size; 270 int index; 271 int num_pages; 272 unsigned check_crcs:1; 273 }; 274 275 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, 276 struct btrfs_root *root) 277 { 278 memset(io_ctl, 0, sizeof(struct io_ctl)); 279 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 280 PAGE_CACHE_SHIFT; 281 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, 282 GFP_NOFS); 283 if (!io_ctl->pages) 284 return -ENOMEM; 285 io_ctl->root = root; 286 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 287 io_ctl->check_crcs = 1; 288 return 0; 289 } 290 291 static void io_ctl_free(struct io_ctl *io_ctl) 292 { 293 kfree(io_ctl->pages); 294 } 295 296 static void io_ctl_unmap_page(struct io_ctl *io_ctl) 297 { 298 if (io_ctl->cur) { 299 kunmap(io_ctl->page); 300 io_ctl->cur = NULL; 301 io_ctl->orig = NULL; 302 } 303 } 304 305 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 306 { 307 WARN_ON(io_ctl->cur); 308 BUG_ON(io_ctl->index >= io_ctl->num_pages); 309 io_ctl->page = io_ctl->pages[io_ctl->index++]; 310 io_ctl->cur = kmap(io_ctl->page); 311 io_ctl->orig = io_ctl->cur; 312 io_ctl->size = PAGE_CACHE_SIZE; 313 if (clear) 314 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); 315 } 316 317 static void io_ctl_drop_pages(struct io_ctl *io_ctl) 318 { 319 int i; 320 321 io_ctl_unmap_page(io_ctl); 322 323 for (i = 0; i < io_ctl->num_pages; i++) { 324 if (io_ctl->pages[i]) { 325 ClearPageChecked(io_ctl->pages[i]); 326 unlock_page(io_ctl->pages[i]); 327 page_cache_release(io_ctl->pages[i]); 328 } 329 } 330 } 331 332 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, 333 int uptodate) 334 { 335 struct page *page; 336 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 337 int i; 338 339 for (i = 0; i < io_ctl->num_pages; i++) { 340 page = find_or_create_page(inode->i_mapping, i, mask); 341 if (!page) { 342 io_ctl_drop_pages(io_ctl); 343 return -ENOMEM; 344 } 345 io_ctl->pages[i] = page; 346 if (uptodate && !PageUptodate(page)) { 347 btrfs_readpage(NULL, page); 348 lock_page(page); 349 if (!PageUptodate(page)) { 350 printk(KERN_ERR "btrfs: error reading free " 351 "space cache\n"); 352 io_ctl_drop_pages(io_ctl); 353 return -EIO; 354 } 355 } 356 } 357 358 for (i = 0; i < io_ctl->num_pages; i++) { 359 clear_page_dirty_for_io(io_ctl->pages[i]); 360 set_page_extent_mapped(io_ctl->pages[i]); 361 } 362 363 return 0; 364 } 365 366 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) 367 { 368 u64 *val; 369 370 io_ctl_map_page(io_ctl, 1); 371 372 /* 373 * Skip the csum areas. 
If we don't check crcs then we just have a 374 * 64bit chunk at the front of the first page. 375 */ 376 if (io_ctl->check_crcs) { 377 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 378 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 379 } else { 380 io_ctl->cur += sizeof(u64); 381 io_ctl->size -= sizeof(u64) * 2; 382 } 383 384 val = io_ctl->cur; 385 *val = cpu_to_le64(generation); 386 io_ctl->cur += sizeof(u64); 387 } 388 389 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) 390 { 391 u64 *gen; 392 393 /* 394 * Skip the crc area. If we don't check crcs then we just have a 64bit 395 * chunk at the front of the first page. 396 */ 397 if (io_ctl->check_crcs) { 398 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 399 io_ctl->size -= sizeof(u64) + 400 (sizeof(u32) * io_ctl->num_pages); 401 } else { 402 io_ctl->cur += sizeof(u64); 403 io_ctl->size -= sizeof(u64) * 2; 404 } 405 406 gen = io_ctl->cur; 407 if (le64_to_cpu(*gen) != generation) { 408 printk_ratelimited(KERN_ERR "btrfs: space cache generation " 409 "(%Lu) does not match inode (%Lu)\n", *gen, 410 generation); 411 io_ctl_unmap_page(io_ctl); 412 return -EIO; 413 } 414 io_ctl->cur += sizeof(u64); 415 return 0; 416 } 417 418 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) 419 { 420 u32 *tmp; 421 u32 crc = ~(u32)0; 422 unsigned offset = 0; 423 424 if (!io_ctl->check_crcs) { 425 io_ctl_unmap_page(io_ctl); 426 return; 427 } 428 429 if (index == 0) 430 offset = sizeof(u32) * io_ctl->num_pages; 431 432 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 433 PAGE_CACHE_SIZE - offset); 434 btrfs_csum_final(crc, (char *)&crc); 435 io_ctl_unmap_page(io_ctl); 436 tmp = kmap(io_ctl->pages[0]); 437 tmp += index; 438 *tmp = crc; 439 kunmap(io_ctl->pages[0]); 440 } 441 442 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) 443 { 444 u32 *tmp, val; 445 u32 crc = ~(u32)0; 446 unsigned offset = 0; 447 448 if (!io_ctl->check_crcs) { 449 io_ctl_map_page(io_ctl, 0); 450 return 0; 451 } 452 453 if (index == 0) 454 offset = sizeof(u32) * io_ctl->num_pages; 455 456 tmp = kmap(io_ctl->pages[0]); 457 tmp += index; 458 val = *tmp; 459 kunmap(io_ctl->pages[0]); 460 461 io_ctl_map_page(io_ctl, 0); 462 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 463 PAGE_CACHE_SIZE - offset); 464 btrfs_csum_final(crc, (char *)&crc); 465 if (val != crc) { 466 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " 467 "space cache\n"); 468 io_ctl_unmap_page(io_ctl); 469 return -EIO; 470 } 471 472 return 0; 473 } 474 475 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, 476 void *bitmap) 477 { 478 struct btrfs_free_space_entry *entry; 479 480 if (!io_ctl->cur) 481 return -ENOSPC; 482 483 entry = io_ctl->cur; 484 entry->offset = cpu_to_le64(offset); 485 entry->bytes = cpu_to_le64(bytes); 486 entry->type = (bitmap) ? 
BTRFS_FREE_SPACE_BITMAP : 487 BTRFS_FREE_SPACE_EXTENT; 488 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 489 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 490 491 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 492 return 0; 493 494 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 495 496 /* No more pages to map */ 497 if (io_ctl->index >= io_ctl->num_pages) 498 return 0; 499 500 /* map the next page */ 501 io_ctl_map_page(io_ctl, 1); 502 return 0; 503 } 504 505 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) 506 { 507 if (!io_ctl->cur) 508 return -ENOSPC; 509 510 /* 511 * If we aren't at the start of the current page, unmap this one and 512 * map the next one if there is any left. 513 */ 514 if (io_ctl->cur != io_ctl->orig) { 515 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 516 if (io_ctl->index >= io_ctl->num_pages) 517 return -ENOSPC; 518 io_ctl_map_page(io_ctl, 0); 519 } 520 521 memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); 522 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 523 if (io_ctl->index < io_ctl->num_pages) 524 io_ctl_map_page(io_ctl, 0); 525 return 0; 526 } 527 528 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) 529 { 530 /* 531 * If we're not on the boundary we know we've modified the page and we 532 * need to crc the page. 533 */ 534 if (io_ctl->cur != io_ctl->orig) 535 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 536 else 537 io_ctl_unmap_page(io_ctl); 538 539 while (io_ctl->index < io_ctl->num_pages) { 540 io_ctl_map_page(io_ctl, 1); 541 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 542 } 543 } 544 545 static int io_ctl_read_entry(struct io_ctl *io_ctl, 546 struct btrfs_free_space *entry, u8 *type) 547 { 548 struct btrfs_free_space_entry *e; 549 int ret; 550 551 if (!io_ctl->cur) { 552 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 553 if (ret) 554 return ret; 555 } 556 557 e = io_ctl->cur; 558 entry->offset = le64_to_cpu(e->offset); 559 entry->bytes = le64_to_cpu(e->bytes); 560 *type = e->type; 561 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 562 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 563 564 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 565 return 0; 566 567 io_ctl_unmap_page(io_ctl); 568 569 return 0; 570 } 571 572 static int io_ctl_read_bitmap(struct io_ctl *io_ctl, 573 struct btrfs_free_space *entry) 574 { 575 int ret; 576 577 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 578 if (ret) 579 return ret; 580 581 memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); 582 io_ctl_unmap_page(io_ctl); 583 584 return 0; 585 } 586 587 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 588 struct btrfs_free_space_ctl *ctl, 589 struct btrfs_path *path, u64 offset) 590 { 591 struct btrfs_free_space_header *header; 592 struct extent_buffer *leaf; 593 struct io_ctl io_ctl; 594 struct btrfs_key key; 595 struct btrfs_free_space *e, *n; 596 struct list_head bitmaps; 597 u64 num_entries; 598 u64 num_bitmaps; 599 u64 generation; 600 u8 type; 601 int ret = 0; 602 603 INIT_LIST_HEAD(&bitmaps); 604 605 /* Nothing in the space cache, goodbye */ 606 if (!i_size_read(inode)) 607 return 0; 608 609 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 610 key.offset = offset; 611 key.type = 0; 612 613 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 614 if (ret < 0) 615 return 0; 616 else if (ret > 0) { 617 btrfs_release_path(path); 618 return 0; 619 } 620 621 ret = -1; 622 623 leaf = path->nodes[0]; 624 header = btrfs_item_ptr(leaf, path->slots[0], 625 struct btrfs_free_space_header); 626 
num_entries = btrfs_free_space_entries(leaf, header); 627 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 628 generation = btrfs_free_space_generation(leaf, header); 629 btrfs_release_path(path); 630 631 if (BTRFS_I(inode)->generation != generation) { 632 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 633 " not match free space cache generation (%llu)\n", 634 (unsigned long long)BTRFS_I(inode)->generation, 635 (unsigned long long)generation); 636 return 0; 637 } 638 639 if (!num_entries) 640 return 0; 641 642 ret = io_ctl_init(&io_ctl, inode, root); 643 if (ret) 644 return ret; 645 646 ret = readahead_cache(inode); 647 if (ret) 648 goto out; 649 650 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 651 if (ret) 652 goto out; 653 654 ret = io_ctl_check_crc(&io_ctl, 0); 655 if (ret) 656 goto free_cache; 657 658 ret = io_ctl_check_generation(&io_ctl, generation); 659 if (ret) 660 goto free_cache; 661 662 while (num_entries) { 663 e = kmem_cache_zalloc(btrfs_free_space_cachep, 664 GFP_NOFS); 665 if (!e) 666 goto free_cache; 667 668 ret = io_ctl_read_entry(&io_ctl, e, &type); 669 if (ret) { 670 kmem_cache_free(btrfs_free_space_cachep, e); 671 goto free_cache; 672 } 673 674 if (!e->bytes) { 675 kmem_cache_free(btrfs_free_space_cachep, e); 676 goto free_cache; 677 } 678 679 if (type == BTRFS_FREE_SPACE_EXTENT) { 680 spin_lock(&ctl->tree_lock); 681 ret = link_free_space(ctl, e); 682 spin_unlock(&ctl->tree_lock); 683 if (ret) { 684 printk(KERN_ERR "Duplicate entries in " 685 "free space cache, dumping\n"); 686 kmem_cache_free(btrfs_free_space_cachep, e); 687 goto free_cache; 688 } 689 } else { 690 BUG_ON(!num_bitmaps); 691 num_bitmaps--; 692 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 693 if (!e->bitmap) { 694 kmem_cache_free( 695 btrfs_free_space_cachep, e); 696 goto free_cache; 697 } 698 spin_lock(&ctl->tree_lock); 699 ret = link_free_space(ctl, e); 700 ctl->total_bitmaps++; 701 ctl->op->recalc_thresholds(ctl); 702 spin_unlock(&ctl->tree_lock); 703 if (ret) { 704 printk(KERN_ERR "Duplicate entries in " 705 "free space cache, dumping\n"); 706 kmem_cache_free(btrfs_free_space_cachep, e); 707 goto free_cache; 708 } 709 list_add_tail(&e->list, &bitmaps); 710 } 711 712 num_entries--; 713 } 714 715 io_ctl_unmap_page(&io_ctl); 716 717 /* 718 * We add the bitmaps at the end of the entries in order that 719 * the bitmap entries are added to the cache. 720 */ 721 list_for_each_entry_safe(e, n, &bitmaps, list) { 722 list_del_init(&e->list); 723 ret = io_ctl_read_bitmap(&io_ctl, e); 724 if (ret) 725 goto free_cache; 726 } 727 728 io_ctl_drop_pages(&io_ctl); 729 ret = 1; 730 out: 731 io_ctl_free(&io_ctl); 732 return ret; 733 free_cache: 734 io_ctl_drop_pages(&io_ctl); 735 __btrfs_remove_free_space_cache(ctl); 736 goto out; 737 } 738 739 int load_free_space_cache(struct btrfs_fs_info *fs_info, 740 struct btrfs_block_group_cache *block_group) 741 { 742 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 743 struct btrfs_root *root = fs_info->tree_root; 744 struct inode *inode; 745 struct btrfs_path *path; 746 int ret = 0; 747 bool matched; 748 u64 used = btrfs_block_group_used(&block_group->item); 749 750 /* 751 * If this block group has been marked to be cleared for one reason or 752 * another then we can't trust the on disk cache, so just return. 
 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct io_ctl io_ctl;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, extent_start, extent_end, len;
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int err = -1;

	INIT_LIST_HEAD(&bitmap_list);

	if (!i_size_read(inode))
		return -1;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/* Lock all pages first so we can lock the extent safely.
*/ 866 io_ctl_prepare_pages(&io_ctl, inode, 0); 867 868 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 869 0, &cached_state); 870 871 node = rb_first(&ctl->free_space_offset); 872 if (!node && cluster) { 873 node = rb_first(&cluster->root); 874 cluster = NULL; 875 } 876 877 /* Make sure we can fit our crcs into the first page */ 878 if (io_ctl.check_crcs && 879 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { 880 WARN_ON(1); 881 goto out_nospc; 882 } 883 884 io_ctl_set_generation(&io_ctl, trans->transid); 885 886 /* Write out the extent entries */ 887 while (node) { 888 struct btrfs_free_space *e; 889 890 e = rb_entry(node, struct btrfs_free_space, offset_index); 891 entries++; 892 893 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, 894 e->bitmap); 895 if (ret) 896 goto out_nospc; 897 898 if (e->bitmap) { 899 list_add_tail(&e->list, &bitmap_list); 900 bitmaps++; 901 } 902 node = rb_next(node); 903 if (!node && cluster) { 904 node = rb_first(&cluster->root); 905 cluster = NULL; 906 } 907 } 908 909 /* 910 * We want to add any pinned extents to our free space cache 911 * so we don't leak the space 912 */ 913 914 /* 915 * We shouldn't have switched the pinned extents yet so this is the 916 * right one 917 */ 918 unpin = root->fs_info->pinned_extents; 919 920 if (block_group) 921 start = block_group->key.objectid; 922 923 while (block_group && (start < block_group->key.objectid + 924 block_group->key.offset)) { 925 ret = find_first_extent_bit(unpin, start, 926 &extent_start, &extent_end, 927 EXTENT_DIRTY); 928 if (ret) { 929 ret = 0; 930 break; 931 } 932 933 /* This pinned extent is out of our range */ 934 if (extent_start >= block_group->key.objectid + 935 block_group->key.offset) 936 break; 937 938 extent_start = max(extent_start, start); 939 extent_end = min(block_group->key.objectid + 940 block_group->key.offset, extent_end + 1); 941 len = extent_end - extent_start; 942 943 entries++; 944 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL); 945 if (ret) 946 goto out_nospc; 947 948 start = extent_end; 949 } 950 951 /* Write out the bitmaps */ 952 list_for_each_safe(pos, n, &bitmap_list) { 953 struct btrfs_free_space *entry = 954 list_entry(pos, struct btrfs_free_space, list); 955 956 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); 957 if (ret) 958 goto out_nospc; 959 list_del_init(&entry->list); 960 } 961 962 /* Zero out the rest of the pages just to make sure */ 963 io_ctl_zero_remaining_pages(&io_ctl); 964 965 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, 966 0, i_size_read(inode), &cached_state); 967 io_ctl_drop_pages(&io_ctl); 968 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 969 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 970 971 if (ret) 972 goto out; 973 974 975 ret = filemap_write_and_wait(inode->i_mapping); 976 if (ret) 977 goto out; 978 979 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 980 key.offset = offset; 981 key.type = 0; 982 983 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 984 if (ret < 0) { 985 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 986 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 987 GFP_NOFS); 988 goto out; 989 } 990 leaf = path->nodes[0]; 991 if (ret > 0) { 992 struct btrfs_key found_key; 993 BUG_ON(!path->slots[0]); 994 path->slots[0]--; 995 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 996 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 997 found_key.offset != offset) { 998 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 999 inode->i_size - 1, 1000 
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1001 NULL, GFP_NOFS); 1002 btrfs_release_path(path); 1003 goto out; 1004 } 1005 } 1006 1007 BTRFS_I(inode)->generation = trans->transid; 1008 header = btrfs_item_ptr(leaf, path->slots[0], 1009 struct btrfs_free_space_header); 1010 btrfs_set_free_space_entries(leaf, header, entries); 1011 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1012 btrfs_set_free_space_generation(leaf, header, trans->transid); 1013 btrfs_mark_buffer_dirty(leaf); 1014 btrfs_release_path(path); 1015 1016 err = 0; 1017 out: 1018 io_ctl_free(&io_ctl); 1019 if (err) { 1020 invalidate_inode_pages2(inode->i_mapping); 1021 BTRFS_I(inode)->generation = 0; 1022 } 1023 btrfs_update_inode(trans, root, inode); 1024 return err; 1025 1026 out_nospc: 1027 list_for_each_safe(pos, n, &bitmap_list) { 1028 struct btrfs_free_space *entry = 1029 list_entry(pos, struct btrfs_free_space, list); 1030 list_del_init(&entry->list); 1031 } 1032 io_ctl_drop_pages(&io_ctl); 1033 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1034 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1035 goto out; 1036 } 1037 1038 int btrfs_write_out_cache(struct btrfs_root *root, 1039 struct btrfs_trans_handle *trans, 1040 struct btrfs_block_group_cache *block_group, 1041 struct btrfs_path *path) 1042 { 1043 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1044 struct inode *inode; 1045 int ret = 0; 1046 1047 root = root->fs_info->tree_root; 1048 1049 spin_lock(&block_group->lock); 1050 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1051 spin_unlock(&block_group->lock); 1052 return 0; 1053 } 1054 spin_unlock(&block_group->lock); 1055 1056 inode = lookup_free_space_inode(root, block_group, path); 1057 if (IS_ERR(inode)) 1058 return 0; 1059 1060 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1061 path, block_group->key.objectid); 1062 if (ret) { 1063 spin_lock(&block_group->lock); 1064 block_group->disk_cache_state = BTRFS_DC_ERROR; 1065 spin_unlock(&block_group->lock); 1066 ret = 0; 1067 #ifdef DEBUG 1068 printk(KERN_ERR "btrfs: failed to write free space cache " 1069 "for block group %llu\n", block_group->key.objectid); 1070 #endif 1071 } 1072 1073 iput(inode); 1074 return ret; 1075 } 1076 1077 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1078 u64 offset) 1079 { 1080 BUG_ON(offset < bitmap_start); 1081 offset -= bitmap_start; 1082 return (unsigned long)(div_u64(offset, unit)); 1083 } 1084 1085 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1086 { 1087 return (unsigned long)(div_u64(bytes, unit)); 1088 } 1089 1090 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1091 u64 offset) 1092 { 1093 u64 bitmap_start; 1094 u64 bytes_per_bitmap; 1095 1096 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1097 bitmap_start = offset - ctl->start; 1098 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1099 bitmap_start *= bytes_per_bitmap; 1100 bitmap_start += ctl->start; 1101 1102 return bitmap_start; 1103 } 1104 1105 static int tree_insert_offset(struct rb_root *root, u64 offset, 1106 struct rb_node *node, int bitmap) 1107 { 1108 struct rb_node **p = &root->rb_node; 1109 struct rb_node *parent = NULL; 1110 struct btrfs_free_space *info; 1111 1112 while (*p) { 1113 parent = *p; 1114 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1115 1116 if (offset < info->offset) { 1117 p = &(*p)->rb_left; 1118 } else if (offset > info->offset) { 1119 p = &(*p)->rb_right; 1120 } else { 1121 /* 1122 * we could have a bitmap 
entry and an extent entry 1123 * share the same offset. If this is the case, we want 1124 * the extent entry to always be found first if we do a 1125 * linear search through the tree, since we want to have 1126 * the quickest allocation time, and allocating from an 1127 * extent is faster than allocating from a bitmap. So 1128 * if we're inserting a bitmap and we find an entry at 1129 * this offset, we want to go right, or after this entry 1130 * logically. If we are inserting an extent and we've 1131 * found a bitmap, we want to go left, or before 1132 * logically. 1133 */ 1134 if (bitmap) { 1135 if (info->bitmap) { 1136 WARN_ON_ONCE(1); 1137 return -EEXIST; 1138 } 1139 p = &(*p)->rb_right; 1140 } else { 1141 if (!info->bitmap) { 1142 WARN_ON_ONCE(1); 1143 return -EEXIST; 1144 } 1145 p = &(*p)->rb_left; 1146 } 1147 } 1148 } 1149 1150 rb_link_node(node, parent, p); 1151 rb_insert_color(node, root); 1152 1153 return 0; 1154 } 1155 1156 /* 1157 * searches the tree for the given offset. 1158 * 1159 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1160 * want a section that has at least bytes size and comes at or after the given 1161 * offset. 1162 */ 1163 static struct btrfs_free_space * 1164 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1165 u64 offset, int bitmap_only, int fuzzy) 1166 { 1167 struct rb_node *n = ctl->free_space_offset.rb_node; 1168 struct btrfs_free_space *entry, *prev = NULL; 1169 1170 /* find entry that is closest to the 'offset' */ 1171 while (1) { 1172 if (!n) { 1173 entry = NULL; 1174 break; 1175 } 1176 1177 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1178 prev = entry; 1179 1180 if (offset < entry->offset) 1181 n = n->rb_left; 1182 else if (offset > entry->offset) 1183 n = n->rb_right; 1184 else 1185 break; 1186 } 1187 1188 if (bitmap_only) { 1189 if (!entry) 1190 return NULL; 1191 if (entry->bitmap) 1192 return entry; 1193 1194 /* 1195 * bitmap entry and extent entry may share same offset, 1196 * in that case, bitmap entry comes after extent entry. 
1197 */ 1198 n = rb_next(n); 1199 if (!n) 1200 return NULL; 1201 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1202 if (entry->offset != offset) 1203 return NULL; 1204 1205 WARN_ON(!entry->bitmap); 1206 return entry; 1207 } else if (entry) { 1208 if (entry->bitmap) { 1209 /* 1210 * if previous extent entry covers the offset, 1211 * we should return it instead of the bitmap entry 1212 */ 1213 n = &entry->offset_index; 1214 while (1) { 1215 n = rb_prev(n); 1216 if (!n) 1217 break; 1218 prev = rb_entry(n, struct btrfs_free_space, 1219 offset_index); 1220 if (!prev->bitmap) { 1221 if (prev->offset + prev->bytes > offset) 1222 entry = prev; 1223 break; 1224 } 1225 } 1226 } 1227 return entry; 1228 } 1229 1230 if (!prev) 1231 return NULL; 1232 1233 /* find last entry before the 'offset' */ 1234 entry = prev; 1235 if (entry->offset > offset) { 1236 n = rb_prev(&entry->offset_index); 1237 if (n) { 1238 entry = rb_entry(n, struct btrfs_free_space, 1239 offset_index); 1240 BUG_ON(entry->offset > offset); 1241 } else { 1242 if (fuzzy) 1243 return entry; 1244 else 1245 return NULL; 1246 } 1247 } 1248 1249 if (entry->bitmap) { 1250 n = &entry->offset_index; 1251 while (1) { 1252 n = rb_prev(n); 1253 if (!n) 1254 break; 1255 prev = rb_entry(n, struct btrfs_free_space, 1256 offset_index); 1257 if (!prev->bitmap) { 1258 if (prev->offset + prev->bytes > offset) 1259 return prev; 1260 break; 1261 } 1262 } 1263 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1264 return entry; 1265 } else if (entry->offset + entry->bytes > offset) 1266 return entry; 1267 1268 if (!fuzzy) 1269 return NULL; 1270 1271 while (1) { 1272 if (entry->bitmap) { 1273 if (entry->offset + BITS_PER_BITMAP * 1274 ctl->unit > offset) 1275 break; 1276 } else { 1277 if (entry->offset + entry->bytes > offset) 1278 break; 1279 } 1280 1281 n = rb_next(&entry->offset_index); 1282 if (!n) 1283 return NULL; 1284 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1285 } 1286 return entry; 1287 } 1288 1289 static inline void 1290 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1291 struct btrfs_free_space *info) 1292 { 1293 rb_erase(&info->offset_index, &ctl->free_space_offset); 1294 ctl->free_extents--; 1295 } 1296 1297 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1298 struct btrfs_free_space *info) 1299 { 1300 __unlink_free_space(ctl, info); 1301 ctl->free_space -= info->bytes; 1302 } 1303 1304 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1305 struct btrfs_free_space *info) 1306 { 1307 int ret = 0; 1308 1309 BUG_ON(!info->bitmap && !info->bytes); 1310 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1311 &info->offset_index, (info->bitmap != NULL)); 1312 if (ret) 1313 return ret; 1314 1315 ctl->free_space += info->bytes; 1316 ctl->free_extents++; 1317 return ret; 1318 } 1319 1320 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1321 { 1322 struct btrfs_block_group_cache *block_group = ctl->private; 1323 u64 max_bytes; 1324 u64 bitmap_bytes; 1325 u64 extent_bytes; 1326 u64 size = block_group->key.offset; 1327 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1328 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1329 1330 BUG_ON(ctl->total_bitmaps > max_bitmaps); 1331 1332 /* 1333 * The goal is to keep the total amount of memory used per 1gb of space 1334 * at or below 32k, so we need to adjust how much memory we allow to be 1335 * used by extent based free space tracking 1336 */ 1337 if (size < 1024 * 1024 * 
1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

/*
 * Look for a run of at least *bytes free bytes in this bitmap, starting at
 * or after *offset.  On success, *offset and *bytes are updated to describe
 * the run that was found and 0 is returned; otherwise -1 is returned.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

/*
 * Set bits for [offset, offset + bytes) in this bitmap, clamped to the end
 * of the bitmap, and return how many bytes were actually accounted for.
 */
static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds = recalculate_thresholds,
	.use_bitmap = use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
1652 */ 1653 if (block_group && !list_empty(&block_group->cluster_list)) { 1654 struct btrfs_free_cluster *cluster; 1655 struct rb_node *node; 1656 struct btrfs_free_space *entry; 1657 1658 cluster = list_entry(block_group->cluster_list.next, 1659 struct btrfs_free_cluster, 1660 block_group_list); 1661 spin_lock(&cluster->lock); 1662 node = rb_first(&cluster->root); 1663 if (!node) { 1664 spin_unlock(&cluster->lock); 1665 goto no_cluster_bitmap; 1666 } 1667 1668 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1669 if (!entry->bitmap) { 1670 spin_unlock(&cluster->lock); 1671 goto no_cluster_bitmap; 1672 } 1673 1674 if (entry->offset == offset_to_bitmap(ctl, offset)) { 1675 bytes_added = add_bytes_to_bitmap(ctl, entry, 1676 offset, bytes); 1677 bytes -= bytes_added; 1678 offset += bytes_added; 1679 } 1680 spin_unlock(&cluster->lock); 1681 if (!bytes) { 1682 ret = 1; 1683 goto out; 1684 } 1685 } 1686 1687 no_cluster_bitmap: 1688 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1689 1, 0); 1690 if (!bitmap_info) { 1691 BUG_ON(added); 1692 goto new_bitmap; 1693 } 1694 1695 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 1696 bytes -= bytes_added; 1697 offset += bytes_added; 1698 added = 0; 1699 1700 if (!bytes) { 1701 ret = 1; 1702 goto out; 1703 } else 1704 goto again; 1705 1706 new_bitmap: 1707 if (info && info->bitmap) { 1708 add_new_bitmap(ctl, info, offset); 1709 added = 1; 1710 info = NULL; 1711 goto again; 1712 } else { 1713 spin_unlock(&ctl->tree_lock); 1714 1715 /* no pre-allocated info, allocate a new one */ 1716 if (!info) { 1717 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1718 GFP_NOFS); 1719 if (!info) { 1720 spin_lock(&ctl->tree_lock); 1721 ret = -ENOMEM; 1722 goto out; 1723 } 1724 } 1725 1726 /* allocate the bitmap */ 1727 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1728 spin_lock(&ctl->tree_lock); 1729 if (!info->bitmap) { 1730 ret = -ENOMEM; 1731 goto out; 1732 } 1733 goto again; 1734 } 1735 1736 out: 1737 if (info) { 1738 if (info->bitmap) 1739 kfree(info->bitmap); 1740 kmem_cache_free(btrfs_free_space_cachep, info); 1741 } 1742 1743 return ret; 1744 } 1745 1746 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 1747 struct btrfs_free_space *info, bool update_stat) 1748 { 1749 struct btrfs_free_space *left_info; 1750 struct btrfs_free_space *right_info; 1751 bool merged = false; 1752 u64 offset = info->offset; 1753 u64 bytes = info->bytes; 1754 1755 /* 1756 * first we want to see if there is free space adjacent to the range we 1757 * are adding, if there is remove that struct and add a new one to 1758 * cover the entire range 1759 */ 1760 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 1761 if (right_info && rb_prev(&right_info->offset_index)) 1762 left_info = rb_entry(rb_prev(&right_info->offset_index), 1763 struct btrfs_free_space, offset_index); 1764 else 1765 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 1766 1767 if (right_info && !right_info->bitmap) { 1768 if (update_stat) 1769 unlink_free_space(ctl, right_info); 1770 else 1771 __unlink_free_space(ctl, right_info); 1772 info->bytes += right_info->bytes; 1773 kmem_cache_free(btrfs_free_space_cachep, right_info); 1774 merged = true; 1775 } 1776 1777 if (left_info && !left_info->bitmap && 1778 left_info->offset + left_info->bytes == offset) { 1779 if (update_stat) 1780 unlink_free_space(ctl, left_info); 1781 else 1782 __unlink_free_space(ctl, left_info); 1783 info->offset = left_info->offset; 1784 info->bytes += 
left_info->bytes; 1785 kmem_cache_free(btrfs_free_space_cachep, left_info); 1786 merged = true; 1787 } 1788 1789 return merged; 1790 } 1791 1792 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, 1793 u64 offset, u64 bytes) 1794 { 1795 struct btrfs_free_space *info; 1796 int ret = 0; 1797 1798 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 1799 if (!info) 1800 return -ENOMEM; 1801 1802 info->offset = offset; 1803 info->bytes = bytes; 1804 1805 spin_lock(&ctl->tree_lock); 1806 1807 if (try_merge_free_space(ctl, info, true)) 1808 goto link; 1809 1810 /* 1811 * There was no extent directly to the left or right of this new 1812 * extent then we know we're going to have to allocate a new extent, so 1813 * before we do that see if we need to drop this into a bitmap 1814 */ 1815 ret = insert_into_bitmap(ctl, info); 1816 if (ret < 0) { 1817 goto out; 1818 } else if (ret) { 1819 ret = 0; 1820 goto out; 1821 } 1822 link: 1823 ret = link_free_space(ctl, info); 1824 if (ret) 1825 kmem_cache_free(btrfs_free_space_cachep, info); 1826 out: 1827 spin_unlock(&ctl->tree_lock); 1828 1829 if (ret) { 1830 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1831 BUG_ON(ret == -EEXIST); 1832 } 1833 1834 return ret; 1835 } 1836 1837 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1838 u64 offset, u64 bytes) 1839 { 1840 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1841 struct btrfs_free_space *info; 1842 struct btrfs_free_space *next_info = NULL; 1843 int ret = 0; 1844 1845 spin_lock(&ctl->tree_lock); 1846 1847 again: 1848 info = tree_search_offset(ctl, offset, 0, 0); 1849 if (!info) { 1850 /* 1851 * oops didn't find an extent that matched the space we wanted 1852 * to remove, look for a bitmap instead 1853 */ 1854 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1855 1, 0); 1856 if (!info) { 1857 /* the tree logging code might be calling us before we 1858 * have fully loaded the free space rbtree for this 1859 * block group. So it is possible the entry won't 1860 * be in the rbtree yet at all. The caching code 1861 * will make sure not to put it in the rbtree if 1862 * the logging code has pinned it. 
1863 */ 1864 goto out_lock; 1865 } 1866 } 1867 1868 if (info->bytes < bytes && rb_next(&info->offset_index)) { 1869 u64 end; 1870 next_info = rb_entry(rb_next(&info->offset_index), 1871 struct btrfs_free_space, 1872 offset_index); 1873 1874 if (next_info->bitmap) 1875 end = next_info->offset + 1876 BITS_PER_BITMAP * ctl->unit - 1; 1877 else 1878 end = next_info->offset + next_info->bytes; 1879 1880 if (next_info->bytes < bytes || 1881 next_info->offset > offset || offset > end) { 1882 printk(KERN_CRIT "Found free space at %llu, size %llu," 1883 " trying to use %llu\n", 1884 (unsigned long long)info->offset, 1885 (unsigned long long)info->bytes, 1886 (unsigned long long)bytes); 1887 WARN_ON(1); 1888 ret = -EINVAL; 1889 goto out_lock; 1890 } 1891 1892 info = next_info; 1893 } 1894 1895 if (info->bytes == bytes) { 1896 unlink_free_space(ctl, info); 1897 if (info->bitmap) { 1898 kfree(info->bitmap); 1899 ctl->total_bitmaps--; 1900 } 1901 kmem_cache_free(btrfs_free_space_cachep, info); 1902 ret = 0; 1903 goto out_lock; 1904 } 1905 1906 if (!info->bitmap && info->offset == offset) { 1907 unlink_free_space(ctl, info); 1908 info->offset += bytes; 1909 info->bytes -= bytes; 1910 ret = link_free_space(ctl, info); 1911 WARN_ON(ret); 1912 goto out_lock; 1913 } 1914 1915 if (!info->bitmap && info->offset <= offset && 1916 info->offset + info->bytes >= offset + bytes) { 1917 u64 old_start = info->offset; 1918 /* 1919 * we're freeing space in the middle of the info, 1920 * this can happen during tree log replay 1921 * 1922 * first unlink the old info and then 1923 * insert it again after the hole we're creating 1924 */ 1925 unlink_free_space(ctl, info); 1926 if (offset + bytes < info->offset + info->bytes) { 1927 u64 old_end = info->offset + info->bytes; 1928 1929 info->offset = offset + bytes; 1930 info->bytes = old_end - info->offset; 1931 ret = link_free_space(ctl, info); 1932 WARN_ON(ret); 1933 if (ret) 1934 goto out_lock; 1935 } else { 1936 /* the hole we're creating ends at the end 1937 * of the info struct, just free the info 1938 */ 1939 kmem_cache_free(btrfs_free_space_cachep, info); 1940 } 1941 spin_unlock(&ctl->tree_lock); 1942 1943 /* step two, insert a new info struct to cover 1944 * anything before the hole 1945 */ 1946 ret = btrfs_add_free_space(block_group, old_start, 1947 offset - old_start); 1948 WARN_ON(ret); /* -ENOMEM */ 1949 goto out; 1950 } 1951 1952 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1953 if (ret == -EAGAIN) 1954 goto again; 1955 BUG_ON(ret); /* logic error */ 1956 out_lock: 1957 spin_unlock(&ctl->tree_lock); 1958 out: 1959 return ret; 1960 } 1961 1962 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1963 u64 bytes) 1964 { 1965 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1966 struct btrfs_free_space *info; 1967 struct rb_node *n; 1968 int count = 0; 1969 1970 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 1971 info = rb_entry(n, struct btrfs_free_space, offset_index); 1972 if (info->bytes >= bytes) 1973 count++; 1974 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 1975 (unsigned long long)info->offset, 1976 (unsigned long long)info->bytes, 1977 (info->bitmap) ? "yes" : "no"); 1978 } 1979 printk(KERN_INFO "block group has cluster?: %s\n", 1980 list_empty(&block_group->cluster_list) ? 
"no" : "yes"); 1981 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 1982 "\n", count); 1983 } 1984 1985 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1986 { 1987 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1988 1989 spin_lock_init(&ctl->tree_lock); 1990 ctl->unit = block_group->sectorsize; 1991 ctl->start = block_group->key.objectid; 1992 ctl->private = block_group; 1993 ctl->op = &free_space_op; 1994 1995 /* 1996 * we only want to have 32k of ram per block group for keeping 1997 * track of free space, and if we pass 1/2 of that we want to 1998 * start converting things over to using bitmaps 1999 */ 2000 ctl->extents_thresh = ((1024 * 32) / 2) / 2001 sizeof(struct btrfs_free_space); 2002 } 2003 2004 /* 2005 * for a given cluster, put all of its extents back into the free 2006 * space cache. If the block group passed doesn't match the block group 2007 * pointed to by the cluster, someone else raced in and freed the 2008 * cluster already. In that case, we just return without changing anything 2009 */ 2010 static int 2011 __btrfs_return_cluster_to_free_space( 2012 struct btrfs_block_group_cache *block_group, 2013 struct btrfs_free_cluster *cluster) 2014 { 2015 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2016 struct btrfs_free_space *entry; 2017 struct rb_node *node; 2018 2019 spin_lock(&cluster->lock); 2020 if (cluster->block_group != block_group) 2021 goto out; 2022 2023 cluster->block_group = NULL; 2024 cluster->window_start = 0; 2025 list_del_init(&cluster->block_group_list); 2026 2027 node = rb_first(&cluster->root); 2028 while (node) { 2029 bool bitmap; 2030 2031 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2032 node = rb_next(&entry->offset_index); 2033 rb_erase(&entry->offset_index, &cluster->root); 2034 2035 bitmap = (entry->bitmap != NULL); 2036 if (!bitmap) 2037 try_merge_free_space(ctl, entry, false); 2038 tree_insert_offset(&ctl->free_space_offset, 2039 entry->offset, &entry->offset_index, bitmap); 2040 } 2041 cluster->root = RB_ROOT; 2042 2043 out: 2044 spin_unlock(&cluster->lock); 2045 btrfs_put_block_group(block_group); 2046 return 0; 2047 } 2048 2049 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) 2050 { 2051 struct btrfs_free_space *info; 2052 struct rb_node *node; 2053 2054 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2055 info = rb_entry(node, struct btrfs_free_space, offset_index); 2056 if (!info->bitmap) { 2057 unlink_free_space(ctl, info); 2058 kmem_cache_free(btrfs_free_space_cachep, info); 2059 } else { 2060 free_bitmap(ctl, info); 2061 } 2062 if (need_resched()) { 2063 spin_unlock(&ctl->tree_lock); 2064 cond_resched(); 2065 spin_lock(&ctl->tree_lock); 2066 } 2067 } 2068 } 2069 2070 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2071 { 2072 spin_lock(&ctl->tree_lock); 2073 __btrfs_remove_free_space_cache_locked(ctl); 2074 spin_unlock(&ctl->tree_lock); 2075 } 2076 2077 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2078 { 2079 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2080 struct btrfs_free_cluster *cluster; 2081 struct list_head *head; 2082 2083 spin_lock(&ctl->tree_lock); 2084 while ((head = block_group->cluster_list.next) != 2085 &block_group->cluster_list) { 2086 cluster = list_entry(head, struct btrfs_free_cluster, 2087 block_group_list); 2088 2089 WARN_ON(cluster->block_group != block_group); 2090 
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

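/*
 * Carve 'bytes' out of a bitmap entry on behalf of a cluster allocation.
 * Returns the start of the run that was found, or 0 if the bitmap has no
 * suitable run at or after min_start.  The caller (btrfs_alloc_from_cluster)
 * holds cluster->lock; the ctl-wide accounting is adjusted later under
 * ctl->tree_lock, which is why the __bitmap_clear_bits() variant is used.
 */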
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   struct btrfs_free_space *entry,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
	if (err)
		return 0;

	ret = search_start;
	__bitmap_clear_bits(ctl, entry, ret, bytes);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      cluster->window_start);
			if (ret == 0) {
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
			cluster->window_start += bytes;
		} else {
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}

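/*
 * Try to build a cluster out of a single bitmap entry: scan the bitmap
 * for runs of set bits (ignoring runs shorter than min_bytes) until at
 * least 'bytes' worth have been found in total and at least one run is
 * cont1_bytes long, then move the bitmap from the free space tree into
 * the cluster's rbtree.
 */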
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes,
				u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long want_bits;
	unsigned long min_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	want_bits = bytes_to_bits(bytes, block_group->sectorsize);
	min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= min_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -ENOSPC;

	if (!total_found) {
		start = i;
		cluster->max_size = 0;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
		i = next_zero + 1;
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret); /* -EEXIST; Logic error */

	trace_btrfs_setup_cluster(block_group, cluster,
				  total_found * block_group->sectorsize, 1);
	return 0;
}

/*
 * This searches the block group for just extents to fill the cluster with.
 * Try to find a cluster with at least bytes total bytes, at least one
 * extent of cont1_bytes, and other extents of at least min_bytes.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 total_size = 0;

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap || entry->bytes < min_bytes) {
		if (entry->bitmap && list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;

	for (node = rb_next(&entry->offset_index); node;
	     node = rb_next(&entry->offset_index)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		if (entry->bytes < min_bytes)
			continue;

		last = entry;
		window_free += entry->bytes;
		if (entry->bytes > max_extent)
			max_extent = entry->bytes;
	}

	if (window_free < bytes || max_extent < cont1_bytes)
		return -ENOSPC;

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap || entry->bytes < min_bytes)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		total_size += entry->bytes;
		BUG_ON(ret); /* -EEXIST; Logic error */
	} while (node && entry != last);

	cluster->max_size = max_extent;
	trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
	return 0;
}

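/*
 * Note: the 'bitmaps' list handed to setup_cluster_bitmap() below is
 * filled in by setup_cluster_no_bitmap(), which queues up any bitmap
 * entries it had to skip while scanning for plain extents.
 */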
/*
 * This specifically looks for bitmaps that may work in the cluster, we assume
 * that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = -ENOSPC;
	u64 bitmap_offset = offset_to_bitmap(ctl, offset);

	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * The bitmap that covers offset won't be in the list unless offset
	 * is just its start offset.
	 */
	entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
	if (entry->offset != bitmap_offset) {
		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
		if (entry && list_empty(&entry->list))
			list_add(&entry->list, bitmaps);
	}

	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, cont1_bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * The bitmaps list has all the bitmaps that record free space
	 * starting after offset, so no more search is required.
	 */
	return -ENOSPC;
}

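/*
 * Typical call sequence (a sketch based on how the extent allocator uses
 * the cluster API; the local variable names are illustrative only):
 *
 *	spin_lock(&cluster->refill_lock);
 *	ret = btrfs_find_space_cluster(trans, root, block_group, cluster,
 *				       search_start, num_bytes, empty_size);
 *	if (!ret)
 *		offset = btrfs_alloc_from_cluster(block_group, cluster,
 *						  num_bytes, search_start);
 *	spin_unlock(&cluster->refill_lock);
 */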
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes+empty_size.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry, *tmp;
	LIST_HEAD(bitmaps);
	u64 min_bytes;
	u64 cont1_bytes;
	int ret;

	/*
	 * Choose the minimum extent size we'll require for this
	 * cluster.  For SSD_SPREAD, don't allow any fragmentation.
	 * For metadata, allow allocations with smaller extents.  For
	 * data, keep it dense.
	 */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		cont1_bytes = min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		cont1_bytes = bytes;
		min_bytes = block_group->sectorsize;
	} else {
		cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
		min_bytes = block_group->sectorsize;
	}

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
				 min_bytes);

	INIT_LIST_HEAD(&bitmaps);
	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes + empty_size,
				      cont1_bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes + empty_size,
					   cont1_bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	} else {
		trace_btrfs_failed_cluster_setup(block_group);
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}

static int do_trimming(struct btrfs_block_group_cache *block_group,
		       u64 *total_trimmed, u64 start, u64 bytes,
		       u64 reserved_start, u64 reserved_bytes)
{
	struct btrfs_space_info *space_info = block_group->space_info;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	int ret;
	int update = 0;
	u64 trimmed = 0;

	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	if (!block_group->ro) {
		block_group->reserved += reserved_bytes;
		space_info->bytes_reserved += reserved_bytes;
		update = 1;
	}
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = btrfs_error_discard_extent(fs_info->extent_root,
					 start, bytes, &trimmed);
	if (!ret)
		*total_trimmed += trimmed;

	btrfs_add_free_space(block_group, reserved_start, reserved_bytes);

	if (update) {
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);
		if (block_group->ro)
			space_info->bytes_readonly += reserved_bytes;
		block_group->reserved -= reserved_bytes;
		space_info->bytes_reserved -= reserved_bytes;
		spin_unlock(&space_info->lock);
		spin_unlock(&block_group->lock);
	}

	return ret;
}

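/*
 * Discard the free space tracked by plain extent entries that overlap
 * [start, end).  Bitmap entries are skipped here and handled separately
 * by trim_bitmaps().  Each extent is pulled out of the cache, discarded
 * via do_trimming(), and then re-added as free space.
 */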
static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = 0;
	u64 extent_start;
	u64 extent_bytes;
	u64 bytes;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* skip bitmaps */
		while (entry->bitmap) {
			node = rb_next(&entry->offset_index);
			if (!node) {
				spin_unlock(&ctl->tree_lock);
				goto out;
			}
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
		}

		if (entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		extent_start = entry->offset;
		extent_bytes = entry->bytes;
		start = max(start, extent_start);
		bytes = min(extent_start + extent_bytes, end) - start;
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		unlink_free_space(ctl, entry);
		kmem_cache_free(btrfs_free_space_cachep, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes);
		if (ret)
			break;
next:
		start += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}
out:
	return ret;
}

static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
			u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = 0;
	int ret2;
	u64 bytes;
	u64 offset = offset_to_bitmap(ctl, start);

	while (offset < end) {
		bool next_bitmap = false;

		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, offset, 1, 0);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = minlen;
		ret2 = search_bitmap(ctl, entry, &start, &bytes);
		if (ret2 || start >= end) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = min(bytes, end - start);
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		bitmap_clear_bits(ctl, entry, start, bytes);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  start, bytes);
		if (ret)
			break;
next:
		if (next_bitmap) {
			offset += BITS_PER_BITMAP * ctl->unit;
		} else {
			start += bytes;
			if (start >= offset + BITS_PER_BITMAP * ctl->unit)
				offset += BITS_PER_BITMAP * ctl->unit;
		}

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}

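/*
 * Trim all of the free space in [start, end) for this block group: plain
 * extent entries first, then bitmaps.  On return *trimmed holds the total
 * number of bytes that were actually discarded.
 */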
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	int ret;

	*trimmed = 0;

	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
	if (ret)
		return ret;

	ret = trim_bitmaps(block_group, trimmed, start, end, minlen);

	return ret;
}

/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		/* Logic error; Should be empty if it can't find anything */
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}

struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}

int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}

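/*
 * Load the cached free inode numbers for a root from its free ino cache
 * inode, but only if the cache was written in the same root generation;
 * a stale or missing cache is simply ignored and 0 is returned.
 */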
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on
	 * the normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);
#endif
	}

	iput(inode);
	return ret;
}