1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 31 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 33 34 static int link_free_space(struct btrfs_free_space_ctl *ctl, 35 struct btrfs_free_space *info); 36 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 37 struct btrfs_free_space *info); 38 39 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 40 struct btrfs_path *path, 41 u64 offset) 42 { 43 struct btrfs_key key; 44 struct btrfs_key location; 45 struct btrfs_disk_key disk_key; 46 struct btrfs_free_space_header *header; 47 struct extent_buffer *leaf; 48 struct inode *inode = NULL; 49 int ret; 50 51 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 52 key.offset = offset; 53 key.type = 0; 54 55 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 56 if (ret < 0) 57 return ERR_PTR(ret); 58 if (ret > 0) { 59 btrfs_release_path(path); 60 return ERR_PTR(-ENOENT); 61 } 62 63 leaf = path->nodes[0]; 64 header = btrfs_item_ptr(leaf, path->slots[0], 65 struct btrfs_free_space_header); 66 btrfs_free_space_key(leaf, header, &disk_key); 67 btrfs_disk_key_to_cpu(&location, &disk_key); 68 btrfs_release_path(path); 69 70 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 71 if (!inode) 72 return ERR_PTR(-ENOENT); 73 if (IS_ERR(inode)) 74 return inode; 75 if (is_bad_inode(inode)) { 76 iput(inode); 77 return ERR_PTR(-ENOENT); 78 } 79 80 mapping_set_gfp_mask(inode->i_mapping, 81 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 82 83 return inode; 84 } 85 86 struct inode *lookup_free_space_inode(struct btrfs_root *root, 87 struct btrfs_block_group_cache 88 *block_group, struct btrfs_path *path) 89 { 90 struct inode *inode = NULL; 91 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 92 93 spin_lock(&block_group->lock); 94 if (block_group->inode) 95 inode = igrab(block_group->inode); 96 spin_unlock(&block_group->lock); 97 if (inode) 98 return inode; 99 100 inode = __lookup_free_space_inode(root, path, 101 block_group->key.objectid); 102 if (IS_ERR(inode)) 103 return inode; 104 105 spin_lock(&block_group->lock); 106 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 107 btrfs_info(root->fs_info, 108 "Old style space inode found, converting."); 109 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 110 BTRFS_INODE_NODATACOW; 111 block_group->disk_cache_state = BTRFS_DC_CLEAR; 112 } 113 114 if (!block_group->iref) { 115 block_group->inode = igrab(inode); 116 block_group->iref = 1; 117 } 118 spin_unlock(&block_group->lock); 119 120 return inode; 121 } 122 123 static int __create_free_space_inode(struct btrfs_root *root, 124 struct btrfs_trans_handle *trans, 125 struct btrfs_path *path, 126 u64 ino, u64 offset) 127 { 128 struct btrfs_key key; 129 struct btrfs_disk_key disk_key; 130 struct btrfs_free_space_header *header; 131 struct btrfs_inode_item *inode_item; 132 struct extent_buffer *leaf; 133 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 134 int ret; 135 136 ret = btrfs_insert_empty_inode(trans, root, path, ino); 137 if (ret) 138 return ret; 139 140 /* We inline crc's for the free disk space cache */ 141 if (ino != BTRFS_FREE_INO_OBJECTID) 142 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 143 144 leaf = path->nodes[0]; 145 inode_item = btrfs_item_ptr(leaf, path->slots[0], 146 struct btrfs_inode_item); 147 btrfs_item_key(leaf, &disk_key, path->slots[0]); 148 memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 149 sizeof(*inode_item)); 150 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 151 btrfs_set_inode_size(leaf, inode_item, 0); 152 btrfs_set_inode_nbytes(leaf, inode_item, 0); 153 btrfs_set_inode_uid(leaf, inode_item, 0); 154 btrfs_set_inode_gid(leaf, inode_item, 0); 155 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 156 btrfs_set_inode_flags(leaf, inode_item, flags); 157 btrfs_set_inode_nlink(leaf, inode_item, 1); 158 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 159 btrfs_set_inode_block_group(leaf, inode_item, offset); 160 btrfs_mark_buffer_dirty(leaf); 161 btrfs_release_path(path); 162 163 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 164 key.offset = offset; 165 key.type = 0; 166 167 ret = btrfs_insert_empty_item(trans, root, path, &key, 168 sizeof(struct btrfs_free_space_header)); 169 if (ret < 0) { 170 btrfs_release_path(path); 171 return ret; 172 } 173 leaf = path->nodes[0]; 174 header = btrfs_item_ptr(leaf, path->slots[0], 175 struct btrfs_free_space_header); 176 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 177 btrfs_set_free_space_key(leaf, header, &disk_key); 178 btrfs_mark_buffer_dirty(leaf); 179 btrfs_release_path(path); 180 181 return 0; 182 } 183 184 int create_free_space_inode(struct btrfs_root *root, 185 struct btrfs_trans_handle *trans, 186 struct btrfs_block_group_cache *block_group, 187 struct btrfs_path *path) 188 { 189 int ret; 190 u64 ino; 191 192 ret = btrfs_find_free_objectid(root, &ino); 193 if (ret < 0) 194 return ret; 195 196 return __create_free_space_inode(root, trans, path, ino, 197 block_group->key.objectid); 198 } 199 200 int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, 201 struct btrfs_block_rsv *rsv) 202 { 203 u64 needed_bytes; 204 int ret; 205 206 /* 1 for slack space, 1 for updating the inode */ 207 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 208 btrfs_calc_trans_metadata_size(root, 1); 209 210 spin_lock(&rsv->lock); 211 if (rsv->reserved < needed_bytes) 212 ret = -ENOSPC; 213 else 214 ret = 0; 215 spin_unlock(&rsv->lock); 216 return ret; 217 } 218 219 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 220 struct btrfs_trans_handle *trans, 221 struct inode *inode) 222 { 223 int ret = 0; 224 225 btrfs_i_size_write(inode, 0); 226 truncate_pagecache(inode, 0); 227 228 /* 229 * We don't need an orphan item because truncating the free space cache 230 * will never be split across transactions. 231 */ 232 ret = btrfs_truncate_inode_items(trans, root, inode, 233 0, BTRFS_EXTENT_DATA_KEY); 234 if (ret) { 235 btrfs_abort_transaction(trans, root, ret); 236 return ret; 237 } 238 239 ret = btrfs_update_inode(trans, root, inode); 240 if (ret) 241 btrfs_abort_transaction(trans, root, ret); 242 243 return ret; 244 } 245 246 static int readahead_cache(struct inode *inode) 247 { 248 struct file_ra_state *ra; 249 unsigned long last_index; 250 251 ra = kzalloc(sizeof(*ra), GFP_NOFS); 252 if (!ra) 253 return -ENOMEM; 254 255 file_ra_state_init(ra, inode->i_mapping); 256 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 257 258 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 259 260 kfree(ra); 261 262 return 0; 263 } 264 265 struct io_ctl { 266 void *cur, *orig; 267 struct page *page; 268 struct page **pages; 269 struct btrfs_root *root; 270 unsigned long size; 271 int index; 272 int num_pages; 273 unsigned check_crcs:1; 274 }; 275 276 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, 277 struct btrfs_root *root) 278 { 279 memset(io_ctl, 0, sizeof(struct io_ctl)); 280 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 281 PAGE_CACHE_SHIFT; 282 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, 283 GFP_NOFS); 284 if (!io_ctl->pages) 285 return -ENOMEM; 286 io_ctl->root = root; 287 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 288 io_ctl->check_crcs = 1; 289 return 0; 290 } 291 292 static void io_ctl_free(struct io_ctl *io_ctl) 293 { 294 kfree(io_ctl->pages); 295 } 296 297 static void io_ctl_unmap_page(struct io_ctl *io_ctl) 298 { 299 if (io_ctl->cur) { 300 kunmap(io_ctl->page); 301 io_ctl->cur = NULL; 302 io_ctl->orig = NULL; 303 } 304 } 305 306 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 307 { 308 ASSERT(io_ctl->index < io_ctl->num_pages); 309 io_ctl->page = io_ctl->pages[io_ctl->index++]; 310 io_ctl->cur = kmap(io_ctl->page); 311 io_ctl->orig = io_ctl->cur; 312 io_ctl->size = PAGE_CACHE_SIZE; 313 if (clear) 314 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); 315 } 316 317 static void io_ctl_drop_pages(struct io_ctl *io_ctl) 318 { 319 int i; 320 321 io_ctl_unmap_page(io_ctl); 322 323 for (i = 0; i < io_ctl->num_pages; i++) { 324 if (io_ctl->pages[i]) { 325 ClearPageChecked(io_ctl->pages[i]); 326 unlock_page(io_ctl->pages[i]); 327 page_cache_release(io_ctl->pages[i]); 328 } 329 } 330 } 331 332 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, 333 int uptodate) 334 { 335 struct page *page; 336 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 337 int i; 338 339 for (i = 0; i < io_ctl->num_pages; i++) { 340 page = find_or_create_page(inode->i_mapping, i, mask); 341 if (!page) { 342 io_ctl_drop_pages(io_ctl); 343 return -ENOMEM; 344 } 345 io_ctl->pages[i] = page; 346 if (uptodate && !PageUptodate(page)) { 347 btrfs_readpage(NULL, page); 348 lock_page(page); 349 if (!PageUptodate(page)) { 350 btrfs_err(BTRFS_I(inode)->root->fs_info, 351 "error reading free space cache"); 352 io_ctl_drop_pages(io_ctl); 353 return -EIO; 354 } 355 } 356 } 357 358 for (i = 0; i < io_ctl->num_pages; i++) { 359 clear_page_dirty_for_io(io_ctl->pages[i]); 360 set_page_extent_mapped(io_ctl->pages[i]); 361 } 362 363 return 0; 364 } 365 366 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) 367 { 368 __le64 *val; 369 370 io_ctl_map_page(io_ctl, 1); 371 372 /* 373 * Skip the csum areas. If we don't check crcs then we just have a 374 * 64bit chunk at the front of the first page. 375 */ 376 if (io_ctl->check_crcs) { 377 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 378 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 379 } else { 380 io_ctl->cur += sizeof(u64); 381 io_ctl->size -= sizeof(u64) * 2; 382 } 383 384 val = io_ctl->cur; 385 *val = cpu_to_le64(generation); 386 io_ctl->cur += sizeof(u64); 387 } 388 389 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) 390 { 391 __le64 *gen; 392 393 /* 394 * Skip the crc area. If we don't check crcs then we just have a 64bit 395 * chunk at the front of the first page. 396 */ 397 if (io_ctl->check_crcs) { 398 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 399 io_ctl->size -= sizeof(u64) + 400 (sizeof(u32) * io_ctl->num_pages); 401 } else { 402 io_ctl->cur += sizeof(u64); 403 io_ctl->size -= sizeof(u64) * 2; 404 } 405 406 gen = io_ctl->cur; 407 if (le64_to_cpu(*gen) != generation) { 408 printk_ratelimited(KERN_ERR "BTRFS: space cache generation " 409 "(%Lu) does not match inode (%Lu)\n", *gen, 410 generation); 411 io_ctl_unmap_page(io_ctl); 412 return -EIO; 413 } 414 io_ctl->cur += sizeof(u64); 415 return 0; 416 } 417 418 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) 419 { 420 u32 *tmp; 421 u32 crc = ~(u32)0; 422 unsigned offset = 0; 423 424 if (!io_ctl->check_crcs) { 425 io_ctl_unmap_page(io_ctl); 426 return; 427 } 428 429 if (index == 0) 430 offset = sizeof(u32) * io_ctl->num_pages; 431 432 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 433 PAGE_CACHE_SIZE - offset); 434 btrfs_csum_final(crc, (char *)&crc); 435 io_ctl_unmap_page(io_ctl); 436 tmp = kmap(io_ctl->pages[0]); 437 tmp += index; 438 *tmp = crc; 439 kunmap(io_ctl->pages[0]); 440 } 441 442 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) 443 { 444 u32 *tmp, val; 445 u32 crc = ~(u32)0; 446 unsigned offset = 0; 447 448 if (!io_ctl->check_crcs) { 449 io_ctl_map_page(io_ctl, 0); 450 return 0; 451 } 452 453 if (index == 0) 454 offset = sizeof(u32) * io_ctl->num_pages; 455 456 tmp = kmap(io_ctl->pages[0]); 457 tmp += index; 458 val = *tmp; 459 kunmap(io_ctl->pages[0]); 460 461 io_ctl_map_page(io_ctl, 0); 462 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 463 PAGE_CACHE_SIZE - offset); 464 btrfs_csum_final(crc, (char *)&crc); 465 if (val != crc) { 466 printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free " 467 "space cache\n"); 468 io_ctl_unmap_page(io_ctl); 469 return -EIO; 470 } 471 472 return 0; 473 } 474 475 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, 476 void *bitmap) 477 { 478 struct btrfs_free_space_entry *entry; 479 480 if (!io_ctl->cur) 481 return -ENOSPC; 482 483 entry = io_ctl->cur; 484 entry->offset = cpu_to_le64(offset); 485 entry->bytes = cpu_to_le64(bytes); 486 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 487 BTRFS_FREE_SPACE_EXTENT; 488 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 489 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 490 491 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 492 return 0; 493 494 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 495 496 /* No more pages to map */ 497 if (io_ctl->index >= io_ctl->num_pages) 498 return 0; 499 500 /* map the next page */ 501 io_ctl_map_page(io_ctl, 1); 502 return 0; 503 } 504 505 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) 506 { 507 if (!io_ctl->cur) 508 return -ENOSPC; 509 510 /* 511 * If we aren't at the start of the current page, unmap this one and 512 * map the next one if there is any left. 513 */ 514 if (io_ctl->cur != io_ctl->orig) { 515 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 516 if (io_ctl->index >= io_ctl->num_pages) 517 return -ENOSPC; 518 io_ctl_map_page(io_ctl, 0); 519 } 520 521 memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); 522 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 523 if (io_ctl->index < io_ctl->num_pages) 524 io_ctl_map_page(io_ctl, 0); 525 return 0; 526 } 527 528 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) 529 { 530 /* 531 * If we're not on the boundary we know we've modified the page and we 532 * need to crc the page. 533 */ 534 if (io_ctl->cur != io_ctl->orig) 535 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 536 else 537 io_ctl_unmap_page(io_ctl); 538 539 while (io_ctl->index < io_ctl->num_pages) { 540 io_ctl_map_page(io_ctl, 1); 541 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 542 } 543 } 544 545 static int io_ctl_read_entry(struct io_ctl *io_ctl, 546 struct btrfs_free_space *entry, u8 *type) 547 { 548 struct btrfs_free_space_entry *e; 549 int ret; 550 551 if (!io_ctl->cur) { 552 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 553 if (ret) 554 return ret; 555 } 556 557 e = io_ctl->cur; 558 entry->offset = le64_to_cpu(e->offset); 559 entry->bytes = le64_to_cpu(e->bytes); 560 *type = e->type; 561 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 562 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 563 564 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 565 return 0; 566 567 io_ctl_unmap_page(io_ctl); 568 569 return 0; 570 } 571 572 static int io_ctl_read_bitmap(struct io_ctl *io_ctl, 573 struct btrfs_free_space *entry) 574 { 575 int ret; 576 577 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 578 if (ret) 579 return ret; 580 581 memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); 582 io_ctl_unmap_page(io_ctl); 583 584 return 0; 585 } 586 587 /* 588 * Since we attach pinned extents after the fact we can have contiguous sections 589 * of free space that are split up in entries. This poses a problem with the 590 * tree logging stuff since it could have allocated across what appears to be 2 591 * entries since we would have merged the entries when adding the pinned extents 592 * back to the free space cache. So run through the space cache that we just 593 * loaded and merge contiguous entries. This will make the log replay stuff not 594 * blow up and it will make for nicer allocator behavior. 595 */ 596 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 597 { 598 struct btrfs_free_space *e, *prev = NULL; 599 struct rb_node *n; 600 601 again: 602 spin_lock(&ctl->tree_lock); 603 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 604 e = rb_entry(n, struct btrfs_free_space, offset_index); 605 if (!prev) 606 goto next; 607 if (e->bitmap || prev->bitmap) 608 goto next; 609 if (prev->offset + prev->bytes == e->offset) { 610 unlink_free_space(ctl, prev); 611 unlink_free_space(ctl, e); 612 prev->bytes += e->bytes; 613 kmem_cache_free(btrfs_free_space_cachep, e); 614 link_free_space(ctl, prev); 615 prev = NULL; 616 spin_unlock(&ctl->tree_lock); 617 goto again; 618 } 619 next: 620 prev = e; 621 } 622 spin_unlock(&ctl->tree_lock); 623 } 624 625 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 626 struct btrfs_free_space_ctl *ctl, 627 struct btrfs_path *path, u64 offset) 628 { 629 struct btrfs_free_space_header *header; 630 struct extent_buffer *leaf; 631 struct io_ctl io_ctl; 632 struct btrfs_key key; 633 struct btrfs_free_space *e, *n; 634 struct list_head bitmaps; 635 u64 num_entries; 636 u64 num_bitmaps; 637 u64 generation; 638 u8 type; 639 int ret = 0; 640 641 INIT_LIST_HEAD(&bitmaps); 642 643 /* Nothing in the space cache, goodbye */ 644 if (!i_size_read(inode)) 645 return 0; 646 647 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 648 key.offset = offset; 649 key.type = 0; 650 651 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 652 if (ret < 0) 653 return 0; 654 else if (ret > 0) { 655 btrfs_release_path(path); 656 return 0; 657 } 658 659 ret = -1; 660 661 leaf = path->nodes[0]; 662 header = btrfs_item_ptr(leaf, path->slots[0], 663 struct btrfs_free_space_header); 664 num_entries = btrfs_free_space_entries(leaf, header); 665 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 666 generation = btrfs_free_space_generation(leaf, header); 667 btrfs_release_path(path); 668 669 if (BTRFS_I(inode)->generation != generation) { 670 btrfs_err(root->fs_info, 671 "free space inode generation (%llu) " 672 "did not match free space cache generation (%llu)", 673 BTRFS_I(inode)->generation, generation); 674 return 0; 675 } 676 677 if (!num_entries) 678 return 0; 679 680 ret = io_ctl_init(&io_ctl, inode, root); 681 if (ret) 682 return ret; 683 684 ret = readahead_cache(inode); 685 if (ret) 686 goto out; 687 688 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 689 if (ret) 690 goto out; 691 692 ret = io_ctl_check_crc(&io_ctl, 0); 693 if (ret) 694 goto free_cache; 695 696 ret = io_ctl_check_generation(&io_ctl, generation); 697 if (ret) 698 goto free_cache; 699 700 while (num_entries) { 701 e = kmem_cache_zalloc(btrfs_free_space_cachep, 702 GFP_NOFS); 703 if (!e) 704 goto free_cache; 705 706 ret = io_ctl_read_entry(&io_ctl, e, &type); 707 if (ret) { 708 kmem_cache_free(btrfs_free_space_cachep, e); 709 goto free_cache; 710 } 711 712 if (!e->bytes) { 713 kmem_cache_free(btrfs_free_space_cachep, e); 714 goto free_cache; 715 } 716 717 if (type == BTRFS_FREE_SPACE_EXTENT) { 718 spin_lock(&ctl->tree_lock); 719 ret = link_free_space(ctl, e); 720 spin_unlock(&ctl->tree_lock); 721 if (ret) { 722 btrfs_err(root->fs_info, 723 "Duplicate entries in free space cache, dumping"); 724 kmem_cache_free(btrfs_free_space_cachep, e); 725 goto free_cache; 726 } 727 } else { 728 ASSERT(num_bitmaps); 729 num_bitmaps--; 730 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 731 if (!e->bitmap) { 732 kmem_cache_free( 733 btrfs_free_space_cachep, e); 734 goto free_cache; 735 } 736 spin_lock(&ctl->tree_lock); 737 ret = link_free_space(ctl, e); 738 ctl->total_bitmaps++; 739 ctl->op->recalc_thresholds(ctl); 740 spin_unlock(&ctl->tree_lock); 741 if (ret) { 742 btrfs_err(root->fs_info, 743 "Duplicate entries in free space cache, dumping"); 744 kmem_cache_free(btrfs_free_space_cachep, e); 745 goto free_cache; 746 } 747 list_add_tail(&e->list, &bitmaps); 748 } 749 750 num_entries--; 751 } 752 753 io_ctl_unmap_page(&io_ctl); 754 755 /* 756 * We add the bitmaps at the end of the entries in order that 757 * the bitmap entries are added to the cache. 758 */ 759 list_for_each_entry_safe(e, n, &bitmaps, list) { 760 list_del_init(&e->list); 761 ret = io_ctl_read_bitmap(&io_ctl, e); 762 if (ret) 763 goto free_cache; 764 } 765 766 io_ctl_drop_pages(&io_ctl); 767 merge_space_tree(ctl); 768 ret = 1; 769 out: 770 io_ctl_free(&io_ctl); 771 return ret; 772 free_cache: 773 io_ctl_drop_pages(&io_ctl); 774 __btrfs_remove_free_space_cache(ctl); 775 goto out; 776 } 777 778 int load_free_space_cache(struct btrfs_fs_info *fs_info, 779 struct btrfs_block_group_cache *block_group) 780 { 781 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 782 struct btrfs_root *root = fs_info->tree_root; 783 struct inode *inode; 784 struct btrfs_path *path; 785 int ret = 0; 786 bool matched; 787 u64 used = btrfs_block_group_used(&block_group->item); 788 789 /* 790 * If this block group has been marked to be cleared for one reason or 791 * another then we can't trust the on disk cache, so just return. 792 */ 793 spin_lock(&block_group->lock); 794 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 795 spin_unlock(&block_group->lock); 796 return 0; 797 } 798 spin_unlock(&block_group->lock); 799 800 path = btrfs_alloc_path(); 801 if (!path) 802 return 0; 803 path->search_commit_root = 1; 804 path->skip_locking = 1; 805 806 inode = lookup_free_space_inode(root, block_group, path); 807 if (IS_ERR(inode)) { 808 btrfs_free_path(path); 809 return 0; 810 } 811 812 /* We may have converted the inode and made the cache invalid. */ 813 spin_lock(&block_group->lock); 814 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 815 spin_unlock(&block_group->lock); 816 btrfs_free_path(path); 817 goto out; 818 } 819 spin_unlock(&block_group->lock); 820 821 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 822 path, block_group->key.objectid); 823 btrfs_free_path(path); 824 if (ret <= 0) 825 goto out; 826 827 spin_lock(&ctl->tree_lock); 828 matched = (ctl->free_space == (block_group->key.offset - used - 829 block_group->bytes_super)); 830 spin_unlock(&ctl->tree_lock); 831 832 if (!matched) { 833 __btrfs_remove_free_space_cache(ctl); 834 btrfs_warn(fs_info, "block group %llu has wrong amount of free space", 835 block_group->key.objectid); 836 ret = -1; 837 } 838 out: 839 if (ret < 0) { 840 /* This cache is bogus, make sure it gets cleared */ 841 spin_lock(&block_group->lock); 842 block_group->disk_cache_state = BTRFS_DC_CLEAR; 843 spin_unlock(&block_group->lock); 844 ret = 0; 845 846 btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", 847 block_group->key.objectid); 848 } 849 850 iput(inode); 851 return ret; 852 } 853 854 static noinline_for_stack 855 int write_cache_extent_entries(struct io_ctl *io_ctl, 856 struct btrfs_free_space_ctl *ctl, 857 struct btrfs_block_group_cache *block_group, 858 int *entries, int *bitmaps, 859 struct list_head *bitmap_list) 860 { 861 int ret; 862 struct btrfs_free_cluster *cluster = NULL; 863 struct rb_node *node = rb_first(&ctl->free_space_offset); 864 865 /* Get the cluster for this block_group if it exists */ 866 if (block_group && !list_empty(&block_group->cluster_list)) { 867 cluster = list_entry(block_group->cluster_list.next, 868 struct btrfs_free_cluster, 869 block_group_list); 870 } 871 872 if (!node && cluster) { 873 node = rb_first(&cluster->root); 874 cluster = NULL; 875 } 876 877 /* Write out the extent entries */ 878 while (node) { 879 struct btrfs_free_space *e; 880 881 e = rb_entry(node, struct btrfs_free_space, offset_index); 882 *entries += 1; 883 884 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 885 e->bitmap); 886 if (ret) 887 goto fail; 888 889 if (e->bitmap) { 890 list_add_tail(&e->list, bitmap_list); 891 *bitmaps += 1; 892 } 893 node = rb_next(node); 894 if (!node && cluster) { 895 node = rb_first(&cluster->root); 896 cluster = NULL; 897 } 898 } 899 return 0; 900 fail: 901 return -ENOSPC; 902 } 903 904 static noinline_for_stack int 905 update_cache_item(struct btrfs_trans_handle *trans, 906 struct btrfs_root *root, 907 struct inode *inode, 908 struct btrfs_path *path, u64 offset, 909 int entries, int bitmaps) 910 { 911 struct btrfs_key key; 912 struct btrfs_free_space_header *header; 913 struct extent_buffer *leaf; 914 int ret; 915 916 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 917 key.offset = offset; 918 key.type = 0; 919 920 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 921 if (ret < 0) { 922 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 923 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 924 GFP_NOFS); 925 goto fail; 926 } 927 leaf = path->nodes[0]; 928 if (ret > 0) { 929 struct btrfs_key found_key; 930 ASSERT(path->slots[0]); 931 path->slots[0]--; 932 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 933 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 934 found_key.offset != offset) { 935 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 936 inode->i_size - 1, 937 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 938 NULL, GFP_NOFS); 939 btrfs_release_path(path); 940 goto fail; 941 } 942 } 943 944 BTRFS_I(inode)->generation = trans->transid; 945 header = btrfs_item_ptr(leaf, path->slots[0], 946 struct btrfs_free_space_header); 947 btrfs_set_free_space_entries(leaf, header, entries); 948 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 949 btrfs_set_free_space_generation(leaf, header, trans->transid); 950 btrfs_mark_buffer_dirty(leaf); 951 btrfs_release_path(path); 952 953 return 0; 954 955 fail: 956 return -1; 957 } 958 959 static noinline_for_stack int 960 add_ioctl_entries(struct btrfs_root *root, 961 struct inode *inode, 962 struct btrfs_block_group_cache *block_group, 963 struct io_ctl *io_ctl, 964 struct extent_state **cached_state, 965 struct list_head *bitmap_list, 966 int *entries) 967 { 968 u64 start, extent_start, extent_end, len; 969 struct list_head *pos, *n; 970 struct extent_io_tree *unpin = NULL; 971 int ret; 972 973 /* 974 * We want to add any pinned extents to our free space cache 975 * so we don't leak the space 976 * 977 * We shouldn't have switched the pinned extents yet so this is the 978 * right one 979 */ 980 unpin = root->fs_info->pinned_extents; 981 982 if (block_group) 983 start = block_group->key.objectid; 984 985 while (block_group && (start < block_group->key.objectid + 986 block_group->key.offset)) { 987 ret = find_first_extent_bit(unpin, start, 988 &extent_start, &extent_end, 989 EXTENT_DIRTY, NULL); 990 if (ret) { 991 ret = 0; 992 break; 993 } 994 995 /* This pinned extent is out of our range */ 996 if (extent_start >= block_group->key.objectid + 997 block_group->key.offset) 998 break; 999 1000 extent_start = max(extent_start, start); 1001 extent_end = min(block_group->key.objectid + 1002 block_group->key.offset, extent_end + 1); 1003 len = extent_end - extent_start; 1004 1005 *entries += 1; 1006 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1007 if (ret) 1008 goto out_nospc; 1009 1010 start = extent_end; 1011 } 1012 1013 /* Write out the bitmaps */ 1014 list_for_each_safe(pos, n, bitmap_list) { 1015 struct btrfs_free_space *entry = 1016 list_entry(pos, struct btrfs_free_space, list); 1017 1018 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1019 if (ret) 1020 goto out_nospc; 1021 list_del_init(&entry->list); 1022 } 1023 1024 /* Zero out the rest of the pages just to make sure */ 1025 io_ctl_zero_remaining_pages(io_ctl); 1026 1027 ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, 1028 0, i_size_read(inode), cached_state); 1029 io_ctl_drop_pages(io_ctl); 1030 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1031 i_size_read(inode) - 1, cached_state, GFP_NOFS); 1032 1033 if (ret) 1034 goto fail; 1035 1036 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1037 if (ret) { 1038 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1039 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1040 GFP_NOFS); 1041 goto fail; 1042 } 1043 return 0; 1044 1045 fail: 1046 return -1; 1047 1048 out_nospc: 1049 return -ENOSPC; 1050 } 1051 1052 static void noinline_for_stack 1053 cleanup_write_cache_enospc(struct inode *inode, 1054 struct io_ctl *io_ctl, 1055 struct extent_state **cached_state, 1056 struct list_head *bitmap_list) 1057 { 1058 struct list_head *pos, *n; 1059 list_for_each_safe(pos, n, bitmap_list) { 1060 struct btrfs_free_space *entry = 1061 list_entry(pos, struct btrfs_free_space, list); 1062 list_del_init(&entry->list); 1063 } 1064 io_ctl_drop_pages(io_ctl); 1065 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1066 i_size_read(inode) - 1, cached_state, 1067 GFP_NOFS); 1068 } 1069 1070 /** 1071 * __btrfs_write_out_cache - write out cached info to an inode 1072 * @root - the root the inode belongs to 1073 * @ctl - the free space cache we are going to write out 1074 * @block_group - the block_group for this cache if it belongs to a block_group 1075 * @trans - the trans handle 1076 * @path - the path to use 1077 * @offset - the offset for the key we'll insert 1078 * 1079 * This function writes out a free space cache struct to disk for quick recovery 1080 * on mount. This will return 0 if it was successfull in writing the cache out, 1081 * and -1 if it was not. 1082 */ 1083 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1084 struct btrfs_free_space_ctl *ctl, 1085 struct btrfs_block_group_cache *block_group, 1086 struct btrfs_trans_handle *trans, 1087 struct btrfs_path *path, u64 offset) 1088 { 1089 struct extent_state *cached_state = NULL; 1090 struct io_ctl io_ctl; 1091 struct list_head bitmap_list; 1092 int entries = 0; 1093 int bitmaps = 0; 1094 int ret; 1095 int err = -1; 1096 1097 INIT_LIST_HEAD(&bitmap_list); 1098 1099 if (!i_size_read(inode)) 1100 return -1; 1101 1102 ret = io_ctl_init(&io_ctl, inode, root); 1103 if (ret) 1104 return -1; 1105 1106 /* Lock all pages first so we can lock the extent safely. */ 1107 io_ctl_prepare_pages(&io_ctl, inode, 0); 1108 1109 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1110 0, &cached_state); 1111 1112 1113 /* Make sure we can fit our crcs into the first page */ 1114 if (io_ctl.check_crcs && 1115 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 1116 goto out_nospc; 1117 1118 io_ctl_set_generation(&io_ctl, trans->transid); 1119 1120 ret = write_cache_extent_entries(&io_ctl, ctl, 1121 block_group, &entries, &bitmaps, 1122 &bitmap_list); 1123 if (ret) 1124 goto out_nospc; 1125 1126 ret = add_ioctl_entries(root, inode, block_group, &io_ctl, 1127 &cached_state, &bitmap_list, &entries); 1128 1129 if (ret == -ENOSPC) 1130 goto out_nospc; 1131 else if (ret) 1132 goto out; 1133 1134 err = update_cache_item(trans, root, inode, path, offset, 1135 entries, bitmaps); 1136 1137 out: 1138 io_ctl_free(&io_ctl); 1139 if (err) { 1140 invalidate_inode_pages2(inode->i_mapping); 1141 BTRFS_I(inode)->generation = 0; 1142 } 1143 btrfs_update_inode(trans, root, inode); 1144 return err; 1145 1146 out_nospc: 1147 1148 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1149 goto out; 1150 } 1151 1152 int btrfs_write_out_cache(struct btrfs_root *root, 1153 struct btrfs_trans_handle *trans, 1154 struct btrfs_block_group_cache *block_group, 1155 struct btrfs_path *path) 1156 { 1157 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1158 struct inode *inode; 1159 int ret = 0; 1160 1161 root = root->fs_info->tree_root; 1162 1163 spin_lock(&block_group->lock); 1164 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1165 spin_unlock(&block_group->lock); 1166 return 0; 1167 } 1168 spin_unlock(&block_group->lock); 1169 1170 inode = lookup_free_space_inode(root, block_group, path); 1171 if (IS_ERR(inode)) 1172 return 0; 1173 1174 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1175 path, block_group->key.objectid); 1176 if (ret) { 1177 spin_lock(&block_group->lock); 1178 block_group->disk_cache_state = BTRFS_DC_ERROR; 1179 spin_unlock(&block_group->lock); 1180 ret = 0; 1181 #ifdef DEBUG 1182 btrfs_err(root->fs_info, 1183 "failed to write free space cache for block group %llu", 1184 block_group->key.objectid); 1185 #endif 1186 } 1187 1188 iput(inode); 1189 return ret; 1190 } 1191 1192 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1193 u64 offset) 1194 { 1195 ASSERT(offset >= bitmap_start); 1196 offset -= bitmap_start; 1197 return (unsigned long)(div_u64(offset, unit)); 1198 } 1199 1200 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1201 { 1202 return (unsigned long)(div_u64(bytes, unit)); 1203 } 1204 1205 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1206 u64 offset) 1207 { 1208 u64 bitmap_start; 1209 u64 bytes_per_bitmap; 1210 1211 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1212 bitmap_start = offset - ctl->start; 1213 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1214 bitmap_start *= bytes_per_bitmap; 1215 bitmap_start += ctl->start; 1216 1217 return bitmap_start; 1218 } 1219 1220 static int tree_insert_offset(struct rb_root *root, u64 offset, 1221 struct rb_node *node, int bitmap) 1222 { 1223 struct rb_node **p = &root->rb_node; 1224 struct rb_node *parent = NULL; 1225 struct btrfs_free_space *info; 1226 1227 while (*p) { 1228 parent = *p; 1229 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1230 1231 if (offset < info->offset) { 1232 p = &(*p)->rb_left; 1233 } else if (offset > info->offset) { 1234 p = &(*p)->rb_right; 1235 } else { 1236 /* 1237 * we could have a bitmap entry and an extent entry 1238 * share the same offset. If this is the case, we want 1239 * the extent entry to always be found first if we do a 1240 * linear search through the tree, since we want to have 1241 * the quickest allocation time, and allocating from an 1242 * extent is faster than allocating from a bitmap. So 1243 * if we're inserting a bitmap and we find an entry at 1244 * this offset, we want to go right, or after this entry 1245 * logically. If we are inserting an extent and we've 1246 * found a bitmap, we want to go left, or before 1247 * logically. 1248 */ 1249 if (bitmap) { 1250 if (info->bitmap) { 1251 WARN_ON_ONCE(1); 1252 return -EEXIST; 1253 } 1254 p = &(*p)->rb_right; 1255 } else { 1256 if (!info->bitmap) { 1257 WARN_ON_ONCE(1); 1258 return -EEXIST; 1259 } 1260 p = &(*p)->rb_left; 1261 } 1262 } 1263 } 1264 1265 rb_link_node(node, parent, p); 1266 rb_insert_color(node, root); 1267 1268 return 0; 1269 } 1270 1271 /* 1272 * searches the tree for the given offset. 1273 * 1274 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1275 * want a section that has at least bytes size and comes at or after the given 1276 * offset. 1277 */ 1278 static struct btrfs_free_space * 1279 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1280 u64 offset, int bitmap_only, int fuzzy) 1281 { 1282 struct rb_node *n = ctl->free_space_offset.rb_node; 1283 struct btrfs_free_space *entry, *prev = NULL; 1284 1285 /* find entry that is closest to the 'offset' */ 1286 while (1) { 1287 if (!n) { 1288 entry = NULL; 1289 break; 1290 } 1291 1292 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1293 prev = entry; 1294 1295 if (offset < entry->offset) 1296 n = n->rb_left; 1297 else if (offset > entry->offset) 1298 n = n->rb_right; 1299 else 1300 break; 1301 } 1302 1303 if (bitmap_only) { 1304 if (!entry) 1305 return NULL; 1306 if (entry->bitmap) 1307 return entry; 1308 1309 /* 1310 * bitmap entry and extent entry may share same offset, 1311 * in that case, bitmap entry comes after extent entry. 1312 */ 1313 n = rb_next(n); 1314 if (!n) 1315 return NULL; 1316 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1317 if (entry->offset != offset) 1318 return NULL; 1319 1320 WARN_ON(!entry->bitmap); 1321 return entry; 1322 } else if (entry) { 1323 if (entry->bitmap) { 1324 /* 1325 * if previous extent entry covers the offset, 1326 * we should return it instead of the bitmap entry 1327 */ 1328 n = rb_prev(&entry->offset_index); 1329 if (n) { 1330 prev = rb_entry(n, struct btrfs_free_space, 1331 offset_index); 1332 if (!prev->bitmap && 1333 prev->offset + prev->bytes > offset) 1334 entry = prev; 1335 } 1336 } 1337 return entry; 1338 } 1339 1340 if (!prev) 1341 return NULL; 1342 1343 /* find last entry before the 'offset' */ 1344 entry = prev; 1345 if (entry->offset > offset) { 1346 n = rb_prev(&entry->offset_index); 1347 if (n) { 1348 entry = rb_entry(n, struct btrfs_free_space, 1349 offset_index); 1350 ASSERT(entry->offset <= offset); 1351 } else { 1352 if (fuzzy) 1353 return entry; 1354 else 1355 return NULL; 1356 } 1357 } 1358 1359 if (entry->bitmap) { 1360 n = rb_prev(&entry->offset_index); 1361 if (n) { 1362 prev = rb_entry(n, struct btrfs_free_space, 1363 offset_index); 1364 if (!prev->bitmap && 1365 prev->offset + prev->bytes > offset) 1366 return prev; 1367 } 1368 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1369 return entry; 1370 } else if (entry->offset + entry->bytes > offset) 1371 return entry; 1372 1373 if (!fuzzy) 1374 return NULL; 1375 1376 while (1) { 1377 if (entry->bitmap) { 1378 if (entry->offset + BITS_PER_BITMAP * 1379 ctl->unit > offset) 1380 break; 1381 } else { 1382 if (entry->offset + entry->bytes > offset) 1383 break; 1384 } 1385 1386 n = rb_next(&entry->offset_index); 1387 if (!n) 1388 return NULL; 1389 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1390 } 1391 return entry; 1392 } 1393 1394 static inline void 1395 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1396 struct btrfs_free_space *info) 1397 { 1398 rb_erase(&info->offset_index, &ctl->free_space_offset); 1399 ctl->free_extents--; 1400 } 1401 1402 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1403 struct btrfs_free_space *info) 1404 { 1405 __unlink_free_space(ctl, info); 1406 ctl->free_space -= info->bytes; 1407 } 1408 1409 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1410 struct btrfs_free_space *info) 1411 { 1412 int ret = 0; 1413 1414 ASSERT(info->bytes || info->bitmap); 1415 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1416 &info->offset_index, (info->bitmap != NULL)); 1417 if (ret) 1418 return ret; 1419 1420 ctl->free_space += info->bytes; 1421 ctl->free_extents++; 1422 return ret; 1423 } 1424 1425 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1426 { 1427 struct btrfs_block_group_cache *block_group = ctl->private; 1428 u64 max_bytes; 1429 u64 bitmap_bytes; 1430 u64 extent_bytes; 1431 u64 size = block_group->key.offset; 1432 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1433 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1434 1435 max_bitmaps = max(max_bitmaps, 1); 1436 1437 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1438 1439 /* 1440 * The goal is to keep the total amount of memory used per 1gb of space 1441 * at or below 32k, so we need to adjust how much memory we allow to be 1442 * used by extent based free space tracking 1443 */ 1444 if (size < 1024 * 1024 * 1024) 1445 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1446 else 1447 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1448 div64_u64(size, 1024 * 1024 * 1024); 1449 1450 /* 1451 * we want to account for 1 more bitmap than what we have so we can make 1452 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1453 * we add more bitmaps. 1454 */ 1455 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE; 1456 1457 if (bitmap_bytes >= max_bytes) { 1458 ctl->extents_thresh = 0; 1459 return; 1460 } 1461 1462 /* 1463 * we want the extent entry threshold to always be at most 1/2 the maxw 1464 * bytes we can have, or whatever is less than that. 1465 */ 1466 extent_bytes = max_bytes - bitmap_bytes; 1467 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1468 1469 ctl->extents_thresh = 1470 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1471 } 1472 1473 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1474 struct btrfs_free_space *info, 1475 u64 offset, u64 bytes) 1476 { 1477 unsigned long start, count; 1478 1479 start = offset_to_bit(info->offset, ctl->unit, offset); 1480 count = bytes_to_bits(bytes, ctl->unit); 1481 ASSERT(start + count <= BITS_PER_BITMAP); 1482 1483 bitmap_clear(info->bitmap, start, count); 1484 1485 info->bytes -= bytes; 1486 } 1487 1488 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1489 struct btrfs_free_space *info, u64 offset, 1490 u64 bytes) 1491 { 1492 __bitmap_clear_bits(ctl, info, offset, bytes); 1493 ctl->free_space -= bytes; 1494 } 1495 1496 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1497 struct btrfs_free_space *info, u64 offset, 1498 u64 bytes) 1499 { 1500 unsigned long start, count; 1501 1502 start = offset_to_bit(info->offset, ctl->unit, offset); 1503 count = bytes_to_bits(bytes, ctl->unit); 1504 ASSERT(start + count <= BITS_PER_BITMAP); 1505 1506 bitmap_set(info->bitmap, start, count); 1507 1508 info->bytes += bytes; 1509 ctl->free_space += bytes; 1510 } 1511 1512 /* 1513 * If we can not find suitable extent, we will use bytes to record 1514 * the size of the max extent. 1515 */ 1516 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1517 struct btrfs_free_space *bitmap_info, u64 *offset, 1518 u64 *bytes) 1519 { 1520 unsigned long found_bits = 0; 1521 unsigned long max_bits = 0; 1522 unsigned long bits, i; 1523 unsigned long next_zero; 1524 unsigned long extent_bits; 1525 1526 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1527 max_t(u64, *offset, bitmap_info->offset)); 1528 bits = bytes_to_bits(*bytes, ctl->unit); 1529 1530 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1531 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1532 BITS_PER_BITMAP, i); 1533 extent_bits = next_zero - i; 1534 if (extent_bits >= bits) { 1535 found_bits = extent_bits; 1536 break; 1537 } else if (extent_bits > max_bits) { 1538 max_bits = extent_bits; 1539 } 1540 i = next_zero; 1541 } 1542 1543 if (found_bits) { 1544 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1545 *bytes = (u64)(found_bits) * ctl->unit; 1546 return 0; 1547 } 1548 1549 *bytes = (u64)(max_bits) * ctl->unit; 1550 return -1; 1551 } 1552 1553 /* Cache the size of the max extent in bytes */ 1554 static struct btrfs_free_space * 1555 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1556 unsigned long align, u64 *max_extent_size) 1557 { 1558 struct btrfs_free_space *entry; 1559 struct rb_node *node; 1560 u64 tmp; 1561 u64 align_off; 1562 int ret; 1563 1564 if (!ctl->free_space_offset.rb_node) 1565 goto out; 1566 1567 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1568 if (!entry) 1569 goto out; 1570 1571 for (node = &entry->offset_index; node; node = rb_next(node)) { 1572 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1573 if (entry->bytes < *bytes) { 1574 if (entry->bytes > *max_extent_size) 1575 *max_extent_size = entry->bytes; 1576 continue; 1577 } 1578 1579 /* make sure the space returned is big enough 1580 * to match our requested alignment 1581 */ 1582 if (*bytes >= align) { 1583 tmp = entry->offset - ctl->start + align - 1; 1584 do_div(tmp, align); 1585 tmp = tmp * align + ctl->start; 1586 align_off = tmp - entry->offset; 1587 } else { 1588 align_off = 0; 1589 tmp = entry->offset; 1590 } 1591 1592 if (entry->bytes < *bytes + align_off) { 1593 if (entry->bytes > *max_extent_size) 1594 *max_extent_size = entry->bytes; 1595 continue; 1596 } 1597 1598 if (entry->bitmap) { 1599 u64 size = *bytes; 1600 1601 ret = search_bitmap(ctl, entry, &tmp, &size); 1602 if (!ret) { 1603 *offset = tmp; 1604 *bytes = size; 1605 return entry; 1606 } else if (size > *max_extent_size) { 1607 *max_extent_size = size; 1608 } 1609 continue; 1610 } 1611 1612 *offset = tmp; 1613 *bytes = entry->bytes - align_off; 1614 return entry; 1615 } 1616 out: 1617 return NULL; 1618 } 1619 1620 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1621 struct btrfs_free_space *info, u64 offset) 1622 { 1623 info->offset = offset_to_bitmap(ctl, offset); 1624 info->bytes = 0; 1625 INIT_LIST_HEAD(&info->list); 1626 link_free_space(ctl, info); 1627 ctl->total_bitmaps++; 1628 1629 ctl->op->recalc_thresholds(ctl); 1630 } 1631 1632 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1633 struct btrfs_free_space *bitmap_info) 1634 { 1635 unlink_free_space(ctl, bitmap_info); 1636 kfree(bitmap_info->bitmap); 1637 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1638 ctl->total_bitmaps--; 1639 ctl->op->recalc_thresholds(ctl); 1640 } 1641 1642 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1643 struct btrfs_free_space *bitmap_info, 1644 u64 *offset, u64 *bytes) 1645 { 1646 u64 end; 1647 u64 search_start, search_bytes; 1648 int ret; 1649 1650 again: 1651 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1652 1653 /* 1654 * We need to search for bits in this bitmap. We could only cover some 1655 * of the extent in this bitmap thanks to how we add space, so we need 1656 * to search for as much as it as we can and clear that amount, and then 1657 * go searching for the next bit. 1658 */ 1659 search_start = *offset; 1660 search_bytes = ctl->unit; 1661 search_bytes = min(search_bytes, end - search_start + 1); 1662 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1663 if (ret < 0 || search_start != *offset) 1664 return -EINVAL; 1665 1666 /* We may have found more bits than what we need */ 1667 search_bytes = min(search_bytes, *bytes); 1668 1669 /* Cannot clear past the end of the bitmap */ 1670 search_bytes = min(search_bytes, end - search_start + 1); 1671 1672 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); 1673 *offset += search_bytes; 1674 *bytes -= search_bytes; 1675 1676 if (*bytes) { 1677 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1678 if (!bitmap_info->bytes) 1679 free_bitmap(ctl, bitmap_info); 1680 1681 /* 1682 * no entry after this bitmap, but we still have bytes to 1683 * remove, so something has gone wrong. 1684 */ 1685 if (!next) 1686 return -EINVAL; 1687 1688 bitmap_info = rb_entry(next, struct btrfs_free_space, 1689 offset_index); 1690 1691 /* 1692 * if the next entry isn't a bitmap we need to return to let the 1693 * extent stuff do its work. 1694 */ 1695 if (!bitmap_info->bitmap) 1696 return -EAGAIN; 1697 1698 /* 1699 * Ok the next item is a bitmap, but it may not actually hold 1700 * the information for the rest of this free space stuff, so 1701 * look for it, and if we don't find it return so we can try 1702 * everything over again. 1703 */ 1704 search_start = *offset; 1705 search_bytes = ctl->unit; 1706 ret = search_bitmap(ctl, bitmap_info, &search_start, 1707 &search_bytes); 1708 if (ret < 0 || search_start != *offset) 1709 return -EAGAIN; 1710 1711 goto again; 1712 } else if (!bitmap_info->bytes) 1713 free_bitmap(ctl, bitmap_info); 1714 1715 return 0; 1716 } 1717 1718 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, 1719 struct btrfs_free_space *info, u64 offset, 1720 u64 bytes) 1721 { 1722 u64 bytes_to_set = 0; 1723 u64 end; 1724 1725 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1726 1727 bytes_to_set = min(end - offset, bytes); 1728 1729 bitmap_set_bits(ctl, info, offset, bytes_to_set); 1730 1731 return bytes_to_set; 1732 1733 } 1734 1735 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1736 struct btrfs_free_space *info) 1737 { 1738 struct btrfs_block_group_cache *block_group = ctl->private; 1739 1740 /* 1741 * If we are below the extents threshold then we can add this as an 1742 * extent, and don't have to deal with the bitmap 1743 */ 1744 if (ctl->free_extents < ctl->extents_thresh) { 1745 /* 1746 * If this block group has some small extents we don't want to 1747 * use up all of our free slots in the cache with them, we want 1748 * to reserve them to larger extents, however if we have plent 1749 * of cache left then go ahead an dadd them, no sense in adding 1750 * the overhead of a bitmap if we don't have to. 1751 */ 1752 if (info->bytes <= block_group->sectorsize * 4) { 1753 if (ctl->free_extents * 2 <= ctl->extents_thresh) 1754 return false; 1755 } else { 1756 return false; 1757 } 1758 } 1759 1760 /* 1761 * The original block groups from mkfs can be really small, like 8 1762 * megabytes, so don't bother with a bitmap for those entries. However 1763 * some block groups can be smaller than what a bitmap would cover but 1764 * are still large enough that they could overflow the 32k memory limit, 1765 * so allow those block groups to still be allowed to have a bitmap 1766 * entry. 1767 */ 1768 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) 1769 return false; 1770 1771 return true; 1772 } 1773 1774 static struct btrfs_free_space_op free_space_op = { 1775 .recalc_thresholds = recalculate_thresholds, 1776 .use_bitmap = use_bitmap, 1777 }; 1778 1779 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1780 struct btrfs_free_space *info) 1781 { 1782 struct btrfs_free_space *bitmap_info; 1783 struct btrfs_block_group_cache *block_group = NULL; 1784 int added = 0; 1785 u64 bytes, offset, bytes_added; 1786 int ret; 1787 1788 bytes = info->bytes; 1789 offset = info->offset; 1790 1791 if (!ctl->op->use_bitmap(ctl, info)) 1792 return 0; 1793 1794 if (ctl->op == &free_space_op) 1795 block_group = ctl->private; 1796 again: 1797 /* 1798 * Since we link bitmaps right into the cluster we need to see if we 1799 * have a cluster here, and if so and it has our bitmap we need to add 1800 * the free space to that bitmap. 1801 */ 1802 if (block_group && !list_empty(&block_group->cluster_list)) { 1803 struct btrfs_free_cluster *cluster; 1804 struct rb_node *node; 1805 struct btrfs_free_space *entry; 1806 1807 cluster = list_entry(block_group->cluster_list.next, 1808 struct btrfs_free_cluster, 1809 block_group_list); 1810 spin_lock(&cluster->lock); 1811 node = rb_first(&cluster->root); 1812 if (!node) { 1813 spin_unlock(&cluster->lock); 1814 goto no_cluster_bitmap; 1815 } 1816 1817 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1818 if (!entry->bitmap) { 1819 spin_unlock(&cluster->lock); 1820 goto no_cluster_bitmap; 1821 } 1822 1823 if (entry->offset == offset_to_bitmap(ctl, offset)) { 1824 bytes_added = add_bytes_to_bitmap(ctl, entry, 1825 offset, bytes); 1826 bytes -= bytes_added; 1827 offset += bytes_added; 1828 } 1829 spin_unlock(&cluster->lock); 1830 if (!bytes) { 1831 ret = 1; 1832 goto out; 1833 } 1834 } 1835 1836 no_cluster_bitmap: 1837 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1838 1, 0); 1839 if (!bitmap_info) { 1840 ASSERT(added == 0); 1841 goto new_bitmap; 1842 } 1843 1844 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 1845 bytes -= bytes_added; 1846 offset += bytes_added; 1847 added = 0; 1848 1849 if (!bytes) { 1850 ret = 1; 1851 goto out; 1852 } else 1853 goto again; 1854 1855 new_bitmap: 1856 if (info && info->bitmap) { 1857 add_new_bitmap(ctl, info, offset); 1858 added = 1; 1859 info = NULL; 1860 goto again; 1861 } else { 1862 spin_unlock(&ctl->tree_lock); 1863 1864 /* no pre-allocated info, allocate a new one */ 1865 if (!info) { 1866 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1867 GFP_NOFS); 1868 if (!info) { 1869 spin_lock(&ctl->tree_lock); 1870 ret = -ENOMEM; 1871 goto out; 1872 } 1873 } 1874 1875 /* allocate the bitmap */ 1876 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1877 spin_lock(&ctl->tree_lock); 1878 if (!info->bitmap) { 1879 ret = -ENOMEM; 1880 goto out; 1881 } 1882 goto again; 1883 } 1884 1885 out: 1886 if (info) { 1887 if (info->bitmap) 1888 kfree(info->bitmap); 1889 kmem_cache_free(btrfs_free_space_cachep, info); 1890 } 1891 1892 return ret; 1893 } 1894 1895 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 1896 struct btrfs_free_space *info, bool update_stat) 1897 { 1898 struct btrfs_free_space *left_info; 1899 struct btrfs_free_space *right_info; 1900 bool merged = false; 1901 u64 offset = info->offset; 1902 u64 bytes = info->bytes; 1903 1904 /* 1905 * first we want to see if there is free space adjacent to the range we 1906 * are adding, if there is remove that struct and add a new one to 1907 * cover the entire range 1908 */ 1909 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 1910 if (right_info && rb_prev(&right_info->offset_index)) 1911 left_info = rb_entry(rb_prev(&right_info->offset_index), 1912 struct btrfs_free_space, offset_index); 1913 else 1914 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 1915 1916 if (right_info && !right_info->bitmap) { 1917 if (update_stat) 1918 unlink_free_space(ctl, right_info); 1919 else 1920 __unlink_free_space(ctl, right_info); 1921 info->bytes += right_info->bytes; 1922 kmem_cache_free(btrfs_free_space_cachep, right_info); 1923 merged = true; 1924 } 1925 1926 if (left_info && !left_info->bitmap && 1927 left_info->offset + left_info->bytes == offset) { 1928 if (update_stat) 1929 unlink_free_space(ctl, left_info); 1930 else 1931 __unlink_free_space(ctl, left_info); 1932 info->offset = left_info->offset; 1933 info->bytes += left_info->bytes; 1934 kmem_cache_free(btrfs_free_space_cachep, left_info); 1935 merged = true; 1936 } 1937 1938 return merged; 1939 } 1940 1941 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, 1942 u64 offset, u64 bytes) 1943 { 1944 struct btrfs_free_space *info; 1945 int ret = 0; 1946 1947 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 1948 if (!info) 1949 return -ENOMEM; 1950 1951 info->offset = offset; 1952 info->bytes = bytes; 1953 1954 spin_lock(&ctl->tree_lock); 1955 1956 if (try_merge_free_space(ctl, info, true)) 1957 goto link; 1958 1959 /* 1960 * There was no extent directly to the left or right of this new 1961 * extent then we know we're going to have to allocate a new extent, so 1962 * before we do that see if we need to drop this into a bitmap 1963 */ 1964 ret = insert_into_bitmap(ctl, info); 1965 if (ret < 0) { 1966 goto out; 1967 } else if (ret) { 1968 ret = 0; 1969 goto out; 1970 } 1971 link: 1972 ret = link_free_space(ctl, info); 1973 if (ret) 1974 kmem_cache_free(btrfs_free_space_cachep, info); 1975 out: 1976 spin_unlock(&ctl->tree_lock); 1977 1978 if (ret) { 1979 printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret); 1980 ASSERT(ret != -EEXIST); 1981 } 1982 1983 return ret; 1984 } 1985 1986 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1987 u64 offset, u64 bytes) 1988 { 1989 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1990 struct btrfs_free_space *info; 1991 int ret; 1992 bool re_search = false; 1993 1994 spin_lock(&ctl->tree_lock); 1995 1996 again: 1997 ret = 0; 1998 if (!bytes) 1999 goto out_lock; 2000 2001 info = tree_search_offset(ctl, offset, 0, 0); 2002 if (!info) { 2003 /* 2004 * oops didn't find an extent that matched the space we wanted 2005 * to remove, look for a bitmap instead 2006 */ 2007 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2008 1, 0); 2009 if (!info) { 2010 /* 2011 * If we found a partial bit of our free space in a 2012 * bitmap but then couldn't find the other part this may 2013 * be a problem, so WARN about it. 2014 */ 2015 WARN_ON(re_search); 2016 goto out_lock; 2017 } 2018 } 2019 2020 re_search = false; 2021 if (!info->bitmap) { 2022 unlink_free_space(ctl, info); 2023 if (offset == info->offset) { 2024 u64 to_free = min(bytes, info->bytes); 2025 2026 info->bytes -= to_free; 2027 info->offset += to_free; 2028 if (info->bytes) { 2029 ret = link_free_space(ctl, info); 2030 WARN_ON(ret); 2031 } else { 2032 kmem_cache_free(btrfs_free_space_cachep, info); 2033 } 2034 2035 offset += to_free; 2036 bytes -= to_free; 2037 goto again; 2038 } else { 2039 u64 old_end = info->bytes + info->offset; 2040 2041 info->bytes = offset - info->offset; 2042 ret = link_free_space(ctl, info); 2043 WARN_ON(ret); 2044 if (ret) 2045 goto out_lock; 2046 2047 /* Not enough bytes in this entry to satisfy us */ 2048 if (old_end < offset + bytes) { 2049 bytes -= old_end - offset; 2050 offset = old_end; 2051 goto again; 2052 } else if (old_end == offset + bytes) { 2053 /* all done */ 2054 goto out_lock; 2055 } 2056 spin_unlock(&ctl->tree_lock); 2057 2058 ret = btrfs_add_free_space(block_group, offset + bytes, 2059 old_end - (offset + bytes)); 2060 WARN_ON(ret); 2061 goto out; 2062 } 2063 } 2064 2065 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2066 if (ret == -EAGAIN) { 2067 re_search = true; 2068 goto again; 2069 } 2070 out_lock: 2071 spin_unlock(&ctl->tree_lock); 2072 out: 2073 return ret; 2074 } 2075 2076 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2077 u64 bytes) 2078 { 2079 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2080 struct btrfs_free_space *info; 2081 struct rb_node *n; 2082 int count = 0; 2083 2084 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2085 info = rb_entry(n, struct btrfs_free_space, offset_index); 2086 if (info->bytes >= bytes && !block_group->ro) 2087 count++; 2088 btrfs_crit(block_group->fs_info, 2089 "entry offset %llu, bytes %llu, bitmap %s", 2090 info->offset, info->bytes, 2091 (info->bitmap) ? "yes" : "no"); 2092 } 2093 btrfs_info(block_group->fs_info, "block group has cluster?: %s", 2094 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2095 btrfs_info(block_group->fs_info, 2096 "%d blocks of free space at or bigger than bytes is", count); 2097 } 2098 2099 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2100 { 2101 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2102 2103 spin_lock_init(&ctl->tree_lock); 2104 ctl->unit = block_group->sectorsize; 2105 ctl->start = block_group->key.objectid; 2106 ctl->private = block_group; 2107 ctl->op = &free_space_op; 2108 2109 /* 2110 * we only want to have 32k of ram per block group for keeping 2111 * track of free space, and if we pass 1/2 of that we want to 2112 * start converting things over to using bitmaps 2113 */ 2114 ctl->extents_thresh = ((1024 * 32) / 2) / 2115 sizeof(struct btrfs_free_space); 2116 } 2117 2118 /* 2119 * for a given cluster, put all of its extents back into the free 2120 * space cache. If the block group passed doesn't match the block group 2121 * pointed to by the cluster, someone else raced in and freed the 2122 * cluster already. In that case, we just return without changing anything 2123 */ 2124 static int 2125 __btrfs_return_cluster_to_free_space( 2126 struct btrfs_block_group_cache *block_group, 2127 struct btrfs_free_cluster *cluster) 2128 { 2129 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2130 struct btrfs_free_space *entry; 2131 struct rb_node *node; 2132 2133 spin_lock(&cluster->lock); 2134 if (cluster->block_group != block_group) 2135 goto out; 2136 2137 cluster->block_group = NULL; 2138 cluster->window_start = 0; 2139 list_del_init(&cluster->block_group_list); 2140 2141 node = rb_first(&cluster->root); 2142 while (node) { 2143 bool bitmap; 2144 2145 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2146 node = rb_next(&entry->offset_index); 2147 rb_erase(&entry->offset_index, &cluster->root); 2148 2149 bitmap = (entry->bitmap != NULL); 2150 if (!bitmap) 2151 try_merge_free_space(ctl, entry, false); 2152 tree_insert_offset(&ctl->free_space_offset, 2153 entry->offset, &entry->offset_index, bitmap); 2154 } 2155 cluster->root = RB_ROOT; 2156 2157 out: 2158 spin_unlock(&cluster->lock); 2159 btrfs_put_block_group(block_group); 2160 return 0; 2161 } 2162 2163 static void __btrfs_remove_free_space_cache_locked( 2164 struct btrfs_free_space_ctl *ctl) 2165 { 2166 struct btrfs_free_space *info; 2167 struct rb_node *node; 2168 2169 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2170 info = rb_entry(node, struct btrfs_free_space, offset_index); 2171 if (!info->bitmap) { 2172 unlink_free_space(ctl, info); 2173 kmem_cache_free(btrfs_free_space_cachep, info); 2174 } else { 2175 free_bitmap(ctl, info); 2176 } 2177 if (need_resched()) { 2178 spin_unlock(&ctl->tree_lock); 2179 cond_resched(); 2180 spin_lock(&ctl->tree_lock); 2181 } 2182 } 2183 } 2184 2185 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2186 { 2187 spin_lock(&ctl->tree_lock); 2188 __btrfs_remove_free_space_cache_locked(ctl); 2189 spin_unlock(&ctl->tree_lock); 2190 } 2191 2192 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2193 { 2194 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2195 struct btrfs_free_cluster *cluster; 2196 struct list_head *head; 2197 2198 spin_lock(&ctl->tree_lock); 2199 while ((head = block_group->cluster_list.next) != 2200 &block_group->cluster_list) { 2201 cluster = list_entry(head, struct btrfs_free_cluster, 2202 block_group_list); 2203 2204 WARN_ON(cluster->block_group != block_group); 2205 __btrfs_return_cluster_to_free_space(block_group, cluster); 2206 if (need_resched()) { 2207 spin_unlock(&ctl->tree_lock); 2208 cond_resched(); 2209 spin_lock(&ctl->tree_lock); 2210 } 2211 } 2212 __btrfs_remove_free_space_cache_locked(ctl); 2213 spin_unlock(&ctl->tree_lock); 2214 2215 } 2216 2217 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2218 u64 offset, u64 bytes, u64 empty_size, 2219 u64 *max_extent_size) 2220 { 2221 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2222 struct btrfs_free_space *entry = NULL; 2223 u64 bytes_search = bytes + empty_size; 2224 u64 ret = 0; 2225 u64 align_gap = 0; 2226 u64 align_gap_len = 0; 2227 2228 spin_lock(&ctl->tree_lock); 2229 entry = find_free_space(ctl, &offset, &bytes_search, 2230 block_group->full_stripe_len, max_extent_size); 2231 if (!entry) 2232 goto out; 2233 2234 ret = offset; 2235 if (entry->bitmap) { 2236 bitmap_clear_bits(ctl, entry, offset, bytes); 2237 if (!entry->bytes) 2238 free_bitmap(ctl, entry); 2239 } else { 2240 unlink_free_space(ctl, entry); 2241 align_gap_len = offset - entry->offset; 2242 align_gap = entry->offset; 2243 2244 entry->offset = offset + bytes; 2245 WARN_ON(entry->bytes < bytes + align_gap_len); 2246 2247 entry->bytes -= bytes + align_gap_len; 2248 if (!entry->bytes) 2249 kmem_cache_free(btrfs_free_space_cachep, entry); 2250 else 2251 link_free_space(ctl, entry); 2252 } 2253 out: 2254 spin_unlock(&ctl->tree_lock); 2255 2256 if (align_gap_len) 2257 __btrfs_add_free_space(ctl, align_gap, align_gap_len); 2258 return ret; 2259 } 2260 2261 /* 2262 * given a cluster, put all of its extents back into the free space 2263 * cache. If a block group is passed, this function will only free 2264 * a cluster that belongs to the passed block group. 2265 * 2266 * Otherwise, it'll get a reference on the block group pointed to by the 2267 * cluster and remove the cluster from it. 2268 */ 2269 int btrfs_return_cluster_to_free_space( 2270 struct btrfs_block_group_cache *block_group, 2271 struct btrfs_free_cluster *cluster) 2272 { 2273 struct btrfs_free_space_ctl *ctl; 2274 int ret; 2275 2276 /* first, get a safe pointer to the block group */ 2277 spin_lock(&cluster->lock); 2278 if (!block_group) { 2279 block_group = cluster->block_group; 2280 if (!block_group) { 2281 spin_unlock(&cluster->lock); 2282 return 0; 2283 } 2284 } else if (cluster->block_group != block_group) { 2285 /* someone else has already freed it don't redo their work */ 2286 spin_unlock(&cluster->lock); 2287 return 0; 2288 } 2289 atomic_inc(&block_group->count); 2290 spin_unlock(&cluster->lock); 2291 2292 ctl = block_group->free_space_ctl; 2293 2294 /* now return any extents the cluster had on it */ 2295 spin_lock(&ctl->tree_lock); 2296 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2297 spin_unlock(&ctl->tree_lock); 2298 2299 /* finally drop our ref */ 2300 btrfs_put_block_group(block_group); 2301 return ret; 2302 } 2303 2304 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2305 struct btrfs_free_cluster *cluster, 2306 struct btrfs_free_space *entry, 2307 u64 bytes, u64 min_start, 2308 u64 *max_extent_size) 2309 { 2310 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2311 int err; 2312 u64 search_start = cluster->window_start; 2313 u64 search_bytes = bytes; 2314 u64 ret = 0; 2315 2316 search_start = min_start; 2317 search_bytes = bytes; 2318 2319 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2320 if (err) { 2321 if (search_bytes > *max_extent_size) 2322 *max_extent_size = search_bytes; 2323 return 0; 2324 } 2325 2326 ret = search_start; 2327 __bitmap_clear_bits(ctl, entry, ret, bytes); 2328 2329 return ret; 2330 } 2331 2332 /* 2333 * given a cluster, try to allocate 'bytes' from it, returns 0 2334 * if it couldn't find anything suitably large, or a logical disk offset 2335 * if things worked out 2336 */ 2337 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2338 struct btrfs_free_cluster *cluster, u64 bytes, 2339 u64 min_start, u64 *max_extent_size) 2340 { 2341 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2342 struct btrfs_free_space *entry = NULL; 2343 struct rb_node *node; 2344 u64 ret = 0; 2345 2346 spin_lock(&cluster->lock); 2347 if (bytes > cluster->max_size) 2348 goto out; 2349 2350 if (cluster->block_group != block_group) 2351 goto out; 2352 2353 node = rb_first(&cluster->root); 2354 if (!node) 2355 goto out; 2356 2357 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2358 while (1) { 2359 if (entry->bytes < bytes && entry->bytes > *max_extent_size) 2360 *max_extent_size = entry->bytes; 2361 2362 if (entry->bytes < bytes || 2363 (!entry->bitmap && entry->offset < min_start)) { 2364 node = rb_next(&entry->offset_index); 2365 if (!node) 2366 break; 2367 entry = rb_entry(node, struct btrfs_free_space, 2368 offset_index); 2369 continue; 2370 } 2371 2372 if (entry->bitmap) { 2373 ret = btrfs_alloc_from_bitmap(block_group, 2374 cluster, entry, bytes, 2375 cluster->window_start, 2376 max_extent_size); 2377 if (ret == 0) { 2378 node = rb_next(&entry->offset_index); 2379 if (!node) 2380 break; 2381 entry = rb_entry(node, struct btrfs_free_space, 2382 offset_index); 2383 continue; 2384 } 2385 cluster->window_start += bytes; 2386 } else { 2387 ret = entry->offset; 2388 2389 entry->offset += bytes; 2390 entry->bytes -= bytes; 2391 } 2392 2393 if (entry->bytes == 0) 2394 rb_erase(&entry->offset_index, &cluster->root); 2395 break; 2396 } 2397 out: 2398 spin_unlock(&cluster->lock); 2399 2400 if (!ret) 2401 return 0; 2402 2403 spin_lock(&ctl->tree_lock); 2404 2405 ctl->free_space -= bytes; 2406 if (entry->bytes == 0) { 2407 ctl->free_extents--; 2408 if (entry->bitmap) { 2409 kfree(entry->bitmap); 2410 ctl->total_bitmaps--; 2411 ctl->op->recalc_thresholds(ctl); 2412 } 2413 kmem_cache_free(btrfs_free_space_cachep, entry); 2414 } 2415 2416 spin_unlock(&ctl->tree_lock); 2417 2418 return ret; 2419 } 2420 2421 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2422 struct btrfs_free_space *entry, 2423 struct btrfs_free_cluster *cluster, 2424 u64 offset, u64 bytes, 2425 u64 cont1_bytes, u64 min_bytes) 2426 { 2427 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2428 unsigned long next_zero; 2429 unsigned long i; 2430 unsigned long want_bits; 2431 unsigned long min_bits; 2432 unsigned long found_bits; 2433 unsigned long start = 0; 2434 unsigned long total_found = 0; 2435 int ret; 2436 2437 i = offset_to_bit(entry->offset, ctl->unit, 2438 max_t(u64, offset, entry->offset)); 2439 want_bits = bytes_to_bits(bytes, ctl->unit); 2440 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2441 2442 again: 2443 found_bits = 0; 2444 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2445 next_zero = find_next_zero_bit(entry->bitmap, 2446 BITS_PER_BITMAP, i); 2447 if (next_zero - i >= min_bits) { 2448 found_bits = next_zero - i; 2449 break; 2450 } 2451 i = next_zero; 2452 } 2453 2454 if (!found_bits) 2455 return -ENOSPC; 2456 2457 if (!total_found) { 2458 start = i; 2459 cluster->max_size = 0; 2460 } 2461 2462 total_found += found_bits; 2463 2464 if (cluster->max_size < found_bits * ctl->unit) 2465 cluster->max_size = found_bits * ctl->unit; 2466 2467 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2468 i = next_zero + 1; 2469 goto again; 2470 } 2471 2472 cluster->window_start = start * ctl->unit + entry->offset; 2473 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2474 ret = tree_insert_offset(&cluster->root, entry->offset, 2475 &entry->offset_index, 1); 2476 ASSERT(!ret); /* -EEXIST; Logic error */ 2477 2478 trace_btrfs_setup_cluster(block_group, cluster, 2479 total_found * ctl->unit, 1); 2480 return 0; 2481 } 2482 2483 /* 2484 * This searches the block group for just extents to fill the cluster with. 2485 * Try to find a cluster with at least bytes total bytes, at least one 2486 * extent of cont1_bytes, and other clusters of at least min_bytes. 2487 */ 2488 static noinline int 2489 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2490 struct btrfs_free_cluster *cluster, 2491 struct list_head *bitmaps, u64 offset, u64 bytes, 2492 u64 cont1_bytes, u64 min_bytes) 2493 { 2494 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2495 struct btrfs_free_space *first = NULL; 2496 struct btrfs_free_space *entry = NULL; 2497 struct btrfs_free_space *last; 2498 struct rb_node *node; 2499 u64 window_free; 2500 u64 max_extent; 2501 u64 total_size = 0; 2502 2503 entry = tree_search_offset(ctl, offset, 0, 1); 2504 if (!entry) 2505 return -ENOSPC; 2506 2507 /* 2508 * We don't want bitmaps, so just move along until we find a normal 2509 * extent entry. 2510 */ 2511 while (entry->bitmap || entry->bytes < min_bytes) { 2512 if (entry->bitmap && list_empty(&entry->list)) 2513 list_add_tail(&entry->list, bitmaps); 2514 node = rb_next(&entry->offset_index); 2515 if (!node) 2516 return -ENOSPC; 2517 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2518 } 2519 2520 window_free = entry->bytes; 2521 max_extent = entry->bytes; 2522 first = entry; 2523 last = entry; 2524 2525 for (node = rb_next(&entry->offset_index); node; 2526 node = rb_next(&entry->offset_index)) { 2527 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2528 2529 if (entry->bitmap) { 2530 if (list_empty(&entry->list)) 2531 list_add_tail(&entry->list, bitmaps); 2532 continue; 2533 } 2534 2535 if (entry->bytes < min_bytes) 2536 continue; 2537 2538 last = entry; 2539 window_free += entry->bytes; 2540 if (entry->bytes > max_extent) 2541 max_extent = entry->bytes; 2542 } 2543 2544 if (window_free < bytes || max_extent < cont1_bytes) 2545 return -ENOSPC; 2546 2547 cluster->window_start = first->offset; 2548 2549 node = &first->offset_index; 2550 2551 /* 2552 * now we've found our entries, pull them out of the free space 2553 * cache and put them into the cluster rbtree 2554 */ 2555 do { 2556 int ret; 2557 2558 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2559 node = rb_next(&entry->offset_index); 2560 if (entry->bitmap || entry->bytes < min_bytes) 2561 continue; 2562 2563 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2564 ret = tree_insert_offset(&cluster->root, entry->offset, 2565 &entry->offset_index, 0); 2566 total_size += entry->bytes; 2567 ASSERT(!ret); /* -EEXIST; Logic error */ 2568 } while (node && entry != last); 2569 2570 cluster->max_size = max_extent; 2571 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2572 return 0; 2573 } 2574 2575 /* 2576 * This specifically looks for bitmaps that may work in the cluster, we assume 2577 * that we have already failed to find extents that will work. 2578 */ 2579 static noinline int 2580 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2581 struct btrfs_free_cluster *cluster, 2582 struct list_head *bitmaps, u64 offset, u64 bytes, 2583 u64 cont1_bytes, u64 min_bytes) 2584 { 2585 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2586 struct btrfs_free_space *entry; 2587 int ret = -ENOSPC; 2588 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2589 2590 if (ctl->total_bitmaps == 0) 2591 return -ENOSPC; 2592 2593 /* 2594 * The bitmap that covers offset won't be in the list unless offset 2595 * is just its start offset. 2596 */ 2597 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2598 if (entry->offset != bitmap_offset) { 2599 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2600 if (entry && list_empty(&entry->list)) 2601 list_add(&entry->list, bitmaps); 2602 } 2603 2604 list_for_each_entry(entry, bitmaps, list) { 2605 if (entry->bytes < bytes) 2606 continue; 2607 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2608 bytes, cont1_bytes, min_bytes); 2609 if (!ret) 2610 return 0; 2611 } 2612 2613 /* 2614 * The bitmaps list has all the bitmaps that record free space 2615 * starting after offset, so no more search is required. 2616 */ 2617 return -ENOSPC; 2618 } 2619 2620 /* 2621 * here we try to find a cluster of blocks in a block group. The goal 2622 * is to find at least bytes+empty_size. 2623 * We might not find them all in one contiguous area. 2624 * 2625 * returns zero and sets up cluster if things worked out, otherwise 2626 * it returns -enospc 2627 */ 2628 int btrfs_find_space_cluster(struct btrfs_root *root, 2629 struct btrfs_block_group_cache *block_group, 2630 struct btrfs_free_cluster *cluster, 2631 u64 offset, u64 bytes, u64 empty_size) 2632 { 2633 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2634 struct btrfs_free_space *entry, *tmp; 2635 LIST_HEAD(bitmaps); 2636 u64 min_bytes; 2637 u64 cont1_bytes; 2638 int ret; 2639 2640 /* 2641 * Choose the minimum extent size we'll require for this 2642 * cluster. For SSD_SPREAD, don't allow any fragmentation. 2643 * For metadata, allow allocates with smaller extents. For 2644 * data, keep it dense. 2645 */ 2646 if (btrfs_test_opt(root, SSD_SPREAD)) { 2647 cont1_bytes = min_bytes = bytes + empty_size; 2648 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 2649 cont1_bytes = bytes; 2650 min_bytes = block_group->sectorsize; 2651 } else { 2652 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 2653 min_bytes = block_group->sectorsize; 2654 } 2655 2656 spin_lock(&ctl->tree_lock); 2657 2658 /* 2659 * If we know we don't have enough space to make a cluster don't even 2660 * bother doing all the work to try and find one. 2661 */ 2662 if (ctl->free_space < bytes) { 2663 spin_unlock(&ctl->tree_lock); 2664 return -ENOSPC; 2665 } 2666 2667 spin_lock(&cluster->lock); 2668 2669 /* someone already found a cluster, hooray */ 2670 if (cluster->block_group) { 2671 ret = 0; 2672 goto out; 2673 } 2674 2675 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 2676 min_bytes); 2677 2678 INIT_LIST_HEAD(&bitmaps); 2679 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 2680 bytes + empty_size, 2681 cont1_bytes, min_bytes); 2682 if (ret) 2683 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 2684 offset, bytes + empty_size, 2685 cont1_bytes, min_bytes); 2686 2687 /* Clear our temporary list */ 2688 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 2689 list_del_init(&entry->list); 2690 2691 if (!ret) { 2692 atomic_inc(&block_group->count); 2693 list_add_tail(&cluster->block_group_list, 2694 &block_group->cluster_list); 2695 cluster->block_group = block_group; 2696 } else { 2697 trace_btrfs_failed_cluster_setup(block_group); 2698 } 2699 out: 2700 spin_unlock(&cluster->lock); 2701 spin_unlock(&ctl->tree_lock); 2702 2703 return ret; 2704 } 2705 2706 /* 2707 * simple code to zero out a cluster 2708 */ 2709 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 2710 { 2711 spin_lock_init(&cluster->lock); 2712 spin_lock_init(&cluster->refill_lock); 2713 cluster->root = RB_ROOT; 2714 cluster->max_size = 0; 2715 INIT_LIST_HEAD(&cluster->block_group_list); 2716 cluster->block_group = NULL; 2717 } 2718 2719 static int do_trimming(struct btrfs_block_group_cache *block_group, 2720 u64 *total_trimmed, u64 start, u64 bytes, 2721 u64 reserved_start, u64 reserved_bytes) 2722 { 2723 struct btrfs_space_info *space_info = block_group->space_info; 2724 struct btrfs_fs_info *fs_info = block_group->fs_info; 2725 int ret; 2726 int update = 0; 2727 u64 trimmed = 0; 2728 2729 spin_lock(&space_info->lock); 2730 spin_lock(&block_group->lock); 2731 if (!block_group->ro) { 2732 block_group->reserved += reserved_bytes; 2733 space_info->bytes_reserved += reserved_bytes; 2734 update = 1; 2735 } 2736 spin_unlock(&block_group->lock); 2737 spin_unlock(&space_info->lock); 2738 2739 ret = btrfs_error_discard_extent(fs_info->extent_root, 2740 start, bytes, &trimmed); 2741 if (!ret) 2742 *total_trimmed += trimmed; 2743 2744 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 2745 2746 if (update) { 2747 spin_lock(&space_info->lock); 2748 spin_lock(&block_group->lock); 2749 if (block_group->ro) 2750 space_info->bytes_readonly += reserved_bytes; 2751 block_group->reserved -= reserved_bytes; 2752 space_info->bytes_reserved -= reserved_bytes; 2753 spin_unlock(&space_info->lock); 2754 spin_unlock(&block_group->lock); 2755 } 2756 2757 return ret; 2758 } 2759 2760 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 2761 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 2762 { 2763 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2764 struct btrfs_free_space *entry; 2765 struct rb_node *node; 2766 int ret = 0; 2767 u64 extent_start; 2768 u64 extent_bytes; 2769 u64 bytes; 2770 2771 while (start < end) { 2772 spin_lock(&ctl->tree_lock); 2773 2774 if (ctl->free_space < minlen) { 2775 spin_unlock(&ctl->tree_lock); 2776 break; 2777 } 2778 2779 entry = tree_search_offset(ctl, start, 0, 1); 2780 if (!entry) { 2781 spin_unlock(&ctl->tree_lock); 2782 break; 2783 } 2784 2785 /* skip bitmaps */ 2786 while (entry->bitmap) { 2787 node = rb_next(&entry->offset_index); 2788 if (!node) { 2789 spin_unlock(&ctl->tree_lock); 2790 goto out; 2791 } 2792 entry = rb_entry(node, struct btrfs_free_space, 2793 offset_index); 2794 } 2795 2796 if (entry->offset >= end) { 2797 spin_unlock(&ctl->tree_lock); 2798 break; 2799 } 2800 2801 extent_start = entry->offset; 2802 extent_bytes = entry->bytes; 2803 start = max(start, extent_start); 2804 bytes = min(extent_start + extent_bytes, end) - start; 2805 if (bytes < minlen) { 2806 spin_unlock(&ctl->tree_lock); 2807 goto next; 2808 } 2809 2810 unlink_free_space(ctl, entry); 2811 kmem_cache_free(btrfs_free_space_cachep, entry); 2812 2813 spin_unlock(&ctl->tree_lock); 2814 2815 ret = do_trimming(block_group, total_trimmed, start, bytes, 2816 extent_start, extent_bytes); 2817 if (ret) 2818 break; 2819 next: 2820 start += bytes; 2821 2822 if (fatal_signal_pending(current)) { 2823 ret = -ERESTARTSYS; 2824 break; 2825 } 2826 2827 cond_resched(); 2828 } 2829 out: 2830 return ret; 2831 } 2832 2833 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 2834 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 2835 { 2836 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2837 struct btrfs_free_space *entry; 2838 int ret = 0; 2839 int ret2; 2840 u64 bytes; 2841 u64 offset = offset_to_bitmap(ctl, start); 2842 2843 while (offset < end) { 2844 bool next_bitmap = false; 2845 2846 spin_lock(&ctl->tree_lock); 2847 2848 if (ctl->free_space < minlen) { 2849 spin_unlock(&ctl->tree_lock); 2850 break; 2851 } 2852 2853 entry = tree_search_offset(ctl, offset, 1, 0); 2854 if (!entry) { 2855 spin_unlock(&ctl->tree_lock); 2856 next_bitmap = true; 2857 goto next; 2858 } 2859 2860 bytes = minlen; 2861 ret2 = search_bitmap(ctl, entry, &start, &bytes); 2862 if (ret2 || start >= end) { 2863 spin_unlock(&ctl->tree_lock); 2864 next_bitmap = true; 2865 goto next; 2866 } 2867 2868 bytes = min(bytes, end - start); 2869 if (bytes < minlen) { 2870 spin_unlock(&ctl->tree_lock); 2871 goto next; 2872 } 2873 2874 bitmap_clear_bits(ctl, entry, start, bytes); 2875 if (entry->bytes == 0) 2876 free_bitmap(ctl, entry); 2877 2878 spin_unlock(&ctl->tree_lock); 2879 2880 ret = do_trimming(block_group, total_trimmed, start, bytes, 2881 start, bytes); 2882 if (ret) 2883 break; 2884 next: 2885 if (next_bitmap) { 2886 offset += BITS_PER_BITMAP * ctl->unit; 2887 } else { 2888 start += bytes; 2889 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 2890 offset += BITS_PER_BITMAP * ctl->unit; 2891 } 2892 2893 if (fatal_signal_pending(current)) { 2894 ret = -ERESTARTSYS; 2895 break; 2896 } 2897 2898 cond_resched(); 2899 } 2900 2901 return ret; 2902 } 2903 2904 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2905 u64 *trimmed, u64 start, u64 end, u64 minlen) 2906 { 2907 int ret; 2908 2909 *trimmed = 0; 2910 2911 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 2912 if (ret) 2913 return ret; 2914 2915 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 2916 2917 return ret; 2918 } 2919 2920 /* 2921 * Find the left-most item in the cache tree, and then return the 2922 * smallest inode number in the item. 2923 * 2924 * Note: the returned inode number may not be the smallest one in 2925 * the tree, if the left-most item is a bitmap. 2926 */ 2927 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 2928 { 2929 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 2930 struct btrfs_free_space *entry = NULL; 2931 u64 ino = 0; 2932 2933 spin_lock(&ctl->tree_lock); 2934 2935 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 2936 goto out; 2937 2938 entry = rb_entry(rb_first(&ctl->free_space_offset), 2939 struct btrfs_free_space, offset_index); 2940 2941 if (!entry->bitmap) { 2942 ino = entry->offset; 2943 2944 unlink_free_space(ctl, entry); 2945 entry->offset++; 2946 entry->bytes--; 2947 if (!entry->bytes) 2948 kmem_cache_free(btrfs_free_space_cachep, entry); 2949 else 2950 link_free_space(ctl, entry); 2951 } else { 2952 u64 offset = 0; 2953 u64 count = 1; 2954 int ret; 2955 2956 ret = search_bitmap(ctl, entry, &offset, &count); 2957 /* Logic error; Should be empty if it can't find anything */ 2958 ASSERT(!ret); 2959 2960 ino = offset; 2961 bitmap_clear_bits(ctl, entry, offset, 1); 2962 if (entry->bytes == 0) 2963 free_bitmap(ctl, entry); 2964 } 2965 out: 2966 spin_unlock(&ctl->tree_lock); 2967 2968 return ino; 2969 } 2970 2971 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 2972 struct btrfs_path *path) 2973 { 2974 struct inode *inode = NULL; 2975 2976 spin_lock(&root->cache_lock); 2977 if (root->cache_inode) 2978 inode = igrab(root->cache_inode); 2979 spin_unlock(&root->cache_lock); 2980 if (inode) 2981 return inode; 2982 2983 inode = __lookup_free_space_inode(root, path, 0); 2984 if (IS_ERR(inode)) 2985 return inode; 2986 2987 spin_lock(&root->cache_lock); 2988 if (!btrfs_fs_closing(root->fs_info)) 2989 root->cache_inode = igrab(inode); 2990 spin_unlock(&root->cache_lock); 2991 2992 return inode; 2993 } 2994 2995 int create_free_ino_inode(struct btrfs_root *root, 2996 struct btrfs_trans_handle *trans, 2997 struct btrfs_path *path) 2998 { 2999 return __create_free_space_inode(root, trans, path, 3000 BTRFS_FREE_INO_OBJECTID, 0); 3001 } 3002 3003 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3004 { 3005 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3006 struct btrfs_path *path; 3007 struct inode *inode; 3008 int ret = 0; 3009 u64 root_gen = btrfs_root_generation(&root->root_item); 3010 3011 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3012 return 0; 3013 3014 /* 3015 * If we're unmounting then just return, since this does a search on the 3016 * normal root and not the commit root and we could deadlock. 3017 */ 3018 if (btrfs_fs_closing(fs_info)) 3019 return 0; 3020 3021 path = btrfs_alloc_path(); 3022 if (!path) 3023 return 0; 3024 3025 inode = lookup_free_ino_inode(root, path); 3026 if (IS_ERR(inode)) 3027 goto out; 3028 3029 if (root_gen != BTRFS_I(inode)->generation) 3030 goto out_put; 3031 3032 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3033 3034 if (ret < 0) 3035 btrfs_err(fs_info, 3036 "failed to load free ino cache for root %llu", 3037 root->root_key.objectid); 3038 out_put: 3039 iput(inode); 3040 out: 3041 btrfs_free_path(path); 3042 return ret; 3043 } 3044 3045 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3046 struct btrfs_trans_handle *trans, 3047 struct btrfs_path *path, 3048 struct inode *inode) 3049 { 3050 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3051 int ret; 3052 3053 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3054 return 0; 3055 3056 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); 3057 if (ret) { 3058 btrfs_delalloc_release_metadata(inode, inode->i_size); 3059 #ifdef DEBUG 3060 btrfs_err(root->fs_info, 3061 "failed to write free ino cache for root %llu", 3062 root->root_key.objectid); 3063 #endif 3064 } 3065 3066 return ret; 3067 } 3068 3069 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3070 /* 3071 * Use this if you need to make a bitmap or extent entry specifically, it 3072 * doesn't do any of the merging that add_free_space does, this acts a lot like 3073 * how the free space cache loading stuff works, so you can get really weird 3074 * configurations. 3075 */ 3076 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3077 u64 offset, u64 bytes, bool bitmap) 3078 { 3079 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3080 struct btrfs_free_space *info = NULL, *bitmap_info; 3081 void *map = NULL; 3082 u64 bytes_added; 3083 int ret; 3084 3085 again: 3086 if (!info) { 3087 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3088 if (!info) 3089 return -ENOMEM; 3090 } 3091 3092 if (!bitmap) { 3093 spin_lock(&ctl->tree_lock); 3094 info->offset = offset; 3095 info->bytes = bytes; 3096 ret = link_free_space(ctl, info); 3097 spin_unlock(&ctl->tree_lock); 3098 if (ret) 3099 kmem_cache_free(btrfs_free_space_cachep, info); 3100 return ret; 3101 } 3102 3103 if (!map) { 3104 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 3105 if (!map) { 3106 kmem_cache_free(btrfs_free_space_cachep, info); 3107 return -ENOMEM; 3108 } 3109 } 3110 3111 spin_lock(&ctl->tree_lock); 3112 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3113 1, 0); 3114 if (!bitmap_info) { 3115 info->bitmap = map; 3116 map = NULL; 3117 add_new_bitmap(ctl, info, offset); 3118 bitmap_info = info; 3119 } 3120 3121 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3122 bytes -= bytes_added; 3123 offset += bytes_added; 3124 spin_unlock(&ctl->tree_lock); 3125 3126 if (bytes) 3127 goto again; 3128 3129 if (map) 3130 kfree(map); 3131 return 0; 3132 } 3133 3134 /* 3135 * Checks to see if the given range is in the free space cache. This is really 3136 * just used to check the absence of space, so if there is free space in the 3137 * range at all we will return 1. 3138 */ 3139 int test_check_exists(struct btrfs_block_group_cache *cache, 3140 u64 offset, u64 bytes) 3141 { 3142 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3143 struct btrfs_free_space *info; 3144 int ret = 0; 3145 3146 spin_lock(&ctl->tree_lock); 3147 info = tree_search_offset(ctl, offset, 0, 0); 3148 if (!info) { 3149 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3150 1, 0); 3151 if (!info) 3152 goto out; 3153 } 3154 3155 have_info: 3156 if (info->bitmap) { 3157 u64 bit_off, bit_bytes; 3158 struct rb_node *n; 3159 struct btrfs_free_space *tmp; 3160 3161 bit_off = offset; 3162 bit_bytes = ctl->unit; 3163 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes); 3164 if (!ret) { 3165 if (bit_off == offset) { 3166 ret = 1; 3167 goto out; 3168 } else if (bit_off > offset && 3169 offset + bytes > bit_off) { 3170 ret = 1; 3171 goto out; 3172 } 3173 } 3174 3175 n = rb_prev(&info->offset_index); 3176 while (n) { 3177 tmp = rb_entry(n, struct btrfs_free_space, 3178 offset_index); 3179 if (tmp->offset + tmp->bytes < offset) 3180 break; 3181 if (offset + bytes < tmp->offset) { 3182 n = rb_prev(&info->offset_index); 3183 continue; 3184 } 3185 info = tmp; 3186 goto have_info; 3187 } 3188 3189 n = rb_next(&info->offset_index); 3190 while (n) { 3191 tmp = rb_entry(n, struct btrfs_free_space, 3192 offset_index); 3193 if (offset + bytes < tmp->offset) 3194 break; 3195 if (tmp->offset + tmp->bytes < offset) { 3196 n = rb_next(&info->offset_index); 3197 continue; 3198 } 3199 info = tmp; 3200 goto have_info; 3201 } 3202 3203 goto out; 3204 } 3205 3206 if (info->offset == offset) { 3207 ret = 1; 3208 goto out; 3209 } 3210 3211 if (offset > info->offset && offset < info->offset + info->bytes) 3212 ret = 1; 3213 out: 3214 spin_unlock(&ctl->tree_lock); 3215 return ret; 3216 } 3217 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3218