1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 #include "volumes.h" 31 32 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 33 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 34 35 struct btrfs_trim_range { 36 u64 start; 37 u64 bytes; 38 struct list_head list; 39 }; 40 41 static int link_free_space(struct btrfs_free_space_ctl *ctl, 42 struct btrfs_free_space *info); 43 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 44 struct btrfs_free_space *info); 45 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 46 struct btrfs_trans_handle *trans, 47 struct btrfs_io_ctl *io_ctl, 48 struct btrfs_path *path); 49 50 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 51 struct btrfs_path *path, 52 u64 offset) 53 { 54 struct btrfs_fs_info *fs_info = root->fs_info; 55 struct btrfs_key key; 56 struct btrfs_key location; 57 struct btrfs_disk_key disk_key; 58 struct btrfs_free_space_header *header; 59 struct extent_buffer *leaf; 60 struct inode *inode = NULL; 61 int ret; 62 63 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 64 key.offset = offset; 65 key.type = 0; 66 67 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 68 if (ret < 0) 69 return ERR_PTR(ret); 70 if (ret > 0) { 71 btrfs_release_path(path); 72 return ERR_PTR(-ENOENT); 73 } 74 75 leaf = path->nodes[0]; 76 header = btrfs_item_ptr(leaf, path->slots[0], 77 struct btrfs_free_space_header); 78 btrfs_free_space_key(leaf, header, &disk_key); 79 btrfs_disk_key_to_cpu(&location, &disk_key); 80 btrfs_release_path(path); 81 82 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 83 if (IS_ERR(inode)) 84 return inode; 85 if (is_bad_inode(inode)) { 86 iput(inode); 87 return ERR_PTR(-ENOENT); 88 } 89 90 mapping_set_gfp_mask(inode->i_mapping, 91 mapping_gfp_constraint(inode->i_mapping, 92 ~(__GFP_FS | __GFP_HIGHMEM))); 93 94 return inode; 95 } 96 97 struct inode *lookup_free_space_inode(struct btrfs_root *root, 98 struct btrfs_block_group_cache 99 *block_group, struct btrfs_path *path) 100 { 101 struct inode *inode = NULL; 102 struct btrfs_fs_info *fs_info = root->fs_info; 103 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 104 105 spin_lock(&block_group->lock); 106 if (block_group->inode) 107 inode = igrab(block_group->inode); 108 spin_unlock(&block_group->lock); 109 if (inode) 110 return inode; 111 112 inode = __lookup_free_space_inode(root, path, 113 block_group->key.objectid); 114 if (IS_ERR(inode)) 115 return inode; 116 117 spin_lock(&block_group->lock); 118 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 119 btrfs_info(fs_info, 
"Old style space inode found, converting."); 120 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 121 BTRFS_INODE_NODATACOW; 122 block_group->disk_cache_state = BTRFS_DC_CLEAR; 123 } 124 125 if (!block_group->iref) { 126 block_group->inode = igrab(inode); 127 block_group->iref = 1; 128 } 129 spin_unlock(&block_group->lock); 130 131 return inode; 132 } 133 134 static int __create_free_space_inode(struct btrfs_root *root, 135 struct btrfs_trans_handle *trans, 136 struct btrfs_path *path, 137 u64 ino, u64 offset) 138 { 139 struct btrfs_key key; 140 struct btrfs_disk_key disk_key; 141 struct btrfs_free_space_header *header; 142 struct btrfs_inode_item *inode_item; 143 struct extent_buffer *leaf; 144 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 145 int ret; 146 147 ret = btrfs_insert_empty_inode(trans, root, path, ino); 148 if (ret) 149 return ret; 150 151 /* We inline crc's for the free disk space cache */ 152 if (ino != BTRFS_FREE_INO_OBJECTID) 153 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 154 155 leaf = path->nodes[0]; 156 inode_item = btrfs_item_ptr(leaf, path->slots[0], 157 struct btrfs_inode_item); 158 btrfs_item_key(leaf, &disk_key, path->slots[0]); 159 memzero_extent_buffer(leaf, (unsigned long)inode_item, 160 sizeof(*inode_item)); 161 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 162 btrfs_set_inode_size(leaf, inode_item, 0); 163 btrfs_set_inode_nbytes(leaf, inode_item, 0); 164 btrfs_set_inode_uid(leaf, inode_item, 0); 165 btrfs_set_inode_gid(leaf, inode_item, 0); 166 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 167 btrfs_set_inode_flags(leaf, inode_item, flags); 168 btrfs_set_inode_nlink(leaf, inode_item, 1); 169 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 170 btrfs_set_inode_block_group(leaf, inode_item, offset); 171 btrfs_mark_buffer_dirty(leaf); 172 btrfs_release_path(path); 173 174 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 175 key.offset = offset; 176 key.type = 0; 177 ret = btrfs_insert_empty_item(trans, root, path, &key, 178 sizeof(struct btrfs_free_space_header)); 179 if (ret < 0) { 180 btrfs_release_path(path); 181 return ret; 182 } 183 184 leaf = path->nodes[0]; 185 header = btrfs_item_ptr(leaf, path->slots[0], 186 struct btrfs_free_space_header); 187 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 188 btrfs_set_free_space_key(leaf, header, &disk_key); 189 btrfs_mark_buffer_dirty(leaf); 190 btrfs_release_path(path); 191 192 return 0; 193 } 194 195 int create_free_space_inode(struct btrfs_root *root, 196 struct btrfs_trans_handle *trans, 197 struct btrfs_block_group_cache *block_group, 198 struct btrfs_path *path) 199 { 200 int ret; 201 u64 ino; 202 203 ret = btrfs_find_free_objectid(root, &ino); 204 if (ret < 0) 205 return ret; 206 207 return __create_free_space_inode(root, trans, path, ino, 208 block_group->key.objectid); 209 } 210 211 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 212 struct btrfs_block_rsv *rsv) 213 { 214 u64 needed_bytes; 215 int ret; 216 217 /* 1 for slack space, 1 for updating the inode */ 218 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 219 btrfs_calc_trans_metadata_size(fs_info, 1); 220 221 spin_lock(&rsv->lock); 222 if (rsv->reserved < needed_bytes) 223 ret = -ENOSPC; 224 else 225 ret = 0; 226 spin_unlock(&rsv->lock); 227 return ret; 228 } 229 230 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 231 struct btrfs_trans_handle *trans, 232 struct btrfs_block_group_cache *block_group, 233 struct inode *inode) 234 { 
235 int ret = 0; 236 struct btrfs_path *path = btrfs_alloc_path(); 237 bool locked = false; 238 239 if (!path) { 240 ret = -ENOMEM; 241 goto fail; 242 } 243 244 if (block_group) { 245 locked = true; 246 mutex_lock(&trans->transaction->cache_write_mutex); 247 if (!list_empty(&block_group->io_list)) { 248 list_del_init(&block_group->io_list); 249 250 btrfs_wait_cache_io(trans, block_group, path); 251 btrfs_put_block_group(block_group); 252 } 253 254 /* 255 * now that we've truncated the cache away, its no longer 256 * setup or written 257 */ 258 spin_lock(&block_group->lock); 259 block_group->disk_cache_state = BTRFS_DC_CLEAR; 260 spin_unlock(&block_group->lock); 261 } 262 btrfs_free_path(path); 263 264 btrfs_i_size_write(inode, 0); 265 truncate_pagecache(inode, 0); 266 267 /* 268 * We don't need an orphan item because truncating the free space cache 269 * will never be split across transactions. 270 * We don't need to check for -EAGAIN because we're a free space 271 * cache inode 272 */ 273 ret = btrfs_truncate_inode_items(trans, root, inode, 274 0, BTRFS_EXTENT_DATA_KEY); 275 if (ret) 276 goto fail; 277 278 ret = btrfs_update_inode(trans, root, inode); 279 280 fail: 281 if (locked) 282 mutex_unlock(&trans->transaction->cache_write_mutex); 283 if (ret) 284 btrfs_abort_transaction(trans, ret); 285 286 return ret; 287 } 288 289 static int readahead_cache(struct inode *inode) 290 { 291 struct file_ra_state *ra; 292 unsigned long last_index; 293 294 ra = kzalloc(sizeof(*ra), GFP_NOFS); 295 if (!ra) 296 return -ENOMEM; 297 298 file_ra_state_init(ra, inode->i_mapping); 299 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 300 301 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 302 303 kfree(ra); 304 305 return 0; 306 } 307 308 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 309 int write) 310 { 311 int num_pages; 312 int check_crcs = 0; 313 314 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 315 316 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 317 check_crcs = 1; 318 319 /* Make sure we can fit our crcs into the first page */ 320 if (write && check_crcs && 321 (num_pages * sizeof(u32)) >= PAGE_SIZE) 322 return -ENOSPC; 323 324 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 325 326 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 327 if (!io_ctl->pages) 328 return -ENOMEM; 329 330 io_ctl->num_pages = num_pages; 331 io_ctl->fs_info = btrfs_sb(inode->i_sb); 332 io_ctl->check_crcs = check_crcs; 333 io_ctl->inode = inode; 334 335 return 0; 336 } 337 338 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 339 { 340 kfree(io_ctl->pages); 341 io_ctl->pages = NULL; 342 } 343 344 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 345 { 346 if (io_ctl->cur) { 347 io_ctl->cur = NULL; 348 io_ctl->orig = NULL; 349 } 350 } 351 352 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 353 { 354 ASSERT(io_ctl->index < io_ctl->num_pages); 355 io_ctl->page = io_ctl->pages[io_ctl->index++]; 356 io_ctl->cur = page_address(io_ctl->page); 357 io_ctl->orig = io_ctl->cur; 358 io_ctl->size = PAGE_SIZE; 359 if (clear) 360 memset(io_ctl->cur, 0, PAGE_SIZE); 361 } 362 363 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 364 { 365 int i; 366 367 io_ctl_unmap_page(io_ctl); 368 369 for (i = 0; i < io_ctl->num_pages; i++) { 370 if (io_ctl->pages[i]) { 371 ClearPageChecked(io_ctl->pages[i]); 372 unlock_page(io_ctl->pages[i]); 373 put_page(io_ctl->pages[i]); 374 } 375 } 376 } 377 378 static int 
io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 379 int uptodate) 380 { 381 struct page *page; 382 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 383 int i; 384 385 for (i = 0; i < io_ctl->num_pages; i++) { 386 page = find_or_create_page(inode->i_mapping, i, mask); 387 if (!page) { 388 io_ctl_drop_pages(io_ctl); 389 return -ENOMEM; 390 } 391 io_ctl->pages[i] = page; 392 if (uptodate && !PageUptodate(page)) { 393 btrfs_readpage(NULL, page); 394 lock_page(page); 395 if (!PageUptodate(page)) { 396 btrfs_err(BTRFS_I(inode)->root->fs_info, 397 "error reading free space cache"); 398 io_ctl_drop_pages(io_ctl); 399 return -EIO; 400 } 401 } 402 } 403 404 for (i = 0; i < io_ctl->num_pages; i++) { 405 clear_page_dirty_for_io(io_ctl->pages[i]); 406 set_page_extent_mapped(io_ctl->pages[i]); 407 } 408 409 return 0; 410 } 411 412 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 413 { 414 __le64 *val; 415 416 io_ctl_map_page(io_ctl, 1); 417 418 /* 419 * Skip the csum areas. If we don't check crcs then we just have a 420 * 64bit chunk at the front of the first page. 421 */ 422 if (io_ctl->check_crcs) { 423 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 424 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 425 } else { 426 io_ctl->cur += sizeof(u64); 427 io_ctl->size -= sizeof(u64) * 2; 428 } 429 430 val = io_ctl->cur; 431 *val = cpu_to_le64(generation); 432 io_ctl->cur += sizeof(u64); 433 } 434 435 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 436 { 437 __le64 *gen; 438 439 /* 440 * Skip the crc area. If we don't check crcs then we just have a 64bit 441 * chunk at the front of the first page. 442 */ 443 if (io_ctl->check_crcs) { 444 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 445 io_ctl->size -= sizeof(u64) + 446 (sizeof(u32) * io_ctl->num_pages); 447 } else { 448 io_ctl->cur += sizeof(u64); 449 io_ctl->size -= sizeof(u64) * 2; 450 } 451 452 gen = io_ctl->cur; 453 if (le64_to_cpu(*gen) != generation) { 454 btrfs_err_rl(io_ctl->fs_info, 455 "space cache generation (%llu) does not match inode (%llu)", 456 *gen, generation); 457 io_ctl_unmap_page(io_ctl); 458 return -EIO; 459 } 460 io_ctl->cur += sizeof(u64); 461 return 0; 462 } 463 464 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 465 { 466 u32 *tmp; 467 u32 crc = ~(u32)0; 468 unsigned offset = 0; 469 470 if (!io_ctl->check_crcs) { 471 io_ctl_unmap_page(io_ctl); 472 return; 473 } 474 475 if (index == 0) 476 offset = sizeof(u32) * io_ctl->num_pages; 477 478 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 479 PAGE_SIZE - offset); 480 btrfs_csum_final(crc, (u8 *)&crc); 481 io_ctl_unmap_page(io_ctl); 482 tmp = page_address(io_ctl->pages[0]); 483 tmp += index; 484 *tmp = crc; 485 } 486 487 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 488 { 489 u32 *tmp, val; 490 u32 crc = ~(u32)0; 491 unsigned offset = 0; 492 493 if (!io_ctl->check_crcs) { 494 io_ctl_map_page(io_ctl, 0); 495 return 0; 496 } 497 498 if (index == 0) 499 offset = sizeof(u32) * io_ctl->num_pages; 500 501 tmp = page_address(io_ctl->pages[0]); 502 tmp += index; 503 val = *tmp; 504 505 io_ctl_map_page(io_ctl, 0); 506 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 507 PAGE_SIZE - offset); 508 btrfs_csum_final(crc, (u8 *)&crc); 509 if (val != crc) { 510 btrfs_err_rl(io_ctl->fs_info, 511 "csum mismatch on free space cache"); 512 io_ctl_unmap_page(io_ctl); 513 return -EIO; 514 } 515 516 return 0; 517 } 518 519 static int io_ctl_add_entry(struct 
btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 520 void *bitmap) 521 { 522 struct btrfs_free_space_entry *entry; 523 524 if (!io_ctl->cur) 525 return -ENOSPC; 526 527 entry = io_ctl->cur; 528 entry->offset = cpu_to_le64(offset); 529 entry->bytes = cpu_to_le64(bytes); 530 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 531 BTRFS_FREE_SPACE_EXTENT; 532 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 533 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 534 535 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 536 return 0; 537 538 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 539 540 /* No more pages to map */ 541 if (io_ctl->index >= io_ctl->num_pages) 542 return 0; 543 544 /* map the next page */ 545 io_ctl_map_page(io_ctl, 1); 546 return 0; 547 } 548 549 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 550 { 551 if (!io_ctl->cur) 552 return -ENOSPC; 553 554 /* 555 * If we aren't at the start of the current page, unmap this one and 556 * map the next one if there is any left. 557 */ 558 if (io_ctl->cur != io_ctl->orig) { 559 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 560 if (io_ctl->index >= io_ctl->num_pages) 561 return -ENOSPC; 562 io_ctl_map_page(io_ctl, 0); 563 } 564 565 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 566 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 567 if (io_ctl->index < io_ctl->num_pages) 568 io_ctl_map_page(io_ctl, 0); 569 return 0; 570 } 571 572 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 573 { 574 /* 575 * If we're not on the boundary we know we've modified the page and we 576 * need to crc the page. 577 */ 578 if (io_ctl->cur != io_ctl->orig) 579 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 580 else 581 io_ctl_unmap_page(io_ctl); 582 583 while (io_ctl->index < io_ctl->num_pages) { 584 io_ctl_map_page(io_ctl, 1); 585 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 586 } 587 } 588 589 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 590 struct btrfs_free_space *entry, u8 *type) 591 { 592 struct btrfs_free_space_entry *e; 593 int ret; 594 595 if (!io_ctl->cur) { 596 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 597 if (ret) 598 return ret; 599 } 600 601 e = io_ctl->cur; 602 entry->offset = le64_to_cpu(e->offset); 603 entry->bytes = le64_to_cpu(e->bytes); 604 *type = e->type; 605 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 606 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 607 608 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 609 return 0; 610 611 io_ctl_unmap_page(io_ctl); 612 613 return 0; 614 } 615 616 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 617 struct btrfs_free_space *entry) 618 { 619 int ret; 620 621 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 622 if (ret) 623 return ret; 624 625 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 626 io_ctl_unmap_page(io_ctl); 627 628 return 0; 629 } 630 631 /* 632 * Since we attach pinned extents after the fact we can have contiguous sections 633 * of free space that are split up in entries. This poses a problem with the 634 * tree logging stuff since it could have allocated across what appears to be 2 635 * entries since we would have merged the entries when adding the pinned extents 636 * back to the free space cache. So run through the space cache that we just 637 * loaded and merge contiguous entries. This will make the log replay stuff not 638 * blow up and it will make for nicer allocator behavior. 
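 *
 * For example (illustrative numbers only): if [64M, 65M) was written out as a
 * regular extent entry and the adjacent [65M, 66M) range only came back as a
 * pinned extent at commit time, the reloaded cache holds two entries even
 * though the allocator (and the tree log) treated it as one contiguous 2M
 * region.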
639 */ 640 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 641 { 642 struct btrfs_free_space *e, *prev = NULL; 643 struct rb_node *n; 644 645 again: 646 spin_lock(&ctl->tree_lock); 647 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 648 e = rb_entry(n, struct btrfs_free_space, offset_index); 649 if (!prev) 650 goto next; 651 if (e->bitmap || prev->bitmap) 652 goto next; 653 if (prev->offset + prev->bytes == e->offset) { 654 unlink_free_space(ctl, prev); 655 unlink_free_space(ctl, e); 656 prev->bytes += e->bytes; 657 kmem_cache_free(btrfs_free_space_cachep, e); 658 link_free_space(ctl, prev); 659 prev = NULL; 660 spin_unlock(&ctl->tree_lock); 661 goto again; 662 } 663 next: 664 prev = e; 665 } 666 spin_unlock(&ctl->tree_lock); 667 } 668 669 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 670 struct btrfs_free_space_ctl *ctl, 671 struct btrfs_path *path, u64 offset) 672 { 673 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 674 struct btrfs_free_space_header *header; 675 struct extent_buffer *leaf; 676 struct btrfs_io_ctl io_ctl; 677 struct btrfs_key key; 678 struct btrfs_free_space *e, *n; 679 LIST_HEAD(bitmaps); 680 u64 num_entries; 681 u64 num_bitmaps; 682 u64 generation; 683 u8 type; 684 int ret = 0; 685 686 /* Nothing in the space cache, goodbye */ 687 if (!i_size_read(inode)) 688 return 0; 689 690 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 691 key.offset = offset; 692 key.type = 0; 693 694 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 695 if (ret < 0) 696 return 0; 697 else if (ret > 0) { 698 btrfs_release_path(path); 699 return 0; 700 } 701 702 ret = -1; 703 704 leaf = path->nodes[0]; 705 header = btrfs_item_ptr(leaf, path->slots[0], 706 struct btrfs_free_space_header); 707 num_entries = btrfs_free_space_entries(leaf, header); 708 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 709 generation = btrfs_free_space_generation(leaf, header); 710 btrfs_release_path(path); 711 712 if (!BTRFS_I(inode)->generation) { 713 btrfs_info(fs_info, 714 "The free space cache file (%llu) is invalid. 
skip it\n", 715 offset); 716 return 0; 717 } 718 719 if (BTRFS_I(inode)->generation != generation) { 720 btrfs_err(fs_info, 721 "free space inode generation (%llu) did not match free space cache generation (%llu)", 722 BTRFS_I(inode)->generation, generation); 723 return 0; 724 } 725 726 if (!num_entries) 727 return 0; 728 729 ret = io_ctl_init(&io_ctl, inode, 0); 730 if (ret) 731 return ret; 732 733 ret = readahead_cache(inode); 734 if (ret) 735 goto out; 736 737 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 738 if (ret) 739 goto out; 740 741 ret = io_ctl_check_crc(&io_ctl, 0); 742 if (ret) 743 goto free_cache; 744 745 ret = io_ctl_check_generation(&io_ctl, generation); 746 if (ret) 747 goto free_cache; 748 749 while (num_entries) { 750 e = kmem_cache_zalloc(btrfs_free_space_cachep, 751 GFP_NOFS); 752 if (!e) 753 goto free_cache; 754 755 ret = io_ctl_read_entry(&io_ctl, e, &type); 756 if (ret) { 757 kmem_cache_free(btrfs_free_space_cachep, e); 758 goto free_cache; 759 } 760 761 if (!e->bytes) { 762 kmem_cache_free(btrfs_free_space_cachep, e); 763 goto free_cache; 764 } 765 766 if (type == BTRFS_FREE_SPACE_EXTENT) { 767 spin_lock(&ctl->tree_lock); 768 ret = link_free_space(ctl, e); 769 spin_unlock(&ctl->tree_lock); 770 if (ret) { 771 btrfs_err(fs_info, 772 "Duplicate entries in free space cache, dumping"); 773 kmem_cache_free(btrfs_free_space_cachep, e); 774 goto free_cache; 775 } 776 } else { 777 ASSERT(num_bitmaps); 778 num_bitmaps--; 779 e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 780 if (!e->bitmap) { 781 kmem_cache_free( 782 btrfs_free_space_cachep, e); 783 goto free_cache; 784 } 785 spin_lock(&ctl->tree_lock); 786 ret = link_free_space(ctl, e); 787 ctl->total_bitmaps++; 788 ctl->op->recalc_thresholds(ctl); 789 spin_unlock(&ctl->tree_lock); 790 if (ret) { 791 btrfs_err(fs_info, 792 "Duplicate entries in free space cache, dumping"); 793 kmem_cache_free(btrfs_free_space_cachep, e); 794 goto free_cache; 795 } 796 list_add_tail(&e->list, &bitmaps); 797 } 798 799 num_entries--; 800 } 801 802 io_ctl_unmap_page(&io_ctl); 803 804 /* 805 * We add the bitmaps at the end of the entries in order that 806 * the bitmap entries are added to the cache. 807 */ 808 list_for_each_entry_safe(e, n, &bitmaps, list) { 809 list_del_init(&e->list); 810 ret = io_ctl_read_bitmap(&io_ctl, e); 811 if (ret) 812 goto free_cache; 813 } 814 815 io_ctl_drop_pages(&io_ctl); 816 merge_space_tree(ctl); 817 ret = 1; 818 out: 819 io_ctl_free(&io_ctl); 820 return ret; 821 free_cache: 822 io_ctl_drop_pages(&io_ctl); 823 __btrfs_remove_free_space_cache(ctl); 824 goto out; 825 } 826 827 int load_free_space_cache(struct btrfs_fs_info *fs_info, 828 struct btrfs_block_group_cache *block_group) 829 { 830 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 831 struct btrfs_root *root = fs_info->tree_root; 832 struct inode *inode; 833 struct btrfs_path *path; 834 int ret = 0; 835 bool matched; 836 u64 used = btrfs_block_group_used(&block_group->item); 837 838 /* 839 * If this block group has been marked to be cleared for one reason or 840 * another then we can't trust the on disk cache, so just return. 
841 */ 842 spin_lock(&block_group->lock); 843 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 844 spin_unlock(&block_group->lock); 845 return 0; 846 } 847 spin_unlock(&block_group->lock); 848 849 path = btrfs_alloc_path(); 850 if (!path) 851 return 0; 852 path->search_commit_root = 1; 853 path->skip_locking = 1; 854 855 inode = lookup_free_space_inode(root, block_group, path); 856 if (IS_ERR(inode)) { 857 btrfs_free_path(path); 858 return 0; 859 } 860 861 /* We may have converted the inode and made the cache invalid. */ 862 spin_lock(&block_group->lock); 863 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 864 spin_unlock(&block_group->lock); 865 btrfs_free_path(path); 866 goto out; 867 } 868 spin_unlock(&block_group->lock); 869 870 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 871 path, block_group->key.objectid); 872 btrfs_free_path(path); 873 if (ret <= 0) 874 goto out; 875 876 spin_lock(&ctl->tree_lock); 877 matched = (ctl->free_space == (block_group->key.offset - used - 878 block_group->bytes_super)); 879 spin_unlock(&ctl->tree_lock); 880 881 if (!matched) { 882 __btrfs_remove_free_space_cache(ctl); 883 btrfs_warn(fs_info, 884 "block group %llu has wrong amount of free space", 885 block_group->key.objectid); 886 ret = -1; 887 } 888 out: 889 if (ret < 0) { 890 /* This cache is bogus, make sure it gets cleared */ 891 spin_lock(&block_group->lock); 892 block_group->disk_cache_state = BTRFS_DC_CLEAR; 893 spin_unlock(&block_group->lock); 894 ret = 0; 895 896 btrfs_warn(fs_info, 897 "failed to load free space cache for block group %llu, rebuilding it now", 898 block_group->key.objectid); 899 } 900 901 iput(inode); 902 return ret; 903 } 904 905 static noinline_for_stack 906 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 907 struct btrfs_free_space_ctl *ctl, 908 struct btrfs_block_group_cache *block_group, 909 int *entries, int *bitmaps, 910 struct list_head *bitmap_list) 911 { 912 int ret; 913 struct btrfs_free_cluster *cluster = NULL; 914 struct btrfs_free_cluster *cluster_locked = NULL; 915 struct rb_node *node = rb_first(&ctl->free_space_offset); 916 struct btrfs_trim_range *trim_entry; 917 918 /* Get the cluster for this block_group if it exists */ 919 if (block_group && !list_empty(&block_group->cluster_list)) { 920 cluster = list_entry(block_group->cluster_list.next, 921 struct btrfs_free_cluster, 922 block_group_list); 923 } 924 925 if (!node && cluster) { 926 cluster_locked = cluster; 927 spin_lock(&cluster_locked->lock); 928 node = rb_first(&cluster->root); 929 cluster = NULL; 930 } 931 932 /* Write out the extent entries */ 933 while (node) { 934 struct btrfs_free_space *e; 935 936 e = rb_entry(node, struct btrfs_free_space, offset_index); 937 *entries += 1; 938 939 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 940 e->bitmap); 941 if (ret) 942 goto fail; 943 944 if (e->bitmap) { 945 list_add_tail(&e->list, bitmap_list); 946 *bitmaps += 1; 947 } 948 node = rb_next(node); 949 if (!node && cluster) { 950 node = rb_first(&cluster->root); 951 cluster_locked = cluster; 952 spin_lock(&cluster_locked->lock); 953 cluster = NULL; 954 } 955 } 956 if (cluster_locked) { 957 spin_unlock(&cluster_locked->lock); 958 cluster_locked = NULL; 959 } 960 961 /* 962 * Make sure we don't miss any range that was removed from our rbtree 963 * because trimming is running. Otherwise after a umount+mount (or crash 964 * after committing the transaction) we would leak free space and get 965 * an inconsistent free space cache report from fsck. 
966 */ 967 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 968 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 969 trim_entry->bytes, NULL); 970 if (ret) 971 goto fail; 972 *entries += 1; 973 } 974 975 return 0; 976 fail: 977 if (cluster_locked) 978 spin_unlock(&cluster_locked->lock); 979 return -ENOSPC; 980 } 981 982 static noinline_for_stack int 983 update_cache_item(struct btrfs_trans_handle *trans, 984 struct btrfs_root *root, 985 struct inode *inode, 986 struct btrfs_path *path, u64 offset, 987 int entries, int bitmaps) 988 { 989 struct btrfs_key key; 990 struct btrfs_free_space_header *header; 991 struct extent_buffer *leaf; 992 int ret; 993 994 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 995 key.offset = offset; 996 key.type = 0; 997 998 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 999 if (ret < 0) { 1000 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1001 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1002 GFP_NOFS); 1003 goto fail; 1004 } 1005 leaf = path->nodes[0]; 1006 if (ret > 0) { 1007 struct btrfs_key found_key; 1008 ASSERT(path->slots[0]); 1009 path->slots[0]--; 1010 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1011 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1012 found_key.offset != offset) { 1013 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1014 inode->i_size - 1, 1015 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1016 NULL, GFP_NOFS); 1017 btrfs_release_path(path); 1018 goto fail; 1019 } 1020 } 1021 1022 BTRFS_I(inode)->generation = trans->transid; 1023 header = btrfs_item_ptr(leaf, path->slots[0], 1024 struct btrfs_free_space_header); 1025 btrfs_set_free_space_entries(leaf, header, entries); 1026 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1027 btrfs_set_free_space_generation(leaf, header, trans->transid); 1028 btrfs_mark_buffer_dirty(leaf); 1029 btrfs_release_path(path); 1030 1031 return 0; 1032 1033 fail: 1034 return -1; 1035 } 1036 1037 static noinline_for_stack int 1038 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1039 struct btrfs_block_group_cache *block_group, 1040 struct btrfs_io_ctl *io_ctl, 1041 int *entries) 1042 { 1043 u64 start, extent_start, extent_end, len; 1044 struct extent_io_tree *unpin = NULL; 1045 int ret; 1046 1047 if (!block_group) 1048 return 0; 1049 1050 /* 1051 * We want to add any pinned extents to our free space cache 1052 * so we don't leak the space 1053 * 1054 * We shouldn't have switched the pinned extents yet so this is the 1055 * right one 1056 */ 1057 unpin = fs_info->pinned_extents; 1058 1059 start = block_group->key.objectid; 1060 1061 while (start < block_group->key.objectid + block_group->key.offset) { 1062 ret = find_first_extent_bit(unpin, start, 1063 &extent_start, &extent_end, 1064 EXTENT_DIRTY, NULL); 1065 if (ret) 1066 return 0; 1067 1068 /* This pinned extent is out of our range */ 1069 if (extent_start >= block_group->key.objectid + 1070 block_group->key.offset) 1071 return 0; 1072 1073 extent_start = max(extent_start, start); 1074 extent_end = min(block_group->key.objectid + 1075 block_group->key.offset, extent_end + 1); 1076 len = extent_end - extent_start; 1077 1078 *entries += 1; 1079 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1080 if (ret) 1081 return -ENOSPC; 1082 1083 start = extent_end; 1084 } 1085 1086 return 0; 1087 } 1088 1089 static noinline_for_stack int 1090 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1091 { 1092 struct btrfs_free_space *entry, *next; 1093 int ret; 1094 1095 /* 
Write out the bitmaps */ 1096 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1097 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1098 if (ret) 1099 return -ENOSPC; 1100 list_del_init(&entry->list); 1101 } 1102 1103 return 0; 1104 } 1105 1106 static int flush_dirty_cache(struct inode *inode) 1107 { 1108 int ret; 1109 1110 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1111 if (ret) 1112 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1113 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1114 GFP_NOFS); 1115 1116 return ret; 1117 } 1118 1119 static void noinline_for_stack 1120 cleanup_bitmap_list(struct list_head *bitmap_list) 1121 { 1122 struct btrfs_free_space *entry, *next; 1123 1124 list_for_each_entry_safe(entry, next, bitmap_list, list) 1125 list_del_init(&entry->list); 1126 } 1127 1128 static void noinline_for_stack 1129 cleanup_write_cache_enospc(struct inode *inode, 1130 struct btrfs_io_ctl *io_ctl, 1131 struct extent_state **cached_state, 1132 struct list_head *bitmap_list) 1133 { 1134 io_ctl_drop_pages(io_ctl); 1135 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1136 i_size_read(inode) - 1, cached_state, 1137 GFP_NOFS); 1138 } 1139 1140 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1141 struct btrfs_trans_handle *trans, 1142 struct btrfs_block_group_cache *block_group, 1143 struct btrfs_io_ctl *io_ctl, 1144 struct btrfs_path *path, u64 offset) 1145 { 1146 int ret; 1147 struct inode *inode = io_ctl->inode; 1148 struct btrfs_fs_info *fs_info; 1149 1150 if (!inode) 1151 return 0; 1152 1153 fs_info = btrfs_sb(inode->i_sb); 1154 1155 /* Flush the dirty pages in the cache file. */ 1156 ret = flush_dirty_cache(inode); 1157 if (ret) 1158 goto out; 1159 1160 /* Update the cache item to tell everyone this cache file is valid. */ 1161 ret = update_cache_item(trans, root, inode, path, offset, 1162 io_ctl->entries, io_ctl->bitmaps); 1163 out: 1164 io_ctl_free(io_ctl); 1165 if (ret) { 1166 invalidate_inode_pages2(inode->i_mapping); 1167 BTRFS_I(inode)->generation = 0; 1168 if (block_group) { 1169 #ifdef DEBUG 1170 btrfs_err(fs_info, 1171 "failed to write free space cache for block group %llu", 1172 block_group->key.objectid); 1173 #endif 1174 } 1175 } 1176 btrfs_update_inode(trans, root, inode); 1177 1178 if (block_group) { 1179 /* the dirty list is protected by the dirty_bgs_lock */ 1180 spin_lock(&trans->transaction->dirty_bgs_lock); 1181 1182 /* the disk_cache_state is protected by the block group lock */ 1183 spin_lock(&block_group->lock); 1184 1185 /* 1186 * only mark this as written if we didn't get put back on 1187 * the dirty list while waiting for IO. 
Otherwise our 1188 * cache state won't be right, and we won't get written again 1189 */ 1190 if (!ret && list_empty(&block_group->dirty_list)) 1191 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1192 else if (ret) 1193 block_group->disk_cache_state = BTRFS_DC_ERROR; 1194 1195 spin_unlock(&block_group->lock); 1196 spin_unlock(&trans->transaction->dirty_bgs_lock); 1197 io_ctl->inode = NULL; 1198 iput(inode); 1199 } 1200 1201 return ret; 1202 1203 } 1204 1205 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1206 struct btrfs_trans_handle *trans, 1207 struct btrfs_io_ctl *io_ctl, 1208 struct btrfs_path *path) 1209 { 1210 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1211 } 1212 1213 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1214 struct btrfs_block_group_cache *block_group, 1215 struct btrfs_path *path) 1216 { 1217 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1218 block_group, &block_group->io_ctl, 1219 path, block_group->key.objectid); 1220 } 1221 1222 /** 1223 * __btrfs_write_out_cache - write out cached info to an inode 1224 * @root - the root the inode belongs to 1225 * @ctl - the free space cache we are going to write out 1226 * @block_group - the block_group for this cache if it belongs to a block_group 1227 * @trans - the trans handle 1228 * @path - the path to use 1229 * @offset - the offset for the key we'll insert 1230 * 1231 * This function writes out a free space cache struct to disk for quick recovery 1232 * on mount. This will return 0 if it was successful in writing the cache out, 1233 * or an errno if it was not. 1234 */ 1235 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1236 struct btrfs_free_space_ctl *ctl, 1237 struct btrfs_block_group_cache *block_group, 1238 struct btrfs_io_ctl *io_ctl, 1239 struct btrfs_trans_handle *trans, 1240 struct btrfs_path *path, u64 offset) 1241 { 1242 struct btrfs_fs_info *fs_info = root->fs_info; 1243 struct extent_state *cached_state = NULL; 1244 LIST_HEAD(bitmap_list); 1245 int entries = 0; 1246 int bitmaps = 0; 1247 int ret; 1248 int must_iput = 0; 1249 1250 if (!i_size_read(inode)) 1251 return -EIO; 1252 1253 WARN_ON(io_ctl->pages); 1254 ret = io_ctl_init(io_ctl, inode, 1); 1255 if (ret) 1256 return ret; 1257 1258 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1259 down_write(&block_group->data_rwsem); 1260 spin_lock(&block_group->lock); 1261 if (block_group->delalloc_bytes) { 1262 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1263 spin_unlock(&block_group->lock); 1264 up_write(&block_group->data_rwsem); 1265 BTRFS_I(inode)->generation = 0; 1266 ret = 0; 1267 must_iput = 1; 1268 goto out; 1269 } 1270 spin_unlock(&block_group->lock); 1271 } 1272 1273 /* Lock all pages first so we can lock the extent safely. 
 */
	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
	if (ret)
		goto out;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 &cached_state);

	io_ctl_set_generation(io_ctl, trans->transid);

	mutex_lock(&ctl->cache_writeout_mutex);
	/* Write out the extent entries in the free space cache */
	spin_lock(&ctl->tree_lock);
	ret = write_cache_extent_entries(io_ctl, ctl,
					 block_group, &entries, &bitmaps,
					 &bitmap_list);
	if (ret)
		goto out_nospc_locked;

	/*
	 * Some spaces that are freed in the current transaction are pinned,
	 * they will be added into free space cache after the transaction is
	 * committed, we shouldn't lose them.
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it.  No locking needed
	 */
	ret = write_pinned_extent_entries(fs_info, block_group,
					  io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
				i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	/*
	 * At this point the pages are under IO and we're happy.
	 * The caller is responsible for waiting on them and updating
	 * the cache and the inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1388 spin_unlock(&block_group->lock); 1389 return 0; 1390 } 1391 spin_unlock(&block_group->lock); 1392 1393 inode = lookup_free_space_inode(root, block_group, path); 1394 if (IS_ERR(inode)) 1395 return 0; 1396 1397 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, 1398 &block_group->io_ctl, trans, 1399 path, block_group->key.objectid); 1400 if (ret) { 1401 #ifdef DEBUG 1402 btrfs_err(fs_info, 1403 "failed to write free space cache for block group %llu", 1404 block_group->key.objectid); 1405 #endif 1406 spin_lock(&block_group->lock); 1407 block_group->disk_cache_state = BTRFS_DC_ERROR; 1408 spin_unlock(&block_group->lock); 1409 1410 block_group->io_ctl.inode = NULL; 1411 iput(inode); 1412 } 1413 1414 /* 1415 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1416 * to wait for IO and put the inode 1417 */ 1418 1419 return ret; 1420 } 1421 1422 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1423 u64 offset) 1424 { 1425 ASSERT(offset >= bitmap_start); 1426 offset -= bitmap_start; 1427 return (unsigned long)(div_u64(offset, unit)); 1428 } 1429 1430 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1431 { 1432 return (unsigned long)(div_u64(bytes, unit)); 1433 } 1434 1435 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1436 u64 offset) 1437 { 1438 u64 bitmap_start; 1439 u64 bytes_per_bitmap; 1440 1441 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1442 bitmap_start = offset - ctl->start; 1443 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1444 bitmap_start *= bytes_per_bitmap; 1445 bitmap_start += ctl->start; 1446 1447 return bitmap_start; 1448 } 1449 1450 static int tree_insert_offset(struct rb_root *root, u64 offset, 1451 struct rb_node *node, int bitmap) 1452 { 1453 struct rb_node **p = &root->rb_node; 1454 struct rb_node *parent = NULL; 1455 struct btrfs_free_space *info; 1456 1457 while (*p) { 1458 parent = *p; 1459 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1460 1461 if (offset < info->offset) { 1462 p = &(*p)->rb_left; 1463 } else if (offset > info->offset) { 1464 p = &(*p)->rb_right; 1465 } else { 1466 /* 1467 * we could have a bitmap entry and an extent entry 1468 * share the same offset. If this is the case, we want 1469 * the extent entry to always be found first if we do a 1470 * linear search through the tree, since we want to have 1471 * the quickest allocation time, and allocating from an 1472 * extent is faster than allocating from a bitmap. So 1473 * if we're inserting a bitmap and we find an entry at 1474 * this offset, we want to go right, or after this entry 1475 * logically. If we are inserting an extent and we've 1476 * found a bitmap, we want to go left, or before 1477 * logically. 1478 */ 1479 if (bitmap) { 1480 if (info->bitmap) { 1481 WARN_ON_ONCE(1); 1482 return -EEXIST; 1483 } 1484 p = &(*p)->rb_right; 1485 } else { 1486 if (!info->bitmap) { 1487 WARN_ON_ONCE(1); 1488 return -EEXIST; 1489 } 1490 p = &(*p)->rb_left; 1491 } 1492 } 1493 } 1494 1495 rb_link_node(node, parent, p); 1496 rb_insert_color(node, root); 1497 1498 return 0; 1499 } 1500 1501 /* 1502 * searches the tree for the given offset. 1503 * 1504 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1505 * want a section that has at least bytes size and comes at or after the given 1506 * offset. 
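 *
 * bitmap_only - If this is set, only a bitmap entry stored at exactly
 * 'offset' is returned; an extent entry that happens to share that offset
 * is skipped.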
1507 */ 1508 static struct btrfs_free_space * 1509 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1510 u64 offset, int bitmap_only, int fuzzy) 1511 { 1512 struct rb_node *n = ctl->free_space_offset.rb_node; 1513 struct btrfs_free_space *entry, *prev = NULL; 1514 1515 /* find entry that is closest to the 'offset' */ 1516 while (1) { 1517 if (!n) { 1518 entry = NULL; 1519 break; 1520 } 1521 1522 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1523 prev = entry; 1524 1525 if (offset < entry->offset) 1526 n = n->rb_left; 1527 else if (offset > entry->offset) 1528 n = n->rb_right; 1529 else 1530 break; 1531 } 1532 1533 if (bitmap_only) { 1534 if (!entry) 1535 return NULL; 1536 if (entry->bitmap) 1537 return entry; 1538 1539 /* 1540 * bitmap entry and extent entry may share same offset, 1541 * in that case, bitmap entry comes after extent entry. 1542 */ 1543 n = rb_next(n); 1544 if (!n) 1545 return NULL; 1546 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1547 if (entry->offset != offset) 1548 return NULL; 1549 1550 WARN_ON(!entry->bitmap); 1551 return entry; 1552 } else if (entry) { 1553 if (entry->bitmap) { 1554 /* 1555 * if previous extent entry covers the offset, 1556 * we should return it instead of the bitmap entry 1557 */ 1558 n = rb_prev(&entry->offset_index); 1559 if (n) { 1560 prev = rb_entry(n, struct btrfs_free_space, 1561 offset_index); 1562 if (!prev->bitmap && 1563 prev->offset + prev->bytes > offset) 1564 entry = prev; 1565 } 1566 } 1567 return entry; 1568 } 1569 1570 if (!prev) 1571 return NULL; 1572 1573 /* find last entry before the 'offset' */ 1574 entry = prev; 1575 if (entry->offset > offset) { 1576 n = rb_prev(&entry->offset_index); 1577 if (n) { 1578 entry = rb_entry(n, struct btrfs_free_space, 1579 offset_index); 1580 ASSERT(entry->offset <= offset); 1581 } else { 1582 if (fuzzy) 1583 return entry; 1584 else 1585 return NULL; 1586 } 1587 } 1588 1589 if (entry->bitmap) { 1590 n = rb_prev(&entry->offset_index); 1591 if (n) { 1592 prev = rb_entry(n, struct btrfs_free_space, 1593 offset_index); 1594 if (!prev->bitmap && 1595 prev->offset + prev->bytes > offset) 1596 return prev; 1597 } 1598 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1599 return entry; 1600 } else if (entry->offset + entry->bytes > offset) 1601 return entry; 1602 1603 if (!fuzzy) 1604 return NULL; 1605 1606 while (1) { 1607 if (entry->bitmap) { 1608 if (entry->offset + BITS_PER_BITMAP * 1609 ctl->unit > offset) 1610 break; 1611 } else { 1612 if (entry->offset + entry->bytes > offset) 1613 break; 1614 } 1615 1616 n = rb_next(&entry->offset_index); 1617 if (!n) 1618 return NULL; 1619 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1620 } 1621 return entry; 1622 } 1623 1624 static inline void 1625 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1626 struct btrfs_free_space *info) 1627 { 1628 rb_erase(&info->offset_index, &ctl->free_space_offset); 1629 ctl->free_extents--; 1630 } 1631 1632 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1633 struct btrfs_free_space *info) 1634 { 1635 __unlink_free_space(ctl, info); 1636 ctl->free_space -= info->bytes; 1637 } 1638 1639 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1640 struct btrfs_free_space *info) 1641 { 1642 int ret = 0; 1643 1644 ASSERT(info->bytes || info->bitmap); 1645 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1646 &info->offset_index, (info->bitmap != NULL)); 1647 if (ret) 1648 return ret; 1649 1650 ctl->free_space += info->bytes; 1651 
ctl->free_extents++; 1652 return ret; 1653 } 1654 1655 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1656 { 1657 struct btrfs_block_group_cache *block_group = ctl->private; 1658 u64 max_bytes; 1659 u64 bitmap_bytes; 1660 u64 extent_bytes; 1661 u64 size = block_group->key.offset; 1662 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1663 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1664 1665 max_bitmaps = max_t(u64, max_bitmaps, 1); 1666 1667 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1668 1669 /* 1670 * The goal is to keep the total amount of memory used per 1gb of space 1671 * at or below 32k, so we need to adjust how much memory we allow to be 1672 * used by extent based free space tracking 1673 */ 1674 if (size < SZ_1G) 1675 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1676 else 1677 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); 1678 1679 /* 1680 * we want to account for 1 more bitmap than what we have so we can make 1681 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1682 * we add more bitmaps. 1683 */ 1684 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1685 1686 if (bitmap_bytes >= max_bytes) { 1687 ctl->extents_thresh = 0; 1688 return; 1689 } 1690 1691 /* 1692 * we want the extent entry threshold to always be at most 1/2 the max 1693 * bytes we can have, or whatever is less than that. 1694 */ 1695 extent_bytes = max_bytes - bitmap_bytes; 1696 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1697 1698 ctl->extents_thresh = 1699 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1700 } 1701 1702 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1703 struct btrfs_free_space *info, 1704 u64 offset, u64 bytes) 1705 { 1706 unsigned long start, count; 1707 1708 start = offset_to_bit(info->offset, ctl->unit, offset); 1709 count = bytes_to_bits(bytes, ctl->unit); 1710 ASSERT(start + count <= BITS_PER_BITMAP); 1711 1712 bitmap_clear(info->bitmap, start, count); 1713 1714 info->bytes -= bytes; 1715 } 1716 1717 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1718 struct btrfs_free_space *info, u64 offset, 1719 u64 bytes) 1720 { 1721 __bitmap_clear_bits(ctl, info, offset, bytes); 1722 ctl->free_space -= bytes; 1723 } 1724 1725 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1726 struct btrfs_free_space *info, u64 offset, 1727 u64 bytes) 1728 { 1729 unsigned long start, count; 1730 1731 start = offset_to_bit(info->offset, ctl->unit, offset); 1732 count = bytes_to_bits(bytes, ctl->unit); 1733 ASSERT(start + count <= BITS_PER_BITMAP); 1734 1735 bitmap_set(info->bitmap, start, count); 1736 1737 info->bytes += bytes; 1738 ctl->free_space += bytes; 1739 } 1740 1741 /* 1742 * If we can not find suitable extent, we will use bytes to record 1743 * the size of the max extent. 1744 */ 1745 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1746 struct btrfs_free_space *bitmap_info, u64 *offset, 1747 u64 *bytes, bool for_alloc) 1748 { 1749 unsigned long found_bits = 0; 1750 unsigned long max_bits = 0; 1751 unsigned long bits, i; 1752 unsigned long next_zero; 1753 unsigned long extent_bits; 1754 1755 /* 1756 * Skip searching the bitmap if we don't have a contiguous section that 1757 * is large enough for this allocation. 
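	 *
	 * max_extent_size is only a hint: it records the largest free run seen
	 * by a previous search that failed, and it is reset to 0 (unknown)
	 * whenever new bits are set in this bitmap.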
1758 */ 1759 if (for_alloc && 1760 bitmap_info->max_extent_size && 1761 bitmap_info->max_extent_size < *bytes) { 1762 *bytes = bitmap_info->max_extent_size; 1763 return -1; 1764 } 1765 1766 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1767 max_t(u64, *offset, bitmap_info->offset)); 1768 bits = bytes_to_bits(*bytes, ctl->unit); 1769 1770 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1771 if (for_alloc && bits == 1) { 1772 found_bits = 1; 1773 break; 1774 } 1775 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1776 BITS_PER_BITMAP, i); 1777 extent_bits = next_zero - i; 1778 if (extent_bits >= bits) { 1779 found_bits = extent_bits; 1780 break; 1781 } else if (extent_bits > max_bits) { 1782 max_bits = extent_bits; 1783 } 1784 i = next_zero; 1785 } 1786 1787 if (found_bits) { 1788 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1789 *bytes = (u64)(found_bits) * ctl->unit; 1790 return 0; 1791 } 1792 1793 *bytes = (u64)(max_bits) * ctl->unit; 1794 bitmap_info->max_extent_size = *bytes; 1795 return -1; 1796 } 1797 1798 /* Cache the size of the max extent in bytes */ 1799 static struct btrfs_free_space * 1800 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1801 unsigned long align, u64 *max_extent_size) 1802 { 1803 struct btrfs_free_space *entry; 1804 struct rb_node *node; 1805 u64 tmp; 1806 u64 align_off; 1807 int ret; 1808 1809 if (!ctl->free_space_offset.rb_node) 1810 goto out; 1811 1812 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1813 if (!entry) 1814 goto out; 1815 1816 for (node = &entry->offset_index; node; node = rb_next(node)) { 1817 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1818 if (entry->bytes < *bytes) { 1819 if (entry->bytes > *max_extent_size) 1820 *max_extent_size = entry->bytes; 1821 continue; 1822 } 1823 1824 /* make sure the space returned is big enough 1825 * to match our requested alignment 1826 */ 1827 if (*bytes >= align) { 1828 tmp = entry->offset - ctl->start + align - 1; 1829 tmp = div64_u64(tmp, align); 1830 tmp = tmp * align + ctl->start; 1831 align_off = tmp - entry->offset; 1832 } else { 1833 align_off = 0; 1834 tmp = entry->offset; 1835 } 1836 1837 if (entry->bytes < *bytes + align_off) { 1838 if (entry->bytes > *max_extent_size) 1839 *max_extent_size = entry->bytes; 1840 continue; 1841 } 1842 1843 if (entry->bitmap) { 1844 u64 size = *bytes; 1845 1846 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1847 if (!ret) { 1848 *offset = tmp; 1849 *bytes = size; 1850 return entry; 1851 } else if (size > *max_extent_size) { 1852 *max_extent_size = size; 1853 } 1854 continue; 1855 } 1856 1857 *offset = tmp; 1858 *bytes = entry->bytes - align_off; 1859 return entry; 1860 } 1861 out: 1862 return NULL; 1863 } 1864 1865 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1866 struct btrfs_free_space *info, u64 offset) 1867 { 1868 info->offset = offset_to_bitmap(ctl, offset); 1869 info->bytes = 0; 1870 INIT_LIST_HEAD(&info->list); 1871 link_free_space(ctl, info); 1872 ctl->total_bitmaps++; 1873 1874 ctl->op->recalc_thresholds(ctl); 1875 } 1876 1877 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1878 struct btrfs_free_space *bitmap_info) 1879 { 1880 unlink_free_space(ctl, bitmap_info); 1881 kfree(bitmap_info->bitmap); 1882 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1883 ctl->total_bitmaps--; 1884 ctl->op->recalc_thresholds(ctl); 1885 } 1886 1887 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1888 struct 
				       btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap.  We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
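	 *
	 * The bits we just set may have joined previously separate free runs,
	 * so any cached value could now be too small; clear it and let the
	 * next search_bitmap() do a full rescan.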
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them to larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries.  However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so still allow those block groups to have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
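	 *
	 * Entries attached to a cluster live in the cluster's own rbtree
	 * rather than in ctl->free_space_offset (write_cache_extent_entries()
	 * walks the cluster tree separately for the same reason), so the
	 * regular tree_search_offset() lookup below would not find such a
	 * bitmap.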
2060 */ 2061 if (block_group && !list_empty(&block_group->cluster_list)) { 2062 struct btrfs_free_cluster *cluster; 2063 struct rb_node *node; 2064 struct btrfs_free_space *entry; 2065 2066 cluster = list_entry(block_group->cluster_list.next, 2067 struct btrfs_free_cluster, 2068 block_group_list); 2069 spin_lock(&cluster->lock); 2070 node = rb_first(&cluster->root); 2071 if (!node) { 2072 spin_unlock(&cluster->lock); 2073 goto no_cluster_bitmap; 2074 } 2075 2076 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2077 if (!entry->bitmap) { 2078 spin_unlock(&cluster->lock); 2079 goto no_cluster_bitmap; 2080 } 2081 2082 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2083 bytes_added = add_bytes_to_bitmap(ctl, entry, 2084 offset, bytes); 2085 bytes -= bytes_added; 2086 offset += bytes_added; 2087 } 2088 spin_unlock(&cluster->lock); 2089 if (!bytes) { 2090 ret = 1; 2091 goto out; 2092 } 2093 } 2094 2095 no_cluster_bitmap: 2096 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2097 1, 0); 2098 if (!bitmap_info) { 2099 ASSERT(added == 0); 2100 goto new_bitmap; 2101 } 2102 2103 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2104 bytes -= bytes_added; 2105 offset += bytes_added; 2106 added = 0; 2107 2108 if (!bytes) { 2109 ret = 1; 2110 goto out; 2111 } else 2112 goto again; 2113 2114 new_bitmap: 2115 if (info && info->bitmap) { 2116 add_new_bitmap(ctl, info, offset); 2117 added = 1; 2118 info = NULL; 2119 goto again; 2120 } else { 2121 spin_unlock(&ctl->tree_lock); 2122 2123 /* no pre-allocated info, allocate a new one */ 2124 if (!info) { 2125 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2126 GFP_NOFS); 2127 if (!info) { 2128 spin_lock(&ctl->tree_lock); 2129 ret = -ENOMEM; 2130 goto out; 2131 } 2132 } 2133 2134 /* allocate the bitmap */ 2135 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2136 spin_lock(&ctl->tree_lock); 2137 if (!info->bitmap) { 2138 ret = -ENOMEM; 2139 goto out; 2140 } 2141 goto again; 2142 } 2143 2144 out: 2145 if (info) { 2146 if (info->bitmap) 2147 kfree(info->bitmap); 2148 kmem_cache_free(btrfs_free_space_cachep, info); 2149 } 2150 2151 return ret; 2152 } 2153 2154 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2155 struct btrfs_free_space *info, bool update_stat) 2156 { 2157 struct btrfs_free_space *left_info; 2158 struct btrfs_free_space *right_info; 2159 bool merged = false; 2160 u64 offset = info->offset; 2161 u64 bytes = info->bytes; 2162 2163 /* 2164 * first we want to see if there is free space adjacent to the range we 2165 * are adding, if there is remove that struct and add a new one to 2166 * cover the entire range 2167 */ 2168 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2169 if (right_info && rb_prev(&right_info->offset_index)) 2170 left_info = rb_entry(rb_prev(&right_info->offset_index), 2171 struct btrfs_free_space, offset_index); 2172 else 2173 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2174 2175 if (right_info && !right_info->bitmap) { 2176 if (update_stat) 2177 unlink_free_space(ctl, right_info); 2178 else 2179 __unlink_free_space(ctl, right_info); 2180 info->bytes += right_info->bytes; 2181 kmem_cache_free(btrfs_free_space_cachep, right_info); 2182 merged = true; 2183 } 2184 2185 if (left_info && !left_info->bitmap && 2186 left_info->offset + left_info->bytes == offset) { 2187 if (update_stat) 2188 unlink_free_space(ctl, left_info); 2189 else 2190 __unlink_free_space(ctl, left_info); 2191 info->offset = left_info->offset; 2192 info->bytes += 
left_info->bytes; 2193 kmem_cache_free(btrfs_free_space_cachep, left_info); 2194 merged = true; 2195 } 2196 2197 return merged; 2198 } 2199 2200 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2201 struct btrfs_free_space *info, 2202 bool update_stat) 2203 { 2204 struct btrfs_free_space *bitmap; 2205 unsigned long i; 2206 unsigned long j; 2207 const u64 end = info->offset + info->bytes; 2208 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2209 u64 bytes; 2210 2211 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2212 if (!bitmap) 2213 return false; 2214 2215 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2216 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2217 if (j == i) 2218 return false; 2219 bytes = (j - i) * ctl->unit; 2220 info->bytes += bytes; 2221 2222 if (update_stat) 2223 bitmap_clear_bits(ctl, bitmap, end, bytes); 2224 else 2225 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2226 2227 if (!bitmap->bytes) 2228 free_bitmap(ctl, bitmap); 2229 2230 return true; 2231 } 2232 2233 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2234 struct btrfs_free_space *info, 2235 bool update_stat) 2236 { 2237 struct btrfs_free_space *bitmap; 2238 u64 bitmap_offset; 2239 unsigned long i; 2240 unsigned long j; 2241 unsigned long prev_j; 2242 u64 bytes; 2243 2244 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2245 /* If we're on a boundary, try the previous logical bitmap. */ 2246 if (bitmap_offset == info->offset) { 2247 if (info->offset == 0) 2248 return false; 2249 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2250 } 2251 2252 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2253 if (!bitmap) 2254 return false; 2255 2256 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2257 j = 0; 2258 prev_j = (unsigned long)-1; 2259 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2260 if (j > i) 2261 break; 2262 prev_j = j; 2263 } 2264 if (prev_j == i) 2265 return false; 2266 2267 if (prev_j == (unsigned long)-1) 2268 bytes = (i + 1) * ctl->unit; 2269 else 2270 bytes = (i - prev_j) * ctl->unit; 2271 2272 info->offset -= bytes; 2273 info->bytes += bytes; 2274 2275 if (update_stat) 2276 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2277 else 2278 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2279 2280 if (!bitmap->bytes) 2281 free_bitmap(ctl, bitmap); 2282 2283 return true; 2284 } 2285 2286 /* 2287 * We prefer always to allocate from extent entries, both for clustered and 2288 * non-clustered allocation requests. So when attempting to add a new extent 2289 * entry, try to see if there's adjacent free space in bitmap entries, and if 2290 * there is, migrate that space from the bitmaps to the extent. 2291 * Like this we get better chances of satisfying space allocation requests 2292 * because we attempt to satisfy them based on a single cache entry, and never 2293 * on 2 or more entries - even if the entries represent a contiguous free space 2294 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2295 * ends). 2296 */ 2297 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2298 struct btrfs_free_space *info, 2299 bool update_stat) 2300 { 2301 /* 2302 * Only work with disconnected entries, as we can change their offset, 2303 * and must be extent entries. 
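	 * The ASSERTs below check exactly that: the entry is not a bitmap and
	 * is not currently linked into the free space rbtree.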
	 */
	ASSERT(!info->bitmap);
	ASSERT(RB_EMPTY_NODE(&info->offset_index));

	if (ctl->total_bitmaps > 0) {
		bool stole_end;
		bool stole_front = false;

		stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
		if (ctl->total_bitmaps > 0)
			stole_front = steal_from_bitmap_to_front(ctl, info,
								 update_stat);

		if (stole_end || stole_front)
			try_merge_free_space(ctl, info, update_stat);
	}
}

int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
			   struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;
	RB_CLEAR_NODE(&info->offset_index);

	spin_lock(&ctl->tree_lock);

	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent,
	 * so before we do that see if we need to drop this into a bitmap.
	 */
	ret = insert_into_bitmap(ctl, info);
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
	/*
	 * Only steal free space from adjacent bitmaps if we're sure we're not
	 * going to add the new free space to existing bitmap entries - because
	 * that would mean unnecessary work that would be reverted. Therefore
	 * attempt to steal space from bitmaps if we're adding an extent entry.
	 */
	steal_from_bitmap(ctl, info, true);

	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		btrfs_crit(fs_info, "unable to add free space: %d", ret);
		ASSERT(ret != -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	int ret;
	bool re_search = false;

	spin_lock(&ctl->tree_lock);

again:
	ret = 0;
	if (!bytes)
		goto out_lock;

	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * Oops, we didn't find an extent that matched the space we
		 * wanted to remove, so look for a bitmap instead.
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/*
			 * If we found a partial bit of our free space in a
			 * bitmap but then couldn't find the other part this may
			 * be a problem, so WARN about it.
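			 * (re_search is only true after remove_from_bitmap()
			 * asked us to retry, i.e. part of the range was found
			 * before but the remainder is now missing.)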
2405 */ 2406 WARN_ON(re_search); 2407 goto out_lock; 2408 } 2409 } 2410 2411 re_search = false; 2412 if (!info->bitmap) { 2413 unlink_free_space(ctl, info); 2414 if (offset == info->offset) { 2415 u64 to_free = min(bytes, info->bytes); 2416 2417 info->bytes -= to_free; 2418 info->offset += to_free; 2419 if (info->bytes) { 2420 ret = link_free_space(ctl, info); 2421 WARN_ON(ret); 2422 } else { 2423 kmem_cache_free(btrfs_free_space_cachep, info); 2424 } 2425 2426 offset += to_free; 2427 bytes -= to_free; 2428 goto again; 2429 } else { 2430 u64 old_end = info->bytes + info->offset; 2431 2432 info->bytes = offset - info->offset; 2433 ret = link_free_space(ctl, info); 2434 WARN_ON(ret); 2435 if (ret) 2436 goto out_lock; 2437 2438 /* Not enough bytes in this entry to satisfy us */ 2439 if (old_end < offset + bytes) { 2440 bytes -= old_end - offset; 2441 offset = old_end; 2442 goto again; 2443 } else if (old_end == offset + bytes) { 2444 /* all done */ 2445 goto out_lock; 2446 } 2447 spin_unlock(&ctl->tree_lock); 2448 2449 ret = btrfs_add_free_space(block_group, offset + bytes, 2450 old_end - (offset + bytes)); 2451 WARN_ON(ret); 2452 goto out; 2453 } 2454 } 2455 2456 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2457 if (ret == -EAGAIN) { 2458 re_search = true; 2459 goto again; 2460 } 2461 out_lock: 2462 spin_unlock(&ctl->tree_lock); 2463 out: 2464 return ret; 2465 } 2466 2467 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2468 u64 bytes) 2469 { 2470 struct btrfs_fs_info *fs_info = block_group->fs_info; 2471 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2472 struct btrfs_free_space *info; 2473 struct rb_node *n; 2474 int count = 0; 2475 2476 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2477 info = rb_entry(n, struct btrfs_free_space, offset_index); 2478 if (info->bytes >= bytes && !block_group->ro) 2479 count++; 2480 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2481 info->offset, info->bytes, 2482 (info->bitmap) ? "yes" : "no"); 2483 } 2484 btrfs_info(fs_info, "block group has cluster?: %s", 2485 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2486 btrfs_info(fs_info, 2487 "%d blocks of free space at or bigger than bytes is", count); 2488 } 2489 2490 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2491 { 2492 struct btrfs_fs_info *fs_info = block_group->fs_info; 2493 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2494 2495 spin_lock_init(&ctl->tree_lock); 2496 ctl->unit = fs_info->sectorsize; 2497 ctl->start = block_group->key.objectid; 2498 ctl->private = block_group; 2499 ctl->op = &free_space_op; 2500 INIT_LIST_HEAD(&ctl->trimming_ranges); 2501 mutex_init(&ctl->cache_writeout_mutex); 2502 2503 /* 2504 * we only want to have 32k of ram per block group for keeping 2505 * track of free space, and if we pass 1/2 of that we want to 2506 * start converting things over to using bitmaps 2507 */ 2508 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2509 } 2510 2511 /* 2512 * for a given cluster, put all of its extents back into the free 2513 * space cache. If the block group passed doesn't match the block group 2514 * pointed to by the cluster, someone else raced in and freed the 2515 * cluster already. 
In that case, we just return without changing anything 2516 */ 2517 static int 2518 __btrfs_return_cluster_to_free_space( 2519 struct btrfs_block_group_cache *block_group, 2520 struct btrfs_free_cluster *cluster) 2521 { 2522 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2523 struct btrfs_free_space *entry; 2524 struct rb_node *node; 2525 2526 spin_lock(&cluster->lock); 2527 if (cluster->block_group != block_group) 2528 goto out; 2529 2530 cluster->block_group = NULL; 2531 cluster->window_start = 0; 2532 list_del_init(&cluster->block_group_list); 2533 2534 node = rb_first(&cluster->root); 2535 while (node) { 2536 bool bitmap; 2537 2538 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2539 node = rb_next(&entry->offset_index); 2540 rb_erase(&entry->offset_index, &cluster->root); 2541 RB_CLEAR_NODE(&entry->offset_index); 2542 2543 bitmap = (entry->bitmap != NULL); 2544 if (!bitmap) { 2545 try_merge_free_space(ctl, entry, false); 2546 steal_from_bitmap(ctl, entry, false); 2547 } 2548 tree_insert_offset(&ctl->free_space_offset, 2549 entry->offset, &entry->offset_index, bitmap); 2550 } 2551 cluster->root = RB_ROOT; 2552 2553 out: 2554 spin_unlock(&cluster->lock); 2555 btrfs_put_block_group(block_group); 2556 return 0; 2557 } 2558 2559 static void __btrfs_remove_free_space_cache_locked( 2560 struct btrfs_free_space_ctl *ctl) 2561 { 2562 struct btrfs_free_space *info; 2563 struct rb_node *node; 2564 2565 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2566 info = rb_entry(node, struct btrfs_free_space, offset_index); 2567 if (!info->bitmap) { 2568 unlink_free_space(ctl, info); 2569 kmem_cache_free(btrfs_free_space_cachep, info); 2570 } else { 2571 free_bitmap(ctl, info); 2572 } 2573 2574 cond_resched_lock(&ctl->tree_lock); 2575 } 2576 } 2577 2578 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2579 { 2580 spin_lock(&ctl->tree_lock); 2581 __btrfs_remove_free_space_cache_locked(ctl); 2582 spin_unlock(&ctl->tree_lock); 2583 } 2584 2585 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2586 { 2587 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2588 struct btrfs_free_cluster *cluster; 2589 struct list_head *head; 2590 2591 spin_lock(&ctl->tree_lock); 2592 while ((head = block_group->cluster_list.next) != 2593 &block_group->cluster_list) { 2594 cluster = list_entry(head, struct btrfs_free_cluster, 2595 block_group_list); 2596 2597 WARN_ON(cluster->block_group != block_group); 2598 __btrfs_return_cluster_to_free_space(block_group, cluster); 2599 2600 cond_resched_lock(&ctl->tree_lock); 2601 } 2602 __btrfs_remove_free_space_cache_locked(ctl); 2603 spin_unlock(&ctl->tree_lock); 2604 2605 } 2606 2607 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2608 u64 offset, u64 bytes, u64 empty_size, 2609 u64 *max_extent_size) 2610 { 2611 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2612 struct btrfs_free_space *entry = NULL; 2613 u64 bytes_search = bytes + empty_size; 2614 u64 ret = 0; 2615 u64 align_gap = 0; 2616 u64 align_gap_len = 0; 2617 2618 spin_lock(&ctl->tree_lock); 2619 entry = find_free_space(ctl, &offset, &bytes_search, 2620 block_group->full_stripe_len, max_extent_size); 2621 if (!entry) 2622 goto out; 2623 2624 ret = offset; 2625 if (entry->bitmap) { 2626 bitmap_clear_bits(ctl, entry, offset, bytes); 2627 if (!entry->bytes) 2628 free_bitmap(ctl, entry); 2629 } else { 2630 unlink_free_space(ctl, entry); 2631 align_gap_len = offset - 
entry->offset; 2632 align_gap = entry->offset; 2633 2634 entry->offset = offset + bytes; 2635 WARN_ON(entry->bytes < bytes + align_gap_len); 2636 2637 entry->bytes -= bytes + align_gap_len; 2638 if (!entry->bytes) 2639 kmem_cache_free(btrfs_free_space_cachep, entry); 2640 else 2641 link_free_space(ctl, entry); 2642 } 2643 out: 2644 spin_unlock(&ctl->tree_lock); 2645 2646 if (align_gap_len) 2647 __btrfs_add_free_space(block_group->fs_info, ctl, 2648 align_gap, align_gap_len); 2649 return ret; 2650 } 2651 2652 /* 2653 * given a cluster, put all of its extents back into the free space 2654 * cache. If a block group is passed, this function will only free 2655 * a cluster that belongs to the passed block group. 2656 * 2657 * Otherwise, it'll get a reference on the block group pointed to by the 2658 * cluster and remove the cluster from it. 2659 */ 2660 int btrfs_return_cluster_to_free_space( 2661 struct btrfs_block_group_cache *block_group, 2662 struct btrfs_free_cluster *cluster) 2663 { 2664 struct btrfs_free_space_ctl *ctl; 2665 int ret; 2666 2667 /* first, get a safe pointer to the block group */ 2668 spin_lock(&cluster->lock); 2669 if (!block_group) { 2670 block_group = cluster->block_group; 2671 if (!block_group) { 2672 spin_unlock(&cluster->lock); 2673 return 0; 2674 } 2675 } else if (cluster->block_group != block_group) { 2676 /* someone else has already freed it don't redo their work */ 2677 spin_unlock(&cluster->lock); 2678 return 0; 2679 } 2680 atomic_inc(&block_group->count); 2681 spin_unlock(&cluster->lock); 2682 2683 ctl = block_group->free_space_ctl; 2684 2685 /* now return any extents the cluster had on it */ 2686 spin_lock(&ctl->tree_lock); 2687 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2688 spin_unlock(&ctl->tree_lock); 2689 2690 /* finally drop our ref */ 2691 btrfs_put_block_group(block_group); 2692 return ret; 2693 } 2694 2695 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2696 struct btrfs_free_cluster *cluster, 2697 struct btrfs_free_space *entry, 2698 u64 bytes, u64 min_start, 2699 u64 *max_extent_size) 2700 { 2701 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2702 int err; 2703 u64 search_start = cluster->window_start; 2704 u64 search_bytes = bytes; 2705 u64 ret = 0; 2706 2707 search_start = min_start; 2708 search_bytes = bytes; 2709 2710 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2711 if (err) { 2712 if (search_bytes > *max_extent_size) 2713 *max_extent_size = search_bytes; 2714 return 0; 2715 } 2716 2717 ret = search_start; 2718 __bitmap_clear_bits(ctl, entry, ret, bytes); 2719 2720 return ret; 2721 } 2722 2723 /* 2724 * given a cluster, try to allocate 'bytes' from it, returns 0 2725 * if it couldn't find anything suitably large, or a logical disk offset 2726 * if things worked out 2727 */ 2728 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2729 struct btrfs_free_cluster *cluster, u64 bytes, 2730 u64 min_start, u64 *max_extent_size) 2731 { 2732 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2733 struct btrfs_free_space *entry = NULL; 2734 struct rb_node *node; 2735 u64 ret = 0; 2736 2737 spin_lock(&cluster->lock); 2738 if (bytes > cluster->max_size) 2739 goto out; 2740 2741 if (cluster->block_group != block_group) 2742 goto out; 2743 2744 node = rb_first(&cluster->root); 2745 if (!node) 2746 goto out; 2747 2748 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2749 while (1) { 2750 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2751 *max_extent_size = entry->bytes; 2752 2753 if (entry->bytes < bytes || 2754 (!entry->bitmap && entry->offset < min_start)) { 2755 node = rb_next(&entry->offset_index); 2756 if (!node) 2757 break; 2758 entry = rb_entry(node, struct btrfs_free_space, 2759 offset_index); 2760 continue; 2761 } 2762 2763 if (entry->bitmap) { 2764 ret = btrfs_alloc_from_bitmap(block_group, 2765 cluster, entry, bytes, 2766 cluster->window_start, 2767 max_extent_size); 2768 if (ret == 0) { 2769 node = rb_next(&entry->offset_index); 2770 if (!node) 2771 break; 2772 entry = rb_entry(node, struct btrfs_free_space, 2773 offset_index); 2774 continue; 2775 } 2776 cluster->window_start += bytes; 2777 } else { 2778 ret = entry->offset; 2779 2780 entry->offset += bytes; 2781 entry->bytes -= bytes; 2782 } 2783 2784 if (entry->bytes == 0) 2785 rb_erase(&entry->offset_index, &cluster->root); 2786 break; 2787 } 2788 out: 2789 spin_unlock(&cluster->lock); 2790 2791 if (!ret) 2792 return 0; 2793 2794 spin_lock(&ctl->tree_lock); 2795 2796 ctl->free_space -= bytes; 2797 if (entry->bytes == 0) { 2798 ctl->free_extents--; 2799 if (entry->bitmap) { 2800 kfree(entry->bitmap); 2801 ctl->total_bitmaps--; 2802 ctl->op->recalc_thresholds(ctl); 2803 } 2804 kmem_cache_free(btrfs_free_space_cachep, entry); 2805 } 2806 2807 spin_unlock(&ctl->tree_lock); 2808 2809 return ret; 2810 } 2811 2812 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2813 struct btrfs_free_space *entry, 2814 struct btrfs_free_cluster *cluster, 2815 u64 offset, u64 bytes, 2816 u64 cont1_bytes, u64 min_bytes) 2817 { 2818 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2819 unsigned long next_zero; 2820 unsigned long i; 2821 unsigned long want_bits; 2822 unsigned long min_bits; 2823 unsigned long found_bits; 2824 unsigned long max_bits = 0; 2825 unsigned long start = 0; 2826 unsigned long total_found = 0; 2827 int ret; 2828 2829 i = offset_to_bit(entry->offset, ctl->unit, 2830 max_t(u64, offset, entry->offset)); 2831 want_bits = bytes_to_bits(bytes, ctl->unit); 2832 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2833 2834 /* 2835 * Don't bother looking for a cluster in this bitmap if it's heavily 2836 * fragmented. 
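	 * max_extent_size is a cached upper bound on the largest contiguous
	 * run in this bitmap, so if even that is smaller than cont1_bytes
	 * there is no point scanning the bits.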
2837 */ 2838 if (entry->max_extent_size && 2839 entry->max_extent_size < cont1_bytes) 2840 return -ENOSPC; 2841 again: 2842 found_bits = 0; 2843 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2844 next_zero = find_next_zero_bit(entry->bitmap, 2845 BITS_PER_BITMAP, i); 2846 if (next_zero - i >= min_bits) { 2847 found_bits = next_zero - i; 2848 if (found_bits > max_bits) 2849 max_bits = found_bits; 2850 break; 2851 } 2852 if (next_zero - i > max_bits) 2853 max_bits = next_zero - i; 2854 i = next_zero; 2855 } 2856 2857 if (!found_bits) { 2858 entry->max_extent_size = (u64)max_bits * ctl->unit; 2859 return -ENOSPC; 2860 } 2861 2862 if (!total_found) { 2863 start = i; 2864 cluster->max_size = 0; 2865 } 2866 2867 total_found += found_bits; 2868 2869 if (cluster->max_size < found_bits * ctl->unit) 2870 cluster->max_size = found_bits * ctl->unit; 2871 2872 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2873 i = next_zero + 1; 2874 goto again; 2875 } 2876 2877 cluster->window_start = start * ctl->unit + entry->offset; 2878 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2879 ret = tree_insert_offset(&cluster->root, entry->offset, 2880 &entry->offset_index, 1); 2881 ASSERT(!ret); /* -EEXIST; Logic error */ 2882 2883 trace_btrfs_setup_cluster(block_group, cluster, 2884 total_found * ctl->unit, 1); 2885 return 0; 2886 } 2887 2888 /* 2889 * This searches the block group for just extents to fill the cluster with. 2890 * Try to find a cluster with at least bytes total bytes, at least one 2891 * extent of cont1_bytes, and other clusters of at least min_bytes. 2892 */ 2893 static noinline int 2894 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2895 struct btrfs_free_cluster *cluster, 2896 struct list_head *bitmaps, u64 offset, u64 bytes, 2897 u64 cont1_bytes, u64 min_bytes) 2898 { 2899 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2900 struct btrfs_free_space *first = NULL; 2901 struct btrfs_free_space *entry = NULL; 2902 struct btrfs_free_space *last; 2903 struct rb_node *node; 2904 u64 window_free; 2905 u64 max_extent; 2906 u64 total_size = 0; 2907 2908 entry = tree_search_offset(ctl, offset, 0, 1); 2909 if (!entry) 2910 return -ENOSPC; 2911 2912 /* 2913 * We don't want bitmaps, so just move along until we find a normal 2914 * extent entry. 
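	 * Any bitmap entries we walk past are stashed on the caller's list so
	 * that setup_cluster_bitmap() can try them later without another tree
	 * walk.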
2915 */ 2916 while (entry->bitmap || entry->bytes < min_bytes) { 2917 if (entry->bitmap && list_empty(&entry->list)) 2918 list_add_tail(&entry->list, bitmaps); 2919 node = rb_next(&entry->offset_index); 2920 if (!node) 2921 return -ENOSPC; 2922 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2923 } 2924 2925 window_free = entry->bytes; 2926 max_extent = entry->bytes; 2927 first = entry; 2928 last = entry; 2929 2930 for (node = rb_next(&entry->offset_index); node; 2931 node = rb_next(&entry->offset_index)) { 2932 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2933 2934 if (entry->bitmap) { 2935 if (list_empty(&entry->list)) 2936 list_add_tail(&entry->list, bitmaps); 2937 continue; 2938 } 2939 2940 if (entry->bytes < min_bytes) 2941 continue; 2942 2943 last = entry; 2944 window_free += entry->bytes; 2945 if (entry->bytes > max_extent) 2946 max_extent = entry->bytes; 2947 } 2948 2949 if (window_free < bytes || max_extent < cont1_bytes) 2950 return -ENOSPC; 2951 2952 cluster->window_start = first->offset; 2953 2954 node = &first->offset_index; 2955 2956 /* 2957 * now we've found our entries, pull them out of the free space 2958 * cache and put them into the cluster rbtree 2959 */ 2960 do { 2961 int ret; 2962 2963 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2964 node = rb_next(&entry->offset_index); 2965 if (entry->bitmap || entry->bytes < min_bytes) 2966 continue; 2967 2968 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2969 ret = tree_insert_offset(&cluster->root, entry->offset, 2970 &entry->offset_index, 0); 2971 total_size += entry->bytes; 2972 ASSERT(!ret); /* -EEXIST; Logic error */ 2973 } while (node && entry != last); 2974 2975 cluster->max_size = max_extent; 2976 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2977 return 0; 2978 } 2979 2980 /* 2981 * This specifically looks for bitmaps that may work in the cluster, we assume 2982 * that we have already failed to find extents that will work. 2983 */ 2984 static noinline int 2985 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2986 struct btrfs_free_cluster *cluster, 2987 struct list_head *bitmaps, u64 offset, u64 bytes, 2988 u64 cont1_bytes, u64 min_bytes) 2989 { 2990 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2991 struct btrfs_free_space *entry = NULL; 2992 int ret = -ENOSPC; 2993 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2994 2995 if (ctl->total_bitmaps == 0) 2996 return -ENOSPC; 2997 2998 /* 2999 * The bitmap that covers offset won't be in the list unless offset 3000 * is just its start offset. 3001 */ 3002 if (!list_empty(bitmaps)) 3003 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3004 3005 if (!entry || entry->offset != bitmap_offset) { 3006 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3007 if (entry && list_empty(&entry->list)) 3008 list_add(&entry->list, bitmaps); 3009 } 3010 3011 list_for_each_entry(entry, bitmaps, list) { 3012 if (entry->bytes < bytes) 3013 continue; 3014 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3015 bytes, cont1_bytes, min_bytes); 3016 if (!ret) 3017 return 0; 3018 } 3019 3020 /* 3021 * The bitmaps list has all the bitmaps that record free space 3022 * starting after offset, so no more search is required. 3023 */ 3024 return -ENOSPC; 3025 } 3026 3027 /* 3028 * here we try to find a cluster of blocks in a block group. The goal 3029 * is to find at least bytes+empty_size. 3030 * We might not find them all in one contiguous area. 
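 *
 * The search happens in two passes: first over plain extent entries
 * (setup_cluster_no_bitmap) and, only if that finds nothing suitable, over
 * bitmap entries (setup_cluster_bitmap).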
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry, *tmp;
	LIST_HEAD(bitmaps);
	u64 min_bytes;
	u64 cont1_bytes;
	int ret;

	/*
	 * Choose the minimum extent size we'll require for this
	 * cluster. For SSD_SPREAD, don't allow any fragmentation.
	 * For metadata, allow allocations with smaller extents. For
	 * data, keep it dense.
	 */
	if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
		cont1_bytes = min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		cont1_bytes = bytes;
		min_bytes = fs_info->sectorsize;
	} else {
		cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
		min_bytes = fs_info->sectorsize;
	}

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
				 min_bytes);

	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes + empty_size,
				      cont1_bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes + empty_size,
					   cont1_bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	} else {
		trace_btrfs_failed_cluster_setup(block_group);
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * Simple code to zero out a cluster.
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	cluster->fragmented = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}

static int do_trimming(struct btrfs_block_group_cache *block_group,
		       u64 *total_trimmed, u64 start, u64 bytes,
		       u64 reserved_start, u64 reserved_bytes,
		       struct btrfs_trim_range *trim_entry)
{
	struct btrfs_space_info *space_info = block_group->space_info;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	int ret;
	int update = 0;
	u64 trimmed = 0;

	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	if (!block_group->ro) {
		block_group->reserved += reserved_bytes;
		space_info->bytes_reserved += reserved_bytes;
		update = 1;
	}
	spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock); 3147 3148 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3149 if (!ret) 3150 *total_trimmed += trimmed; 3151 3152 mutex_lock(&ctl->cache_writeout_mutex); 3153 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3154 list_del(&trim_entry->list); 3155 mutex_unlock(&ctl->cache_writeout_mutex); 3156 3157 if (update) { 3158 spin_lock(&space_info->lock); 3159 spin_lock(&block_group->lock); 3160 if (block_group->ro) 3161 space_info->bytes_readonly += reserved_bytes; 3162 block_group->reserved -= reserved_bytes; 3163 space_info->bytes_reserved -= reserved_bytes; 3164 spin_unlock(&space_info->lock); 3165 spin_unlock(&block_group->lock); 3166 } 3167 3168 return ret; 3169 } 3170 3171 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3172 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3173 { 3174 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3175 struct btrfs_free_space *entry; 3176 struct rb_node *node; 3177 int ret = 0; 3178 u64 extent_start; 3179 u64 extent_bytes; 3180 u64 bytes; 3181 3182 while (start < end) { 3183 struct btrfs_trim_range trim_entry; 3184 3185 mutex_lock(&ctl->cache_writeout_mutex); 3186 spin_lock(&ctl->tree_lock); 3187 3188 if (ctl->free_space < minlen) { 3189 spin_unlock(&ctl->tree_lock); 3190 mutex_unlock(&ctl->cache_writeout_mutex); 3191 break; 3192 } 3193 3194 entry = tree_search_offset(ctl, start, 0, 1); 3195 if (!entry) { 3196 spin_unlock(&ctl->tree_lock); 3197 mutex_unlock(&ctl->cache_writeout_mutex); 3198 break; 3199 } 3200 3201 /* skip bitmaps */ 3202 while (entry->bitmap) { 3203 node = rb_next(&entry->offset_index); 3204 if (!node) { 3205 spin_unlock(&ctl->tree_lock); 3206 mutex_unlock(&ctl->cache_writeout_mutex); 3207 goto out; 3208 } 3209 entry = rb_entry(node, struct btrfs_free_space, 3210 offset_index); 3211 } 3212 3213 if (entry->offset >= end) { 3214 spin_unlock(&ctl->tree_lock); 3215 mutex_unlock(&ctl->cache_writeout_mutex); 3216 break; 3217 } 3218 3219 extent_start = entry->offset; 3220 extent_bytes = entry->bytes; 3221 start = max(start, extent_start); 3222 bytes = min(extent_start + extent_bytes, end) - start; 3223 if (bytes < minlen) { 3224 spin_unlock(&ctl->tree_lock); 3225 mutex_unlock(&ctl->cache_writeout_mutex); 3226 goto next; 3227 } 3228 3229 unlink_free_space(ctl, entry); 3230 kmem_cache_free(btrfs_free_space_cachep, entry); 3231 3232 spin_unlock(&ctl->tree_lock); 3233 trim_entry.start = extent_start; 3234 trim_entry.bytes = extent_bytes; 3235 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3236 mutex_unlock(&ctl->cache_writeout_mutex); 3237 3238 ret = do_trimming(block_group, total_trimmed, start, bytes, 3239 extent_start, extent_bytes, &trim_entry); 3240 if (ret) 3241 break; 3242 next: 3243 start += bytes; 3244 3245 if (fatal_signal_pending(current)) { 3246 ret = -ERESTARTSYS; 3247 break; 3248 } 3249 3250 cond_resched(); 3251 } 3252 out: 3253 return ret; 3254 } 3255 3256 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3257 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3258 { 3259 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3260 struct btrfs_free_space *entry; 3261 int ret = 0; 3262 int ret2; 3263 u64 bytes; 3264 u64 offset = offset_to_bitmap(ctl, start); 3265 3266 while (offset < end) { 3267 bool next_bitmap = false; 3268 struct btrfs_trim_range trim_entry; 3269 3270 mutex_lock(&ctl->cache_writeout_mutex); 3271 spin_lock(&ctl->tree_lock); 3272 3273 if (ctl->free_space < minlen) { 3274 
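			/*
			 * Less than minlen total free space is left in this
			 * block group, so no single region can satisfy minlen;
			 * stop trimming bitmaps.
			 */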
spin_unlock(&ctl->tree_lock); 3275 mutex_unlock(&ctl->cache_writeout_mutex); 3276 break; 3277 } 3278 3279 entry = tree_search_offset(ctl, offset, 1, 0); 3280 if (!entry) { 3281 spin_unlock(&ctl->tree_lock); 3282 mutex_unlock(&ctl->cache_writeout_mutex); 3283 next_bitmap = true; 3284 goto next; 3285 } 3286 3287 bytes = minlen; 3288 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3289 if (ret2 || start >= end) { 3290 spin_unlock(&ctl->tree_lock); 3291 mutex_unlock(&ctl->cache_writeout_mutex); 3292 next_bitmap = true; 3293 goto next; 3294 } 3295 3296 bytes = min(bytes, end - start); 3297 if (bytes < minlen) { 3298 spin_unlock(&ctl->tree_lock); 3299 mutex_unlock(&ctl->cache_writeout_mutex); 3300 goto next; 3301 } 3302 3303 bitmap_clear_bits(ctl, entry, start, bytes); 3304 if (entry->bytes == 0) 3305 free_bitmap(ctl, entry); 3306 3307 spin_unlock(&ctl->tree_lock); 3308 trim_entry.start = start; 3309 trim_entry.bytes = bytes; 3310 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3311 mutex_unlock(&ctl->cache_writeout_mutex); 3312 3313 ret = do_trimming(block_group, total_trimmed, start, bytes, 3314 start, bytes, &trim_entry); 3315 if (ret) 3316 break; 3317 next: 3318 if (next_bitmap) { 3319 offset += BITS_PER_BITMAP * ctl->unit; 3320 } else { 3321 start += bytes; 3322 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3323 offset += BITS_PER_BITMAP * ctl->unit; 3324 } 3325 3326 if (fatal_signal_pending(current)) { 3327 ret = -ERESTARTSYS; 3328 break; 3329 } 3330 3331 cond_resched(); 3332 } 3333 3334 return ret; 3335 } 3336 3337 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3338 { 3339 atomic_inc(&cache->trimming); 3340 } 3341 3342 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3343 { 3344 struct btrfs_fs_info *fs_info = block_group->fs_info; 3345 struct extent_map_tree *em_tree; 3346 struct extent_map *em; 3347 bool cleanup; 3348 3349 spin_lock(&block_group->lock); 3350 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3351 block_group->removed); 3352 spin_unlock(&block_group->lock); 3353 3354 if (cleanup) { 3355 mutex_lock(&fs_info->chunk_mutex); 3356 em_tree = &fs_info->mapping_tree.map_tree; 3357 write_lock(&em_tree->lock); 3358 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3359 1); 3360 BUG_ON(!em); /* logic error, can't happen */ 3361 /* 3362 * remove_extent_mapping() will delete us from the pinned_chunks 3363 * list, which is protected by the chunk mutex. 3364 */ 3365 remove_extent_mapping(em_tree, em); 3366 write_unlock(&em_tree->lock); 3367 mutex_unlock(&fs_info->chunk_mutex); 3368 3369 /* once for us and once for the tree */ 3370 free_extent_map(em); 3371 free_extent_map(em); 3372 3373 /* 3374 * We've left one free space entry and other tasks trimming 3375 * this block group have left 1 entry each one. Free them. 
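		 * The block group has already been removed (cleanup is only
		 * true when ->removed is set), so nothing will allocate from
		 * these stale entries again.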
3376 */ 3377 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3378 } 3379 } 3380 3381 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3382 u64 *trimmed, u64 start, u64 end, u64 minlen) 3383 { 3384 int ret; 3385 3386 *trimmed = 0; 3387 3388 spin_lock(&block_group->lock); 3389 if (block_group->removed) { 3390 spin_unlock(&block_group->lock); 3391 return 0; 3392 } 3393 btrfs_get_block_group_trimming(block_group); 3394 spin_unlock(&block_group->lock); 3395 3396 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3397 if (ret) 3398 goto out; 3399 3400 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3401 out: 3402 btrfs_put_block_group_trimming(block_group); 3403 return ret; 3404 } 3405 3406 /* 3407 * Find the left-most item in the cache tree, and then return the 3408 * smallest inode number in the item. 3409 * 3410 * Note: the returned inode number may not be the smallest one in 3411 * the tree, if the left-most item is a bitmap. 3412 */ 3413 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3414 { 3415 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3416 struct btrfs_free_space *entry = NULL; 3417 u64 ino = 0; 3418 3419 spin_lock(&ctl->tree_lock); 3420 3421 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3422 goto out; 3423 3424 entry = rb_entry(rb_first(&ctl->free_space_offset), 3425 struct btrfs_free_space, offset_index); 3426 3427 if (!entry->bitmap) { 3428 ino = entry->offset; 3429 3430 unlink_free_space(ctl, entry); 3431 entry->offset++; 3432 entry->bytes--; 3433 if (!entry->bytes) 3434 kmem_cache_free(btrfs_free_space_cachep, entry); 3435 else 3436 link_free_space(ctl, entry); 3437 } else { 3438 u64 offset = 0; 3439 u64 count = 1; 3440 int ret; 3441 3442 ret = search_bitmap(ctl, entry, &offset, &count, true); 3443 /* Logic error; Should be empty if it can't find anything */ 3444 ASSERT(!ret); 3445 3446 ino = offset; 3447 bitmap_clear_bits(ctl, entry, offset, 1); 3448 if (entry->bytes == 0) 3449 free_bitmap(ctl, entry); 3450 } 3451 out: 3452 spin_unlock(&ctl->tree_lock); 3453 3454 return ino; 3455 } 3456 3457 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3458 struct btrfs_path *path) 3459 { 3460 struct inode *inode = NULL; 3461 3462 spin_lock(&root->ino_cache_lock); 3463 if (root->ino_cache_inode) 3464 inode = igrab(root->ino_cache_inode); 3465 spin_unlock(&root->ino_cache_lock); 3466 if (inode) 3467 return inode; 3468 3469 inode = __lookup_free_space_inode(root, path, 0); 3470 if (IS_ERR(inode)) 3471 return inode; 3472 3473 spin_lock(&root->ino_cache_lock); 3474 if (!btrfs_fs_closing(root->fs_info)) 3475 root->ino_cache_inode = igrab(inode); 3476 spin_unlock(&root->ino_cache_lock); 3477 3478 return inode; 3479 } 3480 3481 int create_free_ino_inode(struct btrfs_root *root, 3482 struct btrfs_trans_handle *trans, 3483 struct btrfs_path *path) 3484 { 3485 return __create_free_space_inode(root, trans, path, 3486 BTRFS_FREE_INO_OBJECTID, 0); 3487 } 3488 3489 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3490 { 3491 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3492 struct btrfs_path *path; 3493 struct inode *inode; 3494 int ret = 0; 3495 u64 root_gen = btrfs_root_generation(&root->root_item); 3496 3497 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3498 return 0; 3499 3500 /* 3501 * If we're unmounting then just return, since this does a search on the 3502 * normal root and not the commit root and we could deadlock. 
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		btrfs_err(fs_info,
			"failed to load free ino cache for root %llu",
			root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path,
			      struct inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	int ret;
	struct btrfs_io_ctl io_ctl;
	bool release_metadata = true;

	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
		return 0;

	memset(&io_ctl, 0, sizeof(io_ctl));
	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
				      trans, path, 0);
	if (!ret) {
		/*
		 * At this point writepages() didn't error out, so our metadata
		 * reservation is released when the writeback finishes, at
		 * inode.c:btrfs_finish_ordered_io(), regardless of whether it
		 * finishes with or without an error.
		 */
		release_metadata = false;
		ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
	}

	if (ret) {
		if (release_metadata)
			btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		btrfs_err(fs_info,
			  "failed to write free ino cache for root %llu",
			  root->root_key.objectid);
#endif
	}

	return ret;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically. It
 * doesn't do any of the merging that add_free_space does; it acts a lot like
 * how the free space cache loading code works, so you can get really weird
 * configurations.
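 *
 * For example, a test can do something like
 *
 *	test_add_free_space_entry(cache, 0, SZ_4M, 0);
 *	test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);
 *
 * to place a plain extent entry right next to a bitmap entry, with no merging
 * getting in the way (offsets here are purely illustrative).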
3578 */ 3579 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3580 u64 offset, u64 bytes, bool bitmap) 3581 { 3582 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3583 struct btrfs_free_space *info = NULL, *bitmap_info; 3584 void *map = NULL; 3585 u64 bytes_added; 3586 int ret; 3587 3588 again: 3589 if (!info) { 3590 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3591 if (!info) 3592 return -ENOMEM; 3593 } 3594 3595 if (!bitmap) { 3596 spin_lock(&ctl->tree_lock); 3597 info->offset = offset; 3598 info->bytes = bytes; 3599 info->max_extent_size = 0; 3600 ret = link_free_space(ctl, info); 3601 spin_unlock(&ctl->tree_lock); 3602 if (ret) 3603 kmem_cache_free(btrfs_free_space_cachep, info); 3604 return ret; 3605 } 3606 3607 if (!map) { 3608 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3609 if (!map) { 3610 kmem_cache_free(btrfs_free_space_cachep, info); 3611 return -ENOMEM; 3612 } 3613 } 3614 3615 spin_lock(&ctl->tree_lock); 3616 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3617 1, 0); 3618 if (!bitmap_info) { 3619 info->bitmap = map; 3620 map = NULL; 3621 add_new_bitmap(ctl, info, offset); 3622 bitmap_info = info; 3623 info = NULL; 3624 } 3625 3626 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3627 3628 bytes -= bytes_added; 3629 offset += bytes_added; 3630 spin_unlock(&ctl->tree_lock); 3631 3632 if (bytes) 3633 goto again; 3634 3635 if (info) 3636 kmem_cache_free(btrfs_free_space_cachep, info); 3637 if (map) 3638 kfree(map); 3639 return 0; 3640 } 3641 3642 /* 3643 * Checks to see if the given range is in the free space cache. This is really 3644 * just used to check the absence of space, so if there is free space in the 3645 * range at all we will return 1. 3646 */ 3647 int test_check_exists(struct btrfs_block_group_cache *cache, 3648 u64 offset, u64 bytes) 3649 { 3650 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3651 struct btrfs_free_space *info; 3652 int ret = 0; 3653 3654 spin_lock(&ctl->tree_lock); 3655 info = tree_search_offset(ctl, offset, 0, 0); 3656 if (!info) { 3657 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3658 1, 0); 3659 if (!info) 3660 goto out; 3661 } 3662 3663 have_info: 3664 if (info->bitmap) { 3665 u64 bit_off, bit_bytes; 3666 struct rb_node *n; 3667 struct btrfs_free_space *tmp; 3668 3669 bit_off = offset; 3670 bit_bytes = ctl->unit; 3671 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3672 if (!ret) { 3673 if (bit_off == offset) { 3674 ret = 1; 3675 goto out; 3676 } else if (bit_off > offset && 3677 offset + bytes > bit_off) { 3678 ret = 1; 3679 goto out; 3680 } 3681 } 3682 3683 n = rb_prev(&info->offset_index); 3684 while (n) { 3685 tmp = rb_entry(n, struct btrfs_free_space, 3686 offset_index); 3687 if (tmp->offset + tmp->bytes < offset) 3688 break; 3689 if (offset + bytes < tmp->offset) { 3690 n = rb_prev(&tmp->offset_index); 3691 continue; 3692 } 3693 info = tmp; 3694 goto have_info; 3695 } 3696 3697 n = rb_next(&info->offset_index); 3698 while (n) { 3699 tmp = rb_entry(n, struct btrfs_free_space, 3700 offset_index); 3701 if (offset + bytes < tmp->offset) 3702 break; 3703 if (tmp->offset + tmp->bytes < offset) { 3704 n = rb_next(&tmp->offset_index); 3705 continue; 3706 } 3707 info = tmp; 3708 goto have_info; 3709 } 3710 3711 ret = 0; 3712 goto out; 3713 } 3714 3715 if (info->offset == offset) { 3716 ret = 1; 3717 goto out; 3718 } 3719 3720 if (offset > info->offset && offset < info->offset + info->bytes) 3721 ret = 1; 3722 out: 3723 
spin_unlock(&ctl->tree_lock); 3724 return ret; 3725 } 3726 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3727