1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Red Hat. All rights reserved. 4 */ 5 6 #include <linux/pagemap.h> 7 #include <linux/sched.h> 8 #include <linux/sched/signal.h> 9 #include <linux/slab.h> 10 #include <linux/math64.h> 11 #include <linux/ratelimit.h> 12 #include <linux/error-injection.h> 13 #include <linux/sched/mm.h> 14 #include "ctree.h" 15 #include "free-space-cache.h" 16 #include "transaction.h" 17 #include "disk-io.h" 18 #include "extent_io.h" 19 #include "volumes.h" 20 #include "space-info.h" 21 #include "delalloc-space.h" 22 #include "block-group.h" 23 #include "discard.h" 24 25 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 26 #define MAX_CACHE_BYTES_PER_GIG SZ_64K 27 #define FORCE_EXTENT_THRESHOLD SZ_1M 28 29 struct btrfs_trim_range { 30 u64 start; 31 u64 bytes; 32 struct list_head list; 33 }; 34 35 static int link_free_space(struct btrfs_free_space_ctl *ctl, 36 struct btrfs_free_space *info); 37 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 38 struct btrfs_free_space *info); 39 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 40 struct btrfs_free_space *bitmap_info, u64 *offset, 41 u64 *bytes, bool for_alloc); 42 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 43 struct btrfs_free_space *bitmap_info); 44 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 45 struct btrfs_free_space *info, u64 offset, 46 u64 bytes); 47 48 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 49 struct btrfs_path *path, 50 u64 offset) 51 { 52 struct btrfs_fs_info *fs_info = root->fs_info; 53 struct btrfs_key key; 54 struct btrfs_key location; 55 struct btrfs_disk_key disk_key; 56 struct btrfs_free_space_header *header; 57 struct extent_buffer *leaf; 58 struct inode *inode = NULL; 59 unsigned nofs_flag; 60 int ret; 61 62 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 63 key.offset = offset; 64 key.type = 0; 65 66 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 67 if (ret < 0) 68 return ERR_PTR(ret); 69 if (ret > 0) { 70 btrfs_release_path(path); 71 return ERR_PTR(-ENOENT); 72 } 73 74 leaf = path->nodes[0]; 75 header = btrfs_item_ptr(leaf, path->slots[0], 76 struct btrfs_free_space_header); 77 btrfs_free_space_key(leaf, header, &disk_key); 78 btrfs_disk_key_to_cpu(&location, &disk_key); 79 btrfs_release_path(path); 80 81 /* 82 * We are often under a trans handle at this point, so we need to make 83 * sure NOFS is set to keep us from deadlocking. 84 */ 85 nofs_flag = memalloc_nofs_save(); 86 inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path); 87 btrfs_release_path(path); 88 memalloc_nofs_restore(nofs_flag); 89 if (IS_ERR(inode)) 90 return inode; 91 92 mapping_set_gfp_mask(inode->i_mapping, 93 mapping_gfp_constraint(inode->i_mapping, 94 ~(__GFP_FS | __GFP_HIGHMEM))); 95 96 return inode; 97 } 98 99 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, 100 struct btrfs_path *path) 101 { 102 struct btrfs_fs_info *fs_info = block_group->fs_info; 103 struct inode *inode = NULL; 104 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 105 106 spin_lock(&block_group->lock); 107 if (block_group->inode) 108 inode = igrab(block_group->inode); 109 spin_unlock(&block_group->lock); 110 if (inode) 111 return inode; 112 113 inode = __lookup_free_space_inode(fs_info->tree_root, path, 114 block_group->start); 115 if (IS_ERR(inode)) 116 return inode; 117 118 spin_lock(&block_group->lock); 119 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 120 btrfs_info(fs_info, "Old style space inode found, converting."); 121 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 122 BTRFS_INODE_NODATACOW; 123 block_group->disk_cache_state = BTRFS_DC_CLEAR; 124 } 125 126 if (!block_group->iref) { 127 block_group->inode = igrab(inode); 128 block_group->iref = 1; 129 } 130 spin_unlock(&block_group->lock); 131 132 return inode; 133 } 134 135 static int __create_free_space_inode(struct btrfs_root *root, 136 struct btrfs_trans_handle *trans, 137 struct btrfs_path *path, 138 u64 ino, u64 offset) 139 { 140 struct btrfs_key key; 141 struct btrfs_disk_key disk_key; 142 struct btrfs_free_space_header *header; 143 struct btrfs_inode_item *inode_item; 144 struct extent_buffer *leaf; 145 /* We inline CRCs for the free disk space cache */ 146 const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC | 147 BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 148 int ret; 149 150 ret = btrfs_insert_empty_inode(trans, root, path, ino); 151 if (ret) 152 return ret; 153 154 leaf = path->nodes[0]; 155 inode_item = btrfs_item_ptr(leaf, path->slots[0], 156 struct btrfs_inode_item); 157 btrfs_item_key(leaf, &disk_key, path->slots[0]); 158 memzero_extent_buffer(leaf, (unsigned long)inode_item, 159 sizeof(*inode_item)); 160 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 161 btrfs_set_inode_size(leaf, inode_item, 0); 162 btrfs_set_inode_nbytes(leaf, inode_item, 0); 163 btrfs_set_inode_uid(leaf, inode_item, 0); 164 btrfs_set_inode_gid(leaf, inode_item, 0); 165 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 166 btrfs_set_inode_flags(leaf, inode_item, flags); 167 btrfs_set_inode_nlink(leaf, inode_item, 1); 168 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 169 btrfs_set_inode_block_group(leaf, inode_item, offset); 170 btrfs_mark_buffer_dirty(leaf); 171 btrfs_release_path(path); 172 173 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 174 key.offset = offset; 175 key.type = 0; 176 ret = btrfs_insert_empty_item(trans, root, path, &key, 177 sizeof(struct btrfs_free_space_header)); 178 if (ret < 0) { 179 btrfs_release_path(path); 180 return ret; 181 } 182 183 leaf = path->nodes[0]; 184 header = btrfs_item_ptr(leaf, path->slots[0], 185 struct btrfs_free_space_header); 186 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 187 btrfs_set_free_space_key(leaf, header, &disk_key); 188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_release_path(path); 190 191 return 0; 192 } 193 194 int create_free_space_inode(struct btrfs_trans_handle *trans, 195 struct btrfs_block_group *block_group, 196 struct btrfs_path *path) 197 { 198 int ret; 199 u64 ino; 200 201 ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino); 202 if (ret < 0) 203 return ret; 204 205 return __create_free_space_inode(trans->fs_info->tree_root, trans, path, 206 ino, block_group->start); 207 } 208 209 /* 210 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode 211 * handles lookup, otherwise it takes ownership and iputs the inode. 212 * Don't reuse an inode pointer after passing it into this function. 213 */ 214 int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans, 215 struct inode *inode, 216 struct btrfs_block_group *block_group) 217 { 218 struct btrfs_path *path; 219 struct btrfs_key key; 220 int ret = 0; 221 222 path = btrfs_alloc_path(); 223 if (!path) 224 return -ENOMEM; 225 226 if (!inode) 227 inode = lookup_free_space_inode(block_group, path); 228 if (IS_ERR(inode)) { 229 if (PTR_ERR(inode) != -ENOENT) 230 ret = PTR_ERR(inode); 231 goto out; 232 } 233 ret = btrfs_orphan_add(trans, BTRFS_I(inode)); 234 if (ret) { 235 btrfs_add_delayed_iput(inode); 236 goto out; 237 } 238 clear_nlink(inode); 239 /* One for the block groups ref */ 240 spin_lock(&block_group->lock); 241 if (block_group->iref) { 242 block_group->iref = 0; 243 block_group->inode = NULL; 244 spin_unlock(&block_group->lock); 245 iput(inode); 246 } else { 247 spin_unlock(&block_group->lock); 248 } 249 /* One for the lookup ref */ 250 btrfs_add_delayed_iput(inode); 251 252 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 253 key.type = 0; 254 key.offset = block_group->start; 255 ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path, 256 -1, 1); 257 if (ret) { 258 if (ret > 0) 259 ret = 0; 260 goto out; 261 } 262 ret = btrfs_del_item(trans, trans->fs_info->tree_root, path); 263 out: 264 btrfs_free_path(path); 265 return ret; 266 } 267 268 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 269 struct btrfs_block_rsv *rsv) 270 { 271 u64 needed_bytes; 272 int ret; 273 274 /* 1 for slack space, 1 for updating the inode */ 275 needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) + 276 btrfs_calc_metadata_size(fs_info, 1); 277 278 spin_lock(&rsv->lock); 279 if (rsv->reserved < needed_bytes) 280 ret = -ENOSPC; 281 else 282 ret = 0; 283 spin_unlock(&rsv->lock); 284 return ret; 285 } 286 287 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 288 struct btrfs_block_group *block_group, 289 struct inode *inode) 290 { 291 struct btrfs_root *root = BTRFS_I(inode)->root; 292 int ret = 0; 293 bool locked = false; 294 295 if (block_group) { 296 struct btrfs_path *path = btrfs_alloc_path(); 297 298 if (!path) { 299 ret = -ENOMEM; 300 goto fail; 301 } 302 locked = true; 303 mutex_lock(&trans->transaction->cache_write_mutex); 304 if (!list_empty(&block_group->io_list)) { 305 list_del_init(&block_group->io_list); 306 307 btrfs_wait_cache_io(trans, block_group, path); 308 btrfs_put_block_group(block_group); 309 } 310 311 /* 312 * now that we've truncated the cache away, its no longer 313 * setup or written 314 */ 315 spin_lock(&block_group->lock); 316 block_group->disk_cache_state = BTRFS_DC_CLEAR; 317 spin_unlock(&block_group->lock); 318 btrfs_free_path(path); 319 } 320 321 btrfs_i_size_write(BTRFS_I(inode), 0); 322 truncate_pagecache(inode, 0); 323 324 /* 325 * We skip the throttling logic for free space cache inodes, so we don't 326 * need to check for -EAGAIN. 327 */ 328 ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode), 329 0, BTRFS_EXTENT_DATA_KEY); 330 if (ret) 331 goto fail; 332 333 ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); 334 335 fail: 336 if (locked) 337 mutex_unlock(&trans->transaction->cache_write_mutex); 338 if (ret) 339 btrfs_abort_transaction(trans, ret); 340 341 return ret; 342 } 343 344 static void readahead_cache(struct inode *inode) 345 { 346 struct file_ra_state *ra; 347 unsigned long last_index; 348 349 ra = kzalloc(sizeof(*ra), GFP_NOFS); 350 if (!ra) 351 return; 352 353 file_ra_state_init(ra, inode->i_mapping); 354 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 355 356 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 357 358 kfree(ra); 359 } 360 361 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 362 int write) 363 { 364 int num_pages; 365 366 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 367 368 /* Make sure we can fit our crcs and generation into the first page */ 369 if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 370 return -ENOSPC; 371 372 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 373 374 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 375 if (!io_ctl->pages) 376 return -ENOMEM; 377 378 io_ctl->num_pages = num_pages; 379 io_ctl->fs_info = btrfs_sb(inode->i_sb); 380 io_ctl->inode = inode; 381 382 return 0; 383 } 384 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 385 386 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 387 { 388 kfree(io_ctl->pages); 389 io_ctl->pages = NULL; 390 } 391 392 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 393 { 394 if (io_ctl->cur) { 395 io_ctl->cur = NULL; 396 io_ctl->orig = NULL; 397 } 398 } 399 400 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 401 { 402 ASSERT(io_ctl->index < io_ctl->num_pages); 403 io_ctl->page = io_ctl->pages[io_ctl->index++]; 404 io_ctl->cur = page_address(io_ctl->page); 405 io_ctl->orig = io_ctl->cur; 406 io_ctl->size = PAGE_SIZE; 407 if (clear) 408 clear_page(io_ctl->cur); 409 } 410 411 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 412 { 413 int i; 414 415 io_ctl_unmap_page(io_ctl); 416 417 for (i = 0; i < io_ctl->num_pages; i++) { 418 if (io_ctl->pages[i]) { 419 ClearPageChecked(io_ctl->pages[i]); 420 unlock_page(io_ctl->pages[i]); 421 put_page(io_ctl->pages[i]); 422 } 423 } 424 } 425 426 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate) 427 { 428 struct page *page; 429 struct inode *inode = io_ctl->inode; 430 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 431 int i; 432 433 for (i = 0; i < io_ctl->num_pages; i++) { 434 int ret; 435 436 page = find_or_create_page(inode->i_mapping, i, mask); 437 if (!page) { 438 io_ctl_drop_pages(io_ctl); 439 return -ENOMEM; 440 } 441 442 ret = set_page_extent_mapped(page); 443 if (ret < 0) { 444 unlock_page(page); 445 put_page(page); 446 io_ctl_drop_pages(io_ctl); 447 return ret; 448 } 449 450 io_ctl->pages[i] = page; 451 if (uptodate && !PageUptodate(page)) { 452 btrfs_readpage(NULL, page); 453 lock_page(page); 454 if (page->mapping != inode->i_mapping) { 455 btrfs_err(BTRFS_I(inode)->root->fs_info, 456 "free space cache page truncated"); 457 io_ctl_drop_pages(io_ctl); 458 return -EIO; 459 } 460 if (!PageUptodate(page)) { 461 btrfs_err(BTRFS_I(inode)->root->fs_info, 462 "error reading free space cache"); 463 io_ctl_drop_pages(io_ctl); 464 return -EIO; 465 } 466 } 467 } 468 469 for (i = 0; i < io_ctl->num_pages; i++) 470 clear_page_dirty_for_io(io_ctl->pages[i]); 471 472 return 0; 473 } 474 475 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 476 { 477 io_ctl_map_page(io_ctl, 1); 478 479 /* 480 * Skip the csum areas. If we don't check crcs then we just have a 481 * 64bit chunk at the front of the first page. 482 */ 483 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 484 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 485 486 put_unaligned_le64(generation, io_ctl->cur); 487 io_ctl->cur += sizeof(u64); 488 } 489 490 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 491 { 492 u64 cache_gen; 493 494 /* 495 * Skip the crc area. If we don't check crcs then we just have a 64bit 496 * chunk at the front of the first page. 497 */ 498 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 499 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 500 501 cache_gen = get_unaligned_le64(io_ctl->cur); 502 if (cache_gen != generation) { 503 btrfs_err_rl(io_ctl->fs_info, 504 "space cache generation (%llu) does not match inode (%llu)", 505 cache_gen, generation); 506 io_ctl_unmap_page(io_ctl); 507 return -EIO; 508 } 509 io_ctl->cur += sizeof(u64); 510 return 0; 511 } 512 513 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 514 { 515 u32 *tmp; 516 u32 crc = ~(u32)0; 517 unsigned offset = 0; 518 519 if (index == 0) 520 offset = sizeof(u32) * io_ctl->num_pages; 521 522 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 523 btrfs_crc32c_final(crc, (u8 *)&crc); 524 io_ctl_unmap_page(io_ctl); 525 tmp = page_address(io_ctl->pages[0]); 526 tmp += index; 527 *tmp = crc; 528 } 529 530 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 531 { 532 u32 *tmp, val; 533 u32 crc = ~(u32)0; 534 unsigned offset = 0; 535 536 if (index == 0) 537 offset = sizeof(u32) * io_ctl->num_pages; 538 539 tmp = page_address(io_ctl->pages[0]); 540 tmp += index; 541 val = *tmp; 542 543 io_ctl_map_page(io_ctl, 0); 544 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 545 btrfs_crc32c_final(crc, (u8 *)&crc); 546 if (val != crc) { 547 btrfs_err_rl(io_ctl->fs_info, 548 "csum mismatch on free space cache"); 549 io_ctl_unmap_page(io_ctl); 550 return -EIO; 551 } 552 553 return 0; 554 } 555 556 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 557 void *bitmap) 558 { 559 struct btrfs_free_space_entry *entry; 560 561 if (!io_ctl->cur) 562 return -ENOSPC; 563 564 entry = io_ctl->cur; 565 put_unaligned_le64(offset, &entry->offset); 566 put_unaligned_le64(bytes, &entry->bytes); 567 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 568 BTRFS_FREE_SPACE_EXTENT; 569 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 570 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 571 572 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 573 return 0; 574 575 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 576 577 /* No more pages to map */ 578 if (io_ctl->index >= io_ctl->num_pages) 579 return 0; 580 581 /* map the next page */ 582 io_ctl_map_page(io_ctl, 1); 583 return 0; 584 } 585 586 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 587 { 588 if (!io_ctl->cur) 589 return -ENOSPC; 590 591 /* 592 * If we aren't at the start of the current page, unmap this one and 593 * map the next one if there is any left. 594 */ 595 if (io_ctl->cur != io_ctl->orig) { 596 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 597 if (io_ctl->index >= io_ctl->num_pages) 598 return -ENOSPC; 599 io_ctl_map_page(io_ctl, 0); 600 } 601 602 copy_page(io_ctl->cur, bitmap); 603 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 604 if (io_ctl->index < io_ctl->num_pages) 605 io_ctl_map_page(io_ctl, 0); 606 return 0; 607 } 608 609 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 610 { 611 /* 612 * If we're not on the boundary we know we've modified the page and we 613 * need to crc the page. 614 */ 615 if (io_ctl->cur != io_ctl->orig) 616 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 617 else 618 io_ctl_unmap_page(io_ctl); 619 620 while (io_ctl->index < io_ctl->num_pages) { 621 io_ctl_map_page(io_ctl, 1); 622 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 623 } 624 } 625 626 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 627 struct btrfs_free_space *entry, u8 *type) 628 { 629 struct btrfs_free_space_entry *e; 630 int ret; 631 632 if (!io_ctl->cur) { 633 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 634 if (ret) 635 return ret; 636 } 637 638 e = io_ctl->cur; 639 entry->offset = get_unaligned_le64(&e->offset); 640 entry->bytes = get_unaligned_le64(&e->bytes); 641 *type = e->type; 642 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 643 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 644 645 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 646 return 0; 647 648 io_ctl_unmap_page(io_ctl); 649 650 return 0; 651 } 652 653 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 654 struct btrfs_free_space *entry) 655 { 656 int ret; 657 658 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 659 if (ret) 660 return ret; 661 662 copy_page(entry->bitmap, io_ctl->cur); 663 io_ctl_unmap_page(io_ctl); 664 665 return 0; 666 } 667 668 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 669 { 670 struct btrfs_block_group *block_group = ctl->private; 671 u64 max_bytes; 672 u64 bitmap_bytes; 673 u64 extent_bytes; 674 u64 size = block_group->length; 675 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 676 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 677 678 max_bitmaps = max_t(u64, max_bitmaps, 1); 679 680 ASSERT(ctl->total_bitmaps <= max_bitmaps); 681 682 /* 683 * We are trying to keep the total amount of memory used per 1GiB of 684 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation 685 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of 686 * bitmaps, we may end up using more memory than this. 687 */ 688 if (size < SZ_1G) 689 max_bytes = MAX_CACHE_BYTES_PER_GIG; 690 else 691 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); 692 693 bitmap_bytes = ctl->total_bitmaps * ctl->unit; 694 695 /* 696 * we want the extent entry threshold to always be at most 1/2 the max 697 * bytes we can have, or whatever is less than that. 698 */ 699 extent_bytes = max_bytes - bitmap_bytes; 700 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 701 702 ctl->extents_thresh = 703 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 704 } 705 706 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 707 struct btrfs_free_space_ctl *ctl, 708 struct btrfs_path *path, u64 offset) 709 { 710 struct btrfs_fs_info *fs_info = root->fs_info; 711 struct btrfs_free_space_header *header; 712 struct extent_buffer *leaf; 713 struct btrfs_io_ctl io_ctl; 714 struct btrfs_key key; 715 struct btrfs_free_space *e, *n; 716 LIST_HEAD(bitmaps); 717 u64 num_entries; 718 u64 num_bitmaps; 719 u64 generation; 720 u8 type; 721 int ret = 0; 722 723 /* Nothing in the space cache, goodbye */ 724 if (!i_size_read(inode)) 725 return 0; 726 727 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 728 key.offset = offset; 729 key.type = 0; 730 731 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 732 if (ret < 0) 733 return 0; 734 else if (ret > 0) { 735 btrfs_release_path(path); 736 return 0; 737 } 738 739 ret = -1; 740 741 leaf = path->nodes[0]; 742 header = btrfs_item_ptr(leaf, path->slots[0], 743 struct btrfs_free_space_header); 744 num_entries = btrfs_free_space_entries(leaf, header); 745 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 746 generation = btrfs_free_space_generation(leaf, header); 747 btrfs_release_path(path); 748 749 if (!BTRFS_I(inode)->generation) { 750 btrfs_info(fs_info, 751 "the free space cache file (%llu) is invalid, skip it", 752 offset); 753 return 0; 754 } 755 756 if (BTRFS_I(inode)->generation != generation) { 757 btrfs_err(fs_info, 758 "free space inode generation (%llu) did not match free space cache generation (%llu)", 759 BTRFS_I(inode)->generation, generation); 760 return 0; 761 } 762 763 if (!num_entries) 764 return 0; 765 766 ret = io_ctl_init(&io_ctl, inode, 0); 767 if (ret) 768 return ret; 769 770 readahead_cache(inode); 771 772 ret = io_ctl_prepare_pages(&io_ctl, true); 773 if (ret) 774 goto out; 775 776 ret = io_ctl_check_crc(&io_ctl, 0); 777 if (ret) 778 goto free_cache; 779 780 ret = io_ctl_check_generation(&io_ctl, generation); 781 if (ret) 782 goto free_cache; 783 784 while (num_entries) { 785 e = kmem_cache_zalloc(btrfs_free_space_cachep, 786 GFP_NOFS); 787 if (!e) { 788 ret = -ENOMEM; 789 goto free_cache; 790 } 791 792 ret = io_ctl_read_entry(&io_ctl, e, &type); 793 if (ret) { 794 kmem_cache_free(btrfs_free_space_cachep, e); 795 goto free_cache; 796 } 797 798 if (!e->bytes) { 799 ret = -1; 800 kmem_cache_free(btrfs_free_space_cachep, e); 801 goto free_cache; 802 } 803 804 if (type == BTRFS_FREE_SPACE_EXTENT) { 805 spin_lock(&ctl->tree_lock); 806 ret = link_free_space(ctl, e); 807 spin_unlock(&ctl->tree_lock); 808 if (ret) { 809 btrfs_err(fs_info, 810 "Duplicate entries in free space cache, dumping"); 811 kmem_cache_free(btrfs_free_space_cachep, e); 812 goto free_cache; 813 } 814 } else { 815 ASSERT(num_bitmaps); 816 num_bitmaps--; 817 e->bitmap = kmem_cache_zalloc( 818 btrfs_free_space_bitmap_cachep, GFP_NOFS); 819 if (!e->bitmap) { 820 ret = -ENOMEM; 821 kmem_cache_free( 822 btrfs_free_space_cachep, e); 823 goto free_cache; 824 } 825 spin_lock(&ctl->tree_lock); 826 ret = link_free_space(ctl, e); 827 ctl->total_bitmaps++; 828 recalculate_thresholds(ctl); 829 spin_unlock(&ctl->tree_lock); 830 if (ret) { 831 btrfs_err(fs_info, 832 "Duplicate entries in free space cache, dumping"); 833 kmem_cache_free(btrfs_free_space_cachep, e); 834 goto free_cache; 835 } 836 list_add_tail(&e->list, &bitmaps); 837 } 838 839 num_entries--; 840 } 841 842 io_ctl_unmap_page(&io_ctl); 843 844 /* 845 * We add the bitmaps at the end of the entries in order that 846 * the bitmap entries are added to the cache. 847 */ 848 list_for_each_entry_safe(e, n, &bitmaps, list) { 849 list_del_init(&e->list); 850 ret = io_ctl_read_bitmap(&io_ctl, e); 851 if (ret) 852 goto free_cache; 853 } 854 855 io_ctl_drop_pages(&io_ctl); 856 ret = 1; 857 out: 858 io_ctl_free(&io_ctl); 859 return ret; 860 free_cache: 861 io_ctl_drop_pages(&io_ctl); 862 __btrfs_remove_free_space_cache(ctl); 863 goto out; 864 } 865 866 static int copy_free_space_cache(struct btrfs_block_group *block_group, 867 struct btrfs_free_space_ctl *ctl) 868 { 869 struct btrfs_free_space *info; 870 struct rb_node *n; 871 int ret = 0; 872 873 while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) { 874 info = rb_entry(n, struct btrfs_free_space, offset_index); 875 if (!info->bitmap) { 876 unlink_free_space(ctl, info); 877 ret = btrfs_add_free_space(block_group, info->offset, 878 info->bytes); 879 kmem_cache_free(btrfs_free_space_cachep, info); 880 } else { 881 u64 offset = info->offset; 882 u64 bytes = ctl->unit; 883 884 while (search_bitmap(ctl, info, &offset, &bytes, 885 false) == 0) { 886 ret = btrfs_add_free_space(block_group, offset, 887 bytes); 888 if (ret) 889 break; 890 bitmap_clear_bits(ctl, info, offset, bytes); 891 offset = info->offset; 892 bytes = ctl->unit; 893 } 894 free_bitmap(ctl, info); 895 } 896 cond_resched(); 897 } 898 return ret; 899 } 900 901 int load_free_space_cache(struct btrfs_block_group *block_group) 902 { 903 struct btrfs_fs_info *fs_info = block_group->fs_info; 904 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 905 struct btrfs_free_space_ctl tmp_ctl = {}; 906 struct inode *inode; 907 struct btrfs_path *path; 908 int ret = 0; 909 bool matched; 910 u64 used = block_group->used; 911 912 /* 913 * Because we could potentially discard our loaded free space, we want 914 * to load everything into a temporary structure first, and then if it's 915 * valid copy it all into the actual free space ctl. 916 */ 917 btrfs_init_free_space_ctl(block_group, &tmp_ctl); 918 919 /* 920 * If this block group has been marked to be cleared for one reason or 921 * another then we can't trust the on disk cache, so just return. 922 */ 923 spin_lock(&block_group->lock); 924 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 925 spin_unlock(&block_group->lock); 926 return 0; 927 } 928 spin_unlock(&block_group->lock); 929 930 path = btrfs_alloc_path(); 931 if (!path) 932 return 0; 933 path->search_commit_root = 1; 934 path->skip_locking = 1; 935 936 /* 937 * We must pass a path with search_commit_root set to btrfs_iget in 938 * order to avoid a deadlock when allocating extents for the tree root. 939 * 940 * When we are COWing an extent buffer from the tree root, when looking 941 * for a free extent, at extent-tree.c:find_free_extent(), we can find 942 * block group without its free space cache loaded. When we find one 943 * we must load its space cache which requires reading its free space 944 * cache's inode item from the root tree. If this inode item is located 945 * in the same leaf that we started COWing before, then we end up in 946 * deadlock on the extent buffer (trying to read lock it when we 947 * previously write locked it). 948 * 949 * It's safe to read the inode item using the commit root because 950 * block groups, once loaded, stay in memory forever (until they are 951 * removed) as well as their space caches once loaded. New block groups 952 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 953 * we will never try to read their inode item while the fs is mounted. 954 */ 955 inode = lookup_free_space_inode(block_group, path); 956 if (IS_ERR(inode)) { 957 btrfs_free_path(path); 958 return 0; 959 } 960 961 /* We may have converted the inode and made the cache invalid. */ 962 spin_lock(&block_group->lock); 963 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 964 spin_unlock(&block_group->lock); 965 btrfs_free_path(path); 966 goto out; 967 } 968 spin_unlock(&block_group->lock); 969 970 ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl, 971 path, block_group->start); 972 btrfs_free_path(path); 973 if (ret <= 0) 974 goto out; 975 976 matched = (tmp_ctl.free_space == (block_group->length - used - 977 block_group->bytes_super)); 978 979 if (matched) { 980 ret = copy_free_space_cache(block_group, &tmp_ctl); 981 /* 982 * ret == 1 means we successfully loaded the free space cache, 983 * so we need to re-set it here. 984 */ 985 if (ret == 0) 986 ret = 1; 987 } else { 988 __btrfs_remove_free_space_cache(&tmp_ctl); 989 btrfs_warn(fs_info, 990 "block group %llu has wrong amount of free space", 991 block_group->start); 992 ret = -1; 993 } 994 out: 995 if (ret < 0) { 996 /* This cache is bogus, make sure it gets cleared */ 997 spin_lock(&block_group->lock); 998 block_group->disk_cache_state = BTRFS_DC_CLEAR; 999 spin_unlock(&block_group->lock); 1000 ret = 0; 1001 1002 btrfs_warn(fs_info, 1003 "failed to load free space cache for block group %llu, rebuilding it now", 1004 block_group->start); 1005 } 1006 1007 spin_lock(&ctl->tree_lock); 1008 btrfs_discard_update_discardable(block_group); 1009 spin_unlock(&ctl->tree_lock); 1010 iput(inode); 1011 return ret; 1012 } 1013 1014 static noinline_for_stack 1015 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 1016 struct btrfs_free_space_ctl *ctl, 1017 struct btrfs_block_group *block_group, 1018 int *entries, int *bitmaps, 1019 struct list_head *bitmap_list) 1020 { 1021 int ret; 1022 struct btrfs_free_cluster *cluster = NULL; 1023 struct btrfs_free_cluster *cluster_locked = NULL; 1024 struct rb_node *node = rb_first(&ctl->free_space_offset); 1025 struct btrfs_trim_range *trim_entry; 1026 1027 /* Get the cluster for this block_group if it exists */ 1028 if (block_group && !list_empty(&block_group->cluster_list)) { 1029 cluster = list_entry(block_group->cluster_list.next, 1030 struct btrfs_free_cluster, 1031 block_group_list); 1032 } 1033 1034 if (!node && cluster) { 1035 cluster_locked = cluster; 1036 spin_lock(&cluster_locked->lock); 1037 node = rb_first(&cluster->root); 1038 cluster = NULL; 1039 } 1040 1041 /* Write out the extent entries */ 1042 while (node) { 1043 struct btrfs_free_space *e; 1044 1045 e = rb_entry(node, struct btrfs_free_space, offset_index); 1046 *entries += 1; 1047 1048 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 1049 e->bitmap); 1050 if (ret) 1051 goto fail; 1052 1053 if (e->bitmap) { 1054 list_add_tail(&e->list, bitmap_list); 1055 *bitmaps += 1; 1056 } 1057 node = rb_next(node); 1058 if (!node && cluster) { 1059 node = rb_first(&cluster->root); 1060 cluster_locked = cluster; 1061 spin_lock(&cluster_locked->lock); 1062 cluster = NULL; 1063 } 1064 } 1065 if (cluster_locked) { 1066 spin_unlock(&cluster_locked->lock); 1067 cluster_locked = NULL; 1068 } 1069 1070 /* 1071 * Make sure we don't miss any range that was removed from our rbtree 1072 * because trimming is running. Otherwise after a umount+mount (or crash 1073 * after committing the transaction) we would leak free space and get 1074 * an inconsistent free space cache report from fsck. 1075 */ 1076 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 1077 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 1078 trim_entry->bytes, NULL); 1079 if (ret) 1080 goto fail; 1081 *entries += 1; 1082 } 1083 1084 return 0; 1085 fail: 1086 if (cluster_locked) 1087 spin_unlock(&cluster_locked->lock); 1088 return -ENOSPC; 1089 } 1090 1091 static noinline_for_stack int 1092 update_cache_item(struct btrfs_trans_handle *trans, 1093 struct btrfs_root *root, 1094 struct inode *inode, 1095 struct btrfs_path *path, u64 offset, 1096 int entries, int bitmaps) 1097 { 1098 struct btrfs_key key; 1099 struct btrfs_free_space_header *header; 1100 struct extent_buffer *leaf; 1101 int ret; 1102 1103 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1104 key.offset = offset; 1105 key.type = 0; 1106 1107 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1108 if (ret < 0) { 1109 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1110 EXTENT_DELALLOC, 0, 0, NULL); 1111 goto fail; 1112 } 1113 leaf = path->nodes[0]; 1114 if (ret > 0) { 1115 struct btrfs_key found_key; 1116 ASSERT(path->slots[0]); 1117 path->slots[0]--; 1118 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1119 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1120 found_key.offset != offset) { 1121 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1122 inode->i_size - 1, EXTENT_DELALLOC, 0, 1123 0, NULL); 1124 btrfs_release_path(path); 1125 goto fail; 1126 } 1127 } 1128 1129 BTRFS_I(inode)->generation = trans->transid; 1130 header = btrfs_item_ptr(leaf, path->slots[0], 1131 struct btrfs_free_space_header); 1132 btrfs_set_free_space_entries(leaf, header, entries); 1133 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1134 btrfs_set_free_space_generation(leaf, header, trans->transid); 1135 btrfs_mark_buffer_dirty(leaf); 1136 btrfs_release_path(path); 1137 1138 return 0; 1139 1140 fail: 1141 return -1; 1142 } 1143 1144 static noinline_for_stack int write_pinned_extent_entries( 1145 struct btrfs_trans_handle *trans, 1146 struct btrfs_block_group *block_group, 1147 struct btrfs_io_ctl *io_ctl, 1148 int *entries) 1149 { 1150 u64 start, extent_start, extent_end, len; 1151 struct extent_io_tree *unpin = NULL; 1152 int ret; 1153 1154 if (!block_group) 1155 return 0; 1156 1157 /* 1158 * We want to add any pinned extents to our free space cache 1159 * so we don't leak the space 1160 * 1161 * We shouldn't have switched the pinned extents yet so this is the 1162 * right one 1163 */ 1164 unpin = &trans->transaction->pinned_extents; 1165 1166 start = block_group->start; 1167 1168 while (start < block_group->start + block_group->length) { 1169 ret = find_first_extent_bit(unpin, start, 1170 &extent_start, &extent_end, 1171 EXTENT_DIRTY, NULL); 1172 if (ret) 1173 return 0; 1174 1175 /* This pinned extent is out of our range */ 1176 if (extent_start >= block_group->start + block_group->length) 1177 return 0; 1178 1179 extent_start = max(extent_start, start); 1180 extent_end = min(block_group->start + block_group->length, 1181 extent_end + 1); 1182 len = extent_end - extent_start; 1183 1184 *entries += 1; 1185 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1186 if (ret) 1187 return -ENOSPC; 1188 1189 start = extent_end; 1190 } 1191 1192 return 0; 1193 } 1194 1195 static noinline_for_stack int 1196 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1197 { 1198 struct btrfs_free_space *entry, *next; 1199 int ret; 1200 1201 /* Write out the bitmaps */ 1202 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1203 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1204 if (ret) 1205 return -ENOSPC; 1206 list_del_init(&entry->list); 1207 } 1208 1209 return 0; 1210 } 1211 1212 static int flush_dirty_cache(struct inode *inode) 1213 { 1214 int ret; 1215 1216 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1217 if (ret) 1218 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1219 EXTENT_DELALLOC, 0, 0, NULL); 1220 1221 return ret; 1222 } 1223 1224 static void noinline_for_stack 1225 cleanup_bitmap_list(struct list_head *bitmap_list) 1226 { 1227 struct btrfs_free_space *entry, *next; 1228 1229 list_for_each_entry_safe(entry, next, bitmap_list, list) 1230 list_del_init(&entry->list); 1231 } 1232 1233 static void noinline_for_stack 1234 cleanup_write_cache_enospc(struct inode *inode, 1235 struct btrfs_io_ctl *io_ctl, 1236 struct extent_state **cached_state) 1237 { 1238 io_ctl_drop_pages(io_ctl); 1239 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1240 i_size_read(inode) - 1, cached_state); 1241 } 1242 1243 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1244 struct btrfs_trans_handle *trans, 1245 struct btrfs_block_group *block_group, 1246 struct btrfs_io_ctl *io_ctl, 1247 struct btrfs_path *path, u64 offset) 1248 { 1249 int ret; 1250 struct inode *inode = io_ctl->inode; 1251 1252 if (!inode) 1253 return 0; 1254 1255 /* Flush the dirty pages in the cache file. */ 1256 ret = flush_dirty_cache(inode); 1257 if (ret) 1258 goto out; 1259 1260 /* Update the cache item to tell everyone this cache file is valid. */ 1261 ret = update_cache_item(trans, root, inode, path, offset, 1262 io_ctl->entries, io_ctl->bitmaps); 1263 out: 1264 if (ret) { 1265 invalidate_inode_pages2(inode->i_mapping); 1266 BTRFS_I(inode)->generation = 0; 1267 if (block_group) 1268 btrfs_debug(root->fs_info, 1269 "failed to write free space cache for block group %llu error %d", 1270 block_group->start, ret); 1271 } 1272 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1273 1274 if (block_group) { 1275 /* the dirty list is protected by the dirty_bgs_lock */ 1276 spin_lock(&trans->transaction->dirty_bgs_lock); 1277 1278 /* the disk_cache_state is protected by the block group lock */ 1279 spin_lock(&block_group->lock); 1280 1281 /* 1282 * only mark this as written if we didn't get put back on 1283 * the dirty list while waiting for IO. Otherwise our 1284 * cache state won't be right, and we won't get written again 1285 */ 1286 if (!ret && list_empty(&block_group->dirty_list)) 1287 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1288 else if (ret) 1289 block_group->disk_cache_state = BTRFS_DC_ERROR; 1290 1291 spin_unlock(&block_group->lock); 1292 spin_unlock(&trans->transaction->dirty_bgs_lock); 1293 io_ctl->inode = NULL; 1294 iput(inode); 1295 } 1296 1297 return ret; 1298 1299 } 1300 1301 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1302 struct btrfs_block_group *block_group, 1303 struct btrfs_path *path) 1304 { 1305 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1306 block_group, &block_group->io_ctl, 1307 path, block_group->start); 1308 } 1309 1310 /** 1311 * Write out cached info to an inode 1312 * 1313 * @root: root the inode belongs to 1314 * @inode: freespace inode we are writing out 1315 * @ctl: free space cache we are going to write out 1316 * @block_group: block_group for this cache if it belongs to a block_group 1317 * @io_ctl: holds context for the io 1318 * @trans: the trans handle 1319 * 1320 * This function writes out a free space cache struct to disk for quick recovery 1321 * on mount. This will return 0 if it was successful in writing the cache out, 1322 * or an errno if it was not. 1323 */ 1324 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1325 struct btrfs_free_space_ctl *ctl, 1326 struct btrfs_block_group *block_group, 1327 struct btrfs_io_ctl *io_ctl, 1328 struct btrfs_trans_handle *trans) 1329 { 1330 struct extent_state *cached_state = NULL; 1331 LIST_HEAD(bitmap_list); 1332 int entries = 0; 1333 int bitmaps = 0; 1334 int ret; 1335 int must_iput = 0; 1336 1337 if (!i_size_read(inode)) 1338 return -EIO; 1339 1340 WARN_ON(io_ctl->pages); 1341 ret = io_ctl_init(io_ctl, inode, 1); 1342 if (ret) 1343 return ret; 1344 1345 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1346 down_write(&block_group->data_rwsem); 1347 spin_lock(&block_group->lock); 1348 if (block_group->delalloc_bytes) { 1349 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1350 spin_unlock(&block_group->lock); 1351 up_write(&block_group->data_rwsem); 1352 BTRFS_I(inode)->generation = 0; 1353 ret = 0; 1354 must_iput = 1; 1355 goto out; 1356 } 1357 spin_unlock(&block_group->lock); 1358 } 1359 1360 /* Lock all pages first so we can lock the extent safely. */ 1361 ret = io_ctl_prepare_pages(io_ctl, false); 1362 if (ret) 1363 goto out_unlock; 1364 1365 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1366 &cached_state); 1367 1368 io_ctl_set_generation(io_ctl, trans->transid); 1369 1370 mutex_lock(&ctl->cache_writeout_mutex); 1371 /* Write out the extent entries in the free space cache */ 1372 spin_lock(&ctl->tree_lock); 1373 ret = write_cache_extent_entries(io_ctl, ctl, 1374 block_group, &entries, &bitmaps, 1375 &bitmap_list); 1376 if (ret) 1377 goto out_nospc_locked; 1378 1379 /* 1380 * Some spaces that are freed in the current transaction are pinned, 1381 * they will be added into free space cache after the transaction is 1382 * committed, we shouldn't lose them. 1383 * 1384 * If this changes while we are working we'll get added back to 1385 * the dirty list and redo it. No locking needed 1386 */ 1387 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries); 1388 if (ret) 1389 goto out_nospc_locked; 1390 1391 /* 1392 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1393 * locked while doing it because a concurrent trim can be manipulating 1394 * or freeing the bitmap. 1395 */ 1396 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1397 spin_unlock(&ctl->tree_lock); 1398 mutex_unlock(&ctl->cache_writeout_mutex); 1399 if (ret) 1400 goto out_nospc; 1401 1402 /* Zero out the rest of the pages just to make sure */ 1403 io_ctl_zero_remaining_pages(io_ctl); 1404 1405 /* Everything is written out, now we dirty the pages in the file. */ 1406 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages, 1407 io_ctl->num_pages, 0, i_size_read(inode), 1408 &cached_state, false); 1409 if (ret) 1410 goto out_nospc; 1411 1412 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1413 up_write(&block_group->data_rwsem); 1414 /* 1415 * Release the pages and unlock the extent, we will flush 1416 * them out later 1417 */ 1418 io_ctl_drop_pages(io_ctl); 1419 io_ctl_free(io_ctl); 1420 1421 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1422 i_size_read(inode) - 1, &cached_state); 1423 1424 /* 1425 * at this point the pages are under IO and we're happy, 1426 * The caller is responsible for waiting on them and updating 1427 * the cache and the inode 1428 */ 1429 io_ctl->entries = entries; 1430 io_ctl->bitmaps = bitmaps; 1431 1432 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1433 if (ret) 1434 goto out; 1435 1436 return 0; 1437 1438 out_nospc_locked: 1439 cleanup_bitmap_list(&bitmap_list); 1440 spin_unlock(&ctl->tree_lock); 1441 mutex_unlock(&ctl->cache_writeout_mutex); 1442 1443 out_nospc: 1444 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1445 1446 out_unlock: 1447 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1448 up_write(&block_group->data_rwsem); 1449 1450 out: 1451 io_ctl->inode = NULL; 1452 io_ctl_free(io_ctl); 1453 if (ret) { 1454 invalidate_inode_pages2(inode->i_mapping); 1455 BTRFS_I(inode)->generation = 0; 1456 } 1457 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1458 if (must_iput) 1459 iput(inode); 1460 return ret; 1461 } 1462 1463 int btrfs_write_out_cache(struct btrfs_trans_handle *trans, 1464 struct btrfs_block_group *block_group, 1465 struct btrfs_path *path) 1466 { 1467 struct btrfs_fs_info *fs_info = trans->fs_info; 1468 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1469 struct inode *inode; 1470 int ret = 0; 1471 1472 spin_lock(&block_group->lock); 1473 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1474 spin_unlock(&block_group->lock); 1475 return 0; 1476 } 1477 spin_unlock(&block_group->lock); 1478 1479 inode = lookup_free_space_inode(block_group, path); 1480 if (IS_ERR(inode)) 1481 return 0; 1482 1483 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1484 block_group, &block_group->io_ctl, trans); 1485 if (ret) { 1486 btrfs_debug(fs_info, 1487 "failed to write free space cache for block group %llu error %d", 1488 block_group->start, ret); 1489 spin_lock(&block_group->lock); 1490 block_group->disk_cache_state = BTRFS_DC_ERROR; 1491 spin_unlock(&block_group->lock); 1492 1493 block_group->io_ctl.inode = NULL; 1494 iput(inode); 1495 } 1496 1497 /* 1498 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1499 * to wait for IO and put the inode 1500 */ 1501 1502 return ret; 1503 } 1504 1505 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1506 u64 offset) 1507 { 1508 ASSERT(offset >= bitmap_start); 1509 offset -= bitmap_start; 1510 return (unsigned long)(div_u64(offset, unit)); 1511 } 1512 1513 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1514 { 1515 return (unsigned long)(div_u64(bytes, unit)); 1516 } 1517 1518 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1519 u64 offset) 1520 { 1521 u64 bitmap_start; 1522 u64 bytes_per_bitmap; 1523 1524 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1525 bitmap_start = offset - ctl->start; 1526 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1527 bitmap_start *= bytes_per_bitmap; 1528 bitmap_start += ctl->start; 1529 1530 return bitmap_start; 1531 } 1532 1533 static int tree_insert_offset(struct rb_root *root, u64 offset, 1534 struct rb_node *node, int bitmap) 1535 { 1536 struct rb_node **p = &root->rb_node; 1537 struct rb_node *parent = NULL; 1538 struct btrfs_free_space *info; 1539 1540 while (*p) { 1541 parent = *p; 1542 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1543 1544 if (offset < info->offset) { 1545 p = &(*p)->rb_left; 1546 } else if (offset > info->offset) { 1547 p = &(*p)->rb_right; 1548 } else { 1549 /* 1550 * we could have a bitmap entry and an extent entry 1551 * share the same offset. If this is the case, we want 1552 * the extent entry to always be found first if we do a 1553 * linear search through the tree, since we want to have 1554 * the quickest allocation time, and allocating from an 1555 * extent is faster than allocating from a bitmap. So 1556 * if we're inserting a bitmap and we find an entry at 1557 * this offset, we want to go right, or after this entry 1558 * logically. If we are inserting an extent and we've 1559 * found a bitmap, we want to go left, or before 1560 * logically. 1561 */ 1562 if (bitmap) { 1563 if (info->bitmap) { 1564 WARN_ON_ONCE(1); 1565 return -EEXIST; 1566 } 1567 p = &(*p)->rb_right; 1568 } else { 1569 if (!info->bitmap) { 1570 WARN_ON_ONCE(1); 1571 return -EEXIST; 1572 } 1573 p = &(*p)->rb_left; 1574 } 1575 } 1576 } 1577 1578 rb_link_node(node, parent, p); 1579 rb_insert_color(node, root); 1580 1581 return 0; 1582 } 1583 1584 /* 1585 * searches the tree for the given offset. 1586 * 1587 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1588 * want a section that has at least bytes size and comes at or after the given 1589 * offset. 1590 */ 1591 static struct btrfs_free_space * 1592 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1593 u64 offset, int bitmap_only, int fuzzy) 1594 { 1595 struct rb_node *n = ctl->free_space_offset.rb_node; 1596 struct btrfs_free_space *entry, *prev = NULL; 1597 1598 /* find entry that is closest to the 'offset' */ 1599 while (1) { 1600 if (!n) { 1601 entry = NULL; 1602 break; 1603 } 1604 1605 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1606 prev = entry; 1607 1608 if (offset < entry->offset) 1609 n = n->rb_left; 1610 else if (offset > entry->offset) 1611 n = n->rb_right; 1612 else 1613 break; 1614 } 1615 1616 if (bitmap_only) { 1617 if (!entry) 1618 return NULL; 1619 if (entry->bitmap) 1620 return entry; 1621 1622 /* 1623 * bitmap entry and extent entry may share same offset, 1624 * in that case, bitmap entry comes after extent entry. 1625 */ 1626 n = rb_next(n); 1627 if (!n) 1628 return NULL; 1629 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1630 if (entry->offset != offset) 1631 return NULL; 1632 1633 WARN_ON(!entry->bitmap); 1634 return entry; 1635 } else if (entry) { 1636 if (entry->bitmap) { 1637 /* 1638 * if previous extent entry covers the offset, 1639 * we should return it instead of the bitmap entry 1640 */ 1641 n = rb_prev(&entry->offset_index); 1642 if (n) { 1643 prev = rb_entry(n, struct btrfs_free_space, 1644 offset_index); 1645 if (!prev->bitmap && 1646 prev->offset + prev->bytes > offset) 1647 entry = prev; 1648 } 1649 } 1650 return entry; 1651 } 1652 1653 if (!prev) 1654 return NULL; 1655 1656 /* find last entry before the 'offset' */ 1657 entry = prev; 1658 if (entry->offset > offset) { 1659 n = rb_prev(&entry->offset_index); 1660 if (n) { 1661 entry = rb_entry(n, struct btrfs_free_space, 1662 offset_index); 1663 ASSERT(entry->offset <= offset); 1664 } else { 1665 if (fuzzy) 1666 return entry; 1667 else 1668 return NULL; 1669 } 1670 } 1671 1672 if (entry->bitmap) { 1673 n = rb_prev(&entry->offset_index); 1674 if (n) { 1675 prev = rb_entry(n, struct btrfs_free_space, 1676 offset_index); 1677 if (!prev->bitmap && 1678 prev->offset + prev->bytes > offset) 1679 return prev; 1680 } 1681 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1682 return entry; 1683 } else if (entry->offset + entry->bytes > offset) 1684 return entry; 1685 1686 if (!fuzzy) 1687 return NULL; 1688 1689 while (1) { 1690 if (entry->bitmap) { 1691 if (entry->offset + BITS_PER_BITMAP * 1692 ctl->unit > offset) 1693 break; 1694 } else { 1695 if (entry->offset + entry->bytes > offset) 1696 break; 1697 } 1698 1699 n = rb_next(&entry->offset_index); 1700 if (!n) 1701 return NULL; 1702 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1703 } 1704 return entry; 1705 } 1706 1707 static inline void 1708 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1709 struct btrfs_free_space *info) 1710 { 1711 rb_erase(&info->offset_index, &ctl->free_space_offset); 1712 ctl->free_extents--; 1713 1714 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1715 ctl->discardable_extents[BTRFS_STAT_CURR]--; 1716 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes; 1717 } 1718 } 1719 1720 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1721 struct btrfs_free_space *info) 1722 { 1723 __unlink_free_space(ctl, info); 1724 ctl->free_space -= info->bytes; 1725 } 1726 1727 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1728 struct btrfs_free_space *info) 1729 { 1730 int ret = 0; 1731 1732 ASSERT(info->bytes || info->bitmap); 1733 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1734 &info->offset_index, (info->bitmap != NULL)); 1735 if (ret) 1736 return ret; 1737 1738 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1739 ctl->discardable_extents[BTRFS_STAT_CURR]++; 1740 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 1741 } 1742 1743 ctl->free_space += info->bytes; 1744 ctl->free_extents++; 1745 return ret; 1746 } 1747 1748 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1749 struct btrfs_free_space *info, 1750 u64 offset, u64 bytes) 1751 { 1752 unsigned long start, count, end; 1753 int extent_delta = -1; 1754 1755 start = offset_to_bit(info->offset, ctl->unit, offset); 1756 count = bytes_to_bits(bytes, ctl->unit); 1757 end = start + count; 1758 ASSERT(end <= BITS_PER_BITMAP); 1759 1760 bitmap_clear(info->bitmap, start, count); 1761 1762 info->bytes -= bytes; 1763 if (info->max_extent_size > ctl->unit) 1764 info->max_extent_size = 0; 1765 1766 if (start && test_bit(start - 1, info->bitmap)) 1767 extent_delta++; 1768 1769 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1770 extent_delta++; 1771 1772 info->bitmap_extents += extent_delta; 1773 if (!btrfs_free_space_trimmed(info)) { 1774 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1775 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 1776 } 1777 } 1778 1779 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1780 struct btrfs_free_space *info, u64 offset, 1781 u64 bytes) 1782 { 1783 __bitmap_clear_bits(ctl, info, offset, bytes); 1784 ctl->free_space -= bytes; 1785 } 1786 1787 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1788 struct btrfs_free_space *info, u64 offset, 1789 u64 bytes) 1790 { 1791 unsigned long start, count, end; 1792 int extent_delta = 1; 1793 1794 start = offset_to_bit(info->offset, ctl->unit, offset); 1795 count = bytes_to_bits(bytes, ctl->unit); 1796 end = start + count; 1797 ASSERT(end <= BITS_PER_BITMAP); 1798 1799 bitmap_set(info->bitmap, start, count); 1800 1801 info->bytes += bytes; 1802 ctl->free_space += bytes; 1803 1804 if (start && test_bit(start - 1, info->bitmap)) 1805 extent_delta--; 1806 1807 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1808 extent_delta--; 1809 1810 info->bitmap_extents += extent_delta; 1811 if (!btrfs_free_space_trimmed(info)) { 1812 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1813 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes; 1814 } 1815 } 1816 1817 /* 1818 * If we can not find suitable extent, we will use bytes to record 1819 * the size of the max extent. 1820 */ 1821 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1822 struct btrfs_free_space *bitmap_info, u64 *offset, 1823 u64 *bytes, bool for_alloc) 1824 { 1825 unsigned long found_bits = 0; 1826 unsigned long max_bits = 0; 1827 unsigned long bits, i; 1828 unsigned long next_zero; 1829 unsigned long extent_bits; 1830 1831 /* 1832 * Skip searching the bitmap if we don't have a contiguous section that 1833 * is large enough for this allocation. 1834 */ 1835 if (for_alloc && 1836 bitmap_info->max_extent_size && 1837 bitmap_info->max_extent_size < *bytes) { 1838 *bytes = bitmap_info->max_extent_size; 1839 return -1; 1840 } 1841 1842 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1843 max_t(u64, *offset, bitmap_info->offset)); 1844 bits = bytes_to_bits(*bytes, ctl->unit); 1845 1846 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1847 if (for_alloc && bits == 1) { 1848 found_bits = 1; 1849 break; 1850 } 1851 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1852 BITS_PER_BITMAP, i); 1853 extent_bits = next_zero - i; 1854 if (extent_bits >= bits) { 1855 found_bits = extent_bits; 1856 break; 1857 } else if (extent_bits > max_bits) { 1858 max_bits = extent_bits; 1859 } 1860 i = next_zero; 1861 } 1862 1863 if (found_bits) { 1864 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1865 *bytes = (u64)(found_bits) * ctl->unit; 1866 return 0; 1867 } 1868 1869 *bytes = (u64)(max_bits) * ctl->unit; 1870 bitmap_info->max_extent_size = *bytes; 1871 return -1; 1872 } 1873 1874 static inline u64 get_max_extent_size(struct btrfs_free_space *entry) 1875 { 1876 if (entry->bitmap) 1877 return entry->max_extent_size; 1878 return entry->bytes; 1879 } 1880 1881 /* Cache the size of the max extent in bytes */ 1882 static struct btrfs_free_space * 1883 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1884 unsigned long align, u64 *max_extent_size) 1885 { 1886 struct btrfs_free_space *entry; 1887 struct rb_node *node; 1888 u64 tmp; 1889 u64 align_off; 1890 int ret; 1891 1892 if (!ctl->free_space_offset.rb_node) 1893 goto out; 1894 1895 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1896 if (!entry) 1897 goto out; 1898 1899 for (node = &entry->offset_index; node; node = rb_next(node)) { 1900 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1901 if (entry->bytes < *bytes) { 1902 *max_extent_size = max(get_max_extent_size(entry), 1903 *max_extent_size); 1904 continue; 1905 } 1906 1907 /* make sure the space returned is big enough 1908 * to match our requested alignment 1909 */ 1910 if (*bytes >= align) { 1911 tmp = entry->offset - ctl->start + align - 1; 1912 tmp = div64_u64(tmp, align); 1913 tmp = tmp * align + ctl->start; 1914 align_off = tmp - entry->offset; 1915 } else { 1916 align_off = 0; 1917 tmp = entry->offset; 1918 } 1919 1920 if (entry->bytes < *bytes + align_off) { 1921 *max_extent_size = max(get_max_extent_size(entry), 1922 *max_extent_size); 1923 continue; 1924 } 1925 1926 if (entry->bitmap) { 1927 u64 size = *bytes; 1928 1929 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1930 if (!ret) { 1931 *offset = tmp; 1932 *bytes = size; 1933 return entry; 1934 } else { 1935 *max_extent_size = 1936 max(get_max_extent_size(entry), 1937 *max_extent_size); 1938 } 1939 continue; 1940 } 1941 1942 *offset = tmp; 1943 *bytes = entry->bytes - align_off; 1944 return entry; 1945 } 1946 out: 1947 return NULL; 1948 } 1949 1950 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1951 struct btrfs_free_space *info, u64 offset) 1952 { 1953 info->offset = offset_to_bitmap(ctl, offset); 1954 info->bytes = 0; 1955 info->bitmap_extents = 0; 1956 INIT_LIST_HEAD(&info->list); 1957 link_free_space(ctl, info); 1958 ctl->total_bitmaps++; 1959 recalculate_thresholds(ctl); 1960 } 1961 1962 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1963 struct btrfs_free_space *bitmap_info) 1964 { 1965 /* 1966 * Normally when this is called, the bitmap is completely empty. However, 1967 * if we are blowing up the free space cache for one reason or another 1968 * via __btrfs_remove_free_space_cache(), then it may not be freed and 1969 * we may leave stats on the table. 1970 */ 1971 if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) { 1972 ctl->discardable_extents[BTRFS_STAT_CURR] -= 1973 bitmap_info->bitmap_extents; 1974 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes; 1975 1976 } 1977 unlink_free_space(ctl, bitmap_info); 1978 kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap); 1979 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1980 ctl->total_bitmaps--; 1981 recalculate_thresholds(ctl); 1982 } 1983 1984 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1985 struct btrfs_free_space *bitmap_info, 1986 u64 *offset, u64 *bytes) 1987 { 1988 u64 end; 1989 u64 search_start, search_bytes; 1990 int ret; 1991 1992 again: 1993 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1994 1995 /* 1996 * We need to search for bits in this bitmap. We could only cover some 1997 * of the extent in this bitmap thanks to how we add space, so we need 1998 * to search for as much as it as we can and clear that amount, and then 1999 * go searching for the next bit. 2000 */ 2001 search_start = *offset; 2002 search_bytes = ctl->unit; 2003 search_bytes = min(search_bytes, end - search_start + 1); 2004 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes, 2005 false); 2006 if (ret < 0 || search_start != *offset) 2007 return -EINVAL; 2008 2009 /* We may have found more bits than what we need */ 2010 search_bytes = min(search_bytes, *bytes); 2011 2012 /* Cannot clear past the end of the bitmap */ 2013 search_bytes = min(search_bytes, end - search_start + 1); 2014 2015 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); 2016 *offset += search_bytes; 2017 *bytes -= search_bytes; 2018 2019 if (*bytes) { 2020 struct rb_node *next = rb_next(&bitmap_info->offset_index); 2021 if (!bitmap_info->bytes) 2022 free_bitmap(ctl, bitmap_info); 2023 2024 /* 2025 * no entry after this bitmap, but we still have bytes to 2026 * remove, so something has gone wrong. 2027 */ 2028 if (!next) 2029 return -EINVAL; 2030 2031 bitmap_info = rb_entry(next, struct btrfs_free_space, 2032 offset_index); 2033 2034 /* 2035 * if the next entry isn't a bitmap we need to return to let the 2036 * extent stuff do its work. 2037 */ 2038 if (!bitmap_info->bitmap) 2039 return -EAGAIN; 2040 2041 /* 2042 * Ok the next item is a bitmap, but it may not actually hold 2043 * the information for the rest of this free space stuff, so 2044 * look for it, and if we don't find it return so we can try 2045 * everything over again. 2046 */ 2047 search_start = *offset; 2048 search_bytes = ctl->unit; 2049 ret = search_bitmap(ctl, bitmap_info, &search_start, 2050 &search_bytes, false); 2051 if (ret < 0 || search_start != *offset) 2052 return -EAGAIN; 2053 2054 goto again; 2055 } else if (!bitmap_info->bytes) 2056 free_bitmap(ctl, bitmap_info); 2057 2058 return 0; 2059 } 2060 2061 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, 2062 struct btrfs_free_space *info, u64 offset, 2063 u64 bytes, enum btrfs_trim_state trim_state) 2064 { 2065 u64 bytes_to_set = 0; 2066 u64 end; 2067 2068 /* 2069 * This is a tradeoff to make bitmap trim state minimal. We mark the 2070 * whole bitmap untrimmed if at any point we add untrimmed regions. 2071 */ 2072 if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) { 2073 if (btrfs_free_space_trimmed(info)) { 2074 ctl->discardable_extents[BTRFS_STAT_CURR] += 2075 info->bitmap_extents; 2076 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 2077 } 2078 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2079 } 2080 2081 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 2082 2083 bytes_to_set = min(end - offset, bytes); 2084 2085 bitmap_set_bits(ctl, info, offset, bytes_to_set); 2086 2087 /* 2088 * We set some bytes, we have no idea what the max extent size is 2089 * anymore. 2090 */ 2091 info->max_extent_size = 0; 2092 2093 return bytes_to_set; 2094 2095 } 2096 2097 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 2098 struct btrfs_free_space *info) 2099 { 2100 struct btrfs_block_group *block_group = ctl->private; 2101 struct btrfs_fs_info *fs_info = block_group->fs_info; 2102 bool forced = false; 2103 2104 #ifdef CONFIG_BTRFS_DEBUG 2105 if (btrfs_should_fragment_free_space(block_group)) 2106 forced = true; 2107 #endif 2108 2109 /* This is a way to reclaim large regions from the bitmaps. */ 2110 if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD) 2111 return false; 2112 2113 /* 2114 * If we are below the extents threshold then we can add this as an 2115 * extent, and don't have to deal with the bitmap 2116 */ 2117 if (!forced && ctl->free_extents < ctl->extents_thresh) { 2118 /* 2119 * If this block group has some small extents we don't want to 2120 * use up all of our free slots in the cache with them, we want 2121 * to reserve them to larger extents, however if we have plenty 2122 * of cache left then go ahead an dadd them, no sense in adding 2123 * the overhead of a bitmap if we don't have to. 2124 */ 2125 if (info->bytes <= fs_info->sectorsize * 8) { 2126 if (ctl->free_extents * 3 <= ctl->extents_thresh) 2127 return false; 2128 } else { 2129 return false; 2130 } 2131 } 2132 2133 /* 2134 * The original block groups from mkfs can be really small, like 8 2135 * megabytes, so don't bother with a bitmap for those entries. However 2136 * some block groups can be smaller than what a bitmap would cover but 2137 * are still large enough that they could overflow the 32k memory limit, 2138 * so allow those block groups to still be allowed to have a bitmap 2139 * entry. 2140 */ 2141 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length) 2142 return false; 2143 2144 return true; 2145 } 2146 2147 static const struct btrfs_free_space_op free_space_op = { 2148 .use_bitmap = use_bitmap, 2149 }; 2150 2151 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2152 struct btrfs_free_space *info) 2153 { 2154 struct btrfs_free_space *bitmap_info; 2155 struct btrfs_block_group *block_group = NULL; 2156 int added = 0; 2157 u64 bytes, offset, bytes_added; 2158 enum btrfs_trim_state trim_state; 2159 int ret; 2160 2161 bytes = info->bytes; 2162 offset = info->offset; 2163 trim_state = info->trim_state; 2164 2165 if (!ctl->op->use_bitmap(ctl, info)) 2166 return 0; 2167 2168 if (ctl->op == &free_space_op) 2169 block_group = ctl->private; 2170 again: 2171 /* 2172 * Since we link bitmaps right into the cluster we need to see if we 2173 * have a cluster here, and if so and it has our bitmap we need to add 2174 * the free space to that bitmap. 2175 */ 2176 if (block_group && !list_empty(&block_group->cluster_list)) { 2177 struct btrfs_free_cluster *cluster; 2178 struct rb_node *node; 2179 struct btrfs_free_space *entry; 2180 2181 cluster = list_entry(block_group->cluster_list.next, 2182 struct btrfs_free_cluster, 2183 block_group_list); 2184 spin_lock(&cluster->lock); 2185 node = rb_first(&cluster->root); 2186 if (!node) { 2187 spin_unlock(&cluster->lock); 2188 goto no_cluster_bitmap; 2189 } 2190 2191 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2192 if (!entry->bitmap) { 2193 spin_unlock(&cluster->lock); 2194 goto no_cluster_bitmap; 2195 } 2196 2197 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2198 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2199 bytes, trim_state); 2200 bytes -= bytes_added; 2201 offset += bytes_added; 2202 } 2203 spin_unlock(&cluster->lock); 2204 if (!bytes) { 2205 ret = 1; 2206 goto out; 2207 } 2208 } 2209 2210 no_cluster_bitmap: 2211 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2212 1, 0); 2213 if (!bitmap_info) { 2214 ASSERT(added == 0); 2215 goto new_bitmap; 2216 } 2217 2218 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2219 trim_state); 2220 bytes -= bytes_added; 2221 offset += bytes_added; 2222 added = 0; 2223 2224 if (!bytes) { 2225 ret = 1; 2226 goto out; 2227 } else 2228 goto again; 2229 2230 new_bitmap: 2231 if (info && info->bitmap) { 2232 add_new_bitmap(ctl, info, offset); 2233 added = 1; 2234 info = NULL; 2235 goto again; 2236 } else { 2237 spin_unlock(&ctl->tree_lock); 2238 2239 /* no pre-allocated info, allocate a new one */ 2240 if (!info) { 2241 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2242 GFP_NOFS); 2243 if (!info) { 2244 spin_lock(&ctl->tree_lock); 2245 ret = -ENOMEM; 2246 goto out; 2247 } 2248 } 2249 2250 /* allocate the bitmap */ 2251 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2252 GFP_NOFS); 2253 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2254 spin_lock(&ctl->tree_lock); 2255 if (!info->bitmap) { 2256 ret = -ENOMEM; 2257 goto out; 2258 } 2259 goto again; 2260 } 2261 2262 out: 2263 if (info) { 2264 if (info->bitmap) 2265 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2266 info->bitmap); 2267 kmem_cache_free(btrfs_free_space_cachep, info); 2268 } 2269 2270 return ret; 2271 } 2272 2273 /* 2274 * Free space merging rules: 2275 * 1) Merge trimmed areas together 2276 * 2) Let untrimmed areas coalesce with trimmed areas 2277 * 3) Always pull neighboring regions from bitmaps 2278 * 2279 * The above rules are for when we merge free space based on btrfs_trim_state. 2280 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the 2281 * same reason: to promote larger extent regions which makes life easier for 2282 * find_free_extent(). Rule 2 enables coalescing based on the common path 2283 * being returning free space from btrfs_finish_extent_commit(). So when free 2284 * space is trimmed, it will prevent aggregating trimmed new region and 2285 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents 2286 * and provide find_free_extent() with the largest extents possible hoping for 2287 * the reuse path. 2288 */ 2289 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2290 struct btrfs_free_space *info, bool update_stat) 2291 { 2292 struct btrfs_free_space *left_info = NULL; 2293 struct btrfs_free_space *right_info; 2294 bool merged = false; 2295 u64 offset = info->offset; 2296 u64 bytes = info->bytes; 2297 const bool is_trimmed = btrfs_free_space_trimmed(info); 2298 2299 /* 2300 * first we want to see if there is free space adjacent to the range we 2301 * are adding, if there is remove that struct and add a new one to 2302 * cover the entire range 2303 */ 2304 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2305 if (right_info && rb_prev(&right_info->offset_index)) 2306 left_info = rb_entry(rb_prev(&right_info->offset_index), 2307 struct btrfs_free_space, offset_index); 2308 else if (!right_info) 2309 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2310 2311 /* See try_merge_free_space() comment. */ 2312 if (right_info && !right_info->bitmap && 2313 (!is_trimmed || btrfs_free_space_trimmed(right_info))) { 2314 if (update_stat) 2315 unlink_free_space(ctl, right_info); 2316 else 2317 __unlink_free_space(ctl, right_info); 2318 info->bytes += right_info->bytes; 2319 kmem_cache_free(btrfs_free_space_cachep, right_info); 2320 merged = true; 2321 } 2322 2323 /* See try_merge_free_space() comment. */ 2324 if (left_info && !left_info->bitmap && 2325 left_info->offset + left_info->bytes == offset && 2326 (!is_trimmed || btrfs_free_space_trimmed(left_info))) { 2327 if (update_stat) 2328 unlink_free_space(ctl, left_info); 2329 else 2330 __unlink_free_space(ctl, left_info); 2331 info->offset = left_info->offset; 2332 info->bytes += left_info->bytes; 2333 kmem_cache_free(btrfs_free_space_cachep, left_info); 2334 merged = true; 2335 } 2336 2337 return merged; 2338 } 2339 2340 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2341 struct btrfs_free_space *info, 2342 bool update_stat) 2343 { 2344 struct btrfs_free_space *bitmap; 2345 unsigned long i; 2346 unsigned long j; 2347 const u64 end = info->offset + info->bytes; 2348 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2349 u64 bytes; 2350 2351 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2352 if (!bitmap) 2353 return false; 2354 2355 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2356 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2357 if (j == i) 2358 return false; 2359 bytes = (j - i) * ctl->unit; 2360 info->bytes += bytes; 2361 2362 /* See try_merge_free_space() comment. */ 2363 if (!btrfs_free_space_trimmed(bitmap)) 2364 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2365 2366 if (update_stat) 2367 bitmap_clear_bits(ctl, bitmap, end, bytes); 2368 else 2369 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2370 2371 if (!bitmap->bytes) 2372 free_bitmap(ctl, bitmap); 2373 2374 return true; 2375 } 2376 2377 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2378 struct btrfs_free_space *info, 2379 bool update_stat) 2380 { 2381 struct btrfs_free_space *bitmap; 2382 u64 bitmap_offset; 2383 unsigned long i; 2384 unsigned long j; 2385 unsigned long prev_j; 2386 u64 bytes; 2387 2388 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2389 /* If we're on a boundary, try the previous logical bitmap. */ 2390 if (bitmap_offset == info->offset) { 2391 if (info->offset == 0) 2392 return false; 2393 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2394 } 2395 2396 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2397 if (!bitmap) 2398 return false; 2399 2400 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2401 j = 0; 2402 prev_j = (unsigned long)-1; 2403 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2404 if (j > i) 2405 break; 2406 prev_j = j; 2407 } 2408 if (prev_j == i) 2409 return false; 2410 2411 if (prev_j == (unsigned long)-1) 2412 bytes = (i + 1) * ctl->unit; 2413 else 2414 bytes = (i - prev_j) * ctl->unit; 2415 2416 info->offset -= bytes; 2417 info->bytes += bytes; 2418 2419 /* See try_merge_free_space() comment. */ 2420 if (!btrfs_free_space_trimmed(bitmap)) 2421 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2422 2423 if (update_stat) 2424 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2425 else 2426 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2427 2428 if (!bitmap->bytes) 2429 free_bitmap(ctl, bitmap); 2430 2431 return true; 2432 } 2433 2434 /* 2435 * We prefer always to allocate from extent entries, both for clustered and 2436 * non-clustered allocation requests. So when attempting to add a new extent 2437 * entry, try to see if there's adjacent free space in bitmap entries, and if 2438 * there is, migrate that space from the bitmaps to the extent. 2439 * Like this we get better chances of satisfying space allocation requests 2440 * because we attempt to satisfy them based on a single cache entry, and never 2441 * on 2 or more entries - even if the entries represent a contiguous free space 2442 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2443 * ends). 2444 */ 2445 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2446 struct btrfs_free_space *info, 2447 bool update_stat) 2448 { 2449 /* 2450 * Only work with disconnected entries, as we can change their offset, 2451 * and must be extent entries. 2452 */ 2453 ASSERT(!info->bitmap); 2454 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2455 2456 if (ctl->total_bitmaps > 0) { 2457 bool stole_end; 2458 bool stole_front = false; 2459 2460 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2461 if (ctl->total_bitmaps > 0) 2462 stole_front = steal_from_bitmap_to_front(ctl, info, 2463 update_stat); 2464 2465 if (stole_end || stole_front) 2466 try_merge_free_space(ctl, info, update_stat); 2467 } 2468 } 2469 2470 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2471 struct btrfs_free_space_ctl *ctl, 2472 u64 offset, u64 bytes, 2473 enum btrfs_trim_state trim_state) 2474 { 2475 struct btrfs_block_group *block_group = ctl->private; 2476 struct btrfs_free_space *info; 2477 int ret = 0; 2478 u64 filter_bytes = bytes; 2479 2480 ASSERT(!btrfs_is_zoned(fs_info)); 2481 2482 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2483 if (!info) 2484 return -ENOMEM; 2485 2486 info->offset = offset; 2487 info->bytes = bytes; 2488 info->trim_state = trim_state; 2489 RB_CLEAR_NODE(&info->offset_index); 2490 2491 spin_lock(&ctl->tree_lock); 2492 2493 if (try_merge_free_space(ctl, info, true)) 2494 goto link; 2495 2496 /* 2497 * There was no extent directly to the left or right of this new 2498 * extent then we know we're going to have to allocate a new extent, so 2499 * before we do that see if we need to drop this into a bitmap 2500 */ 2501 ret = insert_into_bitmap(ctl, info); 2502 if (ret < 0) { 2503 goto out; 2504 } else if (ret) { 2505 ret = 0; 2506 goto out; 2507 } 2508 link: 2509 /* 2510 * Only steal free space from adjacent bitmaps if we're sure we're not 2511 * going to add the new free space to existing bitmap entries - because 2512 * that would mean unnecessary work that would be reverted. Therefore 2513 * attempt to steal space from bitmaps if we're adding an extent entry. 2514 */ 2515 steal_from_bitmap(ctl, info, true); 2516 2517 filter_bytes = max(filter_bytes, info->bytes); 2518 2519 ret = link_free_space(ctl, info); 2520 if (ret) 2521 kmem_cache_free(btrfs_free_space_cachep, info); 2522 out: 2523 btrfs_discard_update_discardable(block_group); 2524 spin_unlock(&ctl->tree_lock); 2525 2526 if (ret) { 2527 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2528 ASSERT(ret != -EEXIST); 2529 } 2530 2531 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2532 btrfs_discard_check_filter(block_group, filter_bytes); 2533 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2534 } 2535 2536 return ret; 2537 } 2538 2539 static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, 2540 u64 bytenr, u64 size, bool used) 2541 { 2542 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2543 u64 offset = bytenr - block_group->start; 2544 u64 to_free, to_unusable; 2545 2546 spin_lock(&ctl->tree_lock); 2547 if (!used) 2548 to_free = size; 2549 else if (offset >= block_group->alloc_offset) 2550 to_free = size; 2551 else if (offset + size <= block_group->alloc_offset) 2552 to_free = 0; 2553 else 2554 to_free = offset + size - block_group->alloc_offset; 2555 to_unusable = size - to_free; 2556 2557 ctl->free_space += to_free; 2558 block_group->zone_unusable += to_unusable; 2559 spin_unlock(&ctl->tree_lock); 2560 if (!used) { 2561 spin_lock(&block_group->lock); 2562 block_group->alloc_offset -= size; 2563 spin_unlock(&block_group->lock); 2564 } 2565 2566 /* All the region is now unusable. Mark it as unused and reclaim */ 2567 if (block_group->zone_unusable == block_group->length) 2568 btrfs_mark_bg_unused(block_group); 2569 2570 return 0; 2571 } 2572 2573 int btrfs_add_free_space(struct btrfs_block_group *block_group, 2574 u64 bytenr, u64 size) 2575 { 2576 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2577 2578 if (btrfs_is_zoned(block_group->fs_info)) 2579 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2580 true); 2581 2582 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2583 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2584 2585 return __btrfs_add_free_space(block_group->fs_info, 2586 block_group->free_space_ctl, 2587 bytenr, size, trim_state); 2588 } 2589 2590 int btrfs_add_free_space_unused(struct btrfs_block_group *block_group, 2591 u64 bytenr, u64 size) 2592 { 2593 if (btrfs_is_zoned(block_group->fs_info)) 2594 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2595 false); 2596 2597 return btrfs_add_free_space(block_group, bytenr, size); 2598 } 2599 2600 /* 2601 * This is a subtle distinction because when adding free space back in general, 2602 * we want it to be added as untrimmed for async. But in the case where we add 2603 * it on loading of a block group, we want to consider it trimmed. 2604 */ 2605 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2606 u64 bytenr, u64 size) 2607 { 2608 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2609 2610 if (btrfs_is_zoned(block_group->fs_info)) 2611 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2612 true); 2613 2614 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2615 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2616 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2617 2618 return __btrfs_add_free_space(block_group->fs_info, 2619 block_group->free_space_ctl, 2620 bytenr, size, trim_state); 2621 } 2622 2623 int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2624 u64 offset, u64 bytes) 2625 { 2626 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2627 struct btrfs_free_space *info; 2628 int ret; 2629 bool re_search = false; 2630 2631 if (btrfs_is_zoned(block_group->fs_info)) { 2632 /* 2633 * This can happen with conventional zones when replaying log. 2634 * Since the allocation info of tree-log nodes are not recorded 2635 * to the extent-tree, calculate_alloc_pointer() failed to 2636 * advance the allocation pointer after last allocated tree log 2637 * node blocks. 2638 * 2639 * This function is called from 2640 * btrfs_pin_extent_for_log_replay() when replaying the log. 2641 * Advance the pointer not to overwrite the tree-log nodes. 2642 */ 2643 if (block_group->alloc_offset < offset + bytes) 2644 block_group->alloc_offset = offset + bytes; 2645 return 0; 2646 } 2647 2648 spin_lock(&ctl->tree_lock); 2649 2650 again: 2651 ret = 0; 2652 if (!bytes) 2653 goto out_lock; 2654 2655 info = tree_search_offset(ctl, offset, 0, 0); 2656 if (!info) { 2657 /* 2658 * oops didn't find an extent that matched the space we wanted 2659 * to remove, look for a bitmap instead 2660 */ 2661 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2662 1, 0); 2663 if (!info) { 2664 /* 2665 * If we found a partial bit of our free space in a 2666 * bitmap but then couldn't find the other part this may 2667 * be a problem, so WARN about it. 2668 */ 2669 WARN_ON(re_search); 2670 goto out_lock; 2671 } 2672 } 2673 2674 re_search = false; 2675 if (!info->bitmap) { 2676 unlink_free_space(ctl, info); 2677 if (offset == info->offset) { 2678 u64 to_free = min(bytes, info->bytes); 2679 2680 info->bytes -= to_free; 2681 info->offset += to_free; 2682 if (info->bytes) { 2683 ret = link_free_space(ctl, info); 2684 WARN_ON(ret); 2685 } else { 2686 kmem_cache_free(btrfs_free_space_cachep, info); 2687 } 2688 2689 offset += to_free; 2690 bytes -= to_free; 2691 goto again; 2692 } else { 2693 u64 old_end = info->bytes + info->offset; 2694 2695 info->bytes = offset - info->offset; 2696 ret = link_free_space(ctl, info); 2697 WARN_ON(ret); 2698 if (ret) 2699 goto out_lock; 2700 2701 /* Not enough bytes in this entry to satisfy us */ 2702 if (old_end < offset + bytes) { 2703 bytes -= old_end - offset; 2704 offset = old_end; 2705 goto again; 2706 } else if (old_end == offset + bytes) { 2707 /* all done */ 2708 goto out_lock; 2709 } 2710 spin_unlock(&ctl->tree_lock); 2711 2712 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2713 offset + bytes, 2714 old_end - (offset + bytes), 2715 info->trim_state); 2716 WARN_ON(ret); 2717 goto out; 2718 } 2719 } 2720 2721 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2722 if (ret == -EAGAIN) { 2723 re_search = true; 2724 goto again; 2725 } 2726 out_lock: 2727 btrfs_discard_update_discardable(block_group); 2728 spin_unlock(&ctl->tree_lock); 2729 out: 2730 return ret; 2731 } 2732 2733 void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2734 u64 bytes) 2735 { 2736 struct btrfs_fs_info *fs_info = block_group->fs_info; 2737 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2738 struct btrfs_free_space *info; 2739 struct rb_node *n; 2740 int count = 0; 2741 2742 /* 2743 * Zoned btrfs does not use free space tree and cluster. Just print 2744 * out the free space after the allocation offset. 2745 */ 2746 if (btrfs_is_zoned(fs_info)) { 2747 btrfs_info(fs_info, "free space %llu", 2748 block_group->length - block_group->alloc_offset); 2749 return; 2750 } 2751 2752 spin_lock(&ctl->tree_lock); 2753 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2754 info = rb_entry(n, struct btrfs_free_space, offset_index); 2755 if (info->bytes >= bytes && !block_group->ro) 2756 count++; 2757 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2758 info->offset, info->bytes, 2759 (info->bitmap) ? "yes" : "no"); 2760 } 2761 spin_unlock(&ctl->tree_lock); 2762 btrfs_info(fs_info, "block group has cluster?: %s", 2763 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2764 btrfs_info(fs_info, 2765 "%d blocks of free space at or bigger than bytes is", count); 2766 } 2767 2768 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group, 2769 struct btrfs_free_space_ctl *ctl) 2770 { 2771 struct btrfs_fs_info *fs_info = block_group->fs_info; 2772 2773 spin_lock_init(&ctl->tree_lock); 2774 ctl->unit = fs_info->sectorsize; 2775 ctl->start = block_group->start; 2776 ctl->private = block_group; 2777 ctl->op = &free_space_op; 2778 INIT_LIST_HEAD(&ctl->trimming_ranges); 2779 mutex_init(&ctl->cache_writeout_mutex); 2780 2781 /* 2782 * we only want to have 32k of ram per block group for keeping 2783 * track of free space, and if we pass 1/2 of that we want to 2784 * start converting things over to using bitmaps 2785 */ 2786 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2787 } 2788 2789 /* 2790 * for a given cluster, put all of its extents back into the free 2791 * space cache. If the block group passed doesn't match the block group 2792 * pointed to by the cluster, someone else raced in and freed the 2793 * cluster already. In that case, we just return without changing anything 2794 */ 2795 static void __btrfs_return_cluster_to_free_space( 2796 struct btrfs_block_group *block_group, 2797 struct btrfs_free_cluster *cluster) 2798 { 2799 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2800 struct btrfs_free_space *entry; 2801 struct rb_node *node; 2802 2803 spin_lock(&cluster->lock); 2804 if (cluster->block_group != block_group) 2805 goto out; 2806 2807 cluster->block_group = NULL; 2808 cluster->window_start = 0; 2809 list_del_init(&cluster->block_group_list); 2810 2811 node = rb_first(&cluster->root); 2812 while (node) { 2813 bool bitmap; 2814 2815 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2816 node = rb_next(&entry->offset_index); 2817 rb_erase(&entry->offset_index, &cluster->root); 2818 RB_CLEAR_NODE(&entry->offset_index); 2819 2820 bitmap = (entry->bitmap != NULL); 2821 if (!bitmap) { 2822 /* Merging treats extents as if they were new */ 2823 if (!btrfs_free_space_trimmed(entry)) { 2824 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2825 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2826 entry->bytes; 2827 } 2828 2829 try_merge_free_space(ctl, entry, false); 2830 steal_from_bitmap(ctl, entry, false); 2831 2832 /* As we insert directly, update these statistics */ 2833 if (!btrfs_free_space_trimmed(entry)) { 2834 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2835 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2836 entry->bytes; 2837 } 2838 } 2839 tree_insert_offset(&ctl->free_space_offset, 2840 entry->offset, &entry->offset_index, bitmap); 2841 } 2842 cluster->root = RB_ROOT; 2843 2844 out: 2845 spin_unlock(&cluster->lock); 2846 btrfs_put_block_group(block_group); 2847 } 2848 2849 static void __btrfs_remove_free_space_cache_locked( 2850 struct btrfs_free_space_ctl *ctl) 2851 { 2852 struct btrfs_free_space *info; 2853 struct rb_node *node; 2854 2855 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2856 info = rb_entry(node, struct btrfs_free_space, offset_index); 2857 if (!info->bitmap) { 2858 unlink_free_space(ctl, info); 2859 kmem_cache_free(btrfs_free_space_cachep, info); 2860 } else { 2861 free_bitmap(ctl, info); 2862 } 2863 2864 cond_resched_lock(&ctl->tree_lock); 2865 } 2866 } 2867 2868 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2869 { 2870 spin_lock(&ctl->tree_lock); 2871 __btrfs_remove_free_space_cache_locked(ctl); 2872 if (ctl->private) 2873 btrfs_discard_update_discardable(ctl->private); 2874 spin_unlock(&ctl->tree_lock); 2875 } 2876 2877 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2878 { 2879 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2880 struct btrfs_free_cluster *cluster; 2881 struct list_head *head; 2882 2883 spin_lock(&ctl->tree_lock); 2884 while ((head = block_group->cluster_list.next) != 2885 &block_group->cluster_list) { 2886 cluster = list_entry(head, struct btrfs_free_cluster, 2887 block_group_list); 2888 2889 WARN_ON(cluster->block_group != block_group); 2890 __btrfs_return_cluster_to_free_space(block_group, cluster); 2891 2892 cond_resched_lock(&ctl->tree_lock); 2893 } 2894 __btrfs_remove_free_space_cache_locked(ctl); 2895 btrfs_discard_update_discardable(block_group); 2896 spin_unlock(&ctl->tree_lock); 2897 2898 } 2899 2900 /** 2901 * btrfs_is_free_space_trimmed - see if everything is trimmed 2902 * @block_group: block_group of interest 2903 * 2904 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2905 */ 2906 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2907 { 2908 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2909 struct btrfs_free_space *info; 2910 struct rb_node *node; 2911 bool ret = true; 2912 2913 spin_lock(&ctl->tree_lock); 2914 node = rb_first(&ctl->free_space_offset); 2915 2916 while (node) { 2917 info = rb_entry(node, struct btrfs_free_space, offset_index); 2918 2919 if (!btrfs_free_space_trimmed(info)) { 2920 ret = false; 2921 break; 2922 } 2923 2924 node = rb_next(node); 2925 } 2926 2927 spin_unlock(&ctl->tree_lock); 2928 return ret; 2929 } 2930 2931 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2932 u64 offset, u64 bytes, u64 empty_size, 2933 u64 *max_extent_size) 2934 { 2935 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2936 struct btrfs_discard_ctl *discard_ctl = 2937 &block_group->fs_info->discard_ctl; 2938 struct btrfs_free_space *entry = NULL; 2939 u64 bytes_search = bytes + empty_size; 2940 u64 ret = 0; 2941 u64 align_gap = 0; 2942 u64 align_gap_len = 0; 2943 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2944 2945 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 2946 2947 spin_lock(&ctl->tree_lock); 2948 entry = find_free_space(ctl, &offset, &bytes_search, 2949 block_group->full_stripe_len, max_extent_size); 2950 if (!entry) 2951 goto out; 2952 2953 ret = offset; 2954 if (entry->bitmap) { 2955 bitmap_clear_bits(ctl, entry, offset, bytes); 2956 2957 if (!btrfs_free_space_trimmed(entry)) 2958 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2959 2960 if (!entry->bytes) 2961 free_bitmap(ctl, entry); 2962 } else { 2963 unlink_free_space(ctl, entry); 2964 align_gap_len = offset - entry->offset; 2965 align_gap = entry->offset; 2966 align_gap_trim_state = entry->trim_state; 2967 2968 if (!btrfs_free_space_trimmed(entry)) 2969 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2970 2971 entry->offset = offset + bytes; 2972 WARN_ON(entry->bytes < bytes + align_gap_len); 2973 2974 entry->bytes -= bytes + align_gap_len; 2975 if (!entry->bytes) 2976 kmem_cache_free(btrfs_free_space_cachep, entry); 2977 else 2978 link_free_space(ctl, entry); 2979 } 2980 out: 2981 btrfs_discard_update_discardable(block_group); 2982 spin_unlock(&ctl->tree_lock); 2983 2984 if (align_gap_len) 2985 __btrfs_add_free_space(block_group->fs_info, ctl, 2986 align_gap, align_gap_len, 2987 align_gap_trim_state); 2988 return ret; 2989 } 2990 2991 /* 2992 * given a cluster, put all of its extents back into the free space 2993 * cache. If a block group is passed, this function will only free 2994 * a cluster that belongs to the passed block group. 2995 * 2996 * Otherwise, it'll get a reference on the block group pointed to by the 2997 * cluster and remove the cluster from it. 2998 */ 2999 void btrfs_return_cluster_to_free_space( 3000 struct btrfs_block_group *block_group, 3001 struct btrfs_free_cluster *cluster) 3002 { 3003 struct btrfs_free_space_ctl *ctl; 3004 3005 /* first, get a safe pointer to the block group */ 3006 spin_lock(&cluster->lock); 3007 if (!block_group) { 3008 block_group = cluster->block_group; 3009 if (!block_group) { 3010 spin_unlock(&cluster->lock); 3011 return; 3012 } 3013 } else if (cluster->block_group != block_group) { 3014 /* someone else has already freed it don't redo their work */ 3015 spin_unlock(&cluster->lock); 3016 return; 3017 } 3018 btrfs_get_block_group(block_group); 3019 spin_unlock(&cluster->lock); 3020 3021 ctl = block_group->free_space_ctl; 3022 3023 /* now return any extents the cluster had on it */ 3024 spin_lock(&ctl->tree_lock); 3025 __btrfs_return_cluster_to_free_space(block_group, cluster); 3026 spin_unlock(&ctl->tree_lock); 3027 3028 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 3029 3030 /* finally drop our ref */ 3031 btrfs_put_block_group(block_group); 3032 } 3033 3034 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 3035 struct btrfs_free_cluster *cluster, 3036 struct btrfs_free_space *entry, 3037 u64 bytes, u64 min_start, 3038 u64 *max_extent_size) 3039 { 3040 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3041 int err; 3042 u64 search_start = cluster->window_start; 3043 u64 search_bytes = bytes; 3044 u64 ret = 0; 3045 3046 search_start = min_start; 3047 search_bytes = bytes; 3048 3049 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 3050 if (err) { 3051 *max_extent_size = max(get_max_extent_size(entry), 3052 *max_extent_size); 3053 return 0; 3054 } 3055 3056 ret = search_start; 3057 __bitmap_clear_bits(ctl, entry, ret, bytes); 3058 3059 return ret; 3060 } 3061 3062 /* 3063 * given a cluster, try to allocate 'bytes' from it, returns 0 3064 * if it couldn't find anything suitably large, or a logical disk offset 3065 * if things worked out 3066 */ 3067 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 3068 struct btrfs_free_cluster *cluster, u64 bytes, 3069 u64 min_start, u64 *max_extent_size) 3070 { 3071 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3072 struct btrfs_discard_ctl *discard_ctl = 3073 &block_group->fs_info->discard_ctl; 3074 struct btrfs_free_space *entry = NULL; 3075 struct rb_node *node; 3076 u64 ret = 0; 3077 3078 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3079 3080 spin_lock(&cluster->lock); 3081 if (bytes > cluster->max_size) 3082 goto out; 3083 3084 if (cluster->block_group != block_group) 3085 goto out; 3086 3087 node = rb_first(&cluster->root); 3088 if (!node) 3089 goto out; 3090 3091 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3092 while (1) { 3093 if (entry->bytes < bytes) 3094 *max_extent_size = max(get_max_extent_size(entry), 3095 *max_extent_size); 3096 3097 if (entry->bytes < bytes || 3098 (!entry->bitmap && entry->offset < min_start)) { 3099 node = rb_next(&entry->offset_index); 3100 if (!node) 3101 break; 3102 entry = rb_entry(node, struct btrfs_free_space, 3103 offset_index); 3104 continue; 3105 } 3106 3107 if (entry->bitmap) { 3108 ret = btrfs_alloc_from_bitmap(block_group, 3109 cluster, entry, bytes, 3110 cluster->window_start, 3111 max_extent_size); 3112 if (ret == 0) { 3113 node = rb_next(&entry->offset_index); 3114 if (!node) 3115 break; 3116 entry = rb_entry(node, struct btrfs_free_space, 3117 offset_index); 3118 continue; 3119 } 3120 cluster->window_start += bytes; 3121 } else { 3122 ret = entry->offset; 3123 3124 entry->offset += bytes; 3125 entry->bytes -= bytes; 3126 } 3127 3128 if (entry->bytes == 0) 3129 rb_erase(&entry->offset_index, &cluster->root); 3130 break; 3131 } 3132 out: 3133 spin_unlock(&cluster->lock); 3134 3135 if (!ret) 3136 return 0; 3137 3138 spin_lock(&ctl->tree_lock); 3139 3140 if (!btrfs_free_space_trimmed(entry)) 3141 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3142 3143 ctl->free_space -= bytes; 3144 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3145 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3146 if (entry->bytes == 0) { 3147 ctl->free_extents--; 3148 if (entry->bitmap) { 3149 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3150 entry->bitmap); 3151 ctl->total_bitmaps--; 3152 recalculate_thresholds(ctl); 3153 } else if (!btrfs_free_space_trimmed(entry)) { 3154 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3155 } 3156 kmem_cache_free(btrfs_free_space_cachep, entry); 3157 } 3158 3159 spin_unlock(&ctl->tree_lock); 3160 3161 return ret; 3162 } 3163 3164 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3165 struct btrfs_free_space *entry, 3166 struct btrfs_free_cluster *cluster, 3167 u64 offset, u64 bytes, 3168 u64 cont1_bytes, u64 min_bytes) 3169 { 3170 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3171 unsigned long next_zero; 3172 unsigned long i; 3173 unsigned long want_bits; 3174 unsigned long min_bits; 3175 unsigned long found_bits; 3176 unsigned long max_bits = 0; 3177 unsigned long start = 0; 3178 unsigned long total_found = 0; 3179 int ret; 3180 3181 i = offset_to_bit(entry->offset, ctl->unit, 3182 max_t(u64, offset, entry->offset)); 3183 want_bits = bytes_to_bits(bytes, ctl->unit); 3184 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3185 3186 /* 3187 * Don't bother looking for a cluster in this bitmap if it's heavily 3188 * fragmented. 3189 */ 3190 if (entry->max_extent_size && 3191 entry->max_extent_size < cont1_bytes) 3192 return -ENOSPC; 3193 again: 3194 found_bits = 0; 3195 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3196 next_zero = find_next_zero_bit(entry->bitmap, 3197 BITS_PER_BITMAP, i); 3198 if (next_zero - i >= min_bits) { 3199 found_bits = next_zero - i; 3200 if (found_bits > max_bits) 3201 max_bits = found_bits; 3202 break; 3203 } 3204 if (next_zero - i > max_bits) 3205 max_bits = next_zero - i; 3206 i = next_zero; 3207 } 3208 3209 if (!found_bits) { 3210 entry->max_extent_size = (u64)max_bits * ctl->unit; 3211 return -ENOSPC; 3212 } 3213 3214 if (!total_found) { 3215 start = i; 3216 cluster->max_size = 0; 3217 } 3218 3219 total_found += found_bits; 3220 3221 if (cluster->max_size < found_bits * ctl->unit) 3222 cluster->max_size = found_bits * ctl->unit; 3223 3224 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3225 i = next_zero + 1; 3226 goto again; 3227 } 3228 3229 cluster->window_start = start * ctl->unit + entry->offset; 3230 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3231 ret = tree_insert_offset(&cluster->root, entry->offset, 3232 &entry->offset_index, 1); 3233 ASSERT(!ret); /* -EEXIST; Logic error */ 3234 3235 trace_btrfs_setup_cluster(block_group, cluster, 3236 total_found * ctl->unit, 1); 3237 return 0; 3238 } 3239 3240 /* 3241 * This searches the block group for just extents to fill the cluster with. 3242 * Try to find a cluster with at least bytes total bytes, at least one 3243 * extent of cont1_bytes, and other clusters of at least min_bytes. 3244 */ 3245 static noinline int 3246 setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3247 struct btrfs_free_cluster *cluster, 3248 struct list_head *bitmaps, u64 offset, u64 bytes, 3249 u64 cont1_bytes, u64 min_bytes) 3250 { 3251 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3252 struct btrfs_free_space *first = NULL; 3253 struct btrfs_free_space *entry = NULL; 3254 struct btrfs_free_space *last; 3255 struct rb_node *node; 3256 u64 window_free; 3257 u64 max_extent; 3258 u64 total_size = 0; 3259 3260 entry = tree_search_offset(ctl, offset, 0, 1); 3261 if (!entry) 3262 return -ENOSPC; 3263 3264 /* 3265 * We don't want bitmaps, so just move along until we find a normal 3266 * extent entry. 3267 */ 3268 while (entry->bitmap || entry->bytes < min_bytes) { 3269 if (entry->bitmap && list_empty(&entry->list)) 3270 list_add_tail(&entry->list, bitmaps); 3271 node = rb_next(&entry->offset_index); 3272 if (!node) 3273 return -ENOSPC; 3274 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3275 } 3276 3277 window_free = entry->bytes; 3278 max_extent = entry->bytes; 3279 first = entry; 3280 last = entry; 3281 3282 for (node = rb_next(&entry->offset_index); node; 3283 node = rb_next(&entry->offset_index)) { 3284 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3285 3286 if (entry->bitmap) { 3287 if (list_empty(&entry->list)) 3288 list_add_tail(&entry->list, bitmaps); 3289 continue; 3290 } 3291 3292 if (entry->bytes < min_bytes) 3293 continue; 3294 3295 last = entry; 3296 window_free += entry->bytes; 3297 if (entry->bytes > max_extent) 3298 max_extent = entry->bytes; 3299 } 3300 3301 if (window_free < bytes || max_extent < cont1_bytes) 3302 return -ENOSPC; 3303 3304 cluster->window_start = first->offset; 3305 3306 node = &first->offset_index; 3307 3308 /* 3309 * now we've found our entries, pull them out of the free space 3310 * cache and put them into the cluster rbtree 3311 */ 3312 do { 3313 int ret; 3314 3315 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3316 node = rb_next(&entry->offset_index); 3317 if (entry->bitmap || entry->bytes < min_bytes) 3318 continue; 3319 3320 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3321 ret = tree_insert_offset(&cluster->root, entry->offset, 3322 &entry->offset_index, 0); 3323 total_size += entry->bytes; 3324 ASSERT(!ret); /* -EEXIST; Logic error */ 3325 } while (node && entry != last); 3326 3327 cluster->max_size = max_extent; 3328 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3329 return 0; 3330 } 3331 3332 /* 3333 * This specifically looks for bitmaps that may work in the cluster, we assume 3334 * that we have already failed to find extents that will work. 3335 */ 3336 static noinline int 3337 setup_cluster_bitmap(struct btrfs_block_group *block_group, 3338 struct btrfs_free_cluster *cluster, 3339 struct list_head *bitmaps, u64 offset, u64 bytes, 3340 u64 cont1_bytes, u64 min_bytes) 3341 { 3342 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3343 struct btrfs_free_space *entry = NULL; 3344 int ret = -ENOSPC; 3345 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3346 3347 if (ctl->total_bitmaps == 0) 3348 return -ENOSPC; 3349 3350 /* 3351 * The bitmap that covers offset won't be in the list unless offset 3352 * is just its start offset. 3353 */ 3354 if (!list_empty(bitmaps)) 3355 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3356 3357 if (!entry || entry->offset != bitmap_offset) { 3358 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3359 if (entry && list_empty(&entry->list)) 3360 list_add(&entry->list, bitmaps); 3361 } 3362 3363 list_for_each_entry(entry, bitmaps, list) { 3364 if (entry->bytes < bytes) 3365 continue; 3366 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3367 bytes, cont1_bytes, min_bytes); 3368 if (!ret) 3369 return 0; 3370 } 3371 3372 /* 3373 * The bitmaps list has all the bitmaps that record free space 3374 * starting after offset, so no more search is required. 3375 */ 3376 return -ENOSPC; 3377 } 3378 3379 /* 3380 * here we try to find a cluster of blocks in a block group. The goal 3381 * is to find at least bytes+empty_size. 3382 * We might not find them all in one contiguous area. 3383 * 3384 * returns zero and sets up cluster if things worked out, otherwise 3385 * it returns -enospc 3386 */ 3387 int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3388 struct btrfs_free_cluster *cluster, 3389 u64 offset, u64 bytes, u64 empty_size) 3390 { 3391 struct btrfs_fs_info *fs_info = block_group->fs_info; 3392 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3393 struct btrfs_free_space *entry, *tmp; 3394 LIST_HEAD(bitmaps); 3395 u64 min_bytes; 3396 u64 cont1_bytes; 3397 int ret; 3398 3399 /* 3400 * Choose the minimum extent size we'll require for this 3401 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3402 * For metadata, allow allocates with smaller extents. For 3403 * data, keep it dense. 3404 */ 3405 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3406 cont1_bytes = min_bytes = bytes + empty_size; 3407 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3408 cont1_bytes = bytes; 3409 min_bytes = fs_info->sectorsize; 3410 } else { 3411 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3412 min_bytes = fs_info->sectorsize; 3413 } 3414 3415 spin_lock(&ctl->tree_lock); 3416 3417 /* 3418 * If we know we don't have enough space to make a cluster don't even 3419 * bother doing all the work to try and find one. 3420 */ 3421 if (ctl->free_space < bytes) { 3422 spin_unlock(&ctl->tree_lock); 3423 return -ENOSPC; 3424 } 3425 3426 spin_lock(&cluster->lock); 3427 3428 /* someone already found a cluster, hooray */ 3429 if (cluster->block_group) { 3430 ret = 0; 3431 goto out; 3432 } 3433 3434 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3435 min_bytes); 3436 3437 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3438 bytes + empty_size, 3439 cont1_bytes, min_bytes); 3440 if (ret) 3441 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3442 offset, bytes + empty_size, 3443 cont1_bytes, min_bytes); 3444 3445 /* Clear our temporary list */ 3446 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3447 list_del_init(&entry->list); 3448 3449 if (!ret) { 3450 btrfs_get_block_group(block_group); 3451 list_add_tail(&cluster->block_group_list, 3452 &block_group->cluster_list); 3453 cluster->block_group = block_group; 3454 } else { 3455 trace_btrfs_failed_cluster_setup(block_group); 3456 } 3457 out: 3458 spin_unlock(&cluster->lock); 3459 spin_unlock(&ctl->tree_lock); 3460 3461 return ret; 3462 } 3463 3464 /* 3465 * simple code to zero out a cluster 3466 */ 3467 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3468 { 3469 spin_lock_init(&cluster->lock); 3470 spin_lock_init(&cluster->refill_lock); 3471 cluster->root = RB_ROOT; 3472 cluster->max_size = 0; 3473 cluster->fragmented = false; 3474 INIT_LIST_HEAD(&cluster->block_group_list); 3475 cluster->block_group = NULL; 3476 } 3477 3478 static int do_trimming(struct btrfs_block_group *block_group, 3479 u64 *total_trimmed, u64 start, u64 bytes, 3480 u64 reserved_start, u64 reserved_bytes, 3481 enum btrfs_trim_state reserved_trim_state, 3482 struct btrfs_trim_range *trim_entry) 3483 { 3484 struct btrfs_space_info *space_info = block_group->space_info; 3485 struct btrfs_fs_info *fs_info = block_group->fs_info; 3486 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3487 int ret; 3488 int update = 0; 3489 const u64 end = start + bytes; 3490 const u64 reserved_end = reserved_start + reserved_bytes; 3491 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3492 u64 trimmed = 0; 3493 3494 spin_lock(&space_info->lock); 3495 spin_lock(&block_group->lock); 3496 if (!block_group->ro) { 3497 block_group->reserved += reserved_bytes; 3498 space_info->bytes_reserved += reserved_bytes; 3499 update = 1; 3500 } 3501 spin_unlock(&block_group->lock); 3502 spin_unlock(&space_info->lock); 3503 3504 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3505 if (!ret) { 3506 *total_trimmed += trimmed; 3507 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3508 } 3509 3510 mutex_lock(&ctl->cache_writeout_mutex); 3511 if (reserved_start < start) 3512 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3513 start - reserved_start, 3514 reserved_trim_state); 3515 if (start + bytes < reserved_start + reserved_bytes) 3516 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3517 reserved_trim_state); 3518 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3519 list_del(&trim_entry->list); 3520 mutex_unlock(&ctl->cache_writeout_mutex); 3521 3522 if (update) { 3523 spin_lock(&space_info->lock); 3524 spin_lock(&block_group->lock); 3525 if (block_group->ro) 3526 space_info->bytes_readonly += reserved_bytes; 3527 block_group->reserved -= reserved_bytes; 3528 space_info->bytes_reserved -= reserved_bytes; 3529 spin_unlock(&block_group->lock); 3530 spin_unlock(&space_info->lock); 3531 } 3532 3533 return ret; 3534 } 3535 3536 /* 3537 * If @async is set, then we will trim 1 region and return. 3538 */ 3539 static int trim_no_bitmap(struct btrfs_block_group *block_group, 3540 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3541 bool async) 3542 { 3543 struct btrfs_discard_ctl *discard_ctl = 3544 &block_group->fs_info->discard_ctl; 3545 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3546 struct btrfs_free_space *entry; 3547 struct rb_node *node; 3548 int ret = 0; 3549 u64 extent_start; 3550 u64 extent_bytes; 3551 enum btrfs_trim_state extent_trim_state; 3552 u64 bytes; 3553 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3554 3555 while (start < end) { 3556 struct btrfs_trim_range trim_entry; 3557 3558 mutex_lock(&ctl->cache_writeout_mutex); 3559 spin_lock(&ctl->tree_lock); 3560 3561 if (ctl->free_space < minlen) 3562 goto out_unlock; 3563 3564 entry = tree_search_offset(ctl, start, 0, 1); 3565 if (!entry) 3566 goto out_unlock; 3567 3568 /* Skip bitmaps and if async, already trimmed entries */ 3569 while (entry->bitmap || 3570 (async && btrfs_free_space_trimmed(entry))) { 3571 node = rb_next(&entry->offset_index); 3572 if (!node) 3573 goto out_unlock; 3574 entry = rb_entry(node, struct btrfs_free_space, 3575 offset_index); 3576 } 3577 3578 if (entry->offset >= end) 3579 goto out_unlock; 3580 3581 extent_start = entry->offset; 3582 extent_bytes = entry->bytes; 3583 extent_trim_state = entry->trim_state; 3584 if (async) { 3585 start = entry->offset; 3586 bytes = entry->bytes; 3587 if (bytes < minlen) { 3588 spin_unlock(&ctl->tree_lock); 3589 mutex_unlock(&ctl->cache_writeout_mutex); 3590 goto next; 3591 } 3592 unlink_free_space(ctl, entry); 3593 /* 3594 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3595 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3596 * X when we come back around. So trim it now. 3597 */ 3598 if (max_discard_size && 3599 bytes >= (max_discard_size + 3600 BTRFS_ASYNC_DISCARD_MIN_FILTER)) { 3601 bytes = max_discard_size; 3602 extent_bytes = max_discard_size; 3603 entry->offset += max_discard_size; 3604 entry->bytes -= max_discard_size; 3605 link_free_space(ctl, entry); 3606 } else { 3607 kmem_cache_free(btrfs_free_space_cachep, entry); 3608 } 3609 } else { 3610 start = max(start, extent_start); 3611 bytes = min(extent_start + extent_bytes, end) - start; 3612 if (bytes < minlen) { 3613 spin_unlock(&ctl->tree_lock); 3614 mutex_unlock(&ctl->cache_writeout_mutex); 3615 goto next; 3616 } 3617 3618 unlink_free_space(ctl, entry); 3619 kmem_cache_free(btrfs_free_space_cachep, entry); 3620 } 3621 3622 spin_unlock(&ctl->tree_lock); 3623 trim_entry.start = extent_start; 3624 trim_entry.bytes = extent_bytes; 3625 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3626 mutex_unlock(&ctl->cache_writeout_mutex); 3627 3628 ret = do_trimming(block_group, total_trimmed, start, bytes, 3629 extent_start, extent_bytes, extent_trim_state, 3630 &trim_entry); 3631 if (ret) { 3632 block_group->discard_cursor = start + bytes; 3633 break; 3634 } 3635 next: 3636 start += bytes; 3637 block_group->discard_cursor = start; 3638 if (async && *total_trimmed) 3639 break; 3640 3641 if (fatal_signal_pending(current)) { 3642 ret = -ERESTARTSYS; 3643 break; 3644 } 3645 3646 cond_resched(); 3647 } 3648 3649 return ret; 3650 3651 out_unlock: 3652 block_group->discard_cursor = btrfs_block_group_end(block_group); 3653 spin_unlock(&ctl->tree_lock); 3654 mutex_unlock(&ctl->cache_writeout_mutex); 3655 3656 return ret; 3657 } 3658 3659 /* 3660 * If we break out of trimming a bitmap prematurely, we should reset the 3661 * trimming bit. In a rather contrieved case, it's possible to race here so 3662 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. 3663 * 3664 * start = start of bitmap 3665 * end = near end of bitmap 3666 * 3667 * Thread 1: Thread 2: 3668 * trim_bitmaps(start) 3669 * trim_bitmaps(end) 3670 * end_trimming_bitmap() 3671 * reset_trimming_bitmap() 3672 */ 3673 static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) 3674 { 3675 struct btrfs_free_space *entry; 3676 3677 spin_lock(&ctl->tree_lock); 3678 entry = tree_search_offset(ctl, offset, 1, 0); 3679 if (entry) { 3680 if (btrfs_free_space_trimmed(entry)) { 3681 ctl->discardable_extents[BTRFS_STAT_CURR] += 3682 entry->bitmap_extents; 3683 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; 3684 } 3685 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3686 } 3687 3688 spin_unlock(&ctl->tree_lock); 3689 } 3690 3691 static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, 3692 struct btrfs_free_space *entry) 3693 { 3694 if (btrfs_free_space_trimming_bitmap(entry)) { 3695 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; 3696 ctl->discardable_extents[BTRFS_STAT_CURR] -= 3697 entry->bitmap_extents; 3698 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; 3699 } 3700 } 3701 3702 /* 3703 * If @async is set, then we will trim 1 region and return. 3704 */ 3705 static int trim_bitmaps(struct btrfs_block_group *block_group, 3706 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3707 u64 maxlen, bool async) 3708 { 3709 struct btrfs_discard_ctl *discard_ctl = 3710 &block_group->fs_info->discard_ctl; 3711 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3712 struct btrfs_free_space *entry; 3713 int ret = 0; 3714 int ret2; 3715 u64 bytes; 3716 u64 offset = offset_to_bitmap(ctl, start); 3717 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3718 3719 while (offset < end) { 3720 bool next_bitmap = false; 3721 struct btrfs_trim_range trim_entry; 3722 3723 mutex_lock(&ctl->cache_writeout_mutex); 3724 spin_lock(&ctl->tree_lock); 3725 3726 if (ctl->free_space < minlen) { 3727 block_group->discard_cursor = 3728 btrfs_block_group_end(block_group); 3729 spin_unlock(&ctl->tree_lock); 3730 mutex_unlock(&ctl->cache_writeout_mutex); 3731 break; 3732 } 3733 3734 entry = tree_search_offset(ctl, offset, 1, 0); 3735 /* 3736 * Bitmaps are marked trimmed lossily now to prevent constant 3737 * discarding of the same bitmap (the reason why we are bound 3738 * by the filters). So, retrim the block group bitmaps when we 3739 * are preparing to punt to the unused_bgs list. This uses 3740 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3741 * which is the only discard index which sets minlen to 0. 3742 */ 3743 if (!entry || (async && minlen && start == offset && 3744 btrfs_free_space_trimmed(entry))) { 3745 spin_unlock(&ctl->tree_lock); 3746 mutex_unlock(&ctl->cache_writeout_mutex); 3747 next_bitmap = true; 3748 goto next; 3749 } 3750 3751 /* 3752 * Async discard bitmap trimming begins at by setting the start 3753 * to be key.objectid and the offset_to_bitmap() aligns to the 3754 * start of the bitmap. This lets us know we are fully 3755 * scanning the bitmap rather than only some portion of it. 3756 */ 3757 if (start == offset) 3758 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3759 3760 bytes = minlen; 3761 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3762 if (ret2 || start >= end) { 3763 /* 3764 * We lossily consider a bitmap trimmed if we only skip 3765 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3766 */ 3767 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3768 end_trimming_bitmap(ctl, entry); 3769 else 3770 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3771 spin_unlock(&ctl->tree_lock); 3772 mutex_unlock(&ctl->cache_writeout_mutex); 3773 next_bitmap = true; 3774 goto next; 3775 } 3776 3777 /* 3778 * We already trimmed a region, but are using the locking above 3779 * to reset the trim_state. 3780 */ 3781 if (async && *total_trimmed) { 3782 spin_unlock(&ctl->tree_lock); 3783 mutex_unlock(&ctl->cache_writeout_mutex); 3784 goto out; 3785 } 3786 3787 bytes = min(bytes, end - start); 3788 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3789 spin_unlock(&ctl->tree_lock); 3790 mutex_unlock(&ctl->cache_writeout_mutex); 3791 goto next; 3792 } 3793 3794 /* 3795 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3796 * If X < @minlen, we won't trim X when we come back around. 3797 * So trim it now. We differ here from trimming extents as we 3798 * don't keep individual state per bit. 3799 */ 3800 if (async && 3801 max_discard_size && 3802 bytes > (max_discard_size + minlen)) 3803 bytes = max_discard_size; 3804 3805 bitmap_clear_bits(ctl, entry, start, bytes); 3806 if (entry->bytes == 0) 3807 free_bitmap(ctl, entry); 3808 3809 spin_unlock(&ctl->tree_lock); 3810 trim_entry.start = start; 3811 trim_entry.bytes = bytes; 3812 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3813 mutex_unlock(&ctl->cache_writeout_mutex); 3814 3815 ret = do_trimming(block_group, total_trimmed, start, bytes, 3816 start, bytes, 0, &trim_entry); 3817 if (ret) { 3818 reset_trimming_bitmap(ctl, offset); 3819 block_group->discard_cursor = 3820 btrfs_block_group_end(block_group); 3821 break; 3822 } 3823 next: 3824 if (next_bitmap) { 3825 offset += BITS_PER_BITMAP * ctl->unit; 3826 start = offset; 3827 } else { 3828 start += bytes; 3829 } 3830 block_group->discard_cursor = start; 3831 3832 if (fatal_signal_pending(current)) { 3833 if (start != offset) 3834 reset_trimming_bitmap(ctl, offset); 3835 ret = -ERESTARTSYS; 3836 break; 3837 } 3838 3839 cond_resched(); 3840 } 3841 3842 if (offset >= end) 3843 block_group->discard_cursor = end; 3844 3845 out: 3846 return ret; 3847 } 3848 3849 int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3850 u64 *trimmed, u64 start, u64 end, u64 minlen) 3851 { 3852 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3853 int ret; 3854 u64 rem = 0; 3855 3856 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3857 3858 *trimmed = 0; 3859 3860 spin_lock(&block_group->lock); 3861 if (block_group->removed) { 3862 spin_unlock(&block_group->lock); 3863 return 0; 3864 } 3865 btrfs_freeze_block_group(block_group); 3866 spin_unlock(&block_group->lock); 3867 3868 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3869 if (ret) 3870 goto out; 3871 3872 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3873 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3874 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3875 if (rem) 3876 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3877 out: 3878 btrfs_unfreeze_block_group(block_group); 3879 return ret; 3880 } 3881 3882 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3883 u64 *trimmed, u64 start, u64 end, u64 minlen, 3884 bool async) 3885 { 3886 int ret; 3887 3888 *trimmed = 0; 3889 3890 spin_lock(&block_group->lock); 3891 if (block_group->removed) { 3892 spin_unlock(&block_group->lock); 3893 return 0; 3894 } 3895 btrfs_freeze_block_group(block_group); 3896 spin_unlock(&block_group->lock); 3897 3898 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3899 btrfs_unfreeze_block_group(block_group); 3900 3901 return ret; 3902 } 3903 3904 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3905 u64 *trimmed, u64 start, u64 end, u64 minlen, 3906 u64 maxlen, bool async) 3907 { 3908 int ret; 3909 3910 *trimmed = 0; 3911 3912 spin_lock(&block_group->lock); 3913 if (block_group->removed) { 3914 spin_unlock(&block_group->lock); 3915 return 0; 3916 } 3917 btrfs_freeze_block_group(block_group); 3918 spin_unlock(&block_group->lock); 3919 3920 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3921 async); 3922 3923 btrfs_unfreeze_block_group(block_group); 3924 3925 return ret; 3926 } 3927 3928 bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info) 3929 { 3930 return btrfs_super_cache_generation(fs_info->super_copy); 3931 } 3932 3933 static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info, 3934 struct btrfs_trans_handle *trans) 3935 { 3936 struct btrfs_block_group *block_group; 3937 struct rb_node *node; 3938 int ret; 3939 3940 btrfs_info(fs_info, "cleaning free space cache v1"); 3941 3942 node = rb_first(&fs_info->block_group_cache_tree); 3943 while (node) { 3944 block_group = rb_entry(node, struct btrfs_block_group, cache_node); 3945 ret = btrfs_remove_free_space_inode(trans, NULL, block_group); 3946 if (ret) 3947 goto out; 3948 node = rb_next(node); 3949 } 3950 out: 3951 return ret; 3952 } 3953 3954 int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active) 3955 { 3956 struct btrfs_trans_handle *trans; 3957 int ret; 3958 3959 /* 3960 * update_super_roots will appropriately set or unset 3961 * super_copy->cache_generation based on SPACE_CACHE and 3962 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a 3963 * transaction commit whether we are enabling space cache v1 and don't 3964 * have any other work to do, or are disabling it and removing free 3965 * space inodes. 3966 */ 3967 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3968 if (IS_ERR(trans)) 3969 return PTR_ERR(trans); 3970 3971 if (!active) { 3972 set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags); 3973 ret = cleanup_free_space_cache_v1(fs_info, trans); 3974 if (ret) { 3975 btrfs_abort_transaction(trans, ret); 3976 btrfs_end_transaction(trans); 3977 goto out; 3978 } 3979 } 3980 3981 ret = btrfs_commit_transaction(trans); 3982 out: 3983 clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags); 3984 3985 return ret; 3986 } 3987 3988 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3989 /* 3990 * Use this if you need to make a bitmap or extent entry specifically, it 3991 * doesn't do any of the merging that add_free_space does, this acts a lot like 3992 * how the free space cache loading stuff works, so you can get really weird 3993 * configurations. 3994 */ 3995 int test_add_free_space_entry(struct btrfs_block_group *cache, 3996 u64 offset, u64 bytes, bool bitmap) 3997 { 3998 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3999 struct btrfs_free_space *info = NULL, *bitmap_info; 4000 void *map = NULL; 4001 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED; 4002 u64 bytes_added; 4003 int ret; 4004 4005 again: 4006 if (!info) { 4007 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 4008 if (!info) 4009 return -ENOMEM; 4010 } 4011 4012 if (!bitmap) { 4013 spin_lock(&ctl->tree_lock); 4014 info->offset = offset; 4015 info->bytes = bytes; 4016 info->max_extent_size = 0; 4017 ret = link_free_space(ctl, info); 4018 spin_unlock(&ctl->tree_lock); 4019 if (ret) 4020 kmem_cache_free(btrfs_free_space_cachep, info); 4021 return ret; 4022 } 4023 4024 if (!map) { 4025 map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS); 4026 if (!map) { 4027 kmem_cache_free(btrfs_free_space_cachep, info); 4028 return -ENOMEM; 4029 } 4030 } 4031 4032 spin_lock(&ctl->tree_lock); 4033 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 4034 1, 0); 4035 if (!bitmap_info) { 4036 info->bitmap = map; 4037 map = NULL; 4038 add_new_bitmap(ctl, info, offset); 4039 bitmap_info = info; 4040 info = NULL; 4041 } 4042 4043 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 4044 trim_state); 4045 4046 bytes -= bytes_added; 4047 offset += bytes_added; 4048 spin_unlock(&ctl->tree_lock); 4049 4050 if (bytes) 4051 goto again; 4052 4053 if (info) 4054 kmem_cache_free(btrfs_free_space_cachep, info); 4055 if (map) 4056 kmem_cache_free(btrfs_free_space_bitmap_cachep, map); 4057 return 0; 4058 } 4059 4060 /* 4061 * Checks to see if the given range is in the free space cache. This is really 4062 * just used to check the absence of space, so if there is free space in the 4063 * range at all we will return 1. 4064 */ 4065 int test_check_exists(struct btrfs_block_group *cache, 4066 u64 offset, u64 bytes) 4067 { 4068 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 4069 struct btrfs_free_space *info; 4070 int ret = 0; 4071 4072 spin_lock(&ctl->tree_lock); 4073 info = tree_search_offset(ctl, offset, 0, 0); 4074 if (!info) { 4075 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 4076 1, 0); 4077 if (!info) 4078 goto out; 4079 } 4080 4081 have_info: 4082 if (info->bitmap) { 4083 u64 bit_off, bit_bytes; 4084 struct rb_node *n; 4085 struct btrfs_free_space *tmp; 4086 4087 bit_off = offset; 4088 bit_bytes = ctl->unit; 4089 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 4090 if (!ret) { 4091 if (bit_off == offset) { 4092 ret = 1; 4093 goto out; 4094 } else if (bit_off > offset && 4095 offset + bytes > bit_off) { 4096 ret = 1; 4097 goto out; 4098 } 4099 } 4100 4101 n = rb_prev(&info->offset_index); 4102 while (n) { 4103 tmp = rb_entry(n, struct btrfs_free_space, 4104 offset_index); 4105 if (tmp->offset + tmp->bytes < offset) 4106 break; 4107 if (offset + bytes < tmp->offset) { 4108 n = rb_prev(&tmp->offset_index); 4109 continue; 4110 } 4111 info = tmp; 4112 goto have_info; 4113 } 4114 4115 n = rb_next(&info->offset_index); 4116 while (n) { 4117 tmp = rb_entry(n, struct btrfs_free_space, 4118 offset_index); 4119 if (offset + bytes < tmp->offset) 4120 break; 4121 if (tmp->offset + tmp->bytes < offset) { 4122 n = rb_next(&tmp->offset_index); 4123 continue; 4124 } 4125 info = tmp; 4126 goto have_info; 4127 } 4128 4129 ret = 0; 4130 goto out; 4131 } 4132 4133 if (info->offset == offset) { 4134 ret = 1; 4135 goto out; 4136 } 4137 4138 if (offset > info->offset && offset < info->offset + info->bytes) 4139 ret = 1; 4140 out: 4141 spin_unlock(&ctl->tree_lock); 4142 return ret; 4143 } 4144 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 4145