1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Red Hat. All rights reserved. 4 */ 5 6 #include <linux/pagemap.h> 7 #include <linux/sched.h> 8 #include <linux/sched/signal.h> 9 #include <linux/slab.h> 10 #include <linux/math64.h> 11 #include <linux/ratelimit.h> 12 #include <linux/error-injection.h> 13 #include <linux/sched/mm.h> 14 #include "ctree.h" 15 #include "free-space-cache.h" 16 #include "transaction.h" 17 #include "disk-io.h" 18 #include "extent_io.h" 19 #include "volumes.h" 20 #include "space-info.h" 21 #include "delalloc-space.h" 22 #include "block-group.h" 23 #include "discard.h" 24 25 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 26 #define MAX_CACHE_BYTES_PER_GIG SZ_64K 27 #define FORCE_EXTENT_THRESHOLD SZ_1M 28 29 struct btrfs_trim_range { 30 u64 start; 31 u64 bytes; 32 struct list_head list; 33 }; 34 35 static int link_free_space(struct btrfs_free_space_ctl *ctl, 36 struct btrfs_free_space *info); 37 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 38 struct btrfs_free_space *info); 39 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 40 struct btrfs_free_space *bitmap_info, u64 *offset, 41 u64 *bytes, bool for_alloc); 42 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 43 struct btrfs_free_space *bitmap_info); 44 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 45 struct btrfs_free_space *info, u64 offset, 46 u64 bytes); 47 48 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 49 struct btrfs_path *path, 50 u64 offset) 51 { 52 struct btrfs_fs_info *fs_info = root->fs_info; 53 struct btrfs_key key; 54 struct btrfs_key location; 55 struct btrfs_disk_key disk_key; 56 struct btrfs_free_space_header *header; 57 struct extent_buffer *leaf; 58 struct inode *inode = NULL; 59 unsigned nofs_flag; 60 int ret; 61 62 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 63 key.offset = offset; 64 key.type = 0; 65 66 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 67 if (ret < 0) 68 return ERR_PTR(ret); 69 if (ret > 0) { 70 btrfs_release_path(path); 71 return ERR_PTR(-ENOENT); 72 } 73 74 leaf = path->nodes[0]; 75 header = btrfs_item_ptr(leaf, path->slots[0], 76 struct btrfs_free_space_header); 77 btrfs_free_space_key(leaf, header, &disk_key); 78 btrfs_disk_key_to_cpu(&location, &disk_key); 79 btrfs_release_path(path); 80 81 /* 82 * We are often under a trans handle at this point, so we need to make 83 * sure NOFS is set to keep us from deadlocking. 
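	 * (memalloc_nofs_save() below makes every allocation in this scope
	 * behave as if it were GFP_NOFS, so memory reclaim triggered by the
	 * iget cannot re-enter the filesystem while we hold the trans handle.)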
84 */ 85 nofs_flag = memalloc_nofs_save(); 86 inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path); 87 btrfs_release_path(path); 88 memalloc_nofs_restore(nofs_flag); 89 if (IS_ERR(inode)) 90 return inode; 91 92 mapping_set_gfp_mask(inode->i_mapping, 93 mapping_gfp_constraint(inode->i_mapping, 94 ~(__GFP_FS | __GFP_HIGHMEM))); 95 96 return inode; 97 } 98 99 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, 100 struct btrfs_path *path) 101 { 102 struct btrfs_fs_info *fs_info = block_group->fs_info; 103 struct inode *inode = NULL; 104 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 105 106 spin_lock(&block_group->lock); 107 if (block_group->inode) 108 inode = igrab(block_group->inode); 109 spin_unlock(&block_group->lock); 110 if (inode) 111 return inode; 112 113 inode = __lookup_free_space_inode(fs_info->tree_root, path, 114 block_group->start); 115 if (IS_ERR(inode)) 116 return inode; 117 118 spin_lock(&block_group->lock); 119 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 120 btrfs_info(fs_info, "Old style space inode found, converting."); 121 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 122 BTRFS_INODE_NODATACOW; 123 block_group->disk_cache_state = BTRFS_DC_CLEAR; 124 } 125 126 if (!block_group->iref) { 127 block_group->inode = igrab(inode); 128 block_group->iref = 1; 129 } 130 spin_unlock(&block_group->lock); 131 132 return inode; 133 } 134 135 static int __create_free_space_inode(struct btrfs_root *root, 136 struct btrfs_trans_handle *trans, 137 struct btrfs_path *path, 138 u64 ino, u64 offset) 139 { 140 struct btrfs_key key; 141 struct btrfs_disk_key disk_key; 142 struct btrfs_free_space_header *header; 143 struct btrfs_inode_item *inode_item; 144 struct extent_buffer *leaf; 145 /* We inline CRCs for the free disk space cache */ 146 const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC | 147 BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 148 int ret; 149 150 ret = btrfs_insert_empty_inode(trans, root, path, ino); 151 if (ret) 152 return ret; 153 154 leaf = path->nodes[0]; 155 inode_item = btrfs_item_ptr(leaf, path->slots[0], 156 struct btrfs_inode_item); 157 btrfs_item_key(leaf, &disk_key, path->slots[0]); 158 memzero_extent_buffer(leaf, (unsigned long)inode_item, 159 sizeof(*inode_item)); 160 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 161 btrfs_set_inode_size(leaf, inode_item, 0); 162 btrfs_set_inode_nbytes(leaf, inode_item, 0); 163 btrfs_set_inode_uid(leaf, inode_item, 0); 164 btrfs_set_inode_gid(leaf, inode_item, 0); 165 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 166 btrfs_set_inode_flags(leaf, inode_item, flags); 167 btrfs_set_inode_nlink(leaf, inode_item, 1); 168 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 169 btrfs_set_inode_block_group(leaf, inode_item, offset); 170 btrfs_mark_buffer_dirty(leaf); 171 btrfs_release_path(path); 172 173 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 174 key.offset = offset; 175 key.type = 0; 176 ret = btrfs_insert_empty_item(trans, root, path, &key, 177 sizeof(struct btrfs_free_space_header)); 178 if (ret < 0) { 179 btrfs_release_path(path); 180 return ret; 181 } 182 183 leaf = path->nodes[0]; 184 header = btrfs_item_ptr(leaf, path->slots[0], 185 struct btrfs_free_space_header); 186 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 187 btrfs_set_free_space_key(leaf, header, &disk_key); 188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_release_path(path); 190 191 return 0; 192 } 193 194 int create_free_space_inode(struct 
btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

/*
 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
 * handles lookup, otherwise it takes ownership and iputs the inode.
 * Don't reuse an inode pointer after passing it into this function.
 */
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
				  struct inode *inode,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!inode)
		inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		if (PTR_ERR(inode) != -ENOENT)
			ret = PTR_ERR(inode);
		goto out;
	}
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret) {
		btrfs_add_delayed_iput(inode);
		goto out;
	}
	clear_nlink(inode);
	/* One for the block group's ref */
	spin_lock(&block_group->lock);
	if (block_group->iref) {
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		iput(inode);
	} else {
		spin_unlock(&block_group->lock);
	}
	/* One for the lookup ref */
	btrfs_add_delayed_iput(inode);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = block_group->start;
	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
				-1, 1);
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto out;
	}
	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		       btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
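	 * (Any non-zero return from btrfs_truncate_inode_items() below is
	 * therefore treated as fatal and the transaction is aborted via the
	 * fail label.)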
327 */ 328 ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode), 329 0, BTRFS_EXTENT_DATA_KEY); 330 if (ret) 331 goto fail; 332 333 ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); 334 335 fail: 336 if (locked) 337 mutex_unlock(&trans->transaction->cache_write_mutex); 338 if (ret) 339 btrfs_abort_transaction(trans, ret); 340 341 return ret; 342 } 343 344 static void readahead_cache(struct inode *inode) 345 { 346 struct file_ra_state *ra; 347 unsigned long last_index; 348 349 ra = kzalloc(sizeof(*ra), GFP_NOFS); 350 if (!ra) 351 return; 352 353 file_ra_state_init(ra, inode->i_mapping); 354 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 355 356 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 357 358 kfree(ra); 359 } 360 361 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 362 int write) 363 { 364 int num_pages; 365 366 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 367 368 /* Make sure we can fit our crcs and generation into the first page */ 369 if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 370 return -ENOSPC; 371 372 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 373 374 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 375 if (!io_ctl->pages) 376 return -ENOMEM; 377 378 io_ctl->num_pages = num_pages; 379 io_ctl->fs_info = btrfs_sb(inode->i_sb); 380 io_ctl->inode = inode; 381 382 return 0; 383 } 384 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 385 386 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 387 { 388 kfree(io_ctl->pages); 389 io_ctl->pages = NULL; 390 } 391 392 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 393 { 394 if (io_ctl->cur) { 395 io_ctl->cur = NULL; 396 io_ctl->orig = NULL; 397 } 398 } 399 400 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 401 { 402 ASSERT(io_ctl->index < io_ctl->num_pages); 403 io_ctl->page = io_ctl->pages[io_ctl->index++]; 404 io_ctl->cur = page_address(io_ctl->page); 405 io_ctl->orig = io_ctl->cur; 406 io_ctl->size = PAGE_SIZE; 407 if (clear) 408 clear_page(io_ctl->cur); 409 } 410 411 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 412 { 413 int i; 414 415 io_ctl_unmap_page(io_ctl); 416 417 for (i = 0; i < io_ctl->num_pages; i++) { 418 if (io_ctl->pages[i]) { 419 ClearPageChecked(io_ctl->pages[i]); 420 unlock_page(io_ctl->pages[i]); 421 put_page(io_ctl->pages[i]); 422 } 423 } 424 } 425 426 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate) 427 { 428 struct page *page; 429 struct inode *inode = io_ctl->inode; 430 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 431 int i; 432 433 for (i = 0; i < io_ctl->num_pages; i++) { 434 int ret; 435 436 page = find_or_create_page(inode->i_mapping, i, mask); 437 if (!page) { 438 io_ctl_drop_pages(io_ctl); 439 return -ENOMEM; 440 } 441 442 ret = set_page_extent_mapped(page); 443 if (ret < 0) { 444 unlock_page(page); 445 put_page(page); 446 io_ctl_drop_pages(io_ctl); 447 return ret; 448 } 449 450 io_ctl->pages[i] = page; 451 if (uptodate && !PageUptodate(page)) { 452 btrfs_readpage(NULL, page); 453 lock_page(page); 454 if (page->mapping != inode->i_mapping) { 455 btrfs_err(BTRFS_I(inode)->root->fs_info, 456 "free space cache page truncated"); 457 io_ctl_drop_pages(io_ctl); 458 return -EIO; 459 } 460 if (!PageUptodate(page)) { 461 btrfs_err(BTRFS_I(inode)->root->fs_info, 462 "error reading free space cache"); 463 io_ctl_drop_pages(io_ctl); 464 return -EIO; 465 } 466 } 467 } 468 469 for (i = 0; i < io_ctl->num_pages; i++) 470 
clear_page_dirty_for_io(io_ctl->pages[i]); 471 472 return 0; 473 } 474 475 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 476 { 477 io_ctl_map_page(io_ctl, 1); 478 479 /* 480 * Skip the csum areas. If we don't check crcs then we just have a 481 * 64bit chunk at the front of the first page. 482 */ 483 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 484 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 485 486 put_unaligned_le64(generation, io_ctl->cur); 487 io_ctl->cur += sizeof(u64); 488 } 489 490 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 491 { 492 u64 cache_gen; 493 494 /* 495 * Skip the crc area. If we don't check crcs then we just have a 64bit 496 * chunk at the front of the first page. 497 */ 498 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 499 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 500 501 cache_gen = get_unaligned_le64(io_ctl->cur); 502 if (cache_gen != generation) { 503 btrfs_err_rl(io_ctl->fs_info, 504 "space cache generation (%llu) does not match inode (%llu)", 505 cache_gen, generation); 506 io_ctl_unmap_page(io_ctl); 507 return -EIO; 508 } 509 io_ctl->cur += sizeof(u64); 510 return 0; 511 } 512 513 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 514 { 515 u32 *tmp; 516 u32 crc = ~(u32)0; 517 unsigned offset = 0; 518 519 if (index == 0) 520 offset = sizeof(u32) * io_ctl->num_pages; 521 522 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 523 btrfs_crc32c_final(crc, (u8 *)&crc); 524 io_ctl_unmap_page(io_ctl); 525 tmp = page_address(io_ctl->pages[0]); 526 tmp += index; 527 *tmp = crc; 528 } 529 530 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 531 { 532 u32 *tmp, val; 533 u32 crc = ~(u32)0; 534 unsigned offset = 0; 535 536 if (index == 0) 537 offset = sizeof(u32) * io_ctl->num_pages; 538 539 tmp = page_address(io_ctl->pages[0]); 540 tmp += index; 541 val = *tmp; 542 543 io_ctl_map_page(io_ctl, 0); 544 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 545 btrfs_crc32c_final(crc, (u8 *)&crc); 546 if (val != crc) { 547 btrfs_err_rl(io_ctl->fs_info, 548 "csum mismatch on free space cache"); 549 io_ctl_unmap_page(io_ctl); 550 return -EIO; 551 } 552 553 return 0; 554 } 555 556 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 557 void *bitmap) 558 { 559 struct btrfs_free_space_entry *entry; 560 561 if (!io_ctl->cur) 562 return -ENOSPC; 563 564 entry = io_ctl->cur; 565 put_unaligned_le64(offset, &entry->offset); 566 put_unaligned_le64(bytes, &entry->bytes); 567 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 568 BTRFS_FREE_SPACE_EXTENT; 569 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 570 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 571 572 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 573 return 0; 574 575 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 576 577 /* No more pages to map */ 578 if (io_ctl->index >= io_ctl->num_pages) 579 return 0; 580 581 /* map the next page */ 582 io_ctl_map_page(io_ctl, 1); 583 return 0; 584 } 585 586 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 587 { 588 if (!io_ctl->cur) 589 return -ENOSPC; 590 591 /* 592 * If we aren't at the start of the current page, unmap this one and 593 * map the next one if there is any left. 
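	 * (Bitmaps are written with copy_page() below, one full page each, so
	 * every bitmap in the cache file starts on its own page boundary.)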
594 */ 595 if (io_ctl->cur != io_ctl->orig) { 596 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 597 if (io_ctl->index >= io_ctl->num_pages) 598 return -ENOSPC; 599 io_ctl_map_page(io_ctl, 0); 600 } 601 602 copy_page(io_ctl->cur, bitmap); 603 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 604 if (io_ctl->index < io_ctl->num_pages) 605 io_ctl_map_page(io_ctl, 0); 606 return 0; 607 } 608 609 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 610 { 611 /* 612 * If we're not on the boundary we know we've modified the page and we 613 * need to crc the page. 614 */ 615 if (io_ctl->cur != io_ctl->orig) 616 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 617 else 618 io_ctl_unmap_page(io_ctl); 619 620 while (io_ctl->index < io_ctl->num_pages) { 621 io_ctl_map_page(io_ctl, 1); 622 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 623 } 624 } 625 626 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 627 struct btrfs_free_space *entry, u8 *type) 628 { 629 struct btrfs_free_space_entry *e; 630 int ret; 631 632 if (!io_ctl->cur) { 633 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 634 if (ret) 635 return ret; 636 } 637 638 e = io_ctl->cur; 639 entry->offset = get_unaligned_le64(&e->offset); 640 entry->bytes = get_unaligned_le64(&e->bytes); 641 *type = e->type; 642 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 643 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 644 645 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 646 return 0; 647 648 io_ctl_unmap_page(io_ctl); 649 650 return 0; 651 } 652 653 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 654 struct btrfs_free_space *entry) 655 { 656 int ret; 657 658 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 659 if (ret) 660 return ret; 661 662 copy_page(entry->bitmap, io_ctl->cur); 663 io_ctl_unmap_page(io_ctl); 664 665 return 0; 666 } 667 668 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 669 { 670 struct btrfs_block_group *block_group = ctl->private; 671 u64 max_bytes; 672 u64 bitmap_bytes; 673 u64 extent_bytes; 674 u64 size = block_group->length; 675 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 676 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 677 678 max_bitmaps = max_t(u64, max_bitmaps, 1); 679 680 ASSERT(ctl->total_bitmaps <= max_bitmaps); 681 682 /* 683 * We are trying to keep the total amount of memory used per 1GiB of 684 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation 685 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of 686 * bitmaps, we may end up using more memory than this. 687 */ 688 if (size < SZ_1G) 689 max_bytes = MAX_CACHE_BYTES_PER_GIG; 690 else 691 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); 692 693 bitmap_bytes = ctl->total_bitmaps * ctl->unit; 694 695 /* 696 * we want the extent entry threshold to always be at most 1/2 the max 697 * bytes we can have, or whatever is less than that. 
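	 * e.g. with MAX_CACHE_BYTES_PER_GIG = 64K and a 1GiB block group,
	 * max_bytes is 64K, the extent area is capped at 32K below, and
	 * extents_thresh ends up at most 32K / sizeof(struct btrfs_free_space).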
698 */ 699 extent_bytes = max_bytes - bitmap_bytes; 700 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 701 702 ctl->extents_thresh = 703 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 704 } 705 706 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 707 struct btrfs_free_space_ctl *ctl, 708 struct btrfs_path *path, u64 offset) 709 { 710 struct btrfs_fs_info *fs_info = root->fs_info; 711 struct btrfs_free_space_header *header; 712 struct extent_buffer *leaf; 713 struct btrfs_io_ctl io_ctl; 714 struct btrfs_key key; 715 struct btrfs_free_space *e, *n; 716 LIST_HEAD(bitmaps); 717 u64 num_entries; 718 u64 num_bitmaps; 719 u64 generation; 720 u8 type; 721 int ret = 0; 722 723 /* Nothing in the space cache, goodbye */ 724 if (!i_size_read(inode)) 725 return 0; 726 727 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 728 key.offset = offset; 729 key.type = 0; 730 731 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 732 if (ret < 0) 733 return 0; 734 else if (ret > 0) { 735 btrfs_release_path(path); 736 return 0; 737 } 738 739 ret = -1; 740 741 leaf = path->nodes[0]; 742 header = btrfs_item_ptr(leaf, path->slots[0], 743 struct btrfs_free_space_header); 744 num_entries = btrfs_free_space_entries(leaf, header); 745 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 746 generation = btrfs_free_space_generation(leaf, header); 747 btrfs_release_path(path); 748 749 if (!BTRFS_I(inode)->generation) { 750 btrfs_info(fs_info, 751 "the free space cache file (%llu) is invalid, skip it", 752 offset); 753 return 0; 754 } 755 756 if (BTRFS_I(inode)->generation != generation) { 757 btrfs_err(fs_info, 758 "free space inode generation (%llu) did not match free space cache generation (%llu)", 759 BTRFS_I(inode)->generation, generation); 760 return 0; 761 } 762 763 if (!num_entries) 764 return 0; 765 766 ret = io_ctl_init(&io_ctl, inode, 0); 767 if (ret) 768 return ret; 769 770 readahead_cache(inode); 771 772 ret = io_ctl_prepare_pages(&io_ctl, true); 773 if (ret) 774 goto out; 775 776 ret = io_ctl_check_crc(&io_ctl, 0); 777 if (ret) 778 goto free_cache; 779 780 ret = io_ctl_check_generation(&io_ctl, generation); 781 if (ret) 782 goto free_cache; 783 784 while (num_entries) { 785 e = kmem_cache_zalloc(btrfs_free_space_cachep, 786 GFP_NOFS); 787 if (!e) { 788 ret = -ENOMEM; 789 goto free_cache; 790 } 791 792 ret = io_ctl_read_entry(&io_ctl, e, &type); 793 if (ret) { 794 kmem_cache_free(btrfs_free_space_cachep, e); 795 goto free_cache; 796 } 797 798 if (!e->bytes) { 799 ret = -1; 800 kmem_cache_free(btrfs_free_space_cachep, e); 801 goto free_cache; 802 } 803 804 if (type == BTRFS_FREE_SPACE_EXTENT) { 805 spin_lock(&ctl->tree_lock); 806 ret = link_free_space(ctl, e); 807 spin_unlock(&ctl->tree_lock); 808 if (ret) { 809 btrfs_err(fs_info, 810 "Duplicate entries in free space cache, dumping"); 811 kmem_cache_free(btrfs_free_space_cachep, e); 812 goto free_cache; 813 } 814 } else { 815 ASSERT(num_bitmaps); 816 num_bitmaps--; 817 e->bitmap = kmem_cache_zalloc( 818 btrfs_free_space_bitmap_cachep, GFP_NOFS); 819 if (!e->bitmap) { 820 ret = -ENOMEM; 821 kmem_cache_free( 822 btrfs_free_space_cachep, e); 823 goto free_cache; 824 } 825 spin_lock(&ctl->tree_lock); 826 ret = link_free_space(ctl, e); 827 ctl->total_bitmaps++; 828 recalculate_thresholds(ctl); 829 spin_unlock(&ctl->tree_lock); 830 if (ret) { 831 btrfs_err(fs_info, 832 "Duplicate entries in free space cache, dumping"); 833 kmem_cache_free(btrfs_free_space_cachep, e); 834 goto free_cache; 835 } 836 
list_add_tail(&e->list, &bitmaps); 837 } 838 839 num_entries--; 840 } 841 842 io_ctl_unmap_page(&io_ctl); 843 844 /* 845 * We add the bitmaps at the end of the entries in order that 846 * the bitmap entries are added to the cache. 847 */ 848 list_for_each_entry_safe(e, n, &bitmaps, list) { 849 list_del_init(&e->list); 850 ret = io_ctl_read_bitmap(&io_ctl, e); 851 if (ret) 852 goto free_cache; 853 } 854 855 io_ctl_drop_pages(&io_ctl); 856 ret = 1; 857 out: 858 io_ctl_free(&io_ctl); 859 return ret; 860 free_cache: 861 io_ctl_drop_pages(&io_ctl); 862 __btrfs_remove_free_space_cache(ctl); 863 goto out; 864 } 865 866 static int copy_free_space_cache(struct btrfs_block_group *block_group, 867 struct btrfs_free_space_ctl *ctl) 868 { 869 struct btrfs_free_space *info; 870 struct rb_node *n; 871 int ret = 0; 872 873 while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) { 874 info = rb_entry(n, struct btrfs_free_space, offset_index); 875 if (!info->bitmap) { 876 unlink_free_space(ctl, info); 877 ret = btrfs_add_free_space(block_group, info->offset, 878 info->bytes); 879 kmem_cache_free(btrfs_free_space_cachep, info); 880 } else { 881 u64 offset = info->offset; 882 u64 bytes = ctl->unit; 883 884 while (search_bitmap(ctl, info, &offset, &bytes, 885 false) == 0) { 886 ret = btrfs_add_free_space(block_group, offset, 887 bytes); 888 if (ret) 889 break; 890 bitmap_clear_bits(ctl, info, offset, bytes); 891 offset = info->offset; 892 bytes = ctl->unit; 893 } 894 free_bitmap(ctl, info); 895 } 896 cond_resched(); 897 } 898 return ret; 899 } 900 901 int load_free_space_cache(struct btrfs_block_group *block_group) 902 { 903 struct btrfs_fs_info *fs_info = block_group->fs_info; 904 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 905 struct btrfs_free_space_ctl tmp_ctl = {}; 906 struct inode *inode; 907 struct btrfs_path *path; 908 int ret = 0; 909 bool matched; 910 u64 used = block_group->used; 911 912 /* 913 * Because we could potentially discard our loaded free space, we want 914 * to load everything into a temporary structure first, and then if it's 915 * valid copy it all into the actual free space ctl. 916 */ 917 btrfs_init_free_space_ctl(block_group, &tmp_ctl); 918 919 /* 920 * If this block group has been marked to be cleared for one reason or 921 * another then we can't trust the on disk cache, so just return. 922 */ 923 spin_lock(&block_group->lock); 924 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 925 spin_unlock(&block_group->lock); 926 return 0; 927 } 928 spin_unlock(&block_group->lock); 929 930 path = btrfs_alloc_path(); 931 if (!path) 932 return 0; 933 path->search_commit_root = 1; 934 path->skip_locking = 1; 935 936 /* 937 * We must pass a path with search_commit_root set to btrfs_iget in 938 * order to avoid a deadlock when allocating extents for the tree root. 939 * 940 * When we are COWing an extent buffer from the tree root, when looking 941 * for a free extent, at extent-tree.c:find_free_extent(), we can find 942 * block group without its free space cache loaded. When we find one 943 * we must load its space cache which requires reading its free space 944 * cache's inode item from the root tree. If this inode item is located 945 * in the same leaf that we started COWing before, then we end up in 946 * deadlock on the extent buffer (trying to read lock it when we 947 * previously write locked it). 
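	 * (That is why the path used here sets both ->search_commit_root and
	 * ->skip_locking before being passed to lookup_free_space_inode()
	 * below.)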
948 * 949 * It's safe to read the inode item using the commit root because 950 * block groups, once loaded, stay in memory forever (until they are 951 * removed) as well as their space caches once loaded. New block groups 952 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 953 * we will never try to read their inode item while the fs is mounted. 954 */ 955 inode = lookup_free_space_inode(block_group, path); 956 if (IS_ERR(inode)) { 957 btrfs_free_path(path); 958 return 0; 959 } 960 961 /* We may have converted the inode and made the cache invalid. */ 962 spin_lock(&block_group->lock); 963 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 964 spin_unlock(&block_group->lock); 965 btrfs_free_path(path); 966 goto out; 967 } 968 spin_unlock(&block_group->lock); 969 970 ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl, 971 path, block_group->start); 972 btrfs_free_path(path); 973 if (ret <= 0) 974 goto out; 975 976 matched = (tmp_ctl.free_space == (block_group->length - used - 977 block_group->bytes_super)); 978 979 if (matched) { 980 ret = copy_free_space_cache(block_group, &tmp_ctl); 981 /* 982 * ret == 1 means we successfully loaded the free space cache, 983 * so we need to re-set it here. 984 */ 985 if (ret == 0) 986 ret = 1; 987 } else { 988 __btrfs_remove_free_space_cache(&tmp_ctl); 989 btrfs_warn(fs_info, 990 "block group %llu has wrong amount of free space", 991 block_group->start); 992 ret = -1; 993 } 994 out: 995 if (ret < 0) { 996 /* This cache is bogus, make sure it gets cleared */ 997 spin_lock(&block_group->lock); 998 block_group->disk_cache_state = BTRFS_DC_CLEAR; 999 spin_unlock(&block_group->lock); 1000 ret = 0; 1001 1002 btrfs_warn(fs_info, 1003 "failed to load free space cache for block group %llu, rebuilding it now", 1004 block_group->start); 1005 } 1006 1007 spin_lock(&ctl->tree_lock); 1008 btrfs_discard_update_discardable(block_group); 1009 spin_unlock(&ctl->tree_lock); 1010 iput(inode); 1011 return ret; 1012 } 1013 1014 static noinline_for_stack 1015 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 1016 struct btrfs_free_space_ctl *ctl, 1017 struct btrfs_block_group *block_group, 1018 int *entries, int *bitmaps, 1019 struct list_head *bitmap_list) 1020 { 1021 int ret; 1022 struct btrfs_free_cluster *cluster = NULL; 1023 struct btrfs_free_cluster *cluster_locked = NULL; 1024 struct rb_node *node = rb_first(&ctl->free_space_offset); 1025 struct btrfs_trim_range *trim_entry; 1026 1027 /* Get the cluster for this block_group if it exists */ 1028 if (block_group && !list_empty(&block_group->cluster_list)) { 1029 cluster = list_entry(block_group->cluster_list.next, 1030 struct btrfs_free_cluster, 1031 block_group_list); 1032 } 1033 1034 if (!node && cluster) { 1035 cluster_locked = cluster; 1036 spin_lock(&cluster_locked->lock); 1037 node = rb_first(&cluster->root); 1038 cluster = NULL; 1039 } 1040 1041 /* Write out the extent entries */ 1042 while (node) { 1043 struct btrfs_free_space *e; 1044 1045 e = rb_entry(node, struct btrfs_free_space, offset_index); 1046 *entries += 1; 1047 1048 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 1049 e->bitmap); 1050 if (ret) 1051 goto fail; 1052 1053 if (e->bitmap) { 1054 list_add_tail(&e->list, bitmap_list); 1055 *bitmaps += 1; 1056 } 1057 node = rb_next(node); 1058 if (!node && cluster) { 1059 node = rb_first(&cluster->root); 1060 cluster_locked = cluster; 1061 spin_lock(&cluster_locked->lock); 1062 cluster = NULL; 1063 } 1064 } 1065 if (cluster_locked) { 1066 
spin_unlock(&cluster_locked->lock); 1067 cluster_locked = NULL; 1068 } 1069 1070 /* 1071 * Make sure we don't miss any range that was removed from our rbtree 1072 * because trimming is running. Otherwise after a umount+mount (or crash 1073 * after committing the transaction) we would leak free space and get 1074 * an inconsistent free space cache report from fsck. 1075 */ 1076 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 1077 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 1078 trim_entry->bytes, NULL); 1079 if (ret) 1080 goto fail; 1081 *entries += 1; 1082 } 1083 1084 return 0; 1085 fail: 1086 if (cluster_locked) 1087 spin_unlock(&cluster_locked->lock); 1088 return -ENOSPC; 1089 } 1090 1091 static noinline_for_stack int 1092 update_cache_item(struct btrfs_trans_handle *trans, 1093 struct btrfs_root *root, 1094 struct inode *inode, 1095 struct btrfs_path *path, u64 offset, 1096 int entries, int bitmaps) 1097 { 1098 struct btrfs_key key; 1099 struct btrfs_free_space_header *header; 1100 struct extent_buffer *leaf; 1101 int ret; 1102 1103 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1104 key.offset = offset; 1105 key.type = 0; 1106 1107 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1108 if (ret < 0) { 1109 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1110 EXTENT_DELALLOC, 0, 0, NULL); 1111 goto fail; 1112 } 1113 leaf = path->nodes[0]; 1114 if (ret > 0) { 1115 struct btrfs_key found_key; 1116 ASSERT(path->slots[0]); 1117 path->slots[0]--; 1118 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1119 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1120 found_key.offset != offset) { 1121 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1122 inode->i_size - 1, EXTENT_DELALLOC, 0, 1123 0, NULL); 1124 btrfs_release_path(path); 1125 goto fail; 1126 } 1127 } 1128 1129 BTRFS_I(inode)->generation = trans->transid; 1130 header = btrfs_item_ptr(leaf, path->slots[0], 1131 struct btrfs_free_space_header); 1132 btrfs_set_free_space_entries(leaf, header, entries); 1133 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1134 btrfs_set_free_space_generation(leaf, header, trans->transid); 1135 btrfs_mark_buffer_dirty(leaf); 1136 btrfs_release_path(path); 1137 1138 return 0; 1139 1140 fail: 1141 return -1; 1142 } 1143 1144 static noinline_for_stack int write_pinned_extent_entries( 1145 struct btrfs_trans_handle *trans, 1146 struct btrfs_block_group *block_group, 1147 struct btrfs_io_ctl *io_ctl, 1148 int *entries) 1149 { 1150 u64 start, extent_start, extent_end, len; 1151 struct extent_io_tree *unpin = NULL; 1152 int ret; 1153 1154 if (!block_group) 1155 return 0; 1156 1157 /* 1158 * We want to add any pinned extents to our free space cache 1159 * so we don't leak the space 1160 * 1161 * We shouldn't have switched the pinned extents yet so this is the 1162 * right one 1163 */ 1164 unpin = &trans->transaction->pinned_extents; 1165 1166 start = block_group->start; 1167 1168 while (start < block_group->start + block_group->length) { 1169 ret = find_first_extent_bit(unpin, start, 1170 &extent_start, &extent_end, 1171 EXTENT_DIRTY, NULL); 1172 if (ret) 1173 return 0; 1174 1175 /* This pinned extent is out of our range */ 1176 if (extent_start >= block_group->start + block_group->length) 1177 return 0; 1178 1179 extent_start = max(extent_start, start); 1180 extent_end = min(block_group->start + block_group->length, 1181 extent_end + 1); 1182 len = extent_end - extent_start; 1183 1184 *entries += 1; 1185 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 
1186 if (ret) 1187 return -ENOSPC; 1188 1189 start = extent_end; 1190 } 1191 1192 return 0; 1193 } 1194 1195 static noinline_for_stack int 1196 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1197 { 1198 struct btrfs_free_space *entry, *next; 1199 int ret; 1200 1201 /* Write out the bitmaps */ 1202 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1203 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1204 if (ret) 1205 return -ENOSPC; 1206 list_del_init(&entry->list); 1207 } 1208 1209 return 0; 1210 } 1211 1212 static int flush_dirty_cache(struct inode *inode) 1213 { 1214 int ret; 1215 1216 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1217 if (ret) 1218 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1219 EXTENT_DELALLOC, 0, 0, NULL); 1220 1221 return ret; 1222 } 1223 1224 static void noinline_for_stack 1225 cleanup_bitmap_list(struct list_head *bitmap_list) 1226 { 1227 struct btrfs_free_space *entry, *next; 1228 1229 list_for_each_entry_safe(entry, next, bitmap_list, list) 1230 list_del_init(&entry->list); 1231 } 1232 1233 static void noinline_for_stack 1234 cleanup_write_cache_enospc(struct inode *inode, 1235 struct btrfs_io_ctl *io_ctl, 1236 struct extent_state **cached_state) 1237 { 1238 io_ctl_drop_pages(io_ctl); 1239 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1240 i_size_read(inode) - 1, cached_state); 1241 } 1242 1243 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1244 struct btrfs_trans_handle *trans, 1245 struct btrfs_block_group *block_group, 1246 struct btrfs_io_ctl *io_ctl, 1247 struct btrfs_path *path, u64 offset) 1248 { 1249 int ret; 1250 struct inode *inode = io_ctl->inode; 1251 1252 if (!inode) 1253 return 0; 1254 1255 /* Flush the dirty pages in the cache file. */ 1256 ret = flush_dirty_cache(inode); 1257 if (ret) 1258 goto out; 1259 1260 /* Update the cache item to tell everyone this cache file is valid. */ 1261 ret = update_cache_item(trans, root, inode, path, offset, 1262 io_ctl->entries, io_ctl->bitmaps); 1263 out: 1264 if (ret) { 1265 invalidate_inode_pages2(inode->i_mapping); 1266 BTRFS_I(inode)->generation = 0; 1267 if (block_group) 1268 btrfs_debug(root->fs_info, 1269 "failed to write free space cache for block group %llu error %d", 1270 block_group->start, ret); 1271 } 1272 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1273 1274 if (block_group) { 1275 /* the dirty list is protected by the dirty_bgs_lock */ 1276 spin_lock(&trans->transaction->dirty_bgs_lock); 1277 1278 /* the disk_cache_state is protected by the block group lock */ 1279 spin_lock(&block_group->lock); 1280 1281 /* 1282 * only mark this as written if we didn't get put back on 1283 * the dirty list while waiting for IO. 
Otherwise our 1284 * cache state won't be right, and we won't get written again 1285 */ 1286 if (!ret && list_empty(&block_group->dirty_list)) 1287 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1288 else if (ret) 1289 block_group->disk_cache_state = BTRFS_DC_ERROR; 1290 1291 spin_unlock(&block_group->lock); 1292 spin_unlock(&trans->transaction->dirty_bgs_lock); 1293 io_ctl->inode = NULL; 1294 iput(inode); 1295 } 1296 1297 return ret; 1298 1299 } 1300 1301 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1302 struct btrfs_block_group *block_group, 1303 struct btrfs_path *path) 1304 { 1305 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1306 block_group, &block_group->io_ctl, 1307 path, block_group->start); 1308 } 1309 1310 /** 1311 * Write out cached info to an inode 1312 * 1313 * @root: root the inode belongs to 1314 * @inode: freespace inode we are writing out 1315 * @ctl: free space cache we are going to write out 1316 * @block_group: block_group for this cache if it belongs to a block_group 1317 * @io_ctl: holds context for the io 1318 * @trans: the trans handle 1319 * 1320 * This function writes out a free space cache struct to disk for quick recovery 1321 * on mount. This will return 0 if it was successful in writing the cache out, 1322 * or an errno if it was not. 1323 */ 1324 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1325 struct btrfs_free_space_ctl *ctl, 1326 struct btrfs_block_group *block_group, 1327 struct btrfs_io_ctl *io_ctl, 1328 struct btrfs_trans_handle *trans) 1329 { 1330 struct extent_state *cached_state = NULL; 1331 LIST_HEAD(bitmap_list); 1332 int entries = 0; 1333 int bitmaps = 0; 1334 int ret; 1335 int must_iput = 0; 1336 1337 if (!i_size_read(inode)) 1338 return -EIO; 1339 1340 WARN_ON(io_ctl->pages); 1341 ret = io_ctl_init(io_ctl, inode, 1); 1342 if (ret) 1343 return ret; 1344 1345 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1346 down_write(&block_group->data_rwsem); 1347 spin_lock(&block_group->lock); 1348 if (block_group->delalloc_bytes) { 1349 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1350 spin_unlock(&block_group->lock); 1351 up_write(&block_group->data_rwsem); 1352 BTRFS_I(inode)->generation = 0; 1353 ret = 0; 1354 must_iput = 1; 1355 goto out; 1356 } 1357 spin_unlock(&block_group->lock); 1358 } 1359 1360 /* Lock all pages first so we can lock the extent safely. */ 1361 ret = io_ctl_prepare_pages(io_ctl, false); 1362 if (ret) 1363 goto out_unlock; 1364 1365 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1366 &cached_state); 1367 1368 io_ctl_set_generation(io_ctl, trans->transid); 1369 1370 mutex_lock(&ctl->cache_writeout_mutex); 1371 /* Write out the extent entries in the free space cache */ 1372 spin_lock(&ctl->tree_lock); 1373 ret = write_cache_extent_entries(io_ctl, ctl, 1374 block_group, &entries, &bitmaps, 1375 &bitmap_list); 1376 if (ret) 1377 goto out_nospc_locked; 1378 1379 /* 1380 * Some spaces that are freed in the current transaction are pinned, 1381 * they will be added into free space cache after the transaction is 1382 * committed, we shouldn't lose them. 1383 * 1384 * If this changes while we are working we'll get added back to 1385 * the dirty list and redo it. 
No locking needed 1386 */ 1387 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries); 1388 if (ret) 1389 goto out_nospc_locked; 1390 1391 /* 1392 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1393 * locked while doing it because a concurrent trim can be manipulating 1394 * or freeing the bitmap. 1395 */ 1396 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1397 spin_unlock(&ctl->tree_lock); 1398 mutex_unlock(&ctl->cache_writeout_mutex); 1399 if (ret) 1400 goto out_nospc; 1401 1402 /* Zero out the rest of the pages just to make sure */ 1403 io_ctl_zero_remaining_pages(io_ctl); 1404 1405 /* Everything is written out, now we dirty the pages in the file. */ 1406 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages, 1407 io_ctl->num_pages, 0, i_size_read(inode), 1408 &cached_state, false); 1409 if (ret) 1410 goto out_nospc; 1411 1412 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1413 up_write(&block_group->data_rwsem); 1414 /* 1415 * Release the pages and unlock the extent, we will flush 1416 * them out later 1417 */ 1418 io_ctl_drop_pages(io_ctl); 1419 io_ctl_free(io_ctl); 1420 1421 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1422 i_size_read(inode) - 1, &cached_state); 1423 1424 /* 1425 * at this point the pages are under IO and we're happy, 1426 * The caller is responsible for waiting on them and updating 1427 * the cache and the inode 1428 */ 1429 io_ctl->entries = entries; 1430 io_ctl->bitmaps = bitmaps; 1431 1432 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1433 if (ret) 1434 goto out; 1435 1436 return 0; 1437 1438 out_nospc_locked: 1439 cleanup_bitmap_list(&bitmap_list); 1440 spin_unlock(&ctl->tree_lock); 1441 mutex_unlock(&ctl->cache_writeout_mutex); 1442 1443 out_nospc: 1444 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1445 1446 out_unlock: 1447 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1448 up_write(&block_group->data_rwsem); 1449 1450 out: 1451 io_ctl->inode = NULL; 1452 io_ctl_free(io_ctl); 1453 if (ret) { 1454 invalidate_inode_pages2(inode->i_mapping); 1455 BTRFS_I(inode)->generation = 0; 1456 } 1457 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1458 if (must_iput) 1459 iput(inode); 1460 return ret; 1461 } 1462 1463 int btrfs_write_out_cache(struct btrfs_trans_handle *trans, 1464 struct btrfs_block_group *block_group, 1465 struct btrfs_path *path) 1466 { 1467 struct btrfs_fs_info *fs_info = trans->fs_info; 1468 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1469 struct inode *inode; 1470 int ret = 0; 1471 1472 spin_lock(&block_group->lock); 1473 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1474 spin_unlock(&block_group->lock); 1475 return 0; 1476 } 1477 spin_unlock(&block_group->lock); 1478 1479 inode = lookup_free_space_inode(block_group, path); 1480 if (IS_ERR(inode)) 1481 return 0; 1482 1483 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1484 block_group, &block_group->io_ctl, trans); 1485 if (ret) { 1486 btrfs_debug(fs_info, 1487 "failed to write free space cache for block group %llu error %d", 1488 block_group->start, ret); 1489 spin_lock(&block_group->lock); 1490 block_group->disk_cache_state = BTRFS_DC_ERROR; 1491 spin_unlock(&block_group->lock); 1492 1493 block_group->io_ctl.inode = NULL; 1494 iput(inode); 1495 } 1496 1497 /* 1498 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1499 * to wait for IO and put the inode 1500 */ 1501 1502 return ret; 1503 } 1504 1505 static inline unsigned long 
offset_to_bit(u64 bitmap_start, u32 unit, 1506 u64 offset) 1507 { 1508 ASSERT(offset >= bitmap_start); 1509 offset -= bitmap_start; 1510 return (unsigned long)(div_u64(offset, unit)); 1511 } 1512 1513 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1514 { 1515 return (unsigned long)(div_u64(bytes, unit)); 1516 } 1517 1518 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1519 u64 offset) 1520 { 1521 u64 bitmap_start; 1522 u64 bytes_per_bitmap; 1523 1524 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1525 bitmap_start = offset - ctl->start; 1526 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1527 bitmap_start *= bytes_per_bitmap; 1528 bitmap_start += ctl->start; 1529 1530 return bitmap_start; 1531 } 1532 1533 static int tree_insert_offset(struct rb_root *root, u64 offset, 1534 struct rb_node *node, int bitmap) 1535 { 1536 struct rb_node **p = &root->rb_node; 1537 struct rb_node *parent = NULL; 1538 struct btrfs_free_space *info; 1539 1540 while (*p) { 1541 parent = *p; 1542 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1543 1544 if (offset < info->offset) { 1545 p = &(*p)->rb_left; 1546 } else if (offset > info->offset) { 1547 p = &(*p)->rb_right; 1548 } else { 1549 /* 1550 * we could have a bitmap entry and an extent entry 1551 * share the same offset. If this is the case, we want 1552 * the extent entry to always be found first if we do a 1553 * linear search through the tree, since we want to have 1554 * the quickest allocation time, and allocating from an 1555 * extent is faster than allocating from a bitmap. So 1556 * if we're inserting a bitmap and we find an entry at 1557 * this offset, we want to go right, or after this entry 1558 * logically. If we are inserting an extent and we've 1559 * found a bitmap, we want to go left, or before 1560 * logically. 1561 */ 1562 if (bitmap) { 1563 if (info->bitmap) { 1564 WARN_ON_ONCE(1); 1565 return -EEXIST; 1566 } 1567 p = &(*p)->rb_right; 1568 } else { 1569 if (!info->bitmap) { 1570 WARN_ON_ONCE(1); 1571 return -EEXIST; 1572 } 1573 p = &(*p)->rb_left; 1574 } 1575 } 1576 } 1577 1578 rb_link_node(node, parent, p); 1579 rb_insert_color(node, root); 1580 1581 return 0; 1582 } 1583 1584 /* 1585 * searches the tree for the given offset. 1586 * 1587 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1588 * want a section that has at least bytes size and comes at or after the given 1589 * offset. 1590 */ 1591 static struct btrfs_free_space * 1592 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1593 u64 offset, int bitmap_only, int fuzzy) 1594 { 1595 struct rb_node *n = ctl->free_space_offset.rb_node; 1596 struct btrfs_free_space *entry, *prev = NULL; 1597 1598 /* find entry that is closest to the 'offset' */ 1599 while (1) { 1600 if (!n) { 1601 entry = NULL; 1602 break; 1603 } 1604 1605 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1606 prev = entry; 1607 1608 if (offset < entry->offset) 1609 n = n->rb_left; 1610 else if (offset > entry->offset) 1611 n = n->rb_right; 1612 else 1613 break; 1614 } 1615 1616 if (bitmap_only) { 1617 if (!entry) 1618 return NULL; 1619 if (entry->bitmap) 1620 return entry; 1621 1622 /* 1623 * bitmap entry and extent entry may share same offset, 1624 * in that case, bitmap entry comes after extent entry. 
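		 * (tree_insert_offset() enforces that ordering: when offsets
		 * are equal, the bitmap entry is linked to the right of the
		 * extent entry, so rb_next() finds it.)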
1625 */ 1626 n = rb_next(n); 1627 if (!n) 1628 return NULL; 1629 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1630 if (entry->offset != offset) 1631 return NULL; 1632 1633 WARN_ON(!entry->bitmap); 1634 return entry; 1635 } else if (entry) { 1636 if (entry->bitmap) { 1637 /* 1638 * if previous extent entry covers the offset, 1639 * we should return it instead of the bitmap entry 1640 */ 1641 n = rb_prev(&entry->offset_index); 1642 if (n) { 1643 prev = rb_entry(n, struct btrfs_free_space, 1644 offset_index); 1645 if (!prev->bitmap && 1646 prev->offset + prev->bytes > offset) 1647 entry = prev; 1648 } 1649 } 1650 return entry; 1651 } 1652 1653 if (!prev) 1654 return NULL; 1655 1656 /* find last entry before the 'offset' */ 1657 entry = prev; 1658 if (entry->offset > offset) { 1659 n = rb_prev(&entry->offset_index); 1660 if (n) { 1661 entry = rb_entry(n, struct btrfs_free_space, 1662 offset_index); 1663 ASSERT(entry->offset <= offset); 1664 } else { 1665 if (fuzzy) 1666 return entry; 1667 else 1668 return NULL; 1669 } 1670 } 1671 1672 if (entry->bitmap) { 1673 n = rb_prev(&entry->offset_index); 1674 if (n) { 1675 prev = rb_entry(n, struct btrfs_free_space, 1676 offset_index); 1677 if (!prev->bitmap && 1678 prev->offset + prev->bytes > offset) 1679 return prev; 1680 } 1681 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1682 return entry; 1683 } else if (entry->offset + entry->bytes > offset) 1684 return entry; 1685 1686 if (!fuzzy) 1687 return NULL; 1688 1689 while (1) { 1690 if (entry->bitmap) { 1691 if (entry->offset + BITS_PER_BITMAP * 1692 ctl->unit > offset) 1693 break; 1694 } else { 1695 if (entry->offset + entry->bytes > offset) 1696 break; 1697 } 1698 1699 n = rb_next(&entry->offset_index); 1700 if (!n) 1701 return NULL; 1702 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1703 } 1704 return entry; 1705 } 1706 1707 static inline void 1708 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1709 struct btrfs_free_space *info) 1710 { 1711 rb_erase(&info->offset_index, &ctl->free_space_offset); 1712 ctl->free_extents--; 1713 1714 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1715 ctl->discardable_extents[BTRFS_STAT_CURR]--; 1716 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes; 1717 } 1718 } 1719 1720 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1721 struct btrfs_free_space *info) 1722 { 1723 __unlink_free_space(ctl, info); 1724 ctl->free_space -= info->bytes; 1725 } 1726 1727 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1728 struct btrfs_free_space *info) 1729 { 1730 int ret = 0; 1731 1732 ASSERT(info->bytes || info->bitmap); 1733 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1734 &info->offset_index, (info->bitmap != NULL)); 1735 if (ret) 1736 return ret; 1737 1738 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1739 ctl->discardable_extents[BTRFS_STAT_CURR]++; 1740 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 1741 } 1742 1743 ctl->free_space += info->bytes; 1744 ctl->free_extents++; 1745 return ret; 1746 } 1747 1748 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1749 struct btrfs_free_space *info, 1750 u64 offset, u64 bytes) 1751 { 1752 unsigned long start, count, end; 1753 int extent_delta = -1; 1754 1755 start = offset_to_bit(info->offset, ctl->unit, offset); 1756 count = bytes_to_bits(bytes, ctl->unit); 1757 end = start + count; 1758 ASSERT(end <= BITS_PER_BITMAP); 1759 1760 bitmap_clear(info->bitmap, start, count); 1761 1762 
info->bytes -= bytes; 1763 if (info->max_extent_size > ctl->unit) 1764 info->max_extent_size = 0; 1765 1766 if (start && test_bit(start - 1, info->bitmap)) 1767 extent_delta++; 1768 1769 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1770 extent_delta++; 1771 1772 info->bitmap_extents += extent_delta; 1773 if (!btrfs_free_space_trimmed(info)) { 1774 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1775 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 1776 } 1777 } 1778 1779 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1780 struct btrfs_free_space *info, u64 offset, 1781 u64 bytes) 1782 { 1783 __bitmap_clear_bits(ctl, info, offset, bytes); 1784 ctl->free_space -= bytes; 1785 } 1786 1787 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1788 struct btrfs_free_space *info, u64 offset, 1789 u64 bytes) 1790 { 1791 unsigned long start, count, end; 1792 int extent_delta = 1; 1793 1794 start = offset_to_bit(info->offset, ctl->unit, offset); 1795 count = bytes_to_bits(bytes, ctl->unit); 1796 end = start + count; 1797 ASSERT(end <= BITS_PER_BITMAP); 1798 1799 bitmap_set(info->bitmap, start, count); 1800 1801 info->bytes += bytes; 1802 ctl->free_space += bytes; 1803 1804 if (start && test_bit(start - 1, info->bitmap)) 1805 extent_delta--; 1806 1807 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1808 extent_delta--; 1809 1810 info->bitmap_extents += extent_delta; 1811 if (!btrfs_free_space_trimmed(info)) { 1812 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1813 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes; 1814 } 1815 } 1816 1817 /* 1818 * If we can not find suitable extent, we will use bytes to record 1819 * the size of the max extent. 1820 */ 1821 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1822 struct btrfs_free_space *bitmap_info, u64 *offset, 1823 u64 *bytes, bool for_alloc) 1824 { 1825 unsigned long found_bits = 0; 1826 unsigned long max_bits = 0; 1827 unsigned long bits, i; 1828 unsigned long next_zero; 1829 unsigned long extent_bits; 1830 1831 /* 1832 * Skip searching the bitmap if we don't have a contiguous section that 1833 * is large enough for this allocation. 
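	 * (max_extent_size is only a cached hint: zero means the maximum is
	 * unknown and we fall through to the search below; only a non-zero
	 * value that is already smaller than the request lets us bail early.)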
1834 */ 1835 if (for_alloc && 1836 bitmap_info->max_extent_size && 1837 bitmap_info->max_extent_size < *bytes) { 1838 *bytes = bitmap_info->max_extent_size; 1839 return -1; 1840 } 1841 1842 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1843 max_t(u64, *offset, bitmap_info->offset)); 1844 bits = bytes_to_bits(*bytes, ctl->unit); 1845 1846 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1847 if (for_alloc && bits == 1) { 1848 found_bits = 1; 1849 break; 1850 } 1851 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1852 BITS_PER_BITMAP, i); 1853 extent_bits = next_zero - i; 1854 if (extent_bits >= bits) { 1855 found_bits = extent_bits; 1856 break; 1857 } else if (extent_bits > max_bits) { 1858 max_bits = extent_bits; 1859 } 1860 i = next_zero; 1861 } 1862 1863 if (found_bits) { 1864 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1865 *bytes = (u64)(found_bits) * ctl->unit; 1866 return 0; 1867 } 1868 1869 *bytes = (u64)(max_bits) * ctl->unit; 1870 bitmap_info->max_extent_size = *bytes; 1871 return -1; 1872 } 1873 1874 static inline u64 get_max_extent_size(struct btrfs_free_space *entry) 1875 { 1876 if (entry->bitmap) 1877 return entry->max_extent_size; 1878 return entry->bytes; 1879 } 1880 1881 /* Cache the size of the max extent in bytes */ 1882 static struct btrfs_free_space * 1883 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1884 unsigned long align, u64 *max_extent_size) 1885 { 1886 struct btrfs_free_space *entry; 1887 struct rb_node *node; 1888 u64 tmp; 1889 u64 align_off; 1890 int ret; 1891 1892 if (!ctl->free_space_offset.rb_node) 1893 goto out; 1894 1895 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1896 if (!entry) 1897 goto out; 1898 1899 for (node = &entry->offset_index; node; node = rb_next(node)) { 1900 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1901 if (entry->bytes < *bytes) { 1902 *max_extent_size = max(get_max_extent_size(entry), 1903 *max_extent_size); 1904 continue; 1905 } 1906 1907 /* make sure the space returned is big enough 1908 * to match our requested alignment 1909 */ 1910 if (*bytes >= align) { 1911 tmp = entry->offset - ctl->start + align - 1; 1912 tmp = div64_u64(tmp, align); 1913 tmp = tmp * align + ctl->start; 1914 align_off = tmp - entry->offset; 1915 } else { 1916 align_off = 0; 1917 tmp = entry->offset; 1918 } 1919 1920 if (entry->bytes < *bytes + align_off) { 1921 *max_extent_size = max(get_max_extent_size(entry), 1922 *max_extent_size); 1923 continue; 1924 } 1925 1926 if (entry->bitmap) { 1927 u64 size = *bytes; 1928 1929 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1930 if (!ret) { 1931 *offset = tmp; 1932 *bytes = size; 1933 return entry; 1934 } else { 1935 *max_extent_size = 1936 max(get_max_extent_size(entry), 1937 *max_extent_size); 1938 } 1939 continue; 1940 } 1941 1942 *offset = tmp; 1943 *bytes = entry->bytes - align_off; 1944 return entry; 1945 } 1946 out: 1947 return NULL; 1948 } 1949 1950 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1951 struct btrfs_free_space *info, u64 offset) 1952 { 1953 info->offset = offset_to_bitmap(ctl, offset); 1954 info->bytes = 0; 1955 info->bitmap_extents = 0; 1956 INIT_LIST_HEAD(&info->list); 1957 link_free_space(ctl, info); 1958 ctl->total_bitmaps++; 1959 recalculate_thresholds(ctl); 1960 } 1961 1962 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1963 struct btrfs_free_space *bitmap_info) 1964 { 1965 /* 1966 * Normally when this is called, the bitmap is completely empty. 
However,
	 * if we are blowing up the free space cache for one reason or another
	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
	 * we may leave stats on the table.
	 */
	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			bitmap_info->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;

	}
	unlink_free_space(ctl, bitmap_info);
	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	recalculate_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *bitmap_info,
			       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and then
	 * go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * No entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * If the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok, the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes, enum btrfs_trim_state trim_state)
{
	u64 bytes_to_set = 0;
	u64 end;

	/*
	 * This is a tradeoff to make bitmap trim state minimal. We mark the
	 * whole bitmap untrimmed if at any point we add untrimmed regions.
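	 * (Tracking trim state per bit would need a second bitmap per entry;
	 * a single trim_state flag keeps the bookkeeping cheap, at the cost of
	 * possibly re-discarding space here that was already trimmed.)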
2071 */ 2072 if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) { 2073 if (btrfs_free_space_trimmed(info)) { 2074 ctl->discardable_extents[BTRFS_STAT_CURR] += 2075 info->bitmap_extents; 2076 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 2077 } 2078 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2079 } 2080 2081 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 2082 2083 bytes_to_set = min(end - offset, bytes); 2084 2085 bitmap_set_bits(ctl, info, offset, bytes_to_set); 2086 2087 /* 2088 * We set some bytes, we have no idea what the max extent size is 2089 * anymore. 2090 */ 2091 info->max_extent_size = 0; 2092 2093 return bytes_to_set; 2094 2095 } 2096 2097 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 2098 struct btrfs_free_space *info) 2099 { 2100 struct btrfs_block_group *block_group = ctl->private; 2101 struct btrfs_fs_info *fs_info = block_group->fs_info; 2102 bool forced = false; 2103 2104 #ifdef CONFIG_BTRFS_DEBUG 2105 if (btrfs_should_fragment_free_space(block_group)) 2106 forced = true; 2107 #endif 2108 2109 /* This is a way to reclaim large regions from the bitmaps. */ 2110 if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD) 2111 return false; 2112 2113 /* 2114 * If we are below the extents threshold then we can add this as an 2115 * extent, and don't have to deal with the bitmap 2116 */ 2117 if (!forced && ctl->free_extents < ctl->extents_thresh) { 2118 /* 2119 * If this block group has some small extents we don't want to 2120 * use up all of our free slots in the cache with them, we want 2121 * to reserve them for larger extents, however if we have plenty 2122 * of cache left then go ahead and add them, no sense in adding 2123 * the overhead of a bitmap if we don't have to. 2124 */ 2125 if (info->bytes <= fs_info->sectorsize * 8) { 2126 if (ctl->free_extents * 3 <= ctl->extents_thresh) 2127 return false; 2128 } else { 2129 return false; 2130 } 2131 } 2132 2133 /* 2134 * The original block groups from mkfs can be really small, like 8 2135 * megabytes, so don't bother with a bitmap for those entries. However 2136 * some block groups can be smaller than what a bitmap would cover but 2137 * are still large enough that they could overflow the 32k memory limit, 2138 * so still allow those block groups to have a bitmap 2139 * entry. 2140 */ 2141 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length) 2142 return false; 2143 2144 return true; 2145 } 2146 2147 static const struct btrfs_free_space_op free_space_op = { 2148 .use_bitmap = use_bitmap, 2149 }; 2150 2151 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2152 struct btrfs_free_space *info) 2153 { 2154 struct btrfs_free_space *bitmap_info; 2155 struct btrfs_block_group *block_group = NULL; 2156 int added = 0; 2157 u64 bytes, offset, bytes_added; 2158 enum btrfs_trim_state trim_state; 2159 int ret; 2160 2161 bytes = info->bytes; 2162 offset = info->offset; 2163 trim_state = info->trim_state; 2164 2165 if (!ctl->op->use_bitmap(ctl, info)) 2166 return 0; 2167 2168 if (ctl->op == &free_space_op) 2169 block_group = ctl->private; 2170 again: 2171 /* 2172 * Since we link bitmaps right into the cluster we need to see if we 2173 * have a cluster here, and if so and it has our bitmap we need to add 2174 * the free space to that bitmap.
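 * (A cluster that was set up from a bitmap keeps that bitmap as the first
 * entry of its rbtree, which is why only rb_first() is checked below.)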
2175 */ 2176 if (block_group && !list_empty(&block_group->cluster_list)) { 2177 struct btrfs_free_cluster *cluster; 2178 struct rb_node *node; 2179 struct btrfs_free_space *entry; 2180 2181 cluster = list_entry(block_group->cluster_list.next, 2182 struct btrfs_free_cluster, 2183 block_group_list); 2184 spin_lock(&cluster->lock); 2185 node = rb_first(&cluster->root); 2186 if (!node) { 2187 spin_unlock(&cluster->lock); 2188 goto no_cluster_bitmap; 2189 } 2190 2191 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2192 if (!entry->bitmap) { 2193 spin_unlock(&cluster->lock); 2194 goto no_cluster_bitmap; 2195 } 2196 2197 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2198 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2199 bytes, trim_state); 2200 bytes -= bytes_added; 2201 offset += bytes_added; 2202 } 2203 spin_unlock(&cluster->lock); 2204 if (!bytes) { 2205 ret = 1; 2206 goto out; 2207 } 2208 } 2209 2210 no_cluster_bitmap: 2211 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2212 1, 0); 2213 if (!bitmap_info) { 2214 ASSERT(added == 0); 2215 goto new_bitmap; 2216 } 2217 2218 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2219 trim_state); 2220 bytes -= bytes_added; 2221 offset += bytes_added; 2222 added = 0; 2223 2224 if (!bytes) { 2225 ret = 1; 2226 goto out; 2227 } else 2228 goto again; 2229 2230 new_bitmap: 2231 if (info && info->bitmap) { 2232 add_new_bitmap(ctl, info, offset); 2233 added = 1; 2234 info = NULL; 2235 goto again; 2236 } else { 2237 spin_unlock(&ctl->tree_lock); 2238 2239 /* no pre-allocated info, allocate a new one */ 2240 if (!info) { 2241 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2242 GFP_NOFS); 2243 if (!info) { 2244 spin_lock(&ctl->tree_lock); 2245 ret = -ENOMEM; 2246 goto out; 2247 } 2248 } 2249 2250 /* allocate the bitmap */ 2251 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2252 GFP_NOFS); 2253 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2254 spin_lock(&ctl->tree_lock); 2255 if (!info->bitmap) { 2256 ret = -ENOMEM; 2257 goto out; 2258 } 2259 goto again; 2260 } 2261 2262 out: 2263 if (info) { 2264 if (info->bitmap) 2265 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2266 info->bitmap); 2267 kmem_cache_free(btrfs_free_space_cachep, info); 2268 } 2269 2270 return ret; 2271 } 2272 2273 /* 2274 * Free space merging rules: 2275 * 1) Merge trimmed areas together 2276 * 2) Let untrimmed areas coalesce with trimmed areas 2277 * 3) Always pull neighboring regions from bitmaps 2278 * 2279 * The above rules are for when we merge free space based on btrfs_trim_state. 2280 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the 2281 * same reason: to promote larger extent regions which makes life easier for 2282 * find_free_extent(). Rule 2 enables coalescing based on the common path 2283 * being returning free space from btrfs_finish_extent_commit(). So when free 2284 * space is trimmed, it will prevent aggregating trimmed new region and 2285 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents 2286 * and provide find_free_extent() with the largest extents possible hoping for 2287 * the reuse path. 
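 *
 * For example: inserting an untrimmed extent next to a trimmed one merges
 * them into a single untrimmed entry (rule 2), while inserting a trimmed
 * extent next to an untrimmed one leaves the two entries separate.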
2288 */ 2289 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2290 struct btrfs_free_space *info, bool update_stat) 2291 { 2292 struct btrfs_free_space *left_info = NULL; 2293 struct btrfs_free_space *right_info; 2294 bool merged = false; 2295 u64 offset = info->offset; 2296 u64 bytes = info->bytes; 2297 const bool is_trimmed = btrfs_free_space_trimmed(info); 2298 2299 /* 2300 * first we want to see if there is free space adjacent to the range we 2301 * are adding, if there is remove that struct and add a new one to 2302 * cover the entire range 2303 */ 2304 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2305 if (right_info && rb_prev(&right_info->offset_index)) 2306 left_info = rb_entry(rb_prev(&right_info->offset_index), 2307 struct btrfs_free_space, offset_index); 2308 else if (!right_info) 2309 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2310 2311 /* See try_merge_free_space() comment. */ 2312 if (right_info && !right_info->bitmap && 2313 (!is_trimmed || btrfs_free_space_trimmed(right_info))) { 2314 if (update_stat) 2315 unlink_free_space(ctl, right_info); 2316 else 2317 __unlink_free_space(ctl, right_info); 2318 info->bytes += right_info->bytes; 2319 kmem_cache_free(btrfs_free_space_cachep, right_info); 2320 merged = true; 2321 } 2322 2323 /* See try_merge_free_space() comment. */ 2324 if (left_info && !left_info->bitmap && 2325 left_info->offset + left_info->bytes == offset && 2326 (!is_trimmed || btrfs_free_space_trimmed(left_info))) { 2327 if (update_stat) 2328 unlink_free_space(ctl, left_info); 2329 else 2330 __unlink_free_space(ctl, left_info); 2331 info->offset = left_info->offset; 2332 info->bytes += left_info->bytes; 2333 kmem_cache_free(btrfs_free_space_cachep, left_info); 2334 merged = true; 2335 } 2336 2337 return merged; 2338 } 2339 2340 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2341 struct btrfs_free_space *info, 2342 bool update_stat) 2343 { 2344 struct btrfs_free_space *bitmap; 2345 unsigned long i; 2346 unsigned long j; 2347 const u64 end = info->offset + info->bytes; 2348 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2349 u64 bytes; 2350 2351 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2352 if (!bitmap) 2353 return false; 2354 2355 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2356 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2357 if (j == i) 2358 return false; 2359 bytes = (j - i) * ctl->unit; 2360 info->bytes += bytes; 2361 2362 /* See try_merge_free_space() comment. */ 2363 if (!btrfs_free_space_trimmed(bitmap)) 2364 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2365 2366 if (update_stat) 2367 bitmap_clear_bits(ctl, bitmap, end, bytes); 2368 else 2369 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2370 2371 if (!bitmap->bytes) 2372 free_bitmap(ctl, bitmap); 2373 2374 return true; 2375 } 2376 2377 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2378 struct btrfs_free_space *info, 2379 bool update_stat) 2380 { 2381 struct btrfs_free_space *bitmap; 2382 u64 bitmap_offset; 2383 unsigned long i; 2384 unsigned long j; 2385 unsigned long prev_j; 2386 u64 bytes; 2387 2388 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2389 /* If we're on a boundary, try the previous logical bitmap. 
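 * (When info->offset sits exactly on a bitmap boundary, any bits worth
 * stealing live at the tail of the previous bitmap, not in the bitmap
 * that starts at info->offset.)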
*/ 2390 if (bitmap_offset == info->offset) { 2391 if (info->offset == 0) 2392 return false; 2393 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2394 } 2395 2396 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2397 if (!bitmap) 2398 return false; 2399 2400 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2401 j = 0; 2402 prev_j = (unsigned long)-1; 2403 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2404 if (j > i) 2405 break; 2406 prev_j = j; 2407 } 2408 if (prev_j == i) 2409 return false; 2410 2411 if (prev_j == (unsigned long)-1) 2412 bytes = (i + 1) * ctl->unit; 2413 else 2414 bytes = (i - prev_j) * ctl->unit; 2415 2416 info->offset -= bytes; 2417 info->bytes += bytes; 2418 2419 /* See try_merge_free_space() comment. */ 2420 if (!btrfs_free_space_trimmed(bitmap)) 2421 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2422 2423 if (update_stat) 2424 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2425 else 2426 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2427 2428 if (!bitmap->bytes) 2429 free_bitmap(ctl, bitmap); 2430 2431 return true; 2432 } 2433 2434 /* 2435 * We prefer always to allocate from extent entries, both for clustered and 2436 * non-clustered allocation requests. So when attempting to add a new extent 2437 * entry, try to see if there's adjacent free space in bitmap entries, and if 2438 * there is, migrate that space from the bitmaps to the extent. 2439 * Like this we get better chances of satisfying space allocation requests 2440 * because we attempt to satisfy them based on a single cache entry, and never 2441 * on 2 or more entries - even if the entries represent a contiguous free space 2442 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2443 * ends). 2444 */ 2445 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2446 struct btrfs_free_space *info, 2447 bool update_stat) 2448 { 2449 /* 2450 * Only work with disconnected entries, as we can change their offset, 2451 * and must be extent entries. 
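 * ctl->tree_lock must be held by the caller; both callers insert @info
 * into the rbtree only after the steal (and a possible re-merge) is done.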
2452 */ 2453 ASSERT(!info->bitmap); 2454 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2455 2456 if (ctl->total_bitmaps > 0) { 2457 bool stole_end; 2458 bool stole_front = false; 2459 2460 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2461 if (ctl->total_bitmaps > 0) 2462 stole_front = steal_from_bitmap_to_front(ctl, info, 2463 update_stat); 2464 2465 if (stole_end || stole_front) 2466 try_merge_free_space(ctl, info, update_stat); 2467 } 2468 } 2469 2470 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2471 struct btrfs_free_space_ctl *ctl, 2472 u64 offset, u64 bytes, 2473 enum btrfs_trim_state trim_state) 2474 { 2475 struct btrfs_block_group *block_group = ctl->private; 2476 struct btrfs_free_space *info; 2477 int ret = 0; 2478 u64 filter_bytes = bytes; 2479 2480 ASSERT(!btrfs_is_zoned(fs_info)); 2481 2482 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2483 if (!info) 2484 return -ENOMEM; 2485 2486 info->offset = offset; 2487 info->bytes = bytes; 2488 info->trim_state = trim_state; 2489 RB_CLEAR_NODE(&info->offset_index); 2490 2491 spin_lock(&ctl->tree_lock); 2492 2493 if (try_merge_free_space(ctl, info, true)) 2494 goto link; 2495 2496 /* 2497 * There was no extent directly to the left or right of this new 2498 * extent then we know we're going to have to allocate a new extent, so 2499 * before we do that see if we need to drop this into a bitmap 2500 */ 2501 ret = insert_into_bitmap(ctl, info); 2502 if (ret < 0) { 2503 goto out; 2504 } else if (ret) { 2505 ret = 0; 2506 goto out; 2507 } 2508 link: 2509 /* 2510 * Only steal free space from adjacent bitmaps if we're sure we're not 2511 * going to add the new free space to existing bitmap entries - because 2512 * that would mean unnecessary work that would be reverted. Therefore 2513 * attempt to steal space from bitmaps if we're adding an extent entry. 2514 */ 2515 steal_from_bitmap(ctl, info, true); 2516 2517 filter_bytes = max(filter_bytes, info->bytes); 2518 2519 ret = link_free_space(ctl, info); 2520 if (ret) 2521 kmem_cache_free(btrfs_free_space_cachep, info); 2522 out: 2523 btrfs_discard_update_discardable(block_group); 2524 spin_unlock(&ctl->tree_lock); 2525 2526 if (ret) { 2527 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2528 ASSERT(ret != -EEXIST); 2529 } 2530 2531 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2532 btrfs_discard_check_filter(block_group, filter_bytes); 2533 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2534 } 2535 2536 return ret; 2537 } 2538 2539 static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, 2540 u64 bytenr, u64 size, bool used) 2541 { 2542 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2543 u64 offset = bytenr - block_group->start; 2544 u64 to_free, to_unusable; 2545 2546 spin_lock(&ctl->tree_lock); 2547 if (!used) 2548 to_free = size; 2549 else if (offset >= block_group->alloc_offset) 2550 to_free = size; 2551 else if (offset + size <= block_group->alloc_offset) 2552 to_free = 0; 2553 else 2554 to_free = offset + size - block_group->alloc_offset; 2555 to_unusable = size - to_free; 2556 2557 ctl->free_space += to_free; 2558 /* 2559 * If the block group is read-only, we should account freed space into 2560 * bytes_readonly. 
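 * (A read-only block group already has its unusable space accounted in
 * space_info->bytes_readonly, so zone_unusable is only bumped for writable
 * groups below.)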
2561 */ 2562 if (!block_group->ro) 2563 block_group->zone_unusable += to_unusable; 2564 spin_unlock(&ctl->tree_lock); 2565 if (!used) { 2566 spin_lock(&block_group->lock); 2567 block_group->alloc_offset -= size; 2568 spin_unlock(&block_group->lock); 2569 } 2570 2571 /* All the region is now unusable. Mark it as unused and reclaim */ 2572 if (block_group->zone_unusable == block_group->length) 2573 btrfs_mark_bg_unused(block_group); 2574 2575 return 0; 2576 } 2577 2578 int btrfs_add_free_space(struct btrfs_block_group *block_group, 2579 u64 bytenr, u64 size) 2580 { 2581 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2582 2583 if (btrfs_is_zoned(block_group->fs_info)) 2584 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2585 true); 2586 2587 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2588 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2589 2590 return __btrfs_add_free_space(block_group->fs_info, 2591 block_group->free_space_ctl, 2592 bytenr, size, trim_state); 2593 } 2594 2595 int btrfs_add_free_space_unused(struct btrfs_block_group *block_group, 2596 u64 bytenr, u64 size) 2597 { 2598 if (btrfs_is_zoned(block_group->fs_info)) 2599 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2600 false); 2601 2602 return btrfs_add_free_space(block_group, bytenr, size); 2603 } 2604 2605 /* 2606 * This is a subtle distinction because when adding free space back in general, 2607 * we want it to be added as untrimmed for async. But in the case where we add 2608 * it on loading of a block group, we want to consider it trimmed. 2609 */ 2610 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2611 u64 bytenr, u64 size) 2612 { 2613 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2614 2615 if (btrfs_is_zoned(block_group->fs_info)) 2616 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2617 true); 2618 2619 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2620 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2621 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2622 2623 return __btrfs_add_free_space(block_group->fs_info, 2624 block_group->free_space_ctl, 2625 bytenr, size, trim_state); 2626 } 2627 2628 int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2629 u64 offset, u64 bytes) 2630 { 2631 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2632 struct btrfs_free_space *info; 2633 int ret; 2634 bool re_search = false; 2635 2636 if (btrfs_is_zoned(block_group->fs_info)) { 2637 /* 2638 * This can happen with conventional zones when replaying log. 2639 * Since the allocation info of tree-log nodes are not recorded 2640 * to the extent-tree, calculate_alloc_pointer() failed to 2641 * advance the allocation pointer after last allocated tree log 2642 * node blocks. 2643 * 2644 * This function is called from 2645 * btrfs_pin_extent_for_log_replay() when replaying the log. 2646 * Advance the pointer not to overwrite the tree-log nodes. 
2647 */ 2648 if (block_group->alloc_offset < offset + bytes) 2649 block_group->alloc_offset = offset + bytes; 2650 return 0; 2651 } 2652 2653 spin_lock(&ctl->tree_lock); 2654 2655 again: 2656 ret = 0; 2657 if (!bytes) 2658 goto out_lock; 2659 2660 info = tree_search_offset(ctl, offset, 0, 0); 2661 if (!info) { 2662 /* 2663 * oops didn't find an extent that matched the space we wanted 2664 * to remove, look for a bitmap instead 2665 */ 2666 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2667 1, 0); 2668 if (!info) { 2669 /* 2670 * If we found a partial bit of our free space in a 2671 * bitmap but then couldn't find the other part this may 2672 * be a problem, so WARN about it. 2673 */ 2674 WARN_ON(re_search); 2675 goto out_lock; 2676 } 2677 } 2678 2679 re_search = false; 2680 if (!info->bitmap) { 2681 unlink_free_space(ctl, info); 2682 if (offset == info->offset) { 2683 u64 to_free = min(bytes, info->bytes); 2684 2685 info->bytes -= to_free; 2686 info->offset += to_free; 2687 if (info->bytes) { 2688 ret = link_free_space(ctl, info); 2689 WARN_ON(ret); 2690 } else { 2691 kmem_cache_free(btrfs_free_space_cachep, info); 2692 } 2693 2694 offset += to_free; 2695 bytes -= to_free; 2696 goto again; 2697 } else { 2698 u64 old_end = info->bytes + info->offset; 2699 2700 info->bytes = offset - info->offset; 2701 ret = link_free_space(ctl, info); 2702 WARN_ON(ret); 2703 if (ret) 2704 goto out_lock; 2705 2706 /* Not enough bytes in this entry to satisfy us */ 2707 if (old_end < offset + bytes) { 2708 bytes -= old_end - offset; 2709 offset = old_end; 2710 goto again; 2711 } else if (old_end == offset + bytes) { 2712 /* all done */ 2713 goto out_lock; 2714 } 2715 spin_unlock(&ctl->tree_lock); 2716 2717 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2718 offset + bytes, 2719 old_end - (offset + bytes), 2720 info->trim_state); 2721 WARN_ON(ret); 2722 goto out; 2723 } 2724 } 2725 2726 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2727 if (ret == -EAGAIN) { 2728 re_search = true; 2729 goto again; 2730 } 2731 out_lock: 2732 btrfs_discard_update_discardable(block_group); 2733 spin_unlock(&ctl->tree_lock); 2734 out: 2735 return ret; 2736 } 2737 2738 void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2739 u64 bytes) 2740 { 2741 struct btrfs_fs_info *fs_info = block_group->fs_info; 2742 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2743 struct btrfs_free_space *info; 2744 struct rb_node *n; 2745 int count = 0; 2746 2747 /* 2748 * Zoned btrfs does not use free space tree and cluster. Just print 2749 * out the free space after the allocation offset. 2750 */ 2751 if (btrfs_is_zoned(fs_info)) { 2752 btrfs_info(fs_info, "free space %llu", 2753 block_group->length - block_group->alloc_offset); 2754 return; 2755 } 2756 2757 spin_lock(&ctl->tree_lock); 2758 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2759 info = rb_entry(n, struct btrfs_free_space, offset_index); 2760 if (info->bytes >= bytes && !block_group->ro) 2761 count++; 2762 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2763 info->offset, info->bytes, 2764 (info->bitmap) ? "yes" : "no"); 2765 } 2766 spin_unlock(&ctl->tree_lock); 2767 btrfs_info(fs_info, "block group has cluster?: %s", 2768 list_empty(&block_group->cluster_list) ? 
"no" : "yes"); 2769 btrfs_info(fs_info, 2770 "%d blocks of free space at or bigger than bytes is", count); 2771 } 2772 2773 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group, 2774 struct btrfs_free_space_ctl *ctl) 2775 { 2776 struct btrfs_fs_info *fs_info = block_group->fs_info; 2777 2778 spin_lock_init(&ctl->tree_lock); 2779 ctl->unit = fs_info->sectorsize; 2780 ctl->start = block_group->start; 2781 ctl->private = block_group; 2782 ctl->op = &free_space_op; 2783 INIT_LIST_HEAD(&ctl->trimming_ranges); 2784 mutex_init(&ctl->cache_writeout_mutex); 2785 2786 /* 2787 * we only want to have 32k of ram per block group for keeping 2788 * track of free space, and if we pass 1/2 of that we want to 2789 * start converting things over to using bitmaps 2790 */ 2791 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2792 } 2793 2794 /* 2795 * for a given cluster, put all of its extents back into the free 2796 * space cache. If the block group passed doesn't match the block group 2797 * pointed to by the cluster, someone else raced in and freed the 2798 * cluster already. In that case, we just return without changing anything 2799 */ 2800 static void __btrfs_return_cluster_to_free_space( 2801 struct btrfs_block_group *block_group, 2802 struct btrfs_free_cluster *cluster) 2803 { 2804 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2805 struct btrfs_free_space *entry; 2806 struct rb_node *node; 2807 2808 spin_lock(&cluster->lock); 2809 if (cluster->block_group != block_group) { 2810 spin_unlock(&cluster->lock); 2811 return; 2812 } 2813 2814 cluster->block_group = NULL; 2815 cluster->window_start = 0; 2816 list_del_init(&cluster->block_group_list); 2817 2818 node = rb_first(&cluster->root); 2819 while (node) { 2820 bool bitmap; 2821 2822 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2823 node = rb_next(&entry->offset_index); 2824 rb_erase(&entry->offset_index, &cluster->root); 2825 RB_CLEAR_NODE(&entry->offset_index); 2826 2827 bitmap = (entry->bitmap != NULL); 2828 if (!bitmap) { 2829 /* Merging treats extents as if they were new */ 2830 if (!btrfs_free_space_trimmed(entry)) { 2831 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2832 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2833 entry->bytes; 2834 } 2835 2836 try_merge_free_space(ctl, entry, false); 2837 steal_from_bitmap(ctl, entry, false); 2838 2839 /* As we insert directly, update these statistics */ 2840 if (!btrfs_free_space_trimmed(entry)) { 2841 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2842 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2843 entry->bytes; 2844 } 2845 } 2846 tree_insert_offset(&ctl->free_space_offset, 2847 entry->offset, &entry->offset_index, bitmap); 2848 } 2849 cluster->root = RB_ROOT; 2850 spin_unlock(&cluster->lock); 2851 btrfs_put_block_group(block_group); 2852 } 2853 2854 static void __btrfs_remove_free_space_cache_locked( 2855 struct btrfs_free_space_ctl *ctl) 2856 { 2857 struct btrfs_free_space *info; 2858 struct rb_node *node; 2859 2860 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2861 info = rb_entry(node, struct btrfs_free_space, offset_index); 2862 if (!info->bitmap) { 2863 unlink_free_space(ctl, info); 2864 kmem_cache_free(btrfs_free_space_cachep, info); 2865 } else { 2866 free_bitmap(ctl, info); 2867 } 2868 2869 cond_resched_lock(&ctl->tree_lock); 2870 } 2871 } 2872 2873 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2874 { 2875 spin_lock(&ctl->tree_lock); 2876 __btrfs_remove_free_space_cache_locked(ctl); 2877 
if (ctl->private) 2878 btrfs_discard_update_discardable(ctl->private); 2879 spin_unlock(&ctl->tree_lock); 2880 } 2881 2882 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2883 { 2884 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2885 struct btrfs_free_cluster *cluster; 2886 struct list_head *head; 2887 2888 spin_lock(&ctl->tree_lock); 2889 while ((head = block_group->cluster_list.next) != 2890 &block_group->cluster_list) { 2891 cluster = list_entry(head, struct btrfs_free_cluster, 2892 block_group_list); 2893 2894 WARN_ON(cluster->block_group != block_group); 2895 __btrfs_return_cluster_to_free_space(block_group, cluster); 2896 2897 cond_resched_lock(&ctl->tree_lock); 2898 } 2899 __btrfs_remove_free_space_cache_locked(ctl); 2900 btrfs_discard_update_discardable(block_group); 2901 spin_unlock(&ctl->tree_lock); 2902 2903 } 2904 2905 /** 2906 * btrfs_is_free_space_trimmed - see if everything is trimmed 2907 * @block_group: block_group of interest 2908 * 2909 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2910 */ 2911 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2912 { 2913 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2914 struct btrfs_free_space *info; 2915 struct rb_node *node; 2916 bool ret = true; 2917 2918 spin_lock(&ctl->tree_lock); 2919 node = rb_first(&ctl->free_space_offset); 2920 2921 while (node) { 2922 info = rb_entry(node, struct btrfs_free_space, offset_index); 2923 2924 if (!btrfs_free_space_trimmed(info)) { 2925 ret = false; 2926 break; 2927 } 2928 2929 node = rb_next(node); 2930 } 2931 2932 spin_unlock(&ctl->tree_lock); 2933 return ret; 2934 } 2935 2936 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2937 u64 offset, u64 bytes, u64 empty_size, 2938 u64 *max_extent_size) 2939 { 2940 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2941 struct btrfs_discard_ctl *discard_ctl = 2942 &block_group->fs_info->discard_ctl; 2943 struct btrfs_free_space *entry = NULL; 2944 u64 bytes_search = bytes + empty_size; 2945 u64 ret = 0; 2946 u64 align_gap = 0; 2947 u64 align_gap_len = 0; 2948 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2949 2950 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 2951 2952 spin_lock(&ctl->tree_lock); 2953 entry = find_free_space(ctl, &offset, &bytes_search, 2954 block_group->full_stripe_len, max_extent_size); 2955 if (!entry) 2956 goto out; 2957 2958 ret = offset; 2959 if (entry->bitmap) { 2960 bitmap_clear_bits(ctl, entry, offset, bytes); 2961 2962 if (!btrfs_free_space_trimmed(entry)) 2963 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2964 2965 if (!entry->bytes) 2966 free_bitmap(ctl, entry); 2967 } else { 2968 unlink_free_space(ctl, entry); 2969 align_gap_len = offset - entry->offset; 2970 align_gap = entry->offset; 2971 align_gap_trim_state = entry->trim_state; 2972 2973 if (!btrfs_free_space_trimmed(entry)) 2974 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2975 2976 entry->offset = offset + bytes; 2977 WARN_ON(entry->bytes < bytes + align_gap_len); 2978 2979 entry->bytes -= bytes + align_gap_len; 2980 if (!entry->bytes) 2981 kmem_cache_free(btrfs_free_space_cachep, entry); 2982 else 2983 link_free_space(ctl, entry); 2984 } 2985 out: 2986 btrfs_discard_update_discardable(block_group); 2987 spin_unlock(&ctl->tree_lock); 2988 2989 if (align_gap_len) 2990 __btrfs_add_free_space(block_group->fs_info, ctl, 2991 align_gap, align_gap_len, 2992 
align_gap_trim_state); 2993 return ret; 2994 } 2995 2996 /* 2997 * given a cluster, put all of its extents back into the free space 2998 * cache. If a block group is passed, this function will only free 2999 * a cluster that belongs to the passed block group. 3000 * 3001 * Otherwise, it'll get a reference on the block group pointed to by the 3002 * cluster and remove the cluster from it. 3003 */ 3004 void btrfs_return_cluster_to_free_space( 3005 struct btrfs_block_group *block_group, 3006 struct btrfs_free_cluster *cluster) 3007 { 3008 struct btrfs_free_space_ctl *ctl; 3009 3010 /* first, get a safe pointer to the block group */ 3011 spin_lock(&cluster->lock); 3012 if (!block_group) { 3013 block_group = cluster->block_group; 3014 if (!block_group) { 3015 spin_unlock(&cluster->lock); 3016 return; 3017 } 3018 } else if (cluster->block_group != block_group) { 3019 /* someone else has already freed it don't redo their work */ 3020 spin_unlock(&cluster->lock); 3021 return; 3022 } 3023 btrfs_get_block_group(block_group); 3024 spin_unlock(&cluster->lock); 3025 3026 ctl = block_group->free_space_ctl; 3027 3028 /* now return any extents the cluster had on it */ 3029 spin_lock(&ctl->tree_lock); 3030 __btrfs_return_cluster_to_free_space(block_group, cluster); 3031 spin_unlock(&ctl->tree_lock); 3032 3033 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 3034 3035 /* finally drop our ref */ 3036 btrfs_put_block_group(block_group); 3037 } 3038 3039 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 3040 struct btrfs_free_cluster *cluster, 3041 struct btrfs_free_space *entry, 3042 u64 bytes, u64 min_start, 3043 u64 *max_extent_size) 3044 { 3045 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3046 int err; 3047 u64 search_start = cluster->window_start; 3048 u64 search_bytes = bytes; 3049 u64 ret = 0; 3050 3051 search_start = min_start; 3052 search_bytes = bytes; 3053 3054 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 3055 if (err) { 3056 *max_extent_size = max(get_max_extent_size(entry), 3057 *max_extent_size); 3058 return 0; 3059 } 3060 3061 ret = search_start; 3062 __bitmap_clear_bits(ctl, entry, ret, bytes); 3063 3064 return ret; 3065 } 3066 3067 /* 3068 * given a cluster, try to allocate 'bytes' from it, returns 0 3069 * if it couldn't find anything suitably large, or a logical disk offset 3070 * if things worked out 3071 */ 3072 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 3073 struct btrfs_free_cluster *cluster, u64 bytes, 3074 u64 min_start, u64 *max_extent_size) 3075 { 3076 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3077 struct btrfs_discard_ctl *discard_ctl = 3078 &block_group->fs_info->discard_ctl; 3079 struct btrfs_free_space *entry = NULL; 3080 struct rb_node *node; 3081 u64 ret = 0; 3082 3083 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3084 3085 spin_lock(&cluster->lock); 3086 if (bytes > cluster->max_size) 3087 goto out; 3088 3089 if (cluster->block_group != block_group) 3090 goto out; 3091 3092 node = rb_first(&cluster->root); 3093 if (!node) 3094 goto out; 3095 3096 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3097 while (1) { 3098 if (entry->bytes < bytes) 3099 *max_extent_size = max(get_max_extent_size(entry), 3100 *max_extent_size); 3101 3102 if (entry->bytes < bytes || 3103 (!entry->bitmap && entry->offset < min_start)) { 3104 node = rb_next(&entry->offset_index); 3105 if (!node) 3106 break; 3107 entry = rb_entry(node, struct 
btrfs_free_space, 3108 offset_index); 3109 continue; 3110 } 3111 3112 if (entry->bitmap) { 3113 ret = btrfs_alloc_from_bitmap(block_group, 3114 cluster, entry, bytes, 3115 cluster->window_start, 3116 max_extent_size); 3117 if (ret == 0) { 3118 node = rb_next(&entry->offset_index); 3119 if (!node) 3120 break; 3121 entry = rb_entry(node, struct btrfs_free_space, 3122 offset_index); 3123 continue; 3124 } 3125 cluster->window_start += bytes; 3126 } else { 3127 ret = entry->offset; 3128 3129 entry->offset += bytes; 3130 entry->bytes -= bytes; 3131 } 3132 3133 break; 3134 } 3135 out: 3136 spin_unlock(&cluster->lock); 3137 3138 if (!ret) 3139 return 0; 3140 3141 spin_lock(&ctl->tree_lock); 3142 3143 if (!btrfs_free_space_trimmed(entry)) 3144 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3145 3146 ctl->free_space -= bytes; 3147 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3148 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3149 3150 spin_lock(&cluster->lock); 3151 if (entry->bytes == 0) { 3152 rb_erase(&entry->offset_index, &cluster->root); 3153 ctl->free_extents--; 3154 if (entry->bitmap) { 3155 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3156 entry->bitmap); 3157 ctl->total_bitmaps--; 3158 recalculate_thresholds(ctl); 3159 } else if (!btrfs_free_space_trimmed(entry)) { 3160 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3161 } 3162 kmem_cache_free(btrfs_free_space_cachep, entry); 3163 } 3164 3165 spin_unlock(&cluster->lock); 3166 spin_unlock(&ctl->tree_lock); 3167 3168 return ret; 3169 } 3170 3171 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3172 struct btrfs_free_space *entry, 3173 struct btrfs_free_cluster *cluster, 3174 u64 offset, u64 bytes, 3175 u64 cont1_bytes, u64 min_bytes) 3176 { 3177 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3178 unsigned long next_zero; 3179 unsigned long i; 3180 unsigned long want_bits; 3181 unsigned long min_bits; 3182 unsigned long found_bits; 3183 unsigned long max_bits = 0; 3184 unsigned long start = 0; 3185 unsigned long total_found = 0; 3186 int ret; 3187 3188 i = offset_to_bit(entry->offset, ctl->unit, 3189 max_t(u64, offset, entry->offset)); 3190 want_bits = bytes_to_bits(bytes, ctl->unit); 3191 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3192 3193 /* 3194 * Don't bother looking for a cluster in this bitmap if it's heavily 3195 * fragmented. 
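 * (max_extent_size is a cached result of a previous failed search; it is
 * reset to zero whenever new bits are set in the bitmap, so a stale value
 * can never hide a newly usable range.)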
3196 */ 3197 if (entry->max_extent_size && 3198 entry->max_extent_size < cont1_bytes) 3199 return -ENOSPC; 3200 again: 3201 found_bits = 0; 3202 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3203 next_zero = find_next_zero_bit(entry->bitmap, 3204 BITS_PER_BITMAP, i); 3205 if (next_zero - i >= min_bits) { 3206 found_bits = next_zero - i; 3207 if (found_bits > max_bits) 3208 max_bits = found_bits; 3209 break; 3210 } 3211 if (next_zero - i > max_bits) 3212 max_bits = next_zero - i; 3213 i = next_zero; 3214 } 3215 3216 if (!found_bits) { 3217 entry->max_extent_size = (u64)max_bits * ctl->unit; 3218 return -ENOSPC; 3219 } 3220 3221 if (!total_found) { 3222 start = i; 3223 cluster->max_size = 0; 3224 } 3225 3226 total_found += found_bits; 3227 3228 if (cluster->max_size < found_bits * ctl->unit) 3229 cluster->max_size = found_bits * ctl->unit; 3230 3231 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3232 i = next_zero + 1; 3233 goto again; 3234 } 3235 3236 cluster->window_start = start * ctl->unit + entry->offset; 3237 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3238 ret = tree_insert_offset(&cluster->root, entry->offset, 3239 &entry->offset_index, 1); 3240 ASSERT(!ret); /* -EEXIST; Logic error */ 3241 3242 trace_btrfs_setup_cluster(block_group, cluster, 3243 total_found * ctl->unit, 1); 3244 return 0; 3245 } 3246 3247 /* 3248 * This searches the block group for just extents to fill the cluster with. 3249 * Try to find a cluster with at least bytes total bytes, at least one 3250 * extent of cont1_bytes, and other clusters of at least min_bytes. 3251 */ 3252 static noinline int 3253 setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3254 struct btrfs_free_cluster *cluster, 3255 struct list_head *bitmaps, u64 offset, u64 bytes, 3256 u64 cont1_bytes, u64 min_bytes) 3257 { 3258 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3259 struct btrfs_free_space *first = NULL; 3260 struct btrfs_free_space *entry = NULL; 3261 struct btrfs_free_space *last; 3262 struct rb_node *node; 3263 u64 window_free; 3264 u64 max_extent; 3265 u64 total_size = 0; 3266 3267 entry = tree_search_offset(ctl, offset, 0, 1); 3268 if (!entry) 3269 return -ENOSPC; 3270 3271 /* 3272 * We don't want bitmaps, so just move along until we find a normal 3273 * extent entry. 
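 * Bitmap entries that we skip here are collected on @bitmaps so that
 * setup_cluster_bitmap() can try them if no suitable extents are found.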
3274 */ 3275 while (entry->bitmap || entry->bytes < min_bytes) { 3276 if (entry->bitmap && list_empty(&entry->list)) 3277 list_add_tail(&entry->list, bitmaps); 3278 node = rb_next(&entry->offset_index); 3279 if (!node) 3280 return -ENOSPC; 3281 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3282 } 3283 3284 window_free = entry->bytes; 3285 max_extent = entry->bytes; 3286 first = entry; 3287 last = entry; 3288 3289 for (node = rb_next(&entry->offset_index); node; 3290 node = rb_next(&entry->offset_index)) { 3291 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3292 3293 if (entry->bitmap) { 3294 if (list_empty(&entry->list)) 3295 list_add_tail(&entry->list, bitmaps); 3296 continue; 3297 } 3298 3299 if (entry->bytes < min_bytes) 3300 continue; 3301 3302 last = entry; 3303 window_free += entry->bytes; 3304 if (entry->bytes > max_extent) 3305 max_extent = entry->bytes; 3306 } 3307 3308 if (window_free < bytes || max_extent < cont1_bytes) 3309 return -ENOSPC; 3310 3311 cluster->window_start = first->offset; 3312 3313 node = &first->offset_index; 3314 3315 /* 3316 * now we've found our entries, pull them out of the free space 3317 * cache and put them into the cluster rbtree 3318 */ 3319 do { 3320 int ret; 3321 3322 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3323 node = rb_next(&entry->offset_index); 3324 if (entry->bitmap || entry->bytes < min_bytes) 3325 continue; 3326 3327 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3328 ret = tree_insert_offset(&cluster->root, entry->offset, 3329 &entry->offset_index, 0); 3330 total_size += entry->bytes; 3331 ASSERT(!ret); /* -EEXIST; Logic error */ 3332 } while (node && entry != last); 3333 3334 cluster->max_size = max_extent; 3335 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3336 return 0; 3337 } 3338 3339 /* 3340 * This specifically looks for bitmaps that may work in the cluster, we assume 3341 * that we have already failed to find extents that will work. 3342 */ 3343 static noinline int 3344 setup_cluster_bitmap(struct btrfs_block_group *block_group, 3345 struct btrfs_free_cluster *cluster, 3346 struct list_head *bitmaps, u64 offset, u64 bytes, 3347 u64 cont1_bytes, u64 min_bytes) 3348 { 3349 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3350 struct btrfs_free_space *entry = NULL; 3351 int ret = -ENOSPC; 3352 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3353 3354 if (ctl->total_bitmaps == 0) 3355 return -ENOSPC; 3356 3357 /* 3358 * The bitmap that covers offset won't be in the list unless offset 3359 * is just its start offset. 3360 */ 3361 if (!list_empty(bitmaps)) 3362 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3363 3364 if (!entry || entry->offset != bitmap_offset) { 3365 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3366 if (entry && list_empty(&entry->list)) 3367 list_add(&entry->list, bitmaps); 3368 } 3369 3370 list_for_each_entry(entry, bitmaps, list) { 3371 if (entry->bytes < bytes) 3372 continue; 3373 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3374 bytes, cont1_bytes, min_bytes); 3375 if (!ret) 3376 return 0; 3377 } 3378 3379 /* 3380 * The bitmaps list has all the bitmaps that record free space 3381 * starting after offset, so no more search is required. 3382 */ 3383 return -ENOSPC; 3384 } 3385 3386 /* 3387 * here we try to find a cluster of blocks in a block group. The goal 3388 * is to find at least bytes+empty_size. 3389 * We might not find them all in one contiguous area. 
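 * The cluster is built either purely from extent entries or from a single
 * bitmap entry, never a mix of the two.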
3390 * 3391 * returns zero and sets up cluster if things worked out, otherwise 3392 * it returns -enospc 3393 */ 3394 int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3395 struct btrfs_free_cluster *cluster, 3396 u64 offset, u64 bytes, u64 empty_size) 3397 { 3398 struct btrfs_fs_info *fs_info = block_group->fs_info; 3399 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3400 struct btrfs_free_space *entry, *tmp; 3401 LIST_HEAD(bitmaps); 3402 u64 min_bytes; 3403 u64 cont1_bytes; 3404 int ret; 3405 3406 /* 3407 * Choose the minimum extent size we'll require for this 3408 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3409 * For metadata, allow allocates with smaller extents. For 3410 * data, keep it dense. 3411 */ 3412 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3413 cont1_bytes = min_bytes = bytes + empty_size; 3414 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3415 cont1_bytes = bytes; 3416 min_bytes = fs_info->sectorsize; 3417 } else { 3418 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3419 min_bytes = fs_info->sectorsize; 3420 } 3421 3422 spin_lock(&ctl->tree_lock); 3423 3424 /* 3425 * If we know we don't have enough space to make a cluster don't even 3426 * bother doing all the work to try and find one. 3427 */ 3428 if (ctl->free_space < bytes) { 3429 spin_unlock(&ctl->tree_lock); 3430 return -ENOSPC; 3431 } 3432 3433 spin_lock(&cluster->lock); 3434 3435 /* someone already found a cluster, hooray */ 3436 if (cluster->block_group) { 3437 ret = 0; 3438 goto out; 3439 } 3440 3441 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3442 min_bytes); 3443 3444 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3445 bytes + empty_size, 3446 cont1_bytes, min_bytes); 3447 if (ret) 3448 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3449 offset, bytes + empty_size, 3450 cont1_bytes, min_bytes); 3451 3452 /* Clear our temporary list */ 3453 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3454 list_del_init(&entry->list); 3455 3456 if (!ret) { 3457 btrfs_get_block_group(block_group); 3458 list_add_tail(&cluster->block_group_list, 3459 &block_group->cluster_list); 3460 cluster->block_group = block_group; 3461 } else { 3462 trace_btrfs_failed_cluster_setup(block_group); 3463 } 3464 out: 3465 spin_unlock(&cluster->lock); 3466 spin_unlock(&ctl->tree_lock); 3467 3468 return ret; 3469 } 3470 3471 /* 3472 * simple code to zero out a cluster 3473 */ 3474 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3475 { 3476 spin_lock_init(&cluster->lock); 3477 spin_lock_init(&cluster->refill_lock); 3478 cluster->root = RB_ROOT; 3479 cluster->max_size = 0; 3480 cluster->fragmented = false; 3481 INIT_LIST_HEAD(&cluster->block_group_list); 3482 cluster->block_group = NULL; 3483 } 3484 3485 static int do_trimming(struct btrfs_block_group *block_group, 3486 u64 *total_trimmed, u64 start, u64 bytes, 3487 u64 reserved_start, u64 reserved_bytes, 3488 enum btrfs_trim_state reserved_trim_state, 3489 struct btrfs_trim_range *trim_entry) 3490 { 3491 struct btrfs_space_info *space_info = block_group->space_info; 3492 struct btrfs_fs_info *fs_info = block_group->fs_info; 3493 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3494 int ret; 3495 int update = 0; 3496 const u64 end = start + bytes; 3497 const u64 reserved_end = reserved_start + reserved_bytes; 3498 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3499 u64 trimmed = 0; 3500 3501 spin_lock(&space_info->lock); 
3502 spin_lock(&block_group->lock); 3503 if (!block_group->ro) { 3504 block_group->reserved += reserved_bytes; 3505 space_info->bytes_reserved += reserved_bytes; 3506 update = 1; 3507 } 3508 spin_unlock(&block_group->lock); 3509 spin_unlock(&space_info->lock); 3510 3511 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3512 if (!ret) { 3513 *total_trimmed += trimmed; 3514 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3515 } 3516 3517 mutex_lock(&ctl->cache_writeout_mutex); 3518 if (reserved_start < start) 3519 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3520 start - reserved_start, 3521 reserved_trim_state); 3522 if (start + bytes < reserved_start + reserved_bytes) 3523 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3524 reserved_trim_state); 3525 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3526 list_del(&trim_entry->list); 3527 mutex_unlock(&ctl->cache_writeout_mutex); 3528 3529 if (update) { 3530 spin_lock(&space_info->lock); 3531 spin_lock(&block_group->lock); 3532 if (block_group->ro) 3533 space_info->bytes_readonly += reserved_bytes; 3534 block_group->reserved -= reserved_bytes; 3535 space_info->bytes_reserved -= reserved_bytes; 3536 spin_unlock(&block_group->lock); 3537 spin_unlock(&space_info->lock); 3538 } 3539 3540 return ret; 3541 } 3542 3543 /* 3544 * If @async is set, then we will trim 1 region and return. 3545 */ 3546 static int trim_no_bitmap(struct btrfs_block_group *block_group, 3547 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3548 bool async) 3549 { 3550 struct btrfs_discard_ctl *discard_ctl = 3551 &block_group->fs_info->discard_ctl; 3552 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3553 struct btrfs_free_space *entry; 3554 struct rb_node *node; 3555 int ret = 0; 3556 u64 extent_start; 3557 u64 extent_bytes; 3558 enum btrfs_trim_state extent_trim_state; 3559 u64 bytes; 3560 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3561 3562 while (start < end) { 3563 struct btrfs_trim_range trim_entry; 3564 3565 mutex_lock(&ctl->cache_writeout_mutex); 3566 spin_lock(&ctl->tree_lock); 3567 3568 if (ctl->free_space < minlen) 3569 goto out_unlock; 3570 3571 entry = tree_search_offset(ctl, start, 0, 1); 3572 if (!entry) 3573 goto out_unlock; 3574 3575 /* Skip bitmaps and if async, already trimmed entries */ 3576 while (entry->bitmap || 3577 (async && btrfs_free_space_trimmed(entry))) { 3578 node = rb_next(&entry->offset_index); 3579 if (!node) 3580 goto out_unlock; 3581 entry = rb_entry(node, struct btrfs_free_space, 3582 offset_index); 3583 } 3584 3585 if (entry->offset >= end) 3586 goto out_unlock; 3587 3588 extent_start = entry->offset; 3589 extent_bytes = entry->bytes; 3590 extent_trim_state = entry->trim_state; 3591 if (async) { 3592 start = entry->offset; 3593 bytes = entry->bytes; 3594 if (bytes < minlen) { 3595 spin_unlock(&ctl->tree_lock); 3596 mutex_unlock(&ctl->cache_writeout_mutex); 3597 goto next; 3598 } 3599 unlink_free_space(ctl, entry); 3600 /* 3601 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3602 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3603 * X when we come back around. So trim it now. 
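 * The remainder (still at least BTRFS_ASYNC_DISCARD_MIN_FILTER bytes) is
 * linked back into the tree for a later pass.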
3604 */ 3605 if (max_discard_size && 3606 bytes >= (max_discard_size + 3607 BTRFS_ASYNC_DISCARD_MIN_FILTER)) { 3608 bytes = max_discard_size; 3609 extent_bytes = max_discard_size; 3610 entry->offset += max_discard_size; 3611 entry->bytes -= max_discard_size; 3612 link_free_space(ctl, entry); 3613 } else { 3614 kmem_cache_free(btrfs_free_space_cachep, entry); 3615 } 3616 } else { 3617 start = max(start, extent_start); 3618 bytes = min(extent_start + extent_bytes, end) - start; 3619 if (bytes < minlen) { 3620 spin_unlock(&ctl->tree_lock); 3621 mutex_unlock(&ctl->cache_writeout_mutex); 3622 goto next; 3623 } 3624 3625 unlink_free_space(ctl, entry); 3626 kmem_cache_free(btrfs_free_space_cachep, entry); 3627 } 3628 3629 spin_unlock(&ctl->tree_lock); 3630 trim_entry.start = extent_start; 3631 trim_entry.bytes = extent_bytes; 3632 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3633 mutex_unlock(&ctl->cache_writeout_mutex); 3634 3635 ret = do_trimming(block_group, total_trimmed, start, bytes, 3636 extent_start, extent_bytes, extent_trim_state, 3637 &trim_entry); 3638 if (ret) { 3639 block_group->discard_cursor = start + bytes; 3640 break; 3641 } 3642 next: 3643 start += bytes; 3644 block_group->discard_cursor = start; 3645 if (async && *total_trimmed) 3646 break; 3647 3648 if (fatal_signal_pending(current)) { 3649 ret = -ERESTARTSYS; 3650 break; 3651 } 3652 3653 cond_resched(); 3654 } 3655 3656 return ret; 3657 3658 out_unlock: 3659 block_group->discard_cursor = btrfs_block_group_end(block_group); 3660 spin_unlock(&ctl->tree_lock); 3661 mutex_unlock(&ctl->cache_writeout_mutex); 3662 3663 return ret; 3664 } 3665 3666 /* 3667 * If we break out of trimming a bitmap prematurely, we should reset the 3668 * trimming bit. In a rather contrived case, it's possible to race here so 3669 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. 3670 * 3671 * start = start of bitmap 3672 * end = near end of bitmap 3673 * 3674 * Thread 1: Thread 2: 3675 * trim_bitmaps(start) 3676 * trim_bitmaps(end) 3677 * end_trimming_bitmap() 3678 * reset_trimming_bitmap() 3679 */ 3680 static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) 3681 { 3682 struct btrfs_free_space *entry; 3683 3684 spin_lock(&ctl->tree_lock); 3685 entry = tree_search_offset(ctl, offset, 1, 0); 3686 if (entry) { 3687 if (btrfs_free_space_trimmed(entry)) { 3688 ctl->discardable_extents[BTRFS_STAT_CURR] += 3689 entry->bitmap_extents; 3690 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; 3691 } 3692 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3693 } 3694 3695 spin_unlock(&ctl->tree_lock); 3696 } 3697 3698 static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, 3699 struct btrfs_free_space *entry) 3700 { 3701 if (btrfs_free_space_trimming_bitmap(entry)) { 3702 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; 3703 ctl->discardable_extents[BTRFS_STAT_CURR] -= 3704 entry->bitmap_extents; 3705 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; 3706 } 3707 } 3708 3709 /* 3710 * If @async is set, then we will trim 1 region and return.
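 * Unlike trim_no_bitmap(), trim state here is tracked per bitmap entry:
 * a fully scanned bitmap moves from BTRFS_TRIM_STATE_TRIMMING to
 * BTRFS_TRIM_STATE_TRIMMED via end_trimming_bitmap().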
3711 */ 3712 static int trim_bitmaps(struct btrfs_block_group *block_group, 3713 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3714 u64 maxlen, bool async) 3715 { 3716 struct btrfs_discard_ctl *discard_ctl = 3717 &block_group->fs_info->discard_ctl; 3718 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3719 struct btrfs_free_space *entry; 3720 int ret = 0; 3721 int ret2; 3722 u64 bytes; 3723 u64 offset = offset_to_bitmap(ctl, start); 3724 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3725 3726 while (offset < end) { 3727 bool next_bitmap = false; 3728 struct btrfs_trim_range trim_entry; 3729 3730 mutex_lock(&ctl->cache_writeout_mutex); 3731 spin_lock(&ctl->tree_lock); 3732 3733 if (ctl->free_space < minlen) { 3734 block_group->discard_cursor = 3735 btrfs_block_group_end(block_group); 3736 spin_unlock(&ctl->tree_lock); 3737 mutex_unlock(&ctl->cache_writeout_mutex); 3738 break; 3739 } 3740 3741 entry = tree_search_offset(ctl, offset, 1, 0); 3742 /* 3743 * Bitmaps are marked trimmed lossily now to prevent constant 3744 * discarding of the same bitmap (the reason why we are bound 3745 * by the filters). So, retrim the block group bitmaps when we 3746 * are preparing to punt to the unused_bgs list. This uses 3747 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3748 * which is the only discard index which sets minlen to 0. 3749 */ 3750 if (!entry || (async && minlen && start == offset && 3751 btrfs_free_space_trimmed(entry))) { 3752 spin_unlock(&ctl->tree_lock); 3753 mutex_unlock(&ctl->cache_writeout_mutex); 3754 next_bitmap = true; 3755 goto next; 3756 } 3757 3758 /* 3759 * Async discard bitmap trimming begins at by setting the start 3760 * to be key.objectid and the offset_to_bitmap() aligns to the 3761 * start of the bitmap. This lets us know we are fully 3762 * scanning the bitmap rather than only some portion of it. 3763 */ 3764 if (start == offset) 3765 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3766 3767 bytes = minlen; 3768 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3769 if (ret2 || start >= end) { 3770 /* 3771 * We lossily consider a bitmap trimmed if we only skip 3772 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3773 */ 3774 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3775 end_trimming_bitmap(ctl, entry); 3776 else 3777 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3778 spin_unlock(&ctl->tree_lock); 3779 mutex_unlock(&ctl->cache_writeout_mutex); 3780 next_bitmap = true; 3781 goto next; 3782 } 3783 3784 /* 3785 * We already trimmed a region, but are using the locking above 3786 * to reset the trim_state. 3787 */ 3788 if (async && *total_trimmed) { 3789 spin_unlock(&ctl->tree_lock); 3790 mutex_unlock(&ctl->cache_writeout_mutex); 3791 goto out; 3792 } 3793 3794 bytes = min(bytes, end - start); 3795 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3796 spin_unlock(&ctl->tree_lock); 3797 mutex_unlock(&ctl->cache_writeout_mutex); 3798 goto next; 3799 } 3800 3801 /* 3802 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3803 * If X < @minlen, we won't trim X when we come back around. 3804 * So trim it now. We differ here from trimming extents as we 3805 * don't keep individual state per bit. 
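 * (The untrimmed remainder simply stays set in the bitmap and is picked
 * up again on a later pass.)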
3806 */ 3807 if (async && 3808 max_discard_size && 3809 bytes > (max_discard_size + minlen)) 3810 bytes = max_discard_size; 3811 3812 bitmap_clear_bits(ctl, entry, start, bytes); 3813 if (entry->bytes == 0) 3814 free_bitmap(ctl, entry); 3815 3816 spin_unlock(&ctl->tree_lock); 3817 trim_entry.start = start; 3818 trim_entry.bytes = bytes; 3819 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3820 mutex_unlock(&ctl->cache_writeout_mutex); 3821 3822 ret = do_trimming(block_group, total_trimmed, start, bytes, 3823 start, bytes, 0, &trim_entry); 3824 if (ret) { 3825 reset_trimming_bitmap(ctl, offset); 3826 block_group->discard_cursor = 3827 btrfs_block_group_end(block_group); 3828 break; 3829 } 3830 next: 3831 if (next_bitmap) { 3832 offset += BITS_PER_BITMAP * ctl->unit; 3833 start = offset; 3834 } else { 3835 start += bytes; 3836 } 3837 block_group->discard_cursor = start; 3838 3839 if (fatal_signal_pending(current)) { 3840 if (start != offset) 3841 reset_trimming_bitmap(ctl, offset); 3842 ret = -ERESTARTSYS; 3843 break; 3844 } 3845 3846 cond_resched(); 3847 } 3848 3849 if (offset >= end) 3850 block_group->discard_cursor = end; 3851 3852 out: 3853 return ret; 3854 } 3855 3856 int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3857 u64 *trimmed, u64 start, u64 end, u64 minlen) 3858 { 3859 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3860 int ret; 3861 u64 rem = 0; 3862 3863 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3864 3865 *trimmed = 0; 3866 3867 spin_lock(&block_group->lock); 3868 if (block_group->removed) { 3869 spin_unlock(&block_group->lock); 3870 return 0; 3871 } 3872 btrfs_freeze_block_group(block_group); 3873 spin_unlock(&block_group->lock); 3874 3875 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3876 if (ret) 3877 goto out; 3878 3879 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3880 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3881 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3882 if (rem) 3883 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3884 out: 3885 btrfs_unfreeze_block_group(block_group); 3886 return ret; 3887 } 3888 3889 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3890 u64 *trimmed, u64 start, u64 end, u64 minlen, 3891 bool async) 3892 { 3893 int ret; 3894 3895 *trimmed = 0; 3896 3897 spin_lock(&block_group->lock); 3898 if (block_group->removed) { 3899 spin_unlock(&block_group->lock); 3900 return 0; 3901 } 3902 btrfs_freeze_block_group(block_group); 3903 spin_unlock(&block_group->lock); 3904 3905 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3906 btrfs_unfreeze_block_group(block_group); 3907 3908 return ret; 3909 } 3910 3911 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3912 u64 *trimmed, u64 start, u64 end, u64 minlen, 3913 u64 maxlen, bool async) 3914 { 3915 int ret; 3916 3917 *trimmed = 0; 3918 3919 spin_lock(&block_group->lock); 3920 if (block_group->removed) { 3921 spin_unlock(&block_group->lock); 3922 return 0; 3923 } 3924 btrfs_freeze_block_group(block_group); 3925 spin_unlock(&block_group->lock); 3926 3927 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3928 async); 3929 3930 btrfs_unfreeze_block_group(block_group); 3931 3932 return ret; 3933 } 3934 3935 bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info) 3936 { 3937 return btrfs_super_cache_generation(fs_info->super_copy); 3938 } 3939 3940 static int 
        div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
        /* If we ended in the middle of a bitmap, reset the trimming flag */
        if (rem)
                reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
out:
        btrfs_unfreeze_block_group(block_group);
        return ret;
}

int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
                                   u64 *trimmed, u64 start, u64 end,
                                   u64 minlen, bool async)
{
        int ret;

        *trimmed = 0;

        spin_lock(&block_group->lock);
        if (block_group->removed) {
                spin_unlock(&block_group->lock);
                return 0;
        }
        btrfs_freeze_block_group(block_group);
        spin_unlock(&block_group->lock);

        ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
        btrfs_unfreeze_block_group(block_group);

        return ret;
}

int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
                                   u64 *trimmed, u64 start, u64 end,
                                   u64 minlen, u64 maxlen, bool async)
{
        int ret;

        *trimmed = 0;

        spin_lock(&block_group->lock);
        if (block_group->removed) {
                spin_unlock(&block_group->lock);
                return 0;
        }
        btrfs_freeze_block_group(block_group);
        spin_unlock(&block_group->lock);

        ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
                           async);

        btrfs_unfreeze_block_group(block_group);

        return ret;
}

bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
{
        return btrfs_super_cache_generation(fs_info->super_copy);
}

static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
                                       struct btrfs_trans_handle *trans)
{
        struct btrfs_block_group *block_group;
        struct rb_node *node;
        int ret = 0;

        btrfs_info(fs_info, "cleaning free space cache v1");

        node = rb_first(&fs_info->block_group_cache_tree);
        while (node) {
                block_group = rb_entry(node, struct btrfs_block_group,
                                       cache_node);
                ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
                if (ret)
                        goto out;
                node = rb_next(node);
        }
out:
        return ret;
}

int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
{
        struct btrfs_trans_handle *trans;
        int ret;

        /*
         * update_super_roots will appropriately set or unset
         * super_copy->cache_generation based on SPACE_CACHE and
         * BTRFS_FS_CLEANUP_SPACE_CACHE_V1.  For this reason, we need a
         * transaction commit whether we are enabling space cache v1 and don't
         * have any other work to do, or are disabling it and removing free
         * space inodes.
         */
        trans = btrfs_start_transaction(fs_info->tree_root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);

        if (!active) {
                set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
                ret = cleanup_free_space_cache_v1(fs_info, trans);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        btrfs_end_transaction(trans);
                        goto out;
                }
        }

        ret = btrfs_commit_transaction(trans);
out:
        clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);

        return ret;
}
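/*
 * Sketch of how a caller is expected to drive the two helpers above.  The
 * real callers live in the mount/remount paths; this fragment is purely
 * illustrative and not copied from them:
 *
 *	bool want = btrfs_test_opt(fs_info, SPACE_CACHE);
 *
 *	if (want != btrfs_free_space_cache_v1_active(fs_info))
 *		ret = btrfs_set_free_space_cache_v1_active(fs_info, want);
 *
 * i.e. compare the desired mount option with the on-disk cache_generation and
 * only pay for a transaction commit when the two disagree.
 */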
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically; it
 * doesn't do any of the merging that add_free_space() does.  This acts a lot
 * like how the free space cache loading works, so you can get really weird
 * configurations.
 */
int test_add_free_space_entry(struct btrfs_block_group *cache,
                              u64 offset, u64 bytes, bool bitmap)
{
        struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
        struct btrfs_free_space *info = NULL, *bitmap_info;
        void *map = NULL;
        enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
        u64 bytes_added;
        int ret;

again:
        if (!info) {
                info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
                if (!info)
                        return -ENOMEM;
        }

        if (!bitmap) {
                spin_lock(&ctl->tree_lock);
                info->offset = offset;
                info->bytes = bytes;
                info->max_extent_size = 0;
                ret = link_free_space(ctl, info);
                spin_unlock(&ctl->tree_lock);
                if (ret)
                        kmem_cache_free(btrfs_free_space_cachep, info);
                return ret;
        }

        if (!map) {
                map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
                if (!map) {
                        kmem_cache_free(btrfs_free_space_cachep, info);
                        return -ENOMEM;
                }
        }

        spin_lock(&ctl->tree_lock);
        bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
                                         1, 0);
        if (!bitmap_info) {
                info->bitmap = map;
                map = NULL;
                add_new_bitmap(ctl, info, offset);
                bitmap_info = info;
                info = NULL;
        }

        bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
                                          trim_state);

        bytes -= bytes_added;
        offset += bytes_added;
        spin_unlock(&ctl->tree_lock);

        if (bytes)
                goto again;

        if (info)
                kmem_cache_free(btrfs_free_space_cachep, info);
        if (map)
                kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
        return 0;
}
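/*
 * Example of how a self test might lean on the helper above (the offsets and
 * sizes here are illustrative, not copied from tests/free-space-tests.c):
 * placing an extent entry and a bitmap entry back to back lets a test pin
 * down exactly where the space lives, independent of the heuristics in
 * add_free_space():
 *
 *	ret = test_add_free_space_entry(cache, 0, SZ_4M, 0);      (extent)
 *	ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);  (bitmap)
 */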
/*
 * Checks to see if the given range is in the free space cache.  This is
 * really just used to check the absence of space, so if there is free space
 * in the range at all we will return 1.
 */
int test_check_exists(struct btrfs_block_group *cache,
                      u64 offset, u64 bytes)
{
        struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
        struct btrfs_free_space *info;
        int ret = 0;

        spin_lock(&ctl->tree_lock);
        info = tree_search_offset(ctl, offset, 0, 0);
        if (!info) {
                info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
                                          1, 0);
                if (!info)
                        goto out;
        }

have_info:
        if (info->bitmap) {
                u64 bit_off, bit_bytes;
                struct rb_node *n;
                struct btrfs_free_space *tmp;

                bit_off = offset;
                bit_bytes = ctl->unit;
                ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
                if (!ret) {
                        if (bit_off == offset) {
                                ret = 1;
                                goto out;
                        } else if (bit_off > offset &&
                                   offset + bytes > bit_off) {
                                ret = 1;
                                goto out;
                        }
                }

                n = rb_prev(&info->offset_index);
                while (n) {
                        tmp = rb_entry(n, struct btrfs_free_space,
                                       offset_index);
                        if (tmp->offset + tmp->bytes < offset)
                                break;
                        if (offset + bytes < tmp->offset) {
                                n = rb_prev(&tmp->offset_index);
                                continue;
                        }
                        info = tmp;
                        goto have_info;
                }

                n = rb_next(&info->offset_index);
                while (n) {
                        tmp = rb_entry(n, struct btrfs_free_space,
                                       offset_index);
                        if (offset + bytes < tmp->offset)
                                break;
                        if (tmp->offset + tmp->bytes < offset) {
                                n = rb_next(&tmp->offset_index);
                                continue;
                        }
                        info = tmp;
                        goto have_info;
                }

                ret = 0;
                goto out;
        }

        if (info->offset == offset) {
                ret = 1;
                goto out;
        }

        if (offset > info->offset && offset < info->offset + info->bytes)
                ret = 1;
out:
        spin_unlock(&ctl->tree_lock);
        return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */