// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "misc.h"
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
#define FORCE_EXTENT_THRESHOLD	SZ_1M

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc);
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info);
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
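	 * memalloc_nofs_save() below scopes GFP_NOFS over the btrfs_iget_path()
	 * call, and memalloc_nofs_restore() drops that scope again afterwards.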
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return inode;

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
				      struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->start);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	/* We inline CRCs for the free disk space cache */
	const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
			  BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

/*
 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
 * handles lookup, otherwise it takes ownership and iputs the inode.
 * Don't reuse an inode pointer after passing it into this function.
 */
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
				  struct inode *inode,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!inode)
		inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		if (PTR_ERR(inode) != -ENOENT)
			ret = PTR_ERR(inode);
		goto out;
	}
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret) {
		btrfs_add_delayed_iput(inode);
		goto out;
	}
	clear_nlink(inode);
	/* One for the block groups ref */
	spin_lock(&block_group->lock);
	if (block_group->iref) {
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		iput(inode);
	} else {
		spin_unlock(&block_group->lock);
	}
	/* One for the lookup ref */
	btrfs_add_delayed_iput(inode);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = block_group->start;
	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
				-1, 1);
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto out;
	}
	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		       btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
					 0, BTRFS_EXTENT_DATA_KEY, NULL);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state ra;
	unsigned long last_index;

	file_ra_state_init(&ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index);
}

static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
{
	struct page *page;
	struct inode *inode = io_ctl->inode;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		int ret;

		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}

		ret = set_page_extent_mapped(page);
		if (ret < 0) {
			unlock_page(page);
			put_page(page);
			io_ctl_drop_pages(io_ctl);
			return ret;
		}

		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (page->mapping != inode->i_mapping) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "free space cache page truncated");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
			if (!PageUptodate(page)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++)
		clear_page_dirty_for_io(io_ctl->pages[i]);

	return 0;
}

static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas. If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);

	put_unaligned_le64(generation, io_ctl->cur);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	u64 cache_gen;

	/*
	 * Skip the crc area. If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);

	cache_gen = get_unaligned_le64(io_ctl->cur);
	if (cache_gen != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
			cache_gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	put_unaligned_le64(offset, &entry->offset);
	put_unaligned_le64(bytes, &entry->bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
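	 * Bitmaps are written with copy_page(), so each one has to start at a
	 * page boundary; crc the partially filled page before moving on.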
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	copy_page(io_ctl->cur, bitmap);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = get_unaligned_le64(&e->offset);
	entry->bytes = get_unaligned_le64(&e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	copy_page(entry->bitmap, io_ctl->cur);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->length;
	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	max_bitmaps = max_t(u64, max_bitmaps, 1);

	ASSERT(ctl->total_bitmaps <= max_bitmaps);

	/*
	 * We are trying to keep the total amount of memory used per 1GiB of
	 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
	 * bitmaps, we may end up using more memory than this.
	 */
	if (size < SZ_1G)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);

	bitmap_bytes = ctl->total_bitmaps * ctl->unit;

	/*
	 * We want the extent entry threshold to be whatever remains of
	 * max_bytes after accounting for the bitmaps, capped at half of
	 * max_bytes.
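	 * For example, a 1GiB block group gets max_bytes = 64KiB; with one
	 * bitmap charged at ctl->unit bytes, ctl->extents_thresh ends up as
	 * min(64KiB - ctl->unit, 32KiB) divided by the size of one
	 * struct btrfs_free_space.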
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);

	ctl->extents_thresh =
		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}

static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct btrfs_io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	LIST_HEAD(bitmaps);
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (!BTRFS_I(inode)->generation) {
		btrfs_info(fs_info,
			   "the free space cache file (%llu) is invalid, skip it",
			   offset);
		return 0;
	}

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(fs_info,
			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
			  BTRFS_I(inode)->generation, generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, 0);
	if (ret)
		return ret;

	readahead_cache(inode);

	ret = io_ctl_prepare_pages(&io_ctl, true);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e) {
			ret = -ENOMEM;
			goto free_cache;
		}

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			ret = -1;
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			ASSERT(num_bitmaps);
			num_bitmaps--;
			e->bitmap = kmem_cache_zalloc(
					btrfs_free_space_bitmap_cachep, GFP_NOFS);
			if (!e->bitmap) {
				ret = -ENOMEM;
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			recalculate_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
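			/*
			 * Queue the bitmap entry; its bit contents are read
			 * later, once every entry header has been consumed.
			 */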
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

static int copy_free_space_cache(struct btrfs_block_group *block_group,
				 struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int ret = 0;

	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			ret = btrfs_add_free_space(block_group, info->offset,
						   info->bytes);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			u64 offset = info->offset;
			u64 bytes = ctl->unit;

			while (search_bitmap(ctl, info, &offset, &bytes,
					     false) == 0) {
				ret = btrfs_add_free_space(block_group, offset,
							   bytes);
				if (ret)
					break;
				bitmap_clear_bits(ctl, info, offset, bytes);
				offset = info->offset;
				bytes = ctl->unit;
			}
			free_bitmap(ctl, info);
		}
		cond_resched();
	}
	return ret;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space_ctl tmp_ctl = {};
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = block_group->used;

	/*
	 * Because we could potentially discard our loaded free space, we want
	 * to load everything into a temporary structure first, and then if it's
	 * valid copy it all into the actual free space ctl.
	 */
	btrfs_init_free_space_ctl(block_group, &tmp_ctl);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find a
	 * block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in
	 * a deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
	 *
	 * It's safe to read the inode item using the commit root because
	 * block groups, once loaded, stay in memory forever (until they are
	 * removed) as well as their space caches once loaded. New block groups
	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
	 * we will never try to read their inode item while the fs is mounted.
	 */
	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
				      path, block_group->start);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	matched = (tmp_ctl.free_space == (block_group->length - used -
					  block_group->bytes_super));

	if (matched) {
		ret = copy_free_space_cache(block_group, &tmp_ctl);
		/*
		 * ret == 1 means we successfully loaded the free space cache,
		 * so we need to re-set it here.
		 */
		if (ret == 0)
			ret = 1;
	} else {
		__btrfs_remove_free_space_cache(&tmp_ctl);
		btrfs_warn(fs_info,
			   "block group %llu has wrong amount of free space",
			   block_group->start);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		btrfs_warn(fs_info,
			   "failed to load free space cache for block group %llu, rebuilding it now",
			   block_group->start);
	}

	spin_lock(&ctl->tree_lock);
	btrfs_discard_update_discardable(block_group);
	spin_unlock(&ctl->tree_lock);
	iput(inode);
	return ret;
}

static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space_ctl *ctl,
			      struct btrfs_block_group *block_group,
			      int *entries, int *bitmaps,
			      struct list_head *bitmap_list)
{
	int ret;
	struct btrfs_free_cluster *cluster = NULL;
	struct btrfs_free_cluster *cluster_locked = NULL;
	struct rb_node *node = rb_first(&ctl->free_space_offset);
	struct btrfs_trim_range *trim_entry;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
	}

	if (!node && cluster) {
		cluster_locked = cluster;
		spin_lock(&cluster_locked->lock);
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		*entries += 1;

		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto fail;

		if (e->bitmap) {
			list_add_tail(&e->list, bitmap_list);
			*bitmaps += 1;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster_locked = cluster;
			spin_lock(&cluster_locked->lock);
			cluster = NULL;
		}
	}
	if (cluster_locked) {
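		/* Drop the cluster lock taken while walking its rbtree above. */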
		spin_unlock(&cluster_locked->lock);
		cluster_locked = NULL;
	}

	/*
	 * Make sure we don't miss any range that was removed from our rbtree
	 * because trimming is running. Otherwise after a umount+mount (or crash
	 * after committing the transaction) we would leak free space and get
	 * an inconsistent free space cache report from fsck.
	 */
	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
				       trim_entry->bytes, NULL);
		if (ret)
			goto fail;
		*entries += 1;
	}

	return 0;
fail:
	if (cluster_locked)
		spin_unlock(&cluster_locked->lock);
	return -ENOSPC;
}

static noinline_for_stack int
update_cache_item(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct inode *inode,
		  struct btrfs_path *path, u64 offset,
		  int entries, int bitmaps)
{
	struct btrfs_key key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);
		goto fail;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		ASSERT(path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1, EXTENT_DELALLOC, 0,
					 0, NULL);
			btrfs_release_path(path);
			goto fail;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;

fail:
	return -1;
}

static noinline_for_stack int write_pinned_extent_entries(
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_io_ctl *io_ctl,
			    int *entries)
{
	u64 start, extent_start, extent_end, len;
	struct extent_io_tree *unpin = NULL;
	int ret;

	if (!block_group)
		return 0;

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 *
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = &trans->transaction->pinned_extents;

	start = block_group->start;

	while (start < block_group->start + block_group->length) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			return 0;

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->start + block_group->length)
			return 0;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->start + block_group->length,
				 extent_end + 1);
		len = extent_end - extent_start;

		*entries += 1;
		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
		if (ret)
			return -ENOSPC;

		start = extent_end;
	}

	return 0;
}

static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;
	int ret;

	/* Write out the bitmaps */
	list_for_each_entry_safe(entry, next, bitmap_list, list) {
		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
		if (ret)
			return -ENOSPC;
		list_del_init(&entry->list);
	}

	return 0;
}

static int flush_dirty_cache(struct inode *inode)
{
	int ret;

	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);

	return ret;
}

static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;

	list_for_each_entry_safe(entry, next, bitmap_list, list)
		list_del_init(&entry->list);
}

static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
			   struct btrfs_io_ctl *io_ctl,
			   struct extent_state **cached_state)
{
	io_ctl_drop_pages(io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, cached_state);
}

static int __btrfs_wait_cache_io(struct btrfs_root *root,
				 struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_io_ctl *io_ctl,
				 struct btrfs_path *path, u64 offset)
{
	int ret;
	struct inode *inode = io_ctl->inode;

	if (!inode)
		return 0;

	/* Flush the dirty pages in the cache file. */
	ret = flush_dirty_cache(inode);
	if (ret)
		goto out;

	/* Update the cache item to tell everyone this cache file is valid. */
	ret = update_cache_item(trans, root, inode, path, offset,
				io_ctl->entries, io_ctl->bitmaps);
out:
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
		if (block_group)
			btrfs_debug(root->fs_info,
				    "failed to write free space cache for block group %llu error %d",
				    block_group->start, ret);
	}
	btrfs_update_inode(trans, root, BTRFS_I(inode));

	if (block_group) {
		/* the dirty list is protected by the dirty_bgs_lock */
		spin_lock(&trans->transaction->dirty_bgs_lock);

		/* the disk_cache_state is protected by the block group lock */
		spin_lock(&block_group->lock);

		/*
		 * only mark this as written if we didn't get put back on
		 * the dirty list while waiting for IO. Otherwise our
		 * cache state won't be right, and we won't get written again
		 */
		if (!ret && list_empty(&block_group->dirty_list))
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		else if (ret)
			block_group->disk_cache_state = BTRFS_DC_ERROR;

		spin_unlock(&block_group->lock);
		spin_unlock(&trans->transaction->dirty_bgs_lock);
		io_ctl->inode = NULL;
		iput(inode);
	}

	return ret;

}

int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
			struct btrfs_block_group *block_group,
			struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
				     block_group, &block_group->io_ctl,
				     path, block_group->start);
}

/**
 * Write out cached info to an inode
 *
 * @root:        root the inode belongs to
 * @inode:       freespace inode we are writing out
 * @ctl:         free space cache we are going to write out
 * @block_group: block_group for this cache if it belongs to a block_group
 * @io_ctl:      holds context for the io
 * @trans:       the trans handle
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount. This will return 0 if it was successful in writing the cache out,
 * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_block_group *block_group,
				   struct btrfs_io_ctl *io_ctl,
				   struct btrfs_trans_handle *trans)
{
	struct extent_state *cached_state = NULL;
	LIST_HEAD(bitmap_list);
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int must_iput = 0;

	if (!i_size_read(inode))
		return -EIO;

	WARN_ON(io_ctl->pages);
	ret = io_ctl_init(io_ctl, inode, 1);
	if (ret)
		return ret;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
		down_write(&block_group->data_rwsem);
		spin_lock(&block_group->lock);
		if (block_group->delalloc_bytes) {
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
			spin_unlock(&block_group->lock);
			up_write(&block_group->data_rwsem);
			BTRFS_I(inode)->generation = 0;
			ret = 0;
			must_iput = 1;
			goto out;
		}
		spin_unlock(&block_group->lock);
	}

	/* Lock all pages first so we can lock the extent safely. */
	ret = io_ctl_prepare_pages(io_ctl, false);
	if (ret)
		goto out_unlock;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 &cached_state);

	io_ctl_set_generation(io_ctl, trans->transid);

	mutex_lock(&ctl->cache_writeout_mutex);
	/* Write out the extent entries in the free space cache */
	spin_lock(&ctl->tree_lock);
	ret = write_cache_extent_entries(io_ctl, ctl,
					 block_group, &entries, &bitmaps,
					 &bitmap_list);
	if (ret)
		goto out_nospc_locked;

	/*
	 * Some spaces that are freed in the current transaction are pinned,
	 * they will be added into free space cache after the transaction is
	 * committed, we shouldn't lose them.
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it. No locking needed
	 */
	ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
				io_ctl->num_pages, 0, i_size_read(inode),
				&cached_state, false);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);
	io_ctl_free(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state);

	/*
	 * At this point the pages are under IO and we're happy. The caller is
	 * responsible for waiting on them and updating the cache and the inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, BTRFS_I(inode));
	if (must_iput)
		iput(inode);
	return ret;
}

int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
			  struct btrfs_block_group *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				      block_group, &block_group->io_ctl, trans);
	if (ret) {
		btrfs_debug(fs_info,
			    "failed to write free space cache for block group %llu error %d",
			    block_group->start, ret);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and put the inode
	 */

	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	ASSERT(offset >= bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
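		 * (tree_insert_offset() guarantees this ordering when both
		 * entries exist at the same offset.)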
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = rb_prev(&entry->offset_index);
			if (n) {
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap &&
				    prev->offset + prev->bytes > offset)
					entry = prev;
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			ASSERT(entry->offset <= offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap &&
			    prev->offset + prev->bytes > offset)
				return prev;
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;

	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR]--;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
	}
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	ASSERT(info->bytes || info->bitmap);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR]++;
		ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
	}

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count, end;
	int extent_delta = -1;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	end = start + count;
	ASSERT(end <= BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

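	/*
	 * The extent_delta checks below track how clearing this range changes
	 * the number of contiguous free regions in the bitmap.
	 */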
	info->bytes -= bytes;
	if (info->max_extent_size > ctl->unit)
		info->max_extent_size = 0;

	if (start && test_bit(start - 1, info->bitmap))
		extent_delta++;

	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
		extent_delta++;

	info->bitmap_extents += extent_delta;
	if (!btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
	}
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count, end;
	int extent_delta = 1;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	end = start + count;
	ASSERT(end <= BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;

	if (start && test_bit(start - 1, info->bitmap))
		extent_delta--;

	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
		extent_delta--;

	info->bitmap_extents += extent_delta;
	if (!btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
		ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
	}
}

/*
 * If we cannot find a suitable extent, we will use *bytes to record
 * the size of the max extent.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc)
{
	unsigned long found_bits = 0;
	unsigned long max_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;
	unsigned long extent_bits;

	/*
	 * Skip searching the bitmap if we don't have a contiguous section that
	 * is large enough for this allocation.
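	 * (max_extent_size caches the largest run seen by a previous failed
	 * search and is reset whenever the bitmap is modified.)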
	 */
	if (for_alloc &&
	    bitmap_info->max_extent_size &&
	    bitmap_info->max_extent_size < *bytes) {
		*bytes = bitmap_info->max_extent_size;
		return -1;
	}

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
		if (for_alloc && bits == 1) {
			found_bits = 1;
			break;
		}
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		extent_bits = next_zero - i;
		if (extent_bits >= bits) {
			found_bits = extent_bits;
			break;
		} else if (extent_bits > max_bits) {
			max_bits = extent_bits;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	*bytes = (u64)(max_bits) * ctl->unit;
	bitmap_info->max_extent_size = *bytes;
	return -1;
}

static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
{
	if (entry->bitmap)
		return entry->max_extent_size;
	return entry->bytes;
}

/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
		unsigned long align, u64 *max_extent_size)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	u64 tmp;
	u64 align_off;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		goto out;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		goto out;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		/* make sure the space returned is big enough
		 * to match our requested alignment
		 */
		if (*bytes >= align) {
			tmp = entry->offset - ctl->start + align - 1;
			tmp = div64_u64(tmp, align);
			tmp = tmp * align + ctl->start;
			align_off = tmp - entry->offset;
		} else {
			align_off = 0;
			tmp = entry->offset;
		}

		if (entry->bytes < *bytes + align_off) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		if (entry->bitmap) {
			u64 size = *bytes;

			ret = search_bitmap(ctl, entry, &tmp, &size, true);
			if (!ret) {
				*offset = tmp;
				*bytes = size;
				return entry;
			} else {
				*max_extent_size =
					max(get_max_extent_size(entry),
					    *max_extent_size);
			}
			continue;
		}

		*offset = tmp;
		*bytes = entry->bytes - align_off;
		return entry;
	}
out:
	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	info->bitmap_extents = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;
	recalculate_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	/*
	 * Normally when this is called, the bitmap is completely empty. However,
	 * if we are blowing up the free space cache for one reason or another
	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
	 * we may leave stats on the table.
	 */
	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			bitmap_info->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;

	}
	unlink_free_space(ctl, bitmap_info);
	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	recalculate_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and then
	 * go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes, enum btrfs_trim_state trim_state)
{
	u64 bytes_to_set = 0;
	u64 end;

	/*
	 * This is a tradeoff to make bitmap trim state minimal. We mark the
	 * whole bitmap untrimmed if at any point we add untrimmed regions.
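	 * Flipping a previously trimmed bitmap back to untrimmed also makes
	 * its existing extents count as discardable again (handled just below).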
         */
        if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
                if (btrfs_free_space_trimmed(info)) {
                        ctl->discardable_extents[BTRFS_STAT_CURR] +=
                                info->bitmap_extents;
                        ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
                }
                info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
        }

        end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

        bytes_to_set = min(end - offset, bytes);

        bitmap_set_bits(ctl, info, offset, bytes_to_set);

        /*
         * We set some bytes, we have no idea what the max extent size is
         * anymore.
         */
        info->max_extent_size = 0;

        return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
                       struct btrfs_free_space *info)
{
        struct btrfs_block_group *block_group = ctl->private;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
        bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
        if (btrfs_should_fragment_free_space(block_group))
                forced = true;
#endif

        /* This is a way to reclaim large regions from the bitmaps. */
        if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
                return false;

        /*
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap.
         */
        if (!forced && ctl->free_extents < ctl->extents_thresh) {
                /*
                 * If this block group has some small extents we don't want to
                 * use up all of our free slots in the cache with them, we want
                 * to reserve them for larger extents.  However, if we have
                 * plenty of cache left then go ahead and add them, no sense in
                 * adding the overhead of a bitmap if we don't have to.
                 */
                if (info->bytes <= fs_info->sectorsize * 8) {
                        if (ctl->free_extents * 3 <= ctl->extents_thresh)
                                return false;
                } else {
                        return false;
                }
        }

        /*
         * The original block groups from mkfs can be really small, like 8
         * megabytes, so don't bother with a bitmap for those entries.  However
         * some block groups can be smaller than what a bitmap would cover but
         * are still large enough that they could overflow the 32k memory
         * limit, so still allow those block groups to have a bitmap entry.
         */
        if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
                return false;

        return true;
}

static const struct btrfs_free_space_op free_space_op = {
        .use_bitmap             = use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
                              struct btrfs_free_space *info)
{
        struct btrfs_free_space *bitmap_info;
        struct btrfs_block_group *block_group = NULL;
        int added = 0;
        u64 bytes, offset, bytes_added;
        enum btrfs_trim_state trim_state;
        int ret;

        bytes = info->bytes;
        offset = info->offset;
        trim_state = info->trim_state;

        if (!ctl->op->use_bitmap(ctl, info))
                return 0;

        if (ctl->op == &free_space_op)
                block_group = ctl->private;
again:
        /*
         * Since we link bitmaps right into the cluster we need to see if we
         * have a cluster here, and if so and it has our bitmap we need to add
         * the free space to that bitmap.
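         * (entries attached to a cluster are removed from the ctl's rbtree,
         * so the tree_search_offset() call below would not find that bitmap)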
2170 */ 2171 if (block_group && !list_empty(&block_group->cluster_list)) { 2172 struct btrfs_free_cluster *cluster; 2173 struct rb_node *node; 2174 struct btrfs_free_space *entry; 2175 2176 cluster = list_entry(block_group->cluster_list.next, 2177 struct btrfs_free_cluster, 2178 block_group_list); 2179 spin_lock(&cluster->lock); 2180 node = rb_first(&cluster->root); 2181 if (!node) { 2182 spin_unlock(&cluster->lock); 2183 goto no_cluster_bitmap; 2184 } 2185 2186 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2187 if (!entry->bitmap) { 2188 spin_unlock(&cluster->lock); 2189 goto no_cluster_bitmap; 2190 } 2191 2192 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2193 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2194 bytes, trim_state); 2195 bytes -= bytes_added; 2196 offset += bytes_added; 2197 } 2198 spin_unlock(&cluster->lock); 2199 if (!bytes) { 2200 ret = 1; 2201 goto out; 2202 } 2203 } 2204 2205 no_cluster_bitmap: 2206 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2207 1, 0); 2208 if (!bitmap_info) { 2209 ASSERT(added == 0); 2210 goto new_bitmap; 2211 } 2212 2213 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2214 trim_state); 2215 bytes -= bytes_added; 2216 offset += bytes_added; 2217 added = 0; 2218 2219 if (!bytes) { 2220 ret = 1; 2221 goto out; 2222 } else 2223 goto again; 2224 2225 new_bitmap: 2226 if (info && info->bitmap) { 2227 add_new_bitmap(ctl, info, offset); 2228 added = 1; 2229 info = NULL; 2230 goto again; 2231 } else { 2232 spin_unlock(&ctl->tree_lock); 2233 2234 /* no pre-allocated info, allocate a new one */ 2235 if (!info) { 2236 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2237 GFP_NOFS); 2238 if (!info) { 2239 spin_lock(&ctl->tree_lock); 2240 ret = -ENOMEM; 2241 goto out; 2242 } 2243 } 2244 2245 /* allocate the bitmap */ 2246 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2247 GFP_NOFS); 2248 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2249 spin_lock(&ctl->tree_lock); 2250 if (!info->bitmap) { 2251 ret = -ENOMEM; 2252 goto out; 2253 } 2254 goto again; 2255 } 2256 2257 out: 2258 if (info) { 2259 if (info->bitmap) 2260 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2261 info->bitmap); 2262 kmem_cache_free(btrfs_free_space_cachep, info); 2263 } 2264 2265 return ret; 2266 } 2267 2268 /* 2269 * Free space merging rules: 2270 * 1) Merge trimmed areas together 2271 * 2) Let untrimmed areas coalesce with trimmed areas 2272 * 3) Always pull neighboring regions from bitmaps 2273 * 2274 * The above rules are for when we merge free space based on btrfs_trim_state. 2275 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the 2276 * same reason: to promote larger extent regions which makes life easier for 2277 * find_free_extent(). Rule 2 enables coalescing based on the common path 2278 * being returning free space from btrfs_finish_extent_commit(). So when free 2279 * space is trimmed, it will prevent aggregating trimmed new region and 2280 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents 2281 * and provide find_free_extent() with the largest extents possible hoping for 2282 * the reuse path. 
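 * try_merge_free_space() below and the steal_from_bitmap_*() helpers are the
 * places that apply these rules.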
2283 */ 2284 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2285 struct btrfs_free_space *info, bool update_stat) 2286 { 2287 struct btrfs_free_space *left_info = NULL; 2288 struct btrfs_free_space *right_info; 2289 bool merged = false; 2290 u64 offset = info->offset; 2291 u64 bytes = info->bytes; 2292 const bool is_trimmed = btrfs_free_space_trimmed(info); 2293 2294 /* 2295 * first we want to see if there is free space adjacent to the range we 2296 * are adding, if there is remove that struct and add a new one to 2297 * cover the entire range 2298 */ 2299 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2300 if (right_info && rb_prev(&right_info->offset_index)) 2301 left_info = rb_entry(rb_prev(&right_info->offset_index), 2302 struct btrfs_free_space, offset_index); 2303 else if (!right_info) 2304 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2305 2306 /* See try_merge_free_space() comment. */ 2307 if (right_info && !right_info->bitmap && 2308 (!is_trimmed || btrfs_free_space_trimmed(right_info))) { 2309 if (update_stat) 2310 unlink_free_space(ctl, right_info); 2311 else 2312 __unlink_free_space(ctl, right_info); 2313 info->bytes += right_info->bytes; 2314 kmem_cache_free(btrfs_free_space_cachep, right_info); 2315 merged = true; 2316 } 2317 2318 /* See try_merge_free_space() comment. */ 2319 if (left_info && !left_info->bitmap && 2320 left_info->offset + left_info->bytes == offset && 2321 (!is_trimmed || btrfs_free_space_trimmed(left_info))) { 2322 if (update_stat) 2323 unlink_free_space(ctl, left_info); 2324 else 2325 __unlink_free_space(ctl, left_info); 2326 info->offset = left_info->offset; 2327 info->bytes += left_info->bytes; 2328 kmem_cache_free(btrfs_free_space_cachep, left_info); 2329 merged = true; 2330 } 2331 2332 return merged; 2333 } 2334 2335 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2336 struct btrfs_free_space *info, 2337 bool update_stat) 2338 { 2339 struct btrfs_free_space *bitmap; 2340 unsigned long i; 2341 unsigned long j; 2342 const u64 end = info->offset + info->bytes; 2343 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2344 u64 bytes; 2345 2346 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2347 if (!bitmap) 2348 return false; 2349 2350 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2351 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2352 if (j == i) 2353 return false; 2354 bytes = (j - i) * ctl->unit; 2355 info->bytes += bytes; 2356 2357 /* See try_merge_free_space() comment. */ 2358 if (!btrfs_free_space_trimmed(bitmap)) 2359 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2360 2361 if (update_stat) 2362 bitmap_clear_bits(ctl, bitmap, end, bytes); 2363 else 2364 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2365 2366 if (!bitmap->bytes) 2367 free_bitmap(ctl, bitmap); 2368 2369 return true; 2370 } 2371 2372 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2373 struct btrfs_free_space *info, 2374 bool update_stat) 2375 { 2376 struct btrfs_free_space *bitmap; 2377 u64 bitmap_offset; 2378 unsigned long i; 2379 unsigned long j; 2380 unsigned long prev_j; 2381 u64 bytes; 2382 2383 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2384 /* If we're on a boundary, try the previous logical bitmap. 
 */
        if (bitmap_offset == info->offset) {
                if (info->offset == 0)
                        return false;
                bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
        }

        bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
        if (!bitmap)
                return false;

        i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
        j = 0;
        prev_j = (unsigned long)-1;
        for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
                if (j > i)
                        break;
                prev_j = j;
        }
        if (prev_j == i)
                return false;

        if (prev_j == (unsigned long)-1)
                bytes = (i + 1) * ctl->unit;
        else
                bytes = (i - prev_j) * ctl->unit;

        info->offset -= bytes;
        info->bytes += bytes;

        /* See try_merge_free_space() comment. */
        if (!btrfs_free_space_trimmed(bitmap))
                info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;

        if (update_stat)
                bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
        else
                __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);

        if (!bitmap->bytes)
                free_bitmap(ctl, bitmap);

        return true;
}

/*
 * We prefer always to allocate from extent entries, both for clustered and
 * non-clustered allocation requests. So when attempting to add a new extent
 * entry, try to see if there's adjacent free space in bitmap entries, and if
 * there is, migrate that space from the bitmaps to the extent.
 * This way we get better chances of satisfying space allocation requests
 * because we attempt to satisfy them based on a single cache entry, and never
 * on 2 or more entries - even if the entries represent a contiguous free space
 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
 * ends).
 */
static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
                              struct btrfs_free_space *info,
                              bool update_stat)
{
        /*
         * Only work with disconnected entries, as we can change their offset,
         * and they must be extent entries.
2447 */ 2448 ASSERT(!info->bitmap); 2449 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2450 2451 if (ctl->total_bitmaps > 0) { 2452 bool stole_end; 2453 bool stole_front = false; 2454 2455 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2456 if (ctl->total_bitmaps > 0) 2457 stole_front = steal_from_bitmap_to_front(ctl, info, 2458 update_stat); 2459 2460 if (stole_end || stole_front) 2461 try_merge_free_space(ctl, info, update_stat); 2462 } 2463 } 2464 2465 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2466 struct btrfs_free_space_ctl *ctl, 2467 u64 offset, u64 bytes, 2468 enum btrfs_trim_state trim_state) 2469 { 2470 struct btrfs_block_group *block_group = ctl->private; 2471 struct btrfs_free_space *info; 2472 int ret = 0; 2473 u64 filter_bytes = bytes; 2474 2475 ASSERT(!btrfs_is_zoned(fs_info)); 2476 2477 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2478 if (!info) 2479 return -ENOMEM; 2480 2481 info->offset = offset; 2482 info->bytes = bytes; 2483 info->trim_state = trim_state; 2484 RB_CLEAR_NODE(&info->offset_index); 2485 2486 spin_lock(&ctl->tree_lock); 2487 2488 if (try_merge_free_space(ctl, info, true)) 2489 goto link; 2490 2491 /* 2492 * There was no extent directly to the left or right of this new 2493 * extent then we know we're going to have to allocate a new extent, so 2494 * before we do that see if we need to drop this into a bitmap 2495 */ 2496 ret = insert_into_bitmap(ctl, info); 2497 if (ret < 0) { 2498 goto out; 2499 } else if (ret) { 2500 ret = 0; 2501 goto out; 2502 } 2503 link: 2504 /* 2505 * Only steal free space from adjacent bitmaps if we're sure we're not 2506 * going to add the new free space to existing bitmap entries - because 2507 * that would mean unnecessary work that would be reverted. Therefore 2508 * attempt to steal space from bitmaps if we're adding an extent entry. 2509 */ 2510 steal_from_bitmap(ctl, info, true); 2511 2512 filter_bytes = max(filter_bytes, info->bytes); 2513 2514 ret = link_free_space(ctl, info); 2515 if (ret) 2516 kmem_cache_free(btrfs_free_space_cachep, info); 2517 out: 2518 btrfs_discard_update_discardable(block_group); 2519 spin_unlock(&ctl->tree_lock); 2520 2521 if (ret) { 2522 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2523 ASSERT(ret != -EEXIST); 2524 } 2525 2526 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2527 btrfs_discard_check_filter(block_group, filter_bytes); 2528 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2529 } 2530 2531 return ret; 2532 } 2533 2534 static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, 2535 u64 bytenr, u64 size, bool used) 2536 { 2537 struct btrfs_fs_info *fs_info = block_group->fs_info; 2538 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2539 u64 offset = bytenr - block_group->start; 2540 u64 to_free, to_unusable; 2541 const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold); 2542 2543 spin_lock(&ctl->tree_lock); 2544 if (!used) 2545 to_free = size; 2546 else if (offset >= block_group->alloc_offset) 2547 to_free = size; 2548 else if (offset + size <= block_group->alloc_offset) 2549 to_free = 0; 2550 else 2551 to_free = offset + size - block_group->alloc_offset; 2552 to_unusable = size - to_free; 2553 2554 ctl->free_space += to_free; 2555 /* 2556 * If the block group is read-only, we should account freed space into 2557 * bytes_readonly. 
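         * (hence zone_unusable is only increased here for read-write block
         * groups)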
2558 */ 2559 if (!block_group->ro) 2560 block_group->zone_unusable += to_unusable; 2561 spin_unlock(&ctl->tree_lock); 2562 if (!used) { 2563 spin_lock(&block_group->lock); 2564 block_group->alloc_offset -= size; 2565 spin_unlock(&block_group->lock); 2566 } 2567 2568 /* All the region is now unusable. Mark it as unused and reclaim */ 2569 if (block_group->zone_unusable == block_group->length) { 2570 btrfs_mark_bg_unused(block_group); 2571 } else if (bg_reclaim_threshold && 2572 block_group->zone_unusable >= 2573 div_factor_fine(block_group->length, bg_reclaim_threshold)) { 2574 btrfs_mark_bg_to_reclaim(block_group); 2575 } 2576 2577 return 0; 2578 } 2579 2580 int btrfs_add_free_space(struct btrfs_block_group *block_group, 2581 u64 bytenr, u64 size) 2582 { 2583 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2584 2585 if (btrfs_is_zoned(block_group->fs_info)) 2586 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2587 true); 2588 2589 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2590 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2591 2592 return __btrfs_add_free_space(block_group->fs_info, 2593 block_group->free_space_ctl, 2594 bytenr, size, trim_state); 2595 } 2596 2597 int btrfs_add_free_space_unused(struct btrfs_block_group *block_group, 2598 u64 bytenr, u64 size) 2599 { 2600 if (btrfs_is_zoned(block_group->fs_info)) 2601 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2602 false); 2603 2604 return btrfs_add_free_space(block_group, bytenr, size); 2605 } 2606 2607 /* 2608 * This is a subtle distinction because when adding free space back in general, 2609 * we want it to be added as untrimmed for async. But in the case where we add 2610 * it on loading of a block group, we want to consider it trimmed. 2611 */ 2612 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2613 u64 bytenr, u64 size) 2614 { 2615 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2616 2617 if (btrfs_is_zoned(block_group->fs_info)) 2618 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2619 true); 2620 2621 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2622 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2623 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2624 2625 return __btrfs_add_free_space(block_group->fs_info, 2626 block_group->free_space_ctl, 2627 bytenr, size, trim_state); 2628 } 2629 2630 int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2631 u64 offset, u64 bytes) 2632 { 2633 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2634 struct btrfs_free_space *info; 2635 int ret; 2636 bool re_search = false; 2637 2638 if (btrfs_is_zoned(block_group->fs_info)) { 2639 /* 2640 * This can happen with conventional zones when replaying log. 2641 * Since the allocation info of tree-log nodes are not recorded 2642 * to the extent-tree, calculate_alloc_pointer() failed to 2643 * advance the allocation pointer after last allocated tree log 2644 * node blocks. 2645 * 2646 * This function is called from 2647 * btrfs_pin_extent_for_log_replay() when replaying the log. 2648 * Advance the pointer not to overwrite the tree-log nodes. 
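                 * The pointer is only ever advanced here, never moved
                 * backwards.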
2649 */ 2650 if (block_group->start + block_group->alloc_offset < 2651 offset + bytes) { 2652 block_group->alloc_offset = 2653 offset + bytes - block_group->start; 2654 } 2655 return 0; 2656 } 2657 2658 spin_lock(&ctl->tree_lock); 2659 2660 again: 2661 ret = 0; 2662 if (!bytes) 2663 goto out_lock; 2664 2665 info = tree_search_offset(ctl, offset, 0, 0); 2666 if (!info) { 2667 /* 2668 * oops didn't find an extent that matched the space we wanted 2669 * to remove, look for a bitmap instead 2670 */ 2671 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2672 1, 0); 2673 if (!info) { 2674 /* 2675 * If we found a partial bit of our free space in a 2676 * bitmap but then couldn't find the other part this may 2677 * be a problem, so WARN about it. 2678 */ 2679 WARN_ON(re_search); 2680 goto out_lock; 2681 } 2682 } 2683 2684 re_search = false; 2685 if (!info->bitmap) { 2686 unlink_free_space(ctl, info); 2687 if (offset == info->offset) { 2688 u64 to_free = min(bytes, info->bytes); 2689 2690 info->bytes -= to_free; 2691 info->offset += to_free; 2692 if (info->bytes) { 2693 ret = link_free_space(ctl, info); 2694 WARN_ON(ret); 2695 } else { 2696 kmem_cache_free(btrfs_free_space_cachep, info); 2697 } 2698 2699 offset += to_free; 2700 bytes -= to_free; 2701 goto again; 2702 } else { 2703 u64 old_end = info->bytes + info->offset; 2704 2705 info->bytes = offset - info->offset; 2706 ret = link_free_space(ctl, info); 2707 WARN_ON(ret); 2708 if (ret) 2709 goto out_lock; 2710 2711 /* Not enough bytes in this entry to satisfy us */ 2712 if (old_end < offset + bytes) { 2713 bytes -= old_end - offset; 2714 offset = old_end; 2715 goto again; 2716 } else if (old_end == offset + bytes) { 2717 /* all done */ 2718 goto out_lock; 2719 } 2720 spin_unlock(&ctl->tree_lock); 2721 2722 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2723 offset + bytes, 2724 old_end - (offset + bytes), 2725 info->trim_state); 2726 WARN_ON(ret); 2727 goto out; 2728 } 2729 } 2730 2731 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2732 if (ret == -EAGAIN) { 2733 re_search = true; 2734 goto again; 2735 } 2736 out_lock: 2737 btrfs_discard_update_discardable(block_group); 2738 spin_unlock(&ctl->tree_lock); 2739 out: 2740 return ret; 2741 } 2742 2743 void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2744 u64 bytes) 2745 { 2746 struct btrfs_fs_info *fs_info = block_group->fs_info; 2747 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2748 struct btrfs_free_space *info; 2749 struct rb_node *n; 2750 int count = 0; 2751 2752 /* 2753 * Zoned btrfs does not use free space tree and cluster. Just print 2754 * out the free space after the allocation offset. 2755 */ 2756 if (btrfs_is_zoned(fs_info)) { 2757 btrfs_info(fs_info, "free space %llu", 2758 block_group->length - block_group->alloc_offset); 2759 return; 2760 } 2761 2762 spin_lock(&ctl->tree_lock); 2763 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2764 info = rb_entry(n, struct btrfs_free_space, offset_index); 2765 if (info->bytes >= bytes && !block_group->ro) 2766 count++; 2767 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2768 info->offset, info->bytes, 2769 (info->bitmap) ? "yes" : "no"); 2770 } 2771 spin_unlock(&ctl->tree_lock); 2772 btrfs_info(fs_info, "block group has cluster?: %s", 2773 list_empty(&block_group->cluster_list) ? 
"no" : "yes"); 2774 btrfs_info(fs_info, 2775 "%d blocks of free space at or bigger than bytes is", count); 2776 } 2777 2778 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group, 2779 struct btrfs_free_space_ctl *ctl) 2780 { 2781 struct btrfs_fs_info *fs_info = block_group->fs_info; 2782 2783 spin_lock_init(&ctl->tree_lock); 2784 ctl->unit = fs_info->sectorsize; 2785 ctl->start = block_group->start; 2786 ctl->private = block_group; 2787 ctl->op = &free_space_op; 2788 INIT_LIST_HEAD(&ctl->trimming_ranges); 2789 mutex_init(&ctl->cache_writeout_mutex); 2790 2791 /* 2792 * we only want to have 32k of ram per block group for keeping 2793 * track of free space, and if we pass 1/2 of that we want to 2794 * start converting things over to using bitmaps 2795 */ 2796 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2797 } 2798 2799 /* 2800 * for a given cluster, put all of its extents back into the free 2801 * space cache. If the block group passed doesn't match the block group 2802 * pointed to by the cluster, someone else raced in and freed the 2803 * cluster already. In that case, we just return without changing anything 2804 */ 2805 static void __btrfs_return_cluster_to_free_space( 2806 struct btrfs_block_group *block_group, 2807 struct btrfs_free_cluster *cluster) 2808 { 2809 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2810 struct btrfs_free_space *entry; 2811 struct rb_node *node; 2812 2813 spin_lock(&cluster->lock); 2814 if (cluster->block_group != block_group) { 2815 spin_unlock(&cluster->lock); 2816 return; 2817 } 2818 2819 cluster->block_group = NULL; 2820 cluster->window_start = 0; 2821 list_del_init(&cluster->block_group_list); 2822 2823 node = rb_first(&cluster->root); 2824 while (node) { 2825 bool bitmap; 2826 2827 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2828 node = rb_next(&entry->offset_index); 2829 rb_erase(&entry->offset_index, &cluster->root); 2830 RB_CLEAR_NODE(&entry->offset_index); 2831 2832 bitmap = (entry->bitmap != NULL); 2833 if (!bitmap) { 2834 /* Merging treats extents as if they were new */ 2835 if (!btrfs_free_space_trimmed(entry)) { 2836 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2837 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2838 entry->bytes; 2839 } 2840 2841 try_merge_free_space(ctl, entry, false); 2842 steal_from_bitmap(ctl, entry, false); 2843 2844 /* As we insert directly, update these statistics */ 2845 if (!btrfs_free_space_trimmed(entry)) { 2846 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2847 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2848 entry->bytes; 2849 } 2850 } 2851 tree_insert_offset(&ctl->free_space_offset, 2852 entry->offset, &entry->offset_index, bitmap); 2853 } 2854 cluster->root = RB_ROOT; 2855 spin_unlock(&cluster->lock); 2856 btrfs_put_block_group(block_group); 2857 } 2858 2859 static void __btrfs_remove_free_space_cache_locked( 2860 struct btrfs_free_space_ctl *ctl) 2861 { 2862 struct btrfs_free_space *info; 2863 struct rb_node *node; 2864 2865 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2866 info = rb_entry(node, struct btrfs_free_space, offset_index); 2867 if (!info->bitmap) { 2868 unlink_free_space(ctl, info); 2869 kmem_cache_free(btrfs_free_space_cachep, info); 2870 } else { 2871 free_bitmap(ctl, info); 2872 } 2873 2874 cond_resched_lock(&ctl->tree_lock); 2875 } 2876 } 2877 2878 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2879 { 2880 spin_lock(&ctl->tree_lock); 2881 __btrfs_remove_free_space_cache_locked(ctl); 2882 
if (ctl->private) 2883 btrfs_discard_update_discardable(ctl->private); 2884 spin_unlock(&ctl->tree_lock); 2885 } 2886 2887 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2888 { 2889 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2890 struct btrfs_free_cluster *cluster; 2891 struct list_head *head; 2892 2893 spin_lock(&ctl->tree_lock); 2894 while ((head = block_group->cluster_list.next) != 2895 &block_group->cluster_list) { 2896 cluster = list_entry(head, struct btrfs_free_cluster, 2897 block_group_list); 2898 2899 WARN_ON(cluster->block_group != block_group); 2900 __btrfs_return_cluster_to_free_space(block_group, cluster); 2901 2902 cond_resched_lock(&ctl->tree_lock); 2903 } 2904 __btrfs_remove_free_space_cache_locked(ctl); 2905 btrfs_discard_update_discardable(block_group); 2906 spin_unlock(&ctl->tree_lock); 2907 2908 } 2909 2910 /** 2911 * btrfs_is_free_space_trimmed - see if everything is trimmed 2912 * @block_group: block_group of interest 2913 * 2914 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2915 */ 2916 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2917 { 2918 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2919 struct btrfs_free_space *info; 2920 struct rb_node *node; 2921 bool ret = true; 2922 2923 spin_lock(&ctl->tree_lock); 2924 node = rb_first(&ctl->free_space_offset); 2925 2926 while (node) { 2927 info = rb_entry(node, struct btrfs_free_space, offset_index); 2928 2929 if (!btrfs_free_space_trimmed(info)) { 2930 ret = false; 2931 break; 2932 } 2933 2934 node = rb_next(node); 2935 } 2936 2937 spin_unlock(&ctl->tree_lock); 2938 return ret; 2939 } 2940 2941 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2942 u64 offset, u64 bytes, u64 empty_size, 2943 u64 *max_extent_size) 2944 { 2945 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2946 struct btrfs_discard_ctl *discard_ctl = 2947 &block_group->fs_info->discard_ctl; 2948 struct btrfs_free_space *entry = NULL; 2949 u64 bytes_search = bytes + empty_size; 2950 u64 ret = 0; 2951 u64 align_gap = 0; 2952 u64 align_gap_len = 0; 2953 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2954 2955 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 2956 2957 spin_lock(&ctl->tree_lock); 2958 entry = find_free_space(ctl, &offset, &bytes_search, 2959 block_group->full_stripe_len, max_extent_size); 2960 if (!entry) 2961 goto out; 2962 2963 ret = offset; 2964 if (entry->bitmap) { 2965 bitmap_clear_bits(ctl, entry, offset, bytes); 2966 2967 if (!btrfs_free_space_trimmed(entry)) 2968 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2969 2970 if (!entry->bytes) 2971 free_bitmap(ctl, entry); 2972 } else { 2973 unlink_free_space(ctl, entry); 2974 align_gap_len = offset - entry->offset; 2975 align_gap = entry->offset; 2976 align_gap_trim_state = entry->trim_state; 2977 2978 if (!btrfs_free_space_trimmed(entry)) 2979 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2980 2981 entry->offset = offset + bytes; 2982 WARN_ON(entry->bytes < bytes + align_gap_len); 2983 2984 entry->bytes -= bytes + align_gap_len; 2985 if (!entry->bytes) 2986 kmem_cache_free(btrfs_free_space_cachep, entry); 2987 else 2988 link_free_space(ctl, entry); 2989 } 2990 out: 2991 btrfs_discard_update_discardable(block_group); 2992 spin_unlock(&ctl->tree_lock); 2993 2994 if (align_gap_len) 2995 __btrfs_add_free_space(block_group->fs_info, ctl, 2996 align_gap, align_gap_len, 2997 
align_gap_trim_state); 2998 return ret; 2999 } 3000 3001 /* 3002 * given a cluster, put all of its extents back into the free space 3003 * cache. If a block group is passed, this function will only free 3004 * a cluster that belongs to the passed block group. 3005 * 3006 * Otherwise, it'll get a reference on the block group pointed to by the 3007 * cluster and remove the cluster from it. 3008 */ 3009 void btrfs_return_cluster_to_free_space( 3010 struct btrfs_block_group *block_group, 3011 struct btrfs_free_cluster *cluster) 3012 { 3013 struct btrfs_free_space_ctl *ctl; 3014 3015 /* first, get a safe pointer to the block group */ 3016 spin_lock(&cluster->lock); 3017 if (!block_group) { 3018 block_group = cluster->block_group; 3019 if (!block_group) { 3020 spin_unlock(&cluster->lock); 3021 return; 3022 } 3023 } else if (cluster->block_group != block_group) { 3024 /* someone else has already freed it don't redo their work */ 3025 spin_unlock(&cluster->lock); 3026 return; 3027 } 3028 btrfs_get_block_group(block_group); 3029 spin_unlock(&cluster->lock); 3030 3031 ctl = block_group->free_space_ctl; 3032 3033 /* now return any extents the cluster had on it */ 3034 spin_lock(&ctl->tree_lock); 3035 __btrfs_return_cluster_to_free_space(block_group, cluster); 3036 spin_unlock(&ctl->tree_lock); 3037 3038 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 3039 3040 /* finally drop our ref */ 3041 btrfs_put_block_group(block_group); 3042 } 3043 3044 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 3045 struct btrfs_free_cluster *cluster, 3046 struct btrfs_free_space *entry, 3047 u64 bytes, u64 min_start, 3048 u64 *max_extent_size) 3049 { 3050 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3051 int err; 3052 u64 search_start = cluster->window_start; 3053 u64 search_bytes = bytes; 3054 u64 ret = 0; 3055 3056 search_start = min_start; 3057 search_bytes = bytes; 3058 3059 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 3060 if (err) { 3061 *max_extent_size = max(get_max_extent_size(entry), 3062 *max_extent_size); 3063 return 0; 3064 } 3065 3066 ret = search_start; 3067 __bitmap_clear_bits(ctl, entry, ret, bytes); 3068 3069 return ret; 3070 } 3071 3072 /* 3073 * given a cluster, try to allocate 'bytes' from it, returns 0 3074 * if it couldn't find anything suitably large, or a logical disk offset 3075 * if things worked out 3076 */ 3077 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 3078 struct btrfs_free_cluster *cluster, u64 bytes, 3079 u64 min_start, u64 *max_extent_size) 3080 { 3081 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3082 struct btrfs_discard_ctl *discard_ctl = 3083 &block_group->fs_info->discard_ctl; 3084 struct btrfs_free_space *entry = NULL; 3085 struct rb_node *node; 3086 u64 ret = 0; 3087 3088 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3089 3090 spin_lock(&cluster->lock); 3091 if (bytes > cluster->max_size) 3092 goto out; 3093 3094 if (cluster->block_group != block_group) 3095 goto out; 3096 3097 node = rb_first(&cluster->root); 3098 if (!node) 3099 goto out; 3100 3101 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3102 while (1) { 3103 if (entry->bytes < bytes) 3104 *max_extent_size = max(get_max_extent_size(entry), 3105 *max_extent_size); 3106 3107 if (entry->bytes < bytes || 3108 (!entry->bitmap && entry->offset < min_start)) { 3109 node = rb_next(&entry->offset_index); 3110 if (!node) 3111 break; 3112 entry = rb_entry(node, struct 
btrfs_free_space, 3113 offset_index); 3114 continue; 3115 } 3116 3117 if (entry->bitmap) { 3118 ret = btrfs_alloc_from_bitmap(block_group, 3119 cluster, entry, bytes, 3120 cluster->window_start, 3121 max_extent_size); 3122 if (ret == 0) { 3123 node = rb_next(&entry->offset_index); 3124 if (!node) 3125 break; 3126 entry = rb_entry(node, struct btrfs_free_space, 3127 offset_index); 3128 continue; 3129 } 3130 cluster->window_start += bytes; 3131 } else { 3132 ret = entry->offset; 3133 3134 entry->offset += bytes; 3135 entry->bytes -= bytes; 3136 } 3137 3138 break; 3139 } 3140 out: 3141 spin_unlock(&cluster->lock); 3142 3143 if (!ret) 3144 return 0; 3145 3146 spin_lock(&ctl->tree_lock); 3147 3148 if (!btrfs_free_space_trimmed(entry)) 3149 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3150 3151 ctl->free_space -= bytes; 3152 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3153 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3154 3155 spin_lock(&cluster->lock); 3156 if (entry->bytes == 0) { 3157 rb_erase(&entry->offset_index, &cluster->root); 3158 ctl->free_extents--; 3159 if (entry->bitmap) { 3160 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3161 entry->bitmap); 3162 ctl->total_bitmaps--; 3163 recalculate_thresholds(ctl); 3164 } else if (!btrfs_free_space_trimmed(entry)) { 3165 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3166 } 3167 kmem_cache_free(btrfs_free_space_cachep, entry); 3168 } 3169 3170 spin_unlock(&cluster->lock); 3171 spin_unlock(&ctl->tree_lock); 3172 3173 return ret; 3174 } 3175 3176 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3177 struct btrfs_free_space *entry, 3178 struct btrfs_free_cluster *cluster, 3179 u64 offset, u64 bytes, 3180 u64 cont1_bytes, u64 min_bytes) 3181 { 3182 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3183 unsigned long next_zero; 3184 unsigned long i; 3185 unsigned long want_bits; 3186 unsigned long min_bits; 3187 unsigned long found_bits; 3188 unsigned long max_bits = 0; 3189 unsigned long start = 0; 3190 unsigned long total_found = 0; 3191 int ret; 3192 3193 i = offset_to_bit(entry->offset, ctl->unit, 3194 max_t(u64, offset, entry->offset)); 3195 want_bits = bytes_to_bits(bytes, ctl->unit); 3196 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3197 3198 /* 3199 * Don't bother looking for a cluster in this bitmap if it's heavily 3200 * fragmented. 
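         * max_extent_size is only a cached hint; a value of zero means it is
         * currently unknown and the bitmap still has to be scanned.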
3201 */ 3202 if (entry->max_extent_size && 3203 entry->max_extent_size < cont1_bytes) 3204 return -ENOSPC; 3205 again: 3206 found_bits = 0; 3207 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3208 next_zero = find_next_zero_bit(entry->bitmap, 3209 BITS_PER_BITMAP, i); 3210 if (next_zero - i >= min_bits) { 3211 found_bits = next_zero - i; 3212 if (found_bits > max_bits) 3213 max_bits = found_bits; 3214 break; 3215 } 3216 if (next_zero - i > max_bits) 3217 max_bits = next_zero - i; 3218 i = next_zero; 3219 } 3220 3221 if (!found_bits) { 3222 entry->max_extent_size = (u64)max_bits * ctl->unit; 3223 return -ENOSPC; 3224 } 3225 3226 if (!total_found) { 3227 start = i; 3228 cluster->max_size = 0; 3229 } 3230 3231 total_found += found_bits; 3232 3233 if (cluster->max_size < found_bits * ctl->unit) 3234 cluster->max_size = found_bits * ctl->unit; 3235 3236 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3237 i = next_zero + 1; 3238 goto again; 3239 } 3240 3241 cluster->window_start = start * ctl->unit + entry->offset; 3242 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3243 ret = tree_insert_offset(&cluster->root, entry->offset, 3244 &entry->offset_index, 1); 3245 ASSERT(!ret); /* -EEXIST; Logic error */ 3246 3247 trace_btrfs_setup_cluster(block_group, cluster, 3248 total_found * ctl->unit, 1); 3249 return 0; 3250 } 3251 3252 /* 3253 * This searches the block group for just extents to fill the cluster with. 3254 * Try to find a cluster with at least bytes total bytes, at least one 3255 * extent of cont1_bytes, and other clusters of at least min_bytes. 3256 */ 3257 static noinline int 3258 setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3259 struct btrfs_free_cluster *cluster, 3260 struct list_head *bitmaps, u64 offset, u64 bytes, 3261 u64 cont1_bytes, u64 min_bytes) 3262 { 3263 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3264 struct btrfs_free_space *first = NULL; 3265 struct btrfs_free_space *entry = NULL; 3266 struct btrfs_free_space *last; 3267 struct rb_node *node; 3268 u64 window_free; 3269 u64 max_extent; 3270 u64 total_size = 0; 3271 3272 entry = tree_search_offset(ctl, offset, 0, 1); 3273 if (!entry) 3274 return -ENOSPC; 3275 3276 /* 3277 * We don't want bitmaps, so just move along until we find a normal 3278 * extent entry. 
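         * Bitmaps we walk past are collected on @bitmaps so that
         * setup_cluster_bitmap() can try them if no extent based cluster is
         * found.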
3279 */ 3280 while (entry->bitmap || entry->bytes < min_bytes) { 3281 if (entry->bitmap && list_empty(&entry->list)) 3282 list_add_tail(&entry->list, bitmaps); 3283 node = rb_next(&entry->offset_index); 3284 if (!node) 3285 return -ENOSPC; 3286 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3287 } 3288 3289 window_free = entry->bytes; 3290 max_extent = entry->bytes; 3291 first = entry; 3292 last = entry; 3293 3294 for (node = rb_next(&entry->offset_index); node; 3295 node = rb_next(&entry->offset_index)) { 3296 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3297 3298 if (entry->bitmap) { 3299 if (list_empty(&entry->list)) 3300 list_add_tail(&entry->list, bitmaps); 3301 continue; 3302 } 3303 3304 if (entry->bytes < min_bytes) 3305 continue; 3306 3307 last = entry; 3308 window_free += entry->bytes; 3309 if (entry->bytes > max_extent) 3310 max_extent = entry->bytes; 3311 } 3312 3313 if (window_free < bytes || max_extent < cont1_bytes) 3314 return -ENOSPC; 3315 3316 cluster->window_start = first->offset; 3317 3318 node = &first->offset_index; 3319 3320 /* 3321 * now we've found our entries, pull them out of the free space 3322 * cache and put them into the cluster rbtree 3323 */ 3324 do { 3325 int ret; 3326 3327 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3328 node = rb_next(&entry->offset_index); 3329 if (entry->bitmap || entry->bytes < min_bytes) 3330 continue; 3331 3332 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3333 ret = tree_insert_offset(&cluster->root, entry->offset, 3334 &entry->offset_index, 0); 3335 total_size += entry->bytes; 3336 ASSERT(!ret); /* -EEXIST; Logic error */ 3337 } while (node && entry != last); 3338 3339 cluster->max_size = max_extent; 3340 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3341 return 0; 3342 } 3343 3344 /* 3345 * This specifically looks for bitmaps that may work in the cluster, we assume 3346 * that we have already failed to find extents that will work. 3347 */ 3348 static noinline int 3349 setup_cluster_bitmap(struct btrfs_block_group *block_group, 3350 struct btrfs_free_cluster *cluster, 3351 struct list_head *bitmaps, u64 offset, u64 bytes, 3352 u64 cont1_bytes, u64 min_bytes) 3353 { 3354 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3355 struct btrfs_free_space *entry = NULL; 3356 int ret = -ENOSPC; 3357 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3358 3359 if (ctl->total_bitmaps == 0) 3360 return -ENOSPC; 3361 3362 /* 3363 * The bitmap that covers offset won't be in the list unless offset 3364 * is just its start offset. 3365 */ 3366 if (!list_empty(bitmaps)) 3367 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3368 3369 if (!entry || entry->offset != bitmap_offset) { 3370 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3371 if (entry && list_empty(&entry->list)) 3372 list_add(&entry->list, bitmaps); 3373 } 3374 3375 list_for_each_entry(entry, bitmaps, list) { 3376 if (entry->bytes < bytes) 3377 continue; 3378 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3379 bytes, cont1_bytes, min_bytes); 3380 if (!ret) 3381 return 0; 3382 } 3383 3384 /* 3385 * The bitmaps list has all the bitmaps that record free space 3386 * starting after offset, so no more search is required. 3387 */ 3388 return -ENOSPC; 3389 } 3390 3391 /* 3392 * here we try to find a cluster of blocks in a block group. The goal 3393 * is to find at least bytes+empty_size. 3394 * We might not find them all in one contiguous area. 
3395 * 3396 * returns zero and sets up cluster if things worked out, otherwise 3397 * it returns -enospc 3398 */ 3399 int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3400 struct btrfs_free_cluster *cluster, 3401 u64 offset, u64 bytes, u64 empty_size) 3402 { 3403 struct btrfs_fs_info *fs_info = block_group->fs_info; 3404 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3405 struct btrfs_free_space *entry, *tmp; 3406 LIST_HEAD(bitmaps); 3407 u64 min_bytes; 3408 u64 cont1_bytes; 3409 int ret; 3410 3411 /* 3412 * Choose the minimum extent size we'll require for this 3413 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3414 * For metadata, allow allocates with smaller extents. For 3415 * data, keep it dense. 3416 */ 3417 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3418 cont1_bytes = min_bytes = bytes + empty_size; 3419 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3420 cont1_bytes = bytes; 3421 min_bytes = fs_info->sectorsize; 3422 } else { 3423 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3424 min_bytes = fs_info->sectorsize; 3425 } 3426 3427 spin_lock(&ctl->tree_lock); 3428 3429 /* 3430 * If we know we don't have enough space to make a cluster don't even 3431 * bother doing all the work to try and find one. 3432 */ 3433 if (ctl->free_space < bytes) { 3434 spin_unlock(&ctl->tree_lock); 3435 return -ENOSPC; 3436 } 3437 3438 spin_lock(&cluster->lock); 3439 3440 /* someone already found a cluster, hooray */ 3441 if (cluster->block_group) { 3442 ret = 0; 3443 goto out; 3444 } 3445 3446 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3447 min_bytes); 3448 3449 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3450 bytes + empty_size, 3451 cont1_bytes, min_bytes); 3452 if (ret) 3453 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3454 offset, bytes + empty_size, 3455 cont1_bytes, min_bytes); 3456 3457 /* Clear our temporary list */ 3458 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3459 list_del_init(&entry->list); 3460 3461 if (!ret) { 3462 btrfs_get_block_group(block_group); 3463 list_add_tail(&cluster->block_group_list, 3464 &block_group->cluster_list); 3465 cluster->block_group = block_group; 3466 } else { 3467 trace_btrfs_failed_cluster_setup(block_group); 3468 } 3469 out: 3470 spin_unlock(&cluster->lock); 3471 spin_unlock(&ctl->tree_lock); 3472 3473 return ret; 3474 } 3475 3476 /* 3477 * simple code to zero out a cluster 3478 */ 3479 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3480 { 3481 spin_lock_init(&cluster->lock); 3482 spin_lock_init(&cluster->refill_lock); 3483 cluster->root = RB_ROOT; 3484 cluster->max_size = 0; 3485 cluster->fragmented = false; 3486 INIT_LIST_HEAD(&cluster->block_group_list); 3487 cluster->block_group = NULL; 3488 } 3489 3490 static int do_trimming(struct btrfs_block_group *block_group, 3491 u64 *total_trimmed, u64 start, u64 bytes, 3492 u64 reserved_start, u64 reserved_bytes, 3493 enum btrfs_trim_state reserved_trim_state, 3494 struct btrfs_trim_range *trim_entry) 3495 { 3496 struct btrfs_space_info *space_info = block_group->space_info; 3497 struct btrfs_fs_info *fs_info = block_group->fs_info; 3498 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3499 int ret; 3500 int update = 0; 3501 const u64 end = start + bytes; 3502 const u64 reserved_end = reserved_start + reserved_bytes; 3503 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3504 u64 trimmed = 0; 3505 3506 spin_lock(&space_info->lock); 
3507 spin_lock(&block_group->lock); 3508 if (!block_group->ro) { 3509 block_group->reserved += reserved_bytes; 3510 space_info->bytes_reserved += reserved_bytes; 3511 update = 1; 3512 } 3513 spin_unlock(&block_group->lock); 3514 spin_unlock(&space_info->lock); 3515 3516 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3517 if (!ret) { 3518 *total_trimmed += trimmed; 3519 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3520 } 3521 3522 mutex_lock(&ctl->cache_writeout_mutex); 3523 if (reserved_start < start) 3524 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3525 start - reserved_start, 3526 reserved_trim_state); 3527 if (start + bytes < reserved_start + reserved_bytes) 3528 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3529 reserved_trim_state); 3530 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3531 list_del(&trim_entry->list); 3532 mutex_unlock(&ctl->cache_writeout_mutex); 3533 3534 if (update) { 3535 spin_lock(&space_info->lock); 3536 spin_lock(&block_group->lock); 3537 if (block_group->ro) 3538 space_info->bytes_readonly += reserved_bytes; 3539 block_group->reserved -= reserved_bytes; 3540 space_info->bytes_reserved -= reserved_bytes; 3541 spin_unlock(&block_group->lock); 3542 spin_unlock(&space_info->lock); 3543 } 3544 3545 return ret; 3546 } 3547 3548 /* 3549 * If @async is set, then we will trim 1 region and return. 3550 */ 3551 static int trim_no_bitmap(struct btrfs_block_group *block_group, 3552 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3553 bool async) 3554 { 3555 struct btrfs_discard_ctl *discard_ctl = 3556 &block_group->fs_info->discard_ctl; 3557 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3558 struct btrfs_free_space *entry; 3559 struct rb_node *node; 3560 int ret = 0; 3561 u64 extent_start; 3562 u64 extent_bytes; 3563 enum btrfs_trim_state extent_trim_state; 3564 u64 bytes; 3565 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3566 3567 while (start < end) { 3568 struct btrfs_trim_range trim_entry; 3569 3570 mutex_lock(&ctl->cache_writeout_mutex); 3571 spin_lock(&ctl->tree_lock); 3572 3573 if (ctl->free_space < minlen) 3574 goto out_unlock; 3575 3576 entry = tree_search_offset(ctl, start, 0, 1); 3577 if (!entry) 3578 goto out_unlock; 3579 3580 /* Skip bitmaps and if async, already trimmed entries */ 3581 while (entry->bitmap || 3582 (async && btrfs_free_space_trimmed(entry))) { 3583 node = rb_next(&entry->offset_index); 3584 if (!node) 3585 goto out_unlock; 3586 entry = rb_entry(node, struct btrfs_free_space, 3587 offset_index); 3588 } 3589 3590 if (entry->offset >= end) 3591 goto out_unlock; 3592 3593 extent_start = entry->offset; 3594 extent_bytes = entry->bytes; 3595 extent_trim_state = entry->trim_state; 3596 if (async) { 3597 start = entry->offset; 3598 bytes = entry->bytes; 3599 if (bytes < minlen) { 3600 spin_unlock(&ctl->tree_lock); 3601 mutex_unlock(&ctl->cache_writeout_mutex); 3602 goto next; 3603 } 3604 unlink_free_space(ctl, entry); 3605 /* 3606 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3607 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3608 * X when we come back around. So trim it now. 
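                         * Otherwise cap this round at max_discard_size and
                         * keep the remainder linked so it gets trimmed on a
                         * later pass.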
                         */
                        if (max_discard_size &&
                            bytes >= (max_discard_size +
                                      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
                                bytes = max_discard_size;
                                extent_bytes = max_discard_size;
                                entry->offset += max_discard_size;
                                entry->bytes -= max_discard_size;
                                link_free_space(ctl, entry);
                        } else {
                                kmem_cache_free(btrfs_free_space_cachep, entry);
                        }
                } else {
                        start = max(start, extent_start);
                        bytes = min(extent_start + extent_bytes, end) - start;
                        if (bytes < minlen) {
                                spin_unlock(&ctl->tree_lock);
                                mutex_unlock(&ctl->cache_writeout_mutex);
                                goto next;
                        }

                        unlink_free_space(ctl, entry);
                        kmem_cache_free(btrfs_free_space_cachep, entry);
                }

                spin_unlock(&ctl->tree_lock);
                trim_entry.start = extent_start;
                trim_entry.bytes = extent_bytes;
                list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
                mutex_unlock(&ctl->cache_writeout_mutex);

                ret = do_trimming(block_group, total_trimmed, start, bytes,
                                  extent_start, extent_bytes, extent_trim_state,
                                  &trim_entry);
                if (ret) {
                        block_group->discard_cursor = start + bytes;
                        break;
                }
next:
                start += bytes;
                block_group->discard_cursor = start;
                if (async && *total_trimmed)
                        break;

                if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                cond_resched();
        }

        return ret;

out_unlock:
        block_group->discard_cursor = btrfs_block_group_end(block_group);
        spin_unlock(&ctl->tree_lock);
        mutex_unlock(&ctl->cache_writeout_mutex);

        return ret;
}

/*
 * If we break out of trimming a bitmap prematurely, we should reset the
 * trimming bit.  In a rather contrived case, it's possible to race here so
 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
 *
 * start = start of bitmap
 * end = near end of bitmap
 *
 * Thread 1:			Thread 2:
 *  trim_bitmaps(start)
 *				trim_bitmaps(end)
 *				end_trimming_bitmap()
 *  reset_trimming_bitmap()
 */
static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
{
        struct btrfs_free_space *entry;

        spin_lock(&ctl->tree_lock);
        entry = tree_search_offset(ctl, offset, 1, 0);
        if (entry) {
                if (btrfs_free_space_trimmed(entry)) {
                        ctl->discardable_extents[BTRFS_STAT_CURR] +=
                                entry->bitmap_extents;
                        ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
                }
                entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
        }

        spin_unlock(&ctl->tree_lock);
}

static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
                                struct btrfs_free_space *entry)
{
        if (btrfs_free_space_trimming_bitmap(entry)) {
                entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
                ctl->discardable_extents[BTRFS_STAT_CURR] -=
                        entry->bitmap_extents;
                ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
        }
}

/*
 * If @async is set, then we will trim 1 region and return.
3716 */ 3717 static int trim_bitmaps(struct btrfs_block_group *block_group, 3718 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3719 u64 maxlen, bool async) 3720 { 3721 struct btrfs_discard_ctl *discard_ctl = 3722 &block_group->fs_info->discard_ctl; 3723 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3724 struct btrfs_free_space *entry; 3725 int ret = 0; 3726 int ret2; 3727 u64 bytes; 3728 u64 offset = offset_to_bitmap(ctl, start); 3729 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3730 3731 while (offset < end) { 3732 bool next_bitmap = false; 3733 struct btrfs_trim_range trim_entry; 3734 3735 mutex_lock(&ctl->cache_writeout_mutex); 3736 spin_lock(&ctl->tree_lock); 3737 3738 if (ctl->free_space < minlen) { 3739 block_group->discard_cursor = 3740 btrfs_block_group_end(block_group); 3741 spin_unlock(&ctl->tree_lock); 3742 mutex_unlock(&ctl->cache_writeout_mutex); 3743 break; 3744 } 3745 3746 entry = tree_search_offset(ctl, offset, 1, 0); 3747 /* 3748 * Bitmaps are marked trimmed lossily now to prevent constant 3749 * discarding of the same bitmap (the reason why we are bound 3750 * by the filters). So, retrim the block group bitmaps when we 3751 * are preparing to punt to the unused_bgs list. This uses 3752 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3753 * which is the only discard index which sets minlen to 0. 3754 */ 3755 if (!entry || (async && minlen && start == offset && 3756 btrfs_free_space_trimmed(entry))) { 3757 spin_unlock(&ctl->tree_lock); 3758 mutex_unlock(&ctl->cache_writeout_mutex); 3759 next_bitmap = true; 3760 goto next; 3761 } 3762 3763 /* 3764 * Async discard bitmap trimming begins at by setting the start 3765 * to be key.objectid and the offset_to_bitmap() aligns to the 3766 * start of the bitmap. This lets us know we are fully 3767 * scanning the bitmap rather than only some portion of it. 3768 */ 3769 if (start == offset) 3770 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3771 3772 bytes = minlen; 3773 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3774 if (ret2 || start >= end) { 3775 /* 3776 * We lossily consider a bitmap trimmed if we only skip 3777 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3778 */ 3779 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3780 end_trimming_bitmap(ctl, entry); 3781 else 3782 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3783 spin_unlock(&ctl->tree_lock); 3784 mutex_unlock(&ctl->cache_writeout_mutex); 3785 next_bitmap = true; 3786 goto next; 3787 } 3788 3789 /* 3790 * We already trimmed a region, but are using the locking above 3791 * to reset the trim_state. 3792 */ 3793 if (async && *total_trimmed) { 3794 spin_unlock(&ctl->tree_lock); 3795 mutex_unlock(&ctl->cache_writeout_mutex); 3796 goto out; 3797 } 3798 3799 bytes = min(bytes, end - start); 3800 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3801 spin_unlock(&ctl->tree_lock); 3802 mutex_unlock(&ctl->cache_writeout_mutex); 3803 goto next; 3804 } 3805 3806 /* 3807 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3808 * If X < @minlen, we won't trim X when we come back around. 3809 * So trim it now. We differ here from trimming extents as we 3810 * don't keep individual state per bit. 
3811 */ 3812 if (async && 3813 max_discard_size && 3814 bytes > (max_discard_size + minlen)) 3815 bytes = max_discard_size; 3816 3817 bitmap_clear_bits(ctl, entry, start, bytes); 3818 if (entry->bytes == 0) 3819 free_bitmap(ctl, entry); 3820 3821 spin_unlock(&ctl->tree_lock); 3822 trim_entry.start = start; 3823 trim_entry.bytes = bytes; 3824 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3825 mutex_unlock(&ctl->cache_writeout_mutex); 3826 3827 ret = do_trimming(block_group, total_trimmed, start, bytes, 3828 start, bytes, 0, &trim_entry); 3829 if (ret) { 3830 reset_trimming_bitmap(ctl, offset); 3831 block_group->discard_cursor = 3832 btrfs_block_group_end(block_group); 3833 break; 3834 } 3835 next: 3836 if (next_bitmap) { 3837 offset += BITS_PER_BITMAP * ctl->unit; 3838 start = offset; 3839 } else { 3840 start += bytes; 3841 } 3842 block_group->discard_cursor = start; 3843 3844 if (fatal_signal_pending(current)) { 3845 if (start != offset) 3846 reset_trimming_bitmap(ctl, offset); 3847 ret = -ERESTARTSYS; 3848 break; 3849 } 3850 3851 cond_resched(); 3852 } 3853 3854 if (offset >= end) 3855 block_group->discard_cursor = end; 3856 3857 out: 3858 return ret; 3859 } 3860 3861 int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3862 u64 *trimmed, u64 start, u64 end, u64 minlen) 3863 { 3864 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3865 int ret; 3866 u64 rem = 0; 3867 3868 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3869 3870 *trimmed = 0; 3871 3872 spin_lock(&block_group->lock); 3873 if (block_group->removed) { 3874 spin_unlock(&block_group->lock); 3875 return 0; 3876 } 3877 btrfs_freeze_block_group(block_group); 3878 spin_unlock(&block_group->lock); 3879 3880 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3881 if (ret) 3882 goto out; 3883 3884 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3885 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3886 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3887 if (rem) 3888 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3889 out: 3890 btrfs_unfreeze_block_group(block_group); 3891 return ret; 3892 } 3893 3894 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3895 u64 *trimmed, u64 start, u64 end, u64 minlen, 3896 bool async) 3897 { 3898 int ret; 3899 3900 *trimmed = 0; 3901 3902 spin_lock(&block_group->lock); 3903 if (block_group->removed) { 3904 spin_unlock(&block_group->lock); 3905 return 0; 3906 } 3907 btrfs_freeze_block_group(block_group); 3908 spin_unlock(&block_group->lock); 3909 3910 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3911 btrfs_unfreeze_block_group(block_group); 3912 3913 return ret; 3914 } 3915 3916 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3917 u64 *trimmed, u64 start, u64 end, u64 minlen, 3918 u64 maxlen, bool async) 3919 { 3920 int ret; 3921 3922 *trimmed = 0; 3923 3924 spin_lock(&block_group->lock); 3925 if (block_group->removed) { 3926 spin_unlock(&block_group->lock); 3927 return 0; 3928 } 3929 btrfs_freeze_block_group(block_group); 3930 spin_unlock(&block_group->lock); 3931 3932 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3933 async); 3934 3935 btrfs_unfreeze_block_group(block_group); 3936 3937 return ret; 3938 } 3939 3940 bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info) 3941 { 3942 return btrfs_super_cache_generation(fs_info->super_copy); 3943 } 3944 3945 static int 
static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret = 0;

	btrfs_info(fs_info, "cleaning free space cache v1");

	node = rb_first(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
		if (ret)
			goto out;
		node = rb_next(node);
	}
out:
	return ret;
}

int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/*
	 * update_super_roots will appropriately set or unset
	 * super_copy->cache_generation based on SPACE_CACHE and
	 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
	 * transaction commit whether we are enabling space cache v1 and don't
	 * have any other work to do, or are disabling it and removing free
	 * space inodes.
	 */
	trans = btrfs_start_transaction(fs_info->tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	if (!active) {
		set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
		ret = cleanup_free_space_cache_v1(fs_info, trans);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}

	ret = btrfs_commit_transaction(trans);
out:
	clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);

	return ret;
}
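
/*
 * Illustrative sketch (not part of this file): enabling or disabling the v1
 * cache is driven by the caller comparing the SPACE_CACHE mount option with
 * the on-disk state reported by btrfs_free_space_cache_v1_active(), roughly:
 *
 *	bool active = btrfs_test_opt(fs_info, SPACE_CACHE);
 *
 *	if (active != btrfs_free_space_cache_v1_active(fs_info))
 *		ret = btrfs_set_free_space_cache_v1_active(fs_info, active);
 *
 * btrfs_test_opt() exists elsewhere in btrfs; the exact call site and error
 * handling shown here are assumptions, only the two helpers above are taken
 * from this file.
 */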

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically. It
 * doesn't do any of the merging that add_free_space does; this acts a lot
 * like how the free space cache loading stuff works, so you can get really
 * weird configurations.
 */
int test_add_free_space_entry(struct btrfs_block_group *cache,
			      u64 offset, u64 bytes, bool bitmap)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info = NULL, *bitmap_info;
	void *map = NULL;
	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
	u64 bytes_added;
	int ret;

again:
	if (!info) {
		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
		if (!info)
			return -ENOMEM;
	}

	if (!bitmap) {
		spin_lock(&ctl->tree_lock);
		info->offset = offset;
		info->bytes = bytes;
		info->max_extent_size = 0;
		ret = link_free_space(ctl, info);
		spin_unlock(&ctl->tree_lock);
		if (ret)
			kmem_cache_free(btrfs_free_space_cachep, info);
		return ret;
	}

	if (!map) {
		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
		if (!map) {
			kmem_cache_free(btrfs_free_space_cachep, info);
			return -ENOMEM;
		}
	}

	spin_lock(&ctl->tree_lock);
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		info->bitmap = map;
		map = NULL;
		add_new_bitmap(ctl, info, offset);
		bitmap_info = info;
		info = NULL;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
					  trim_state);

	bytes -= bytes_added;
	offset += bytes_added;
	spin_unlock(&ctl->tree_lock);

	if (bytes)
		goto again;

	if (info)
		kmem_cache_free(btrfs_free_space_cachep, info);
	if (map)
		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
	return 0;
}
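
/*
 * Illustrative usage sketch for the sanity-test helpers in this section (the
 * real self-tests live under fs/btrfs/tests/; the offsets and sizes below are
 * made up for the example). Carve out one extent entry and one bitmap entry,
 * then confirm that test_check_exists(), defined below, only sees free space
 * in those ranges:
 *
 *	ret = test_add_free_space_entry(cache, 0, SZ_4K, false);
 *	if (!ret)
 *		ret = test_add_free_space_entry(cache, SZ_128M, SZ_4K, true);
 *	if (ret)
 *		return ret;
 *	if (!test_check_exists(cache, 0, SZ_4K) ||
 *	    test_check_exists(cache, SZ_1M, SZ_4K))
 *		return -EINVAL;
 */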

/*
 * Checks to see if the given range is in the free space cache. This is really
 * just used to check the absence of space, so if there is free space in the
 * range at all we will return 1.
 */
int test_check_exists(struct btrfs_block_group *cache,
		      u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&ctl->tree_lock);
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info)
			goto out;
	}

have_info:
	if (info->bitmap) {
		u64 bit_off, bit_bytes;
		struct rb_node *n;
		struct btrfs_free_space *tmp;

		bit_off = offset;
		bit_bytes = ctl->unit;
		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
		if (!ret) {
			if (bit_off == offset) {
				ret = 1;
				goto out;
			} else if (bit_off > offset &&
				   offset + bytes > bit_off) {
				ret = 1;
				goto out;
			}
		}

		n = rb_prev(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (tmp->offset + tmp->bytes < offset)
				break;
			if (offset + bytes < tmp->offset) {
				n = rb_prev(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		n = rb_next(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (offset + bytes < tmp->offset)
				break;
			if (tmp->offset + tmp->bytes < offset) {
				n = rb_next(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		ret = 0;
		goto out;
	}

	if (info->offset == offset) {
		ret = 1;
		goto out;
	}

	if (offset > info->offset && offset < info->offset + info->bytes)
		ret = 1;
out:
	spin_unlock(&ctl->tree_lock);
	return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */