1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Red Hat. All rights reserved. 4 */ 5 6 #include <linux/pagemap.h> 7 #include <linux/sched.h> 8 #include <linux/sched/signal.h> 9 #include <linux/slab.h> 10 #include <linux/math64.h> 11 #include <linux/ratelimit.h> 12 #include <linux/error-injection.h> 13 #include <linux/sched/mm.h> 14 #include "misc.h" 15 #include "ctree.h" 16 #include "free-space-cache.h" 17 #include "transaction.h" 18 #include "disk-io.h" 19 #include "extent_io.h" 20 #include "volumes.h" 21 #include "space-info.h" 22 #include "delalloc-space.h" 23 #include "block-group.h" 24 #include "discard.h" 25 #include "subpage.h" 26 27 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 28 #define MAX_CACHE_BYTES_PER_GIG SZ_64K 29 #define FORCE_EXTENT_THRESHOLD SZ_1M 30 31 struct btrfs_trim_range { 32 u64 start; 33 u64 bytes; 34 struct list_head list; 35 }; 36 37 static int link_free_space(struct btrfs_free_space_ctl *ctl, 38 struct btrfs_free_space *info); 39 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 40 struct btrfs_free_space *info); 41 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 42 struct btrfs_free_space *bitmap_info, u64 *offset, 43 u64 *bytes, bool for_alloc); 44 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 45 struct btrfs_free_space *bitmap_info); 46 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 47 struct btrfs_free_space *info, u64 offset, 48 u64 bytes); 49 50 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 51 struct btrfs_path *path, 52 u64 offset) 53 { 54 struct btrfs_fs_info *fs_info = root->fs_info; 55 struct btrfs_key key; 56 struct btrfs_key location; 57 struct btrfs_disk_key disk_key; 58 struct btrfs_free_space_header *header; 59 struct extent_buffer *leaf; 60 struct inode *inode = NULL; 61 unsigned nofs_flag; 62 int ret; 63 64 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 65 key.offset = offset; 66 key.type = 0; 67 68 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 69 if (ret < 0) 70 return ERR_PTR(ret); 71 if (ret > 0) { 72 btrfs_release_path(path); 73 return ERR_PTR(-ENOENT); 74 } 75 76 leaf = path->nodes[0]; 77 header = btrfs_item_ptr(leaf, path->slots[0], 78 struct btrfs_free_space_header); 79 btrfs_free_space_key(leaf, header, &disk_key); 80 btrfs_disk_key_to_cpu(&location, &disk_key); 81 btrfs_release_path(path); 82 83 /* 84 * We are often under a trans handle at this point, so we need to make 85 * sure NOFS is set to keep us from deadlocking. 
86 */ 87 nofs_flag = memalloc_nofs_save(); 88 inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path); 89 btrfs_release_path(path); 90 memalloc_nofs_restore(nofs_flag); 91 if (IS_ERR(inode)) 92 return inode; 93 94 mapping_set_gfp_mask(inode->i_mapping, 95 mapping_gfp_constraint(inode->i_mapping, 96 ~(__GFP_FS | __GFP_HIGHMEM))); 97 98 return inode; 99 } 100 101 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, 102 struct btrfs_path *path) 103 { 104 struct btrfs_fs_info *fs_info = block_group->fs_info; 105 struct inode *inode = NULL; 106 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 107 108 spin_lock(&block_group->lock); 109 if (block_group->inode) 110 inode = igrab(block_group->inode); 111 spin_unlock(&block_group->lock); 112 if (inode) 113 return inode; 114 115 inode = __lookup_free_space_inode(fs_info->tree_root, path, 116 block_group->start); 117 if (IS_ERR(inode)) 118 return inode; 119 120 spin_lock(&block_group->lock); 121 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 122 btrfs_info(fs_info, "Old style space inode found, converting."); 123 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 124 BTRFS_INODE_NODATACOW; 125 block_group->disk_cache_state = BTRFS_DC_CLEAR; 126 } 127 128 if (!block_group->iref) { 129 block_group->inode = igrab(inode); 130 block_group->iref = 1; 131 } 132 spin_unlock(&block_group->lock); 133 134 return inode; 135 } 136 137 static int __create_free_space_inode(struct btrfs_root *root, 138 struct btrfs_trans_handle *trans, 139 struct btrfs_path *path, 140 u64 ino, u64 offset) 141 { 142 struct btrfs_key key; 143 struct btrfs_disk_key disk_key; 144 struct btrfs_free_space_header *header; 145 struct btrfs_inode_item *inode_item; 146 struct extent_buffer *leaf; 147 /* We inline CRCs for the free disk space cache */ 148 const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC | 149 BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 150 int ret; 151 152 ret = btrfs_insert_empty_inode(trans, root, path, ino); 153 if (ret) 154 return ret; 155 156 leaf = path->nodes[0]; 157 inode_item = btrfs_item_ptr(leaf, path->slots[0], 158 struct btrfs_inode_item); 159 btrfs_item_key(leaf, &disk_key, path->slots[0]); 160 memzero_extent_buffer(leaf, (unsigned long)inode_item, 161 sizeof(*inode_item)); 162 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 163 btrfs_set_inode_size(leaf, inode_item, 0); 164 btrfs_set_inode_nbytes(leaf, inode_item, 0); 165 btrfs_set_inode_uid(leaf, inode_item, 0); 166 btrfs_set_inode_gid(leaf, inode_item, 0); 167 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 168 btrfs_set_inode_flags(leaf, inode_item, flags); 169 btrfs_set_inode_nlink(leaf, inode_item, 1); 170 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 171 btrfs_set_inode_block_group(leaf, inode_item, offset); 172 btrfs_mark_buffer_dirty(leaf); 173 btrfs_release_path(path); 174 175 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 176 key.offset = offset; 177 key.type = 0; 178 ret = btrfs_insert_empty_item(trans, root, path, &key, 179 sizeof(struct btrfs_free_space_header)); 180 if (ret < 0) { 181 btrfs_release_path(path); 182 return ret; 183 } 184 185 leaf = path->nodes[0]; 186 header = btrfs_item_ptr(leaf, path->slots[0], 187 struct btrfs_free_space_header); 188 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 189 btrfs_set_free_space_key(leaf, header, &disk_key); 190 btrfs_mark_buffer_dirty(leaf); 191 btrfs_release_path(path); 192 193 return 0; 194 } 195 196 int create_free_space_inode(struct 
				  btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

/*
 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
 * handles lookup, otherwise it takes ownership and iputs the inode.
 * Don't reuse an inode pointer after passing it into this function.
 */
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
				  struct inode *inode,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!inode)
		inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		if (PTR_ERR(inode) != -ENOENT)
			ret = PTR_ERR(inode);
		goto out;
	}
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret) {
		btrfs_add_delayed_iput(inode);
		goto out;
	}
	clear_nlink(inode);
	/* One for the block group's ref */
	spin_lock(&block_group->lock);
	if (block_group->iref) {
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		iput(inode);
	} else {
		spin_unlock(&block_group->lock);
	}
	/* One for the lookup ref */
	btrfs_add_delayed_iput(inode);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = block_group->start;
	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
				-1, 1);
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto out;
	}
	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		       btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
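	 * (Passing BTRFS_EXTENT_DATA_KEY below only drops the file extent
	 * items backing the cache; the inode item itself is kept and is
	 * updated afterwards via btrfs_update_inode().)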
329 */ 330 ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode), 331 0, BTRFS_EXTENT_DATA_KEY, NULL); 332 if (ret) 333 goto fail; 334 335 ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); 336 337 fail: 338 if (locked) 339 mutex_unlock(&trans->transaction->cache_write_mutex); 340 if (ret) 341 btrfs_abort_transaction(trans, ret); 342 343 return ret; 344 } 345 346 static void readahead_cache(struct inode *inode) 347 { 348 struct file_ra_state ra; 349 unsigned long last_index; 350 351 file_ra_state_init(&ra, inode->i_mapping); 352 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 353 354 page_cache_sync_readahead(inode->i_mapping, &ra, NULL, 0, last_index); 355 } 356 357 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 358 int write) 359 { 360 int num_pages; 361 362 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 363 364 /* Make sure we can fit our crcs and generation into the first page */ 365 if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 366 return -ENOSPC; 367 368 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 369 370 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 371 if (!io_ctl->pages) 372 return -ENOMEM; 373 374 io_ctl->num_pages = num_pages; 375 io_ctl->fs_info = btrfs_sb(inode->i_sb); 376 io_ctl->inode = inode; 377 378 return 0; 379 } 380 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 381 382 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 383 { 384 kfree(io_ctl->pages); 385 io_ctl->pages = NULL; 386 } 387 388 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 389 { 390 if (io_ctl->cur) { 391 io_ctl->cur = NULL; 392 io_ctl->orig = NULL; 393 } 394 } 395 396 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 397 { 398 ASSERT(io_ctl->index < io_ctl->num_pages); 399 io_ctl->page = io_ctl->pages[io_ctl->index++]; 400 io_ctl->cur = page_address(io_ctl->page); 401 io_ctl->orig = io_ctl->cur; 402 io_ctl->size = PAGE_SIZE; 403 if (clear) 404 clear_page(io_ctl->cur); 405 } 406 407 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 408 { 409 int i; 410 411 io_ctl_unmap_page(io_ctl); 412 413 for (i = 0; i < io_ctl->num_pages; i++) { 414 if (io_ctl->pages[i]) { 415 btrfs_page_clear_checked(io_ctl->fs_info, 416 io_ctl->pages[i], 417 page_offset(io_ctl->pages[i]), 418 PAGE_SIZE); 419 unlock_page(io_ctl->pages[i]); 420 put_page(io_ctl->pages[i]); 421 } 422 } 423 } 424 425 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate) 426 { 427 struct page *page; 428 struct inode *inode = io_ctl->inode; 429 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 430 int i; 431 432 for (i = 0; i < io_ctl->num_pages; i++) { 433 int ret; 434 435 page = find_or_create_page(inode->i_mapping, i, mask); 436 if (!page) { 437 io_ctl_drop_pages(io_ctl); 438 return -ENOMEM; 439 } 440 441 ret = set_page_extent_mapped(page); 442 if (ret < 0) { 443 unlock_page(page); 444 put_page(page); 445 io_ctl_drop_pages(io_ctl); 446 return ret; 447 } 448 449 io_ctl->pages[i] = page; 450 if (uptodate && !PageUptodate(page)) { 451 btrfs_readpage(NULL, page); 452 lock_page(page); 453 if (page->mapping != inode->i_mapping) { 454 btrfs_err(BTRFS_I(inode)->root->fs_info, 455 "free space cache page truncated"); 456 io_ctl_drop_pages(io_ctl); 457 return -EIO; 458 } 459 if (!PageUptodate(page)) { 460 btrfs_err(BTRFS_I(inode)->root->fs_info, 461 "error reading free space cache"); 462 io_ctl_drop_pages(io_ctl); 463 return -EIO; 464 } 465 } 466 } 467 468 for (i = 0; i < io_ctl->num_pages; i++) 469 
clear_page_dirty_for_io(io_ctl->pages[i]); 470 471 return 0; 472 } 473 474 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 475 { 476 io_ctl_map_page(io_ctl, 1); 477 478 /* 479 * Skip the csum areas. If we don't check crcs then we just have a 480 * 64bit chunk at the front of the first page. 481 */ 482 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 483 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 484 485 put_unaligned_le64(generation, io_ctl->cur); 486 io_ctl->cur += sizeof(u64); 487 } 488 489 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 490 { 491 u64 cache_gen; 492 493 /* 494 * Skip the crc area. If we don't check crcs then we just have a 64bit 495 * chunk at the front of the first page. 496 */ 497 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 498 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 499 500 cache_gen = get_unaligned_le64(io_ctl->cur); 501 if (cache_gen != generation) { 502 btrfs_err_rl(io_ctl->fs_info, 503 "space cache generation (%llu) does not match inode (%llu)", 504 cache_gen, generation); 505 io_ctl_unmap_page(io_ctl); 506 return -EIO; 507 } 508 io_ctl->cur += sizeof(u64); 509 return 0; 510 } 511 512 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 513 { 514 u32 *tmp; 515 u32 crc = ~(u32)0; 516 unsigned offset = 0; 517 518 if (index == 0) 519 offset = sizeof(u32) * io_ctl->num_pages; 520 521 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 522 btrfs_crc32c_final(crc, (u8 *)&crc); 523 io_ctl_unmap_page(io_ctl); 524 tmp = page_address(io_ctl->pages[0]); 525 tmp += index; 526 *tmp = crc; 527 } 528 529 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 530 { 531 u32 *tmp, val; 532 u32 crc = ~(u32)0; 533 unsigned offset = 0; 534 535 if (index == 0) 536 offset = sizeof(u32) * io_ctl->num_pages; 537 538 tmp = page_address(io_ctl->pages[0]); 539 tmp += index; 540 val = *tmp; 541 542 io_ctl_map_page(io_ctl, 0); 543 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 544 btrfs_crc32c_final(crc, (u8 *)&crc); 545 if (val != crc) { 546 btrfs_err_rl(io_ctl->fs_info, 547 "csum mismatch on free space cache"); 548 io_ctl_unmap_page(io_ctl); 549 return -EIO; 550 } 551 552 return 0; 553 } 554 555 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 556 void *bitmap) 557 { 558 struct btrfs_free_space_entry *entry; 559 560 if (!io_ctl->cur) 561 return -ENOSPC; 562 563 entry = io_ctl->cur; 564 put_unaligned_le64(offset, &entry->offset); 565 put_unaligned_le64(bytes, &entry->bytes); 566 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 567 BTRFS_FREE_SPACE_EXTENT; 568 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 569 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 570 571 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 572 return 0; 573 574 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 575 576 /* No more pages to map */ 577 if (io_ctl->index >= io_ctl->num_pages) 578 return 0; 579 580 /* map the next page */ 581 io_ctl_map_page(io_ctl, 1); 582 return 0; 583 } 584 585 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 586 { 587 if (!io_ctl->cur) 588 return -ENOSPC; 589 590 /* 591 * If we aren't at the start of the current page, unmap this one and 592 * map the next one if there is any left. 
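	 * Each bitmap occupies exactly one page of the cache file
	 * (BITS_PER_BITMAP == PAGE_SIZE * 8); with 4KiB pages and a 4KiB
	 * ctl->unit a single bitmap therefore describes 128MiB of space.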
593 */ 594 if (io_ctl->cur != io_ctl->orig) { 595 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 596 if (io_ctl->index >= io_ctl->num_pages) 597 return -ENOSPC; 598 io_ctl_map_page(io_ctl, 0); 599 } 600 601 copy_page(io_ctl->cur, bitmap); 602 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 603 if (io_ctl->index < io_ctl->num_pages) 604 io_ctl_map_page(io_ctl, 0); 605 return 0; 606 } 607 608 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 609 { 610 /* 611 * If we're not on the boundary we know we've modified the page and we 612 * need to crc the page. 613 */ 614 if (io_ctl->cur != io_ctl->orig) 615 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 616 else 617 io_ctl_unmap_page(io_ctl); 618 619 while (io_ctl->index < io_ctl->num_pages) { 620 io_ctl_map_page(io_ctl, 1); 621 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 622 } 623 } 624 625 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 626 struct btrfs_free_space *entry, u8 *type) 627 { 628 struct btrfs_free_space_entry *e; 629 int ret; 630 631 if (!io_ctl->cur) { 632 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 633 if (ret) 634 return ret; 635 } 636 637 e = io_ctl->cur; 638 entry->offset = get_unaligned_le64(&e->offset); 639 entry->bytes = get_unaligned_le64(&e->bytes); 640 *type = e->type; 641 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 642 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 643 644 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 645 return 0; 646 647 io_ctl_unmap_page(io_ctl); 648 649 return 0; 650 } 651 652 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 653 struct btrfs_free_space *entry) 654 { 655 int ret; 656 657 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 658 if (ret) 659 return ret; 660 661 copy_page(entry->bitmap, io_ctl->cur); 662 io_ctl_unmap_page(io_ctl); 663 664 return 0; 665 } 666 667 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 668 { 669 struct btrfs_block_group *block_group = ctl->private; 670 u64 max_bytes; 671 u64 bitmap_bytes; 672 u64 extent_bytes; 673 u64 size = block_group->length; 674 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 675 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 676 677 max_bitmaps = max_t(u64, max_bitmaps, 1); 678 679 ASSERT(ctl->total_bitmaps <= max_bitmaps); 680 681 /* 682 * We are trying to keep the total amount of memory used per 1GiB of 683 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation 684 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of 685 * bitmaps, we may end up using more memory than this. 686 */ 687 if (size < SZ_1G) 688 max_bytes = MAX_CACHE_BYTES_PER_GIG; 689 else 690 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); 691 692 bitmap_bytes = ctl->total_bitmaps * ctl->unit; 693 694 /* 695 * we want the extent entry threshold to always be at most 1/2 the max 696 * bytes we can have, or whatever is less than that. 
697 */ 698 extent_bytes = max_bytes - bitmap_bytes; 699 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 700 701 ctl->extents_thresh = 702 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 703 } 704 705 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 706 struct btrfs_free_space_ctl *ctl, 707 struct btrfs_path *path, u64 offset) 708 { 709 struct btrfs_fs_info *fs_info = root->fs_info; 710 struct btrfs_free_space_header *header; 711 struct extent_buffer *leaf; 712 struct btrfs_io_ctl io_ctl; 713 struct btrfs_key key; 714 struct btrfs_free_space *e, *n; 715 LIST_HEAD(bitmaps); 716 u64 num_entries; 717 u64 num_bitmaps; 718 u64 generation; 719 u8 type; 720 int ret = 0; 721 722 /* Nothing in the space cache, goodbye */ 723 if (!i_size_read(inode)) 724 return 0; 725 726 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 727 key.offset = offset; 728 key.type = 0; 729 730 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 731 if (ret < 0) 732 return 0; 733 else if (ret > 0) { 734 btrfs_release_path(path); 735 return 0; 736 } 737 738 ret = -1; 739 740 leaf = path->nodes[0]; 741 header = btrfs_item_ptr(leaf, path->slots[0], 742 struct btrfs_free_space_header); 743 num_entries = btrfs_free_space_entries(leaf, header); 744 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 745 generation = btrfs_free_space_generation(leaf, header); 746 btrfs_release_path(path); 747 748 if (!BTRFS_I(inode)->generation) { 749 btrfs_info(fs_info, 750 "the free space cache file (%llu) is invalid, skip it", 751 offset); 752 return 0; 753 } 754 755 if (BTRFS_I(inode)->generation != generation) { 756 btrfs_err(fs_info, 757 "free space inode generation (%llu) did not match free space cache generation (%llu)", 758 BTRFS_I(inode)->generation, generation); 759 return 0; 760 } 761 762 if (!num_entries) 763 return 0; 764 765 ret = io_ctl_init(&io_ctl, inode, 0); 766 if (ret) 767 return ret; 768 769 readahead_cache(inode); 770 771 ret = io_ctl_prepare_pages(&io_ctl, true); 772 if (ret) 773 goto out; 774 775 ret = io_ctl_check_crc(&io_ctl, 0); 776 if (ret) 777 goto free_cache; 778 779 ret = io_ctl_check_generation(&io_ctl, generation); 780 if (ret) 781 goto free_cache; 782 783 while (num_entries) { 784 e = kmem_cache_zalloc(btrfs_free_space_cachep, 785 GFP_NOFS); 786 if (!e) { 787 ret = -ENOMEM; 788 goto free_cache; 789 } 790 791 ret = io_ctl_read_entry(&io_ctl, e, &type); 792 if (ret) { 793 kmem_cache_free(btrfs_free_space_cachep, e); 794 goto free_cache; 795 } 796 797 if (!e->bytes) { 798 ret = -1; 799 kmem_cache_free(btrfs_free_space_cachep, e); 800 goto free_cache; 801 } 802 803 if (type == BTRFS_FREE_SPACE_EXTENT) { 804 spin_lock(&ctl->tree_lock); 805 ret = link_free_space(ctl, e); 806 spin_unlock(&ctl->tree_lock); 807 if (ret) { 808 btrfs_err(fs_info, 809 "Duplicate entries in free space cache, dumping"); 810 kmem_cache_free(btrfs_free_space_cachep, e); 811 goto free_cache; 812 } 813 } else { 814 ASSERT(num_bitmaps); 815 num_bitmaps--; 816 e->bitmap = kmem_cache_zalloc( 817 btrfs_free_space_bitmap_cachep, GFP_NOFS); 818 if (!e->bitmap) { 819 ret = -ENOMEM; 820 kmem_cache_free( 821 btrfs_free_space_cachep, e); 822 goto free_cache; 823 } 824 spin_lock(&ctl->tree_lock); 825 ret = link_free_space(ctl, e); 826 ctl->total_bitmaps++; 827 recalculate_thresholds(ctl); 828 spin_unlock(&ctl->tree_lock); 829 if (ret) { 830 btrfs_err(fs_info, 831 "Duplicate entries in free space cache, dumping"); 832 kmem_cache_free(btrfs_free_space_cachep, e); 833 goto free_cache; 834 } 835 
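			/*
			 * The bits backing this bitmap live in later pages of
			 * the cache file, so only queue the entry here; it is
			 * filled in by io_ctl_read_bitmap() once all of the
			 * entry records have been consumed.
			 */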
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * The bitmap payloads are stored after all of the entry records, in
	 * the same order in which the bitmap entries were added to the cache,
	 * so read them back into the queued entries now.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

static int copy_free_space_cache(struct btrfs_block_group *block_group,
				 struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int ret = 0;

	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			ret = btrfs_add_free_space(block_group, info->offset,
						   info->bytes);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			u64 offset = info->offset;
			u64 bytes = ctl->unit;

			while (search_bitmap(ctl, info, &offset, &bytes,
					     false) == 0) {
				ret = btrfs_add_free_space(block_group, offset,
							   bytes);
				if (ret)
					break;
				bitmap_clear_bits(ctl, info, offset, bytes);
				offset = info->offset;
				bytes = ctl->unit;
			}
			free_bitmap(ctl, info);
		}
		cond_resched();
	}
	return ret;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space_ctl tmp_ctl = {};
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = block_group->used;

	/*
	 * Because we could potentially discard our loaded free space, we want
	 * to load everything into a temporary structure first, and then if it's
	 * valid copy it all into the actual free space ctl.
	 */
	btrfs_init_free_space_ctl(block_group, &tmp_ctl);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find a
	 * block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in
	 * deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
947 * 948 * It's safe to read the inode item using the commit root because 949 * block groups, once loaded, stay in memory forever (until they are 950 * removed) as well as their space caches once loaded. New block groups 951 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 952 * we will never try to read their inode item while the fs is mounted. 953 */ 954 inode = lookup_free_space_inode(block_group, path); 955 if (IS_ERR(inode)) { 956 btrfs_free_path(path); 957 return 0; 958 } 959 960 /* We may have converted the inode and made the cache invalid. */ 961 spin_lock(&block_group->lock); 962 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 963 spin_unlock(&block_group->lock); 964 btrfs_free_path(path); 965 goto out; 966 } 967 spin_unlock(&block_group->lock); 968 969 ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl, 970 path, block_group->start); 971 btrfs_free_path(path); 972 if (ret <= 0) 973 goto out; 974 975 matched = (tmp_ctl.free_space == (block_group->length - used - 976 block_group->bytes_super)); 977 978 if (matched) { 979 ret = copy_free_space_cache(block_group, &tmp_ctl); 980 /* 981 * ret == 1 means we successfully loaded the free space cache, 982 * so we need to re-set it here. 983 */ 984 if (ret == 0) 985 ret = 1; 986 } else { 987 __btrfs_remove_free_space_cache(&tmp_ctl); 988 btrfs_warn(fs_info, 989 "block group %llu has wrong amount of free space", 990 block_group->start); 991 ret = -1; 992 } 993 out: 994 if (ret < 0) { 995 /* This cache is bogus, make sure it gets cleared */ 996 spin_lock(&block_group->lock); 997 block_group->disk_cache_state = BTRFS_DC_CLEAR; 998 spin_unlock(&block_group->lock); 999 ret = 0; 1000 1001 btrfs_warn(fs_info, 1002 "failed to load free space cache for block group %llu, rebuilding it now", 1003 block_group->start); 1004 } 1005 1006 spin_lock(&ctl->tree_lock); 1007 btrfs_discard_update_discardable(block_group); 1008 spin_unlock(&ctl->tree_lock); 1009 iput(inode); 1010 return ret; 1011 } 1012 1013 static noinline_for_stack 1014 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 1015 struct btrfs_free_space_ctl *ctl, 1016 struct btrfs_block_group *block_group, 1017 int *entries, int *bitmaps, 1018 struct list_head *bitmap_list) 1019 { 1020 int ret; 1021 struct btrfs_free_cluster *cluster = NULL; 1022 struct btrfs_free_cluster *cluster_locked = NULL; 1023 struct rb_node *node = rb_first(&ctl->free_space_offset); 1024 struct btrfs_trim_range *trim_entry; 1025 1026 /* Get the cluster for this block_group if it exists */ 1027 if (block_group && !list_empty(&block_group->cluster_list)) { 1028 cluster = list_entry(block_group->cluster_list.next, 1029 struct btrfs_free_cluster, 1030 block_group_list); 1031 } 1032 1033 if (!node && cluster) { 1034 cluster_locked = cluster; 1035 spin_lock(&cluster_locked->lock); 1036 node = rb_first(&cluster->root); 1037 cluster = NULL; 1038 } 1039 1040 /* Write out the extent entries */ 1041 while (node) { 1042 struct btrfs_free_space *e; 1043 1044 e = rb_entry(node, struct btrfs_free_space, offset_index); 1045 *entries += 1; 1046 1047 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 1048 e->bitmap); 1049 if (ret) 1050 goto fail; 1051 1052 if (e->bitmap) { 1053 list_add_tail(&e->list, bitmap_list); 1054 *bitmaps += 1; 1055 } 1056 node = rb_next(node); 1057 if (!node && cluster) { 1058 node = rb_first(&cluster->root); 1059 cluster_locked = cluster; 1060 spin_lock(&cluster_locked->lock); 1061 cluster = NULL; 1062 } 1063 } 1064 if (cluster_locked) { 1065 
spin_unlock(&cluster_locked->lock); 1066 cluster_locked = NULL; 1067 } 1068 1069 /* 1070 * Make sure we don't miss any range that was removed from our rbtree 1071 * because trimming is running. Otherwise after a umount+mount (or crash 1072 * after committing the transaction) we would leak free space and get 1073 * an inconsistent free space cache report from fsck. 1074 */ 1075 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 1076 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 1077 trim_entry->bytes, NULL); 1078 if (ret) 1079 goto fail; 1080 *entries += 1; 1081 } 1082 1083 return 0; 1084 fail: 1085 if (cluster_locked) 1086 spin_unlock(&cluster_locked->lock); 1087 return -ENOSPC; 1088 } 1089 1090 static noinline_for_stack int 1091 update_cache_item(struct btrfs_trans_handle *trans, 1092 struct btrfs_root *root, 1093 struct inode *inode, 1094 struct btrfs_path *path, u64 offset, 1095 int entries, int bitmaps) 1096 { 1097 struct btrfs_key key; 1098 struct btrfs_free_space_header *header; 1099 struct extent_buffer *leaf; 1100 int ret; 1101 1102 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1103 key.offset = offset; 1104 key.type = 0; 1105 1106 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1107 if (ret < 0) { 1108 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1109 EXTENT_DELALLOC, 0, 0, NULL); 1110 goto fail; 1111 } 1112 leaf = path->nodes[0]; 1113 if (ret > 0) { 1114 struct btrfs_key found_key; 1115 ASSERT(path->slots[0]); 1116 path->slots[0]--; 1117 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1118 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1119 found_key.offset != offset) { 1120 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1121 inode->i_size - 1, EXTENT_DELALLOC, 0, 1122 0, NULL); 1123 btrfs_release_path(path); 1124 goto fail; 1125 } 1126 } 1127 1128 BTRFS_I(inode)->generation = trans->transid; 1129 header = btrfs_item_ptr(leaf, path->slots[0], 1130 struct btrfs_free_space_header); 1131 btrfs_set_free_space_entries(leaf, header, entries); 1132 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1133 btrfs_set_free_space_generation(leaf, header, trans->transid); 1134 btrfs_mark_buffer_dirty(leaf); 1135 btrfs_release_path(path); 1136 1137 return 0; 1138 1139 fail: 1140 return -1; 1141 } 1142 1143 static noinline_for_stack int write_pinned_extent_entries( 1144 struct btrfs_trans_handle *trans, 1145 struct btrfs_block_group *block_group, 1146 struct btrfs_io_ctl *io_ctl, 1147 int *entries) 1148 { 1149 u64 start, extent_start, extent_end, len; 1150 struct extent_io_tree *unpin = NULL; 1151 int ret; 1152 1153 if (!block_group) 1154 return 0; 1155 1156 /* 1157 * We want to add any pinned extents to our free space cache 1158 * so we don't leak the space 1159 * 1160 * We shouldn't have switched the pinned extents yet so this is the 1161 * right one 1162 */ 1163 unpin = &trans->transaction->pinned_extents; 1164 1165 start = block_group->start; 1166 1167 while (start < block_group->start + block_group->length) { 1168 ret = find_first_extent_bit(unpin, start, 1169 &extent_start, &extent_end, 1170 EXTENT_DIRTY, NULL); 1171 if (ret) 1172 return 0; 1173 1174 /* This pinned extent is out of our range */ 1175 if (extent_start >= block_group->start + block_group->length) 1176 return 0; 1177 1178 extent_start = max(extent_start, start); 1179 extent_end = min(block_group->start + block_group->length, 1180 extent_end + 1); 1181 len = extent_end - extent_start; 1182 1183 *entries += 1; 1184 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 
1185 if (ret) 1186 return -ENOSPC; 1187 1188 start = extent_end; 1189 } 1190 1191 return 0; 1192 } 1193 1194 static noinline_for_stack int 1195 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1196 { 1197 struct btrfs_free_space *entry, *next; 1198 int ret; 1199 1200 /* Write out the bitmaps */ 1201 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1202 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1203 if (ret) 1204 return -ENOSPC; 1205 list_del_init(&entry->list); 1206 } 1207 1208 return 0; 1209 } 1210 1211 static int flush_dirty_cache(struct inode *inode) 1212 { 1213 int ret; 1214 1215 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1216 if (ret) 1217 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1218 EXTENT_DELALLOC, 0, 0, NULL); 1219 1220 return ret; 1221 } 1222 1223 static void noinline_for_stack 1224 cleanup_bitmap_list(struct list_head *bitmap_list) 1225 { 1226 struct btrfs_free_space *entry, *next; 1227 1228 list_for_each_entry_safe(entry, next, bitmap_list, list) 1229 list_del_init(&entry->list); 1230 } 1231 1232 static void noinline_for_stack 1233 cleanup_write_cache_enospc(struct inode *inode, 1234 struct btrfs_io_ctl *io_ctl, 1235 struct extent_state **cached_state) 1236 { 1237 io_ctl_drop_pages(io_ctl); 1238 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1239 i_size_read(inode) - 1, cached_state); 1240 } 1241 1242 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1243 struct btrfs_trans_handle *trans, 1244 struct btrfs_block_group *block_group, 1245 struct btrfs_io_ctl *io_ctl, 1246 struct btrfs_path *path, u64 offset) 1247 { 1248 int ret; 1249 struct inode *inode = io_ctl->inode; 1250 1251 if (!inode) 1252 return 0; 1253 1254 /* Flush the dirty pages in the cache file. */ 1255 ret = flush_dirty_cache(inode); 1256 if (ret) 1257 goto out; 1258 1259 /* Update the cache item to tell everyone this cache file is valid. */ 1260 ret = update_cache_item(trans, root, inode, path, offset, 1261 io_ctl->entries, io_ctl->bitmaps); 1262 out: 1263 if (ret) { 1264 invalidate_inode_pages2(inode->i_mapping); 1265 BTRFS_I(inode)->generation = 0; 1266 if (block_group) 1267 btrfs_debug(root->fs_info, 1268 "failed to write free space cache for block group %llu error %d", 1269 block_group->start, ret); 1270 } 1271 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1272 1273 if (block_group) { 1274 /* the dirty list is protected by the dirty_bgs_lock */ 1275 spin_lock(&trans->transaction->dirty_bgs_lock); 1276 1277 /* the disk_cache_state is protected by the block group lock */ 1278 spin_lock(&block_group->lock); 1279 1280 /* 1281 * only mark this as written if we didn't get put back on 1282 * the dirty list while waiting for IO. 
Otherwise our 1283 * cache state won't be right, and we won't get written again 1284 */ 1285 if (!ret && list_empty(&block_group->dirty_list)) 1286 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1287 else if (ret) 1288 block_group->disk_cache_state = BTRFS_DC_ERROR; 1289 1290 spin_unlock(&block_group->lock); 1291 spin_unlock(&trans->transaction->dirty_bgs_lock); 1292 io_ctl->inode = NULL; 1293 iput(inode); 1294 } 1295 1296 return ret; 1297 1298 } 1299 1300 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1301 struct btrfs_block_group *block_group, 1302 struct btrfs_path *path) 1303 { 1304 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1305 block_group, &block_group->io_ctl, 1306 path, block_group->start); 1307 } 1308 1309 /** 1310 * Write out cached info to an inode 1311 * 1312 * @root: root the inode belongs to 1313 * @inode: freespace inode we are writing out 1314 * @ctl: free space cache we are going to write out 1315 * @block_group: block_group for this cache if it belongs to a block_group 1316 * @io_ctl: holds context for the io 1317 * @trans: the trans handle 1318 * 1319 * This function writes out a free space cache struct to disk for quick recovery 1320 * on mount. This will return 0 if it was successful in writing the cache out, 1321 * or an errno if it was not. 1322 */ 1323 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1324 struct btrfs_free_space_ctl *ctl, 1325 struct btrfs_block_group *block_group, 1326 struct btrfs_io_ctl *io_ctl, 1327 struct btrfs_trans_handle *trans) 1328 { 1329 struct extent_state *cached_state = NULL; 1330 LIST_HEAD(bitmap_list); 1331 int entries = 0; 1332 int bitmaps = 0; 1333 int ret; 1334 int must_iput = 0; 1335 1336 if (!i_size_read(inode)) 1337 return -EIO; 1338 1339 WARN_ON(io_ctl->pages); 1340 ret = io_ctl_init(io_ctl, inode, 1); 1341 if (ret) 1342 return ret; 1343 1344 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1345 down_write(&block_group->data_rwsem); 1346 spin_lock(&block_group->lock); 1347 if (block_group->delalloc_bytes) { 1348 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1349 spin_unlock(&block_group->lock); 1350 up_write(&block_group->data_rwsem); 1351 BTRFS_I(inode)->generation = 0; 1352 ret = 0; 1353 must_iput = 1; 1354 goto out; 1355 } 1356 spin_unlock(&block_group->lock); 1357 } 1358 1359 /* Lock all pages first so we can lock the extent safely. */ 1360 ret = io_ctl_prepare_pages(io_ctl, false); 1361 if (ret) 1362 goto out_unlock; 1363 1364 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1365 &cached_state); 1366 1367 io_ctl_set_generation(io_ctl, trans->transid); 1368 1369 mutex_lock(&ctl->cache_writeout_mutex); 1370 /* Write out the extent entries in the free space cache */ 1371 spin_lock(&ctl->tree_lock); 1372 ret = write_cache_extent_entries(io_ctl, ctl, 1373 block_group, &entries, &bitmaps, 1374 &bitmap_list); 1375 if (ret) 1376 goto out_nospc_locked; 1377 1378 /* 1379 * Some spaces that are freed in the current transaction are pinned, 1380 * they will be added into free space cache after the transaction is 1381 * committed, we shouldn't lose them. 1382 * 1383 * If this changes while we are working we'll get added back to 1384 * the dirty list and redo it. 
No locking needed 1385 */ 1386 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries); 1387 if (ret) 1388 goto out_nospc_locked; 1389 1390 /* 1391 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1392 * locked while doing it because a concurrent trim can be manipulating 1393 * or freeing the bitmap. 1394 */ 1395 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1396 spin_unlock(&ctl->tree_lock); 1397 mutex_unlock(&ctl->cache_writeout_mutex); 1398 if (ret) 1399 goto out_nospc; 1400 1401 /* Zero out the rest of the pages just to make sure */ 1402 io_ctl_zero_remaining_pages(io_ctl); 1403 1404 /* Everything is written out, now we dirty the pages in the file. */ 1405 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages, 1406 io_ctl->num_pages, 0, i_size_read(inode), 1407 &cached_state, false); 1408 if (ret) 1409 goto out_nospc; 1410 1411 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1412 up_write(&block_group->data_rwsem); 1413 /* 1414 * Release the pages and unlock the extent, we will flush 1415 * them out later 1416 */ 1417 io_ctl_drop_pages(io_ctl); 1418 io_ctl_free(io_ctl); 1419 1420 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1421 i_size_read(inode) - 1, &cached_state); 1422 1423 /* 1424 * at this point the pages are under IO and we're happy, 1425 * The caller is responsible for waiting on them and updating 1426 * the cache and the inode 1427 */ 1428 io_ctl->entries = entries; 1429 io_ctl->bitmaps = bitmaps; 1430 1431 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1432 if (ret) 1433 goto out; 1434 1435 return 0; 1436 1437 out_nospc_locked: 1438 cleanup_bitmap_list(&bitmap_list); 1439 spin_unlock(&ctl->tree_lock); 1440 mutex_unlock(&ctl->cache_writeout_mutex); 1441 1442 out_nospc: 1443 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1444 1445 out_unlock: 1446 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1447 up_write(&block_group->data_rwsem); 1448 1449 out: 1450 io_ctl->inode = NULL; 1451 io_ctl_free(io_ctl); 1452 if (ret) { 1453 invalidate_inode_pages2(inode->i_mapping); 1454 BTRFS_I(inode)->generation = 0; 1455 } 1456 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1457 if (must_iput) 1458 iput(inode); 1459 return ret; 1460 } 1461 1462 int btrfs_write_out_cache(struct btrfs_trans_handle *trans, 1463 struct btrfs_block_group *block_group, 1464 struct btrfs_path *path) 1465 { 1466 struct btrfs_fs_info *fs_info = trans->fs_info; 1467 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1468 struct inode *inode; 1469 int ret = 0; 1470 1471 spin_lock(&block_group->lock); 1472 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1473 spin_unlock(&block_group->lock); 1474 return 0; 1475 } 1476 spin_unlock(&block_group->lock); 1477 1478 inode = lookup_free_space_inode(block_group, path); 1479 if (IS_ERR(inode)) 1480 return 0; 1481 1482 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1483 block_group, &block_group->io_ctl, trans); 1484 if (ret) { 1485 btrfs_debug(fs_info, 1486 "failed to write free space cache for block group %llu error %d", 1487 block_group->start, ret); 1488 spin_lock(&block_group->lock); 1489 block_group->disk_cache_state = BTRFS_DC_ERROR; 1490 spin_unlock(&block_group->lock); 1491 1492 block_group->io_ctl.inode = NULL; 1493 iput(inode); 1494 } 1495 1496 /* 1497 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1498 * to wait for IO and put the inode 1499 */ 1500 1501 return ret; 1502 } 1503 1504 static inline unsigned long 
offset_to_bit(u64 bitmap_start, u32 unit, 1505 u64 offset) 1506 { 1507 ASSERT(offset >= bitmap_start); 1508 offset -= bitmap_start; 1509 return (unsigned long)(div_u64(offset, unit)); 1510 } 1511 1512 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1513 { 1514 return (unsigned long)(div_u64(bytes, unit)); 1515 } 1516 1517 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1518 u64 offset) 1519 { 1520 u64 bitmap_start; 1521 u64 bytes_per_bitmap; 1522 1523 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1524 bitmap_start = offset - ctl->start; 1525 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1526 bitmap_start *= bytes_per_bitmap; 1527 bitmap_start += ctl->start; 1528 1529 return bitmap_start; 1530 } 1531 1532 static int tree_insert_offset(struct rb_root *root, u64 offset, 1533 struct rb_node *node, int bitmap) 1534 { 1535 struct rb_node **p = &root->rb_node; 1536 struct rb_node *parent = NULL; 1537 struct btrfs_free_space *info; 1538 1539 while (*p) { 1540 parent = *p; 1541 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1542 1543 if (offset < info->offset) { 1544 p = &(*p)->rb_left; 1545 } else if (offset > info->offset) { 1546 p = &(*p)->rb_right; 1547 } else { 1548 /* 1549 * we could have a bitmap entry and an extent entry 1550 * share the same offset. If this is the case, we want 1551 * the extent entry to always be found first if we do a 1552 * linear search through the tree, since we want to have 1553 * the quickest allocation time, and allocating from an 1554 * extent is faster than allocating from a bitmap. So 1555 * if we're inserting a bitmap and we find an entry at 1556 * this offset, we want to go right, or after this entry 1557 * logically. If we are inserting an extent and we've 1558 * found a bitmap, we want to go left, or before 1559 * logically. 1560 */ 1561 if (bitmap) { 1562 if (info->bitmap) { 1563 WARN_ON_ONCE(1); 1564 return -EEXIST; 1565 } 1566 p = &(*p)->rb_right; 1567 } else { 1568 if (!info->bitmap) { 1569 WARN_ON_ONCE(1); 1570 return -EEXIST; 1571 } 1572 p = &(*p)->rb_left; 1573 } 1574 } 1575 } 1576 1577 rb_link_node(node, parent, p); 1578 rb_insert_color(node, root); 1579 1580 return 0; 1581 } 1582 1583 /* 1584 * searches the tree for the given offset. 1585 * 1586 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1587 * want a section that has at least bytes size and comes at or after the given 1588 * offset. 1589 */ 1590 static struct btrfs_free_space * 1591 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1592 u64 offset, int bitmap_only, int fuzzy) 1593 { 1594 struct rb_node *n = ctl->free_space_offset.rb_node; 1595 struct btrfs_free_space *entry, *prev = NULL; 1596 1597 /* find entry that is closest to the 'offset' */ 1598 while (1) { 1599 if (!n) { 1600 entry = NULL; 1601 break; 1602 } 1603 1604 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1605 prev = entry; 1606 1607 if (offset < entry->offset) 1608 n = n->rb_left; 1609 else if (offset > entry->offset) 1610 n = n->rb_right; 1611 else 1612 break; 1613 } 1614 1615 if (bitmap_only) { 1616 if (!entry) 1617 return NULL; 1618 if (entry->bitmap) 1619 return entry; 1620 1621 /* 1622 * bitmap entry and extent entry may share same offset, 1623 * in that case, bitmap entry comes after extent entry. 
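	 * (tree_insert_offset() guarantees that ordering: at equal offsets the
	 * extent entry is linked to the left and the bitmap entry to the
	 * right.)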
1624 */ 1625 n = rb_next(n); 1626 if (!n) 1627 return NULL; 1628 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1629 if (entry->offset != offset) 1630 return NULL; 1631 1632 WARN_ON(!entry->bitmap); 1633 return entry; 1634 } else if (entry) { 1635 if (entry->bitmap) { 1636 /* 1637 * if previous extent entry covers the offset, 1638 * we should return it instead of the bitmap entry 1639 */ 1640 n = rb_prev(&entry->offset_index); 1641 if (n) { 1642 prev = rb_entry(n, struct btrfs_free_space, 1643 offset_index); 1644 if (!prev->bitmap && 1645 prev->offset + prev->bytes > offset) 1646 entry = prev; 1647 } 1648 } 1649 return entry; 1650 } 1651 1652 if (!prev) 1653 return NULL; 1654 1655 /* find last entry before the 'offset' */ 1656 entry = prev; 1657 if (entry->offset > offset) { 1658 n = rb_prev(&entry->offset_index); 1659 if (n) { 1660 entry = rb_entry(n, struct btrfs_free_space, 1661 offset_index); 1662 ASSERT(entry->offset <= offset); 1663 } else { 1664 if (fuzzy) 1665 return entry; 1666 else 1667 return NULL; 1668 } 1669 } 1670 1671 if (entry->bitmap) { 1672 n = rb_prev(&entry->offset_index); 1673 if (n) { 1674 prev = rb_entry(n, struct btrfs_free_space, 1675 offset_index); 1676 if (!prev->bitmap && 1677 prev->offset + prev->bytes > offset) 1678 return prev; 1679 } 1680 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1681 return entry; 1682 } else if (entry->offset + entry->bytes > offset) 1683 return entry; 1684 1685 if (!fuzzy) 1686 return NULL; 1687 1688 while (1) { 1689 if (entry->bitmap) { 1690 if (entry->offset + BITS_PER_BITMAP * 1691 ctl->unit > offset) 1692 break; 1693 } else { 1694 if (entry->offset + entry->bytes > offset) 1695 break; 1696 } 1697 1698 n = rb_next(&entry->offset_index); 1699 if (!n) 1700 return NULL; 1701 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1702 } 1703 return entry; 1704 } 1705 1706 static inline void 1707 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1708 struct btrfs_free_space *info) 1709 { 1710 rb_erase(&info->offset_index, &ctl->free_space_offset); 1711 ctl->free_extents--; 1712 1713 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1714 ctl->discardable_extents[BTRFS_STAT_CURR]--; 1715 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes; 1716 } 1717 } 1718 1719 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1720 struct btrfs_free_space *info) 1721 { 1722 __unlink_free_space(ctl, info); 1723 ctl->free_space -= info->bytes; 1724 } 1725 1726 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1727 struct btrfs_free_space *info) 1728 { 1729 int ret = 0; 1730 1731 ASSERT(info->bytes || info->bitmap); 1732 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1733 &info->offset_index, (info->bitmap != NULL)); 1734 if (ret) 1735 return ret; 1736 1737 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1738 ctl->discardable_extents[BTRFS_STAT_CURR]++; 1739 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 1740 } 1741 1742 ctl->free_space += info->bytes; 1743 ctl->free_extents++; 1744 return ret; 1745 } 1746 1747 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1748 struct btrfs_free_space *info, 1749 u64 offset, u64 bytes) 1750 { 1751 unsigned long start, count, end; 1752 int extent_delta = -1; 1753 1754 start = offset_to_bit(info->offset, ctl->unit, offset); 1755 count = bytes_to_bits(bytes, ctl->unit); 1756 end = start + count; 1757 ASSERT(end <= BITS_PER_BITMAP); 1758 1759 bitmap_clear(info->bitmap, start, count); 1760 1761 
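	/*
	 * Work out how clearing [start, end) changed the number of free
	 * extents in this bitmap: extent_delta starts at -1 (a whole extent
	 * removed), but if the bit just before start and/or the bit at end is
	 * still set, the cleared range only shrank or split an existing
	 * extent, and the checks below adjust the delta accordingly.
	 */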
info->bytes -= bytes; 1762 if (info->max_extent_size > ctl->unit) 1763 info->max_extent_size = 0; 1764 1765 if (start && test_bit(start - 1, info->bitmap)) 1766 extent_delta++; 1767 1768 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1769 extent_delta++; 1770 1771 info->bitmap_extents += extent_delta; 1772 if (!btrfs_free_space_trimmed(info)) { 1773 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1774 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 1775 } 1776 } 1777 1778 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1779 struct btrfs_free_space *info, u64 offset, 1780 u64 bytes) 1781 { 1782 __bitmap_clear_bits(ctl, info, offset, bytes); 1783 ctl->free_space -= bytes; 1784 } 1785 1786 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1787 struct btrfs_free_space *info, u64 offset, 1788 u64 bytes) 1789 { 1790 unsigned long start, count, end; 1791 int extent_delta = 1; 1792 1793 start = offset_to_bit(info->offset, ctl->unit, offset); 1794 count = bytes_to_bits(bytes, ctl->unit); 1795 end = start + count; 1796 ASSERT(end <= BITS_PER_BITMAP); 1797 1798 bitmap_set(info->bitmap, start, count); 1799 1800 info->bytes += bytes; 1801 ctl->free_space += bytes; 1802 1803 if (start && test_bit(start - 1, info->bitmap)) 1804 extent_delta--; 1805 1806 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1807 extent_delta--; 1808 1809 info->bitmap_extents += extent_delta; 1810 if (!btrfs_free_space_trimmed(info)) { 1811 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1812 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes; 1813 } 1814 } 1815 1816 /* 1817 * If we can not find suitable extent, we will use bytes to record 1818 * the size of the max extent. 1819 */ 1820 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1821 struct btrfs_free_space *bitmap_info, u64 *offset, 1822 u64 *bytes, bool for_alloc) 1823 { 1824 unsigned long found_bits = 0; 1825 unsigned long max_bits = 0; 1826 unsigned long bits, i; 1827 unsigned long next_zero; 1828 unsigned long extent_bits; 1829 1830 /* 1831 * Skip searching the bitmap if we don't have a contiguous section that 1832 * is large enough for this allocation. 
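	 * (max_extent_size is a cached hint left behind by a previous failed
	 * search; it is invalidated whenever bits are set in the bitmap and,
	 * when it could matter, whenever bits are cleared.)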
1833 */ 1834 if (for_alloc && 1835 bitmap_info->max_extent_size && 1836 bitmap_info->max_extent_size < *bytes) { 1837 *bytes = bitmap_info->max_extent_size; 1838 return -1; 1839 } 1840 1841 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1842 max_t(u64, *offset, bitmap_info->offset)); 1843 bits = bytes_to_bits(*bytes, ctl->unit); 1844 1845 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1846 if (for_alloc && bits == 1) { 1847 found_bits = 1; 1848 break; 1849 } 1850 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1851 BITS_PER_BITMAP, i); 1852 extent_bits = next_zero - i; 1853 if (extent_bits >= bits) { 1854 found_bits = extent_bits; 1855 break; 1856 } else if (extent_bits > max_bits) { 1857 max_bits = extent_bits; 1858 } 1859 i = next_zero; 1860 } 1861 1862 if (found_bits) { 1863 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1864 *bytes = (u64)(found_bits) * ctl->unit; 1865 return 0; 1866 } 1867 1868 *bytes = (u64)(max_bits) * ctl->unit; 1869 bitmap_info->max_extent_size = *bytes; 1870 return -1; 1871 } 1872 1873 static inline u64 get_max_extent_size(struct btrfs_free_space *entry) 1874 { 1875 if (entry->bitmap) 1876 return entry->max_extent_size; 1877 return entry->bytes; 1878 } 1879 1880 /* Cache the size of the max extent in bytes */ 1881 static struct btrfs_free_space * 1882 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1883 unsigned long align, u64 *max_extent_size) 1884 { 1885 struct btrfs_free_space *entry; 1886 struct rb_node *node; 1887 u64 tmp; 1888 u64 align_off; 1889 int ret; 1890 1891 if (!ctl->free_space_offset.rb_node) 1892 goto out; 1893 1894 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1895 if (!entry) 1896 goto out; 1897 1898 for (node = &entry->offset_index; node; node = rb_next(node)) { 1899 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1900 if (entry->bytes < *bytes) { 1901 *max_extent_size = max(get_max_extent_size(entry), 1902 *max_extent_size); 1903 continue; 1904 } 1905 1906 /* make sure the space returned is big enough 1907 * to match our requested alignment 1908 */ 1909 if (*bytes >= align) { 1910 tmp = entry->offset - ctl->start + align - 1; 1911 tmp = div64_u64(tmp, align); 1912 tmp = tmp * align + ctl->start; 1913 align_off = tmp - entry->offset; 1914 } else { 1915 align_off = 0; 1916 tmp = entry->offset; 1917 } 1918 1919 if (entry->bytes < *bytes + align_off) { 1920 *max_extent_size = max(get_max_extent_size(entry), 1921 *max_extent_size); 1922 continue; 1923 } 1924 1925 if (entry->bitmap) { 1926 u64 size = *bytes; 1927 1928 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1929 if (!ret) { 1930 *offset = tmp; 1931 *bytes = size; 1932 return entry; 1933 } else { 1934 *max_extent_size = 1935 max(get_max_extent_size(entry), 1936 *max_extent_size); 1937 } 1938 continue; 1939 } 1940 1941 *offset = tmp; 1942 *bytes = entry->bytes - align_off; 1943 return entry; 1944 } 1945 out: 1946 return NULL; 1947 } 1948 1949 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1950 struct btrfs_free_space *info, u64 offset) 1951 { 1952 info->offset = offset_to_bitmap(ctl, offset); 1953 info->bytes = 0; 1954 info->bitmap_extents = 0; 1955 INIT_LIST_HEAD(&info->list); 1956 link_free_space(ctl, info); 1957 ctl->total_bitmaps++; 1958 recalculate_thresholds(ctl); 1959 } 1960 1961 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1962 struct btrfs_free_space *bitmap_info) 1963 { 1964 /* 1965 * Normally when this is called, the bitmap is completely empty. 
However,
	 * if we are blowing up the free space cache for one reason or another
	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
	 * we may leave stats on the table.
	 */
	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			bitmap_info->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
	}
	unlink_free_space(ctl, bitmap_info);
	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	recalculate_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes, enum btrfs_trim_state trim_state)
{
	u64 bytes_to_set = 0;
	u64 end;

	/*
	 * This is a tradeoff to make bitmap trim state minimal. We mark the
	 * whole bitmap untrimmed if at any point we add untrimmed regions.
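	 * Going from trimmed to untrimmed also makes the whole bitmap a
	 * discard candidate again, which is why the discardable counters are
	 * bumped below.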
2070 */ 2071 if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) { 2072 if (btrfs_free_space_trimmed(info)) { 2073 ctl->discardable_extents[BTRFS_STAT_CURR] += 2074 info->bitmap_extents; 2075 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 2076 } 2077 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2078 } 2079 2080 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 2081 2082 bytes_to_set = min(end - offset, bytes); 2083 2084 bitmap_set_bits(ctl, info, offset, bytes_to_set); 2085 2086 /* 2087 * We set some bytes, we have no idea what the max extent size is 2088 * anymore. 2089 */ 2090 info->max_extent_size = 0; 2091 2092 return bytes_to_set; 2093 2094 } 2095 2096 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 2097 struct btrfs_free_space *info) 2098 { 2099 struct btrfs_block_group *block_group = ctl->private; 2100 struct btrfs_fs_info *fs_info = block_group->fs_info; 2101 bool forced = false; 2102 2103 #ifdef CONFIG_BTRFS_DEBUG 2104 if (btrfs_should_fragment_free_space(block_group)) 2105 forced = true; 2106 #endif 2107 2108 /* This is a way to reclaim large regions from the bitmaps. */ 2109 if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD) 2110 return false; 2111 2112 /* 2113 * If we are below the extents threshold then we can add this as an 2114 * extent, and don't have to deal with the bitmap 2115 */ 2116 if (!forced && ctl->free_extents < ctl->extents_thresh) { 2117 /* 2118 * If this block group has some small extents we don't want to 2119 * use up all of our free slots in the cache with them, we want 2120 * to reserve them to larger extents, however if we have plenty 2121 * of cache left then go ahead and add them, no sense in adding 2122 * the overhead of a bitmap if we don't have to. 2123 */ 2124 if (info->bytes <= fs_info->sectorsize * 8) { 2125 if (ctl->free_extents * 3 <= ctl->extents_thresh) 2126 return false; 2127 } else { 2128 return false; 2129 } 2130 } 2131 2132 /* 2133 * The original block groups from mkfs can be really small, like 8 2134 * megabytes, so don't bother with a bitmap for those entries. However 2135 * some block groups can be smaller than what a bitmap would cover but 2136 * are still large enough that they could overflow the 32k memory limit, 2137 * so still allow those block groups to have a bitmap 2138 * entry. 2139 */ 2140 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length) 2141 return false; 2142 2143 return true; 2144 } 2145 2146 static const struct btrfs_free_space_op free_space_op = { 2147 .use_bitmap = use_bitmap, 2148 }; 2149 2150 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2151 struct btrfs_free_space *info) 2152 { 2153 struct btrfs_free_space *bitmap_info; 2154 struct btrfs_block_group *block_group = NULL; 2155 int added = 0; 2156 u64 bytes, offset, bytes_added; 2157 enum btrfs_trim_state trim_state; 2158 int ret; 2159 2160 bytes = info->bytes; 2161 offset = info->offset; 2162 trim_state = info->trim_state; 2163 2164 if (!ctl->op->use_bitmap(ctl, info)) 2165 return 0; 2166 2167 if (ctl->op == &free_space_op) 2168 block_group = ctl->private; 2169 again: 2170 /* 2171 * Since we link bitmaps right into the cluster we need to see if we 2172 * have a cluster here, and if so and it has our bitmap we need to add 2173 * the free space to that bitmap.
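 *
 * A worked example of the bitmap granularity (assuming 4K pages and a 4K
 * sectorsize, so one bitmap spans BITS_PER_BITMAP * ctl->unit =
 * 32768 * 4K = 128M): freeing 192K that starts 64K before a 128M bitmap
 * boundary lets add_bytes_to_bitmap() set only the last 64K of the first
 * bitmap, because bytes_to_set is clamped to the bitmap's end; the again:
 * loop then advances the offset and stores the remaining 128K in the next
 * bitmap. The sizes are illustrative only, the real unit is the
 * filesystem's sector size.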
2174 */ 2175 if (block_group && !list_empty(&block_group->cluster_list)) { 2176 struct btrfs_free_cluster *cluster; 2177 struct rb_node *node; 2178 struct btrfs_free_space *entry; 2179 2180 cluster = list_entry(block_group->cluster_list.next, 2181 struct btrfs_free_cluster, 2182 block_group_list); 2183 spin_lock(&cluster->lock); 2184 node = rb_first(&cluster->root); 2185 if (!node) { 2186 spin_unlock(&cluster->lock); 2187 goto no_cluster_bitmap; 2188 } 2189 2190 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2191 if (!entry->bitmap) { 2192 spin_unlock(&cluster->lock); 2193 goto no_cluster_bitmap; 2194 } 2195 2196 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2197 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2198 bytes, trim_state); 2199 bytes -= bytes_added; 2200 offset += bytes_added; 2201 } 2202 spin_unlock(&cluster->lock); 2203 if (!bytes) { 2204 ret = 1; 2205 goto out; 2206 } 2207 } 2208 2209 no_cluster_bitmap: 2210 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2211 1, 0); 2212 if (!bitmap_info) { 2213 ASSERT(added == 0); 2214 goto new_bitmap; 2215 } 2216 2217 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2218 trim_state); 2219 bytes -= bytes_added; 2220 offset += bytes_added; 2221 added = 0; 2222 2223 if (!bytes) { 2224 ret = 1; 2225 goto out; 2226 } else 2227 goto again; 2228 2229 new_bitmap: 2230 if (info && info->bitmap) { 2231 add_new_bitmap(ctl, info, offset); 2232 added = 1; 2233 info = NULL; 2234 goto again; 2235 } else { 2236 spin_unlock(&ctl->tree_lock); 2237 2238 /* no pre-allocated info, allocate a new one */ 2239 if (!info) { 2240 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2241 GFP_NOFS); 2242 if (!info) { 2243 spin_lock(&ctl->tree_lock); 2244 ret = -ENOMEM; 2245 goto out; 2246 } 2247 } 2248 2249 /* allocate the bitmap */ 2250 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2251 GFP_NOFS); 2252 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2253 spin_lock(&ctl->tree_lock); 2254 if (!info->bitmap) { 2255 ret = -ENOMEM; 2256 goto out; 2257 } 2258 goto again; 2259 } 2260 2261 out: 2262 if (info) { 2263 if (info->bitmap) 2264 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2265 info->bitmap); 2266 kmem_cache_free(btrfs_free_space_cachep, info); 2267 } 2268 2269 return ret; 2270 } 2271 2272 /* 2273 * Free space merging rules: 2274 * 1) Merge trimmed areas together 2275 * 2) Let untrimmed areas coalesce with trimmed areas 2276 * 3) Always pull neighboring regions from bitmaps 2277 * 2278 * The above rules are for when we merge free space based on btrfs_trim_state. 2279 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the 2280 * same reason: to promote larger extent regions which makes life easier for 2281 * find_free_extent(). Rule 2 enables coalescing based on the common path 2282 * being returning free space from btrfs_finish_extent_commit(). So when free 2283 * space is trimmed, it will prevent aggregating trimmed new region and 2284 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents 2285 * and provide find_free_extent() with the largest extents possible hoping for 2286 * the reuse path. 
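 *
 * A small worked example of the rules (offsets invented): with a trimmed
 * extent [0, 1M) in the tree, freeing an untrimmed extent [1M, 2M) merges
 * into a single untrimmed entry [0, 2M) (rule 2; the merged entry keeps
 * the new region's untrimmed state). Freeing a trimmed extent [2M, 3M)
 * next to an untrimmed neighbor ending at 2M leaves two separate entries,
 * because a trimmed addition only coalesces with trimmed neighbors
 * (rule 1). And if a bitmap holds free bits immediately after the new
 * extent, steal_from_bitmap() moves those bytes into the extent entry,
 * marking it untrimmed when the bitmap was untrimmed (rule 3).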
2287 */ 2288 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2289 struct btrfs_free_space *info, bool update_stat) 2290 { 2291 struct btrfs_free_space *left_info = NULL; 2292 struct btrfs_free_space *right_info; 2293 bool merged = false; 2294 u64 offset = info->offset; 2295 u64 bytes = info->bytes; 2296 const bool is_trimmed = btrfs_free_space_trimmed(info); 2297 2298 /* 2299 * first we want to see if there is free space adjacent to the range we 2300 * are adding, if there is remove that struct and add a new one to 2301 * cover the entire range 2302 */ 2303 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2304 if (right_info && rb_prev(&right_info->offset_index)) 2305 left_info = rb_entry(rb_prev(&right_info->offset_index), 2306 struct btrfs_free_space, offset_index); 2307 else if (!right_info) 2308 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2309 2310 /* See try_merge_free_space() comment. */ 2311 if (right_info && !right_info->bitmap && 2312 (!is_trimmed || btrfs_free_space_trimmed(right_info))) { 2313 if (update_stat) 2314 unlink_free_space(ctl, right_info); 2315 else 2316 __unlink_free_space(ctl, right_info); 2317 info->bytes += right_info->bytes; 2318 kmem_cache_free(btrfs_free_space_cachep, right_info); 2319 merged = true; 2320 } 2321 2322 /* See try_merge_free_space() comment. */ 2323 if (left_info && !left_info->bitmap && 2324 left_info->offset + left_info->bytes == offset && 2325 (!is_trimmed || btrfs_free_space_trimmed(left_info))) { 2326 if (update_stat) 2327 unlink_free_space(ctl, left_info); 2328 else 2329 __unlink_free_space(ctl, left_info); 2330 info->offset = left_info->offset; 2331 info->bytes += left_info->bytes; 2332 kmem_cache_free(btrfs_free_space_cachep, left_info); 2333 merged = true; 2334 } 2335 2336 return merged; 2337 } 2338 2339 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2340 struct btrfs_free_space *info, 2341 bool update_stat) 2342 { 2343 struct btrfs_free_space *bitmap; 2344 unsigned long i; 2345 unsigned long j; 2346 const u64 end = info->offset + info->bytes; 2347 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2348 u64 bytes; 2349 2350 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2351 if (!bitmap) 2352 return false; 2353 2354 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2355 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2356 if (j == i) 2357 return false; 2358 bytes = (j - i) * ctl->unit; 2359 info->bytes += bytes; 2360 2361 /* See try_merge_free_space() comment. */ 2362 if (!btrfs_free_space_trimmed(bitmap)) 2363 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2364 2365 if (update_stat) 2366 bitmap_clear_bits(ctl, bitmap, end, bytes); 2367 else 2368 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2369 2370 if (!bitmap->bytes) 2371 free_bitmap(ctl, bitmap); 2372 2373 return true; 2374 } 2375 2376 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2377 struct btrfs_free_space *info, 2378 bool update_stat) 2379 { 2380 struct btrfs_free_space *bitmap; 2381 u64 bitmap_offset; 2382 unsigned long i; 2383 unsigned long j; 2384 unsigned long prev_j; 2385 u64 bytes; 2386 2387 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2388 /* If we're on a boundary, try the previous logical bitmap. 
*/ 2389 if (bitmap_offset == info->offset) { 2390 if (info->offset == 0) 2391 return false; 2392 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2393 } 2394 2395 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2396 if (!bitmap) 2397 return false; 2398 2399 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2400 j = 0; 2401 prev_j = (unsigned long)-1; 2402 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2403 if (j > i) 2404 break; 2405 prev_j = j; 2406 } 2407 if (prev_j == i) 2408 return false; 2409 2410 if (prev_j == (unsigned long)-1) 2411 bytes = (i + 1) * ctl->unit; 2412 else 2413 bytes = (i - prev_j) * ctl->unit; 2414 2415 info->offset -= bytes; 2416 info->bytes += bytes; 2417 2418 /* See try_merge_free_space() comment. */ 2419 if (!btrfs_free_space_trimmed(bitmap)) 2420 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2421 2422 if (update_stat) 2423 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2424 else 2425 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2426 2427 if (!bitmap->bytes) 2428 free_bitmap(ctl, bitmap); 2429 2430 return true; 2431 } 2432 2433 /* 2434 * We prefer always to allocate from extent entries, both for clustered and 2435 * non-clustered allocation requests. So when attempting to add a new extent 2436 * entry, try to see if there's adjacent free space in bitmap entries, and if 2437 * there is, migrate that space from the bitmaps to the extent. 2438 * Like this we get better chances of satisfying space allocation requests 2439 * because we attempt to satisfy them based on a single cache entry, and never 2440 * on 2 or more entries - even if the entries represent a contiguous free space 2441 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2442 * ends). 2443 */ 2444 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2445 struct btrfs_free_space *info, 2446 bool update_stat) 2447 { 2448 /* 2449 * Only work with disconnected entries, as we can change their offset, 2450 * and must be extent entries. 
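 *
 * For example (sizes invented, 4K sectorsize assumed): a freed extent
 * [X, X + 1M) whose following 256K happen to be recorded as 64 set bits
 * in a bitmap has those bits cleared by steal_from_bitmap_to_end() and
 * grows to 1.25M, so a later 1.2M allocation can be served from a single
 * extent entry instead of needing an extent entry plus a bitmap entry.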
2451 */ 2452 ASSERT(!info->bitmap); 2453 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2454 2455 if (ctl->total_bitmaps > 0) { 2456 bool stole_end; 2457 bool stole_front = false; 2458 2459 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2460 if (ctl->total_bitmaps > 0) 2461 stole_front = steal_from_bitmap_to_front(ctl, info, 2462 update_stat); 2463 2464 if (stole_end || stole_front) 2465 try_merge_free_space(ctl, info, update_stat); 2466 } 2467 } 2468 2469 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2470 struct btrfs_free_space_ctl *ctl, 2471 u64 offset, u64 bytes, 2472 enum btrfs_trim_state trim_state) 2473 { 2474 struct btrfs_block_group *block_group = ctl->private; 2475 struct btrfs_free_space *info; 2476 int ret = 0; 2477 u64 filter_bytes = bytes; 2478 2479 ASSERT(!btrfs_is_zoned(fs_info)); 2480 2481 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2482 if (!info) 2483 return -ENOMEM; 2484 2485 info->offset = offset; 2486 info->bytes = bytes; 2487 info->trim_state = trim_state; 2488 RB_CLEAR_NODE(&info->offset_index); 2489 2490 spin_lock(&ctl->tree_lock); 2491 2492 if (try_merge_free_space(ctl, info, true)) 2493 goto link; 2494 2495 /* 2496 * There was no extent directly to the left or right of this new 2497 * extent then we know we're going to have to allocate a new extent, so 2498 * before we do that see if we need to drop this into a bitmap 2499 */ 2500 ret = insert_into_bitmap(ctl, info); 2501 if (ret < 0) { 2502 goto out; 2503 } else if (ret) { 2504 ret = 0; 2505 goto out; 2506 } 2507 link: 2508 /* 2509 * Only steal free space from adjacent bitmaps if we're sure we're not 2510 * going to add the new free space to existing bitmap entries - because 2511 * that would mean unnecessary work that would be reverted. Therefore 2512 * attempt to steal space from bitmaps if we're adding an extent entry. 
2513 */ 2514 steal_from_bitmap(ctl, info, true); 2515 2516 filter_bytes = max(filter_bytes, info->bytes); 2517 2518 ret = link_free_space(ctl, info); 2519 if (ret) 2520 kmem_cache_free(btrfs_free_space_cachep, info); 2521 out: 2522 btrfs_discard_update_discardable(block_group); 2523 spin_unlock(&ctl->tree_lock); 2524 2525 if (ret) { 2526 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2527 ASSERT(ret != -EEXIST); 2528 } 2529 2530 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2531 btrfs_discard_check_filter(block_group, filter_bytes); 2532 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2533 } 2534 2535 return ret; 2536 } 2537 2538 static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, 2539 u64 bytenr, u64 size, bool used) 2540 { 2541 struct btrfs_fs_info *fs_info = block_group->fs_info; 2542 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2543 u64 offset = bytenr - block_group->start; 2544 u64 to_free, to_unusable; 2545 const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold); 2546 bool initial = (size == block_group->length); 2547 u64 reclaimable_unusable; 2548 2549 WARN_ON(!initial && offset + size > block_group->zone_capacity); 2550 2551 spin_lock(&ctl->tree_lock); 2552 if (!used) 2553 to_free = size; 2554 else if (initial) 2555 to_free = block_group->zone_capacity; 2556 else if (offset >= block_group->alloc_offset) 2557 to_free = size; 2558 else if (offset + size <= block_group->alloc_offset) 2559 to_free = 0; 2560 else 2561 to_free = offset + size - block_group->alloc_offset; 2562 to_unusable = size - to_free; 2563 2564 ctl->free_space += to_free; 2565 /* 2566 * If the block group is read-only, we should account freed space into 2567 * bytes_readonly. 2568 */ 2569 if (!block_group->ro) 2570 block_group->zone_unusable += to_unusable; 2571 spin_unlock(&ctl->tree_lock); 2572 if (!used) { 2573 spin_lock(&block_group->lock); 2574 block_group->alloc_offset -= size; 2575 spin_unlock(&block_group->lock); 2576 } 2577 2578 reclaimable_unusable = block_group->zone_unusable - 2579 (block_group->length - block_group->zone_capacity); 2580 /* All the region is now unusable. 
Mark it as unused and reclaim */ 2581 if (block_group->zone_unusable == block_group->length) { 2582 btrfs_mark_bg_unused(block_group); 2583 } else if (bg_reclaim_threshold && 2584 reclaimable_unusable >= 2585 div_factor_fine(block_group->zone_capacity, 2586 bg_reclaim_threshold)) { 2587 btrfs_mark_bg_to_reclaim(block_group); 2588 } 2589 2590 return 0; 2591 } 2592 2593 int btrfs_add_free_space(struct btrfs_block_group *block_group, 2594 u64 bytenr, u64 size) 2595 { 2596 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2597 2598 if (btrfs_is_zoned(block_group->fs_info)) 2599 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2600 true); 2601 2602 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2603 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2604 2605 return __btrfs_add_free_space(block_group->fs_info, 2606 block_group->free_space_ctl, 2607 bytenr, size, trim_state); 2608 } 2609 2610 int btrfs_add_free_space_unused(struct btrfs_block_group *block_group, 2611 u64 bytenr, u64 size) 2612 { 2613 if (btrfs_is_zoned(block_group->fs_info)) 2614 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2615 false); 2616 2617 return btrfs_add_free_space(block_group, bytenr, size); 2618 } 2619 2620 /* 2621 * This is a subtle distinction because when adding free space back in general, 2622 * we want it to be added as untrimmed for async. But in the case where we add 2623 * it on loading of a block group, we want to consider it trimmed. 2624 */ 2625 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2626 u64 bytenr, u64 size) 2627 { 2628 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2629 2630 if (btrfs_is_zoned(block_group->fs_info)) 2631 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2632 true); 2633 2634 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2635 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2636 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2637 2638 return __btrfs_add_free_space(block_group->fs_info, 2639 block_group->free_space_ctl, 2640 bytenr, size, trim_state); 2641 } 2642 2643 int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2644 u64 offset, u64 bytes) 2645 { 2646 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2647 struct btrfs_free_space *info; 2648 int ret; 2649 bool re_search = false; 2650 2651 if (btrfs_is_zoned(block_group->fs_info)) { 2652 /* 2653 * This can happen with conventional zones when replaying log. 2654 * Since the allocation info of tree-log nodes are not recorded 2655 * to the extent-tree, calculate_alloc_pointer() failed to 2656 * advance the allocation pointer after last allocated tree log 2657 * node blocks. 2658 * 2659 * This function is called from 2660 * btrfs_pin_extent_for_log_replay() when replaying the log. 2661 * Advance the pointer not to overwrite the tree-log nodes. 
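 *
 * For instance (numbers invented): with block_group->start at 1G and
 * alloc_offset at 16M, pinning a 64K tree-log extent at logical 1G + 20M
 * makes the check below advance alloc_offset to 20M + 64K, so later zoned
 * allocations start past the replayed node instead of overwriting it.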
2662 */ 2663 if (block_group->start + block_group->alloc_offset < 2664 offset + bytes) { 2665 block_group->alloc_offset = 2666 offset + bytes - block_group->start; 2667 } 2668 return 0; 2669 } 2670 2671 spin_lock(&ctl->tree_lock); 2672 2673 again: 2674 ret = 0; 2675 if (!bytes) 2676 goto out_lock; 2677 2678 info = tree_search_offset(ctl, offset, 0, 0); 2679 if (!info) { 2680 /* 2681 * oops didn't find an extent that matched the space we wanted 2682 * to remove, look for a bitmap instead 2683 */ 2684 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2685 1, 0); 2686 if (!info) { 2687 /* 2688 * If we found a partial bit of our free space in a 2689 * bitmap but then couldn't find the other part this may 2690 * be a problem, so WARN about it. 2691 */ 2692 WARN_ON(re_search); 2693 goto out_lock; 2694 } 2695 } 2696 2697 re_search = false; 2698 if (!info->bitmap) { 2699 unlink_free_space(ctl, info); 2700 if (offset == info->offset) { 2701 u64 to_free = min(bytes, info->bytes); 2702 2703 info->bytes -= to_free; 2704 info->offset += to_free; 2705 if (info->bytes) { 2706 ret = link_free_space(ctl, info); 2707 WARN_ON(ret); 2708 } else { 2709 kmem_cache_free(btrfs_free_space_cachep, info); 2710 } 2711 2712 offset += to_free; 2713 bytes -= to_free; 2714 goto again; 2715 } else { 2716 u64 old_end = info->bytes + info->offset; 2717 2718 info->bytes = offset - info->offset; 2719 ret = link_free_space(ctl, info); 2720 WARN_ON(ret); 2721 if (ret) 2722 goto out_lock; 2723 2724 /* Not enough bytes in this entry to satisfy us */ 2725 if (old_end < offset + bytes) { 2726 bytes -= old_end - offset; 2727 offset = old_end; 2728 goto again; 2729 } else if (old_end == offset + bytes) { 2730 /* all done */ 2731 goto out_lock; 2732 } 2733 spin_unlock(&ctl->tree_lock); 2734 2735 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2736 offset + bytes, 2737 old_end - (offset + bytes), 2738 info->trim_state); 2739 WARN_ON(ret); 2740 goto out; 2741 } 2742 } 2743 2744 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2745 if (ret == -EAGAIN) { 2746 re_search = true; 2747 goto again; 2748 } 2749 out_lock: 2750 btrfs_discard_update_discardable(block_group); 2751 spin_unlock(&ctl->tree_lock); 2752 out: 2753 return ret; 2754 } 2755 2756 void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2757 u64 bytes) 2758 { 2759 struct btrfs_fs_info *fs_info = block_group->fs_info; 2760 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2761 struct btrfs_free_space *info; 2762 struct rb_node *n; 2763 int count = 0; 2764 2765 /* 2766 * Zoned btrfs does not use free space tree and cluster. Just print 2767 * out the free space after the allocation offset. 2768 */ 2769 if (btrfs_is_zoned(fs_info)) { 2770 btrfs_info(fs_info, "free space %llu active %d", 2771 block_group->zone_capacity - block_group->alloc_offset, 2772 block_group->zone_is_active); 2773 return; 2774 } 2775 2776 spin_lock(&ctl->tree_lock); 2777 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2778 info = rb_entry(n, struct btrfs_free_space, offset_index); 2779 if (info->bytes >= bytes && !block_group->ro) 2780 count++; 2781 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2782 info->offset, info->bytes, 2783 (info->bitmap) ? "yes" : "no"); 2784 } 2785 spin_unlock(&ctl->tree_lock); 2786 btrfs_info(fs_info, "block group has cluster?: %s", 2787 list_empty(&block_group->cluster_list) ? 
"no" : "yes"); 2788 btrfs_info(fs_info, 2789 "%d blocks of free space at or bigger than bytes is", count); 2790 } 2791 2792 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group, 2793 struct btrfs_free_space_ctl *ctl) 2794 { 2795 struct btrfs_fs_info *fs_info = block_group->fs_info; 2796 2797 spin_lock_init(&ctl->tree_lock); 2798 ctl->unit = fs_info->sectorsize; 2799 ctl->start = block_group->start; 2800 ctl->private = block_group; 2801 ctl->op = &free_space_op; 2802 INIT_LIST_HEAD(&ctl->trimming_ranges); 2803 mutex_init(&ctl->cache_writeout_mutex); 2804 2805 /* 2806 * we only want to have 32k of ram per block group for keeping 2807 * track of free space, and if we pass 1/2 of that we want to 2808 * start converting things over to using bitmaps 2809 */ 2810 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2811 } 2812 2813 /* 2814 * for a given cluster, put all of its extents back into the free 2815 * space cache. If the block group passed doesn't match the block group 2816 * pointed to by the cluster, someone else raced in and freed the 2817 * cluster already. In that case, we just return without changing anything 2818 */ 2819 static void __btrfs_return_cluster_to_free_space( 2820 struct btrfs_block_group *block_group, 2821 struct btrfs_free_cluster *cluster) 2822 { 2823 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2824 struct btrfs_free_space *entry; 2825 struct rb_node *node; 2826 2827 spin_lock(&cluster->lock); 2828 if (cluster->block_group != block_group) { 2829 spin_unlock(&cluster->lock); 2830 return; 2831 } 2832 2833 cluster->block_group = NULL; 2834 cluster->window_start = 0; 2835 list_del_init(&cluster->block_group_list); 2836 2837 node = rb_first(&cluster->root); 2838 while (node) { 2839 bool bitmap; 2840 2841 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2842 node = rb_next(&entry->offset_index); 2843 rb_erase(&entry->offset_index, &cluster->root); 2844 RB_CLEAR_NODE(&entry->offset_index); 2845 2846 bitmap = (entry->bitmap != NULL); 2847 if (!bitmap) { 2848 /* Merging treats extents as if they were new */ 2849 if (!btrfs_free_space_trimmed(entry)) { 2850 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2851 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2852 entry->bytes; 2853 } 2854 2855 try_merge_free_space(ctl, entry, false); 2856 steal_from_bitmap(ctl, entry, false); 2857 2858 /* As we insert directly, update these statistics */ 2859 if (!btrfs_free_space_trimmed(entry)) { 2860 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2861 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2862 entry->bytes; 2863 } 2864 } 2865 tree_insert_offset(&ctl->free_space_offset, 2866 entry->offset, &entry->offset_index, bitmap); 2867 } 2868 cluster->root = RB_ROOT; 2869 spin_unlock(&cluster->lock); 2870 btrfs_put_block_group(block_group); 2871 } 2872 2873 static void __btrfs_remove_free_space_cache_locked( 2874 struct btrfs_free_space_ctl *ctl) 2875 { 2876 struct btrfs_free_space *info; 2877 struct rb_node *node; 2878 2879 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2880 info = rb_entry(node, struct btrfs_free_space, offset_index); 2881 if (!info->bitmap) { 2882 unlink_free_space(ctl, info); 2883 kmem_cache_free(btrfs_free_space_cachep, info); 2884 } else { 2885 free_bitmap(ctl, info); 2886 } 2887 2888 cond_resched_lock(&ctl->tree_lock); 2889 } 2890 } 2891 2892 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2893 { 2894 spin_lock(&ctl->tree_lock); 2895 __btrfs_remove_free_space_cache_locked(ctl); 2896 
if (ctl->private) 2897 btrfs_discard_update_discardable(ctl->private); 2898 spin_unlock(&ctl->tree_lock); 2899 } 2900 2901 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2902 { 2903 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2904 struct btrfs_free_cluster *cluster; 2905 struct list_head *head; 2906 2907 spin_lock(&ctl->tree_lock); 2908 while ((head = block_group->cluster_list.next) != 2909 &block_group->cluster_list) { 2910 cluster = list_entry(head, struct btrfs_free_cluster, 2911 block_group_list); 2912 2913 WARN_ON(cluster->block_group != block_group); 2914 __btrfs_return_cluster_to_free_space(block_group, cluster); 2915 2916 cond_resched_lock(&ctl->tree_lock); 2917 } 2918 __btrfs_remove_free_space_cache_locked(ctl); 2919 btrfs_discard_update_discardable(block_group); 2920 spin_unlock(&ctl->tree_lock); 2921 2922 } 2923 2924 /** 2925 * btrfs_is_free_space_trimmed - see if everything is trimmed 2926 * @block_group: block_group of interest 2927 * 2928 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2929 */ 2930 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2931 { 2932 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2933 struct btrfs_free_space *info; 2934 struct rb_node *node; 2935 bool ret = true; 2936 2937 spin_lock(&ctl->tree_lock); 2938 node = rb_first(&ctl->free_space_offset); 2939 2940 while (node) { 2941 info = rb_entry(node, struct btrfs_free_space, offset_index); 2942 2943 if (!btrfs_free_space_trimmed(info)) { 2944 ret = false; 2945 break; 2946 } 2947 2948 node = rb_next(node); 2949 } 2950 2951 spin_unlock(&ctl->tree_lock); 2952 return ret; 2953 } 2954 2955 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2956 u64 offset, u64 bytes, u64 empty_size, 2957 u64 *max_extent_size) 2958 { 2959 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2960 struct btrfs_discard_ctl *discard_ctl = 2961 &block_group->fs_info->discard_ctl; 2962 struct btrfs_free_space *entry = NULL; 2963 u64 bytes_search = bytes + empty_size; 2964 u64 ret = 0; 2965 u64 align_gap = 0; 2966 u64 align_gap_len = 0; 2967 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2968 2969 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 2970 2971 spin_lock(&ctl->tree_lock); 2972 entry = find_free_space(ctl, &offset, &bytes_search, 2973 block_group->full_stripe_len, max_extent_size); 2974 if (!entry) 2975 goto out; 2976 2977 ret = offset; 2978 if (entry->bitmap) { 2979 bitmap_clear_bits(ctl, entry, offset, bytes); 2980 2981 if (!btrfs_free_space_trimmed(entry)) 2982 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2983 2984 if (!entry->bytes) 2985 free_bitmap(ctl, entry); 2986 } else { 2987 unlink_free_space(ctl, entry); 2988 align_gap_len = offset - entry->offset; 2989 align_gap = entry->offset; 2990 align_gap_trim_state = entry->trim_state; 2991 2992 if (!btrfs_free_space_trimmed(entry)) 2993 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2994 2995 entry->offset = offset + bytes; 2996 WARN_ON(entry->bytes < bytes + align_gap_len); 2997 2998 entry->bytes -= bytes + align_gap_len; 2999 if (!entry->bytes) 3000 kmem_cache_free(btrfs_free_space_cachep, entry); 3001 else 3002 link_free_space(ctl, entry); 3003 } 3004 out: 3005 btrfs_discard_update_discardable(block_group); 3006 spin_unlock(&ctl->tree_lock); 3007 3008 if (align_gap_len) 3009 __btrfs_add_free_space(block_group->fs_info, ctl, 3010 align_gap, align_gap_len, 3011 
align_gap_trim_state); 3012 return ret; 3013 } 3014 3015 /* 3016 * given a cluster, put all of its extents back into the free space 3017 * cache. If a block group is passed, this function will only free 3018 * a cluster that belongs to the passed block group. 3019 * 3020 * Otherwise, it'll get a reference on the block group pointed to by the 3021 * cluster and remove the cluster from it. 3022 */ 3023 void btrfs_return_cluster_to_free_space( 3024 struct btrfs_block_group *block_group, 3025 struct btrfs_free_cluster *cluster) 3026 { 3027 struct btrfs_free_space_ctl *ctl; 3028 3029 /* first, get a safe pointer to the block group */ 3030 spin_lock(&cluster->lock); 3031 if (!block_group) { 3032 block_group = cluster->block_group; 3033 if (!block_group) { 3034 spin_unlock(&cluster->lock); 3035 return; 3036 } 3037 } else if (cluster->block_group != block_group) { 3038 /* someone else has already freed it don't redo their work */ 3039 spin_unlock(&cluster->lock); 3040 return; 3041 } 3042 btrfs_get_block_group(block_group); 3043 spin_unlock(&cluster->lock); 3044 3045 ctl = block_group->free_space_ctl; 3046 3047 /* now return any extents the cluster had on it */ 3048 spin_lock(&ctl->tree_lock); 3049 __btrfs_return_cluster_to_free_space(block_group, cluster); 3050 spin_unlock(&ctl->tree_lock); 3051 3052 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 3053 3054 /* finally drop our ref */ 3055 btrfs_put_block_group(block_group); 3056 } 3057 3058 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 3059 struct btrfs_free_cluster *cluster, 3060 struct btrfs_free_space *entry, 3061 u64 bytes, u64 min_start, 3062 u64 *max_extent_size) 3063 { 3064 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3065 int err; 3066 u64 search_start = cluster->window_start; 3067 u64 search_bytes = bytes; 3068 u64 ret = 0; 3069 3070 search_start = min_start; 3071 search_bytes = bytes; 3072 3073 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 3074 if (err) { 3075 *max_extent_size = max(get_max_extent_size(entry), 3076 *max_extent_size); 3077 return 0; 3078 } 3079 3080 ret = search_start; 3081 __bitmap_clear_bits(ctl, entry, ret, bytes); 3082 3083 return ret; 3084 } 3085 3086 /* 3087 * given a cluster, try to allocate 'bytes' from it, returns 0 3088 * if it couldn't find anything suitably large, or a logical disk offset 3089 * if things worked out 3090 */ 3091 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 3092 struct btrfs_free_cluster *cluster, u64 bytes, 3093 u64 min_start, u64 *max_extent_size) 3094 { 3095 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3096 struct btrfs_discard_ctl *discard_ctl = 3097 &block_group->fs_info->discard_ctl; 3098 struct btrfs_free_space *entry = NULL; 3099 struct rb_node *node; 3100 u64 ret = 0; 3101 3102 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3103 3104 spin_lock(&cluster->lock); 3105 if (bytes > cluster->max_size) 3106 goto out; 3107 3108 if (cluster->block_group != block_group) 3109 goto out; 3110 3111 node = rb_first(&cluster->root); 3112 if (!node) 3113 goto out; 3114 3115 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3116 while (1) { 3117 if (entry->bytes < bytes) 3118 *max_extent_size = max(get_max_extent_size(entry), 3119 *max_extent_size); 3120 3121 if (entry->bytes < bytes || 3122 (!entry->bitmap && entry->offset < min_start)) { 3123 node = rb_next(&entry->offset_index); 3124 if (!node) 3125 break; 3126 entry = rb_entry(node, struct 
btrfs_free_space, 3127 offset_index); 3128 continue; 3129 } 3130 3131 if (entry->bitmap) { 3132 ret = btrfs_alloc_from_bitmap(block_group, 3133 cluster, entry, bytes, 3134 cluster->window_start, 3135 max_extent_size); 3136 if (ret == 0) { 3137 node = rb_next(&entry->offset_index); 3138 if (!node) 3139 break; 3140 entry = rb_entry(node, struct btrfs_free_space, 3141 offset_index); 3142 continue; 3143 } 3144 cluster->window_start += bytes; 3145 } else { 3146 ret = entry->offset; 3147 3148 entry->offset += bytes; 3149 entry->bytes -= bytes; 3150 } 3151 3152 break; 3153 } 3154 out: 3155 spin_unlock(&cluster->lock); 3156 3157 if (!ret) 3158 return 0; 3159 3160 spin_lock(&ctl->tree_lock); 3161 3162 if (!btrfs_free_space_trimmed(entry)) 3163 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3164 3165 ctl->free_space -= bytes; 3166 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3167 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3168 3169 spin_lock(&cluster->lock); 3170 if (entry->bytes == 0) { 3171 rb_erase(&entry->offset_index, &cluster->root); 3172 ctl->free_extents--; 3173 if (entry->bitmap) { 3174 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3175 entry->bitmap); 3176 ctl->total_bitmaps--; 3177 recalculate_thresholds(ctl); 3178 } else if (!btrfs_free_space_trimmed(entry)) { 3179 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3180 } 3181 kmem_cache_free(btrfs_free_space_cachep, entry); 3182 } 3183 3184 spin_unlock(&cluster->lock); 3185 spin_unlock(&ctl->tree_lock); 3186 3187 return ret; 3188 } 3189 3190 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3191 struct btrfs_free_space *entry, 3192 struct btrfs_free_cluster *cluster, 3193 u64 offset, u64 bytes, 3194 u64 cont1_bytes, u64 min_bytes) 3195 { 3196 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3197 unsigned long next_zero; 3198 unsigned long i; 3199 unsigned long want_bits; 3200 unsigned long min_bits; 3201 unsigned long found_bits; 3202 unsigned long max_bits = 0; 3203 unsigned long start = 0; 3204 unsigned long total_found = 0; 3205 int ret; 3206 3207 i = offset_to_bit(entry->offset, ctl->unit, 3208 max_t(u64, offset, entry->offset)); 3209 want_bits = bytes_to_bits(bytes, ctl->unit); 3210 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3211 3212 /* 3213 * Don't bother looking for a cluster in this bitmap if it's heavily 3214 * fragmented. 
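 *
 * A rough example (4K sectorsize assumed): a cont1_bytes of 256K means the
 * scan below must find a run of 64 consecutive set bits. If an earlier
 * search cached max_extent_size as 128K for this bitmap, the check below
 * returns -ENOSPC right away instead of walking the bitmap again.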
3215 */ 3216 if (entry->max_extent_size && 3217 entry->max_extent_size < cont1_bytes) 3218 return -ENOSPC; 3219 again: 3220 found_bits = 0; 3221 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3222 next_zero = find_next_zero_bit(entry->bitmap, 3223 BITS_PER_BITMAP, i); 3224 if (next_zero - i >= min_bits) { 3225 found_bits = next_zero - i; 3226 if (found_bits > max_bits) 3227 max_bits = found_bits; 3228 break; 3229 } 3230 if (next_zero - i > max_bits) 3231 max_bits = next_zero - i; 3232 i = next_zero; 3233 } 3234 3235 if (!found_bits) { 3236 entry->max_extent_size = (u64)max_bits * ctl->unit; 3237 return -ENOSPC; 3238 } 3239 3240 if (!total_found) { 3241 start = i; 3242 cluster->max_size = 0; 3243 } 3244 3245 total_found += found_bits; 3246 3247 if (cluster->max_size < found_bits * ctl->unit) 3248 cluster->max_size = found_bits * ctl->unit; 3249 3250 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3251 i = next_zero + 1; 3252 goto again; 3253 } 3254 3255 cluster->window_start = start * ctl->unit + entry->offset; 3256 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3257 ret = tree_insert_offset(&cluster->root, entry->offset, 3258 &entry->offset_index, 1); 3259 ASSERT(!ret); /* -EEXIST; Logic error */ 3260 3261 trace_btrfs_setup_cluster(block_group, cluster, 3262 total_found * ctl->unit, 1); 3263 return 0; 3264 } 3265 3266 /* 3267 * This searches the block group for just extents to fill the cluster with. 3268 * Try to find a cluster with at least bytes total bytes, at least one 3269 * extent of cont1_bytes, and other clusters of at least min_bytes. 3270 */ 3271 static noinline int 3272 setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3273 struct btrfs_free_cluster *cluster, 3274 struct list_head *bitmaps, u64 offset, u64 bytes, 3275 u64 cont1_bytes, u64 min_bytes) 3276 { 3277 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3278 struct btrfs_free_space *first = NULL; 3279 struct btrfs_free_space *entry = NULL; 3280 struct btrfs_free_space *last; 3281 struct rb_node *node; 3282 u64 window_free; 3283 u64 max_extent; 3284 u64 total_size = 0; 3285 3286 entry = tree_search_offset(ctl, offset, 0, 1); 3287 if (!entry) 3288 return -ENOSPC; 3289 3290 /* 3291 * We don't want bitmaps, so just move along until we find a normal 3292 * extent entry. 
3293 */ 3294 while (entry->bitmap || entry->bytes < min_bytes) { 3295 if (entry->bitmap && list_empty(&entry->list)) 3296 list_add_tail(&entry->list, bitmaps); 3297 node = rb_next(&entry->offset_index); 3298 if (!node) 3299 return -ENOSPC; 3300 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3301 } 3302 3303 window_free = entry->bytes; 3304 max_extent = entry->bytes; 3305 first = entry; 3306 last = entry; 3307 3308 for (node = rb_next(&entry->offset_index); node; 3309 node = rb_next(&entry->offset_index)) { 3310 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3311 3312 if (entry->bitmap) { 3313 if (list_empty(&entry->list)) 3314 list_add_tail(&entry->list, bitmaps); 3315 continue; 3316 } 3317 3318 if (entry->bytes < min_bytes) 3319 continue; 3320 3321 last = entry; 3322 window_free += entry->bytes; 3323 if (entry->bytes > max_extent) 3324 max_extent = entry->bytes; 3325 } 3326 3327 if (window_free < bytes || max_extent < cont1_bytes) 3328 return -ENOSPC; 3329 3330 cluster->window_start = first->offset; 3331 3332 node = &first->offset_index; 3333 3334 /* 3335 * now we've found our entries, pull them out of the free space 3336 * cache and put them into the cluster rbtree 3337 */ 3338 do { 3339 int ret; 3340 3341 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3342 node = rb_next(&entry->offset_index); 3343 if (entry->bitmap || entry->bytes < min_bytes) 3344 continue; 3345 3346 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3347 ret = tree_insert_offset(&cluster->root, entry->offset, 3348 &entry->offset_index, 0); 3349 total_size += entry->bytes; 3350 ASSERT(!ret); /* -EEXIST; Logic error */ 3351 } while (node && entry != last); 3352 3353 cluster->max_size = max_extent; 3354 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3355 return 0; 3356 } 3357 3358 /* 3359 * This specifically looks for bitmaps that may work in the cluster, we assume 3360 * that we have already failed to find extents that will work. 3361 */ 3362 static noinline int 3363 setup_cluster_bitmap(struct btrfs_block_group *block_group, 3364 struct btrfs_free_cluster *cluster, 3365 struct list_head *bitmaps, u64 offset, u64 bytes, 3366 u64 cont1_bytes, u64 min_bytes) 3367 { 3368 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3369 struct btrfs_free_space *entry = NULL; 3370 int ret = -ENOSPC; 3371 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3372 3373 if (ctl->total_bitmaps == 0) 3374 return -ENOSPC; 3375 3376 /* 3377 * The bitmap that covers offset won't be in the list unless offset 3378 * is just its start offset. 3379 */ 3380 if (!list_empty(bitmaps)) 3381 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3382 3383 if (!entry || entry->offset != bitmap_offset) { 3384 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3385 if (entry && list_empty(&entry->list)) 3386 list_add(&entry->list, bitmaps); 3387 } 3388 3389 list_for_each_entry(entry, bitmaps, list) { 3390 if (entry->bytes < bytes) 3391 continue; 3392 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3393 bytes, cont1_bytes, min_bytes); 3394 if (!ret) 3395 return 0; 3396 } 3397 3398 /* 3399 * The bitmaps list has all the bitmaps that record free space 3400 * starting after offset, so no more search is required. 3401 */ 3402 return -ENOSPC; 3403 } 3404 3405 /* 3406 * here we try to find a cluster of blocks in a block group. The goal 3407 * is to find at least bytes+empty_size. 3408 * We might not find them all in one contiguous area. 
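 *
 * An illustration of the sizing done below (values invented, 4K
 * sectorsize assumed): for a data block group with bytes = 1M and
 * empty_size = 3M, cont1_bytes becomes max(1M, 4M >> 2) = 1M and
 * min_bytes is one sector, so the cluster needs one 1M contiguous extent
 * plus smaller pieces. With the ssd_spread mount option, cont1_bytes =
 * min_bytes = 4M, i.e. the whole request must come from one unfragmented
 * chunk.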
3409 * 3410 * returns zero and sets up cluster if things worked out, otherwise 3411 * it returns -enospc 3412 */ 3413 int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3414 struct btrfs_free_cluster *cluster, 3415 u64 offset, u64 bytes, u64 empty_size) 3416 { 3417 struct btrfs_fs_info *fs_info = block_group->fs_info; 3418 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3419 struct btrfs_free_space *entry, *tmp; 3420 LIST_HEAD(bitmaps); 3421 u64 min_bytes; 3422 u64 cont1_bytes; 3423 int ret; 3424 3425 /* 3426 * Choose the minimum extent size we'll require for this 3427 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3428 * For metadata, allow allocates with smaller extents. For 3429 * data, keep it dense. 3430 */ 3431 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3432 cont1_bytes = min_bytes = bytes + empty_size; 3433 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3434 cont1_bytes = bytes; 3435 min_bytes = fs_info->sectorsize; 3436 } else { 3437 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3438 min_bytes = fs_info->sectorsize; 3439 } 3440 3441 spin_lock(&ctl->tree_lock); 3442 3443 /* 3444 * If we know we don't have enough space to make a cluster don't even 3445 * bother doing all the work to try and find one. 3446 */ 3447 if (ctl->free_space < bytes) { 3448 spin_unlock(&ctl->tree_lock); 3449 return -ENOSPC; 3450 } 3451 3452 spin_lock(&cluster->lock); 3453 3454 /* someone already found a cluster, hooray */ 3455 if (cluster->block_group) { 3456 ret = 0; 3457 goto out; 3458 } 3459 3460 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3461 min_bytes); 3462 3463 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3464 bytes + empty_size, 3465 cont1_bytes, min_bytes); 3466 if (ret) 3467 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3468 offset, bytes + empty_size, 3469 cont1_bytes, min_bytes); 3470 3471 /* Clear our temporary list */ 3472 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3473 list_del_init(&entry->list); 3474 3475 if (!ret) { 3476 btrfs_get_block_group(block_group); 3477 list_add_tail(&cluster->block_group_list, 3478 &block_group->cluster_list); 3479 cluster->block_group = block_group; 3480 } else { 3481 trace_btrfs_failed_cluster_setup(block_group); 3482 } 3483 out: 3484 spin_unlock(&cluster->lock); 3485 spin_unlock(&ctl->tree_lock); 3486 3487 return ret; 3488 } 3489 3490 /* 3491 * simple code to zero out a cluster 3492 */ 3493 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3494 { 3495 spin_lock_init(&cluster->lock); 3496 spin_lock_init(&cluster->refill_lock); 3497 cluster->root = RB_ROOT; 3498 cluster->max_size = 0; 3499 cluster->fragmented = false; 3500 INIT_LIST_HEAD(&cluster->block_group_list); 3501 cluster->block_group = NULL; 3502 } 3503 3504 static int do_trimming(struct btrfs_block_group *block_group, 3505 u64 *total_trimmed, u64 start, u64 bytes, 3506 u64 reserved_start, u64 reserved_bytes, 3507 enum btrfs_trim_state reserved_trim_state, 3508 struct btrfs_trim_range *trim_entry) 3509 { 3510 struct btrfs_space_info *space_info = block_group->space_info; 3511 struct btrfs_fs_info *fs_info = block_group->fs_info; 3512 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3513 int ret; 3514 int update = 0; 3515 const u64 end = start + bytes; 3516 const u64 reserved_end = reserved_start + reserved_bytes; 3517 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3518 u64 trimmed = 0; 3519 3520 spin_lock(&space_info->lock); 
3521 spin_lock(&block_group->lock); 3522 if (!block_group->ro) { 3523 block_group->reserved += reserved_bytes; 3524 space_info->bytes_reserved += reserved_bytes; 3525 update = 1; 3526 } 3527 spin_unlock(&block_group->lock); 3528 spin_unlock(&space_info->lock); 3529 3530 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3531 if (!ret) { 3532 *total_trimmed += trimmed; 3533 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3534 } 3535 3536 mutex_lock(&ctl->cache_writeout_mutex); 3537 if (reserved_start < start) 3538 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3539 start - reserved_start, 3540 reserved_trim_state); 3541 if (start + bytes < reserved_start + reserved_bytes) 3542 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3543 reserved_trim_state); 3544 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3545 list_del(&trim_entry->list); 3546 mutex_unlock(&ctl->cache_writeout_mutex); 3547 3548 if (update) { 3549 spin_lock(&space_info->lock); 3550 spin_lock(&block_group->lock); 3551 if (block_group->ro) 3552 space_info->bytes_readonly += reserved_bytes; 3553 block_group->reserved -= reserved_bytes; 3554 space_info->bytes_reserved -= reserved_bytes; 3555 spin_unlock(&block_group->lock); 3556 spin_unlock(&space_info->lock); 3557 } 3558 3559 return ret; 3560 } 3561 3562 /* 3563 * If @async is set, then we will trim 1 region and return. 3564 */ 3565 static int trim_no_bitmap(struct btrfs_block_group *block_group, 3566 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3567 bool async) 3568 { 3569 struct btrfs_discard_ctl *discard_ctl = 3570 &block_group->fs_info->discard_ctl; 3571 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3572 struct btrfs_free_space *entry; 3573 struct rb_node *node; 3574 int ret = 0; 3575 u64 extent_start; 3576 u64 extent_bytes; 3577 enum btrfs_trim_state extent_trim_state; 3578 u64 bytes; 3579 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3580 3581 while (start < end) { 3582 struct btrfs_trim_range trim_entry; 3583 3584 mutex_lock(&ctl->cache_writeout_mutex); 3585 spin_lock(&ctl->tree_lock); 3586 3587 if (ctl->free_space < minlen) 3588 goto out_unlock; 3589 3590 entry = tree_search_offset(ctl, start, 0, 1); 3591 if (!entry) 3592 goto out_unlock; 3593 3594 /* Skip bitmaps and if async, already trimmed entries */ 3595 while (entry->bitmap || 3596 (async && btrfs_free_space_trimmed(entry))) { 3597 node = rb_next(&entry->offset_index); 3598 if (!node) 3599 goto out_unlock; 3600 entry = rb_entry(node, struct btrfs_free_space, 3601 offset_index); 3602 } 3603 3604 if (entry->offset >= end) 3605 goto out_unlock; 3606 3607 extent_start = entry->offset; 3608 extent_bytes = entry->bytes; 3609 extent_trim_state = entry->trim_state; 3610 if (async) { 3611 start = entry->offset; 3612 bytes = entry->bytes; 3613 if (bytes < minlen) { 3614 spin_unlock(&ctl->tree_lock); 3615 mutex_unlock(&ctl->cache_writeout_mutex); 3616 goto next; 3617 } 3618 unlink_free_space(ctl, entry); 3619 /* 3620 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3621 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3622 * X when we come back around. So trim it now. 
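 *
 * Concretely (the max_discard_size value is invented): with a 64M cap, a
 * 96M extent is split by the code below into a 64M discard now and a 32M
 * remainder that is relinked for the next pass, while a 64M + 8K extent
 * is discarded whole when 8K is below BTRFS_ASYNC_DISCARD_MIN_FILTER,
 * since that small tail would otherwise never be trimmed.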
3623 */ 3624 if (max_discard_size && 3625 bytes >= (max_discard_size + 3626 BTRFS_ASYNC_DISCARD_MIN_FILTER)) { 3627 bytes = max_discard_size; 3628 extent_bytes = max_discard_size; 3629 entry->offset += max_discard_size; 3630 entry->bytes -= max_discard_size; 3631 link_free_space(ctl, entry); 3632 } else { 3633 kmem_cache_free(btrfs_free_space_cachep, entry); 3634 } 3635 } else { 3636 start = max(start, extent_start); 3637 bytes = min(extent_start + extent_bytes, end) - start; 3638 if (bytes < minlen) { 3639 spin_unlock(&ctl->tree_lock); 3640 mutex_unlock(&ctl->cache_writeout_mutex); 3641 goto next; 3642 } 3643 3644 unlink_free_space(ctl, entry); 3645 kmem_cache_free(btrfs_free_space_cachep, entry); 3646 } 3647 3648 spin_unlock(&ctl->tree_lock); 3649 trim_entry.start = extent_start; 3650 trim_entry.bytes = extent_bytes; 3651 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3652 mutex_unlock(&ctl->cache_writeout_mutex); 3653 3654 ret = do_trimming(block_group, total_trimmed, start, bytes, 3655 extent_start, extent_bytes, extent_trim_state, 3656 &trim_entry); 3657 if (ret) { 3658 block_group->discard_cursor = start + bytes; 3659 break; 3660 } 3661 next: 3662 start += bytes; 3663 block_group->discard_cursor = start; 3664 if (async && *total_trimmed) 3665 break; 3666 3667 if (fatal_signal_pending(current)) { 3668 ret = -ERESTARTSYS; 3669 break; 3670 } 3671 3672 cond_resched(); 3673 } 3674 3675 return ret; 3676 3677 out_unlock: 3678 block_group->discard_cursor = btrfs_block_group_end(block_group); 3679 spin_unlock(&ctl->tree_lock); 3680 mutex_unlock(&ctl->cache_writeout_mutex); 3681 3682 return ret; 3683 } 3684 3685 /* 3686 * If we break out of trimming a bitmap prematurely, we should reset the 3687 * trimming bit. In a rather contrived case, it's possible to race here so 3688 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. 3689 * 3690 * start = start of bitmap 3691 * end = near end of bitmap 3692 * 3693 * Thread 1: Thread 2: 3694 * trim_bitmaps(start) 3695 * trim_bitmaps(end) 3696 * end_trimming_bitmap() 3697 * reset_trimming_bitmap() 3698 */ 3699 static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) 3700 { 3701 struct btrfs_free_space *entry; 3702 3703 spin_lock(&ctl->tree_lock); 3704 entry = tree_search_offset(ctl, offset, 1, 0); 3705 if (entry) { 3706 if (btrfs_free_space_trimmed(entry)) { 3707 ctl->discardable_extents[BTRFS_STAT_CURR] += 3708 entry->bitmap_extents; 3709 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; 3710 } 3711 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3712 } 3713 3714 spin_unlock(&ctl->tree_lock); 3715 } 3716 3717 static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, 3718 struct btrfs_free_space *entry) 3719 { 3720 if (btrfs_free_space_trimming_bitmap(entry)) { 3721 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; 3722 ctl->discardable_extents[BTRFS_STAT_CURR] -= 3723 entry->bitmap_extents; 3724 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; 3725 } 3726 } 3727 3728 /* 3729 * If @async is set, then we will trim 1 region and return.
3730 */ 3731 static int trim_bitmaps(struct btrfs_block_group *block_group, 3732 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3733 u64 maxlen, bool async) 3734 { 3735 struct btrfs_discard_ctl *discard_ctl = 3736 &block_group->fs_info->discard_ctl; 3737 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3738 struct btrfs_free_space *entry; 3739 int ret = 0; 3740 int ret2; 3741 u64 bytes; 3742 u64 offset = offset_to_bitmap(ctl, start); 3743 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3744 3745 while (offset < end) { 3746 bool next_bitmap = false; 3747 struct btrfs_trim_range trim_entry; 3748 3749 mutex_lock(&ctl->cache_writeout_mutex); 3750 spin_lock(&ctl->tree_lock); 3751 3752 if (ctl->free_space < minlen) { 3753 block_group->discard_cursor = 3754 btrfs_block_group_end(block_group); 3755 spin_unlock(&ctl->tree_lock); 3756 mutex_unlock(&ctl->cache_writeout_mutex); 3757 break; 3758 } 3759 3760 entry = tree_search_offset(ctl, offset, 1, 0); 3761 /* 3762 * Bitmaps are marked trimmed lossily now to prevent constant 3763 * discarding of the same bitmap (the reason why we are bound 3764 * by the filters). So, retrim the block group bitmaps when we 3765 * are preparing to punt to the unused_bgs list. This uses 3766 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3767 * which is the only discard index which sets minlen to 0. 3768 */ 3769 if (!entry || (async && minlen && start == offset && 3770 btrfs_free_space_trimmed(entry))) { 3771 spin_unlock(&ctl->tree_lock); 3772 mutex_unlock(&ctl->cache_writeout_mutex); 3773 next_bitmap = true; 3774 goto next; 3775 } 3776 3777 /* 3778 * Async discard bitmap trimming begins at by setting the start 3779 * to be key.objectid and the offset_to_bitmap() aligns to the 3780 * start of the bitmap. This lets us know we are fully 3781 * scanning the bitmap rather than only some portion of it. 3782 */ 3783 if (start == offset) 3784 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3785 3786 bytes = minlen; 3787 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3788 if (ret2 || start >= end) { 3789 /* 3790 * We lossily consider a bitmap trimmed if we only skip 3791 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3792 */ 3793 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3794 end_trimming_bitmap(ctl, entry); 3795 else 3796 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3797 spin_unlock(&ctl->tree_lock); 3798 mutex_unlock(&ctl->cache_writeout_mutex); 3799 next_bitmap = true; 3800 goto next; 3801 } 3802 3803 /* 3804 * We already trimmed a region, but are using the locking above 3805 * to reset the trim_state. 3806 */ 3807 if (async && *total_trimmed) { 3808 spin_unlock(&ctl->tree_lock); 3809 mutex_unlock(&ctl->cache_writeout_mutex); 3810 goto out; 3811 } 3812 3813 bytes = min(bytes, end - start); 3814 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3815 spin_unlock(&ctl->tree_lock); 3816 mutex_unlock(&ctl->cache_writeout_mutex); 3817 goto next; 3818 } 3819 3820 /* 3821 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3822 * If X < @minlen, we won't trim X when we come back around. 3823 * So trim it now. We differ here from trimming extents as we 3824 * don't keep individual state per bit. 
3825 */ 3826 if (async && 3827 max_discard_size && 3828 bytes > (max_discard_size + minlen)) 3829 bytes = max_discard_size; 3830 3831 bitmap_clear_bits(ctl, entry, start, bytes); 3832 if (entry->bytes == 0) 3833 free_bitmap(ctl, entry); 3834 3835 spin_unlock(&ctl->tree_lock); 3836 trim_entry.start = start; 3837 trim_entry.bytes = bytes; 3838 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3839 mutex_unlock(&ctl->cache_writeout_mutex); 3840 3841 ret = do_trimming(block_group, total_trimmed, start, bytes, 3842 start, bytes, 0, &trim_entry); 3843 if (ret) { 3844 reset_trimming_bitmap(ctl, offset); 3845 block_group->discard_cursor = 3846 btrfs_block_group_end(block_group); 3847 break; 3848 } 3849 next: 3850 if (next_bitmap) { 3851 offset += BITS_PER_BITMAP * ctl->unit; 3852 start = offset; 3853 } else { 3854 start += bytes; 3855 } 3856 block_group->discard_cursor = start; 3857 3858 if (fatal_signal_pending(current)) { 3859 if (start != offset) 3860 reset_trimming_bitmap(ctl, offset); 3861 ret = -ERESTARTSYS; 3862 break; 3863 } 3864 3865 cond_resched(); 3866 } 3867 3868 if (offset >= end) 3869 block_group->discard_cursor = end; 3870 3871 out: 3872 return ret; 3873 } 3874 3875 int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3876 u64 *trimmed, u64 start, u64 end, u64 minlen) 3877 { 3878 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3879 int ret; 3880 u64 rem = 0; 3881 3882 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3883 3884 *trimmed = 0; 3885 3886 spin_lock(&block_group->lock); 3887 if (block_group->removed) { 3888 spin_unlock(&block_group->lock); 3889 return 0; 3890 } 3891 btrfs_freeze_block_group(block_group); 3892 spin_unlock(&block_group->lock); 3893 3894 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3895 if (ret) 3896 goto out; 3897 3898 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3899 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3900 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3901 if (rem) 3902 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3903 out: 3904 btrfs_unfreeze_block_group(block_group); 3905 return ret; 3906 } 3907 3908 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3909 u64 *trimmed, u64 start, u64 end, u64 minlen, 3910 bool async) 3911 { 3912 int ret; 3913 3914 *trimmed = 0; 3915 3916 spin_lock(&block_group->lock); 3917 if (block_group->removed) { 3918 spin_unlock(&block_group->lock); 3919 return 0; 3920 } 3921 btrfs_freeze_block_group(block_group); 3922 spin_unlock(&block_group->lock); 3923 3924 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3925 btrfs_unfreeze_block_group(block_group); 3926 3927 return ret; 3928 } 3929 3930 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3931 u64 *trimmed, u64 start, u64 end, u64 minlen, 3932 u64 maxlen, bool async) 3933 { 3934 int ret; 3935 3936 *trimmed = 0; 3937 3938 spin_lock(&block_group->lock); 3939 if (block_group->removed) { 3940 spin_unlock(&block_group->lock); 3941 return 0; 3942 } 3943 btrfs_freeze_block_group(block_group); 3944 spin_unlock(&block_group->lock); 3945 3946 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3947 async); 3948 3949 btrfs_unfreeze_block_group(block_group); 3950 3951 return ret; 3952 } 3953 3954 bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info) 3955 { 3956 return btrfs_super_cache_generation(fs_info->super_copy); 3957 } 3958 3959 static int 
static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret = 0;

	btrfs_info(fs_info, "cleaning free space cache v1");

	node = rb_first(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
		if (ret)
			goto out;
		node = rb_next(node);
	}
out:
	return ret;
}

int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/*
	 * update_super_roots will appropriately set or unset
	 * super_copy->cache_generation based on SPACE_CACHE and
	 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
	 * transaction commit whether we are enabling space cache v1 and don't
	 * have any other work to do, or are disabling it and removing free
	 * space inodes.
	 */
	trans = btrfs_start_transaction(fs_info->tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	if (!active) {
		set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
		ret = cleanup_free_space_cache_v1(fs_info, trans);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}

	ret = btrfs_commit_transaction(trans);
out:
	clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);

	return ret;
}
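/*
 * Everything below is compiled only with CONFIG_BTRFS_FS_RUN_SANITY_TESTS and
 * is intended for the btrfs free space cache self tests, not for normal
 * filesystem code.
 */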
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically.  It
 * doesn't do any of the merging that add_free_space() does; it acts a lot
 * like how the free space cache loading works, so you can get really weird
 * configurations.
 */
int test_add_free_space_entry(struct btrfs_block_group *cache,
			      u64 offset, u64 bytes, bool bitmap)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info = NULL, *bitmap_info;
	void *map = NULL;
	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
	u64 bytes_added;
	int ret;

again:
	if (!info) {
		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
		if (!info)
			return -ENOMEM;
	}

	if (!bitmap) {
		spin_lock(&ctl->tree_lock);
		info->offset = offset;
		info->bytes = bytes;
		info->max_extent_size = 0;
		ret = link_free_space(ctl, info);
		spin_unlock(&ctl->tree_lock);
		if (ret)
			kmem_cache_free(btrfs_free_space_cachep, info);
		return ret;
	}

	if (!map) {
		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
		if (!map) {
			kmem_cache_free(btrfs_free_space_cachep, info);
			return -ENOMEM;
		}
	}

	spin_lock(&ctl->tree_lock);
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		info->bitmap = map;
		map = NULL;
		add_new_bitmap(ctl, info, offset);
		bitmap_info = info;
		info = NULL;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
					  trim_state);

	bytes -= bytes_added;
	offset += bytes_added;
	spin_unlock(&ctl->tree_lock);

	if (bytes)
		goto again;

	if (info)
		kmem_cache_free(btrfs_free_space_cachep, info);
	if (map)
		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
	return 0;
}
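/*
 * Rough usage sketch for the two helpers in this section (illustrative only;
 * the real callers and assertions live in fs/btrfs/tests/free-space-tests.c):
 *
 *	ret = test_add_free_space_entry(cache, 0, SZ_4M, 0);
 *	if (ret)
 *		return ret;
 *	if (!test_check_exists(cache, 0, SZ_4M))
 *		return -ENOENT;
 */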
/*
 * Checks to see if the given range is in the free space cache.  This is really
 * just used to check the absence of space, so if there is free space in the
 * range at all we will return 1.
 */
int test_check_exists(struct btrfs_block_group *cache,
		      u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&ctl->tree_lock);
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info)
			goto out;
	}

have_info:
	if (info->bitmap) {
		u64 bit_off, bit_bytes;
		struct rb_node *n;
		struct btrfs_free_space *tmp;

		bit_off = offset;
		bit_bytes = ctl->unit;
		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
		if (!ret) {
			if (bit_off == offset) {
				ret = 1;
				goto out;
			} else if (bit_off > offset &&
				   offset + bytes > bit_off) {
				ret = 1;
				goto out;
			}
		}

		n = rb_prev(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (tmp->offset + tmp->bytes < offset)
				break;
			if (offset + bytes < tmp->offset) {
				n = rb_prev(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		n = rb_next(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (offset + bytes < tmp->offset)
				break;
			if (tmp->offset + tmp->bytes < offset) {
				n = rb_next(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		ret = 0;
		goto out;
	}

	if (info->offset == offset) {
		ret = 1;
		goto out;
	}

	if (offset > info->offset && offset < info->offset + info->bytes)
		ret = 1;
out:
	spin_unlock(&ctl->tree_lock);
	return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */