// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_32K

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return inode;

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

struct inode *lookup_free_space_inode(
		struct btrfs_block_group_cache *block_group,
		struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->key.objectid);
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) +
		btrfs_calc_trans_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group_cache *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;
	int check_crcs = 0;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
		check_crcs = 1;

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && check_crcs &&
	    (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->check_crcs = check_crcs;
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
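
/*
 * Write the cache generation into the first page, right after the area
 * reserved for the per-page crcs (or after a single u64 slot when crcs are
 * not tracked for this inode).
 */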
static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	__le64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas. If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	__le64 *gen;

	/*
	 * Skip the crc area. If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
			*gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	copy_page(io_ctl->cur, bitmap);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	copy_page(entry->bitmap, io_ctl->cur);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

/*
 * Since we attach pinned extents after the fact we can have contiguous sections
 * of free space that are split up in entries. This poses a problem with the
 * tree logging stuff since it could have allocated across what appears to be 2
 * entries since we would have merged the entries when adding the pinned extents
 * back to the free space cache. So run through the space cache that we just
 * loaded and merge contiguous entries. This will make the log replay stuff not
 * blow up and it will make for nicer allocator behavior.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *e, *prev = NULL;
	struct rb_node *n;

again:
	spin_lock(&ctl->tree_lock);
	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		e = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!prev)
			goto next;
		if (e->bitmap || prev->bitmap)
			goto next;
		if (prev->offset + prev->bytes == e->offset) {
			unlink_free_space(ctl, prev);
			unlink_free_space(ctl, e);
			prev->bytes += e->bytes;
			kmem_cache_free(btrfs_free_space_cachep, e);
			link_free_space(ctl, prev);
			prev = NULL;
			spin_unlock(&ctl->tree_lock);
			goto again;
		}
next:
		prev = e;
	}
	spin_unlock(&ctl->tree_lock);
}

static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct btrfs_io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	LIST_HEAD(bitmaps);
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (!BTRFS_I(inode)->generation) {
		btrfs_info(fs_info,
			   "the free space cache file (%llu) is invalid, skip it",
			   offset);
		return 0;
	}

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(fs_info,
			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
			  BTRFS_I(inode)->generation, generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, 0);
	if (ret)
		return ret;

	readahead_cache(inode);

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			ASSERT(num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	merge_space_tree(ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find a
	 * block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in a
	 * deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
	 *
	 * It's safe to read the inode item using the commit root because
	 * block groups, once loaded, stay in memory forever (until they are
	 * removed) as well as their space caches once loaded. New block groups
	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
	 * we will never try to read their inode item while the fs is mounted.
	 */
	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		btrfs_warn(fs_info,
			   "block group %llu has wrong amount of free space",
			   block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		btrfs_warn(fs_info,
			   "failed to load free space cache for block group %llu, rebuilding it now",
			   block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
			       struct btrfs_free_space_ctl *ctl,
			       struct btrfs_block_group_cache *block_group,
			       int *entries, int *bitmaps,
			       struct list_head *bitmap_list)
{
	int ret;
	struct btrfs_free_cluster *cluster = NULL;
	struct btrfs_free_cluster *cluster_locked = NULL;
	struct rb_node *node = rb_first(&ctl->free_space_offset);
	struct btrfs_trim_range *trim_entry;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
	}

	if (!node && cluster) {
		cluster_locked = cluster;
		spin_lock(&cluster_locked->lock);
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		*entries += 1;

		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto fail;

		if (e->bitmap) {
			list_add_tail(&e->list, bitmap_list);
			*bitmaps += 1;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster_locked = cluster;
			spin_lock(&cluster_locked->lock);
			cluster = NULL;
		}
	}
	if (cluster_locked) {
		spin_unlock(&cluster_locked->lock);
		cluster_locked = NULL;
	}

	/*
	 * Make sure we don't miss any range that was removed from our rbtree
	 * because trimming is running. Otherwise after a umount+mount (or crash
	 * after committing the transaction) we would leak free space and get
	 * an inconsistent free space cache report from fsck.
	 */
	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
				       trim_entry->bytes, NULL);
		if (ret)
			goto fail;
		*entries += 1;
	}

	return 0;
fail:
	if (cluster_locked)
		spin_unlock(&cluster_locked->lock);
	return -ENOSPC;
}

static noinline_for_stack int
update_cache_item(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct inode *inode,
		  struct btrfs_path *path, u64 offset,
		  int entries, int bitmaps)
{
	struct btrfs_key key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
		goto fail;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		ASSERT(path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
					 NULL);
			btrfs_release_path(path);
			goto fail;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;

fail:
	return -1;
}

static noinline_for_stack int write_pinned_extent_entries(
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_io_ctl *io_ctl,
			    int *entries)
{
	u64 start, extent_start, extent_end, len;
	struct extent_io_tree *unpin = NULL;
	int ret;

	if (!block_group)
		return 0;

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 *
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = block_group->fs_info->pinned_extents;

	start = block_group->key.objectid;

	while (start < block_group->key.objectid + block_group->key.offset) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			return 0;

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->key.objectid +
		    block_group->key.offset)
			return 0;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->key.objectid +
				 block_group->key.offset, extent_end + 1);
		len = extent_end - extent_start;

		*entries += 1;
		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
		if (ret)
			return -ENOSPC;

		start = extent_end;
	}

	return 0;
}

static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;
	int ret;

	/* Write out the bitmaps */
	list_for_each_entry_safe(entry, next, bitmap_list, list) {
		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
		if (ret)
			return -ENOSPC;
		list_del_init(&entry->list);
	}

	return 0;
}

static int flush_dirty_cache(struct inode *inode)
{
	int ret;

	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);

	return ret;
}

static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;

	list_for_each_entry_safe(entry, next, bitmap_list, list)
		list_del_init(&entry->list);
}

static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
			   struct btrfs_io_ctl *io_ctl,
			   struct extent_state **cached_state)
{
	io_ctl_drop_pages(io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, cached_state);
}

static int __btrfs_wait_cache_io(struct btrfs_root *root,
				 struct btrfs_trans_handle *trans,
				 struct btrfs_block_group_cache *block_group,
				 struct btrfs_io_ctl *io_ctl,
				 struct btrfs_path *path, u64 offset)
{
	int ret;
	struct inode *inode = io_ctl->inode;

	if (!inode)
		return 0;

	/* Flush the dirty pages in the cache file. */
	ret = flush_dirty_cache(inode);
	if (ret)
		goto out;

	/* Update the cache item to tell everyone this cache file is valid. */
	ret = update_cache_item(trans, root, inode, path, offset,
				io_ctl->entries, io_ctl->bitmaps);
out:
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
		if (block_group) {
#ifdef DEBUG
			btrfs_err(root->fs_info,
				  "failed to write free space cache for block group %llu",
				  block_group->key.objectid);
#endif
		}
	}
	btrfs_update_inode(trans, root, inode);

	if (block_group) {
		/* the dirty list is protected by the dirty_bgs_lock */
		spin_lock(&trans->transaction->dirty_bgs_lock);

		/* the disk_cache_state is protected by the block group lock */
		spin_lock(&block_group->lock);

		/*
		 * only mark this as written if we didn't get put back on
		 * the dirty list while waiting for IO.
		 * Otherwise our cache state won't be right, and we won't get
		 * written again
		 */
		if (!ret && list_empty(&block_group->dirty_list))
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		else if (ret)
			block_group->disk_cache_state = BTRFS_DC_ERROR;

		spin_unlock(&block_group->lock);
		spin_unlock(&trans->transaction->dirty_bgs_lock);
		io_ctl->inode = NULL;
		iput(inode);
	}

	return ret;

}

static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
}

int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
			struct btrfs_block_group_cache *block_group,
			struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
				     block_group, &block_group->io_ctl,
				     path, block_group->key.objectid);
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount. This will return 0 if it was successful in writing the cache out,
 * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_block_group_cache *block_group,
				   struct btrfs_io_ctl *io_ctl,
				   struct btrfs_trans_handle *trans)
{
	struct extent_state *cached_state = NULL;
	LIST_HEAD(bitmap_list);
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int must_iput = 0;

	if (!i_size_read(inode))
		return -EIO;

	WARN_ON(io_ctl->pages);
	ret = io_ctl_init(io_ctl, inode, 1);
	if (ret)
		return ret;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
		down_write(&block_group->data_rwsem);
		spin_lock(&block_group->lock);
		if (block_group->delalloc_bytes) {
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
			spin_unlock(&block_group->lock);
			up_write(&block_group->data_rwsem);
			BTRFS_I(inode)->generation = 0;
			ret = 0;
			must_iput = 1;
			goto out;
		}
		spin_unlock(&block_group->lock);
	}

	/* Lock all pages first so we can lock the extent safely. */
	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
	if (ret)
		goto out_unlock;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 &cached_state);

	io_ctl_set_generation(io_ctl, trans->transid);

	mutex_lock(&ctl->cache_writeout_mutex);
	/* Write out the extent entries in the free space cache */
	spin_lock(&ctl->tree_lock);
	ret = write_cache_extent_entries(io_ctl, ctl,
					 block_group, &entries, &bitmaps,
					 &bitmap_list);
	if (ret)
		goto out_nospc_locked;

	/*
	 * Some spaces that are freed in the current transaction are pinned,
	 * they will be added into free space cache after the transaction is
	 * committed, we shouldn't lose them.
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it.
	 * No locking needed
	 */
	ret = write_pinned_extent_entries(block_group, io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
				i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state);

	/*
	 * At this point the pages are under IO and we're happy. The caller is
	 * responsible for waiting on them and updating the cache and the
	 * inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				      block_group, &block_group->io_ctl, trans);
	if (ret) {
#ifdef DEBUG
		btrfs_err(fs_info,
			  "failed to write free space cache for block group %llu",
			  block_group->key.objectid);
#endif
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and put the inode
	 */

	return ret;
}
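
/*
 * Convert an absolute byte offset into a bit index within a bitmap entry
 * that starts at @bitmap_start and whose bits each cover @unit bytes.
 */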
static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	ASSERT(offset >= bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = rb_prev(&entry->offset_index);
			if (n) {
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap &&
				    prev->offset + prev->bytes > offset)
					entry = prev;
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			ASSERT(entry->offset <= offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap &&
			    prev->offset + prev->bytes > offset)
				return prev;
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	ASSERT(info->bytes || info->bitmap);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}
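
/*
 * Recalculate ctl->extents_thresh so that the memory used by extent entries
 * plus bitmaps stays within MAX_CACHE_BYTES_PER_GIG for every 1GiB of block
 * group space.
 */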
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	max_bitmaps = max_t(u64, max_bitmaps, 1);

	ASSERT(ctl->total_bitmaps <= max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < SZ_1G)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);

	ctl->extents_thresh =
		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	ASSERT(start + count <= BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
	if (info->max_extent_size > ctl->unit)
		info->max_extent_size = 0;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	ASSERT(start + count <= BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

/*
 * If we can not find a suitable extent, we will use *bytes to record
 * the size of the max extent found.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc)
{
	unsigned long found_bits = 0;
	unsigned long max_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;
	unsigned long extent_bits;

	/*
	 * Skip searching the bitmap if we don't have a contiguous section that
	 * is large enough for this allocation.
	 */
	if (for_alloc &&
	    bitmap_info->max_extent_size &&
	    bitmap_info->max_extent_size < *bytes) {
		*bytes = bitmap_info->max_extent_size;
		return -1;
	}

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
		if (for_alloc && bits == 1) {
			found_bits = 1;
			break;
		}
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		extent_bits = next_zero - i;
		if (extent_bits >= bits) {
			found_bits = extent_bits;
			break;
		} else if (extent_bits > max_bits) {
			max_bits = extent_bits;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	*bytes = (u64)(max_bits) * ctl->unit;
	bitmap_info->max_extent_size = *bytes;
	return -1;
}

static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
{
	if (entry->bitmap)
		return entry->max_extent_size;
	return entry->bytes;
}

/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
		unsigned long align, u64 *max_extent_size)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	u64 tmp;
	u64 align_off;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		goto out;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		goto out;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		/* make sure the space returned is big enough
		 * to match our requested alignment
		 */
		if (*bytes >= align) {
			tmp = entry->offset - ctl->start + align - 1;
			tmp = div64_u64(tmp, align);
			tmp = tmp * align + ctl->start;
			align_off = tmp - entry->offset;
		} else {
			align_off = 0;
			tmp = entry->offset;
		}

		if (entry->bytes < *bytes + align_off) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		if (entry->bitmap) {
			u64 size = *bytes;

			ret = search_bitmap(ctl, entry, &tmp, &size, true);
			if (!ret) {
				*offset = tmp;
				*bytes = size;
				return entry;
			} else {
				*max_extent_size =
					max(get_max_extent_size(entry),
					    *max_extent_size);
			}
			continue;
		}

		*offset = tmp;
		*bytes = entry->bytes - align_off;
		return entry;
	}
out:
	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}
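
/*
 * Remove a bitmap entry from the free space rbtree, free its bitmap memory
 * and update the bitmap accounting and thresholds.
 */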
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory limit,
	 * so allow those block groups to still have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
2063 */ 2064 if (block_group && !list_empty(&block_group->cluster_list)) { 2065 struct btrfs_free_cluster *cluster; 2066 struct rb_node *node; 2067 struct btrfs_free_space *entry; 2068 2069 cluster = list_entry(block_group->cluster_list.next, 2070 struct btrfs_free_cluster, 2071 block_group_list); 2072 spin_lock(&cluster->lock); 2073 node = rb_first(&cluster->root); 2074 if (!node) { 2075 spin_unlock(&cluster->lock); 2076 goto no_cluster_bitmap; 2077 } 2078 2079 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2080 if (!entry->bitmap) { 2081 spin_unlock(&cluster->lock); 2082 goto no_cluster_bitmap; 2083 } 2084 2085 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2086 bytes_added = add_bytes_to_bitmap(ctl, entry, 2087 offset, bytes); 2088 bytes -= bytes_added; 2089 offset += bytes_added; 2090 } 2091 spin_unlock(&cluster->lock); 2092 if (!bytes) { 2093 ret = 1; 2094 goto out; 2095 } 2096 } 2097 2098 no_cluster_bitmap: 2099 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2100 1, 0); 2101 if (!bitmap_info) { 2102 ASSERT(added == 0); 2103 goto new_bitmap; 2104 } 2105 2106 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2107 bytes -= bytes_added; 2108 offset += bytes_added; 2109 added = 0; 2110 2111 if (!bytes) { 2112 ret = 1; 2113 goto out; 2114 } else 2115 goto again; 2116 2117 new_bitmap: 2118 if (info && info->bitmap) { 2119 add_new_bitmap(ctl, info, offset); 2120 added = 1; 2121 info = NULL; 2122 goto again; 2123 } else { 2124 spin_unlock(&ctl->tree_lock); 2125 2126 /* no pre-allocated info, allocate a new one */ 2127 if (!info) { 2128 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2129 GFP_NOFS); 2130 if (!info) { 2131 spin_lock(&ctl->tree_lock); 2132 ret = -ENOMEM; 2133 goto out; 2134 } 2135 } 2136 2137 /* allocate the bitmap */ 2138 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2139 spin_lock(&ctl->tree_lock); 2140 if (!info->bitmap) { 2141 ret = -ENOMEM; 2142 goto out; 2143 } 2144 goto again; 2145 } 2146 2147 out: 2148 if (info) { 2149 kfree(info->bitmap); 2150 kmem_cache_free(btrfs_free_space_cachep, info); 2151 } 2152 2153 return ret; 2154 } 2155 2156 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2157 struct btrfs_free_space *info, bool update_stat) 2158 { 2159 struct btrfs_free_space *left_info; 2160 struct btrfs_free_space *right_info; 2161 bool merged = false; 2162 u64 offset = info->offset; 2163 u64 bytes = info->bytes; 2164 2165 /* 2166 * first we want to see if there is free space adjacent to the range we 2167 * are adding, if there is remove that struct and add a new one to 2168 * cover the entire range 2169 */ 2170 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2171 if (right_info && rb_prev(&right_info->offset_index)) 2172 left_info = rb_entry(rb_prev(&right_info->offset_index), 2173 struct btrfs_free_space, offset_index); 2174 else 2175 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2176 2177 if (right_info && !right_info->bitmap) { 2178 if (update_stat) 2179 unlink_free_space(ctl, right_info); 2180 else 2181 __unlink_free_space(ctl, right_info); 2182 info->bytes += right_info->bytes; 2183 kmem_cache_free(btrfs_free_space_cachep, right_info); 2184 merged = true; 2185 } 2186 2187 if (left_info && !left_info->bitmap && 2188 left_info->offset + left_info->bytes == offset) { 2189 if (update_stat) 2190 unlink_free_space(ctl, left_info); 2191 else 2192 __unlink_free_space(ctl, left_info); 2193 info->offset = left_info->offset; 2194 info->bytes += left_info->bytes; 2195 
kmem_cache_free(btrfs_free_space_cachep, left_info); 2196 merged = true; 2197 } 2198 2199 return merged; 2200 } 2201 2202 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2203 struct btrfs_free_space *info, 2204 bool update_stat) 2205 { 2206 struct btrfs_free_space *bitmap; 2207 unsigned long i; 2208 unsigned long j; 2209 const u64 end = info->offset + info->bytes; 2210 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2211 u64 bytes; 2212 2213 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2214 if (!bitmap) 2215 return false; 2216 2217 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2218 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2219 if (j == i) 2220 return false; 2221 bytes = (j - i) * ctl->unit; 2222 info->bytes += bytes; 2223 2224 if (update_stat) 2225 bitmap_clear_bits(ctl, bitmap, end, bytes); 2226 else 2227 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2228 2229 if (!bitmap->bytes) 2230 free_bitmap(ctl, bitmap); 2231 2232 return true; 2233 } 2234 2235 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2236 struct btrfs_free_space *info, 2237 bool update_stat) 2238 { 2239 struct btrfs_free_space *bitmap; 2240 u64 bitmap_offset; 2241 unsigned long i; 2242 unsigned long j; 2243 unsigned long prev_j; 2244 u64 bytes; 2245 2246 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2247 /* If we're on a boundary, try the previous logical bitmap. */ 2248 if (bitmap_offset == info->offset) { 2249 if (info->offset == 0) 2250 return false; 2251 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2252 } 2253 2254 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2255 if (!bitmap) 2256 return false; 2257 2258 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2259 j = 0; 2260 prev_j = (unsigned long)-1; 2261 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2262 if (j > i) 2263 break; 2264 prev_j = j; 2265 } 2266 if (prev_j == i) 2267 return false; 2268 2269 if (prev_j == (unsigned long)-1) 2270 bytes = (i + 1) * ctl->unit; 2271 else 2272 bytes = (i - prev_j) * ctl->unit; 2273 2274 info->offset -= bytes; 2275 info->bytes += bytes; 2276 2277 if (update_stat) 2278 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2279 else 2280 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2281 2282 if (!bitmap->bytes) 2283 free_bitmap(ctl, bitmap); 2284 2285 return true; 2286 } 2287 2288 /* 2289 * We prefer always to allocate from extent entries, both for clustered and 2290 * non-clustered allocation requests. So when attempting to add a new extent 2291 * entry, try to see if there's adjacent free space in bitmap entries, and if 2292 * there is, migrate that space from the bitmaps to the extent. 2293 * Like this we get better chances of satisfying space allocation requests 2294 * because we attempt to satisfy them based on a single cache entry, and never 2295 * on 2 or more entries - even if the entries represent a contiguous free space 2296 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2297 * ends). 2298 */ 2299 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2300 struct btrfs_free_space *info, 2301 bool update_stat) 2302 { 2303 /* 2304 * Only work with disconnected entries, as we can change their offset, 2305 * and must be extent entries. 
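* (Illustrative example, assuming a 4K unit: if the new extent entry covers
* [1MiB, 1MiB + 64KiB) and the bitmap covering 1MiB + 64KiB has its first
* three bits set, steal_from_bitmap_to_end() grows the entry by 12KiB and
* clears those bits; steal_from_bitmap_to_front() does the same for a run
* of set bits that ends exactly at the entry's start offset.)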
2306 */ 2307 ASSERT(!info->bitmap); 2308 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2309 2310 if (ctl->total_bitmaps > 0) { 2311 bool stole_end; 2312 bool stole_front = false; 2313 2314 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2315 if (ctl->total_bitmaps > 0) 2316 stole_front = steal_from_bitmap_to_front(ctl, info, 2317 update_stat); 2318 2319 if (stole_end || stole_front) 2320 try_merge_free_space(ctl, info, update_stat); 2321 } 2322 } 2323 2324 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2325 struct btrfs_free_space_ctl *ctl, 2326 u64 offset, u64 bytes) 2327 { 2328 struct btrfs_free_space *info; 2329 int ret = 0; 2330 2331 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2332 if (!info) 2333 return -ENOMEM; 2334 2335 info->offset = offset; 2336 info->bytes = bytes; 2337 RB_CLEAR_NODE(&info->offset_index); 2338 2339 spin_lock(&ctl->tree_lock); 2340 2341 if (try_merge_free_space(ctl, info, true)) 2342 goto link; 2343 2344 /* 2345 * There was no extent directly to the left or right of this new 2346 * extent then we know we're going to have to allocate a new extent, so 2347 * before we do that see if we need to drop this into a bitmap 2348 */ 2349 ret = insert_into_bitmap(ctl, info); 2350 if (ret < 0) { 2351 goto out; 2352 } else if (ret) { 2353 ret = 0; 2354 goto out; 2355 } 2356 link: 2357 /* 2358 * Only steal free space from adjacent bitmaps if we're sure we're not 2359 * going to add the new free space to existing bitmap entries - because 2360 * that would mean unnecessary work that would be reverted. Therefore 2361 * attempt to steal space from bitmaps if we're adding an extent entry. 2362 */ 2363 steal_from_bitmap(ctl, info, true); 2364 2365 ret = link_free_space(ctl, info); 2366 if (ret) 2367 kmem_cache_free(btrfs_free_space_cachep, info); 2368 out: 2369 spin_unlock(&ctl->tree_lock); 2370 2371 if (ret) { 2372 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2373 ASSERT(ret != -EEXIST); 2374 } 2375 2376 return ret; 2377 } 2378 2379 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2380 u64 offset, u64 bytes) 2381 { 2382 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2383 struct btrfs_free_space *info; 2384 int ret; 2385 bool re_search = false; 2386 2387 spin_lock(&ctl->tree_lock); 2388 2389 again: 2390 ret = 0; 2391 if (!bytes) 2392 goto out_lock; 2393 2394 info = tree_search_offset(ctl, offset, 0, 0); 2395 if (!info) { 2396 /* 2397 * oops didn't find an extent that matched the space we wanted 2398 * to remove, look for a bitmap instead 2399 */ 2400 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2401 1, 0); 2402 if (!info) { 2403 /* 2404 * If we found a partial bit of our free space in a 2405 * bitmap but then couldn't find the other part this may 2406 * be a problem, so WARN about it. 
2407 */ 2408 WARN_ON(re_search); 2409 goto out_lock; 2410 } 2411 } 2412 2413 re_search = false; 2414 if (!info->bitmap) { 2415 unlink_free_space(ctl, info); 2416 if (offset == info->offset) { 2417 u64 to_free = min(bytes, info->bytes); 2418 2419 info->bytes -= to_free; 2420 info->offset += to_free; 2421 if (info->bytes) { 2422 ret = link_free_space(ctl, info); 2423 WARN_ON(ret); 2424 } else { 2425 kmem_cache_free(btrfs_free_space_cachep, info); 2426 } 2427 2428 offset += to_free; 2429 bytes -= to_free; 2430 goto again; 2431 } else { 2432 u64 old_end = info->bytes + info->offset; 2433 2434 info->bytes = offset - info->offset; 2435 ret = link_free_space(ctl, info); 2436 WARN_ON(ret); 2437 if (ret) 2438 goto out_lock; 2439 2440 /* Not enough bytes in this entry to satisfy us */ 2441 if (old_end < offset + bytes) { 2442 bytes -= old_end - offset; 2443 offset = old_end; 2444 goto again; 2445 } else if (old_end == offset + bytes) { 2446 /* all done */ 2447 goto out_lock; 2448 } 2449 spin_unlock(&ctl->tree_lock); 2450 2451 ret = btrfs_add_free_space(block_group, offset + bytes, 2452 old_end - (offset + bytes)); 2453 WARN_ON(ret); 2454 goto out; 2455 } 2456 } 2457 2458 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2459 if (ret == -EAGAIN) { 2460 re_search = true; 2461 goto again; 2462 } 2463 out_lock: 2464 spin_unlock(&ctl->tree_lock); 2465 out: 2466 return ret; 2467 } 2468 2469 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2470 u64 bytes) 2471 { 2472 struct btrfs_fs_info *fs_info = block_group->fs_info; 2473 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2474 struct btrfs_free_space *info; 2475 struct rb_node *n; 2476 int count = 0; 2477 2478 spin_lock(&ctl->tree_lock); 2479 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2480 info = rb_entry(n, struct btrfs_free_space, offset_index); 2481 if (info->bytes >= bytes && !block_group->ro) 2482 count++; 2483 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2484 info->offset, info->bytes, 2485 (info->bitmap) ? "yes" : "no"); 2486 } 2487 spin_unlock(&ctl->tree_lock); 2488 btrfs_info(fs_info, "block group has cluster?: %s", 2489 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2490 btrfs_info(fs_info, 2491 "%d blocks of free space at or bigger than bytes is", count); 2492 } 2493 2494 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2495 { 2496 struct btrfs_fs_info *fs_info = block_group->fs_info; 2497 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2498 2499 spin_lock_init(&ctl->tree_lock); 2500 ctl->unit = fs_info->sectorsize; 2501 ctl->start = block_group->key.objectid; 2502 ctl->private = block_group; 2503 ctl->op = &free_space_op; 2504 INIT_LIST_HEAD(&ctl->trimming_ranges); 2505 mutex_init(&ctl->cache_writeout_mutex); 2506 2507 /* 2508 * we only want to have 32k of ram per block group for keeping 2509 * track of free space, and if we pass 1/2 of that we want to 2510 * start converting things over to using bitmaps 2511 */ 2512 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2513 } 2514 2515 /* 2516 * for a given cluster, put all of its extents back into the free 2517 * space cache. If the block group passed doesn't match the block group 2518 * pointed to by the cluster, someone else raced in and freed the 2519 * cluster already. 
In that case, we just return without changing anything 2520 */ 2521 static int 2522 __btrfs_return_cluster_to_free_space( 2523 struct btrfs_block_group_cache *block_group, 2524 struct btrfs_free_cluster *cluster) 2525 { 2526 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2527 struct btrfs_free_space *entry; 2528 struct rb_node *node; 2529 2530 spin_lock(&cluster->lock); 2531 if (cluster->block_group != block_group) 2532 goto out; 2533 2534 cluster->block_group = NULL; 2535 cluster->window_start = 0; 2536 list_del_init(&cluster->block_group_list); 2537 2538 node = rb_first(&cluster->root); 2539 while (node) { 2540 bool bitmap; 2541 2542 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2543 node = rb_next(&entry->offset_index); 2544 rb_erase(&entry->offset_index, &cluster->root); 2545 RB_CLEAR_NODE(&entry->offset_index); 2546 2547 bitmap = (entry->bitmap != NULL); 2548 if (!bitmap) { 2549 try_merge_free_space(ctl, entry, false); 2550 steal_from_bitmap(ctl, entry, false); 2551 } 2552 tree_insert_offset(&ctl->free_space_offset, 2553 entry->offset, &entry->offset_index, bitmap); 2554 } 2555 cluster->root = RB_ROOT; 2556 2557 out: 2558 spin_unlock(&cluster->lock); 2559 btrfs_put_block_group(block_group); 2560 return 0; 2561 } 2562 2563 static void __btrfs_remove_free_space_cache_locked( 2564 struct btrfs_free_space_ctl *ctl) 2565 { 2566 struct btrfs_free_space *info; 2567 struct rb_node *node; 2568 2569 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2570 info = rb_entry(node, struct btrfs_free_space, offset_index); 2571 if (!info->bitmap) { 2572 unlink_free_space(ctl, info); 2573 kmem_cache_free(btrfs_free_space_cachep, info); 2574 } else { 2575 free_bitmap(ctl, info); 2576 } 2577 2578 cond_resched_lock(&ctl->tree_lock); 2579 } 2580 } 2581 2582 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2583 { 2584 spin_lock(&ctl->tree_lock); 2585 __btrfs_remove_free_space_cache_locked(ctl); 2586 spin_unlock(&ctl->tree_lock); 2587 } 2588 2589 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2590 { 2591 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2592 struct btrfs_free_cluster *cluster; 2593 struct list_head *head; 2594 2595 spin_lock(&ctl->tree_lock); 2596 while ((head = block_group->cluster_list.next) != 2597 &block_group->cluster_list) { 2598 cluster = list_entry(head, struct btrfs_free_cluster, 2599 block_group_list); 2600 2601 WARN_ON(cluster->block_group != block_group); 2602 __btrfs_return_cluster_to_free_space(block_group, cluster); 2603 2604 cond_resched_lock(&ctl->tree_lock); 2605 } 2606 __btrfs_remove_free_space_cache_locked(ctl); 2607 spin_unlock(&ctl->tree_lock); 2608 2609 } 2610 2611 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2612 u64 offset, u64 bytes, u64 empty_size, 2613 u64 *max_extent_size) 2614 { 2615 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2616 struct btrfs_free_space *entry = NULL; 2617 u64 bytes_search = bytes + empty_size; 2618 u64 ret = 0; 2619 u64 align_gap = 0; 2620 u64 align_gap_len = 0; 2621 2622 spin_lock(&ctl->tree_lock); 2623 entry = find_free_space(ctl, &offset, &bytes_search, 2624 block_group->full_stripe_len, max_extent_size); 2625 if (!entry) 2626 goto out; 2627 2628 ret = offset; 2629 if (entry->bitmap) { 2630 bitmap_clear_bits(ctl, entry, offset, bytes); 2631 if (!entry->bytes) 2632 free_bitmap(ctl, entry); 2633 } else { 2634 unlink_free_space(ctl, entry); 2635 align_gap_len = offset - 
entry->offset; 2636 align_gap = entry->offset; 2637 2638 entry->offset = offset + bytes; 2639 WARN_ON(entry->bytes < bytes + align_gap_len); 2640 2641 entry->bytes -= bytes + align_gap_len; 2642 if (!entry->bytes) 2643 kmem_cache_free(btrfs_free_space_cachep, entry); 2644 else 2645 link_free_space(ctl, entry); 2646 } 2647 out: 2648 spin_unlock(&ctl->tree_lock); 2649 2650 if (align_gap_len) 2651 __btrfs_add_free_space(block_group->fs_info, ctl, 2652 align_gap, align_gap_len); 2653 return ret; 2654 } 2655 2656 /* 2657 * given a cluster, put all of its extents back into the free space 2658 * cache. If a block group is passed, this function will only free 2659 * a cluster that belongs to the passed block group. 2660 * 2661 * Otherwise, it'll get a reference on the block group pointed to by the 2662 * cluster and remove the cluster from it. 2663 */ 2664 int btrfs_return_cluster_to_free_space( 2665 struct btrfs_block_group_cache *block_group, 2666 struct btrfs_free_cluster *cluster) 2667 { 2668 struct btrfs_free_space_ctl *ctl; 2669 int ret; 2670 2671 /* first, get a safe pointer to the block group */ 2672 spin_lock(&cluster->lock); 2673 if (!block_group) { 2674 block_group = cluster->block_group; 2675 if (!block_group) { 2676 spin_unlock(&cluster->lock); 2677 return 0; 2678 } 2679 } else if (cluster->block_group != block_group) { 2680 /* someone else has already freed it don't redo their work */ 2681 spin_unlock(&cluster->lock); 2682 return 0; 2683 } 2684 atomic_inc(&block_group->count); 2685 spin_unlock(&cluster->lock); 2686 2687 ctl = block_group->free_space_ctl; 2688 2689 /* now return any extents the cluster had on it */ 2690 spin_lock(&ctl->tree_lock); 2691 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2692 spin_unlock(&ctl->tree_lock); 2693 2694 /* finally drop our ref */ 2695 btrfs_put_block_group(block_group); 2696 return ret; 2697 } 2698 2699 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2700 struct btrfs_free_cluster *cluster, 2701 struct btrfs_free_space *entry, 2702 u64 bytes, u64 min_start, 2703 u64 *max_extent_size) 2704 { 2705 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2706 int err; 2707 u64 search_start = cluster->window_start; 2708 u64 search_bytes = bytes; 2709 u64 ret = 0; 2710 2711 search_start = min_start; 2712 search_bytes = bytes; 2713 2714 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2715 if (err) { 2716 *max_extent_size = max(get_max_extent_size(entry), 2717 *max_extent_size); 2718 return 0; 2719 } 2720 2721 ret = search_start; 2722 __bitmap_clear_bits(ctl, entry, ret, bytes); 2723 2724 return ret; 2725 } 2726 2727 /* 2728 * given a cluster, try to allocate 'bytes' from it, returns 0 2729 * if it couldn't find anything suitably large, or a logical disk offset 2730 * if things worked out 2731 */ 2732 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2733 struct btrfs_free_cluster *cluster, u64 bytes, 2734 u64 min_start, u64 *max_extent_size) 2735 { 2736 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2737 struct btrfs_free_space *entry = NULL; 2738 struct rb_node *node; 2739 u64 ret = 0; 2740 2741 spin_lock(&cluster->lock); 2742 if (bytes > cluster->max_size) 2743 goto out; 2744 2745 if (cluster->block_group != block_group) 2746 goto out; 2747 2748 node = rb_first(&cluster->root); 2749 if (!node) 2750 goto out; 2751 2752 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2753 while (1) { 2754 if (entry->bytes < bytes) 2755 
*max_extent_size = max(get_max_extent_size(entry), 2756 *max_extent_size); 2757 2758 if (entry->bytes < bytes || 2759 (!entry->bitmap && entry->offset < min_start)) { 2760 node = rb_next(&entry->offset_index); 2761 if (!node) 2762 break; 2763 entry = rb_entry(node, struct btrfs_free_space, 2764 offset_index); 2765 continue; 2766 } 2767 2768 if (entry->bitmap) { 2769 ret = btrfs_alloc_from_bitmap(block_group, 2770 cluster, entry, bytes, 2771 cluster->window_start, 2772 max_extent_size); 2773 if (ret == 0) { 2774 node = rb_next(&entry->offset_index); 2775 if (!node) 2776 break; 2777 entry = rb_entry(node, struct btrfs_free_space, 2778 offset_index); 2779 continue; 2780 } 2781 cluster->window_start += bytes; 2782 } else { 2783 ret = entry->offset; 2784 2785 entry->offset += bytes; 2786 entry->bytes -= bytes; 2787 } 2788 2789 if (entry->bytes == 0) 2790 rb_erase(&entry->offset_index, &cluster->root); 2791 break; 2792 } 2793 out: 2794 spin_unlock(&cluster->lock); 2795 2796 if (!ret) 2797 return 0; 2798 2799 spin_lock(&ctl->tree_lock); 2800 2801 ctl->free_space -= bytes; 2802 if (entry->bytes == 0) { 2803 ctl->free_extents--; 2804 if (entry->bitmap) { 2805 kfree(entry->bitmap); 2806 ctl->total_bitmaps--; 2807 ctl->op->recalc_thresholds(ctl); 2808 } 2809 kmem_cache_free(btrfs_free_space_cachep, entry); 2810 } 2811 2812 spin_unlock(&ctl->tree_lock); 2813 2814 return ret; 2815 } 2816 2817 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2818 struct btrfs_free_space *entry, 2819 struct btrfs_free_cluster *cluster, 2820 u64 offset, u64 bytes, 2821 u64 cont1_bytes, u64 min_bytes) 2822 { 2823 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2824 unsigned long next_zero; 2825 unsigned long i; 2826 unsigned long want_bits; 2827 unsigned long min_bits; 2828 unsigned long found_bits; 2829 unsigned long max_bits = 0; 2830 unsigned long start = 0; 2831 unsigned long total_found = 0; 2832 int ret; 2833 2834 i = offset_to_bit(entry->offset, ctl->unit, 2835 max_t(u64, offset, entry->offset)); 2836 want_bits = bytes_to_bits(bytes, ctl->unit); 2837 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2838 2839 /* 2840 * Don't bother looking for a cluster in this bitmap if it's heavily 2841 * fragmented. 
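* (For example - illustrative numbers only - if an earlier scan cached
* entry->max_extent_size as 64KiB and the caller needs cont1_bytes of 1MiB,
* bail out right away instead of rescanning the bitmap; the cached value is
* reset to 0 whenever new bytes are added to the bitmap.)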
2842 */ 2843 if (entry->max_extent_size && 2844 entry->max_extent_size < cont1_bytes) 2845 return -ENOSPC; 2846 again: 2847 found_bits = 0; 2848 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2849 next_zero = find_next_zero_bit(entry->bitmap, 2850 BITS_PER_BITMAP, i); 2851 if (next_zero - i >= min_bits) { 2852 found_bits = next_zero - i; 2853 if (found_bits > max_bits) 2854 max_bits = found_bits; 2855 break; 2856 } 2857 if (next_zero - i > max_bits) 2858 max_bits = next_zero - i; 2859 i = next_zero; 2860 } 2861 2862 if (!found_bits) { 2863 entry->max_extent_size = (u64)max_bits * ctl->unit; 2864 return -ENOSPC; 2865 } 2866 2867 if (!total_found) { 2868 start = i; 2869 cluster->max_size = 0; 2870 } 2871 2872 total_found += found_bits; 2873 2874 if (cluster->max_size < found_bits * ctl->unit) 2875 cluster->max_size = found_bits * ctl->unit; 2876 2877 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2878 i = next_zero + 1; 2879 goto again; 2880 } 2881 2882 cluster->window_start = start * ctl->unit + entry->offset; 2883 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2884 ret = tree_insert_offset(&cluster->root, entry->offset, 2885 &entry->offset_index, 1); 2886 ASSERT(!ret); /* -EEXIST; Logic error */ 2887 2888 trace_btrfs_setup_cluster(block_group, cluster, 2889 total_found * ctl->unit, 1); 2890 return 0; 2891 } 2892 2893 /* 2894 * This searches the block group for just extents to fill the cluster with. 2895 * Try to find a cluster with at least bytes total bytes, at least one 2896 * extent of cont1_bytes, and other clusters of at least min_bytes. 2897 */ 2898 static noinline int 2899 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2900 struct btrfs_free_cluster *cluster, 2901 struct list_head *bitmaps, u64 offset, u64 bytes, 2902 u64 cont1_bytes, u64 min_bytes) 2903 { 2904 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2905 struct btrfs_free_space *first = NULL; 2906 struct btrfs_free_space *entry = NULL; 2907 struct btrfs_free_space *last; 2908 struct rb_node *node; 2909 u64 window_free; 2910 u64 max_extent; 2911 u64 total_size = 0; 2912 2913 entry = tree_search_offset(ctl, offset, 0, 1); 2914 if (!entry) 2915 return -ENOSPC; 2916 2917 /* 2918 * We don't want bitmaps, so just move along until we find a normal 2919 * extent entry. 
2920 */ 2921 while (entry->bitmap || entry->bytes < min_bytes) { 2922 if (entry->bitmap && list_empty(&entry->list)) 2923 list_add_tail(&entry->list, bitmaps); 2924 node = rb_next(&entry->offset_index); 2925 if (!node) 2926 return -ENOSPC; 2927 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2928 } 2929 2930 window_free = entry->bytes; 2931 max_extent = entry->bytes; 2932 first = entry; 2933 last = entry; 2934 2935 for (node = rb_next(&entry->offset_index); node; 2936 node = rb_next(&entry->offset_index)) { 2937 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2938 2939 if (entry->bitmap) { 2940 if (list_empty(&entry->list)) 2941 list_add_tail(&entry->list, bitmaps); 2942 continue; 2943 } 2944 2945 if (entry->bytes < min_bytes) 2946 continue; 2947 2948 last = entry; 2949 window_free += entry->bytes; 2950 if (entry->bytes > max_extent) 2951 max_extent = entry->bytes; 2952 } 2953 2954 if (window_free < bytes || max_extent < cont1_bytes) 2955 return -ENOSPC; 2956 2957 cluster->window_start = first->offset; 2958 2959 node = &first->offset_index; 2960 2961 /* 2962 * now we've found our entries, pull them out of the free space 2963 * cache and put them into the cluster rbtree 2964 */ 2965 do { 2966 int ret; 2967 2968 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2969 node = rb_next(&entry->offset_index); 2970 if (entry->bitmap || entry->bytes < min_bytes) 2971 continue; 2972 2973 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2974 ret = tree_insert_offset(&cluster->root, entry->offset, 2975 &entry->offset_index, 0); 2976 total_size += entry->bytes; 2977 ASSERT(!ret); /* -EEXIST; Logic error */ 2978 } while (node && entry != last); 2979 2980 cluster->max_size = max_extent; 2981 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2982 return 0; 2983 } 2984 2985 /* 2986 * This specifically looks for bitmaps that may work in the cluster, we assume 2987 * that we have already failed to find extents that will work. 2988 */ 2989 static noinline int 2990 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2991 struct btrfs_free_cluster *cluster, 2992 struct list_head *bitmaps, u64 offset, u64 bytes, 2993 u64 cont1_bytes, u64 min_bytes) 2994 { 2995 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2996 struct btrfs_free_space *entry = NULL; 2997 int ret = -ENOSPC; 2998 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2999 3000 if (ctl->total_bitmaps == 0) 3001 return -ENOSPC; 3002 3003 /* 3004 * The bitmap that covers offset won't be in the list unless offset 3005 * is just its start offset. 3006 */ 3007 if (!list_empty(bitmaps)) 3008 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3009 3010 if (!entry || entry->offset != bitmap_offset) { 3011 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3012 if (entry && list_empty(&entry->list)) 3013 list_add(&entry->list, bitmaps); 3014 } 3015 3016 list_for_each_entry(entry, bitmaps, list) { 3017 if (entry->bytes < bytes) 3018 continue; 3019 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3020 bytes, cont1_bytes, min_bytes); 3021 if (!ret) 3022 return 0; 3023 } 3024 3025 /* 3026 * The bitmaps list has all the bitmaps that record free space 3027 * starting after offset, so no more search is required. 3028 */ 3029 return -ENOSPC; 3030 } 3031 3032 /* 3033 * here we try to find a cluster of blocks in a block group. The goal 3034 * is to find at least bytes+empty_size. 3035 * We might not find them all in one contiguous area. 
3036 * 3037 * returns zero and sets up cluster if things worked out, otherwise 3038 * it returns -enospc 3039 */ 3040 int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group, 3041 struct btrfs_free_cluster *cluster, 3042 u64 offset, u64 bytes, u64 empty_size) 3043 { 3044 struct btrfs_fs_info *fs_info = block_group->fs_info; 3045 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3046 struct btrfs_free_space *entry, *tmp; 3047 LIST_HEAD(bitmaps); 3048 u64 min_bytes; 3049 u64 cont1_bytes; 3050 int ret; 3051 3052 /* 3053 * Choose the minimum extent size we'll require for this 3054 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3055 * For metadata, allow allocates with smaller extents. For 3056 * data, keep it dense. 3057 */ 3058 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3059 cont1_bytes = min_bytes = bytes + empty_size; 3060 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3061 cont1_bytes = bytes; 3062 min_bytes = fs_info->sectorsize; 3063 } else { 3064 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3065 min_bytes = fs_info->sectorsize; 3066 } 3067 3068 spin_lock(&ctl->tree_lock); 3069 3070 /* 3071 * If we know we don't have enough space to make a cluster don't even 3072 * bother doing all the work to try and find one. 3073 */ 3074 if (ctl->free_space < bytes) { 3075 spin_unlock(&ctl->tree_lock); 3076 return -ENOSPC; 3077 } 3078 3079 spin_lock(&cluster->lock); 3080 3081 /* someone already found a cluster, hooray */ 3082 if (cluster->block_group) { 3083 ret = 0; 3084 goto out; 3085 } 3086 3087 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3088 min_bytes); 3089 3090 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3091 bytes + empty_size, 3092 cont1_bytes, min_bytes); 3093 if (ret) 3094 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3095 offset, bytes + empty_size, 3096 cont1_bytes, min_bytes); 3097 3098 /* Clear our temporary list */ 3099 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3100 list_del_init(&entry->list); 3101 3102 if (!ret) { 3103 atomic_inc(&block_group->count); 3104 list_add_tail(&cluster->block_group_list, 3105 &block_group->cluster_list); 3106 cluster->block_group = block_group; 3107 } else { 3108 trace_btrfs_failed_cluster_setup(block_group); 3109 } 3110 out: 3111 spin_unlock(&cluster->lock); 3112 spin_unlock(&ctl->tree_lock); 3113 3114 return ret; 3115 } 3116 3117 /* 3118 * simple code to zero out a cluster 3119 */ 3120 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3121 { 3122 spin_lock_init(&cluster->lock); 3123 spin_lock_init(&cluster->refill_lock); 3124 cluster->root = RB_ROOT; 3125 cluster->max_size = 0; 3126 cluster->fragmented = false; 3127 INIT_LIST_HEAD(&cluster->block_group_list); 3128 cluster->block_group = NULL; 3129 } 3130 3131 static int do_trimming(struct btrfs_block_group_cache *block_group, 3132 u64 *total_trimmed, u64 start, u64 bytes, 3133 u64 reserved_start, u64 reserved_bytes, 3134 struct btrfs_trim_range *trim_entry) 3135 { 3136 struct btrfs_space_info *space_info = block_group->space_info; 3137 struct btrfs_fs_info *fs_info = block_group->fs_info; 3138 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3139 int ret; 3140 int update = 0; 3141 u64 trimmed = 0; 3142 3143 spin_lock(&space_info->lock); 3144 spin_lock(&block_group->lock); 3145 if (!block_group->ro) { 3146 block_group->reserved += reserved_bytes; 3147 space_info->bytes_reserved += reserved_bytes; 3148 update = 1; 3149 } 3150 
spin_unlock(&block_group->lock); 3151 spin_unlock(&space_info->lock); 3152 3153 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3154 if (!ret) 3155 *total_trimmed += trimmed; 3156 3157 mutex_lock(&ctl->cache_writeout_mutex); 3158 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3159 list_del(&trim_entry->list); 3160 mutex_unlock(&ctl->cache_writeout_mutex); 3161 3162 if (update) { 3163 spin_lock(&space_info->lock); 3164 spin_lock(&block_group->lock); 3165 if (block_group->ro) 3166 space_info->bytes_readonly += reserved_bytes; 3167 block_group->reserved -= reserved_bytes; 3168 space_info->bytes_reserved -= reserved_bytes; 3169 spin_unlock(&block_group->lock); 3170 spin_unlock(&space_info->lock); 3171 } 3172 3173 return ret; 3174 } 3175 3176 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3177 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3178 { 3179 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3180 struct btrfs_free_space *entry; 3181 struct rb_node *node; 3182 int ret = 0; 3183 u64 extent_start; 3184 u64 extent_bytes; 3185 u64 bytes; 3186 3187 while (start < end) { 3188 struct btrfs_trim_range trim_entry; 3189 3190 mutex_lock(&ctl->cache_writeout_mutex); 3191 spin_lock(&ctl->tree_lock); 3192 3193 if (ctl->free_space < minlen) { 3194 spin_unlock(&ctl->tree_lock); 3195 mutex_unlock(&ctl->cache_writeout_mutex); 3196 break; 3197 } 3198 3199 entry = tree_search_offset(ctl, start, 0, 1); 3200 if (!entry) { 3201 spin_unlock(&ctl->tree_lock); 3202 mutex_unlock(&ctl->cache_writeout_mutex); 3203 break; 3204 } 3205 3206 /* skip bitmaps */ 3207 while (entry->bitmap) { 3208 node = rb_next(&entry->offset_index); 3209 if (!node) { 3210 spin_unlock(&ctl->tree_lock); 3211 mutex_unlock(&ctl->cache_writeout_mutex); 3212 goto out; 3213 } 3214 entry = rb_entry(node, struct btrfs_free_space, 3215 offset_index); 3216 } 3217 3218 if (entry->offset >= end) { 3219 spin_unlock(&ctl->tree_lock); 3220 mutex_unlock(&ctl->cache_writeout_mutex); 3221 break; 3222 } 3223 3224 extent_start = entry->offset; 3225 extent_bytes = entry->bytes; 3226 start = max(start, extent_start); 3227 bytes = min(extent_start + extent_bytes, end) - start; 3228 if (bytes < minlen) { 3229 spin_unlock(&ctl->tree_lock); 3230 mutex_unlock(&ctl->cache_writeout_mutex); 3231 goto next; 3232 } 3233 3234 unlink_free_space(ctl, entry); 3235 kmem_cache_free(btrfs_free_space_cachep, entry); 3236 3237 spin_unlock(&ctl->tree_lock); 3238 trim_entry.start = extent_start; 3239 trim_entry.bytes = extent_bytes; 3240 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3241 mutex_unlock(&ctl->cache_writeout_mutex); 3242 3243 ret = do_trimming(block_group, total_trimmed, start, bytes, 3244 extent_start, extent_bytes, &trim_entry); 3245 if (ret) 3246 break; 3247 next: 3248 start += bytes; 3249 3250 if (fatal_signal_pending(current)) { 3251 ret = -ERESTARTSYS; 3252 break; 3253 } 3254 3255 cond_resched(); 3256 } 3257 out: 3258 return ret; 3259 } 3260 3261 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3262 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3263 { 3264 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3265 struct btrfs_free_space *entry; 3266 int ret = 0; 3267 int ret2; 3268 u64 bytes; 3269 u64 offset = offset_to_bitmap(ctl, start); 3270 3271 while (offset < end) { 3272 bool next_bitmap = false; 3273 struct btrfs_trim_range trim_entry; 3274 3275 mutex_lock(&ctl->cache_writeout_mutex); 3276 spin_lock(&ctl->tree_lock); 3277 3278 if 
(ctl->free_space < minlen) { 3279 spin_unlock(&ctl->tree_lock); 3280 mutex_unlock(&ctl->cache_writeout_mutex); 3281 break; 3282 } 3283 3284 entry = tree_search_offset(ctl, offset, 1, 0); 3285 if (!entry) { 3286 spin_unlock(&ctl->tree_lock); 3287 mutex_unlock(&ctl->cache_writeout_mutex); 3288 next_bitmap = true; 3289 goto next; 3290 } 3291 3292 bytes = minlen; 3293 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3294 if (ret2 || start >= end) { 3295 spin_unlock(&ctl->tree_lock); 3296 mutex_unlock(&ctl->cache_writeout_mutex); 3297 next_bitmap = true; 3298 goto next; 3299 } 3300 3301 bytes = min(bytes, end - start); 3302 if (bytes < minlen) { 3303 spin_unlock(&ctl->tree_lock); 3304 mutex_unlock(&ctl->cache_writeout_mutex); 3305 goto next; 3306 } 3307 3308 bitmap_clear_bits(ctl, entry, start, bytes); 3309 if (entry->bytes == 0) 3310 free_bitmap(ctl, entry); 3311 3312 spin_unlock(&ctl->tree_lock); 3313 trim_entry.start = start; 3314 trim_entry.bytes = bytes; 3315 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3316 mutex_unlock(&ctl->cache_writeout_mutex); 3317 3318 ret = do_trimming(block_group, total_trimmed, start, bytes, 3319 start, bytes, &trim_entry); 3320 if (ret) 3321 break; 3322 next: 3323 if (next_bitmap) { 3324 offset += BITS_PER_BITMAP * ctl->unit; 3325 } else { 3326 start += bytes; 3327 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3328 offset += BITS_PER_BITMAP * ctl->unit; 3329 } 3330 3331 if (fatal_signal_pending(current)) { 3332 ret = -ERESTARTSYS; 3333 break; 3334 } 3335 3336 cond_resched(); 3337 } 3338 3339 return ret; 3340 } 3341 3342 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3343 { 3344 atomic_inc(&cache->trimming); 3345 } 3346 3347 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3348 { 3349 struct btrfs_fs_info *fs_info = block_group->fs_info; 3350 struct extent_map_tree *em_tree; 3351 struct extent_map *em; 3352 bool cleanup; 3353 3354 spin_lock(&block_group->lock); 3355 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3356 block_group->removed); 3357 spin_unlock(&block_group->lock); 3358 3359 if (cleanup) { 3360 mutex_lock(&fs_info->chunk_mutex); 3361 em_tree = &fs_info->mapping_tree; 3362 write_lock(&em_tree->lock); 3363 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3364 1); 3365 BUG_ON(!em); /* logic error, can't happen */ 3366 remove_extent_mapping(em_tree, em); 3367 write_unlock(&em_tree->lock); 3368 mutex_unlock(&fs_info->chunk_mutex); 3369 3370 /* once for us and once for the tree */ 3371 free_extent_map(em); 3372 free_extent_map(em); 3373 3374 /* 3375 * We've left one free space entry and other tasks trimming 3376 * this block group have left 1 entry each one. Free them. 
3377 */ 3378 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3379 } 3380 } 3381 3382 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3383 u64 *trimmed, u64 start, u64 end, u64 minlen) 3384 { 3385 int ret; 3386 3387 *trimmed = 0; 3388 3389 spin_lock(&block_group->lock); 3390 if (block_group->removed) { 3391 spin_unlock(&block_group->lock); 3392 return 0; 3393 } 3394 btrfs_get_block_group_trimming(block_group); 3395 spin_unlock(&block_group->lock); 3396 3397 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3398 if (ret) 3399 goto out; 3400 3401 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3402 out: 3403 btrfs_put_block_group_trimming(block_group); 3404 return ret; 3405 } 3406 3407 /* 3408 * Find the left-most item in the cache tree, and then return the 3409 * smallest inode number in the item. 3410 * 3411 * Note: the returned inode number may not be the smallest one in 3412 * the tree, if the left-most item is a bitmap. 3413 */ 3414 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3415 { 3416 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3417 struct btrfs_free_space *entry = NULL; 3418 u64 ino = 0; 3419 3420 spin_lock(&ctl->tree_lock); 3421 3422 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3423 goto out; 3424 3425 entry = rb_entry(rb_first(&ctl->free_space_offset), 3426 struct btrfs_free_space, offset_index); 3427 3428 if (!entry->bitmap) { 3429 ino = entry->offset; 3430 3431 unlink_free_space(ctl, entry); 3432 entry->offset++; 3433 entry->bytes--; 3434 if (!entry->bytes) 3435 kmem_cache_free(btrfs_free_space_cachep, entry); 3436 else 3437 link_free_space(ctl, entry); 3438 } else { 3439 u64 offset = 0; 3440 u64 count = 1; 3441 int ret; 3442 3443 ret = search_bitmap(ctl, entry, &offset, &count, true); 3444 /* Logic error; Should be empty if it can't find anything */ 3445 ASSERT(!ret); 3446 3447 ino = offset; 3448 bitmap_clear_bits(ctl, entry, offset, 1); 3449 if (entry->bytes == 0) 3450 free_bitmap(ctl, entry); 3451 } 3452 out: 3453 spin_unlock(&ctl->tree_lock); 3454 3455 return ino; 3456 } 3457 3458 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3459 struct btrfs_path *path) 3460 { 3461 struct inode *inode = NULL; 3462 3463 spin_lock(&root->ino_cache_lock); 3464 if (root->ino_cache_inode) 3465 inode = igrab(root->ino_cache_inode); 3466 spin_unlock(&root->ino_cache_lock); 3467 if (inode) 3468 return inode; 3469 3470 inode = __lookup_free_space_inode(root, path, 0); 3471 if (IS_ERR(inode)) 3472 return inode; 3473 3474 spin_lock(&root->ino_cache_lock); 3475 if (!btrfs_fs_closing(root->fs_info)) 3476 root->ino_cache_inode = igrab(inode); 3477 spin_unlock(&root->ino_cache_lock); 3478 3479 return inode; 3480 } 3481 3482 int create_free_ino_inode(struct btrfs_root *root, 3483 struct btrfs_trans_handle *trans, 3484 struct btrfs_path *path) 3485 { 3486 return __create_free_space_inode(root, trans, path, 3487 BTRFS_FREE_INO_OBJECTID, 0); 3488 } 3489 3490 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3491 { 3492 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3493 struct btrfs_path *path; 3494 struct inode *inode; 3495 int ret = 0; 3496 u64 root_gen = btrfs_root_generation(&root->root_item); 3497 3498 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3499 return 0; 3500 3501 /* 3502 * If we're unmounting then just return, since this does a search on the 3503 * normal root and not the commit root and we could deadlock. 
3504 */ 3505 if (btrfs_fs_closing(fs_info)) 3506 return 0; 3507 3508 path = btrfs_alloc_path(); 3509 if (!path) 3510 return 0; 3511 3512 inode = lookup_free_ino_inode(root, path); 3513 if (IS_ERR(inode)) 3514 goto out; 3515 3516 if (root_gen != BTRFS_I(inode)->generation) 3517 goto out_put; 3518 3519 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3520 3521 if (ret < 0) 3522 btrfs_err(fs_info, 3523 "failed to load free ino cache for root %llu", 3524 root->root_key.objectid); 3525 out_put: 3526 iput(inode); 3527 out: 3528 btrfs_free_path(path); 3529 return ret; 3530 } 3531 3532 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3533 struct btrfs_trans_handle *trans, 3534 struct btrfs_path *path, 3535 struct inode *inode) 3536 { 3537 struct btrfs_fs_info *fs_info = root->fs_info; 3538 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3539 int ret; 3540 struct btrfs_io_ctl io_ctl; 3541 bool release_metadata = true; 3542 3543 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3544 return 0; 3545 3546 memset(&io_ctl, 0, sizeof(io_ctl)); 3547 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3548 if (!ret) { 3549 /* 3550 * At this point writepages() didn't error out, so our metadata 3551 * reservation is released when the writeback finishes, at 3552 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3553 * with or without an error. 3554 */ 3555 release_metadata = false; 3556 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3557 } 3558 3559 if (ret) { 3560 if (release_metadata) 3561 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3562 inode->i_size, true); 3563 #ifdef DEBUG 3564 btrfs_err(fs_info, 3565 "failed to write free ino cache for root %llu", 3566 root->root_key.objectid); 3567 #endif 3568 } 3569 3570 return ret; 3571 } 3572 3573 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3574 /* 3575 * Use this if you need to make a bitmap or extent entry specifically, it 3576 * doesn't do any of the merging that add_free_space does, this acts a lot like 3577 * how the free space cache loading stuff works, so you can get really weird 3578 * configurations. 
3579 */ 3580 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3581 u64 offset, u64 bytes, bool bitmap) 3582 { 3583 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3584 struct btrfs_free_space *info = NULL, *bitmap_info; 3585 void *map = NULL; 3586 u64 bytes_added; 3587 int ret; 3588 3589 again: 3590 if (!info) { 3591 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3592 if (!info) 3593 return -ENOMEM; 3594 } 3595 3596 if (!bitmap) { 3597 spin_lock(&ctl->tree_lock); 3598 info->offset = offset; 3599 info->bytes = bytes; 3600 info->max_extent_size = 0; 3601 ret = link_free_space(ctl, info); 3602 spin_unlock(&ctl->tree_lock); 3603 if (ret) 3604 kmem_cache_free(btrfs_free_space_cachep, info); 3605 return ret; 3606 } 3607 3608 if (!map) { 3609 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3610 if (!map) { 3611 kmem_cache_free(btrfs_free_space_cachep, info); 3612 return -ENOMEM; 3613 } 3614 } 3615 3616 spin_lock(&ctl->tree_lock); 3617 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3618 1, 0); 3619 if (!bitmap_info) { 3620 info->bitmap = map; 3621 map = NULL; 3622 add_new_bitmap(ctl, info, offset); 3623 bitmap_info = info; 3624 info = NULL; 3625 } 3626 3627 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3628 3629 bytes -= bytes_added; 3630 offset += bytes_added; 3631 spin_unlock(&ctl->tree_lock); 3632 3633 if (bytes) 3634 goto again; 3635 3636 if (info) 3637 kmem_cache_free(btrfs_free_space_cachep, info); 3638 kfree(map); 3639 return 0; 3640 } 3641 3642 /* 3643 * Checks to see if the given range is in the free space cache. This is really 3644 * just used to check the absence of space, so if there is free space in the 3645 * range at all we will return 1. 3646 */ 3647 int test_check_exists(struct btrfs_block_group_cache *cache, 3648 u64 offset, u64 bytes) 3649 { 3650 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3651 struct btrfs_free_space *info; 3652 int ret = 0; 3653 3654 spin_lock(&ctl->tree_lock); 3655 info = tree_search_offset(ctl, offset, 0, 0); 3656 if (!info) { 3657 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3658 1, 0); 3659 if (!info) 3660 goto out; 3661 } 3662 3663 have_info: 3664 if (info->bitmap) { 3665 u64 bit_off, bit_bytes; 3666 struct rb_node *n; 3667 struct btrfs_free_space *tmp; 3668 3669 bit_off = offset; 3670 bit_bytes = ctl->unit; 3671 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3672 if (!ret) { 3673 if (bit_off == offset) { 3674 ret = 1; 3675 goto out; 3676 } else if (bit_off > offset && 3677 offset + bytes > bit_off) { 3678 ret = 1; 3679 goto out; 3680 } 3681 } 3682 3683 n = rb_prev(&info->offset_index); 3684 while (n) { 3685 tmp = rb_entry(n, struct btrfs_free_space, 3686 offset_index); 3687 if (tmp->offset + tmp->bytes < offset) 3688 break; 3689 if (offset + bytes < tmp->offset) { 3690 n = rb_prev(&tmp->offset_index); 3691 continue; 3692 } 3693 info = tmp; 3694 goto have_info; 3695 } 3696 3697 n = rb_next(&info->offset_index); 3698 while (n) { 3699 tmp = rb_entry(n, struct btrfs_free_space, 3700 offset_index); 3701 if (offset + bytes < tmp->offset) 3702 break; 3703 if (tmp->offset + tmp->bytes < offset) { 3704 n = rb_next(&tmp->offset_index); 3705 continue; 3706 } 3707 info = tmp; 3708 goto have_info; 3709 } 3710 3711 ret = 0; 3712 goto out; 3713 } 3714 3715 if (info->offset == offset) { 3716 ret = 1; 3717 goto out; 3718 } 3719 3720 if (offset > info->offset && offset < info->offset + info->bytes) 3721 ret = 1; 3722 out: 3723 
spin_unlock(&ctl->tree_lock); 3724 return ret; 3725 } 3726 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3727