// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_32K

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return inode;

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_fs_info *fs_info,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) +
		btrfs_calc_trans_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group_cache *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;
	int check_crcs = 0;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
		check_crcs = 1;

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && check_crcs &&
	    (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->check_crcs = check_crcs;
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}

static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	__le64 *val;

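	/*
	 * Note: when check_crcs is set, the first page starts with one u32
	 * checksum slot per cached page, followed by the u64 generation
	 * written below; io_ctl_set_crc() and io_ctl_check_generation()
	 * rely on this same layout.
	 */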
	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas. If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	__le64 *gen;

	/*
	 * Skip the crc area. If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
			*gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
			      PAGE_SIZE - offset);
	btrfs_csum_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
			      PAGE_SIZE - offset);
	btrfs_csum_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ?
BTRFS_FREE_SPACE_BITMAP : 521 BTRFS_FREE_SPACE_EXTENT; 522 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 523 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 524 525 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 526 return 0; 527 528 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 529 530 /* No more pages to map */ 531 if (io_ctl->index >= io_ctl->num_pages) 532 return 0; 533 534 /* map the next page */ 535 io_ctl_map_page(io_ctl, 1); 536 return 0; 537 } 538 539 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 540 { 541 if (!io_ctl->cur) 542 return -ENOSPC; 543 544 /* 545 * If we aren't at the start of the current page, unmap this one and 546 * map the next one if there is any left. 547 */ 548 if (io_ctl->cur != io_ctl->orig) { 549 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 550 if (io_ctl->index >= io_ctl->num_pages) 551 return -ENOSPC; 552 io_ctl_map_page(io_ctl, 0); 553 } 554 555 copy_page(io_ctl->cur, bitmap); 556 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 557 if (io_ctl->index < io_ctl->num_pages) 558 io_ctl_map_page(io_ctl, 0); 559 return 0; 560 } 561 562 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 563 { 564 /* 565 * If we're not on the boundary we know we've modified the page and we 566 * need to crc the page. 567 */ 568 if (io_ctl->cur != io_ctl->orig) 569 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 570 else 571 io_ctl_unmap_page(io_ctl); 572 573 while (io_ctl->index < io_ctl->num_pages) { 574 io_ctl_map_page(io_ctl, 1); 575 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 576 } 577 } 578 579 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 580 struct btrfs_free_space *entry, u8 *type) 581 { 582 struct btrfs_free_space_entry *e; 583 int ret; 584 585 if (!io_ctl->cur) { 586 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 587 if (ret) 588 return ret; 589 } 590 591 e = io_ctl->cur; 592 entry->offset = le64_to_cpu(e->offset); 593 entry->bytes = le64_to_cpu(e->bytes); 594 *type = e->type; 595 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 596 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 597 598 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 599 return 0; 600 601 io_ctl_unmap_page(io_ctl); 602 603 return 0; 604 } 605 606 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 607 struct btrfs_free_space *entry) 608 { 609 int ret; 610 611 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 612 if (ret) 613 return ret; 614 615 copy_page(entry->bitmap, io_ctl->cur); 616 io_ctl_unmap_page(io_ctl); 617 618 return 0; 619 } 620 621 /* 622 * Since we attach pinned extents after the fact we can have contiguous sections 623 * of free space that are split up in entries. This poses a problem with the 624 * tree logging stuff since it could have allocated across what appears to be 2 625 * entries since we would have merged the entries when adding the pinned extents 626 * back to the free space cache. So run through the space cache that we just 627 * loaded and merge contiguous entries. This will make the log replay stuff not 628 * blow up and it will make for nicer allocator behavior. 
629 */ 630 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 631 { 632 struct btrfs_free_space *e, *prev = NULL; 633 struct rb_node *n; 634 635 again: 636 spin_lock(&ctl->tree_lock); 637 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 638 e = rb_entry(n, struct btrfs_free_space, offset_index); 639 if (!prev) 640 goto next; 641 if (e->bitmap || prev->bitmap) 642 goto next; 643 if (prev->offset + prev->bytes == e->offset) { 644 unlink_free_space(ctl, prev); 645 unlink_free_space(ctl, e); 646 prev->bytes += e->bytes; 647 kmem_cache_free(btrfs_free_space_cachep, e); 648 link_free_space(ctl, prev); 649 prev = NULL; 650 spin_unlock(&ctl->tree_lock); 651 goto again; 652 } 653 next: 654 prev = e; 655 } 656 spin_unlock(&ctl->tree_lock); 657 } 658 659 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 660 struct btrfs_free_space_ctl *ctl, 661 struct btrfs_path *path, u64 offset) 662 { 663 struct btrfs_fs_info *fs_info = root->fs_info; 664 struct btrfs_free_space_header *header; 665 struct extent_buffer *leaf; 666 struct btrfs_io_ctl io_ctl; 667 struct btrfs_key key; 668 struct btrfs_free_space *e, *n; 669 LIST_HEAD(bitmaps); 670 u64 num_entries; 671 u64 num_bitmaps; 672 u64 generation; 673 u8 type; 674 int ret = 0; 675 676 /* Nothing in the space cache, goodbye */ 677 if (!i_size_read(inode)) 678 return 0; 679 680 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 681 key.offset = offset; 682 key.type = 0; 683 684 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 685 if (ret < 0) 686 return 0; 687 else if (ret > 0) { 688 btrfs_release_path(path); 689 return 0; 690 } 691 692 ret = -1; 693 694 leaf = path->nodes[0]; 695 header = btrfs_item_ptr(leaf, path->slots[0], 696 struct btrfs_free_space_header); 697 num_entries = btrfs_free_space_entries(leaf, header); 698 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 699 generation = btrfs_free_space_generation(leaf, header); 700 btrfs_release_path(path); 701 702 if (!BTRFS_I(inode)->generation) { 703 btrfs_info(fs_info, 704 "the free space cache file (%llu) is invalid, skip it", 705 offset); 706 return 0; 707 } 708 709 if (BTRFS_I(inode)->generation != generation) { 710 btrfs_err(fs_info, 711 "free space inode generation (%llu) did not match free space cache generation (%llu)", 712 BTRFS_I(inode)->generation, generation); 713 return 0; 714 } 715 716 if (!num_entries) 717 return 0; 718 719 ret = io_ctl_init(&io_ctl, inode, 0); 720 if (ret) 721 return ret; 722 723 readahead_cache(inode); 724 725 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 726 if (ret) 727 goto out; 728 729 ret = io_ctl_check_crc(&io_ctl, 0); 730 if (ret) 731 goto free_cache; 732 733 ret = io_ctl_check_generation(&io_ctl, generation); 734 if (ret) 735 goto free_cache; 736 737 while (num_entries) { 738 e = kmem_cache_zalloc(btrfs_free_space_cachep, 739 GFP_NOFS); 740 if (!e) 741 goto free_cache; 742 743 ret = io_ctl_read_entry(&io_ctl, e, &type); 744 if (ret) { 745 kmem_cache_free(btrfs_free_space_cachep, e); 746 goto free_cache; 747 } 748 749 if (!e->bytes) { 750 kmem_cache_free(btrfs_free_space_cachep, e); 751 goto free_cache; 752 } 753 754 if (type == BTRFS_FREE_SPACE_EXTENT) { 755 spin_lock(&ctl->tree_lock); 756 ret = link_free_space(ctl, e); 757 spin_unlock(&ctl->tree_lock); 758 if (ret) { 759 btrfs_err(fs_info, 760 "Duplicate entries in free space cache, dumping"); 761 kmem_cache_free(btrfs_free_space_cachep, e); 762 goto free_cache; 763 } 764 } else { 765 ASSERT(num_bitmaps); 766 num_bitmaps--; 767 e->bitmap = 
kzalloc(PAGE_SIZE, GFP_NOFS); 768 if (!e->bitmap) { 769 kmem_cache_free( 770 btrfs_free_space_cachep, e); 771 goto free_cache; 772 } 773 spin_lock(&ctl->tree_lock); 774 ret = link_free_space(ctl, e); 775 ctl->total_bitmaps++; 776 ctl->op->recalc_thresholds(ctl); 777 spin_unlock(&ctl->tree_lock); 778 if (ret) { 779 btrfs_err(fs_info, 780 "Duplicate entries in free space cache, dumping"); 781 kmem_cache_free(btrfs_free_space_cachep, e); 782 goto free_cache; 783 } 784 list_add_tail(&e->list, &bitmaps); 785 } 786 787 num_entries--; 788 } 789 790 io_ctl_unmap_page(&io_ctl); 791 792 /* 793 * We add the bitmaps at the end of the entries in order that 794 * the bitmap entries are added to the cache. 795 */ 796 list_for_each_entry_safe(e, n, &bitmaps, list) { 797 list_del_init(&e->list); 798 ret = io_ctl_read_bitmap(&io_ctl, e); 799 if (ret) 800 goto free_cache; 801 } 802 803 io_ctl_drop_pages(&io_ctl); 804 merge_space_tree(ctl); 805 ret = 1; 806 out: 807 io_ctl_free(&io_ctl); 808 return ret; 809 free_cache: 810 io_ctl_drop_pages(&io_ctl); 811 __btrfs_remove_free_space_cache(ctl); 812 goto out; 813 } 814 815 int load_free_space_cache(struct btrfs_fs_info *fs_info, 816 struct btrfs_block_group_cache *block_group) 817 { 818 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 819 struct inode *inode; 820 struct btrfs_path *path; 821 int ret = 0; 822 bool matched; 823 u64 used = btrfs_block_group_used(&block_group->item); 824 825 /* 826 * If this block group has been marked to be cleared for one reason or 827 * another then we can't trust the on disk cache, so just return. 828 */ 829 spin_lock(&block_group->lock); 830 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 831 spin_unlock(&block_group->lock); 832 return 0; 833 } 834 spin_unlock(&block_group->lock); 835 836 path = btrfs_alloc_path(); 837 if (!path) 838 return 0; 839 path->search_commit_root = 1; 840 path->skip_locking = 1; 841 842 /* 843 * We must pass a path with search_commit_root set to btrfs_iget in 844 * order to avoid a deadlock when allocating extents for the tree root. 845 * 846 * When we are COWing an extent buffer from the tree root, when looking 847 * for a free extent, at extent-tree.c:find_free_extent(), we can find 848 * block group without its free space cache loaded. When we find one 849 * we must load its space cache which requires reading its free space 850 * cache's inode item from the root tree. If this inode item is located 851 * in the same leaf that we started COWing before, then we end up in 852 * deadlock on the extent buffer (trying to read lock it when we 853 * previously write locked it). 854 * 855 * It's safe to read the inode item using the commit root because 856 * block groups, once loaded, stay in memory forever (until they are 857 * removed) as well as their space caches once loaded. New block groups 858 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 859 * we will never try to read their inode item while the fs is mounted. 860 */ 861 inode = lookup_free_space_inode(fs_info, block_group, path); 862 if (IS_ERR(inode)) { 863 btrfs_free_path(path); 864 return 0; 865 } 866 867 /* We may have converted the inode and made the cache invalid. 
*/ 868 spin_lock(&block_group->lock); 869 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 870 spin_unlock(&block_group->lock); 871 btrfs_free_path(path); 872 goto out; 873 } 874 spin_unlock(&block_group->lock); 875 876 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 877 path, block_group->key.objectid); 878 btrfs_free_path(path); 879 if (ret <= 0) 880 goto out; 881 882 spin_lock(&ctl->tree_lock); 883 matched = (ctl->free_space == (block_group->key.offset - used - 884 block_group->bytes_super)); 885 spin_unlock(&ctl->tree_lock); 886 887 if (!matched) { 888 __btrfs_remove_free_space_cache(ctl); 889 btrfs_warn(fs_info, 890 "block group %llu has wrong amount of free space", 891 block_group->key.objectid); 892 ret = -1; 893 } 894 out: 895 if (ret < 0) { 896 /* This cache is bogus, make sure it gets cleared */ 897 spin_lock(&block_group->lock); 898 block_group->disk_cache_state = BTRFS_DC_CLEAR; 899 spin_unlock(&block_group->lock); 900 ret = 0; 901 902 btrfs_warn(fs_info, 903 "failed to load free space cache for block group %llu, rebuilding it now", 904 block_group->key.objectid); 905 } 906 907 iput(inode); 908 return ret; 909 } 910 911 static noinline_for_stack 912 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 913 struct btrfs_free_space_ctl *ctl, 914 struct btrfs_block_group_cache *block_group, 915 int *entries, int *bitmaps, 916 struct list_head *bitmap_list) 917 { 918 int ret; 919 struct btrfs_free_cluster *cluster = NULL; 920 struct btrfs_free_cluster *cluster_locked = NULL; 921 struct rb_node *node = rb_first(&ctl->free_space_offset); 922 struct btrfs_trim_range *trim_entry; 923 924 /* Get the cluster for this block_group if it exists */ 925 if (block_group && !list_empty(&block_group->cluster_list)) { 926 cluster = list_entry(block_group->cluster_list.next, 927 struct btrfs_free_cluster, 928 block_group_list); 929 } 930 931 if (!node && cluster) { 932 cluster_locked = cluster; 933 spin_lock(&cluster_locked->lock); 934 node = rb_first(&cluster->root); 935 cluster = NULL; 936 } 937 938 /* Write out the extent entries */ 939 while (node) { 940 struct btrfs_free_space *e; 941 942 e = rb_entry(node, struct btrfs_free_space, offset_index); 943 *entries += 1; 944 945 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 946 e->bitmap); 947 if (ret) 948 goto fail; 949 950 if (e->bitmap) { 951 list_add_tail(&e->list, bitmap_list); 952 *bitmaps += 1; 953 } 954 node = rb_next(node); 955 if (!node && cluster) { 956 node = rb_first(&cluster->root); 957 cluster_locked = cluster; 958 spin_lock(&cluster_locked->lock); 959 cluster = NULL; 960 } 961 } 962 if (cluster_locked) { 963 spin_unlock(&cluster_locked->lock); 964 cluster_locked = NULL; 965 } 966 967 /* 968 * Make sure we don't miss any range that was removed from our rbtree 969 * because trimming is running. Otherwise after a umount+mount (or crash 970 * after committing the transaction) we would leak free space and get 971 * an inconsistent free space cache report from fsck. 
972 */ 973 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 974 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 975 trim_entry->bytes, NULL); 976 if (ret) 977 goto fail; 978 *entries += 1; 979 } 980 981 return 0; 982 fail: 983 if (cluster_locked) 984 spin_unlock(&cluster_locked->lock); 985 return -ENOSPC; 986 } 987 988 static noinline_for_stack int 989 update_cache_item(struct btrfs_trans_handle *trans, 990 struct btrfs_root *root, 991 struct inode *inode, 992 struct btrfs_path *path, u64 offset, 993 int entries, int bitmaps) 994 { 995 struct btrfs_key key; 996 struct btrfs_free_space_header *header; 997 struct extent_buffer *leaf; 998 int ret; 999 1000 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1001 key.offset = offset; 1002 key.type = 0; 1003 1004 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1005 if (ret < 0) { 1006 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1007 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 1008 goto fail; 1009 } 1010 leaf = path->nodes[0]; 1011 if (ret > 0) { 1012 struct btrfs_key found_key; 1013 ASSERT(path->slots[0]); 1014 path->slots[0]--; 1015 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1016 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1017 found_key.offset != offset) { 1018 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1019 inode->i_size - 1, 1020 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1021 NULL); 1022 btrfs_release_path(path); 1023 goto fail; 1024 } 1025 } 1026 1027 BTRFS_I(inode)->generation = trans->transid; 1028 header = btrfs_item_ptr(leaf, path->slots[0], 1029 struct btrfs_free_space_header); 1030 btrfs_set_free_space_entries(leaf, header, entries); 1031 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1032 btrfs_set_free_space_generation(leaf, header, trans->transid); 1033 btrfs_mark_buffer_dirty(leaf); 1034 btrfs_release_path(path); 1035 1036 return 0; 1037 1038 fail: 1039 return -1; 1040 } 1041 1042 static noinline_for_stack int 1043 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1044 struct btrfs_block_group_cache *block_group, 1045 struct btrfs_io_ctl *io_ctl, 1046 int *entries) 1047 { 1048 u64 start, extent_start, extent_end, len; 1049 struct extent_io_tree *unpin = NULL; 1050 int ret; 1051 1052 if (!block_group) 1053 return 0; 1054 1055 /* 1056 * We want to add any pinned extents to our free space cache 1057 * so we don't leak the space 1058 * 1059 * We shouldn't have switched the pinned extents yet so this is the 1060 * right one 1061 */ 1062 unpin = fs_info->pinned_extents; 1063 1064 start = block_group->key.objectid; 1065 1066 while (start < block_group->key.objectid + block_group->key.offset) { 1067 ret = find_first_extent_bit(unpin, start, 1068 &extent_start, &extent_end, 1069 EXTENT_DIRTY, NULL); 1070 if (ret) 1071 return 0; 1072 1073 /* This pinned extent is out of our range */ 1074 if (extent_start >= block_group->key.objectid + 1075 block_group->key.offset) 1076 return 0; 1077 1078 extent_start = max(extent_start, start); 1079 extent_end = min(block_group->key.objectid + 1080 block_group->key.offset, extent_end + 1); 1081 len = extent_end - extent_start; 1082 1083 *entries += 1; 1084 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1085 if (ret) 1086 return -ENOSPC; 1087 1088 start = extent_end; 1089 } 1090 1091 return 0; 1092 } 1093 1094 static noinline_for_stack int 1095 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1096 { 1097 struct btrfs_free_space *entry, *next; 1098 int ret; 1099 1100 /* Write out the bitmaps 
*/ 1101 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1102 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1103 if (ret) 1104 return -ENOSPC; 1105 list_del_init(&entry->list); 1106 } 1107 1108 return 0; 1109 } 1110 1111 static int flush_dirty_cache(struct inode *inode) 1112 { 1113 int ret; 1114 1115 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1116 if (ret) 1117 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1118 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 1119 1120 return ret; 1121 } 1122 1123 static void noinline_for_stack 1124 cleanup_bitmap_list(struct list_head *bitmap_list) 1125 { 1126 struct btrfs_free_space *entry, *next; 1127 1128 list_for_each_entry_safe(entry, next, bitmap_list, list) 1129 list_del_init(&entry->list); 1130 } 1131 1132 static void noinline_for_stack 1133 cleanup_write_cache_enospc(struct inode *inode, 1134 struct btrfs_io_ctl *io_ctl, 1135 struct extent_state **cached_state) 1136 { 1137 io_ctl_drop_pages(io_ctl); 1138 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1139 i_size_read(inode) - 1, cached_state); 1140 } 1141 1142 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1143 struct btrfs_trans_handle *trans, 1144 struct btrfs_block_group_cache *block_group, 1145 struct btrfs_io_ctl *io_ctl, 1146 struct btrfs_path *path, u64 offset) 1147 { 1148 int ret; 1149 struct inode *inode = io_ctl->inode; 1150 1151 if (!inode) 1152 return 0; 1153 1154 /* Flush the dirty pages in the cache file. */ 1155 ret = flush_dirty_cache(inode); 1156 if (ret) 1157 goto out; 1158 1159 /* Update the cache item to tell everyone this cache file is valid. */ 1160 ret = update_cache_item(trans, root, inode, path, offset, 1161 io_ctl->entries, io_ctl->bitmaps); 1162 out: 1163 io_ctl_free(io_ctl); 1164 if (ret) { 1165 invalidate_inode_pages2(inode->i_mapping); 1166 BTRFS_I(inode)->generation = 0; 1167 if (block_group) { 1168 #ifdef DEBUG 1169 btrfs_err(root->fs_info, 1170 "failed to write free space cache for block group %llu", 1171 block_group->key.objectid); 1172 #endif 1173 } 1174 } 1175 btrfs_update_inode(trans, root, inode); 1176 1177 if (block_group) { 1178 /* the dirty list is protected by the dirty_bgs_lock */ 1179 spin_lock(&trans->transaction->dirty_bgs_lock); 1180 1181 /* the disk_cache_state is protected by the block group lock */ 1182 spin_lock(&block_group->lock); 1183 1184 /* 1185 * only mark this as written if we didn't get put back on 1186 * the dirty list while waiting for IO. 
Otherwise our 1187 * cache state won't be right, and we won't get written again 1188 */ 1189 if (!ret && list_empty(&block_group->dirty_list)) 1190 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1191 else if (ret) 1192 block_group->disk_cache_state = BTRFS_DC_ERROR; 1193 1194 spin_unlock(&block_group->lock); 1195 spin_unlock(&trans->transaction->dirty_bgs_lock); 1196 io_ctl->inode = NULL; 1197 iput(inode); 1198 } 1199 1200 return ret; 1201 1202 } 1203 1204 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1205 struct btrfs_trans_handle *trans, 1206 struct btrfs_io_ctl *io_ctl, 1207 struct btrfs_path *path) 1208 { 1209 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1210 } 1211 1212 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1213 struct btrfs_block_group_cache *block_group, 1214 struct btrfs_path *path) 1215 { 1216 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1217 block_group, &block_group->io_ctl, 1218 path, block_group->key.objectid); 1219 } 1220 1221 /** 1222 * __btrfs_write_out_cache - write out cached info to an inode 1223 * @root - the root the inode belongs to 1224 * @ctl - the free space cache we are going to write out 1225 * @block_group - the block_group for this cache if it belongs to a block_group 1226 * @trans - the trans handle 1227 * 1228 * This function writes out a free space cache struct to disk for quick recovery 1229 * on mount. This will return 0 if it was successful in writing the cache out, 1230 * or an errno if it was not. 1231 */ 1232 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1233 struct btrfs_free_space_ctl *ctl, 1234 struct btrfs_block_group_cache *block_group, 1235 struct btrfs_io_ctl *io_ctl, 1236 struct btrfs_trans_handle *trans) 1237 { 1238 struct btrfs_fs_info *fs_info = root->fs_info; 1239 struct extent_state *cached_state = NULL; 1240 LIST_HEAD(bitmap_list); 1241 int entries = 0; 1242 int bitmaps = 0; 1243 int ret; 1244 int must_iput = 0; 1245 1246 if (!i_size_read(inode)) 1247 return -EIO; 1248 1249 WARN_ON(io_ctl->pages); 1250 ret = io_ctl_init(io_ctl, inode, 1); 1251 if (ret) 1252 return ret; 1253 1254 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1255 down_write(&block_group->data_rwsem); 1256 spin_lock(&block_group->lock); 1257 if (block_group->delalloc_bytes) { 1258 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1259 spin_unlock(&block_group->lock); 1260 up_write(&block_group->data_rwsem); 1261 BTRFS_I(inode)->generation = 0; 1262 ret = 0; 1263 must_iput = 1; 1264 goto out; 1265 } 1266 spin_unlock(&block_group->lock); 1267 } 1268 1269 /* Lock all pages first so we can lock the extent safely. */ 1270 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1271 if (ret) 1272 goto out_unlock; 1273 1274 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1275 &cached_state); 1276 1277 io_ctl_set_generation(io_ctl, trans->transid); 1278 1279 mutex_lock(&ctl->cache_writeout_mutex); 1280 /* Write out the extent entries in the free space cache */ 1281 spin_lock(&ctl->tree_lock); 1282 ret = write_cache_extent_entries(io_ctl, ctl, 1283 block_group, &entries, &bitmaps, 1284 &bitmap_list); 1285 if (ret) 1286 goto out_nospc_locked; 1287 1288 /* 1289 * Some spaces that are freed in the current transaction are pinned, 1290 * they will be added into free space cache after the transaction is 1291 * committed, we shouldn't lose them. 
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it. No locking needed
	 */
	ret = write_pinned_extent_entries(fs_info, block_group,
					  io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
				i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state);

	/*
	 * At this point the pages are under IO and we're happy. The caller is
	 * responsible for waiting on them and updating the cache and the
	 * inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(fs_info, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				      block_group, &block_group->io_ctl, trans);
	if (ret) {
#ifdef DEBUG
		btrfs_err(fs_info,
			  "failed to write free space cache for block group %llu",
			  block_group->key.objectid);
#endif
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and
put the inode 1412 */ 1413 1414 return ret; 1415 } 1416 1417 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1418 u64 offset) 1419 { 1420 ASSERT(offset >= bitmap_start); 1421 offset -= bitmap_start; 1422 return (unsigned long)(div_u64(offset, unit)); 1423 } 1424 1425 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1426 { 1427 return (unsigned long)(div_u64(bytes, unit)); 1428 } 1429 1430 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1431 u64 offset) 1432 { 1433 u64 bitmap_start; 1434 u64 bytes_per_bitmap; 1435 1436 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1437 bitmap_start = offset - ctl->start; 1438 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1439 bitmap_start *= bytes_per_bitmap; 1440 bitmap_start += ctl->start; 1441 1442 return bitmap_start; 1443 } 1444 1445 static int tree_insert_offset(struct rb_root *root, u64 offset, 1446 struct rb_node *node, int bitmap) 1447 { 1448 struct rb_node **p = &root->rb_node; 1449 struct rb_node *parent = NULL; 1450 struct btrfs_free_space *info; 1451 1452 while (*p) { 1453 parent = *p; 1454 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1455 1456 if (offset < info->offset) { 1457 p = &(*p)->rb_left; 1458 } else if (offset > info->offset) { 1459 p = &(*p)->rb_right; 1460 } else { 1461 /* 1462 * we could have a bitmap entry and an extent entry 1463 * share the same offset. If this is the case, we want 1464 * the extent entry to always be found first if we do a 1465 * linear search through the tree, since we want to have 1466 * the quickest allocation time, and allocating from an 1467 * extent is faster than allocating from a bitmap. So 1468 * if we're inserting a bitmap and we find an entry at 1469 * this offset, we want to go right, or after this entry 1470 * logically. If we are inserting an extent and we've 1471 * found a bitmap, we want to go left, or before 1472 * logically. 1473 */ 1474 if (bitmap) { 1475 if (info->bitmap) { 1476 WARN_ON_ONCE(1); 1477 return -EEXIST; 1478 } 1479 p = &(*p)->rb_right; 1480 } else { 1481 if (!info->bitmap) { 1482 WARN_ON_ONCE(1); 1483 return -EEXIST; 1484 } 1485 p = &(*p)->rb_left; 1486 } 1487 } 1488 } 1489 1490 rb_link_node(node, parent, p); 1491 rb_insert_color(node, root); 1492 1493 return 0; 1494 } 1495 1496 /* 1497 * searches the tree for the given offset. 1498 * 1499 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1500 * want a section that has at least bytes size and comes at or after the given 1501 * offset. 1502 */ 1503 static struct btrfs_free_space * 1504 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1505 u64 offset, int bitmap_only, int fuzzy) 1506 { 1507 struct rb_node *n = ctl->free_space_offset.rb_node; 1508 struct btrfs_free_space *entry, *prev = NULL; 1509 1510 /* find entry that is closest to the 'offset' */ 1511 while (1) { 1512 if (!n) { 1513 entry = NULL; 1514 break; 1515 } 1516 1517 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1518 prev = entry; 1519 1520 if (offset < entry->offset) 1521 n = n->rb_left; 1522 else if (offset > entry->offset) 1523 n = n->rb_right; 1524 else 1525 break; 1526 } 1527 1528 if (bitmap_only) { 1529 if (!entry) 1530 return NULL; 1531 if (entry->bitmap) 1532 return entry; 1533 1534 /* 1535 * bitmap entry and extent entry may share same offset, 1536 * in that case, bitmap entry comes after extent entry. 
1537 */ 1538 n = rb_next(n); 1539 if (!n) 1540 return NULL; 1541 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1542 if (entry->offset != offset) 1543 return NULL; 1544 1545 WARN_ON(!entry->bitmap); 1546 return entry; 1547 } else if (entry) { 1548 if (entry->bitmap) { 1549 /* 1550 * if previous extent entry covers the offset, 1551 * we should return it instead of the bitmap entry 1552 */ 1553 n = rb_prev(&entry->offset_index); 1554 if (n) { 1555 prev = rb_entry(n, struct btrfs_free_space, 1556 offset_index); 1557 if (!prev->bitmap && 1558 prev->offset + prev->bytes > offset) 1559 entry = prev; 1560 } 1561 } 1562 return entry; 1563 } 1564 1565 if (!prev) 1566 return NULL; 1567 1568 /* find last entry before the 'offset' */ 1569 entry = prev; 1570 if (entry->offset > offset) { 1571 n = rb_prev(&entry->offset_index); 1572 if (n) { 1573 entry = rb_entry(n, struct btrfs_free_space, 1574 offset_index); 1575 ASSERT(entry->offset <= offset); 1576 } else { 1577 if (fuzzy) 1578 return entry; 1579 else 1580 return NULL; 1581 } 1582 } 1583 1584 if (entry->bitmap) { 1585 n = rb_prev(&entry->offset_index); 1586 if (n) { 1587 prev = rb_entry(n, struct btrfs_free_space, 1588 offset_index); 1589 if (!prev->bitmap && 1590 prev->offset + prev->bytes > offset) 1591 return prev; 1592 } 1593 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1594 return entry; 1595 } else if (entry->offset + entry->bytes > offset) 1596 return entry; 1597 1598 if (!fuzzy) 1599 return NULL; 1600 1601 while (1) { 1602 if (entry->bitmap) { 1603 if (entry->offset + BITS_PER_BITMAP * 1604 ctl->unit > offset) 1605 break; 1606 } else { 1607 if (entry->offset + entry->bytes > offset) 1608 break; 1609 } 1610 1611 n = rb_next(&entry->offset_index); 1612 if (!n) 1613 return NULL; 1614 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1615 } 1616 return entry; 1617 } 1618 1619 static inline void 1620 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1621 struct btrfs_free_space *info) 1622 { 1623 rb_erase(&info->offset_index, &ctl->free_space_offset); 1624 ctl->free_extents--; 1625 } 1626 1627 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1628 struct btrfs_free_space *info) 1629 { 1630 __unlink_free_space(ctl, info); 1631 ctl->free_space -= info->bytes; 1632 } 1633 1634 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1635 struct btrfs_free_space *info) 1636 { 1637 int ret = 0; 1638 1639 ASSERT(info->bytes || info->bitmap); 1640 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1641 &info->offset_index, (info->bitmap != NULL)); 1642 if (ret) 1643 return ret; 1644 1645 ctl->free_space += info->bytes; 1646 ctl->free_extents++; 1647 return ret; 1648 } 1649 1650 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1651 { 1652 struct btrfs_block_group_cache *block_group = ctl->private; 1653 u64 max_bytes; 1654 u64 bitmap_bytes; 1655 u64 extent_bytes; 1656 u64 size = block_group->key.offset; 1657 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1658 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1659 1660 max_bitmaps = max_t(u64, max_bitmaps, 1); 1661 1662 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1663 1664 /* 1665 * The goal is to keep the total amount of memory used per 1gb of space 1666 * at or below 32k, so we need to adjust how much memory we allow to be 1667 * used by extent based free space tracking 1668 */ 1669 if (size < SZ_1G) 1670 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1671 else 1672 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1673 1674 /* 1675 * we want to account for 1 more bitmap than what we have so we can make 1676 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1677 * we add more bitmaps. 1678 */ 1679 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1680 1681 if (bitmap_bytes >= max_bytes) { 1682 ctl->extents_thresh = 0; 1683 return; 1684 } 1685 1686 /* 1687 * we want the extent entry threshold to always be at most 1/2 the max 1688 * bytes we can have, or whatever is less than that. 1689 */ 1690 extent_bytes = max_bytes - bitmap_bytes; 1691 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1692 1693 ctl->extents_thresh = 1694 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1695 } 1696 1697 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1698 struct btrfs_free_space *info, 1699 u64 offset, u64 bytes) 1700 { 1701 unsigned long start, count; 1702 1703 start = offset_to_bit(info->offset, ctl->unit, offset); 1704 count = bytes_to_bits(bytes, ctl->unit); 1705 ASSERT(start + count <= BITS_PER_BITMAP); 1706 1707 bitmap_clear(info->bitmap, start, count); 1708 1709 info->bytes -= bytes; 1710 if (info->max_extent_size > ctl->unit) 1711 info->max_extent_size = 0; 1712 } 1713 1714 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1715 struct btrfs_free_space *info, u64 offset, 1716 u64 bytes) 1717 { 1718 __bitmap_clear_bits(ctl, info, offset, bytes); 1719 ctl->free_space -= bytes; 1720 } 1721 1722 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1723 struct btrfs_free_space *info, u64 offset, 1724 u64 bytes) 1725 { 1726 unsigned long start, count; 1727 1728 start = offset_to_bit(info->offset, ctl->unit, offset); 1729 count = bytes_to_bits(bytes, ctl->unit); 1730 ASSERT(start + count <= BITS_PER_BITMAP); 1731 1732 bitmap_set(info->bitmap, start, count); 1733 1734 info->bytes += bytes; 1735 ctl->free_space += bytes; 1736 } 1737 1738 /* 1739 * If we can not find suitable extent, we will use bytes to record 1740 * the size of the max extent. 1741 */ 1742 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1743 struct btrfs_free_space *bitmap_info, u64 *offset, 1744 u64 *bytes, bool for_alloc) 1745 { 1746 unsigned long found_bits = 0; 1747 unsigned long max_bits = 0; 1748 unsigned long bits, i; 1749 unsigned long next_zero; 1750 unsigned long extent_bits; 1751 1752 /* 1753 * Skip searching the bitmap if we don't have a contiguous section that 1754 * is large enough for this allocation. 
1755 */ 1756 if (for_alloc && 1757 bitmap_info->max_extent_size && 1758 bitmap_info->max_extent_size < *bytes) { 1759 *bytes = bitmap_info->max_extent_size; 1760 return -1; 1761 } 1762 1763 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1764 max_t(u64, *offset, bitmap_info->offset)); 1765 bits = bytes_to_bits(*bytes, ctl->unit); 1766 1767 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1768 if (for_alloc && bits == 1) { 1769 found_bits = 1; 1770 break; 1771 } 1772 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1773 BITS_PER_BITMAP, i); 1774 extent_bits = next_zero - i; 1775 if (extent_bits >= bits) { 1776 found_bits = extent_bits; 1777 break; 1778 } else if (extent_bits > max_bits) { 1779 max_bits = extent_bits; 1780 } 1781 i = next_zero; 1782 } 1783 1784 if (found_bits) { 1785 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1786 *bytes = (u64)(found_bits) * ctl->unit; 1787 return 0; 1788 } 1789 1790 *bytes = (u64)(max_bits) * ctl->unit; 1791 bitmap_info->max_extent_size = *bytes; 1792 return -1; 1793 } 1794 1795 static inline u64 get_max_extent_size(struct btrfs_free_space *entry) 1796 { 1797 if (entry->bitmap) 1798 return entry->max_extent_size; 1799 return entry->bytes; 1800 } 1801 1802 /* Cache the size of the max extent in bytes */ 1803 static struct btrfs_free_space * 1804 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1805 unsigned long align, u64 *max_extent_size) 1806 { 1807 struct btrfs_free_space *entry; 1808 struct rb_node *node; 1809 u64 tmp; 1810 u64 align_off; 1811 int ret; 1812 1813 if (!ctl->free_space_offset.rb_node) 1814 goto out; 1815 1816 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1817 if (!entry) 1818 goto out; 1819 1820 for (node = &entry->offset_index; node; node = rb_next(node)) { 1821 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1822 if (entry->bytes < *bytes) { 1823 *max_extent_size = max(get_max_extent_size(entry), 1824 *max_extent_size); 1825 continue; 1826 } 1827 1828 /* make sure the space returned is big enough 1829 * to match our requested alignment 1830 */ 1831 if (*bytes >= align) { 1832 tmp = entry->offset - ctl->start + align - 1; 1833 tmp = div64_u64(tmp, align); 1834 tmp = tmp * align + ctl->start; 1835 align_off = tmp - entry->offset; 1836 } else { 1837 align_off = 0; 1838 tmp = entry->offset; 1839 } 1840 1841 if (entry->bytes < *bytes + align_off) { 1842 *max_extent_size = max(get_max_extent_size(entry), 1843 *max_extent_size); 1844 continue; 1845 } 1846 1847 if (entry->bitmap) { 1848 u64 size = *bytes; 1849 1850 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1851 if (!ret) { 1852 *offset = tmp; 1853 *bytes = size; 1854 return entry; 1855 } else { 1856 *max_extent_size = 1857 max(get_max_extent_size(entry), 1858 *max_extent_size); 1859 } 1860 continue; 1861 } 1862 1863 *offset = tmp; 1864 *bytes = entry->bytes - align_off; 1865 return entry; 1866 } 1867 out: 1868 return NULL; 1869 } 1870 1871 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1872 struct btrfs_free_space *info, u64 offset) 1873 { 1874 info->offset = offset_to_bitmap(ctl, offset); 1875 info->bytes = 0; 1876 INIT_LIST_HEAD(&info->list); 1877 link_free_space(ctl, info); 1878 ctl->total_bitmaps++; 1879 1880 ctl->op->recalc_thresholds(ctl); 1881 } 1882 1883 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1884 struct btrfs_free_space *bitmap_info) 1885 { 1886 unlink_free_space(ctl, bitmap_info); 1887 kfree(bitmap_info->bitmap); 1888 
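	/* The bitmap page was allocated separately from the entry itself. */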
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		      struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory limit,
	 * so allow those block groups to still have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
2066 */ 2067 if (block_group && !list_empty(&block_group->cluster_list)) { 2068 struct btrfs_free_cluster *cluster; 2069 struct rb_node *node; 2070 struct btrfs_free_space *entry; 2071 2072 cluster = list_entry(block_group->cluster_list.next, 2073 struct btrfs_free_cluster, 2074 block_group_list); 2075 spin_lock(&cluster->lock); 2076 node = rb_first(&cluster->root); 2077 if (!node) { 2078 spin_unlock(&cluster->lock); 2079 goto no_cluster_bitmap; 2080 } 2081 2082 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2083 if (!entry->bitmap) { 2084 spin_unlock(&cluster->lock); 2085 goto no_cluster_bitmap; 2086 } 2087 2088 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2089 bytes_added = add_bytes_to_bitmap(ctl, entry, 2090 offset, bytes); 2091 bytes -= bytes_added; 2092 offset += bytes_added; 2093 } 2094 spin_unlock(&cluster->lock); 2095 if (!bytes) { 2096 ret = 1; 2097 goto out; 2098 } 2099 } 2100 2101 no_cluster_bitmap: 2102 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2103 1, 0); 2104 if (!bitmap_info) { 2105 ASSERT(added == 0); 2106 goto new_bitmap; 2107 } 2108 2109 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2110 bytes -= bytes_added; 2111 offset += bytes_added; 2112 added = 0; 2113 2114 if (!bytes) { 2115 ret = 1; 2116 goto out; 2117 } else 2118 goto again; 2119 2120 new_bitmap: 2121 if (info && info->bitmap) { 2122 add_new_bitmap(ctl, info, offset); 2123 added = 1; 2124 info = NULL; 2125 goto again; 2126 } else { 2127 spin_unlock(&ctl->tree_lock); 2128 2129 /* no pre-allocated info, allocate a new one */ 2130 if (!info) { 2131 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2132 GFP_NOFS); 2133 if (!info) { 2134 spin_lock(&ctl->tree_lock); 2135 ret = -ENOMEM; 2136 goto out; 2137 } 2138 } 2139 2140 /* allocate the bitmap */ 2141 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2142 spin_lock(&ctl->tree_lock); 2143 if (!info->bitmap) { 2144 ret = -ENOMEM; 2145 goto out; 2146 } 2147 goto again; 2148 } 2149 2150 out: 2151 if (info) { 2152 kfree(info->bitmap); 2153 kmem_cache_free(btrfs_free_space_cachep, info); 2154 } 2155 2156 return ret; 2157 } 2158 2159 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2160 struct btrfs_free_space *info, bool update_stat) 2161 { 2162 struct btrfs_free_space *left_info; 2163 struct btrfs_free_space *right_info; 2164 bool merged = false; 2165 u64 offset = info->offset; 2166 u64 bytes = info->bytes; 2167 2168 /* 2169 * first we want to see if there is free space adjacent to the range we 2170 * are adding, if there is remove that struct and add a new one to 2171 * cover the entire range 2172 */ 2173 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2174 if (right_info && rb_prev(&right_info->offset_index)) 2175 left_info = rb_entry(rb_prev(&right_info->offset_index), 2176 struct btrfs_free_space, offset_index); 2177 else 2178 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2179 2180 if (right_info && !right_info->bitmap) { 2181 if (update_stat) 2182 unlink_free_space(ctl, right_info); 2183 else 2184 __unlink_free_space(ctl, right_info); 2185 info->bytes += right_info->bytes; 2186 kmem_cache_free(btrfs_free_space_cachep, right_info); 2187 merged = true; 2188 } 2189 2190 if (left_info && !left_info->bitmap && 2191 left_info->offset + left_info->bytes == offset) { 2192 if (update_stat) 2193 unlink_free_space(ctl, left_info); 2194 else 2195 __unlink_free_space(ctl, left_info); 2196 info->offset = left_info->offset; 2197 info->bytes += left_info->bytes; 2198 
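/* left_info has been folded into info, so its now-redundant entry can be freed */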
kmem_cache_free(btrfs_free_space_cachep, left_info); 2199 merged = true; 2200 } 2201 2202 return merged; 2203 } 2204 2205 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2206 struct btrfs_free_space *info, 2207 bool update_stat) 2208 { 2209 struct btrfs_free_space *bitmap; 2210 unsigned long i; 2211 unsigned long j; 2212 const u64 end = info->offset + info->bytes; 2213 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2214 u64 bytes; 2215 2216 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2217 if (!bitmap) 2218 return false; 2219 2220 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2221 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2222 if (j == i) 2223 return false; 2224 bytes = (j - i) * ctl->unit; 2225 info->bytes += bytes; 2226 2227 if (update_stat) 2228 bitmap_clear_bits(ctl, bitmap, end, bytes); 2229 else 2230 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2231 2232 if (!bitmap->bytes) 2233 free_bitmap(ctl, bitmap); 2234 2235 return true; 2236 } 2237 2238 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2239 struct btrfs_free_space *info, 2240 bool update_stat) 2241 { 2242 struct btrfs_free_space *bitmap; 2243 u64 bitmap_offset; 2244 unsigned long i; 2245 unsigned long j; 2246 unsigned long prev_j; 2247 u64 bytes; 2248 2249 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2250 /* If we're on a boundary, try the previous logical bitmap. */ 2251 if (bitmap_offset == info->offset) { 2252 if (info->offset == 0) 2253 return false; 2254 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2255 } 2256 2257 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2258 if (!bitmap) 2259 return false; 2260 2261 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2262 j = 0; 2263 prev_j = (unsigned long)-1; 2264 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2265 if (j > i) 2266 break; 2267 prev_j = j; 2268 } 2269 if (prev_j == i) 2270 return false; 2271 2272 if (prev_j == (unsigned long)-1) 2273 bytes = (i + 1) * ctl->unit; 2274 else 2275 bytes = (i - prev_j) * ctl->unit; 2276 2277 info->offset -= bytes; 2278 info->bytes += bytes; 2279 2280 if (update_stat) 2281 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2282 else 2283 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2284 2285 if (!bitmap->bytes) 2286 free_bitmap(ctl, bitmap); 2287 2288 return true; 2289 } 2290 2291 /* 2292 * We prefer always to allocate from extent entries, both for clustered and 2293 * non-clustered allocation requests. So when attempting to add a new extent 2294 * entry, try to see if there's adjacent free space in bitmap entries, and if 2295 * there is, migrate that space from the bitmaps to the extent. 2296 * Like this we get better chances of satisfying space allocation requests 2297 * because we attempt to satisfy them based on a single cache entry, and never 2298 * on 2 or more entries - even if the entries represent a contiguous free space 2299 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2300 * ends). 2301 */ 2302 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2303 struct btrfs_free_space *info, 2304 bool update_stat) 2305 { 2306 /* 2307 * Only work with disconnected entries, as we can change their offset, 2308 * and must be extent entries. 
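 * Callers pass entries that are not currently linked into ctl->free_space_offset, e.g. the new entry in __btrfs_add_free_space() before link_free_space() runs, or entries just erased from a cluster's rbtree.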
2309 */ 2310 ASSERT(!info->bitmap); 2311 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2312 2313 if (ctl->total_bitmaps > 0) { 2314 bool stole_end; 2315 bool stole_front = false; 2316 2317 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2318 if (ctl->total_bitmaps > 0) 2319 stole_front = steal_from_bitmap_to_front(ctl, info, 2320 update_stat); 2321 2322 if (stole_end || stole_front) 2323 try_merge_free_space(ctl, info, update_stat); 2324 } 2325 } 2326 2327 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2328 struct btrfs_free_space_ctl *ctl, 2329 u64 offset, u64 bytes) 2330 { 2331 struct btrfs_free_space *info; 2332 int ret = 0; 2333 2334 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2335 if (!info) 2336 return -ENOMEM; 2337 2338 info->offset = offset; 2339 info->bytes = bytes; 2340 RB_CLEAR_NODE(&info->offset_index); 2341 2342 spin_lock(&ctl->tree_lock); 2343 2344 if (try_merge_free_space(ctl, info, true)) 2345 goto link; 2346 2347 /* 2348 * There was no extent directly to the left or right of this new 2349 * extent then we know we're going to have to allocate a new extent, so 2350 * before we do that see if we need to drop this into a bitmap 2351 */ 2352 ret = insert_into_bitmap(ctl, info); 2353 if (ret < 0) { 2354 goto out; 2355 } else if (ret) { 2356 ret = 0; 2357 goto out; 2358 } 2359 link: 2360 /* 2361 * Only steal free space from adjacent bitmaps if we're sure we're not 2362 * going to add the new free space to existing bitmap entries - because 2363 * that would mean unnecessary work that would be reverted. Therefore 2364 * attempt to steal space from bitmaps if we're adding an extent entry. 2365 */ 2366 steal_from_bitmap(ctl, info, true); 2367 2368 ret = link_free_space(ctl, info); 2369 if (ret) 2370 kmem_cache_free(btrfs_free_space_cachep, info); 2371 out: 2372 spin_unlock(&ctl->tree_lock); 2373 2374 if (ret) { 2375 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2376 ASSERT(ret != -EEXIST); 2377 } 2378 2379 return ret; 2380 } 2381 2382 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2383 u64 offset, u64 bytes) 2384 { 2385 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2386 struct btrfs_free_space *info; 2387 int ret; 2388 bool re_search = false; 2389 2390 spin_lock(&ctl->tree_lock); 2391 2392 again: 2393 ret = 0; 2394 if (!bytes) 2395 goto out_lock; 2396 2397 info = tree_search_offset(ctl, offset, 0, 0); 2398 if (!info) { 2399 /* 2400 * oops didn't find an extent that matched the space we wanted 2401 * to remove, look for a bitmap instead 2402 */ 2403 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2404 1, 0); 2405 if (!info) { 2406 /* 2407 * If we found a partial bit of our free space in a 2408 * bitmap but then couldn't find the other part this may 2409 * be a problem, so WARN about it. 
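 * (re_search is only set once remove_from_bitmap() has returned -EAGAIN, i.e. part of the requested range was already found and cleared from a bitmap.)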
2410 */ 2411 WARN_ON(re_search); 2412 goto out_lock; 2413 } 2414 } 2415 2416 re_search = false; 2417 if (!info->bitmap) { 2418 unlink_free_space(ctl, info); 2419 if (offset == info->offset) { 2420 u64 to_free = min(bytes, info->bytes); 2421 2422 info->bytes -= to_free; 2423 info->offset += to_free; 2424 if (info->bytes) { 2425 ret = link_free_space(ctl, info); 2426 WARN_ON(ret); 2427 } else { 2428 kmem_cache_free(btrfs_free_space_cachep, info); 2429 } 2430 2431 offset += to_free; 2432 bytes -= to_free; 2433 goto again; 2434 } else { 2435 u64 old_end = info->bytes + info->offset; 2436 2437 info->bytes = offset - info->offset; 2438 ret = link_free_space(ctl, info); 2439 WARN_ON(ret); 2440 if (ret) 2441 goto out_lock; 2442 2443 /* Not enough bytes in this entry to satisfy us */ 2444 if (old_end < offset + bytes) { 2445 bytes -= old_end - offset; 2446 offset = old_end; 2447 goto again; 2448 } else if (old_end == offset + bytes) { 2449 /* all done */ 2450 goto out_lock; 2451 } 2452 spin_unlock(&ctl->tree_lock); 2453 2454 ret = btrfs_add_free_space(block_group, offset + bytes, 2455 old_end - (offset + bytes)); 2456 WARN_ON(ret); 2457 goto out; 2458 } 2459 } 2460 2461 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2462 if (ret == -EAGAIN) { 2463 re_search = true; 2464 goto again; 2465 } 2466 out_lock: 2467 spin_unlock(&ctl->tree_lock); 2468 out: 2469 return ret; 2470 } 2471 2472 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2473 u64 bytes) 2474 { 2475 struct btrfs_fs_info *fs_info = block_group->fs_info; 2476 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2477 struct btrfs_free_space *info; 2478 struct rb_node *n; 2479 int count = 0; 2480 2481 spin_lock(&ctl->tree_lock); 2482 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2483 info = rb_entry(n, struct btrfs_free_space, offset_index); 2484 if (info->bytes >= bytes && !block_group->ro) 2485 count++; 2486 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2487 info->offset, info->bytes, 2488 (info->bitmap) ? "yes" : "no"); 2489 } 2490 spin_unlock(&ctl->tree_lock); 2491 btrfs_info(fs_info, "block group has cluster?: %s", 2492 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2493 btrfs_info(fs_info, 2494 "%d blocks of free space at or bigger than bytes is", count); 2495 } 2496 2497 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2498 { 2499 struct btrfs_fs_info *fs_info = block_group->fs_info; 2500 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2501 2502 spin_lock_init(&ctl->tree_lock); 2503 ctl->unit = fs_info->sectorsize; 2504 ctl->start = block_group->key.objectid; 2505 ctl->private = block_group; 2506 ctl->op = &free_space_op; 2507 INIT_LIST_HEAD(&ctl->trimming_ranges); 2508 mutex_init(&ctl->cache_writeout_mutex); 2509 2510 /* 2511 * we only want to have 32k of ram per block group for keeping 2512 * track of free space, and if we pass 1/2 of that we want to 2513 * start converting things over to using bitmaps 2514 */ 2515 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2516 } 2517 2518 /* 2519 * for a given cluster, put all of its extents back into the free 2520 * space cache. If the block group passed doesn't match the block group 2521 * pointed to by the cluster, someone else raced in and freed the 2522 * cluster already. 
In that case, we just return without changing anything 2523 */ 2524 static int 2525 __btrfs_return_cluster_to_free_space( 2526 struct btrfs_block_group_cache *block_group, 2527 struct btrfs_free_cluster *cluster) 2528 { 2529 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2530 struct btrfs_free_space *entry; 2531 struct rb_node *node; 2532 2533 spin_lock(&cluster->lock); 2534 if (cluster->block_group != block_group) 2535 goto out; 2536 2537 cluster->block_group = NULL; 2538 cluster->window_start = 0; 2539 list_del_init(&cluster->block_group_list); 2540 2541 node = rb_first(&cluster->root); 2542 while (node) { 2543 bool bitmap; 2544 2545 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2546 node = rb_next(&entry->offset_index); 2547 rb_erase(&entry->offset_index, &cluster->root); 2548 RB_CLEAR_NODE(&entry->offset_index); 2549 2550 bitmap = (entry->bitmap != NULL); 2551 if (!bitmap) { 2552 try_merge_free_space(ctl, entry, false); 2553 steal_from_bitmap(ctl, entry, false); 2554 } 2555 tree_insert_offset(&ctl->free_space_offset, 2556 entry->offset, &entry->offset_index, bitmap); 2557 } 2558 cluster->root = RB_ROOT; 2559 2560 out: 2561 spin_unlock(&cluster->lock); 2562 btrfs_put_block_group(block_group); 2563 return 0; 2564 } 2565 2566 static void __btrfs_remove_free_space_cache_locked( 2567 struct btrfs_free_space_ctl *ctl) 2568 { 2569 struct btrfs_free_space *info; 2570 struct rb_node *node; 2571 2572 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2573 info = rb_entry(node, struct btrfs_free_space, offset_index); 2574 if (!info->bitmap) { 2575 unlink_free_space(ctl, info); 2576 kmem_cache_free(btrfs_free_space_cachep, info); 2577 } else { 2578 free_bitmap(ctl, info); 2579 } 2580 2581 cond_resched_lock(&ctl->tree_lock); 2582 } 2583 } 2584 2585 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2586 { 2587 spin_lock(&ctl->tree_lock); 2588 __btrfs_remove_free_space_cache_locked(ctl); 2589 spin_unlock(&ctl->tree_lock); 2590 } 2591 2592 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2593 { 2594 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2595 struct btrfs_free_cluster *cluster; 2596 struct list_head *head; 2597 2598 spin_lock(&ctl->tree_lock); 2599 while ((head = block_group->cluster_list.next) != 2600 &block_group->cluster_list) { 2601 cluster = list_entry(head, struct btrfs_free_cluster, 2602 block_group_list); 2603 2604 WARN_ON(cluster->block_group != block_group); 2605 __btrfs_return_cluster_to_free_space(block_group, cluster); 2606 2607 cond_resched_lock(&ctl->tree_lock); 2608 } 2609 __btrfs_remove_free_space_cache_locked(ctl); 2610 spin_unlock(&ctl->tree_lock); 2611 2612 } 2613 2614 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2615 u64 offset, u64 bytes, u64 empty_size, 2616 u64 *max_extent_size) 2617 { 2618 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2619 struct btrfs_free_space *entry = NULL; 2620 u64 bytes_search = bytes + empty_size; 2621 u64 ret = 0; 2622 u64 align_gap = 0; 2623 u64 align_gap_len = 0; 2624 2625 spin_lock(&ctl->tree_lock); 2626 entry = find_free_space(ctl, &offset, &bytes_search, 2627 block_group->full_stripe_len, max_extent_size); 2628 if (!entry) 2629 goto out; 2630 2631 ret = offset; 2632 if (entry->bitmap) { 2633 bitmap_clear_bits(ctl, entry, offset, bytes); 2634 if (!entry->bytes) 2635 free_bitmap(ctl, entry); 2636 } else { 2637 unlink_free_space(ctl, entry); 2638 align_gap_len = offset - 
entry->offset; 2639 align_gap = entry->offset; 2640 2641 entry->offset = offset + bytes; 2642 WARN_ON(entry->bytes < bytes + align_gap_len); 2643 2644 entry->bytes -= bytes + align_gap_len; 2645 if (!entry->bytes) 2646 kmem_cache_free(btrfs_free_space_cachep, entry); 2647 else 2648 link_free_space(ctl, entry); 2649 } 2650 out: 2651 spin_unlock(&ctl->tree_lock); 2652 2653 if (align_gap_len) 2654 __btrfs_add_free_space(block_group->fs_info, ctl, 2655 align_gap, align_gap_len); 2656 return ret; 2657 } 2658 2659 /* 2660 * given a cluster, put all of its extents back into the free space 2661 * cache. If a block group is passed, this function will only free 2662 * a cluster that belongs to the passed block group. 2663 * 2664 * Otherwise, it'll get a reference on the block group pointed to by the 2665 * cluster and remove the cluster from it. 2666 */ 2667 int btrfs_return_cluster_to_free_space( 2668 struct btrfs_block_group_cache *block_group, 2669 struct btrfs_free_cluster *cluster) 2670 { 2671 struct btrfs_free_space_ctl *ctl; 2672 int ret; 2673 2674 /* first, get a safe pointer to the block group */ 2675 spin_lock(&cluster->lock); 2676 if (!block_group) { 2677 block_group = cluster->block_group; 2678 if (!block_group) { 2679 spin_unlock(&cluster->lock); 2680 return 0; 2681 } 2682 } else if (cluster->block_group != block_group) { 2683 /* someone else has already freed it don't redo their work */ 2684 spin_unlock(&cluster->lock); 2685 return 0; 2686 } 2687 atomic_inc(&block_group->count); 2688 spin_unlock(&cluster->lock); 2689 2690 ctl = block_group->free_space_ctl; 2691 2692 /* now return any extents the cluster had on it */ 2693 spin_lock(&ctl->tree_lock); 2694 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2695 spin_unlock(&ctl->tree_lock); 2696 2697 /* finally drop our ref */ 2698 btrfs_put_block_group(block_group); 2699 return ret; 2700 } 2701 2702 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2703 struct btrfs_free_cluster *cluster, 2704 struct btrfs_free_space *entry, 2705 u64 bytes, u64 min_start, 2706 u64 *max_extent_size) 2707 { 2708 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2709 int err; 2710 u64 search_start = cluster->window_start; 2711 u64 search_bytes = bytes; 2712 u64 ret = 0; 2713 2714 search_start = min_start; 2715 search_bytes = bytes; 2716 2717 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2718 if (err) { 2719 *max_extent_size = max(get_max_extent_size(entry), 2720 *max_extent_size); 2721 return 0; 2722 } 2723 2724 ret = search_start; 2725 __bitmap_clear_bits(ctl, entry, ret, bytes); 2726 2727 return ret; 2728 } 2729 2730 /* 2731 * given a cluster, try to allocate 'bytes' from it, returns 0 2732 * if it couldn't find anything suitably large, or a logical disk offset 2733 * if things worked out 2734 */ 2735 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2736 struct btrfs_free_cluster *cluster, u64 bytes, 2737 u64 min_start, u64 *max_extent_size) 2738 { 2739 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2740 struct btrfs_free_space *entry = NULL; 2741 struct rb_node *node; 2742 u64 ret = 0; 2743 2744 spin_lock(&cluster->lock); 2745 if (bytes > cluster->max_size) 2746 goto out; 2747 2748 if (cluster->block_group != block_group) 2749 goto out; 2750 2751 node = rb_first(&cluster->root); 2752 if (!node) 2753 goto out; 2754 2755 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2756 while (1) { 2757 if (entry->bytes < bytes) 2758 
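/* entry is too small for this allocation: just record the largest extent seen so far */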
*max_extent_size = max(get_max_extent_size(entry), 2759 *max_extent_size); 2760 2761 if (entry->bytes < bytes || 2762 (!entry->bitmap && entry->offset < min_start)) { 2763 node = rb_next(&entry->offset_index); 2764 if (!node) 2765 break; 2766 entry = rb_entry(node, struct btrfs_free_space, 2767 offset_index); 2768 continue; 2769 } 2770 2771 if (entry->bitmap) { 2772 ret = btrfs_alloc_from_bitmap(block_group, 2773 cluster, entry, bytes, 2774 cluster->window_start, 2775 max_extent_size); 2776 if (ret == 0) { 2777 node = rb_next(&entry->offset_index); 2778 if (!node) 2779 break; 2780 entry = rb_entry(node, struct btrfs_free_space, 2781 offset_index); 2782 continue; 2783 } 2784 cluster->window_start += bytes; 2785 } else { 2786 ret = entry->offset; 2787 2788 entry->offset += bytes; 2789 entry->bytes -= bytes; 2790 } 2791 2792 if (entry->bytes == 0) 2793 rb_erase(&entry->offset_index, &cluster->root); 2794 break; 2795 } 2796 out: 2797 spin_unlock(&cluster->lock); 2798 2799 if (!ret) 2800 return 0; 2801 2802 spin_lock(&ctl->tree_lock); 2803 2804 ctl->free_space -= bytes; 2805 if (entry->bytes == 0) { 2806 ctl->free_extents--; 2807 if (entry->bitmap) { 2808 kfree(entry->bitmap); 2809 ctl->total_bitmaps--; 2810 ctl->op->recalc_thresholds(ctl); 2811 } 2812 kmem_cache_free(btrfs_free_space_cachep, entry); 2813 } 2814 2815 spin_unlock(&ctl->tree_lock); 2816 2817 return ret; 2818 } 2819 2820 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2821 struct btrfs_free_space *entry, 2822 struct btrfs_free_cluster *cluster, 2823 u64 offset, u64 bytes, 2824 u64 cont1_bytes, u64 min_bytes) 2825 { 2826 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2827 unsigned long next_zero; 2828 unsigned long i; 2829 unsigned long want_bits; 2830 unsigned long min_bits; 2831 unsigned long found_bits; 2832 unsigned long max_bits = 0; 2833 unsigned long start = 0; 2834 unsigned long total_found = 0; 2835 int ret; 2836 2837 i = offset_to_bit(entry->offset, ctl->unit, 2838 max_t(u64, offset, entry->offset)); 2839 want_bits = bytes_to_bits(bytes, ctl->unit); 2840 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2841 2842 /* 2843 * Don't bother looking for a cluster in this bitmap if it's heavily 2844 * fragmented. 
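 * (entry->max_extent_size is a cached result from a previous failed search; zero means it is unknown, in which case we search anyway.)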
2845 */ 2846 if (entry->max_extent_size && 2847 entry->max_extent_size < cont1_bytes) 2848 return -ENOSPC; 2849 again: 2850 found_bits = 0; 2851 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2852 next_zero = find_next_zero_bit(entry->bitmap, 2853 BITS_PER_BITMAP, i); 2854 if (next_zero - i >= min_bits) { 2855 found_bits = next_zero - i; 2856 if (found_bits > max_bits) 2857 max_bits = found_bits; 2858 break; 2859 } 2860 if (next_zero - i > max_bits) 2861 max_bits = next_zero - i; 2862 i = next_zero; 2863 } 2864 2865 if (!found_bits) { 2866 entry->max_extent_size = (u64)max_bits * ctl->unit; 2867 return -ENOSPC; 2868 } 2869 2870 if (!total_found) { 2871 start = i; 2872 cluster->max_size = 0; 2873 } 2874 2875 total_found += found_bits; 2876 2877 if (cluster->max_size < found_bits * ctl->unit) 2878 cluster->max_size = found_bits * ctl->unit; 2879 2880 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2881 i = next_zero + 1; 2882 goto again; 2883 } 2884 2885 cluster->window_start = start * ctl->unit + entry->offset; 2886 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2887 ret = tree_insert_offset(&cluster->root, entry->offset, 2888 &entry->offset_index, 1); 2889 ASSERT(!ret); /* -EEXIST; Logic error */ 2890 2891 trace_btrfs_setup_cluster(block_group, cluster, 2892 total_found * ctl->unit, 1); 2893 return 0; 2894 } 2895 2896 /* 2897 * This searches the block group for just extents to fill the cluster with. 2898 * Try to find a cluster with at least bytes total bytes, at least one 2899 * extent of cont1_bytes, and other clusters of at least min_bytes. 2900 */ 2901 static noinline int 2902 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2903 struct btrfs_free_cluster *cluster, 2904 struct list_head *bitmaps, u64 offset, u64 bytes, 2905 u64 cont1_bytes, u64 min_bytes) 2906 { 2907 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2908 struct btrfs_free_space *first = NULL; 2909 struct btrfs_free_space *entry = NULL; 2910 struct btrfs_free_space *last; 2911 struct rb_node *node; 2912 u64 window_free; 2913 u64 max_extent; 2914 u64 total_size = 0; 2915 2916 entry = tree_search_offset(ctl, offset, 0, 1); 2917 if (!entry) 2918 return -ENOSPC; 2919 2920 /* 2921 * We don't want bitmaps, so just move along until we find a normal 2922 * extent entry. 
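 * Any bitmap entries we skip are stashed on the caller's list so that setup_cluster_bitmap() can try them if no suitable extents are found here.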
2923 */ 2924 while (entry->bitmap || entry->bytes < min_bytes) { 2925 if (entry->bitmap && list_empty(&entry->list)) 2926 list_add_tail(&entry->list, bitmaps); 2927 node = rb_next(&entry->offset_index); 2928 if (!node) 2929 return -ENOSPC; 2930 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2931 } 2932 2933 window_free = entry->bytes; 2934 max_extent = entry->bytes; 2935 first = entry; 2936 last = entry; 2937 2938 for (node = rb_next(&entry->offset_index); node; 2939 node = rb_next(&entry->offset_index)) { 2940 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2941 2942 if (entry->bitmap) { 2943 if (list_empty(&entry->list)) 2944 list_add_tail(&entry->list, bitmaps); 2945 continue; 2946 } 2947 2948 if (entry->bytes < min_bytes) 2949 continue; 2950 2951 last = entry; 2952 window_free += entry->bytes; 2953 if (entry->bytes > max_extent) 2954 max_extent = entry->bytes; 2955 } 2956 2957 if (window_free < bytes || max_extent < cont1_bytes) 2958 return -ENOSPC; 2959 2960 cluster->window_start = first->offset; 2961 2962 node = &first->offset_index; 2963 2964 /* 2965 * now we've found our entries, pull them out of the free space 2966 * cache and put them into the cluster rbtree 2967 */ 2968 do { 2969 int ret; 2970 2971 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2972 node = rb_next(&entry->offset_index); 2973 if (entry->bitmap || entry->bytes < min_bytes) 2974 continue; 2975 2976 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2977 ret = tree_insert_offset(&cluster->root, entry->offset, 2978 &entry->offset_index, 0); 2979 total_size += entry->bytes; 2980 ASSERT(!ret); /* -EEXIST; Logic error */ 2981 } while (node && entry != last); 2982 2983 cluster->max_size = max_extent; 2984 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2985 return 0; 2986 } 2987 2988 /* 2989 * This specifically looks for bitmaps that may work in the cluster, we assume 2990 * that we have already failed to find extents that will work. 2991 */ 2992 static noinline int 2993 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2994 struct btrfs_free_cluster *cluster, 2995 struct list_head *bitmaps, u64 offset, u64 bytes, 2996 u64 cont1_bytes, u64 min_bytes) 2997 { 2998 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2999 struct btrfs_free_space *entry = NULL; 3000 int ret = -ENOSPC; 3001 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3002 3003 if (ctl->total_bitmaps == 0) 3004 return -ENOSPC; 3005 3006 /* 3007 * The bitmap that covers offset won't be in the list unless offset 3008 * is just its start offset. 3009 */ 3010 if (!list_empty(bitmaps)) 3011 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3012 3013 if (!entry || entry->offset != bitmap_offset) { 3014 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3015 if (entry && list_empty(&entry->list)) 3016 list_add(&entry->list, bitmaps); 3017 } 3018 3019 list_for_each_entry(entry, bitmaps, list) { 3020 if (entry->bytes < bytes) 3021 continue; 3022 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3023 bytes, cont1_bytes, min_bytes); 3024 if (!ret) 3025 return 0; 3026 } 3027 3028 /* 3029 * The bitmaps list has all the bitmaps that record free space 3030 * starting after offset, so no more search is required. 3031 */ 3032 return -ENOSPC; 3033 } 3034 3035 /* 3036 * here we try to find a cluster of blocks in a block group. The goal 3037 * is to find at least bytes+empty_size. 3038 * We might not find them all in one contiguous area. 
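 * We require at least one contiguous extent of cont1_bytes and only consider free extents of at least min_bytes, e.g. for a data block group asking for 1M with no empty_size, cont1_bytes = max(1M, 1M >> 2) = 1M and min_bytes = sectorsize.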
3039 * 3040 * returns zero and sets up cluster if things worked out, otherwise 3041 * it returns -enospc 3042 */ 3043 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3044 struct btrfs_block_group_cache *block_group, 3045 struct btrfs_free_cluster *cluster, 3046 u64 offset, u64 bytes, u64 empty_size) 3047 { 3048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3049 struct btrfs_free_space *entry, *tmp; 3050 LIST_HEAD(bitmaps); 3051 u64 min_bytes; 3052 u64 cont1_bytes; 3053 int ret; 3054 3055 /* 3056 * Choose the minimum extent size we'll require for this 3057 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3058 * For metadata, allow allocates with smaller extents. For 3059 * data, keep it dense. 3060 */ 3061 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3062 cont1_bytes = min_bytes = bytes + empty_size; 3063 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3064 cont1_bytes = bytes; 3065 min_bytes = fs_info->sectorsize; 3066 } else { 3067 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3068 min_bytes = fs_info->sectorsize; 3069 } 3070 3071 spin_lock(&ctl->tree_lock); 3072 3073 /* 3074 * If we know we don't have enough space to make a cluster don't even 3075 * bother doing all the work to try and find one. 3076 */ 3077 if (ctl->free_space < bytes) { 3078 spin_unlock(&ctl->tree_lock); 3079 return -ENOSPC; 3080 } 3081 3082 spin_lock(&cluster->lock); 3083 3084 /* someone already found a cluster, hooray */ 3085 if (cluster->block_group) { 3086 ret = 0; 3087 goto out; 3088 } 3089 3090 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3091 min_bytes); 3092 3093 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3094 bytes + empty_size, 3095 cont1_bytes, min_bytes); 3096 if (ret) 3097 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3098 offset, bytes + empty_size, 3099 cont1_bytes, min_bytes); 3100 3101 /* Clear our temporary list */ 3102 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3103 list_del_init(&entry->list); 3104 3105 if (!ret) { 3106 atomic_inc(&block_group->count); 3107 list_add_tail(&cluster->block_group_list, 3108 &block_group->cluster_list); 3109 cluster->block_group = block_group; 3110 } else { 3111 trace_btrfs_failed_cluster_setup(block_group); 3112 } 3113 out: 3114 spin_unlock(&cluster->lock); 3115 spin_unlock(&ctl->tree_lock); 3116 3117 return ret; 3118 } 3119 3120 /* 3121 * simple code to zero out a cluster 3122 */ 3123 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3124 { 3125 spin_lock_init(&cluster->lock); 3126 spin_lock_init(&cluster->refill_lock); 3127 cluster->root = RB_ROOT; 3128 cluster->max_size = 0; 3129 cluster->fragmented = false; 3130 INIT_LIST_HEAD(&cluster->block_group_list); 3131 cluster->block_group = NULL; 3132 } 3133 3134 static int do_trimming(struct btrfs_block_group_cache *block_group, 3135 u64 *total_trimmed, u64 start, u64 bytes, 3136 u64 reserved_start, u64 reserved_bytes, 3137 struct btrfs_trim_range *trim_entry) 3138 { 3139 struct btrfs_space_info *space_info = block_group->space_info; 3140 struct btrfs_fs_info *fs_info = block_group->fs_info; 3141 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3142 int ret; 3143 int update = 0; 3144 u64 trimmed = 0; 3145 3146 spin_lock(&space_info->lock); 3147 spin_lock(&block_group->lock); 3148 if (!block_group->ro) { 3149 block_group->reserved += reserved_bytes; 3150 space_info->bytes_reserved += reserved_bytes; 3151 update = 1; 3152 } 3153 spin_unlock(&block_group->lock); 3154 
spin_unlock(&space_info->lock); 3155 3156 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3157 if (!ret) 3158 *total_trimmed += trimmed; 3159 3160 mutex_lock(&ctl->cache_writeout_mutex); 3161 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3162 list_del(&trim_entry->list); 3163 mutex_unlock(&ctl->cache_writeout_mutex); 3164 3165 if (update) { 3166 spin_lock(&space_info->lock); 3167 spin_lock(&block_group->lock); 3168 if (block_group->ro) 3169 space_info->bytes_readonly += reserved_bytes; 3170 block_group->reserved -= reserved_bytes; 3171 space_info->bytes_reserved -= reserved_bytes; 3172 spin_unlock(&space_info->lock); 3173 spin_unlock(&block_group->lock); 3174 } 3175 3176 return ret; 3177 } 3178 3179 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3180 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3181 { 3182 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3183 struct btrfs_free_space *entry; 3184 struct rb_node *node; 3185 int ret = 0; 3186 u64 extent_start; 3187 u64 extent_bytes; 3188 u64 bytes; 3189 3190 while (start < end) { 3191 struct btrfs_trim_range trim_entry; 3192 3193 mutex_lock(&ctl->cache_writeout_mutex); 3194 spin_lock(&ctl->tree_lock); 3195 3196 if (ctl->free_space < minlen) { 3197 spin_unlock(&ctl->tree_lock); 3198 mutex_unlock(&ctl->cache_writeout_mutex); 3199 break; 3200 } 3201 3202 entry = tree_search_offset(ctl, start, 0, 1); 3203 if (!entry) { 3204 spin_unlock(&ctl->tree_lock); 3205 mutex_unlock(&ctl->cache_writeout_mutex); 3206 break; 3207 } 3208 3209 /* skip bitmaps */ 3210 while (entry->bitmap) { 3211 node = rb_next(&entry->offset_index); 3212 if (!node) { 3213 spin_unlock(&ctl->tree_lock); 3214 mutex_unlock(&ctl->cache_writeout_mutex); 3215 goto out; 3216 } 3217 entry = rb_entry(node, struct btrfs_free_space, 3218 offset_index); 3219 } 3220 3221 if (entry->offset >= end) { 3222 spin_unlock(&ctl->tree_lock); 3223 mutex_unlock(&ctl->cache_writeout_mutex); 3224 break; 3225 } 3226 3227 extent_start = entry->offset; 3228 extent_bytes = entry->bytes; 3229 start = max(start, extent_start); 3230 bytes = min(extent_start + extent_bytes, end) - start; 3231 if (bytes < minlen) { 3232 spin_unlock(&ctl->tree_lock); 3233 mutex_unlock(&ctl->cache_writeout_mutex); 3234 goto next; 3235 } 3236 3237 unlink_free_space(ctl, entry); 3238 kmem_cache_free(btrfs_free_space_cachep, entry); 3239 3240 spin_unlock(&ctl->tree_lock); 3241 trim_entry.start = extent_start; 3242 trim_entry.bytes = extent_bytes; 3243 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3244 mutex_unlock(&ctl->cache_writeout_mutex); 3245 3246 ret = do_trimming(block_group, total_trimmed, start, bytes, 3247 extent_start, extent_bytes, &trim_entry); 3248 if (ret) 3249 break; 3250 next: 3251 start += bytes; 3252 3253 if (fatal_signal_pending(current)) { 3254 ret = -ERESTARTSYS; 3255 break; 3256 } 3257 3258 cond_resched(); 3259 } 3260 out: 3261 return ret; 3262 } 3263 3264 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3265 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3266 { 3267 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3268 struct btrfs_free_space *entry; 3269 int ret = 0; 3270 int ret2; 3271 u64 bytes; 3272 u64 offset = offset_to_bitmap(ctl, start); 3273 3274 while (offset < end) { 3275 bool next_bitmap = false; 3276 struct btrfs_trim_range trim_entry; 3277 3278 mutex_lock(&ctl->cache_writeout_mutex); 3279 spin_lock(&ctl->tree_lock); 3280 3281 if (ctl->free_space < minlen) { 3282 
spin_unlock(&ctl->tree_lock); 3283 mutex_unlock(&ctl->cache_writeout_mutex); 3284 break; 3285 } 3286 3287 entry = tree_search_offset(ctl, offset, 1, 0); 3288 if (!entry) { 3289 spin_unlock(&ctl->tree_lock); 3290 mutex_unlock(&ctl->cache_writeout_mutex); 3291 next_bitmap = true; 3292 goto next; 3293 } 3294 3295 bytes = minlen; 3296 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3297 if (ret2 || start >= end) { 3298 spin_unlock(&ctl->tree_lock); 3299 mutex_unlock(&ctl->cache_writeout_mutex); 3300 next_bitmap = true; 3301 goto next; 3302 } 3303 3304 bytes = min(bytes, end - start); 3305 if (bytes < minlen) { 3306 spin_unlock(&ctl->tree_lock); 3307 mutex_unlock(&ctl->cache_writeout_mutex); 3308 goto next; 3309 } 3310 3311 bitmap_clear_bits(ctl, entry, start, bytes); 3312 if (entry->bytes == 0) 3313 free_bitmap(ctl, entry); 3314 3315 spin_unlock(&ctl->tree_lock); 3316 trim_entry.start = start; 3317 trim_entry.bytes = bytes; 3318 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3319 mutex_unlock(&ctl->cache_writeout_mutex); 3320 3321 ret = do_trimming(block_group, total_trimmed, start, bytes, 3322 start, bytes, &trim_entry); 3323 if (ret) 3324 break; 3325 next: 3326 if (next_bitmap) { 3327 offset += BITS_PER_BITMAP * ctl->unit; 3328 } else { 3329 start += bytes; 3330 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3331 offset += BITS_PER_BITMAP * ctl->unit; 3332 } 3333 3334 if (fatal_signal_pending(current)) { 3335 ret = -ERESTARTSYS; 3336 break; 3337 } 3338 3339 cond_resched(); 3340 } 3341 3342 return ret; 3343 } 3344 3345 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3346 { 3347 atomic_inc(&cache->trimming); 3348 } 3349 3350 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3351 { 3352 struct btrfs_fs_info *fs_info = block_group->fs_info; 3353 struct extent_map_tree *em_tree; 3354 struct extent_map *em; 3355 bool cleanup; 3356 3357 spin_lock(&block_group->lock); 3358 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3359 block_group->removed); 3360 spin_unlock(&block_group->lock); 3361 3362 if (cleanup) { 3363 mutex_lock(&fs_info->chunk_mutex); 3364 em_tree = &fs_info->mapping_tree.map_tree; 3365 write_lock(&em_tree->lock); 3366 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3367 1); 3368 BUG_ON(!em); /* logic error, can't happen */ 3369 /* 3370 * remove_extent_mapping() will delete us from the pinned_chunks 3371 * list, which is protected by the chunk mutex. 3372 */ 3373 remove_extent_mapping(em_tree, em); 3374 write_unlock(&em_tree->lock); 3375 mutex_unlock(&fs_info->chunk_mutex); 3376 3377 /* once for us and once for the tree */ 3378 free_extent_map(em); 3379 free_extent_map(em); 3380 3381 /* 3382 * We've left one free space entry and other tasks trimming 3383 * this block group have left 1 entry each one. Free them. 
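 * (They were re-added by do_trimming() via btrfs_add_free_space() once each discard finished.)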
3384 */ 3385 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3386 } 3387 } 3388 3389 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3390 u64 *trimmed, u64 start, u64 end, u64 minlen) 3391 { 3392 int ret; 3393 3394 *trimmed = 0; 3395 3396 spin_lock(&block_group->lock); 3397 if (block_group->removed) { 3398 spin_unlock(&block_group->lock); 3399 return 0; 3400 } 3401 btrfs_get_block_group_trimming(block_group); 3402 spin_unlock(&block_group->lock); 3403 3404 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3405 if (ret) 3406 goto out; 3407 3408 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3409 out: 3410 btrfs_put_block_group_trimming(block_group); 3411 return ret; 3412 } 3413 3414 /* 3415 * Find the left-most item in the cache tree, and then return the 3416 * smallest inode number in the item. 3417 * 3418 * Note: the returned inode number may not be the smallest one in 3419 * the tree, if the left-most item is a bitmap. 3420 */ 3421 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3422 { 3423 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3424 struct btrfs_free_space *entry = NULL; 3425 u64 ino = 0; 3426 3427 spin_lock(&ctl->tree_lock); 3428 3429 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3430 goto out; 3431 3432 entry = rb_entry(rb_first(&ctl->free_space_offset), 3433 struct btrfs_free_space, offset_index); 3434 3435 if (!entry->bitmap) { 3436 ino = entry->offset; 3437 3438 unlink_free_space(ctl, entry); 3439 entry->offset++; 3440 entry->bytes--; 3441 if (!entry->bytes) 3442 kmem_cache_free(btrfs_free_space_cachep, entry); 3443 else 3444 link_free_space(ctl, entry); 3445 } else { 3446 u64 offset = 0; 3447 u64 count = 1; 3448 int ret; 3449 3450 ret = search_bitmap(ctl, entry, &offset, &count, true); 3451 /* Logic error; Should be empty if it can't find anything */ 3452 ASSERT(!ret); 3453 3454 ino = offset; 3455 bitmap_clear_bits(ctl, entry, offset, 1); 3456 if (entry->bytes == 0) 3457 free_bitmap(ctl, entry); 3458 } 3459 out: 3460 spin_unlock(&ctl->tree_lock); 3461 3462 return ino; 3463 } 3464 3465 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3466 struct btrfs_path *path) 3467 { 3468 struct inode *inode = NULL; 3469 3470 spin_lock(&root->ino_cache_lock); 3471 if (root->ino_cache_inode) 3472 inode = igrab(root->ino_cache_inode); 3473 spin_unlock(&root->ino_cache_lock); 3474 if (inode) 3475 return inode; 3476 3477 inode = __lookup_free_space_inode(root, path, 0); 3478 if (IS_ERR(inode)) 3479 return inode; 3480 3481 spin_lock(&root->ino_cache_lock); 3482 if (!btrfs_fs_closing(root->fs_info)) 3483 root->ino_cache_inode = igrab(inode); 3484 spin_unlock(&root->ino_cache_lock); 3485 3486 return inode; 3487 } 3488 3489 int create_free_ino_inode(struct btrfs_root *root, 3490 struct btrfs_trans_handle *trans, 3491 struct btrfs_path *path) 3492 { 3493 return __create_free_space_inode(root, trans, path, 3494 BTRFS_FREE_INO_OBJECTID, 0); 3495 } 3496 3497 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3498 { 3499 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3500 struct btrfs_path *path; 3501 struct inode *inode; 3502 int ret = 0; 3503 u64 root_gen = btrfs_root_generation(&root->root_item); 3504 3505 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3506 return 0; 3507 3508 /* 3509 * If we're unmounting then just return, since this does a search on the 3510 * normal root and not the commit root and we could deadlock. 
3511 */ 3512 if (btrfs_fs_closing(fs_info)) 3513 return 0; 3514 3515 path = btrfs_alloc_path(); 3516 if (!path) 3517 return 0; 3518 3519 inode = lookup_free_ino_inode(root, path); 3520 if (IS_ERR(inode)) 3521 goto out; 3522 3523 if (root_gen != BTRFS_I(inode)->generation) 3524 goto out_put; 3525 3526 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3527 3528 if (ret < 0) 3529 btrfs_err(fs_info, 3530 "failed to load free ino cache for root %llu", 3531 root->root_key.objectid); 3532 out_put: 3533 iput(inode); 3534 out: 3535 btrfs_free_path(path); 3536 return ret; 3537 } 3538 3539 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3540 struct btrfs_trans_handle *trans, 3541 struct btrfs_path *path, 3542 struct inode *inode) 3543 { 3544 struct btrfs_fs_info *fs_info = root->fs_info; 3545 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3546 int ret; 3547 struct btrfs_io_ctl io_ctl; 3548 bool release_metadata = true; 3549 3550 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3551 return 0; 3552 3553 memset(&io_ctl, 0, sizeof(io_ctl)); 3554 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3555 if (!ret) { 3556 /* 3557 * At this point writepages() didn't error out, so our metadata 3558 * reservation is released when the writeback finishes, at 3559 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3560 * with or without an error. 3561 */ 3562 release_metadata = false; 3563 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3564 } 3565 3566 if (ret) { 3567 if (release_metadata) 3568 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3569 inode->i_size, true); 3570 #ifdef DEBUG 3571 btrfs_err(fs_info, 3572 "failed to write free ino cache for root %llu", 3573 root->root_key.objectid); 3574 #endif 3575 } 3576 3577 return ret; 3578 } 3579 3580 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3581 /* 3582 * Use this if you need to make a bitmap or extent entry specifically, it 3583 * doesn't do any of the merging that add_free_space does, this acts a lot like 3584 * how the free space cache loading stuff works, so you can get really weird 3585 * configurations. 
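 * For example, a test can place an extent entry and a bitmap entry side by side without the merging and bitmap stealing that __btrfs_add_free_space() would normally do.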
3586 */ 3587 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3588 u64 offset, u64 bytes, bool bitmap) 3589 { 3590 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3591 struct btrfs_free_space *info = NULL, *bitmap_info; 3592 void *map = NULL; 3593 u64 bytes_added; 3594 int ret; 3595 3596 again: 3597 if (!info) { 3598 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3599 if (!info) 3600 return -ENOMEM; 3601 } 3602 3603 if (!bitmap) { 3604 spin_lock(&ctl->tree_lock); 3605 info->offset = offset; 3606 info->bytes = bytes; 3607 info->max_extent_size = 0; 3608 ret = link_free_space(ctl, info); 3609 spin_unlock(&ctl->tree_lock); 3610 if (ret) 3611 kmem_cache_free(btrfs_free_space_cachep, info); 3612 return ret; 3613 } 3614 3615 if (!map) { 3616 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3617 if (!map) { 3618 kmem_cache_free(btrfs_free_space_cachep, info); 3619 return -ENOMEM; 3620 } 3621 } 3622 3623 spin_lock(&ctl->tree_lock); 3624 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3625 1, 0); 3626 if (!bitmap_info) { 3627 info->bitmap = map; 3628 map = NULL; 3629 add_new_bitmap(ctl, info, offset); 3630 bitmap_info = info; 3631 info = NULL; 3632 } 3633 3634 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3635 3636 bytes -= bytes_added; 3637 offset += bytes_added; 3638 spin_unlock(&ctl->tree_lock); 3639 3640 if (bytes) 3641 goto again; 3642 3643 if (info) 3644 kmem_cache_free(btrfs_free_space_cachep, info); 3645 kfree(map); 3646 return 0; 3647 } 3648 3649 /* 3650 * Checks to see if the given range is in the free space cache. This is really 3651 * just used to check the absence of space, so if there is free space in the 3652 * range at all we will return 1. 3653 */ 3654 int test_check_exists(struct btrfs_block_group_cache *cache, 3655 u64 offset, u64 bytes) 3656 { 3657 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3658 struct btrfs_free_space *info; 3659 int ret = 0; 3660 3661 spin_lock(&ctl->tree_lock); 3662 info = tree_search_offset(ctl, offset, 0, 0); 3663 if (!info) { 3664 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3665 1, 0); 3666 if (!info) 3667 goto out; 3668 } 3669 3670 have_info: 3671 if (info->bitmap) { 3672 u64 bit_off, bit_bytes; 3673 struct rb_node *n; 3674 struct btrfs_free_space *tmp; 3675 3676 bit_off = offset; 3677 bit_bytes = ctl->unit; 3678 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3679 if (!ret) { 3680 if (bit_off == offset) { 3681 ret = 1; 3682 goto out; 3683 } else if (bit_off > offset && 3684 offset + bytes > bit_off) { 3685 ret = 1; 3686 goto out; 3687 } 3688 } 3689 3690 n = rb_prev(&info->offset_index); 3691 while (n) { 3692 tmp = rb_entry(n, struct btrfs_free_space, 3693 offset_index); 3694 if (tmp->offset + tmp->bytes < offset) 3695 break; 3696 if (offset + bytes < tmp->offset) { 3697 n = rb_prev(&tmp->offset_index); 3698 continue; 3699 } 3700 info = tmp; 3701 goto have_info; 3702 } 3703 3704 n = rb_next(&info->offset_index); 3705 while (n) { 3706 tmp = rb_entry(n, struct btrfs_free_space, 3707 offset_index); 3708 if (offset + bytes < tmp->offset) 3709 break; 3710 if (tmp->offset + tmp->bytes < offset) { 3711 n = rb_next(&tmp->offset_index); 3712 continue; 3713 } 3714 info = tmp; 3715 goto have_info; 3716 } 3717 3718 ret = 0; 3719 goto out; 3720 } 3721 3722 if (info->offset == offset) { 3723 ret = 1; 3724 goto out; 3725 } 3726 3727 if (offset > info->offset && offset < info->offset + info->bytes) 3728 ret = 1; 3729 out: 3730 
spin_unlock(&ctl->tree_lock); 3731 return ret; 3732 } 3733 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3734