1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Red Hat. All rights reserved. 4 */ 5 6 #include <linux/pagemap.h> 7 #include <linux/sched.h> 8 #include <linux/sched/signal.h> 9 #include <linux/slab.h> 10 #include <linux/math64.h> 11 #include <linux/ratelimit.h> 12 #include <linux/error-injection.h> 13 #include "ctree.h" 14 #include "free-space-cache.h" 15 #include "transaction.h" 16 #include "disk-io.h" 17 #include "extent_io.h" 18 #include "inode-map.h" 19 #include "volumes.h" 20 21 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 22 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 23 24 struct btrfs_trim_range { 25 u64 start; 26 u64 bytes; 27 struct list_head list; 28 }; 29 30 static int link_free_space(struct btrfs_free_space_ctl *ctl, 31 struct btrfs_free_space *info); 32 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 33 struct btrfs_free_space *info); 34 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 35 struct btrfs_trans_handle *trans, 36 struct btrfs_io_ctl *io_ctl, 37 struct btrfs_path *path); 38 39 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 40 struct btrfs_path *path, 41 u64 offset) 42 { 43 struct btrfs_fs_info *fs_info = root->fs_info; 44 struct btrfs_key key; 45 struct btrfs_key location; 46 struct btrfs_disk_key disk_key; 47 struct btrfs_free_space_header *header; 48 struct extent_buffer *leaf; 49 struct inode *inode = NULL; 50 int ret; 51 52 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 53 key.offset = offset; 54 key.type = 0; 55 56 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 57 if (ret < 0) 58 return ERR_PTR(ret); 59 if (ret > 0) { 60 btrfs_release_path(path); 61 return ERR_PTR(-ENOENT); 62 } 63 64 leaf = path->nodes[0]; 65 header = btrfs_item_ptr(leaf, path->slots[0], 66 struct btrfs_free_space_header); 67 btrfs_free_space_key(leaf, header, &disk_key); 68 btrfs_disk_key_to_cpu(&location, &disk_key); 69 btrfs_release_path(path); 70 71 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 72 if (IS_ERR(inode)) 73 return inode; 74 75 mapping_set_gfp_mask(inode->i_mapping, 76 mapping_gfp_constraint(inode->i_mapping, 77 ~(__GFP_FS | __GFP_HIGHMEM))); 78 79 return inode; 80 } 81 82 struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 83 struct btrfs_block_group_cache 84 *block_group, struct btrfs_path *path) 85 { 86 struct inode *inode = NULL; 87 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 88 89 spin_lock(&block_group->lock); 90 if (block_group->inode) 91 inode = igrab(block_group->inode); 92 spin_unlock(&block_group->lock); 93 if (inode) 94 return inode; 95 96 inode = __lookup_free_space_inode(fs_info->tree_root, path, 97 block_group->key.objectid); 98 if (IS_ERR(inode)) 99 return inode; 100 101 spin_lock(&block_group->lock); 102 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 103 btrfs_info(fs_info, "Old style space inode found, converting."); 104 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 105 BTRFS_INODE_NODATACOW; 106 block_group->disk_cache_state = BTRFS_DC_CLEAR; 107 } 108 109 if (!block_group->iref) { 110 block_group->inode = igrab(inode); 111 block_group->iref = 1; 112 } 113 spin_unlock(&block_group->lock); 114 115 return inode; 116 } 117 118 static int __create_free_space_inode(struct btrfs_root *root, 119 struct btrfs_trans_handle *trans, 120 struct btrfs_path *path, 121 u64 ino, u64 offset) 122 { 123 struct btrfs_key key; 124 struct btrfs_disk_key disk_key; 125 struct btrfs_free_space_header *header; 126 struct btrfs_inode_item *inode_item; 127 struct 
extent_buffer *leaf; 128 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 129 int ret; 130 131 ret = btrfs_insert_empty_inode(trans, root, path, ino); 132 if (ret) 133 return ret; 134 135 /* We inline crc's for the free disk space cache */ 136 if (ino != BTRFS_FREE_INO_OBJECTID) 137 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 138 139 leaf = path->nodes[0]; 140 inode_item = btrfs_item_ptr(leaf, path->slots[0], 141 struct btrfs_inode_item); 142 btrfs_item_key(leaf, &disk_key, path->slots[0]); 143 memzero_extent_buffer(leaf, (unsigned long)inode_item, 144 sizeof(*inode_item)); 145 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 146 btrfs_set_inode_size(leaf, inode_item, 0); 147 btrfs_set_inode_nbytes(leaf, inode_item, 0); 148 btrfs_set_inode_uid(leaf, inode_item, 0); 149 btrfs_set_inode_gid(leaf, inode_item, 0); 150 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 151 btrfs_set_inode_flags(leaf, inode_item, flags); 152 btrfs_set_inode_nlink(leaf, inode_item, 1); 153 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 154 btrfs_set_inode_block_group(leaf, inode_item, offset); 155 btrfs_mark_buffer_dirty(leaf); 156 btrfs_release_path(path); 157 158 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 159 key.offset = offset; 160 key.type = 0; 161 ret = btrfs_insert_empty_item(trans, root, path, &key, 162 sizeof(struct btrfs_free_space_header)); 163 if (ret < 0) { 164 btrfs_release_path(path); 165 return ret; 166 } 167 168 leaf = path->nodes[0]; 169 header = btrfs_item_ptr(leaf, path->slots[0], 170 struct btrfs_free_space_header); 171 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 172 btrfs_set_free_space_key(leaf, header, &disk_key); 173 btrfs_mark_buffer_dirty(leaf); 174 btrfs_release_path(path); 175 176 return 0; 177 } 178 179 int create_free_space_inode(struct btrfs_fs_info *fs_info, 180 struct btrfs_trans_handle *trans, 181 struct btrfs_block_group_cache *block_group, 182 struct btrfs_path *path) 183 { 184 int ret; 185 u64 ino; 186 187 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 188 if (ret < 0) 189 return ret; 190 191 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 192 block_group->key.objectid); 193 } 194 195 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 196 struct btrfs_block_rsv *rsv) 197 { 198 u64 needed_bytes; 199 int ret; 200 201 /* 1 for slack space, 1 for updating the inode */ 202 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 203 btrfs_calc_trans_metadata_size(fs_info, 1); 204 205 spin_lock(&rsv->lock); 206 if (rsv->reserved < needed_bytes) 207 ret = -ENOSPC; 208 else 209 ret = 0; 210 spin_unlock(&rsv->lock); 211 return ret; 212 } 213 214 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 215 struct btrfs_block_group_cache *block_group, 216 struct inode *inode) 217 { 218 struct btrfs_root *root = BTRFS_I(inode)->root; 219 int ret = 0; 220 bool locked = false; 221 222 if (block_group) { 223 struct btrfs_path *path = btrfs_alloc_path(); 224 225 if (!path) { 226 ret = -ENOMEM; 227 goto fail; 228 } 229 locked = true; 230 mutex_lock(&trans->transaction->cache_write_mutex); 231 if (!list_empty(&block_group->io_list)) { 232 list_del_init(&block_group->io_list); 233 234 btrfs_wait_cache_io(trans, block_group, path); 235 btrfs_put_block_group(block_group); 236 } 237 238 /* 239 * now that we've truncated the cache away, its no longer 240 * setup or written 241 */ 242 spin_lock(&block_group->lock); 243 block_group->disk_cache_state = 
BTRFS_DC_CLEAR; 244 spin_unlock(&block_group->lock); 245 btrfs_free_path(path); 246 } 247 248 btrfs_i_size_write(BTRFS_I(inode), 0); 249 truncate_pagecache(inode, 0); 250 251 /* 252 * We skip the throttling logic for free space cache inodes, so we don't 253 * need to check for -EAGAIN. 254 */ 255 ret = btrfs_truncate_inode_items(trans, root, inode, 256 0, BTRFS_EXTENT_DATA_KEY); 257 if (ret) 258 goto fail; 259 260 ret = btrfs_update_inode(trans, root, inode); 261 262 fail: 263 if (locked) 264 mutex_unlock(&trans->transaction->cache_write_mutex); 265 if (ret) 266 btrfs_abort_transaction(trans, ret); 267 268 return ret; 269 } 270 271 static void readahead_cache(struct inode *inode) 272 { 273 struct file_ra_state *ra; 274 unsigned long last_index; 275 276 ra = kzalloc(sizeof(*ra), GFP_NOFS); 277 if (!ra) 278 return; 279 280 file_ra_state_init(ra, inode->i_mapping); 281 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 282 283 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 284 285 kfree(ra); 286 } 287 288 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 289 int write) 290 { 291 int num_pages; 292 int check_crcs = 0; 293 294 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 295 296 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 297 check_crcs = 1; 298 299 /* Make sure we can fit our crcs and generation into the first page */ 300 if (write && check_crcs && 301 (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 302 return -ENOSPC; 303 304 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 305 306 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 307 if (!io_ctl->pages) 308 return -ENOMEM; 309 310 io_ctl->num_pages = num_pages; 311 io_ctl->fs_info = btrfs_sb(inode->i_sb); 312 io_ctl->check_crcs = check_crcs; 313 io_ctl->inode = inode; 314 315 return 0; 316 } 317 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 318 319 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 320 { 321 kfree(io_ctl->pages); 322 io_ctl->pages = NULL; 323 } 324 325 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 326 { 327 if (io_ctl->cur) { 328 io_ctl->cur = NULL; 329 io_ctl->orig = NULL; 330 } 331 } 332 333 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 334 { 335 ASSERT(io_ctl->index < io_ctl->num_pages); 336 io_ctl->page = io_ctl->pages[io_ctl->index++]; 337 io_ctl->cur = page_address(io_ctl->page); 338 io_ctl->orig = io_ctl->cur; 339 io_ctl->size = PAGE_SIZE; 340 if (clear) 341 clear_page(io_ctl->cur); 342 } 343 344 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 345 { 346 int i; 347 348 io_ctl_unmap_page(io_ctl); 349 350 for (i = 0; i < io_ctl->num_pages; i++) { 351 if (io_ctl->pages[i]) { 352 ClearPageChecked(io_ctl->pages[i]); 353 unlock_page(io_ctl->pages[i]); 354 put_page(io_ctl->pages[i]); 355 } 356 } 357 } 358 359 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 360 int uptodate) 361 { 362 struct page *page; 363 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 364 int i; 365 366 for (i = 0; i < io_ctl->num_pages; i++) { 367 page = find_or_create_page(inode->i_mapping, i, mask); 368 if (!page) { 369 io_ctl_drop_pages(io_ctl); 370 return -ENOMEM; 371 } 372 io_ctl->pages[i] = page; 373 if (uptodate && !PageUptodate(page)) { 374 btrfs_readpage(NULL, page); 375 lock_page(page); 376 if (!PageUptodate(page)) { 377 btrfs_err(BTRFS_I(inode)->root->fs_info, 378 "error reading free space cache"); 379 io_ctl_drop_pages(io_ctl); 380 return -EIO; 381 } 382 } 383 } 
384 385 for (i = 0; i < io_ctl->num_pages; i++) { 386 clear_page_dirty_for_io(io_ctl->pages[i]); 387 set_page_extent_mapped(io_ctl->pages[i]); 388 } 389 390 return 0; 391 } 392 393 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 394 { 395 __le64 *val; 396 397 io_ctl_map_page(io_ctl, 1); 398 399 /* 400 * Skip the csum areas. If we don't check crcs then we just have a 401 * 64bit chunk at the front of the first page. 402 */ 403 if (io_ctl->check_crcs) { 404 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 405 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 406 } else { 407 io_ctl->cur += sizeof(u64); 408 io_ctl->size -= sizeof(u64) * 2; 409 } 410 411 val = io_ctl->cur; 412 *val = cpu_to_le64(generation); 413 io_ctl->cur += sizeof(u64); 414 } 415 416 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 417 { 418 __le64 *gen; 419 420 /* 421 * Skip the crc area. If we don't check crcs then we just have a 64bit 422 * chunk at the front of the first page. 423 */ 424 if (io_ctl->check_crcs) { 425 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 426 io_ctl->size -= sizeof(u64) + 427 (sizeof(u32) * io_ctl->num_pages); 428 } else { 429 io_ctl->cur += sizeof(u64); 430 io_ctl->size -= sizeof(u64) * 2; 431 } 432 433 gen = io_ctl->cur; 434 if (le64_to_cpu(*gen) != generation) { 435 btrfs_err_rl(io_ctl->fs_info, 436 "space cache generation (%llu) does not match inode (%llu)", 437 *gen, generation); 438 io_ctl_unmap_page(io_ctl); 439 return -EIO; 440 } 441 io_ctl->cur += sizeof(u64); 442 return 0; 443 } 444 445 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 446 { 447 u32 *tmp; 448 u32 crc = ~(u32)0; 449 unsigned offset = 0; 450 451 if (!io_ctl->check_crcs) { 452 io_ctl_unmap_page(io_ctl); 453 return; 454 } 455 456 if (index == 0) 457 offset = sizeof(u32) * io_ctl->num_pages; 458 459 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 460 PAGE_SIZE - offset); 461 btrfs_csum_final(crc, (u8 *)&crc); 462 io_ctl_unmap_page(io_ctl); 463 tmp = page_address(io_ctl->pages[0]); 464 tmp += index; 465 *tmp = crc; 466 } 467 468 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 469 { 470 u32 *tmp, val; 471 u32 crc = ~(u32)0; 472 unsigned offset = 0; 473 474 if (!io_ctl->check_crcs) { 475 io_ctl_map_page(io_ctl, 0); 476 return 0; 477 } 478 479 if (index == 0) 480 offset = sizeof(u32) * io_ctl->num_pages; 481 482 tmp = page_address(io_ctl->pages[0]); 483 tmp += index; 484 val = *tmp; 485 486 io_ctl_map_page(io_ctl, 0); 487 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 488 PAGE_SIZE - offset); 489 btrfs_csum_final(crc, (u8 *)&crc); 490 if (val != crc) { 491 btrfs_err_rl(io_ctl->fs_info, 492 "csum mismatch on free space cache"); 493 io_ctl_unmap_page(io_ctl); 494 return -EIO; 495 } 496 497 return 0; 498 } 499 500 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 501 void *bitmap) 502 { 503 struct btrfs_free_space_entry *entry; 504 505 if (!io_ctl->cur) 506 return -ENOSPC; 507 508 entry = io_ctl->cur; 509 entry->offset = cpu_to_le64(offset); 510 entry->bytes = cpu_to_le64(bytes); 511 entry->type = (bitmap) ? 
BTRFS_FREE_SPACE_BITMAP : 512 BTRFS_FREE_SPACE_EXTENT; 513 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 514 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 515 516 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 517 return 0; 518 519 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 520 521 /* No more pages to map */ 522 if (io_ctl->index >= io_ctl->num_pages) 523 return 0; 524 525 /* map the next page */ 526 io_ctl_map_page(io_ctl, 1); 527 return 0; 528 } 529 530 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 531 { 532 if (!io_ctl->cur) 533 return -ENOSPC; 534 535 /* 536 * If we aren't at the start of the current page, unmap this one and 537 * map the next one if there is any left. 538 */ 539 if (io_ctl->cur != io_ctl->orig) { 540 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 541 if (io_ctl->index >= io_ctl->num_pages) 542 return -ENOSPC; 543 io_ctl_map_page(io_ctl, 0); 544 } 545 546 copy_page(io_ctl->cur, bitmap); 547 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 548 if (io_ctl->index < io_ctl->num_pages) 549 io_ctl_map_page(io_ctl, 0); 550 return 0; 551 } 552 553 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 554 { 555 /* 556 * If we're not on the boundary we know we've modified the page and we 557 * need to crc the page. 558 */ 559 if (io_ctl->cur != io_ctl->orig) 560 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 561 else 562 io_ctl_unmap_page(io_ctl); 563 564 while (io_ctl->index < io_ctl->num_pages) { 565 io_ctl_map_page(io_ctl, 1); 566 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 567 } 568 } 569 570 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 571 struct btrfs_free_space *entry, u8 *type) 572 { 573 struct btrfs_free_space_entry *e; 574 int ret; 575 576 if (!io_ctl->cur) { 577 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 578 if (ret) 579 return ret; 580 } 581 582 e = io_ctl->cur; 583 entry->offset = le64_to_cpu(e->offset); 584 entry->bytes = le64_to_cpu(e->bytes); 585 *type = e->type; 586 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 587 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 588 589 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 590 return 0; 591 592 io_ctl_unmap_page(io_ctl); 593 594 return 0; 595 } 596 597 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 598 struct btrfs_free_space *entry) 599 { 600 int ret; 601 602 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 603 if (ret) 604 return ret; 605 606 copy_page(entry->bitmap, io_ctl->cur); 607 io_ctl_unmap_page(io_ctl); 608 609 return 0; 610 } 611 612 /* 613 * Since we attach pinned extents after the fact we can have contiguous sections 614 * of free space that are split up in entries. This poses a problem with the 615 * tree logging stuff since it could have allocated across what appears to be 2 616 * entries since we would have merged the entries when adding the pinned extents 617 * back to the free space cache. So run through the space cache that we just 618 * loaded and merge contiguous entries. This will make the log replay stuff not 619 * blow up and it will make for nicer allocator behavior. 
620 */ 621 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 622 { 623 struct btrfs_free_space *e, *prev = NULL; 624 struct rb_node *n; 625 626 again: 627 spin_lock(&ctl->tree_lock); 628 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 629 e = rb_entry(n, struct btrfs_free_space, offset_index); 630 if (!prev) 631 goto next; 632 if (e->bitmap || prev->bitmap) 633 goto next; 634 if (prev->offset + prev->bytes == e->offset) { 635 unlink_free_space(ctl, prev); 636 unlink_free_space(ctl, e); 637 prev->bytes += e->bytes; 638 kmem_cache_free(btrfs_free_space_cachep, e); 639 link_free_space(ctl, prev); 640 prev = NULL; 641 spin_unlock(&ctl->tree_lock); 642 goto again; 643 } 644 next: 645 prev = e; 646 } 647 spin_unlock(&ctl->tree_lock); 648 } 649 650 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 651 struct btrfs_free_space_ctl *ctl, 652 struct btrfs_path *path, u64 offset) 653 { 654 struct btrfs_fs_info *fs_info = root->fs_info; 655 struct btrfs_free_space_header *header; 656 struct extent_buffer *leaf; 657 struct btrfs_io_ctl io_ctl; 658 struct btrfs_key key; 659 struct btrfs_free_space *e, *n; 660 LIST_HEAD(bitmaps); 661 u64 num_entries; 662 u64 num_bitmaps; 663 u64 generation; 664 u8 type; 665 int ret = 0; 666 667 /* Nothing in the space cache, goodbye */ 668 if (!i_size_read(inode)) 669 return 0; 670 671 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 672 key.offset = offset; 673 key.type = 0; 674 675 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 676 if (ret < 0) 677 return 0; 678 else if (ret > 0) { 679 btrfs_release_path(path); 680 return 0; 681 } 682 683 ret = -1; 684 685 leaf = path->nodes[0]; 686 header = btrfs_item_ptr(leaf, path->slots[0], 687 struct btrfs_free_space_header); 688 num_entries = btrfs_free_space_entries(leaf, header); 689 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 690 generation = btrfs_free_space_generation(leaf, header); 691 btrfs_release_path(path); 692 693 if (!BTRFS_I(inode)->generation) { 694 btrfs_info(fs_info, 695 "the free space cache file (%llu) is invalid, skip it", 696 offset); 697 return 0; 698 } 699 700 if (BTRFS_I(inode)->generation != generation) { 701 btrfs_err(fs_info, 702 "free space inode generation (%llu) did not match free space cache generation (%llu)", 703 BTRFS_I(inode)->generation, generation); 704 return 0; 705 } 706 707 if (!num_entries) 708 return 0; 709 710 ret = io_ctl_init(&io_ctl, inode, 0); 711 if (ret) 712 return ret; 713 714 readahead_cache(inode); 715 716 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 717 if (ret) 718 goto out; 719 720 ret = io_ctl_check_crc(&io_ctl, 0); 721 if (ret) 722 goto free_cache; 723 724 ret = io_ctl_check_generation(&io_ctl, generation); 725 if (ret) 726 goto free_cache; 727 728 while (num_entries) { 729 e = kmem_cache_zalloc(btrfs_free_space_cachep, 730 GFP_NOFS); 731 if (!e) 732 goto free_cache; 733 734 ret = io_ctl_read_entry(&io_ctl, e, &type); 735 if (ret) { 736 kmem_cache_free(btrfs_free_space_cachep, e); 737 goto free_cache; 738 } 739 740 if (!e->bytes) { 741 kmem_cache_free(btrfs_free_space_cachep, e); 742 goto free_cache; 743 } 744 745 if (type == BTRFS_FREE_SPACE_EXTENT) { 746 spin_lock(&ctl->tree_lock); 747 ret = link_free_space(ctl, e); 748 spin_unlock(&ctl->tree_lock); 749 if (ret) { 750 btrfs_err(fs_info, 751 "Duplicate entries in free space cache, dumping"); 752 kmem_cache_free(btrfs_free_space_cachep, e); 753 goto free_cache; 754 } 755 } else { 756 ASSERT(num_bitmaps); 757 num_bitmaps--; 758 e->bitmap = 
kzalloc(PAGE_SIZE, GFP_NOFS); 759 if (!e->bitmap) { 760 kmem_cache_free( 761 btrfs_free_space_cachep, e); 762 goto free_cache; 763 } 764 spin_lock(&ctl->tree_lock); 765 ret = link_free_space(ctl, e); 766 ctl->total_bitmaps++; 767 ctl->op->recalc_thresholds(ctl); 768 spin_unlock(&ctl->tree_lock); 769 if (ret) { 770 btrfs_err(fs_info, 771 "Duplicate entries in free space cache, dumping"); 772 kmem_cache_free(btrfs_free_space_cachep, e); 773 goto free_cache; 774 } 775 list_add_tail(&e->list, &bitmaps); 776 } 777 778 num_entries--; 779 } 780 781 io_ctl_unmap_page(&io_ctl); 782 783 /* 784 * We add the bitmaps at the end of the entries in order that 785 * the bitmap entries are added to the cache. 786 */ 787 list_for_each_entry_safe(e, n, &bitmaps, list) { 788 list_del_init(&e->list); 789 ret = io_ctl_read_bitmap(&io_ctl, e); 790 if (ret) 791 goto free_cache; 792 } 793 794 io_ctl_drop_pages(&io_ctl); 795 merge_space_tree(ctl); 796 ret = 1; 797 out: 798 io_ctl_free(&io_ctl); 799 return ret; 800 free_cache: 801 io_ctl_drop_pages(&io_ctl); 802 __btrfs_remove_free_space_cache(ctl); 803 goto out; 804 } 805 806 int load_free_space_cache(struct btrfs_fs_info *fs_info, 807 struct btrfs_block_group_cache *block_group) 808 { 809 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 810 struct inode *inode; 811 struct btrfs_path *path; 812 int ret = 0; 813 bool matched; 814 u64 used = btrfs_block_group_used(&block_group->item); 815 816 /* 817 * If this block group has been marked to be cleared for one reason or 818 * another then we can't trust the on disk cache, so just return. 819 */ 820 spin_lock(&block_group->lock); 821 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 822 spin_unlock(&block_group->lock); 823 return 0; 824 } 825 spin_unlock(&block_group->lock); 826 827 path = btrfs_alloc_path(); 828 if (!path) 829 return 0; 830 path->search_commit_root = 1; 831 path->skip_locking = 1; 832 833 inode = lookup_free_space_inode(fs_info, block_group, path); 834 if (IS_ERR(inode)) { 835 btrfs_free_path(path); 836 return 0; 837 } 838 839 /* We may have converted the inode and made the cache invalid. 
*/ 840 spin_lock(&block_group->lock); 841 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 842 spin_unlock(&block_group->lock); 843 btrfs_free_path(path); 844 goto out; 845 } 846 spin_unlock(&block_group->lock); 847 848 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 849 path, block_group->key.objectid); 850 btrfs_free_path(path); 851 if (ret <= 0) 852 goto out; 853 854 spin_lock(&ctl->tree_lock); 855 matched = (ctl->free_space == (block_group->key.offset - used - 856 block_group->bytes_super)); 857 spin_unlock(&ctl->tree_lock); 858 859 if (!matched) { 860 __btrfs_remove_free_space_cache(ctl); 861 btrfs_warn(fs_info, 862 "block group %llu has wrong amount of free space", 863 block_group->key.objectid); 864 ret = -1; 865 } 866 out: 867 if (ret < 0) { 868 /* This cache is bogus, make sure it gets cleared */ 869 spin_lock(&block_group->lock); 870 block_group->disk_cache_state = BTRFS_DC_CLEAR; 871 spin_unlock(&block_group->lock); 872 ret = 0; 873 874 btrfs_warn(fs_info, 875 "failed to load free space cache for block group %llu, rebuilding it now", 876 block_group->key.objectid); 877 } 878 879 iput(inode); 880 return ret; 881 } 882 883 static noinline_for_stack 884 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 885 struct btrfs_free_space_ctl *ctl, 886 struct btrfs_block_group_cache *block_group, 887 int *entries, int *bitmaps, 888 struct list_head *bitmap_list) 889 { 890 int ret; 891 struct btrfs_free_cluster *cluster = NULL; 892 struct btrfs_free_cluster *cluster_locked = NULL; 893 struct rb_node *node = rb_first(&ctl->free_space_offset); 894 struct btrfs_trim_range *trim_entry; 895 896 /* Get the cluster for this block_group if it exists */ 897 if (block_group && !list_empty(&block_group->cluster_list)) { 898 cluster = list_entry(block_group->cluster_list.next, 899 struct btrfs_free_cluster, 900 block_group_list); 901 } 902 903 if (!node && cluster) { 904 cluster_locked = cluster; 905 spin_lock(&cluster_locked->lock); 906 node = rb_first(&cluster->root); 907 cluster = NULL; 908 } 909 910 /* Write out the extent entries */ 911 while (node) { 912 struct btrfs_free_space *e; 913 914 e = rb_entry(node, struct btrfs_free_space, offset_index); 915 *entries += 1; 916 917 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 918 e->bitmap); 919 if (ret) 920 goto fail; 921 922 if (e->bitmap) { 923 list_add_tail(&e->list, bitmap_list); 924 *bitmaps += 1; 925 } 926 node = rb_next(node); 927 if (!node && cluster) { 928 node = rb_first(&cluster->root); 929 cluster_locked = cluster; 930 spin_lock(&cluster_locked->lock); 931 cluster = NULL; 932 } 933 } 934 if (cluster_locked) { 935 spin_unlock(&cluster_locked->lock); 936 cluster_locked = NULL; 937 } 938 939 /* 940 * Make sure we don't miss any range that was removed from our rbtree 941 * because trimming is running. Otherwise after a umount+mount (or crash 942 * after committing the transaction) we would leak free space and get 943 * an inconsistent free space cache report from fsck. 
944 */ 945 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 946 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 947 trim_entry->bytes, NULL); 948 if (ret) 949 goto fail; 950 *entries += 1; 951 } 952 953 return 0; 954 fail: 955 if (cluster_locked) 956 spin_unlock(&cluster_locked->lock); 957 return -ENOSPC; 958 } 959 960 static noinline_for_stack int 961 update_cache_item(struct btrfs_trans_handle *trans, 962 struct btrfs_root *root, 963 struct inode *inode, 964 struct btrfs_path *path, u64 offset, 965 int entries, int bitmaps) 966 { 967 struct btrfs_key key; 968 struct btrfs_free_space_header *header; 969 struct extent_buffer *leaf; 970 int ret; 971 972 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 973 key.offset = offset; 974 key.type = 0; 975 976 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 977 if (ret < 0) { 978 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 979 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 980 goto fail; 981 } 982 leaf = path->nodes[0]; 983 if (ret > 0) { 984 struct btrfs_key found_key; 985 ASSERT(path->slots[0]); 986 path->slots[0]--; 987 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 988 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 989 found_key.offset != offset) { 990 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 991 inode->i_size - 1, 992 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 993 NULL); 994 btrfs_release_path(path); 995 goto fail; 996 } 997 } 998 999 BTRFS_I(inode)->generation = trans->transid; 1000 header = btrfs_item_ptr(leaf, path->slots[0], 1001 struct btrfs_free_space_header); 1002 btrfs_set_free_space_entries(leaf, header, entries); 1003 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1004 btrfs_set_free_space_generation(leaf, header, trans->transid); 1005 btrfs_mark_buffer_dirty(leaf); 1006 btrfs_release_path(path); 1007 1008 return 0; 1009 1010 fail: 1011 return -1; 1012 } 1013 1014 static noinline_for_stack int 1015 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1016 struct btrfs_block_group_cache *block_group, 1017 struct btrfs_io_ctl *io_ctl, 1018 int *entries) 1019 { 1020 u64 start, extent_start, extent_end, len; 1021 struct extent_io_tree *unpin = NULL; 1022 int ret; 1023 1024 if (!block_group) 1025 return 0; 1026 1027 /* 1028 * We want to add any pinned extents to our free space cache 1029 * so we don't leak the space 1030 * 1031 * We shouldn't have switched the pinned extents yet so this is the 1032 * right one 1033 */ 1034 unpin = fs_info->pinned_extents; 1035 1036 start = block_group->key.objectid; 1037 1038 while (start < block_group->key.objectid + block_group->key.offset) { 1039 ret = find_first_extent_bit(unpin, start, 1040 &extent_start, &extent_end, 1041 EXTENT_DIRTY, NULL); 1042 if (ret) 1043 return 0; 1044 1045 /* This pinned extent is out of our range */ 1046 if (extent_start >= block_group->key.objectid + 1047 block_group->key.offset) 1048 return 0; 1049 1050 extent_start = max(extent_start, start); 1051 extent_end = min(block_group->key.objectid + 1052 block_group->key.offset, extent_end + 1); 1053 len = extent_end - extent_start; 1054 1055 *entries += 1; 1056 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1057 if (ret) 1058 return -ENOSPC; 1059 1060 start = extent_end; 1061 } 1062 1063 return 0; 1064 } 1065 1066 static noinline_for_stack int 1067 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1068 { 1069 struct btrfs_free_space *entry, *next; 1070 int ret; 1071 1072 /* Write out the bitmaps */ 1073 
list_for_each_entry_safe(entry, next, bitmap_list, list) { 1074 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1075 if (ret) 1076 return -ENOSPC; 1077 list_del_init(&entry->list); 1078 } 1079 1080 return 0; 1081 } 1082 1083 static int flush_dirty_cache(struct inode *inode) 1084 { 1085 int ret; 1086 1087 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1088 if (ret) 1089 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1090 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 1091 1092 return ret; 1093 } 1094 1095 static void noinline_for_stack 1096 cleanup_bitmap_list(struct list_head *bitmap_list) 1097 { 1098 struct btrfs_free_space *entry, *next; 1099 1100 list_for_each_entry_safe(entry, next, bitmap_list, list) 1101 list_del_init(&entry->list); 1102 } 1103 1104 static void noinline_for_stack 1105 cleanup_write_cache_enospc(struct inode *inode, 1106 struct btrfs_io_ctl *io_ctl, 1107 struct extent_state **cached_state) 1108 { 1109 io_ctl_drop_pages(io_ctl); 1110 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1111 i_size_read(inode) - 1, cached_state); 1112 } 1113 1114 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1115 struct btrfs_trans_handle *trans, 1116 struct btrfs_block_group_cache *block_group, 1117 struct btrfs_io_ctl *io_ctl, 1118 struct btrfs_path *path, u64 offset) 1119 { 1120 int ret; 1121 struct inode *inode = io_ctl->inode; 1122 1123 if (!inode) 1124 return 0; 1125 1126 /* Flush the dirty pages in the cache file. */ 1127 ret = flush_dirty_cache(inode); 1128 if (ret) 1129 goto out; 1130 1131 /* Update the cache item to tell everyone this cache file is valid. */ 1132 ret = update_cache_item(trans, root, inode, path, offset, 1133 io_ctl->entries, io_ctl->bitmaps); 1134 out: 1135 io_ctl_free(io_ctl); 1136 if (ret) { 1137 invalidate_inode_pages2(inode->i_mapping); 1138 BTRFS_I(inode)->generation = 0; 1139 if (block_group) { 1140 #ifdef DEBUG 1141 btrfs_err(root->fs_info, 1142 "failed to write free space cache for block group %llu", 1143 block_group->key.objectid); 1144 #endif 1145 } 1146 } 1147 btrfs_update_inode(trans, root, inode); 1148 1149 if (block_group) { 1150 /* the dirty list is protected by the dirty_bgs_lock */ 1151 spin_lock(&trans->transaction->dirty_bgs_lock); 1152 1153 /* the disk_cache_state is protected by the block group lock */ 1154 spin_lock(&block_group->lock); 1155 1156 /* 1157 * only mark this as written if we didn't get put back on 1158 * the dirty list while waiting for IO. 
Otherwise our 1159 * cache state won't be right, and we won't get written again 1160 */ 1161 if (!ret && list_empty(&block_group->dirty_list)) 1162 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1163 else if (ret) 1164 block_group->disk_cache_state = BTRFS_DC_ERROR; 1165 1166 spin_unlock(&block_group->lock); 1167 spin_unlock(&trans->transaction->dirty_bgs_lock); 1168 io_ctl->inode = NULL; 1169 iput(inode); 1170 } 1171 1172 return ret; 1173 1174 } 1175 1176 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1177 struct btrfs_trans_handle *trans, 1178 struct btrfs_io_ctl *io_ctl, 1179 struct btrfs_path *path) 1180 { 1181 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1182 } 1183 1184 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1185 struct btrfs_block_group_cache *block_group, 1186 struct btrfs_path *path) 1187 { 1188 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1189 block_group, &block_group->io_ctl, 1190 path, block_group->key.objectid); 1191 } 1192 1193 /** 1194 * __btrfs_write_out_cache - write out cached info to an inode 1195 * @root - the root the inode belongs to 1196 * @ctl - the free space cache we are going to write out 1197 * @block_group - the block_group for this cache if it belongs to a block_group 1198 * @trans - the trans handle 1199 * 1200 * This function writes out a free space cache struct to disk for quick recovery 1201 * on mount. This will return 0 if it was successful in writing the cache out, 1202 * or an errno if it was not. 1203 */ 1204 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1205 struct btrfs_free_space_ctl *ctl, 1206 struct btrfs_block_group_cache *block_group, 1207 struct btrfs_io_ctl *io_ctl, 1208 struct btrfs_trans_handle *trans) 1209 { 1210 struct btrfs_fs_info *fs_info = root->fs_info; 1211 struct extent_state *cached_state = NULL; 1212 LIST_HEAD(bitmap_list); 1213 int entries = 0; 1214 int bitmaps = 0; 1215 int ret; 1216 int must_iput = 0; 1217 1218 if (!i_size_read(inode)) 1219 return -EIO; 1220 1221 WARN_ON(io_ctl->pages); 1222 ret = io_ctl_init(io_ctl, inode, 1); 1223 if (ret) 1224 return ret; 1225 1226 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1227 down_write(&block_group->data_rwsem); 1228 spin_lock(&block_group->lock); 1229 if (block_group->delalloc_bytes) { 1230 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1231 spin_unlock(&block_group->lock); 1232 up_write(&block_group->data_rwsem); 1233 BTRFS_I(inode)->generation = 0; 1234 ret = 0; 1235 must_iput = 1; 1236 goto out; 1237 } 1238 spin_unlock(&block_group->lock); 1239 } 1240 1241 /* Lock all pages first so we can lock the extent safely. */ 1242 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1243 if (ret) 1244 goto out_unlock; 1245 1246 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1247 &cached_state); 1248 1249 io_ctl_set_generation(io_ctl, trans->transid); 1250 1251 mutex_lock(&ctl->cache_writeout_mutex); 1252 /* Write out the extent entries in the free space cache */ 1253 spin_lock(&ctl->tree_lock); 1254 ret = write_cache_extent_entries(io_ctl, ctl, 1255 block_group, &entries, &bitmaps, 1256 &bitmap_list); 1257 if (ret) 1258 goto out_nospc_locked; 1259 1260 /* 1261 * Some spaces that are freed in the current transaction are pinned, 1262 * they will be added into free space cache after the transaction is 1263 * committed, we shouldn't lose them. 
1264 * 1265 * If this changes while we are working we'll get added back to 1266 * the dirty list and redo it. No locking needed 1267 */ 1268 ret = write_pinned_extent_entries(fs_info, block_group, 1269 io_ctl, &entries); 1270 if (ret) 1271 goto out_nospc_locked; 1272 1273 /* 1274 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1275 * locked while doing it because a concurrent trim can be manipulating 1276 * or freeing the bitmap. 1277 */ 1278 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1279 spin_unlock(&ctl->tree_lock); 1280 mutex_unlock(&ctl->cache_writeout_mutex); 1281 if (ret) 1282 goto out_nospc; 1283 1284 /* Zero out the rest of the pages just to make sure */ 1285 io_ctl_zero_remaining_pages(io_ctl); 1286 1287 /* Everything is written out, now we dirty the pages in the file. */ 1288 ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0, 1289 i_size_read(inode), &cached_state); 1290 if (ret) 1291 goto out_nospc; 1292 1293 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1294 up_write(&block_group->data_rwsem); 1295 /* 1296 * Release the pages and unlock the extent, we will flush 1297 * them out later 1298 */ 1299 io_ctl_drop_pages(io_ctl); 1300 1301 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1302 i_size_read(inode) - 1, &cached_state); 1303 1304 /* 1305 * at this point the pages are under IO and we're happy, 1306 * The caller is responsible for waiting on them and updating the 1307 * the cache and the inode 1308 */ 1309 io_ctl->entries = entries; 1310 io_ctl->bitmaps = bitmaps; 1311 1312 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1313 if (ret) 1314 goto out; 1315 1316 return 0; 1317 1318 out: 1319 io_ctl->inode = NULL; 1320 io_ctl_free(io_ctl); 1321 if (ret) { 1322 invalidate_inode_pages2(inode->i_mapping); 1323 BTRFS_I(inode)->generation = 0; 1324 } 1325 btrfs_update_inode(trans, root, inode); 1326 if (must_iput) 1327 iput(inode); 1328 return ret; 1329 1330 out_nospc_locked: 1331 cleanup_bitmap_list(&bitmap_list); 1332 spin_unlock(&ctl->tree_lock); 1333 mutex_unlock(&ctl->cache_writeout_mutex); 1334 1335 out_nospc: 1336 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1337 1338 out_unlock: 1339 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1340 up_write(&block_group->data_rwsem); 1341 1342 goto out; 1343 } 1344 1345 int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 1346 struct btrfs_trans_handle *trans, 1347 struct btrfs_block_group_cache *block_group, 1348 struct btrfs_path *path) 1349 { 1350 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1351 struct inode *inode; 1352 int ret = 0; 1353 1354 spin_lock(&block_group->lock); 1355 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1356 spin_unlock(&block_group->lock); 1357 return 0; 1358 } 1359 spin_unlock(&block_group->lock); 1360 1361 inode = lookup_free_space_inode(fs_info, block_group, path); 1362 if (IS_ERR(inode)) 1363 return 0; 1364 1365 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1366 block_group, &block_group->io_ctl, trans); 1367 if (ret) { 1368 #ifdef DEBUG 1369 btrfs_err(fs_info, 1370 "failed to write free space cache for block group %llu", 1371 block_group->key.objectid); 1372 #endif 1373 spin_lock(&block_group->lock); 1374 block_group->disk_cache_state = BTRFS_DC_ERROR; 1375 spin_unlock(&block_group->lock); 1376 1377 block_group->io_ctl.inode = NULL; 1378 iput(inode); 1379 } 1380 1381 /* 1382 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1383 * to wait for IO and 
put the inode 1384 */ 1385 1386 return ret; 1387 } 1388 1389 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1390 u64 offset) 1391 { 1392 ASSERT(offset >= bitmap_start); 1393 offset -= bitmap_start; 1394 return (unsigned long)(div_u64(offset, unit)); 1395 } 1396 1397 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1398 { 1399 return (unsigned long)(div_u64(bytes, unit)); 1400 } 1401 1402 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1403 u64 offset) 1404 { 1405 u64 bitmap_start; 1406 u64 bytes_per_bitmap; 1407 1408 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1409 bitmap_start = offset - ctl->start; 1410 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1411 bitmap_start *= bytes_per_bitmap; 1412 bitmap_start += ctl->start; 1413 1414 return bitmap_start; 1415 } 1416 1417 static int tree_insert_offset(struct rb_root *root, u64 offset, 1418 struct rb_node *node, int bitmap) 1419 { 1420 struct rb_node **p = &root->rb_node; 1421 struct rb_node *parent = NULL; 1422 struct btrfs_free_space *info; 1423 1424 while (*p) { 1425 parent = *p; 1426 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1427 1428 if (offset < info->offset) { 1429 p = &(*p)->rb_left; 1430 } else if (offset > info->offset) { 1431 p = &(*p)->rb_right; 1432 } else { 1433 /* 1434 * we could have a bitmap entry and an extent entry 1435 * share the same offset. If this is the case, we want 1436 * the extent entry to always be found first if we do a 1437 * linear search through the tree, since we want to have 1438 * the quickest allocation time, and allocating from an 1439 * extent is faster than allocating from a bitmap. So 1440 * if we're inserting a bitmap and we find an entry at 1441 * this offset, we want to go right, or after this entry 1442 * logically. If we are inserting an extent and we've 1443 * found a bitmap, we want to go left, or before 1444 * logically. 1445 */ 1446 if (bitmap) { 1447 if (info->bitmap) { 1448 WARN_ON_ONCE(1); 1449 return -EEXIST; 1450 } 1451 p = &(*p)->rb_right; 1452 } else { 1453 if (!info->bitmap) { 1454 WARN_ON_ONCE(1); 1455 return -EEXIST; 1456 } 1457 p = &(*p)->rb_left; 1458 } 1459 } 1460 } 1461 1462 rb_link_node(node, parent, p); 1463 rb_insert_color(node, root); 1464 1465 return 0; 1466 } 1467 1468 /* 1469 * searches the tree for the given offset. 1470 * 1471 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1472 * want a section that has at least bytes size and comes at or after the given 1473 * offset. 1474 */ 1475 static struct btrfs_free_space * 1476 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1477 u64 offset, int bitmap_only, int fuzzy) 1478 { 1479 struct rb_node *n = ctl->free_space_offset.rb_node; 1480 struct btrfs_free_space *entry, *prev = NULL; 1481 1482 /* find entry that is closest to the 'offset' */ 1483 while (1) { 1484 if (!n) { 1485 entry = NULL; 1486 break; 1487 } 1488 1489 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1490 prev = entry; 1491 1492 if (offset < entry->offset) 1493 n = n->rb_left; 1494 else if (offset > entry->offset) 1495 n = n->rb_right; 1496 else 1497 break; 1498 } 1499 1500 if (bitmap_only) { 1501 if (!entry) 1502 return NULL; 1503 if (entry->bitmap) 1504 return entry; 1505 1506 /* 1507 * bitmap entry and extent entry may share same offset, 1508 * in that case, bitmap entry comes after extent entry. 
1509 */ 1510 n = rb_next(n); 1511 if (!n) 1512 return NULL; 1513 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1514 if (entry->offset != offset) 1515 return NULL; 1516 1517 WARN_ON(!entry->bitmap); 1518 return entry; 1519 } else if (entry) { 1520 if (entry->bitmap) { 1521 /* 1522 * if previous extent entry covers the offset, 1523 * we should return it instead of the bitmap entry 1524 */ 1525 n = rb_prev(&entry->offset_index); 1526 if (n) { 1527 prev = rb_entry(n, struct btrfs_free_space, 1528 offset_index); 1529 if (!prev->bitmap && 1530 prev->offset + prev->bytes > offset) 1531 entry = prev; 1532 } 1533 } 1534 return entry; 1535 } 1536 1537 if (!prev) 1538 return NULL; 1539 1540 /* find last entry before the 'offset' */ 1541 entry = prev; 1542 if (entry->offset > offset) { 1543 n = rb_prev(&entry->offset_index); 1544 if (n) { 1545 entry = rb_entry(n, struct btrfs_free_space, 1546 offset_index); 1547 ASSERT(entry->offset <= offset); 1548 } else { 1549 if (fuzzy) 1550 return entry; 1551 else 1552 return NULL; 1553 } 1554 } 1555 1556 if (entry->bitmap) { 1557 n = rb_prev(&entry->offset_index); 1558 if (n) { 1559 prev = rb_entry(n, struct btrfs_free_space, 1560 offset_index); 1561 if (!prev->bitmap && 1562 prev->offset + prev->bytes > offset) 1563 return prev; 1564 } 1565 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1566 return entry; 1567 } else if (entry->offset + entry->bytes > offset) 1568 return entry; 1569 1570 if (!fuzzy) 1571 return NULL; 1572 1573 while (1) { 1574 if (entry->bitmap) { 1575 if (entry->offset + BITS_PER_BITMAP * 1576 ctl->unit > offset) 1577 break; 1578 } else { 1579 if (entry->offset + entry->bytes > offset) 1580 break; 1581 } 1582 1583 n = rb_next(&entry->offset_index); 1584 if (!n) 1585 return NULL; 1586 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1587 } 1588 return entry; 1589 } 1590 1591 static inline void 1592 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1593 struct btrfs_free_space *info) 1594 { 1595 rb_erase(&info->offset_index, &ctl->free_space_offset); 1596 ctl->free_extents--; 1597 } 1598 1599 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1600 struct btrfs_free_space *info) 1601 { 1602 __unlink_free_space(ctl, info); 1603 ctl->free_space -= info->bytes; 1604 } 1605 1606 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1607 struct btrfs_free_space *info) 1608 { 1609 int ret = 0; 1610 1611 ASSERT(info->bytes || info->bitmap); 1612 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1613 &info->offset_index, (info->bitmap != NULL)); 1614 if (ret) 1615 return ret; 1616 1617 ctl->free_space += info->bytes; 1618 ctl->free_extents++; 1619 return ret; 1620 } 1621 1622 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1623 { 1624 struct btrfs_block_group_cache *block_group = ctl->private; 1625 u64 max_bytes; 1626 u64 bitmap_bytes; 1627 u64 extent_bytes; 1628 u64 size = block_group->key.offset; 1629 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1630 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1631 1632 max_bitmaps = max_t(u64, max_bitmaps, 1); 1633 1634 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1635 1636 /* 1637 * The goal is to keep the total amount of memory used per 1gb of space 1638 * at or below 32k, so we need to adjust how much memory we allow to be 1639 * used by extent based free space tracking 1640 */ 1641 if (size < SZ_1G) 1642 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1643 else 1644 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1645 1646 /* 1647 * we want to account for 1 more bitmap than what we have so we can make 1648 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1649 * we add more bitmaps. 1650 */ 1651 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1652 1653 if (bitmap_bytes >= max_bytes) { 1654 ctl->extents_thresh = 0; 1655 return; 1656 } 1657 1658 /* 1659 * we want the extent entry threshold to always be at most 1/2 the max 1660 * bytes we can have, or whatever is less than that. 1661 */ 1662 extent_bytes = max_bytes - bitmap_bytes; 1663 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1664 1665 ctl->extents_thresh = 1666 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1667 } 1668 1669 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1670 struct btrfs_free_space *info, 1671 u64 offset, u64 bytes) 1672 { 1673 unsigned long start, count; 1674 1675 start = offset_to_bit(info->offset, ctl->unit, offset); 1676 count = bytes_to_bits(bytes, ctl->unit); 1677 ASSERT(start + count <= BITS_PER_BITMAP); 1678 1679 bitmap_clear(info->bitmap, start, count); 1680 1681 info->bytes -= bytes; 1682 } 1683 1684 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1685 struct btrfs_free_space *info, u64 offset, 1686 u64 bytes) 1687 { 1688 __bitmap_clear_bits(ctl, info, offset, bytes); 1689 ctl->free_space -= bytes; 1690 } 1691 1692 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1693 struct btrfs_free_space *info, u64 offset, 1694 u64 bytes) 1695 { 1696 unsigned long start, count; 1697 1698 start = offset_to_bit(info->offset, ctl->unit, offset); 1699 count = bytes_to_bits(bytes, ctl->unit); 1700 ASSERT(start + count <= BITS_PER_BITMAP); 1701 1702 bitmap_set(info->bitmap, start, count); 1703 1704 info->bytes += bytes; 1705 ctl->free_space += bytes; 1706 } 1707 1708 /* 1709 * If we can not find suitable extent, we will use bytes to record 1710 * the size of the max extent. 1711 */ 1712 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1713 struct btrfs_free_space *bitmap_info, u64 *offset, 1714 u64 *bytes, bool for_alloc) 1715 { 1716 unsigned long found_bits = 0; 1717 unsigned long max_bits = 0; 1718 unsigned long bits, i; 1719 unsigned long next_zero; 1720 unsigned long extent_bits; 1721 1722 /* 1723 * Skip searching the bitmap if we don't have a contiguous section that 1724 * is large enough for this allocation. 
1725 */ 1726 if (for_alloc && 1727 bitmap_info->max_extent_size && 1728 bitmap_info->max_extent_size < *bytes) { 1729 *bytes = bitmap_info->max_extent_size; 1730 return -1; 1731 } 1732 1733 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1734 max_t(u64, *offset, bitmap_info->offset)); 1735 bits = bytes_to_bits(*bytes, ctl->unit); 1736 1737 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1738 if (for_alloc && bits == 1) { 1739 found_bits = 1; 1740 break; 1741 } 1742 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1743 BITS_PER_BITMAP, i); 1744 extent_bits = next_zero - i; 1745 if (extent_bits >= bits) { 1746 found_bits = extent_bits; 1747 break; 1748 } else if (extent_bits > max_bits) { 1749 max_bits = extent_bits; 1750 } 1751 i = next_zero; 1752 } 1753 1754 if (found_bits) { 1755 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1756 *bytes = (u64)(found_bits) * ctl->unit; 1757 return 0; 1758 } 1759 1760 *bytes = (u64)(max_bits) * ctl->unit; 1761 bitmap_info->max_extent_size = *bytes; 1762 return -1; 1763 } 1764 1765 /* Cache the size of the max extent in bytes */ 1766 static struct btrfs_free_space * 1767 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1768 unsigned long align, u64 *max_extent_size) 1769 { 1770 struct btrfs_free_space *entry; 1771 struct rb_node *node; 1772 u64 tmp; 1773 u64 align_off; 1774 int ret; 1775 1776 if (!ctl->free_space_offset.rb_node) 1777 goto out; 1778 1779 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1780 if (!entry) 1781 goto out; 1782 1783 for (node = &entry->offset_index; node; node = rb_next(node)) { 1784 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1785 if (entry->bytes < *bytes) { 1786 if (entry->bytes > *max_extent_size) 1787 *max_extent_size = entry->bytes; 1788 continue; 1789 } 1790 1791 /* make sure the space returned is big enough 1792 * to match our requested alignment 1793 */ 1794 if (*bytes >= align) { 1795 tmp = entry->offset - ctl->start + align - 1; 1796 tmp = div64_u64(tmp, align); 1797 tmp = tmp * align + ctl->start; 1798 align_off = tmp - entry->offset; 1799 } else { 1800 align_off = 0; 1801 tmp = entry->offset; 1802 } 1803 1804 if (entry->bytes < *bytes + align_off) { 1805 if (entry->bytes > *max_extent_size) 1806 *max_extent_size = entry->bytes; 1807 continue; 1808 } 1809 1810 if (entry->bitmap) { 1811 u64 size = *bytes; 1812 1813 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1814 if (!ret) { 1815 *offset = tmp; 1816 *bytes = size; 1817 return entry; 1818 } else if (size > *max_extent_size) { 1819 *max_extent_size = size; 1820 } 1821 continue; 1822 } 1823 1824 *offset = tmp; 1825 *bytes = entry->bytes - align_off; 1826 return entry; 1827 } 1828 out: 1829 return NULL; 1830 } 1831 1832 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1833 struct btrfs_free_space *info, u64 offset) 1834 { 1835 info->offset = offset_to_bitmap(ctl, offset); 1836 info->bytes = 0; 1837 INIT_LIST_HEAD(&info->list); 1838 link_free_space(ctl, info); 1839 ctl->total_bitmaps++; 1840 1841 ctl->op->recalc_thresholds(ctl); 1842 } 1843 1844 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1845 struct btrfs_free_space *bitmap_info) 1846 { 1847 unlink_free_space(ctl, bitmap_info); 1848 kfree(bitmap_info->bitmap); 1849 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1850 ctl->total_bitmaps--; 1851 ctl->op->recalc_thresholds(ctl); 1852 } 1853 1854 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1855 struct 
btrfs_free_space *bitmap_info, 1856 u64 *offset, u64 *bytes) 1857 { 1858 u64 end; 1859 u64 search_start, search_bytes; 1860 int ret; 1861 1862 again: 1863 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1864 1865 /* 1866 * We need to search for bits in this bitmap. We could only cover some 1867 * of the extent in this bitmap thanks to how we add space, so we need 1868 * to search for as much as it as we can and clear that amount, and then 1869 * go searching for the next bit. 1870 */ 1871 search_start = *offset; 1872 search_bytes = ctl->unit; 1873 search_bytes = min(search_bytes, end - search_start + 1); 1874 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes, 1875 false); 1876 if (ret < 0 || search_start != *offset) 1877 return -EINVAL; 1878 1879 /* We may have found more bits than what we need */ 1880 search_bytes = min(search_bytes, *bytes); 1881 1882 /* Cannot clear past the end of the bitmap */ 1883 search_bytes = min(search_bytes, end - search_start + 1); 1884 1885 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); 1886 *offset += search_bytes; 1887 *bytes -= search_bytes; 1888 1889 if (*bytes) { 1890 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1891 if (!bitmap_info->bytes) 1892 free_bitmap(ctl, bitmap_info); 1893 1894 /* 1895 * no entry after this bitmap, but we still have bytes to 1896 * remove, so something has gone wrong. 1897 */ 1898 if (!next) 1899 return -EINVAL; 1900 1901 bitmap_info = rb_entry(next, struct btrfs_free_space, 1902 offset_index); 1903 1904 /* 1905 * if the next entry isn't a bitmap we need to return to let the 1906 * extent stuff do its work. 1907 */ 1908 if (!bitmap_info->bitmap) 1909 return -EAGAIN; 1910 1911 /* 1912 * Ok the next item is a bitmap, but it may not actually hold 1913 * the information for the rest of this free space stuff, so 1914 * look for it, and if we don't find it return so we can try 1915 * everything over again. 1916 */ 1917 search_start = *offset; 1918 search_bytes = ctl->unit; 1919 ret = search_bitmap(ctl, bitmap_info, &search_start, 1920 &search_bytes, false); 1921 if (ret < 0 || search_start != *offset) 1922 return -EAGAIN; 1923 1924 goto again; 1925 } else if (!bitmap_info->bytes) 1926 free_bitmap(ctl, bitmap_info); 1927 1928 return 0; 1929 } 1930 1931 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, 1932 struct btrfs_free_space *info, u64 offset, 1933 u64 bytes) 1934 { 1935 u64 bytes_to_set = 0; 1936 u64 end; 1937 1938 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1939 1940 bytes_to_set = min(end - offset, bytes); 1941 1942 bitmap_set_bits(ctl, info, offset, bytes_to_set); 1943 1944 /* 1945 * We set some bytes, we have no idea what the max extent size is 1946 * anymore. 
1947 */ 1948 info->max_extent_size = 0; 1949 1950 return bytes_to_set; 1951 1952 } 1953 1954 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1955 struct btrfs_free_space *info) 1956 { 1957 struct btrfs_block_group_cache *block_group = ctl->private; 1958 struct btrfs_fs_info *fs_info = block_group->fs_info; 1959 bool forced = false; 1960 1961 #ifdef CONFIG_BTRFS_DEBUG 1962 if (btrfs_should_fragment_free_space(block_group)) 1963 forced = true; 1964 #endif 1965 1966 /* 1967 * If we are below the extents threshold then we can add this as an 1968 * extent, and don't have to deal with the bitmap 1969 */ 1970 if (!forced && ctl->free_extents < ctl->extents_thresh) { 1971 /* 1972 * If this block group has some small extents we don't want to 1973 * use up all of our free slots in the cache with them, we want 1974 * to reserve them to larger extents, however if we have plenty 1975 * of cache left then go ahead an dadd them, no sense in adding 1976 * the overhead of a bitmap if we don't have to. 1977 */ 1978 if (info->bytes <= fs_info->sectorsize * 4) { 1979 if (ctl->free_extents * 2 <= ctl->extents_thresh) 1980 return false; 1981 } else { 1982 return false; 1983 } 1984 } 1985 1986 /* 1987 * The original block groups from mkfs can be really small, like 8 1988 * megabytes, so don't bother with a bitmap for those entries. However 1989 * some block groups can be smaller than what a bitmap would cover but 1990 * are still large enough that they could overflow the 32k memory limit, 1991 * so allow those block groups to still be allowed to have a bitmap 1992 * entry. 1993 */ 1994 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) 1995 return false; 1996 1997 return true; 1998 } 1999 2000 static const struct btrfs_free_space_op free_space_op = { 2001 .recalc_thresholds = recalculate_thresholds, 2002 .use_bitmap = use_bitmap, 2003 }; 2004 2005 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2006 struct btrfs_free_space *info) 2007 { 2008 struct btrfs_free_space *bitmap_info; 2009 struct btrfs_block_group_cache *block_group = NULL; 2010 int added = 0; 2011 u64 bytes, offset, bytes_added; 2012 int ret; 2013 2014 bytes = info->bytes; 2015 offset = info->offset; 2016 2017 if (!ctl->op->use_bitmap(ctl, info)) 2018 return 0; 2019 2020 if (ctl->op == &free_space_op) 2021 block_group = ctl->private; 2022 again: 2023 /* 2024 * Since we link bitmaps right into the cluster we need to see if we 2025 * have a cluster here, and if so and it has our bitmap we need to add 2026 * the free space to that bitmap. 
2027 */ 2028 if (block_group && !list_empty(&block_group->cluster_list)) { 2029 struct btrfs_free_cluster *cluster; 2030 struct rb_node *node; 2031 struct btrfs_free_space *entry; 2032 2033 cluster = list_entry(block_group->cluster_list.next, 2034 struct btrfs_free_cluster, 2035 block_group_list); 2036 spin_lock(&cluster->lock); 2037 node = rb_first(&cluster->root); 2038 if (!node) { 2039 spin_unlock(&cluster->lock); 2040 goto no_cluster_bitmap; 2041 } 2042 2043 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2044 if (!entry->bitmap) { 2045 spin_unlock(&cluster->lock); 2046 goto no_cluster_bitmap; 2047 } 2048 2049 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2050 bytes_added = add_bytes_to_bitmap(ctl, entry, 2051 offset, bytes); 2052 bytes -= bytes_added; 2053 offset += bytes_added; 2054 } 2055 spin_unlock(&cluster->lock); 2056 if (!bytes) { 2057 ret = 1; 2058 goto out; 2059 } 2060 } 2061 2062 no_cluster_bitmap: 2063 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2064 1, 0); 2065 if (!bitmap_info) { 2066 ASSERT(added == 0); 2067 goto new_bitmap; 2068 } 2069 2070 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2071 bytes -= bytes_added; 2072 offset += bytes_added; 2073 added = 0; 2074 2075 if (!bytes) { 2076 ret = 1; 2077 goto out; 2078 } else 2079 goto again; 2080 2081 new_bitmap: 2082 if (info && info->bitmap) { 2083 add_new_bitmap(ctl, info, offset); 2084 added = 1; 2085 info = NULL; 2086 goto again; 2087 } else { 2088 spin_unlock(&ctl->tree_lock); 2089 2090 /* no pre-allocated info, allocate a new one */ 2091 if (!info) { 2092 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2093 GFP_NOFS); 2094 if (!info) { 2095 spin_lock(&ctl->tree_lock); 2096 ret = -ENOMEM; 2097 goto out; 2098 } 2099 } 2100 2101 /* allocate the bitmap */ 2102 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2103 spin_lock(&ctl->tree_lock); 2104 if (!info->bitmap) { 2105 ret = -ENOMEM; 2106 goto out; 2107 } 2108 goto again; 2109 } 2110 2111 out: 2112 if (info) { 2113 if (info->bitmap) 2114 kfree(info->bitmap); 2115 kmem_cache_free(btrfs_free_space_cachep, info); 2116 } 2117 2118 return ret; 2119 } 2120 2121 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2122 struct btrfs_free_space *info, bool update_stat) 2123 { 2124 struct btrfs_free_space *left_info; 2125 struct btrfs_free_space *right_info; 2126 bool merged = false; 2127 u64 offset = info->offset; 2128 u64 bytes = info->bytes; 2129 2130 /* 2131 * first we want to see if there is free space adjacent to the range we 2132 * are adding, if there is remove that struct and add a new one to 2133 * cover the entire range 2134 */ 2135 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2136 if (right_info && rb_prev(&right_info->offset_index)) 2137 left_info = rb_entry(rb_prev(&right_info->offset_index), 2138 struct btrfs_free_space, offset_index); 2139 else 2140 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2141 2142 if (right_info && !right_info->bitmap) { 2143 if (update_stat) 2144 unlink_free_space(ctl, right_info); 2145 else 2146 __unlink_free_space(ctl, right_info); 2147 info->bytes += right_info->bytes; 2148 kmem_cache_free(btrfs_free_space_cachep, right_info); 2149 merged = true; 2150 } 2151 2152 if (left_info && !left_info->bitmap && 2153 left_info->offset + left_info->bytes == offset) { 2154 if (update_stat) 2155 unlink_free_space(ctl, left_info); 2156 else 2157 __unlink_free_space(ctl, left_info); 2158 info->offset = left_info->offset; 2159 info->bytes += 
left_info->bytes; 2160 kmem_cache_free(btrfs_free_space_cachep, left_info); 2161 merged = true; 2162 } 2163 2164 return merged; 2165 } 2166 2167 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2168 struct btrfs_free_space *info, 2169 bool update_stat) 2170 { 2171 struct btrfs_free_space *bitmap; 2172 unsigned long i; 2173 unsigned long j; 2174 const u64 end = info->offset + info->bytes; 2175 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2176 u64 bytes; 2177 2178 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2179 if (!bitmap) 2180 return false; 2181 2182 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2183 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2184 if (j == i) 2185 return false; 2186 bytes = (j - i) * ctl->unit; 2187 info->bytes += bytes; 2188 2189 if (update_stat) 2190 bitmap_clear_bits(ctl, bitmap, end, bytes); 2191 else 2192 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2193 2194 if (!bitmap->bytes) 2195 free_bitmap(ctl, bitmap); 2196 2197 return true; 2198 } 2199 2200 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2201 struct btrfs_free_space *info, 2202 bool update_stat) 2203 { 2204 struct btrfs_free_space *bitmap; 2205 u64 bitmap_offset; 2206 unsigned long i; 2207 unsigned long j; 2208 unsigned long prev_j; 2209 u64 bytes; 2210 2211 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2212 /* If we're on a boundary, try the previous logical bitmap. */ 2213 if (bitmap_offset == info->offset) { 2214 if (info->offset == 0) 2215 return false; 2216 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2217 } 2218 2219 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2220 if (!bitmap) 2221 return false; 2222 2223 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2224 j = 0; 2225 prev_j = (unsigned long)-1; 2226 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2227 if (j > i) 2228 break; 2229 prev_j = j; 2230 } 2231 if (prev_j == i) 2232 return false; 2233 2234 if (prev_j == (unsigned long)-1) 2235 bytes = (i + 1) * ctl->unit; 2236 else 2237 bytes = (i - prev_j) * ctl->unit; 2238 2239 info->offset -= bytes; 2240 info->bytes += bytes; 2241 2242 if (update_stat) 2243 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2244 else 2245 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2246 2247 if (!bitmap->bytes) 2248 free_bitmap(ctl, bitmap); 2249 2250 return true; 2251 } 2252 2253 /* 2254 * We prefer always to allocate from extent entries, both for clustered and 2255 * non-clustered allocation requests. So when attempting to add a new extent 2256 * entry, try to see if there's adjacent free space in bitmap entries, and if 2257 * there is, migrate that space from the bitmaps to the extent. 2258 * Like this we get better chances of satisfying space allocation requests 2259 * because we attempt to satisfy them based on a single cache entry, and never 2260 * on 2 or more entries - even if the entries represent a contiguous free space 2261 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2262 * ends). 2263 */ 2264 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2265 struct btrfs_free_space *info, 2266 bool update_stat) 2267 { 2268 /* 2269 * Only work with disconnected entries, as we can change their offset, 2270 * and must be extent entries. 
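* Changing ->offset on an entry that is still linked would corrupt the
* offset-ordered rbtree, which is what the ASSERTs below guard against.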
2271 */ 2272 ASSERT(!info->bitmap); 2273 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2274 2275 if (ctl->total_bitmaps > 0) { 2276 bool stole_end; 2277 bool stole_front = false; 2278 2279 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2280 if (ctl->total_bitmaps > 0) 2281 stole_front = steal_from_bitmap_to_front(ctl, info, 2282 update_stat); 2283 2284 if (stole_end || stole_front) 2285 try_merge_free_space(ctl, info, update_stat); 2286 } 2287 } 2288 2289 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2290 struct btrfs_free_space_ctl *ctl, 2291 u64 offset, u64 bytes) 2292 { 2293 struct btrfs_free_space *info; 2294 int ret = 0; 2295 2296 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2297 if (!info) 2298 return -ENOMEM; 2299 2300 info->offset = offset; 2301 info->bytes = bytes; 2302 RB_CLEAR_NODE(&info->offset_index); 2303 2304 spin_lock(&ctl->tree_lock); 2305 2306 if (try_merge_free_space(ctl, info, true)) 2307 goto link; 2308 2309 /* 2310 * If there was no extent directly to the left or right of this new 2311 * extent, then we know we're going to have to allocate a new extent, so 2312 * before we do that see if we need to drop this into a bitmap 2313 */ 2314 ret = insert_into_bitmap(ctl, info); 2315 if (ret < 0) { 2316 goto out; 2317 } else if (ret) { 2318 ret = 0; 2319 goto out; 2320 } 2321 link: 2322 /* 2323 * Only steal free space from adjacent bitmaps if we're sure we're not 2324 * going to add the new free space to existing bitmap entries - because 2325 * that would mean unnecessary work that would be reverted. Therefore 2326 * attempt to steal space from bitmaps if we're adding an extent entry. 2327 */ 2328 steal_from_bitmap(ctl, info, true); 2329 2330 ret = link_free_space(ctl, info); 2331 if (ret) 2332 kmem_cache_free(btrfs_free_space_cachep, info); 2333 out: 2334 spin_unlock(&ctl->tree_lock); 2335 2336 if (ret) { 2337 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2338 ASSERT(ret != -EEXIST); 2339 } 2340 2341 return ret; 2342 } 2343 2344 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2345 u64 offset, u64 bytes) 2346 { 2347 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2348 struct btrfs_free_space *info; 2349 int ret; 2350 bool re_search = false; 2351 2352 spin_lock(&ctl->tree_lock); 2353 2354 again: 2355 ret = 0; 2356 if (!bytes) 2357 goto out_lock; 2358 2359 info = tree_search_offset(ctl, offset, 0, 0); 2360 if (!info) { 2361 /* 2362 * oops didn't find an extent that matched the space we wanted 2363 * to remove, look for a bitmap instead 2364 */ 2365 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2366 1, 0); 2367 if (!info) { 2368 /* 2369 * If we found a partial bit of our free space in a 2370 * bitmap but then couldn't find the other part this may 2371 * be a problem, so WARN about it.
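* (re_search is only set after remove_from_bitmap() returned -EAGAIN, i.e.
* part of the range was already cleared, so the remainder should exist.)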
2372 */ 2373 WARN_ON(re_search); 2374 goto out_lock; 2375 } 2376 } 2377 2378 re_search = false; 2379 if (!info->bitmap) { 2380 unlink_free_space(ctl, info); 2381 if (offset == info->offset) { 2382 u64 to_free = min(bytes, info->bytes); 2383 2384 info->bytes -= to_free; 2385 info->offset += to_free; 2386 if (info->bytes) { 2387 ret = link_free_space(ctl, info); 2388 WARN_ON(ret); 2389 } else { 2390 kmem_cache_free(btrfs_free_space_cachep, info); 2391 } 2392 2393 offset += to_free; 2394 bytes -= to_free; 2395 goto again; 2396 } else { 2397 u64 old_end = info->bytes + info->offset; 2398 2399 info->bytes = offset - info->offset; 2400 ret = link_free_space(ctl, info); 2401 WARN_ON(ret); 2402 if (ret) 2403 goto out_lock; 2404 2405 /* Not enough bytes in this entry to satisfy us */ 2406 if (old_end < offset + bytes) { 2407 bytes -= old_end - offset; 2408 offset = old_end; 2409 goto again; 2410 } else if (old_end == offset + bytes) { 2411 /* all done */ 2412 goto out_lock; 2413 } 2414 spin_unlock(&ctl->tree_lock); 2415 2416 ret = btrfs_add_free_space(block_group, offset + bytes, 2417 old_end - (offset + bytes)); 2418 WARN_ON(ret); 2419 goto out; 2420 } 2421 } 2422 2423 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2424 if (ret == -EAGAIN) { 2425 re_search = true; 2426 goto again; 2427 } 2428 out_lock: 2429 spin_unlock(&ctl->tree_lock); 2430 out: 2431 return ret; 2432 } 2433 2434 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2435 u64 bytes) 2436 { 2437 struct btrfs_fs_info *fs_info = block_group->fs_info; 2438 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2439 struct btrfs_free_space *info; 2440 struct rb_node *n; 2441 int count = 0; 2442 2443 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2444 info = rb_entry(n, struct btrfs_free_space, offset_index); 2445 if (info->bytes >= bytes && !block_group->ro) 2446 count++; 2447 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2448 info->offset, info->bytes, 2449 (info->bitmap) ? "yes" : "no"); 2450 } 2451 btrfs_info(fs_info, "block group has cluster?: %s", 2452 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2453 btrfs_info(fs_info, 2454 "%d blocks of free space at or bigger than bytes is", count); 2455 } 2456 2457 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2458 { 2459 struct btrfs_fs_info *fs_info = block_group->fs_info; 2460 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2461 2462 spin_lock_init(&ctl->tree_lock); 2463 ctl->unit = fs_info->sectorsize; 2464 ctl->start = block_group->key.objectid; 2465 ctl->private = block_group; 2466 ctl->op = &free_space_op; 2467 INIT_LIST_HEAD(&ctl->trimming_ranges); 2468 mutex_init(&ctl->cache_writeout_mutex); 2469 2470 /* 2471 * we only want to have 32k of ram per block group for keeping 2472 * track of free space, and if we pass 1/2 of that we want to 2473 * start converting things over to using bitmaps 2474 */ 2475 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2476 } 2477 2478 /* 2479 * for a given cluster, put all of its extents back into the free 2480 * space cache. If the block group passed doesn't match the block group 2481 * pointed to by the cluster, someone else raced in and freed the 2482 * cluster already. 
In that case, we just return without changing anything 2483 */ 2484 static int 2485 __btrfs_return_cluster_to_free_space( 2486 struct btrfs_block_group_cache *block_group, 2487 struct btrfs_free_cluster *cluster) 2488 { 2489 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2490 struct btrfs_free_space *entry; 2491 struct rb_node *node; 2492 2493 spin_lock(&cluster->lock); 2494 if (cluster->block_group != block_group) 2495 goto out; 2496 2497 cluster->block_group = NULL; 2498 cluster->window_start = 0; 2499 list_del_init(&cluster->block_group_list); 2500 2501 node = rb_first(&cluster->root); 2502 while (node) { 2503 bool bitmap; 2504 2505 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2506 node = rb_next(&entry->offset_index); 2507 rb_erase(&entry->offset_index, &cluster->root); 2508 RB_CLEAR_NODE(&entry->offset_index); 2509 2510 bitmap = (entry->bitmap != NULL); 2511 if (!bitmap) { 2512 try_merge_free_space(ctl, entry, false); 2513 steal_from_bitmap(ctl, entry, false); 2514 } 2515 tree_insert_offset(&ctl->free_space_offset, 2516 entry->offset, &entry->offset_index, bitmap); 2517 } 2518 cluster->root = RB_ROOT; 2519 2520 out: 2521 spin_unlock(&cluster->lock); 2522 btrfs_put_block_group(block_group); 2523 return 0; 2524 } 2525 2526 static void __btrfs_remove_free_space_cache_locked( 2527 struct btrfs_free_space_ctl *ctl) 2528 { 2529 struct btrfs_free_space *info; 2530 struct rb_node *node; 2531 2532 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2533 info = rb_entry(node, struct btrfs_free_space, offset_index); 2534 if (!info->bitmap) { 2535 unlink_free_space(ctl, info); 2536 kmem_cache_free(btrfs_free_space_cachep, info); 2537 } else { 2538 free_bitmap(ctl, info); 2539 } 2540 2541 cond_resched_lock(&ctl->tree_lock); 2542 } 2543 } 2544 2545 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2546 { 2547 spin_lock(&ctl->tree_lock); 2548 __btrfs_remove_free_space_cache_locked(ctl); 2549 spin_unlock(&ctl->tree_lock); 2550 } 2551 2552 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2553 { 2554 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2555 struct btrfs_free_cluster *cluster; 2556 struct list_head *head; 2557 2558 spin_lock(&ctl->tree_lock); 2559 while ((head = block_group->cluster_list.next) != 2560 &block_group->cluster_list) { 2561 cluster = list_entry(head, struct btrfs_free_cluster, 2562 block_group_list); 2563 2564 WARN_ON(cluster->block_group != block_group); 2565 __btrfs_return_cluster_to_free_space(block_group, cluster); 2566 2567 cond_resched_lock(&ctl->tree_lock); 2568 } 2569 __btrfs_remove_free_space_cache_locked(ctl); 2570 spin_unlock(&ctl->tree_lock); 2571 2572 } 2573 2574 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2575 u64 offset, u64 bytes, u64 empty_size, 2576 u64 *max_extent_size) 2577 { 2578 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2579 struct btrfs_free_space *entry = NULL; 2580 u64 bytes_search = bytes + empty_size; 2581 u64 ret = 0; 2582 u64 align_gap = 0; 2583 u64 align_gap_len = 0; 2584 2585 spin_lock(&ctl->tree_lock); 2586 entry = find_free_space(ctl, &offset, &bytes_search, 2587 block_group->full_stripe_len, max_extent_size); 2588 if (!entry) 2589 goto out; 2590 2591 ret = offset; 2592 if (entry->bitmap) { 2593 bitmap_clear_bits(ctl, entry, offset, bytes); 2594 if (!entry->bytes) 2595 free_bitmap(ctl, entry); 2596 } else { 2597 unlink_free_space(ctl, entry); 2598 align_gap_len = offset - 
entry->offset; 2599 align_gap = entry->offset; 2600 2601 entry->offset = offset + bytes; 2602 WARN_ON(entry->bytes < bytes + align_gap_len); 2603 2604 entry->bytes -= bytes + align_gap_len; 2605 if (!entry->bytes) 2606 kmem_cache_free(btrfs_free_space_cachep, entry); 2607 else 2608 link_free_space(ctl, entry); 2609 } 2610 out: 2611 spin_unlock(&ctl->tree_lock); 2612 2613 if (align_gap_len) 2614 __btrfs_add_free_space(block_group->fs_info, ctl, 2615 align_gap, align_gap_len); 2616 return ret; 2617 } 2618 2619 /* 2620 * given a cluster, put all of its extents back into the free space 2621 * cache. If a block group is passed, this function will only free 2622 * a cluster that belongs to the passed block group. 2623 * 2624 * Otherwise, it'll get a reference on the block group pointed to by the 2625 * cluster and remove the cluster from it. 2626 */ 2627 int btrfs_return_cluster_to_free_space( 2628 struct btrfs_block_group_cache *block_group, 2629 struct btrfs_free_cluster *cluster) 2630 { 2631 struct btrfs_free_space_ctl *ctl; 2632 int ret; 2633 2634 /* first, get a safe pointer to the block group */ 2635 spin_lock(&cluster->lock); 2636 if (!block_group) { 2637 block_group = cluster->block_group; 2638 if (!block_group) { 2639 spin_unlock(&cluster->lock); 2640 return 0; 2641 } 2642 } else if (cluster->block_group != block_group) { 2643 /* someone else has already freed it don't redo their work */ 2644 spin_unlock(&cluster->lock); 2645 return 0; 2646 } 2647 atomic_inc(&block_group->count); 2648 spin_unlock(&cluster->lock); 2649 2650 ctl = block_group->free_space_ctl; 2651 2652 /* now return any extents the cluster had on it */ 2653 spin_lock(&ctl->tree_lock); 2654 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2655 spin_unlock(&ctl->tree_lock); 2656 2657 /* finally drop our ref */ 2658 btrfs_put_block_group(block_group); 2659 return ret; 2660 } 2661 2662 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2663 struct btrfs_free_cluster *cluster, 2664 struct btrfs_free_space *entry, 2665 u64 bytes, u64 min_start, 2666 u64 *max_extent_size) 2667 { 2668 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2669 int err; 2670 u64 search_start = cluster->window_start; 2671 u64 search_bytes = bytes; 2672 u64 ret = 0; 2673 2674 search_start = min_start; 2675 search_bytes = bytes; 2676 2677 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2678 if (err) { 2679 if (search_bytes > *max_extent_size) 2680 *max_extent_size = search_bytes; 2681 return 0; 2682 } 2683 2684 ret = search_start; 2685 __bitmap_clear_bits(ctl, entry, ret, bytes); 2686 2687 return ret; 2688 } 2689 2690 /* 2691 * given a cluster, try to allocate 'bytes' from it, returns 0 2692 * if it couldn't find anything suitably large, or a logical disk offset 2693 * if things worked out 2694 */ 2695 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2696 struct btrfs_free_cluster *cluster, u64 bytes, 2697 u64 min_start, u64 *max_extent_size) 2698 { 2699 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2700 struct btrfs_free_space *entry = NULL; 2701 struct rb_node *node; 2702 u64 ret = 0; 2703 2704 spin_lock(&cluster->lock); 2705 if (bytes > cluster->max_size) 2706 goto out; 2707 2708 if (cluster->block_group != block_group) 2709 goto out; 2710 2711 node = rb_first(&cluster->root); 2712 if (!node) 2713 goto out; 2714 2715 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2716 while (1) { 2717 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2718 *max_extent_size = entry->bytes; 2719 2720 if (entry->bytes < bytes || 2721 (!entry->bitmap && entry->offset < min_start)) { 2722 node = rb_next(&entry->offset_index); 2723 if (!node) 2724 break; 2725 entry = rb_entry(node, struct btrfs_free_space, 2726 offset_index); 2727 continue; 2728 } 2729 2730 if (entry->bitmap) { 2731 ret = btrfs_alloc_from_bitmap(block_group, 2732 cluster, entry, bytes, 2733 cluster->window_start, 2734 max_extent_size); 2735 if (ret == 0) { 2736 node = rb_next(&entry->offset_index); 2737 if (!node) 2738 break; 2739 entry = rb_entry(node, struct btrfs_free_space, 2740 offset_index); 2741 continue; 2742 } 2743 cluster->window_start += bytes; 2744 } else { 2745 ret = entry->offset; 2746 2747 entry->offset += bytes; 2748 entry->bytes -= bytes; 2749 } 2750 2751 if (entry->bytes == 0) 2752 rb_erase(&entry->offset_index, &cluster->root); 2753 break; 2754 } 2755 out: 2756 spin_unlock(&cluster->lock); 2757 2758 if (!ret) 2759 return 0; 2760 2761 spin_lock(&ctl->tree_lock); 2762 2763 ctl->free_space -= bytes; 2764 if (entry->bytes == 0) { 2765 ctl->free_extents--; 2766 if (entry->bitmap) { 2767 kfree(entry->bitmap); 2768 ctl->total_bitmaps--; 2769 ctl->op->recalc_thresholds(ctl); 2770 } 2771 kmem_cache_free(btrfs_free_space_cachep, entry); 2772 } 2773 2774 spin_unlock(&ctl->tree_lock); 2775 2776 return ret; 2777 } 2778 2779 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2780 struct btrfs_free_space *entry, 2781 struct btrfs_free_cluster *cluster, 2782 u64 offset, u64 bytes, 2783 u64 cont1_bytes, u64 min_bytes) 2784 { 2785 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2786 unsigned long next_zero; 2787 unsigned long i; 2788 unsigned long want_bits; 2789 unsigned long min_bits; 2790 unsigned long found_bits; 2791 unsigned long max_bits = 0; 2792 unsigned long start = 0; 2793 unsigned long total_found = 0; 2794 int ret; 2795 2796 i = offset_to_bit(entry->offset, ctl->unit, 2797 max_t(u64, offset, entry->offset)); 2798 want_bits = bytes_to_bits(bytes, ctl->unit); 2799 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2800 2801 /* 2802 * Don't bother looking for a cluster in this bitmap if it's heavily 2803 * fragmented. 
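* A non-zero max_extent_size records the largest run found by an earlier
* scan and only shrinks as bits are cleared, so anything smaller than
* cont1_bytes cannot possibly satisfy us here.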
2804 */ 2805 if (entry->max_extent_size && 2806 entry->max_extent_size < cont1_bytes) 2807 return -ENOSPC; 2808 again: 2809 found_bits = 0; 2810 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2811 next_zero = find_next_zero_bit(entry->bitmap, 2812 BITS_PER_BITMAP, i); 2813 if (next_zero - i >= min_bits) { 2814 found_bits = next_zero - i; 2815 if (found_bits > max_bits) 2816 max_bits = found_bits; 2817 break; 2818 } 2819 if (next_zero - i > max_bits) 2820 max_bits = next_zero - i; 2821 i = next_zero; 2822 } 2823 2824 if (!found_bits) { 2825 entry->max_extent_size = (u64)max_bits * ctl->unit; 2826 return -ENOSPC; 2827 } 2828 2829 if (!total_found) { 2830 start = i; 2831 cluster->max_size = 0; 2832 } 2833 2834 total_found += found_bits; 2835 2836 if (cluster->max_size < found_bits * ctl->unit) 2837 cluster->max_size = found_bits * ctl->unit; 2838 2839 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2840 i = next_zero + 1; 2841 goto again; 2842 } 2843 2844 cluster->window_start = start * ctl->unit + entry->offset; 2845 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2846 ret = tree_insert_offset(&cluster->root, entry->offset, 2847 &entry->offset_index, 1); 2848 ASSERT(!ret); /* -EEXIST; Logic error */ 2849 2850 trace_btrfs_setup_cluster(block_group, cluster, 2851 total_found * ctl->unit, 1); 2852 return 0; 2853 } 2854 2855 /* 2856 * This searches the block group for just extents to fill the cluster with. 2857 * Try to find a cluster with at least bytes total bytes, at least one 2858 * extent of cont1_bytes, and other clusters of at least min_bytes. 2859 */ 2860 static noinline int 2861 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2862 struct btrfs_free_cluster *cluster, 2863 struct list_head *bitmaps, u64 offset, u64 bytes, 2864 u64 cont1_bytes, u64 min_bytes) 2865 { 2866 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2867 struct btrfs_free_space *first = NULL; 2868 struct btrfs_free_space *entry = NULL; 2869 struct btrfs_free_space *last; 2870 struct rb_node *node; 2871 u64 window_free; 2872 u64 max_extent; 2873 u64 total_size = 0; 2874 2875 entry = tree_search_offset(ctl, offset, 0, 1); 2876 if (!entry) 2877 return -ENOSPC; 2878 2879 /* 2880 * We don't want bitmaps, so just move along until we find a normal 2881 * extent entry. 
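* Bitmaps we walk past are collected on the bitmaps list so that
* setup_cluster_bitmap() can fall back to them if this pass fails.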
2882 */ 2883 while (entry->bitmap || entry->bytes < min_bytes) { 2884 if (entry->bitmap && list_empty(&entry->list)) 2885 list_add_tail(&entry->list, bitmaps); 2886 node = rb_next(&entry->offset_index); 2887 if (!node) 2888 return -ENOSPC; 2889 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2890 } 2891 2892 window_free = entry->bytes; 2893 max_extent = entry->bytes; 2894 first = entry; 2895 last = entry; 2896 2897 for (node = rb_next(&entry->offset_index); node; 2898 node = rb_next(&entry->offset_index)) { 2899 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2900 2901 if (entry->bitmap) { 2902 if (list_empty(&entry->list)) 2903 list_add_tail(&entry->list, bitmaps); 2904 continue; 2905 } 2906 2907 if (entry->bytes < min_bytes) 2908 continue; 2909 2910 last = entry; 2911 window_free += entry->bytes; 2912 if (entry->bytes > max_extent) 2913 max_extent = entry->bytes; 2914 } 2915 2916 if (window_free < bytes || max_extent < cont1_bytes) 2917 return -ENOSPC; 2918 2919 cluster->window_start = first->offset; 2920 2921 node = &first->offset_index; 2922 2923 /* 2924 * now we've found our entries, pull them out of the free space 2925 * cache and put them into the cluster rbtree 2926 */ 2927 do { 2928 int ret; 2929 2930 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2931 node = rb_next(&entry->offset_index); 2932 if (entry->bitmap || entry->bytes < min_bytes) 2933 continue; 2934 2935 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2936 ret = tree_insert_offset(&cluster->root, entry->offset, 2937 &entry->offset_index, 0); 2938 total_size += entry->bytes; 2939 ASSERT(!ret); /* -EEXIST; Logic error */ 2940 } while (node && entry != last); 2941 2942 cluster->max_size = max_extent; 2943 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2944 return 0; 2945 } 2946 2947 /* 2948 * This specifically looks for bitmaps that may work in the cluster, we assume 2949 * that we have already failed to find extents that will work. 2950 */ 2951 static noinline int 2952 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2953 struct btrfs_free_cluster *cluster, 2954 struct list_head *bitmaps, u64 offset, u64 bytes, 2955 u64 cont1_bytes, u64 min_bytes) 2956 { 2957 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2958 struct btrfs_free_space *entry = NULL; 2959 int ret = -ENOSPC; 2960 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2961 2962 if (ctl->total_bitmaps == 0) 2963 return -ENOSPC; 2964 2965 /* 2966 * The bitmap that covers offset won't be in the list unless offset 2967 * is just its start offset. 2968 */ 2969 if (!list_empty(bitmaps)) 2970 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2971 2972 if (!entry || entry->offset != bitmap_offset) { 2973 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2974 if (entry && list_empty(&entry->list)) 2975 list_add(&entry->list, bitmaps); 2976 } 2977 2978 list_for_each_entry(entry, bitmaps, list) { 2979 if (entry->bytes < bytes) 2980 continue; 2981 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2982 bytes, cont1_bytes, min_bytes); 2983 if (!ret) 2984 return 0; 2985 } 2986 2987 /* 2988 * The bitmaps list has all the bitmaps that record free space 2989 * starting after offset, so no more search is required. 2990 */ 2991 return -ENOSPC; 2992 } 2993 2994 /* 2995 * here we try to find a cluster of blocks in a block group. The goal 2996 * is to find at least bytes+empty_size. 2997 * We might not find them all in one contiguous area. 
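* cont1_bytes controls how large the single biggest extent in the cluster
* must be, while min_bytes is the smallest piece we are willing to add.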
2998 * 2999 * returns zero and sets up cluster if things worked out, otherwise 3000 * it returns -ENOSPC 3001 */ 3002 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3003 struct btrfs_block_group_cache *block_group, 3004 struct btrfs_free_cluster *cluster, 3005 u64 offset, u64 bytes, u64 empty_size) 3006 { 3007 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3008 struct btrfs_free_space *entry, *tmp; 3009 LIST_HEAD(bitmaps); 3010 u64 min_bytes; 3011 u64 cont1_bytes; 3012 int ret; 3013 3014 /* 3015 * Choose the minimum extent size we'll require for this 3016 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3017 * For metadata, allow allocations with smaller extents. For 3018 * data, keep it dense. 3019 */ 3020 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3021 cont1_bytes = min_bytes = bytes + empty_size; 3022 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3023 cont1_bytes = bytes; 3024 min_bytes = fs_info->sectorsize; 3025 } else { 3026 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3027 min_bytes = fs_info->sectorsize; 3028 } 3029 3030 spin_lock(&ctl->tree_lock); 3031 3032 /* 3033 * If we know we don't have enough space to make a cluster don't even 3034 * bother doing all the work to try and find one. 3035 */ 3036 if (ctl->free_space < bytes) { 3037 spin_unlock(&ctl->tree_lock); 3038 return -ENOSPC; 3039 } 3040 3041 spin_lock(&cluster->lock); 3042 3043 /* someone already found a cluster, hooray */ 3044 if (cluster->block_group) { 3045 ret = 0; 3046 goto out; 3047 } 3048 3049 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3050 min_bytes); 3051 3052 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3053 bytes + empty_size, 3054 cont1_bytes, min_bytes); 3055 if (ret) 3056 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3057 offset, bytes + empty_size, 3058 cont1_bytes, min_bytes); 3059 3060 /* Clear our temporary list */ 3061 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3062 list_del_init(&entry->list); 3063 3064 if (!ret) { 3065 atomic_inc(&block_group->count); 3066 list_add_tail(&cluster->block_group_list, 3067 &block_group->cluster_list); 3068 cluster->block_group = block_group; 3069 } else { 3070 trace_btrfs_failed_cluster_setup(block_group); 3071 } 3072 out: 3073 spin_unlock(&cluster->lock); 3074 spin_unlock(&ctl->tree_lock); 3075 3076 return ret; 3077 } 3078 3079 /* 3080 * simple code to zero out a cluster 3081 */ 3082 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3083 { 3084 spin_lock_init(&cluster->lock); 3085 spin_lock_init(&cluster->refill_lock); 3086 cluster->root = RB_ROOT; 3087 cluster->max_size = 0; 3088 cluster->fragmented = false; 3089 INIT_LIST_HEAD(&cluster->block_group_list); 3090 cluster->block_group = NULL; 3091 } 3092 3093 static int do_trimming(struct btrfs_block_group_cache *block_group, 3094 u64 *total_trimmed, u64 start, u64 bytes, 3095 u64 reserved_start, u64 reserved_bytes, 3096 struct btrfs_trim_range *trim_entry) 3097 { 3098 struct btrfs_space_info *space_info = block_group->space_info; 3099 struct btrfs_fs_info *fs_info = block_group->fs_info; 3100 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3101 int ret; 3102 int update = 0; 3103 u64 trimmed = 0; 3104 3105 spin_lock(&space_info->lock); 3106 spin_lock(&block_group->lock); 3107 if (!block_group->ro) { 3108 block_group->reserved += reserved_bytes; 3109 space_info->bytes_reserved += reserved_bytes; 3110 update = 1; 3111 } 3112 spin_unlock(&block_group->lock); 3113
spin_unlock(&space_info->lock); 3114 3115 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3116 if (!ret) 3117 *total_trimmed += trimmed; 3118 3119 mutex_lock(&ctl->cache_writeout_mutex); 3120 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3121 list_del(&trim_entry->list); 3122 mutex_unlock(&ctl->cache_writeout_mutex); 3123 3124 if (update) { 3125 spin_lock(&space_info->lock); 3126 spin_lock(&block_group->lock); 3127 if (block_group->ro) 3128 space_info->bytes_readonly += reserved_bytes; 3129 block_group->reserved -= reserved_bytes; 3130 space_info->bytes_reserved -= reserved_bytes; 3131 spin_unlock(&space_info->lock); 3132 spin_unlock(&block_group->lock); 3133 } 3134 3135 return ret; 3136 } 3137 3138 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3139 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3140 { 3141 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3142 struct btrfs_free_space *entry; 3143 struct rb_node *node; 3144 int ret = 0; 3145 u64 extent_start; 3146 u64 extent_bytes; 3147 u64 bytes; 3148 3149 while (start < end) { 3150 struct btrfs_trim_range trim_entry; 3151 3152 mutex_lock(&ctl->cache_writeout_mutex); 3153 spin_lock(&ctl->tree_lock); 3154 3155 if (ctl->free_space < minlen) { 3156 spin_unlock(&ctl->tree_lock); 3157 mutex_unlock(&ctl->cache_writeout_mutex); 3158 break; 3159 } 3160 3161 entry = tree_search_offset(ctl, start, 0, 1); 3162 if (!entry) { 3163 spin_unlock(&ctl->tree_lock); 3164 mutex_unlock(&ctl->cache_writeout_mutex); 3165 break; 3166 } 3167 3168 /* skip bitmaps */ 3169 while (entry->bitmap) { 3170 node = rb_next(&entry->offset_index); 3171 if (!node) { 3172 spin_unlock(&ctl->tree_lock); 3173 mutex_unlock(&ctl->cache_writeout_mutex); 3174 goto out; 3175 } 3176 entry = rb_entry(node, struct btrfs_free_space, 3177 offset_index); 3178 } 3179 3180 if (entry->offset >= end) { 3181 spin_unlock(&ctl->tree_lock); 3182 mutex_unlock(&ctl->cache_writeout_mutex); 3183 break; 3184 } 3185 3186 extent_start = entry->offset; 3187 extent_bytes = entry->bytes; 3188 start = max(start, extent_start); 3189 bytes = min(extent_start + extent_bytes, end) - start; 3190 if (bytes < minlen) { 3191 spin_unlock(&ctl->tree_lock); 3192 mutex_unlock(&ctl->cache_writeout_mutex); 3193 goto next; 3194 } 3195 3196 unlink_free_space(ctl, entry); 3197 kmem_cache_free(btrfs_free_space_cachep, entry); 3198 3199 spin_unlock(&ctl->tree_lock); 3200 trim_entry.start = extent_start; 3201 trim_entry.bytes = extent_bytes; 3202 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3203 mutex_unlock(&ctl->cache_writeout_mutex); 3204 3205 ret = do_trimming(block_group, total_trimmed, start, bytes, 3206 extent_start, extent_bytes, &trim_entry); 3207 if (ret) 3208 break; 3209 next: 3210 start += bytes; 3211 3212 if (fatal_signal_pending(current)) { 3213 ret = -ERESTARTSYS; 3214 break; 3215 } 3216 3217 cond_resched(); 3218 } 3219 out: 3220 return ret; 3221 } 3222 3223 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3224 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3225 { 3226 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3227 struct btrfs_free_space *entry; 3228 int ret = 0; 3229 int ret2; 3230 u64 bytes; 3231 u64 offset = offset_to_bitmap(ctl, start); 3232 3233 while (offset < end) { 3234 bool next_bitmap = false; 3235 struct btrfs_trim_range trim_entry; 3236 3237 mutex_lock(&ctl->cache_writeout_mutex); 3238 spin_lock(&ctl->tree_lock); 3239 3240 if (ctl->free_space < minlen) { 3241 
spin_unlock(&ctl->tree_lock); 3242 mutex_unlock(&ctl->cache_writeout_mutex); 3243 break; 3244 } 3245 3246 entry = tree_search_offset(ctl, offset, 1, 0); 3247 if (!entry) { 3248 spin_unlock(&ctl->tree_lock); 3249 mutex_unlock(&ctl->cache_writeout_mutex); 3250 next_bitmap = true; 3251 goto next; 3252 } 3253 3254 bytes = minlen; 3255 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3256 if (ret2 || start >= end) { 3257 spin_unlock(&ctl->tree_lock); 3258 mutex_unlock(&ctl->cache_writeout_mutex); 3259 next_bitmap = true; 3260 goto next; 3261 } 3262 3263 bytes = min(bytes, end - start); 3264 if (bytes < minlen) { 3265 spin_unlock(&ctl->tree_lock); 3266 mutex_unlock(&ctl->cache_writeout_mutex); 3267 goto next; 3268 } 3269 3270 bitmap_clear_bits(ctl, entry, start, bytes); 3271 if (entry->bytes == 0) 3272 free_bitmap(ctl, entry); 3273 3274 spin_unlock(&ctl->tree_lock); 3275 trim_entry.start = start; 3276 trim_entry.bytes = bytes; 3277 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3278 mutex_unlock(&ctl->cache_writeout_mutex); 3279 3280 ret = do_trimming(block_group, total_trimmed, start, bytes, 3281 start, bytes, &trim_entry); 3282 if (ret) 3283 break; 3284 next: 3285 if (next_bitmap) { 3286 offset += BITS_PER_BITMAP * ctl->unit; 3287 } else { 3288 start += bytes; 3289 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3290 offset += BITS_PER_BITMAP * ctl->unit; 3291 } 3292 3293 if (fatal_signal_pending(current)) { 3294 ret = -ERESTARTSYS; 3295 break; 3296 } 3297 3298 cond_resched(); 3299 } 3300 3301 return ret; 3302 } 3303 3304 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3305 { 3306 atomic_inc(&cache->trimming); 3307 } 3308 3309 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3310 { 3311 struct btrfs_fs_info *fs_info = block_group->fs_info; 3312 struct extent_map_tree *em_tree; 3313 struct extent_map *em; 3314 bool cleanup; 3315 3316 spin_lock(&block_group->lock); 3317 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3318 block_group->removed); 3319 spin_unlock(&block_group->lock); 3320 3321 if (cleanup) { 3322 mutex_lock(&fs_info->chunk_mutex); 3323 em_tree = &fs_info->mapping_tree.map_tree; 3324 write_lock(&em_tree->lock); 3325 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3326 1); 3327 BUG_ON(!em); /* logic error, can't happen */ 3328 /* 3329 * remove_extent_mapping() will delete us from the pinned_chunks 3330 * list, which is protected by the chunk mutex. 3331 */ 3332 remove_extent_mapping(em_tree, em); 3333 write_unlock(&em_tree->lock); 3334 mutex_unlock(&fs_info->chunk_mutex); 3335 3336 /* once for us and once for the tree */ 3337 free_extent_map(em); 3338 free_extent_map(em); 3339 3340 /* 3341 * We've left one free space entry and other tasks trimming 3342 * this block group have left 1 entry each one. Free them. 
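* (do_trimming() re-adds each reserved range via btrfs_add_free_space(),
* which is where these leftover entries come from.)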
3343 */ 3344 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3345 } 3346 } 3347 3348 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3349 u64 *trimmed, u64 start, u64 end, u64 minlen) 3350 { 3351 int ret; 3352 3353 *trimmed = 0; 3354 3355 spin_lock(&block_group->lock); 3356 if (block_group->removed) { 3357 spin_unlock(&block_group->lock); 3358 return 0; 3359 } 3360 btrfs_get_block_group_trimming(block_group); 3361 spin_unlock(&block_group->lock); 3362 3363 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3364 if (ret) 3365 goto out; 3366 3367 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3368 out: 3369 btrfs_put_block_group_trimming(block_group); 3370 return ret; 3371 } 3372 3373 /* 3374 * Find the left-most item in the cache tree, and then return the 3375 * smallest inode number in the item. 3376 * 3377 * Note: the returned inode number may not be the smallest one in 3378 * the tree, if the left-most item is a bitmap. 3379 */ 3380 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3381 { 3382 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3383 struct btrfs_free_space *entry = NULL; 3384 u64 ino = 0; 3385 3386 spin_lock(&ctl->tree_lock); 3387 3388 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3389 goto out; 3390 3391 entry = rb_entry(rb_first(&ctl->free_space_offset), 3392 struct btrfs_free_space, offset_index); 3393 3394 if (!entry->bitmap) { 3395 ino = entry->offset; 3396 3397 unlink_free_space(ctl, entry); 3398 entry->offset++; 3399 entry->bytes--; 3400 if (!entry->bytes) 3401 kmem_cache_free(btrfs_free_space_cachep, entry); 3402 else 3403 link_free_space(ctl, entry); 3404 } else { 3405 u64 offset = 0; 3406 u64 count = 1; 3407 int ret; 3408 3409 ret = search_bitmap(ctl, entry, &offset, &count, true); 3410 /* Logic error; Should be empty if it can't find anything */ 3411 ASSERT(!ret); 3412 3413 ino = offset; 3414 bitmap_clear_bits(ctl, entry, offset, 1); 3415 if (entry->bytes == 0) 3416 free_bitmap(ctl, entry); 3417 } 3418 out: 3419 spin_unlock(&ctl->tree_lock); 3420 3421 return ino; 3422 } 3423 3424 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3425 struct btrfs_path *path) 3426 { 3427 struct inode *inode = NULL; 3428 3429 spin_lock(&root->ino_cache_lock); 3430 if (root->ino_cache_inode) 3431 inode = igrab(root->ino_cache_inode); 3432 spin_unlock(&root->ino_cache_lock); 3433 if (inode) 3434 return inode; 3435 3436 inode = __lookup_free_space_inode(root, path, 0); 3437 if (IS_ERR(inode)) 3438 return inode; 3439 3440 spin_lock(&root->ino_cache_lock); 3441 if (!btrfs_fs_closing(root->fs_info)) 3442 root->ino_cache_inode = igrab(inode); 3443 spin_unlock(&root->ino_cache_lock); 3444 3445 return inode; 3446 } 3447 3448 int create_free_ino_inode(struct btrfs_root *root, 3449 struct btrfs_trans_handle *trans, 3450 struct btrfs_path *path) 3451 { 3452 return __create_free_space_inode(root, trans, path, 3453 BTRFS_FREE_INO_OBJECTID, 0); 3454 } 3455 3456 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3457 { 3458 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3459 struct btrfs_path *path; 3460 struct inode *inode; 3461 int ret = 0; 3462 u64 root_gen = btrfs_root_generation(&root->root_item); 3463 3464 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3465 return 0; 3466 3467 /* 3468 * If we're unmounting then just return, since this does a search on the 3469 * normal root and not the commit root and we could deadlock. 
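* Returning 0 just means the cache starts out empty; the other early
* returns of 0 below (no mount option, no memory for the path) do the same.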
3470 */ 3471 if (btrfs_fs_closing(fs_info)) 3472 return 0; 3473 3474 path = btrfs_alloc_path(); 3475 if (!path) 3476 return 0; 3477 3478 inode = lookup_free_ino_inode(root, path); 3479 if (IS_ERR(inode)) 3480 goto out; 3481 3482 if (root_gen != BTRFS_I(inode)->generation) 3483 goto out_put; 3484 3485 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3486 3487 if (ret < 0) 3488 btrfs_err(fs_info, 3489 "failed to load free ino cache for root %llu", 3490 root->root_key.objectid); 3491 out_put: 3492 iput(inode); 3493 out: 3494 btrfs_free_path(path); 3495 return ret; 3496 } 3497 3498 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3499 struct btrfs_trans_handle *trans, 3500 struct btrfs_path *path, 3501 struct inode *inode) 3502 { 3503 struct btrfs_fs_info *fs_info = root->fs_info; 3504 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3505 int ret; 3506 struct btrfs_io_ctl io_ctl; 3507 bool release_metadata = true; 3508 3509 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3510 return 0; 3511 3512 memset(&io_ctl, 0, sizeof(io_ctl)); 3513 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3514 if (!ret) { 3515 /* 3516 * At this point writepages() didn't error out, so our metadata 3517 * reservation is released when the writeback finishes, at 3518 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3519 * with or without an error. 3520 */ 3521 release_metadata = false; 3522 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3523 } 3524 3525 if (ret) { 3526 if (release_metadata) 3527 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3528 inode->i_size, true); 3529 #ifdef DEBUG 3530 btrfs_err(fs_info, 3531 "failed to write free ino cache for root %llu", 3532 root->root_key.objectid); 3533 #endif 3534 } 3535 3536 return ret; 3537 } 3538 3539 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3540 /* 3541 * Use this if you need to make a bitmap or extent entry specifically, it 3542 * doesn't do any of the merging that add_free_space does, this acts a lot like 3543 * how the free space cache loading stuff works, so you can get really weird 3544 * configurations. 
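* (This helper and test_check_exists() below are only built when
* CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled.)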
3545 */ 3546 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3547 u64 offset, u64 bytes, bool bitmap) 3548 { 3549 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3550 struct btrfs_free_space *info = NULL, *bitmap_info; 3551 void *map = NULL; 3552 u64 bytes_added; 3553 int ret; 3554 3555 again: 3556 if (!info) { 3557 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3558 if (!info) 3559 return -ENOMEM; 3560 } 3561 3562 if (!bitmap) { 3563 spin_lock(&ctl->tree_lock); 3564 info->offset = offset; 3565 info->bytes = bytes; 3566 info->max_extent_size = 0; 3567 ret = link_free_space(ctl, info); 3568 spin_unlock(&ctl->tree_lock); 3569 if (ret) 3570 kmem_cache_free(btrfs_free_space_cachep, info); 3571 return ret; 3572 } 3573 3574 if (!map) { 3575 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3576 if (!map) { 3577 kmem_cache_free(btrfs_free_space_cachep, info); 3578 return -ENOMEM; 3579 } 3580 } 3581 3582 spin_lock(&ctl->tree_lock); 3583 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3584 1, 0); 3585 if (!bitmap_info) { 3586 info->bitmap = map; 3587 map = NULL; 3588 add_new_bitmap(ctl, info, offset); 3589 bitmap_info = info; 3590 info = NULL; 3591 } 3592 3593 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3594 3595 bytes -= bytes_added; 3596 offset += bytes_added; 3597 spin_unlock(&ctl->tree_lock); 3598 3599 if (bytes) 3600 goto again; 3601 3602 if (info) 3603 kmem_cache_free(btrfs_free_space_cachep, info); 3604 if (map) 3605 kfree(map); 3606 return 0; 3607 } 3608 3609 /* 3610 * Checks to see if the given range is in the free space cache. This is really 3611 * just used to check the absence of space, so if there is free space in the 3612 * range at all we will return 1. 3613 */ 3614 int test_check_exists(struct btrfs_block_group_cache *cache, 3615 u64 offset, u64 bytes) 3616 { 3617 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3618 struct btrfs_free_space *info; 3619 int ret = 0; 3620 3621 spin_lock(&ctl->tree_lock); 3622 info = tree_search_offset(ctl, offset, 0, 0); 3623 if (!info) { 3624 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3625 1, 0); 3626 if (!info) 3627 goto out; 3628 } 3629 3630 have_info: 3631 if (info->bitmap) { 3632 u64 bit_off, bit_bytes; 3633 struct rb_node *n; 3634 struct btrfs_free_space *tmp; 3635 3636 bit_off = offset; 3637 bit_bytes = ctl->unit; 3638 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3639 if (!ret) { 3640 if (bit_off == offset) { 3641 ret = 1; 3642 goto out; 3643 } else if (bit_off > offset && 3644 offset + bytes > bit_off) { 3645 ret = 1; 3646 goto out; 3647 } 3648 } 3649 3650 n = rb_prev(&info->offset_index); 3651 while (n) { 3652 tmp = rb_entry(n, struct btrfs_free_space, 3653 offset_index); 3654 if (tmp->offset + tmp->bytes < offset) 3655 break; 3656 if (offset + bytes < tmp->offset) { 3657 n = rb_prev(&tmp->offset_index); 3658 continue; 3659 } 3660 info = tmp; 3661 goto have_info; 3662 } 3663 3664 n = rb_next(&info->offset_index); 3665 while (n) { 3666 tmp = rb_entry(n, struct btrfs_free_space, 3667 offset_index); 3668 if (offset + bytes < tmp->offset) 3669 break; 3670 if (tmp->offset + tmp->bytes < offset) { 3671 n = rb_next(&tmp->offset_index); 3672 continue; 3673 } 3674 info = tmp; 3675 goto have_info; 3676 } 3677 3678 ret = 0; 3679 goto out; 3680 } 3681 3682 if (info->offset == offset) { 3683 ret = 1; 3684 goto out; 3685 } 3686 3687 if (offset > info->offset && offset < info->offset + info->bytes) 3688 ret = 1; 3689 out: 3690 
spin_unlock(&ctl->tree_lock); 3691 return ret; 3692 } 3693 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3694