// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_32K

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(fs_info->sb, &location, root, NULL);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if ((BTRFS_I(inode)->flags & flags) != flags) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_fs_info *fs_info,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(fs_info->tree_root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) +
		btrfs_calc_trans_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group_cache *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * now that we've truncated the cache away, it's no longer
		 * set up or written
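		 * out to disk; setting BTRFS_DC_CLEAR below keeps
		 * load_free_space_cache() from trusting the stale on-disk copy.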
245 */ 246 spin_lock(&block_group->lock); 247 block_group->disk_cache_state = BTRFS_DC_CLEAR; 248 spin_unlock(&block_group->lock); 249 btrfs_free_path(path); 250 } 251 252 btrfs_i_size_write(BTRFS_I(inode), 0); 253 truncate_pagecache(inode, 0); 254 255 /* 256 * We skip the throttling logic for free space cache inodes, so we don't 257 * need to check for -EAGAIN. 258 */ 259 ret = btrfs_truncate_inode_items(trans, root, inode, 260 0, BTRFS_EXTENT_DATA_KEY); 261 if (ret) 262 goto fail; 263 264 ret = btrfs_update_inode(trans, root, inode); 265 266 fail: 267 if (locked) 268 mutex_unlock(&trans->transaction->cache_write_mutex); 269 if (ret) 270 btrfs_abort_transaction(trans, ret); 271 272 return ret; 273 } 274 275 static void readahead_cache(struct inode *inode) 276 { 277 struct file_ra_state *ra; 278 unsigned long last_index; 279 280 ra = kzalloc(sizeof(*ra), GFP_NOFS); 281 if (!ra) 282 return; 283 284 file_ra_state_init(ra, inode->i_mapping); 285 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 286 287 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 288 289 kfree(ra); 290 } 291 292 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 293 int write) 294 { 295 int num_pages; 296 int check_crcs = 0; 297 298 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 299 300 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 301 check_crcs = 1; 302 303 /* Make sure we can fit our crcs into the first page */ 304 if (write && check_crcs && 305 (num_pages * sizeof(u32)) >= PAGE_SIZE) 306 return -ENOSPC; 307 308 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 309 310 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 311 if (!io_ctl->pages) 312 return -ENOMEM; 313 314 io_ctl->num_pages = num_pages; 315 io_ctl->fs_info = btrfs_sb(inode->i_sb); 316 io_ctl->check_crcs = check_crcs; 317 io_ctl->inode = inode; 318 319 return 0; 320 } 321 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 322 323 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 324 { 325 kfree(io_ctl->pages); 326 io_ctl->pages = NULL; 327 } 328 329 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 330 { 331 if (io_ctl->cur) { 332 io_ctl->cur = NULL; 333 io_ctl->orig = NULL; 334 } 335 } 336 337 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 338 { 339 ASSERT(io_ctl->index < io_ctl->num_pages); 340 io_ctl->page = io_ctl->pages[io_ctl->index++]; 341 io_ctl->cur = page_address(io_ctl->page); 342 io_ctl->orig = io_ctl->cur; 343 io_ctl->size = PAGE_SIZE; 344 if (clear) 345 clear_page(io_ctl->cur); 346 } 347 348 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 349 { 350 int i; 351 352 io_ctl_unmap_page(io_ctl); 353 354 for (i = 0; i < io_ctl->num_pages; i++) { 355 if (io_ctl->pages[i]) { 356 ClearPageChecked(io_ctl->pages[i]); 357 unlock_page(io_ctl->pages[i]); 358 put_page(io_ctl->pages[i]); 359 } 360 } 361 } 362 363 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 364 int uptodate) 365 { 366 struct page *page; 367 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 368 int i; 369 370 for (i = 0; i < io_ctl->num_pages; i++) { 371 page = find_or_create_page(inode->i_mapping, i, mask); 372 if (!page) { 373 io_ctl_drop_pages(io_ctl); 374 return -ENOMEM; 375 } 376 io_ctl->pages[i] = page; 377 if (uptodate && !PageUptodate(page)) { 378 btrfs_readpage(NULL, page); 379 lock_page(page); 380 if (!PageUptodate(page)) { 381 btrfs_err(BTRFS_I(inode)->root->fs_info, 382 "error reading free space cache"); 383 
io_ctl_drop_pages(io_ctl); 384 return -EIO; 385 } 386 } 387 } 388 389 for (i = 0; i < io_ctl->num_pages; i++) { 390 clear_page_dirty_for_io(io_ctl->pages[i]); 391 set_page_extent_mapped(io_ctl->pages[i]); 392 } 393 394 return 0; 395 } 396 397 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 398 { 399 __le64 *val; 400 401 io_ctl_map_page(io_ctl, 1); 402 403 /* 404 * Skip the csum areas. If we don't check crcs then we just have a 405 * 64bit chunk at the front of the first page. 406 */ 407 if (io_ctl->check_crcs) { 408 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 409 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 410 } else { 411 io_ctl->cur += sizeof(u64); 412 io_ctl->size -= sizeof(u64) * 2; 413 } 414 415 val = io_ctl->cur; 416 *val = cpu_to_le64(generation); 417 io_ctl->cur += sizeof(u64); 418 } 419 420 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 421 { 422 __le64 *gen; 423 424 /* 425 * Skip the crc area. If we don't check crcs then we just have a 64bit 426 * chunk at the front of the first page. 427 */ 428 if (io_ctl->check_crcs) { 429 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 430 io_ctl->size -= sizeof(u64) + 431 (sizeof(u32) * io_ctl->num_pages); 432 } else { 433 io_ctl->cur += sizeof(u64); 434 io_ctl->size -= sizeof(u64) * 2; 435 } 436 437 gen = io_ctl->cur; 438 if (le64_to_cpu(*gen) != generation) { 439 btrfs_err_rl(io_ctl->fs_info, 440 "space cache generation (%llu) does not match inode (%llu)", 441 *gen, generation); 442 io_ctl_unmap_page(io_ctl); 443 return -EIO; 444 } 445 io_ctl->cur += sizeof(u64); 446 return 0; 447 } 448 449 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 450 { 451 u32 *tmp; 452 u32 crc = ~(u32)0; 453 unsigned offset = 0; 454 455 if (!io_ctl->check_crcs) { 456 io_ctl_unmap_page(io_ctl); 457 return; 458 } 459 460 if (index == 0) 461 offset = sizeof(u32) * io_ctl->num_pages; 462 463 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 464 PAGE_SIZE - offset); 465 btrfs_csum_final(crc, (u8 *)&crc); 466 io_ctl_unmap_page(io_ctl); 467 tmp = page_address(io_ctl->pages[0]); 468 tmp += index; 469 *tmp = crc; 470 } 471 472 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 473 { 474 u32 *tmp, val; 475 u32 crc = ~(u32)0; 476 unsigned offset = 0; 477 478 if (!io_ctl->check_crcs) { 479 io_ctl_map_page(io_ctl, 0); 480 return 0; 481 } 482 483 if (index == 0) 484 offset = sizeof(u32) * io_ctl->num_pages; 485 486 tmp = page_address(io_ctl->pages[0]); 487 tmp += index; 488 val = *tmp; 489 490 io_ctl_map_page(io_ctl, 0); 491 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 492 PAGE_SIZE - offset); 493 btrfs_csum_final(crc, (u8 *)&crc); 494 if (val != crc) { 495 btrfs_err_rl(io_ctl->fs_info, 496 "csum mismatch on free space cache"); 497 io_ctl_unmap_page(io_ctl); 498 return -EIO; 499 } 500 501 return 0; 502 } 503 504 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 505 void *bitmap) 506 { 507 struct btrfs_free_space_entry *entry; 508 509 if (!io_ctl->cur) 510 return -ENOSPC; 511 512 entry = io_ctl->cur; 513 entry->offset = cpu_to_le64(offset); 514 entry->bytes = cpu_to_le64(bytes); 515 entry->type = (bitmap) ? 
BTRFS_FREE_SPACE_BITMAP : 516 BTRFS_FREE_SPACE_EXTENT; 517 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 518 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 519 520 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 521 return 0; 522 523 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 524 525 /* No more pages to map */ 526 if (io_ctl->index >= io_ctl->num_pages) 527 return 0; 528 529 /* map the next page */ 530 io_ctl_map_page(io_ctl, 1); 531 return 0; 532 } 533 534 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 535 { 536 if (!io_ctl->cur) 537 return -ENOSPC; 538 539 /* 540 * If we aren't at the start of the current page, unmap this one and 541 * map the next one if there is any left. 542 */ 543 if (io_ctl->cur != io_ctl->orig) { 544 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 545 if (io_ctl->index >= io_ctl->num_pages) 546 return -ENOSPC; 547 io_ctl_map_page(io_ctl, 0); 548 } 549 550 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 551 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 552 if (io_ctl->index < io_ctl->num_pages) 553 io_ctl_map_page(io_ctl, 0); 554 return 0; 555 } 556 557 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 558 { 559 /* 560 * If we're not on the boundary we know we've modified the page and we 561 * need to crc the page. 562 */ 563 if (io_ctl->cur != io_ctl->orig) 564 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 565 else 566 io_ctl_unmap_page(io_ctl); 567 568 while (io_ctl->index < io_ctl->num_pages) { 569 io_ctl_map_page(io_ctl, 1); 570 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 571 } 572 } 573 574 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 575 struct btrfs_free_space *entry, u8 *type) 576 { 577 struct btrfs_free_space_entry *e; 578 int ret; 579 580 if (!io_ctl->cur) { 581 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 582 if (ret) 583 return ret; 584 } 585 586 e = io_ctl->cur; 587 entry->offset = le64_to_cpu(e->offset); 588 entry->bytes = le64_to_cpu(e->bytes); 589 *type = e->type; 590 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 591 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 592 593 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 594 return 0; 595 596 io_ctl_unmap_page(io_ctl); 597 598 return 0; 599 } 600 601 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 602 struct btrfs_free_space *entry) 603 { 604 int ret; 605 606 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 607 if (ret) 608 return ret; 609 610 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 611 io_ctl_unmap_page(io_ctl); 612 613 return 0; 614 } 615 616 /* 617 * Since we attach pinned extents after the fact we can have contiguous sections 618 * of free space that are split up in entries. This poses a problem with the 619 * tree logging stuff since it could have allocated across what appears to be 2 620 * entries since we would have merged the entries when adding the pinned extents 621 * back to the free space cache. So run through the space cache that we just 622 * loaded and merge contiguous entries. This will make the log replay stuff not 623 * blow up and it will make for nicer allocator behavior. 
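 * Note: the scan below restarts from the start of the rbtree after every
 * merge, since unlinking and re-linking the merged entry changes the tree.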
624 */ 625 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 626 { 627 struct btrfs_free_space *e, *prev = NULL; 628 struct rb_node *n; 629 630 again: 631 spin_lock(&ctl->tree_lock); 632 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 633 e = rb_entry(n, struct btrfs_free_space, offset_index); 634 if (!prev) 635 goto next; 636 if (e->bitmap || prev->bitmap) 637 goto next; 638 if (prev->offset + prev->bytes == e->offset) { 639 unlink_free_space(ctl, prev); 640 unlink_free_space(ctl, e); 641 prev->bytes += e->bytes; 642 kmem_cache_free(btrfs_free_space_cachep, e); 643 link_free_space(ctl, prev); 644 prev = NULL; 645 spin_unlock(&ctl->tree_lock); 646 goto again; 647 } 648 next: 649 prev = e; 650 } 651 spin_unlock(&ctl->tree_lock); 652 } 653 654 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 655 struct btrfs_free_space_ctl *ctl, 656 struct btrfs_path *path, u64 offset) 657 { 658 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 659 struct btrfs_free_space_header *header; 660 struct extent_buffer *leaf; 661 struct btrfs_io_ctl io_ctl; 662 struct btrfs_key key; 663 struct btrfs_free_space *e, *n; 664 LIST_HEAD(bitmaps); 665 u64 num_entries; 666 u64 num_bitmaps; 667 u64 generation; 668 u8 type; 669 int ret = 0; 670 671 /* Nothing in the space cache, goodbye */ 672 if (!i_size_read(inode)) 673 return 0; 674 675 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 676 key.offset = offset; 677 key.type = 0; 678 679 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 680 if (ret < 0) 681 return 0; 682 else if (ret > 0) { 683 btrfs_release_path(path); 684 return 0; 685 } 686 687 ret = -1; 688 689 leaf = path->nodes[0]; 690 header = btrfs_item_ptr(leaf, path->slots[0], 691 struct btrfs_free_space_header); 692 num_entries = btrfs_free_space_entries(leaf, header); 693 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 694 generation = btrfs_free_space_generation(leaf, header); 695 btrfs_release_path(path); 696 697 if (!BTRFS_I(inode)->generation) { 698 btrfs_info(fs_info, 699 "the free space cache file (%llu) is invalid, skip it", 700 offset); 701 return 0; 702 } 703 704 if (BTRFS_I(inode)->generation != generation) { 705 btrfs_err(fs_info, 706 "free space inode generation (%llu) did not match free space cache generation (%llu)", 707 BTRFS_I(inode)->generation, generation); 708 return 0; 709 } 710 711 if (!num_entries) 712 return 0; 713 714 ret = io_ctl_init(&io_ctl, inode, 0); 715 if (ret) 716 return ret; 717 718 readahead_cache(inode); 719 720 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 721 if (ret) 722 goto out; 723 724 ret = io_ctl_check_crc(&io_ctl, 0); 725 if (ret) 726 goto free_cache; 727 728 ret = io_ctl_check_generation(&io_ctl, generation); 729 if (ret) 730 goto free_cache; 731 732 while (num_entries) { 733 e = kmem_cache_zalloc(btrfs_free_space_cachep, 734 GFP_NOFS); 735 if (!e) 736 goto free_cache; 737 738 ret = io_ctl_read_entry(&io_ctl, e, &type); 739 if (ret) { 740 kmem_cache_free(btrfs_free_space_cachep, e); 741 goto free_cache; 742 } 743 744 if (!e->bytes) { 745 kmem_cache_free(btrfs_free_space_cachep, e); 746 goto free_cache; 747 } 748 749 if (type == BTRFS_FREE_SPACE_EXTENT) { 750 spin_lock(&ctl->tree_lock); 751 ret = link_free_space(ctl, e); 752 spin_unlock(&ctl->tree_lock); 753 if (ret) { 754 btrfs_err(fs_info, 755 "Duplicate entries in free space cache, dumping"); 756 kmem_cache_free(btrfs_free_space_cachep, e); 757 goto free_cache; 758 } 759 } else { 760 ASSERT(num_bitmaps); 761 num_bitmaps--; 762 e->bitmap = 
kzalloc(PAGE_SIZE, GFP_NOFS); 763 if (!e->bitmap) { 764 kmem_cache_free( 765 btrfs_free_space_cachep, e); 766 goto free_cache; 767 } 768 spin_lock(&ctl->tree_lock); 769 ret = link_free_space(ctl, e); 770 ctl->total_bitmaps++; 771 ctl->op->recalc_thresholds(ctl); 772 spin_unlock(&ctl->tree_lock); 773 if (ret) { 774 btrfs_err(fs_info, 775 "Duplicate entries in free space cache, dumping"); 776 kmem_cache_free(btrfs_free_space_cachep, e); 777 goto free_cache; 778 } 779 list_add_tail(&e->list, &bitmaps); 780 } 781 782 num_entries--; 783 } 784 785 io_ctl_unmap_page(&io_ctl); 786 787 /* 788 * We add the bitmaps at the end of the entries in order that 789 * the bitmap entries are added to the cache. 790 */ 791 list_for_each_entry_safe(e, n, &bitmaps, list) { 792 list_del_init(&e->list); 793 ret = io_ctl_read_bitmap(&io_ctl, e); 794 if (ret) 795 goto free_cache; 796 } 797 798 io_ctl_drop_pages(&io_ctl); 799 merge_space_tree(ctl); 800 ret = 1; 801 out: 802 io_ctl_free(&io_ctl); 803 return ret; 804 free_cache: 805 io_ctl_drop_pages(&io_ctl); 806 __btrfs_remove_free_space_cache(ctl); 807 goto out; 808 } 809 810 int load_free_space_cache(struct btrfs_fs_info *fs_info, 811 struct btrfs_block_group_cache *block_group) 812 { 813 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 814 struct inode *inode; 815 struct btrfs_path *path; 816 int ret = 0; 817 bool matched; 818 u64 used = btrfs_block_group_used(&block_group->item); 819 820 /* 821 * If this block group has been marked to be cleared for one reason or 822 * another then we can't trust the on disk cache, so just return. 823 */ 824 spin_lock(&block_group->lock); 825 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 826 spin_unlock(&block_group->lock); 827 return 0; 828 } 829 spin_unlock(&block_group->lock); 830 831 path = btrfs_alloc_path(); 832 if (!path) 833 return 0; 834 path->search_commit_root = 1; 835 path->skip_locking = 1; 836 837 inode = lookup_free_space_inode(fs_info, block_group, path); 838 if (IS_ERR(inode)) { 839 btrfs_free_path(path); 840 return 0; 841 } 842 843 /* We may have converted the inode and made the cache invalid. 
*/ 844 spin_lock(&block_group->lock); 845 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 846 spin_unlock(&block_group->lock); 847 btrfs_free_path(path); 848 goto out; 849 } 850 spin_unlock(&block_group->lock); 851 852 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 853 path, block_group->key.objectid); 854 btrfs_free_path(path); 855 if (ret <= 0) 856 goto out; 857 858 spin_lock(&ctl->tree_lock); 859 matched = (ctl->free_space == (block_group->key.offset - used - 860 block_group->bytes_super)); 861 spin_unlock(&ctl->tree_lock); 862 863 if (!matched) { 864 __btrfs_remove_free_space_cache(ctl); 865 btrfs_warn(fs_info, 866 "block group %llu has wrong amount of free space", 867 block_group->key.objectid); 868 ret = -1; 869 } 870 out: 871 if (ret < 0) { 872 /* This cache is bogus, make sure it gets cleared */ 873 spin_lock(&block_group->lock); 874 block_group->disk_cache_state = BTRFS_DC_CLEAR; 875 spin_unlock(&block_group->lock); 876 ret = 0; 877 878 btrfs_warn(fs_info, 879 "failed to load free space cache for block group %llu, rebuilding it now", 880 block_group->key.objectid); 881 } 882 883 iput(inode); 884 return ret; 885 } 886 887 static noinline_for_stack 888 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 889 struct btrfs_free_space_ctl *ctl, 890 struct btrfs_block_group_cache *block_group, 891 int *entries, int *bitmaps, 892 struct list_head *bitmap_list) 893 { 894 int ret; 895 struct btrfs_free_cluster *cluster = NULL; 896 struct btrfs_free_cluster *cluster_locked = NULL; 897 struct rb_node *node = rb_first(&ctl->free_space_offset); 898 struct btrfs_trim_range *trim_entry; 899 900 /* Get the cluster for this block_group if it exists */ 901 if (block_group && !list_empty(&block_group->cluster_list)) { 902 cluster = list_entry(block_group->cluster_list.next, 903 struct btrfs_free_cluster, 904 block_group_list); 905 } 906 907 if (!node && cluster) { 908 cluster_locked = cluster; 909 spin_lock(&cluster_locked->lock); 910 node = rb_first(&cluster->root); 911 cluster = NULL; 912 } 913 914 /* Write out the extent entries */ 915 while (node) { 916 struct btrfs_free_space *e; 917 918 e = rb_entry(node, struct btrfs_free_space, offset_index); 919 *entries += 1; 920 921 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 922 e->bitmap); 923 if (ret) 924 goto fail; 925 926 if (e->bitmap) { 927 list_add_tail(&e->list, bitmap_list); 928 *bitmaps += 1; 929 } 930 node = rb_next(node); 931 if (!node && cluster) { 932 node = rb_first(&cluster->root); 933 cluster_locked = cluster; 934 spin_lock(&cluster_locked->lock); 935 cluster = NULL; 936 } 937 } 938 if (cluster_locked) { 939 spin_unlock(&cluster_locked->lock); 940 cluster_locked = NULL; 941 } 942 943 /* 944 * Make sure we don't miss any range that was removed from our rbtree 945 * because trimming is running. Otherwise after a umount+mount (or crash 946 * after committing the transaction) we would leak free space and get 947 * an inconsistent free space cache report from fsck. 
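	 * The ranges currently being trimmed are tracked on
	 * ctl->trimming_ranges; our caller holds ctl->cache_writeout_mutex
	 * while we walk that list.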
948 */ 949 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 950 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 951 trim_entry->bytes, NULL); 952 if (ret) 953 goto fail; 954 *entries += 1; 955 } 956 957 return 0; 958 fail: 959 if (cluster_locked) 960 spin_unlock(&cluster_locked->lock); 961 return -ENOSPC; 962 } 963 964 static noinline_for_stack int 965 update_cache_item(struct btrfs_trans_handle *trans, 966 struct btrfs_root *root, 967 struct inode *inode, 968 struct btrfs_path *path, u64 offset, 969 int entries, int bitmaps) 970 { 971 struct btrfs_key key; 972 struct btrfs_free_space_header *header; 973 struct extent_buffer *leaf; 974 int ret; 975 976 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 977 key.offset = offset; 978 key.type = 0; 979 980 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 981 if (ret < 0) { 982 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 983 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 984 goto fail; 985 } 986 leaf = path->nodes[0]; 987 if (ret > 0) { 988 struct btrfs_key found_key; 989 ASSERT(path->slots[0]); 990 path->slots[0]--; 991 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 992 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 993 found_key.offset != offset) { 994 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 995 inode->i_size - 1, 996 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 997 NULL); 998 btrfs_release_path(path); 999 goto fail; 1000 } 1001 } 1002 1003 BTRFS_I(inode)->generation = trans->transid; 1004 header = btrfs_item_ptr(leaf, path->slots[0], 1005 struct btrfs_free_space_header); 1006 btrfs_set_free_space_entries(leaf, header, entries); 1007 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1008 btrfs_set_free_space_generation(leaf, header, trans->transid); 1009 btrfs_mark_buffer_dirty(leaf); 1010 btrfs_release_path(path); 1011 1012 return 0; 1013 1014 fail: 1015 return -1; 1016 } 1017 1018 static noinline_for_stack int 1019 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1020 struct btrfs_block_group_cache *block_group, 1021 struct btrfs_io_ctl *io_ctl, 1022 int *entries) 1023 { 1024 u64 start, extent_start, extent_end, len; 1025 struct extent_io_tree *unpin = NULL; 1026 int ret; 1027 1028 if (!block_group) 1029 return 0; 1030 1031 /* 1032 * We want to add any pinned extents to our free space cache 1033 * so we don't leak the space 1034 * 1035 * We shouldn't have switched the pinned extents yet so this is the 1036 * right one 1037 */ 1038 unpin = fs_info->pinned_extents; 1039 1040 start = block_group->key.objectid; 1041 1042 while (start < block_group->key.objectid + block_group->key.offset) { 1043 ret = find_first_extent_bit(unpin, start, 1044 &extent_start, &extent_end, 1045 EXTENT_DIRTY, NULL); 1046 if (ret) 1047 return 0; 1048 1049 /* This pinned extent is out of our range */ 1050 if (extent_start >= block_group->key.objectid + 1051 block_group->key.offset) 1052 return 0; 1053 1054 extent_start = max(extent_start, start); 1055 extent_end = min(block_group->key.objectid + 1056 block_group->key.offset, extent_end + 1); 1057 len = extent_end - extent_start; 1058 1059 *entries += 1; 1060 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1061 if (ret) 1062 return -ENOSPC; 1063 1064 start = extent_end; 1065 } 1066 1067 return 0; 1068 } 1069 1070 static noinline_for_stack int 1071 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1072 { 1073 struct btrfs_free_space *entry, *next; 1074 int ret; 1075 1076 /* Write out the bitmaps */ 1077 
list_for_each_entry_safe(entry, next, bitmap_list, list) { 1078 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1079 if (ret) 1080 return -ENOSPC; 1081 list_del_init(&entry->list); 1082 } 1083 1084 return 0; 1085 } 1086 1087 static int flush_dirty_cache(struct inode *inode) 1088 { 1089 int ret; 1090 1091 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1092 if (ret) 1093 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1094 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 1095 1096 return ret; 1097 } 1098 1099 static void noinline_for_stack 1100 cleanup_bitmap_list(struct list_head *bitmap_list) 1101 { 1102 struct btrfs_free_space *entry, *next; 1103 1104 list_for_each_entry_safe(entry, next, bitmap_list, list) 1105 list_del_init(&entry->list); 1106 } 1107 1108 static void noinline_for_stack 1109 cleanup_write_cache_enospc(struct inode *inode, 1110 struct btrfs_io_ctl *io_ctl, 1111 struct extent_state **cached_state) 1112 { 1113 io_ctl_drop_pages(io_ctl); 1114 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1115 i_size_read(inode) - 1, cached_state); 1116 } 1117 1118 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1119 struct btrfs_trans_handle *trans, 1120 struct btrfs_block_group_cache *block_group, 1121 struct btrfs_io_ctl *io_ctl, 1122 struct btrfs_path *path, u64 offset) 1123 { 1124 int ret; 1125 struct inode *inode = io_ctl->inode; 1126 struct btrfs_fs_info *fs_info; 1127 1128 if (!inode) 1129 return 0; 1130 1131 fs_info = btrfs_sb(inode->i_sb); 1132 1133 /* Flush the dirty pages in the cache file. */ 1134 ret = flush_dirty_cache(inode); 1135 if (ret) 1136 goto out; 1137 1138 /* Update the cache item to tell everyone this cache file is valid. */ 1139 ret = update_cache_item(trans, root, inode, path, offset, 1140 io_ctl->entries, io_ctl->bitmaps); 1141 out: 1142 io_ctl_free(io_ctl); 1143 if (ret) { 1144 invalidate_inode_pages2(inode->i_mapping); 1145 BTRFS_I(inode)->generation = 0; 1146 if (block_group) { 1147 #ifdef DEBUG 1148 btrfs_err(fs_info, 1149 "failed to write free space cache for block group %llu", 1150 block_group->key.objectid); 1151 #endif 1152 } 1153 } 1154 btrfs_update_inode(trans, root, inode); 1155 1156 if (block_group) { 1157 /* the dirty list is protected by the dirty_bgs_lock */ 1158 spin_lock(&trans->transaction->dirty_bgs_lock); 1159 1160 /* the disk_cache_state is protected by the block group lock */ 1161 spin_lock(&block_group->lock); 1162 1163 /* 1164 * only mark this as written if we didn't get put back on 1165 * the dirty list while waiting for IO. 
Otherwise our
		 * cache state won't be right, and we won't get written again
		 */
		if (!ret && list_empty(&block_group->dirty_list))
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		else if (ret)
			block_group->disk_cache_state = BTRFS_DC_ERROR;

		spin_unlock(&block_group->lock);
		spin_unlock(&trans->transaction->dirty_bgs_lock);
		io_ctl->inode = NULL;
		iput(inode);
	}

	return ret;

}

static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
}

int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
			struct btrfs_block_group_cache *block_group,
			struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
				     block_group, &block_group->io_ctl,
				     path, block_group->key.objectid);
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root: the root the inode belongs to
 * @inode: the free space cache inode we are writing out
 * @ctl: the free space cache we are going to write out
 * @block_group: the block_group for this cache if it belongs to a block_group
 * @io_ctl: the io_ctl that tracks the state of this write-out
 * @trans: the trans handle
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_block_group_cache *block_group,
				   struct btrfs_io_ctl *io_ctl,
				   struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_state *cached_state = NULL;
	LIST_HEAD(bitmap_list);
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int must_iput = 0;

	if (!i_size_read(inode))
		return -EIO;

	WARN_ON(io_ctl->pages);
	ret = io_ctl_init(io_ctl, inode, 1);
	if (ret)
		return ret;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
		down_write(&block_group->data_rwsem);
		spin_lock(&block_group->lock);
		if (block_group->delalloc_bytes) {
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
			spin_unlock(&block_group->lock);
			up_write(&block_group->data_rwsem);
			BTRFS_I(inode)->generation = 0;
			ret = 0;
			must_iput = 1;
			goto out;
		}
		spin_unlock(&block_group->lock);
	}

	/* Lock all pages first so we can lock the extent safely. */
	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
	if (ret)
		goto out_unlock;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 &cached_state);

	io_ctl_set_generation(io_ctl, trans->transid);

	mutex_lock(&ctl->cache_writeout_mutex);
	/* Write out the extent entries in the free space cache */
	spin_lock(&ctl->tree_lock);
	ret = write_cache_extent_entries(io_ctl, ctl,
					 block_group, &entries, &bitmaps,
					 &bitmap_list);
	if (ret)
		goto out_nospc_locked;

	/*
	 * Some spaces that are freed in the current transaction are pinned,
	 * they will be added into free space cache after the transaction is
	 * committed, we shouldn't lose them.
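	 * write_pinned_extent_entries() below walks the pinned extents for
	 * this block group's range and records them as free space entries.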
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it.  No locking needed
	 */
	ret = write_pinned_extent_entries(fs_info, block_group,
					  io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
				i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state);

	/*
	 * At this point the pages are under IO and we're happy.  The caller
	 * is responsible for waiting on them and updating the cache and the
	 * inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(fs_info, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				block_group, &block_group->io_ctl, trans);
	if (ret) {
#ifdef DEBUG
		btrfs_err(fs_info,
			  "failed to write free space cache for block group %llu",
			  block_group->key.objectid);
#endif
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and
put the inode 1391 */ 1392 1393 return ret; 1394 } 1395 1396 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1397 u64 offset) 1398 { 1399 ASSERT(offset >= bitmap_start); 1400 offset -= bitmap_start; 1401 return (unsigned long)(div_u64(offset, unit)); 1402 } 1403 1404 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1405 { 1406 return (unsigned long)(div_u64(bytes, unit)); 1407 } 1408 1409 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1410 u64 offset) 1411 { 1412 u64 bitmap_start; 1413 u64 bytes_per_bitmap; 1414 1415 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1416 bitmap_start = offset - ctl->start; 1417 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1418 bitmap_start *= bytes_per_bitmap; 1419 bitmap_start += ctl->start; 1420 1421 return bitmap_start; 1422 } 1423 1424 static int tree_insert_offset(struct rb_root *root, u64 offset, 1425 struct rb_node *node, int bitmap) 1426 { 1427 struct rb_node **p = &root->rb_node; 1428 struct rb_node *parent = NULL; 1429 struct btrfs_free_space *info; 1430 1431 while (*p) { 1432 parent = *p; 1433 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1434 1435 if (offset < info->offset) { 1436 p = &(*p)->rb_left; 1437 } else if (offset > info->offset) { 1438 p = &(*p)->rb_right; 1439 } else { 1440 /* 1441 * we could have a bitmap entry and an extent entry 1442 * share the same offset. If this is the case, we want 1443 * the extent entry to always be found first if we do a 1444 * linear search through the tree, since we want to have 1445 * the quickest allocation time, and allocating from an 1446 * extent is faster than allocating from a bitmap. So 1447 * if we're inserting a bitmap and we find an entry at 1448 * this offset, we want to go right, or after this entry 1449 * logically. If we are inserting an extent and we've 1450 * found a bitmap, we want to go left, or before 1451 * logically. 1452 */ 1453 if (bitmap) { 1454 if (info->bitmap) { 1455 WARN_ON_ONCE(1); 1456 return -EEXIST; 1457 } 1458 p = &(*p)->rb_right; 1459 } else { 1460 if (!info->bitmap) { 1461 WARN_ON_ONCE(1); 1462 return -EEXIST; 1463 } 1464 p = &(*p)->rb_left; 1465 } 1466 } 1467 } 1468 1469 rb_link_node(node, parent, p); 1470 rb_insert_color(node, root); 1471 1472 return 0; 1473 } 1474 1475 /* 1476 * searches the tree for the given offset. 1477 * 1478 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1479 * want a section that has at least bytes size and comes at or after the given 1480 * offset. 1481 */ 1482 static struct btrfs_free_space * 1483 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1484 u64 offset, int bitmap_only, int fuzzy) 1485 { 1486 struct rb_node *n = ctl->free_space_offset.rb_node; 1487 struct btrfs_free_space *entry, *prev = NULL; 1488 1489 /* find entry that is closest to the 'offset' */ 1490 while (1) { 1491 if (!n) { 1492 entry = NULL; 1493 break; 1494 } 1495 1496 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1497 prev = entry; 1498 1499 if (offset < entry->offset) 1500 n = n->rb_left; 1501 else if (offset > entry->offset) 1502 n = n->rb_right; 1503 else 1504 break; 1505 } 1506 1507 if (bitmap_only) { 1508 if (!entry) 1509 return NULL; 1510 if (entry->bitmap) 1511 return entry; 1512 1513 /* 1514 * bitmap entry and extent entry may share same offset, 1515 * in that case, bitmap entry comes after extent entry. 
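		 * (This mirrors tree_insert_offset(): an extent entry sorts
		 * before a bitmap entry that shares its offset.)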
1516 */ 1517 n = rb_next(n); 1518 if (!n) 1519 return NULL; 1520 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1521 if (entry->offset != offset) 1522 return NULL; 1523 1524 WARN_ON(!entry->bitmap); 1525 return entry; 1526 } else if (entry) { 1527 if (entry->bitmap) { 1528 /* 1529 * if previous extent entry covers the offset, 1530 * we should return it instead of the bitmap entry 1531 */ 1532 n = rb_prev(&entry->offset_index); 1533 if (n) { 1534 prev = rb_entry(n, struct btrfs_free_space, 1535 offset_index); 1536 if (!prev->bitmap && 1537 prev->offset + prev->bytes > offset) 1538 entry = prev; 1539 } 1540 } 1541 return entry; 1542 } 1543 1544 if (!prev) 1545 return NULL; 1546 1547 /* find last entry before the 'offset' */ 1548 entry = prev; 1549 if (entry->offset > offset) { 1550 n = rb_prev(&entry->offset_index); 1551 if (n) { 1552 entry = rb_entry(n, struct btrfs_free_space, 1553 offset_index); 1554 ASSERT(entry->offset <= offset); 1555 } else { 1556 if (fuzzy) 1557 return entry; 1558 else 1559 return NULL; 1560 } 1561 } 1562 1563 if (entry->bitmap) { 1564 n = rb_prev(&entry->offset_index); 1565 if (n) { 1566 prev = rb_entry(n, struct btrfs_free_space, 1567 offset_index); 1568 if (!prev->bitmap && 1569 prev->offset + prev->bytes > offset) 1570 return prev; 1571 } 1572 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1573 return entry; 1574 } else if (entry->offset + entry->bytes > offset) 1575 return entry; 1576 1577 if (!fuzzy) 1578 return NULL; 1579 1580 while (1) { 1581 if (entry->bitmap) { 1582 if (entry->offset + BITS_PER_BITMAP * 1583 ctl->unit > offset) 1584 break; 1585 } else { 1586 if (entry->offset + entry->bytes > offset) 1587 break; 1588 } 1589 1590 n = rb_next(&entry->offset_index); 1591 if (!n) 1592 return NULL; 1593 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1594 } 1595 return entry; 1596 } 1597 1598 static inline void 1599 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1600 struct btrfs_free_space *info) 1601 { 1602 rb_erase(&info->offset_index, &ctl->free_space_offset); 1603 ctl->free_extents--; 1604 } 1605 1606 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1607 struct btrfs_free_space *info) 1608 { 1609 __unlink_free_space(ctl, info); 1610 ctl->free_space -= info->bytes; 1611 } 1612 1613 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1614 struct btrfs_free_space *info) 1615 { 1616 int ret = 0; 1617 1618 ASSERT(info->bytes || info->bitmap); 1619 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1620 &info->offset_index, (info->bitmap != NULL)); 1621 if (ret) 1622 return ret; 1623 1624 ctl->free_space += info->bytes; 1625 ctl->free_extents++; 1626 return ret; 1627 } 1628 1629 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1630 { 1631 struct btrfs_block_group_cache *block_group = ctl->private; 1632 u64 max_bytes; 1633 u64 bitmap_bytes; 1634 u64 extent_bytes; 1635 u64 size = block_group->key.offset; 1636 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1637 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1638 1639 max_bitmaps = max_t(u64, max_bitmaps, 1); 1640 1641 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1642 1643 /* 1644 * The goal is to keep the total amount of memory used per 1gb of space 1645 * at or below 32k, so we need to adjust how much memory we allow to be 1646 * used by extent based free space tracking 1647 */ 1648 if (size < SZ_1G) 1649 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1650 else 1651 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1652 1653 /* 1654 * we want to account for 1 more bitmap than what we have so we can make 1655 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1656 * we add more bitmaps. 1657 */ 1658 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1659 1660 if (bitmap_bytes >= max_bytes) { 1661 ctl->extents_thresh = 0; 1662 return; 1663 } 1664 1665 /* 1666 * we want the extent entry threshold to always be at most 1/2 the max 1667 * bytes we can have, or whatever is less than that. 1668 */ 1669 extent_bytes = max_bytes - bitmap_bytes; 1670 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1671 1672 ctl->extents_thresh = 1673 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1674 } 1675 1676 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1677 struct btrfs_free_space *info, 1678 u64 offset, u64 bytes) 1679 { 1680 unsigned long start, count; 1681 1682 start = offset_to_bit(info->offset, ctl->unit, offset); 1683 count = bytes_to_bits(bytes, ctl->unit); 1684 ASSERT(start + count <= BITS_PER_BITMAP); 1685 1686 bitmap_clear(info->bitmap, start, count); 1687 1688 info->bytes -= bytes; 1689 } 1690 1691 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1692 struct btrfs_free_space *info, u64 offset, 1693 u64 bytes) 1694 { 1695 __bitmap_clear_bits(ctl, info, offset, bytes); 1696 ctl->free_space -= bytes; 1697 } 1698 1699 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1700 struct btrfs_free_space *info, u64 offset, 1701 u64 bytes) 1702 { 1703 unsigned long start, count; 1704 1705 start = offset_to_bit(info->offset, ctl->unit, offset); 1706 count = bytes_to_bits(bytes, ctl->unit); 1707 ASSERT(start + count <= BITS_PER_BITMAP); 1708 1709 bitmap_set(info->bitmap, start, count); 1710 1711 info->bytes += bytes; 1712 ctl->free_space += bytes; 1713 } 1714 1715 /* 1716 * If we can not find suitable extent, we will use bytes to record 1717 * the size of the max extent. 1718 */ 1719 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1720 struct btrfs_free_space *bitmap_info, u64 *offset, 1721 u64 *bytes, bool for_alloc) 1722 { 1723 unsigned long found_bits = 0; 1724 unsigned long max_bits = 0; 1725 unsigned long bits, i; 1726 unsigned long next_zero; 1727 unsigned long extent_bits; 1728 1729 /* 1730 * Skip searching the bitmap if we don't have a contiguous section that 1731 * is large enough for this allocation. 
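	 * max_extent_size is a cached bound from a previous failed search and
	 * is reset to zero whenever add_bytes_to_bitmap() sets new bits.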
1732 */ 1733 if (for_alloc && 1734 bitmap_info->max_extent_size && 1735 bitmap_info->max_extent_size < *bytes) { 1736 *bytes = bitmap_info->max_extent_size; 1737 return -1; 1738 } 1739 1740 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1741 max_t(u64, *offset, bitmap_info->offset)); 1742 bits = bytes_to_bits(*bytes, ctl->unit); 1743 1744 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1745 if (for_alloc && bits == 1) { 1746 found_bits = 1; 1747 break; 1748 } 1749 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1750 BITS_PER_BITMAP, i); 1751 extent_bits = next_zero - i; 1752 if (extent_bits >= bits) { 1753 found_bits = extent_bits; 1754 break; 1755 } else if (extent_bits > max_bits) { 1756 max_bits = extent_bits; 1757 } 1758 i = next_zero; 1759 } 1760 1761 if (found_bits) { 1762 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1763 *bytes = (u64)(found_bits) * ctl->unit; 1764 return 0; 1765 } 1766 1767 *bytes = (u64)(max_bits) * ctl->unit; 1768 bitmap_info->max_extent_size = *bytes; 1769 return -1; 1770 } 1771 1772 /* Cache the size of the max extent in bytes */ 1773 static struct btrfs_free_space * 1774 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1775 unsigned long align, u64 *max_extent_size) 1776 { 1777 struct btrfs_free_space *entry; 1778 struct rb_node *node; 1779 u64 tmp; 1780 u64 align_off; 1781 int ret; 1782 1783 if (!ctl->free_space_offset.rb_node) 1784 goto out; 1785 1786 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1787 if (!entry) 1788 goto out; 1789 1790 for (node = &entry->offset_index; node; node = rb_next(node)) { 1791 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1792 if (entry->bytes < *bytes) { 1793 if (entry->bytes > *max_extent_size) 1794 *max_extent_size = entry->bytes; 1795 continue; 1796 } 1797 1798 /* make sure the space returned is big enough 1799 * to match our requested alignment 1800 */ 1801 if (*bytes >= align) { 1802 tmp = entry->offset - ctl->start + align - 1; 1803 tmp = div64_u64(tmp, align); 1804 tmp = tmp * align + ctl->start; 1805 align_off = tmp - entry->offset; 1806 } else { 1807 align_off = 0; 1808 tmp = entry->offset; 1809 } 1810 1811 if (entry->bytes < *bytes + align_off) { 1812 if (entry->bytes > *max_extent_size) 1813 *max_extent_size = entry->bytes; 1814 continue; 1815 } 1816 1817 if (entry->bitmap) { 1818 u64 size = *bytes; 1819 1820 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1821 if (!ret) { 1822 *offset = tmp; 1823 *bytes = size; 1824 return entry; 1825 } else if (size > *max_extent_size) { 1826 *max_extent_size = size; 1827 } 1828 continue; 1829 } 1830 1831 *offset = tmp; 1832 *bytes = entry->bytes - align_off; 1833 return entry; 1834 } 1835 out: 1836 return NULL; 1837 } 1838 1839 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1840 struct btrfs_free_space *info, u64 offset) 1841 { 1842 info->offset = offset_to_bitmap(ctl, offset); 1843 info->bytes = 0; 1844 INIT_LIST_HEAD(&info->list); 1845 link_free_space(ctl, info); 1846 ctl->total_bitmaps++; 1847 1848 ctl->op->recalc_thresholds(ctl); 1849 } 1850 1851 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1852 struct btrfs_free_space *bitmap_info) 1853 { 1854 unlink_free_space(ctl, bitmap_info); 1855 kfree(bitmap_info->bitmap); 1856 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1857 ctl->total_bitmaps--; 1858 ctl->op->recalc_thresholds(ctl); 1859 } 1860 1861 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1862 struct 
btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap.  We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
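	 * It will be recomputed the next time search_bitmap() fails to find
	 * a large enough contiguous run in this bitmap.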
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		      struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries.  However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory limit,
	 * so allow those block groups to still have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
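	 * The cluster's rbtree is protected by cluster->lock, which is taken
	 * below before we look at its first entry.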
2034 */ 2035 if (block_group && !list_empty(&block_group->cluster_list)) { 2036 struct btrfs_free_cluster *cluster; 2037 struct rb_node *node; 2038 struct btrfs_free_space *entry; 2039 2040 cluster = list_entry(block_group->cluster_list.next, 2041 struct btrfs_free_cluster, 2042 block_group_list); 2043 spin_lock(&cluster->lock); 2044 node = rb_first(&cluster->root); 2045 if (!node) { 2046 spin_unlock(&cluster->lock); 2047 goto no_cluster_bitmap; 2048 } 2049 2050 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2051 if (!entry->bitmap) { 2052 spin_unlock(&cluster->lock); 2053 goto no_cluster_bitmap; 2054 } 2055 2056 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2057 bytes_added = add_bytes_to_bitmap(ctl, entry, 2058 offset, bytes); 2059 bytes -= bytes_added; 2060 offset += bytes_added; 2061 } 2062 spin_unlock(&cluster->lock); 2063 if (!bytes) { 2064 ret = 1; 2065 goto out; 2066 } 2067 } 2068 2069 no_cluster_bitmap: 2070 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2071 1, 0); 2072 if (!bitmap_info) { 2073 ASSERT(added == 0); 2074 goto new_bitmap; 2075 } 2076 2077 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2078 bytes -= bytes_added; 2079 offset += bytes_added; 2080 added = 0; 2081 2082 if (!bytes) { 2083 ret = 1; 2084 goto out; 2085 } else 2086 goto again; 2087 2088 new_bitmap: 2089 if (info && info->bitmap) { 2090 add_new_bitmap(ctl, info, offset); 2091 added = 1; 2092 info = NULL; 2093 goto again; 2094 } else { 2095 spin_unlock(&ctl->tree_lock); 2096 2097 /* no pre-allocated info, allocate a new one */ 2098 if (!info) { 2099 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2100 GFP_NOFS); 2101 if (!info) { 2102 spin_lock(&ctl->tree_lock); 2103 ret = -ENOMEM; 2104 goto out; 2105 } 2106 } 2107 2108 /* allocate the bitmap */ 2109 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2110 spin_lock(&ctl->tree_lock); 2111 if (!info->bitmap) { 2112 ret = -ENOMEM; 2113 goto out; 2114 } 2115 goto again; 2116 } 2117 2118 out: 2119 if (info) { 2120 if (info->bitmap) 2121 kfree(info->bitmap); 2122 kmem_cache_free(btrfs_free_space_cachep, info); 2123 } 2124 2125 return ret; 2126 } 2127 2128 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2129 struct btrfs_free_space *info, bool update_stat) 2130 { 2131 struct btrfs_free_space *left_info; 2132 struct btrfs_free_space *right_info; 2133 bool merged = false; 2134 u64 offset = info->offset; 2135 u64 bytes = info->bytes; 2136 2137 /* 2138 * first we want to see if there is free space adjacent to the range we 2139 * are adding, if there is remove that struct and add a new one to 2140 * cover the entire range 2141 */ 2142 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2143 if (right_info && rb_prev(&right_info->offset_index)) 2144 left_info = rb_entry(rb_prev(&right_info->offset_index), 2145 struct btrfs_free_space, offset_index); 2146 else 2147 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2148 2149 if (right_info && !right_info->bitmap) { 2150 if (update_stat) 2151 unlink_free_space(ctl, right_info); 2152 else 2153 __unlink_free_space(ctl, right_info); 2154 info->bytes += right_info->bytes; 2155 kmem_cache_free(btrfs_free_space_cachep, right_info); 2156 merged = true; 2157 } 2158 2159 if (left_info && !left_info->bitmap && 2160 left_info->offset + left_info->bytes == offset) { 2161 if (update_stat) 2162 unlink_free_space(ctl, left_info); 2163 else 2164 __unlink_free_space(ctl, left_info); 2165 info->offset = left_info->offset; 2166 info->bytes += 
left_info->bytes; 2167 kmem_cache_free(btrfs_free_space_cachep, left_info); 2168 merged = true; 2169 } 2170 2171 return merged; 2172 } 2173 2174 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2175 struct btrfs_free_space *info, 2176 bool update_stat) 2177 { 2178 struct btrfs_free_space *bitmap; 2179 unsigned long i; 2180 unsigned long j; 2181 const u64 end = info->offset + info->bytes; 2182 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2183 u64 bytes; 2184 2185 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2186 if (!bitmap) 2187 return false; 2188 2189 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2190 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2191 if (j == i) 2192 return false; 2193 bytes = (j - i) * ctl->unit; 2194 info->bytes += bytes; 2195 2196 if (update_stat) 2197 bitmap_clear_bits(ctl, bitmap, end, bytes); 2198 else 2199 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2200 2201 if (!bitmap->bytes) 2202 free_bitmap(ctl, bitmap); 2203 2204 return true; 2205 } 2206 2207 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2208 struct btrfs_free_space *info, 2209 bool update_stat) 2210 { 2211 struct btrfs_free_space *bitmap; 2212 u64 bitmap_offset; 2213 unsigned long i; 2214 unsigned long j; 2215 unsigned long prev_j; 2216 u64 bytes; 2217 2218 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2219 /* If we're on a boundary, try the previous logical bitmap. */ 2220 if (bitmap_offset == info->offset) { 2221 if (info->offset == 0) 2222 return false; 2223 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2224 } 2225 2226 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2227 if (!bitmap) 2228 return false; 2229 2230 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2231 j = 0; 2232 prev_j = (unsigned long)-1; 2233 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2234 if (j > i) 2235 break; 2236 prev_j = j; 2237 } 2238 if (prev_j == i) 2239 return false; 2240 2241 if (prev_j == (unsigned long)-1) 2242 bytes = (i + 1) * ctl->unit; 2243 else 2244 bytes = (i - prev_j) * ctl->unit; 2245 2246 info->offset -= bytes; 2247 info->bytes += bytes; 2248 2249 if (update_stat) 2250 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2251 else 2252 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2253 2254 if (!bitmap->bytes) 2255 free_bitmap(ctl, bitmap); 2256 2257 return true; 2258 } 2259 2260 /* 2261 * We prefer always to allocate from extent entries, both for clustered and 2262 * non-clustered allocation requests. So when attempting to add a new extent 2263 * entry, try to see if there's adjacent free space in bitmap entries, and if 2264 * there is, migrate that space from the bitmaps to the extent. 2265 * Like this we get better chances of satisfying space allocation requests 2266 * because we attempt to satisfy them based on a single cache entry, and never 2267 * on 2 or more entries - even if the entries represent a contiguous free space 2268 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2269 * ends). 2270 */ 2271 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2272 struct btrfs_free_space *info, 2273 bool update_stat) 2274 { 2275 /* 2276 * Only work with disconnected entries, as we can change their offset, 2277 * and must be extent entries. 
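 * The ASSERTs below check exactly that: the entry is not a bitmap and
 * is not currently linked into ctl->free_space_offset, since changing
 * the offset of a linked node would corrupt the rbtree ordering.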
2278 */ 2279 ASSERT(!info->bitmap); 2280 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2281 2282 if (ctl->total_bitmaps > 0) { 2283 bool stole_end; 2284 bool stole_front = false; 2285 2286 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2287 if (ctl->total_bitmaps > 0) 2288 stole_front = steal_from_bitmap_to_front(ctl, info, 2289 update_stat); 2290 2291 if (stole_end || stole_front) 2292 try_merge_free_space(ctl, info, update_stat); 2293 } 2294 } 2295 2296 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2297 struct btrfs_free_space_ctl *ctl, 2298 u64 offset, u64 bytes) 2299 { 2300 struct btrfs_free_space *info; 2301 int ret = 0; 2302 2303 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2304 if (!info) 2305 return -ENOMEM; 2306 2307 info->offset = offset; 2308 info->bytes = bytes; 2309 RB_CLEAR_NODE(&info->offset_index); 2310 2311 spin_lock(&ctl->tree_lock); 2312 2313 if (try_merge_free_space(ctl, info, true)) 2314 goto link; 2315 2316 /* 2317 * There was no extent directly to the left or right of this new 2318 * extent then we know we're going to have to allocate a new extent, so 2319 * before we do that see if we need to drop this into a bitmap 2320 */ 2321 ret = insert_into_bitmap(ctl, info); 2322 if (ret < 0) { 2323 goto out; 2324 } else if (ret) { 2325 ret = 0; 2326 goto out; 2327 } 2328 link: 2329 /* 2330 * Only steal free space from adjacent bitmaps if we're sure we're not 2331 * going to add the new free space to existing bitmap entries - because 2332 * that would mean unnecessary work that would be reverted. Therefore 2333 * attempt to steal space from bitmaps if we're adding an extent entry. 2334 */ 2335 steal_from_bitmap(ctl, info, true); 2336 2337 ret = link_free_space(ctl, info); 2338 if (ret) 2339 kmem_cache_free(btrfs_free_space_cachep, info); 2340 out: 2341 spin_unlock(&ctl->tree_lock); 2342 2343 if (ret) { 2344 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2345 ASSERT(ret != -EEXIST); 2346 } 2347 2348 return ret; 2349 } 2350 2351 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2352 u64 offset, u64 bytes) 2353 { 2354 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2355 struct btrfs_free_space *info; 2356 int ret; 2357 bool re_search = false; 2358 2359 spin_lock(&ctl->tree_lock); 2360 2361 again: 2362 ret = 0; 2363 if (!bytes) 2364 goto out_lock; 2365 2366 info = tree_search_offset(ctl, offset, 0, 0); 2367 if (!info) { 2368 /* 2369 * oops didn't find an extent that matched the space we wanted 2370 * to remove, look for a bitmap instead 2371 */ 2372 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2373 1, 0); 2374 if (!info) { 2375 /* 2376 * If we found a partial bit of our free space in a 2377 * bitmap but then couldn't find the other part this may 2378 * be a problem, so WARN about it. 
2379 */ 2380 WARN_ON(re_search); 2381 goto out_lock; 2382 } 2383 } 2384 2385 re_search = false; 2386 if (!info->bitmap) { 2387 unlink_free_space(ctl, info); 2388 if (offset == info->offset) { 2389 u64 to_free = min(bytes, info->bytes); 2390 2391 info->bytes -= to_free; 2392 info->offset += to_free; 2393 if (info->bytes) { 2394 ret = link_free_space(ctl, info); 2395 WARN_ON(ret); 2396 } else { 2397 kmem_cache_free(btrfs_free_space_cachep, info); 2398 } 2399 2400 offset += to_free; 2401 bytes -= to_free; 2402 goto again; 2403 } else { 2404 u64 old_end = info->bytes + info->offset; 2405 2406 info->bytes = offset - info->offset; 2407 ret = link_free_space(ctl, info); 2408 WARN_ON(ret); 2409 if (ret) 2410 goto out_lock; 2411 2412 /* Not enough bytes in this entry to satisfy us */ 2413 if (old_end < offset + bytes) { 2414 bytes -= old_end - offset; 2415 offset = old_end; 2416 goto again; 2417 } else if (old_end == offset + bytes) { 2418 /* all done */ 2419 goto out_lock; 2420 } 2421 spin_unlock(&ctl->tree_lock); 2422 2423 ret = btrfs_add_free_space(block_group, offset + bytes, 2424 old_end - (offset + bytes)); 2425 WARN_ON(ret); 2426 goto out; 2427 } 2428 } 2429 2430 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2431 if (ret == -EAGAIN) { 2432 re_search = true; 2433 goto again; 2434 } 2435 out_lock: 2436 spin_unlock(&ctl->tree_lock); 2437 out: 2438 return ret; 2439 } 2440 2441 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2442 u64 bytes) 2443 { 2444 struct btrfs_fs_info *fs_info = block_group->fs_info; 2445 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2446 struct btrfs_free_space *info; 2447 struct rb_node *n; 2448 int count = 0; 2449 2450 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2451 info = rb_entry(n, struct btrfs_free_space, offset_index); 2452 if (info->bytes >= bytes && !block_group->ro) 2453 count++; 2454 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2455 info->offset, info->bytes, 2456 (info->bitmap) ? "yes" : "no"); 2457 } 2458 btrfs_info(fs_info, "block group has cluster?: %s", 2459 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2460 btrfs_info(fs_info, 2461 "%d blocks of free space at or bigger than bytes is", count); 2462 } 2463 2464 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2465 { 2466 struct btrfs_fs_info *fs_info = block_group->fs_info; 2467 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2468 2469 spin_lock_init(&ctl->tree_lock); 2470 ctl->unit = fs_info->sectorsize; 2471 ctl->start = block_group->key.objectid; 2472 ctl->private = block_group; 2473 ctl->op = &free_space_op; 2474 INIT_LIST_HEAD(&ctl->trimming_ranges); 2475 mutex_init(&ctl->cache_writeout_mutex); 2476 2477 /* 2478 * we only want to have 32k of ram per block group for keeping 2479 * track of free space, and if we pass 1/2 of that we want to 2480 * start converting things over to using bitmaps 2481 */ 2482 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2483 } 2484 2485 /* 2486 * for a given cluster, put all of its extents back into the free 2487 * space cache. If the block group passed doesn't match the block group 2488 * pointed to by the cluster, someone else raced in and freed the 2489 * cluster already. 
In that case, we just return without changing anything 2490 */ 2491 static int 2492 __btrfs_return_cluster_to_free_space( 2493 struct btrfs_block_group_cache *block_group, 2494 struct btrfs_free_cluster *cluster) 2495 { 2496 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2497 struct btrfs_free_space *entry; 2498 struct rb_node *node; 2499 2500 spin_lock(&cluster->lock); 2501 if (cluster->block_group != block_group) 2502 goto out; 2503 2504 cluster->block_group = NULL; 2505 cluster->window_start = 0; 2506 list_del_init(&cluster->block_group_list); 2507 2508 node = rb_first(&cluster->root); 2509 while (node) { 2510 bool bitmap; 2511 2512 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2513 node = rb_next(&entry->offset_index); 2514 rb_erase(&entry->offset_index, &cluster->root); 2515 RB_CLEAR_NODE(&entry->offset_index); 2516 2517 bitmap = (entry->bitmap != NULL); 2518 if (!bitmap) { 2519 try_merge_free_space(ctl, entry, false); 2520 steal_from_bitmap(ctl, entry, false); 2521 } 2522 tree_insert_offset(&ctl->free_space_offset, 2523 entry->offset, &entry->offset_index, bitmap); 2524 } 2525 cluster->root = RB_ROOT; 2526 2527 out: 2528 spin_unlock(&cluster->lock); 2529 btrfs_put_block_group(block_group); 2530 return 0; 2531 } 2532 2533 static void __btrfs_remove_free_space_cache_locked( 2534 struct btrfs_free_space_ctl *ctl) 2535 { 2536 struct btrfs_free_space *info; 2537 struct rb_node *node; 2538 2539 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2540 info = rb_entry(node, struct btrfs_free_space, offset_index); 2541 if (!info->bitmap) { 2542 unlink_free_space(ctl, info); 2543 kmem_cache_free(btrfs_free_space_cachep, info); 2544 } else { 2545 free_bitmap(ctl, info); 2546 } 2547 2548 cond_resched_lock(&ctl->tree_lock); 2549 } 2550 } 2551 2552 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2553 { 2554 spin_lock(&ctl->tree_lock); 2555 __btrfs_remove_free_space_cache_locked(ctl); 2556 spin_unlock(&ctl->tree_lock); 2557 } 2558 2559 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2560 { 2561 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2562 struct btrfs_free_cluster *cluster; 2563 struct list_head *head; 2564 2565 spin_lock(&ctl->tree_lock); 2566 while ((head = block_group->cluster_list.next) != 2567 &block_group->cluster_list) { 2568 cluster = list_entry(head, struct btrfs_free_cluster, 2569 block_group_list); 2570 2571 WARN_ON(cluster->block_group != block_group); 2572 __btrfs_return_cluster_to_free_space(block_group, cluster); 2573 2574 cond_resched_lock(&ctl->tree_lock); 2575 } 2576 __btrfs_remove_free_space_cache_locked(ctl); 2577 spin_unlock(&ctl->tree_lock); 2578 2579 } 2580 2581 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2582 u64 offset, u64 bytes, u64 empty_size, 2583 u64 *max_extent_size) 2584 { 2585 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2586 struct btrfs_free_space *entry = NULL; 2587 u64 bytes_search = bytes + empty_size; 2588 u64 ret = 0; 2589 u64 align_gap = 0; 2590 u64 align_gap_len = 0; 2591 2592 spin_lock(&ctl->tree_lock); 2593 entry = find_free_space(ctl, &offset, &bytes_search, 2594 block_group->full_stripe_len, max_extent_size); 2595 if (!entry) 2596 goto out; 2597 2598 ret = offset; 2599 if (entry->bitmap) { 2600 bitmap_clear_bits(ctl, entry, offset, bytes); 2601 if (!entry->bytes) 2602 free_bitmap(ctl, entry); 2603 } else { 2604 unlink_free_space(ctl, entry); 2605 align_gap_len = offset - 
entry->offset; 2606 align_gap = entry->offset; 2607 2608 entry->offset = offset + bytes; 2609 WARN_ON(entry->bytes < bytes + align_gap_len); 2610 2611 entry->bytes -= bytes + align_gap_len; 2612 if (!entry->bytes) 2613 kmem_cache_free(btrfs_free_space_cachep, entry); 2614 else 2615 link_free_space(ctl, entry); 2616 } 2617 out: 2618 spin_unlock(&ctl->tree_lock); 2619 2620 if (align_gap_len) 2621 __btrfs_add_free_space(block_group->fs_info, ctl, 2622 align_gap, align_gap_len); 2623 return ret; 2624 } 2625 2626 /* 2627 * given a cluster, put all of its extents back into the free space 2628 * cache. If a block group is passed, this function will only free 2629 * a cluster that belongs to the passed block group. 2630 * 2631 * Otherwise, it'll get a reference on the block group pointed to by the 2632 * cluster and remove the cluster from it. 2633 */ 2634 int btrfs_return_cluster_to_free_space( 2635 struct btrfs_block_group_cache *block_group, 2636 struct btrfs_free_cluster *cluster) 2637 { 2638 struct btrfs_free_space_ctl *ctl; 2639 int ret; 2640 2641 /* first, get a safe pointer to the block group */ 2642 spin_lock(&cluster->lock); 2643 if (!block_group) { 2644 block_group = cluster->block_group; 2645 if (!block_group) { 2646 spin_unlock(&cluster->lock); 2647 return 0; 2648 } 2649 } else if (cluster->block_group != block_group) { 2650 /* someone else has already freed it don't redo their work */ 2651 spin_unlock(&cluster->lock); 2652 return 0; 2653 } 2654 atomic_inc(&block_group->count); 2655 spin_unlock(&cluster->lock); 2656 2657 ctl = block_group->free_space_ctl; 2658 2659 /* now return any extents the cluster had on it */ 2660 spin_lock(&ctl->tree_lock); 2661 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2662 spin_unlock(&ctl->tree_lock); 2663 2664 /* finally drop our ref */ 2665 btrfs_put_block_group(block_group); 2666 return ret; 2667 } 2668 2669 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2670 struct btrfs_free_cluster *cluster, 2671 struct btrfs_free_space *entry, 2672 u64 bytes, u64 min_start, 2673 u64 *max_extent_size) 2674 { 2675 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2676 int err; 2677 u64 search_start = cluster->window_start; 2678 u64 search_bytes = bytes; 2679 u64 ret = 0; 2680 2681 search_start = min_start; 2682 search_bytes = bytes; 2683 2684 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2685 if (err) { 2686 if (search_bytes > *max_extent_size) 2687 *max_extent_size = search_bytes; 2688 return 0; 2689 } 2690 2691 ret = search_start; 2692 __bitmap_clear_bits(ctl, entry, ret, bytes); 2693 2694 return ret; 2695 } 2696 2697 /* 2698 * given a cluster, try to allocate 'bytes' from it, returns 0 2699 * if it couldn't find anything suitably large, or a logical disk offset 2700 * if things worked out 2701 */ 2702 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2703 struct btrfs_free_cluster *cluster, u64 bytes, 2704 u64 min_start, u64 *max_extent_size) 2705 { 2706 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2707 struct btrfs_free_space *entry = NULL; 2708 struct rb_node *node; 2709 u64 ret = 0; 2710 2711 spin_lock(&cluster->lock); 2712 if (bytes > cluster->max_size) 2713 goto out; 2714 2715 if (cluster->block_group != block_group) 2716 goto out; 2717 2718 node = rb_first(&cluster->root); 2719 if (!node) 2720 goto out; 2721 2722 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2723 while (1) { 2724 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2725 *max_extent_size = entry->bytes; 2726 2727 if (entry->bytes < bytes || 2728 (!entry->bitmap && entry->offset < min_start)) { 2729 node = rb_next(&entry->offset_index); 2730 if (!node) 2731 break; 2732 entry = rb_entry(node, struct btrfs_free_space, 2733 offset_index); 2734 continue; 2735 } 2736 2737 if (entry->bitmap) { 2738 ret = btrfs_alloc_from_bitmap(block_group, 2739 cluster, entry, bytes, 2740 cluster->window_start, 2741 max_extent_size); 2742 if (ret == 0) { 2743 node = rb_next(&entry->offset_index); 2744 if (!node) 2745 break; 2746 entry = rb_entry(node, struct btrfs_free_space, 2747 offset_index); 2748 continue; 2749 } 2750 cluster->window_start += bytes; 2751 } else { 2752 ret = entry->offset; 2753 2754 entry->offset += bytes; 2755 entry->bytes -= bytes; 2756 } 2757 2758 if (entry->bytes == 0) 2759 rb_erase(&entry->offset_index, &cluster->root); 2760 break; 2761 } 2762 out: 2763 spin_unlock(&cluster->lock); 2764 2765 if (!ret) 2766 return 0; 2767 2768 spin_lock(&ctl->tree_lock); 2769 2770 ctl->free_space -= bytes; 2771 if (entry->bytes == 0) { 2772 ctl->free_extents--; 2773 if (entry->bitmap) { 2774 kfree(entry->bitmap); 2775 ctl->total_bitmaps--; 2776 ctl->op->recalc_thresholds(ctl); 2777 } 2778 kmem_cache_free(btrfs_free_space_cachep, entry); 2779 } 2780 2781 spin_unlock(&ctl->tree_lock); 2782 2783 return ret; 2784 } 2785 2786 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2787 struct btrfs_free_space *entry, 2788 struct btrfs_free_cluster *cluster, 2789 u64 offset, u64 bytes, 2790 u64 cont1_bytes, u64 min_bytes) 2791 { 2792 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2793 unsigned long next_zero; 2794 unsigned long i; 2795 unsigned long want_bits; 2796 unsigned long min_bits; 2797 unsigned long found_bits; 2798 unsigned long max_bits = 0; 2799 unsigned long start = 0; 2800 unsigned long total_found = 0; 2801 int ret; 2802 2803 i = offset_to_bit(entry->offset, ctl->unit, 2804 max_t(u64, offset, entry->offset)); 2805 want_bits = bytes_to_bits(bytes, ctl->unit); 2806 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2807 2808 /* 2809 * Don't bother looking for a cluster in this bitmap if it's heavily 2810 * fragmented. 
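 * entry->max_extent_size is a cached result from a previous scan of
 * this bitmap (zero means unknown); if it already tells us the largest
 * run is smaller than cont1_bytes, rescanning the bits cannot succeed.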
2811 */ 2812 if (entry->max_extent_size && 2813 entry->max_extent_size < cont1_bytes) 2814 return -ENOSPC; 2815 again: 2816 found_bits = 0; 2817 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2818 next_zero = find_next_zero_bit(entry->bitmap, 2819 BITS_PER_BITMAP, i); 2820 if (next_zero - i >= min_bits) { 2821 found_bits = next_zero - i; 2822 if (found_bits > max_bits) 2823 max_bits = found_bits; 2824 break; 2825 } 2826 if (next_zero - i > max_bits) 2827 max_bits = next_zero - i; 2828 i = next_zero; 2829 } 2830 2831 if (!found_bits) { 2832 entry->max_extent_size = (u64)max_bits * ctl->unit; 2833 return -ENOSPC; 2834 } 2835 2836 if (!total_found) { 2837 start = i; 2838 cluster->max_size = 0; 2839 } 2840 2841 total_found += found_bits; 2842 2843 if (cluster->max_size < found_bits * ctl->unit) 2844 cluster->max_size = found_bits * ctl->unit; 2845 2846 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2847 i = next_zero + 1; 2848 goto again; 2849 } 2850 2851 cluster->window_start = start * ctl->unit + entry->offset; 2852 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2853 ret = tree_insert_offset(&cluster->root, entry->offset, 2854 &entry->offset_index, 1); 2855 ASSERT(!ret); /* -EEXIST; Logic error */ 2856 2857 trace_btrfs_setup_cluster(block_group, cluster, 2858 total_found * ctl->unit, 1); 2859 return 0; 2860 } 2861 2862 /* 2863 * This searches the block group for just extents to fill the cluster with. 2864 * Try to find a cluster with at least bytes total bytes, at least one 2865 * extent of cont1_bytes, and other clusters of at least min_bytes. 2866 */ 2867 static noinline int 2868 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2869 struct btrfs_free_cluster *cluster, 2870 struct list_head *bitmaps, u64 offset, u64 bytes, 2871 u64 cont1_bytes, u64 min_bytes) 2872 { 2873 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2874 struct btrfs_free_space *first = NULL; 2875 struct btrfs_free_space *entry = NULL; 2876 struct btrfs_free_space *last; 2877 struct rb_node *node; 2878 u64 window_free; 2879 u64 max_extent; 2880 u64 total_size = 0; 2881 2882 entry = tree_search_offset(ctl, offset, 0, 1); 2883 if (!entry) 2884 return -ENOSPC; 2885 2886 /* 2887 * We don't want bitmaps, so just move along until we find a normal 2888 * extent entry. 
2889 */ 2890 while (entry->bitmap || entry->bytes < min_bytes) { 2891 if (entry->bitmap && list_empty(&entry->list)) 2892 list_add_tail(&entry->list, bitmaps); 2893 node = rb_next(&entry->offset_index); 2894 if (!node) 2895 return -ENOSPC; 2896 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2897 } 2898 2899 window_free = entry->bytes; 2900 max_extent = entry->bytes; 2901 first = entry; 2902 last = entry; 2903 2904 for (node = rb_next(&entry->offset_index); node; 2905 node = rb_next(&entry->offset_index)) { 2906 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2907 2908 if (entry->bitmap) { 2909 if (list_empty(&entry->list)) 2910 list_add_tail(&entry->list, bitmaps); 2911 continue; 2912 } 2913 2914 if (entry->bytes < min_bytes) 2915 continue; 2916 2917 last = entry; 2918 window_free += entry->bytes; 2919 if (entry->bytes > max_extent) 2920 max_extent = entry->bytes; 2921 } 2922 2923 if (window_free < bytes || max_extent < cont1_bytes) 2924 return -ENOSPC; 2925 2926 cluster->window_start = first->offset; 2927 2928 node = &first->offset_index; 2929 2930 /* 2931 * now we've found our entries, pull them out of the free space 2932 * cache and put them into the cluster rbtree 2933 */ 2934 do { 2935 int ret; 2936 2937 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2938 node = rb_next(&entry->offset_index); 2939 if (entry->bitmap || entry->bytes < min_bytes) 2940 continue; 2941 2942 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2943 ret = tree_insert_offset(&cluster->root, entry->offset, 2944 &entry->offset_index, 0); 2945 total_size += entry->bytes; 2946 ASSERT(!ret); /* -EEXIST; Logic error */ 2947 } while (node && entry != last); 2948 2949 cluster->max_size = max_extent; 2950 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2951 return 0; 2952 } 2953 2954 /* 2955 * This specifically looks for bitmaps that may work in the cluster, we assume 2956 * that we have already failed to find extents that will work. 2957 */ 2958 static noinline int 2959 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2960 struct btrfs_free_cluster *cluster, 2961 struct list_head *bitmaps, u64 offset, u64 bytes, 2962 u64 cont1_bytes, u64 min_bytes) 2963 { 2964 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2965 struct btrfs_free_space *entry = NULL; 2966 int ret = -ENOSPC; 2967 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2968 2969 if (ctl->total_bitmaps == 0) 2970 return -ENOSPC; 2971 2972 /* 2973 * The bitmap that covers offset won't be in the list unless offset 2974 * is just its start offset. 2975 */ 2976 if (!list_empty(bitmaps)) 2977 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2978 2979 if (!entry || entry->offset != bitmap_offset) { 2980 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2981 if (entry && list_empty(&entry->list)) 2982 list_add(&entry->list, bitmaps); 2983 } 2984 2985 list_for_each_entry(entry, bitmaps, list) { 2986 if (entry->bytes < bytes) 2987 continue; 2988 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2989 bytes, cont1_bytes, min_bytes); 2990 if (!ret) 2991 return 0; 2992 } 2993 2994 /* 2995 * The bitmaps list has all the bitmaps that record free space 2996 * starting after offset, so no more search is required. 2997 */ 2998 return -ENOSPC; 2999 } 3000 3001 /* 3002 * here we try to find a cluster of blocks in a block group. The goal 3003 * is to find at least bytes+empty_size. 3004 * We might not find them all in one contiguous area. 
3005 * 3006 * returns zero and sets up cluster if things worked out, otherwise 3007 * it returns -enospc 3008 */ 3009 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3010 struct btrfs_block_group_cache *block_group, 3011 struct btrfs_free_cluster *cluster, 3012 u64 offset, u64 bytes, u64 empty_size) 3013 { 3014 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3015 struct btrfs_free_space *entry, *tmp; 3016 LIST_HEAD(bitmaps); 3017 u64 min_bytes; 3018 u64 cont1_bytes; 3019 int ret; 3020 3021 /* 3022 * Choose the minimum extent size we'll require for this 3023 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3024 * For metadata, allow allocates with smaller extents. For 3025 * data, keep it dense. 3026 */ 3027 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3028 cont1_bytes = min_bytes = bytes + empty_size; 3029 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3030 cont1_bytes = bytes; 3031 min_bytes = fs_info->sectorsize; 3032 } else { 3033 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3034 min_bytes = fs_info->sectorsize; 3035 } 3036 3037 spin_lock(&ctl->tree_lock); 3038 3039 /* 3040 * If we know we don't have enough space to make a cluster don't even 3041 * bother doing all the work to try and find one. 3042 */ 3043 if (ctl->free_space < bytes) { 3044 spin_unlock(&ctl->tree_lock); 3045 return -ENOSPC; 3046 } 3047 3048 spin_lock(&cluster->lock); 3049 3050 /* someone already found a cluster, hooray */ 3051 if (cluster->block_group) { 3052 ret = 0; 3053 goto out; 3054 } 3055 3056 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3057 min_bytes); 3058 3059 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3060 bytes + empty_size, 3061 cont1_bytes, min_bytes); 3062 if (ret) 3063 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3064 offset, bytes + empty_size, 3065 cont1_bytes, min_bytes); 3066 3067 /* Clear our temporary list */ 3068 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3069 list_del_init(&entry->list); 3070 3071 if (!ret) { 3072 atomic_inc(&block_group->count); 3073 list_add_tail(&cluster->block_group_list, 3074 &block_group->cluster_list); 3075 cluster->block_group = block_group; 3076 } else { 3077 trace_btrfs_failed_cluster_setup(block_group); 3078 } 3079 out: 3080 spin_unlock(&cluster->lock); 3081 spin_unlock(&ctl->tree_lock); 3082 3083 return ret; 3084 } 3085 3086 /* 3087 * simple code to zero out a cluster 3088 */ 3089 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3090 { 3091 spin_lock_init(&cluster->lock); 3092 spin_lock_init(&cluster->refill_lock); 3093 cluster->root = RB_ROOT; 3094 cluster->max_size = 0; 3095 cluster->fragmented = false; 3096 INIT_LIST_HEAD(&cluster->block_group_list); 3097 cluster->block_group = NULL; 3098 } 3099 3100 static int do_trimming(struct btrfs_block_group_cache *block_group, 3101 u64 *total_trimmed, u64 start, u64 bytes, 3102 u64 reserved_start, u64 reserved_bytes, 3103 struct btrfs_trim_range *trim_entry) 3104 { 3105 struct btrfs_space_info *space_info = block_group->space_info; 3106 struct btrfs_fs_info *fs_info = block_group->fs_info; 3107 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3108 int ret; 3109 int update = 0; 3110 u64 trimmed = 0; 3111 3112 spin_lock(&space_info->lock); 3113 spin_lock(&block_group->lock); 3114 if (!block_group->ro) { 3115 block_group->reserved += reserved_bytes; 3116 space_info->bytes_reserved += reserved_bytes; 3117 update = 1; 3118 } 3119 spin_unlock(&block_group->lock); 3120 
spin_unlock(&space_info->lock); 3121 3122 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3123 if (!ret) 3124 *total_trimmed += trimmed; 3125 3126 mutex_lock(&ctl->cache_writeout_mutex); 3127 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3128 list_del(&trim_entry->list); 3129 mutex_unlock(&ctl->cache_writeout_mutex); 3130 3131 if (update) { 3132 spin_lock(&space_info->lock); 3133 spin_lock(&block_group->lock); 3134 if (block_group->ro) 3135 space_info->bytes_readonly += reserved_bytes; 3136 block_group->reserved -= reserved_bytes; 3137 space_info->bytes_reserved -= reserved_bytes; 3138 spin_unlock(&space_info->lock); 3139 spin_unlock(&block_group->lock); 3140 } 3141 3142 return ret; 3143 } 3144 3145 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3146 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3147 { 3148 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3149 struct btrfs_free_space *entry; 3150 struct rb_node *node; 3151 int ret = 0; 3152 u64 extent_start; 3153 u64 extent_bytes; 3154 u64 bytes; 3155 3156 while (start < end) { 3157 struct btrfs_trim_range trim_entry; 3158 3159 mutex_lock(&ctl->cache_writeout_mutex); 3160 spin_lock(&ctl->tree_lock); 3161 3162 if (ctl->free_space < minlen) { 3163 spin_unlock(&ctl->tree_lock); 3164 mutex_unlock(&ctl->cache_writeout_mutex); 3165 break; 3166 } 3167 3168 entry = tree_search_offset(ctl, start, 0, 1); 3169 if (!entry) { 3170 spin_unlock(&ctl->tree_lock); 3171 mutex_unlock(&ctl->cache_writeout_mutex); 3172 break; 3173 } 3174 3175 /* skip bitmaps */ 3176 while (entry->bitmap) { 3177 node = rb_next(&entry->offset_index); 3178 if (!node) { 3179 spin_unlock(&ctl->tree_lock); 3180 mutex_unlock(&ctl->cache_writeout_mutex); 3181 goto out; 3182 } 3183 entry = rb_entry(node, struct btrfs_free_space, 3184 offset_index); 3185 } 3186 3187 if (entry->offset >= end) { 3188 spin_unlock(&ctl->tree_lock); 3189 mutex_unlock(&ctl->cache_writeout_mutex); 3190 break; 3191 } 3192 3193 extent_start = entry->offset; 3194 extent_bytes = entry->bytes; 3195 start = max(start, extent_start); 3196 bytes = min(extent_start + extent_bytes, end) - start; 3197 if (bytes < minlen) { 3198 spin_unlock(&ctl->tree_lock); 3199 mutex_unlock(&ctl->cache_writeout_mutex); 3200 goto next; 3201 } 3202 3203 unlink_free_space(ctl, entry); 3204 kmem_cache_free(btrfs_free_space_cachep, entry); 3205 3206 spin_unlock(&ctl->tree_lock); 3207 trim_entry.start = extent_start; 3208 trim_entry.bytes = extent_bytes; 3209 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3210 mutex_unlock(&ctl->cache_writeout_mutex); 3211 3212 ret = do_trimming(block_group, total_trimmed, start, bytes, 3213 extent_start, extent_bytes, &trim_entry); 3214 if (ret) 3215 break; 3216 next: 3217 start += bytes; 3218 3219 if (fatal_signal_pending(current)) { 3220 ret = -ERESTARTSYS; 3221 break; 3222 } 3223 3224 cond_resched(); 3225 } 3226 out: 3227 return ret; 3228 } 3229 3230 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3231 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3232 { 3233 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3234 struct btrfs_free_space *entry; 3235 int ret = 0; 3236 int ret2; 3237 u64 bytes; 3238 u64 offset = offset_to_bitmap(ctl, start); 3239 3240 while (offset < end) { 3241 bool next_bitmap = false; 3242 struct btrfs_trim_range trim_entry; 3243 3244 mutex_lock(&ctl->cache_writeout_mutex); 3245 spin_lock(&ctl->tree_lock); 3246 3247 if (ctl->free_space < minlen) { 3248 
spin_unlock(&ctl->tree_lock); 3249 mutex_unlock(&ctl->cache_writeout_mutex); 3250 break; 3251 } 3252 3253 entry = tree_search_offset(ctl, offset, 1, 0); 3254 if (!entry) { 3255 spin_unlock(&ctl->tree_lock); 3256 mutex_unlock(&ctl->cache_writeout_mutex); 3257 next_bitmap = true; 3258 goto next; 3259 } 3260 3261 bytes = minlen; 3262 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3263 if (ret2 || start >= end) { 3264 spin_unlock(&ctl->tree_lock); 3265 mutex_unlock(&ctl->cache_writeout_mutex); 3266 next_bitmap = true; 3267 goto next; 3268 } 3269 3270 bytes = min(bytes, end - start); 3271 if (bytes < minlen) { 3272 spin_unlock(&ctl->tree_lock); 3273 mutex_unlock(&ctl->cache_writeout_mutex); 3274 goto next; 3275 } 3276 3277 bitmap_clear_bits(ctl, entry, start, bytes); 3278 if (entry->bytes == 0) 3279 free_bitmap(ctl, entry); 3280 3281 spin_unlock(&ctl->tree_lock); 3282 trim_entry.start = start; 3283 trim_entry.bytes = bytes; 3284 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3285 mutex_unlock(&ctl->cache_writeout_mutex); 3286 3287 ret = do_trimming(block_group, total_trimmed, start, bytes, 3288 start, bytes, &trim_entry); 3289 if (ret) 3290 break; 3291 next: 3292 if (next_bitmap) { 3293 offset += BITS_PER_BITMAP * ctl->unit; 3294 } else { 3295 start += bytes; 3296 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3297 offset += BITS_PER_BITMAP * ctl->unit; 3298 } 3299 3300 if (fatal_signal_pending(current)) { 3301 ret = -ERESTARTSYS; 3302 break; 3303 } 3304 3305 cond_resched(); 3306 } 3307 3308 return ret; 3309 } 3310 3311 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3312 { 3313 atomic_inc(&cache->trimming); 3314 } 3315 3316 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3317 { 3318 struct btrfs_fs_info *fs_info = block_group->fs_info; 3319 struct extent_map_tree *em_tree; 3320 struct extent_map *em; 3321 bool cleanup; 3322 3323 spin_lock(&block_group->lock); 3324 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3325 block_group->removed); 3326 spin_unlock(&block_group->lock); 3327 3328 if (cleanup) { 3329 mutex_lock(&fs_info->chunk_mutex); 3330 em_tree = &fs_info->mapping_tree.map_tree; 3331 write_lock(&em_tree->lock); 3332 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3333 1); 3334 BUG_ON(!em); /* logic error, can't happen */ 3335 /* 3336 * remove_extent_mapping() will delete us from the pinned_chunks 3337 * list, which is protected by the chunk mutex. 3338 */ 3339 remove_extent_mapping(em_tree, em); 3340 write_unlock(&em_tree->lock); 3341 mutex_unlock(&fs_info->chunk_mutex); 3342 3343 /* once for us and once for the tree */ 3344 free_extent_map(em); 3345 free_extent_map(em); 3346 3347 /* 3348 * We've left one free space entry and other tasks trimming 3349 * this block group have left 1 entry each one. Free them. 
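 * (Each trimming task re-adds its reserved range with
 * btrfs_add_free_space() in do_trimming(), which is where those stray
 * entries come from; since the block group has already been removed,
 * nothing else will use them, so drop the whole cache.)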
3350 */ 3351 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3352 } 3353 } 3354 3355 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3356 u64 *trimmed, u64 start, u64 end, u64 minlen) 3357 { 3358 int ret; 3359 3360 *trimmed = 0; 3361 3362 spin_lock(&block_group->lock); 3363 if (block_group->removed) { 3364 spin_unlock(&block_group->lock); 3365 return 0; 3366 } 3367 btrfs_get_block_group_trimming(block_group); 3368 spin_unlock(&block_group->lock); 3369 3370 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3371 if (ret) 3372 goto out; 3373 3374 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3375 out: 3376 btrfs_put_block_group_trimming(block_group); 3377 return ret; 3378 } 3379 3380 /* 3381 * Find the left-most item in the cache tree, and then return the 3382 * smallest inode number in the item. 3383 * 3384 * Note: the returned inode number may not be the smallest one in 3385 * the tree, if the left-most item is a bitmap. 3386 */ 3387 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3388 { 3389 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3390 struct btrfs_free_space *entry = NULL; 3391 u64 ino = 0; 3392 3393 spin_lock(&ctl->tree_lock); 3394 3395 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3396 goto out; 3397 3398 entry = rb_entry(rb_first(&ctl->free_space_offset), 3399 struct btrfs_free_space, offset_index); 3400 3401 if (!entry->bitmap) { 3402 ino = entry->offset; 3403 3404 unlink_free_space(ctl, entry); 3405 entry->offset++; 3406 entry->bytes--; 3407 if (!entry->bytes) 3408 kmem_cache_free(btrfs_free_space_cachep, entry); 3409 else 3410 link_free_space(ctl, entry); 3411 } else { 3412 u64 offset = 0; 3413 u64 count = 1; 3414 int ret; 3415 3416 ret = search_bitmap(ctl, entry, &offset, &count, true); 3417 /* Logic error; Should be empty if it can't find anything */ 3418 ASSERT(!ret); 3419 3420 ino = offset; 3421 bitmap_clear_bits(ctl, entry, offset, 1); 3422 if (entry->bytes == 0) 3423 free_bitmap(ctl, entry); 3424 } 3425 out: 3426 spin_unlock(&ctl->tree_lock); 3427 3428 return ino; 3429 } 3430 3431 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3432 struct btrfs_path *path) 3433 { 3434 struct inode *inode = NULL; 3435 3436 spin_lock(&root->ino_cache_lock); 3437 if (root->ino_cache_inode) 3438 inode = igrab(root->ino_cache_inode); 3439 spin_unlock(&root->ino_cache_lock); 3440 if (inode) 3441 return inode; 3442 3443 inode = __lookup_free_space_inode(root, path, 0); 3444 if (IS_ERR(inode)) 3445 return inode; 3446 3447 spin_lock(&root->ino_cache_lock); 3448 if (!btrfs_fs_closing(root->fs_info)) 3449 root->ino_cache_inode = igrab(inode); 3450 spin_unlock(&root->ino_cache_lock); 3451 3452 return inode; 3453 } 3454 3455 int create_free_ino_inode(struct btrfs_root *root, 3456 struct btrfs_trans_handle *trans, 3457 struct btrfs_path *path) 3458 { 3459 return __create_free_space_inode(root, trans, path, 3460 BTRFS_FREE_INO_OBJECTID, 0); 3461 } 3462 3463 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3464 { 3465 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3466 struct btrfs_path *path; 3467 struct inode *inode; 3468 int ret = 0; 3469 u64 root_gen = btrfs_root_generation(&root->root_item); 3470 3471 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3472 return 0; 3473 3474 /* 3475 * If we're unmounting then just return, since this does a search on the 3476 * normal root and not the commit root and we could deadlock. 
3477 */ 3478 if (btrfs_fs_closing(fs_info)) 3479 return 0; 3480 3481 path = btrfs_alloc_path(); 3482 if (!path) 3483 return 0; 3484 3485 inode = lookup_free_ino_inode(root, path); 3486 if (IS_ERR(inode)) 3487 goto out; 3488 3489 if (root_gen != BTRFS_I(inode)->generation) 3490 goto out_put; 3491 3492 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3493 3494 if (ret < 0) 3495 btrfs_err(fs_info, 3496 "failed to load free ino cache for root %llu", 3497 root->root_key.objectid); 3498 out_put: 3499 iput(inode); 3500 out: 3501 btrfs_free_path(path); 3502 return ret; 3503 } 3504 3505 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3506 struct btrfs_trans_handle *trans, 3507 struct btrfs_path *path, 3508 struct inode *inode) 3509 { 3510 struct btrfs_fs_info *fs_info = root->fs_info; 3511 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3512 int ret; 3513 struct btrfs_io_ctl io_ctl; 3514 bool release_metadata = true; 3515 3516 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3517 return 0; 3518 3519 memset(&io_ctl, 0, sizeof(io_ctl)); 3520 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3521 if (!ret) { 3522 /* 3523 * At this point writepages() didn't error out, so our metadata 3524 * reservation is released when the writeback finishes, at 3525 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3526 * with or without an error. 3527 */ 3528 release_metadata = false; 3529 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3530 } 3531 3532 if (ret) { 3533 if (release_metadata) 3534 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3535 inode->i_size, true); 3536 #ifdef DEBUG 3537 btrfs_err(fs_info, 3538 "failed to write free ino cache for root %llu", 3539 root->root_key.objectid); 3540 #endif 3541 } 3542 3543 return ret; 3544 } 3545 3546 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3547 /* 3548 * Use this if you need to make a bitmap or extent entry specifically, it 3549 * doesn't do any of the merging that add_free_space does, this acts a lot like 3550 * how the free space cache loading stuff works, so you can get really weird 3551 * configurations. 
3552 */ 3553 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3554 u64 offset, u64 bytes, bool bitmap) 3555 { 3556 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3557 struct btrfs_free_space *info = NULL, *bitmap_info; 3558 void *map = NULL; 3559 u64 bytes_added; 3560 int ret; 3561 3562 again: 3563 if (!info) { 3564 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3565 if (!info) 3566 return -ENOMEM; 3567 } 3568 3569 if (!bitmap) { 3570 spin_lock(&ctl->tree_lock); 3571 info->offset = offset; 3572 info->bytes = bytes; 3573 info->max_extent_size = 0; 3574 ret = link_free_space(ctl, info); 3575 spin_unlock(&ctl->tree_lock); 3576 if (ret) 3577 kmem_cache_free(btrfs_free_space_cachep, info); 3578 return ret; 3579 } 3580 3581 if (!map) { 3582 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3583 if (!map) { 3584 kmem_cache_free(btrfs_free_space_cachep, info); 3585 return -ENOMEM; 3586 } 3587 } 3588 3589 spin_lock(&ctl->tree_lock); 3590 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3591 1, 0); 3592 if (!bitmap_info) { 3593 info->bitmap = map; 3594 map = NULL; 3595 add_new_bitmap(ctl, info, offset); 3596 bitmap_info = info; 3597 info = NULL; 3598 } 3599 3600 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3601 3602 bytes -= bytes_added; 3603 offset += bytes_added; 3604 spin_unlock(&ctl->tree_lock); 3605 3606 if (bytes) 3607 goto again; 3608 3609 if (info) 3610 kmem_cache_free(btrfs_free_space_cachep, info); 3611 if (map) 3612 kfree(map); 3613 return 0; 3614 } 3615 3616 /* 3617 * Checks to see if the given range is in the free space cache. This is really 3618 * just used to check the absence of space, so if there is free space in the 3619 * range at all we will return 1. 3620 */ 3621 int test_check_exists(struct btrfs_block_group_cache *cache, 3622 u64 offset, u64 bytes) 3623 { 3624 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3625 struct btrfs_free_space *info; 3626 int ret = 0; 3627 3628 spin_lock(&ctl->tree_lock); 3629 info = tree_search_offset(ctl, offset, 0, 0); 3630 if (!info) { 3631 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3632 1, 0); 3633 if (!info) 3634 goto out; 3635 } 3636 3637 have_info: 3638 if (info->bitmap) { 3639 u64 bit_off, bit_bytes; 3640 struct rb_node *n; 3641 struct btrfs_free_space *tmp; 3642 3643 bit_off = offset; 3644 bit_bytes = ctl->unit; 3645 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3646 if (!ret) { 3647 if (bit_off == offset) { 3648 ret = 1; 3649 goto out; 3650 } else if (bit_off > offset && 3651 offset + bytes > bit_off) { 3652 ret = 1; 3653 goto out; 3654 } 3655 } 3656 3657 n = rb_prev(&info->offset_index); 3658 while (n) { 3659 tmp = rb_entry(n, struct btrfs_free_space, 3660 offset_index); 3661 if (tmp->offset + tmp->bytes < offset) 3662 break; 3663 if (offset + bytes < tmp->offset) { 3664 n = rb_prev(&tmp->offset_index); 3665 continue; 3666 } 3667 info = tmp; 3668 goto have_info; 3669 } 3670 3671 n = rb_next(&info->offset_index); 3672 while (n) { 3673 tmp = rb_entry(n, struct btrfs_free_space, 3674 offset_index); 3675 if (offset + bytes < tmp->offset) 3676 break; 3677 if (tmp->offset + tmp->bytes < offset) { 3678 n = rb_next(&tmp->offset_index); 3679 continue; 3680 } 3681 info = tmp; 3682 goto have_info; 3683 } 3684 3685 ret = 0; 3686 goto out; 3687 } 3688 3689 if (info->offset == offset) { 3690 ret = 1; 3691 goto out; 3692 } 3693 3694 if (offset > info->offset && offset < info->offset + info->bytes) 3695 ret = 1; 3696 out: 3697 
spin_unlock(&ctl->tree_lock); 3698 return ret; 3699 } 3700 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3701
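/*
 * A sketch of how a caller typically drives the per block group cache
 * implemented above (simplified; the real users live in the extent
 * allocator, and start, len, hint and empty_size below are only
 * placeholders):
 *
 *	btrfs_add_free_space(block_group, start, len);
 *	...
 *	offset = btrfs_find_space_for_alloc(block_group, hint, len,
 *					    empty_size, &max_extent_size);
 *
 * A return value of 0 means nothing large enough was found, in which
 * case max_extent_size reports back the largest free extent that was
 * seen while searching.
 */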