1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Red Hat. All rights reserved. 4 */ 5 6 #include <linux/pagemap.h> 7 #include <linux/sched.h> 8 #include <linux/sched/signal.h> 9 #include <linux/slab.h> 10 #include <linux/math64.h> 11 #include <linux/ratelimit.h> 12 #include <linux/error-injection.h> 13 #include <linux/sched/mm.h> 14 #include "ctree.h" 15 #include "free-space-cache.h" 16 #include "transaction.h" 17 #include "disk-io.h" 18 #include "extent_io.h" 19 #include "inode-map.h" 20 #include "volumes.h" 21 22 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 23 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 24 25 struct btrfs_trim_range { 26 u64 start; 27 u64 bytes; 28 struct list_head list; 29 }; 30 31 static int link_free_space(struct btrfs_free_space_ctl *ctl, 32 struct btrfs_free_space *info); 33 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 34 struct btrfs_free_space *info); 35 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 36 struct btrfs_trans_handle *trans, 37 struct btrfs_io_ctl *io_ctl, 38 struct btrfs_path *path); 39 40 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 41 struct btrfs_path *path, 42 u64 offset) 43 { 44 struct btrfs_fs_info *fs_info = root->fs_info; 45 struct btrfs_key key; 46 struct btrfs_key location; 47 struct btrfs_disk_key disk_key; 48 struct btrfs_free_space_header *header; 49 struct extent_buffer *leaf; 50 struct inode *inode = NULL; 51 unsigned nofs_flag; 52 int ret; 53 54 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 55 key.offset = offset; 56 key.type = 0; 57 58 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 59 if (ret < 0) 60 return ERR_PTR(ret); 61 if (ret > 0) { 62 btrfs_release_path(path); 63 return ERR_PTR(-ENOENT); 64 } 65 66 leaf = path->nodes[0]; 67 header = btrfs_item_ptr(leaf, path->slots[0], 68 struct btrfs_free_space_header); 69 btrfs_free_space_key(leaf, header, &disk_key); 70 btrfs_disk_key_to_cpu(&location, &disk_key); 71 btrfs_release_path(path); 72 73 /* 74 * We are often under a trans handle at this point, so we need to make 75 * sure NOFS is set to keep us from deadlocking. 
76 */ 77 nofs_flag = memalloc_nofs_save(); 78 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 79 memalloc_nofs_restore(nofs_flag); 80 if (IS_ERR(inode)) 81 return inode; 82 83 mapping_set_gfp_mask(inode->i_mapping, 84 mapping_gfp_constraint(inode->i_mapping, 85 ~(__GFP_FS | __GFP_HIGHMEM))); 86 87 return inode; 88 } 89 90 struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 91 struct btrfs_block_group_cache 92 *block_group, struct btrfs_path *path) 93 { 94 struct inode *inode = NULL; 95 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 96 97 spin_lock(&block_group->lock); 98 if (block_group->inode) 99 inode = igrab(block_group->inode); 100 spin_unlock(&block_group->lock); 101 if (inode) 102 return inode; 103 104 inode = __lookup_free_space_inode(fs_info->tree_root, path, 105 block_group->key.objectid); 106 if (IS_ERR(inode)) 107 return inode; 108 109 spin_lock(&block_group->lock); 110 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 111 btrfs_info(fs_info, "Old style space inode found, converting."); 112 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 113 BTRFS_INODE_NODATACOW; 114 block_group->disk_cache_state = BTRFS_DC_CLEAR; 115 } 116 117 if (!block_group->iref) { 118 block_group->inode = igrab(inode); 119 block_group->iref = 1; 120 } 121 spin_unlock(&block_group->lock); 122 123 return inode; 124 } 125 126 static int __create_free_space_inode(struct btrfs_root *root, 127 struct btrfs_trans_handle *trans, 128 struct btrfs_path *path, 129 u64 ino, u64 offset) 130 { 131 struct btrfs_key key; 132 struct btrfs_disk_key disk_key; 133 struct btrfs_free_space_header *header; 134 struct btrfs_inode_item *inode_item; 135 struct extent_buffer *leaf; 136 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 137 int ret; 138 139 ret = btrfs_insert_empty_inode(trans, root, path, ino); 140 if (ret) 141 return ret; 142 143 /* We inline crc's for the free disk space cache */ 144 if (ino != BTRFS_FREE_INO_OBJECTID) 145 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 146 147 leaf = path->nodes[0]; 148 inode_item = btrfs_item_ptr(leaf, path->slots[0], 149 struct btrfs_inode_item); 150 btrfs_item_key(leaf, &disk_key, path->slots[0]); 151 memzero_extent_buffer(leaf, (unsigned long)inode_item, 152 sizeof(*inode_item)); 153 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 154 btrfs_set_inode_size(leaf, inode_item, 0); 155 btrfs_set_inode_nbytes(leaf, inode_item, 0); 156 btrfs_set_inode_uid(leaf, inode_item, 0); 157 btrfs_set_inode_gid(leaf, inode_item, 0); 158 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 159 btrfs_set_inode_flags(leaf, inode_item, flags); 160 btrfs_set_inode_nlink(leaf, inode_item, 1); 161 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 162 btrfs_set_inode_block_group(leaf, inode_item, offset); 163 btrfs_mark_buffer_dirty(leaf); 164 btrfs_release_path(path); 165 166 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 167 key.offset = offset; 168 key.type = 0; 169 ret = btrfs_insert_empty_item(trans, root, path, &key, 170 sizeof(struct btrfs_free_space_header)); 171 if (ret < 0) { 172 btrfs_release_path(path); 173 return ret; 174 } 175 176 leaf = path->nodes[0]; 177 header = btrfs_item_ptr(leaf, path->slots[0], 178 struct btrfs_free_space_header); 179 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 180 btrfs_set_free_space_key(leaf, header, &disk_key); 181 btrfs_mark_buffer_dirty(leaf); 182 btrfs_release_path(path); 183 184 return 0; 185 } 186 187 int create_free_space_inode(struct btrfs_fs_info 
*fs_info, 188 struct btrfs_trans_handle *trans, 189 struct btrfs_block_group_cache *block_group, 190 struct btrfs_path *path) 191 { 192 int ret; 193 u64 ino; 194 195 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 196 if (ret < 0) 197 return ret; 198 199 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 200 block_group->key.objectid); 201 } 202 203 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 204 struct btrfs_block_rsv *rsv) 205 { 206 u64 needed_bytes; 207 int ret; 208 209 /* 1 for slack space, 1 for updating the inode */ 210 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 211 btrfs_calc_trans_metadata_size(fs_info, 1); 212 213 spin_lock(&rsv->lock); 214 if (rsv->reserved < needed_bytes) 215 ret = -ENOSPC; 216 else 217 ret = 0; 218 spin_unlock(&rsv->lock); 219 return ret; 220 } 221 222 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 223 struct btrfs_block_group_cache *block_group, 224 struct inode *inode) 225 { 226 struct btrfs_root *root = BTRFS_I(inode)->root; 227 int ret = 0; 228 bool locked = false; 229 230 if (block_group) { 231 struct btrfs_path *path = btrfs_alloc_path(); 232 233 if (!path) { 234 ret = -ENOMEM; 235 goto fail; 236 } 237 locked = true; 238 mutex_lock(&trans->transaction->cache_write_mutex); 239 if (!list_empty(&block_group->io_list)) { 240 list_del_init(&block_group->io_list); 241 242 btrfs_wait_cache_io(trans, block_group, path); 243 btrfs_put_block_group(block_group); 244 } 245 246 /* 247 * now that we've truncated the cache away, its no longer 248 * setup or written 249 */ 250 spin_lock(&block_group->lock); 251 block_group->disk_cache_state = BTRFS_DC_CLEAR; 252 spin_unlock(&block_group->lock); 253 btrfs_free_path(path); 254 } 255 256 btrfs_i_size_write(BTRFS_I(inode), 0); 257 truncate_pagecache(inode, 0); 258 259 /* 260 * We skip the throttling logic for free space cache inodes, so we don't 261 * need to check for -EAGAIN. 
262 */ 263 ret = btrfs_truncate_inode_items(trans, root, inode, 264 0, BTRFS_EXTENT_DATA_KEY); 265 if (ret) 266 goto fail; 267 268 ret = btrfs_update_inode(trans, root, inode); 269 270 fail: 271 if (locked) 272 mutex_unlock(&trans->transaction->cache_write_mutex); 273 if (ret) 274 btrfs_abort_transaction(trans, ret); 275 276 return ret; 277 } 278 279 static void readahead_cache(struct inode *inode) 280 { 281 struct file_ra_state *ra; 282 unsigned long last_index; 283 284 ra = kzalloc(sizeof(*ra), GFP_NOFS); 285 if (!ra) 286 return; 287 288 file_ra_state_init(ra, inode->i_mapping); 289 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 290 291 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 292 293 kfree(ra); 294 } 295 296 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 297 int write) 298 { 299 int num_pages; 300 int check_crcs = 0; 301 302 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 303 304 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 305 check_crcs = 1; 306 307 /* Make sure we can fit our crcs and generation into the first page */ 308 if (write && check_crcs && 309 (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 310 return -ENOSPC; 311 312 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 313 314 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 315 if (!io_ctl->pages) 316 return -ENOMEM; 317 318 io_ctl->num_pages = num_pages; 319 io_ctl->fs_info = btrfs_sb(inode->i_sb); 320 io_ctl->check_crcs = check_crcs; 321 io_ctl->inode = inode; 322 323 return 0; 324 } 325 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 326 327 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 328 { 329 kfree(io_ctl->pages); 330 io_ctl->pages = NULL; 331 } 332 333 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 334 { 335 if (io_ctl->cur) { 336 io_ctl->cur = NULL; 337 io_ctl->orig = NULL; 338 } 339 } 340 341 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 342 { 343 ASSERT(io_ctl->index < io_ctl->num_pages); 344 io_ctl->page = io_ctl->pages[io_ctl->index++]; 345 io_ctl->cur = page_address(io_ctl->page); 346 io_ctl->orig = io_ctl->cur; 347 io_ctl->size = PAGE_SIZE; 348 if (clear) 349 clear_page(io_ctl->cur); 350 } 351 352 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 353 { 354 int i; 355 356 io_ctl_unmap_page(io_ctl); 357 358 for (i = 0; i < io_ctl->num_pages; i++) { 359 if (io_ctl->pages[i]) { 360 ClearPageChecked(io_ctl->pages[i]); 361 unlock_page(io_ctl->pages[i]); 362 put_page(io_ctl->pages[i]); 363 } 364 } 365 } 366 367 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 368 int uptodate) 369 { 370 struct page *page; 371 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 372 int i; 373 374 for (i = 0; i < io_ctl->num_pages; i++) { 375 page = find_or_create_page(inode->i_mapping, i, mask); 376 if (!page) { 377 io_ctl_drop_pages(io_ctl); 378 return -ENOMEM; 379 } 380 io_ctl->pages[i] = page; 381 if (uptodate && !PageUptodate(page)) { 382 btrfs_readpage(NULL, page); 383 lock_page(page); 384 if (!PageUptodate(page)) { 385 btrfs_err(BTRFS_I(inode)->root->fs_info, 386 "error reading free space cache"); 387 io_ctl_drop_pages(io_ctl); 388 return -EIO; 389 } 390 } 391 } 392 393 for (i = 0; i < io_ctl->num_pages; i++) { 394 clear_page_dirty_for_io(io_ctl->pages[i]); 395 set_page_extent_mapped(io_ctl->pages[i]); 396 } 397 398 return 0; 399 } 400 401 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 402 { 403 __le64 *val; 404 
405 io_ctl_map_page(io_ctl, 1); 406 407 /* 408 * Skip the csum areas. If we don't check crcs then we just have a 409 * 64bit chunk at the front of the first page. 410 */ 411 if (io_ctl->check_crcs) { 412 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 413 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 414 } else { 415 io_ctl->cur += sizeof(u64); 416 io_ctl->size -= sizeof(u64) * 2; 417 } 418 419 val = io_ctl->cur; 420 *val = cpu_to_le64(generation); 421 io_ctl->cur += sizeof(u64); 422 } 423 424 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 425 { 426 __le64 *gen; 427 428 /* 429 * Skip the crc area. If we don't check crcs then we just have a 64bit 430 * chunk at the front of the first page. 431 */ 432 if (io_ctl->check_crcs) { 433 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 434 io_ctl->size -= sizeof(u64) + 435 (sizeof(u32) * io_ctl->num_pages); 436 } else { 437 io_ctl->cur += sizeof(u64); 438 io_ctl->size -= sizeof(u64) * 2; 439 } 440 441 gen = io_ctl->cur; 442 if (le64_to_cpu(*gen) != generation) { 443 btrfs_err_rl(io_ctl->fs_info, 444 "space cache generation (%llu) does not match inode (%llu)", 445 *gen, generation); 446 io_ctl_unmap_page(io_ctl); 447 return -EIO; 448 } 449 io_ctl->cur += sizeof(u64); 450 return 0; 451 } 452 453 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 454 { 455 u32 *tmp; 456 u32 crc = ~(u32)0; 457 unsigned offset = 0; 458 459 if (!io_ctl->check_crcs) { 460 io_ctl_unmap_page(io_ctl); 461 return; 462 } 463 464 if (index == 0) 465 offset = sizeof(u32) * io_ctl->num_pages; 466 467 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 468 PAGE_SIZE - offset); 469 btrfs_csum_final(crc, (u8 *)&crc); 470 io_ctl_unmap_page(io_ctl); 471 tmp = page_address(io_ctl->pages[0]); 472 tmp += index; 473 *tmp = crc; 474 } 475 476 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 477 { 478 u32 *tmp, val; 479 u32 crc = ~(u32)0; 480 unsigned offset = 0; 481 482 if (!io_ctl->check_crcs) { 483 io_ctl_map_page(io_ctl, 0); 484 return 0; 485 } 486 487 if (index == 0) 488 offset = sizeof(u32) * io_ctl->num_pages; 489 490 tmp = page_address(io_ctl->pages[0]); 491 tmp += index; 492 val = *tmp; 493 494 io_ctl_map_page(io_ctl, 0); 495 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 496 PAGE_SIZE - offset); 497 btrfs_csum_final(crc, (u8 *)&crc); 498 if (val != crc) { 499 btrfs_err_rl(io_ctl->fs_info, 500 "csum mismatch on free space cache"); 501 io_ctl_unmap_page(io_ctl); 502 return -EIO; 503 } 504 505 return 0; 506 } 507 508 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 509 void *bitmap) 510 { 511 struct btrfs_free_space_entry *entry; 512 513 if (!io_ctl->cur) 514 return -ENOSPC; 515 516 entry = io_ctl->cur; 517 entry->offset = cpu_to_le64(offset); 518 entry->bytes = cpu_to_le64(bytes); 519 entry->type = (bitmap) ? 
BTRFS_FREE_SPACE_BITMAP : 520 BTRFS_FREE_SPACE_EXTENT; 521 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 522 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 523 524 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 525 return 0; 526 527 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 528 529 /* No more pages to map */ 530 if (io_ctl->index >= io_ctl->num_pages) 531 return 0; 532 533 /* map the next page */ 534 io_ctl_map_page(io_ctl, 1); 535 return 0; 536 } 537 538 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 539 { 540 if (!io_ctl->cur) 541 return -ENOSPC; 542 543 /* 544 * If we aren't at the start of the current page, unmap this one and 545 * map the next one if there is any left. 546 */ 547 if (io_ctl->cur != io_ctl->orig) { 548 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 549 if (io_ctl->index >= io_ctl->num_pages) 550 return -ENOSPC; 551 io_ctl_map_page(io_ctl, 0); 552 } 553 554 copy_page(io_ctl->cur, bitmap); 555 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 556 if (io_ctl->index < io_ctl->num_pages) 557 io_ctl_map_page(io_ctl, 0); 558 return 0; 559 } 560 561 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 562 { 563 /* 564 * If we're not on the boundary we know we've modified the page and we 565 * need to crc the page. 566 */ 567 if (io_ctl->cur != io_ctl->orig) 568 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 569 else 570 io_ctl_unmap_page(io_ctl); 571 572 while (io_ctl->index < io_ctl->num_pages) { 573 io_ctl_map_page(io_ctl, 1); 574 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 575 } 576 } 577 578 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 579 struct btrfs_free_space *entry, u8 *type) 580 { 581 struct btrfs_free_space_entry *e; 582 int ret; 583 584 if (!io_ctl->cur) { 585 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 586 if (ret) 587 return ret; 588 } 589 590 e = io_ctl->cur; 591 entry->offset = le64_to_cpu(e->offset); 592 entry->bytes = le64_to_cpu(e->bytes); 593 *type = e->type; 594 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 595 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 596 597 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 598 return 0; 599 600 io_ctl_unmap_page(io_ctl); 601 602 return 0; 603 } 604 605 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 606 struct btrfs_free_space *entry) 607 { 608 int ret; 609 610 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 611 if (ret) 612 return ret; 613 614 copy_page(entry->bitmap, io_ctl->cur); 615 io_ctl_unmap_page(io_ctl); 616 617 return 0; 618 } 619 620 /* 621 * Since we attach pinned extents after the fact we can have contiguous sections 622 * of free space that are split up in entries. This poses a problem with the 623 * tree logging stuff since it could have allocated across what appears to be 2 624 * entries since we would have merged the entries when adding the pinned extents 625 * back to the free space cache. So run through the space cache that we just 626 * loaded and merge contiguous entries. This will make the log replay stuff not 627 * blow up and it will make for nicer allocator behavior. 
628 */ 629 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 630 { 631 struct btrfs_free_space *e, *prev = NULL; 632 struct rb_node *n; 633 634 again: 635 spin_lock(&ctl->tree_lock); 636 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 637 e = rb_entry(n, struct btrfs_free_space, offset_index); 638 if (!prev) 639 goto next; 640 if (e->bitmap || prev->bitmap) 641 goto next; 642 if (prev->offset + prev->bytes == e->offset) { 643 unlink_free_space(ctl, prev); 644 unlink_free_space(ctl, e); 645 prev->bytes += e->bytes; 646 kmem_cache_free(btrfs_free_space_cachep, e); 647 link_free_space(ctl, prev); 648 prev = NULL; 649 spin_unlock(&ctl->tree_lock); 650 goto again; 651 } 652 next: 653 prev = e; 654 } 655 spin_unlock(&ctl->tree_lock); 656 } 657 658 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 659 struct btrfs_free_space_ctl *ctl, 660 struct btrfs_path *path, u64 offset) 661 { 662 struct btrfs_fs_info *fs_info = root->fs_info; 663 struct btrfs_free_space_header *header; 664 struct extent_buffer *leaf; 665 struct btrfs_io_ctl io_ctl; 666 struct btrfs_key key; 667 struct btrfs_free_space *e, *n; 668 LIST_HEAD(bitmaps); 669 u64 num_entries; 670 u64 num_bitmaps; 671 u64 generation; 672 u8 type; 673 int ret = 0; 674 675 /* Nothing in the space cache, goodbye */ 676 if (!i_size_read(inode)) 677 return 0; 678 679 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 680 key.offset = offset; 681 key.type = 0; 682 683 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 684 if (ret < 0) 685 return 0; 686 else if (ret > 0) { 687 btrfs_release_path(path); 688 return 0; 689 } 690 691 ret = -1; 692 693 leaf = path->nodes[0]; 694 header = btrfs_item_ptr(leaf, path->slots[0], 695 struct btrfs_free_space_header); 696 num_entries = btrfs_free_space_entries(leaf, header); 697 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 698 generation = btrfs_free_space_generation(leaf, header); 699 btrfs_release_path(path); 700 701 if (!BTRFS_I(inode)->generation) { 702 btrfs_info(fs_info, 703 "the free space cache file (%llu) is invalid, skip it", 704 offset); 705 return 0; 706 } 707 708 if (BTRFS_I(inode)->generation != generation) { 709 btrfs_err(fs_info, 710 "free space inode generation (%llu) did not match free space cache generation (%llu)", 711 BTRFS_I(inode)->generation, generation); 712 return 0; 713 } 714 715 if (!num_entries) 716 return 0; 717 718 ret = io_ctl_init(&io_ctl, inode, 0); 719 if (ret) 720 return ret; 721 722 readahead_cache(inode); 723 724 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 725 if (ret) 726 goto out; 727 728 ret = io_ctl_check_crc(&io_ctl, 0); 729 if (ret) 730 goto free_cache; 731 732 ret = io_ctl_check_generation(&io_ctl, generation); 733 if (ret) 734 goto free_cache; 735 736 while (num_entries) { 737 e = kmem_cache_zalloc(btrfs_free_space_cachep, 738 GFP_NOFS); 739 if (!e) 740 goto free_cache; 741 742 ret = io_ctl_read_entry(&io_ctl, e, &type); 743 if (ret) { 744 kmem_cache_free(btrfs_free_space_cachep, e); 745 goto free_cache; 746 } 747 748 if (!e->bytes) { 749 kmem_cache_free(btrfs_free_space_cachep, e); 750 goto free_cache; 751 } 752 753 if (type == BTRFS_FREE_SPACE_EXTENT) { 754 spin_lock(&ctl->tree_lock); 755 ret = link_free_space(ctl, e); 756 spin_unlock(&ctl->tree_lock); 757 if (ret) { 758 btrfs_err(fs_info, 759 "Duplicate entries in free space cache, dumping"); 760 kmem_cache_free(btrfs_free_space_cachep, e); 761 goto free_cache; 762 } 763 } else { 764 ASSERT(num_bitmaps); 765 num_bitmaps--; 766 e->bitmap = 
kzalloc(PAGE_SIZE, GFP_NOFS); 767 if (!e->bitmap) { 768 kmem_cache_free( 769 btrfs_free_space_cachep, e); 770 goto free_cache; 771 } 772 spin_lock(&ctl->tree_lock); 773 ret = link_free_space(ctl, e); 774 ctl->total_bitmaps++; 775 ctl->op->recalc_thresholds(ctl); 776 spin_unlock(&ctl->tree_lock); 777 if (ret) { 778 btrfs_err(fs_info, 779 "Duplicate entries in free space cache, dumping"); 780 kmem_cache_free(btrfs_free_space_cachep, e); 781 goto free_cache; 782 } 783 list_add_tail(&e->list, &bitmaps); 784 } 785 786 num_entries--; 787 } 788 789 io_ctl_unmap_page(&io_ctl); 790 791 /* 792 * We add the bitmaps at the end of the entries in order that 793 * the bitmap entries are added to the cache. 794 */ 795 list_for_each_entry_safe(e, n, &bitmaps, list) { 796 list_del_init(&e->list); 797 ret = io_ctl_read_bitmap(&io_ctl, e); 798 if (ret) 799 goto free_cache; 800 } 801 802 io_ctl_drop_pages(&io_ctl); 803 merge_space_tree(ctl); 804 ret = 1; 805 out: 806 io_ctl_free(&io_ctl); 807 return ret; 808 free_cache: 809 io_ctl_drop_pages(&io_ctl); 810 __btrfs_remove_free_space_cache(ctl); 811 goto out; 812 } 813 814 int load_free_space_cache(struct btrfs_fs_info *fs_info, 815 struct btrfs_block_group_cache *block_group) 816 { 817 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 818 struct inode *inode; 819 struct btrfs_path *path; 820 int ret = 0; 821 bool matched; 822 u64 used = btrfs_block_group_used(&block_group->item); 823 824 /* 825 * If this block group has been marked to be cleared for one reason or 826 * another then we can't trust the on disk cache, so just return. 827 */ 828 spin_lock(&block_group->lock); 829 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 830 spin_unlock(&block_group->lock); 831 return 0; 832 } 833 spin_unlock(&block_group->lock); 834 835 path = btrfs_alloc_path(); 836 if (!path) 837 return 0; 838 path->search_commit_root = 1; 839 path->skip_locking = 1; 840 841 inode = lookup_free_space_inode(fs_info, block_group, path); 842 if (IS_ERR(inode)) { 843 btrfs_free_path(path); 844 return 0; 845 } 846 847 /* We may have converted the inode and made the cache invalid. 
*/ 848 spin_lock(&block_group->lock); 849 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 850 spin_unlock(&block_group->lock); 851 btrfs_free_path(path); 852 goto out; 853 } 854 spin_unlock(&block_group->lock); 855 856 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 857 path, block_group->key.objectid); 858 btrfs_free_path(path); 859 if (ret <= 0) 860 goto out; 861 862 spin_lock(&ctl->tree_lock); 863 matched = (ctl->free_space == (block_group->key.offset - used - 864 block_group->bytes_super)); 865 spin_unlock(&ctl->tree_lock); 866 867 if (!matched) { 868 __btrfs_remove_free_space_cache(ctl); 869 btrfs_warn(fs_info, 870 "block group %llu has wrong amount of free space", 871 block_group->key.objectid); 872 ret = -1; 873 } 874 out: 875 if (ret < 0) { 876 /* This cache is bogus, make sure it gets cleared */ 877 spin_lock(&block_group->lock); 878 block_group->disk_cache_state = BTRFS_DC_CLEAR; 879 spin_unlock(&block_group->lock); 880 ret = 0; 881 882 btrfs_warn(fs_info, 883 "failed to load free space cache for block group %llu, rebuilding it now", 884 block_group->key.objectid); 885 } 886 887 iput(inode); 888 return ret; 889 } 890 891 static noinline_for_stack 892 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 893 struct btrfs_free_space_ctl *ctl, 894 struct btrfs_block_group_cache *block_group, 895 int *entries, int *bitmaps, 896 struct list_head *bitmap_list) 897 { 898 int ret; 899 struct btrfs_free_cluster *cluster = NULL; 900 struct btrfs_free_cluster *cluster_locked = NULL; 901 struct rb_node *node = rb_first(&ctl->free_space_offset); 902 struct btrfs_trim_range *trim_entry; 903 904 /* Get the cluster for this block_group if it exists */ 905 if (block_group && !list_empty(&block_group->cluster_list)) { 906 cluster = list_entry(block_group->cluster_list.next, 907 struct btrfs_free_cluster, 908 block_group_list); 909 } 910 911 if (!node && cluster) { 912 cluster_locked = cluster; 913 spin_lock(&cluster_locked->lock); 914 node = rb_first(&cluster->root); 915 cluster = NULL; 916 } 917 918 /* Write out the extent entries */ 919 while (node) { 920 struct btrfs_free_space *e; 921 922 e = rb_entry(node, struct btrfs_free_space, offset_index); 923 *entries += 1; 924 925 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 926 e->bitmap); 927 if (ret) 928 goto fail; 929 930 if (e->bitmap) { 931 list_add_tail(&e->list, bitmap_list); 932 *bitmaps += 1; 933 } 934 node = rb_next(node); 935 if (!node && cluster) { 936 node = rb_first(&cluster->root); 937 cluster_locked = cluster; 938 spin_lock(&cluster_locked->lock); 939 cluster = NULL; 940 } 941 } 942 if (cluster_locked) { 943 spin_unlock(&cluster_locked->lock); 944 cluster_locked = NULL; 945 } 946 947 /* 948 * Make sure we don't miss any range that was removed from our rbtree 949 * because trimming is running. Otherwise after a umount+mount (or crash 950 * after committing the transaction) we would leak free space and get 951 * an inconsistent free space cache report from fsck. 
952 */ 953 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 954 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 955 trim_entry->bytes, NULL); 956 if (ret) 957 goto fail; 958 *entries += 1; 959 } 960 961 return 0; 962 fail: 963 if (cluster_locked) 964 spin_unlock(&cluster_locked->lock); 965 return -ENOSPC; 966 } 967 968 static noinline_for_stack int 969 update_cache_item(struct btrfs_trans_handle *trans, 970 struct btrfs_root *root, 971 struct inode *inode, 972 struct btrfs_path *path, u64 offset, 973 int entries, int bitmaps) 974 { 975 struct btrfs_key key; 976 struct btrfs_free_space_header *header; 977 struct extent_buffer *leaf; 978 int ret; 979 980 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 981 key.offset = offset; 982 key.type = 0; 983 984 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 985 if (ret < 0) { 986 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 987 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 988 goto fail; 989 } 990 leaf = path->nodes[0]; 991 if (ret > 0) { 992 struct btrfs_key found_key; 993 ASSERT(path->slots[0]); 994 path->slots[0]--; 995 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 996 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 997 found_key.offset != offset) { 998 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 999 inode->i_size - 1, 1000 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1001 NULL); 1002 btrfs_release_path(path); 1003 goto fail; 1004 } 1005 } 1006 1007 BTRFS_I(inode)->generation = trans->transid; 1008 header = btrfs_item_ptr(leaf, path->slots[0], 1009 struct btrfs_free_space_header); 1010 btrfs_set_free_space_entries(leaf, header, entries); 1011 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1012 btrfs_set_free_space_generation(leaf, header, trans->transid); 1013 btrfs_mark_buffer_dirty(leaf); 1014 btrfs_release_path(path); 1015 1016 return 0; 1017 1018 fail: 1019 return -1; 1020 } 1021 1022 static noinline_for_stack int 1023 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1024 struct btrfs_block_group_cache *block_group, 1025 struct btrfs_io_ctl *io_ctl, 1026 int *entries) 1027 { 1028 u64 start, extent_start, extent_end, len; 1029 struct extent_io_tree *unpin = NULL; 1030 int ret; 1031 1032 if (!block_group) 1033 return 0; 1034 1035 /* 1036 * We want to add any pinned extents to our free space cache 1037 * so we don't leak the space 1038 * 1039 * We shouldn't have switched the pinned extents yet so this is the 1040 * right one 1041 */ 1042 unpin = fs_info->pinned_extents; 1043 1044 start = block_group->key.objectid; 1045 1046 while (start < block_group->key.objectid + block_group->key.offset) { 1047 ret = find_first_extent_bit(unpin, start, 1048 &extent_start, &extent_end, 1049 EXTENT_DIRTY, NULL); 1050 if (ret) 1051 return 0; 1052 1053 /* This pinned extent is out of our range */ 1054 if (extent_start >= block_group->key.objectid + 1055 block_group->key.offset) 1056 return 0; 1057 1058 extent_start = max(extent_start, start); 1059 extent_end = min(block_group->key.objectid + 1060 block_group->key.offset, extent_end + 1); 1061 len = extent_end - extent_start; 1062 1063 *entries += 1; 1064 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1065 if (ret) 1066 return -ENOSPC; 1067 1068 start = extent_end; 1069 } 1070 1071 return 0; 1072 } 1073 1074 static noinline_for_stack int 1075 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1076 { 1077 struct btrfs_free_space *entry, *next; 1078 int ret; 1079 1080 /* Write out the bitmaps */ 1081 
list_for_each_entry_safe(entry, next, bitmap_list, list) { 1082 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1083 if (ret) 1084 return -ENOSPC; 1085 list_del_init(&entry->list); 1086 } 1087 1088 return 0; 1089 } 1090 1091 static int flush_dirty_cache(struct inode *inode) 1092 { 1093 int ret; 1094 1095 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1096 if (ret) 1097 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1098 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 1099 1100 return ret; 1101 } 1102 1103 static void noinline_for_stack 1104 cleanup_bitmap_list(struct list_head *bitmap_list) 1105 { 1106 struct btrfs_free_space *entry, *next; 1107 1108 list_for_each_entry_safe(entry, next, bitmap_list, list) 1109 list_del_init(&entry->list); 1110 } 1111 1112 static void noinline_for_stack 1113 cleanup_write_cache_enospc(struct inode *inode, 1114 struct btrfs_io_ctl *io_ctl, 1115 struct extent_state **cached_state) 1116 { 1117 io_ctl_drop_pages(io_ctl); 1118 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1119 i_size_read(inode) - 1, cached_state); 1120 } 1121 1122 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1123 struct btrfs_trans_handle *trans, 1124 struct btrfs_block_group_cache *block_group, 1125 struct btrfs_io_ctl *io_ctl, 1126 struct btrfs_path *path, u64 offset) 1127 { 1128 int ret; 1129 struct inode *inode = io_ctl->inode; 1130 1131 if (!inode) 1132 return 0; 1133 1134 /* Flush the dirty pages in the cache file. */ 1135 ret = flush_dirty_cache(inode); 1136 if (ret) 1137 goto out; 1138 1139 /* Update the cache item to tell everyone this cache file is valid. */ 1140 ret = update_cache_item(trans, root, inode, path, offset, 1141 io_ctl->entries, io_ctl->bitmaps); 1142 out: 1143 io_ctl_free(io_ctl); 1144 if (ret) { 1145 invalidate_inode_pages2(inode->i_mapping); 1146 BTRFS_I(inode)->generation = 0; 1147 if (block_group) { 1148 #ifdef DEBUG 1149 btrfs_err(root->fs_info, 1150 "failed to write free space cache for block group %llu", 1151 block_group->key.objectid); 1152 #endif 1153 } 1154 } 1155 btrfs_update_inode(trans, root, inode); 1156 1157 if (block_group) { 1158 /* the dirty list is protected by the dirty_bgs_lock */ 1159 spin_lock(&trans->transaction->dirty_bgs_lock); 1160 1161 /* the disk_cache_state is protected by the block group lock */ 1162 spin_lock(&block_group->lock); 1163 1164 /* 1165 * only mark this as written if we didn't get put back on 1166 * the dirty list while waiting for IO. 
Otherwise our 1167 * cache state won't be right, and we won't get written again 1168 */ 1169 if (!ret && list_empty(&block_group->dirty_list)) 1170 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1171 else if (ret) 1172 block_group->disk_cache_state = BTRFS_DC_ERROR; 1173 1174 spin_unlock(&block_group->lock); 1175 spin_unlock(&trans->transaction->dirty_bgs_lock); 1176 io_ctl->inode = NULL; 1177 iput(inode); 1178 } 1179 1180 return ret; 1181 1182 } 1183 1184 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1185 struct btrfs_trans_handle *trans, 1186 struct btrfs_io_ctl *io_ctl, 1187 struct btrfs_path *path) 1188 { 1189 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1190 } 1191 1192 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1193 struct btrfs_block_group_cache *block_group, 1194 struct btrfs_path *path) 1195 { 1196 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1197 block_group, &block_group->io_ctl, 1198 path, block_group->key.objectid); 1199 } 1200 1201 /** 1202 * __btrfs_write_out_cache - write out cached info to an inode 1203 * @root - the root the inode belongs to 1204 * @ctl - the free space cache we are going to write out 1205 * @block_group - the block_group for this cache if it belongs to a block_group 1206 * @trans - the trans handle 1207 * 1208 * This function writes out a free space cache struct to disk for quick recovery 1209 * on mount. This will return 0 if it was successful in writing the cache out, 1210 * or an errno if it was not. 1211 */ 1212 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1213 struct btrfs_free_space_ctl *ctl, 1214 struct btrfs_block_group_cache *block_group, 1215 struct btrfs_io_ctl *io_ctl, 1216 struct btrfs_trans_handle *trans) 1217 { 1218 struct btrfs_fs_info *fs_info = root->fs_info; 1219 struct extent_state *cached_state = NULL; 1220 LIST_HEAD(bitmap_list); 1221 int entries = 0; 1222 int bitmaps = 0; 1223 int ret; 1224 int must_iput = 0; 1225 1226 if (!i_size_read(inode)) 1227 return -EIO; 1228 1229 WARN_ON(io_ctl->pages); 1230 ret = io_ctl_init(io_ctl, inode, 1); 1231 if (ret) 1232 return ret; 1233 1234 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1235 down_write(&block_group->data_rwsem); 1236 spin_lock(&block_group->lock); 1237 if (block_group->delalloc_bytes) { 1238 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1239 spin_unlock(&block_group->lock); 1240 up_write(&block_group->data_rwsem); 1241 BTRFS_I(inode)->generation = 0; 1242 ret = 0; 1243 must_iput = 1; 1244 goto out; 1245 } 1246 spin_unlock(&block_group->lock); 1247 } 1248 1249 /* Lock all pages first so we can lock the extent safely. */ 1250 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1251 if (ret) 1252 goto out_unlock; 1253 1254 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1255 &cached_state); 1256 1257 io_ctl_set_generation(io_ctl, trans->transid); 1258 1259 mutex_lock(&ctl->cache_writeout_mutex); 1260 /* Write out the extent entries in the free space cache */ 1261 spin_lock(&ctl->tree_lock); 1262 ret = write_cache_extent_entries(io_ctl, ctl, 1263 block_group, &entries, &bitmaps, 1264 &bitmap_list); 1265 if (ret) 1266 goto out_nospc_locked; 1267 1268 /* 1269 * Some spaces that are freed in the current transaction are pinned, 1270 * they will be added into free space cache after the transaction is 1271 * committed, we shouldn't lose them. 
1272 * 1273 * If this changes while we are working we'll get added back to 1274 * the dirty list and redo it. No locking needed 1275 */ 1276 ret = write_pinned_extent_entries(fs_info, block_group, 1277 io_ctl, &entries); 1278 if (ret) 1279 goto out_nospc_locked; 1280 1281 /* 1282 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1283 * locked while doing it because a concurrent trim can be manipulating 1284 * or freeing the bitmap. 1285 */ 1286 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1287 spin_unlock(&ctl->tree_lock); 1288 mutex_unlock(&ctl->cache_writeout_mutex); 1289 if (ret) 1290 goto out_nospc; 1291 1292 /* Zero out the rest of the pages just to make sure */ 1293 io_ctl_zero_remaining_pages(io_ctl); 1294 1295 /* Everything is written out, now we dirty the pages in the file. */ 1296 ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0, 1297 i_size_read(inode), &cached_state); 1298 if (ret) 1299 goto out_nospc; 1300 1301 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1302 up_write(&block_group->data_rwsem); 1303 /* 1304 * Release the pages and unlock the extent, we will flush 1305 * them out later 1306 */ 1307 io_ctl_drop_pages(io_ctl); 1308 1309 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1310 i_size_read(inode) - 1, &cached_state); 1311 1312 /* 1313 * at this point the pages are under IO and we're happy, 1314 * The caller is responsible for waiting on them and updating the 1315 * the cache and the inode 1316 */ 1317 io_ctl->entries = entries; 1318 io_ctl->bitmaps = bitmaps; 1319 1320 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1321 if (ret) 1322 goto out; 1323 1324 return 0; 1325 1326 out: 1327 io_ctl->inode = NULL; 1328 io_ctl_free(io_ctl); 1329 if (ret) { 1330 invalidate_inode_pages2(inode->i_mapping); 1331 BTRFS_I(inode)->generation = 0; 1332 } 1333 btrfs_update_inode(trans, root, inode); 1334 if (must_iput) 1335 iput(inode); 1336 return ret; 1337 1338 out_nospc_locked: 1339 cleanup_bitmap_list(&bitmap_list); 1340 spin_unlock(&ctl->tree_lock); 1341 mutex_unlock(&ctl->cache_writeout_mutex); 1342 1343 out_nospc: 1344 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1345 1346 out_unlock: 1347 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1348 up_write(&block_group->data_rwsem); 1349 1350 goto out; 1351 } 1352 1353 int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 1354 struct btrfs_trans_handle *trans, 1355 struct btrfs_block_group_cache *block_group, 1356 struct btrfs_path *path) 1357 { 1358 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1359 struct inode *inode; 1360 int ret = 0; 1361 1362 spin_lock(&block_group->lock); 1363 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1364 spin_unlock(&block_group->lock); 1365 return 0; 1366 } 1367 spin_unlock(&block_group->lock); 1368 1369 inode = lookup_free_space_inode(fs_info, block_group, path); 1370 if (IS_ERR(inode)) 1371 return 0; 1372 1373 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1374 block_group, &block_group->io_ctl, trans); 1375 if (ret) { 1376 #ifdef DEBUG 1377 btrfs_err(fs_info, 1378 "failed to write free space cache for block group %llu", 1379 block_group->key.objectid); 1380 #endif 1381 spin_lock(&block_group->lock); 1382 block_group->disk_cache_state = BTRFS_DC_ERROR; 1383 spin_unlock(&block_group->lock); 1384 1385 block_group->io_ctl.inode = NULL; 1386 iput(inode); 1387 } 1388 1389 /* 1390 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1391 * to wait for IO and 
put the inode 1392 */ 1393 1394 return ret; 1395 } 1396 1397 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1398 u64 offset) 1399 { 1400 ASSERT(offset >= bitmap_start); 1401 offset -= bitmap_start; 1402 return (unsigned long)(div_u64(offset, unit)); 1403 } 1404 1405 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1406 { 1407 return (unsigned long)(div_u64(bytes, unit)); 1408 } 1409 1410 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1411 u64 offset) 1412 { 1413 u64 bitmap_start; 1414 u64 bytes_per_bitmap; 1415 1416 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1417 bitmap_start = offset - ctl->start; 1418 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1419 bitmap_start *= bytes_per_bitmap; 1420 bitmap_start += ctl->start; 1421 1422 return bitmap_start; 1423 } 1424 1425 static int tree_insert_offset(struct rb_root *root, u64 offset, 1426 struct rb_node *node, int bitmap) 1427 { 1428 struct rb_node **p = &root->rb_node; 1429 struct rb_node *parent = NULL; 1430 struct btrfs_free_space *info; 1431 1432 while (*p) { 1433 parent = *p; 1434 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1435 1436 if (offset < info->offset) { 1437 p = &(*p)->rb_left; 1438 } else if (offset > info->offset) { 1439 p = &(*p)->rb_right; 1440 } else { 1441 /* 1442 * we could have a bitmap entry and an extent entry 1443 * share the same offset. If this is the case, we want 1444 * the extent entry to always be found first if we do a 1445 * linear search through the tree, since we want to have 1446 * the quickest allocation time, and allocating from an 1447 * extent is faster than allocating from a bitmap. So 1448 * if we're inserting a bitmap and we find an entry at 1449 * this offset, we want to go right, or after this entry 1450 * logically. If we are inserting an extent and we've 1451 * found a bitmap, we want to go left, or before 1452 * logically. 1453 */ 1454 if (bitmap) { 1455 if (info->bitmap) { 1456 WARN_ON_ONCE(1); 1457 return -EEXIST; 1458 } 1459 p = &(*p)->rb_right; 1460 } else { 1461 if (!info->bitmap) { 1462 WARN_ON_ONCE(1); 1463 return -EEXIST; 1464 } 1465 p = &(*p)->rb_left; 1466 } 1467 } 1468 } 1469 1470 rb_link_node(node, parent, p); 1471 rb_insert_color(node, root); 1472 1473 return 0; 1474 } 1475 1476 /* 1477 * searches the tree for the given offset. 1478 * 1479 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1480 * want a section that has at least bytes size and comes at or after the given 1481 * offset. 1482 */ 1483 static struct btrfs_free_space * 1484 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1485 u64 offset, int bitmap_only, int fuzzy) 1486 { 1487 struct rb_node *n = ctl->free_space_offset.rb_node; 1488 struct btrfs_free_space *entry, *prev = NULL; 1489 1490 /* find entry that is closest to the 'offset' */ 1491 while (1) { 1492 if (!n) { 1493 entry = NULL; 1494 break; 1495 } 1496 1497 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1498 prev = entry; 1499 1500 if (offset < entry->offset) 1501 n = n->rb_left; 1502 else if (offset > entry->offset) 1503 n = n->rb_right; 1504 else 1505 break; 1506 } 1507 1508 if (bitmap_only) { 1509 if (!entry) 1510 return NULL; 1511 if (entry->bitmap) 1512 return entry; 1513 1514 /* 1515 * bitmap entry and extent entry may share same offset, 1516 * in that case, bitmap entry comes after extent entry. 
1517 */ 1518 n = rb_next(n); 1519 if (!n) 1520 return NULL; 1521 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1522 if (entry->offset != offset) 1523 return NULL; 1524 1525 WARN_ON(!entry->bitmap); 1526 return entry; 1527 } else if (entry) { 1528 if (entry->bitmap) { 1529 /* 1530 * if previous extent entry covers the offset, 1531 * we should return it instead of the bitmap entry 1532 */ 1533 n = rb_prev(&entry->offset_index); 1534 if (n) { 1535 prev = rb_entry(n, struct btrfs_free_space, 1536 offset_index); 1537 if (!prev->bitmap && 1538 prev->offset + prev->bytes > offset) 1539 entry = prev; 1540 } 1541 } 1542 return entry; 1543 } 1544 1545 if (!prev) 1546 return NULL; 1547 1548 /* find last entry before the 'offset' */ 1549 entry = prev; 1550 if (entry->offset > offset) { 1551 n = rb_prev(&entry->offset_index); 1552 if (n) { 1553 entry = rb_entry(n, struct btrfs_free_space, 1554 offset_index); 1555 ASSERT(entry->offset <= offset); 1556 } else { 1557 if (fuzzy) 1558 return entry; 1559 else 1560 return NULL; 1561 } 1562 } 1563 1564 if (entry->bitmap) { 1565 n = rb_prev(&entry->offset_index); 1566 if (n) { 1567 prev = rb_entry(n, struct btrfs_free_space, 1568 offset_index); 1569 if (!prev->bitmap && 1570 prev->offset + prev->bytes > offset) 1571 return prev; 1572 } 1573 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1574 return entry; 1575 } else if (entry->offset + entry->bytes > offset) 1576 return entry; 1577 1578 if (!fuzzy) 1579 return NULL; 1580 1581 while (1) { 1582 if (entry->bitmap) { 1583 if (entry->offset + BITS_PER_BITMAP * 1584 ctl->unit > offset) 1585 break; 1586 } else { 1587 if (entry->offset + entry->bytes > offset) 1588 break; 1589 } 1590 1591 n = rb_next(&entry->offset_index); 1592 if (!n) 1593 return NULL; 1594 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1595 } 1596 return entry; 1597 } 1598 1599 static inline void 1600 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1601 struct btrfs_free_space *info) 1602 { 1603 rb_erase(&info->offset_index, &ctl->free_space_offset); 1604 ctl->free_extents--; 1605 } 1606 1607 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1608 struct btrfs_free_space *info) 1609 { 1610 __unlink_free_space(ctl, info); 1611 ctl->free_space -= info->bytes; 1612 } 1613 1614 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1615 struct btrfs_free_space *info) 1616 { 1617 int ret = 0; 1618 1619 ASSERT(info->bytes || info->bitmap); 1620 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1621 &info->offset_index, (info->bitmap != NULL)); 1622 if (ret) 1623 return ret; 1624 1625 ctl->free_space += info->bytes; 1626 ctl->free_extents++; 1627 return ret; 1628 } 1629 1630 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1631 { 1632 struct btrfs_block_group_cache *block_group = ctl->private; 1633 u64 max_bytes; 1634 u64 bitmap_bytes; 1635 u64 extent_bytes; 1636 u64 size = block_group->key.offset; 1637 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1638 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1639 1640 max_bitmaps = max_t(u64, max_bitmaps, 1); 1641 1642 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1643 1644 /* 1645 * The goal is to keep the total amount of memory used per 1gb of space 1646 * at or below 32k, so we need to adjust how much memory we allow to be 1647 * used by extent based free space tracking 1648 */ 1649 if (size < SZ_1G) 1650 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1651 else 1652 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1653 1654 /* 1655 * we want to account for 1 more bitmap than what we have so we can make 1656 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1657 * we add more bitmaps. 1658 */ 1659 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1660 1661 if (bitmap_bytes >= max_bytes) { 1662 ctl->extents_thresh = 0; 1663 return; 1664 } 1665 1666 /* 1667 * we want the extent entry threshold to always be at most 1/2 the max 1668 * bytes we can have, or whatever is less than that. 1669 */ 1670 extent_bytes = max_bytes - bitmap_bytes; 1671 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1672 1673 ctl->extents_thresh = 1674 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1675 } 1676 1677 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1678 struct btrfs_free_space *info, 1679 u64 offset, u64 bytes) 1680 { 1681 unsigned long start, count; 1682 1683 start = offset_to_bit(info->offset, ctl->unit, offset); 1684 count = bytes_to_bits(bytes, ctl->unit); 1685 ASSERT(start + count <= BITS_PER_BITMAP); 1686 1687 bitmap_clear(info->bitmap, start, count); 1688 1689 info->bytes -= bytes; 1690 if (info->max_extent_size > ctl->unit) 1691 info->max_extent_size = 0; 1692 } 1693 1694 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1695 struct btrfs_free_space *info, u64 offset, 1696 u64 bytes) 1697 { 1698 __bitmap_clear_bits(ctl, info, offset, bytes); 1699 ctl->free_space -= bytes; 1700 } 1701 1702 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1703 struct btrfs_free_space *info, u64 offset, 1704 u64 bytes) 1705 { 1706 unsigned long start, count; 1707 1708 start = offset_to_bit(info->offset, ctl->unit, offset); 1709 count = bytes_to_bits(bytes, ctl->unit); 1710 ASSERT(start + count <= BITS_PER_BITMAP); 1711 1712 bitmap_set(info->bitmap, start, count); 1713 1714 info->bytes += bytes; 1715 ctl->free_space += bytes; 1716 } 1717 1718 /* 1719 * If we can not find suitable extent, we will use bytes to record 1720 * the size of the max extent. 1721 */ 1722 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1723 struct btrfs_free_space *bitmap_info, u64 *offset, 1724 u64 *bytes, bool for_alloc) 1725 { 1726 unsigned long found_bits = 0; 1727 unsigned long max_bits = 0; 1728 unsigned long bits, i; 1729 unsigned long next_zero; 1730 unsigned long extent_bits; 1731 1732 /* 1733 * Skip searching the bitmap if we don't have a contiguous section that 1734 * is large enough for this allocation. 
1735 */ 1736 if (for_alloc && 1737 bitmap_info->max_extent_size && 1738 bitmap_info->max_extent_size < *bytes) { 1739 *bytes = bitmap_info->max_extent_size; 1740 return -1; 1741 } 1742 1743 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1744 max_t(u64, *offset, bitmap_info->offset)); 1745 bits = bytes_to_bits(*bytes, ctl->unit); 1746 1747 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1748 if (for_alloc && bits == 1) { 1749 found_bits = 1; 1750 break; 1751 } 1752 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1753 BITS_PER_BITMAP, i); 1754 extent_bits = next_zero - i; 1755 if (extent_bits >= bits) { 1756 found_bits = extent_bits; 1757 break; 1758 } else if (extent_bits > max_bits) { 1759 max_bits = extent_bits; 1760 } 1761 i = next_zero; 1762 } 1763 1764 if (found_bits) { 1765 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1766 *bytes = (u64)(found_bits) * ctl->unit; 1767 return 0; 1768 } 1769 1770 *bytes = (u64)(max_bits) * ctl->unit; 1771 bitmap_info->max_extent_size = *bytes; 1772 return -1; 1773 } 1774 1775 static inline u64 get_max_extent_size(struct btrfs_free_space *entry) 1776 { 1777 if (entry->bitmap) 1778 return entry->max_extent_size; 1779 return entry->bytes; 1780 } 1781 1782 /* Cache the size of the max extent in bytes */ 1783 static struct btrfs_free_space * 1784 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1785 unsigned long align, u64 *max_extent_size) 1786 { 1787 struct btrfs_free_space *entry; 1788 struct rb_node *node; 1789 u64 tmp; 1790 u64 align_off; 1791 int ret; 1792 1793 if (!ctl->free_space_offset.rb_node) 1794 goto out; 1795 1796 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1797 if (!entry) 1798 goto out; 1799 1800 for (node = &entry->offset_index; node; node = rb_next(node)) { 1801 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1802 if (entry->bytes < *bytes) { 1803 *max_extent_size = max(get_max_extent_size(entry), 1804 *max_extent_size); 1805 continue; 1806 } 1807 1808 /* make sure the space returned is big enough 1809 * to match our requested alignment 1810 */ 1811 if (*bytes >= align) { 1812 tmp = entry->offset - ctl->start + align - 1; 1813 tmp = div64_u64(tmp, align); 1814 tmp = tmp * align + ctl->start; 1815 align_off = tmp - entry->offset; 1816 } else { 1817 align_off = 0; 1818 tmp = entry->offset; 1819 } 1820 1821 if (entry->bytes < *bytes + align_off) { 1822 *max_extent_size = max(get_max_extent_size(entry), 1823 *max_extent_size); 1824 continue; 1825 } 1826 1827 if (entry->bitmap) { 1828 u64 size = *bytes; 1829 1830 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1831 if (!ret) { 1832 *offset = tmp; 1833 *bytes = size; 1834 return entry; 1835 } else { 1836 *max_extent_size = 1837 max(get_max_extent_size(entry), 1838 *max_extent_size); 1839 } 1840 continue; 1841 } 1842 1843 *offset = tmp; 1844 *bytes = entry->bytes - align_off; 1845 return entry; 1846 } 1847 out: 1848 return NULL; 1849 } 1850 1851 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1852 struct btrfs_free_space *info, u64 offset) 1853 { 1854 info->offset = offset_to_bitmap(ctl, offset); 1855 info->bytes = 0; 1856 INIT_LIST_HEAD(&info->list); 1857 link_free_space(ctl, info); 1858 ctl->total_bitmaps++; 1859 1860 ctl->op->recalc_thresholds(ctl); 1861 } 1862 1863 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1864 struct btrfs_free_space *bitmap_info) 1865 { 1866 unlink_free_space(ctl, bitmap_info); 1867 kfree(bitmap_info->bitmap); 1868 
kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1869 ctl->total_bitmaps--; 1870 ctl->op->recalc_thresholds(ctl); 1871 } 1872 1873 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1874 struct btrfs_free_space *bitmap_info, 1875 u64 *offset, u64 *bytes) 1876 { 1877 u64 end; 1878 u64 search_start, search_bytes; 1879 int ret; 1880 1881 again: 1882 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1883 1884 /* 1885 * We need to search for bits in this bitmap. We could only cover some 1886 * of the extent in this bitmap thanks to how we add space, so we need 1887 * to search for as much as it as we can and clear that amount, and then 1888 * go searching for the next bit. 1889 */ 1890 search_start = *offset; 1891 search_bytes = ctl->unit; 1892 search_bytes = min(search_bytes, end - search_start + 1); 1893 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes, 1894 false); 1895 if (ret < 0 || search_start != *offset) 1896 return -EINVAL; 1897 1898 /* We may have found more bits than what we need */ 1899 search_bytes = min(search_bytes, *bytes); 1900 1901 /* Cannot clear past the end of the bitmap */ 1902 search_bytes = min(search_bytes, end - search_start + 1); 1903 1904 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); 1905 *offset += search_bytes; 1906 *bytes -= search_bytes; 1907 1908 if (*bytes) { 1909 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1910 if (!bitmap_info->bytes) 1911 free_bitmap(ctl, bitmap_info); 1912 1913 /* 1914 * no entry after this bitmap, but we still have bytes to 1915 * remove, so something has gone wrong. 1916 */ 1917 if (!next) 1918 return -EINVAL; 1919 1920 bitmap_info = rb_entry(next, struct btrfs_free_space, 1921 offset_index); 1922 1923 /* 1924 * if the next entry isn't a bitmap we need to return to let the 1925 * extent stuff do its work. 1926 */ 1927 if (!bitmap_info->bitmap) 1928 return -EAGAIN; 1929 1930 /* 1931 * Ok the next item is a bitmap, but it may not actually hold 1932 * the information for the rest of this free space stuff, so 1933 * look for it, and if we don't find it return so we can try 1934 * everything over again. 1935 */ 1936 search_start = *offset; 1937 search_bytes = ctl->unit; 1938 ret = search_bitmap(ctl, bitmap_info, &search_start, 1939 &search_bytes, false); 1940 if (ret < 0 || search_start != *offset) 1941 return -EAGAIN; 1942 1943 goto again; 1944 } else if (!bitmap_info->bytes) 1945 free_bitmap(ctl, bitmap_info); 1946 1947 return 0; 1948 } 1949 1950 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, 1951 struct btrfs_free_space *info, u64 offset, 1952 u64 bytes) 1953 { 1954 u64 bytes_to_set = 0; 1955 u64 end; 1956 1957 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1958 1959 bytes_to_set = min(end - offset, bytes); 1960 1961 bitmap_set_bits(ctl, info, offset, bytes_to_set); 1962 1963 /* 1964 * We set some bytes, we have no idea what the max extent size is 1965 * anymore. 
1966 */ 1967 info->max_extent_size = 0; 1968 1969 return bytes_to_set; 1970 1971 } 1972 1973 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1974 struct btrfs_free_space *info) 1975 { 1976 struct btrfs_block_group_cache *block_group = ctl->private; 1977 struct btrfs_fs_info *fs_info = block_group->fs_info; 1978 bool forced = false; 1979 1980 #ifdef CONFIG_BTRFS_DEBUG 1981 if (btrfs_should_fragment_free_space(block_group)) 1982 forced = true; 1983 #endif 1984 1985 /* 1986 * If we are below the extents threshold then we can add this as an 1987 * extent, and don't have to deal with the bitmap 1988 */ 1989 if (!forced && ctl->free_extents < ctl->extents_thresh) { 1990 /* 1991 * If this block group has some small extents we don't want to 1992 * use up all of our free slots in the cache with them, we want 1993 * to reserve them to larger extents, however if we have plenty 1994 * of cache left then go ahead an dadd them, no sense in adding 1995 * the overhead of a bitmap if we don't have to. 1996 */ 1997 if (info->bytes <= fs_info->sectorsize * 4) { 1998 if (ctl->free_extents * 2 <= ctl->extents_thresh) 1999 return false; 2000 } else { 2001 return false; 2002 } 2003 } 2004 2005 /* 2006 * The original block groups from mkfs can be really small, like 8 2007 * megabytes, so don't bother with a bitmap for those entries. However 2008 * some block groups can be smaller than what a bitmap would cover but 2009 * are still large enough that they could overflow the 32k memory limit, 2010 * so allow those block groups to still be allowed to have a bitmap 2011 * entry. 2012 */ 2013 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) 2014 return false; 2015 2016 return true; 2017 } 2018 2019 static const struct btrfs_free_space_op free_space_op = { 2020 .recalc_thresholds = recalculate_thresholds, 2021 .use_bitmap = use_bitmap, 2022 }; 2023 2024 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2025 struct btrfs_free_space *info) 2026 { 2027 struct btrfs_free_space *bitmap_info; 2028 struct btrfs_block_group_cache *block_group = NULL; 2029 int added = 0; 2030 u64 bytes, offset, bytes_added; 2031 int ret; 2032 2033 bytes = info->bytes; 2034 offset = info->offset; 2035 2036 if (!ctl->op->use_bitmap(ctl, info)) 2037 return 0; 2038 2039 if (ctl->op == &free_space_op) 2040 block_group = ctl->private; 2041 again: 2042 /* 2043 * Since we link bitmaps right into the cluster we need to see if we 2044 * have a cluster here, and if so and it has our bitmap we need to add 2045 * the free space to that bitmap. 
2046 */ 2047 if (block_group && !list_empty(&block_group->cluster_list)) { 2048 struct btrfs_free_cluster *cluster; 2049 struct rb_node *node; 2050 struct btrfs_free_space *entry; 2051 2052 cluster = list_entry(block_group->cluster_list.next, 2053 struct btrfs_free_cluster, 2054 block_group_list); 2055 spin_lock(&cluster->lock); 2056 node = rb_first(&cluster->root); 2057 if (!node) { 2058 spin_unlock(&cluster->lock); 2059 goto no_cluster_bitmap; 2060 } 2061 2062 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2063 if (!entry->bitmap) { 2064 spin_unlock(&cluster->lock); 2065 goto no_cluster_bitmap; 2066 } 2067 2068 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2069 bytes_added = add_bytes_to_bitmap(ctl, entry, 2070 offset, bytes); 2071 bytes -= bytes_added; 2072 offset += bytes_added; 2073 } 2074 spin_unlock(&cluster->lock); 2075 if (!bytes) { 2076 ret = 1; 2077 goto out; 2078 } 2079 } 2080 2081 no_cluster_bitmap: 2082 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2083 1, 0); 2084 if (!bitmap_info) { 2085 ASSERT(added == 0); 2086 goto new_bitmap; 2087 } 2088 2089 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2090 bytes -= bytes_added; 2091 offset += bytes_added; 2092 added = 0; 2093 2094 if (!bytes) { 2095 ret = 1; 2096 goto out; 2097 } else 2098 goto again; 2099 2100 new_bitmap: 2101 if (info && info->bitmap) { 2102 add_new_bitmap(ctl, info, offset); 2103 added = 1; 2104 info = NULL; 2105 goto again; 2106 } else { 2107 spin_unlock(&ctl->tree_lock); 2108 2109 /* no pre-allocated info, allocate a new one */ 2110 if (!info) { 2111 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2112 GFP_NOFS); 2113 if (!info) { 2114 spin_lock(&ctl->tree_lock); 2115 ret = -ENOMEM; 2116 goto out; 2117 } 2118 } 2119 2120 /* allocate the bitmap */ 2121 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2122 spin_lock(&ctl->tree_lock); 2123 if (!info->bitmap) { 2124 ret = -ENOMEM; 2125 goto out; 2126 } 2127 goto again; 2128 } 2129 2130 out: 2131 if (info) { 2132 kfree(info->bitmap); 2133 kmem_cache_free(btrfs_free_space_cachep, info); 2134 } 2135 2136 return ret; 2137 } 2138 2139 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2140 struct btrfs_free_space *info, bool update_stat) 2141 { 2142 struct btrfs_free_space *left_info; 2143 struct btrfs_free_space *right_info; 2144 bool merged = false; 2145 u64 offset = info->offset; 2146 u64 bytes = info->bytes; 2147 2148 /* 2149 * first we want to see if there is free space adjacent to the range we 2150 * are adding, if there is remove that struct and add a new one to 2151 * cover the entire range 2152 */ 2153 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2154 if (right_info && rb_prev(&right_info->offset_index)) 2155 left_info = rb_entry(rb_prev(&right_info->offset_index), 2156 struct btrfs_free_space, offset_index); 2157 else 2158 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2159 2160 if (right_info && !right_info->bitmap) { 2161 if (update_stat) 2162 unlink_free_space(ctl, right_info); 2163 else 2164 __unlink_free_space(ctl, right_info); 2165 info->bytes += right_info->bytes; 2166 kmem_cache_free(btrfs_free_space_cachep, right_info); 2167 merged = true; 2168 } 2169 2170 if (left_info && !left_info->bitmap && 2171 left_info->offset + left_info->bytes == offset) { 2172 if (update_stat) 2173 unlink_free_space(ctl, left_info); 2174 else 2175 __unlink_free_space(ctl, left_info); 2176 info->offset = left_info->offset; 2177 info->bytes += left_info->bytes; 2178 
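		/* left_info is now fully absorbed into info, so drop its struct. */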
kmem_cache_free(btrfs_free_space_cachep, left_info); 2179 merged = true; 2180 } 2181 2182 return merged; 2183 } 2184 2185 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2186 struct btrfs_free_space *info, 2187 bool update_stat) 2188 { 2189 struct btrfs_free_space *bitmap; 2190 unsigned long i; 2191 unsigned long j; 2192 const u64 end = info->offset + info->bytes; 2193 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2194 u64 bytes; 2195 2196 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2197 if (!bitmap) 2198 return false; 2199 2200 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2201 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2202 if (j == i) 2203 return false; 2204 bytes = (j - i) * ctl->unit; 2205 info->bytes += bytes; 2206 2207 if (update_stat) 2208 bitmap_clear_bits(ctl, bitmap, end, bytes); 2209 else 2210 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2211 2212 if (!bitmap->bytes) 2213 free_bitmap(ctl, bitmap); 2214 2215 return true; 2216 } 2217 2218 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2219 struct btrfs_free_space *info, 2220 bool update_stat) 2221 { 2222 struct btrfs_free_space *bitmap; 2223 u64 bitmap_offset; 2224 unsigned long i; 2225 unsigned long j; 2226 unsigned long prev_j; 2227 u64 bytes; 2228 2229 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2230 /* If we're on a boundary, try the previous logical bitmap. */ 2231 if (bitmap_offset == info->offset) { 2232 if (info->offset == 0) 2233 return false; 2234 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2235 } 2236 2237 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2238 if (!bitmap) 2239 return false; 2240 2241 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2242 j = 0; 2243 prev_j = (unsigned long)-1; 2244 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2245 if (j > i) 2246 break; 2247 prev_j = j; 2248 } 2249 if (prev_j == i) 2250 return false; 2251 2252 if (prev_j == (unsigned long)-1) 2253 bytes = (i + 1) * ctl->unit; 2254 else 2255 bytes = (i - prev_j) * ctl->unit; 2256 2257 info->offset -= bytes; 2258 info->bytes += bytes; 2259 2260 if (update_stat) 2261 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2262 else 2263 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2264 2265 if (!bitmap->bytes) 2266 free_bitmap(ctl, bitmap); 2267 2268 return true; 2269 } 2270 2271 /* 2272 * We prefer always to allocate from extent entries, both for clustered and 2273 * non-clustered allocation requests. So when attempting to add a new extent 2274 * entry, try to see if there's adjacent free space in bitmap entries, and if 2275 * there is, migrate that space from the bitmaps to the extent. 2276 * Like this we get better chances of satisfying space allocation requests 2277 * because we attempt to satisfy them based on a single cache entry, and never 2278 * on 2 or more entries - even if the entries represent a contiguous free space 2279 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2280 * ends). 2281 */ 2282 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2283 struct btrfs_free_space *info, 2284 bool update_stat) 2285 { 2286 /* 2287 * Only work with disconnected entries, as we can change their offset, 2288 * and must be extent entries. 
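 * Because the entry is not yet linked into ctl->free_space_offset (see the
 * RB_EMPTY_NODE assertion below), adjusting info->offset here cannot
 * corrupt the rbtree ordering.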
2289 */ 2290 ASSERT(!info->bitmap); 2291 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2292 2293 if (ctl->total_bitmaps > 0) { 2294 bool stole_end; 2295 bool stole_front = false; 2296 2297 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2298 if (ctl->total_bitmaps > 0) 2299 stole_front = steal_from_bitmap_to_front(ctl, info, 2300 update_stat); 2301 2302 if (stole_end || stole_front) 2303 try_merge_free_space(ctl, info, update_stat); 2304 } 2305 } 2306 2307 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2308 struct btrfs_free_space_ctl *ctl, 2309 u64 offset, u64 bytes) 2310 { 2311 struct btrfs_free_space *info; 2312 int ret = 0; 2313 2314 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2315 if (!info) 2316 return -ENOMEM; 2317 2318 info->offset = offset; 2319 info->bytes = bytes; 2320 RB_CLEAR_NODE(&info->offset_index); 2321 2322 spin_lock(&ctl->tree_lock); 2323 2324 if (try_merge_free_space(ctl, info, true)) 2325 goto link; 2326 2327 /* 2328 * There was no extent directly to the left or right of this new 2329 * extent then we know we're going to have to allocate a new extent, so 2330 * before we do that see if we need to drop this into a bitmap 2331 */ 2332 ret = insert_into_bitmap(ctl, info); 2333 if (ret < 0) { 2334 goto out; 2335 } else if (ret) { 2336 ret = 0; 2337 goto out; 2338 } 2339 link: 2340 /* 2341 * Only steal free space from adjacent bitmaps if we're sure we're not 2342 * going to add the new free space to existing bitmap entries - because 2343 * that would mean unnecessary work that would be reverted. Therefore 2344 * attempt to steal space from bitmaps if we're adding an extent entry. 2345 */ 2346 steal_from_bitmap(ctl, info, true); 2347 2348 ret = link_free_space(ctl, info); 2349 if (ret) 2350 kmem_cache_free(btrfs_free_space_cachep, info); 2351 out: 2352 spin_unlock(&ctl->tree_lock); 2353 2354 if (ret) { 2355 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2356 ASSERT(ret != -EEXIST); 2357 } 2358 2359 return ret; 2360 } 2361 2362 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2363 u64 offset, u64 bytes) 2364 { 2365 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2366 struct btrfs_free_space *info; 2367 int ret; 2368 bool re_search = false; 2369 2370 spin_lock(&ctl->tree_lock); 2371 2372 again: 2373 ret = 0; 2374 if (!bytes) 2375 goto out_lock; 2376 2377 info = tree_search_offset(ctl, offset, 0, 0); 2378 if (!info) { 2379 /* 2380 * oops didn't find an extent that matched the space we wanted 2381 * to remove, look for a bitmap instead 2382 */ 2383 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2384 1, 0); 2385 if (!info) { 2386 /* 2387 * If we found a partial bit of our free space in a 2388 * bitmap but then couldn't find the other part this may 2389 * be a problem, so WARN about it. 
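 * (re_search is only set after remove_from_bitmap() returned -EAGAIN,
 * i.e. part of the range was already found in a bitmap)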
2390 */ 2391 WARN_ON(re_search); 2392 goto out_lock; 2393 } 2394 } 2395 2396 re_search = false; 2397 if (!info->bitmap) { 2398 unlink_free_space(ctl, info); 2399 if (offset == info->offset) { 2400 u64 to_free = min(bytes, info->bytes); 2401 2402 info->bytes -= to_free; 2403 info->offset += to_free; 2404 if (info->bytes) { 2405 ret = link_free_space(ctl, info); 2406 WARN_ON(ret); 2407 } else { 2408 kmem_cache_free(btrfs_free_space_cachep, info); 2409 } 2410 2411 offset += to_free; 2412 bytes -= to_free; 2413 goto again; 2414 } else { 2415 u64 old_end = info->bytes + info->offset; 2416 2417 info->bytes = offset - info->offset; 2418 ret = link_free_space(ctl, info); 2419 WARN_ON(ret); 2420 if (ret) 2421 goto out_lock; 2422 2423 /* Not enough bytes in this entry to satisfy us */ 2424 if (old_end < offset + bytes) { 2425 bytes -= old_end - offset; 2426 offset = old_end; 2427 goto again; 2428 } else if (old_end == offset + bytes) { 2429 /* all done */ 2430 goto out_lock; 2431 } 2432 spin_unlock(&ctl->tree_lock); 2433 2434 ret = btrfs_add_free_space(block_group, offset + bytes, 2435 old_end - (offset + bytes)); 2436 WARN_ON(ret); 2437 goto out; 2438 } 2439 } 2440 2441 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2442 if (ret == -EAGAIN) { 2443 re_search = true; 2444 goto again; 2445 } 2446 out_lock: 2447 spin_unlock(&ctl->tree_lock); 2448 out: 2449 return ret; 2450 } 2451 2452 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2453 u64 bytes) 2454 { 2455 struct btrfs_fs_info *fs_info = block_group->fs_info; 2456 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2457 struct btrfs_free_space *info; 2458 struct rb_node *n; 2459 int count = 0; 2460 2461 spin_lock(&ctl->tree_lock); 2462 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2463 info = rb_entry(n, struct btrfs_free_space, offset_index); 2464 if (info->bytes >= bytes && !block_group->ro) 2465 count++; 2466 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2467 info->offset, info->bytes, 2468 (info->bitmap) ? "yes" : "no"); 2469 } 2470 spin_unlock(&ctl->tree_lock); 2471 btrfs_info(fs_info, "block group has cluster?: %s", 2472 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2473 btrfs_info(fs_info, 2474 "%d blocks of free space at or bigger than bytes is", count); 2475 } 2476 2477 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2478 { 2479 struct btrfs_fs_info *fs_info = block_group->fs_info; 2480 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2481 2482 spin_lock_init(&ctl->tree_lock); 2483 ctl->unit = fs_info->sectorsize; 2484 ctl->start = block_group->key.objectid; 2485 ctl->private = block_group; 2486 ctl->op = &free_space_op; 2487 INIT_LIST_HEAD(&ctl->trimming_ranges); 2488 mutex_init(&ctl->cache_writeout_mutex); 2489 2490 /* 2491 * we only want to have 32k of ram per block group for keeping 2492 * track of free space, and if we pass 1/2 of that we want to 2493 * start converting things over to using bitmaps 2494 */ 2495 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2496 } 2497 2498 /* 2499 * for a given cluster, put all of its extents back into the free 2500 * space cache. If the block group passed doesn't match the block group 2501 * pointed to by the cluster, someone else raced in and freed the 2502 * cluster already. 
In that case, we just return without changing anything 2503 */ 2504 static int 2505 __btrfs_return_cluster_to_free_space( 2506 struct btrfs_block_group_cache *block_group, 2507 struct btrfs_free_cluster *cluster) 2508 { 2509 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2510 struct btrfs_free_space *entry; 2511 struct rb_node *node; 2512 2513 spin_lock(&cluster->lock); 2514 if (cluster->block_group != block_group) 2515 goto out; 2516 2517 cluster->block_group = NULL; 2518 cluster->window_start = 0; 2519 list_del_init(&cluster->block_group_list); 2520 2521 node = rb_first(&cluster->root); 2522 while (node) { 2523 bool bitmap; 2524 2525 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2526 node = rb_next(&entry->offset_index); 2527 rb_erase(&entry->offset_index, &cluster->root); 2528 RB_CLEAR_NODE(&entry->offset_index); 2529 2530 bitmap = (entry->bitmap != NULL); 2531 if (!bitmap) { 2532 try_merge_free_space(ctl, entry, false); 2533 steal_from_bitmap(ctl, entry, false); 2534 } 2535 tree_insert_offset(&ctl->free_space_offset, 2536 entry->offset, &entry->offset_index, bitmap); 2537 } 2538 cluster->root = RB_ROOT; 2539 2540 out: 2541 spin_unlock(&cluster->lock); 2542 btrfs_put_block_group(block_group); 2543 return 0; 2544 } 2545 2546 static void __btrfs_remove_free_space_cache_locked( 2547 struct btrfs_free_space_ctl *ctl) 2548 { 2549 struct btrfs_free_space *info; 2550 struct rb_node *node; 2551 2552 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2553 info = rb_entry(node, struct btrfs_free_space, offset_index); 2554 if (!info->bitmap) { 2555 unlink_free_space(ctl, info); 2556 kmem_cache_free(btrfs_free_space_cachep, info); 2557 } else { 2558 free_bitmap(ctl, info); 2559 } 2560 2561 cond_resched_lock(&ctl->tree_lock); 2562 } 2563 } 2564 2565 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2566 { 2567 spin_lock(&ctl->tree_lock); 2568 __btrfs_remove_free_space_cache_locked(ctl); 2569 spin_unlock(&ctl->tree_lock); 2570 } 2571 2572 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2573 { 2574 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2575 struct btrfs_free_cluster *cluster; 2576 struct list_head *head; 2577 2578 spin_lock(&ctl->tree_lock); 2579 while ((head = block_group->cluster_list.next) != 2580 &block_group->cluster_list) { 2581 cluster = list_entry(head, struct btrfs_free_cluster, 2582 block_group_list); 2583 2584 WARN_ON(cluster->block_group != block_group); 2585 __btrfs_return_cluster_to_free_space(block_group, cluster); 2586 2587 cond_resched_lock(&ctl->tree_lock); 2588 } 2589 __btrfs_remove_free_space_cache_locked(ctl); 2590 spin_unlock(&ctl->tree_lock); 2591 2592 } 2593 2594 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2595 u64 offset, u64 bytes, u64 empty_size, 2596 u64 *max_extent_size) 2597 { 2598 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2599 struct btrfs_free_space *entry = NULL; 2600 u64 bytes_search = bytes + empty_size; 2601 u64 ret = 0; 2602 u64 align_gap = 0; 2603 u64 align_gap_len = 0; 2604 2605 spin_lock(&ctl->tree_lock); 2606 entry = find_free_space(ctl, &offset, &bytes_search, 2607 block_group->full_stripe_len, max_extent_size); 2608 if (!entry) 2609 goto out; 2610 2611 ret = offset; 2612 if (entry->bitmap) { 2613 bitmap_clear_bits(ctl, entry, offset, bytes); 2614 if (!entry->bytes) 2615 free_bitmap(ctl, entry); 2616 } else { 2617 unlink_free_space(ctl, entry); 2618 align_gap_len = offset - 
entry->offset; 2619 align_gap = entry->offset; 2620 2621 entry->offset = offset + bytes; 2622 WARN_ON(entry->bytes < bytes + align_gap_len); 2623 2624 entry->bytes -= bytes + align_gap_len; 2625 if (!entry->bytes) 2626 kmem_cache_free(btrfs_free_space_cachep, entry); 2627 else 2628 link_free_space(ctl, entry); 2629 } 2630 out: 2631 spin_unlock(&ctl->tree_lock); 2632 2633 if (align_gap_len) 2634 __btrfs_add_free_space(block_group->fs_info, ctl, 2635 align_gap, align_gap_len); 2636 return ret; 2637 } 2638 2639 /* 2640 * given a cluster, put all of its extents back into the free space 2641 * cache. If a block group is passed, this function will only free 2642 * a cluster that belongs to the passed block group. 2643 * 2644 * Otherwise, it'll get a reference on the block group pointed to by the 2645 * cluster and remove the cluster from it. 2646 */ 2647 int btrfs_return_cluster_to_free_space( 2648 struct btrfs_block_group_cache *block_group, 2649 struct btrfs_free_cluster *cluster) 2650 { 2651 struct btrfs_free_space_ctl *ctl; 2652 int ret; 2653 2654 /* first, get a safe pointer to the block group */ 2655 spin_lock(&cluster->lock); 2656 if (!block_group) { 2657 block_group = cluster->block_group; 2658 if (!block_group) { 2659 spin_unlock(&cluster->lock); 2660 return 0; 2661 } 2662 } else if (cluster->block_group != block_group) { 2663 /* someone else has already freed it don't redo their work */ 2664 spin_unlock(&cluster->lock); 2665 return 0; 2666 } 2667 atomic_inc(&block_group->count); 2668 spin_unlock(&cluster->lock); 2669 2670 ctl = block_group->free_space_ctl; 2671 2672 /* now return any extents the cluster had on it */ 2673 spin_lock(&ctl->tree_lock); 2674 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2675 spin_unlock(&ctl->tree_lock); 2676 2677 /* finally drop our ref */ 2678 btrfs_put_block_group(block_group); 2679 return ret; 2680 } 2681 2682 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2683 struct btrfs_free_cluster *cluster, 2684 struct btrfs_free_space *entry, 2685 u64 bytes, u64 min_start, 2686 u64 *max_extent_size) 2687 { 2688 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2689 int err; 2690 u64 search_start = cluster->window_start; 2691 u64 search_bytes = bytes; 2692 u64 ret = 0; 2693 2694 search_start = min_start; 2695 search_bytes = bytes; 2696 2697 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2698 if (err) { 2699 *max_extent_size = max(get_max_extent_size(entry), 2700 *max_extent_size); 2701 return 0; 2702 } 2703 2704 ret = search_start; 2705 __bitmap_clear_bits(ctl, entry, ret, bytes); 2706 2707 return ret; 2708 } 2709 2710 /* 2711 * given a cluster, try to allocate 'bytes' from it, returns 0 2712 * if it couldn't find anything suitably large, or a logical disk offset 2713 * if things worked out 2714 */ 2715 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2716 struct btrfs_free_cluster *cluster, u64 bytes, 2717 u64 min_start, u64 *max_extent_size) 2718 { 2719 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2720 struct btrfs_free_space *entry = NULL; 2721 struct rb_node *node; 2722 u64 ret = 0; 2723 2724 spin_lock(&cluster->lock); 2725 if (bytes > cluster->max_size) 2726 goto out; 2727 2728 if (cluster->block_group != block_group) 2729 goto out; 2730 2731 node = rb_first(&cluster->root); 2732 if (!node) 2733 goto out; 2734 2735 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2736 while (1) { 2737 if (entry->bytes < bytes) 2738 
*max_extent_size = max(get_max_extent_size(entry), 2739 *max_extent_size); 2740 2741 if (entry->bytes < bytes || 2742 (!entry->bitmap && entry->offset < min_start)) { 2743 node = rb_next(&entry->offset_index); 2744 if (!node) 2745 break; 2746 entry = rb_entry(node, struct btrfs_free_space, 2747 offset_index); 2748 continue; 2749 } 2750 2751 if (entry->bitmap) { 2752 ret = btrfs_alloc_from_bitmap(block_group, 2753 cluster, entry, bytes, 2754 cluster->window_start, 2755 max_extent_size); 2756 if (ret == 0) { 2757 node = rb_next(&entry->offset_index); 2758 if (!node) 2759 break; 2760 entry = rb_entry(node, struct btrfs_free_space, 2761 offset_index); 2762 continue; 2763 } 2764 cluster->window_start += bytes; 2765 } else { 2766 ret = entry->offset; 2767 2768 entry->offset += bytes; 2769 entry->bytes -= bytes; 2770 } 2771 2772 if (entry->bytes == 0) 2773 rb_erase(&entry->offset_index, &cluster->root); 2774 break; 2775 } 2776 out: 2777 spin_unlock(&cluster->lock); 2778 2779 if (!ret) 2780 return 0; 2781 2782 spin_lock(&ctl->tree_lock); 2783 2784 ctl->free_space -= bytes; 2785 if (entry->bytes == 0) { 2786 ctl->free_extents--; 2787 if (entry->bitmap) { 2788 kfree(entry->bitmap); 2789 ctl->total_bitmaps--; 2790 ctl->op->recalc_thresholds(ctl); 2791 } 2792 kmem_cache_free(btrfs_free_space_cachep, entry); 2793 } 2794 2795 spin_unlock(&ctl->tree_lock); 2796 2797 return ret; 2798 } 2799 2800 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2801 struct btrfs_free_space *entry, 2802 struct btrfs_free_cluster *cluster, 2803 u64 offset, u64 bytes, 2804 u64 cont1_bytes, u64 min_bytes) 2805 { 2806 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2807 unsigned long next_zero; 2808 unsigned long i; 2809 unsigned long want_bits; 2810 unsigned long min_bits; 2811 unsigned long found_bits; 2812 unsigned long max_bits = 0; 2813 unsigned long start = 0; 2814 unsigned long total_found = 0; 2815 int ret; 2816 2817 i = offset_to_bit(entry->offset, ctl->unit, 2818 max_t(u64, offset, entry->offset)); 2819 want_bits = bytes_to_bits(bytes, ctl->unit); 2820 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2821 2822 /* 2823 * Don't bother looking for a cluster in this bitmap if it's heavily 2824 * fragmented. 
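 * max_extent_size is a cached upper bound left behind by a previous failed
 * search; zero means it is unknown, in which case the bitmap is scanned
 * below.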
2825 */ 2826 if (entry->max_extent_size && 2827 entry->max_extent_size < cont1_bytes) 2828 return -ENOSPC; 2829 again: 2830 found_bits = 0; 2831 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2832 next_zero = find_next_zero_bit(entry->bitmap, 2833 BITS_PER_BITMAP, i); 2834 if (next_zero - i >= min_bits) { 2835 found_bits = next_zero - i; 2836 if (found_bits > max_bits) 2837 max_bits = found_bits; 2838 break; 2839 } 2840 if (next_zero - i > max_bits) 2841 max_bits = next_zero - i; 2842 i = next_zero; 2843 } 2844 2845 if (!found_bits) { 2846 entry->max_extent_size = (u64)max_bits * ctl->unit; 2847 return -ENOSPC; 2848 } 2849 2850 if (!total_found) { 2851 start = i; 2852 cluster->max_size = 0; 2853 } 2854 2855 total_found += found_bits; 2856 2857 if (cluster->max_size < found_bits * ctl->unit) 2858 cluster->max_size = found_bits * ctl->unit; 2859 2860 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2861 i = next_zero + 1; 2862 goto again; 2863 } 2864 2865 cluster->window_start = start * ctl->unit + entry->offset; 2866 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2867 ret = tree_insert_offset(&cluster->root, entry->offset, 2868 &entry->offset_index, 1); 2869 ASSERT(!ret); /* -EEXIST; Logic error */ 2870 2871 trace_btrfs_setup_cluster(block_group, cluster, 2872 total_found * ctl->unit, 1); 2873 return 0; 2874 } 2875 2876 /* 2877 * This searches the block group for just extents to fill the cluster with. 2878 * Try to find a cluster with at least bytes total bytes, at least one 2879 * extent of cont1_bytes, and other clusters of at least min_bytes. 2880 */ 2881 static noinline int 2882 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2883 struct btrfs_free_cluster *cluster, 2884 struct list_head *bitmaps, u64 offset, u64 bytes, 2885 u64 cont1_bytes, u64 min_bytes) 2886 { 2887 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2888 struct btrfs_free_space *first = NULL; 2889 struct btrfs_free_space *entry = NULL; 2890 struct btrfs_free_space *last; 2891 struct rb_node *node; 2892 u64 window_free; 2893 u64 max_extent; 2894 u64 total_size = 0; 2895 2896 entry = tree_search_offset(ctl, offset, 0, 1); 2897 if (!entry) 2898 return -ENOSPC; 2899 2900 /* 2901 * We don't want bitmaps, so just move along until we find a normal 2902 * extent entry. 
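 * Bitmap entries we walk past are stashed on the caller's list so that
 * setup_cluster_bitmap() can try them if this extent-only search fails.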
2903 */ 2904 while (entry->bitmap || entry->bytes < min_bytes) { 2905 if (entry->bitmap && list_empty(&entry->list)) 2906 list_add_tail(&entry->list, bitmaps); 2907 node = rb_next(&entry->offset_index); 2908 if (!node) 2909 return -ENOSPC; 2910 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2911 } 2912 2913 window_free = entry->bytes; 2914 max_extent = entry->bytes; 2915 first = entry; 2916 last = entry; 2917 2918 for (node = rb_next(&entry->offset_index); node; 2919 node = rb_next(&entry->offset_index)) { 2920 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2921 2922 if (entry->bitmap) { 2923 if (list_empty(&entry->list)) 2924 list_add_tail(&entry->list, bitmaps); 2925 continue; 2926 } 2927 2928 if (entry->bytes < min_bytes) 2929 continue; 2930 2931 last = entry; 2932 window_free += entry->bytes; 2933 if (entry->bytes > max_extent) 2934 max_extent = entry->bytes; 2935 } 2936 2937 if (window_free < bytes || max_extent < cont1_bytes) 2938 return -ENOSPC; 2939 2940 cluster->window_start = first->offset; 2941 2942 node = &first->offset_index; 2943 2944 /* 2945 * now we've found our entries, pull them out of the free space 2946 * cache and put them into the cluster rbtree 2947 */ 2948 do { 2949 int ret; 2950 2951 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2952 node = rb_next(&entry->offset_index); 2953 if (entry->bitmap || entry->bytes < min_bytes) 2954 continue; 2955 2956 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2957 ret = tree_insert_offset(&cluster->root, entry->offset, 2958 &entry->offset_index, 0); 2959 total_size += entry->bytes; 2960 ASSERT(!ret); /* -EEXIST; Logic error */ 2961 } while (node && entry != last); 2962 2963 cluster->max_size = max_extent; 2964 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2965 return 0; 2966 } 2967 2968 /* 2969 * This specifically looks for bitmaps that may work in the cluster, we assume 2970 * that we have already failed to find extents that will work. 2971 */ 2972 static noinline int 2973 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2974 struct btrfs_free_cluster *cluster, 2975 struct list_head *bitmaps, u64 offset, u64 bytes, 2976 u64 cont1_bytes, u64 min_bytes) 2977 { 2978 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2979 struct btrfs_free_space *entry = NULL; 2980 int ret = -ENOSPC; 2981 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2982 2983 if (ctl->total_bitmaps == 0) 2984 return -ENOSPC; 2985 2986 /* 2987 * The bitmap that covers offset won't be in the list unless offset 2988 * is just its start offset. 2989 */ 2990 if (!list_empty(bitmaps)) 2991 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2992 2993 if (!entry || entry->offset != bitmap_offset) { 2994 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2995 if (entry && list_empty(&entry->list)) 2996 list_add(&entry->list, bitmaps); 2997 } 2998 2999 list_for_each_entry(entry, bitmaps, list) { 3000 if (entry->bytes < bytes) 3001 continue; 3002 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3003 bytes, cont1_bytes, min_bytes); 3004 if (!ret) 3005 return 0; 3006 } 3007 3008 /* 3009 * The bitmaps list has all the bitmaps that record free space 3010 * starting after offset, so no more search is required. 3011 */ 3012 return -ENOSPC; 3013 } 3014 3015 /* 3016 * here we try to find a cluster of blocks in a block group. The goal 3017 * is to find at least bytes+empty_size. 3018 * We might not find them all in one contiguous area. 
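 * cont1_bytes is the size of the single contiguous extent the window must
 * contain, min_bytes the smallest extent considered; e.g. a 256K data
 * request with empty_size 0 gives cont1_bytes = max(256K, 256K >> 2) = 256K
 * and min_bytes = sectorsize.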
3019 * 3020 * returns zero and sets up cluster if things worked out, otherwise 3021 * it returns -ENOSPC 3022 */ 3023 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3024 struct btrfs_block_group_cache *block_group, 3025 struct btrfs_free_cluster *cluster, 3026 u64 offset, u64 bytes, u64 empty_size) 3027 { 3028 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3029 struct btrfs_free_space *entry, *tmp; 3030 LIST_HEAD(bitmaps); 3031 u64 min_bytes; 3032 u64 cont1_bytes; 3033 int ret; 3034 3035 /* 3036 * Choose the minimum extent size we'll require for this 3037 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3038 * For metadata, allow allocations with smaller extents. For 3039 * data, keep it dense. 3040 */ 3041 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3042 cont1_bytes = min_bytes = bytes + empty_size; 3043 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3044 cont1_bytes = bytes; 3045 min_bytes = fs_info->sectorsize; 3046 } else { 3047 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3048 min_bytes = fs_info->sectorsize; 3049 } 3050 3051 spin_lock(&ctl->tree_lock); 3052 3053 /* 3054 * If we know we don't have enough space to make a cluster don't even 3055 * bother doing all the work to try and find one. 3056 */ 3057 if (ctl->free_space < bytes) { 3058 spin_unlock(&ctl->tree_lock); 3059 return -ENOSPC; 3060 } 3061 3062 spin_lock(&cluster->lock); 3063 3064 /* someone already found a cluster, hooray */ 3065 if (cluster->block_group) { 3066 ret = 0; 3067 goto out; 3068 } 3069 3070 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3071 min_bytes); 3072 3073 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3074 bytes + empty_size, 3075 cont1_bytes, min_bytes); 3076 if (ret) 3077 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3078 offset, bytes + empty_size, 3079 cont1_bytes, min_bytes); 3080 3081 /* Clear our temporary list */ 3082 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3083 list_del_init(&entry->list); 3084 3085 if (!ret) { 3086 atomic_inc(&block_group->count); 3087 list_add_tail(&cluster->block_group_list, 3088 &block_group->cluster_list); 3089 cluster->block_group = block_group; 3090 } else { 3091 trace_btrfs_failed_cluster_setup(block_group); 3092 } 3093 out: 3094 spin_unlock(&cluster->lock); 3095 spin_unlock(&ctl->tree_lock); 3096 3097 return ret; 3098 } 3099 3100 /* 3101 * simple code to zero out a cluster 3102 */ 3103 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3104 { 3105 spin_lock_init(&cluster->lock); 3106 spin_lock_init(&cluster->refill_lock); 3107 cluster->root = RB_ROOT; 3108 cluster->max_size = 0; 3109 cluster->fragmented = false; 3110 INIT_LIST_HEAD(&cluster->block_group_list); 3111 cluster->block_group = NULL; 3112 } 3113 3114 static int do_trimming(struct btrfs_block_group_cache *block_group, 3115 u64 *total_trimmed, u64 start, u64 bytes, 3116 u64 reserved_start, u64 reserved_bytes, 3117 struct btrfs_trim_range *trim_entry) 3118 { 3119 struct btrfs_space_info *space_info = block_group->space_info; 3120 struct btrfs_fs_info *fs_info = block_group->fs_info; 3121 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3122 int ret; 3123 int update = 0; 3124 u64 trimmed = 0; 3125 3126 spin_lock(&space_info->lock); 3127 spin_lock(&block_group->lock); 3128 if (!block_group->ro) { 3129 block_group->reserved += reserved_bytes; 3130 space_info->bytes_reserved += reserved_bytes; 3131 update = 1; 3132 } 3133 spin_unlock(&block_group->lock); 3134
spin_unlock(&space_info->lock); 3135 3136 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3137 if (!ret) 3138 *total_trimmed += trimmed; 3139 3140 mutex_lock(&ctl->cache_writeout_mutex); 3141 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3142 list_del(&trim_entry->list); 3143 mutex_unlock(&ctl->cache_writeout_mutex); 3144 3145 if (update) { 3146 spin_lock(&space_info->lock); 3147 spin_lock(&block_group->lock); 3148 if (block_group->ro) 3149 space_info->bytes_readonly += reserved_bytes; 3150 block_group->reserved -= reserved_bytes; 3151 space_info->bytes_reserved -= reserved_bytes; 3152 spin_unlock(&space_info->lock); 3153 spin_unlock(&block_group->lock); 3154 } 3155 3156 return ret; 3157 } 3158 3159 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3160 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3161 { 3162 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3163 struct btrfs_free_space *entry; 3164 struct rb_node *node; 3165 int ret = 0; 3166 u64 extent_start; 3167 u64 extent_bytes; 3168 u64 bytes; 3169 3170 while (start < end) { 3171 struct btrfs_trim_range trim_entry; 3172 3173 mutex_lock(&ctl->cache_writeout_mutex); 3174 spin_lock(&ctl->tree_lock); 3175 3176 if (ctl->free_space < minlen) { 3177 spin_unlock(&ctl->tree_lock); 3178 mutex_unlock(&ctl->cache_writeout_mutex); 3179 break; 3180 } 3181 3182 entry = tree_search_offset(ctl, start, 0, 1); 3183 if (!entry) { 3184 spin_unlock(&ctl->tree_lock); 3185 mutex_unlock(&ctl->cache_writeout_mutex); 3186 break; 3187 } 3188 3189 /* skip bitmaps */ 3190 while (entry->bitmap) { 3191 node = rb_next(&entry->offset_index); 3192 if (!node) { 3193 spin_unlock(&ctl->tree_lock); 3194 mutex_unlock(&ctl->cache_writeout_mutex); 3195 goto out; 3196 } 3197 entry = rb_entry(node, struct btrfs_free_space, 3198 offset_index); 3199 } 3200 3201 if (entry->offset >= end) { 3202 spin_unlock(&ctl->tree_lock); 3203 mutex_unlock(&ctl->cache_writeout_mutex); 3204 break; 3205 } 3206 3207 extent_start = entry->offset; 3208 extent_bytes = entry->bytes; 3209 start = max(start, extent_start); 3210 bytes = min(extent_start + extent_bytes, end) - start; 3211 if (bytes < minlen) { 3212 spin_unlock(&ctl->tree_lock); 3213 mutex_unlock(&ctl->cache_writeout_mutex); 3214 goto next; 3215 } 3216 3217 unlink_free_space(ctl, entry); 3218 kmem_cache_free(btrfs_free_space_cachep, entry); 3219 3220 spin_unlock(&ctl->tree_lock); 3221 trim_entry.start = extent_start; 3222 trim_entry.bytes = extent_bytes; 3223 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3224 mutex_unlock(&ctl->cache_writeout_mutex); 3225 3226 ret = do_trimming(block_group, total_trimmed, start, bytes, 3227 extent_start, extent_bytes, &trim_entry); 3228 if (ret) 3229 break; 3230 next: 3231 start += bytes; 3232 3233 if (fatal_signal_pending(current)) { 3234 ret = -ERESTARTSYS; 3235 break; 3236 } 3237 3238 cond_resched(); 3239 } 3240 out: 3241 return ret; 3242 } 3243 3244 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3245 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3246 { 3247 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3248 struct btrfs_free_space *entry; 3249 int ret = 0; 3250 int ret2; 3251 u64 bytes; 3252 u64 offset = offset_to_bitmap(ctl, start); 3253 3254 while (offset < end) { 3255 bool next_bitmap = false; 3256 struct btrfs_trim_range trim_entry; 3257 3258 mutex_lock(&ctl->cache_writeout_mutex); 3259 spin_lock(&ctl->tree_lock); 3260 3261 if (ctl->free_space < minlen) { 3262 
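			/* Too little free space left in this block group to honour minlen. */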
spin_unlock(&ctl->tree_lock); 3263 mutex_unlock(&ctl->cache_writeout_mutex); 3264 break; 3265 } 3266 3267 entry = tree_search_offset(ctl, offset, 1, 0); 3268 if (!entry) { 3269 spin_unlock(&ctl->tree_lock); 3270 mutex_unlock(&ctl->cache_writeout_mutex); 3271 next_bitmap = true; 3272 goto next; 3273 } 3274 3275 bytes = minlen; 3276 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3277 if (ret2 || start >= end) { 3278 spin_unlock(&ctl->tree_lock); 3279 mutex_unlock(&ctl->cache_writeout_mutex); 3280 next_bitmap = true; 3281 goto next; 3282 } 3283 3284 bytes = min(bytes, end - start); 3285 if (bytes < minlen) { 3286 spin_unlock(&ctl->tree_lock); 3287 mutex_unlock(&ctl->cache_writeout_mutex); 3288 goto next; 3289 } 3290 3291 bitmap_clear_bits(ctl, entry, start, bytes); 3292 if (entry->bytes == 0) 3293 free_bitmap(ctl, entry); 3294 3295 spin_unlock(&ctl->tree_lock); 3296 trim_entry.start = start; 3297 trim_entry.bytes = bytes; 3298 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3299 mutex_unlock(&ctl->cache_writeout_mutex); 3300 3301 ret = do_trimming(block_group, total_trimmed, start, bytes, 3302 start, bytes, &trim_entry); 3303 if (ret) 3304 break; 3305 next: 3306 if (next_bitmap) { 3307 offset += BITS_PER_BITMAP * ctl->unit; 3308 } else { 3309 start += bytes; 3310 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3311 offset += BITS_PER_BITMAP * ctl->unit; 3312 } 3313 3314 if (fatal_signal_pending(current)) { 3315 ret = -ERESTARTSYS; 3316 break; 3317 } 3318 3319 cond_resched(); 3320 } 3321 3322 return ret; 3323 } 3324 3325 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3326 { 3327 atomic_inc(&cache->trimming); 3328 } 3329 3330 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3331 { 3332 struct btrfs_fs_info *fs_info = block_group->fs_info; 3333 struct extent_map_tree *em_tree; 3334 struct extent_map *em; 3335 bool cleanup; 3336 3337 spin_lock(&block_group->lock); 3338 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3339 block_group->removed); 3340 spin_unlock(&block_group->lock); 3341 3342 if (cleanup) { 3343 mutex_lock(&fs_info->chunk_mutex); 3344 em_tree = &fs_info->mapping_tree.map_tree; 3345 write_lock(&em_tree->lock); 3346 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3347 1); 3348 BUG_ON(!em); /* logic error, can't happen */ 3349 /* 3350 * remove_extent_mapping() will delete us from the pinned_chunks 3351 * list, which is protected by the chunk mutex. 3352 */ 3353 remove_extent_mapping(em_tree, em); 3354 write_unlock(&em_tree->lock); 3355 mutex_unlock(&fs_info->chunk_mutex); 3356 3357 /* once for us and once for the tree */ 3358 free_extent_map(em); 3359 free_extent_map(em); 3360 3361 /* 3362 * We've left one free space entry and other tasks trimming 3363 * this block group have left 1 entry each one. Free them. 
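 * (do_trimming() re-adds each trimmed range via btrfs_add_free_space(),
 * which is what leaves those entries behind once the block group has been
 * removed)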
3364 */ 3365 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3366 } 3367 } 3368 3369 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3370 u64 *trimmed, u64 start, u64 end, u64 minlen) 3371 { 3372 int ret; 3373 3374 *trimmed = 0; 3375 3376 spin_lock(&block_group->lock); 3377 if (block_group->removed) { 3378 spin_unlock(&block_group->lock); 3379 return 0; 3380 } 3381 btrfs_get_block_group_trimming(block_group); 3382 spin_unlock(&block_group->lock); 3383 3384 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3385 if (ret) 3386 goto out; 3387 3388 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3389 out: 3390 btrfs_put_block_group_trimming(block_group); 3391 return ret; 3392 } 3393 3394 /* 3395 * Find the left-most item in the cache tree, and then return the 3396 * smallest inode number in the item. 3397 * 3398 * Note: the returned inode number may not be the smallest one in 3399 * the tree, if the left-most item is a bitmap. 3400 */ 3401 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3402 { 3403 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3404 struct btrfs_free_space *entry = NULL; 3405 u64 ino = 0; 3406 3407 spin_lock(&ctl->tree_lock); 3408 3409 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3410 goto out; 3411 3412 entry = rb_entry(rb_first(&ctl->free_space_offset), 3413 struct btrfs_free_space, offset_index); 3414 3415 if (!entry->bitmap) { 3416 ino = entry->offset; 3417 3418 unlink_free_space(ctl, entry); 3419 entry->offset++; 3420 entry->bytes--; 3421 if (!entry->bytes) 3422 kmem_cache_free(btrfs_free_space_cachep, entry); 3423 else 3424 link_free_space(ctl, entry); 3425 } else { 3426 u64 offset = 0; 3427 u64 count = 1; 3428 int ret; 3429 3430 ret = search_bitmap(ctl, entry, &offset, &count, true); 3431 /* Logic error; Should be empty if it can't find anything */ 3432 ASSERT(!ret); 3433 3434 ino = offset; 3435 bitmap_clear_bits(ctl, entry, offset, 1); 3436 if (entry->bytes == 0) 3437 free_bitmap(ctl, entry); 3438 } 3439 out: 3440 spin_unlock(&ctl->tree_lock); 3441 3442 return ino; 3443 } 3444 3445 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3446 struct btrfs_path *path) 3447 { 3448 struct inode *inode = NULL; 3449 3450 spin_lock(&root->ino_cache_lock); 3451 if (root->ino_cache_inode) 3452 inode = igrab(root->ino_cache_inode); 3453 spin_unlock(&root->ino_cache_lock); 3454 if (inode) 3455 return inode; 3456 3457 inode = __lookup_free_space_inode(root, path, 0); 3458 if (IS_ERR(inode)) 3459 return inode; 3460 3461 spin_lock(&root->ino_cache_lock); 3462 if (!btrfs_fs_closing(root->fs_info)) 3463 root->ino_cache_inode = igrab(inode); 3464 spin_unlock(&root->ino_cache_lock); 3465 3466 return inode; 3467 } 3468 3469 int create_free_ino_inode(struct btrfs_root *root, 3470 struct btrfs_trans_handle *trans, 3471 struct btrfs_path *path) 3472 { 3473 return __create_free_space_inode(root, trans, path, 3474 BTRFS_FREE_INO_OBJECTID, 0); 3475 } 3476 3477 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3478 { 3479 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3480 struct btrfs_path *path; 3481 struct inode *inode; 3482 int ret = 0; 3483 u64 root_gen = btrfs_root_generation(&root->root_item); 3484 3485 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3486 return 0; 3487 3488 /* 3489 * If we're unmounting then just return, since this does a search on the 3490 * normal root and not the commit root and we could deadlock. 
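 * The inode number cache is only an optimization, so skipping the load
 * here is safe.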
3491 */ 3492 if (btrfs_fs_closing(fs_info)) 3493 return 0; 3494 3495 path = btrfs_alloc_path(); 3496 if (!path) 3497 return 0; 3498 3499 inode = lookup_free_ino_inode(root, path); 3500 if (IS_ERR(inode)) 3501 goto out; 3502 3503 if (root_gen != BTRFS_I(inode)->generation) 3504 goto out_put; 3505 3506 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3507 3508 if (ret < 0) 3509 btrfs_err(fs_info, 3510 "failed to load free ino cache for root %llu", 3511 root->root_key.objectid); 3512 out_put: 3513 iput(inode); 3514 out: 3515 btrfs_free_path(path); 3516 return ret; 3517 } 3518 3519 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3520 struct btrfs_trans_handle *trans, 3521 struct btrfs_path *path, 3522 struct inode *inode) 3523 { 3524 struct btrfs_fs_info *fs_info = root->fs_info; 3525 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3526 int ret; 3527 struct btrfs_io_ctl io_ctl; 3528 bool release_metadata = true; 3529 3530 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3531 return 0; 3532 3533 memset(&io_ctl, 0, sizeof(io_ctl)); 3534 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3535 if (!ret) { 3536 /* 3537 * At this point writepages() didn't error out, so our metadata 3538 * reservation is released when the writeback finishes, at 3539 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3540 * with or without an error. 3541 */ 3542 release_metadata = false; 3543 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3544 } 3545 3546 if (ret) { 3547 if (release_metadata) 3548 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3549 inode->i_size, true); 3550 #ifdef DEBUG 3551 btrfs_err(fs_info, 3552 "failed to write free ino cache for root %llu", 3553 root->root_key.objectid); 3554 #endif 3555 } 3556 3557 return ret; 3558 } 3559 3560 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3561 /* 3562 * Use this if you need to make a bitmap or extent entry specifically, it 3563 * doesn't do any of the merging that add_free_space does, this acts a lot like 3564 * how the free space cache loading stuff works, so you can get really weird 3565 * configurations. 
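 * Only intended for the btrfs self-tests (see fs/btrfs/tests/).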
3566 */ 3567 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3568 u64 offset, u64 bytes, bool bitmap) 3569 { 3570 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3571 struct btrfs_free_space *info = NULL, *bitmap_info; 3572 void *map = NULL; 3573 u64 bytes_added; 3574 int ret; 3575 3576 again: 3577 if (!info) { 3578 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3579 if (!info) 3580 return -ENOMEM; 3581 } 3582 3583 if (!bitmap) { 3584 spin_lock(&ctl->tree_lock); 3585 info->offset = offset; 3586 info->bytes = bytes; 3587 info->max_extent_size = 0; 3588 ret = link_free_space(ctl, info); 3589 spin_unlock(&ctl->tree_lock); 3590 if (ret) 3591 kmem_cache_free(btrfs_free_space_cachep, info); 3592 return ret; 3593 } 3594 3595 if (!map) { 3596 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3597 if (!map) { 3598 kmem_cache_free(btrfs_free_space_cachep, info); 3599 return -ENOMEM; 3600 } 3601 } 3602 3603 spin_lock(&ctl->tree_lock); 3604 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3605 1, 0); 3606 if (!bitmap_info) { 3607 info->bitmap = map; 3608 map = NULL; 3609 add_new_bitmap(ctl, info, offset); 3610 bitmap_info = info; 3611 info = NULL; 3612 } 3613 3614 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3615 3616 bytes -= bytes_added; 3617 offset += bytes_added; 3618 spin_unlock(&ctl->tree_lock); 3619 3620 if (bytes) 3621 goto again; 3622 3623 if (info) 3624 kmem_cache_free(btrfs_free_space_cachep, info); 3625 kfree(map); 3626 return 0; 3627 } 3628 3629 /* 3630 * Checks to see if the given range is in the free space cache. This is really 3631 * just used to check the absence of space, so if there is free space in the 3632 * range at all we will return 1. 3633 */ 3634 int test_check_exists(struct btrfs_block_group_cache *cache, 3635 u64 offset, u64 bytes) 3636 { 3637 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3638 struct btrfs_free_space *info; 3639 int ret = 0; 3640 3641 spin_lock(&ctl->tree_lock); 3642 info = tree_search_offset(ctl, offset, 0, 0); 3643 if (!info) { 3644 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3645 1, 0); 3646 if (!info) 3647 goto out; 3648 } 3649 3650 have_info: 3651 if (info->bitmap) { 3652 u64 bit_off, bit_bytes; 3653 struct rb_node *n; 3654 struct btrfs_free_space *tmp; 3655 3656 bit_off = offset; 3657 bit_bytes = ctl->unit; 3658 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3659 if (!ret) { 3660 if (bit_off == offset) { 3661 ret = 1; 3662 goto out; 3663 } else if (bit_off > offset && 3664 offset + bytes > bit_off) { 3665 ret = 1; 3666 goto out; 3667 } 3668 } 3669 3670 n = rb_prev(&info->offset_index); 3671 while (n) { 3672 tmp = rb_entry(n, struct btrfs_free_space, 3673 offset_index); 3674 if (tmp->offset + tmp->bytes < offset) 3675 break; 3676 if (offset + bytes < tmp->offset) { 3677 n = rb_prev(&tmp->offset_index); 3678 continue; 3679 } 3680 info = tmp; 3681 goto have_info; 3682 } 3683 3684 n = rb_next(&info->offset_index); 3685 while (n) { 3686 tmp = rb_entry(n, struct btrfs_free_space, 3687 offset_index); 3688 if (offset + bytes < tmp->offset) 3689 break; 3690 if (tmp->offset + tmp->bytes < offset) { 3691 n = rb_next(&tmp->offset_index); 3692 continue; 3693 } 3694 info = tmp; 3695 goto have_info; 3696 } 3697 3698 ret = 0; 3699 goto out; 3700 } 3701 3702 if (info->offset == offset) { 3703 ret = 1; 3704 goto out; 3705 } 3706 3707 if (offset > info->offset && offset < info->offset + info->bytes) 3708 ret = 1; 3709 out: 3710 
spin_unlock(&ctl->tree_lock); 3711 return ret; 3712 } 3713 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3714