1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/sched/signal.h> 22 #include <linux/slab.h> 23 #include <linux/math64.h> 24 #include <linux/ratelimit.h> 25 #include <linux/bpf.h> 26 #include "ctree.h" 27 #include "free-space-cache.h" 28 #include "transaction.h" 29 #include "disk-io.h" 30 #include "extent_io.h" 31 #include "inode-map.h" 32 #include "volumes.h" 33 34 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 35 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 36 37 struct btrfs_trim_range { 38 u64 start; 39 u64 bytes; 40 struct list_head list; 41 }; 42 43 static int link_free_space(struct btrfs_free_space_ctl *ctl, 44 struct btrfs_free_space *info); 45 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 46 struct btrfs_free_space *info); 47 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 48 struct btrfs_trans_handle *trans, 49 struct btrfs_io_ctl *io_ctl, 50 struct btrfs_path *path); 51 52 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 53 struct btrfs_path *path, 54 u64 offset) 55 { 56 struct btrfs_fs_info *fs_info = root->fs_info; 57 struct btrfs_key key; 58 struct btrfs_key location; 59 struct btrfs_disk_key disk_key; 60 struct btrfs_free_space_header *header; 61 struct extent_buffer *leaf; 62 struct inode *inode = NULL; 63 int ret; 64 65 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 66 key.offset = offset; 67 key.type = 0; 68 69 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 70 if (ret < 0) 71 return ERR_PTR(ret); 72 if (ret > 0) { 73 btrfs_release_path(path); 74 return ERR_PTR(-ENOENT); 75 } 76 77 leaf = path->nodes[0]; 78 header = btrfs_item_ptr(leaf, path->slots[0], 79 struct btrfs_free_space_header); 80 btrfs_free_space_key(leaf, header, &disk_key); 81 btrfs_disk_key_to_cpu(&location, &disk_key); 82 btrfs_release_path(path); 83 84 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 85 if (IS_ERR(inode)) 86 return inode; 87 if (is_bad_inode(inode)) { 88 iput(inode); 89 return ERR_PTR(-ENOENT); 90 } 91 92 mapping_set_gfp_mask(inode->i_mapping, 93 mapping_gfp_constraint(inode->i_mapping, 94 ~(__GFP_FS | __GFP_HIGHMEM))); 95 96 return inode; 97 } 98 99 struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 100 struct btrfs_block_group_cache 101 *block_group, struct btrfs_path *path) 102 { 103 struct inode *inode = NULL; 104 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 105 106 spin_lock(&block_group->lock); 107 if (block_group->inode) 108 inode = igrab(block_group->inode); 109 spin_unlock(&block_group->lock); 110 if (inode) 111 return inode; 112 113 inode = __lookup_free_space_inode(fs_info->tree_root, path, 114 block_group->key.objectid); 115 if (IS_ERR(inode)) 116 return inode; 117 118 spin_lock(&block_group->lock); 119 if (!((BTRFS_I(inode)->flags & flags) == 
flags)) { 120 btrfs_info(fs_info, "Old style space inode found, converting."); 121 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 122 BTRFS_INODE_NODATACOW; 123 block_group->disk_cache_state = BTRFS_DC_CLEAR; 124 } 125 126 if (!block_group->iref) { 127 block_group->inode = igrab(inode); 128 block_group->iref = 1; 129 } 130 spin_unlock(&block_group->lock); 131 132 return inode; 133 } 134 135 static int __create_free_space_inode(struct btrfs_root *root, 136 struct btrfs_trans_handle *trans, 137 struct btrfs_path *path, 138 u64 ino, u64 offset) 139 { 140 struct btrfs_key key; 141 struct btrfs_disk_key disk_key; 142 struct btrfs_free_space_header *header; 143 struct btrfs_inode_item *inode_item; 144 struct extent_buffer *leaf; 145 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 146 int ret; 147 148 ret = btrfs_insert_empty_inode(trans, root, path, ino); 149 if (ret) 150 return ret; 151 152 /* We inline crc's for the free disk space cache */ 153 if (ino != BTRFS_FREE_INO_OBJECTID) 154 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 155 156 leaf = path->nodes[0]; 157 inode_item = btrfs_item_ptr(leaf, path->slots[0], 158 struct btrfs_inode_item); 159 btrfs_item_key(leaf, &disk_key, path->slots[0]); 160 memzero_extent_buffer(leaf, (unsigned long)inode_item, 161 sizeof(*inode_item)); 162 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 163 btrfs_set_inode_size(leaf, inode_item, 0); 164 btrfs_set_inode_nbytes(leaf, inode_item, 0); 165 btrfs_set_inode_uid(leaf, inode_item, 0); 166 btrfs_set_inode_gid(leaf, inode_item, 0); 167 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 168 btrfs_set_inode_flags(leaf, inode_item, flags); 169 btrfs_set_inode_nlink(leaf, inode_item, 1); 170 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 171 btrfs_set_inode_block_group(leaf, inode_item, offset); 172 btrfs_mark_buffer_dirty(leaf); 173 btrfs_release_path(path); 174 175 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 176 key.offset = offset; 177 key.type = 0; 178 ret = btrfs_insert_empty_item(trans, root, path, &key, 179 sizeof(struct btrfs_free_space_header)); 180 if (ret < 0) { 181 btrfs_release_path(path); 182 return ret; 183 } 184 185 leaf = path->nodes[0]; 186 header = btrfs_item_ptr(leaf, path->slots[0], 187 struct btrfs_free_space_header); 188 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 189 btrfs_set_free_space_key(leaf, header, &disk_key); 190 btrfs_mark_buffer_dirty(leaf); 191 btrfs_release_path(path); 192 193 return 0; 194 } 195 196 int create_free_space_inode(struct btrfs_fs_info *fs_info, 197 struct btrfs_trans_handle *trans, 198 struct btrfs_block_group_cache *block_group, 199 struct btrfs_path *path) 200 { 201 int ret; 202 u64 ino; 203 204 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 205 if (ret < 0) 206 return ret; 207 208 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 209 block_group->key.objectid); 210 } 211 212 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 213 struct btrfs_block_rsv *rsv) 214 { 215 u64 needed_bytes; 216 int ret; 217 218 /* 1 for slack space, 1 for updating the inode */ 219 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 220 btrfs_calc_trans_metadata_size(fs_info, 1); 221 222 spin_lock(&rsv->lock); 223 if (rsv->reserved < needed_bytes) 224 ret = -ENOSPC; 225 else 226 ret = 0; 227 spin_unlock(&rsv->lock); 228 return ret; 229 } 230 231 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 232 struct btrfs_block_group_cache 
*block_group, 233 struct inode *inode) 234 { 235 struct btrfs_root *root = BTRFS_I(inode)->root; 236 int ret = 0; 237 bool locked = false; 238 239 if (block_group) { 240 struct btrfs_path *path = btrfs_alloc_path(); 241 242 if (!path) { 243 ret = -ENOMEM; 244 goto fail; 245 } 246 locked = true; 247 mutex_lock(&trans->transaction->cache_write_mutex); 248 if (!list_empty(&block_group->io_list)) { 249 list_del_init(&block_group->io_list); 250 251 btrfs_wait_cache_io(trans, block_group, path); 252 btrfs_put_block_group(block_group); 253 } 254 255 /* 256 * now that we've truncated the cache away, its no longer 257 * setup or written 258 */ 259 spin_lock(&block_group->lock); 260 block_group->disk_cache_state = BTRFS_DC_CLEAR; 261 spin_unlock(&block_group->lock); 262 btrfs_free_path(path); 263 } 264 265 btrfs_i_size_write(BTRFS_I(inode), 0); 266 truncate_pagecache(inode, 0); 267 268 /* 269 * We don't need an orphan item because truncating the free space cache 270 * will never be split across transactions. 271 * We don't need to check for -EAGAIN because we're a free space 272 * cache inode 273 */ 274 ret = btrfs_truncate_inode_items(trans, root, inode, 275 0, BTRFS_EXTENT_DATA_KEY); 276 if (ret) 277 goto fail; 278 279 ret = btrfs_update_inode(trans, root, inode); 280 281 fail: 282 if (locked) 283 mutex_unlock(&trans->transaction->cache_write_mutex); 284 if (ret) 285 btrfs_abort_transaction(trans, ret); 286 287 return ret; 288 } 289 290 static void readahead_cache(struct inode *inode) 291 { 292 struct file_ra_state *ra; 293 unsigned long last_index; 294 295 ra = kzalloc(sizeof(*ra), GFP_NOFS); 296 if (!ra) 297 return; 298 299 file_ra_state_init(ra, inode->i_mapping); 300 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 301 302 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 303 304 kfree(ra); 305 } 306 307 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 308 int write) 309 { 310 int num_pages; 311 int check_crcs = 0; 312 313 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 314 315 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 316 check_crcs = 1; 317 318 /* Make sure we can fit our crcs into the first page */ 319 if (write && check_crcs && 320 (num_pages * sizeof(u32)) >= PAGE_SIZE) 321 return -ENOSPC; 322 323 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 324 325 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 326 if (!io_ctl->pages) 327 return -ENOMEM; 328 329 io_ctl->num_pages = num_pages; 330 io_ctl->fs_info = btrfs_sb(inode->i_sb); 331 io_ctl->check_crcs = check_crcs; 332 io_ctl->inode = inode; 333 334 return 0; 335 } 336 BPF_ALLOW_ERROR_INJECTION(io_ctl_init); 337 338 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 339 { 340 kfree(io_ctl->pages); 341 io_ctl->pages = NULL; 342 } 343 344 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 345 { 346 if (io_ctl->cur) { 347 io_ctl->cur = NULL; 348 io_ctl->orig = NULL; 349 } 350 } 351 352 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 353 { 354 ASSERT(io_ctl->index < io_ctl->num_pages); 355 io_ctl->page = io_ctl->pages[io_ctl->index++]; 356 io_ctl->cur = page_address(io_ctl->page); 357 io_ctl->orig = io_ctl->cur; 358 io_ctl->size = PAGE_SIZE; 359 if (clear) 360 clear_page(io_ctl->cur); 361 } 362 363 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 364 { 365 int i; 366 367 io_ctl_unmap_page(io_ctl); 368 369 for (i = 0; i < io_ctl->num_pages; i++) { 370 if (io_ctl->pages[i]) { 371 ClearPageChecked(io_ctl->pages[i]); 
372 unlock_page(io_ctl->pages[i]); 373 put_page(io_ctl->pages[i]); 374 } 375 } 376 } 377 378 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 379 int uptodate) 380 { 381 struct page *page; 382 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 383 int i; 384 385 for (i = 0; i < io_ctl->num_pages; i++) { 386 page = find_or_create_page(inode->i_mapping, i, mask); 387 if (!page) { 388 io_ctl_drop_pages(io_ctl); 389 return -ENOMEM; 390 } 391 io_ctl->pages[i] = page; 392 if (uptodate && !PageUptodate(page)) { 393 btrfs_readpage(NULL, page); 394 lock_page(page); 395 if (!PageUptodate(page)) { 396 btrfs_err(BTRFS_I(inode)->root->fs_info, 397 "error reading free space cache"); 398 io_ctl_drop_pages(io_ctl); 399 return -EIO; 400 } 401 } 402 } 403 404 for (i = 0; i < io_ctl->num_pages; i++) { 405 clear_page_dirty_for_io(io_ctl->pages[i]); 406 set_page_extent_mapped(io_ctl->pages[i]); 407 } 408 409 return 0; 410 } 411 412 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 413 { 414 __le64 *val; 415 416 io_ctl_map_page(io_ctl, 1); 417 418 /* 419 * Skip the csum areas. If we don't check crcs then we just have a 420 * 64bit chunk at the front of the first page. 421 */ 422 if (io_ctl->check_crcs) { 423 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 424 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 425 } else { 426 io_ctl->cur += sizeof(u64); 427 io_ctl->size -= sizeof(u64) * 2; 428 } 429 430 val = io_ctl->cur; 431 *val = cpu_to_le64(generation); 432 io_ctl->cur += sizeof(u64); 433 } 434 435 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 436 { 437 __le64 *gen; 438 439 /* 440 * Skip the crc area. If we don't check crcs then we just have a 64bit 441 * chunk at the front of the first page. 
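* (With crcs enabled, the front of the first page holds one u32 checksum per page followed by the u64 generation; see io_ctl_set_generation() and io_ctl_set_crc().)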
442 */ 443 if (io_ctl->check_crcs) { 444 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 445 io_ctl->size -= sizeof(u64) + 446 (sizeof(u32) * io_ctl->num_pages); 447 } else { 448 io_ctl->cur += sizeof(u64); 449 io_ctl->size -= sizeof(u64) * 2; 450 } 451 452 gen = io_ctl->cur; 453 if (le64_to_cpu(*gen) != generation) { 454 btrfs_err_rl(io_ctl->fs_info, 455 "space cache generation (%llu) does not match inode (%llu)", 456 *gen, generation); 457 io_ctl_unmap_page(io_ctl); 458 return -EIO; 459 } 460 io_ctl->cur += sizeof(u64); 461 return 0; 462 } 463 464 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 465 { 466 u32 *tmp; 467 u32 crc = ~(u32)0; 468 unsigned offset = 0; 469 470 if (!io_ctl->check_crcs) { 471 io_ctl_unmap_page(io_ctl); 472 return; 473 } 474 475 if (index == 0) 476 offset = sizeof(u32) * io_ctl->num_pages; 477 478 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 479 PAGE_SIZE - offset); 480 btrfs_csum_final(crc, (u8 *)&crc); 481 io_ctl_unmap_page(io_ctl); 482 tmp = page_address(io_ctl->pages[0]); 483 tmp += index; 484 *tmp = crc; 485 } 486 487 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 488 { 489 u32 *tmp, val; 490 u32 crc = ~(u32)0; 491 unsigned offset = 0; 492 493 if (!io_ctl->check_crcs) { 494 io_ctl_map_page(io_ctl, 0); 495 return 0; 496 } 497 498 if (index == 0) 499 offset = sizeof(u32) * io_ctl->num_pages; 500 501 tmp = page_address(io_ctl->pages[0]); 502 tmp += index; 503 val = *tmp; 504 505 io_ctl_map_page(io_ctl, 0); 506 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 507 PAGE_SIZE - offset); 508 btrfs_csum_final(crc, (u8 *)&crc); 509 if (val != crc) { 510 btrfs_err_rl(io_ctl->fs_info, 511 "csum mismatch on free space cache"); 512 io_ctl_unmap_page(io_ctl); 513 return -EIO; 514 } 515 516 return 0; 517 } 518 519 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 520 void *bitmap) 521 { 522 struct btrfs_free_space_entry *entry; 523 524 if (!io_ctl->cur) 525 return -ENOSPC; 526 527 entry = io_ctl->cur; 528 entry->offset = cpu_to_le64(offset); 529 entry->bytes = cpu_to_le64(bytes); 530 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 531 BTRFS_FREE_SPACE_EXTENT; 532 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 533 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 534 535 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 536 return 0; 537 538 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 539 540 /* No more pages to map */ 541 if (io_ctl->index >= io_ctl->num_pages) 542 return 0; 543 544 /* map the next page */ 545 io_ctl_map_page(io_ctl, 1); 546 return 0; 547 } 548 549 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 550 { 551 if (!io_ctl->cur) 552 return -ENOSPC; 553 554 /* 555 * If we aren't at the start of the current page, unmap this one and 556 * map the next one if there is any left. 557 */ 558 if (io_ctl->cur != io_ctl->orig) { 559 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 560 if (io_ctl->index >= io_ctl->num_pages) 561 return -ENOSPC; 562 io_ctl_map_page(io_ctl, 0); 563 } 564 565 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 566 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 567 if (io_ctl->index < io_ctl->num_pages) 568 io_ctl_map_page(io_ctl, 0); 569 return 0; 570 } 571 572 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 573 { 574 /* 575 * If we're not on the boundary we know we've modified the page and we 576 * need to crc the page. 
577 */ 578 if (io_ctl->cur != io_ctl->orig) 579 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 580 else 581 io_ctl_unmap_page(io_ctl); 582 583 while (io_ctl->index < io_ctl->num_pages) { 584 io_ctl_map_page(io_ctl, 1); 585 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 586 } 587 } 588 589 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 590 struct btrfs_free_space *entry, u8 *type) 591 { 592 struct btrfs_free_space_entry *e; 593 int ret; 594 595 if (!io_ctl->cur) { 596 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 597 if (ret) 598 return ret; 599 } 600 601 e = io_ctl->cur; 602 entry->offset = le64_to_cpu(e->offset); 603 entry->bytes = le64_to_cpu(e->bytes); 604 *type = e->type; 605 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 606 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 607 608 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 609 return 0; 610 611 io_ctl_unmap_page(io_ctl); 612 613 return 0; 614 } 615 616 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 617 struct btrfs_free_space *entry) 618 { 619 int ret; 620 621 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 622 if (ret) 623 return ret; 624 625 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 626 io_ctl_unmap_page(io_ctl); 627 628 return 0; 629 } 630 631 /* 632 * Since we attach pinned extents after the fact we can have contiguous sections 633 * of free space that are split up in entries. This poses a problem with the 634 * tree logging stuff since it could have allocated across what appears to be 2 635 * entries since we would have merged the entries when adding the pinned extents 636 * back to the free space cache. So run through the space cache that we just 637 * loaded and merge contiguous entries. This will make the log replay stuff not 638 * blow up and it will make for nicer allocator behavior. 
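* For example, an extent entry [0, 64K) loaded from the cache and a pinned extent [64K, 128K) added back afterwards would otherwise stay as two adjacent entries instead of a single [0, 128K) entry.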
639 */ 640 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 641 { 642 struct btrfs_free_space *e, *prev = NULL; 643 struct rb_node *n; 644 645 again: 646 spin_lock(&ctl->tree_lock); 647 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 648 e = rb_entry(n, struct btrfs_free_space, offset_index); 649 if (!prev) 650 goto next; 651 if (e->bitmap || prev->bitmap) 652 goto next; 653 if (prev->offset + prev->bytes == e->offset) { 654 unlink_free_space(ctl, prev); 655 unlink_free_space(ctl, e); 656 prev->bytes += e->bytes; 657 kmem_cache_free(btrfs_free_space_cachep, e); 658 link_free_space(ctl, prev); 659 prev = NULL; 660 spin_unlock(&ctl->tree_lock); 661 goto again; 662 } 663 next: 664 prev = e; 665 } 666 spin_unlock(&ctl->tree_lock); 667 } 668 669 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 670 struct btrfs_free_space_ctl *ctl, 671 struct btrfs_path *path, u64 offset) 672 { 673 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 674 struct btrfs_free_space_header *header; 675 struct extent_buffer *leaf; 676 struct btrfs_io_ctl io_ctl; 677 struct btrfs_key key; 678 struct btrfs_free_space *e, *n; 679 LIST_HEAD(bitmaps); 680 u64 num_entries; 681 u64 num_bitmaps; 682 u64 generation; 683 u8 type; 684 int ret = 0; 685 686 /* Nothing in the space cache, goodbye */ 687 if (!i_size_read(inode)) 688 return 0; 689 690 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 691 key.offset = offset; 692 key.type = 0; 693 694 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 695 if (ret < 0) 696 return 0; 697 else if (ret > 0) { 698 btrfs_release_path(path); 699 return 0; 700 } 701 702 ret = -1; 703 704 leaf = path->nodes[0]; 705 header = btrfs_item_ptr(leaf, path->slots[0], 706 struct btrfs_free_space_header); 707 num_entries = btrfs_free_space_entries(leaf, header); 708 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 709 generation = btrfs_free_space_generation(leaf, header); 710 btrfs_release_path(path); 711 712 if (!BTRFS_I(inode)->generation) { 713 btrfs_info(fs_info, 714 "the free space cache file (%llu) is invalid, skip it", 715 offset); 716 return 0; 717 } 718 719 if (BTRFS_I(inode)->generation != generation) { 720 btrfs_err(fs_info, 721 "free space inode generation (%llu) did not match free space cache generation (%llu)", 722 BTRFS_I(inode)->generation, generation); 723 return 0; 724 } 725 726 if (!num_entries) 727 return 0; 728 729 ret = io_ctl_init(&io_ctl, inode, 0); 730 if (ret) 731 return ret; 732 733 readahead_cache(inode); 734 735 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 736 if (ret) 737 goto out; 738 739 ret = io_ctl_check_crc(&io_ctl, 0); 740 if (ret) 741 goto free_cache; 742 743 ret = io_ctl_check_generation(&io_ctl, generation); 744 if (ret) 745 goto free_cache; 746 747 while (num_entries) { 748 e = kmem_cache_zalloc(btrfs_free_space_cachep, 749 GFP_NOFS); 750 if (!e) 751 goto free_cache; 752 753 ret = io_ctl_read_entry(&io_ctl, e, &type); 754 if (ret) { 755 kmem_cache_free(btrfs_free_space_cachep, e); 756 goto free_cache; 757 } 758 759 if (!e->bytes) { 760 kmem_cache_free(btrfs_free_space_cachep, e); 761 goto free_cache; 762 } 763 764 if (type == BTRFS_FREE_SPACE_EXTENT) { 765 spin_lock(&ctl->tree_lock); 766 ret = link_free_space(ctl, e); 767 spin_unlock(&ctl->tree_lock); 768 if (ret) { 769 btrfs_err(fs_info, 770 "Duplicate entries in free space cache, dumping"); 771 kmem_cache_free(btrfs_free_space_cachep, e); 772 goto free_cache; 773 } 774 } else { 775 ASSERT(num_bitmaps); 776 num_bitmaps--; 777 e->bitmap = 
kzalloc(PAGE_SIZE, GFP_NOFS); 778 if (!e->bitmap) { 779 kmem_cache_free( 780 btrfs_free_space_cachep, e); 781 goto free_cache; 782 } 783 spin_lock(&ctl->tree_lock); 784 ret = link_free_space(ctl, e); 785 ctl->total_bitmaps++; 786 ctl->op->recalc_thresholds(ctl); 787 spin_unlock(&ctl->tree_lock); 788 if (ret) { 789 btrfs_err(fs_info, 790 "Duplicate entries in free space cache, dumping"); 791 kmem_cache_free(btrfs_free_space_cachep, e); 792 goto free_cache; 793 } 794 list_add_tail(&e->list, &bitmaps); 795 } 796 797 num_entries--; 798 } 799 800 io_ctl_unmap_page(&io_ctl); 801 802 /* 803 * We add the bitmaps at the end of the entries in order that 804 * the bitmap entries are added to the cache. 805 */ 806 list_for_each_entry_safe(e, n, &bitmaps, list) { 807 list_del_init(&e->list); 808 ret = io_ctl_read_bitmap(&io_ctl, e); 809 if (ret) 810 goto free_cache; 811 } 812 813 io_ctl_drop_pages(&io_ctl); 814 merge_space_tree(ctl); 815 ret = 1; 816 out: 817 io_ctl_free(&io_ctl); 818 return ret; 819 free_cache: 820 io_ctl_drop_pages(&io_ctl); 821 __btrfs_remove_free_space_cache(ctl); 822 goto out; 823 } 824 825 int load_free_space_cache(struct btrfs_fs_info *fs_info, 826 struct btrfs_block_group_cache *block_group) 827 { 828 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 829 struct inode *inode; 830 struct btrfs_path *path; 831 int ret = 0; 832 bool matched; 833 u64 used = btrfs_block_group_used(&block_group->item); 834 835 /* 836 * If this block group has been marked to be cleared for one reason or 837 * another then we can't trust the on disk cache, so just return. 838 */ 839 spin_lock(&block_group->lock); 840 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 841 spin_unlock(&block_group->lock); 842 return 0; 843 } 844 spin_unlock(&block_group->lock); 845 846 path = btrfs_alloc_path(); 847 if (!path) 848 return 0; 849 path->search_commit_root = 1; 850 path->skip_locking = 1; 851 852 inode = lookup_free_space_inode(fs_info, block_group, path); 853 if (IS_ERR(inode)) { 854 btrfs_free_path(path); 855 return 0; 856 } 857 858 /* We may have converted the inode and made the cache invalid. 
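* (lookup_free_space_inode() sets BTRFS_DC_CLEAR when it converts an old style inode, so recheck disk_cache_state under the block group lock.)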
*/ 859 spin_lock(&block_group->lock); 860 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 861 spin_unlock(&block_group->lock); 862 btrfs_free_path(path); 863 goto out; 864 } 865 spin_unlock(&block_group->lock); 866 867 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 868 path, block_group->key.objectid); 869 btrfs_free_path(path); 870 if (ret <= 0) 871 goto out; 872 873 spin_lock(&ctl->tree_lock); 874 matched = (ctl->free_space == (block_group->key.offset - used - 875 block_group->bytes_super)); 876 spin_unlock(&ctl->tree_lock); 877 878 if (!matched) { 879 __btrfs_remove_free_space_cache(ctl); 880 btrfs_warn(fs_info, 881 "block group %llu has wrong amount of free space", 882 block_group->key.objectid); 883 ret = -1; 884 } 885 out: 886 if (ret < 0) { 887 /* This cache is bogus, make sure it gets cleared */ 888 spin_lock(&block_group->lock); 889 block_group->disk_cache_state = BTRFS_DC_CLEAR; 890 spin_unlock(&block_group->lock); 891 ret = 0; 892 893 btrfs_warn(fs_info, 894 "failed to load free space cache for block group %llu, rebuilding it now", 895 block_group->key.objectid); 896 } 897 898 iput(inode); 899 return ret; 900 } 901 902 static noinline_for_stack 903 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 904 struct btrfs_free_space_ctl *ctl, 905 struct btrfs_block_group_cache *block_group, 906 int *entries, int *bitmaps, 907 struct list_head *bitmap_list) 908 { 909 int ret; 910 struct btrfs_free_cluster *cluster = NULL; 911 struct btrfs_free_cluster *cluster_locked = NULL; 912 struct rb_node *node = rb_first(&ctl->free_space_offset); 913 struct btrfs_trim_range *trim_entry; 914 915 /* Get the cluster for this block_group if it exists */ 916 if (block_group && !list_empty(&block_group->cluster_list)) { 917 cluster = list_entry(block_group->cluster_list.next, 918 struct btrfs_free_cluster, 919 block_group_list); 920 } 921 922 if (!node && cluster) { 923 cluster_locked = cluster; 924 spin_lock(&cluster_locked->lock); 925 node = rb_first(&cluster->root); 926 cluster = NULL; 927 } 928 929 /* Write out the extent entries */ 930 while (node) { 931 struct btrfs_free_space *e; 932 933 e = rb_entry(node, struct btrfs_free_space, offset_index); 934 *entries += 1; 935 936 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 937 e->bitmap); 938 if (ret) 939 goto fail; 940 941 if (e->bitmap) { 942 list_add_tail(&e->list, bitmap_list); 943 *bitmaps += 1; 944 } 945 node = rb_next(node); 946 if (!node && cluster) { 947 node = rb_first(&cluster->root); 948 cluster_locked = cluster; 949 spin_lock(&cluster_locked->lock); 950 cluster = NULL; 951 } 952 } 953 if (cluster_locked) { 954 spin_unlock(&cluster_locked->lock); 955 cluster_locked = NULL; 956 } 957 958 /* 959 * Make sure we don't miss any range that was removed from our rbtree 960 * because trimming is running. Otherwise after a umount+mount (or crash 961 * after committing the transaction) we would leak free space and get 962 * an inconsistent free space cache report from fsck. 
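* (Such ranges sit on ctl->trimming_ranges while the trim is in progress and are written out below as ordinary extent entries.)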
963 */ 964 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 965 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 966 trim_entry->bytes, NULL); 967 if (ret) 968 goto fail; 969 *entries += 1; 970 } 971 972 return 0; 973 fail: 974 if (cluster_locked) 975 spin_unlock(&cluster_locked->lock); 976 return -ENOSPC; 977 } 978 979 static noinline_for_stack int 980 update_cache_item(struct btrfs_trans_handle *trans, 981 struct btrfs_root *root, 982 struct inode *inode, 983 struct btrfs_path *path, u64 offset, 984 int entries, int bitmaps) 985 { 986 struct btrfs_key key; 987 struct btrfs_free_space_header *header; 988 struct extent_buffer *leaf; 989 int ret; 990 991 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 992 key.offset = offset; 993 key.type = 0; 994 995 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 996 if (ret < 0) { 997 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 998 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 999 GFP_NOFS); 1000 goto fail; 1001 } 1002 leaf = path->nodes[0]; 1003 if (ret > 0) { 1004 struct btrfs_key found_key; 1005 ASSERT(path->slots[0]); 1006 path->slots[0]--; 1007 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1008 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1009 found_key.offset != offset) { 1010 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1011 inode->i_size - 1, 1012 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1013 NULL, GFP_NOFS); 1014 btrfs_release_path(path); 1015 goto fail; 1016 } 1017 } 1018 1019 BTRFS_I(inode)->generation = trans->transid; 1020 header = btrfs_item_ptr(leaf, path->slots[0], 1021 struct btrfs_free_space_header); 1022 btrfs_set_free_space_entries(leaf, header, entries); 1023 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1024 btrfs_set_free_space_generation(leaf, header, trans->transid); 1025 btrfs_mark_buffer_dirty(leaf); 1026 btrfs_release_path(path); 1027 1028 return 0; 1029 1030 fail: 1031 return -1; 1032 } 1033 1034 static noinline_for_stack int 1035 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1036 struct btrfs_block_group_cache *block_group, 1037 struct btrfs_io_ctl *io_ctl, 1038 int *entries) 1039 { 1040 u64 start, extent_start, extent_end, len; 1041 struct extent_io_tree *unpin = NULL; 1042 int ret; 1043 1044 if (!block_group) 1045 return 0; 1046 1047 /* 1048 * We want to add any pinned extents to our free space cache 1049 * so we don't leak the space 1050 * 1051 * We shouldn't have switched the pinned extents yet so this is the 1052 * right one 1053 */ 1054 unpin = fs_info->pinned_extents; 1055 1056 start = block_group->key.objectid; 1057 1058 while (start < block_group->key.objectid + block_group->key.offset) { 1059 ret = find_first_extent_bit(unpin, start, 1060 &extent_start, &extent_end, 1061 EXTENT_DIRTY, NULL); 1062 if (ret) 1063 return 0; 1064 1065 /* This pinned extent is out of our range */ 1066 if (extent_start >= block_group->key.objectid + 1067 block_group->key.offset) 1068 return 0; 1069 1070 extent_start = max(extent_start, start); 1071 extent_end = min(block_group->key.objectid + 1072 block_group->key.offset, extent_end + 1); 1073 len = extent_end - extent_start; 1074 1075 *entries += 1; 1076 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1077 if (ret) 1078 return -ENOSPC; 1079 1080 start = extent_end; 1081 } 1082 1083 return 0; 1084 } 1085 1086 static noinline_for_stack int 1087 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1088 { 1089 struct btrfs_free_space *entry, *next; 1090 int ret; 1091 1092 /* Write 
out the bitmaps */ 1093 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1094 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1095 if (ret) 1096 return -ENOSPC; 1097 list_del_init(&entry->list); 1098 } 1099 1100 return 0; 1101 } 1102 1103 static int flush_dirty_cache(struct inode *inode) 1104 { 1105 int ret; 1106 1107 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1108 if (ret) 1109 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1110 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1111 GFP_NOFS); 1112 1113 return ret; 1114 } 1115 1116 static void noinline_for_stack 1117 cleanup_bitmap_list(struct list_head *bitmap_list) 1118 { 1119 struct btrfs_free_space *entry, *next; 1120 1121 list_for_each_entry_safe(entry, next, bitmap_list, list) 1122 list_del_init(&entry->list); 1123 } 1124 1125 static void noinline_for_stack 1126 cleanup_write_cache_enospc(struct inode *inode, 1127 struct btrfs_io_ctl *io_ctl, 1128 struct extent_state **cached_state) 1129 { 1130 io_ctl_drop_pages(io_ctl); 1131 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1132 i_size_read(inode) - 1, cached_state, 1133 GFP_NOFS); 1134 } 1135 1136 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1137 struct btrfs_trans_handle *trans, 1138 struct btrfs_block_group_cache *block_group, 1139 struct btrfs_io_ctl *io_ctl, 1140 struct btrfs_path *path, u64 offset) 1141 { 1142 int ret; 1143 struct inode *inode = io_ctl->inode; 1144 struct btrfs_fs_info *fs_info; 1145 1146 if (!inode) 1147 return 0; 1148 1149 fs_info = btrfs_sb(inode->i_sb); 1150 1151 /* Flush the dirty pages in the cache file. */ 1152 ret = flush_dirty_cache(inode); 1153 if (ret) 1154 goto out; 1155 1156 /* Update the cache item to tell everyone this cache file is valid. */ 1157 ret = update_cache_item(trans, root, inode, path, offset, 1158 io_ctl->entries, io_ctl->bitmaps); 1159 out: 1160 io_ctl_free(io_ctl); 1161 if (ret) { 1162 invalidate_inode_pages2(inode->i_mapping); 1163 BTRFS_I(inode)->generation = 0; 1164 if (block_group) { 1165 #ifdef DEBUG 1166 btrfs_err(fs_info, 1167 "failed to write free space cache for block group %llu", 1168 block_group->key.objectid); 1169 #endif 1170 } 1171 } 1172 btrfs_update_inode(trans, root, inode); 1173 1174 if (block_group) { 1175 /* the dirty list is protected by the dirty_bgs_lock */ 1176 spin_lock(&trans->transaction->dirty_bgs_lock); 1177 1178 /* the disk_cache_state is protected by the block group lock */ 1179 spin_lock(&block_group->lock); 1180 1181 /* 1182 * only mark this as written if we didn't get put back on 1183 * the dirty list while waiting for IO. 
Otherwise our 1184 * cache state won't be right, and we won't get written again 1185 */ 1186 if (!ret && list_empty(&block_group->dirty_list)) 1187 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1188 else if (ret) 1189 block_group->disk_cache_state = BTRFS_DC_ERROR; 1190 1191 spin_unlock(&block_group->lock); 1192 spin_unlock(&trans->transaction->dirty_bgs_lock); 1193 io_ctl->inode = NULL; 1194 iput(inode); 1195 } 1196 1197 return ret; 1198 1199 } 1200 1201 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1202 struct btrfs_trans_handle *trans, 1203 struct btrfs_io_ctl *io_ctl, 1204 struct btrfs_path *path) 1205 { 1206 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1207 } 1208 1209 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1210 struct btrfs_block_group_cache *block_group, 1211 struct btrfs_path *path) 1212 { 1213 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1214 block_group, &block_group->io_ctl, 1215 path, block_group->key.objectid); 1216 } 1217 1218 /** 1219 * __btrfs_write_out_cache - write out cached info to an inode 1220 * @root - the root the inode belongs to 1221 * @ctl - the free space cache we are going to write out 1222 * @block_group - the block_group for this cache if it belongs to a block_group 1223 * @trans - the trans handle 1224 * 1225 * This function writes out a free space cache struct to disk for quick recovery 1226 * on mount. This will return 0 if it was successful in writing the cache out, 1227 * or an errno if it was not. 1228 */ 1229 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1230 struct btrfs_free_space_ctl *ctl, 1231 struct btrfs_block_group_cache *block_group, 1232 struct btrfs_io_ctl *io_ctl, 1233 struct btrfs_trans_handle *trans) 1234 { 1235 struct btrfs_fs_info *fs_info = root->fs_info; 1236 struct extent_state *cached_state = NULL; 1237 LIST_HEAD(bitmap_list); 1238 int entries = 0; 1239 int bitmaps = 0; 1240 int ret; 1241 int must_iput = 0; 1242 1243 if (!i_size_read(inode)) 1244 return -EIO; 1245 1246 WARN_ON(io_ctl->pages); 1247 ret = io_ctl_init(io_ctl, inode, 1); 1248 if (ret) 1249 return ret; 1250 1251 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1252 down_write(&block_group->data_rwsem); 1253 spin_lock(&block_group->lock); 1254 if (block_group->delalloc_bytes) { 1255 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1256 spin_unlock(&block_group->lock); 1257 up_write(&block_group->data_rwsem); 1258 BTRFS_I(inode)->generation = 0; 1259 ret = 0; 1260 must_iput = 1; 1261 goto out; 1262 } 1263 spin_unlock(&block_group->lock); 1264 } 1265 1266 /* Lock all pages first so we can lock the extent safely. */ 1267 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1268 if (ret) 1269 goto out_unlock; 1270 1271 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1272 &cached_state); 1273 1274 io_ctl_set_generation(io_ctl, trans->transid); 1275 1276 mutex_lock(&ctl->cache_writeout_mutex); 1277 /* Write out the extent entries in the free space cache */ 1278 spin_lock(&ctl->tree_lock); 1279 ret = write_cache_extent_entries(io_ctl, ctl, 1280 block_group, &entries, &bitmaps, 1281 &bitmap_list); 1282 if (ret) 1283 goto out_nospc_locked; 1284 1285 /* 1286 * Some spaces that are freed in the current transaction are pinned, 1287 * they will be added into free space cache after the transaction is 1288 * committed, we shouldn't lose them. 
1289 * 1290 * If this changes while we are working we'll get added back to 1291 * the dirty list and redo it. No locking needed 1292 */ 1293 ret = write_pinned_extent_entries(fs_info, block_group, 1294 io_ctl, &entries); 1295 if (ret) 1296 goto out_nospc_locked; 1297 1298 /* 1299 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1300 * locked while doing it because a concurrent trim can be manipulating 1301 * or freeing the bitmap. 1302 */ 1303 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1304 spin_unlock(&ctl->tree_lock); 1305 mutex_unlock(&ctl->cache_writeout_mutex); 1306 if (ret) 1307 goto out_nospc; 1308 1309 /* Zero out the rest of the pages just to make sure */ 1310 io_ctl_zero_remaining_pages(io_ctl); 1311 1312 /* Everything is written out, now we dirty the pages in the file. */ 1313 ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0, 1314 i_size_read(inode), &cached_state); 1315 if (ret) 1316 goto out_nospc; 1317 1318 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1319 up_write(&block_group->data_rwsem); 1320 /* 1321 * Release the pages and unlock the extent; we will flush 1322 * them out later 1323 */ 1324 io_ctl_drop_pages(io_ctl); 1325 1326 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1327 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1328 1329 /* 1330 * At this point the pages are under IO and we're happy. 1331 * The caller is responsible for waiting on them and updating 1332 * the cache and the inode 1333 */ 1334 io_ctl->entries = entries; 1335 io_ctl->bitmaps = bitmaps; 1336 1337 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1338 if (ret) 1339 goto out; 1340 1341 return 0; 1342 1343 out: 1344 io_ctl->inode = NULL; 1345 io_ctl_free(io_ctl); 1346 if (ret) { 1347 invalidate_inode_pages2(inode->i_mapping); 1348 BTRFS_I(inode)->generation = 0; 1349 } 1350 btrfs_update_inode(trans, root, inode); 1351 if (must_iput) 1352 iput(inode); 1353 return ret; 1354 1355 out_nospc_locked: 1356 cleanup_bitmap_list(&bitmap_list); 1357 spin_unlock(&ctl->tree_lock); 1358 mutex_unlock(&ctl->cache_writeout_mutex); 1359 1360 out_nospc: 1361 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1362 1363 out_unlock: 1364 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1365 up_write(&block_group->data_rwsem); 1366 1367 goto out; 1368 } 1369 1370 int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 1371 struct btrfs_trans_handle *trans, 1372 struct btrfs_block_group_cache *block_group, 1373 struct btrfs_path *path) 1374 { 1375 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1376 struct inode *inode; 1377 int ret = 0; 1378 1379 spin_lock(&block_group->lock); 1380 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1381 spin_unlock(&block_group->lock); 1382 return 0; 1383 } 1384 spin_unlock(&block_group->lock); 1385 1386 inode = lookup_free_space_inode(fs_info, block_group, path); 1387 if (IS_ERR(inode)) 1388 return 0; 1389 1390 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1391 block_group, &block_group->io_ctl, trans); 1392 if (ret) { 1393 #ifdef DEBUG 1394 btrfs_err(fs_info, 1395 "failed to write free space cache for block group %llu", 1396 block_group->key.objectid); 1397 #endif 1398 spin_lock(&block_group->lock); 1399 block_group->disk_cache_state = BTRFS_DC_ERROR; 1400 spin_unlock(&block_group->lock); 1401 1402 block_group->io_ctl.inode = NULL; 1403 iput(inode); 1404 } 1405 1406 /* 1407 * If ret == 0, the caller is expected to call btrfs_wait_cache_io 1408 * to wait
for IO and put the inode 1409 */ 1410 1411 return ret; 1412 } 1413 1414 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1415 u64 offset) 1416 { 1417 ASSERT(offset >= bitmap_start); 1418 offset -= bitmap_start; 1419 return (unsigned long)(div_u64(offset, unit)); 1420 } 1421 1422 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1423 { 1424 return (unsigned long)(div_u64(bytes, unit)); 1425 } 1426 1427 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1428 u64 offset) 1429 { 1430 u64 bitmap_start; 1431 u64 bytes_per_bitmap; 1432 1433 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1434 bitmap_start = offset - ctl->start; 1435 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1436 bitmap_start *= bytes_per_bitmap; 1437 bitmap_start += ctl->start; 1438 1439 return bitmap_start; 1440 } 1441 1442 static int tree_insert_offset(struct rb_root *root, u64 offset, 1443 struct rb_node *node, int bitmap) 1444 { 1445 struct rb_node **p = &root->rb_node; 1446 struct rb_node *parent = NULL; 1447 struct btrfs_free_space *info; 1448 1449 while (*p) { 1450 parent = *p; 1451 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1452 1453 if (offset < info->offset) { 1454 p = &(*p)->rb_left; 1455 } else if (offset > info->offset) { 1456 p = &(*p)->rb_right; 1457 } else { 1458 /* 1459 * we could have a bitmap entry and an extent entry 1460 * share the same offset. If this is the case, we want 1461 * the extent entry to always be found first if we do a 1462 * linear search through the tree, since we want to have 1463 * the quickest allocation time, and allocating from an 1464 * extent is faster than allocating from a bitmap. So 1465 * if we're inserting a bitmap and we find an entry at 1466 * this offset, we want to go right, or after this entry 1467 * logically. If we are inserting an extent and we've 1468 * found a bitmap, we want to go left, or before 1469 * logically. 1470 */ 1471 if (bitmap) { 1472 if (info->bitmap) { 1473 WARN_ON_ONCE(1); 1474 return -EEXIST; 1475 } 1476 p = &(*p)->rb_right; 1477 } else { 1478 if (!info->bitmap) { 1479 WARN_ON_ONCE(1); 1480 return -EEXIST; 1481 } 1482 p = &(*p)->rb_left; 1483 } 1484 } 1485 } 1486 1487 rb_link_node(node, parent, p); 1488 rb_insert_color(node, root); 1489 1490 return 0; 1491 } 1492 1493 /* 1494 * searches the tree for the given offset. 1495 * 1496 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1497 * want a section that has at least bytes size and comes at or after the given 1498 * offset. 1499 */ 1500 static struct btrfs_free_space * 1501 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1502 u64 offset, int bitmap_only, int fuzzy) 1503 { 1504 struct rb_node *n = ctl->free_space_offset.rb_node; 1505 struct btrfs_free_space *entry, *prev = NULL; 1506 1507 /* find entry that is closest to the 'offset' */ 1508 while (1) { 1509 if (!n) { 1510 entry = NULL; 1511 break; 1512 } 1513 1514 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1515 prev = entry; 1516 1517 if (offset < entry->offset) 1518 n = n->rb_left; 1519 else if (offset > entry->offset) 1520 n = n->rb_right; 1521 else 1522 break; 1523 } 1524 1525 if (bitmap_only) { 1526 if (!entry) 1527 return NULL; 1528 if (entry->bitmap) 1529 return entry; 1530 1531 /* 1532 * bitmap entry and extent entry may share same offset, 1533 * in that case, bitmap entry comes after extent entry. 
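* (tree_insert_offset() links a bitmap entry to the right of an extent entry that shares its offset, so rb_next() lands on the bitmap.)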
1534 */ 1535 n = rb_next(n); 1536 if (!n) 1537 return NULL; 1538 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1539 if (entry->offset != offset) 1540 return NULL; 1541 1542 WARN_ON(!entry->bitmap); 1543 return entry; 1544 } else if (entry) { 1545 if (entry->bitmap) { 1546 /* 1547 * if previous extent entry covers the offset, 1548 * we should return it instead of the bitmap entry 1549 */ 1550 n = rb_prev(&entry->offset_index); 1551 if (n) { 1552 prev = rb_entry(n, struct btrfs_free_space, 1553 offset_index); 1554 if (!prev->bitmap && 1555 prev->offset + prev->bytes > offset) 1556 entry = prev; 1557 } 1558 } 1559 return entry; 1560 } 1561 1562 if (!prev) 1563 return NULL; 1564 1565 /* find last entry before the 'offset' */ 1566 entry = prev; 1567 if (entry->offset > offset) { 1568 n = rb_prev(&entry->offset_index); 1569 if (n) { 1570 entry = rb_entry(n, struct btrfs_free_space, 1571 offset_index); 1572 ASSERT(entry->offset <= offset); 1573 } else { 1574 if (fuzzy) 1575 return entry; 1576 else 1577 return NULL; 1578 } 1579 } 1580 1581 if (entry->bitmap) { 1582 n = rb_prev(&entry->offset_index); 1583 if (n) { 1584 prev = rb_entry(n, struct btrfs_free_space, 1585 offset_index); 1586 if (!prev->bitmap && 1587 prev->offset + prev->bytes > offset) 1588 return prev; 1589 } 1590 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1591 return entry; 1592 } else if (entry->offset + entry->bytes > offset) 1593 return entry; 1594 1595 if (!fuzzy) 1596 return NULL; 1597 1598 while (1) { 1599 if (entry->bitmap) { 1600 if (entry->offset + BITS_PER_BITMAP * 1601 ctl->unit > offset) 1602 break; 1603 } else { 1604 if (entry->offset + entry->bytes > offset) 1605 break; 1606 } 1607 1608 n = rb_next(&entry->offset_index); 1609 if (!n) 1610 return NULL; 1611 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1612 } 1613 return entry; 1614 } 1615 1616 static inline void 1617 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1618 struct btrfs_free_space *info) 1619 { 1620 rb_erase(&info->offset_index, &ctl->free_space_offset); 1621 ctl->free_extents--; 1622 } 1623 1624 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1625 struct btrfs_free_space *info) 1626 { 1627 __unlink_free_space(ctl, info); 1628 ctl->free_space -= info->bytes; 1629 } 1630 1631 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1632 struct btrfs_free_space *info) 1633 { 1634 int ret = 0; 1635 1636 ASSERT(info->bytes || info->bitmap); 1637 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1638 &info->offset_index, (info->bitmap != NULL)); 1639 if (ret) 1640 return ret; 1641 1642 ctl->free_space += info->bytes; 1643 ctl->free_extents++; 1644 return ret; 1645 } 1646 1647 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1648 { 1649 struct btrfs_block_group_cache *block_group = ctl->private; 1650 u64 max_bytes; 1651 u64 bitmap_bytes; 1652 u64 extent_bytes; 1653 u64 size = block_group->key.offset; 1654 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1655 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1656 1657 max_bitmaps = max_t(u64, max_bitmaps, 1); 1658 1659 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1660 1661 /* 1662 * The goal is to keep the total amount of memory used per 1gb of space 1663 * at or below 32k, so we need to adjust how much memory we allow to be 1664 * used by extent based free space tracking 1665 */ 1666 if (size < SZ_1G) 1667 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1668 else 1669 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1670 1671 /* 1672 * we want to account for 1 more bitmap than what we have so we can make 1673 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1674 * we add more bitmaps. 1675 */ 1676 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1677 1678 if (bitmap_bytes >= max_bytes) { 1679 ctl->extents_thresh = 0; 1680 return; 1681 } 1682 1683 /* 1684 * we want the extent entry threshold to always be at most 1/2 the max 1685 * bytes we can have, or whatever is less than that. 1686 */ 1687 extent_bytes = max_bytes - bitmap_bytes; 1688 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1689 1690 ctl->extents_thresh = 1691 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1692 } 1693 1694 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1695 struct btrfs_free_space *info, 1696 u64 offset, u64 bytes) 1697 { 1698 unsigned long start, count; 1699 1700 start = offset_to_bit(info->offset, ctl->unit, offset); 1701 count = bytes_to_bits(bytes, ctl->unit); 1702 ASSERT(start + count <= BITS_PER_BITMAP); 1703 1704 bitmap_clear(info->bitmap, start, count); 1705 1706 info->bytes -= bytes; 1707 } 1708 1709 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1710 struct btrfs_free_space *info, u64 offset, 1711 u64 bytes) 1712 { 1713 __bitmap_clear_bits(ctl, info, offset, bytes); 1714 ctl->free_space -= bytes; 1715 } 1716 1717 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1718 struct btrfs_free_space *info, u64 offset, 1719 u64 bytes) 1720 { 1721 unsigned long start, count; 1722 1723 start = offset_to_bit(info->offset, ctl->unit, offset); 1724 count = bytes_to_bits(bytes, ctl->unit); 1725 ASSERT(start + count <= BITS_PER_BITMAP); 1726 1727 bitmap_set(info->bitmap, start, count); 1728 1729 info->bytes += bytes; 1730 ctl->free_space += bytes; 1731 } 1732 1733 /* 1734 * If we can not find suitable extent, we will use bytes to record 1735 * the size of the max extent. 1736 */ 1737 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1738 struct btrfs_free_space *bitmap_info, u64 *offset, 1739 u64 *bytes, bool for_alloc) 1740 { 1741 unsigned long found_bits = 0; 1742 unsigned long max_bits = 0; 1743 unsigned long bits, i; 1744 unsigned long next_zero; 1745 unsigned long extent_bits; 1746 1747 /* 1748 * Skip searching the bitmap if we don't have a contiguous section that 1749 * is large enough for this allocation. 
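* (bitmap_info->max_extent_size caches the largest run found by a previous failed search, letting us bail out here without rescanning the bitmap.)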
1750 */ 1751 if (for_alloc && 1752 bitmap_info->max_extent_size && 1753 bitmap_info->max_extent_size < *bytes) { 1754 *bytes = bitmap_info->max_extent_size; 1755 return -1; 1756 } 1757 1758 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1759 max_t(u64, *offset, bitmap_info->offset)); 1760 bits = bytes_to_bits(*bytes, ctl->unit); 1761 1762 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1763 if (for_alloc && bits == 1) { 1764 found_bits = 1; 1765 break; 1766 } 1767 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1768 BITS_PER_BITMAP, i); 1769 extent_bits = next_zero - i; 1770 if (extent_bits >= bits) { 1771 found_bits = extent_bits; 1772 break; 1773 } else if (extent_bits > max_bits) { 1774 max_bits = extent_bits; 1775 } 1776 i = next_zero; 1777 } 1778 1779 if (found_bits) { 1780 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1781 *bytes = (u64)(found_bits) * ctl->unit; 1782 return 0; 1783 } 1784 1785 *bytes = (u64)(max_bits) * ctl->unit; 1786 bitmap_info->max_extent_size = *bytes; 1787 return -1; 1788 } 1789 1790 /* Cache the size of the max extent in bytes */ 1791 static struct btrfs_free_space * 1792 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1793 unsigned long align, u64 *max_extent_size) 1794 { 1795 struct btrfs_free_space *entry; 1796 struct rb_node *node; 1797 u64 tmp; 1798 u64 align_off; 1799 int ret; 1800 1801 if (!ctl->free_space_offset.rb_node) 1802 goto out; 1803 1804 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1805 if (!entry) 1806 goto out; 1807 1808 for (node = &entry->offset_index; node; node = rb_next(node)) { 1809 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1810 if (entry->bytes < *bytes) { 1811 if (entry->bytes > *max_extent_size) 1812 *max_extent_size = entry->bytes; 1813 continue; 1814 } 1815 1816 /* make sure the space returned is big enough 1817 * to match our requested alignment 1818 */ 1819 if (*bytes >= align) { 1820 tmp = entry->offset - ctl->start + align - 1; 1821 tmp = div64_u64(tmp, align); 1822 tmp = tmp * align + ctl->start; 1823 align_off = tmp - entry->offset; 1824 } else { 1825 align_off = 0; 1826 tmp = entry->offset; 1827 } 1828 1829 if (entry->bytes < *bytes + align_off) { 1830 if (entry->bytes > *max_extent_size) 1831 *max_extent_size = entry->bytes; 1832 continue; 1833 } 1834 1835 if (entry->bitmap) { 1836 u64 size = *bytes; 1837 1838 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1839 if (!ret) { 1840 *offset = tmp; 1841 *bytes = size; 1842 return entry; 1843 } else if (size > *max_extent_size) { 1844 *max_extent_size = size; 1845 } 1846 continue; 1847 } 1848 1849 *offset = tmp; 1850 *bytes = entry->bytes - align_off; 1851 return entry; 1852 } 1853 out: 1854 return NULL; 1855 } 1856 1857 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1858 struct btrfs_free_space *info, u64 offset) 1859 { 1860 info->offset = offset_to_bitmap(ctl, offset); 1861 info->bytes = 0; 1862 INIT_LIST_HEAD(&info->list); 1863 link_free_space(ctl, info); 1864 ctl->total_bitmaps++; 1865 1866 ctl->op->recalc_thresholds(ctl); 1867 } 1868 1869 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1870 struct btrfs_free_space *bitmap_info) 1871 { 1872 unlink_free_space(ctl, bitmap_info); 1873 kfree(bitmap_info->bitmap); 1874 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1875 ctl->total_bitmaps--; 1876 ctl->op->recalc_thresholds(ctl); 1877 } 1878 1879 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1880 struct 
btrfs_free_space *bitmap_info, 1881 u64 *offset, u64 *bytes) 1882 { 1883 u64 end; 1884 u64 search_start, search_bytes; 1885 int ret; 1886 1887 again: 1888 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1889 1890 /* 1891 * We need to search for bits in this bitmap. We could only cover some 1892 * of the extent in this bitmap thanks to how we add space, so we need 1893 * to search for as much of it as we can and clear that amount, and then 1894 * go searching for the next bit. 1895 */ 1896 search_start = *offset; 1897 search_bytes = ctl->unit; 1898 search_bytes = min(search_bytes, end - search_start + 1); 1899 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes, 1900 false); 1901 if (ret < 0 || search_start != *offset) 1902 return -EINVAL; 1903 1904 /* We may have found more bits than what we need */ 1905 search_bytes = min(search_bytes, *bytes); 1906 1907 /* Cannot clear past the end of the bitmap */ 1908 search_bytes = min(search_bytes, end - search_start + 1); 1909 1910 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); 1911 *offset += search_bytes; 1912 *bytes -= search_bytes; 1913 1914 if (*bytes) { 1915 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1916 if (!bitmap_info->bytes) 1917 free_bitmap(ctl, bitmap_info); 1918 1919 /* 1920 * no entry after this bitmap, but we still have bytes to 1921 * remove, so something has gone wrong. 1922 */ 1923 if (!next) 1924 return -EINVAL; 1925 1926 bitmap_info = rb_entry(next, struct btrfs_free_space, 1927 offset_index); 1928 1929 /* 1930 * if the next entry isn't a bitmap we need to return to let the 1931 * extent stuff do its work. 1932 */ 1933 if (!bitmap_info->bitmap) 1934 return -EAGAIN; 1935 1936 /* 1937 * OK, the next item is a bitmap, but it may not actually hold 1938 * the information for the rest of this free space stuff, so 1939 * look for it, and if we don't find it return so we can try 1940 * everything over again. 1941 */ 1942 search_start = *offset; 1943 search_bytes = ctl->unit; 1944 ret = search_bitmap(ctl, bitmap_info, &search_start, 1945 &search_bytes, false); 1946 if (ret < 0 || search_start != *offset) 1947 return -EAGAIN; 1948 1949 goto again; 1950 } else if (!bitmap_info->bytes) 1951 free_bitmap(ctl, bitmap_info); 1952 1953 return 0; 1954 } 1955 1956 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, 1957 struct btrfs_free_space *info, u64 offset, 1958 u64 bytes) 1959 { 1960 u64 bytes_to_set = 0; 1961 u64 end; 1962 1963 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1964 1965 bytes_to_set = min(end - offset, bytes); 1966 1967 bitmap_set_bits(ctl, info, offset, bytes_to_set); 1968 1969 /* 1970 * We set some bytes, we have no idea what the max extent size is 1971 * anymore.
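* Clear the cached value below; the next failed search_bitmap() will recompute it.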
1972 */ 1973 info->max_extent_size = 0; 1974 1975 return bytes_to_set; 1976 1977 } 1978 1979 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1980 struct btrfs_free_space *info) 1981 { 1982 struct btrfs_block_group_cache *block_group = ctl->private; 1983 struct btrfs_fs_info *fs_info = block_group->fs_info; 1984 bool forced = false; 1985 1986 #ifdef CONFIG_BTRFS_DEBUG 1987 if (btrfs_should_fragment_free_space(block_group)) 1988 forced = true; 1989 #endif 1990 1991 /* 1992 * If we are below the extents threshold then we can add this as an 1993 * extent, and don't have to deal with the bitmap 1994 */ 1995 if (!forced && ctl->free_extents < ctl->extents_thresh) { 1996 /* 1997 * If this block group has some small extents we don't want to 1998 * use up all of our free slots in the cache with them, we want 1999 * to reserve them for larger extents; however, if we have plenty 2000 * of cache left then go ahead and add them, no sense in adding 2001 * the overhead of a bitmap if we don't have to. 2002 */ 2003 if (info->bytes <= fs_info->sectorsize * 4) { 2004 if (ctl->free_extents * 2 <= ctl->extents_thresh) 2005 return false; 2006 } else { 2007 return false; 2008 } 2009 } 2010 2011 /* 2012 * The original block groups from mkfs can be really small, like 8 2013 * megabytes, so don't bother with a bitmap for those entries. However 2014 * some block groups can be smaller than what a bitmap would cover but 2015 * are still large enough that they could overflow the 32k memory limit, 2016 * so still allow those block groups to have a bitmap 2017 * entry. 2018 */ 2019 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) 2020 return false; 2021 2022 return true; 2023 } 2024 2025 static const struct btrfs_free_space_op free_space_op = { 2026 .recalc_thresholds = recalculate_thresholds, 2027 .use_bitmap = use_bitmap, 2028 }; 2029 2030 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2031 struct btrfs_free_space *info) 2032 { 2033 struct btrfs_free_space *bitmap_info; 2034 struct btrfs_block_group_cache *block_group = NULL; 2035 int added = 0; 2036 u64 bytes, offset, bytes_added; 2037 int ret; 2038 2039 bytes = info->bytes; 2040 offset = info->offset; 2041 2042 if (!ctl->op->use_bitmap(ctl, info)) 2043 return 0; 2044 2045 if (ctl->op == &free_space_op) 2046 block_group = ctl->private; 2047 again: 2048 /* 2049 * Since we link bitmaps right into the cluster we need to see if we 2050 * have a cluster here, and if so and it has our bitmap we need to add 2051 * the free space to that bitmap.
2052 */ 2053 if (block_group && !list_empty(&block_group->cluster_list)) { 2054 struct btrfs_free_cluster *cluster; 2055 struct rb_node *node; 2056 struct btrfs_free_space *entry; 2057 2058 cluster = list_entry(block_group->cluster_list.next, 2059 struct btrfs_free_cluster, 2060 block_group_list); 2061 spin_lock(&cluster->lock); 2062 node = rb_first(&cluster->root); 2063 if (!node) { 2064 spin_unlock(&cluster->lock); 2065 goto no_cluster_bitmap; 2066 } 2067 2068 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2069 if (!entry->bitmap) { 2070 spin_unlock(&cluster->lock); 2071 goto no_cluster_bitmap; 2072 } 2073 2074 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2075 bytes_added = add_bytes_to_bitmap(ctl, entry, 2076 offset, bytes); 2077 bytes -= bytes_added; 2078 offset += bytes_added; 2079 } 2080 spin_unlock(&cluster->lock); 2081 if (!bytes) { 2082 ret = 1; 2083 goto out; 2084 } 2085 } 2086 2087 no_cluster_bitmap: 2088 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2089 1, 0); 2090 if (!bitmap_info) { 2091 ASSERT(added == 0); 2092 goto new_bitmap; 2093 } 2094 2095 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2096 bytes -= bytes_added; 2097 offset += bytes_added; 2098 added = 0; 2099 2100 if (!bytes) { 2101 ret = 1; 2102 goto out; 2103 } else 2104 goto again; 2105 2106 new_bitmap: 2107 if (info && info->bitmap) { 2108 add_new_bitmap(ctl, info, offset); 2109 added = 1; 2110 info = NULL; 2111 goto again; 2112 } else { 2113 spin_unlock(&ctl->tree_lock); 2114 2115 /* no pre-allocated info, allocate a new one */ 2116 if (!info) { 2117 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2118 GFP_NOFS); 2119 if (!info) { 2120 spin_lock(&ctl->tree_lock); 2121 ret = -ENOMEM; 2122 goto out; 2123 } 2124 } 2125 2126 /* allocate the bitmap */ 2127 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2128 spin_lock(&ctl->tree_lock); 2129 if (!info->bitmap) { 2130 ret = -ENOMEM; 2131 goto out; 2132 } 2133 goto again; 2134 } 2135 2136 out: 2137 if (info) { 2138 if (info->bitmap) 2139 kfree(info->bitmap); 2140 kmem_cache_free(btrfs_free_space_cachep, info); 2141 } 2142 2143 return ret; 2144 } 2145 2146 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2147 struct btrfs_free_space *info, bool update_stat) 2148 { 2149 struct btrfs_free_space *left_info; 2150 struct btrfs_free_space *right_info; 2151 bool merged = false; 2152 u64 offset = info->offset; 2153 u64 bytes = info->bytes; 2154 2155 /* 2156 * first we want to see if there is free space adjacent to the range we 2157 * are adding, if there is remove that struct and add a new one to 2158 * cover the entire range 2159 */ 2160 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2161 if (right_info && rb_prev(&right_info->offset_index)) 2162 left_info = rb_entry(rb_prev(&right_info->offset_index), 2163 struct btrfs_free_space, offset_index); 2164 else 2165 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2166 2167 if (right_info && !right_info->bitmap) { 2168 if (update_stat) 2169 unlink_free_space(ctl, right_info); 2170 else 2171 __unlink_free_space(ctl, right_info); 2172 info->bytes += right_info->bytes; 2173 kmem_cache_free(btrfs_free_space_cachep, right_info); 2174 merged = true; 2175 } 2176 2177 if (left_info && !left_info->bitmap && 2178 left_info->offset + left_info->bytes == offset) { 2179 if (update_stat) 2180 unlink_free_space(ctl, left_info); 2181 else 2182 __unlink_free_space(ctl, left_info); 2183 info->offset = left_info->offset; 2184 info->bytes += 
left_info->bytes; 2185 kmem_cache_free(btrfs_free_space_cachep, left_info); 2186 merged = true; 2187 } 2188 2189 return merged; 2190 } 2191 2192 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2193 struct btrfs_free_space *info, 2194 bool update_stat) 2195 { 2196 struct btrfs_free_space *bitmap; 2197 unsigned long i; 2198 unsigned long j; 2199 const u64 end = info->offset + info->bytes; 2200 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2201 u64 bytes; 2202 2203 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2204 if (!bitmap) 2205 return false; 2206 2207 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2208 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2209 if (j == i) 2210 return false; 2211 bytes = (j - i) * ctl->unit; 2212 info->bytes += bytes; 2213 2214 if (update_stat) 2215 bitmap_clear_bits(ctl, bitmap, end, bytes); 2216 else 2217 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2218 2219 if (!bitmap->bytes) 2220 free_bitmap(ctl, bitmap); 2221 2222 return true; 2223 } 2224 2225 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2226 struct btrfs_free_space *info, 2227 bool update_stat) 2228 { 2229 struct btrfs_free_space *bitmap; 2230 u64 bitmap_offset; 2231 unsigned long i; 2232 unsigned long j; 2233 unsigned long prev_j; 2234 u64 bytes; 2235 2236 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2237 /* If we're on a boundary, try the previous logical bitmap. */ 2238 if (bitmap_offset == info->offset) { 2239 if (info->offset == 0) 2240 return false; 2241 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2242 } 2243 2244 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2245 if (!bitmap) 2246 return false; 2247 2248 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2249 j = 0; 2250 prev_j = (unsigned long)-1; 2251 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2252 if (j > i) 2253 break; 2254 prev_j = j; 2255 } 2256 if (prev_j == i) 2257 return false; 2258 2259 if (prev_j == (unsigned long)-1) 2260 bytes = (i + 1) * ctl->unit; 2261 else 2262 bytes = (i - prev_j) * ctl->unit; 2263 2264 info->offset -= bytes; 2265 info->bytes += bytes; 2266 2267 if (update_stat) 2268 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2269 else 2270 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2271 2272 if (!bitmap->bytes) 2273 free_bitmap(ctl, bitmap); 2274 2275 return true; 2276 } 2277 2278 /* 2279 * We prefer always to allocate from extent entries, both for clustered and 2280 * non-clustered allocation requests. So when attempting to add a new extent 2281 * entry, try to see if there's adjacent free space in bitmap entries, and if 2282 * there is, migrate that space from the bitmaps to the extent. 2283 * Like this we get better chances of satisfying space allocation requests 2284 * because we attempt to satisfy them based on a single cache entry, and never 2285 * on 2 or more entries - even if the entries represent a contiguous free space 2286 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2287 * ends). 2288 */ 2289 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2290 struct btrfs_free_space *info, 2291 bool update_stat) 2292 { 2293 /* 2294 * Only work with disconnected entries, as we can change their offset, 2295 * and must be extent entries. 
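* (Callers hand us entries that are not yet linked into
* ctl->free_space_offset, which is what the RB_EMPTY_NODE() assertion
* below verifies.)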
2296 */ 2297 ASSERT(!info->bitmap); 2298 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2299 2300 if (ctl->total_bitmaps > 0) { 2301 bool stole_end; 2302 bool stole_front = false; 2303 2304 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2305 if (ctl->total_bitmaps > 0) 2306 stole_front = steal_from_bitmap_to_front(ctl, info, 2307 update_stat); 2308 2309 if (stole_end || stole_front) 2310 try_merge_free_space(ctl, info, update_stat); 2311 } 2312 } 2313 2314 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2315 struct btrfs_free_space_ctl *ctl, 2316 u64 offset, u64 bytes) 2317 { 2318 struct btrfs_free_space *info; 2319 int ret = 0; 2320 2321 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2322 if (!info) 2323 return -ENOMEM; 2324 2325 info->offset = offset; 2326 info->bytes = bytes; 2327 RB_CLEAR_NODE(&info->offset_index); 2328 2329 spin_lock(&ctl->tree_lock); 2330 2331 if (try_merge_free_space(ctl, info, true)) 2332 goto link; 2333 2334 /* 2335 * There was no extent directly to the left or right of this new 2336 * extent then we know we're going to have to allocate a new extent, so 2337 * before we do that see if we need to drop this into a bitmap 2338 */ 2339 ret = insert_into_bitmap(ctl, info); 2340 if (ret < 0) { 2341 goto out; 2342 } else if (ret) { 2343 ret = 0; 2344 goto out; 2345 } 2346 link: 2347 /* 2348 * Only steal free space from adjacent bitmaps if we're sure we're not 2349 * going to add the new free space to existing bitmap entries - because 2350 * that would mean unnecessary work that would be reverted. Therefore 2351 * attempt to steal space from bitmaps if we're adding an extent entry. 2352 */ 2353 steal_from_bitmap(ctl, info, true); 2354 2355 ret = link_free_space(ctl, info); 2356 if (ret) 2357 kmem_cache_free(btrfs_free_space_cachep, info); 2358 out: 2359 spin_unlock(&ctl->tree_lock); 2360 2361 if (ret) { 2362 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2363 ASSERT(ret != -EEXIST); 2364 } 2365 2366 return ret; 2367 } 2368 2369 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2370 u64 offset, u64 bytes) 2371 { 2372 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2373 struct btrfs_free_space *info; 2374 int ret; 2375 bool re_search = false; 2376 2377 spin_lock(&ctl->tree_lock); 2378 2379 again: 2380 ret = 0; 2381 if (!bytes) 2382 goto out_lock; 2383 2384 info = tree_search_offset(ctl, offset, 0, 0); 2385 if (!info) { 2386 /* 2387 * oops didn't find an extent that matched the space we wanted 2388 * to remove, look for a bitmap instead 2389 */ 2390 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2391 1, 0); 2392 if (!info) { 2393 /* 2394 * If we found a partial bit of our free space in a 2395 * bitmap but then couldn't find the other part this may 2396 * be a problem, so WARN about it. 
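* (re_search is only set after remove_from_bitmap() returned -EAGAIN,
* meaning part of the range was already cleared, so failing to find the
* remainder now indicates an inconsistent free space cache.)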
2397 */ 2398 WARN_ON(re_search); 2399 goto out_lock; 2400 } 2401 } 2402 2403 re_search = false; 2404 if (!info->bitmap) { 2405 unlink_free_space(ctl, info); 2406 if (offset == info->offset) { 2407 u64 to_free = min(bytes, info->bytes); 2408 2409 info->bytes -= to_free; 2410 info->offset += to_free; 2411 if (info->bytes) { 2412 ret = link_free_space(ctl, info); 2413 WARN_ON(ret); 2414 } else { 2415 kmem_cache_free(btrfs_free_space_cachep, info); 2416 } 2417 2418 offset += to_free; 2419 bytes -= to_free; 2420 goto again; 2421 } else { 2422 u64 old_end = info->bytes + info->offset; 2423 2424 info->bytes = offset - info->offset; 2425 ret = link_free_space(ctl, info); 2426 WARN_ON(ret); 2427 if (ret) 2428 goto out_lock; 2429 2430 /* Not enough bytes in this entry to satisfy us */ 2431 if (old_end < offset + bytes) { 2432 bytes -= old_end - offset; 2433 offset = old_end; 2434 goto again; 2435 } else if (old_end == offset + bytes) { 2436 /* all done */ 2437 goto out_lock; 2438 } 2439 spin_unlock(&ctl->tree_lock); 2440 2441 ret = btrfs_add_free_space(block_group, offset + bytes, 2442 old_end - (offset + bytes)); 2443 WARN_ON(ret); 2444 goto out; 2445 } 2446 } 2447 2448 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2449 if (ret == -EAGAIN) { 2450 re_search = true; 2451 goto again; 2452 } 2453 out_lock: 2454 spin_unlock(&ctl->tree_lock); 2455 out: 2456 return ret; 2457 } 2458 2459 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2460 u64 bytes) 2461 { 2462 struct btrfs_fs_info *fs_info = block_group->fs_info; 2463 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2464 struct btrfs_free_space *info; 2465 struct rb_node *n; 2466 int count = 0; 2467 2468 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2469 info = rb_entry(n, struct btrfs_free_space, offset_index); 2470 if (info->bytes >= bytes && !block_group->ro) 2471 count++; 2472 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2473 info->offset, info->bytes, 2474 (info->bitmap) ? "yes" : "no"); 2475 } 2476 btrfs_info(fs_info, "block group has cluster?: %s", 2477 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2478 btrfs_info(fs_info, 2479 "%d blocks of free space at or bigger than bytes is", count); 2480 } 2481 2482 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2483 { 2484 struct btrfs_fs_info *fs_info = block_group->fs_info; 2485 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2486 2487 spin_lock_init(&ctl->tree_lock); 2488 ctl->unit = fs_info->sectorsize; 2489 ctl->start = block_group->key.objectid; 2490 ctl->private = block_group; 2491 ctl->op = &free_space_op; 2492 INIT_LIST_HEAD(&ctl->trimming_ranges); 2493 mutex_init(&ctl->cache_writeout_mutex); 2494 2495 /* 2496 * we only want to have 32k of ram per block group for keeping 2497 * track of free space, and if we pass 1/2 of that we want to 2498 * start converting things over to using bitmaps 2499 */ 2500 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2501 } 2502 2503 /* 2504 * for a given cluster, put all of its extents back into the free 2505 * space cache. If the block group passed doesn't match the block group 2506 * pointed to by the cluster, someone else raced in and freed the 2507 * cluster already. 
In that case, we just return without changing anything 2508 */ 2509 static int 2510 __btrfs_return_cluster_to_free_space( 2511 struct btrfs_block_group_cache *block_group, 2512 struct btrfs_free_cluster *cluster) 2513 { 2514 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2515 struct btrfs_free_space *entry; 2516 struct rb_node *node; 2517 2518 spin_lock(&cluster->lock); 2519 if (cluster->block_group != block_group) 2520 goto out; 2521 2522 cluster->block_group = NULL; 2523 cluster->window_start = 0; 2524 list_del_init(&cluster->block_group_list); 2525 2526 node = rb_first(&cluster->root); 2527 while (node) { 2528 bool bitmap; 2529 2530 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2531 node = rb_next(&entry->offset_index); 2532 rb_erase(&entry->offset_index, &cluster->root); 2533 RB_CLEAR_NODE(&entry->offset_index); 2534 2535 bitmap = (entry->bitmap != NULL); 2536 if (!bitmap) { 2537 try_merge_free_space(ctl, entry, false); 2538 steal_from_bitmap(ctl, entry, false); 2539 } 2540 tree_insert_offset(&ctl->free_space_offset, 2541 entry->offset, &entry->offset_index, bitmap); 2542 } 2543 cluster->root = RB_ROOT; 2544 2545 out: 2546 spin_unlock(&cluster->lock); 2547 btrfs_put_block_group(block_group); 2548 return 0; 2549 } 2550 2551 static void __btrfs_remove_free_space_cache_locked( 2552 struct btrfs_free_space_ctl *ctl) 2553 { 2554 struct btrfs_free_space *info; 2555 struct rb_node *node; 2556 2557 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2558 info = rb_entry(node, struct btrfs_free_space, offset_index); 2559 if (!info->bitmap) { 2560 unlink_free_space(ctl, info); 2561 kmem_cache_free(btrfs_free_space_cachep, info); 2562 } else { 2563 free_bitmap(ctl, info); 2564 } 2565 2566 cond_resched_lock(&ctl->tree_lock); 2567 } 2568 } 2569 2570 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2571 { 2572 spin_lock(&ctl->tree_lock); 2573 __btrfs_remove_free_space_cache_locked(ctl); 2574 spin_unlock(&ctl->tree_lock); 2575 } 2576 2577 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2578 { 2579 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2580 struct btrfs_free_cluster *cluster; 2581 struct list_head *head; 2582 2583 spin_lock(&ctl->tree_lock); 2584 while ((head = block_group->cluster_list.next) != 2585 &block_group->cluster_list) { 2586 cluster = list_entry(head, struct btrfs_free_cluster, 2587 block_group_list); 2588 2589 WARN_ON(cluster->block_group != block_group); 2590 __btrfs_return_cluster_to_free_space(block_group, cluster); 2591 2592 cond_resched_lock(&ctl->tree_lock); 2593 } 2594 __btrfs_remove_free_space_cache_locked(ctl); 2595 spin_unlock(&ctl->tree_lock); 2596 2597 } 2598 2599 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2600 u64 offset, u64 bytes, u64 empty_size, 2601 u64 *max_extent_size) 2602 { 2603 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2604 struct btrfs_free_space *entry = NULL; 2605 u64 bytes_search = bytes + empty_size; 2606 u64 ret = 0; 2607 u64 align_gap = 0; 2608 u64 align_gap_len = 0; 2609 2610 spin_lock(&ctl->tree_lock); 2611 entry = find_free_space(ctl, &offset, &bytes_search, 2612 block_group->full_stripe_len, max_extent_size); 2613 if (!entry) 2614 goto out; 2615 2616 ret = offset; 2617 if (entry->bitmap) { 2618 bitmap_clear_bits(ctl, entry, offset, bytes); 2619 if (!entry->bytes) 2620 free_bitmap(ctl, entry); 2621 } else { 2622 unlink_free_space(ctl, entry); 2623 align_gap_len = offset - 
entry->offset; 2624 align_gap = entry->offset; 2625 2626 entry->offset = offset + bytes; 2627 WARN_ON(entry->bytes < bytes + align_gap_len); 2628 2629 entry->bytes -= bytes + align_gap_len; 2630 if (!entry->bytes) 2631 kmem_cache_free(btrfs_free_space_cachep, entry); 2632 else 2633 link_free_space(ctl, entry); 2634 } 2635 out: 2636 spin_unlock(&ctl->tree_lock); 2637 2638 if (align_gap_len) 2639 __btrfs_add_free_space(block_group->fs_info, ctl, 2640 align_gap, align_gap_len); 2641 return ret; 2642 } 2643 2644 /* 2645 * given a cluster, put all of its extents back into the free space 2646 * cache. If a block group is passed, this function will only free 2647 * a cluster that belongs to the passed block group. 2648 * 2649 * Otherwise, it'll get a reference on the block group pointed to by the 2650 * cluster and remove the cluster from it. 2651 */ 2652 int btrfs_return_cluster_to_free_space( 2653 struct btrfs_block_group_cache *block_group, 2654 struct btrfs_free_cluster *cluster) 2655 { 2656 struct btrfs_free_space_ctl *ctl; 2657 int ret; 2658 2659 /* first, get a safe pointer to the block group */ 2660 spin_lock(&cluster->lock); 2661 if (!block_group) { 2662 block_group = cluster->block_group; 2663 if (!block_group) { 2664 spin_unlock(&cluster->lock); 2665 return 0; 2666 } 2667 } else if (cluster->block_group != block_group) { 2668 /* someone else has already freed it don't redo their work */ 2669 spin_unlock(&cluster->lock); 2670 return 0; 2671 } 2672 atomic_inc(&block_group->count); 2673 spin_unlock(&cluster->lock); 2674 2675 ctl = block_group->free_space_ctl; 2676 2677 /* now return any extents the cluster had on it */ 2678 spin_lock(&ctl->tree_lock); 2679 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2680 spin_unlock(&ctl->tree_lock); 2681 2682 /* finally drop our ref */ 2683 btrfs_put_block_group(block_group); 2684 return ret; 2685 } 2686 2687 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2688 struct btrfs_free_cluster *cluster, 2689 struct btrfs_free_space *entry, 2690 u64 bytes, u64 min_start, 2691 u64 *max_extent_size) 2692 { 2693 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2694 int err; 2695 u64 search_start = cluster->window_start; 2696 u64 search_bytes = bytes; 2697 u64 ret = 0; 2698 2699 search_start = min_start; 2700 search_bytes = bytes; 2701 2702 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2703 if (err) { 2704 if (search_bytes > *max_extent_size) 2705 *max_extent_size = search_bytes; 2706 return 0; 2707 } 2708 2709 ret = search_start; 2710 __bitmap_clear_bits(ctl, entry, ret, bytes); 2711 2712 return ret; 2713 } 2714 2715 /* 2716 * given a cluster, try to allocate 'bytes' from it, returns 0 2717 * if it couldn't find anything suitably large, or a logical disk offset 2718 * if things worked out 2719 */ 2720 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2721 struct btrfs_free_cluster *cluster, u64 bytes, 2722 u64 min_start, u64 *max_extent_size) 2723 { 2724 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2725 struct btrfs_free_space *entry = NULL; 2726 struct rb_node *node; 2727 u64 ret = 0; 2728 2729 spin_lock(&cluster->lock); 2730 if (bytes > cluster->max_size) 2731 goto out; 2732 2733 if (cluster->block_group != block_group) 2734 goto out; 2735 2736 node = rb_first(&cluster->root); 2737 if (!node) 2738 goto out; 2739 2740 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2741 while (1) { 2742 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2743 *max_extent_size = entry->bytes; 2744 2745 if (entry->bytes < bytes || 2746 (!entry->bitmap && entry->offset < min_start)) { 2747 node = rb_next(&entry->offset_index); 2748 if (!node) 2749 break; 2750 entry = rb_entry(node, struct btrfs_free_space, 2751 offset_index); 2752 continue; 2753 } 2754 2755 if (entry->bitmap) { 2756 ret = btrfs_alloc_from_bitmap(block_group, 2757 cluster, entry, bytes, 2758 cluster->window_start, 2759 max_extent_size); 2760 if (ret == 0) { 2761 node = rb_next(&entry->offset_index); 2762 if (!node) 2763 break; 2764 entry = rb_entry(node, struct btrfs_free_space, 2765 offset_index); 2766 continue; 2767 } 2768 cluster->window_start += bytes; 2769 } else { 2770 ret = entry->offset; 2771 2772 entry->offset += bytes; 2773 entry->bytes -= bytes; 2774 } 2775 2776 if (entry->bytes == 0) 2777 rb_erase(&entry->offset_index, &cluster->root); 2778 break; 2779 } 2780 out: 2781 spin_unlock(&cluster->lock); 2782 2783 if (!ret) 2784 return 0; 2785 2786 spin_lock(&ctl->tree_lock); 2787 2788 ctl->free_space -= bytes; 2789 if (entry->bytes == 0) { 2790 ctl->free_extents--; 2791 if (entry->bitmap) { 2792 kfree(entry->bitmap); 2793 ctl->total_bitmaps--; 2794 ctl->op->recalc_thresholds(ctl); 2795 } 2796 kmem_cache_free(btrfs_free_space_cachep, entry); 2797 } 2798 2799 spin_unlock(&ctl->tree_lock); 2800 2801 return ret; 2802 } 2803 2804 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2805 struct btrfs_free_space *entry, 2806 struct btrfs_free_cluster *cluster, 2807 u64 offset, u64 bytes, 2808 u64 cont1_bytes, u64 min_bytes) 2809 { 2810 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2811 unsigned long next_zero; 2812 unsigned long i; 2813 unsigned long want_bits; 2814 unsigned long min_bits; 2815 unsigned long found_bits; 2816 unsigned long max_bits = 0; 2817 unsigned long start = 0; 2818 unsigned long total_found = 0; 2819 int ret; 2820 2821 i = offset_to_bit(entry->offset, ctl->unit, 2822 max_t(u64, offset, entry->offset)); 2823 want_bits = bytes_to_bits(bytes, ctl->unit); 2824 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2825 2826 /* 2827 * Don't bother looking for a cluster in this bitmap if it's heavily 2828 * fragmented. 
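* (entry->max_extent_size is a bound cached by an earlier search that came
* up short; if it is already smaller than cont1_bytes this bitmap cannot
* provide the required contiguous run, so skip scanning its bits.)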
2829 */ 2830 if (entry->max_extent_size && 2831 entry->max_extent_size < cont1_bytes) 2832 return -ENOSPC; 2833 again: 2834 found_bits = 0; 2835 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2836 next_zero = find_next_zero_bit(entry->bitmap, 2837 BITS_PER_BITMAP, i); 2838 if (next_zero - i >= min_bits) { 2839 found_bits = next_zero - i; 2840 if (found_bits > max_bits) 2841 max_bits = found_bits; 2842 break; 2843 } 2844 if (next_zero - i > max_bits) 2845 max_bits = next_zero - i; 2846 i = next_zero; 2847 } 2848 2849 if (!found_bits) { 2850 entry->max_extent_size = (u64)max_bits * ctl->unit; 2851 return -ENOSPC; 2852 } 2853 2854 if (!total_found) { 2855 start = i; 2856 cluster->max_size = 0; 2857 } 2858 2859 total_found += found_bits; 2860 2861 if (cluster->max_size < found_bits * ctl->unit) 2862 cluster->max_size = found_bits * ctl->unit; 2863 2864 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2865 i = next_zero + 1; 2866 goto again; 2867 } 2868 2869 cluster->window_start = start * ctl->unit + entry->offset; 2870 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2871 ret = tree_insert_offset(&cluster->root, entry->offset, 2872 &entry->offset_index, 1); 2873 ASSERT(!ret); /* -EEXIST; Logic error */ 2874 2875 trace_btrfs_setup_cluster(block_group, cluster, 2876 total_found * ctl->unit, 1); 2877 return 0; 2878 } 2879 2880 /* 2881 * This searches the block group for just extents to fill the cluster with. 2882 * Try to find a cluster with at least bytes total bytes, at least one 2883 * extent of cont1_bytes, and other clusters of at least min_bytes. 2884 */ 2885 static noinline int 2886 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2887 struct btrfs_free_cluster *cluster, 2888 struct list_head *bitmaps, u64 offset, u64 bytes, 2889 u64 cont1_bytes, u64 min_bytes) 2890 { 2891 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2892 struct btrfs_free_space *first = NULL; 2893 struct btrfs_free_space *entry = NULL; 2894 struct btrfs_free_space *last; 2895 struct rb_node *node; 2896 u64 window_free; 2897 u64 max_extent; 2898 u64 total_size = 0; 2899 2900 entry = tree_search_offset(ctl, offset, 0, 1); 2901 if (!entry) 2902 return -ENOSPC; 2903 2904 /* 2905 * We don't want bitmaps, so just move along until we find a normal 2906 * extent entry. 
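* (Bitmap entries seen along the way are stashed on the caller's 'bitmaps'
* list so that setup_cluster_bitmap() can try them later without another
* tree search.)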
2907 */ 2908 while (entry->bitmap || entry->bytes < min_bytes) { 2909 if (entry->bitmap && list_empty(&entry->list)) 2910 list_add_tail(&entry->list, bitmaps); 2911 node = rb_next(&entry->offset_index); 2912 if (!node) 2913 return -ENOSPC; 2914 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2915 } 2916 2917 window_free = entry->bytes; 2918 max_extent = entry->bytes; 2919 first = entry; 2920 last = entry; 2921 2922 for (node = rb_next(&entry->offset_index); node; 2923 node = rb_next(&entry->offset_index)) { 2924 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2925 2926 if (entry->bitmap) { 2927 if (list_empty(&entry->list)) 2928 list_add_tail(&entry->list, bitmaps); 2929 continue; 2930 } 2931 2932 if (entry->bytes < min_bytes) 2933 continue; 2934 2935 last = entry; 2936 window_free += entry->bytes; 2937 if (entry->bytes > max_extent) 2938 max_extent = entry->bytes; 2939 } 2940 2941 if (window_free < bytes || max_extent < cont1_bytes) 2942 return -ENOSPC; 2943 2944 cluster->window_start = first->offset; 2945 2946 node = &first->offset_index; 2947 2948 /* 2949 * now we've found our entries, pull them out of the free space 2950 * cache and put them into the cluster rbtree 2951 */ 2952 do { 2953 int ret; 2954 2955 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2956 node = rb_next(&entry->offset_index); 2957 if (entry->bitmap || entry->bytes < min_bytes) 2958 continue; 2959 2960 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2961 ret = tree_insert_offset(&cluster->root, entry->offset, 2962 &entry->offset_index, 0); 2963 total_size += entry->bytes; 2964 ASSERT(!ret); /* -EEXIST; Logic error */ 2965 } while (node && entry != last); 2966 2967 cluster->max_size = max_extent; 2968 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2969 return 0; 2970 } 2971 2972 /* 2973 * This specifically looks for bitmaps that may work in the cluster, we assume 2974 * that we have already failed to find extents that will work. 2975 */ 2976 static noinline int 2977 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2978 struct btrfs_free_cluster *cluster, 2979 struct list_head *bitmaps, u64 offset, u64 bytes, 2980 u64 cont1_bytes, u64 min_bytes) 2981 { 2982 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2983 struct btrfs_free_space *entry = NULL; 2984 int ret = -ENOSPC; 2985 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2986 2987 if (ctl->total_bitmaps == 0) 2988 return -ENOSPC; 2989 2990 /* 2991 * The bitmap that covers offset won't be in the list unless offset 2992 * is just its start offset. 2993 */ 2994 if (!list_empty(bitmaps)) 2995 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2996 2997 if (!entry || entry->offset != bitmap_offset) { 2998 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2999 if (entry && list_empty(&entry->list)) 3000 list_add(&entry->list, bitmaps); 3001 } 3002 3003 list_for_each_entry(entry, bitmaps, list) { 3004 if (entry->bytes < bytes) 3005 continue; 3006 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3007 bytes, cont1_bytes, min_bytes); 3008 if (!ret) 3009 return 0; 3010 } 3011 3012 /* 3013 * The bitmaps list has all the bitmaps that record free space 3014 * starting after offset, so no more search is required. 3015 */ 3016 return -ENOSPC; 3017 } 3018 3019 /* 3020 * here we try to find a cluster of blocks in a block group. The goal 3021 * is to find at least bytes+empty_size. 3022 * We might not find them all in one contiguous area. 
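* cont1_bytes is the size of the single large extent the cluster must
* contain at minimum, and min_bytes the smallest piece worth collecting;
* both are chosen below from the mount options and the block group type.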
3023 * 3024 * returns zero and sets up cluster if things worked out, otherwise 3025 * it returns -enospc 3026 */ 3027 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3028 struct btrfs_block_group_cache *block_group, 3029 struct btrfs_free_cluster *cluster, 3030 u64 offset, u64 bytes, u64 empty_size) 3031 { 3032 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3033 struct btrfs_free_space *entry, *tmp; 3034 LIST_HEAD(bitmaps); 3035 u64 min_bytes; 3036 u64 cont1_bytes; 3037 int ret; 3038 3039 /* 3040 * Choose the minimum extent size we'll require for this 3041 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3042 * For metadata, allow allocates with smaller extents. For 3043 * data, keep it dense. 3044 */ 3045 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3046 cont1_bytes = min_bytes = bytes + empty_size; 3047 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3048 cont1_bytes = bytes; 3049 min_bytes = fs_info->sectorsize; 3050 } else { 3051 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3052 min_bytes = fs_info->sectorsize; 3053 } 3054 3055 spin_lock(&ctl->tree_lock); 3056 3057 /* 3058 * If we know we don't have enough space to make a cluster don't even 3059 * bother doing all the work to try and find one. 3060 */ 3061 if (ctl->free_space < bytes) { 3062 spin_unlock(&ctl->tree_lock); 3063 return -ENOSPC; 3064 } 3065 3066 spin_lock(&cluster->lock); 3067 3068 /* someone already found a cluster, hooray */ 3069 if (cluster->block_group) { 3070 ret = 0; 3071 goto out; 3072 } 3073 3074 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3075 min_bytes); 3076 3077 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3078 bytes + empty_size, 3079 cont1_bytes, min_bytes); 3080 if (ret) 3081 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3082 offset, bytes + empty_size, 3083 cont1_bytes, min_bytes); 3084 3085 /* Clear our temporary list */ 3086 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3087 list_del_init(&entry->list); 3088 3089 if (!ret) { 3090 atomic_inc(&block_group->count); 3091 list_add_tail(&cluster->block_group_list, 3092 &block_group->cluster_list); 3093 cluster->block_group = block_group; 3094 } else { 3095 trace_btrfs_failed_cluster_setup(block_group); 3096 } 3097 out: 3098 spin_unlock(&cluster->lock); 3099 spin_unlock(&ctl->tree_lock); 3100 3101 return ret; 3102 } 3103 3104 /* 3105 * simple code to zero out a cluster 3106 */ 3107 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3108 { 3109 spin_lock_init(&cluster->lock); 3110 spin_lock_init(&cluster->refill_lock); 3111 cluster->root = RB_ROOT; 3112 cluster->max_size = 0; 3113 cluster->fragmented = false; 3114 INIT_LIST_HEAD(&cluster->block_group_list); 3115 cluster->block_group = NULL; 3116 } 3117 3118 static int do_trimming(struct btrfs_block_group_cache *block_group, 3119 u64 *total_trimmed, u64 start, u64 bytes, 3120 u64 reserved_start, u64 reserved_bytes, 3121 struct btrfs_trim_range *trim_entry) 3122 { 3123 struct btrfs_space_info *space_info = block_group->space_info; 3124 struct btrfs_fs_info *fs_info = block_group->fs_info; 3125 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3126 int ret; 3127 int update = 0; 3128 u64 trimmed = 0; 3129 3130 spin_lock(&space_info->lock); 3131 spin_lock(&block_group->lock); 3132 if (!block_group->ro) { 3133 block_group->reserved += reserved_bytes; 3134 space_info->bytes_reserved += reserved_bytes; 3135 update = 1; 3136 } 3137 spin_unlock(&block_group->lock); 3138 
spin_unlock(&space_info->lock); 3139 3140 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3141 if (!ret) 3142 *total_trimmed += trimmed; 3143 3144 mutex_lock(&ctl->cache_writeout_mutex); 3145 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3146 list_del(&trim_entry->list); 3147 mutex_unlock(&ctl->cache_writeout_mutex); 3148 3149 if (update) { 3150 spin_lock(&space_info->lock); 3151 spin_lock(&block_group->lock); 3152 if (block_group->ro) 3153 space_info->bytes_readonly += reserved_bytes; 3154 block_group->reserved -= reserved_bytes; 3155 space_info->bytes_reserved -= reserved_bytes; 3156 spin_unlock(&space_info->lock); 3157 spin_unlock(&block_group->lock); 3158 } 3159 3160 return ret; 3161 } 3162 3163 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3164 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3165 { 3166 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3167 struct btrfs_free_space *entry; 3168 struct rb_node *node; 3169 int ret = 0; 3170 u64 extent_start; 3171 u64 extent_bytes; 3172 u64 bytes; 3173 3174 while (start < end) { 3175 struct btrfs_trim_range trim_entry; 3176 3177 mutex_lock(&ctl->cache_writeout_mutex); 3178 spin_lock(&ctl->tree_lock); 3179 3180 if (ctl->free_space < minlen) { 3181 spin_unlock(&ctl->tree_lock); 3182 mutex_unlock(&ctl->cache_writeout_mutex); 3183 break; 3184 } 3185 3186 entry = tree_search_offset(ctl, start, 0, 1); 3187 if (!entry) { 3188 spin_unlock(&ctl->tree_lock); 3189 mutex_unlock(&ctl->cache_writeout_mutex); 3190 break; 3191 } 3192 3193 /* skip bitmaps */ 3194 while (entry->bitmap) { 3195 node = rb_next(&entry->offset_index); 3196 if (!node) { 3197 spin_unlock(&ctl->tree_lock); 3198 mutex_unlock(&ctl->cache_writeout_mutex); 3199 goto out; 3200 } 3201 entry = rb_entry(node, struct btrfs_free_space, 3202 offset_index); 3203 } 3204 3205 if (entry->offset >= end) { 3206 spin_unlock(&ctl->tree_lock); 3207 mutex_unlock(&ctl->cache_writeout_mutex); 3208 break; 3209 } 3210 3211 extent_start = entry->offset; 3212 extent_bytes = entry->bytes; 3213 start = max(start, extent_start); 3214 bytes = min(extent_start + extent_bytes, end) - start; 3215 if (bytes < minlen) { 3216 spin_unlock(&ctl->tree_lock); 3217 mutex_unlock(&ctl->cache_writeout_mutex); 3218 goto next; 3219 } 3220 3221 unlink_free_space(ctl, entry); 3222 kmem_cache_free(btrfs_free_space_cachep, entry); 3223 3224 spin_unlock(&ctl->tree_lock); 3225 trim_entry.start = extent_start; 3226 trim_entry.bytes = extent_bytes; 3227 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3228 mutex_unlock(&ctl->cache_writeout_mutex); 3229 3230 ret = do_trimming(block_group, total_trimmed, start, bytes, 3231 extent_start, extent_bytes, &trim_entry); 3232 if (ret) 3233 break; 3234 next: 3235 start += bytes; 3236 3237 if (fatal_signal_pending(current)) { 3238 ret = -ERESTARTSYS; 3239 break; 3240 } 3241 3242 cond_resched(); 3243 } 3244 out: 3245 return ret; 3246 } 3247 3248 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3249 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3250 { 3251 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3252 struct btrfs_free_space *entry; 3253 int ret = 0; 3254 int ret2; 3255 u64 bytes; 3256 u64 offset = offset_to_bitmap(ctl, start); 3257 3258 while (offset < end) { 3259 bool next_bitmap = false; 3260 struct btrfs_trim_range trim_entry; 3261 3262 mutex_lock(&ctl->cache_writeout_mutex); 3263 spin_lock(&ctl->tree_lock); 3264 3265 if (ctl->free_space < minlen) { 3266 
spin_unlock(&ctl->tree_lock); 3267 mutex_unlock(&ctl->cache_writeout_mutex); 3268 break; 3269 } 3270 3271 entry = tree_search_offset(ctl, offset, 1, 0); 3272 if (!entry) { 3273 spin_unlock(&ctl->tree_lock); 3274 mutex_unlock(&ctl->cache_writeout_mutex); 3275 next_bitmap = true; 3276 goto next; 3277 } 3278 3279 bytes = minlen; 3280 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3281 if (ret2 || start >= end) { 3282 spin_unlock(&ctl->tree_lock); 3283 mutex_unlock(&ctl->cache_writeout_mutex); 3284 next_bitmap = true; 3285 goto next; 3286 } 3287 3288 bytes = min(bytes, end - start); 3289 if (bytes < minlen) { 3290 spin_unlock(&ctl->tree_lock); 3291 mutex_unlock(&ctl->cache_writeout_mutex); 3292 goto next; 3293 } 3294 3295 bitmap_clear_bits(ctl, entry, start, bytes); 3296 if (entry->bytes == 0) 3297 free_bitmap(ctl, entry); 3298 3299 spin_unlock(&ctl->tree_lock); 3300 trim_entry.start = start; 3301 trim_entry.bytes = bytes; 3302 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3303 mutex_unlock(&ctl->cache_writeout_mutex); 3304 3305 ret = do_trimming(block_group, total_trimmed, start, bytes, 3306 start, bytes, &trim_entry); 3307 if (ret) 3308 break; 3309 next: 3310 if (next_bitmap) { 3311 offset += BITS_PER_BITMAP * ctl->unit; 3312 } else { 3313 start += bytes; 3314 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3315 offset += BITS_PER_BITMAP * ctl->unit; 3316 } 3317 3318 if (fatal_signal_pending(current)) { 3319 ret = -ERESTARTSYS; 3320 break; 3321 } 3322 3323 cond_resched(); 3324 } 3325 3326 return ret; 3327 } 3328 3329 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3330 { 3331 atomic_inc(&cache->trimming); 3332 } 3333 3334 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3335 { 3336 struct btrfs_fs_info *fs_info = block_group->fs_info; 3337 struct extent_map_tree *em_tree; 3338 struct extent_map *em; 3339 bool cleanup; 3340 3341 spin_lock(&block_group->lock); 3342 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3343 block_group->removed); 3344 spin_unlock(&block_group->lock); 3345 3346 if (cleanup) { 3347 mutex_lock(&fs_info->chunk_mutex); 3348 em_tree = &fs_info->mapping_tree.map_tree; 3349 write_lock(&em_tree->lock); 3350 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3351 1); 3352 BUG_ON(!em); /* logic error, can't happen */ 3353 /* 3354 * remove_extent_mapping() will delete us from the pinned_chunks 3355 * list, which is protected by the chunk mutex. 3356 */ 3357 remove_extent_mapping(em_tree, em); 3358 write_unlock(&em_tree->lock); 3359 mutex_unlock(&fs_info->chunk_mutex); 3360 3361 /* once for us and once for the tree */ 3362 free_extent_map(em); 3363 free_extent_map(em); 3364 3365 /* 3366 * We've left one free space entry and other tasks trimming 3367 * this block group have left 1 entry each one. Free them. 
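* (do_trimming() re-adds every trimmed range via btrfs_add_free_space(),
* which is how these entries accumulate even though the block group has
* already been removed.)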
3368 */ 3369 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3370 } 3371 } 3372 3373 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3374 u64 *trimmed, u64 start, u64 end, u64 minlen) 3375 { 3376 int ret; 3377 3378 *trimmed = 0; 3379 3380 spin_lock(&block_group->lock); 3381 if (block_group->removed) { 3382 spin_unlock(&block_group->lock); 3383 return 0; 3384 } 3385 btrfs_get_block_group_trimming(block_group); 3386 spin_unlock(&block_group->lock); 3387 3388 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3389 if (ret) 3390 goto out; 3391 3392 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3393 out: 3394 btrfs_put_block_group_trimming(block_group); 3395 return ret; 3396 } 3397 3398 /* 3399 * Find the left-most item in the cache tree, and then return the 3400 * smallest inode number in the item. 3401 * 3402 * Note: the returned inode number may not be the smallest one in 3403 * the tree, if the left-most item is a bitmap. 3404 */ 3405 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3406 { 3407 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3408 struct btrfs_free_space *entry = NULL; 3409 u64 ino = 0; 3410 3411 spin_lock(&ctl->tree_lock); 3412 3413 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3414 goto out; 3415 3416 entry = rb_entry(rb_first(&ctl->free_space_offset), 3417 struct btrfs_free_space, offset_index); 3418 3419 if (!entry->bitmap) { 3420 ino = entry->offset; 3421 3422 unlink_free_space(ctl, entry); 3423 entry->offset++; 3424 entry->bytes--; 3425 if (!entry->bytes) 3426 kmem_cache_free(btrfs_free_space_cachep, entry); 3427 else 3428 link_free_space(ctl, entry); 3429 } else { 3430 u64 offset = 0; 3431 u64 count = 1; 3432 int ret; 3433 3434 ret = search_bitmap(ctl, entry, &offset, &count, true); 3435 /* Logic error; Should be empty if it can't find anything */ 3436 ASSERT(!ret); 3437 3438 ino = offset; 3439 bitmap_clear_bits(ctl, entry, offset, 1); 3440 if (entry->bytes == 0) 3441 free_bitmap(ctl, entry); 3442 } 3443 out: 3444 spin_unlock(&ctl->tree_lock); 3445 3446 return ino; 3447 } 3448 3449 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3450 struct btrfs_path *path) 3451 { 3452 struct inode *inode = NULL; 3453 3454 spin_lock(&root->ino_cache_lock); 3455 if (root->ino_cache_inode) 3456 inode = igrab(root->ino_cache_inode); 3457 spin_unlock(&root->ino_cache_lock); 3458 if (inode) 3459 return inode; 3460 3461 inode = __lookup_free_space_inode(root, path, 0); 3462 if (IS_ERR(inode)) 3463 return inode; 3464 3465 spin_lock(&root->ino_cache_lock); 3466 if (!btrfs_fs_closing(root->fs_info)) 3467 root->ino_cache_inode = igrab(inode); 3468 spin_unlock(&root->ino_cache_lock); 3469 3470 return inode; 3471 } 3472 3473 int create_free_ino_inode(struct btrfs_root *root, 3474 struct btrfs_trans_handle *trans, 3475 struct btrfs_path *path) 3476 { 3477 return __create_free_space_inode(root, trans, path, 3478 BTRFS_FREE_INO_OBJECTID, 0); 3479 } 3480 3481 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3482 { 3483 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3484 struct btrfs_path *path; 3485 struct inode *inode; 3486 int ret = 0; 3487 u64 root_gen = btrfs_root_generation(&root->root_item); 3488 3489 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3490 return 0; 3491 3492 /* 3493 * If we're unmounting then just return, since this does a search on the 3494 * normal root and not the commit root and we could deadlock. 
3495 */ 3496 if (btrfs_fs_closing(fs_info)) 3497 return 0; 3498 3499 path = btrfs_alloc_path(); 3500 if (!path) 3501 return 0; 3502 3503 inode = lookup_free_ino_inode(root, path); 3504 if (IS_ERR(inode)) 3505 goto out; 3506 3507 if (root_gen != BTRFS_I(inode)->generation) 3508 goto out_put; 3509 3510 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3511 3512 if (ret < 0) 3513 btrfs_err(fs_info, 3514 "failed to load free ino cache for root %llu", 3515 root->root_key.objectid); 3516 out_put: 3517 iput(inode); 3518 out: 3519 btrfs_free_path(path); 3520 return ret; 3521 } 3522 3523 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3524 struct btrfs_trans_handle *trans, 3525 struct btrfs_path *path, 3526 struct inode *inode) 3527 { 3528 struct btrfs_fs_info *fs_info = root->fs_info; 3529 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3530 int ret; 3531 struct btrfs_io_ctl io_ctl; 3532 bool release_metadata = true; 3533 3534 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3535 return 0; 3536 3537 memset(&io_ctl, 0, sizeof(io_ctl)); 3538 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3539 if (!ret) { 3540 /* 3541 * At this point writepages() didn't error out, so our metadata 3542 * reservation is released when the writeback finishes, at 3543 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3544 * with or without an error. 3545 */ 3546 release_metadata = false; 3547 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3548 } 3549 3550 if (ret) { 3551 if (release_metadata) 3552 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3553 inode->i_size); 3554 #ifdef DEBUG 3555 btrfs_err(fs_info, 3556 "failed to write free ino cache for root %llu", 3557 root->root_key.objectid); 3558 #endif 3559 } 3560 3561 return ret; 3562 } 3563 3564 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3565 /* 3566 * Use this if you need to make a bitmap or extent entry specifically, it 3567 * doesn't do any of the merging that add_free_space does, this acts a lot like 3568 * how the free space cache loading stuff works, so you can get really weird 3569 * configurations. 
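* For example (illustrative only), a test can lay down an extent entry and
* an adjacent bitmap entry directly, a layout the normal add path would
* usually avoid through merging and bitmap stealing:
*
*	test_add_free_space_entry(cache, 0, SZ_4M, 0);
*	test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);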
3570 */ 3571 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3572 u64 offset, u64 bytes, bool bitmap) 3573 { 3574 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3575 struct btrfs_free_space *info = NULL, *bitmap_info; 3576 void *map = NULL; 3577 u64 bytes_added; 3578 int ret; 3579 3580 again: 3581 if (!info) { 3582 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3583 if (!info) 3584 return -ENOMEM; 3585 } 3586 3587 if (!bitmap) { 3588 spin_lock(&ctl->tree_lock); 3589 info->offset = offset; 3590 info->bytes = bytes; 3591 info->max_extent_size = 0; 3592 ret = link_free_space(ctl, info); 3593 spin_unlock(&ctl->tree_lock); 3594 if (ret) 3595 kmem_cache_free(btrfs_free_space_cachep, info); 3596 return ret; 3597 } 3598 3599 if (!map) { 3600 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3601 if (!map) { 3602 kmem_cache_free(btrfs_free_space_cachep, info); 3603 return -ENOMEM; 3604 } 3605 } 3606 3607 spin_lock(&ctl->tree_lock); 3608 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3609 1, 0); 3610 if (!bitmap_info) { 3611 info->bitmap = map; 3612 map = NULL; 3613 add_new_bitmap(ctl, info, offset); 3614 bitmap_info = info; 3615 info = NULL; 3616 } 3617 3618 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3619 3620 bytes -= bytes_added; 3621 offset += bytes_added; 3622 spin_unlock(&ctl->tree_lock); 3623 3624 if (bytes) 3625 goto again; 3626 3627 if (info) 3628 kmem_cache_free(btrfs_free_space_cachep, info); 3629 if (map) 3630 kfree(map); 3631 return 0; 3632 } 3633 3634 /* 3635 * Checks to see if the given range is in the free space cache. This is really 3636 * just used to check the absence of space, so if there is free space in the 3637 * range at all we will return 1. 3638 */ 3639 int test_check_exists(struct btrfs_block_group_cache *cache, 3640 u64 offset, u64 bytes) 3641 { 3642 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3643 struct btrfs_free_space *info; 3644 int ret = 0; 3645 3646 spin_lock(&ctl->tree_lock); 3647 info = tree_search_offset(ctl, offset, 0, 0); 3648 if (!info) { 3649 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3650 1, 0); 3651 if (!info) 3652 goto out; 3653 } 3654 3655 have_info: 3656 if (info->bitmap) { 3657 u64 bit_off, bit_bytes; 3658 struct rb_node *n; 3659 struct btrfs_free_space *tmp; 3660 3661 bit_off = offset; 3662 bit_bytes = ctl->unit; 3663 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3664 if (!ret) { 3665 if (bit_off == offset) { 3666 ret = 1; 3667 goto out; 3668 } else if (bit_off > offset && 3669 offset + bytes > bit_off) { 3670 ret = 1; 3671 goto out; 3672 } 3673 } 3674 3675 n = rb_prev(&info->offset_index); 3676 while (n) { 3677 tmp = rb_entry(n, struct btrfs_free_space, 3678 offset_index); 3679 if (tmp->offset + tmp->bytes < offset) 3680 break; 3681 if (offset + bytes < tmp->offset) { 3682 n = rb_prev(&tmp->offset_index); 3683 continue; 3684 } 3685 info = tmp; 3686 goto have_info; 3687 } 3688 3689 n = rb_next(&info->offset_index); 3690 while (n) { 3691 tmp = rb_entry(n, struct btrfs_free_space, 3692 offset_index); 3693 if (offset + bytes < tmp->offset) 3694 break; 3695 if (tmp->offset + tmp->bytes < offset) { 3696 n = rb_next(&tmp->offset_index); 3697 continue; 3698 } 3699 info = tmp; 3700 goto have_info; 3701 } 3702 3703 ret = 0; 3704 goto out; 3705 } 3706 3707 if (info->offset == offset) { 3708 ret = 1; 3709 goto out; 3710 } 3711 3712 if (offset > info->offset && offset < info->offset + info->bytes) 3713 ret = 1; 3714 out: 3715 
spin_unlock(&ctl->tree_lock); 3716 return ret; 3717 } 3718 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3719
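/*
 * Illustrative sketch only (not part of the original file): how callers
 * typically pair the helpers above, assuming a block group whose
 * free_space_ctl was set up by btrfs_init_free_space_ctl() and hypothetical
 * start/len/num_bytes values:
 *
 *	u64 max_extent_size = 0;
 *	u64 found;
 *
 *	btrfs_add_free_space(block_group, start, len);
 *	found = btrfs_find_space_for_alloc(block_group, start, num_bytes, 0,
 *					   &max_extent_size);
 *	if (!found) {
 *		// num_bytes was not available in one piece; max_extent_size
 *		// may report the largest chunk that was seen instead.
 *	}
 */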