1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/sched/signal.h> 22 #include <linux/slab.h> 23 #include <linux/math64.h> 24 #include <linux/ratelimit.h> 25 #include <linux/error-injection.h> 26 #include "ctree.h" 27 #include "free-space-cache.h" 28 #include "transaction.h" 29 #include "disk-io.h" 30 #include "extent_io.h" 31 #include "inode-map.h" 32 #include "volumes.h" 33 34 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 35 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 36 37 struct btrfs_trim_range { 38 u64 start; 39 u64 bytes; 40 struct list_head list; 41 }; 42 43 static int link_free_space(struct btrfs_free_space_ctl *ctl, 44 struct btrfs_free_space *info); 45 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 46 struct btrfs_free_space *info); 47 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 48 struct btrfs_trans_handle *trans, 49 struct btrfs_io_ctl *io_ctl, 50 struct btrfs_path *path); 51 52 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 53 struct btrfs_path *path, 54 u64 offset) 55 { 56 struct btrfs_fs_info *fs_info = root->fs_info; 57 struct btrfs_key key; 58 struct btrfs_key location; 59 struct btrfs_disk_key disk_key; 60 struct btrfs_free_space_header *header; 61 struct extent_buffer *leaf; 62 struct inode *inode = NULL; 63 int ret; 64 65 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 66 key.offset = offset; 67 key.type = 0; 68 69 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 70 if (ret < 0) 71 return ERR_PTR(ret); 72 if (ret > 0) { 73 btrfs_release_path(path); 74 return ERR_PTR(-ENOENT); 75 } 76 77 leaf = path->nodes[0]; 78 header = btrfs_item_ptr(leaf, path->slots[0], 79 struct btrfs_free_space_header); 80 btrfs_free_space_key(leaf, header, &disk_key); 81 btrfs_disk_key_to_cpu(&location, &disk_key); 82 btrfs_release_path(path); 83 84 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 85 if (IS_ERR(inode)) 86 return inode; 87 if (is_bad_inode(inode)) { 88 iput(inode); 89 return ERR_PTR(-ENOENT); 90 } 91 92 mapping_set_gfp_mask(inode->i_mapping, 93 mapping_gfp_constraint(inode->i_mapping, 94 ~(__GFP_FS | __GFP_HIGHMEM))); 95 96 return inode; 97 } 98 99 struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 100 struct btrfs_block_group_cache 101 *block_group, struct btrfs_path *path) 102 { 103 struct inode *inode = NULL; 104 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 105 106 spin_lock(&block_group->lock); 107 if (block_group->inode) 108 inode = igrab(block_group->inode); 109 spin_unlock(&block_group->lock); 110 if (inode) 111 return inode; 112 113 inode = __lookup_free_space_inode(fs_info->tree_root, path, 114 block_group->key.objectid); 115 if (IS_ERR(inode)) 116 return inode; 117 118 spin_lock(&block_group->lock); 119 if (!((BTRFS_I(inode)->flags & 
flags) == flags)) { 120 btrfs_info(fs_info, "Old style space inode found, converting."); 121 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 122 BTRFS_INODE_NODATACOW; 123 block_group->disk_cache_state = BTRFS_DC_CLEAR; 124 } 125 126 if (!block_group->iref) { 127 block_group->inode = igrab(inode); 128 block_group->iref = 1; 129 } 130 spin_unlock(&block_group->lock); 131 132 return inode; 133 } 134 135 static int __create_free_space_inode(struct btrfs_root *root, 136 struct btrfs_trans_handle *trans, 137 struct btrfs_path *path, 138 u64 ino, u64 offset) 139 { 140 struct btrfs_key key; 141 struct btrfs_disk_key disk_key; 142 struct btrfs_free_space_header *header; 143 struct btrfs_inode_item *inode_item; 144 struct extent_buffer *leaf; 145 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 146 int ret; 147 148 ret = btrfs_insert_empty_inode(trans, root, path, ino); 149 if (ret) 150 return ret; 151 152 /* We inline crc's for the free disk space cache */ 153 if (ino != BTRFS_FREE_INO_OBJECTID) 154 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 155 156 leaf = path->nodes[0]; 157 inode_item = btrfs_item_ptr(leaf, path->slots[0], 158 struct btrfs_inode_item); 159 btrfs_item_key(leaf, &disk_key, path->slots[0]); 160 memzero_extent_buffer(leaf, (unsigned long)inode_item, 161 sizeof(*inode_item)); 162 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 163 btrfs_set_inode_size(leaf, inode_item, 0); 164 btrfs_set_inode_nbytes(leaf, inode_item, 0); 165 btrfs_set_inode_uid(leaf, inode_item, 0); 166 btrfs_set_inode_gid(leaf, inode_item, 0); 167 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 168 btrfs_set_inode_flags(leaf, inode_item, flags); 169 btrfs_set_inode_nlink(leaf, inode_item, 1); 170 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 171 btrfs_set_inode_block_group(leaf, inode_item, offset); 172 btrfs_mark_buffer_dirty(leaf); 173 btrfs_release_path(path); 174 175 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 176 key.offset = offset; 177 key.type = 0; 178 ret = btrfs_insert_empty_item(trans, root, path, &key, 179 sizeof(struct btrfs_free_space_header)); 180 if (ret < 0) { 181 btrfs_release_path(path); 182 return ret; 183 } 184 185 leaf = path->nodes[0]; 186 header = btrfs_item_ptr(leaf, path->slots[0], 187 struct btrfs_free_space_header); 188 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 189 btrfs_set_free_space_key(leaf, header, &disk_key); 190 btrfs_mark_buffer_dirty(leaf); 191 btrfs_release_path(path); 192 193 return 0; 194 } 195 196 int create_free_space_inode(struct btrfs_fs_info *fs_info, 197 struct btrfs_trans_handle *trans, 198 struct btrfs_block_group_cache *block_group, 199 struct btrfs_path *path) 200 { 201 int ret; 202 u64 ino; 203 204 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 205 if (ret < 0) 206 return ret; 207 208 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 209 block_group->key.objectid); 210 } 211 212 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 213 struct btrfs_block_rsv *rsv) 214 { 215 u64 needed_bytes; 216 int ret; 217 218 /* 1 for slack space, 1 for updating the inode */ 219 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 220 btrfs_calc_trans_metadata_size(fs_info, 1); 221 222 spin_lock(&rsv->lock); 223 if (rsv->reserved < needed_bytes) 224 ret = -ENOSPC; 225 else 226 ret = 0; 227 spin_unlock(&rsv->lock); 228 return ret; 229 } 230 231 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 232 struct 
				    btrfs_block_group_cache *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 * We don't need to check for -EAGAIN because we're a free space
	 * cache inode.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;
	int check_crcs = 0;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
		check_crcs = 1;

	/* Make sure we can fit our crcs into the first page */
	if (write && check_crcs &&
	    (num_pages * sizeof(u32)) >= PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->check_crcs = check_crcs;
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
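			/*
			 * Clear PageChecked and drop the page lock and
			 * reference taken by find_or_create_page() in
			 * io_ctl_prepare_pages().
			 */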
ClearPageChecked(io_ctl->pages[i]); 372 unlock_page(io_ctl->pages[i]); 373 put_page(io_ctl->pages[i]); 374 } 375 } 376 } 377 378 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 379 int uptodate) 380 { 381 struct page *page; 382 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 383 int i; 384 385 for (i = 0; i < io_ctl->num_pages; i++) { 386 page = find_or_create_page(inode->i_mapping, i, mask); 387 if (!page) { 388 io_ctl_drop_pages(io_ctl); 389 return -ENOMEM; 390 } 391 io_ctl->pages[i] = page; 392 if (uptodate && !PageUptodate(page)) { 393 btrfs_readpage(NULL, page); 394 lock_page(page); 395 if (!PageUptodate(page)) { 396 btrfs_err(BTRFS_I(inode)->root->fs_info, 397 "error reading free space cache"); 398 io_ctl_drop_pages(io_ctl); 399 return -EIO; 400 } 401 } 402 } 403 404 for (i = 0; i < io_ctl->num_pages; i++) { 405 clear_page_dirty_for_io(io_ctl->pages[i]); 406 set_page_extent_mapped(io_ctl->pages[i]); 407 } 408 409 return 0; 410 } 411 412 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 413 { 414 __le64 *val; 415 416 io_ctl_map_page(io_ctl, 1); 417 418 /* 419 * Skip the csum areas. If we don't check crcs then we just have a 420 * 64bit chunk at the front of the first page. 421 */ 422 if (io_ctl->check_crcs) { 423 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 424 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 425 } else { 426 io_ctl->cur += sizeof(u64); 427 io_ctl->size -= sizeof(u64) * 2; 428 } 429 430 val = io_ctl->cur; 431 *val = cpu_to_le64(generation); 432 io_ctl->cur += sizeof(u64); 433 } 434 435 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 436 { 437 __le64 *gen; 438 439 /* 440 * Skip the crc area. If we don't check crcs then we just have a 64bit 441 * chunk at the front of the first page. 
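	 *
	 * With check_crcs the front of the first page instead holds one u32
	 * checksum per page followed by the u64 generation, e.g. a 3 page
	 * cache file reserves 3 * sizeof(u32) + sizeof(u64) = 20 bytes of
	 * page 0 before the first entry.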
442 */ 443 if (io_ctl->check_crcs) { 444 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 445 io_ctl->size -= sizeof(u64) + 446 (sizeof(u32) * io_ctl->num_pages); 447 } else { 448 io_ctl->cur += sizeof(u64); 449 io_ctl->size -= sizeof(u64) * 2; 450 } 451 452 gen = io_ctl->cur; 453 if (le64_to_cpu(*gen) != generation) { 454 btrfs_err_rl(io_ctl->fs_info, 455 "space cache generation (%llu) does not match inode (%llu)", 456 *gen, generation); 457 io_ctl_unmap_page(io_ctl); 458 return -EIO; 459 } 460 io_ctl->cur += sizeof(u64); 461 return 0; 462 } 463 464 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 465 { 466 u32 *tmp; 467 u32 crc = ~(u32)0; 468 unsigned offset = 0; 469 470 if (!io_ctl->check_crcs) { 471 io_ctl_unmap_page(io_ctl); 472 return; 473 } 474 475 if (index == 0) 476 offset = sizeof(u32) * io_ctl->num_pages; 477 478 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 479 PAGE_SIZE - offset); 480 btrfs_csum_final(crc, (u8 *)&crc); 481 io_ctl_unmap_page(io_ctl); 482 tmp = page_address(io_ctl->pages[0]); 483 tmp += index; 484 *tmp = crc; 485 } 486 487 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 488 { 489 u32 *tmp, val; 490 u32 crc = ~(u32)0; 491 unsigned offset = 0; 492 493 if (!io_ctl->check_crcs) { 494 io_ctl_map_page(io_ctl, 0); 495 return 0; 496 } 497 498 if (index == 0) 499 offset = sizeof(u32) * io_ctl->num_pages; 500 501 tmp = page_address(io_ctl->pages[0]); 502 tmp += index; 503 val = *tmp; 504 505 io_ctl_map_page(io_ctl, 0); 506 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 507 PAGE_SIZE - offset); 508 btrfs_csum_final(crc, (u8 *)&crc); 509 if (val != crc) { 510 btrfs_err_rl(io_ctl->fs_info, 511 "csum mismatch on free space cache"); 512 io_ctl_unmap_page(io_ctl); 513 return -EIO; 514 } 515 516 return 0; 517 } 518 519 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 520 void *bitmap) 521 { 522 struct btrfs_free_space_entry *entry; 523 524 if (!io_ctl->cur) 525 return -ENOSPC; 526 527 entry = io_ctl->cur; 528 entry->offset = cpu_to_le64(offset); 529 entry->bytes = cpu_to_le64(bytes); 530 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 531 BTRFS_FREE_SPACE_EXTENT; 532 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 533 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 534 535 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 536 return 0; 537 538 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 539 540 /* No more pages to map */ 541 if (io_ctl->index >= io_ctl->num_pages) 542 return 0; 543 544 /* map the next page */ 545 io_ctl_map_page(io_ctl, 1); 546 return 0; 547 } 548 549 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 550 { 551 if (!io_ctl->cur) 552 return -ENOSPC; 553 554 /* 555 * If we aren't at the start of the current page, unmap this one and 556 * map the next one if there is any left. 557 */ 558 if (io_ctl->cur != io_ctl->orig) { 559 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 560 if (io_ctl->index >= io_ctl->num_pages) 561 return -ENOSPC; 562 io_ctl_map_page(io_ctl, 0); 563 } 564 565 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 566 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 567 if (io_ctl->index < io_ctl->num_pages) 568 io_ctl_map_page(io_ctl, 0); 569 return 0; 570 } 571 572 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 573 { 574 /* 575 * If we're not on the boundary we know we've modified the page and we 576 * need to crc the page. 
577 */ 578 if (io_ctl->cur != io_ctl->orig) 579 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 580 else 581 io_ctl_unmap_page(io_ctl); 582 583 while (io_ctl->index < io_ctl->num_pages) { 584 io_ctl_map_page(io_ctl, 1); 585 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 586 } 587 } 588 589 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 590 struct btrfs_free_space *entry, u8 *type) 591 { 592 struct btrfs_free_space_entry *e; 593 int ret; 594 595 if (!io_ctl->cur) { 596 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 597 if (ret) 598 return ret; 599 } 600 601 e = io_ctl->cur; 602 entry->offset = le64_to_cpu(e->offset); 603 entry->bytes = le64_to_cpu(e->bytes); 604 *type = e->type; 605 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 606 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 607 608 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 609 return 0; 610 611 io_ctl_unmap_page(io_ctl); 612 613 return 0; 614 } 615 616 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 617 struct btrfs_free_space *entry) 618 { 619 int ret; 620 621 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 622 if (ret) 623 return ret; 624 625 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 626 io_ctl_unmap_page(io_ctl); 627 628 return 0; 629 } 630 631 /* 632 * Since we attach pinned extents after the fact we can have contiguous sections 633 * of free space that are split up in entries. This poses a problem with the 634 * tree logging stuff since it could have allocated across what appears to be 2 635 * entries since we would have merged the entries when adding the pinned extents 636 * back to the free space cache. So run through the space cache that we just 637 * loaded and merge contiguous entries. This will make the log replay stuff not 638 * blow up and it will make for nicer allocator behavior. 
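 *
 * Note that each successful merge drops ctl->tree_lock and restarts the scan
 * from the beginning of the rbtree, so this is quadratic in the worst case,
 * but it only runs when a cache file is loaded.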
639 */ 640 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 641 { 642 struct btrfs_free_space *e, *prev = NULL; 643 struct rb_node *n; 644 645 again: 646 spin_lock(&ctl->tree_lock); 647 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 648 e = rb_entry(n, struct btrfs_free_space, offset_index); 649 if (!prev) 650 goto next; 651 if (e->bitmap || prev->bitmap) 652 goto next; 653 if (prev->offset + prev->bytes == e->offset) { 654 unlink_free_space(ctl, prev); 655 unlink_free_space(ctl, e); 656 prev->bytes += e->bytes; 657 kmem_cache_free(btrfs_free_space_cachep, e); 658 link_free_space(ctl, prev); 659 prev = NULL; 660 spin_unlock(&ctl->tree_lock); 661 goto again; 662 } 663 next: 664 prev = e; 665 } 666 spin_unlock(&ctl->tree_lock); 667 } 668 669 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 670 struct btrfs_free_space_ctl *ctl, 671 struct btrfs_path *path, u64 offset) 672 { 673 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 674 struct btrfs_free_space_header *header; 675 struct extent_buffer *leaf; 676 struct btrfs_io_ctl io_ctl; 677 struct btrfs_key key; 678 struct btrfs_free_space *e, *n; 679 LIST_HEAD(bitmaps); 680 u64 num_entries; 681 u64 num_bitmaps; 682 u64 generation; 683 u8 type; 684 int ret = 0; 685 686 /* Nothing in the space cache, goodbye */ 687 if (!i_size_read(inode)) 688 return 0; 689 690 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 691 key.offset = offset; 692 key.type = 0; 693 694 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 695 if (ret < 0) 696 return 0; 697 else if (ret > 0) { 698 btrfs_release_path(path); 699 return 0; 700 } 701 702 ret = -1; 703 704 leaf = path->nodes[0]; 705 header = btrfs_item_ptr(leaf, path->slots[0], 706 struct btrfs_free_space_header); 707 num_entries = btrfs_free_space_entries(leaf, header); 708 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 709 generation = btrfs_free_space_generation(leaf, header); 710 btrfs_release_path(path); 711 712 if (!BTRFS_I(inode)->generation) { 713 btrfs_info(fs_info, 714 "the free space cache file (%llu) is invalid, skip it", 715 offset); 716 return 0; 717 } 718 719 if (BTRFS_I(inode)->generation != generation) { 720 btrfs_err(fs_info, 721 "free space inode generation (%llu) did not match free space cache generation (%llu)", 722 BTRFS_I(inode)->generation, generation); 723 return 0; 724 } 725 726 if (!num_entries) 727 return 0; 728 729 ret = io_ctl_init(&io_ctl, inode, 0); 730 if (ret) 731 return ret; 732 733 readahead_cache(inode); 734 735 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 736 if (ret) 737 goto out; 738 739 ret = io_ctl_check_crc(&io_ctl, 0); 740 if (ret) 741 goto free_cache; 742 743 ret = io_ctl_check_generation(&io_ctl, generation); 744 if (ret) 745 goto free_cache; 746 747 while (num_entries) { 748 e = kmem_cache_zalloc(btrfs_free_space_cachep, 749 GFP_NOFS); 750 if (!e) 751 goto free_cache; 752 753 ret = io_ctl_read_entry(&io_ctl, e, &type); 754 if (ret) { 755 kmem_cache_free(btrfs_free_space_cachep, e); 756 goto free_cache; 757 } 758 759 if (!e->bytes) { 760 kmem_cache_free(btrfs_free_space_cachep, e); 761 goto free_cache; 762 } 763 764 if (type == BTRFS_FREE_SPACE_EXTENT) { 765 spin_lock(&ctl->tree_lock); 766 ret = link_free_space(ctl, e); 767 spin_unlock(&ctl->tree_lock); 768 if (ret) { 769 btrfs_err(fs_info, 770 "Duplicate entries in free space cache, dumping"); 771 kmem_cache_free(btrfs_free_space_cachep, e); 772 goto free_cache; 773 } 774 } else { 775 ASSERT(num_bitmaps); 776 num_bitmaps--; 777 e->bitmap = 
kzalloc(PAGE_SIZE, GFP_NOFS); 778 if (!e->bitmap) { 779 kmem_cache_free( 780 btrfs_free_space_cachep, e); 781 goto free_cache; 782 } 783 spin_lock(&ctl->tree_lock); 784 ret = link_free_space(ctl, e); 785 ctl->total_bitmaps++; 786 ctl->op->recalc_thresholds(ctl); 787 spin_unlock(&ctl->tree_lock); 788 if (ret) { 789 btrfs_err(fs_info, 790 "Duplicate entries in free space cache, dumping"); 791 kmem_cache_free(btrfs_free_space_cachep, e); 792 goto free_cache; 793 } 794 list_add_tail(&e->list, &bitmaps); 795 } 796 797 num_entries--; 798 } 799 800 io_ctl_unmap_page(&io_ctl); 801 802 /* 803 * We add the bitmaps at the end of the entries in order that 804 * the bitmap entries are added to the cache. 805 */ 806 list_for_each_entry_safe(e, n, &bitmaps, list) { 807 list_del_init(&e->list); 808 ret = io_ctl_read_bitmap(&io_ctl, e); 809 if (ret) 810 goto free_cache; 811 } 812 813 io_ctl_drop_pages(&io_ctl); 814 merge_space_tree(ctl); 815 ret = 1; 816 out: 817 io_ctl_free(&io_ctl); 818 return ret; 819 free_cache: 820 io_ctl_drop_pages(&io_ctl); 821 __btrfs_remove_free_space_cache(ctl); 822 goto out; 823 } 824 825 int load_free_space_cache(struct btrfs_fs_info *fs_info, 826 struct btrfs_block_group_cache *block_group) 827 { 828 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 829 struct inode *inode; 830 struct btrfs_path *path; 831 int ret = 0; 832 bool matched; 833 u64 used = btrfs_block_group_used(&block_group->item); 834 835 /* 836 * If this block group has been marked to be cleared for one reason or 837 * another then we can't trust the on disk cache, so just return. 838 */ 839 spin_lock(&block_group->lock); 840 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 841 spin_unlock(&block_group->lock); 842 return 0; 843 } 844 spin_unlock(&block_group->lock); 845 846 path = btrfs_alloc_path(); 847 if (!path) 848 return 0; 849 path->search_commit_root = 1; 850 path->skip_locking = 1; 851 852 inode = lookup_free_space_inode(fs_info, block_group, path); 853 if (IS_ERR(inode)) { 854 btrfs_free_path(path); 855 return 0; 856 } 857 858 /* We may have converted the inode and made the cache invalid. 
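	 * lookup_free_space_inode() sets BTRFS_DC_CLEAR when it converts an
	 * old style inode, so re-check disk_cache_state under the lock before
	 * trusting the on-disk cache.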
*/ 859 spin_lock(&block_group->lock); 860 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 861 spin_unlock(&block_group->lock); 862 btrfs_free_path(path); 863 goto out; 864 } 865 spin_unlock(&block_group->lock); 866 867 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 868 path, block_group->key.objectid); 869 btrfs_free_path(path); 870 if (ret <= 0) 871 goto out; 872 873 spin_lock(&ctl->tree_lock); 874 matched = (ctl->free_space == (block_group->key.offset - used - 875 block_group->bytes_super)); 876 spin_unlock(&ctl->tree_lock); 877 878 if (!matched) { 879 __btrfs_remove_free_space_cache(ctl); 880 btrfs_warn(fs_info, 881 "block group %llu has wrong amount of free space", 882 block_group->key.objectid); 883 ret = -1; 884 } 885 out: 886 if (ret < 0) { 887 /* This cache is bogus, make sure it gets cleared */ 888 spin_lock(&block_group->lock); 889 block_group->disk_cache_state = BTRFS_DC_CLEAR; 890 spin_unlock(&block_group->lock); 891 ret = 0; 892 893 btrfs_warn(fs_info, 894 "failed to load free space cache for block group %llu, rebuilding it now", 895 block_group->key.objectid); 896 } 897 898 iput(inode); 899 return ret; 900 } 901 902 static noinline_for_stack 903 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 904 struct btrfs_free_space_ctl *ctl, 905 struct btrfs_block_group_cache *block_group, 906 int *entries, int *bitmaps, 907 struct list_head *bitmap_list) 908 { 909 int ret; 910 struct btrfs_free_cluster *cluster = NULL; 911 struct btrfs_free_cluster *cluster_locked = NULL; 912 struct rb_node *node = rb_first(&ctl->free_space_offset); 913 struct btrfs_trim_range *trim_entry; 914 915 /* Get the cluster for this block_group if it exists */ 916 if (block_group && !list_empty(&block_group->cluster_list)) { 917 cluster = list_entry(block_group->cluster_list.next, 918 struct btrfs_free_cluster, 919 block_group_list); 920 } 921 922 if (!node && cluster) { 923 cluster_locked = cluster; 924 spin_lock(&cluster_locked->lock); 925 node = rb_first(&cluster->root); 926 cluster = NULL; 927 } 928 929 /* Write out the extent entries */ 930 while (node) { 931 struct btrfs_free_space *e; 932 933 e = rb_entry(node, struct btrfs_free_space, offset_index); 934 *entries += 1; 935 936 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 937 e->bitmap); 938 if (ret) 939 goto fail; 940 941 if (e->bitmap) { 942 list_add_tail(&e->list, bitmap_list); 943 *bitmaps += 1; 944 } 945 node = rb_next(node); 946 if (!node && cluster) { 947 node = rb_first(&cluster->root); 948 cluster_locked = cluster; 949 spin_lock(&cluster_locked->lock); 950 cluster = NULL; 951 } 952 } 953 if (cluster_locked) { 954 spin_unlock(&cluster_locked->lock); 955 cluster_locked = NULL; 956 } 957 958 /* 959 * Make sure we don't miss any range that was removed from our rbtree 960 * because trimming is running. Otherwise after a umount+mount (or crash 961 * after committing the transaction) we would leak free space and get 962 * an inconsistent free space cache report from fsck. 
963 */ 964 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 965 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 966 trim_entry->bytes, NULL); 967 if (ret) 968 goto fail; 969 *entries += 1; 970 } 971 972 return 0; 973 fail: 974 if (cluster_locked) 975 spin_unlock(&cluster_locked->lock); 976 return -ENOSPC; 977 } 978 979 static noinline_for_stack int 980 update_cache_item(struct btrfs_trans_handle *trans, 981 struct btrfs_root *root, 982 struct inode *inode, 983 struct btrfs_path *path, u64 offset, 984 int entries, int bitmaps) 985 { 986 struct btrfs_key key; 987 struct btrfs_free_space_header *header; 988 struct extent_buffer *leaf; 989 int ret; 990 991 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 992 key.offset = offset; 993 key.type = 0; 994 995 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 996 if (ret < 0) { 997 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 998 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 999 goto fail; 1000 } 1001 leaf = path->nodes[0]; 1002 if (ret > 0) { 1003 struct btrfs_key found_key; 1004 ASSERT(path->slots[0]); 1005 path->slots[0]--; 1006 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1007 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1008 found_key.offset != offset) { 1009 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1010 inode->i_size - 1, 1011 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1012 NULL); 1013 btrfs_release_path(path); 1014 goto fail; 1015 } 1016 } 1017 1018 BTRFS_I(inode)->generation = trans->transid; 1019 header = btrfs_item_ptr(leaf, path->slots[0], 1020 struct btrfs_free_space_header); 1021 btrfs_set_free_space_entries(leaf, header, entries); 1022 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1023 btrfs_set_free_space_generation(leaf, header, trans->transid); 1024 btrfs_mark_buffer_dirty(leaf); 1025 btrfs_release_path(path); 1026 1027 return 0; 1028 1029 fail: 1030 return -1; 1031 } 1032 1033 static noinline_for_stack int 1034 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1035 struct btrfs_block_group_cache *block_group, 1036 struct btrfs_io_ctl *io_ctl, 1037 int *entries) 1038 { 1039 u64 start, extent_start, extent_end, len; 1040 struct extent_io_tree *unpin = NULL; 1041 int ret; 1042 1043 if (!block_group) 1044 return 0; 1045 1046 /* 1047 * We want to add any pinned extents to our free space cache 1048 * so we don't leak the space 1049 * 1050 * We shouldn't have switched the pinned extents yet so this is the 1051 * right one 1052 */ 1053 unpin = fs_info->pinned_extents; 1054 1055 start = block_group->key.objectid; 1056 1057 while (start < block_group->key.objectid + block_group->key.offset) { 1058 ret = find_first_extent_bit(unpin, start, 1059 &extent_start, &extent_end, 1060 EXTENT_DIRTY, NULL); 1061 if (ret) 1062 return 0; 1063 1064 /* This pinned extent is out of our range */ 1065 if (extent_start >= block_group->key.objectid + 1066 block_group->key.offset) 1067 return 0; 1068 1069 extent_start = max(extent_start, start); 1070 extent_end = min(block_group->key.objectid + 1071 block_group->key.offset, extent_end + 1); 1072 len = extent_end - extent_start; 1073 1074 *entries += 1; 1075 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1076 if (ret) 1077 return -ENOSPC; 1078 1079 start = extent_end; 1080 } 1081 1082 return 0; 1083 } 1084 1085 static noinline_for_stack int 1086 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1087 { 1088 struct btrfs_free_space *entry, *next; 1089 int ret; 1090 1091 /* Write out the bitmaps */ 1092 
list_for_each_entry_safe(entry, next, bitmap_list, list) { 1093 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1094 if (ret) 1095 return -ENOSPC; 1096 list_del_init(&entry->list); 1097 } 1098 1099 return 0; 1100 } 1101 1102 static int flush_dirty_cache(struct inode *inode) 1103 { 1104 int ret; 1105 1106 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1107 if (ret) 1108 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1109 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); 1110 1111 return ret; 1112 } 1113 1114 static void noinline_for_stack 1115 cleanup_bitmap_list(struct list_head *bitmap_list) 1116 { 1117 struct btrfs_free_space *entry, *next; 1118 1119 list_for_each_entry_safe(entry, next, bitmap_list, list) 1120 list_del_init(&entry->list); 1121 } 1122 1123 static void noinline_for_stack 1124 cleanup_write_cache_enospc(struct inode *inode, 1125 struct btrfs_io_ctl *io_ctl, 1126 struct extent_state **cached_state) 1127 { 1128 io_ctl_drop_pages(io_ctl); 1129 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1130 i_size_read(inode) - 1, cached_state); 1131 } 1132 1133 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1134 struct btrfs_trans_handle *trans, 1135 struct btrfs_block_group_cache *block_group, 1136 struct btrfs_io_ctl *io_ctl, 1137 struct btrfs_path *path, u64 offset) 1138 { 1139 int ret; 1140 struct inode *inode = io_ctl->inode; 1141 struct btrfs_fs_info *fs_info; 1142 1143 if (!inode) 1144 return 0; 1145 1146 fs_info = btrfs_sb(inode->i_sb); 1147 1148 /* Flush the dirty pages in the cache file. */ 1149 ret = flush_dirty_cache(inode); 1150 if (ret) 1151 goto out; 1152 1153 /* Update the cache item to tell everyone this cache file is valid. */ 1154 ret = update_cache_item(trans, root, inode, path, offset, 1155 io_ctl->entries, io_ctl->bitmaps); 1156 out: 1157 io_ctl_free(io_ctl); 1158 if (ret) { 1159 invalidate_inode_pages2(inode->i_mapping); 1160 BTRFS_I(inode)->generation = 0; 1161 if (block_group) { 1162 #ifdef DEBUG 1163 btrfs_err(fs_info, 1164 "failed to write free space cache for block group %llu", 1165 block_group->key.objectid); 1166 #endif 1167 } 1168 } 1169 btrfs_update_inode(trans, root, inode); 1170 1171 if (block_group) { 1172 /* the dirty list is protected by the dirty_bgs_lock */ 1173 spin_lock(&trans->transaction->dirty_bgs_lock); 1174 1175 /* the disk_cache_state is protected by the block group lock */ 1176 spin_lock(&block_group->lock); 1177 1178 /* 1179 * only mark this as written if we didn't get put back on 1180 * the dirty list while waiting for IO. 
Otherwise our 1181 * cache state won't be right, and we won't get written again 1182 */ 1183 if (!ret && list_empty(&block_group->dirty_list)) 1184 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1185 else if (ret) 1186 block_group->disk_cache_state = BTRFS_DC_ERROR; 1187 1188 spin_unlock(&block_group->lock); 1189 spin_unlock(&trans->transaction->dirty_bgs_lock); 1190 io_ctl->inode = NULL; 1191 iput(inode); 1192 } 1193 1194 return ret; 1195 1196 } 1197 1198 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1199 struct btrfs_trans_handle *trans, 1200 struct btrfs_io_ctl *io_ctl, 1201 struct btrfs_path *path) 1202 { 1203 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1204 } 1205 1206 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1207 struct btrfs_block_group_cache *block_group, 1208 struct btrfs_path *path) 1209 { 1210 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1211 block_group, &block_group->io_ctl, 1212 path, block_group->key.objectid); 1213 } 1214 1215 /** 1216 * __btrfs_write_out_cache - write out cached info to an inode 1217 * @root - the root the inode belongs to 1218 * @ctl - the free space cache we are going to write out 1219 * @block_group - the block_group for this cache if it belongs to a block_group 1220 * @trans - the trans handle 1221 * 1222 * This function writes out a free space cache struct to disk for quick recovery 1223 * on mount. This will return 0 if it was successful in writing the cache out, 1224 * or an errno if it was not. 1225 */ 1226 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1227 struct btrfs_free_space_ctl *ctl, 1228 struct btrfs_block_group_cache *block_group, 1229 struct btrfs_io_ctl *io_ctl, 1230 struct btrfs_trans_handle *trans) 1231 { 1232 struct btrfs_fs_info *fs_info = root->fs_info; 1233 struct extent_state *cached_state = NULL; 1234 LIST_HEAD(bitmap_list); 1235 int entries = 0; 1236 int bitmaps = 0; 1237 int ret; 1238 int must_iput = 0; 1239 1240 if (!i_size_read(inode)) 1241 return -EIO; 1242 1243 WARN_ON(io_ctl->pages); 1244 ret = io_ctl_init(io_ctl, inode, 1); 1245 if (ret) 1246 return ret; 1247 1248 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1249 down_write(&block_group->data_rwsem); 1250 spin_lock(&block_group->lock); 1251 if (block_group->delalloc_bytes) { 1252 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1253 spin_unlock(&block_group->lock); 1254 up_write(&block_group->data_rwsem); 1255 BTRFS_I(inode)->generation = 0; 1256 ret = 0; 1257 must_iput = 1; 1258 goto out; 1259 } 1260 spin_unlock(&block_group->lock); 1261 } 1262 1263 /* Lock all pages first so we can lock the extent safely. */ 1264 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1265 if (ret) 1266 goto out_unlock; 1267 1268 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1269 &cached_state); 1270 1271 io_ctl_set_generation(io_ctl, trans->transid); 1272 1273 mutex_lock(&ctl->cache_writeout_mutex); 1274 /* Write out the extent entries in the free space cache */ 1275 spin_lock(&ctl->tree_lock); 1276 ret = write_cache_extent_entries(io_ctl, ctl, 1277 block_group, &entries, &bitmaps, 1278 &bitmap_list); 1279 if (ret) 1280 goto out_nospc_locked; 1281 1282 /* 1283 * Some spaces that are freed in the current transaction are pinned, 1284 * they will be added into free space cache after the transaction is 1285 * committed, we shouldn't lose them. 
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it. No locking needed
	 */
	ret = write_pinned_extent_entries(fs_info, block_group,
					  io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0,
				i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state);

	/*
	 * At this point the pages are under IO and we're happy. The caller is
	 * responsible for waiting on them and updating the cache and the
	 * inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(fs_info, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				      block_group, &block_group->io_ctl, trans);
	if (ret) {
#ifdef DEBUG
		btrfs_err(fs_info,
			  "failed to write free space cache for block group %llu",
			  block_group->key.objectid);
#endif
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and
put the inode 1406 */ 1407 1408 return ret; 1409 } 1410 1411 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1412 u64 offset) 1413 { 1414 ASSERT(offset >= bitmap_start); 1415 offset -= bitmap_start; 1416 return (unsigned long)(div_u64(offset, unit)); 1417 } 1418 1419 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1420 { 1421 return (unsigned long)(div_u64(bytes, unit)); 1422 } 1423 1424 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1425 u64 offset) 1426 { 1427 u64 bitmap_start; 1428 u64 bytes_per_bitmap; 1429 1430 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1431 bitmap_start = offset - ctl->start; 1432 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1433 bitmap_start *= bytes_per_bitmap; 1434 bitmap_start += ctl->start; 1435 1436 return bitmap_start; 1437 } 1438 1439 static int tree_insert_offset(struct rb_root *root, u64 offset, 1440 struct rb_node *node, int bitmap) 1441 { 1442 struct rb_node **p = &root->rb_node; 1443 struct rb_node *parent = NULL; 1444 struct btrfs_free_space *info; 1445 1446 while (*p) { 1447 parent = *p; 1448 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1449 1450 if (offset < info->offset) { 1451 p = &(*p)->rb_left; 1452 } else if (offset > info->offset) { 1453 p = &(*p)->rb_right; 1454 } else { 1455 /* 1456 * we could have a bitmap entry and an extent entry 1457 * share the same offset. If this is the case, we want 1458 * the extent entry to always be found first if we do a 1459 * linear search through the tree, since we want to have 1460 * the quickest allocation time, and allocating from an 1461 * extent is faster than allocating from a bitmap. So 1462 * if we're inserting a bitmap and we find an entry at 1463 * this offset, we want to go right, or after this entry 1464 * logically. If we are inserting an extent and we've 1465 * found a bitmap, we want to go left, or before 1466 * logically. 1467 */ 1468 if (bitmap) { 1469 if (info->bitmap) { 1470 WARN_ON_ONCE(1); 1471 return -EEXIST; 1472 } 1473 p = &(*p)->rb_right; 1474 } else { 1475 if (!info->bitmap) { 1476 WARN_ON_ONCE(1); 1477 return -EEXIST; 1478 } 1479 p = &(*p)->rb_left; 1480 } 1481 } 1482 } 1483 1484 rb_link_node(node, parent, p); 1485 rb_insert_color(node, root); 1486 1487 return 0; 1488 } 1489 1490 /* 1491 * searches the tree for the given offset. 1492 * 1493 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1494 * want a section that has at least bytes size and comes at or after the given 1495 * offset. 1496 */ 1497 static struct btrfs_free_space * 1498 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1499 u64 offset, int bitmap_only, int fuzzy) 1500 { 1501 struct rb_node *n = ctl->free_space_offset.rb_node; 1502 struct btrfs_free_space *entry, *prev = NULL; 1503 1504 /* find entry that is closest to the 'offset' */ 1505 while (1) { 1506 if (!n) { 1507 entry = NULL; 1508 break; 1509 } 1510 1511 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1512 prev = entry; 1513 1514 if (offset < entry->offset) 1515 n = n->rb_left; 1516 else if (offset > entry->offset) 1517 n = n->rb_right; 1518 else 1519 break; 1520 } 1521 1522 if (bitmap_only) { 1523 if (!entry) 1524 return NULL; 1525 if (entry->bitmap) 1526 return entry; 1527 1528 /* 1529 * bitmap entry and extent entry may share same offset, 1530 * in that case, bitmap entry comes after extent entry. 
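		 * tree_insert_offset() guarantees that ordering: when offsets
		 * are equal it descends right for a bitmap entry and left for
		 * an extent entry.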
1531 */ 1532 n = rb_next(n); 1533 if (!n) 1534 return NULL; 1535 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1536 if (entry->offset != offset) 1537 return NULL; 1538 1539 WARN_ON(!entry->bitmap); 1540 return entry; 1541 } else if (entry) { 1542 if (entry->bitmap) { 1543 /* 1544 * if previous extent entry covers the offset, 1545 * we should return it instead of the bitmap entry 1546 */ 1547 n = rb_prev(&entry->offset_index); 1548 if (n) { 1549 prev = rb_entry(n, struct btrfs_free_space, 1550 offset_index); 1551 if (!prev->bitmap && 1552 prev->offset + prev->bytes > offset) 1553 entry = prev; 1554 } 1555 } 1556 return entry; 1557 } 1558 1559 if (!prev) 1560 return NULL; 1561 1562 /* find last entry before the 'offset' */ 1563 entry = prev; 1564 if (entry->offset > offset) { 1565 n = rb_prev(&entry->offset_index); 1566 if (n) { 1567 entry = rb_entry(n, struct btrfs_free_space, 1568 offset_index); 1569 ASSERT(entry->offset <= offset); 1570 } else { 1571 if (fuzzy) 1572 return entry; 1573 else 1574 return NULL; 1575 } 1576 } 1577 1578 if (entry->bitmap) { 1579 n = rb_prev(&entry->offset_index); 1580 if (n) { 1581 prev = rb_entry(n, struct btrfs_free_space, 1582 offset_index); 1583 if (!prev->bitmap && 1584 prev->offset + prev->bytes > offset) 1585 return prev; 1586 } 1587 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1588 return entry; 1589 } else if (entry->offset + entry->bytes > offset) 1590 return entry; 1591 1592 if (!fuzzy) 1593 return NULL; 1594 1595 while (1) { 1596 if (entry->bitmap) { 1597 if (entry->offset + BITS_PER_BITMAP * 1598 ctl->unit > offset) 1599 break; 1600 } else { 1601 if (entry->offset + entry->bytes > offset) 1602 break; 1603 } 1604 1605 n = rb_next(&entry->offset_index); 1606 if (!n) 1607 return NULL; 1608 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1609 } 1610 return entry; 1611 } 1612 1613 static inline void 1614 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1615 struct btrfs_free_space *info) 1616 { 1617 rb_erase(&info->offset_index, &ctl->free_space_offset); 1618 ctl->free_extents--; 1619 } 1620 1621 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1622 struct btrfs_free_space *info) 1623 { 1624 __unlink_free_space(ctl, info); 1625 ctl->free_space -= info->bytes; 1626 } 1627 1628 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1629 struct btrfs_free_space *info) 1630 { 1631 int ret = 0; 1632 1633 ASSERT(info->bytes || info->bitmap); 1634 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1635 &info->offset_index, (info->bitmap != NULL)); 1636 if (ret) 1637 return ret; 1638 1639 ctl->free_space += info->bytes; 1640 ctl->free_extents++; 1641 return ret; 1642 } 1643 1644 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1645 { 1646 struct btrfs_block_group_cache *block_group = ctl->private; 1647 u64 max_bytes; 1648 u64 bitmap_bytes; 1649 u64 extent_bytes; 1650 u64 size = block_group->key.offset; 1651 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1652 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1653 1654 max_bitmaps = max_t(u64, max_bitmaps, 1); 1655 1656 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1657 1658 /* 1659 * The goal is to keep the total amount of memory used per 1gb of space 1660 * at or below 32k, so we need to adjust how much memory we allow to be 1661 * used by extent based free space tracking 1662 */ 1663 if (size < SZ_1G) 1664 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1665 else 1666 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1667 1668 /* 1669 * we want to account for 1 more bitmap than what we have so we can make 1670 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1671 * we add more bitmaps. 1672 */ 1673 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1674 1675 if (bitmap_bytes >= max_bytes) { 1676 ctl->extents_thresh = 0; 1677 return; 1678 } 1679 1680 /* 1681 * we want the extent entry threshold to always be at most 1/2 the max 1682 * bytes we can have, or whatever is less than that. 1683 */ 1684 extent_bytes = max_bytes - bitmap_bytes; 1685 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1686 1687 ctl->extents_thresh = 1688 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1689 } 1690 1691 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1692 struct btrfs_free_space *info, 1693 u64 offset, u64 bytes) 1694 { 1695 unsigned long start, count; 1696 1697 start = offset_to_bit(info->offset, ctl->unit, offset); 1698 count = bytes_to_bits(bytes, ctl->unit); 1699 ASSERT(start + count <= BITS_PER_BITMAP); 1700 1701 bitmap_clear(info->bitmap, start, count); 1702 1703 info->bytes -= bytes; 1704 } 1705 1706 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1707 struct btrfs_free_space *info, u64 offset, 1708 u64 bytes) 1709 { 1710 __bitmap_clear_bits(ctl, info, offset, bytes); 1711 ctl->free_space -= bytes; 1712 } 1713 1714 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1715 struct btrfs_free_space *info, u64 offset, 1716 u64 bytes) 1717 { 1718 unsigned long start, count; 1719 1720 start = offset_to_bit(info->offset, ctl->unit, offset); 1721 count = bytes_to_bits(bytes, ctl->unit); 1722 ASSERT(start + count <= BITS_PER_BITMAP); 1723 1724 bitmap_set(info->bitmap, start, count); 1725 1726 info->bytes += bytes; 1727 ctl->free_space += bytes; 1728 } 1729 1730 /* 1731 * If we can not find suitable extent, we will use bytes to record 1732 * the size of the max extent. 1733 */ 1734 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1735 struct btrfs_free_space *bitmap_info, u64 *offset, 1736 u64 *bytes, bool for_alloc) 1737 { 1738 unsigned long found_bits = 0; 1739 unsigned long max_bits = 0; 1740 unsigned long bits, i; 1741 unsigned long next_zero; 1742 unsigned long extent_bits; 1743 1744 /* 1745 * Skip searching the bitmap if we don't have a contiguous section that 1746 * is large enough for this allocation. 
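	 * max_extent_size is a cached upper bound from a previous failed
	 * search of this bitmap; it is cleared again when new bits are set in
	 * add_bytes_to_bitmap().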
1747 */ 1748 if (for_alloc && 1749 bitmap_info->max_extent_size && 1750 bitmap_info->max_extent_size < *bytes) { 1751 *bytes = bitmap_info->max_extent_size; 1752 return -1; 1753 } 1754 1755 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1756 max_t(u64, *offset, bitmap_info->offset)); 1757 bits = bytes_to_bits(*bytes, ctl->unit); 1758 1759 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1760 if (for_alloc && bits == 1) { 1761 found_bits = 1; 1762 break; 1763 } 1764 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1765 BITS_PER_BITMAP, i); 1766 extent_bits = next_zero - i; 1767 if (extent_bits >= bits) { 1768 found_bits = extent_bits; 1769 break; 1770 } else if (extent_bits > max_bits) { 1771 max_bits = extent_bits; 1772 } 1773 i = next_zero; 1774 } 1775 1776 if (found_bits) { 1777 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1778 *bytes = (u64)(found_bits) * ctl->unit; 1779 return 0; 1780 } 1781 1782 *bytes = (u64)(max_bits) * ctl->unit; 1783 bitmap_info->max_extent_size = *bytes; 1784 return -1; 1785 } 1786 1787 /* Cache the size of the max extent in bytes */ 1788 static struct btrfs_free_space * 1789 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1790 unsigned long align, u64 *max_extent_size) 1791 { 1792 struct btrfs_free_space *entry; 1793 struct rb_node *node; 1794 u64 tmp; 1795 u64 align_off; 1796 int ret; 1797 1798 if (!ctl->free_space_offset.rb_node) 1799 goto out; 1800 1801 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1802 if (!entry) 1803 goto out; 1804 1805 for (node = &entry->offset_index; node; node = rb_next(node)) { 1806 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1807 if (entry->bytes < *bytes) { 1808 if (entry->bytes > *max_extent_size) 1809 *max_extent_size = entry->bytes; 1810 continue; 1811 } 1812 1813 /* make sure the space returned is big enough 1814 * to match our requested alignment 1815 */ 1816 if (*bytes >= align) { 1817 tmp = entry->offset - ctl->start + align - 1; 1818 tmp = div64_u64(tmp, align); 1819 tmp = tmp * align + ctl->start; 1820 align_off = tmp - entry->offset; 1821 } else { 1822 align_off = 0; 1823 tmp = entry->offset; 1824 } 1825 1826 if (entry->bytes < *bytes + align_off) { 1827 if (entry->bytes > *max_extent_size) 1828 *max_extent_size = entry->bytes; 1829 continue; 1830 } 1831 1832 if (entry->bitmap) { 1833 u64 size = *bytes; 1834 1835 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1836 if (!ret) { 1837 *offset = tmp; 1838 *bytes = size; 1839 return entry; 1840 } else if (size > *max_extent_size) { 1841 *max_extent_size = size; 1842 } 1843 continue; 1844 } 1845 1846 *offset = tmp; 1847 *bytes = entry->bytes - align_off; 1848 return entry; 1849 } 1850 out: 1851 return NULL; 1852 } 1853 1854 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1855 struct btrfs_free_space *info, u64 offset) 1856 { 1857 info->offset = offset_to_bitmap(ctl, offset); 1858 info->bytes = 0; 1859 INIT_LIST_HEAD(&info->list); 1860 link_free_space(ctl, info); 1861 ctl->total_bitmaps++; 1862 1863 ctl->op->recalc_thresholds(ctl); 1864 } 1865 1866 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1867 struct btrfs_free_space *bitmap_info) 1868 { 1869 unlink_free_space(ctl, bitmap_info); 1870 kfree(bitmap_info->bitmap); 1871 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1872 ctl->total_bitmaps--; 1873 ctl->op->recalc_thresholds(ctl); 1874 } 1875 1876 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1877 struct 
				       btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;
}
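
/*
 * Decide whether a new chunk of free space should be tracked in a bitmap
 * rather than as an extent entry. As a rough illustration, assuming a 4K
 * sectorsize: extents of at most 16K (sectorsize * 4) only spill into a
 * bitmap once half of extents_thresh is in use, while larger extents keep
 * getting extent entries until the threshold itself is exhausted.
 */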
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents; however, if we have
		 * plenty of cache left then go ahead and add them, no sense in
		 * adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so allow those block groups to still have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
2049 */ 2050 if (block_group && !list_empty(&block_group->cluster_list)) { 2051 struct btrfs_free_cluster *cluster; 2052 struct rb_node *node; 2053 struct btrfs_free_space *entry; 2054 2055 cluster = list_entry(block_group->cluster_list.next, 2056 struct btrfs_free_cluster, 2057 block_group_list); 2058 spin_lock(&cluster->lock); 2059 node = rb_first(&cluster->root); 2060 if (!node) { 2061 spin_unlock(&cluster->lock); 2062 goto no_cluster_bitmap; 2063 } 2064 2065 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2066 if (!entry->bitmap) { 2067 spin_unlock(&cluster->lock); 2068 goto no_cluster_bitmap; 2069 } 2070 2071 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2072 bytes_added = add_bytes_to_bitmap(ctl, entry, 2073 offset, bytes); 2074 bytes -= bytes_added; 2075 offset += bytes_added; 2076 } 2077 spin_unlock(&cluster->lock); 2078 if (!bytes) { 2079 ret = 1; 2080 goto out; 2081 } 2082 } 2083 2084 no_cluster_bitmap: 2085 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2086 1, 0); 2087 if (!bitmap_info) { 2088 ASSERT(added == 0); 2089 goto new_bitmap; 2090 } 2091 2092 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2093 bytes -= bytes_added; 2094 offset += bytes_added; 2095 added = 0; 2096 2097 if (!bytes) { 2098 ret = 1; 2099 goto out; 2100 } else 2101 goto again; 2102 2103 new_bitmap: 2104 if (info && info->bitmap) { 2105 add_new_bitmap(ctl, info, offset); 2106 added = 1; 2107 info = NULL; 2108 goto again; 2109 } else { 2110 spin_unlock(&ctl->tree_lock); 2111 2112 /* no pre-allocated info, allocate a new one */ 2113 if (!info) { 2114 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2115 GFP_NOFS); 2116 if (!info) { 2117 spin_lock(&ctl->tree_lock); 2118 ret = -ENOMEM; 2119 goto out; 2120 } 2121 } 2122 2123 /* allocate the bitmap */ 2124 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2125 spin_lock(&ctl->tree_lock); 2126 if (!info->bitmap) { 2127 ret = -ENOMEM; 2128 goto out; 2129 } 2130 goto again; 2131 } 2132 2133 out: 2134 if (info) { 2135 if (info->bitmap) 2136 kfree(info->bitmap); 2137 kmem_cache_free(btrfs_free_space_cachep, info); 2138 } 2139 2140 return ret; 2141 } 2142 2143 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2144 struct btrfs_free_space *info, bool update_stat) 2145 { 2146 struct btrfs_free_space *left_info; 2147 struct btrfs_free_space *right_info; 2148 bool merged = false; 2149 u64 offset = info->offset; 2150 u64 bytes = info->bytes; 2151 2152 /* 2153 * first we want to see if there is free space adjacent to the range we 2154 * are adding, if there is remove that struct and add a new one to 2155 * cover the entire range 2156 */ 2157 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2158 if (right_info && rb_prev(&right_info->offset_index)) 2159 left_info = rb_entry(rb_prev(&right_info->offset_index), 2160 struct btrfs_free_space, offset_index); 2161 else 2162 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2163 2164 if (right_info && !right_info->bitmap) { 2165 if (update_stat) 2166 unlink_free_space(ctl, right_info); 2167 else 2168 __unlink_free_space(ctl, right_info); 2169 info->bytes += right_info->bytes; 2170 kmem_cache_free(btrfs_free_space_cachep, right_info); 2171 merged = true; 2172 } 2173 2174 if (left_info && !left_info->bitmap && 2175 left_info->offset + left_info->bytes == offset) { 2176 if (update_stat) 2177 unlink_free_space(ctl, left_info); 2178 else 2179 __unlink_free_space(ctl, left_info); 2180 info->offset = left_info->offset; 2181 info->bytes += 
left_info->bytes; 2182 kmem_cache_free(btrfs_free_space_cachep, left_info); 2183 merged = true; 2184 } 2185 2186 return merged; 2187 } 2188 2189 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2190 struct btrfs_free_space *info, 2191 bool update_stat) 2192 { 2193 struct btrfs_free_space *bitmap; 2194 unsigned long i; 2195 unsigned long j; 2196 const u64 end = info->offset + info->bytes; 2197 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2198 u64 bytes; 2199 2200 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2201 if (!bitmap) 2202 return false; 2203 2204 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2205 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2206 if (j == i) 2207 return false; 2208 bytes = (j - i) * ctl->unit; 2209 info->bytes += bytes; 2210 2211 if (update_stat) 2212 bitmap_clear_bits(ctl, bitmap, end, bytes); 2213 else 2214 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2215 2216 if (!bitmap->bytes) 2217 free_bitmap(ctl, bitmap); 2218 2219 return true; 2220 } 2221 2222 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2223 struct btrfs_free_space *info, 2224 bool update_stat) 2225 { 2226 struct btrfs_free_space *bitmap; 2227 u64 bitmap_offset; 2228 unsigned long i; 2229 unsigned long j; 2230 unsigned long prev_j; 2231 u64 bytes; 2232 2233 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2234 /* If we're on a boundary, try the previous logical bitmap. */ 2235 if (bitmap_offset == info->offset) { 2236 if (info->offset == 0) 2237 return false; 2238 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2239 } 2240 2241 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2242 if (!bitmap) 2243 return false; 2244 2245 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2246 j = 0; 2247 prev_j = (unsigned long)-1; 2248 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2249 if (j > i) 2250 break; 2251 prev_j = j; 2252 } 2253 if (prev_j == i) 2254 return false; 2255 2256 if (prev_j == (unsigned long)-1) 2257 bytes = (i + 1) * ctl->unit; 2258 else 2259 bytes = (i - prev_j) * ctl->unit; 2260 2261 info->offset -= bytes; 2262 info->bytes += bytes; 2263 2264 if (update_stat) 2265 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2266 else 2267 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2268 2269 if (!bitmap->bytes) 2270 free_bitmap(ctl, bitmap); 2271 2272 return true; 2273 } 2274 2275 /* 2276 * We prefer always to allocate from extent entries, both for clustered and 2277 * non-clustered allocation requests. So when attempting to add a new extent 2278 * entry, try to see if there's adjacent free space in bitmap entries, and if 2279 * there is, migrate that space from the bitmaps to the extent. 2280 * Like this we get better chances of satisfying space allocation requests 2281 * because we attempt to satisfy them based on a single cache entry, and never 2282 * on 2 or more entries - even if the entries represent a contiguous free space 2283 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2284 * ends). 2285 */ 2286 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2287 struct btrfs_free_space *info, 2288 bool update_stat) 2289 { 2290 /* 2291 * Only work with disconnected entries, as we can change their offset, 2292 * and must be extent entries. 
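	 *
	 * A rough sketch of what the steal_from_bitmap_to_end()/_to_front()
	 * helpers called below do, with illustrative offsets: if info is the
	 * extent [1M, 1M + 64K) and a bitmap entry has bits set for
	 * [1M + 64K, 1M + 128K), steal_from_bitmap_to_end() clears those bits
	 * and grows info to cover [1M, 1M + 128K), so a later 128K allocation
	 * can be served from a single extent entry.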
2293 */ 2294 ASSERT(!info->bitmap); 2295 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2296 2297 if (ctl->total_bitmaps > 0) { 2298 bool stole_end; 2299 bool stole_front = false; 2300 2301 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2302 if (ctl->total_bitmaps > 0) 2303 stole_front = steal_from_bitmap_to_front(ctl, info, 2304 update_stat); 2305 2306 if (stole_end || stole_front) 2307 try_merge_free_space(ctl, info, update_stat); 2308 } 2309 } 2310 2311 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2312 struct btrfs_free_space_ctl *ctl, 2313 u64 offset, u64 bytes) 2314 { 2315 struct btrfs_free_space *info; 2316 int ret = 0; 2317 2318 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2319 if (!info) 2320 return -ENOMEM; 2321 2322 info->offset = offset; 2323 info->bytes = bytes; 2324 RB_CLEAR_NODE(&info->offset_index); 2325 2326 spin_lock(&ctl->tree_lock); 2327 2328 if (try_merge_free_space(ctl, info, true)) 2329 goto link; 2330 2331 /* 2332 * There was no extent directly to the left or right of this new 2333 * extent then we know we're going to have to allocate a new extent, so 2334 * before we do that see if we need to drop this into a bitmap 2335 */ 2336 ret = insert_into_bitmap(ctl, info); 2337 if (ret < 0) { 2338 goto out; 2339 } else if (ret) { 2340 ret = 0; 2341 goto out; 2342 } 2343 link: 2344 /* 2345 * Only steal free space from adjacent bitmaps if we're sure we're not 2346 * going to add the new free space to existing bitmap entries - because 2347 * that would mean unnecessary work that would be reverted. Therefore 2348 * attempt to steal space from bitmaps if we're adding an extent entry. 2349 */ 2350 steal_from_bitmap(ctl, info, true); 2351 2352 ret = link_free_space(ctl, info); 2353 if (ret) 2354 kmem_cache_free(btrfs_free_space_cachep, info); 2355 out: 2356 spin_unlock(&ctl->tree_lock); 2357 2358 if (ret) { 2359 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2360 ASSERT(ret != -EEXIST); 2361 } 2362 2363 return ret; 2364 } 2365 2366 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2367 u64 offset, u64 bytes) 2368 { 2369 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2370 struct btrfs_free_space *info; 2371 int ret; 2372 bool re_search = false; 2373 2374 spin_lock(&ctl->tree_lock); 2375 2376 again: 2377 ret = 0; 2378 if (!bytes) 2379 goto out_lock; 2380 2381 info = tree_search_offset(ctl, offset, 0, 0); 2382 if (!info) { 2383 /* 2384 * oops didn't find an extent that matched the space we wanted 2385 * to remove, look for a bitmap instead 2386 */ 2387 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2388 1, 0); 2389 if (!info) { 2390 /* 2391 * If we found a partial bit of our free space in a 2392 * bitmap but then couldn't find the other part this may 2393 * be a problem, so WARN about it. 
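			 *
			 * (re_search is only true after remove_from_bitmap()
			 *  returned -EAGAIN, i.e. part of the requested range
			 *  was already cleared out of a bitmap, so failing to
			 *  find the remainder points at an inconsistency in
			 *  the cache rather than a normal miss.)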
2394 */ 2395 WARN_ON(re_search); 2396 goto out_lock; 2397 } 2398 } 2399 2400 re_search = false; 2401 if (!info->bitmap) { 2402 unlink_free_space(ctl, info); 2403 if (offset == info->offset) { 2404 u64 to_free = min(bytes, info->bytes); 2405 2406 info->bytes -= to_free; 2407 info->offset += to_free; 2408 if (info->bytes) { 2409 ret = link_free_space(ctl, info); 2410 WARN_ON(ret); 2411 } else { 2412 kmem_cache_free(btrfs_free_space_cachep, info); 2413 } 2414 2415 offset += to_free; 2416 bytes -= to_free; 2417 goto again; 2418 } else { 2419 u64 old_end = info->bytes + info->offset; 2420 2421 info->bytes = offset - info->offset; 2422 ret = link_free_space(ctl, info); 2423 WARN_ON(ret); 2424 if (ret) 2425 goto out_lock; 2426 2427 /* Not enough bytes in this entry to satisfy us */ 2428 if (old_end < offset + bytes) { 2429 bytes -= old_end - offset; 2430 offset = old_end; 2431 goto again; 2432 } else if (old_end == offset + bytes) { 2433 /* all done */ 2434 goto out_lock; 2435 } 2436 spin_unlock(&ctl->tree_lock); 2437 2438 ret = btrfs_add_free_space(block_group, offset + bytes, 2439 old_end - (offset + bytes)); 2440 WARN_ON(ret); 2441 goto out; 2442 } 2443 } 2444 2445 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2446 if (ret == -EAGAIN) { 2447 re_search = true; 2448 goto again; 2449 } 2450 out_lock: 2451 spin_unlock(&ctl->tree_lock); 2452 out: 2453 return ret; 2454 } 2455 2456 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2457 u64 bytes) 2458 { 2459 struct btrfs_fs_info *fs_info = block_group->fs_info; 2460 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2461 struct btrfs_free_space *info; 2462 struct rb_node *n; 2463 int count = 0; 2464 2465 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2466 info = rb_entry(n, struct btrfs_free_space, offset_index); 2467 if (info->bytes >= bytes && !block_group->ro) 2468 count++; 2469 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2470 info->offset, info->bytes, 2471 (info->bitmap) ? "yes" : "no"); 2472 } 2473 btrfs_info(fs_info, "block group has cluster?: %s", 2474 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2475 btrfs_info(fs_info, 2476 "%d blocks of free space at or bigger than bytes is", count); 2477 } 2478 2479 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2480 { 2481 struct btrfs_fs_info *fs_info = block_group->fs_info; 2482 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2483 2484 spin_lock_init(&ctl->tree_lock); 2485 ctl->unit = fs_info->sectorsize; 2486 ctl->start = block_group->key.objectid; 2487 ctl->private = block_group; 2488 ctl->op = &free_space_op; 2489 INIT_LIST_HEAD(&ctl->trimming_ranges); 2490 mutex_init(&ctl->cache_writeout_mutex); 2491 2492 /* 2493 * we only want to have 32k of ram per block group for keeping 2494 * track of free space, and if we pass 1/2 of that we want to 2495 * start converting things over to using bitmaps 2496 */ 2497 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2498 } 2499 2500 /* 2501 * for a given cluster, put all of its extents back into the free 2502 * space cache. If the block group passed doesn't match the block group 2503 * pointed to by the cluster, someone else raced in and freed the 2504 * cluster already. 
In that case, we just return without changing anything 2505 */ 2506 static int 2507 __btrfs_return_cluster_to_free_space( 2508 struct btrfs_block_group_cache *block_group, 2509 struct btrfs_free_cluster *cluster) 2510 { 2511 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2512 struct btrfs_free_space *entry; 2513 struct rb_node *node; 2514 2515 spin_lock(&cluster->lock); 2516 if (cluster->block_group != block_group) 2517 goto out; 2518 2519 cluster->block_group = NULL; 2520 cluster->window_start = 0; 2521 list_del_init(&cluster->block_group_list); 2522 2523 node = rb_first(&cluster->root); 2524 while (node) { 2525 bool bitmap; 2526 2527 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2528 node = rb_next(&entry->offset_index); 2529 rb_erase(&entry->offset_index, &cluster->root); 2530 RB_CLEAR_NODE(&entry->offset_index); 2531 2532 bitmap = (entry->bitmap != NULL); 2533 if (!bitmap) { 2534 try_merge_free_space(ctl, entry, false); 2535 steal_from_bitmap(ctl, entry, false); 2536 } 2537 tree_insert_offset(&ctl->free_space_offset, 2538 entry->offset, &entry->offset_index, bitmap); 2539 } 2540 cluster->root = RB_ROOT; 2541 2542 out: 2543 spin_unlock(&cluster->lock); 2544 btrfs_put_block_group(block_group); 2545 return 0; 2546 } 2547 2548 static void __btrfs_remove_free_space_cache_locked( 2549 struct btrfs_free_space_ctl *ctl) 2550 { 2551 struct btrfs_free_space *info; 2552 struct rb_node *node; 2553 2554 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2555 info = rb_entry(node, struct btrfs_free_space, offset_index); 2556 if (!info->bitmap) { 2557 unlink_free_space(ctl, info); 2558 kmem_cache_free(btrfs_free_space_cachep, info); 2559 } else { 2560 free_bitmap(ctl, info); 2561 } 2562 2563 cond_resched_lock(&ctl->tree_lock); 2564 } 2565 } 2566 2567 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2568 { 2569 spin_lock(&ctl->tree_lock); 2570 __btrfs_remove_free_space_cache_locked(ctl); 2571 spin_unlock(&ctl->tree_lock); 2572 } 2573 2574 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2575 { 2576 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2577 struct btrfs_free_cluster *cluster; 2578 struct list_head *head; 2579 2580 spin_lock(&ctl->tree_lock); 2581 while ((head = block_group->cluster_list.next) != 2582 &block_group->cluster_list) { 2583 cluster = list_entry(head, struct btrfs_free_cluster, 2584 block_group_list); 2585 2586 WARN_ON(cluster->block_group != block_group); 2587 __btrfs_return_cluster_to_free_space(block_group, cluster); 2588 2589 cond_resched_lock(&ctl->tree_lock); 2590 } 2591 __btrfs_remove_free_space_cache_locked(ctl); 2592 spin_unlock(&ctl->tree_lock); 2593 2594 } 2595 2596 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2597 u64 offset, u64 bytes, u64 empty_size, 2598 u64 *max_extent_size) 2599 { 2600 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2601 struct btrfs_free_space *entry = NULL; 2602 u64 bytes_search = bytes + empty_size; 2603 u64 ret = 0; 2604 u64 align_gap = 0; 2605 u64 align_gap_len = 0; 2606 2607 spin_lock(&ctl->tree_lock); 2608 entry = find_free_space(ctl, &offset, &bytes_search, 2609 block_group->full_stripe_len, max_extent_size); 2610 if (!entry) 2611 goto out; 2612 2613 ret = offset; 2614 if (entry->bitmap) { 2615 bitmap_clear_bits(ctl, entry, offset, bytes); 2616 if (!entry->bytes) 2617 free_bitmap(ctl, entry); 2618 } else { 2619 unlink_free_space(ctl, entry); 2620 align_gap_len = offset - 
entry->offset; 2621 align_gap = entry->offset; 2622 2623 entry->offset = offset + bytes; 2624 WARN_ON(entry->bytes < bytes + align_gap_len); 2625 2626 entry->bytes -= bytes + align_gap_len; 2627 if (!entry->bytes) 2628 kmem_cache_free(btrfs_free_space_cachep, entry); 2629 else 2630 link_free_space(ctl, entry); 2631 } 2632 out: 2633 spin_unlock(&ctl->tree_lock); 2634 2635 if (align_gap_len) 2636 __btrfs_add_free_space(block_group->fs_info, ctl, 2637 align_gap, align_gap_len); 2638 return ret; 2639 } 2640 2641 /* 2642 * given a cluster, put all of its extents back into the free space 2643 * cache. If a block group is passed, this function will only free 2644 * a cluster that belongs to the passed block group. 2645 * 2646 * Otherwise, it'll get a reference on the block group pointed to by the 2647 * cluster and remove the cluster from it. 2648 */ 2649 int btrfs_return_cluster_to_free_space( 2650 struct btrfs_block_group_cache *block_group, 2651 struct btrfs_free_cluster *cluster) 2652 { 2653 struct btrfs_free_space_ctl *ctl; 2654 int ret; 2655 2656 /* first, get a safe pointer to the block group */ 2657 spin_lock(&cluster->lock); 2658 if (!block_group) { 2659 block_group = cluster->block_group; 2660 if (!block_group) { 2661 spin_unlock(&cluster->lock); 2662 return 0; 2663 } 2664 } else if (cluster->block_group != block_group) { 2665 /* someone else has already freed it don't redo their work */ 2666 spin_unlock(&cluster->lock); 2667 return 0; 2668 } 2669 atomic_inc(&block_group->count); 2670 spin_unlock(&cluster->lock); 2671 2672 ctl = block_group->free_space_ctl; 2673 2674 /* now return any extents the cluster had on it */ 2675 spin_lock(&ctl->tree_lock); 2676 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2677 spin_unlock(&ctl->tree_lock); 2678 2679 /* finally drop our ref */ 2680 btrfs_put_block_group(block_group); 2681 return ret; 2682 } 2683 2684 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2685 struct btrfs_free_cluster *cluster, 2686 struct btrfs_free_space *entry, 2687 u64 bytes, u64 min_start, 2688 u64 *max_extent_size) 2689 { 2690 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2691 int err; 2692 u64 search_start = cluster->window_start; 2693 u64 search_bytes = bytes; 2694 u64 ret = 0; 2695 2696 search_start = min_start; 2697 search_bytes = bytes; 2698 2699 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2700 if (err) { 2701 if (search_bytes > *max_extent_size) 2702 *max_extent_size = search_bytes; 2703 return 0; 2704 } 2705 2706 ret = search_start; 2707 __bitmap_clear_bits(ctl, entry, ret, bytes); 2708 2709 return ret; 2710 } 2711 2712 /* 2713 * given a cluster, try to allocate 'bytes' from it, returns 0 2714 * if it couldn't find anything suitably large, or a logical disk offset 2715 * if things worked out 2716 */ 2717 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2718 struct btrfs_free_cluster *cluster, u64 bytes, 2719 u64 min_start, u64 *max_extent_size) 2720 { 2721 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2722 struct btrfs_free_space *entry = NULL; 2723 struct rb_node *node; 2724 u64 ret = 0; 2725 2726 spin_lock(&cluster->lock); 2727 if (bytes > cluster->max_size) 2728 goto out; 2729 2730 if (cluster->block_group != block_group) 2731 goto out; 2732 2733 node = rb_first(&cluster->root); 2734 if (!node) 2735 goto out; 2736 2737 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2738 while (1) { 2739 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2740 *max_extent_size = entry->bytes; 2741 2742 if (entry->bytes < bytes || 2743 (!entry->bitmap && entry->offset < min_start)) { 2744 node = rb_next(&entry->offset_index); 2745 if (!node) 2746 break; 2747 entry = rb_entry(node, struct btrfs_free_space, 2748 offset_index); 2749 continue; 2750 } 2751 2752 if (entry->bitmap) { 2753 ret = btrfs_alloc_from_bitmap(block_group, 2754 cluster, entry, bytes, 2755 cluster->window_start, 2756 max_extent_size); 2757 if (ret == 0) { 2758 node = rb_next(&entry->offset_index); 2759 if (!node) 2760 break; 2761 entry = rb_entry(node, struct btrfs_free_space, 2762 offset_index); 2763 continue; 2764 } 2765 cluster->window_start += bytes; 2766 } else { 2767 ret = entry->offset; 2768 2769 entry->offset += bytes; 2770 entry->bytes -= bytes; 2771 } 2772 2773 if (entry->bytes == 0) 2774 rb_erase(&entry->offset_index, &cluster->root); 2775 break; 2776 } 2777 out: 2778 spin_unlock(&cluster->lock); 2779 2780 if (!ret) 2781 return 0; 2782 2783 spin_lock(&ctl->tree_lock); 2784 2785 ctl->free_space -= bytes; 2786 if (entry->bytes == 0) { 2787 ctl->free_extents--; 2788 if (entry->bitmap) { 2789 kfree(entry->bitmap); 2790 ctl->total_bitmaps--; 2791 ctl->op->recalc_thresholds(ctl); 2792 } 2793 kmem_cache_free(btrfs_free_space_cachep, entry); 2794 } 2795 2796 spin_unlock(&ctl->tree_lock); 2797 2798 return ret; 2799 } 2800 2801 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2802 struct btrfs_free_space *entry, 2803 struct btrfs_free_cluster *cluster, 2804 u64 offset, u64 bytes, 2805 u64 cont1_bytes, u64 min_bytes) 2806 { 2807 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2808 unsigned long next_zero; 2809 unsigned long i; 2810 unsigned long want_bits; 2811 unsigned long min_bits; 2812 unsigned long found_bits; 2813 unsigned long max_bits = 0; 2814 unsigned long start = 0; 2815 unsigned long total_found = 0; 2816 int ret; 2817 2818 i = offset_to_bit(entry->offset, ctl->unit, 2819 max_t(u64, offset, entry->offset)); 2820 want_bits = bytes_to_bits(bytes, ctl->unit); 2821 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2822 2823 /* 2824 * Don't bother looking for a cluster in this bitmap if it's heavily 2825 * fragmented. 
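	 *
	 * entry->max_extent_size is an upper bound cached by a previous
	 * failed scan of this bitmap (see the !found_bits case below); if it
	 * is already known to be smaller than cont1_bytes there is no point
	 * walking the bits again.  Zero means no bound has been cached yet.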
2826 */ 2827 if (entry->max_extent_size && 2828 entry->max_extent_size < cont1_bytes) 2829 return -ENOSPC; 2830 again: 2831 found_bits = 0; 2832 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2833 next_zero = find_next_zero_bit(entry->bitmap, 2834 BITS_PER_BITMAP, i); 2835 if (next_zero - i >= min_bits) { 2836 found_bits = next_zero - i; 2837 if (found_bits > max_bits) 2838 max_bits = found_bits; 2839 break; 2840 } 2841 if (next_zero - i > max_bits) 2842 max_bits = next_zero - i; 2843 i = next_zero; 2844 } 2845 2846 if (!found_bits) { 2847 entry->max_extent_size = (u64)max_bits * ctl->unit; 2848 return -ENOSPC; 2849 } 2850 2851 if (!total_found) { 2852 start = i; 2853 cluster->max_size = 0; 2854 } 2855 2856 total_found += found_bits; 2857 2858 if (cluster->max_size < found_bits * ctl->unit) 2859 cluster->max_size = found_bits * ctl->unit; 2860 2861 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2862 i = next_zero + 1; 2863 goto again; 2864 } 2865 2866 cluster->window_start = start * ctl->unit + entry->offset; 2867 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2868 ret = tree_insert_offset(&cluster->root, entry->offset, 2869 &entry->offset_index, 1); 2870 ASSERT(!ret); /* -EEXIST; Logic error */ 2871 2872 trace_btrfs_setup_cluster(block_group, cluster, 2873 total_found * ctl->unit, 1); 2874 return 0; 2875 } 2876 2877 /* 2878 * This searches the block group for just extents to fill the cluster with. 2879 * Try to find a cluster with at least bytes total bytes, at least one 2880 * extent of cont1_bytes, and other clusters of at least min_bytes. 2881 */ 2882 static noinline int 2883 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2884 struct btrfs_free_cluster *cluster, 2885 struct list_head *bitmaps, u64 offset, u64 bytes, 2886 u64 cont1_bytes, u64 min_bytes) 2887 { 2888 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2889 struct btrfs_free_space *first = NULL; 2890 struct btrfs_free_space *entry = NULL; 2891 struct btrfs_free_space *last; 2892 struct rb_node *node; 2893 u64 window_free; 2894 u64 max_extent; 2895 u64 total_size = 0; 2896 2897 entry = tree_search_offset(ctl, offset, 0, 1); 2898 if (!entry) 2899 return -ENOSPC; 2900 2901 /* 2902 * We don't want bitmaps, so just move along until we find a normal 2903 * extent entry. 
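	 *
	 * Any bitmap entries we walk past are stashed on the caller's
	 * 'bitmaps' list so that setup_cluster_bitmap() can fall back to
	 * them if no suitable run of extent entries is found here.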
2904 */ 2905 while (entry->bitmap || entry->bytes < min_bytes) { 2906 if (entry->bitmap && list_empty(&entry->list)) 2907 list_add_tail(&entry->list, bitmaps); 2908 node = rb_next(&entry->offset_index); 2909 if (!node) 2910 return -ENOSPC; 2911 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2912 } 2913 2914 window_free = entry->bytes; 2915 max_extent = entry->bytes; 2916 first = entry; 2917 last = entry; 2918 2919 for (node = rb_next(&entry->offset_index); node; 2920 node = rb_next(&entry->offset_index)) { 2921 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2922 2923 if (entry->bitmap) { 2924 if (list_empty(&entry->list)) 2925 list_add_tail(&entry->list, bitmaps); 2926 continue; 2927 } 2928 2929 if (entry->bytes < min_bytes) 2930 continue; 2931 2932 last = entry; 2933 window_free += entry->bytes; 2934 if (entry->bytes > max_extent) 2935 max_extent = entry->bytes; 2936 } 2937 2938 if (window_free < bytes || max_extent < cont1_bytes) 2939 return -ENOSPC; 2940 2941 cluster->window_start = first->offset; 2942 2943 node = &first->offset_index; 2944 2945 /* 2946 * now we've found our entries, pull them out of the free space 2947 * cache and put them into the cluster rbtree 2948 */ 2949 do { 2950 int ret; 2951 2952 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2953 node = rb_next(&entry->offset_index); 2954 if (entry->bitmap || entry->bytes < min_bytes) 2955 continue; 2956 2957 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2958 ret = tree_insert_offset(&cluster->root, entry->offset, 2959 &entry->offset_index, 0); 2960 total_size += entry->bytes; 2961 ASSERT(!ret); /* -EEXIST; Logic error */ 2962 } while (node && entry != last); 2963 2964 cluster->max_size = max_extent; 2965 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2966 return 0; 2967 } 2968 2969 /* 2970 * This specifically looks for bitmaps that may work in the cluster, we assume 2971 * that we have already failed to find extents that will work. 2972 */ 2973 static noinline int 2974 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2975 struct btrfs_free_cluster *cluster, 2976 struct list_head *bitmaps, u64 offset, u64 bytes, 2977 u64 cont1_bytes, u64 min_bytes) 2978 { 2979 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2980 struct btrfs_free_space *entry = NULL; 2981 int ret = -ENOSPC; 2982 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2983 2984 if (ctl->total_bitmaps == 0) 2985 return -ENOSPC; 2986 2987 /* 2988 * The bitmap that covers offset won't be in the list unless offset 2989 * is just its start offset. 2990 */ 2991 if (!list_empty(bitmaps)) 2992 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2993 2994 if (!entry || entry->offset != bitmap_offset) { 2995 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2996 if (entry && list_empty(&entry->list)) 2997 list_add(&entry->list, bitmaps); 2998 } 2999 3000 list_for_each_entry(entry, bitmaps, list) { 3001 if (entry->bytes < bytes) 3002 continue; 3003 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3004 bytes, cont1_bytes, min_bytes); 3005 if (!ret) 3006 return 0; 3007 } 3008 3009 /* 3010 * The bitmaps list has all the bitmaps that record free space 3011 * starting after offset, so no more search is required. 3012 */ 3013 return -ENOSPC; 3014 } 3015 3016 /* 3017 * here we try to find a cluster of blocks in a block group. The goal 3018 * is to find at least bytes+empty_size. 3019 * We might not find them all in one contiguous area. 
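 *
 * As a rough illustration of the sizing rules applied below: for a data
 * block group with bytes = 1M and empty_size = 7M, cont1_bytes becomes
 * max(1M, (1M + 7M) >> 2) = 2M and min_bytes is the sector size, so the
 * cluster needs at least one 2M contiguous extent while smaller extents
 * may make up the rest of the requested window.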
3020 * 3021 * returns zero and sets up cluster if things worked out, otherwise 3022 * it returns -enospc 3023 */ 3024 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3025 struct btrfs_block_group_cache *block_group, 3026 struct btrfs_free_cluster *cluster, 3027 u64 offset, u64 bytes, u64 empty_size) 3028 { 3029 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3030 struct btrfs_free_space *entry, *tmp; 3031 LIST_HEAD(bitmaps); 3032 u64 min_bytes; 3033 u64 cont1_bytes; 3034 int ret; 3035 3036 /* 3037 * Choose the minimum extent size we'll require for this 3038 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3039 * For metadata, allow allocates with smaller extents. For 3040 * data, keep it dense. 3041 */ 3042 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3043 cont1_bytes = min_bytes = bytes + empty_size; 3044 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3045 cont1_bytes = bytes; 3046 min_bytes = fs_info->sectorsize; 3047 } else { 3048 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3049 min_bytes = fs_info->sectorsize; 3050 } 3051 3052 spin_lock(&ctl->tree_lock); 3053 3054 /* 3055 * If we know we don't have enough space to make a cluster don't even 3056 * bother doing all the work to try and find one. 3057 */ 3058 if (ctl->free_space < bytes) { 3059 spin_unlock(&ctl->tree_lock); 3060 return -ENOSPC; 3061 } 3062 3063 spin_lock(&cluster->lock); 3064 3065 /* someone already found a cluster, hooray */ 3066 if (cluster->block_group) { 3067 ret = 0; 3068 goto out; 3069 } 3070 3071 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3072 min_bytes); 3073 3074 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3075 bytes + empty_size, 3076 cont1_bytes, min_bytes); 3077 if (ret) 3078 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3079 offset, bytes + empty_size, 3080 cont1_bytes, min_bytes); 3081 3082 /* Clear our temporary list */ 3083 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3084 list_del_init(&entry->list); 3085 3086 if (!ret) { 3087 atomic_inc(&block_group->count); 3088 list_add_tail(&cluster->block_group_list, 3089 &block_group->cluster_list); 3090 cluster->block_group = block_group; 3091 } else { 3092 trace_btrfs_failed_cluster_setup(block_group); 3093 } 3094 out: 3095 spin_unlock(&cluster->lock); 3096 spin_unlock(&ctl->tree_lock); 3097 3098 return ret; 3099 } 3100 3101 /* 3102 * simple code to zero out a cluster 3103 */ 3104 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3105 { 3106 spin_lock_init(&cluster->lock); 3107 spin_lock_init(&cluster->refill_lock); 3108 cluster->root = RB_ROOT; 3109 cluster->max_size = 0; 3110 cluster->fragmented = false; 3111 INIT_LIST_HEAD(&cluster->block_group_list); 3112 cluster->block_group = NULL; 3113 } 3114 3115 static int do_trimming(struct btrfs_block_group_cache *block_group, 3116 u64 *total_trimmed, u64 start, u64 bytes, 3117 u64 reserved_start, u64 reserved_bytes, 3118 struct btrfs_trim_range *trim_entry) 3119 { 3120 struct btrfs_space_info *space_info = block_group->space_info; 3121 struct btrfs_fs_info *fs_info = block_group->fs_info; 3122 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3123 int ret; 3124 int update = 0; 3125 u64 trimmed = 0; 3126 3127 spin_lock(&space_info->lock); 3128 spin_lock(&block_group->lock); 3129 if (!block_group->ro) { 3130 block_group->reserved += reserved_bytes; 3131 space_info->bytes_reserved += reserved_bytes; 3132 update = 1; 3133 } 3134 spin_unlock(&block_group->lock); 3135 
spin_unlock(&space_info->lock); 3136 3137 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3138 if (!ret) 3139 *total_trimmed += trimmed; 3140 3141 mutex_lock(&ctl->cache_writeout_mutex); 3142 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3143 list_del(&trim_entry->list); 3144 mutex_unlock(&ctl->cache_writeout_mutex); 3145 3146 if (update) { 3147 spin_lock(&space_info->lock); 3148 spin_lock(&block_group->lock); 3149 if (block_group->ro) 3150 space_info->bytes_readonly += reserved_bytes; 3151 block_group->reserved -= reserved_bytes; 3152 space_info->bytes_reserved -= reserved_bytes; 3153 spin_unlock(&space_info->lock); 3154 spin_unlock(&block_group->lock); 3155 } 3156 3157 return ret; 3158 } 3159 3160 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3161 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3162 { 3163 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3164 struct btrfs_free_space *entry; 3165 struct rb_node *node; 3166 int ret = 0; 3167 u64 extent_start; 3168 u64 extent_bytes; 3169 u64 bytes; 3170 3171 while (start < end) { 3172 struct btrfs_trim_range trim_entry; 3173 3174 mutex_lock(&ctl->cache_writeout_mutex); 3175 spin_lock(&ctl->tree_lock); 3176 3177 if (ctl->free_space < minlen) { 3178 spin_unlock(&ctl->tree_lock); 3179 mutex_unlock(&ctl->cache_writeout_mutex); 3180 break; 3181 } 3182 3183 entry = tree_search_offset(ctl, start, 0, 1); 3184 if (!entry) { 3185 spin_unlock(&ctl->tree_lock); 3186 mutex_unlock(&ctl->cache_writeout_mutex); 3187 break; 3188 } 3189 3190 /* skip bitmaps */ 3191 while (entry->bitmap) { 3192 node = rb_next(&entry->offset_index); 3193 if (!node) { 3194 spin_unlock(&ctl->tree_lock); 3195 mutex_unlock(&ctl->cache_writeout_mutex); 3196 goto out; 3197 } 3198 entry = rb_entry(node, struct btrfs_free_space, 3199 offset_index); 3200 } 3201 3202 if (entry->offset >= end) { 3203 spin_unlock(&ctl->tree_lock); 3204 mutex_unlock(&ctl->cache_writeout_mutex); 3205 break; 3206 } 3207 3208 extent_start = entry->offset; 3209 extent_bytes = entry->bytes; 3210 start = max(start, extent_start); 3211 bytes = min(extent_start + extent_bytes, end) - start; 3212 if (bytes < minlen) { 3213 spin_unlock(&ctl->tree_lock); 3214 mutex_unlock(&ctl->cache_writeout_mutex); 3215 goto next; 3216 } 3217 3218 unlink_free_space(ctl, entry); 3219 kmem_cache_free(btrfs_free_space_cachep, entry); 3220 3221 spin_unlock(&ctl->tree_lock); 3222 trim_entry.start = extent_start; 3223 trim_entry.bytes = extent_bytes; 3224 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3225 mutex_unlock(&ctl->cache_writeout_mutex); 3226 3227 ret = do_trimming(block_group, total_trimmed, start, bytes, 3228 extent_start, extent_bytes, &trim_entry); 3229 if (ret) 3230 break; 3231 next: 3232 start += bytes; 3233 3234 if (fatal_signal_pending(current)) { 3235 ret = -ERESTARTSYS; 3236 break; 3237 } 3238 3239 cond_resched(); 3240 } 3241 out: 3242 return ret; 3243 } 3244 3245 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3246 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3247 { 3248 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3249 struct btrfs_free_space *entry; 3250 int ret = 0; 3251 int ret2; 3252 u64 bytes; 3253 u64 offset = offset_to_bitmap(ctl, start); 3254 3255 while (offset < end) { 3256 bool next_bitmap = false; 3257 struct btrfs_trim_range trim_entry; 3258 3259 mutex_lock(&ctl->cache_writeout_mutex); 3260 spin_lock(&ctl->tree_lock); 3261 3262 if (ctl->free_space < minlen) { 3263 
spin_unlock(&ctl->tree_lock); 3264 mutex_unlock(&ctl->cache_writeout_mutex); 3265 break; 3266 } 3267 3268 entry = tree_search_offset(ctl, offset, 1, 0); 3269 if (!entry) { 3270 spin_unlock(&ctl->tree_lock); 3271 mutex_unlock(&ctl->cache_writeout_mutex); 3272 next_bitmap = true; 3273 goto next; 3274 } 3275 3276 bytes = minlen; 3277 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3278 if (ret2 || start >= end) { 3279 spin_unlock(&ctl->tree_lock); 3280 mutex_unlock(&ctl->cache_writeout_mutex); 3281 next_bitmap = true; 3282 goto next; 3283 } 3284 3285 bytes = min(bytes, end - start); 3286 if (bytes < minlen) { 3287 spin_unlock(&ctl->tree_lock); 3288 mutex_unlock(&ctl->cache_writeout_mutex); 3289 goto next; 3290 } 3291 3292 bitmap_clear_bits(ctl, entry, start, bytes); 3293 if (entry->bytes == 0) 3294 free_bitmap(ctl, entry); 3295 3296 spin_unlock(&ctl->tree_lock); 3297 trim_entry.start = start; 3298 trim_entry.bytes = bytes; 3299 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3300 mutex_unlock(&ctl->cache_writeout_mutex); 3301 3302 ret = do_trimming(block_group, total_trimmed, start, bytes, 3303 start, bytes, &trim_entry); 3304 if (ret) 3305 break; 3306 next: 3307 if (next_bitmap) { 3308 offset += BITS_PER_BITMAP * ctl->unit; 3309 } else { 3310 start += bytes; 3311 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3312 offset += BITS_PER_BITMAP * ctl->unit; 3313 } 3314 3315 if (fatal_signal_pending(current)) { 3316 ret = -ERESTARTSYS; 3317 break; 3318 } 3319 3320 cond_resched(); 3321 } 3322 3323 return ret; 3324 } 3325 3326 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3327 { 3328 atomic_inc(&cache->trimming); 3329 } 3330 3331 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3332 { 3333 struct btrfs_fs_info *fs_info = block_group->fs_info; 3334 struct extent_map_tree *em_tree; 3335 struct extent_map *em; 3336 bool cleanup; 3337 3338 spin_lock(&block_group->lock); 3339 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3340 block_group->removed); 3341 spin_unlock(&block_group->lock); 3342 3343 if (cleanup) { 3344 mutex_lock(&fs_info->chunk_mutex); 3345 em_tree = &fs_info->mapping_tree.map_tree; 3346 write_lock(&em_tree->lock); 3347 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3348 1); 3349 BUG_ON(!em); /* logic error, can't happen */ 3350 /* 3351 * remove_extent_mapping() will delete us from the pinned_chunks 3352 * list, which is protected by the chunk mutex. 3353 */ 3354 remove_extent_mapping(em_tree, em); 3355 write_unlock(&em_tree->lock); 3356 mutex_unlock(&fs_info->chunk_mutex); 3357 3358 /* once for us and once for the tree */ 3359 free_extent_map(em); 3360 free_extent_map(em); 3361 3362 /* 3363 * We've left one free space entry and other tasks trimming 3364 * this block group have left 1 entry each one. Free them. 
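		 *
		 * (Those entries come from do_trimming(), which re-adds each
		 *  trimmed range via btrfs_add_free_space() after the discard,
		 *  even though the block group itself is being removed.)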
3365 */ 3366 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3367 } 3368 } 3369 3370 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3371 u64 *trimmed, u64 start, u64 end, u64 minlen) 3372 { 3373 int ret; 3374 3375 *trimmed = 0; 3376 3377 spin_lock(&block_group->lock); 3378 if (block_group->removed) { 3379 spin_unlock(&block_group->lock); 3380 return 0; 3381 } 3382 btrfs_get_block_group_trimming(block_group); 3383 spin_unlock(&block_group->lock); 3384 3385 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3386 if (ret) 3387 goto out; 3388 3389 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3390 out: 3391 btrfs_put_block_group_trimming(block_group); 3392 return ret; 3393 } 3394 3395 /* 3396 * Find the left-most item in the cache tree, and then return the 3397 * smallest inode number in the item. 3398 * 3399 * Note: the returned inode number may not be the smallest one in 3400 * the tree, if the left-most item is a bitmap. 3401 */ 3402 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3403 { 3404 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3405 struct btrfs_free_space *entry = NULL; 3406 u64 ino = 0; 3407 3408 spin_lock(&ctl->tree_lock); 3409 3410 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3411 goto out; 3412 3413 entry = rb_entry(rb_first(&ctl->free_space_offset), 3414 struct btrfs_free_space, offset_index); 3415 3416 if (!entry->bitmap) { 3417 ino = entry->offset; 3418 3419 unlink_free_space(ctl, entry); 3420 entry->offset++; 3421 entry->bytes--; 3422 if (!entry->bytes) 3423 kmem_cache_free(btrfs_free_space_cachep, entry); 3424 else 3425 link_free_space(ctl, entry); 3426 } else { 3427 u64 offset = 0; 3428 u64 count = 1; 3429 int ret; 3430 3431 ret = search_bitmap(ctl, entry, &offset, &count, true); 3432 /* Logic error; Should be empty if it can't find anything */ 3433 ASSERT(!ret); 3434 3435 ino = offset; 3436 bitmap_clear_bits(ctl, entry, offset, 1); 3437 if (entry->bytes == 0) 3438 free_bitmap(ctl, entry); 3439 } 3440 out: 3441 spin_unlock(&ctl->tree_lock); 3442 3443 return ino; 3444 } 3445 3446 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3447 struct btrfs_path *path) 3448 { 3449 struct inode *inode = NULL; 3450 3451 spin_lock(&root->ino_cache_lock); 3452 if (root->ino_cache_inode) 3453 inode = igrab(root->ino_cache_inode); 3454 spin_unlock(&root->ino_cache_lock); 3455 if (inode) 3456 return inode; 3457 3458 inode = __lookup_free_space_inode(root, path, 0); 3459 if (IS_ERR(inode)) 3460 return inode; 3461 3462 spin_lock(&root->ino_cache_lock); 3463 if (!btrfs_fs_closing(root->fs_info)) 3464 root->ino_cache_inode = igrab(inode); 3465 spin_unlock(&root->ino_cache_lock); 3466 3467 return inode; 3468 } 3469 3470 int create_free_ino_inode(struct btrfs_root *root, 3471 struct btrfs_trans_handle *trans, 3472 struct btrfs_path *path) 3473 { 3474 return __create_free_space_inode(root, trans, path, 3475 BTRFS_FREE_INO_OBJECTID, 0); 3476 } 3477 3478 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3479 { 3480 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3481 struct btrfs_path *path; 3482 struct inode *inode; 3483 int ret = 0; 3484 u64 root_gen = btrfs_root_generation(&root->root_item); 3485 3486 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3487 return 0; 3488 3489 /* 3490 * If we're unmounting then just return, since this does a search on the 3491 * normal root and not the commit root and we could deadlock. 
3492 */ 3493 if (btrfs_fs_closing(fs_info)) 3494 return 0; 3495 3496 path = btrfs_alloc_path(); 3497 if (!path) 3498 return 0; 3499 3500 inode = lookup_free_ino_inode(root, path); 3501 if (IS_ERR(inode)) 3502 goto out; 3503 3504 if (root_gen != BTRFS_I(inode)->generation) 3505 goto out_put; 3506 3507 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3508 3509 if (ret < 0) 3510 btrfs_err(fs_info, 3511 "failed to load free ino cache for root %llu", 3512 root->root_key.objectid); 3513 out_put: 3514 iput(inode); 3515 out: 3516 btrfs_free_path(path); 3517 return ret; 3518 } 3519 3520 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3521 struct btrfs_trans_handle *trans, 3522 struct btrfs_path *path, 3523 struct inode *inode) 3524 { 3525 struct btrfs_fs_info *fs_info = root->fs_info; 3526 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3527 int ret; 3528 struct btrfs_io_ctl io_ctl; 3529 bool release_metadata = true; 3530 3531 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3532 return 0; 3533 3534 memset(&io_ctl, 0, sizeof(io_ctl)); 3535 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3536 if (!ret) { 3537 /* 3538 * At this point writepages() didn't error out, so our metadata 3539 * reservation is released when the writeback finishes, at 3540 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3541 * with or without an error. 3542 */ 3543 release_metadata = false; 3544 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3545 } 3546 3547 if (ret) { 3548 if (release_metadata) 3549 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3550 inode->i_size); 3551 #ifdef DEBUG 3552 btrfs_err(fs_info, 3553 "failed to write free ino cache for root %llu", 3554 root->root_key.objectid); 3555 #endif 3556 } 3557 3558 return ret; 3559 } 3560 3561 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3562 /* 3563 * Use this if you need to make a bitmap or extent entry specifically, it 3564 * doesn't do any of the merging that add_free_space does, this acts a lot like 3565 * how the free space cache loading stuff works, so you can get really weird 3566 * configurations. 
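 *
 * For example, test_add_free_space_entry(cache, 0, SZ_4M, false) links a
 * plain 4M extent entry at offset 0, while passing true for the 'bitmap'
 * argument forces the same range to be accounted in a bitmap entry,
 * regardless of what use_bitmap() would normally decide.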
3567 */ 3568 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3569 u64 offset, u64 bytes, bool bitmap) 3570 { 3571 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3572 struct btrfs_free_space *info = NULL, *bitmap_info; 3573 void *map = NULL; 3574 u64 bytes_added; 3575 int ret; 3576 3577 again: 3578 if (!info) { 3579 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3580 if (!info) 3581 return -ENOMEM; 3582 } 3583 3584 if (!bitmap) { 3585 spin_lock(&ctl->tree_lock); 3586 info->offset = offset; 3587 info->bytes = bytes; 3588 info->max_extent_size = 0; 3589 ret = link_free_space(ctl, info); 3590 spin_unlock(&ctl->tree_lock); 3591 if (ret) 3592 kmem_cache_free(btrfs_free_space_cachep, info); 3593 return ret; 3594 } 3595 3596 if (!map) { 3597 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3598 if (!map) { 3599 kmem_cache_free(btrfs_free_space_cachep, info); 3600 return -ENOMEM; 3601 } 3602 } 3603 3604 spin_lock(&ctl->tree_lock); 3605 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3606 1, 0); 3607 if (!bitmap_info) { 3608 info->bitmap = map; 3609 map = NULL; 3610 add_new_bitmap(ctl, info, offset); 3611 bitmap_info = info; 3612 info = NULL; 3613 } 3614 3615 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3616 3617 bytes -= bytes_added; 3618 offset += bytes_added; 3619 spin_unlock(&ctl->tree_lock); 3620 3621 if (bytes) 3622 goto again; 3623 3624 if (info) 3625 kmem_cache_free(btrfs_free_space_cachep, info); 3626 if (map) 3627 kfree(map); 3628 return 0; 3629 } 3630 3631 /* 3632 * Checks to see if the given range is in the free space cache. This is really 3633 * just used to check the absence of space, so if there is free space in the 3634 * range at all we will return 1. 3635 */ 3636 int test_check_exists(struct btrfs_block_group_cache *cache, 3637 u64 offset, u64 bytes) 3638 { 3639 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3640 struct btrfs_free_space *info; 3641 int ret = 0; 3642 3643 spin_lock(&ctl->tree_lock); 3644 info = tree_search_offset(ctl, offset, 0, 0); 3645 if (!info) { 3646 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3647 1, 0); 3648 if (!info) 3649 goto out; 3650 } 3651 3652 have_info: 3653 if (info->bitmap) { 3654 u64 bit_off, bit_bytes; 3655 struct rb_node *n; 3656 struct btrfs_free_space *tmp; 3657 3658 bit_off = offset; 3659 bit_bytes = ctl->unit; 3660 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3661 if (!ret) { 3662 if (bit_off == offset) { 3663 ret = 1; 3664 goto out; 3665 } else if (bit_off > offset && 3666 offset + bytes > bit_off) { 3667 ret = 1; 3668 goto out; 3669 } 3670 } 3671 3672 n = rb_prev(&info->offset_index); 3673 while (n) { 3674 tmp = rb_entry(n, struct btrfs_free_space, 3675 offset_index); 3676 if (tmp->offset + tmp->bytes < offset) 3677 break; 3678 if (offset + bytes < tmp->offset) { 3679 n = rb_prev(&tmp->offset_index); 3680 continue; 3681 } 3682 info = tmp; 3683 goto have_info; 3684 } 3685 3686 n = rb_next(&info->offset_index); 3687 while (n) { 3688 tmp = rb_entry(n, struct btrfs_free_space, 3689 offset_index); 3690 if (offset + bytes < tmp->offset) 3691 break; 3692 if (tmp->offset + tmp->bytes < offset) { 3693 n = rb_next(&tmp->offset_index); 3694 continue; 3695 } 3696 info = tmp; 3697 goto have_info; 3698 } 3699 3700 ret = 0; 3701 goto out; 3702 } 3703 3704 if (info->offset == offset) { 3705 ret = 1; 3706 goto out; 3707 } 3708 3709 if (offset > info->offset && offset < info->offset + info->bytes) 3710 ret = 1; 3711 out: 3712 
spin_unlock(&ctl->tree_lock); 3713 return ret; 3714 } 3715 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3716