1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/sched/signal.h> 22 #include <linux/slab.h> 23 #include <linux/math64.h> 24 #include <linux/ratelimit.h> 25 #include "ctree.h" 26 #include "free-space-cache.h" 27 #include "transaction.h" 28 #include "disk-io.h" 29 #include "extent_io.h" 30 #include "inode-map.h" 31 #include "volumes.h" 32 33 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 34 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 35 36 struct btrfs_trim_range { 37 u64 start; 38 u64 bytes; 39 struct list_head list; 40 }; 41 42 static int link_free_space(struct btrfs_free_space_ctl *ctl, 43 struct btrfs_free_space *info); 44 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 45 struct btrfs_free_space *info); 46 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 47 struct btrfs_trans_handle *trans, 48 struct btrfs_io_ctl *io_ctl, 49 struct btrfs_path *path); 50 51 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 52 struct btrfs_path *path, 53 u64 offset) 54 { 55 struct btrfs_fs_info *fs_info = root->fs_info; 56 struct btrfs_key key; 57 struct btrfs_key location; 58 struct btrfs_disk_key disk_key; 59 struct btrfs_free_space_header *header; 60 struct extent_buffer *leaf; 61 struct inode *inode = NULL; 62 int ret; 63 64 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 65 key.offset = offset; 66 key.type = 0; 67 68 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 69 if (ret < 0) 70 return ERR_PTR(ret); 71 if (ret > 0) { 72 btrfs_release_path(path); 73 return ERR_PTR(-ENOENT); 74 } 75 76 leaf = path->nodes[0]; 77 header = btrfs_item_ptr(leaf, path->slots[0], 78 struct btrfs_free_space_header); 79 btrfs_free_space_key(leaf, header, &disk_key); 80 btrfs_disk_key_to_cpu(&location, &disk_key); 81 btrfs_release_path(path); 82 83 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 84 if (IS_ERR(inode)) 85 return inode; 86 if (is_bad_inode(inode)) { 87 iput(inode); 88 return ERR_PTR(-ENOENT); 89 } 90 91 mapping_set_gfp_mask(inode->i_mapping, 92 mapping_gfp_constraint(inode->i_mapping, 93 ~(__GFP_FS | __GFP_HIGHMEM))); 94 95 return inode; 96 } 97 98 struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 99 struct btrfs_block_group_cache 100 *block_group, struct btrfs_path *path) 101 { 102 struct inode *inode = NULL; 103 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 104 105 spin_lock(&block_group->lock); 106 if (block_group->inode) 107 inode = igrab(block_group->inode); 108 spin_unlock(&block_group->lock); 109 if (inode) 110 return inode; 111 112 inode = __lookup_free_space_inode(fs_info->tree_root, path, 113 block_group->key.objectid); 114 if (IS_ERR(inode)) 115 return inode; 116 117 spin_lock(&block_group->lock); 118 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 119 
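		/*
		 * Note (descriptive, based on __create_free_space_inode() and
		 * io_ctl_init() below): space cache inodes are expected to
		 * carry NODATASUM and NODATACOW because checksums for the
		 * cache are inlined in the cache pages themselves.  An inode
		 * missing these flags is from an old format, so add the flags
		 * and force the cache to be rewritten.
		 */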
btrfs_info(fs_info, "Old style space inode found, converting."); 120 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 121 BTRFS_INODE_NODATACOW; 122 block_group->disk_cache_state = BTRFS_DC_CLEAR; 123 } 124 125 if (!block_group->iref) { 126 block_group->inode = igrab(inode); 127 block_group->iref = 1; 128 } 129 spin_unlock(&block_group->lock); 130 131 return inode; 132 } 133 134 static int __create_free_space_inode(struct btrfs_root *root, 135 struct btrfs_trans_handle *trans, 136 struct btrfs_path *path, 137 u64 ino, u64 offset) 138 { 139 struct btrfs_key key; 140 struct btrfs_disk_key disk_key; 141 struct btrfs_free_space_header *header; 142 struct btrfs_inode_item *inode_item; 143 struct extent_buffer *leaf; 144 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 145 int ret; 146 147 ret = btrfs_insert_empty_inode(trans, root, path, ino); 148 if (ret) 149 return ret; 150 151 /* We inline crc's for the free disk space cache */ 152 if (ino != BTRFS_FREE_INO_OBJECTID) 153 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 154 155 leaf = path->nodes[0]; 156 inode_item = btrfs_item_ptr(leaf, path->slots[0], 157 struct btrfs_inode_item); 158 btrfs_item_key(leaf, &disk_key, path->slots[0]); 159 memzero_extent_buffer(leaf, (unsigned long)inode_item, 160 sizeof(*inode_item)); 161 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 162 btrfs_set_inode_size(leaf, inode_item, 0); 163 btrfs_set_inode_nbytes(leaf, inode_item, 0); 164 btrfs_set_inode_uid(leaf, inode_item, 0); 165 btrfs_set_inode_gid(leaf, inode_item, 0); 166 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 167 btrfs_set_inode_flags(leaf, inode_item, flags); 168 btrfs_set_inode_nlink(leaf, inode_item, 1); 169 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 170 btrfs_set_inode_block_group(leaf, inode_item, offset); 171 btrfs_mark_buffer_dirty(leaf); 172 btrfs_release_path(path); 173 174 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 175 key.offset = offset; 176 key.type = 0; 177 ret = btrfs_insert_empty_item(trans, root, path, &key, 178 sizeof(struct btrfs_free_space_header)); 179 if (ret < 0) { 180 btrfs_release_path(path); 181 return ret; 182 } 183 184 leaf = path->nodes[0]; 185 header = btrfs_item_ptr(leaf, path->slots[0], 186 struct btrfs_free_space_header); 187 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 188 btrfs_set_free_space_key(leaf, header, &disk_key); 189 btrfs_mark_buffer_dirty(leaf); 190 btrfs_release_path(path); 191 192 return 0; 193 } 194 195 int create_free_space_inode(struct btrfs_fs_info *fs_info, 196 struct btrfs_trans_handle *trans, 197 struct btrfs_block_group_cache *block_group, 198 struct btrfs_path *path) 199 { 200 int ret; 201 u64 ino; 202 203 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 204 if (ret < 0) 205 return ret; 206 207 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 208 block_group->key.objectid); 209 } 210 211 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 212 struct btrfs_block_rsv *rsv) 213 { 214 u64 needed_bytes; 215 int ret; 216 217 /* 1 for slack space, 1 for updating the inode */ 218 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 219 btrfs_calc_trans_metadata_size(fs_info, 1); 220 221 spin_lock(&rsv->lock); 222 if (rsv->reserved < needed_bytes) 223 ret = -ENOSPC; 224 else 225 ret = 0; 226 spin_unlock(&rsv->lock); 227 return ret; 228 } 229 230 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 231 struct btrfs_block_group_cache *block_group, 232 
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * now that we've truncated the cache away, it's no longer
		 * set up or written
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 * We don't need to check for -EAGAIN because we're a free space
	 * cache inode.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;
	int check_crcs = 0;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
		check_crcs = 1;

	/* Make sure we can fit our crcs into the first page */
	if (write && check_crcs &&
	    (num_pages * sizeof(u32)) >= PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->check_crcs = check_crcs;
	io_ctl->inode = inode;

	return 0;
}

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
put_page(io_ctl->pages[i]); 372 } 373 } 374 } 375 376 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 377 int uptodate) 378 { 379 struct page *page; 380 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 381 int i; 382 383 for (i = 0; i < io_ctl->num_pages; i++) { 384 page = find_or_create_page(inode->i_mapping, i, mask); 385 if (!page) { 386 io_ctl_drop_pages(io_ctl); 387 return -ENOMEM; 388 } 389 io_ctl->pages[i] = page; 390 if (uptodate && !PageUptodate(page)) { 391 btrfs_readpage(NULL, page); 392 lock_page(page); 393 if (!PageUptodate(page)) { 394 btrfs_err(BTRFS_I(inode)->root->fs_info, 395 "error reading free space cache"); 396 io_ctl_drop_pages(io_ctl); 397 return -EIO; 398 } 399 } 400 } 401 402 for (i = 0; i < io_ctl->num_pages; i++) { 403 clear_page_dirty_for_io(io_ctl->pages[i]); 404 set_page_extent_mapped(io_ctl->pages[i]); 405 } 406 407 return 0; 408 } 409 410 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 411 { 412 __le64 *val; 413 414 io_ctl_map_page(io_ctl, 1); 415 416 /* 417 * Skip the csum areas. If we don't check crcs then we just have a 418 * 64bit chunk at the front of the first page. 419 */ 420 if (io_ctl->check_crcs) { 421 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 422 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 423 } else { 424 io_ctl->cur += sizeof(u64); 425 io_ctl->size -= sizeof(u64) * 2; 426 } 427 428 val = io_ctl->cur; 429 *val = cpu_to_le64(generation); 430 io_ctl->cur += sizeof(u64); 431 } 432 433 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 434 { 435 __le64 *gen; 436 437 /* 438 * Skip the crc area. If we don't check crcs then we just have a 64bit 439 * chunk at the front of the first page. 
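	 *
	 * Descriptive note (from io_ctl_set_crc()/io_ctl_set_generation()
	 * above): with check_crcs the front of the first page holds an array
	 * of num_pages u32 checksums, one per page, immediately followed by
	 * the u64 generation; without crcs only the generation sits at the
	 * front of the first page.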
440 */ 441 if (io_ctl->check_crcs) { 442 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 443 io_ctl->size -= sizeof(u64) + 444 (sizeof(u32) * io_ctl->num_pages); 445 } else { 446 io_ctl->cur += sizeof(u64); 447 io_ctl->size -= sizeof(u64) * 2; 448 } 449 450 gen = io_ctl->cur; 451 if (le64_to_cpu(*gen) != generation) { 452 btrfs_err_rl(io_ctl->fs_info, 453 "space cache generation (%llu) does not match inode (%llu)", 454 *gen, generation); 455 io_ctl_unmap_page(io_ctl); 456 return -EIO; 457 } 458 io_ctl->cur += sizeof(u64); 459 return 0; 460 } 461 462 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 463 { 464 u32 *tmp; 465 u32 crc = ~(u32)0; 466 unsigned offset = 0; 467 468 if (!io_ctl->check_crcs) { 469 io_ctl_unmap_page(io_ctl); 470 return; 471 } 472 473 if (index == 0) 474 offset = sizeof(u32) * io_ctl->num_pages; 475 476 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 477 PAGE_SIZE - offset); 478 btrfs_csum_final(crc, (u8 *)&crc); 479 io_ctl_unmap_page(io_ctl); 480 tmp = page_address(io_ctl->pages[0]); 481 tmp += index; 482 *tmp = crc; 483 } 484 485 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 486 { 487 u32 *tmp, val; 488 u32 crc = ~(u32)0; 489 unsigned offset = 0; 490 491 if (!io_ctl->check_crcs) { 492 io_ctl_map_page(io_ctl, 0); 493 return 0; 494 } 495 496 if (index == 0) 497 offset = sizeof(u32) * io_ctl->num_pages; 498 499 tmp = page_address(io_ctl->pages[0]); 500 tmp += index; 501 val = *tmp; 502 503 io_ctl_map_page(io_ctl, 0); 504 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 505 PAGE_SIZE - offset); 506 btrfs_csum_final(crc, (u8 *)&crc); 507 if (val != crc) { 508 btrfs_err_rl(io_ctl->fs_info, 509 "csum mismatch on free space cache"); 510 io_ctl_unmap_page(io_ctl); 511 return -EIO; 512 } 513 514 return 0; 515 } 516 517 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 518 void *bitmap) 519 { 520 struct btrfs_free_space_entry *entry; 521 522 if (!io_ctl->cur) 523 return -ENOSPC; 524 525 entry = io_ctl->cur; 526 entry->offset = cpu_to_le64(offset); 527 entry->bytes = cpu_to_le64(bytes); 528 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 529 BTRFS_FREE_SPACE_EXTENT; 530 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 531 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 532 533 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 534 return 0; 535 536 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 537 538 /* No more pages to map */ 539 if (io_ctl->index >= io_ctl->num_pages) 540 return 0; 541 542 /* map the next page */ 543 io_ctl_map_page(io_ctl, 1); 544 return 0; 545 } 546 547 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 548 { 549 if (!io_ctl->cur) 550 return -ENOSPC; 551 552 /* 553 * If we aren't at the start of the current page, unmap this one and 554 * map the next one if there is any left. 555 */ 556 if (io_ctl->cur != io_ctl->orig) { 557 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 558 if (io_ctl->index >= io_ctl->num_pages) 559 return -ENOSPC; 560 io_ctl_map_page(io_ctl, 0); 561 } 562 563 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 564 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 565 if (io_ctl->index < io_ctl->num_pages) 566 io_ctl_map_page(io_ctl, 0); 567 return 0; 568 } 569 570 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 571 { 572 /* 573 * If we're not on the boundary we know we've modified the page and we 574 * need to crc the page. 
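	 *
	 * In other words, the last partially filled page still needs its
	 * checksum before the remaining pages are zeroed out below.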
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

/*
 * Since we attach pinned extents after the fact we can have contiguous
 * sections of free space that are split up in entries.  This is a problem for
 * tree logging: an allocation may have crossed what now looks like two
 * entries, because the entries would have been merged when the pinned extents
 * were added back to the free space cache.  So run through the space cache we
 * just loaded and merge contiguous entries.  This keeps log replay from
 * blowing up and makes for nicer allocator behavior.
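 *
 * Hypothetical example: the cache file may contain a regular free extent
 * entry for [0, 64K) while [64K, 128K) was pinned when the cache was written
 * and recorded separately by write_pinned_extent_entries().  At runtime those
 * ranges were one merged entry, and the tree log may record an allocation
 * straddling 64K, so the two loaded entries are merged back into a single
 * [0, 128K) entry here.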
637 */ 638 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 639 { 640 struct btrfs_free_space *e, *prev = NULL; 641 struct rb_node *n; 642 643 again: 644 spin_lock(&ctl->tree_lock); 645 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 646 e = rb_entry(n, struct btrfs_free_space, offset_index); 647 if (!prev) 648 goto next; 649 if (e->bitmap || prev->bitmap) 650 goto next; 651 if (prev->offset + prev->bytes == e->offset) { 652 unlink_free_space(ctl, prev); 653 unlink_free_space(ctl, e); 654 prev->bytes += e->bytes; 655 kmem_cache_free(btrfs_free_space_cachep, e); 656 link_free_space(ctl, prev); 657 prev = NULL; 658 spin_unlock(&ctl->tree_lock); 659 goto again; 660 } 661 next: 662 prev = e; 663 } 664 spin_unlock(&ctl->tree_lock); 665 } 666 667 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 668 struct btrfs_free_space_ctl *ctl, 669 struct btrfs_path *path, u64 offset) 670 { 671 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 672 struct btrfs_free_space_header *header; 673 struct extent_buffer *leaf; 674 struct btrfs_io_ctl io_ctl; 675 struct btrfs_key key; 676 struct btrfs_free_space *e, *n; 677 LIST_HEAD(bitmaps); 678 u64 num_entries; 679 u64 num_bitmaps; 680 u64 generation; 681 u8 type; 682 int ret = 0; 683 684 /* Nothing in the space cache, goodbye */ 685 if (!i_size_read(inode)) 686 return 0; 687 688 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 689 key.offset = offset; 690 key.type = 0; 691 692 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 693 if (ret < 0) 694 return 0; 695 else if (ret > 0) { 696 btrfs_release_path(path); 697 return 0; 698 } 699 700 ret = -1; 701 702 leaf = path->nodes[0]; 703 header = btrfs_item_ptr(leaf, path->slots[0], 704 struct btrfs_free_space_header); 705 num_entries = btrfs_free_space_entries(leaf, header); 706 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 707 generation = btrfs_free_space_generation(leaf, header); 708 btrfs_release_path(path); 709 710 if (!BTRFS_I(inode)->generation) { 711 btrfs_info(fs_info, 712 "The free space cache file (%llu) is invalid. 
skip it\n", 713 offset); 714 return 0; 715 } 716 717 if (BTRFS_I(inode)->generation != generation) { 718 btrfs_err(fs_info, 719 "free space inode generation (%llu) did not match free space cache generation (%llu)", 720 BTRFS_I(inode)->generation, generation); 721 return 0; 722 } 723 724 if (!num_entries) 725 return 0; 726 727 ret = io_ctl_init(&io_ctl, inode, 0); 728 if (ret) 729 return ret; 730 731 readahead_cache(inode); 732 733 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 734 if (ret) 735 goto out; 736 737 ret = io_ctl_check_crc(&io_ctl, 0); 738 if (ret) 739 goto free_cache; 740 741 ret = io_ctl_check_generation(&io_ctl, generation); 742 if (ret) 743 goto free_cache; 744 745 while (num_entries) { 746 e = kmem_cache_zalloc(btrfs_free_space_cachep, 747 GFP_NOFS); 748 if (!e) 749 goto free_cache; 750 751 ret = io_ctl_read_entry(&io_ctl, e, &type); 752 if (ret) { 753 kmem_cache_free(btrfs_free_space_cachep, e); 754 goto free_cache; 755 } 756 757 if (!e->bytes) { 758 kmem_cache_free(btrfs_free_space_cachep, e); 759 goto free_cache; 760 } 761 762 if (type == BTRFS_FREE_SPACE_EXTENT) { 763 spin_lock(&ctl->tree_lock); 764 ret = link_free_space(ctl, e); 765 spin_unlock(&ctl->tree_lock); 766 if (ret) { 767 btrfs_err(fs_info, 768 "Duplicate entries in free space cache, dumping"); 769 kmem_cache_free(btrfs_free_space_cachep, e); 770 goto free_cache; 771 } 772 } else { 773 ASSERT(num_bitmaps); 774 num_bitmaps--; 775 e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 776 if (!e->bitmap) { 777 kmem_cache_free( 778 btrfs_free_space_cachep, e); 779 goto free_cache; 780 } 781 spin_lock(&ctl->tree_lock); 782 ret = link_free_space(ctl, e); 783 ctl->total_bitmaps++; 784 ctl->op->recalc_thresholds(ctl); 785 spin_unlock(&ctl->tree_lock); 786 if (ret) { 787 btrfs_err(fs_info, 788 "Duplicate entries in free space cache, dumping"); 789 kmem_cache_free(btrfs_free_space_cachep, e); 790 goto free_cache; 791 } 792 list_add_tail(&e->list, &bitmaps); 793 } 794 795 num_entries--; 796 } 797 798 io_ctl_unmap_page(&io_ctl); 799 800 /* 801 * We add the bitmaps at the end of the entries in order that 802 * the bitmap entries are added to the cache. 803 */ 804 list_for_each_entry_safe(e, n, &bitmaps, list) { 805 list_del_init(&e->list); 806 ret = io_ctl_read_bitmap(&io_ctl, e); 807 if (ret) 808 goto free_cache; 809 } 810 811 io_ctl_drop_pages(&io_ctl); 812 merge_space_tree(ctl); 813 ret = 1; 814 out: 815 io_ctl_free(&io_ctl); 816 return ret; 817 free_cache: 818 io_ctl_drop_pages(&io_ctl); 819 __btrfs_remove_free_space_cache(ctl); 820 goto out; 821 } 822 823 int load_free_space_cache(struct btrfs_fs_info *fs_info, 824 struct btrfs_block_group_cache *block_group) 825 { 826 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 827 struct inode *inode; 828 struct btrfs_path *path; 829 int ret = 0; 830 bool matched; 831 u64 used = btrfs_block_group_used(&block_group->item); 832 833 /* 834 * If this block group has been marked to be cleared for one reason or 835 * another then we can't trust the on disk cache, so just return. 
836 */ 837 spin_lock(&block_group->lock); 838 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 839 spin_unlock(&block_group->lock); 840 return 0; 841 } 842 spin_unlock(&block_group->lock); 843 844 path = btrfs_alloc_path(); 845 if (!path) 846 return 0; 847 path->search_commit_root = 1; 848 path->skip_locking = 1; 849 850 inode = lookup_free_space_inode(fs_info, block_group, path); 851 if (IS_ERR(inode)) { 852 btrfs_free_path(path); 853 return 0; 854 } 855 856 /* We may have converted the inode and made the cache invalid. */ 857 spin_lock(&block_group->lock); 858 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 859 spin_unlock(&block_group->lock); 860 btrfs_free_path(path); 861 goto out; 862 } 863 spin_unlock(&block_group->lock); 864 865 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 866 path, block_group->key.objectid); 867 btrfs_free_path(path); 868 if (ret <= 0) 869 goto out; 870 871 spin_lock(&ctl->tree_lock); 872 matched = (ctl->free_space == (block_group->key.offset - used - 873 block_group->bytes_super)); 874 spin_unlock(&ctl->tree_lock); 875 876 if (!matched) { 877 __btrfs_remove_free_space_cache(ctl); 878 btrfs_warn(fs_info, 879 "block group %llu has wrong amount of free space", 880 block_group->key.objectid); 881 ret = -1; 882 } 883 out: 884 if (ret < 0) { 885 /* This cache is bogus, make sure it gets cleared */ 886 spin_lock(&block_group->lock); 887 block_group->disk_cache_state = BTRFS_DC_CLEAR; 888 spin_unlock(&block_group->lock); 889 ret = 0; 890 891 btrfs_warn(fs_info, 892 "failed to load free space cache for block group %llu, rebuilding it now", 893 block_group->key.objectid); 894 } 895 896 iput(inode); 897 return ret; 898 } 899 900 static noinline_for_stack 901 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 902 struct btrfs_free_space_ctl *ctl, 903 struct btrfs_block_group_cache *block_group, 904 int *entries, int *bitmaps, 905 struct list_head *bitmap_list) 906 { 907 int ret; 908 struct btrfs_free_cluster *cluster = NULL; 909 struct btrfs_free_cluster *cluster_locked = NULL; 910 struct rb_node *node = rb_first(&ctl->free_space_offset); 911 struct btrfs_trim_range *trim_entry; 912 913 /* Get the cluster for this block_group if it exists */ 914 if (block_group && !list_empty(&block_group->cluster_list)) { 915 cluster = list_entry(block_group->cluster_list.next, 916 struct btrfs_free_cluster, 917 block_group_list); 918 } 919 920 if (!node && cluster) { 921 cluster_locked = cluster; 922 spin_lock(&cluster_locked->lock); 923 node = rb_first(&cluster->root); 924 cluster = NULL; 925 } 926 927 /* Write out the extent entries */ 928 while (node) { 929 struct btrfs_free_space *e; 930 931 e = rb_entry(node, struct btrfs_free_space, offset_index); 932 *entries += 1; 933 934 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 935 e->bitmap); 936 if (ret) 937 goto fail; 938 939 if (e->bitmap) { 940 list_add_tail(&e->list, bitmap_list); 941 *bitmaps += 1; 942 } 943 node = rb_next(node); 944 if (!node && cluster) { 945 node = rb_first(&cluster->root); 946 cluster_locked = cluster; 947 spin_lock(&cluster_locked->lock); 948 cluster = NULL; 949 } 950 } 951 if (cluster_locked) { 952 spin_unlock(&cluster_locked->lock); 953 cluster_locked = NULL; 954 } 955 956 /* 957 * Make sure we don't miss any range that was removed from our rbtree 958 * because trimming is running. Otherwise after a umount+mount (or crash 959 * after committing the transaction) we would leak free space and get 960 * an inconsistent free space cache report from fsck. 
961 */ 962 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 963 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 964 trim_entry->bytes, NULL); 965 if (ret) 966 goto fail; 967 *entries += 1; 968 } 969 970 return 0; 971 fail: 972 if (cluster_locked) 973 spin_unlock(&cluster_locked->lock); 974 return -ENOSPC; 975 } 976 977 static noinline_for_stack int 978 update_cache_item(struct btrfs_trans_handle *trans, 979 struct btrfs_root *root, 980 struct inode *inode, 981 struct btrfs_path *path, u64 offset, 982 int entries, int bitmaps) 983 { 984 struct btrfs_key key; 985 struct btrfs_free_space_header *header; 986 struct extent_buffer *leaf; 987 int ret; 988 989 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 990 key.offset = offset; 991 key.type = 0; 992 993 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 994 if (ret < 0) { 995 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 996 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 997 GFP_NOFS); 998 goto fail; 999 } 1000 leaf = path->nodes[0]; 1001 if (ret > 0) { 1002 struct btrfs_key found_key; 1003 ASSERT(path->slots[0]); 1004 path->slots[0]--; 1005 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1006 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1007 found_key.offset != offset) { 1008 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1009 inode->i_size - 1, 1010 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1011 NULL, GFP_NOFS); 1012 btrfs_release_path(path); 1013 goto fail; 1014 } 1015 } 1016 1017 BTRFS_I(inode)->generation = trans->transid; 1018 header = btrfs_item_ptr(leaf, path->slots[0], 1019 struct btrfs_free_space_header); 1020 btrfs_set_free_space_entries(leaf, header, entries); 1021 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1022 btrfs_set_free_space_generation(leaf, header, trans->transid); 1023 btrfs_mark_buffer_dirty(leaf); 1024 btrfs_release_path(path); 1025 1026 return 0; 1027 1028 fail: 1029 return -1; 1030 } 1031 1032 static noinline_for_stack int 1033 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1034 struct btrfs_block_group_cache *block_group, 1035 struct btrfs_io_ctl *io_ctl, 1036 int *entries) 1037 { 1038 u64 start, extent_start, extent_end, len; 1039 struct extent_io_tree *unpin = NULL; 1040 int ret; 1041 1042 if (!block_group) 1043 return 0; 1044 1045 /* 1046 * We want to add any pinned extents to our free space cache 1047 * so we don't leak the space 1048 * 1049 * We shouldn't have switched the pinned extents yet so this is the 1050 * right one 1051 */ 1052 unpin = fs_info->pinned_extents; 1053 1054 start = block_group->key.objectid; 1055 1056 while (start < block_group->key.objectid + block_group->key.offset) { 1057 ret = find_first_extent_bit(unpin, start, 1058 &extent_start, &extent_end, 1059 EXTENT_DIRTY, NULL); 1060 if (ret) 1061 return 0; 1062 1063 /* This pinned extent is out of our range */ 1064 if (extent_start >= block_group->key.objectid + 1065 block_group->key.offset) 1066 return 0; 1067 1068 extent_start = max(extent_start, start); 1069 extent_end = min(block_group->key.objectid + 1070 block_group->key.offset, extent_end + 1); 1071 len = extent_end - extent_start; 1072 1073 *entries += 1; 1074 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1075 if (ret) 1076 return -ENOSPC; 1077 1078 start = extent_end; 1079 } 1080 1081 return 0; 1082 } 1083 1084 static noinline_for_stack int 1085 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1086 { 1087 struct btrfs_free_space *entry, *next; 1088 int ret; 1089 1090 /* Write 
out the bitmaps */ 1091 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1092 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1093 if (ret) 1094 return -ENOSPC; 1095 list_del_init(&entry->list); 1096 } 1097 1098 return 0; 1099 } 1100 1101 static int flush_dirty_cache(struct inode *inode) 1102 { 1103 int ret; 1104 1105 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1106 if (ret) 1107 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1108 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1109 GFP_NOFS); 1110 1111 return ret; 1112 } 1113 1114 static void noinline_for_stack 1115 cleanup_bitmap_list(struct list_head *bitmap_list) 1116 { 1117 struct btrfs_free_space *entry, *next; 1118 1119 list_for_each_entry_safe(entry, next, bitmap_list, list) 1120 list_del_init(&entry->list); 1121 } 1122 1123 static void noinline_for_stack 1124 cleanup_write_cache_enospc(struct inode *inode, 1125 struct btrfs_io_ctl *io_ctl, 1126 struct extent_state **cached_state) 1127 { 1128 io_ctl_drop_pages(io_ctl); 1129 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1130 i_size_read(inode) - 1, cached_state, 1131 GFP_NOFS); 1132 } 1133 1134 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1135 struct btrfs_trans_handle *trans, 1136 struct btrfs_block_group_cache *block_group, 1137 struct btrfs_io_ctl *io_ctl, 1138 struct btrfs_path *path, u64 offset) 1139 { 1140 int ret; 1141 struct inode *inode = io_ctl->inode; 1142 struct btrfs_fs_info *fs_info; 1143 1144 if (!inode) 1145 return 0; 1146 1147 fs_info = btrfs_sb(inode->i_sb); 1148 1149 /* Flush the dirty pages in the cache file. */ 1150 ret = flush_dirty_cache(inode); 1151 if (ret) 1152 goto out; 1153 1154 /* Update the cache item to tell everyone this cache file is valid. */ 1155 ret = update_cache_item(trans, root, inode, path, offset, 1156 io_ctl->entries, io_ctl->bitmaps); 1157 out: 1158 io_ctl_free(io_ctl); 1159 if (ret) { 1160 invalidate_inode_pages2(inode->i_mapping); 1161 BTRFS_I(inode)->generation = 0; 1162 if (block_group) { 1163 #ifdef DEBUG 1164 btrfs_err(fs_info, 1165 "failed to write free space cache for block group %llu", 1166 block_group->key.objectid); 1167 #endif 1168 } 1169 } 1170 btrfs_update_inode(trans, root, inode); 1171 1172 if (block_group) { 1173 /* the dirty list is protected by the dirty_bgs_lock */ 1174 spin_lock(&trans->transaction->dirty_bgs_lock); 1175 1176 /* the disk_cache_state is protected by the block group lock */ 1177 spin_lock(&block_group->lock); 1178 1179 /* 1180 * only mark this as written if we didn't get put back on 1181 * the dirty list while waiting for IO. 
Otherwise our 1182 * cache state won't be right, and we won't get written again 1183 */ 1184 if (!ret && list_empty(&block_group->dirty_list)) 1185 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1186 else if (ret) 1187 block_group->disk_cache_state = BTRFS_DC_ERROR; 1188 1189 spin_unlock(&block_group->lock); 1190 spin_unlock(&trans->transaction->dirty_bgs_lock); 1191 io_ctl->inode = NULL; 1192 iput(inode); 1193 } 1194 1195 return ret; 1196 1197 } 1198 1199 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1200 struct btrfs_trans_handle *trans, 1201 struct btrfs_io_ctl *io_ctl, 1202 struct btrfs_path *path) 1203 { 1204 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1205 } 1206 1207 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1208 struct btrfs_block_group_cache *block_group, 1209 struct btrfs_path *path) 1210 { 1211 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1212 block_group, &block_group->io_ctl, 1213 path, block_group->key.objectid); 1214 } 1215 1216 /** 1217 * __btrfs_write_out_cache - write out cached info to an inode 1218 * @root - the root the inode belongs to 1219 * @ctl - the free space cache we are going to write out 1220 * @block_group - the block_group for this cache if it belongs to a block_group 1221 * @trans - the trans handle 1222 * 1223 * This function writes out a free space cache struct to disk for quick recovery 1224 * on mount. This will return 0 if it was successful in writing the cache out, 1225 * or an errno if it was not. 1226 */ 1227 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1228 struct btrfs_free_space_ctl *ctl, 1229 struct btrfs_block_group_cache *block_group, 1230 struct btrfs_io_ctl *io_ctl, 1231 struct btrfs_trans_handle *trans) 1232 { 1233 struct btrfs_fs_info *fs_info = root->fs_info; 1234 struct extent_state *cached_state = NULL; 1235 LIST_HEAD(bitmap_list); 1236 int entries = 0; 1237 int bitmaps = 0; 1238 int ret; 1239 int must_iput = 0; 1240 1241 if (!i_size_read(inode)) 1242 return -EIO; 1243 1244 WARN_ON(io_ctl->pages); 1245 ret = io_ctl_init(io_ctl, inode, 1); 1246 if (ret) 1247 return ret; 1248 1249 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1250 down_write(&block_group->data_rwsem); 1251 spin_lock(&block_group->lock); 1252 if (block_group->delalloc_bytes) { 1253 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1254 spin_unlock(&block_group->lock); 1255 up_write(&block_group->data_rwsem); 1256 BTRFS_I(inode)->generation = 0; 1257 ret = 0; 1258 must_iput = 1; 1259 goto out; 1260 } 1261 spin_unlock(&block_group->lock); 1262 } 1263 1264 /* Lock all pages first so we can lock the extent safely. */ 1265 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1266 if (ret) 1267 goto out; 1268 1269 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1270 &cached_state); 1271 1272 io_ctl_set_generation(io_ctl, trans->transid); 1273 1274 mutex_lock(&ctl->cache_writeout_mutex); 1275 /* Write out the extent entries in the free space cache */ 1276 spin_lock(&ctl->tree_lock); 1277 ret = write_cache_extent_entries(io_ctl, ctl, 1278 block_group, &entries, &bitmaps, 1279 &bitmap_list); 1280 if (ret) 1281 goto out_nospc_locked; 1282 1283 /* 1284 * Some spaces that are freed in the current transaction are pinned, 1285 * they will be added into free space cache after the transaction is 1286 * committed, we shouldn't lose them. 
1287 * 1288 * If this changes while we are working we'll get added back to 1289 * the dirty list and redo it. No locking needed 1290 */ 1291 ret = write_pinned_extent_entries(fs_info, block_group, 1292 io_ctl, &entries); 1293 if (ret) 1294 goto out_nospc_locked; 1295 1296 /* 1297 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1298 * locked while doing it because a concurrent trim can be manipulating 1299 * or freeing the bitmap. 1300 */ 1301 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1302 spin_unlock(&ctl->tree_lock); 1303 mutex_unlock(&ctl->cache_writeout_mutex); 1304 if (ret) 1305 goto out_nospc; 1306 1307 /* Zero out the rest of the pages just to make sure */ 1308 io_ctl_zero_remaining_pages(io_ctl); 1309 1310 /* Everything is written out, now we dirty the pages in the file. */ 1311 ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0, 1312 i_size_read(inode), &cached_state); 1313 if (ret) 1314 goto out_nospc; 1315 1316 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1317 up_write(&block_group->data_rwsem); 1318 /* 1319 * Release the pages and unlock the extent, we will flush 1320 * them out later 1321 */ 1322 io_ctl_drop_pages(io_ctl); 1323 1324 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1325 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1326 1327 /* 1328 * at this point the pages are under IO and we're happy, 1329 * The caller is responsible for waiting on them and updating the 1330 * the cache and the inode 1331 */ 1332 io_ctl->entries = entries; 1333 io_ctl->bitmaps = bitmaps; 1334 1335 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1336 if (ret) 1337 goto out; 1338 1339 return 0; 1340 1341 out: 1342 io_ctl->inode = NULL; 1343 io_ctl_free(io_ctl); 1344 if (ret) { 1345 invalidate_inode_pages2(inode->i_mapping); 1346 BTRFS_I(inode)->generation = 0; 1347 } 1348 btrfs_update_inode(trans, root, inode); 1349 if (must_iput) 1350 iput(inode); 1351 return ret; 1352 1353 out_nospc_locked: 1354 cleanup_bitmap_list(&bitmap_list); 1355 spin_unlock(&ctl->tree_lock); 1356 mutex_unlock(&ctl->cache_writeout_mutex); 1357 1358 out_nospc: 1359 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1360 1361 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1362 up_write(&block_group->data_rwsem); 1363 1364 goto out; 1365 } 1366 1367 int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 1368 struct btrfs_trans_handle *trans, 1369 struct btrfs_block_group_cache *block_group, 1370 struct btrfs_path *path) 1371 { 1372 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1373 struct inode *inode; 1374 int ret = 0; 1375 1376 spin_lock(&block_group->lock); 1377 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1378 spin_unlock(&block_group->lock); 1379 return 0; 1380 } 1381 spin_unlock(&block_group->lock); 1382 1383 inode = lookup_free_space_inode(fs_info, block_group, path); 1384 if (IS_ERR(inode)) 1385 return 0; 1386 1387 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1388 block_group, &block_group->io_ctl, trans); 1389 if (ret) { 1390 #ifdef DEBUG 1391 btrfs_err(fs_info, 1392 "failed to write free space cache for block group %llu", 1393 block_group->key.objectid); 1394 #endif 1395 spin_lock(&block_group->lock); 1396 block_group->disk_cache_state = BTRFS_DC_ERROR; 1397 spin_unlock(&block_group->lock); 1398 1399 block_group->io_ctl.inode = NULL; 1400 iput(inode); 1401 } 1402 1403 /* 1404 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1405 * to wait for IO and put 
the inode 1406 */ 1407 1408 return ret; 1409 } 1410 1411 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1412 u64 offset) 1413 { 1414 ASSERT(offset >= bitmap_start); 1415 offset -= bitmap_start; 1416 return (unsigned long)(div_u64(offset, unit)); 1417 } 1418 1419 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1420 { 1421 return (unsigned long)(div_u64(bytes, unit)); 1422 } 1423 1424 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1425 u64 offset) 1426 { 1427 u64 bitmap_start; 1428 u64 bytes_per_bitmap; 1429 1430 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1431 bitmap_start = offset - ctl->start; 1432 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1433 bitmap_start *= bytes_per_bitmap; 1434 bitmap_start += ctl->start; 1435 1436 return bitmap_start; 1437 } 1438 1439 static int tree_insert_offset(struct rb_root *root, u64 offset, 1440 struct rb_node *node, int bitmap) 1441 { 1442 struct rb_node **p = &root->rb_node; 1443 struct rb_node *parent = NULL; 1444 struct btrfs_free_space *info; 1445 1446 while (*p) { 1447 parent = *p; 1448 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1449 1450 if (offset < info->offset) { 1451 p = &(*p)->rb_left; 1452 } else if (offset > info->offset) { 1453 p = &(*p)->rb_right; 1454 } else { 1455 /* 1456 * we could have a bitmap entry and an extent entry 1457 * share the same offset. If this is the case, we want 1458 * the extent entry to always be found first if we do a 1459 * linear search through the tree, since we want to have 1460 * the quickest allocation time, and allocating from an 1461 * extent is faster than allocating from a bitmap. So 1462 * if we're inserting a bitmap and we find an entry at 1463 * this offset, we want to go right, or after this entry 1464 * logically. If we are inserting an extent and we've 1465 * found a bitmap, we want to go left, or before 1466 * logically. 1467 */ 1468 if (bitmap) { 1469 if (info->bitmap) { 1470 WARN_ON_ONCE(1); 1471 return -EEXIST; 1472 } 1473 p = &(*p)->rb_right; 1474 } else { 1475 if (!info->bitmap) { 1476 WARN_ON_ONCE(1); 1477 return -EEXIST; 1478 } 1479 p = &(*p)->rb_left; 1480 } 1481 } 1482 } 1483 1484 rb_link_node(node, parent, p); 1485 rb_insert_color(node, root); 1486 1487 return 0; 1488 } 1489 1490 /* 1491 * searches the tree for the given offset. 1492 * 1493 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1494 * want a section that has at least bytes size and comes at or after the given 1495 * offset. 1496 */ 1497 static struct btrfs_free_space * 1498 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1499 u64 offset, int bitmap_only, int fuzzy) 1500 { 1501 struct rb_node *n = ctl->free_space_offset.rb_node; 1502 struct btrfs_free_space *entry, *prev = NULL; 1503 1504 /* find entry that is closest to the 'offset' */ 1505 while (1) { 1506 if (!n) { 1507 entry = NULL; 1508 break; 1509 } 1510 1511 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1512 prev = entry; 1513 1514 if (offset < entry->offset) 1515 n = n->rb_left; 1516 else if (offset > entry->offset) 1517 n = n->rb_right; 1518 else 1519 break; 1520 } 1521 1522 if (bitmap_only) { 1523 if (!entry) 1524 return NULL; 1525 if (entry->bitmap) 1526 return entry; 1527 1528 /* 1529 * bitmap entry and extent entry may share same offset, 1530 * in that case, bitmap entry comes after extent entry. 
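		 *
		 * tree_insert_offset() enforces this ordering: for equal
		 * offsets a bitmap entry is sent to the right of the extent
		 * entry, so stepping to the next node from the extent entry
		 * lands on the bitmap that shares its offset.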
1531 */ 1532 n = rb_next(n); 1533 if (!n) 1534 return NULL; 1535 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1536 if (entry->offset != offset) 1537 return NULL; 1538 1539 WARN_ON(!entry->bitmap); 1540 return entry; 1541 } else if (entry) { 1542 if (entry->bitmap) { 1543 /* 1544 * if previous extent entry covers the offset, 1545 * we should return it instead of the bitmap entry 1546 */ 1547 n = rb_prev(&entry->offset_index); 1548 if (n) { 1549 prev = rb_entry(n, struct btrfs_free_space, 1550 offset_index); 1551 if (!prev->bitmap && 1552 prev->offset + prev->bytes > offset) 1553 entry = prev; 1554 } 1555 } 1556 return entry; 1557 } 1558 1559 if (!prev) 1560 return NULL; 1561 1562 /* find last entry before the 'offset' */ 1563 entry = prev; 1564 if (entry->offset > offset) { 1565 n = rb_prev(&entry->offset_index); 1566 if (n) { 1567 entry = rb_entry(n, struct btrfs_free_space, 1568 offset_index); 1569 ASSERT(entry->offset <= offset); 1570 } else { 1571 if (fuzzy) 1572 return entry; 1573 else 1574 return NULL; 1575 } 1576 } 1577 1578 if (entry->bitmap) { 1579 n = rb_prev(&entry->offset_index); 1580 if (n) { 1581 prev = rb_entry(n, struct btrfs_free_space, 1582 offset_index); 1583 if (!prev->bitmap && 1584 prev->offset + prev->bytes > offset) 1585 return prev; 1586 } 1587 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1588 return entry; 1589 } else if (entry->offset + entry->bytes > offset) 1590 return entry; 1591 1592 if (!fuzzy) 1593 return NULL; 1594 1595 while (1) { 1596 if (entry->bitmap) { 1597 if (entry->offset + BITS_PER_BITMAP * 1598 ctl->unit > offset) 1599 break; 1600 } else { 1601 if (entry->offset + entry->bytes > offset) 1602 break; 1603 } 1604 1605 n = rb_next(&entry->offset_index); 1606 if (!n) 1607 return NULL; 1608 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1609 } 1610 return entry; 1611 } 1612 1613 static inline void 1614 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1615 struct btrfs_free_space *info) 1616 { 1617 rb_erase(&info->offset_index, &ctl->free_space_offset); 1618 ctl->free_extents--; 1619 } 1620 1621 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1622 struct btrfs_free_space *info) 1623 { 1624 __unlink_free_space(ctl, info); 1625 ctl->free_space -= info->bytes; 1626 } 1627 1628 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1629 struct btrfs_free_space *info) 1630 { 1631 int ret = 0; 1632 1633 ASSERT(info->bytes || info->bitmap); 1634 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1635 &info->offset_index, (info->bitmap != NULL)); 1636 if (ret) 1637 return ret; 1638 1639 ctl->free_space += info->bytes; 1640 ctl->free_extents++; 1641 return ret; 1642 } 1643 1644 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1645 { 1646 struct btrfs_block_group_cache *block_group = ctl->private; 1647 u64 max_bytes; 1648 u64 bitmap_bytes; 1649 u64 extent_bytes; 1650 u64 size = block_group->key.offset; 1651 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1652 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1653 1654 max_bitmaps = max_t(u64, max_bitmaps, 1); 1655 1656 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1657 1658 /* 1659 * The goal is to keep the total amount of memory used per 1gb of space 1660 * at or below 32k, so we need to adjust how much memory we allow to be 1661 * used by extent based free space tracking 1662 */ 1663 if (size < SZ_1G) 1664 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1665 else 1666 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1667 1668 /* 1669 * we want to account for 1 more bitmap than what we have so we can make 1670 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1671 * we add more bitmaps. 1672 */ 1673 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1674 1675 if (bitmap_bytes >= max_bytes) { 1676 ctl->extents_thresh = 0; 1677 return; 1678 } 1679 1680 /* 1681 * we want the extent entry threshold to always be at most 1/2 the max 1682 * bytes we can have, or whatever is less than that. 1683 */ 1684 extent_bytes = max_bytes - bitmap_bytes; 1685 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1686 1687 ctl->extents_thresh = 1688 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1689 } 1690 1691 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1692 struct btrfs_free_space *info, 1693 u64 offset, u64 bytes) 1694 { 1695 unsigned long start, count; 1696 1697 start = offset_to_bit(info->offset, ctl->unit, offset); 1698 count = bytes_to_bits(bytes, ctl->unit); 1699 ASSERT(start + count <= BITS_PER_BITMAP); 1700 1701 bitmap_clear(info->bitmap, start, count); 1702 1703 info->bytes -= bytes; 1704 } 1705 1706 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1707 struct btrfs_free_space *info, u64 offset, 1708 u64 bytes) 1709 { 1710 __bitmap_clear_bits(ctl, info, offset, bytes); 1711 ctl->free_space -= bytes; 1712 } 1713 1714 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1715 struct btrfs_free_space *info, u64 offset, 1716 u64 bytes) 1717 { 1718 unsigned long start, count; 1719 1720 start = offset_to_bit(info->offset, ctl->unit, offset); 1721 count = bytes_to_bits(bytes, ctl->unit); 1722 ASSERT(start + count <= BITS_PER_BITMAP); 1723 1724 bitmap_set(info->bitmap, start, count); 1725 1726 info->bytes += bytes; 1727 ctl->free_space += bytes; 1728 } 1729 1730 /* 1731 * If we can not find suitable extent, we will use bytes to record 1732 * the size of the max extent. 1733 */ 1734 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1735 struct btrfs_free_space *bitmap_info, u64 *offset, 1736 u64 *bytes, bool for_alloc) 1737 { 1738 unsigned long found_bits = 0; 1739 unsigned long max_bits = 0; 1740 unsigned long bits, i; 1741 unsigned long next_zero; 1742 unsigned long extent_bits; 1743 1744 /* 1745 * Skip searching the bitmap if we don't have a contiguous section that 1746 * is large enough for this allocation. 
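	 *
	 * max_extent_size is a cached upper bound left behind by a previous
	 * failed search (set at the bottom of this function) and cleared in
	 * add_bytes_to_bitmap() when new bits are set, so trusting it here
	 * only skips searches that cannot succeed.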
1747 */ 1748 if (for_alloc && 1749 bitmap_info->max_extent_size && 1750 bitmap_info->max_extent_size < *bytes) { 1751 *bytes = bitmap_info->max_extent_size; 1752 return -1; 1753 } 1754 1755 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1756 max_t(u64, *offset, bitmap_info->offset)); 1757 bits = bytes_to_bits(*bytes, ctl->unit); 1758 1759 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1760 if (for_alloc && bits == 1) { 1761 found_bits = 1; 1762 break; 1763 } 1764 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1765 BITS_PER_BITMAP, i); 1766 extent_bits = next_zero - i; 1767 if (extent_bits >= bits) { 1768 found_bits = extent_bits; 1769 break; 1770 } else if (extent_bits > max_bits) { 1771 max_bits = extent_bits; 1772 } 1773 i = next_zero; 1774 } 1775 1776 if (found_bits) { 1777 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1778 *bytes = (u64)(found_bits) * ctl->unit; 1779 return 0; 1780 } 1781 1782 *bytes = (u64)(max_bits) * ctl->unit; 1783 bitmap_info->max_extent_size = *bytes; 1784 return -1; 1785 } 1786 1787 /* Cache the size of the max extent in bytes */ 1788 static struct btrfs_free_space * 1789 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1790 unsigned long align, u64 *max_extent_size) 1791 { 1792 struct btrfs_free_space *entry; 1793 struct rb_node *node; 1794 u64 tmp; 1795 u64 align_off; 1796 int ret; 1797 1798 if (!ctl->free_space_offset.rb_node) 1799 goto out; 1800 1801 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1802 if (!entry) 1803 goto out; 1804 1805 for (node = &entry->offset_index; node; node = rb_next(node)) { 1806 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1807 if (entry->bytes < *bytes) { 1808 if (entry->bytes > *max_extent_size) 1809 *max_extent_size = entry->bytes; 1810 continue; 1811 } 1812 1813 /* make sure the space returned is big enough 1814 * to match our requested alignment 1815 */ 1816 if (*bytes >= align) { 1817 tmp = entry->offset - ctl->start + align - 1; 1818 tmp = div64_u64(tmp, align); 1819 tmp = tmp * align + ctl->start; 1820 align_off = tmp - entry->offset; 1821 } else { 1822 align_off = 0; 1823 tmp = entry->offset; 1824 } 1825 1826 if (entry->bytes < *bytes + align_off) { 1827 if (entry->bytes > *max_extent_size) 1828 *max_extent_size = entry->bytes; 1829 continue; 1830 } 1831 1832 if (entry->bitmap) { 1833 u64 size = *bytes; 1834 1835 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1836 if (!ret) { 1837 *offset = tmp; 1838 *bytes = size; 1839 return entry; 1840 } else if (size > *max_extent_size) { 1841 *max_extent_size = size; 1842 } 1843 continue; 1844 } 1845 1846 *offset = tmp; 1847 *bytes = entry->bytes - align_off; 1848 return entry; 1849 } 1850 out: 1851 return NULL; 1852 } 1853 1854 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1855 struct btrfs_free_space *info, u64 offset) 1856 { 1857 info->offset = offset_to_bitmap(ctl, offset); 1858 info->bytes = 0; 1859 INIT_LIST_HEAD(&info->list); 1860 link_free_space(ctl, info); 1861 ctl->total_bitmaps++; 1862 1863 ctl->op->recalc_thresholds(ctl); 1864 } 1865 1866 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1867 struct btrfs_free_space *bitmap_info) 1868 { 1869 unlink_free_space(ctl, bitmap_info); 1870 kfree(bitmap_info->bitmap); 1871 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1872 ctl->total_bitmaps--; 1873 ctl->op->recalc_thresholds(ctl); 1874 } 1875 1876 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1877 struct 
			       btrfs_free_space *bitmap_info,
			       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap.  We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * OK, the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
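	 * The bits just set may have joined up with an existing run and grown
	 * the largest contiguous range, so the cached value below is stale;
	 * zeroing it lets the next search_bitmap() recompute it.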
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		      struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have
		 * plenty of cache left then go ahead and add them, no sense in
		 * adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries.  However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so still allow those block groups to have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
2049 */ 2050 if (block_group && !list_empty(&block_group->cluster_list)) { 2051 struct btrfs_free_cluster *cluster; 2052 struct rb_node *node; 2053 struct btrfs_free_space *entry; 2054 2055 cluster = list_entry(block_group->cluster_list.next, 2056 struct btrfs_free_cluster, 2057 block_group_list); 2058 spin_lock(&cluster->lock); 2059 node = rb_first(&cluster->root); 2060 if (!node) { 2061 spin_unlock(&cluster->lock); 2062 goto no_cluster_bitmap; 2063 } 2064 2065 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2066 if (!entry->bitmap) { 2067 spin_unlock(&cluster->lock); 2068 goto no_cluster_bitmap; 2069 } 2070 2071 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2072 bytes_added = add_bytes_to_bitmap(ctl, entry, 2073 offset, bytes); 2074 bytes -= bytes_added; 2075 offset += bytes_added; 2076 } 2077 spin_unlock(&cluster->lock); 2078 if (!bytes) { 2079 ret = 1; 2080 goto out; 2081 } 2082 } 2083 2084 no_cluster_bitmap: 2085 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2086 1, 0); 2087 if (!bitmap_info) { 2088 ASSERT(added == 0); 2089 goto new_bitmap; 2090 } 2091 2092 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2093 bytes -= bytes_added; 2094 offset += bytes_added; 2095 added = 0; 2096 2097 if (!bytes) { 2098 ret = 1; 2099 goto out; 2100 } else 2101 goto again; 2102 2103 new_bitmap: 2104 if (info && info->bitmap) { 2105 add_new_bitmap(ctl, info, offset); 2106 added = 1; 2107 info = NULL; 2108 goto again; 2109 } else { 2110 spin_unlock(&ctl->tree_lock); 2111 2112 /* no pre-allocated info, allocate a new one */ 2113 if (!info) { 2114 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2115 GFP_NOFS); 2116 if (!info) { 2117 spin_lock(&ctl->tree_lock); 2118 ret = -ENOMEM; 2119 goto out; 2120 } 2121 } 2122 2123 /* allocate the bitmap */ 2124 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2125 spin_lock(&ctl->tree_lock); 2126 if (!info->bitmap) { 2127 ret = -ENOMEM; 2128 goto out; 2129 } 2130 goto again; 2131 } 2132 2133 out: 2134 if (info) { 2135 if (info->bitmap) 2136 kfree(info->bitmap); 2137 kmem_cache_free(btrfs_free_space_cachep, info); 2138 } 2139 2140 return ret; 2141 } 2142 2143 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2144 struct btrfs_free_space *info, bool update_stat) 2145 { 2146 struct btrfs_free_space *left_info; 2147 struct btrfs_free_space *right_info; 2148 bool merged = false; 2149 u64 offset = info->offset; 2150 u64 bytes = info->bytes; 2151 2152 /* 2153 * first we want to see if there is free space adjacent to the range we 2154 * are adding, if there is remove that struct and add a new one to 2155 * cover the entire range 2156 */ 2157 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2158 if (right_info && rb_prev(&right_info->offset_index)) 2159 left_info = rb_entry(rb_prev(&right_info->offset_index), 2160 struct btrfs_free_space, offset_index); 2161 else 2162 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2163 2164 if (right_info && !right_info->bitmap) { 2165 if (update_stat) 2166 unlink_free_space(ctl, right_info); 2167 else 2168 __unlink_free_space(ctl, right_info); 2169 info->bytes += right_info->bytes; 2170 kmem_cache_free(btrfs_free_space_cachep, right_info); 2171 merged = true; 2172 } 2173 2174 if (left_info && !left_info->bitmap && 2175 left_info->offset + left_info->bytes == offset) { 2176 if (update_stat) 2177 unlink_free_space(ctl, left_info); 2178 else 2179 __unlink_free_space(ctl, left_info); 2180 info->offset = left_info->offset; 2181 info->bytes += 
left_info->bytes; 2182 kmem_cache_free(btrfs_free_space_cachep, left_info); 2183 merged = true; 2184 } 2185 2186 return merged; 2187 } 2188 2189 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2190 struct btrfs_free_space *info, 2191 bool update_stat) 2192 { 2193 struct btrfs_free_space *bitmap; 2194 unsigned long i; 2195 unsigned long j; 2196 const u64 end = info->offset + info->bytes; 2197 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2198 u64 bytes; 2199 2200 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2201 if (!bitmap) 2202 return false; 2203 2204 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2205 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2206 if (j == i) 2207 return false; 2208 bytes = (j - i) * ctl->unit; 2209 info->bytes += bytes; 2210 2211 if (update_stat) 2212 bitmap_clear_bits(ctl, bitmap, end, bytes); 2213 else 2214 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2215 2216 if (!bitmap->bytes) 2217 free_bitmap(ctl, bitmap); 2218 2219 return true; 2220 } 2221 2222 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2223 struct btrfs_free_space *info, 2224 bool update_stat) 2225 { 2226 struct btrfs_free_space *bitmap; 2227 u64 bitmap_offset; 2228 unsigned long i; 2229 unsigned long j; 2230 unsigned long prev_j; 2231 u64 bytes; 2232 2233 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2234 /* If we're on a boundary, try the previous logical bitmap. */ 2235 if (bitmap_offset == info->offset) { 2236 if (info->offset == 0) 2237 return false; 2238 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2239 } 2240 2241 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2242 if (!bitmap) 2243 return false; 2244 2245 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2246 j = 0; 2247 prev_j = (unsigned long)-1; 2248 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2249 if (j > i) 2250 break; 2251 prev_j = j; 2252 } 2253 if (prev_j == i) 2254 return false; 2255 2256 if (prev_j == (unsigned long)-1) 2257 bytes = (i + 1) * ctl->unit; 2258 else 2259 bytes = (i - prev_j) * ctl->unit; 2260 2261 info->offset -= bytes; 2262 info->bytes += bytes; 2263 2264 if (update_stat) 2265 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2266 else 2267 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2268 2269 if (!bitmap->bytes) 2270 free_bitmap(ctl, bitmap); 2271 2272 return true; 2273 } 2274 2275 /* 2276 * We prefer always to allocate from extent entries, both for clustered and 2277 * non-clustered allocation requests. So when attempting to add a new extent 2278 * entry, try to see if there's adjacent free space in bitmap entries, and if 2279 * there is, migrate that space from the bitmaps to the extent. 2280 * Like this we get better chances of satisfying space allocation requests 2281 * because we attempt to satisfy them based on a single cache entry, and never 2282 * on 2 or more entries - even if the entries represent a contiguous free space 2283 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2284 * ends). 2285 */ 2286 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2287 struct btrfs_free_space *info, 2288 bool update_stat) 2289 { 2290 /* 2291 * Only work with disconnected entries, as we can change their offset, 2292 * and must be extent entries. 
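	 * steal_from_bitmap_to_end() grows the entry forward into the bitmap
	 * covering the byte just past its end, and steal_from_bitmap_to_front()
	 * grows it backward into the bitmap covering the byte just before it;
	 * both clear the bits they absorb, and a merge with neighbouring
	 * extent entries is retried afterwards.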
2293 */ 2294 ASSERT(!info->bitmap); 2295 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2296 2297 if (ctl->total_bitmaps > 0) { 2298 bool stole_end; 2299 bool stole_front = false; 2300 2301 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2302 if (ctl->total_bitmaps > 0) 2303 stole_front = steal_from_bitmap_to_front(ctl, info, 2304 update_stat); 2305 2306 if (stole_end || stole_front) 2307 try_merge_free_space(ctl, info, update_stat); 2308 } 2309 } 2310 2311 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2312 struct btrfs_free_space_ctl *ctl, 2313 u64 offset, u64 bytes) 2314 { 2315 struct btrfs_free_space *info; 2316 int ret = 0; 2317 2318 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2319 if (!info) 2320 return -ENOMEM; 2321 2322 info->offset = offset; 2323 info->bytes = bytes; 2324 RB_CLEAR_NODE(&info->offset_index); 2325 2326 spin_lock(&ctl->tree_lock); 2327 2328 if (try_merge_free_space(ctl, info, true)) 2329 goto link; 2330 2331 /* 2332 * There was no extent directly to the left or right of this new 2333 * extent then we know we're going to have to allocate a new extent, so 2334 * before we do that see if we need to drop this into a bitmap 2335 */ 2336 ret = insert_into_bitmap(ctl, info); 2337 if (ret < 0) { 2338 goto out; 2339 } else if (ret) { 2340 ret = 0; 2341 goto out; 2342 } 2343 link: 2344 /* 2345 * Only steal free space from adjacent bitmaps if we're sure we're not 2346 * going to add the new free space to existing bitmap entries - because 2347 * that would mean unnecessary work that would be reverted. Therefore 2348 * attempt to steal space from bitmaps if we're adding an extent entry. 2349 */ 2350 steal_from_bitmap(ctl, info, true); 2351 2352 ret = link_free_space(ctl, info); 2353 if (ret) 2354 kmem_cache_free(btrfs_free_space_cachep, info); 2355 out: 2356 spin_unlock(&ctl->tree_lock); 2357 2358 if (ret) { 2359 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2360 ASSERT(ret != -EEXIST); 2361 } 2362 2363 return ret; 2364 } 2365 2366 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2367 u64 offset, u64 bytes) 2368 { 2369 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2370 struct btrfs_free_space *info; 2371 int ret; 2372 bool re_search = false; 2373 2374 spin_lock(&ctl->tree_lock); 2375 2376 again: 2377 ret = 0; 2378 if (!bytes) 2379 goto out_lock; 2380 2381 info = tree_search_offset(ctl, offset, 0, 0); 2382 if (!info) { 2383 /* 2384 * oops didn't find an extent that matched the space we wanted 2385 * to remove, look for a bitmap instead 2386 */ 2387 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2388 1, 0); 2389 if (!info) { 2390 /* 2391 * If we found a partial bit of our free space in a 2392 * bitmap but then couldn't find the other part this may 2393 * be a problem, so WARN about it. 
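			 * re_search is only set after remove_from_bitmap()
			 * returned -EAGAIN, i.e. the rest of the range was
			 * expected to live in a following entry, so finding
			 * nothing on the retry means the cached free space is
			 * inconsistent with what we were asked to remove.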
2394 */ 2395 WARN_ON(re_search); 2396 goto out_lock; 2397 } 2398 } 2399 2400 re_search = false; 2401 if (!info->bitmap) { 2402 unlink_free_space(ctl, info); 2403 if (offset == info->offset) { 2404 u64 to_free = min(bytes, info->bytes); 2405 2406 info->bytes -= to_free; 2407 info->offset += to_free; 2408 if (info->bytes) { 2409 ret = link_free_space(ctl, info); 2410 WARN_ON(ret); 2411 } else { 2412 kmem_cache_free(btrfs_free_space_cachep, info); 2413 } 2414 2415 offset += to_free; 2416 bytes -= to_free; 2417 goto again; 2418 } else { 2419 u64 old_end = info->bytes + info->offset; 2420 2421 info->bytes = offset - info->offset; 2422 ret = link_free_space(ctl, info); 2423 WARN_ON(ret); 2424 if (ret) 2425 goto out_lock; 2426 2427 /* Not enough bytes in this entry to satisfy us */ 2428 if (old_end < offset + bytes) { 2429 bytes -= old_end - offset; 2430 offset = old_end; 2431 goto again; 2432 } else if (old_end == offset + bytes) { 2433 /* all done */ 2434 goto out_lock; 2435 } 2436 spin_unlock(&ctl->tree_lock); 2437 2438 ret = btrfs_add_free_space(block_group, offset + bytes, 2439 old_end - (offset + bytes)); 2440 WARN_ON(ret); 2441 goto out; 2442 } 2443 } 2444 2445 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2446 if (ret == -EAGAIN) { 2447 re_search = true; 2448 goto again; 2449 } 2450 out_lock: 2451 spin_unlock(&ctl->tree_lock); 2452 out: 2453 return ret; 2454 } 2455 2456 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2457 u64 bytes) 2458 { 2459 struct btrfs_fs_info *fs_info = block_group->fs_info; 2460 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2461 struct btrfs_free_space *info; 2462 struct rb_node *n; 2463 int count = 0; 2464 2465 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2466 info = rb_entry(n, struct btrfs_free_space, offset_index); 2467 if (info->bytes >= bytes && !block_group->ro) 2468 count++; 2469 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2470 info->offset, info->bytes, 2471 (info->bitmap) ? "yes" : "no"); 2472 } 2473 btrfs_info(fs_info, "block group has cluster?: %s", 2474 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2475 btrfs_info(fs_info, 2476 "%d blocks of free space at or bigger than bytes is", count); 2477 } 2478 2479 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2480 { 2481 struct btrfs_fs_info *fs_info = block_group->fs_info; 2482 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2483 2484 spin_lock_init(&ctl->tree_lock); 2485 ctl->unit = fs_info->sectorsize; 2486 ctl->start = block_group->key.objectid; 2487 ctl->private = block_group; 2488 ctl->op = &free_space_op; 2489 INIT_LIST_HEAD(&ctl->trimming_ranges); 2490 mutex_init(&ctl->cache_writeout_mutex); 2491 2492 /* 2493 * we only want to have 32k of ram per block group for keeping 2494 * track of free space, and if we pass 1/2 of that we want to 2495 * start converting things over to using bitmaps 2496 */ 2497 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2498 } 2499 2500 /* 2501 * for a given cluster, put all of its extents back into the free 2502 * space cache. If the block group passed doesn't match the block group 2503 * pointed to by the cluster, someone else raced in and freed the 2504 * cluster already. 
In that case, we just return without changing anything 2505 */ 2506 static int 2507 __btrfs_return_cluster_to_free_space( 2508 struct btrfs_block_group_cache *block_group, 2509 struct btrfs_free_cluster *cluster) 2510 { 2511 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2512 struct btrfs_free_space *entry; 2513 struct rb_node *node; 2514 2515 spin_lock(&cluster->lock); 2516 if (cluster->block_group != block_group) 2517 goto out; 2518 2519 cluster->block_group = NULL; 2520 cluster->window_start = 0; 2521 list_del_init(&cluster->block_group_list); 2522 2523 node = rb_first(&cluster->root); 2524 while (node) { 2525 bool bitmap; 2526 2527 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2528 node = rb_next(&entry->offset_index); 2529 rb_erase(&entry->offset_index, &cluster->root); 2530 RB_CLEAR_NODE(&entry->offset_index); 2531 2532 bitmap = (entry->bitmap != NULL); 2533 if (!bitmap) { 2534 try_merge_free_space(ctl, entry, false); 2535 steal_from_bitmap(ctl, entry, false); 2536 } 2537 tree_insert_offset(&ctl->free_space_offset, 2538 entry->offset, &entry->offset_index, bitmap); 2539 } 2540 cluster->root = RB_ROOT; 2541 2542 out: 2543 spin_unlock(&cluster->lock); 2544 btrfs_put_block_group(block_group); 2545 return 0; 2546 } 2547 2548 static void __btrfs_remove_free_space_cache_locked( 2549 struct btrfs_free_space_ctl *ctl) 2550 { 2551 struct btrfs_free_space *info; 2552 struct rb_node *node; 2553 2554 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2555 info = rb_entry(node, struct btrfs_free_space, offset_index); 2556 if (!info->bitmap) { 2557 unlink_free_space(ctl, info); 2558 kmem_cache_free(btrfs_free_space_cachep, info); 2559 } else { 2560 free_bitmap(ctl, info); 2561 } 2562 2563 cond_resched_lock(&ctl->tree_lock); 2564 } 2565 } 2566 2567 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2568 { 2569 spin_lock(&ctl->tree_lock); 2570 __btrfs_remove_free_space_cache_locked(ctl); 2571 spin_unlock(&ctl->tree_lock); 2572 } 2573 2574 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2575 { 2576 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2577 struct btrfs_free_cluster *cluster; 2578 struct list_head *head; 2579 2580 spin_lock(&ctl->tree_lock); 2581 while ((head = block_group->cluster_list.next) != 2582 &block_group->cluster_list) { 2583 cluster = list_entry(head, struct btrfs_free_cluster, 2584 block_group_list); 2585 2586 WARN_ON(cluster->block_group != block_group); 2587 __btrfs_return_cluster_to_free_space(block_group, cluster); 2588 2589 cond_resched_lock(&ctl->tree_lock); 2590 } 2591 __btrfs_remove_free_space_cache_locked(ctl); 2592 spin_unlock(&ctl->tree_lock); 2593 2594 } 2595 2596 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2597 u64 offset, u64 bytes, u64 empty_size, 2598 u64 *max_extent_size) 2599 { 2600 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2601 struct btrfs_free_space *entry = NULL; 2602 u64 bytes_search = bytes + empty_size; 2603 u64 ret = 0; 2604 u64 align_gap = 0; 2605 u64 align_gap_len = 0; 2606 2607 spin_lock(&ctl->tree_lock); 2608 entry = find_free_space(ctl, &offset, &bytes_search, 2609 block_group->full_stripe_len, max_extent_size); 2610 if (!entry) 2611 goto out; 2612 2613 ret = offset; 2614 if (entry->bitmap) { 2615 bitmap_clear_bits(ctl, entry, offset, bytes); 2616 if (!entry->bytes) 2617 free_bitmap(ctl, entry); 2618 } else { 2619 unlink_free_space(ctl, entry); 2620 align_gap_len = offset - 
entry->offset; 2621 align_gap = entry->offset; 2622 2623 entry->offset = offset + bytes; 2624 WARN_ON(entry->bytes < bytes + align_gap_len); 2625 2626 entry->bytes -= bytes + align_gap_len; 2627 if (!entry->bytes) 2628 kmem_cache_free(btrfs_free_space_cachep, entry); 2629 else 2630 link_free_space(ctl, entry); 2631 } 2632 out: 2633 spin_unlock(&ctl->tree_lock); 2634 2635 if (align_gap_len) 2636 __btrfs_add_free_space(block_group->fs_info, ctl, 2637 align_gap, align_gap_len); 2638 return ret; 2639 } 2640 2641 /* 2642 * given a cluster, put all of its extents back into the free space 2643 * cache. If a block group is passed, this function will only free 2644 * a cluster that belongs to the passed block group. 2645 * 2646 * Otherwise, it'll get a reference on the block group pointed to by the 2647 * cluster and remove the cluster from it. 2648 */ 2649 int btrfs_return_cluster_to_free_space( 2650 struct btrfs_block_group_cache *block_group, 2651 struct btrfs_free_cluster *cluster) 2652 { 2653 struct btrfs_free_space_ctl *ctl; 2654 int ret; 2655 2656 /* first, get a safe pointer to the block group */ 2657 spin_lock(&cluster->lock); 2658 if (!block_group) { 2659 block_group = cluster->block_group; 2660 if (!block_group) { 2661 spin_unlock(&cluster->lock); 2662 return 0; 2663 } 2664 } else if (cluster->block_group != block_group) { 2665 /* someone else has already freed it don't redo their work */ 2666 spin_unlock(&cluster->lock); 2667 return 0; 2668 } 2669 atomic_inc(&block_group->count); 2670 spin_unlock(&cluster->lock); 2671 2672 ctl = block_group->free_space_ctl; 2673 2674 /* now return any extents the cluster had on it */ 2675 spin_lock(&ctl->tree_lock); 2676 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2677 spin_unlock(&ctl->tree_lock); 2678 2679 /* finally drop our ref */ 2680 btrfs_put_block_group(block_group); 2681 return ret; 2682 } 2683 2684 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2685 struct btrfs_free_cluster *cluster, 2686 struct btrfs_free_space *entry, 2687 u64 bytes, u64 min_start, 2688 u64 *max_extent_size) 2689 { 2690 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2691 int err; 2692 u64 search_start = cluster->window_start; 2693 u64 search_bytes = bytes; 2694 u64 ret = 0; 2695 2696 search_start = min_start; 2697 search_bytes = bytes; 2698 2699 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2700 if (err) { 2701 if (search_bytes > *max_extent_size) 2702 *max_extent_size = search_bytes; 2703 return 0; 2704 } 2705 2706 ret = search_start; 2707 __bitmap_clear_bits(ctl, entry, ret, bytes); 2708 2709 return ret; 2710 } 2711 2712 /* 2713 * given a cluster, try to allocate 'bytes' from it, returns 0 2714 * if it couldn't find anything suitably large, or a logical disk offset 2715 * if things worked out 2716 */ 2717 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2718 struct btrfs_free_cluster *cluster, u64 bytes, 2719 u64 min_start, u64 *max_extent_size) 2720 { 2721 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2722 struct btrfs_free_space *entry = NULL; 2723 struct rb_node *node; 2724 u64 ret = 0; 2725 2726 spin_lock(&cluster->lock); 2727 if (bytes > cluster->max_size) 2728 goto out; 2729 2730 if (cluster->block_group != block_group) 2731 goto out; 2732 2733 node = rb_first(&cluster->root); 2734 if (!node) 2735 goto out; 2736 2737 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2738 while (1) { 2739 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2740 *max_extent_size = entry->bytes; 2741 2742 if (entry->bytes < bytes || 2743 (!entry->bitmap && entry->offset < min_start)) { 2744 node = rb_next(&entry->offset_index); 2745 if (!node) 2746 break; 2747 entry = rb_entry(node, struct btrfs_free_space, 2748 offset_index); 2749 continue; 2750 } 2751 2752 if (entry->bitmap) { 2753 ret = btrfs_alloc_from_bitmap(block_group, 2754 cluster, entry, bytes, 2755 cluster->window_start, 2756 max_extent_size); 2757 if (ret == 0) { 2758 node = rb_next(&entry->offset_index); 2759 if (!node) 2760 break; 2761 entry = rb_entry(node, struct btrfs_free_space, 2762 offset_index); 2763 continue; 2764 } 2765 cluster->window_start += bytes; 2766 } else { 2767 ret = entry->offset; 2768 2769 entry->offset += bytes; 2770 entry->bytes -= bytes; 2771 } 2772 2773 if (entry->bytes == 0) 2774 rb_erase(&entry->offset_index, &cluster->root); 2775 break; 2776 } 2777 out: 2778 spin_unlock(&cluster->lock); 2779 2780 if (!ret) 2781 return 0; 2782 2783 spin_lock(&ctl->tree_lock); 2784 2785 ctl->free_space -= bytes; 2786 if (entry->bytes == 0) { 2787 ctl->free_extents--; 2788 if (entry->bitmap) { 2789 kfree(entry->bitmap); 2790 ctl->total_bitmaps--; 2791 ctl->op->recalc_thresholds(ctl); 2792 } 2793 kmem_cache_free(btrfs_free_space_cachep, entry); 2794 } 2795 2796 spin_unlock(&ctl->tree_lock); 2797 2798 return ret; 2799 } 2800 2801 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2802 struct btrfs_free_space *entry, 2803 struct btrfs_free_cluster *cluster, 2804 u64 offset, u64 bytes, 2805 u64 cont1_bytes, u64 min_bytes) 2806 { 2807 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2808 unsigned long next_zero; 2809 unsigned long i; 2810 unsigned long want_bits; 2811 unsigned long min_bits; 2812 unsigned long found_bits; 2813 unsigned long max_bits = 0; 2814 unsigned long start = 0; 2815 unsigned long total_found = 0; 2816 int ret; 2817 2818 i = offset_to_bit(entry->offset, ctl->unit, 2819 max_t(u64, offset, entry->offset)); 2820 want_bits = bytes_to_bits(bytes, ctl->unit); 2821 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2822 2823 /* 2824 * Don't bother looking for a cluster in this bitmap if it's heavily 2825 * fragmented. 
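	 * max_extent_size is a cached result from a previous failed search of
	 * this bitmap (see the !found_bits case below); zero means "unknown".
	 * If it is set and still smaller than cont1_bytes, rescanning the bits
	 * cannot possibly find a large enough run.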
2826 */ 2827 if (entry->max_extent_size && 2828 entry->max_extent_size < cont1_bytes) 2829 return -ENOSPC; 2830 again: 2831 found_bits = 0; 2832 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2833 next_zero = find_next_zero_bit(entry->bitmap, 2834 BITS_PER_BITMAP, i); 2835 if (next_zero - i >= min_bits) { 2836 found_bits = next_zero - i; 2837 if (found_bits > max_bits) 2838 max_bits = found_bits; 2839 break; 2840 } 2841 if (next_zero - i > max_bits) 2842 max_bits = next_zero - i; 2843 i = next_zero; 2844 } 2845 2846 if (!found_bits) { 2847 entry->max_extent_size = (u64)max_bits * ctl->unit; 2848 return -ENOSPC; 2849 } 2850 2851 if (!total_found) { 2852 start = i; 2853 cluster->max_size = 0; 2854 } 2855 2856 total_found += found_bits; 2857 2858 if (cluster->max_size < found_bits * ctl->unit) 2859 cluster->max_size = found_bits * ctl->unit; 2860 2861 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2862 i = next_zero + 1; 2863 goto again; 2864 } 2865 2866 cluster->window_start = start * ctl->unit + entry->offset; 2867 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2868 ret = tree_insert_offset(&cluster->root, entry->offset, 2869 &entry->offset_index, 1); 2870 ASSERT(!ret); /* -EEXIST; Logic error */ 2871 2872 trace_btrfs_setup_cluster(block_group, cluster, 2873 total_found * ctl->unit, 1); 2874 return 0; 2875 } 2876 2877 /* 2878 * This searches the block group for just extents to fill the cluster with. 2879 * Try to find a cluster with at least bytes total bytes, at least one 2880 * extent of cont1_bytes, and other clusters of at least min_bytes. 2881 */ 2882 static noinline int 2883 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2884 struct btrfs_free_cluster *cluster, 2885 struct list_head *bitmaps, u64 offset, u64 bytes, 2886 u64 cont1_bytes, u64 min_bytes) 2887 { 2888 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2889 struct btrfs_free_space *first = NULL; 2890 struct btrfs_free_space *entry = NULL; 2891 struct btrfs_free_space *last; 2892 struct rb_node *node; 2893 u64 window_free; 2894 u64 max_extent; 2895 u64 total_size = 0; 2896 2897 entry = tree_search_offset(ctl, offset, 0, 1); 2898 if (!entry) 2899 return -ENOSPC; 2900 2901 /* 2902 * We don't want bitmaps, so just move along until we find a normal 2903 * extent entry. 
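	 * Any bitmap entries we walk past are stashed on the caller's
	 * 'bitmaps' list so that setup_cluster_bitmap() can try them later
	 * without repeating the tree walk.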
2904 */ 2905 while (entry->bitmap || entry->bytes < min_bytes) { 2906 if (entry->bitmap && list_empty(&entry->list)) 2907 list_add_tail(&entry->list, bitmaps); 2908 node = rb_next(&entry->offset_index); 2909 if (!node) 2910 return -ENOSPC; 2911 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2912 } 2913 2914 window_free = entry->bytes; 2915 max_extent = entry->bytes; 2916 first = entry; 2917 last = entry; 2918 2919 for (node = rb_next(&entry->offset_index); node; 2920 node = rb_next(&entry->offset_index)) { 2921 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2922 2923 if (entry->bitmap) { 2924 if (list_empty(&entry->list)) 2925 list_add_tail(&entry->list, bitmaps); 2926 continue; 2927 } 2928 2929 if (entry->bytes < min_bytes) 2930 continue; 2931 2932 last = entry; 2933 window_free += entry->bytes; 2934 if (entry->bytes > max_extent) 2935 max_extent = entry->bytes; 2936 } 2937 2938 if (window_free < bytes || max_extent < cont1_bytes) 2939 return -ENOSPC; 2940 2941 cluster->window_start = first->offset; 2942 2943 node = &first->offset_index; 2944 2945 /* 2946 * now we've found our entries, pull them out of the free space 2947 * cache and put them into the cluster rbtree 2948 */ 2949 do { 2950 int ret; 2951 2952 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2953 node = rb_next(&entry->offset_index); 2954 if (entry->bitmap || entry->bytes < min_bytes) 2955 continue; 2956 2957 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2958 ret = tree_insert_offset(&cluster->root, entry->offset, 2959 &entry->offset_index, 0); 2960 total_size += entry->bytes; 2961 ASSERT(!ret); /* -EEXIST; Logic error */ 2962 } while (node && entry != last); 2963 2964 cluster->max_size = max_extent; 2965 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2966 return 0; 2967 } 2968 2969 /* 2970 * This specifically looks for bitmaps that may work in the cluster, we assume 2971 * that we have already failed to find extents that will work. 2972 */ 2973 static noinline int 2974 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2975 struct btrfs_free_cluster *cluster, 2976 struct list_head *bitmaps, u64 offset, u64 bytes, 2977 u64 cont1_bytes, u64 min_bytes) 2978 { 2979 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2980 struct btrfs_free_space *entry = NULL; 2981 int ret = -ENOSPC; 2982 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2983 2984 if (ctl->total_bitmaps == 0) 2985 return -ENOSPC; 2986 2987 /* 2988 * The bitmap that covers offset won't be in the list unless offset 2989 * is just its start offset. 2990 */ 2991 if (!list_empty(bitmaps)) 2992 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2993 2994 if (!entry || entry->offset != bitmap_offset) { 2995 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2996 if (entry && list_empty(&entry->list)) 2997 list_add(&entry->list, bitmaps); 2998 } 2999 3000 list_for_each_entry(entry, bitmaps, list) { 3001 if (entry->bytes < bytes) 3002 continue; 3003 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3004 bytes, cont1_bytes, min_bytes); 3005 if (!ret) 3006 return 0; 3007 } 3008 3009 /* 3010 * The bitmaps list has all the bitmaps that record free space 3011 * starting after offset, so no more search is required. 3012 */ 3013 return -ENOSPC; 3014 } 3015 3016 /* 3017 * here we try to find a cluster of blocks in a block group. The goal 3018 * is to find at least bytes+empty_size. 3019 * We might not find them all in one contiguous area. 
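 * Extent entries are tried first (setup_cluster_no_bitmap()); bitmap entries
 * are only considered (setup_cluster_bitmap()) if no suitable window of plain
 * extents can be found.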
3020 * 3021 * returns zero and sets up cluster if things worked out, otherwise 3022 * it returns -enospc 3023 */ 3024 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3025 struct btrfs_block_group_cache *block_group, 3026 struct btrfs_free_cluster *cluster, 3027 u64 offset, u64 bytes, u64 empty_size) 3028 { 3029 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3030 struct btrfs_free_space *entry, *tmp; 3031 LIST_HEAD(bitmaps); 3032 u64 min_bytes; 3033 u64 cont1_bytes; 3034 int ret; 3035 3036 /* 3037 * Choose the minimum extent size we'll require for this 3038 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3039 * For metadata, allow allocates with smaller extents. For 3040 * data, keep it dense. 3041 */ 3042 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3043 cont1_bytes = min_bytes = bytes + empty_size; 3044 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3045 cont1_bytes = bytes; 3046 min_bytes = fs_info->sectorsize; 3047 } else { 3048 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3049 min_bytes = fs_info->sectorsize; 3050 } 3051 3052 spin_lock(&ctl->tree_lock); 3053 3054 /* 3055 * If we know we don't have enough space to make a cluster don't even 3056 * bother doing all the work to try and find one. 3057 */ 3058 if (ctl->free_space < bytes) { 3059 spin_unlock(&ctl->tree_lock); 3060 return -ENOSPC; 3061 } 3062 3063 spin_lock(&cluster->lock); 3064 3065 /* someone already found a cluster, hooray */ 3066 if (cluster->block_group) { 3067 ret = 0; 3068 goto out; 3069 } 3070 3071 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3072 min_bytes); 3073 3074 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3075 bytes + empty_size, 3076 cont1_bytes, min_bytes); 3077 if (ret) 3078 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3079 offset, bytes + empty_size, 3080 cont1_bytes, min_bytes); 3081 3082 /* Clear our temporary list */ 3083 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3084 list_del_init(&entry->list); 3085 3086 if (!ret) { 3087 atomic_inc(&block_group->count); 3088 list_add_tail(&cluster->block_group_list, 3089 &block_group->cluster_list); 3090 cluster->block_group = block_group; 3091 } else { 3092 trace_btrfs_failed_cluster_setup(block_group); 3093 } 3094 out: 3095 spin_unlock(&cluster->lock); 3096 spin_unlock(&ctl->tree_lock); 3097 3098 return ret; 3099 } 3100 3101 /* 3102 * simple code to zero out a cluster 3103 */ 3104 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3105 { 3106 spin_lock_init(&cluster->lock); 3107 spin_lock_init(&cluster->refill_lock); 3108 cluster->root = RB_ROOT; 3109 cluster->max_size = 0; 3110 cluster->fragmented = false; 3111 INIT_LIST_HEAD(&cluster->block_group_list); 3112 cluster->block_group = NULL; 3113 } 3114 3115 static int do_trimming(struct btrfs_block_group_cache *block_group, 3116 u64 *total_trimmed, u64 start, u64 bytes, 3117 u64 reserved_start, u64 reserved_bytes, 3118 struct btrfs_trim_range *trim_entry) 3119 { 3120 struct btrfs_space_info *space_info = block_group->space_info; 3121 struct btrfs_fs_info *fs_info = block_group->fs_info; 3122 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3123 int ret; 3124 int update = 0; 3125 u64 trimmed = 0; 3126 3127 spin_lock(&space_info->lock); 3128 spin_lock(&block_group->lock); 3129 if (!block_group->ro) { 3130 block_group->reserved += reserved_bytes; 3131 space_info->bytes_reserved += reserved_bytes; 3132 update = 1; 3133 } 3134 spin_unlock(&block_group->lock); 3135 
spin_unlock(&space_info->lock); 3136 3137 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3138 if (!ret) 3139 *total_trimmed += trimmed; 3140 3141 mutex_lock(&ctl->cache_writeout_mutex); 3142 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3143 list_del(&trim_entry->list); 3144 mutex_unlock(&ctl->cache_writeout_mutex); 3145 3146 if (update) { 3147 spin_lock(&space_info->lock); 3148 spin_lock(&block_group->lock); 3149 if (block_group->ro) 3150 space_info->bytes_readonly += reserved_bytes; 3151 block_group->reserved -= reserved_bytes; 3152 space_info->bytes_reserved -= reserved_bytes; 3153 spin_unlock(&space_info->lock); 3154 spin_unlock(&block_group->lock); 3155 } 3156 3157 return ret; 3158 } 3159 3160 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3161 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3162 { 3163 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3164 struct btrfs_free_space *entry; 3165 struct rb_node *node; 3166 int ret = 0; 3167 u64 extent_start; 3168 u64 extent_bytes; 3169 u64 bytes; 3170 3171 while (start < end) { 3172 struct btrfs_trim_range trim_entry; 3173 3174 mutex_lock(&ctl->cache_writeout_mutex); 3175 spin_lock(&ctl->tree_lock); 3176 3177 if (ctl->free_space < minlen) { 3178 spin_unlock(&ctl->tree_lock); 3179 mutex_unlock(&ctl->cache_writeout_mutex); 3180 break; 3181 } 3182 3183 entry = tree_search_offset(ctl, start, 0, 1); 3184 if (!entry) { 3185 spin_unlock(&ctl->tree_lock); 3186 mutex_unlock(&ctl->cache_writeout_mutex); 3187 break; 3188 } 3189 3190 /* skip bitmaps */ 3191 while (entry->bitmap) { 3192 node = rb_next(&entry->offset_index); 3193 if (!node) { 3194 spin_unlock(&ctl->tree_lock); 3195 mutex_unlock(&ctl->cache_writeout_mutex); 3196 goto out; 3197 } 3198 entry = rb_entry(node, struct btrfs_free_space, 3199 offset_index); 3200 } 3201 3202 if (entry->offset >= end) { 3203 spin_unlock(&ctl->tree_lock); 3204 mutex_unlock(&ctl->cache_writeout_mutex); 3205 break; 3206 } 3207 3208 extent_start = entry->offset; 3209 extent_bytes = entry->bytes; 3210 start = max(start, extent_start); 3211 bytes = min(extent_start + extent_bytes, end) - start; 3212 if (bytes < minlen) { 3213 spin_unlock(&ctl->tree_lock); 3214 mutex_unlock(&ctl->cache_writeout_mutex); 3215 goto next; 3216 } 3217 3218 unlink_free_space(ctl, entry); 3219 kmem_cache_free(btrfs_free_space_cachep, entry); 3220 3221 spin_unlock(&ctl->tree_lock); 3222 trim_entry.start = extent_start; 3223 trim_entry.bytes = extent_bytes; 3224 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3225 mutex_unlock(&ctl->cache_writeout_mutex); 3226 3227 ret = do_trimming(block_group, total_trimmed, start, bytes, 3228 extent_start, extent_bytes, &trim_entry); 3229 if (ret) 3230 break; 3231 next: 3232 start += bytes; 3233 3234 if (fatal_signal_pending(current)) { 3235 ret = -ERESTARTSYS; 3236 break; 3237 } 3238 3239 cond_resched(); 3240 } 3241 out: 3242 return ret; 3243 } 3244 3245 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3246 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3247 { 3248 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3249 struct btrfs_free_space *entry; 3250 int ret = 0; 3251 int ret2; 3252 u64 bytes; 3253 u64 offset = offset_to_bitmap(ctl, start); 3254 3255 while (offset < end) { 3256 bool next_bitmap = false; 3257 struct btrfs_trim_range trim_entry; 3258 3259 mutex_lock(&ctl->cache_writeout_mutex); 3260 spin_lock(&ctl->tree_lock); 3261 3262 if (ctl->free_space < minlen) { 3263 
	spin_unlock(&ctl->tree_lock);
3264 			mutex_unlock(&ctl->cache_writeout_mutex);
3265 			break;
3266 		}
3267 
3268 		entry = tree_search_offset(ctl, offset, 1, 0);
3269 		if (!entry) {
3270 			spin_unlock(&ctl->tree_lock);
3271 			mutex_unlock(&ctl->cache_writeout_mutex);
3272 			next_bitmap = true;
3273 			goto next;
3274 		}
3275 
3276 		bytes = minlen;
3277 		ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3278 		if (ret2 || start >= end) {
3279 			spin_unlock(&ctl->tree_lock);
3280 			mutex_unlock(&ctl->cache_writeout_mutex);
3281 			next_bitmap = true;
3282 			goto next;
3283 		}
3284 
3285 		bytes = min(bytes, end - start);
3286 		if (bytes < minlen) {
3287 			spin_unlock(&ctl->tree_lock);
3288 			mutex_unlock(&ctl->cache_writeout_mutex);
3289 			goto next;
3290 		}
3291 
3292 		bitmap_clear_bits(ctl, entry, start, bytes);
3293 		if (entry->bytes == 0)
3294 			free_bitmap(ctl, entry);
3295 
3296 		spin_unlock(&ctl->tree_lock);
3297 		trim_entry.start = start;
3298 		trim_entry.bytes = bytes;
3299 		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3300 		mutex_unlock(&ctl->cache_writeout_mutex);
3301 
3302 		ret = do_trimming(block_group, total_trimmed, start, bytes,
3303 				  start, bytes, &trim_entry);
3304 		if (ret)
3305 			break;
3306 next:
3307 		if (next_bitmap) {
3308 			offset += BITS_PER_BITMAP * ctl->unit;
3309 		} else {
3310 			start += bytes;
3311 			if (start >= offset + BITS_PER_BITMAP * ctl->unit)
3312 				offset += BITS_PER_BITMAP * ctl->unit;
3313 		}
3314 
3315 		if (fatal_signal_pending(current)) {
3316 			ret = -ERESTARTSYS;
3317 			break;
3318 		}
3319 
3320 		cond_resched();
3321 	}
3322 
3323 	return ret;
3324 }
3325 
3326 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
3327 {
3328 	atomic_inc(&cache->trimming);
3329 }
3330 
3331 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
3332 {
3333 	struct btrfs_fs_info *fs_info = block_group->fs_info;
3334 	struct extent_map_tree *em_tree;
3335 	struct extent_map *em;
3336 	bool cleanup;
3337 
3338 	spin_lock(&block_group->lock);
3339 	cleanup = (atomic_dec_and_test(&block_group->trimming) &&
3340 		   block_group->removed);
3341 	spin_unlock(&block_group->lock);
3342 
3343 	if (cleanup) {
3344 		mutex_lock(&fs_info->chunk_mutex);
3345 		em_tree = &fs_info->mapping_tree.map_tree;
3346 		write_lock(&em_tree->lock);
3347 		em = lookup_extent_mapping(em_tree, block_group->key.objectid,
3348 					   1);
3349 		BUG_ON(!em); /* logic error, can't happen */
3350 		/*
3351 		 * remove_extent_mapping() will delete us from the pinned_chunks
3352 		 * list, which is protected by the chunk mutex.
3353 		 */
3354 		remove_extent_mapping(em_tree, em);
3355 		write_unlock(&em_tree->lock);
3356 		mutex_unlock(&fs_info->chunk_mutex);
3357 
3358 		/* once for us and once for the tree */
3359 		free_extent_map(em);
3360 		free_extent_map(em);
3361 
3362 		/*
3363 		 * We've left one free space entry and other tasks trimming
3364 		 * this block group have each left one entry. Free them.
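		 * (These entries were re-added by do_trimming() via
		 * btrfs_add_free_space() while the trim was in progress.)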
3365 */ 3366 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3367 } 3368 } 3369 3370 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3371 u64 *trimmed, u64 start, u64 end, u64 minlen) 3372 { 3373 int ret; 3374 3375 *trimmed = 0; 3376 3377 spin_lock(&block_group->lock); 3378 if (block_group->removed) { 3379 spin_unlock(&block_group->lock); 3380 return 0; 3381 } 3382 btrfs_get_block_group_trimming(block_group); 3383 spin_unlock(&block_group->lock); 3384 3385 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3386 if (ret) 3387 goto out; 3388 3389 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3390 out: 3391 btrfs_put_block_group_trimming(block_group); 3392 return ret; 3393 } 3394 3395 /* 3396 * Find the left-most item in the cache tree, and then return the 3397 * smallest inode number in the item. 3398 * 3399 * Note: the returned inode number may not be the smallest one in 3400 * the tree, if the left-most item is a bitmap. 3401 */ 3402 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3403 { 3404 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3405 struct btrfs_free_space *entry = NULL; 3406 u64 ino = 0; 3407 3408 spin_lock(&ctl->tree_lock); 3409 3410 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3411 goto out; 3412 3413 entry = rb_entry(rb_first(&ctl->free_space_offset), 3414 struct btrfs_free_space, offset_index); 3415 3416 if (!entry->bitmap) { 3417 ino = entry->offset; 3418 3419 unlink_free_space(ctl, entry); 3420 entry->offset++; 3421 entry->bytes--; 3422 if (!entry->bytes) 3423 kmem_cache_free(btrfs_free_space_cachep, entry); 3424 else 3425 link_free_space(ctl, entry); 3426 } else { 3427 u64 offset = 0; 3428 u64 count = 1; 3429 int ret; 3430 3431 ret = search_bitmap(ctl, entry, &offset, &count, true); 3432 /* Logic error; Should be empty if it can't find anything */ 3433 ASSERT(!ret); 3434 3435 ino = offset; 3436 bitmap_clear_bits(ctl, entry, offset, 1); 3437 if (entry->bytes == 0) 3438 free_bitmap(ctl, entry); 3439 } 3440 out: 3441 spin_unlock(&ctl->tree_lock); 3442 3443 return ino; 3444 } 3445 3446 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3447 struct btrfs_path *path) 3448 { 3449 struct inode *inode = NULL; 3450 3451 spin_lock(&root->ino_cache_lock); 3452 if (root->ino_cache_inode) 3453 inode = igrab(root->ino_cache_inode); 3454 spin_unlock(&root->ino_cache_lock); 3455 if (inode) 3456 return inode; 3457 3458 inode = __lookup_free_space_inode(root, path, 0); 3459 if (IS_ERR(inode)) 3460 return inode; 3461 3462 spin_lock(&root->ino_cache_lock); 3463 if (!btrfs_fs_closing(root->fs_info)) 3464 root->ino_cache_inode = igrab(inode); 3465 spin_unlock(&root->ino_cache_lock); 3466 3467 return inode; 3468 } 3469 3470 int create_free_ino_inode(struct btrfs_root *root, 3471 struct btrfs_trans_handle *trans, 3472 struct btrfs_path *path) 3473 { 3474 return __create_free_space_inode(root, trans, path, 3475 BTRFS_FREE_INO_OBJECTID, 0); 3476 } 3477 3478 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3479 { 3480 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3481 struct btrfs_path *path; 3482 struct inode *inode; 3483 int ret = 0; 3484 u64 root_gen = btrfs_root_generation(&root->root_item); 3485 3486 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3487 return 0; 3488 3489 /* 3490 * If we're unmounting then just return, since this does a search on the 3491 * normal root and not the commit root and we could deadlock. 
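	 * Returning 0 in these early-exit paths is not an error; it just
	 * means no free-ino cache gets loaded for this root.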
3492 */ 3493 if (btrfs_fs_closing(fs_info)) 3494 return 0; 3495 3496 path = btrfs_alloc_path(); 3497 if (!path) 3498 return 0; 3499 3500 inode = lookup_free_ino_inode(root, path); 3501 if (IS_ERR(inode)) 3502 goto out; 3503 3504 if (root_gen != BTRFS_I(inode)->generation) 3505 goto out_put; 3506 3507 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3508 3509 if (ret < 0) 3510 btrfs_err(fs_info, 3511 "failed to load free ino cache for root %llu", 3512 root->root_key.objectid); 3513 out_put: 3514 iput(inode); 3515 out: 3516 btrfs_free_path(path); 3517 return ret; 3518 } 3519 3520 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3521 struct btrfs_trans_handle *trans, 3522 struct btrfs_path *path, 3523 struct inode *inode) 3524 { 3525 struct btrfs_fs_info *fs_info = root->fs_info; 3526 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3527 int ret; 3528 struct btrfs_io_ctl io_ctl; 3529 bool release_metadata = true; 3530 3531 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3532 return 0; 3533 3534 memset(&io_ctl, 0, sizeof(io_ctl)); 3535 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3536 if (!ret) { 3537 /* 3538 * At this point writepages() didn't error out, so our metadata 3539 * reservation is released when the writeback finishes, at 3540 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3541 * with or without an error. 3542 */ 3543 release_metadata = false; 3544 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3545 } 3546 3547 if (ret) { 3548 if (release_metadata) 3549 btrfs_delalloc_release_metadata(BTRFS_I(inode), 3550 inode->i_size); 3551 #ifdef DEBUG 3552 btrfs_err(fs_info, 3553 "failed to write free ino cache for root %llu", 3554 root->root_key.objectid); 3555 #endif 3556 } 3557 3558 return ret; 3559 } 3560 3561 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3562 /* 3563 * Use this if you need to make a bitmap or extent entry specifically, it 3564 * doesn't do any of the merging that add_free_space does, this acts a lot like 3565 * how the free space cache loading stuff works, so you can get really weird 3566 * configurations. 
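 * When a bitmap entry is requested the function may loop (the 'again' label)
 * if the range crosses a bitmap boundary, allocating a fresh info/map pair as
 * needed for each bitmap it touches.
 *
 * A minimal usage sketch from a self-test's point of view (the block group
 * 'cache' is assumed to have been set up by the test harness):
 *
 *	ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, false);
 *	if (!ret)
 *		ASSERT(test_check_exists(cache, SZ_1M, SZ_4M));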
3567 */ 3568 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3569 u64 offset, u64 bytes, bool bitmap) 3570 { 3571 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3572 struct btrfs_free_space *info = NULL, *bitmap_info; 3573 void *map = NULL; 3574 u64 bytes_added; 3575 int ret; 3576 3577 again: 3578 if (!info) { 3579 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3580 if (!info) 3581 return -ENOMEM; 3582 } 3583 3584 if (!bitmap) { 3585 spin_lock(&ctl->tree_lock); 3586 info->offset = offset; 3587 info->bytes = bytes; 3588 info->max_extent_size = 0; 3589 ret = link_free_space(ctl, info); 3590 spin_unlock(&ctl->tree_lock); 3591 if (ret) 3592 kmem_cache_free(btrfs_free_space_cachep, info); 3593 return ret; 3594 } 3595 3596 if (!map) { 3597 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3598 if (!map) { 3599 kmem_cache_free(btrfs_free_space_cachep, info); 3600 return -ENOMEM; 3601 } 3602 } 3603 3604 spin_lock(&ctl->tree_lock); 3605 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3606 1, 0); 3607 if (!bitmap_info) { 3608 info->bitmap = map; 3609 map = NULL; 3610 add_new_bitmap(ctl, info, offset); 3611 bitmap_info = info; 3612 info = NULL; 3613 } 3614 3615 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3616 3617 bytes -= bytes_added; 3618 offset += bytes_added; 3619 spin_unlock(&ctl->tree_lock); 3620 3621 if (bytes) 3622 goto again; 3623 3624 if (info) 3625 kmem_cache_free(btrfs_free_space_cachep, info); 3626 if (map) 3627 kfree(map); 3628 return 0; 3629 } 3630 3631 /* 3632 * Checks to see if the given range is in the free space cache. This is really 3633 * just used to check the absence of space, so if there is free space in the 3634 * range at all we will return 1. 3635 */ 3636 int test_check_exists(struct btrfs_block_group_cache *cache, 3637 u64 offset, u64 bytes) 3638 { 3639 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3640 struct btrfs_free_space *info; 3641 int ret = 0; 3642 3643 spin_lock(&ctl->tree_lock); 3644 info = tree_search_offset(ctl, offset, 0, 0); 3645 if (!info) { 3646 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3647 1, 0); 3648 if (!info) 3649 goto out; 3650 } 3651 3652 have_info: 3653 if (info->bitmap) { 3654 u64 bit_off, bit_bytes; 3655 struct rb_node *n; 3656 struct btrfs_free_space *tmp; 3657 3658 bit_off = offset; 3659 bit_bytes = ctl->unit; 3660 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3661 if (!ret) { 3662 if (bit_off == offset) { 3663 ret = 1; 3664 goto out; 3665 } else if (bit_off > offset && 3666 offset + bytes > bit_off) { 3667 ret = 1; 3668 goto out; 3669 } 3670 } 3671 3672 n = rb_prev(&info->offset_index); 3673 while (n) { 3674 tmp = rb_entry(n, struct btrfs_free_space, 3675 offset_index); 3676 if (tmp->offset + tmp->bytes < offset) 3677 break; 3678 if (offset + bytes < tmp->offset) { 3679 n = rb_prev(&tmp->offset_index); 3680 continue; 3681 } 3682 info = tmp; 3683 goto have_info; 3684 } 3685 3686 n = rb_next(&info->offset_index); 3687 while (n) { 3688 tmp = rb_entry(n, struct btrfs_free_space, 3689 offset_index); 3690 if (offset + bytes < tmp->offset) 3691 break; 3692 if (tmp->offset + tmp->bytes < offset) { 3693 n = rb_next(&tmp->offset_index); 3694 continue; 3695 } 3696 info = tmp; 3697 goto have_info; 3698 } 3699 3700 ret = 0; 3701 goto out; 3702 } 3703 3704 if (info->offset == offset) { 3705 ret = 1; 3706 goto out; 3707 } 3708 3709 if (offset > info->offset && offset < info->offset + info->bytes) 3710 ret = 1; 3711 out: 3712 
spin_unlock(&ctl->tree_lock); 3713 return ret; 3714 } 3715 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3716