1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 #include "volumes.h" 31 32 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 33 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 34 35 struct btrfs_trim_range { 36 u64 start; 37 u64 bytes; 38 struct list_head list; 39 }; 40 41 static int link_free_space(struct btrfs_free_space_ctl *ctl, 42 struct btrfs_free_space *info); 43 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 44 struct btrfs_free_space *info); 45 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 46 struct btrfs_trans_handle *trans, 47 struct btrfs_io_ctl *io_ctl, 48 struct btrfs_path *path); 49 50 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 51 struct btrfs_path *path, 52 u64 offset) 53 { 54 struct btrfs_fs_info *fs_info = root->fs_info; 55 struct btrfs_key key; 56 struct btrfs_key location; 57 struct btrfs_disk_key disk_key; 58 struct btrfs_free_space_header *header; 59 struct extent_buffer *leaf; 60 struct inode *inode = NULL; 61 int ret; 62 63 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 64 key.offset = offset; 65 key.type = 0; 66 67 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 68 if (ret < 0) 69 return ERR_PTR(ret); 70 if (ret > 0) { 71 btrfs_release_path(path); 72 return ERR_PTR(-ENOENT); 73 } 74 75 leaf = path->nodes[0]; 76 header = btrfs_item_ptr(leaf, path->slots[0], 77 struct btrfs_free_space_header); 78 btrfs_free_space_key(leaf, header, &disk_key); 79 btrfs_disk_key_to_cpu(&location, &disk_key); 80 btrfs_release_path(path); 81 82 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 83 if (IS_ERR(inode)) 84 return inode; 85 if (is_bad_inode(inode)) { 86 iput(inode); 87 return ERR_PTR(-ENOENT); 88 } 89 90 mapping_set_gfp_mask(inode->i_mapping, 91 mapping_gfp_constraint(inode->i_mapping, 92 ~(__GFP_FS | __GFP_HIGHMEM))); 93 94 return inode; 95 } 96 97 struct inode *lookup_free_space_inode(struct btrfs_fs_info *fs_info, 98 struct btrfs_block_group_cache 99 *block_group, struct btrfs_path *path) 100 { 101 struct inode *inode = NULL; 102 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 103 104 spin_lock(&block_group->lock); 105 if (block_group->inode) 106 inode = igrab(block_group->inode); 107 spin_unlock(&block_group->lock); 108 if (inode) 109 return inode; 110 111 inode = __lookup_free_space_inode(fs_info->tree_root, path, 112 block_group->key.objectid); 113 if (IS_ERR(inode)) 114 return inode; 115 116 spin_lock(&block_group->lock); 117 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 118 btrfs_info(fs_info, "Old style space inode found, 
converting."); 119 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 120 BTRFS_INODE_NODATACOW; 121 block_group->disk_cache_state = BTRFS_DC_CLEAR; 122 } 123 124 if (!block_group->iref) { 125 block_group->inode = igrab(inode); 126 block_group->iref = 1; 127 } 128 spin_unlock(&block_group->lock); 129 130 return inode; 131 } 132 133 static int __create_free_space_inode(struct btrfs_root *root, 134 struct btrfs_trans_handle *trans, 135 struct btrfs_path *path, 136 u64 ino, u64 offset) 137 { 138 struct btrfs_key key; 139 struct btrfs_disk_key disk_key; 140 struct btrfs_free_space_header *header; 141 struct btrfs_inode_item *inode_item; 142 struct extent_buffer *leaf; 143 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 144 int ret; 145 146 ret = btrfs_insert_empty_inode(trans, root, path, ino); 147 if (ret) 148 return ret; 149 150 /* We inline crc's for the free disk space cache */ 151 if (ino != BTRFS_FREE_INO_OBJECTID) 152 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 153 154 leaf = path->nodes[0]; 155 inode_item = btrfs_item_ptr(leaf, path->slots[0], 156 struct btrfs_inode_item); 157 btrfs_item_key(leaf, &disk_key, path->slots[0]); 158 memzero_extent_buffer(leaf, (unsigned long)inode_item, 159 sizeof(*inode_item)); 160 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 161 btrfs_set_inode_size(leaf, inode_item, 0); 162 btrfs_set_inode_nbytes(leaf, inode_item, 0); 163 btrfs_set_inode_uid(leaf, inode_item, 0); 164 btrfs_set_inode_gid(leaf, inode_item, 0); 165 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 166 btrfs_set_inode_flags(leaf, inode_item, flags); 167 btrfs_set_inode_nlink(leaf, inode_item, 1); 168 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 169 btrfs_set_inode_block_group(leaf, inode_item, offset); 170 btrfs_mark_buffer_dirty(leaf); 171 btrfs_release_path(path); 172 173 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 174 key.offset = offset; 175 key.type = 0; 176 ret = btrfs_insert_empty_item(trans, root, path, &key, 177 sizeof(struct btrfs_free_space_header)); 178 if (ret < 0) { 179 btrfs_release_path(path); 180 return ret; 181 } 182 183 leaf = path->nodes[0]; 184 header = btrfs_item_ptr(leaf, path->slots[0], 185 struct btrfs_free_space_header); 186 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 187 btrfs_set_free_space_key(leaf, header, &disk_key); 188 btrfs_mark_buffer_dirty(leaf); 189 btrfs_release_path(path); 190 191 return 0; 192 } 193 194 int create_free_space_inode(struct btrfs_fs_info *fs_info, 195 struct btrfs_trans_handle *trans, 196 struct btrfs_block_group_cache *block_group, 197 struct btrfs_path *path) 198 { 199 int ret; 200 u64 ino; 201 202 ret = btrfs_find_free_objectid(fs_info->tree_root, &ino); 203 if (ret < 0) 204 return ret; 205 206 return __create_free_space_inode(fs_info->tree_root, trans, path, ino, 207 block_group->key.objectid); 208 } 209 210 int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info, 211 struct btrfs_block_rsv *rsv) 212 { 213 u64 needed_bytes; 214 int ret; 215 216 /* 1 for slack space, 1 for updating the inode */ 217 needed_bytes = btrfs_calc_trunc_metadata_size(fs_info, 1) + 218 btrfs_calc_trans_metadata_size(fs_info, 1); 219 220 spin_lock(&rsv->lock); 221 if (rsv->reserved < needed_bytes) 222 ret = -ENOSPC; 223 else 224 ret = 0; 225 spin_unlock(&rsv->lock); 226 return ret; 227 } 228 229 int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, 230 struct btrfs_block_group_cache *block_group, 231 struct inode *inode) 232 { 233 struct btrfs_root 
*root = BTRFS_I(inode)->root; 234 int ret = 0; 235 bool locked = false; 236 237 if (block_group) { 238 struct btrfs_path *path = btrfs_alloc_path(); 239 240 if (!path) { 241 ret = -ENOMEM; 242 goto fail; 243 } 244 locked = true; 245 mutex_lock(&trans->transaction->cache_write_mutex); 246 if (!list_empty(&block_group->io_list)) { 247 list_del_init(&block_group->io_list); 248 249 btrfs_wait_cache_io(trans, block_group, path); 250 btrfs_put_block_group(block_group); 251 } 252 253 /* 254 * now that we've truncated the cache away, its no longer 255 * setup or written 256 */ 257 spin_lock(&block_group->lock); 258 block_group->disk_cache_state = BTRFS_DC_CLEAR; 259 spin_unlock(&block_group->lock); 260 btrfs_free_path(path); 261 } 262 263 btrfs_i_size_write(inode, 0); 264 truncate_pagecache(inode, 0); 265 266 /* 267 * We don't need an orphan item because truncating the free space cache 268 * will never be split across transactions. 269 * We don't need to check for -EAGAIN because we're a free space 270 * cache inode 271 */ 272 ret = btrfs_truncate_inode_items(trans, root, inode, 273 0, BTRFS_EXTENT_DATA_KEY); 274 if (ret) 275 goto fail; 276 277 ret = btrfs_update_inode(trans, root, inode); 278 279 fail: 280 if (locked) 281 mutex_unlock(&trans->transaction->cache_write_mutex); 282 if (ret) 283 btrfs_abort_transaction(trans, ret); 284 285 return ret; 286 } 287 288 static void readahead_cache(struct inode *inode) 289 { 290 struct file_ra_state *ra; 291 unsigned long last_index; 292 293 ra = kzalloc(sizeof(*ra), GFP_NOFS); 294 if (!ra) 295 return; 296 297 file_ra_state_init(ra, inode->i_mapping); 298 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 299 300 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 301 302 kfree(ra); 303 } 304 305 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 306 int write) 307 { 308 int num_pages; 309 int check_crcs = 0; 310 311 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 312 313 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 314 check_crcs = 1; 315 316 /* Make sure we can fit our crcs into the first page */ 317 if (write && check_crcs && 318 (num_pages * sizeof(u32)) >= PAGE_SIZE) 319 return -ENOSPC; 320 321 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 322 323 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 324 if (!io_ctl->pages) 325 return -ENOMEM; 326 327 io_ctl->num_pages = num_pages; 328 io_ctl->fs_info = btrfs_sb(inode->i_sb); 329 io_ctl->check_crcs = check_crcs; 330 io_ctl->inode = inode; 331 332 return 0; 333 } 334 335 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 336 { 337 kfree(io_ctl->pages); 338 io_ctl->pages = NULL; 339 } 340 341 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 342 { 343 if (io_ctl->cur) { 344 io_ctl->cur = NULL; 345 io_ctl->orig = NULL; 346 } 347 } 348 349 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 350 { 351 ASSERT(io_ctl->index < io_ctl->num_pages); 352 io_ctl->page = io_ctl->pages[io_ctl->index++]; 353 io_ctl->cur = page_address(io_ctl->page); 354 io_ctl->orig = io_ctl->cur; 355 io_ctl->size = PAGE_SIZE; 356 if (clear) 357 memset(io_ctl->cur, 0, PAGE_SIZE); 358 } 359 360 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 361 { 362 int i; 363 364 io_ctl_unmap_page(io_ctl); 365 366 for (i = 0; i < io_ctl->num_pages; i++) { 367 if (io_ctl->pages[i]) { 368 ClearPageChecked(io_ctl->pages[i]); 369 unlock_page(io_ctl->pages[i]); 370 put_page(io_ctl->pages[i]); 371 } 372 } 373 } 374 375 static int 
io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 376 int uptodate) 377 { 378 struct page *page; 379 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 380 int i; 381 382 for (i = 0; i < io_ctl->num_pages; i++) { 383 page = find_or_create_page(inode->i_mapping, i, mask); 384 if (!page) { 385 io_ctl_drop_pages(io_ctl); 386 return -ENOMEM; 387 } 388 io_ctl->pages[i] = page; 389 if (uptodate && !PageUptodate(page)) { 390 btrfs_readpage(NULL, page); 391 lock_page(page); 392 if (!PageUptodate(page)) { 393 btrfs_err(BTRFS_I(inode)->root->fs_info, 394 "error reading free space cache"); 395 io_ctl_drop_pages(io_ctl); 396 return -EIO; 397 } 398 } 399 } 400 401 for (i = 0; i < io_ctl->num_pages; i++) { 402 clear_page_dirty_for_io(io_ctl->pages[i]); 403 set_page_extent_mapped(io_ctl->pages[i]); 404 } 405 406 return 0; 407 } 408 409 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 410 { 411 __le64 *val; 412 413 io_ctl_map_page(io_ctl, 1); 414 415 /* 416 * Skip the csum areas. If we don't check crcs then we just have a 417 * 64bit chunk at the front of the first page. 418 */ 419 if (io_ctl->check_crcs) { 420 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 421 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 422 } else { 423 io_ctl->cur += sizeof(u64); 424 io_ctl->size -= sizeof(u64) * 2; 425 } 426 427 val = io_ctl->cur; 428 *val = cpu_to_le64(generation); 429 io_ctl->cur += sizeof(u64); 430 } 431 432 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 433 { 434 __le64 *gen; 435 436 /* 437 * Skip the crc area. If we don't check crcs then we just have a 64bit 438 * chunk at the front of the first page. 439 */ 440 if (io_ctl->check_crcs) { 441 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 442 io_ctl->size -= sizeof(u64) + 443 (sizeof(u32) * io_ctl->num_pages); 444 } else { 445 io_ctl->cur += sizeof(u64); 446 io_ctl->size -= sizeof(u64) * 2; 447 } 448 449 gen = io_ctl->cur; 450 if (le64_to_cpu(*gen) != generation) { 451 btrfs_err_rl(io_ctl->fs_info, 452 "space cache generation (%llu) does not match inode (%llu)", 453 *gen, generation); 454 io_ctl_unmap_page(io_ctl); 455 return -EIO; 456 } 457 io_ctl->cur += sizeof(u64); 458 return 0; 459 } 460 461 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 462 { 463 u32 *tmp; 464 u32 crc = ~(u32)0; 465 unsigned offset = 0; 466 467 if (!io_ctl->check_crcs) { 468 io_ctl_unmap_page(io_ctl); 469 return; 470 } 471 472 if (index == 0) 473 offset = sizeof(u32) * io_ctl->num_pages; 474 475 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 476 PAGE_SIZE - offset); 477 btrfs_csum_final(crc, (u8 *)&crc); 478 io_ctl_unmap_page(io_ctl); 479 tmp = page_address(io_ctl->pages[0]); 480 tmp += index; 481 *tmp = crc; 482 } 483 484 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 485 { 486 u32 *tmp, val; 487 u32 crc = ~(u32)0; 488 unsigned offset = 0; 489 490 if (!io_ctl->check_crcs) { 491 io_ctl_map_page(io_ctl, 0); 492 return 0; 493 } 494 495 if (index == 0) 496 offset = sizeof(u32) * io_ctl->num_pages; 497 498 tmp = page_address(io_ctl->pages[0]); 499 tmp += index; 500 val = *tmp; 501 502 io_ctl_map_page(io_ctl, 0); 503 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 504 PAGE_SIZE - offset); 505 btrfs_csum_final(crc, (u8 *)&crc); 506 if (val != crc) { 507 btrfs_err_rl(io_ctl->fs_info, 508 "csum mismatch on free space cache"); 509 io_ctl_unmap_page(io_ctl); 510 return -EIO; 511 } 512 513 return 0; 514 } 515 516 static int io_ctl_add_entry(struct 
btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 517 void *bitmap) 518 { 519 struct btrfs_free_space_entry *entry; 520 521 if (!io_ctl->cur) 522 return -ENOSPC; 523 524 entry = io_ctl->cur; 525 entry->offset = cpu_to_le64(offset); 526 entry->bytes = cpu_to_le64(bytes); 527 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 528 BTRFS_FREE_SPACE_EXTENT; 529 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 530 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 531 532 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 533 return 0; 534 535 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 536 537 /* No more pages to map */ 538 if (io_ctl->index >= io_ctl->num_pages) 539 return 0; 540 541 /* map the next page */ 542 io_ctl_map_page(io_ctl, 1); 543 return 0; 544 } 545 546 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 547 { 548 if (!io_ctl->cur) 549 return -ENOSPC; 550 551 /* 552 * If we aren't at the start of the current page, unmap this one and 553 * map the next one if there is any left. 554 */ 555 if (io_ctl->cur != io_ctl->orig) { 556 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 557 if (io_ctl->index >= io_ctl->num_pages) 558 return -ENOSPC; 559 io_ctl_map_page(io_ctl, 0); 560 } 561 562 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 563 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 564 if (io_ctl->index < io_ctl->num_pages) 565 io_ctl_map_page(io_ctl, 0); 566 return 0; 567 } 568 569 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 570 { 571 /* 572 * If we're not on the boundary we know we've modified the page and we 573 * need to crc the page. 574 */ 575 if (io_ctl->cur != io_ctl->orig) 576 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 577 else 578 io_ctl_unmap_page(io_ctl); 579 580 while (io_ctl->index < io_ctl->num_pages) { 581 io_ctl_map_page(io_ctl, 1); 582 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 583 } 584 } 585 586 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 587 struct btrfs_free_space *entry, u8 *type) 588 { 589 struct btrfs_free_space_entry *e; 590 int ret; 591 592 if (!io_ctl->cur) { 593 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 594 if (ret) 595 return ret; 596 } 597 598 e = io_ctl->cur; 599 entry->offset = le64_to_cpu(e->offset); 600 entry->bytes = le64_to_cpu(e->bytes); 601 *type = e->type; 602 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 603 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 604 605 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 606 return 0; 607 608 io_ctl_unmap_page(io_ctl); 609 610 return 0; 611 } 612 613 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 614 struct btrfs_free_space *entry) 615 { 616 int ret; 617 618 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 619 if (ret) 620 return ret; 621 622 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 623 io_ctl_unmap_page(io_ctl); 624 625 return 0; 626 } 627 628 /* 629 * Since we attach pinned extents after the fact we can have contiguous sections 630 * of free space that are split up in entries. This poses a problem with the 631 * tree logging stuff since it could have allocated across what appears to be 2 632 * entries since we would have merged the entries when adding the pinned extents 633 * back to the free space cache. So run through the space cache that we just 634 * loaded and merge contiguous entries. This will make the log replay stuff not 635 * blow up and it will make for nicer allocator behavior. 
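 *
 * For example, an extent entry [0, 4K) and a pinned range [4K, 8K) that was
 * written out as a separate entry are collapsed here into a single [0, 8K)
 * entry, which is the view the in-memory tree has once the pinned extent is
 * added back to it.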
636 */ 637 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 638 { 639 struct btrfs_free_space *e, *prev = NULL; 640 struct rb_node *n; 641 642 again: 643 spin_lock(&ctl->tree_lock); 644 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 645 e = rb_entry(n, struct btrfs_free_space, offset_index); 646 if (!prev) 647 goto next; 648 if (e->bitmap || prev->bitmap) 649 goto next; 650 if (prev->offset + prev->bytes == e->offset) { 651 unlink_free_space(ctl, prev); 652 unlink_free_space(ctl, e); 653 prev->bytes += e->bytes; 654 kmem_cache_free(btrfs_free_space_cachep, e); 655 link_free_space(ctl, prev); 656 prev = NULL; 657 spin_unlock(&ctl->tree_lock); 658 goto again; 659 } 660 next: 661 prev = e; 662 } 663 spin_unlock(&ctl->tree_lock); 664 } 665 666 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 667 struct btrfs_free_space_ctl *ctl, 668 struct btrfs_path *path, u64 offset) 669 { 670 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 671 struct btrfs_free_space_header *header; 672 struct extent_buffer *leaf; 673 struct btrfs_io_ctl io_ctl; 674 struct btrfs_key key; 675 struct btrfs_free_space *e, *n; 676 LIST_HEAD(bitmaps); 677 u64 num_entries; 678 u64 num_bitmaps; 679 u64 generation; 680 u8 type; 681 int ret = 0; 682 683 /* Nothing in the space cache, goodbye */ 684 if (!i_size_read(inode)) 685 return 0; 686 687 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 688 key.offset = offset; 689 key.type = 0; 690 691 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 692 if (ret < 0) 693 return 0; 694 else if (ret > 0) { 695 btrfs_release_path(path); 696 return 0; 697 } 698 699 ret = -1; 700 701 leaf = path->nodes[0]; 702 header = btrfs_item_ptr(leaf, path->slots[0], 703 struct btrfs_free_space_header); 704 num_entries = btrfs_free_space_entries(leaf, header); 705 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 706 generation = btrfs_free_space_generation(leaf, header); 707 btrfs_release_path(path); 708 709 if (!BTRFS_I(inode)->generation) { 710 btrfs_info(fs_info, 711 "The free space cache file (%llu) is invalid. 
skip it\n", 712 offset); 713 return 0; 714 } 715 716 if (BTRFS_I(inode)->generation != generation) { 717 btrfs_err(fs_info, 718 "free space inode generation (%llu) did not match free space cache generation (%llu)", 719 BTRFS_I(inode)->generation, generation); 720 return 0; 721 } 722 723 if (!num_entries) 724 return 0; 725 726 ret = io_ctl_init(&io_ctl, inode, 0); 727 if (ret) 728 return ret; 729 730 readahead_cache(inode); 731 732 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 733 if (ret) 734 goto out; 735 736 ret = io_ctl_check_crc(&io_ctl, 0); 737 if (ret) 738 goto free_cache; 739 740 ret = io_ctl_check_generation(&io_ctl, generation); 741 if (ret) 742 goto free_cache; 743 744 while (num_entries) { 745 e = kmem_cache_zalloc(btrfs_free_space_cachep, 746 GFP_NOFS); 747 if (!e) 748 goto free_cache; 749 750 ret = io_ctl_read_entry(&io_ctl, e, &type); 751 if (ret) { 752 kmem_cache_free(btrfs_free_space_cachep, e); 753 goto free_cache; 754 } 755 756 if (!e->bytes) { 757 kmem_cache_free(btrfs_free_space_cachep, e); 758 goto free_cache; 759 } 760 761 if (type == BTRFS_FREE_SPACE_EXTENT) { 762 spin_lock(&ctl->tree_lock); 763 ret = link_free_space(ctl, e); 764 spin_unlock(&ctl->tree_lock); 765 if (ret) { 766 btrfs_err(fs_info, 767 "Duplicate entries in free space cache, dumping"); 768 kmem_cache_free(btrfs_free_space_cachep, e); 769 goto free_cache; 770 } 771 } else { 772 ASSERT(num_bitmaps); 773 num_bitmaps--; 774 e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 775 if (!e->bitmap) { 776 kmem_cache_free( 777 btrfs_free_space_cachep, e); 778 goto free_cache; 779 } 780 spin_lock(&ctl->tree_lock); 781 ret = link_free_space(ctl, e); 782 ctl->total_bitmaps++; 783 ctl->op->recalc_thresholds(ctl); 784 spin_unlock(&ctl->tree_lock); 785 if (ret) { 786 btrfs_err(fs_info, 787 "Duplicate entries in free space cache, dumping"); 788 kmem_cache_free(btrfs_free_space_cachep, e); 789 goto free_cache; 790 } 791 list_add_tail(&e->list, &bitmaps); 792 } 793 794 num_entries--; 795 } 796 797 io_ctl_unmap_page(&io_ctl); 798 799 /* 800 * We add the bitmaps at the end of the entries in order that 801 * the bitmap entries are added to the cache. 802 */ 803 list_for_each_entry_safe(e, n, &bitmaps, list) { 804 list_del_init(&e->list); 805 ret = io_ctl_read_bitmap(&io_ctl, e); 806 if (ret) 807 goto free_cache; 808 } 809 810 io_ctl_drop_pages(&io_ctl); 811 merge_space_tree(ctl); 812 ret = 1; 813 out: 814 io_ctl_free(&io_ctl); 815 return ret; 816 free_cache: 817 io_ctl_drop_pages(&io_ctl); 818 __btrfs_remove_free_space_cache(ctl); 819 goto out; 820 } 821 822 int load_free_space_cache(struct btrfs_fs_info *fs_info, 823 struct btrfs_block_group_cache *block_group) 824 { 825 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 826 struct inode *inode; 827 struct btrfs_path *path; 828 int ret = 0; 829 bool matched; 830 u64 used = btrfs_block_group_used(&block_group->item); 831 832 /* 833 * If this block group has been marked to be cleared for one reason or 834 * another then we can't trust the on disk cache, so just return. 
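	 * The free space for this group will be rebuilt at runtime instead,
	 * the next time the group is cached.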
835 */ 836 spin_lock(&block_group->lock); 837 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 838 spin_unlock(&block_group->lock); 839 return 0; 840 } 841 spin_unlock(&block_group->lock); 842 843 path = btrfs_alloc_path(); 844 if (!path) 845 return 0; 846 path->search_commit_root = 1; 847 path->skip_locking = 1; 848 849 inode = lookup_free_space_inode(fs_info, block_group, path); 850 if (IS_ERR(inode)) { 851 btrfs_free_path(path); 852 return 0; 853 } 854 855 /* We may have converted the inode and made the cache invalid. */ 856 spin_lock(&block_group->lock); 857 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 858 spin_unlock(&block_group->lock); 859 btrfs_free_path(path); 860 goto out; 861 } 862 spin_unlock(&block_group->lock); 863 864 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 865 path, block_group->key.objectid); 866 btrfs_free_path(path); 867 if (ret <= 0) 868 goto out; 869 870 spin_lock(&ctl->tree_lock); 871 matched = (ctl->free_space == (block_group->key.offset - used - 872 block_group->bytes_super)); 873 spin_unlock(&ctl->tree_lock); 874 875 if (!matched) { 876 __btrfs_remove_free_space_cache(ctl); 877 btrfs_warn(fs_info, 878 "block group %llu has wrong amount of free space", 879 block_group->key.objectid); 880 ret = -1; 881 } 882 out: 883 if (ret < 0) { 884 /* This cache is bogus, make sure it gets cleared */ 885 spin_lock(&block_group->lock); 886 block_group->disk_cache_state = BTRFS_DC_CLEAR; 887 spin_unlock(&block_group->lock); 888 ret = 0; 889 890 btrfs_warn(fs_info, 891 "failed to load free space cache for block group %llu, rebuilding it now", 892 block_group->key.objectid); 893 } 894 895 iput(inode); 896 return ret; 897 } 898 899 static noinline_for_stack 900 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 901 struct btrfs_free_space_ctl *ctl, 902 struct btrfs_block_group_cache *block_group, 903 int *entries, int *bitmaps, 904 struct list_head *bitmap_list) 905 { 906 int ret; 907 struct btrfs_free_cluster *cluster = NULL; 908 struct btrfs_free_cluster *cluster_locked = NULL; 909 struct rb_node *node = rb_first(&ctl->free_space_offset); 910 struct btrfs_trim_range *trim_entry; 911 912 /* Get the cluster for this block_group if it exists */ 913 if (block_group && !list_empty(&block_group->cluster_list)) { 914 cluster = list_entry(block_group->cluster_list.next, 915 struct btrfs_free_cluster, 916 block_group_list); 917 } 918 919 if (!node && cluster) { 920 cluster_locked = cluster; 921 spin_lock(&cluster_locked->lock); 922 node = rb_first(&cluster->root); 923 cluster = NULL; 924 } 925 926 /* Write out the extent entries */ 927 while (node) { 928 struct btrfs_free_space *e; 929 930 e = rb_entry(node, struct btrfs_free_space, offset_index); 931 *entries += 1; 932 933 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 934 e->bitmap); 935 if (ret) 936 goto fail; 937 938 if (e->bitmap) { 939 list_add_tail(&e->list, bitmap_list); 940 *bitmaps += 1; 941 } 942 node = rb_next(node); 943 if (!node && cluster) { 944 node = rb_first(&cluster->root); 945 cluster_locked = cluster; 946 spin_lock(&cluster_locked->lock); 947 cluster = NULL; 948 } 949 } 950 if (cluster_locked) { 951 spin_unlock(&cluster_locked->lock); 952 cluster_locked = NULL; 953 } 954 955 /* 956 * Make sure we don't miss any range that was removed from our rbtree 957 * because trimming is running. Otherwise after a umount+mount (or crash 958 * after committing the transaction) we would leak free space and get 959 * an inconsistent free space cache report from fsck. 
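	 * Ranges that are currently being trimmed are unlinked from the
	 * rbtree but tracked on ctl->trimming_ranges (we hold
	 * cache_writeout_mutex here), so write them out below as well.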
960 */ 961 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 962 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 963 trim_entry->bytes, NULL); 964 if (ret) 965 goto fail; 966 *entries += 1; 967 } 968 969 return 0; 970 fail: 971 if (cluster_locked) 972 spin_unlock(&cluster_locked->lock); 973 return -ENOSPC; 974 } 975 976 static noinline_for_stack int 977 update_cache_item(struct btrfs_trans_handle *trans, 978 struct btrfs_root *root, 979 struct inode *inode, 980 struct btrfs_path *path, u64 offset, 981 int entries, int bitmaps) 982 { 983 struct btrfs_key key; 984 struct btrfs_free_space_header *header; 985 struct extent_buffer *leaf; 986 int ret; 987 988 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 989 key.offset = offset; 990 key.type = 0; 991 992 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 993 if (ret < 0) { 994 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 995 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 996 GFP_NOFS); 997 goto fail; 998 } 999 leaf = path->nodes[0]; 1000 if (ret > 0) { 1001 struct btrfs_key found_key; 1002 ASSERT(path->slots[0]); 1003 path->slots[0]--; 1004 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1005 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1006 found_key.offset != offset) { 1007 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1008 inode->i_size - 1, 1009 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1010 NULL, GFP_NOFS); 1011 btrfs_release_path(path); 1012 goto fail; 1013 } 1014 } 1015 1016 BTRFS_I(inode)->generation = trans->transid; 1017 header = btrfs_item_ptr(leaf, path->slots[0], 1018 struct btrfs_free_space_header); 1019 btrfs_set_free_space_entries(leaf, header, entries); 1020 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1021 btrfs_set_free_space_generation(leaf, header, trans->transid); 1022 btrfs_mark_buffer_dirty(leaf); 1023 btrfs_release_path(path); 1024 1025 return 0; 1026 1027 fail: 1028 return -1; 1029 } 1030 1031 static noinline_for_stack int 1032 write_pinned_extent_entries(struct btrfs_fs_info *fs_info, 1033 struct btrfs_block_group_cache *block_group, 1034 struct btrfs_io_ctl *io_ctl, 1035 int *entries) 1036 { 1037 u64 start, extent_start, extent_end, len; 1038 struct extent_io_tree *unpin = NULL; 1039 int ret; 1040 1041 if (!block_group) 1042 return 0; 1043 1044 /* 1045 * We want to add any pinned extents to our free space cache 1046 * so we don't leak the space 1047 * 1048 * We shouldn't have switched the pinned extents yet so this is the 1049 * right one 1050 */ 1051 unpin = fs_info->pinned_extents; 1052 1053 start = block_group->key.objectid; 1054 1055 while (start < block_group->key.objectid + block_group->key.offset) { 1056 ret = find_first_extent_bit(unpin, start, 1057 &extent_start, &extent_end, 1058 EXTENT_DIRTY, NULL); 1059 if (ret) 1060 return 0; 1061 1062 /* This pinned extent is out of our range */ 1063 if (extent_start >= block_group->key.objectid + 1064 block_group->key.offset) 1065 return 0; 1066 1067 extent_start = max(extent_start, start); 1068 extent_end = min(block_group->key.objectid + 1069 block_group->key.offset, extent_end + 1); 1070 len = extent_end - extent_start; 1071 1072 *entries += 1; 1073 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1074 if (ret) 1075 return -ENOSPC; 1076 1077 start = extent_end; 1078 } 1079 1080 return 0; 1081 } 1082 1083 static noinline_for_stack int 1084 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1085 { 1086 struct btrfs_free_space *entry, *next; 1087 int ret; 1088 1089 /* Write out 
the bitmaps */ 1090 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1091 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1092 if (ret) 1093 return -ENOSPC; 1094 list_del_init(&entry->list); 1095 } 1096 1097 return 0; 1098 } 1099 1100 static int flush_dirty_cache(struct inode *inode) 1101 { 1102 int ret; 1103 1104 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1105 if (ret) 1106 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1107 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1108 GFP_NOFS); 1109 1110 return ret; 1111 } 1112 1113 static void noinline_for_stack 1114 cleanup_bitmap_list(struct list_head *bitmap_list) 1115 { 1116 struct btrfs_free_space *entry, *next; 1117 1118 list_for_each_entry_safe(entry, next, bitmap_list, list) 1119 list_del_init(&entry->list); 1120 } 1121 1122 static void noinline_for_stack 1123 cleanup_write_cache_enospc(struct inode *inode, 1124 struct btrfs_io_ctl *io_ctl, 1125 struct extent_state **cached_state) 1126 { 1127 io_ctl_drop_pages(io_ctl); 1128 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1129 i_size_read(inode) - 1, cached_state, 1130 GFP_NOFS); 1131 } 1132 1133 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1134 struct btrfs_trans_handle *trans, 1135 struct btrfs_block_group_cache *block_group, 1136 struct btrfs_io_ctl *io_ctl, 1137 struct btrfs_path *path, u64 offset) 1138 { 1139 int ret; 1140 struct inode *inode = io_ctl->inode; 1141 struct btrfs_fs_info *fs_info; 1142 1143 if (!inode) 1144 return 0; 1145 1146 fs_info = btrfs_sb(inode->i_sb); 1147 1148 /* Flush the dirty pages in the cache file. */ 1149 ret = flush_dirty_cache(inode); 1150 if (ret) 1151 goto out; 1152 1153 /* Update the cache item to tell everyone this cache file is valid. */ 1154 ret = update_cache_item(trans, root, inode, path, offset, 1155 io_ctl->entries, io_ctl->bitmaps); 1156 out: 1157 io_ctl_free(io_ctl); 1158 if (ret) { 1159 invalidate_inode_pages2(inode->i_mapping); 1160 BTRFS_I(inode)->generation = 0; 1161 if (block_group) { 1162 #ifdef DEBUG 1163 btrfs_err(fs_info, 1164 "failed to write free space cache for block group %llu", 1165 block_group->key.objectid); 1166 #endif 1167 } 1168 } 1169 btrfs_update_inode(trans, root, inode); 1170 1171 if (block_group) { 1172 /* the dirty list is protected by the dirty_bgs_lock */ 1173 spin_lock(&trans->transaction->dirty_bgs_lock); 1174 1175 /* the disk_cache_state is protected by the block group lock */ 1176 spin_lock(&block_group->lock); 1177 1178 /* 1179 * only mark this as written if we didn't get put back on 1180 * the dirty list while waiting for IO. 
Otherwise our 1181 * cache state won't be right, and we won't get written again 1182 */ 1183 if (!ret && list_empty(&block_group->dirty_list)) 1184 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1185 else if (ret) 1186 block_group->disk_cache_state = BTRFS_DC_ERROR; 1187 1188 spin_unlock(&block_group->lock); 1189 spin_unlock(&trans->transaction->dirty_bgs_lock); 1190 io_ctl->inode = NULL; 1191 iput(inode); 1192 } 1193 1194 return ret; 1195 1196 } 1197 1198 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1199 struct btrfs_trans_handle *trans, 1200 struct btrfs_io_ctl *io_ctl, 1201 struct btrfs_path *path) 1202 { 1203 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1204 } 1205 1206 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1207 struct btrfs_block_group_cache *block_group, 1208 struct btrfs_path *path) 1209 { 1210 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1211 block_group, &block_group->io_ctl, 1212 path, block_group->key.objectid); 1213 } 1214 1215 /** 1216 * __btrfs_write_out_cache - write out cached info to an inode 1217 * @root - the root the inode belongs to 1218 * @ctl - the free space cache we are going to write out 1219 * @block_group - the block_group for this cache if it belongs to a block_group 1220 * @trans - the trans handle 1221 * 1222 * This function writes out a free space cache struct to disk for quick recovery 1223 * on mount. This will return 0 if it was successful in writing the cache out, 1224 * or an errno if it was not. 1225 */ 1226 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1227 struct btrfs_free_space_ctl *ctl, 1228 struct btrfs_block_group_cache *block_group, 1229 struct btrfs_io_ctl *io_ctl, 1230 struct btrfs_trans_handle *trans) 1231 { 1232 struct btrfs_fs_info *fs_info = root->fs_info; 1233 struct extent_state *cached_state = NULL; 1234 LIST_HEAD(bitmap_list); 1235 int entries = 0; 1236 int bitmaps = 0; 1237 int ret; 1238 int must_iput = 0; 1239 1240 if (!i_size_read(inode)) 1241 return -EIO; 1242 1243 WARN_ON(io_ctl->pages); 1244 ret = io_ctl_init(io_ctl, inode, 1); 1245 if (ret) 1246 return ret; 1247 1248 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1249 down_write(&block_group->data_rwsem); 1250 spin_lock(&block_group->lock); 1251 if (block_group->delalloc_bytes) { 1252 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1253 spin_unlock(&block_group->lock); 1254 up_write(&block_group->data_rwsem); 1255 BTRFS_I(inode)->generation = 0; 1256 ret = 0; 1257 must_iput = 1; 1258 goto out; 1259 } 1260 spin_unlock(&block_group->lock); 1261 } 1262 1263 /* Lock all pages first so we can lock the extent safely. */ 1264 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1265 if (ret) 1266 goto out; 1267 1268 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1269 &cached_state); 1270 1271 io_ctl_set_generation(io_ctl, trans->transid); 1272 1273 mutex_lock(&ctl->cache_writeout_mutex); 1274 /* Write out the extent entries in the free space cache */ 1275 spin_lock(&ctl->tree_lock); 1276 ret = write_cache_extent_entries(io_ctl, ctl, 1277 block_group, &entries, &bitmaps, 1278 &bitmap_list); 1279 if (ret) 1280 goto out_nospc_locked; 1281 1282 /* 1283 * Some spaces that are freed in the current transaction are pinned, 1284 * they will be added into free space cache after the transaction is 1285 * committed, we shouldn't lose them. 
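	 * They are written as ordinary extent entries, so the on-disk cache
	 * describes the free space as it will look after the commit.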
1286 * 1287 * If this changes while we are working we'll get added back to 1288 * the dirty list and redo it. No locking needed 1289 */ 1290 ret = write_pinned_extent_entries(fs_info, block_group, 1291 io_ctl, &entries); 1292 if (ret) 1293 goto out_nospc_locked; 1294 1295 /* 1296 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1297 * locked while doing it because a concurrent trim can be manipulating 1298 * or freeing the bitmap. 1299 */ 1300 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1301 spin_unlock(&ctl->tree_lock); 1302 mutex_unlock(&ctl->cache_writeout_mutex); 1303 if (ret) 1304 goto out_nospc; 1305 1306 /* Zero out the rest of the pages just to make sure */ 1307 io_ctl_zero_remaining_pages(io_ctl); 1308 1309 /* Everything is written out, now we dirty the pages in the file. */ 1310 ret = btrfs_dirty_pages(inode, io_ctl->pages, io_ctl->num_pages, 0, 1311 i_size_read(inode), &cached_state); 1312 if (ret) 1313 goto out_nospc; 1314 1315 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1316 up_write(&block_group->data_rwsem); 1317 /* 1318 * Release the pages and unlock the extent, we will flush 1319 * them out later 1320 */ 1321 io_ctl_drop_pages(io_ctl); 1322 1323 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1324 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1325 1326 /* 1327 * at this point the pages are under IO and we're happy, 1328 * The caller is responsible for waiting on them and updating the 1329 * the cache and the inode 1330 */ 1331 io_ctl->entries = entries; 1332 io_ctl->bitmaps = bitmaps; 1333 1334 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1335 if (ret) 1336 goto out; 1337 1338 return 0; 1339 1340 out: 1341 io_ctl->inode = NULL; 1342 io_ctl_free(io_ctl); 1343 if (ret) { 1344 invalidate_inode_pages2(inode->i_mapping); 1345 BTRFS_I(inode)->generation = 0; 1346 } 1347 btrfs_update_inode(trans, root, inode); 1348 if (must_iput) 1349 iput(inode); 1350 return ret; 1351 1352 out_nospc_locked: 1353 cleanup_bitmap_list(&bitmap_list); 1354 spin_unlock(&ctl->tree_lock); 1355 mutex_unlock(&ctl->cache_writeout_mutex); 1356 1357 out_nospc: 1358 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1359 1360 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1361 up_write(&block_group->data_rwsem); 1362 1363 goto out; 1364 } 1365 1366 int btrfs_write_out_cache(struct btrfs_fs_info *fs_info, 1367 struct btrfs_trans_handle *trans, 1368 struct btrfs_block_group_cache *block_group, 1369 struct btrfs_path *path) 1370 { 1371 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1372 struct inode *inode; 1373 int ret = 0; 1374 1375 spin_lock(&block_group->lock); 1376 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1377 spin_unlock(&block_group->lock); 1378 return 0; 1379 } 1380 spin_unlock(&block_group->lock); 1381 1382 inode = lookup_free_space_inode(fs_info, block_group, path); 1383 if (IS_ERR(inode)) 1384 return 0; 1385 1386 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1387 block_group, &block_group->io_ctl, trans); 1388 if (ret) { 1389 #ifdef DEBUG 1390 btrfs_err(fs_info, 1391 "failed to write free space cache for block group %llu", 1392 block_group->key.objectid); 1393 #endif 1394 spin_lock(&block_group->lock); 1395 block_group->disk_cache_state = BTRFS_DC_ERROR; 1396 spin_unlock(&block_group->lock); 1397 1398 block_group->io_ctl.inode = NULL; 1399 iput(inode); 1400 } 1401 1402 /* 1403 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1404 * to wait for IO and put 
the inode 1405 */ 1406 1407 return ret; 1408 } 1409 1410 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1411 u64 offset) 1412 { 1413 ASSERT(offset >= bitmap_start); 1414 offset -= bitmap_start; 1415 return (unsigned long)(div_u64(offset, unit)); 1416 } 1417 1418 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1419 { 1420 return (unsigned long)(div_u64(bytes, unit)); 1421 } 1422 1423 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1424 u64 offset) 1425 { 1426 u64 bitmap_start; 1427 u64 bytes_per_bitmap; 1428 1429 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1430 bitmap_start = offset - ctl->start; 1431 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1432 bitmap_start *= bytes_per_bitmap; 1433 bitmap_start += ctl->start; 1434 1435 return bitmap_start; 1436 } 1437 1438 static int tree_insert_offset(struct rb_root *root, u64 offset, 1439 struct rb_node *node, int bitmap) 1440 { 1441 struct rb_node **p = &root->rb_node; 1442 struct rb_node *parent = NULL; 1443 struct btrfs_free_space *info; 1444 1445 while (*p) { 1446 parent = *p; 1447 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1448 1449 if (offset < info->offset) { 1450 p = &(*p)->rb_left; 1451 } else if (offset > info->offset) { 1452 p = &(*p)->rb_right; 1453 } else { 1454 /* 1455 * we could have a bitmap entry and an extent entry 1456 * share the same offset. If this is the case, we want 1457 * the extent entry to always be found first if we do a 1458 * linear search through the tree, since we want to have 1459 * the quickest allocation time, and allocating from an 1460 * extent is faster than allocating from a bitmap. So 1461 * if we're inserting a bitmap and we find an entry at 1462 * this offset, we want to go right, or after this entry 1463 * logically. If we are inserting an extent and we've 1464 * found a bitmap, we want to go left, or before 1465 * logically. 1466 */ 1467 if (bitmap) { 1468 if (info->bitmap) { 1469 WARN_ON_ONCE(1); 1470 return -EEXIST; 1471 } 1472 p = &(*p)->rb_right; 1473 } else { 1474 if (!info->bitmap) { 1475 WARN_ON_ONCE(1); 1476 return -EEXIST; 1477 } 1478 p = &(*p)->rb_left; 1479 } 1480 } 1481 } 1482 1483 rb_link_node(node, parent, p); 1484 rb_insert_color(node, root); 1485 1486 return 0; 1487 } 1488 1489 /* 1490 * searches the tree for the given offset. 1491 * 1492 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1493 * want a section that has at least bytes size and comes at or after the given 1494 * offset. 1495 */ 1496 static struct btrfs_free_space * 1497 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1498 u64 offset, int bitmap_only, int fuzzy) 1499 { 1500 struct rb_node *n = ctl->free_space_offset.rb_node; 1501 struct btrfs_free_space *entry, *prev = NULL; 1502 1503 /* find entry that is closest to the 'offset' */ 1504 while (1) { 1505 if (!n) { 1506 entry = NULL; 1507 break; 1508 } 1509 1510 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1511 prev = entry; 1512 1513 if (offset < entry->offset) 1514 n = n->rb_left; 1515 else if (offset > entry->offset) 1516 n = n->rb_right; 1517 else 1518 break; 1519 } 1520 1521 if (bitmap_only) { 1522 if (!entry) 1523 return NULL; 1524 if (entry->bitmap) 1525 return entry; 1526 1527 /* 1528 * bitmap entry and extent entry may share same offset, 1529 * in that case, bitmap entry comes after extent entry. 
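		 * tree_insert_offset() guarantees this ordering: a bitmap
		 * inserted at an offset that already has an extent entry is
		 * pushed to the right of that entry.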
1530 */ 1531 n = rb_next(n); 1532 if (!n) 1533 return NULL; 1534 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1535 if (entry->offset != offset) 1536 return NULL; 1537 1538 WARN_ON(!entry->bitmap); 1539 return entry; 1540 } else if (entry) { 1541 if (entry->bitmap) { 1542 /* 1543 * if previous extent entry covers the offset, 1544 * we should return it instead of the bitmap entry 1545 */ 1546 n = rb_prev(&entry->offset_index); 1547 if (n) { 1548 prev = rb_entry(n, struct btrfs_free_space, 1549 offset_index); 1550 if (!prev->bitmap && 1551 prev->offset + prev->bytes > offset) 1552 entry = prev; 1553 } 1554 } 1555 return entry; 1556 } 1557 1558 if (!prev) 1559 return NULL; 1560 1561 /* find last entry before the 'offset' */ 1562 entry = prev; 1563 if (entry->offset > offset) { 1564 n = rb_prev(&entry->offset_index); 1565 if (n) { 1566 entry = rb_entry(n, struct btrfs_free_space, 1567 offset_index); 1568 ASSERT(entry->offset <= offset); 1569 } else { 1570 if (fuzzy) 1571 return entry; 1572 else 1573 return NULL; 1574 } 1575 } 1576 1577 if (entry->bitmap) { 1578 n = rb_prev(&entry->offset_index); 1579 if (n) { 1580 prev = rb_entry(n, struct btrfs_free_space, 1581 offset_index); 1582 if (!prev->bitmap && 1583 prev->offset + prev->bytes > offset) 1584 return prev; 1585 } 1586 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1587 return entry; 1588 } else if (entry->offset + entry->bytes > offset) 1589 return entry; 1590 1591 if (!fuzzy) 1592 return NULL; 1593 1594 while (1) { 1595 if (entry->bitmap) { 1596 if (entry->offset + BITS_PER_BITMAP * 1597 ctl->unit > offset) 1598 break; 1599 } else { 1600 if (entry->offset + entry->bytes > offset) 1601 break; 1602 } 1603 1604 n = rb_next(&entry->offset_index); 1605 if (!n) 1606 return NULL; 1607 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1608 } 1609 return entry; 1610 } 1611 1612 static inline void 1613 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1614 struct btrfs_free_space *info) 1615 { 1616 rb_erase(&info->offset_index, &ctl->free_space_offset); 1617 ctl->free_extents--; 1618 } 1619 1620 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1621 struct btrfs_free_space *info) 1622 { 1623 __unlink_free_space(ctl, info); 1624 ctl->free_space -= info->bytes; 1625 } 1626 1627 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1628 struct btrfs_free_space *info) 1629 { 1630 int ret = 0; 1631 1632 ASSERT(info->bytes || info->bitmap); 1633 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1634 &info->offset_index, (info->bitmap != NULL)); 1635 if (ret) 1636 return ret; 1637 1638 ctl->free_space += info->bytes; 1639 ctl->free_extents++; 1640 return ret; 1641 } 1642 1643 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1644 { 1645 struct btrfs_block_group_cache *block_group = ctl->private; 1646 u64 max_bytes; 1647 u64 bitmap_bytes; 1648 u64 extent_bytes; 1649 u64 size = block_group->key.offset; 1650 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1651 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1652 1653 max_bitmaps = max_t(u64, max_bitmaps, 1); 1654 1655 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1656 1657 /* 1658 * The goal is to keep the total amount of memory used per 1gb of space 1659 * at or below 32k, so we need to adjust how much memory we allow to be 1660 * used by extent based free space tracking 1661 */ 1662 if (size < SZ_1G) 1663 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1664 else 1665 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G); 1666 1667 /* 1668 * we want to account for 1 more bitmap than what we have so we can make 1669 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1670 * we add more bitmaps. 1671 */ 1672 bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; 1673 1674 if (bitmap_bytes >= max_bytes) { 1675 ctl->extents_thresh = 0; 1676 return; 1677 } 1678 1679 /* 1680 * we want the extent entry threshold to always be at most 1/2 the max 1681 * bytes we can have, or whatever is less than that. 1682 */ 1683 extent_bytes = max_bytes - bitmap_bytes; 1684 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1685 1686 ctl->extents_thresh = 1687 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1688 } 1689 1690 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1691 struct btrfs_free_space *info, 1692 u64 offset, u64 bytes) 1693 { 1694 unsigned long start, count; 1695 1696 start = offset_to_bit(info->offset, ctl->unit, offset); 1697 count = bytes_to_bits(bytes, ctl->unit); 1698 ASSERT(start + count <= BITS_PER_BITMAP); 1699 1700 bitmap_clear(info->bitmap, start, count); 1701 1702 info->bytes -= bytes; 1703 } 1704 1705 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1706 struct btrfs_free_space *info, u64 offset, 1707 u64 bytes) 1708 { 1709 __bitmap_clear_bits(ctl, info, offset, bytes); 1710 ctl->free_space -= bytes; 1711 } 1712 1713 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1714 struct btrfs_free_space *info, u64 offset, 1715 u64 bytes) 1716 { 1717 unsigned long start, count; 1718 1719 start = offset_to_bit(info->offset, ctl->unit, offset); 1720 count = bytes_to_bits(bytes, ctl->unit); 1721 ASSERT(start + count <= BITS_PER_BITMAP); 1722 1723 bitmap_set(info->bitmap, start, count); 1724 1725 info->bytes += bytes; 1726 ctl->free_space += bytes; 1727 } 1728 1729 /* 1730 * If we can not find suitable extent, we will use bytes to record 1731 * the size of the max extent. 1732 */ 1733 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1734 struct btrfs_free_space *bitmap_info, u64 *offset, 1735 u64 *bytes, bool for_alloc) 1736 { 1737 unsigned long found_bits = 0; 1738 unsigned long max_bits = 0; 1739 unsigned long bits, i; 1740 unsigned long next_zero; 1741 unsigned long extent_bits; 1742 1743 /* 1744 * Skip searching the bitmap if we don't have a contiguous section that 1745 * is large enough for this allocation. 
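	 * max_extent_size is a cached result from a previous failed search;
	 * it is cleared in add_bytes_to_bitmap() once new bits are set.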
1746 */ 1747 if (for_alloc && 1748 bitmap_info->max_extent_size && 1749 bitmap_info->max_extent_size < *bytes) { 1750 *bytes = bitmap_info->max_extent_size; 1751 return -1; 1752 } 1753 1754 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1755 max_t(u64, *offset, bitmap_info->offset)); 1756 bits = bytes_to_bits(*bytes, ctl->unit); 1757 1758 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1759 if (for_alloc && bits == 1) { 1760 found_bits = 1; 1761 break; 1762 } 1763 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1764 BITS_PER_BITMAP, i); 1765 extent_bits = next_zero - i; 1766 if (extent_bits >= bits) { 1767 found_bits = extent_bits; 1768 break; 1769 } else if (extent_bits > max_bits) { 1770 max_bits = extent_bits; 1771 } 1772 i = next_zero; 1773 } 1774 1775 if (found_bits) { 1776 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1777 *bytes = (u64)(found_bits) * ctl->unit; 1778 return 0; 1779 } 1780 1781 *bytes = (u64)(max_bits) * ctl->unit; 1782 bitmap_info->max_extent_size = *bytes; 1783 return -1; 1784 } 1785 1786 /* Cache the size of the max extent in bytes */ 1787 static struct btrfs_free_space * 1788 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1789 unsigned long align, u64 *max_extent_size) 1790 { 1791 struct btrfs_free_space *entry; 1792 struct rb_node *node; 1793 u64 tmp; 1794 u64 align_off; 1795 int ret; 1796 1797 if (!ctl->free_space_offset.rb_node) 1798 goto out; 1799 1800 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1801 if (!entry) 1802 goto out; 1803 1804 for (node = &entry->offset_index; node; node = rb_next(node)) { 1805 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1806 if (entry->bytes < *bytes) { 1807 if (entry->bytes > *max_extent_size) 1808 *max_extent_size = entry->bytes; 1809 continue; 1810 } 1811 1812 /* make sure the space returned is big enough 1813 * to match our requested alignment 1814 */ 1815 if (*bytes >= align) { 1816 tmp = entry->offset - ctl->start + align - 1; 1817 tmp = div64_u64(tmp, align); 1818 tmp = tmp * align + ctl->start; 1819 align_off = tmp - entry->offset; 1820 } else { 1821 align_off = 0; 1822 tmp = entry->offset; 1823 } 1824 1825 if (entry->bytes < *bytes + align_off) { 1826 if (entry->bytes > *max_extent_size) 1827 *max_extent_size = entry->bytes; 1828 continue; 1829 } 1830 1831 if (entry->bitmap) { 1832 u64 size = *bytes; 1833 1834 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1835 if (!ret) { 1836 *offset = tmp; 1837 *bytes = size; 1838 return entry; 1839 } else if (size > *max_extent_size) { 1840 *max_extent_size = size; 1841 } 1842 continue; 1843 } 1844 1845 *offset = tmp; 1846 *bytes = entry->bytes - align_off; 1847 return entry; 1848 } 1849 out: 1850 return NULL; 1851 } 1852 1853 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1854 struct btrfs_free_space *info, u64 offset) 1855 { 1856 info->offset = offset_to_bitmap(ctl, offset); 1857 info->bytes = 0; 1858 INIT_LIST_HEAD(&info->list); 1859 link_free_space(ctl, info); 1860 ctl->total_bitmaps++; 1861 1862 ctl->op->recalc_thresholds(ctl); 1863 } 1864 1865 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1866 struct btrfs_free_space *bitmap_info) 1867 { 1868 unlink_free_space(ctl, bitmap_info); 1869 kfree(bitmap_info->bitmap); 1870 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1871 ctl->total_bitmaps--; 1872 ctl->op->recalc_thresholds(ctl); 1873 } 1874 1875 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1876 struct 
btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let
		 * the extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok, the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap.
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have
		 * plenty of cache left then go ahead and add them, no sense in
		 * adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so allow those block groups to still have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds = recalculate_thresholds,
	.use_bitmap = use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
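	 * Entries linked into a cluster live in the cluster's own rbtree
	 * rather than in ctl->free_space_offset, so a bitmap owned by the
	 * cluster can only be found and updated through the cluster.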
2048 */ 2049 if (block_group && !list_empty(&block_group->cluster_list)) { 2050 struct btrfs_free_cluster *cluster; 2051 struct rb_node *node; 2052 struct btrfs_free_space *entry; 2053 2054 cluster = list_entry(block_group->cluster_list.next, 2055 struct btrfs_free_cluster, 2056 block_group_list); 2057 spin_lock(&cluster->lock); 2058 node = rb_first(&cluster->root); 2059 if (!node) { 2060 spin_unlock(&cluster->lock); 2061 goto no_cluster_bitmap; 2062 } 2063 2064 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2065 if (!entry->bitmap) { 2066 spin_unlock(&cluster->lock); 2067 goto no_cluster_bitmap; 2068 } 2069 2070 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2071 bytes_added = add_bytes_to_bitmap(ctl, entry, 2072 offset, bytes); 2073 bytes -= bytes_added; 2074 offset += bytes_added; 2075 } 2076 spin_unlock(&cluster->lock); 2077 if (!bytes) { 2078 ret = 1; 2079 goto out; 2080 } 2081 } 2082 2083 no_cluster_bitmap: 2084 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2085 1, 0); 2086 if (!bitmap_info) { 2087 ASSERT(added == 0); 2088 goto new_bitmap; 2089 } 2090 2091 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2092 bytes -= bytes_added; 2093 offset += bytes_added; 2094 added = 0; 2095 2096 if (!bytes) { 2097 ret = 1; 2098 goto out; 2099 } else 2100 goto again; 2101 2102 new_bitmap: 2103 if (info && info->bitmap) { 2104 add_new_bitmap(ctl, info, offset); 2105 added = 1; 2106 info = NULL; 2107 goto again; 2108 } else { 2109 spin_unlock(&ctl->tree_lock); 2110 2111 /* no pre-allocated info, allocate a new one */ 2112 if (!info) { 2113 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2114 GFP_NOFS); 2115 if (!info) { 2116 spin_lock(&ctl->tree_lock); 2117 ret = -ENOMEM; 2118 goto out; 2119 } 2120 } 2121 2122 /* allocate the bitmap */ 2123 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2124 spin_lock(&ctl->tree_lock); 2125 if (!info->bitmap) { 2126 ret = -ENOMEM; 2127 goto out; 2128 } 2129 goto again; 2130 } 2131 2132 out: 2133 if (info) { 2134 if (info->bitmap) 2135 kfree(info->bitmap); 2136 kmem_cache_free(btrfs_free_space_cachep, info); 2137 } 2138 2139 return ret; 2140 } 2141 2142 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2143 struct btrfs_free_space *info, bool update_stat) 2144 { 2145 struct btrfs_free_space *left_info; 2146 struct btrfs_free_space *right_info; 2147 bool merged = false; 2148 u64 offset = info->offset; 2149 u64 bytes = info->bytes; 2150 2151 /* 2152 * first we want to see if there is free space adjacent to the range we 2153 * are adding, if there is remove that struct and add a new one to 2154 * cover the entire range 2155 */ 2156 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2157 if (right_info && rb_prev(&right_info->offset_index)) 2158 left_info = rb_entry(rb_prev(&right_info->offset_index), 2159 struct btrfs_free_space, offset_index); 2160 else 2161 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2162 2163 if (right_info && !right_info->bitmap) { 2164 if (update_stat) 2165 unlink_free_space(ctl, right_info); 2166 else 2167 __unlink_free_space(ctl, right_info); 2168 info->bytes += right_info->bytes; 2169 kmem_cache_free(btrfs_free_space_cachep, right_info); 2170 merged = true; 2171 } 2172 2173 if (left_info && !left_info->bitmap && 2174 left_info->offset + left_info->bytes == offset) { 2175 if (update_stat) 2176 unlink_free_space(ctl, left_info); 2177 else 2178 __unlink_free_space(ctl, left_info); 2179 info->offset = left_info->offset; 2180 info->bytes += 
left_info->bytes; 2181 kmem_cache_free(btrfs_free_space_cachep, left_info); 2182 merged = true; 2183 } 2184 2185 return merged; 2186 } 2187 2188 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2189 struct btrfs_free_space *info, 2190 bool update_stat) 2191 { 2192 struct btrfs_free_space *bitmap; 2193 unsigned long i; 2194 unsigned long j; 2195 const u64 end = info->offset + info->bytes; 2196 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2197 u64 bytes; 2198 2199 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2200 if (!bitmap) 2201 return false; 2202 2203 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2204 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2205 if (j == i) 2206 return false; 2207 bytes = (j - i) * ctl->unit; 2208 info->bytes += bytes; 2209 2210 if (update_stat) 2211 bitmap_clear_bits(ctl, bitmap, end, bytes); 2212 else 2213 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2214 2215 if (!bitmap->bytes) 2216 free_bitmap(ctl, bitmap); 2217 2218 return true; 2219 } 2220 2221 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2222 struct btrfs_free_space *info, 2223 bool update_stat) 2224 { 2225 struct btrfs_free_space *bitmap; 2226 u64 bitmap_offset; 2227 unsigned long i; 2228 unsigned long j; 2229 unsigned long prev_j; 2230 u64 bytes; 2231 2232 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2233 /* If we're on a boundary, try the previous logical bitmap. */ 2234 if (bitmap_offset == info->offset) { 2235 if (info->offset == 0) 2236 return false; 2237 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2238 } 2239 2240 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2241 if (!bitmap) 2242 return false; 2243 2244 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2245 j = 0; 2246 prev_j = (unsigned long)-1; 2247 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2248 if (j > i) 2249 break; 2250 prev_j = j; 2251 } 2252 if (prev_j == i) 2253 return false; 2254 2255 if (prev_j == (unsigned long)-1) 2256 bytes = (i + 1) * ctl->unit; 2257 else 2258 bytes = (i - prev_j) * ctl->unit; 2259 2260 info->offset -= bytes; 2261 info->bytes += bytes; 2262 2263 if (update_stat) 2264 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2265 else 2266 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2267 2268 if (!bitmap->bytes) 2269 free_bitmap(ctl, bitmap); 2270 2271 return true; 2272 } 2273 2274 /* 2275 * We prefer always to allocate from extent entries, both for clustered and 2276 * non-clustered allocation requests. So when attempting to add a new extent 2277 * entry, try to see if there's adjacent free space in bitmap entries, and if 2278 * there is, migrate that space from the bitmaps to the extent. 2279 * Like this we get better chances of satisfying space allocation requests 2280 * because we attempt to satisfy them based on a single cache entry, and never 2281 * on 2 or more entries - even if the entries represent a contiguous free space 2282 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2283 * ends). 2284 */ 2285 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2286 struct btrfs_free_space *info, 2287 bool update_stat) 2288 { 2289 /* 2290 * Only work with disconnected entries, as we can change their offset, 2291 * and must be extent entries. 
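	 *
	 * A made-up example of what the two helpers above do: if 'info' is
	 * the extent [12MiB, 12MiB + 64KiB) and the bitmap covering
	 * 12MiB + 64KiB has the bits for the following 32KiB set, then
	 * steal_from_bitmap_to_end() clears those bits and grows 'info' to
	 * 96KiB, letting a later allocation be served from one entry instead
	 * of an extent entry plus a bitmap entry.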
2292 */ 2293 ASSERT(!info->bitmap); 2294 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2295 2296 if (ctl->total_bitmaps > 0) { 2297 bool stole_end; 2298 bool stole_front = false; 2299 2300 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2301 if (ctl->total_bitmaps > 0) 2302 stole_front = steal_from_bitmap_to_front(ctl, info, 2303 update_stat); 2304 2305 if (stole_end || stole_front) 2306 try_merge_free_space(ctl, info, update_stat); 2307 } 2308 } 2309 2310 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2311 struct btrfs_free_space_ctl *ctl, 2312 u64 offset, u64 bytes) 2313 { 2314 struct btrfs_free_space *info; 2315 int ret = 0; 2316 2317 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2318 if (!info) 2319 return -ENOMEM; 2320 2321 info->offset = offset; 2322 info->bytes = bytes; 2323 RB_CLEAR_NODE(&info->offset_index); 2324 2325 spin_lock(&ctl->tree_lock); 2326 2327 if (try_merge_free_space(ctl, info, true)) 2328 goto link; 2329 2330 /* 2331 * There was no extent directly to the left or right of this new 2332 * extent then we know we're going to have to allocate a new extent, so 2333 * before we do that see if we need to drop this into a bitmap 2334 */ 2335 ret = insert_into_bitmap(ctl, info); 2336 if (ret < 0) { 2337 goto out; 2338 } else if (ret) { 2339 ret = 0; 2340 goto out; 2341 } 2342 link: 2343 /* 2344 * Only steal free space from adjacent bitmaps if we're sure we're not 2345 * going to add the new free space to existing bitmap entries - because 2346 * that would mean unnecessary work that would be reverted. Therefore 2347 * attempt to steal space from bitmaps if we're adding an extent entry. 2348 */ 2349 steal_from_bitmap(ctl, info, true); 2350 2351 ret = link_free_space(ctl, info); 2352 if (ret) 2353 kmem_cache_free(btrfs_free_space_cachep, info); 2354 out: 2355 spin_unlock(&ctl->tree_lock); 2356 2357 if (ret) { 2358 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2359 ASSERT(ret != -EEXIST); 2360 } 2361 2362 return ret; 2363 } 2364 2365 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2366 u64 offset, u64 bytes) 2367 { 2368 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2369 struct btrfs_free_space *info; 2370 int ret; 2371 bool re_search = false; 2372 2373 spin_lock(&ctl->tree_lock); 2374 2375 again: 2376 ret = 0; 2377 if (!bytes) 2378 goto out_lock; 2379 2380 info = tree_search_offset(ctl, offset, 0, 0); 2381 if (!info) { 2382 /* 2383 * oops didn't find an extent that matched the space we wanted 2384 * to remove, look for a bitmap instead 2385 */ 2386 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2387 1, 0); 2388 if (!info) { 2389 /* 2390 * If we found a partial bit of our free space in a 2391 * bitmap but then couldn't find the other part this may 2392 * be a problem, so WARN about it. 
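			 *
			 * (re_search is only set after remove_from_bitmap()
			 * returned -EAGAIN, i.e. the tail of the range was
			 * expected to continue in a following entry, so coming
			 * back here empty-handed means the tree and the
			 * caller's idea of this range disagree.)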
2393 */ 2394 WARN_ON(re_search); 2395 goto out_lock; 2396 } 2397 } 2398 2399 re_search = false; 2400 if (!info->bitmap) { 2401 unlink_free_space(ctl, info); 2402 if (offset == info->offset) { 2403 u64 to_free = min(bytes, info->bytes); 2404 2405 info->bytes -= to_free; 2406 info->offset += to_free; 2407 if (info->bytes) { 2408 ret = link_free_space(ctl, info); 2409 WARN_ON(ret); 2410 } else { 2411 kmem_cache_free(btrfs_free_space_cachep, info); 2412 } 2413 2414 offset += to_free; 2415 bytes -= to_free; 2416 goto again; 2417 } else { 2418 u64 old_end = info->bytes + info->offset; 2419 2420 info->bytes = offset - info->offset; 2421 ret = link_free_space(ctl, info); 2422 WARN_ON(ret); 2423 if (ret) 2424 goto out_lock; 2425 2426 /* Not enough bytes in this entry to satisfy us */ 2427 if (old_end < offset + bytes) { 2428 bytes -= old_end - offset; 2429 offset = old_end; 2430 goto again; 2431 } else if (old_end == offset + bytes) { 2432 /* all done */ 2433 goto out_lock; 2434 } 2435 spin_unlock(&ctl->tree_lock); 2436 2437 ret = btrfs_add_free_space(block_group, offset + bytes, 2438 old_end - (offset + bytes)); 2439 WARN_ON(ret); 2440 goto out; 2441 } 2442 } 2443 2444 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2445 if (ret == -EAGAIN) { 2446 re_search = true; 2447 goto again; 2448 } 2449 out_lock: 2450 spin_unlock(&ctl->tree_lock); 2451 out: 2452 return ret; 2453 } 2454 2455 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2456 u64 bytes) 2457 { 2458 struct btrfs_fs_info *fs_info = block_group->fs_info; 2459 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2460 struct btrfs_free_space *info; 2461 struct rb_node *n; 2462 int count = 0; 2463 2464 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2465 info = rb_entry(n, struct btrfs_free_space, offset_index); 2466 if (info->bytes >= bytes && !block_group->ro) 2467 count++; 2468 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2469 info->offset, info->bytes, 2470 (info->bitmap) ? "yes" : "no"); 2471 } 2472 btrfs_info(fs_info, "block group has cluster?: %s", 2473 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2474 btrfs_info(fs_info, 2475 "%d blocks of free space at or bigger than bytes is", count); 2476 } 2477 2478 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2479 { 2480 struct btrfs_fs_info *fs_info = block_group->fs_info; 2481 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2482 2483 spin_lock_init(&ctl->tree_lock); 2484 ctl->unit = fs_info->sectorsize; 2485 ctl->start = block_group->key.objectid; 2486 ctl->private = block_group; 2487 ctl->op = &free_space_op; 2488 INIT_LIST_HEAD(&ctl->trimming_ranges); 2489 mutex_init(&ctl->cache_writeout_mutex); 2490 2491 /* 2492 * we only want to have 32k of ram per block group for keeping 2493 * track of free space, and if we pass 1/2 of that we want to 2494 * start converting things over to using bitmaps 2495 */ 2496 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2497 } 2498 2499 /* 2500 * for a given cluster, put all of its extents back into the free 2501 * space cache. If the block group passed doesn't match the block group 2502 * pointed to by the cluster, someone else raced in and freed the 2503 * cluster already. 
In that case, we just return without changing anything 2504 */ 2505 static int 2506 __btrfs_return_cluster_to_free_space( 2507 struct btrfs_block_group_cache *block_group, 2508 struct btrfs_free_cluster *cluster) 2509 { 2510 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2511 struct btrfs_free_space *entry; 2512 struct rb_node *node; 2513 2514 spin_lock(&cluster->lock); 2515 if (cluster->block_group != block_group) 2516 goto out; 2517 2518 cluster->block_group = NULL; 2519 cluster->window_start = 0; 2520 list_del_init(&cluster->block_group_list); 2521 2522 node = rb_first(&cluster->root); 2523 while (node) { 2524 bool bitmap; 2525 2526 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2527 node = rb_next(&entry->offset_index); 2528 rb_erase(&entry->offset_index, &cluster->root); 2529 RB_CLEAR_NODE(&entry->offset_index); 2530 2531 bitmap = (entry->bitmap != NULL); 2532 if (!bitmap) { 2533 try_merge_free_space(ctl, entry, false); 2534 steal_from_bitmap(ctl, entry, false); 2535 } 2536 tree_insert_offset(&ctl->free_space_offset, 2537 entry->offset, &entry->offset_index, bitmap); 2538 } 2539 cluster->root = RB_ROOT; 2540 2541 out: 2542 spin_unlock(&cluster->lock); 2543 btrfs_put_block_group(block_group); 2544 return 0; 2545 } 2546 2547 static void __btrfs_remove_free_space_cache_locked( 2548 struct btrfs_free_space_ctl *ctl) 2549 { 2550 struct btrfs_free_space *info; 2551 struct rb_node *node; 2552 2553 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2554 info = rb_entry(node, struct btrfs_free_space, offset_index); 2555 if (!info->bitmap) { 2556 unlink_free_space(ctl, info); 2557 kmem_cache_free(btrfs_free_space_cachep, info); 2558 } else { 2559 free_bitmap(ctl, info); 2560 } 2561 2562 cond_resched_lock(&ctl->tree_lock); 2563 } 2564 } 2565 2566 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2567 { 2568 spin_lock(&ctl->tree_lock); 2569 __btrfs_remove_free_space_cache_locked(ctl); 2570 spin_unlock(&ctl->tree_lock); 2571 } 2572 2573 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2574 { 2575 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2576 struct btrfs_free_cluster *cluster; 2577 struct list_head *head; 2578 2579 spin_lock(&ctl->tree_lock); 2580 while ((head = block_group->cluster_list.next) != 2581 &block_group->cluster_list) { 2582 cluster = list_entry(head, struct btrfs_free_cluster, 2583 block_group_list); 2584 2585 WARN_ON(cluster->block_group != block_group); 2586 __btrfs_return_cluster_to_free_space(block_group, cluster); 2587 2588 cond_resched_lock(&ctl->tree_lock); 2589 } 2590 __btrfs_remove_free_space_cache_locked(ctl); 2591 spin_unlock(&ctl->tree_lock); 2592 2593 } 2594 2595 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2596 u64 offset, u64 bytes, u64 empty_size, 2597 u64 *max_extent_size) 2598 { 2599 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2600 struct btrfs_free_space *entry = NULL; 2601 u64 bytes_search = bytes + empty_size; 2602 u64 ret = 0; 2603 u64 align_gap = 0; 2604 u64 align_gap_len = 0; 2605 2606 spin_lock(&ctl->tree_lock); 2607 entry = find_free_space(ctl, &offset, &bytes_search, 2608 block_group->full_stripe_len, max_extent_size); 2609 if (!entry) 2610 goto out; 2611 2612 ret = offset; 2613 if (entry->bitmap) { 2614 bitmap_clear_bits(ctl, entry, offset, bytes); 2615 if (!entry->bytes) 2616 free_bitmap(ctl, entry); 2617 } else { 2618 unlink_free_space(ctl, entry); 2619 align_gap_len = offset - 
entry->offset; 2620 align_gap = entry->offset; 2621 2622 entry->offset = offset + bytes; 2623 WARN_ON(entry->bytes < bytes + align_gap_len); 2624 2625 entry->bytes -= bytes + align_gap_len; 2626 if (!entry->bytes) 2627 kmem_cache_free(btrfs_free_space_cachep, entry); 2628 else 2629 link_free_space(ctl, entry); 2630 } 2631 out: 2632 spin_unlock(&ctl->tree_lock); 2633 2634 if (align_gap_len) 2635 __btrfs_add_free_space(block_group->fs_info, ctl, 2636 align_gap, align_gap_len); 2637 return ret; 2638 } 2639 2640 /* 2641 * given a cluster, put all of its extents back into the free space 2642 * cache. If a block group is passed, this function will only free 2643 * a cluster that belongs to the passed block group. 2644 * 2645 * Otherwise, it'll get a reference on the block group pointed to by the 2646 * cluster and remove the cluster from it. 2647 */ 2648 int btrfs_return_cluster_to_free_space( 2649 struct btrfs_block_group_cache *block_group, 2650 struct btrfs_free_cluster *cluster) 2651 { 2652 struct btrfs_free_space_ctl *ctl; 2653 int ret; 2654 2655 /* first, get a safe pointer to the block group */ 2656 spin_lock(&cluster->lock); 2657 if (!block_group) { 2658 block_group = cluster->block_group; 2659 if (!block_group) { 2660 spin_unlock(&cluster->lock); 2661 return 0; 2662 } 2663 } else if (cluster->block_group != block_group) { 2664 /* someone else has already freed it don't redo their work */ 2665 spin_unlock(&cluster->lock); 2666 return 0; 2667 } 2668 atomic_inc(&block_group->count); 2669 spin_unlock(&cluster->lock); 2670 2671 ctl = block_group->free_space_ctl; 2672 2673 /* now return any extents the cluster had on it */ 2674 spin_lock(&ctl->tree_lock); 2675 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2676 spin_unlock(&ctl->tree_lock); 2677 2678 /* finally drop our ref */ 2679 btrfs_put_block_group(block_group); 2680 return ret; 2681 } 2682 2683 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2684 struct btrfs_free_cluster *cluster, 2685 struct btrfs_free_space *entry, 2686 u64 bytes, u64 min_start, 2687 u64 *max_extent_size) 2688 { 2689 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2690 int err; 2691 u64 search_start = cluster->window_start; 2692 u64 search_bytes = bytes; 2693 u64 ret = 0; 2694 2695 search_start = min_start; 2696 search_bytes = bytes; 2697 2698 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2699 if (err) { 2700 if (search_bytes > *max_extent_size) 2701 *max_extent_size = search_bytes; 2702 return 0; 2703 } 2704 2705 ret = search_start; 2706 __bitmap_clear_bits(ctl, entry, ret, bytes); 2707 2708 return ret; 2709 } 2710 2711 /* 2712 * given a cluster, try to allocate 'bytes' from it, returns 0 2713 * if it couldn't find anything suitably large, or a logical disk offset 2714 * if things worked out 2715 */ 2716 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2717 struct btrfs_free_cluster *cluster, u64 bytes, 2718 u64 min_start, u64 *max_extent_size) 2719 { 2720 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2721 struct btrfs_free_space *entry = NULL; 2722 struct rb_node *node; 2723 u64 ret = 0; 2724 2725 spin_lock(&cluster->lock); 2726 if (bytes > cluster->max_size) 2727 goto out; 2728 2729 if (cluster->block_group != block_group) 2730 goto out; 2731 2732 node = rb_first(&cluster->root); 2733 if (!node) 2734 goto out; 2735 2736 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2737 while (1) { 2738 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2739 *max_extent_size = entry->bytes; 2740 2741 if (entry->bytes < bytes || 2742 (!entry->bitmap && entry->offset < min_start)) { 2743 node = rb_next(&entry->offset_index); 2744 if (!node) 2745 break; 2746 entry = rb_entry(node, struct btrfs_free_space, 2747 offset_index); 2748 continue; 2749 } 2750 2751 if (entry->bitmap) { 2752 ret = btrfs_alloc_from_bitmap(block_group, 2753 cluster, entry, bytes, 2754 cluster->window_start, 2755 max_extent_size); 2756 if (ret == 0) { 2757 node = rb_next(&entry->offset_index); 2758 if (!node) 2759 break; 2760 entry = rb_entry(node, struct btrfs_free_space, 2761 offset_index); 2762 continue; 2763 } 2764 cluster->window_start += bytes; 2765 } else { 2766 ret = entry->offset; 2767 2768 entry->offset += bytes; 2769 entry->bytes -= bytes; 2770 } 2771 2772 if (entry->bytes == 0) 2773 rb_erase(&entry->offset_index, &cluster->root); 2774 break; 2775 } 2776 out: 2777 spin_unlock(&cluster->lock); 2778 2779 if (!ret) 2780 return 0; 2781 2782 spin_lock(&ctl->tree_lock); 2783 2784 ctl->free_space -= bytes; 2785 if (entry->bytes == 0) { 2786 ctl->free_extents--; 2787 if (entry->bitmap) { 2788 kfree(entry->bitmap); 2789 ctl->total_bitmaps--; 2790 ctl->op->recalc_thresholds(ctl); 2791 } 2792 kmem_cache_free(btrfs_free_space_cachep, entry); 2793 } 2794 2795 spin_unlock(&ctl->tree_lock); 2796 2797 return ret; 2798 } 2799 2800 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2801 struct btrfs_free_space *entry, 2802 struct btrfs_free_cluster *cluster, 2803 u64 offset, u64 bytes, 2804 u64 cont1_bytes, u64 min_bytes) 2805 { 2806 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2807 unsigned long next_zero; 2808 unsigned long i; 2809 unsigned long want_bits; 2810 unsigned long min_bits; 2811 unsigned long found_bits; 2812 unsigned long max_bits = 0; 2813 unsigned long start = 0; 2814 unsigned long total_found = 0; 2815 int ret; 2816 2817 i = offset_to_bit(entry->offset, ctl->unit, 2818 max_t(u64, offset, entry->offset)); 2819 want_bits = bytes_to_bits(bytes, ctl->unit); 2820 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2821 2822 /* 2823 * Don't bother looking for a cluster in this bitmap if it's heavily 2824 * fragmented. 
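	 *
	 * (entry->max_extent_size is a value cached by earlier bit scans and
	 * reset to zero whenever add_bytes_to_bitmap() sets new bits, so a
	 * non-zero value below cont1_bytes means a previous scan already
	 * proved this bitmap cannot provide the contiguous chunk we need.)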
2825 */ 2826 if (entry->max_extent_size && 2827 entry->max_extent_size < cont1_bytes) 2828 return -ENOSPC; 2829 again: 2830 found_bits = 0; 2831 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2832 next_zero = find_next_zero_bit(entry->bitmap, 2833 BITS_PER_BITMAP, i); 2834 if (next_zero - i >= min_bits) { 2835 found_bits = next_zero - i; 2836 if (found_bits > max_bits) 2837 max_bits = found_bits; 2838 break; 2839 } 2840 if (next_zero - i > max_bits) 2841 max_bits = next_zero - i; 2842 i = next_zero; 2843 } 2844 2845 if (!found_bits) { 2846 entry->max_extent_size = (u64)max_bits * ctl->unit; 2847 return -ENOSPC; 2848 } 2849 2850 if (!total_found) { 2851 start = i; 2852 cluster->max_size = 0; 2853 } 2854 2855 total_found += found_bits; 2856 2857 if (cluster->max_size < found_bits * ctl->unit) 2858 cluster->max_size = found_bits * ctl->unit; 2859 2860 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2861 i = next_zero + 1; 2862 goto again; 2863 } 2864 2865 cluster->window_start = start * ctl->unit + entry->offset; 2866 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2867 ret = tree_insert_offset(&cluster->root, entry->offset, 2868 &entry->offset_index, 1); 2869 ASSERT(!ret); /* -EEXIST; Logic error */ 2870 2871 trace_btrfs_setup_cluster(block_group, cluster, 2872 total_found * ctl->unit, 1); 2873 return 0; 2874 } 2875 2876 /* 2877 * This searches the block group for just extents to fill the cluster with. 2878 * Try to find a cluster with at least bytes total bytes, at least one 2879 * extent of cont1_bytes, and other clusters of at least min_bytes. 2880 */ 2881 static noinline int 2882 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2883 struct btrfs_free_cluster *cluster, 2884 struct list_head *bitmaps, u64 offset, u64 bytes, 2885 u64 cont1_bytes, u64 min_bytes) 2886 { 2887 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2888 struct btrfs_free_space *first = NULL; 2889 struct btrfs_free_space *entry = NULL; 2890 struct btrfs_free_space *last; 2891 struct rb_node *node; 2892 u64 window_free; 2893 u64 max_extent; 2894 u64 total_size = 0; 2895 2896 entry = tree_search_offset(ctl, offset, 0, 1); 2897 if (!entry) 2898 return -ENOSPC; 2899 2900 /* 2901 * We don't want bitmaps, so just move along until we find a normal 2902 * extent entry. 
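	 *
	 * Bitmaps we step over get parked on the caller's 'bitmaps' list so
	 * that setup_cluster_bitmap() can reuse them without walking the tree
	 * again if this extent-only attempt comes up short.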
2903 */ 2904 while (entry->bitmap || entry->bytes < min_bytes) { 2905 if (entry->bitmap && list_empty(&entry->list)) 2906 list_add_tail(&entry->list, bitmaps); 2907 node = rb_next(&entry->offset_index); 2908 if (!node) 2909 return -ENOSPC; 2910 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2911 } 2912 2913 window_free = entry->bytes; 2914 max_extent = entry->bytes; 2915 first = entry; 2916 last = entry; 2917 2918 for (node = rb_next(&entry->offset_index); node; 2919 node = rb_next(&entry->offset_index)) { 2920 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2921 2922 if (entry->bitmap) { 2923 if (list_empty(&entry->list)) 2924 list_add_tail(&entry->list, bitmaps); 2925 continue; 2926 } 2927 2928 if (entry->bytes < min_bytes) 2929 continue; 2930 2931 last = entry; 2932 window_free += entry->bytes; 2933 if (entry->bytes > max_extent) 2934 max_extent = entry->bytes; 2935 } 2936 2937 if (window_free < bytes || max_extent < cont1_bytes) 2938 return -ENOSPC; 2939 2940 cluster->window_start = first->offset; 2941 2942 node = &first->offset_index; 2943 2944 /* 2945 * now we've found our entries, pull them out of the free space 2946 * cache and put them into the cluster rbtree 2947 */ 2948 do { 2949 int ret; 2950 2951 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2952 node = rb_next(&entry->offset_index); 2953 if (entry->bitmap || entry->bytes < min_bytes) 2954 continue; 2955 2956 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2957 ret = tree_insert_offset(&cluster->root, entry->offset, 2958 &entry->offset_index, 0); 2959 total_size += entry->bytes; 2960 ASSERT(!ret); /* -EEXIST; Logic error */ 2961 } while (node && entry != last); 2962 2963 cluster->max_size = max_extent; 2964 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2965 return 0; 2966 } 2967 2968 /* 2969 * This specifically looks for bitmaps that may work in the cluster, we assume 2970 * that we have already failed to find extents that will work. 2971 */ 2972 static noinline int 2973 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2974 struct btrfs_free_cluster *cluster, 2975 struct list_head *bitmaps, u64 offset, u64 bytes, 2976 u64 cont1_bytes, u64 min_bytes) 2977 { 2978 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2979 struct btrfs_free_space *entry = NULL; 2980 int ret = -ENOSPC; 2981 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2982 2983 if (ctl->total_bitmaps == 0) 2984 return -ENOSPC; 2985 2986 /* 2987 * The bitmap that covers offset won't be in the list unless offset 2988 * is just its start offset. 2989 */ 2990 if (!list_empty(bitmaps)) 2991 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2992 2993 if (!entry || entry->offset != bitmap_offset) { 2994 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2995 if (entry && list_empty(&entry->list)) 2996 list_add(&entry->list, bitmaps); 2997 } 2998 2999 list_for_each_entry(entry, bitmaps, list) { 3000 if (entry->bytes < bytes) 3001 continue; 3002 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3003 bytes, cont1_bytes, min_bytes); 3004 if (!ret) 3005 return 0; 3006 } 3007 3008 /* 3009 * The bitmaps list has all the bitmaps that record free space 3010 * starting after offset, so no more search is required. 3011 */ 3012 return -ENOSPC; 3013 } 3014 3015 /* 3016 * here we try to find a cluster of blocks in a block group. The goal 3017 * is to find at least bytes+empty_size. 3018 * We might not find them all in one contiguous area. 
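 *
 * (cont1_bytes, chosen below, is the largest single extent we insist on
 * finding, and min_bytes the smallest piece we are willing to stitch into
 * the cluster window; an illustrative sketch of how callers drive the
 * cluster API sits at the end of this file.)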
3019 * 3020 * returns zero and sets up cluster if things worked out, otherwise 3021 * it returns -enospc 3022 */ 3023 int btrfs_find_space_cluster(struct btrfs_fs_info *fs_info, 3024 struct btrfs_block_group_cache *block_group, 3025 struct btrfs_free_cluster *cluster, 3026 u64 offset, u64 bytes, u64 empty_size) 3027 { 3028 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3029 struct btrfs_free_space *entry, *tmp; 3030 LIST_HEAD(bitmaps); 3031 u64 min_bytes; 3032 u64 cont1_bytes; 3033 int ret; 3034 3035 /* 3036 * Choose the minimum extent size we'll require for this 3037 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3038 * For metadata, allow allocates with smaller extents. For 3039 * data, keep it dense. 3040 */ 3041 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3042 cont1_bytes = min_bytes = bytes + empty_size; 3043 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3044 cont1_bytes = bytes; 3045 min_bytes = fs_info->sectorsize; 3046 } else { 3047 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3048 min_bytes = fs_info->sectorsize; 3049 } 3050 3051 spin_lock(&ctl->tree_lock); 3052 3053 /* 3054 * If we know we don't have enough space to make a cluster don't even 3055 * bother doing all the work to try and find one. 3056 */ 3057 if (ctl->free_space < bytes) { 3058 spin_unlock(&ctl->tree_lock); 3059 return -ENOSPC; 3060 } 3061 3062 spin_lock(&cluster->lock); 3063 3064 /* someone already found a cluster, hooray */ 3065 if (cluster->block_group) { 3066 ret = 0; 3067 goto out; 3068 } 3069 3070 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3071 min_bytes); 3072 3073 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3074 bytes + empty_size, 3075 cont1_bytes, min_bytes); 3076 if (ret) 3077 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3078 offset, bytes + empty_size, 3079 cont1_bytes, min_bytes); 3080 3081 /* Clear our temporary list */ 3082 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3083 list_del_init(&entry->list); 3084 3085 if (!ret) { 3086 atomic_inc(&block_group->count); 3087 list_add_tail(&cluster->block_group_list, 3088 &block_group->cluster_list); 3089 cluster->block_group = block_group; 3090 } else { 3091 trace_btrfs_failed_cluster_setup(block_group); 3092 } 3093 out: 3094 spin_unlock(&cluster->lock); 3095 spin_unlock(&ctl->tree_lock); 3096 3097 return ret; 3098 } 3099 3100 /* 3101 * simple code to zero out a cluster 3102 */ 3103 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3104 { 3105 spin_lock_init(&cluster->lock); 3106 spin_lock_init(&cluster->refill_lock); 3107 cluster->root = RB_ROOT; 3108 cluster->max_size = 0; 3109 cluster->fragmented = false; 3110 INIT_LIST_HEAD(&cluster->block_group_list); 3111 cluster->block_group = NULL; 3112 } 3113 3114 static int do_trimming(struct btrfs_block_group_cache *block_group, 3115 u64 *total_trimmed, u64 start, u64 bytes, 3116 u64 reserved_start, u64 reserved_bytes, 3117 struct btrfs_trim_range *trim_entry) 3118 { 3119 struct btrfs_space_info *space_info = block_group->space_info; 3120 struct btrfs_fs_info *fs_info = block_group->fs_info; 3121 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3122 int ret; 3123 int update = 0; 3124 u64 trimmed = 0; 3125 3126 spin_lock(&space_info->lock); 3127 spin_lock(&block_group->lock); 3128 if (!block_group->ro) { 3129 block_group->reserved += reserved_bytes; 3130 space_info->bytes_reserved += reserved_bytes; 3131 update = 1; 3132 } 3133 spin_unlock(&block_group->lock); 3134 
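	/*
	 * btrfs_discard_extent() below runs without ctl->tree_lock or the
	 * block group lock held; the range stays on ctl->trimming_ranges
	 * (our caller put it there) until we re-add it to the free space
	 * cache and drop it from the list under cache_writeout_mutex.
	 */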
spin_unlock(&space_info->lock); 3135 3136 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3137 if (!ret) 3138 *total_trimmed += trimmed; 3139 3140 mutex_lock(&ctl->cache_writeout_mutex); 3141 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3142 list_del(&trim_entry->list); 3143 mutex_unlock(&ctl->cache_writeout_mutex); 3144 3145 if (update) { 3146 spin_lock(&space_info->lock); 3147 spin_lock(&block_group->lock); 3148 if (block_group->ro) 3149 space_info->bytes_readonly += reserved_bytes; 3150 block_group->reserved -= reserved_bytes; 3151 space_info->bytes_reserved -= reserved_bytes; 3152 spin_unlock(&space_info->lock); 3153 spin_unlock(&block_group->lock); 3154 } 3155 3156 return ret; 3157 } 3158 3159 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3160 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3161 { 3162 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3163 struct btrfs_free_space *entry; 3164 struct rb_node *node; 3165 int ret = 0; 3166 u64 extent_start; 3167 u64 extent_bytes; 3168 u64 bytes; 3169 3170 while (start < end) { 3171 struct btrfs_trim_range trim_entry; 3172 3173 mutex_lock(&ctl->cache_writeout_mutex); 3174 spin_lock(&ctl->tree_lock); 3175 3176 if (ctl->free_space < minlen) { 3177 spin_unlock(&ctl->tree_lock); 3178 mutex_unlock(&ctl->cache_writeout_mutex); 3179 break; 3180 } 3181 3182 entry = tree_search_offset(ctl, start, 0, 1); 3183 if (!entry) { 3184 spin_unlock(&ctl->tree_lock); 3185 mutex_unlock(&ctl->cache_writeout_mutex); 3186 break; 3187 } 3188 3189 /* skip bitmaps */ 3190 while (entry->bitmap) { 3191 node = rb_next(&entry->offset_index); 3192 if (!node) { 3193 spin_unlock(&ctl->tree_lock); 3194 mutex_unlock(&ctl->cache_writeout_mutex); 3195 goto out; 3196 } 3197 entry = rb_entry(node, struct btrfs_free_space, 3198 offset_index); 3199 } 3200 3201 if (entry->offset >= end) { 3202 spin_unlock(&ctl->tree_lock); 3203 mutex_unlock(&ctl->cache_writeout_mutex); 3204 break; 3205 } 3206 3207 extent_start = entry->offset; 3208 extent_bytes = entry->bytes; 3209 start = max(start, extent_start); 3210 bytes = min(extent_start + extent_bytes, end) - start; 3211 if (bytes < minlen) { 3212 spin_unlock(&ctl->tree_lock); 3213 mutex_unlock(&ctl->cache_writeout_mutex); 3214 goto next; 3215 } 3216 3217 unlink_free_space(ctl, entry); 3218 kmem_cache_free(btrfs_free_space_cachep, entry); 3219 3220 spin_unlock(&ctl->tree_lock); 3221 trim_entry.start = extent_start; 3222 trim_entry.bytes = extent_bytes; 3223 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3224 mutex_unlock(&ctl->cache_writeout_mutex); 3225 3226 ret = do_trimming(block_group, total_trimmed, start, bytes, 3227 extent_start, extent_bytes, &trim_entry); 3228 if (ret) 3229 break; 3230 next: 3231 start += bytes; 3232 3233 if (fatal_signal_pending(current)) { 3234 ret = -ERESTARTSYS; 3235 break; 3236 } 3237 3238 cond_resched(); 3239 } 3240 out: 3241 return ret; 3242 } 3243 3244 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3245 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3246 { 3247 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3248 struct btrfs_free_space *entry; 3249 int ret = 0; 3250 int ret2; 3251 u64 bytes; 3252 u64 offset = offset_to_bitmap(ctl, start); 3253 3254 while (offset < end) { 3255 bool next_bitmap = false; 3256 struct btrfs_trim_range trim_entry; 3257 3258 mutex_lock(&ctl->cache_writeout_mutex); 3259 spin_lock(&ctl->tree_lock); 3260 3261 if (ctl->free_space < minlen) { 3262 
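			/*
			 * Less than 'minlen' of free space left in the whole
			 * block group, so no later bitmap can yield a range
			 * worth trimming either; stop scanning.
			 */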
spin_unlock(&ctl->tree_lock); 3263 mutex_unlock(&ctl->cache_writeout_mutex); 3264 break; 3265 } 3266 3267 entry = tree_search_offset(ctl, offset, 1, 0); 3268 if (!entry) { 3269 spin_unlock(&ctl->tree_lock); 3270 mutex_unlock(&ctl->cache_writeout_mutex); 3271 next_bitmap = true; 3272 goto next; 3273 } 3274 3275 bytes = minlen; 3276 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3277 if (ret2 || start >= end) { 3278 spin_unlock(&ctl->tree_lock); 3279 mutex_unlock(&ctl->cache_writeout_mutex); 3280 next_bitmap = true; 3281 goto next; 3282 } 3283 3284 bytes = min(bytes, end - start); 3285 if (bytes < minlen) { 3286 spin_unlock(&ctl->tree_lock); 3287 mutex_unlock(&ctl->cache_writeout_mutex); 3288 goto next; 3289 } 3290 3291 bitmap_clear_bits(ctl, entry, start, bytes); 3292 if (entry->bytes == 0) 3293 free_bitmap(ctl, entry); 3294 3295 spin_unlock(&ctl->tree_lock); 3296 trim_entry.start = start; 3297 trim_entry.bytes = bytes; 3298 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3299 mutex_unlock(&ctl->cache_writeout_mutex); 3300 3301 ret = do_trimming(block_group, total_trimmed, start, bytes, 3302 start, bytes, &trim_entry); 3303 if (ret) 3304 break; 3305 next: 3306 if (next_bitmap) { 3307 offset += BITS_PER_BITMAP * ctl->unit; 3308 } else { 3309 start += bytes; 3310 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3311 offset += BITS_PER_BITMAP * ctl->unit; 3312 } 3313 3314 if (fatal_signal_pending(current)) { 3315 ret = -ERESTARTSYS; 3316 break; 3317 } 3318 3319 cond_resched(); 3320 } 3321 3322 return ret; 3323 } 3324 3325 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3326 { 3327 atomic_inc(&cache->trimming); 3328 } 3329 3330 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3331 { 3332 struct btrfs_fs_info *fs_info = block_group->fs_info; 3333 struct extent_map_tree *em_tree; 3334 struct extent_map *em; 3335 bool cleanup; 3336 3337 spin_lock(&block_group->lock); 3338 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3339 block_group->removed); 3340 spin_unlock(&block_group->lock); 3341 3342 if (cleanup) { 3343 mutex_lock(&fs_info->chunk_mutex); 3344 em_tree = &fs_info->mapping_tree.map_tree; 3345 write_lock(&em_tree->lock); 3346 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3347 1); 3348 BUG_ON(!em); /* logic error, can't happen */ 3349 /* 3350 * remove_extent_mapping() will delete us from the pinned_chunks 3351 * list, which is protected by the chunk mutex. 3352 */ 3353 remove_extent_mapping(em_tree, em); 3354 write_unlock(&em_tree->lock); 3355 mutex_unlock(&fs_info->chunk_mutex); 3356 3357 /* once for us and once for the tree */ 3358 free_extent_map(em); 3359 free_extent_map(em); 3360 3361 /* 3362 * We've left one free space entry and other tasks trimming 3363 * this block group have left 1 entry each one. Free them. 
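		 *
		 * (We only get here as the task that dropped the last
		 * ->trimming reference on an already removed block group, so
		 * no one else can race with us on this free space ctl.)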
3364 */ 3365 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3366 } 3367 } 3368 3369 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3370 u64 *trimmed, u64 start, u64 end, u64 minlen) 3371 { 3372 int ret; 3373 3374 *trimmed = 0; 3375 3376 spin_lock(&block_group->lock); 3377 if (block_group->removed) { 3378 spin_unlock(&block_group->lock); 3379 return 0; 3380 } 3381 btrfs_get_block_group_trimming(block_group); 3382 spin_unlock(&block_group->lock); 3383 3384 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3385 if (ret) 3386 goto out; 3387 3388 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3389 out: 3390 btrfs_put_block_group_trimming(block_group); 3391 return ret; 3392 } 3393 3394 /* 3395 * Find the left-most item in the cache tree, and then return the 3396 * smallest inode number in the item. 3397 * 3398 * Note: the returned inode number may not be the smallest one in 3399 * the tree, if the left-most item is a bitmap. 3400 */ 3401 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3402 { 3403 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3404 struct btrfs_free_space *entry = NULL; 3405 u64 ino = 0; 3406 3407 spin_lock(&ctl->tree_lock); 3408 3409 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3410 goto out; 3411 3412 entry = rb_entry(rb_first(&ctl->free_space_offset), 3413 struct btrfs_free_space, offset_index); 3414 3415 if (!entry->bitmap) { 3416 ino = entry->offset; 3417 3418 unlink_free_space(ctl, entry); 3419 entry->offset++; 3420 entry->bytes--; 3421 if (!entry->bytes) 3422 kmem_cache_free(btrfs_free_space_cachep, entry); 3423 else 3424 link_free_space(ctl, entry); 3425 } else { 3426 u64 offset = 0; 3427 u64 count = 1; 3428 int ret; 3429 3430 ret = search_bitmap(ctl, entry, &offset, &count, true); 3431 /* Logic error; Should be empty if it can't find anything */ 3432 ASSERT(!ret); 3433 3434 ino = offset; 3435 bitmap_clear_bits(ctl, entry, offset, 1); 3436 if (entry->bytes == 0) 3437 free_bitmap(ctl, entry); 3438 } 3439 out: 3440 spin_unlock(&ctl->tree_lock); 3441 3442 return ino; 3443 } 3444 3445 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3446 struct btrfs_path *path) 3447 { 3448 struct inode *inode = NULL; 3449 3450 spin_lock(&root->ino_cache_lock); 3451 if (root->ino_cache_inode) 3452 inode = igrab(root->ino_cache_inode); 3453 spin_unlock(&root->ino_cache_lock); 3454 if (inode) 3455 return inode; 3456 3457 inode = __lookup_free_space_inode(root, path, 0); 3458 if (IS_ERR(inode)) 3459 return inode; 3460 3461 spin_lock(&root->ino_cache_lock); 3462 if (!btrfs_fs_closing(root->fs_info)) 3463 root->ino_cache_inode = igrab(inode); 3464 spin_unlock(&root->ino_cache_lock); 3465 3466 return inode; 3467 } 3468 3469 int create_free_ino_inode(struct btrfs_root *root, 3470 struct btrfs_trans_handle *trans, 3471 struct btrfs_path *path) 3472 { 3473 return __create_free_space_inode(root, trans, path, 3474 BTRFS_FREE_INO_OBJECTID, 0); 3475 } 3476 3477 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3478 { 3479 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3480 struct btrfs_path *path; 3481 struct inode *inode; 3482 int ret = 0; 3483 u64 root_gen = btrfs_root_generation(&root->root_item); 3484 3485 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3486 return 0; 3487 3488 /* 3489 * If we're unmounting then just return, since this does a search on the 3490 * normal root and not the commit root and we could deadlock. 
3491 */ 3492 if (btrfs_fs_closing(fs_info)) 3493 return 0; 3494 3495 path = btrfs_alloc_path(); 3496 if (!path) 3497 return 0; 3498 3499 inode = lookup_free_ino_inode(root, path); 3500 if (IS_ERR(inode)) 3501 goto out; 3502 3503 if (root_gen != BTRFS_I(inode)->generation) 3504 goto out_put; 3505 3506 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3507 3508 if (ret < 0) 3509 btrfs_err(fs_info, 3510 "failed to load free ino cache for root %llu", 3511 root->root_key.objectid); 3512 out_put: 3513 iput(inode); 3514 out: 3515 btrfs_free_path(path); 3516 return ret; 3517 } 3518 3519 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3520 struct btrfs_trans_handle *trans, 3521 struct btrfs_path *path, 3522 struct inode *inode) 3523 { 3524 struct btrfs_fs_info *fs_info = root->fs_info; 3525 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3526 int ret; 3527 struct btrfs_io_ctl io_ctl; 3528 bool release_metadata = true; 3529 3530 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3531 return 0; 3532 3533 memset(&io_ctl, 0, sizeof(io_ctl)); 3534 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans); 3535 if (!ret) { 3536 /* 3537 * At this point writepages() didn't error out, so our metadata 3538 * reservation is released when the writeback finishes, at 3539 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3540 * with or without an error. 3541 */ 3542 release_metadata = false; 3543 ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path); 3544 } 3545 3546 if (ret) { 3547 if (release_metadata) 3548 btrfs_delalloc_release_metadata(inode, inode->i_size); 3549 #ifdef DEBUG 3550 btrfs_err(fs_info, 3551 "failed to write free ino cache for root %llu", 3552 root->root_key.objectid); 3553 #endif 3554 } 3555 3556 return ret; 3557 } 3558 3559 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3560 /* 3561 * Use this if you need to make a bitmap or extent entry specifically, it 3562 * doesn't do any of the merging that add_free_space does, this acts a lot like 3563 * how the free space cache loading stuff works, so you can get really weird 3564 * configurations. 
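 *
 * A typical (hypothetical) call sequence from a self-test would be
 *
 *	test_add_free_space_entry(cache, SZ_1M, SZ_4M, false);
 *	test_add_free_space_entry(cache, SZ_8M, SZ_1M, true);
 *
 * which pins an extent entry at 1MiB and a bitmap entry at 8MiB with none
 * of the merging or stealing that the normal add path performs.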
3565 */ 3566 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3567 u64 offset, u64 bytes, bool bitmap) 3568 { 3569 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3570 struct btrfs_free_space *info = NULL, *bitmap_info; 3571 void *map = NULL; 3572 u64 bytes_added; 3573 int ret; 3574 3575 again: 3576 if (!info) { 3577 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3578 if (!info) 3579 return -ENOMEM; 3580 } 3581 3582 if (!bitmap) { 3583 spin_lock(&ctl->tree_lock); 3584 info->offset = offset; 3585 info->bytes = bytes; 3586 info->max_extent_size = 0; 3587 ret = link_free_space(ctl, info); 3588 spin_unlock(&ctl->tree_lock); 3589 if (ret) 3590 kmem_cache_free(btrfs_free_space_cachep, info); 3591 return ret; 3592 } 3593 3594 if (!map) { 3595 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3596 if (!map) { 3597 kmem_cache_free(btrfs_free_space_cachep, info); 3598 return -ENOMEM; 3599 } 3600 } 3601 3602 spin_lock(&ctl->tree_lock); 3603 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3604 1, 0); 3605 if (!bitmap_info) { 3606 info->bitmap = map; 3607 map = NULL; 3608 add_new_bitmap(ctl, info, offset); 3609 bitmap_info = info; 3610 info = NULL; 3611 } 3612 3613 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3614 3615 bytes -= bytes_added; 3616 offset += bytes_added; 3617 spin_unlock(&ctl->tree_lock); 3618 3619 if (bytes) 3620 goto again; 3621 3622 if (info) 3623 kmem_cache_free(btrfs_free_space_cachep, info); 3624 if (map) 3625 kfree(map); 3626 return 0; 3627 } 3628 3629 /* 3630 * Checks to see if the given range is in the free space cache. This is really 3631 * just used to check the absence of space, so if there is free space in the 3632 * range at all we will return 1. 3633 */ 3634 int test_check_exists(struct btrfs_block_group_cache *cache, 3635 u64 offset, u64 bytes) 3636 { 3637 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3638 struct btrfs_free_space *info; 3639 int ret = 0; 3640 3641 spin_lock(&ctl->tree_lock); 3642 info = tree_search_offset(ctl, offset, 0, 0); 3643 if (!info) { 3644 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3645 1, 0); 3646 if (!info) 3647 goto out; 3648 } 3649 3650 have_info: 3651 if (info->bitmap) { 3652 u64 bit_off, bit_bytes; 3653 struct rb_node *n; 3654 struct btrfs_free_space *tmp; 3655 3656 bit_off = offset; 3657 bit_bytes = ctl->unit; 3658 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3659 if (!ret) { 3660 if (bit_off == offset) { 3661 ret = 1; 3662 goto out; 3663 } else if (bit_off > offset && 3664 offset + bytes > bit_off) { 3665 ret = 1; 3666 goto out; 3667 } 3668 } 3669 3670 n = rb_prev(&info->offset_index); 3671 while (n) { 3672 tmp = rb_entry(n, struct btrfs_free_space, 3673 offset_index); 3674 if (tmp->offset + tmp->bytes < offset) 3675 break; 3676 if (offset + bytes < tmp->offset) { 3677 n = rb_prev(&tmp->offset_index); 3678 continue; 3679 } 3680 info = tmp; 3681 goto have_info; 3682 } 3683 3684 n = rb_next(&info->offset_index); 3685 while (n) { 3686 tmp = rb_entry(n, struct btrfs_free_space, 3687 offset_index); 3688 if (offset + bytes < tmp->offset) 3689 break; 3690 if (tmp->offset + tmp->bytes < offset) { 3691 n = rb_next(&tmp->offset_index); 3692 continue; 3693 } 3694 info = tmp; 3695 goto have_info; 3696 } 3697 3698 ret = 0; 3699 goto out; 3700 } 3701 3702 if (info->offset == offset) { 3703 ret = 1; 3704 goto out; 3705 } 3706 3707 if (offset > info->offset && offset < info->offset + info->bytes) 3708 ret = 1; 3709 out: 3710 
	spin_unlock(&ctl->tree_lock);
	return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
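
/*
 * Rough, illustrative sketch (kept out of the build) of how a caller is
 * expected to drive the cluster helpers above. The function name and the
 * literal sizes are hypothetical, and the refill_lock handling real callers
 * wrap around this sequence is omitted; only the helper signatures come
 * from this file.
 */
#if 0
static void example_cluster_cycle(struct btrfs_fs_info *fs_info,
				  struct btrfs_block_group_cache *bg)
{
	struct btrfs_free_cluster cluster;
	u64 max_extent_size = 0;
	u64 start;

	btrfs_init_free_cluster(&cluster);

	/* Gather roughly 1MiB of free space starting at the block group. */
	if (btrfs_find_space_cluster(fs_info, bg, &cluster,
				     bg->key.objectid, SZ_1M, 0))
		return;	/* -ENOSPC: fall back to the regular allocator path */

	/* Carve a 64KiB chunk out of the cluster window; 0 means failure. */
	start = btrfs_alloc_from_cluster(bg, &cluster, SZ_64K,
					 bg->key.objectid, &max_extent_size);

	/* Hand whatever was not consumed back to the free space cache. */
	btrfs_return_cluster_to_free_space(bg, &cluster);
}
#endif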