1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/fs.h> 20 #include <linux/sched.h> 21 #include <linux/writeback.h> 22 #include <linux/pagemap.h> 23 #include <linux/blkdev.h> 24 #include "ctree.h" 25 #include "disk-io.h" 26 #include "transaction.h" 27 #include "locking.h" 28 #include "tree-log.h" 29 30 #define BTRFS_ROOT_TRANS_TAG 0 31 32 static noinline void put_transaction(struct btrfs_transaction *transaction) 33 { 34 WARN_ON(transaction->use_count == 0); 35 transaction->use_count--; 36 if (transaction->use_count == 0) { 37 list_del_init(&transaction->list); 38 memset(transaction, 0, sizeof(*transaction)); 39 kmem_cache_free(btrfs_transaction_cachep, transaction); 40 } 41 } 42 43 static noinline void switch_commit_root(struct btrfs_root *root) 44 { 45 free_extent_buffer(root->commit_root); 46 root->commit_root = btrfs_root_node(root); 47 } 48 49 /* 50 * either allocate a new transaction or hop into the existing one 51 */ 52 static noinline int join_transaction(struct btrfs_root *root) 53 { 54 struct btrfs_transaction *cur_trans; 55 cur_trans = root->fs_info->running_transaction; 56 if (!cur_trans) { 57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 58 GFP_NOFS); 59 BUG_ON(!cur_trans); 60 root->fs_info->generation++; 61 cur_trans->num_writers = 1; 62 cur_trans->num_joined = 0; 63 cur_trans->transid = root->fs_info->generation; 64 init_waitqueue_head(&cur_trans->writer_wait); 65 init_waitqueue_head(&cur_trans->commit_wait); 66 cur_trans->in_commit = 0; 67 cur_trans->blocked = 0; 68 cur_trans->use_count = 1; 69 cur_trans->commit_done = 0; 70 cur_trans->start_time = get_seconds(); 71 72 cur_trans->delayed_refs.root.rb_node = NULL; 73 cur_trans->delayed_refs.num_entries = 0; 74 cur_trans->delayed_refs.num_heads_ready = 0; 75 cur_trans->delayed_refs.num_heads = 0; 76 cur_trans->delayed_refs.flushing = 0; 77 cur_trans->delayed_refs.run_delayed_start = 0; 78 spin_lock_init(&cur_trans->delayed_refs.lock); 79 80 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 82 extent_io_tree_init(&cur_trans->dirty_pages, 83 root->fs_info->btree_inode->i_mapping, 84 GFP_NOFS); 85 spin_lock(&root->fs_info->new_trans_lock); 86 root->fs_info->running_transaction = cur_trans; 87 spin_unlock(&root->fs_info->new_trans_lock); 88 } else { 89 cur_trans->num_writers++; 90 cur_trans->num_joined++; 91 } 92 93 return 0; 94 } 95 96 /* 97 * this does all the record keeping required to make sure that a reference 98 * counted root is properly recorded in a given transaction. This is required 99 * to make sure the old root from before we joined the transaction is deleted 100 * when the transaction commits 101 */ 102 static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 103 struct btrfs_root *root) 104 { 105 if (root->ref_cows && root->last_trans < trans->transid) { 106 WARN_ON(root == root->fs_info->extent_root); 107 WARN_ON(root->commit_root != root->node); 108 109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 110 (unsigned long)root->root_key.objectid, 111 BTRFS_ROOT_TRANS_TAG); 112 root->last_trans = trans->transid; 113 btrfs_init_reloc_root(trans, root); 114 } 115 return 0; 116 } 117 118 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 119 struct btrfs_root *root) 120 { 121 if (!root->ref_cows) 122 return 0; 123 124 mutex_lock(&root->fs_info->trans_mutex); 125 if (root->last_trans == trans->transid) { 126 mutex_unlock(&root->fs_info->trans_mutex); 127 return 0; 128 } 129 130 record_root_in_trans(trans, root); 131 mutex_unlock(&root->fs_info->trans_mutex); 132 return 0; 133 } 134 135 /* wait for commit against the current transaction to become unblocked 136 * when this is done, it is safe to start a new transaction, but the current 137 * transaction might not be fully on disk. 138 */ 139 static void wait_current_trans(struct btrfs_root *root) 140 { 141 struct btrfs_transaction *cur_trans; 142 143 cur_trans = root->fs_info->running_transaction; 144 if (cur_trans && cur_trans->blocked) { 145 DEFINE_WAIT(wait); 146 cur_trans->use_count++; 147 while (1) { 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 149 TASK_UNINTERRUPTIBLE); 150 if (cur_trans->blocked) { 151 mutex_unlock(&root->fs_info->trans_mutex); 152 schedule(); 153 mutex_lock(&root->fs_info->trans_mutex); 154 finish_wait(&root->fs_info->transaction_wait, 155 &wait); 156 } else { 157 finish_wait(&root->fs_info->transaction_wait, 158 &wait); 159 break; 160 } 161 } 162 put_transaction(cur_trans); 163 } 164 } 165 166 static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, 167 int num_blocks, int wait) 168 { 169 struct btrfs_trans_handle *h = 170 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 171 int ret; 172 173 mutex_lock(&root->fs_info->trans_mutex); 174 if (!root->fs_info->log_root_recovering && 175 ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) 176 wait_current_trans(root); 177 ret = join_transaction(root); 178 BUG_ON(ret); 179 180 h->transid = root->fs_info->running_transaction->transid; 181 h->transaction = root->fs_info->running_transaction; 182 h->blocks_reserved = num_blocks; 183 h->blocks_used = 0; 184 h->block_group = 0; 185 h->alloc_exclude_nr = 0; 186 h->alloc_exclude_start = 0; 187 h->delayed_ref_updates = 0; 188 189 if (!current->journal_info) 190 current->journal_info = h; 191 192 root->fs_info->running_transaction->use_count++; 193 record_root_in_trans(h, root); 194 mutex_unlock(&root->fs_info->trans_mutex); 195 return h; 196 } 197 198 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 199 int num_blocks) 200 { 201 return start_transaction(root, num_blocks, 1); 202 } 203 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 204 int num_blocks) 205 { 206 return start_transaction(root, num_blocks, 0); 207 } 208 209 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 210 int num_blocks) 211 { 212 return start_transaction(r, num_blocks, 2); 213 } 214 215 /* wait for a transaction commit to be fully complete */ 216 static noinline int wait_for_commit(struct btrfs_root *root, 217 struct btrfs_transaction *commit) 218 { 219 DEFINE_WAIT(wait); 220 mutex_lock(&root->fs_info->trans_mutex); 221 while (!commit->commit_done) { 222 prepare_to_wait(&commit->commit_wait, &wait, 223 TASK_UNINTERRUPTIBLE); 224 if (commit->commit_done) 225 break; 226 mutex_unlock(&root->fs_info->trans_mutex); 227 schedule(); 228 mutex_lock(&root->fs_info->trans_mutex); 229 } 230 mutex_unlock(&root->fs_info->trans_mutex); 231 finish_wait(&commit->commit_wait, &wait); 232 return 0; 233 } 234 235 #if 0 236 /* 237 * rate limit against the drop_snapshot code. This helps to slow down new 238 * operations if the drop_snapshot code isn't able to keep up. 239 */ 240 static void throttle_on_drops(struct btrfs_root *root) 241 { 242 struct btrfs_fs_info *info = root->fs_info; 243 int harder_count = 0; 244 245 harder: 246 if (atomic_read(&info->throttles)) { 247 DEFINE_WAIT(wait); 248 int thr; 249 thr = atomic_read(&info->throttle_gen); 250 251 do { 252 prepare_to_wait(&info->transaction_throttle, 253 &wait, TASK_UNINTERRUPTIBLE); 254 if (!atomic_read(&info->throttles)) { 255 finish_wait(&info->transaction_throttle, &wait); 256 break; 257 } 258 schedule(); 259 finish_wait(&info->transaction_throttle, &wait); 260 } while (thr == atomic_read(&info->throttle_gen)); 261 harder_count++; 262 263 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && 264 harder_count < 2) 265 goto harder; 266 267 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && 268 harder_count < 10) 269 goto harder; 270 271 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && 272 harder_count < 20) 273 goto harder; 274 } 275 } 276 #endif 277 278 void btrfs_throttle(struct btrfs_root *root) 279 { 280 mutex_lock(&root->fs_info->trans_mutex); 281 if (!root->fs_info->open_ioctl_trans) 282 wait_current_trans(root); 283 mutex_unlock(&root->fs_info->trans_mutex); 284 } 285 286 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 287 struct btrfs_root *root, int throttle) 288 { 289 struct btrfs_transaction *cur_trans; 290 struct btrfs_fs_info *info = root->fs_info; 291 int count = 0; 292 293 while (count < 4) { 294 unsigned long cur = trans->delayed_ref_updates; 295 trans->delayed_ref_updates = 0; 296 if (cur && 297 trans->transaction->delayed_refs.num_heads_ready > 64) { 298 trans->delayed_ref_updates = 0; 299 300 /* 301 * do a full flush if the transaction is trying 302 * to close 303 */ 304 if (trans->transaction->delayed_refs.flushing) 305 cur = 0; 306 btrfs_run_delayed_refs(trans, root, cur); 307 } else { 308 break; 309 } 310 count++; 311 } 312 313 mutex_lock(&info->trans_mutex); 314 cur_trans = info->running_transaction; 315 WARN_ON(cur_trans != trans->transaction); 316 WARN_ON(cur_trans->num_writers < 1); 317 cur_trans->num_writers--; 318 319 if (waitqueue_active(&cur_trans->writer_wait)) 320 wake_up(&cur_trans->writer_wait); 321 put_transaction(cur_trans); 322 mutex_unlock(&info->trans_mutex); 323 324 if (current->journal_info == trans) 325 current->journal_info = NULL; 326 memset(trans, 0, sizeof(*trans)); 327 kmem_cache_free(btrfs_trans_handle_cachep, trans); 328 329 return 0; 330 } 331 332 int btrfs_end_transaction(struct btrfs_trans_handle *trans, 333 struct btrfs_root *root) 334 { 335 return __btrfs_end_transaction(trans, root, 0); 336 } 337 338 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 339 struct btrfs_root *root) 340 { 341 return __btrfs_end_transaction(trans, root, 1); 342 } 343 344 /* 345 * when btree blocks are allocated, they have some corresponding bits set for 346 * them in one of two extent_io trees. This is used to make sure all of 347 * those extents are on disk for transaction or log commit 348 */ 349 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 350 struct extent_io_tree *dirty_pages) 351 { 352 int ret; 353 int err = 0; 354 int werr = 0; 355 struct page *page; 356 struct inode *btree_inode = root->fs_info->btree_inode; 357 u64 start = 0; 358 u64 end; 359 unsigned long index; 360 361 while (1) { 362 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 363 EXTENT_DIRTY); 364 if (ret) 365 break; 366 while (start <= end) { 367 cond_resched(); 368 369 index = start >> PAGE_CACHE_SHIFT; 370 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 371 page = find_get_page(btree_inode->i_mapping, index); 372 if (!page) 373 continue; 374 375 btree_lock_page_hook(page); 376 if (!page->mapping) { 377 unlock_page(page); 378 page_cache_release(page); 379 continue; 380 } 381 382 if (PageWriteback(page)) { 383 if (PageDirty(page)) 384 wait_on_page_writeback(page); 385 else { 386 unlock_page(page); 387 page_cache_release(page); 388 continue; 389 } 390 } 391 err = write_one_page(page, 0); 392 if (err) 393 werr = err; 394 page_cache_release(page); 395 } 396 } 397 while (1) { 398 ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 399 EXTENT_DIRTY); 400 if (ret) 401 break; 402 403 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 404 while (start <= end) { 405 index = start >> PAGE_CACHE_SHIFT; 406 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 407 page = find_get_page(btree_inode->i_mapping, index); 408 if (!page) 409 continue; 410 if (PageDirty(page)) { 411 btree_lock_page_hook(page); 412 wait_on_page_writeback(page); 413 err = write_one_page(page, 0); 414 if (err) 415 werr = err; 416 } 417 wait_on_page_writeback(page); 418 page_cache_release(page); 419 cond_resched(); 420 } 421 } 422 if (err) 423 werr = err; 424 return werr; 425 } 426 427 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 428 struct btrfs_root *root) 429 { 430 if (!trans || !trans->transaction) { 431 struct inode *btree_inode; 432 btree_inode = root->fs_info->btree_inode; 433 return filemap_write_and_wait(btree_inode->i_mapping); 434 } 435 return btrfs_write_and_wait_marked_extents(root, 436 &trans->transaction->dirty_pages); 437 } 438 439 /* 440 * this is used to update the root pointer in the tree of tree roots. 441 * 442 * But, in the case of the extent allocation tree, updating the root 443 * pointer may allocate blocks which may change the root of the extent 444 * allocation tree. 445 * 446 * So, this loops and repeats and makes sure the cowonly root didn't 447 * change while the root pointer was being updated in the metadata. 448 */ 449 static int update_cowonly_root(struct btrfs_trans_handle *trans, 450 struct btrfs_root *root) 451 { 452 int ret; 453 u64 old_root_bytenr; 454 struct btrfs_root *tree_root = root->fs_info->tree_root; 455 456 btrfs_write_dirty_block_groups(trans, root); 457 458 while (1) { 459 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 460 if (old_root_bytenr == root->node->start) 461 break; 462 463 btrfs_set_root_node(&root->root_item, root->node); 464 ret = btrfs_update_root(trans, tree_root, 465 &root->root_key, 466 &root->root_item); 467 BUG_ON(ret); 468 469 ret = btrfs_write_dirty_block_groups(trans, root); 470 BUG_ON(ret); 471 } 472 473 if (root != root->fs_info->extent_root) 474 switch_commit_root(root); 475 476 return 0; 477 } 478 479 /* 480 * update all the cowonly tree roots on disk 481 */ 482 static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, 483 struct btrfs_root *root) 484 { 485 struct btrfs_fs_info *fs_info = root->fs_info; 486 struct list_head *next; 487 struct extent_buffer *eb; 488 int ret; 489 490 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 491 BUG_ON(ret); 492 493 eb = btrfs_lock_root_node(fs_info->tree_root); 494 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); 495 btrfs_tree_unlock(eb); 496 free_extent_buffer(eb); 497 498 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 499 BUG_ON(ret); 500 501 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 502 next = fs_info->dirty_cowonly_roots.next; 503 list_del_init(next); 504 root = list_entry(next, struct btrfs_root, dirty_list); 505 506 update_cowonly_root(trans, root); 507 } 508 509 down_write(&fs_info->extent_commit_sem); 510 switch_commit_root(fs_info->extent_root); 511 up_write(&fs_info->extent_commit_sem); 512 513 return 0; 514 } 515 516 /* 517 * dead roots are old snapshots that need to be deleted. This allocates 518 * a dirty root struct and adds it into the list of dead roots that need to 519 * be deleted 520 */ 521 int btrfs_add_dead_root(struct btrfs_root *root) 522 { 523 mutex_lock(&root->fs_info->trans_mutex); 524 list_add(&root->root_list, &root->fs_info->dead_roots); 525 mutex_unlock(&root->fs_info->trans_mutex); 526 return 0; 527 } 528 529 /* 530 * update all the cowonly tree roots on disk 531 */ 532 static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, 533 struct btrfs_root *root) 534 { 535 struct btrfs_root *gang[8]; 536 struct btrfs_fs_info *fs_info = root->fs_info; 537 int i; 538 int ret; 539 int err = 0; 540 541 while (1) { 542 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 543 (void **)gang, 0, 544 ARRAY_SIZE(gang), 545 BTRFS_ROOT_TRANS_TAG); 546 if (ret == 0) 547 break; 548 for (i = 0; i < ret; i++) { 549 root = gang[i]; 550 radix_tree_tag_clear(&fs_info->fs_roots_radix, 551 (unsigned long)root->root_key.objectid, 552 BTRFS_ROOT_TRANS_TAG); 553 554 btrfs_free_log(trans, root); 555 btrfs_update_reloc_root(trans, root); 556 557 if (root->commit_root != root->node) { 558 switch_commit_root(root); 559 btrfs_set_root_node(&root->root_item, 560 root->node); 561 } 562 563 err = btrfs_update_root(trans, fs_info->tree_root, 564 &root->root_key, 565 &root->root_item); 566 if (err) 567 break; 568 } 569 } 570 return err; 571 } 572 573 /* 574 * defrag a given btree. If cacheonly == 1, this won't read from the disk, 575 * otherwise every leaf in the btree is read and defragged. 576 */ 577 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) 578 { 579 struct btrfs_fs_info *info = root->fs_info; 580 int ret; 581 struct btrfs_trans_handle *trans; 582 unsigned long nr; 583 584 smp_mb(); 585 if (root->defrag_running) 586 return 0; 587 trans = btrfs_start_transaction(root, 1); 588 while (1) { 589 root->defrag_running = 1; 590 ret = btrfs_defrag_leaves(trans, root, cacheonly); 591 nr = trans->blocks_used; 592 btrfs_end_transaction(trans, root); 593 btrfs_btree_balance_dirty(info->tree_root, nr); 594 cond_resched(); 595 596 trans = btrfs_start_transaction(root, 1); 597 if (root->fs_info->closing || ret != -EAGAIN) 598 break; 599 } 600 root->defrag_running = 0; 601 smp_mb(); 602 btrfs_end_transaction(trans, root); 603 return 0; 604 } 605 606 #if 0 607 /* 608 * when dropping snapshots, we generate a ton of delayed refs, and it makes 609 * sense not to join the transaction while it is trying to flush the current 610 * queue of delayed refs out. 611 * 612 * This is used by the drop snapshot code only 613 */ 614 static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) 615 { 616 DEFINE_WAIT(wait); 617 618 mutex_lock(&info->trans_mutex); 619 while (info->running_transaction && 620 info->running_transaction->delayed_refs.flushing) { 621 prepare_to_wait(&info->transaction_wait, &wait, 622 TASK_UNINTERRUPTIBLE); 623 mutex_unlock(&info->trans_mutex); 624 625 schedule(); 626 627 mutex_lock(&info->trans_mutex); 628 finish_wait(&info->transaction_wait, &wait); 629 } 630 mutex_unlock(&info->trans_mutex); 631 return 0; 632 } 633 634 /* 635 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 636 * all of them 637 */ 638 int btrfs_drop_dead_root(struct btrfs_root *root) 639 { 640 struct btrfs_trans_handle *trans; 641 struct btrfs_root *tree_root = root->fs_info->tree_root; 642 unsigned long nr; 643 int ret; 644 645 while (1) { 646 /* 647 * we don't want to jump in and create a bunch of 648 * delayed refs if the transaction is starting to close 649 */ 650 wait_transaction_pre_flush(tree_root->fs_info); 651 trans = btrfs_start_transaction(tree_root, 1); 652 653 /* 654 * we've joined a transaction, make sure it isn't 655 * closing right now 656 */ 657 if (trans->transaction->delayed_refs.flushing) { 658 btrfs_end_transaction(trans, tree_root); 659 continue; 660 } 661 662 ret = btrfs_drop_snapshot(trans, root); 663 if (ret != -EAGAIN) 664 break; 665 666 ret = btrfs_update_root(trans, tree_root, 667 &root->root_key, 668 &root->root_item); 669 if (ret) 670 break; 671 672 nr = trans->blocks_used; 673 ret = btrfs_end_transaction(trans, tree_root); 674 BUG_ON(ret); 675 676 btrfs_btree_balance_dirty(tree_root, nr); 677 cond_resched(); 678 } 679 BUG_ON(ret); 680 681 ret = btrfs_del_root(trans, tree_root, &root->root_key); 682 BUG_ON(ret); 683 684 nr = trans->blocks_used; 685 ret = btrfs_end_transaction(trans, tree_root); 686 BUG_ON(ret); 687 688 free_extent_buffer(root->node); 689 free_extent_buffer(root->commit_root); 690 kfree(root); 691 692 btrfs_btree_balance_dirty(tree_root, nr); 693 return ret; 694 } 695 #endif 696 697 /* 698 * new snapshots need to be created at a very specific time in the 699 * transaction commit. This does the actual creation 700 */ 701 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 702 struct btrfs_fs_info *fs_info, 703 struct btrfs_pending_snapshot *pending) 704 { 705 struct btrfs_key key; 706 struct btrfs_root_item *new_root_item; 707 struct btrfs_root *tree_root = fs_info->tree_root; 708 struct btrfs_root *root = pending->root; 709 struct extent_buffer *tmp; 710 struct extent_buffer *old; 711 int ret; 712 u64 objectid; 713 714 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 715 if (!new_root_item) { 716 ret = -ENOMEM; 717 goto fail; 718 } 719 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 720 if (ret) 721 goto fail; 722 723 record_root_in_trans(trans, root); 724 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 725 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 726 727 key.objectid = objectid; 728 /* record when the snapshot was created in key.offset */ 729 key.offset = trans->transid; 730 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 731 732 old = btrfs_lock_root_node(root); 733 btrfs_cow_block(trans, root, old, NULL, 0, &old); 734 btrfs_set_lock_blocking(old); 735 736 btrfs_copy_root(trans, root, old, &tmp, objectid); 737 btrfs_tree_unlock(old); 738 free_extent_buffer(old); 739 740 btrfs_set_root_node(new_root_item, tmp); 741 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 742 new_root_item); 743 btrfs_tree_unlock(tmp); 744 free_extent_buffer(tmp); 745 if (ret) 746 goto fail; 747 748 key.offset = (u64)-1; 749 memcpy(&pending->root_key, &key, sizeof(key)); 750 fail: 751 kfree(new_root_item); 752 btrfs_unreserve_metadata_space(root, 6); 753 return ret; 754 } 755 756 static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, 757 struct btrfs_pending_snapshot *pending) 758 { 759 int ret; 760 int namelen; 761 u64 index = 0; 762 struct btrfs_trans_handle *trans; 763 struct inode *parent_inode; 764 struct inode *inode; 765 struct btrfs_root *parent_root; 766 767 parent_inode = pending->dentry->d_parent->d_inode; 768 parent_root = BTRFS_I(parent_inode)->root; 769 trans = btrfs_join_transaction(parent_root, 1); 770 771 /* 772 * insert the directory item 773 */ 774 namelen = strlen(pending->name); 775 ret = btrfs_set_inode_index(parent_inode, &index); 776 ret = btrfs_insert_dir_item(trans, parent_root, 777 pending->name, namelen, 778 parent_inode->i_ino, 779 &pending->root_key, BTRFS_FT_DIR, index); 780 781 if (ret) 782 goto fail; 783 784 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); 785 ret = btrfs_update_inode(trans, parent_root, parent_inode); 786 BUG_ON(ret); 787 788 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 789 pending->root_key.objectid, 790 parent_root->root_key.objectid, 791 parent_inode->i_ino, index, pending->name, 792 namelen); 793 794 BUG_ON(ret); 795 796 inode = btrfs_lookup_dentry(parent_inode, pending->dentry); 797 d_instantiate(pending->dentry, inode); 798 fail: 799 btrfs_end_transaction(trans, fs_info->fs_root); 800 return ret; 801 } 802 803 /* 804 * create all the snapshots we've scheduled for creation 805 */ 806 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, 807 struct btrfs_fs_info *fs_info) 808 { 809 struct btrfs_pending_snapshot *pending; 810 struct list_head *head = &trans->transaction->pending_snapshots; 811 int ret; 812 813 list_for_each_entry(pending, head, list) { 814 ret = create_pending_snapshot(trans, fs_info, pending); 815 BUG_ON(ret); 816 } 817 return 0; 818 } 819 820 static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, 821 struct btrfs_fs_info *fs_info) 822 { 823 struct btrfs_pending_snapshot *pending; 824 struct list_head *head = &trans->transaction->pending_snapshots; 825 int ret; 826 827 while (!list_empty(head)) { 828 pending = list_entry(head->next, 829 struct btrfs_pending_snapshot, list); 830 ret = finish_pending_snapshot(fs_info, pending); 831 BUG_ON(ret); 832 list_del(&pending->list); 833 kfree(pending->name); 834 kfree(pending); 835 } 836 return 0; 837 } 838 839 static void update_super_roots(struct btrfs_root *root) 840 { 841 struct btrfs_root_item *root_item; 842 struct btrfs_super_block *super; 843 844 super = &root->fs_info->super_copy; 845 846 root_item = &root->fs_info->chunk_root->root_item; 847 super->chunk_root = root_item->bytenr; 848 super->chunk_root_generation = root_item->generation; 849 super->chunk_root_level = root_item->level; 850 851 root_item = &root->fs_info->tree_root->root_item; 852 super->root = root_item->bytenr; 853 super->generation = root_item->generation; 854 super->root_level = root_item->level; 855 } 856 857 int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 858 { 859 int ret = 0; 860 spin_lock(&info->new_trans_lock); 861 if (info->running_transaction) 862 ret = info->running_transaction->in_commit; 863 spin_unlock(&info->new_trans_lock); 864 return ret; 865 } 866 867 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 868 struct btrfs_root *root) 869 { 870 unsigned long joined = 0; 871 unsigned long timeout = 1; 872 struct btrfs_transaction *cur_trans; 873 struct btrfs_transaction *prev_trans = NULL; 874 DEFINE_WAIT(wait); 875 int ret; 876 int should_grow = 0; 877 unsigned long now = get_seconds(); 878 int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); 879 880 btrfs_run_ordered_operations(root, 0); 881 882 /* make a pass through all the delayed refs we have so far 883 * any runnings procs may add more while we are here 884 */ 885 ret = btrfs_run_delayed_refs(trans, root, 0); 886 BUG_ON(ret); 887 888 cur_trans = trans->transaction; 889 /* 890 * set the flushing flag so procs in this transaction have to 891 * start sending their work down. 892 */ 893 cur_trans->delayed_refs.flushing = 1; 894 895 ret = btrfs_run_delayed_refs(trans, root, 0); 896 BUG_ON(ret); 897 898 mutex_lock(&root->fs_info->trans_mutex); 899 if (cur_trans->in_commit) { 900 cur_trans->use_count++; 901 mutex_unlock(&root->fs_info->trans_mutex); 902 btrfs_end_transaction(trans, root); 903 904 ret = wait_for_commit(root, cur_trans); 905 BUG_ON(ret); 906 907 mutex_lock(&root->fs_info->trans_mutex); 908 put_transaction(cur_trans); 909 mutex_unlock(&root->fs_info->trans_mutex); 910 911 return 0; 912 } 913 914 trans->transaction->in_commit = 1; 915 trans->transaction->blocked = 1; 916 if (cur_trans->list.prev != &root->fs_info->trans_list) { 917 prev_trans = list_entry(cur_trans->list.prev, 918 struct btrfs_transaction, list); 919 if (!prev_trans->commit_done) { 920 prev_trans->use_count++; 921 mutex_unlock(&root->fs_info->trans_mutex); 922 923 wait_for_commit(root, prev_trans); 924 925 mutex_lock(&root->fs_info->trans_mutex); 926 put_transaction(prev_trans); 927 } 928 } 929 930 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 931 should_grow = 1; 932 933 do { 934 int snap_pending = 0; 935 joined = cur_trans->num_joined; 936 if (!list_empty(&trans->transaction->pending_snapshots)) 937 snap_pending = 1; 938 939 WARN_ON(cur_trans != trans->transaction); 940 prepare_to_wait(&cur_trans->writer_wait, &wait, 941 TASK_UNINTERRUPTIBLE); 942 943 if (cur_trans->num_writers > 1) 944 timeout = MAX_SCHEDULE_TIMEOUT; 945 else if (should_grow) 946 timeout = 1; 947 948 mutex_unlock(&root->fs_info->trans_mutex); 949 950 if (flush_on_commit) { 951 btrfs_start_delalloc_inodes(root); 952 ret = btrfs_wait_ordered_extents(root, 0); 953 BUG_ON(ret); 954 } else if (snap_pending) { 955 ret = btrfs_wait_ordered_extents(root, 1); 956 BUG_ON(ret); 957 } 958 959 /* 960 * rename don't use btrfs_join_transaction, so, once we 961 * set the transaction to blocked above, we aren't going 962 * to get any new ordered operations. We can safely run 963 * it here and no for sure that nothing new will be added 964 * to the list 965 */ 966 btrfs_run_ordered_operations(root, 1); 967 968 smp_mb(); 969 if (cur_trans->num_writers > 1 || should_grow) 970 schedule_timeout(timeout); 971 972 mutex_lock(&root->fs_info->trans_mutex); 973 finish_wait(&cur_trans->writer_wait, &wait); 974 } while (cur_trans->num_writers > 1 || 975 (should_grow && cur_trans->num_joined != joined)); 976 977 ret = create_pending_snapshots(trans, root->fs_info); 978 BUG_ON(ret); 979 980 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 981 BUG_ON(ret); 982 983 WARN_ON(cur_trans != trans->transaction); 984 985 /* btrfs_commit_tree_roots is responsible for getting the 986 * various roots consistent with each other. Every pointer 987 * in the tree of tree roots has to point to the most up to date 988 * root for every subvolume and other tree. So, we have to keep 989 * the tree logging code from jumping in and changing any 990 * of the trees. 991 * 992 * At this point in the commit, there can't be any tree-log 993 * writers, but a little lower down we drop the trans mutex 994 * and let new people in. By holding the tree_log_mutex 995 * from now until after the super is written, we avoid races 996 * with the tree-log code. 997 */ 998 mutex_lock(&root->fs_info->tree_log_mutex); 999 1000 ret = commit_fs_roots(trans, root); 1001 BUG_ON(ret); 1002 1003 /* commit_fs_roots gets rid of all the tree log roots, it is now 1004 * safe to free the root of tree log roots 1005 */ 1006 btrfs_free_log_root_tree(trans, root->fs_info); 1007 1008 ret = commit_cowonly_roots(trans, root); 1009 BUG_ON(ret); 1010 1011 btrfs_prepare_extent_commit(trans, root); 1012 1013 cur_trans = root->fs_info->running_transaction; 1014 spin_lock(&root->fs_info->new_trans_lock); 1015 root->fs_info->running_transaction = NULL; 1016 spin_unlock(&root->fs_info->new_trans_lock); 1017 1018 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1019 root->fs_info->tree_root->node); 1020 switch_commit_root(root->fs_info->tree_root); 1021 1022 btrfs_set_root_node(&root->fs_info->chunk_root->root_item, 1023 root->fs_info->chunk_root->node); 1024 switch_commit_root(root->fs_info->chunk_root); 1025 1026 update_super_roots(root); 1027 1028 if (!root->fs_info->log_root_recovering) { 1029 btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1030 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); 1031 } 1032 1033 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, 1034 sizeof(root->fs_info->super_copy)); 1035 1036 trans->transaction->blocked = 0; 1037 1038 wake_up(&root->fs_info->transaction_wait); 1039 1040 mutex_unlock(&root->fs_info->trans_mutex); 1041 ret = btrfs_write_and_wait_transaction(trans, root); 1042 BUG_ON(ret); 1043 write_ctree_super(trans, root, 0); 1044 1045 /* 1046 * the super is written, we can safely allow the tree-loggers 1047 * to go about their business 1048 */ 1049 mutex_unlock(&root->fs_info->tree_log_mutex); 1050 1051 btrfs_finish_extent_commit(trans, root); 1052 1053 /* do the directory inserts of any pending snapshot creations */ 1054 finish_pending_snapshots(trans, root->fs_info); 1055 1056 mutex_lock(&root->fs_info->trans_mutex); 1057 1058 cur_trans->commit_done = 1; 1059 1060 root->fs_info->last_trans_committed = cur_trans->transid; 1061 1062 wake_up(&cur_trans->commit_wait); 1063 1064 put_transaction(cur_trans); 1065 put_transaction(cur_trans); 1066 1067 mutex_unlock(&root->fs_info->trans_mutex); 1068 1069 if (current->journal_info == trans) 1070 current->journal_info = NULL; 1071 1072 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1073 return ret; 1074 } 1075 1076 /* 1077 * interface function to delete all the snapshots we have scheduled for deletion 1078 */ 1079 int btrfs_clean_old_snapshots(struct btrfs_root *root) 1080 { 1081 LIST_HEAD(list); 1082 struct btrfs_fs_info *fs_info = root->fs_info; 1083 1084 mutex_lock(&fs_info->trans_mutex); 1085 list_splice_init(&fs_info->dead_roots, &list); 1086 mutex_unlock(&fs_info->trans_mutex); 1087 1088 while (!list_empty(&list)) { 1089 root = list_entry(list.next, struct btrfs_root, root_list); 1090 list_del(&root->root_list); 1091 1092 if (btrfs_header_backref_rev(root->node) < 1093 BTRFS_MIXED_BACKREF_REV) 1094 btrfs_drop_snapshot(root, 0); 1095 else 1096 btrfs_drop_snapshot(root, 1); 1097 } 1098 return 0; 1099 } 1100