/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"

#define BTRFS_ROOT_TRANS_TAG 0

static noinline void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(transaction->use_count == 0);
	transaction->use_count--;
	if (transaction->use_count == 0) {
		list_del_init(&transaction->list);
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}

static noinline void switch_commit_root(struct btrfs_root *root)
{
	free_extent_buffer(root->commit_root);
	root->commit_root = btrfs_root_node(root);
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;
	cur_trans = root->fs_info->running_transaction;
	if (!cur_trans) {
		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
					     GFP_NOFS);
		BUG_ON(!cur_trans);
		root->fs_info->generation++;
		cur_trans->num_writers = 1;
		cur_trans->num_joined = 0;
		cur_trans->transid = root->fs_info->generation;
		init_waitqueue_head(&cur_trans->writer_wait);
		init_waitqueue_head(&cur_trans->commit_wait);
		cur_trans->in_commit = 0;
		cur_trans->blocked = 0;
		cur_trans->use_count = 1;
		cur_trans->commit_done = 0;
		cur_trans->start_time = get_seconds();

		cur_trans->delayed_refs.root.rb_node = NULL;
		cur_trans->delayed_refs.num_entries = 0;
		cur_trans->delayed_refs.num_heads_ready = 0;
		cur_trans->delayed_refs.num_heads = 0;
		cur_trans->delayed_refs.flushing = 0;
		cur_trans->delayed_refs.run_delayed_start = 0;
		spin_lock_init(&cur_trans->delayed_refs.lock);

		INIT_LIST_HEAD(&cur_trans->pending_snapshots);
		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
		extent_io_tree_init(&cur_trans->dirty_pages,
				    root->fs_info->btree_inode->i_mapping,
				    GFP_NOFS);
		spin_lock(&root->fs_info->new_trans_lock);
		root->fs_info->running_transaction = cur_trans;
		spin_unlock(&root->fs_info->new_trans_lock);
	} else {
		cur_trans->num_writers++;
		cur_trans->num_joined++;
	}

	return 0;
}
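/*
 * Locking note: join_transaction() above and record_root_in_trans() below
 * are only called with fs_info->trans_mutex held; see start_transaction()
 * and btrfs_record_root_in_trans() for callers that take the mutex.
 */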
/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	if (root->ref_cows && root->last_trans < trans->transid) {
		WARN_ON(root == root->fs_info->extent_root);
		WARN_ON(root->root_item.refs == 0);
		WARN_ON(root->commit_root != root->node);

		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);
		root->last_trans = trans->transid;
		btrfs_init_reloc_root(trans, root);
	}
	return 0;
}

int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	if (!root->ref_cows)
		return 0;

	mutex_lock(&root->fs_info->trans_mutex);
	if (root->last_trans == trans->transid) {
		mutex_unlock(&root->fs_info->trans_mutex);
		return 0;
	}

	record_root_in_trans(trans, root);
	mutex_unlock(&root->fs_info->trans_mutex);
	return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		DEFINE_WAIT(wait);
		cur_trans->use_count++;
		while (1) {
			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (cur_trans->blocked) {
				mutex_unlock(&root->fs_info->trans_mutex);
				schedule();
				mutex_lock(&root->fs_info->trans_mutex);
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
			} else {
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
				break;
			}
		}
		put_transaction(cur_trans);
	}
}

static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
						    int num_blocks, int wait)
{
	struct btrfs_trans_handle *h =
		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	int ret;

	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->log_root_recovering &&
	    ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
		wait_current_trans(root);
	ret = join_transaction(root);
	BUG_ON(ret);

	h->transid = root->fs_info->running_transaction->transid;
	h->transaction = root->fs_info->running_transaction;
	h->blocks_reserved = num_blocks;
	h->blocks_used = 0;
	h->block_group = 0;
	h->alloc_exclude_nr = 0;
	h->alloc_exclude_start = 0;
	h->delayed_ref_updates = 0;

	root->fs_info->running_transaction->use_count++;
	record_root_in_trans(h, root);
	mutex_unlock(&root->fs_info->trans_mutex);
	return h;
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							  int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}
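/*
 * Usage sketch (illustrative, not taken from a real caller): a transaction
 * handle is paired with btrfs_end_transaction():
 *
 *	trans = btrfs_start_transaction(root, 1);
 *	ret = btrfs_update_inode(trans, root, inode);
 *	btrfs_end_transaction(trans, root);
 *
 * btrfs_join_transaction() is the same but never waits for a blocked
 * (committing) transaction first, and btrfs_start_ioctl_transaction()
 * waits even when open_ioctl_trans is set.
 */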
/* wait for a transaction commit to be fully complete */
static noinline int wait_for_commit(struct btrfs_root *root,
				    struct btrfs_transaction *commit)
{
	DEFINE_WAIT(wait);
	mutex_lock(&root->fs_info->trans_mutex);
	while (!commit->commit_done) {
		prepare_to_wait(&commit->commit_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (commit->commit_done)
			break;
		mutex_unlock(&root->fs_info->trans_mutex);
		schedule();
		mutex_lock(&root->fs_info->trans_mutex);
	}
	mutex_unlock(&root->fs_info->trans_mutex);
	finish_wait(&commit->commit_wait, &wait);
	return 0;
}

#if 0
/*
 * rate limit against the drop_snapshot code.  This helps to slow down new
 * operations if the drop_snapshot code isn't able to keep up.
 */
static void throttle_on_drops(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	int harder_count = 0;

harder:
	if (atomic_read(&info->throttles)) {
		DEFINE_WAIT(wait);
		int thr;
		thr = atomic_read(&info->throttle_gen);

		do {
			prepare_to_wait(&info->transaction_throttle,
					&wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&info->throttles)) {
				finish_wait(&info->transaction_throttle, &wait);
				break;
			}
			schedule();
			finish_wait(&info->transaction_throttle, &wait);
		} while (thr == atomic_read(&info->throttle_gen));
		harder_count++;

		if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
		    harder_count < 2)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
		    harder_count < 10)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
		    harder_count < 20)
			goto harder;
	}
}
#endif

void btrfs_throttle(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->open_ioctl_trans)
		wait_current_trans(root);
	mutex_unlock(&root->fs_info->trans_mutex);
}

static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, int throttle)
{
	struct btrfs_transaction *cur_trans;
	struct btrfs_fs_info *info = root->fs_info;
	int count = 0;

	while (count < 4) {
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;

			/*
			 * do a full flush if the transaction is trying
			 * to close
			 */
			if (trans->transaction->delayed_refs.flushing)
				cur = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
	}

	mutex_lock(&info->trans_mutex);
	cur_trans = info->running_transaction;
	WARN_ON(cur_trans != trans->transaction);
	WARN_ON(cur_trans->num_writers < 1);
	cur_trans->num_writers--;

	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);
	mutex_unlock(&info->trans_mutex);
	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	return 0;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}
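/*
 * Note: with throttle_on_drops() compiled out above, the throttle argument
 * to __btrfs_end_transaction() is currently unused, so the plain and
 * throttled wrappers behave identically.
 */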
/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		while (start <= end) {
			cond_resched();

			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;

			btree_lock_page_hook(page);
			if (!page->mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			if (PageWriteback(page)) {
				if (PageDirty(page))
					wait_on_page_writeback(page);
				else {
					unlock_page(page);
					page_cache_release(page);
					continue;
				}
			}
			err = write_one_page(page, 0);
			if (err)
				werr = err;
			page_cache_release(page);
		}
	}
	while (1) {
		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
		while (start <= end) {
			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;
			if (PageDirty(page)) {
				btree_lock_page_hook(page);
				wait_on_page_writeback(page);
				err = write_one_page(page, 0);
				if (err)
					werr = err;
			}
			wait_on_page_writeback(page);
			page_cache_release(page);
			cond_resched();
		}
	}
	if (err)
		werr = err;
	return werr;
}

int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					&trans->transaction->dirty_pages);
}
/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	btrfs_write_dirty_block_groups(trans, root);

	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start)
			break;

		btrfs_set_root_node(&root->root_item, root->node);
		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		BUG_ON(ret);

		ret = btrfs_write_dirty_block_groups(trans, root);
		BUG_ON(ret);
	}

	if (root != root->fs_info->extent_root)
		switch_commit_root(root);

	return 0;
}

/*
 * update all the cowonly tree roots on disk
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	eb = btrfs_lock_root_node(fs_info->tree_root);
	btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);

		update_cowonly_root(trans, root);
	}

	down_write(&fs_info->extent_commit_sem);
	switch_commit_root(fs_info->extent_root);
	up_write(&fs_info->extent_commit_sem);

	return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->trans_mutex);
	list_add(&root->root_list, &root->fs_info->dead_roots);
	mutex_unlock(&root->fs_info->trans_mutex);
	return 0;
}

/*
 * update all the fs tree roots recorded in this transaction and write
 * their root items into the tree of tree roots
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *gang[8];
	struct btrfs_fs_info *fs_info = root->fs_info;
	int i;
	int ret;
	int err = 0;

	while (1) {
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
					(unsigned long)root->root_key.objectid,
					BTRFS_ROOT_TRANS_TAG);

			btrfs_free_log(trans, root);
			btrfs_update_reloc_root(trans, root);

			if (root->commit_root != root->node) {
				switch_commit_root(root);
				btrfs_set_root_node(&root->root_item,
						    root->node);
			}

			err = btrfs_update_root(trans, fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			if (err)
				break;
		}
	}
	return err;
}
/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	int ret;
	struct btrfs_trans_handle *trans;
	unsigned long nr;

	smp_mb();
	if (root->defrag_running)
		return 0;
	trans = btrfs_start_transaction(root, 1);
	while (1) {
		root->defrag_running = 1;
		ret = btrfs_defrag_leaves(trans, root, cacheonly);
		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		trans = btrfs_start_transaction(root, 1);
		if (root->fs_info->closing || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	smp_mb();
	btrfs_end_transaction(trans, root);
	return 0;
}

#if 0
/*
 * when dropping snapshots, we generate a ton of delayed refs, and it makes
 * sense not to join the transaction while it is trying to flush the current
 * queue of delayed refs out.
 *
 * This is used by the drop snapshot code only
 */
static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
{
	DEFINE_WAIT(wait);

	mutex_lock(&info->trans_mutex);
	while (info->running_transaction &&
	       info->running_transaction->delayed_refs.flushing) {
		prepare_to_wait(&info->transaction_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		mutex_unlock(&info->trans_mutex);

		schedule();

		mutex_lock(&info->trans_mutex);
		finish_wait(&info->transaction_wait, &wait);
	}
	mutex_unlock(&info->trans_mutex);
	return 0;
}

/*
 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
 * all of them
 */
int btrfs_drop_dead_root(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	unsigned long nr;
	int ret;

	while (1) {
		/*
		 * we don't want to jump in and create a bunch of
		 * delayed refs if the transaction is starting to close
		 */
		wait_transaction_pre_flush(tree_root->fs_info);
		trans = btrfs_start_transaction(tree_root, 1);

		/*
		 * we've joined a transaction, make sure it isn't
		 * closing right now
		 */
		if (trans->transaction->delayed_refs.flushing) {
			btrfs_end_transaction(trans, tree_root);
			continue;
		}

		ret = btrfs_drop_snapshot(trans, root);
		if (ret != -EAGAIN)
			break;

		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		if (ret)
			break;

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, tree_root);
		BUG_ON(ret);

		btrfs_btree_balance_dirty(tree_root, nr);
		cond_resched();
	}
	BUG_ON(ret);

	ret = btrfs_del_root(trans, tree_root, &root->root_key);
	BUG_ON(ret);

	nr = trans->blocks_used;
	ret = btrfs_end_transaction(trans, tree_root);
	BUG_ON(ret);

	free_extent_buffer(root->node);
	free_extent_buffer(root->commit_root);
	kfree(root);

	btrfs_btree_balance_dirty(tree_root, nr);
	return ret;
}
#endif
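/*
 * Snapshot creation is done in two phases: create_pending_snapshot() runs
 * at a fixed point in the transaction commit, once the other writers have
 * finished, and copies the source root into a new root item.
 * finish_pending_snapshot() runs after the commit, in a freshly joined
 * transaction, and adds the directory entry and root refs that make the
 * snapshot visible.
 */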
/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	int ret;
	u64 objectid;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		ret = -ENOMEM;
		goto fail;
	}
	ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
	if (ret)
		goto fail;

	record_root_in_trans(trans, root);
	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));

	key.objectid = objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);

	old = btrfs_lock_root_node(root);
	btrfs_cow_block(trans, root, old, NULL, 0, &old);
	btrfs_set_lock_blocking(old);

	btrfs_copy_root(trans, root, old, &tmp, objectid);
	btrfs_tree_unlock(old);
	free_extent_buffer(old);

	btrfs_set_root_node(new_root_item, tmp);
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	memcpy(&pending->root_key, &key, sizeof(key));
fail:
	kfree(new_root_item);
	return ret;
}

static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	int ret;
	int namelen;
	u64 index = 0;
	struct btrfs_trans_handle *trans;
	struct inode *parent_inode;
	struct inode *inode;
	struct btrfs_root *parent_root;

	parent_inode = pending->dentry->d_parent->d_inode;
	parent_root = BTRFS_I(parent_inode)->root;
	trans = btrfs_join_transaction(parent_root, 1);

	/*
	 * insert the directory item
	 */
	namelen = strlen(pending->name);
	ret = btrfs_set_inode_index(parent_inode, &index);
	ret = btrfs_insert_dir_item(trans, parent_root,
				    pending->name, namelen,
				    parent_inode->i_ino,
				    &pending->root_key, BTRFS_FT_DIR, index);

	if (ret)
		goto fail;

	btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	BUG_ON(ret);

	/* add the backref first */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 pending->root_key.objectid,
				 BTRFS_ROOT_BACKREF_KEY,
				 parent_root->root_key.objectid,
				 parent_inode->i_ino, index, pending->name,
				 namelen);

	BUG_ON(ret);

	/* now add the forward ref */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 parent_root->root_key.objectid,
				 BTRFS_ROOT_REF_KEY,
				 pending->root_key.objectid,
				 parent_inode->i_ino, index, pending->name,
				 namelen);

	inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
	d_instantiate(pending->dentry, inode);
fail:
	btrfs_end_transaction(trans, fs_info->fs_root);
	return ret;
}
/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	list_for_each_entry(pending, head, list) {
		ret = create_pending_snapshot(trans, fs_info, pending);
		BUG_ON(ret);
	}
	return 0;
}

static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	while (!list_empty(head)) {
		pending = list_entry(head->next,
				     struct btrfs_pending_snapshot, list);
		ret = finish_pending_snapshot(fs_info, pending);
		BUG_ON(ret);
		list_del(&pending->list);
		kfree(pending->name);
		kfree(pending);
	}
	return 0;
}

static void update_super_roots(struct btrfs_root *root)
{
	struct btrfs_root_item *root_item;
	struct btrfs_super_block *super;

	super = &root->fs_info->super_copy;

	root_item = &root->fs_info->chunk_root->root_item;
	super->chunk_root = root_item->bytenr;
	super->chunk_root_generation = root_item->generation;
	super->chunk_root_level = root_item->level;

	root_item = &root->fs_info->tree_root->root_item;
	super->root = root_item->bytenr;
	super->generation = root_item->generation;
	super->root_level = root_item->level;
}

int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
{
	int ret = 0;
	spin_lock(&info->new_trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->in_commit;
	spin_unlock(&info->new_trans_lock);
	return ret;
}
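/*
 * btrfs_commit_transaction does the full commit: it flushes the delayed
 * refs, marks the transaction blocked so new writers wait, lets the current
 * writers drain, creates any pending snapshots, commits the fs tree roots
 * and then the cowonly roots, writes and waits on the dirty btree extents,
 * writes the super blocks, and finally does the directory inserts for the
 * new snapshots.
 */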
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	unsigned long timeout = 1;
	struct btrfs_transaction *cur_trans;
	struct btrfs_transaction *prev_trans = NULL;
	struct extent_io_tree *pinned_copy;
	DEFINE_WAIT(wait);
	int ret;
	int should_grow = 0;
	unsigned long now = get_seconds();
	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

	btrfs_run_ordered_operations(root, 0);

	/* make a pass through all the delayed refs we have so far
	 * any running procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	cur_trans = trans->transaction;
	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;

	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	mutex_lock(&root->fs_info->trans_mutex);
	if (cur_trans->in_commit) {
		cur_trans->use_count++;
		mutex_unlock(&root->fs_info->trans_mutex);
		btrfs_end_transaction(trans, root);

		ret = wait_for_commit(root, cur_trans);
		BUG_ON(ret);

		mutex_lock(&root->fs_info->trans_mutex);
		put_transaction(cur_trans);
		mutex_unlock(&root->fs_info->trans_mutex);

		return 0;
	}

	pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
	if (!pinned_copy)
		return -ENOMEM;

	extent_io_tree_init(pinned_copy,
			    root->fs_info->btree_inode->i_mapping, GFP_NOFS);

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			prev_trans->use_count++;
			mutex_unlock(&root->fs_info->trans_mutex);

			wait_for_commit(root, prev_trans);

			mutex_lock(&root->fs_info->trans_mutex);
			put_transaction(prev_trans);
		}
	}

	if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
		should_grow = 1;

	do {
		int snap_pending = 0;
		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);
		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (cur_trans->num_writers > 1)
			timeout = MAX_SCHEDULE_TIMEOUT;
		else if (should_grow)
			timeout = 1;

		mutex_unlock(&root->fs_info->trans_mutex);

		if (flush_on_commit) {
			btrfs_start_delalloc_inodes(root);
			ret = btrfs_wait_ordered_extents(root, 0);
			BUG_ON(ret);
		} else if (snap_pending) {
			ret = btrfs_wait_ordered_extents(root, 1);
			BUG_ON(ret);
		}

		/*
		 * renames don't use btrfs_join_transaction, so once we
		 * set the transaction to blocked above, we aren't going
		 * to get any new ordered operations.  We can safely run
		 * it here and know for sure that nothing new will be added
		 * to the list
		 */
		btrfs_run_ordered_operations(root, 1);

		smp_mb();
		if (cur_trans->num_writers > 1 || should_grow)
			schedule_timeout(timeout);

		mutex_lock(&root->fs_info->trans_mutex);
		finish_wait(&cur_trans->writer_wait, &wait);
	} while (cur_trans->num_writers > 1 ||
		 (should_grow && cur_trans->num_joined != joined));

	ret = create_pending_snapshots(trans, root->fs_info);
	BUG_ON(ret);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	WARN_ON(cur_trans != trans->transaction);

	/* commit_fs_roots() and commit_cowonly_roots() are responsible for
	 * getting the various roots consistent with each other.  Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree.  So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);

	ret = commit_fs_roots(trans, root);
	BUG_ON(ret);

	/* commit_fs_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	ret = commit_cowonly_roots(trans, root);
	BUG_ON(ret);

	cur_trans = root->fs_info->running_transaction;
	spin_lock(&root->fs_info->new_trans_lock);
	root->fs_info->running_transaction = NULL;
	spin_unlock(&root->fs_info->new_trans_lock);

	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
			    root->fs_info->tree_root->node);
	switch_commit_root(root->fs_info->tree_root);

	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
			    root->fs_info->chunk_root->node);
	switch_commit_root(root->fs_info->chunk_root);

	update_super_roots(root);

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
	}

	memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
	       sizeof(root->fs_info->super_copy));

	btrfs_copy_pinned(root, pinned_copy);

	trans->transaction->blocked = 0;

	wake_up(&root->fs_info->transaction_wait);

	mutex_unlock(&root->fs_info->trans_mutex);
	ret = btrfs_write_and_wait_transaction(trans, root);
	BUG_ON(ret);
	write_ctree_super(trans, root, 0);

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root, pinned_copy);
	kfree(pinned_copy);

	/* do the directory inserts of any pending snapshot creations */
	finish_pending_snapshots(trans, root->fs_info);

	mutex_lock(&root->fs_info->trans_mutex);

	cur_trans->commit_done = 1;

	root->fs_info->last_trans_committed = cur_trans->transid;

	wake_up(&cur_trans->commit_wait);

	/* drop the ref held by this handle and the original ref taken
	 * when the transaction was started
	 */
	put_transaction(cur_trans);
	put_transaction(cur_trans);

	mutex_unlock(&root->fs_info->trans_mutex);

	kmem_cache_free(btrfs_trans_handle_cachep, trans);
	return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	LIST_HEAD(list);
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->trans_mutex);
	list_splice_init(&fs_info->dead_roots, &list);
	mutex_unlock(&fs_info->trans_mutex);

	while (!list_empty(&list)) {
		root = list_entry(list.next, struct btrfs_root, root_list);
		list_del_init(&root->root_list);
		btrfs_drop_snapshot(root, 0);
	}
	return 0;
}