/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
176cbd5570SChris Mason */ 186cbd5570SChris Mason 1979154b1bSChris Mason #include <linux/fs.h> 2034088780SChris Mason #include <linux/sched.h> 21d3c2fdcfSChris Mason #include <linux/writeback.h> 225f39d397SChris Mason #include <linux/pagemap.h> 235f2cc086SChris Mason #include <linux/blkdev.h> 2479154b1bSChris Mason #include "ctree.h" 2579154b1bSChris Mason #include "disk-io.h" 2679154b1bSChris Mason #include "transaction.h" 27925baeddSChris Mason #include "locking.h" 2831153d81SYan Zheng #include "ref-cache.h" 29e02119d5SChris Mason #include "tree-log.h" 3079154b1bSChris Mason 310f7d52f4SChris Mason #define BTRFS_ROOT_TRANS_TAG 0 320f7d52f4SChris Mason 3380b6794dSChris Mason static noinline void put_transaction(struct btrfs_transaction *transaction) 3479154b1bSChris Mason { 352c90e5d6SChris Mason WARN_ON(transaction->use_count == 0); 3679154b1bSChris Mason transaction->use_count--; 3778fae27eSChris Mason if (transaction->use_count == 0) { 388fd17795SChris Mason list_del_init(&transaction->list); 392c90e5d6SChris Mason memset(transaction, 0, sizeof(*transaction)); 402c90e5d6SChris Mason kmem_cache_free(btrfs_transaction_cachep, transaction); 4179154b1bSChris Mason } 4278fae27eSChris Mason } 4379154b1bSChris Mason 44d352ac68SChris Mason /* 45d352ac68SChris Mason * either allocate a new transaction or hop into the existing one 46d352ac68SChris Mason */ 4780b6794dSChris Mason static noinline int join_transaction(struct btrfs_root *root) 4879154b1bSChris Mason { 4979154b1bSChris Mason struct btrfs_transaction *cur_trans; 5079154b1bSChris Mason cur_trans = root->fs_info->running_transaction; 5179154b1bSChris Mason if (!cur_trans) { 522c90e5d6SChris Mason cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 532c90e5d6SChris Mason GFP_NOFS); 5479154b1bSChris Mason BUG_ON(!cur_trans); 550f7d52f4SChris Mason root->fs_info->generation++; 56e18e4809SChris Mason root->fs_info->last_alloc = 0; 574529ba49SChris Mason root->fs_info->last_data_alloc = 0; 5815ee9bc7SJosef Bacik 
cur_trans->num_writers = 1; 5915ee9bc7SJosef Bacik cur_trans->num_joined = 0; 600f7d52f4SChris Mason cur_trans->transid = root->fs_info->generation; 6179154b1bSChris Mason init_waitqueue_head(&cur_trans->writer_wait); 6279154b1bSChris Mason init_waitqueue_head(&cur_trans->commit_wait); 6379154b1bSChris Mason cur_trans->in_commit = 0; 64f9295749SChris Mason cur_trans->blocked = 0; 65d5719762SChris Mason cur_trans->use_count = 1; 6679154b1bSChris Mason cur_trans->commit_done = 0; 6708607c1bSChris Mason cur_trans->start_time = get_seconds(); 6856bec294SChris Mason 6956bec294SChris Mason cur_trans->delayed_refs.root.rb_node = NULL; 7056bec294SChris Mason cur_trans->delayed_refs.num_entries = 0; 71*c3e69d58SChris Mason cur_trans->delayed_refs.num_heads_ready = 0; 72*c3e69d58SChris Mason cur_trans->delayed_refs.num_heads = 0; 7356bec294SChris Mason cur_trans->delayed_refs.flushing = 0; 74*c3e69d58SChris Mason cur_trans->delayed_refs.run_delayed_start = 0; 7556bec294SChris Mason spin_lock_init(&cur_trans->delayed_refs.lock); 7656bec294SChris Mason 773063d29fSChris Mason INIT_LIST_HEAD(&cur_trans->pending_snapshots); 788fd17795SChris Mason list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 79d1310b2eSChris Mason extent_io_tree_init(&cur_trans->dirty_pages, 805f39d397SChris Mason root->fs_info->btree_inode->i_mapping, 815f39d397SChris Mason GFP_NOFS); 8248ec2cf8SChris Mason spin_lock(&root->fs_info->new_trans_lock); 8348ec2cf8SChris Mason root->fs_info->running_transaction = cur_trans; 8448ec2cf8SChris Mason spin_unlock(&root->fs_info->new_trans_lock); 8515ee9bc7SJosef Bacik } else { 8679154b1bSChris Mason cur_trans->num_writers++; 8715ee9bc7SJosef Bacik cur_trans->num_joined++; 8815ee9bc7SJosef Bacik } 8915ee9bc7SJosef Bacik 9079154b1bSChris Mason return 0; 9179154b1bSChris Mason } 9279154b1bSChris Mason 93d352ac68SChris Mason /* 94d397712bSChris Mason * this does all the record keeping required to make sure that a reference 95d397712bSChris Mason * counted root 
is properly recorded in a given transaction. This is required 96d397712bSChris Mason * to make sure the old root from before we joined the transaction is deleted 97d397712bSChris Mason * when the transaction commits 98d352ac68SChris Mason */ 99e02119d5SChris Mason noinline int btrfs_record_root_in_trans(struct btrfs_root *root) 1006702ed49SChris Mason { 101f321e491SYan Zheng struct btrfs_dirty_root *dirty; 1026702ed49SChris Mason u64 running_trans_id = root->fs_info->running_transaction->transid; 1036702ed49SChris Mason if (root->ref_cows && root->last_trans < running_trans_id) { 1046702ed49SChris Mason WARN_ON(root == root->fs_info->extent_root); 1056702ed49SChris Mason if (root->root_item.refs != 0) { 1066702ed49SChris Mason radix_tree_tag_set(&root->fs_info->fs_roots_radix, 1076702ed49SChris Mason (unsigned long)root->root_key.objectid, 1086702ed49SChris Mason BTRFS_ROOT_TRANS_TAG); 10931153d81SYan Zheng 11031153d81SYan Zheng dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 11131153d81SYan Zheng BUG_ON(!dirty); 11231153d81SYan Zheng dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); 11331153d81SYan Zheng BUG_ON(!dirty->root); 11431153d81SYan Zheng dirty->latest_root = root; 11531153d81SYan Zheng INIT_LIST_HEAD(&dirty->list); 11631153d81SYan Zheng 117925baeddSChris Mason root->commit_root = btrfs_root_node(root); 11831153d81SYan Zheng 11931153d81SYan Zheng memcpy(dirty->root, root, sizeof(*root)); 12031153d81SYan Zheng spin_lock_init(&dirty->root->node_lock); 121bcc63abbSYan spin_lock_init(&dirty->root->list_lock); 12231153d81SYan Zheng mutex_init(&dirty->root->objectid_mutex); 1235b21f2edSZheng Yan mutex_init(&dirty->root->log_mutex); 124bcc63abbSYan INIT_LIST_HEAD(&dirty->root->dead_list); 12531153d81SYan Zheng dirty->root->node = root->commit_root; 12631153d81SYan Zheng dirty->root->commit_root = NULL; 127bcc63abbSYan 128bcc63abbSYan spin_lock(&root->list_lock); 129bcc63abbSYan list_add(&dirty->root->dead_list, &root->dead_list); 130bcc63abbSYan 
spin_unlock(&root->list_lock); 131bcc63abbSYan 132bcc63abbSYan root->dirty_root = dirty; 1336702ed49SChris Mason } else { 1346702ed49SChris Mason WARN_ON(1); 1356702ed49SChris Mason } 1366702ed49SChris Mason root->last_trans = running_trans_id; 1376702ed49SChris Mason } 1386702ed49SChris Mason return 0; 1396702ed49SChris Mason } 1406702ed49SChris Mason 141d352ac68SChris Mason /* wait for commit against the current transaction to become unblocked 142d352ac68SChris Mason * when this is done, it is safe to start a new transaction, but the current 143d352ac68SChris Mason * transaction might not be fully on disk. 144d352ac68SChris Mason */ 14537d1aeeeSChris Mason static void wait_current_trans(struct btrfs_root *root) 14679154b1bSChris Mason { 147f9295749SChris Mason struct btrfs_transaction *cur_trans; 14879154b1bSChris Mason 149f9295749SChris Mason cur_trans = root->fs_info->running_transaction; 15037d1aeeeSChris Mason if (cur_trans && cur_trans->blocked) { 151f9295749SChris Mason DEFINE_WAIT(wait); 152f9295749SChris Mason cur_trans->use_count++; 153f9295749SChris Mason while (1) { 154f9295749SChris Mason prepare_to_wait(&root->fs_info->transaction_wait, &wait, 155f9295749SChris Mason TASK_UNINTERRUPTIBLE); 156f9295749SChris Mason if (cur_trans->blocked) { 157f9295749SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 158f9295749SChris Mason schedule(); 159f9295749SChris Mason mutex_lock(&root->fs_info->trans_mutex); 160f9295749SChris Mason finish_wait(&root->fs_info->transaction_wait, 161f9295749SChris Mason &wait); 162f9295749SChris Mason } else { 163f9295749SChris Mason finish_wait(&root->fs_info->transaction_wait, 164f9295749SChris Mason &wait); 165f9295749SChris Mason break; 166f9295749SChris Mason } 167f9295749SChris Mason } 168f9295749SChris Mason put_transaction(cur_trans); 169f9295749SChris Mason } 17037d1aeeeSChris Mason } 17137d1aeeeSChris Mason 172e02119d5SChris Mason static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, 
1739ca9ee09SSage Weil int num_blocks, int wait) 17437d1aeeeSChris Mason { 17537d1aeeeSChris Mason struct btrfs_trans_handle *h = 17637d1aeeeSChris Mason kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 17737d1aeeeSChris Mason int ret; 17837d1aeeeSChris Mason 17937d1aeeeSChris Mason mutex_lock(&root->fs_info->trans_mutex); 1804bef0848SChris Mason if (!root->fs_info->log_root_recovering && 1814bef0848SChris Mason ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) 18237d1aeeeSChris Mason wait_current_trans(root); 18379154b1bSChris Mason ret = join_transaction(root); 18479154b1bSChris Mason BUG_ON(ret); 1850f7d52f4SChris Mason 186e02119d5SChris Mason btrfs_record_root_in_trans(root); 1876702ed49SChris Mason h->transid = root->fs_info->running_transaction->transid; 18879154b1bSChris Mason h->transaction = root->fs_info->running_transaction; 18979154b1bSChris Mason h->blocks_reserved = num_blocks; 19079154b1bSChris Mason h->blocks_used = 0; 191d2fb3437SYan Zheng h->block_group = 0; 19226b8003fSChris Mason h->alloc_exclude_nr = 0; 19326b8003fSChris Mason h->alloc_exclude_start = 0; 19456bec294SChris Mason h->delayed_ref_updates = 0; 19579154b1bSChris Mason root->fs_info->running_transaction->use_count++; 19679154b1bSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 19779154b1bSChris Mason return h; 19879154b1bSChris Mason } 19979154b1bSChris Mason 200f9295749SChris Mason struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 201f9295749SChris Mason int num_blocks) 202f9295749SChris Mason { 2039ca9ee09SSage Weil return start_transaction(root, num_blocks, 1); 204f9295749SChris Mason } 205f9295749SChris Mason struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 206f9295749SChris Mason int num_blocks) 207f9295749SChris Mason { 2089ca9ee09SSage Weil return start_transaction(root, num_blocks, 0); 209f9295749SChris Mason } 210f9295749SChris Mason 2119ca9ee09SSage Weil struct btrfs_trans_handle 
*btrfs_start_ioctl_transaction(struct btrfs_root *r, 2129ca9ee09SSage Weil int num_blocks) 2139ca9ee09SSage Weil { 2149ca9ee09SSage Weil return start_transaction(r, num_blocks, 2); 2159ca9ee09SSage Weil } 2169ca9ee09SSage Weil 217d352ac68SChris Mason /* wait for a transaction commit to be fully complete */ 21889ce8a63SChris Mason static noinline int wait_for_commit(struct btrfs_root *root, 21989ce8a63SChris Mason struct btrfs_transaction *commit) 22089ce8a63SChris Mason { 22189ce8a63SChris Mason DEFINE_WAIT(wait); 22289ce8a63SChris Mason mutex_lock(&root->fs_info->trans_mutex); 22389ce8a63SChris Mason while (!commit->commit_done) { 22489ce8a63SChris Mason prepare_to_wait(&commit->commit_wait, &wait, 22589ce8a63SChris Mason TASK_UNINTERRUPTIBLE); 22689ce8a63SChris Mason if (commit->commit_done) 22789ce8a63SChris Mason break; 22889ce8a63SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 22989ce8a63SChris Mason schedule(); 23089ce8a63SChris Mason mutex_lock(&root->fs_info->trans_mutex); 23189ce8a63SChris Mason } 23289ce8a63SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 23389ce8a63SChris Mason finish_wait(&commit->commit_wait, &wait); 23489ce8a63SChris Mason return 0; 23589ce8a63SChris Mason } 23689ce8a63SChris Mason 237d352ac68SChris Mason /* 238d397712bSChris Mason * rate limit against the drop_snapshot code. This helps to slow down new 239d397712bSChris Mason * operations if the drop_snapshot code isn't able to keep up. 
240d352ac68SChris Mason */ 24137d1aeeeSChris Mason static void throttle_on_drops(struct btrfs_root *root) 242ab78c84dSChris Mason { 243ab78c84dSChris Mason struct btrfs_fs_info *info = root->fs_info; 2442dd3e67bSChris Mason int harder_count = 0; 245ab78c84dSChris Mason 2462dd3e67bSChris Mason harder: 247ab78c84dSChris Mason if (atomic_read(&info->throttles)) { 248ab78c84dSChris Mason DEFINE_WAIT(wait); 249ab78c84dSChris Mason int thr; 250ab78c84dSChris Mason thr = atomic_read(&info->throttle_gen); 251ab78c84dSChris Mason 252ab78c84dSChris Mason do { 253ab78c84dSChris Mason prepare_to_wait(&info->transaction_throttle, 254ab78c84dSChris Mason &wait, TASK_UNINTERRUPTIBLE); 255ab78c84dSChris Mason if (!atomic_read(&info->throttles)) { 256ab78c84dSChris Mason finish_wait(&info->transaction_throttle, &wait); 257ab78c84dSChris Mason break; 258ab78c84dSChris Mason } 259ab78c84dSChris Mason schedule(); 260ab78c84dSChris Mason finish_wait(&info->transaction_throttle, &wait); 261ab78c84dSChris Mason } while (thr == atomic_read(&info->throttle_gen)); 2622dd3e67bSChris Mason harder_count++; 2632dd3e67bSChris Mason 2642dd3e67bSChris Mason if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && 2652dd3e67bSChris Mason harder_count < 2) 2662dd3e67bSChris Mason goto harder; 2672dd3e67bSChris Mason 2682dd3e67bSChris Mason if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && 2692dd3e67bSChris Mason harder_count < 10) 2702dd3e67bSChris Mason goto harder; 2712dd3e67bSChris Mason 2722dd3e67bSChris Mason if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && 2732dd3e67bSChris Mason harder_count < 20) 2742dd3e67bSChris Mason goto harder; 275ab78c84dSChris Mason } 276ab78c84dSChris Mason } 277ab78c84dSChris Mason 27837d1aeeeSChris Mason void btrfs_throttle(struct btrfs_root *root) 27937d1aeeeSChris Mason { 28037d1aeeeSChris Mason mutex_lock(&root->fs_info->trans_mutex); 2819ca9ee09SSage Weil if (!root->fs_info->open_ioctl_trans) 28237d1aeeeSChris Mason 
wait_current_trans(root); 28337d1aeeeSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 28437d1aeeeSChris Mason 28537d1aeeeSChris Mason throttle_on_drops(root); 28637d1aeeeSChris Mason } 28737d1aeeeSChris Mason 28889ce8a63SChris Mason static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 28989ce8a63SChris Mason struct btrfs_root *root, int throttle) 29079154b1bSChris Mason { 29179154b1bSChris Mason struct btrfs_transaction *cur_trans; 292ab78c84dSChris Mason struct btrfs_fs_info *info = root->fs_info; 293*c3e69d58SChris Mason int count = 0; 294d6e4a428SChris Mason 295*c3e69d58SChris Mason while (count < 4) { 296*c3e69d58SChris Mason unsigned long cur = trans->delayed_ref_updates; 297*c3e69d58SChris Mason trans->delayed_ref_updates = 0; 298*c3e69d58SChris Mason if (cur && 299*c3e69d58SChris Mason trans->transaction->delayed_refs.num_heads_ready > 64) { 300*c3e69d58SChris Mason trans->delayed_ref_updates = 0; 301*c3e69d58SChris Mason btrfs_run_delayed_refs(trans, root, cur); 302*c3e69d58SChris Mason } else { 303*c3e69d58SChris Mason break; 304*c3e69d58SChris Mason } 305*c3e69d58SChris Mason count++; 30656bec294SChris Mason } 30756bec294SChris Mason 308ab78c84dSChris Mason mutex_lock(&info->trans_mutex); 309ab78c84dSChris Mason cur_trans = info->running_transaction; 310ccd467d6SChris Mason WARN_ON(cur_trans != trans->transaction); 311d5719762SChris Mason WARN_ON(cur_trans->num_writers < 1); 312ccd467d6SChris Mason cur_trans->num_writers--; 31389ce8a63SChris Mason 31479154b1bSChris Mason if (waitqueue_active(&cur_trans->writer_wait)) 31579154b1bSChris Mason wake_up(&cur_trans->writer_wait); 31679154b1bSChris Mason put_transaction(cur_trans); 317ab78c84dSChris Mason mutex_unlock(&info->trans_mutex); 318d6025579SChris Mason memset(trans, 0, sizeof(*trans)); 3192c90e5d6SChris Mason kmem_cache_free(btrfs_trans_handle_cachep, trans); 320ab78c84dSChris Mason 321ab78c84dSChris Mason if (throttle) 32237d1aeeeSChris Mason throttle_on_drops(root); 
323ab78c84dSChris Mason 32479154b1bSChris Mason return 0; 32579154b1bSChris Mason } 32679154b1bSChris Mason 32789ce8a63SChris Mason int btrfs_end_transaction(struct btrfs_trans_handle *trans, 32889ce8a63SChris Mason struct btrfs_root *root) 32989ce8a63SChris Mason { 33089ce8a63SChris Mason return __btrfs_end_transaction(trans, root, 0); 33189ce8a63SChris Mason } 33289ce8a63SChris Mason 33389ce8a63SChris Mason int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 33489ce8a63SChris Mason struct btrfs_root *root) 33589ce8a63SChris Mason { 33689ce8a63SChris Mason return __btrfs_end_transaction(trans, root, 1); 33789ce8a63SChris Mason } 33889ce8a63SChris Mason 339d352ac68SChris Mason /* 340d352ac68SChris Mason * when btree blocks are allocated, they have some corresponding bits set for 341d352ac68SChris Mason * them in one of two extent_io trees. This is used to make sure all of 342d352ac68SChris Mason * those extents are on disk for transaction or log commit 343d352ac68SChris Mason */ 344d0c803c4SChris Mason int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 345d0c803c4SChris Mason struct extent_io_tree *dirty_pages) 34679154b1bSChris Mason { 3477c4452b9SChris Mason int ret; 348777e6bd7SChris Mason int err = 0; 3497c4452b9SChris Mason int werr = 0; 3507c4452b9SChris Mason struct page *page; 3517c4452b9SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 352777e6bd7SChris Mason u64 start = 0; 3535f39d397SChris Mason u64 end; 3545f39d397SChris Mason unsigned long index; 3557c4452b9SChris Mason 3567c4452b9SChris Mason while (1) { 357777e6bd7SChris Mason ret = find_first_extent_bit(dirty_pages, start, &start, &end, 3585f39d397SChris Mason EXTENT_DIRTY); 3595f39d397SChris Mason if (ret) 3607c4452b9SChris Mason break; 3615f39d397SChris Mason while (start <= end) { 362777e6bd7SChris Mason cond_resched(); 363777e6bd7SChris Mason 3645f39d397SChris Mason index = start >> PAGE_CACHE_SHIFT; 36535ebb934SChris Mason start = (u64)(index + 
1) << PAGE_CACHE_SHIFT; 3664bef0848SChris Mason page = find_get_page(btree_inode->i_mapping, index); 3677c4452b9SChris Mason if (!page) 3687c4452b9SChris Mason continue; 3694bef0848SChris Mason 3704bef0848SChris Mason btree_lock_page_hook(page); 3714bef0848SChris Mason if (!page->mapping) { 3724bef0848SChris Mason unlock_page(page); 3734bef0848SChris Mason page_cache_release(page); 3744bef0848SChris Mason continue; 3754bef0848SChris Mason } 3764bef0848SChris Mason 3776702ed49SChris Mason if (PageWriteback(page)) { 3786702ed49SChris Mason if (PageDirty(page)) 3796702ed49SChris Mason wait_on_page_writeback(page); 3806702ed49SChris Mason else { 3816702ed49SChris Mason unlock_page(page); 3826702ed49SChris Mason page_cache_release(page); 3836702ed49SChris Mason continue; 3846702ed49SChris Mason } 3856702ed49SChris Mason } 3867c4452b9SChris Mason err = write_one_page(page, 0); 3877c4452b9SChris Mason if (err) 3887c4452b9SChris Mason werr = err; 3897c4452b9SChris Mason page_cache_release(page); 3907c4452b9SChris Mason } 3917c4452b9SChris Mason } 392777e6bd7SChris Mason while (1) { 393777e6bd7SChris Mason ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 394777e6bd7SChris Mason EXTENT_DIRTY); 395777e6bd7SChris Mason if (ret) 396777e6bd7SChris Mason break; 397777e6bd7SChris Mason 398777e6bd7SChris Mason clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 399777e6bd7SChris Mason while (start <= end) { 400777e6bd7SChris Mason index = start >> PAGE_CACHE_SHIFT; 401777e6bd7SChris Mason start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 402777e6bd7SChris Mason page = find_get_page(btree_inode->i_mapping, index); 403777e6bd7SChris Mason if (!page) 404777e6bd7SChris Mason continue; 405777e6bd7SChris Mason if (PageDirty(page)) { 4064bef0848SChris Mason btree_lock_page_hook(page); 4074bef0848SChris Mason wait_on_page_writeback(page); 408777e6bd7SChris Mason err = write_one_page(page, 0); 409777e6bd7SChris Mason if (err) 410777e6bd7SChris Mason werr = err; 411777e6bd7SChris 
Mason } 412777e6bd7SChris Mason wait_on_page_writeback(page); 413777e6bd7SChris Mason page_cache_release(page); 414777e6bd7SChris Mason cond_resched(); 415777e6bd7SChris Mason } 416777e6bd7SChris Mason } 4177c4452b9SChris Mason if (err) 4187c4452b9SChris Mason werr = err; 4197c4452b9SChris Mason return werr; 42079154b1bSChris Mason } 42179154b1bSChris Mason 422d0c803c4SChris Mason int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 423d0c803c4SChris Mason struct btrfs_root *root) 424d0c803c4SChris Mason { 425d0c803c4SChris Mason if (!trans || !trans->transaction) { 426d0c803c4SChris Mason struct inode *btree_inode; 427d0c803c4SChris Mason btree_inode = root->fs_info->btree_inode; 428d0c803c4SChris Mason return filemap_write_and_wait(btree_inode->i_mapping); 429d0c803c4SChris Mason } 430d0c803c4SChris Mason return btrfs_write_and_wait_marked_extents(root, 431d0c803c4SChris Mason &trans->transaction->dirty_pages); 432d0c803c4SChris Mason } 433d0c803c4SChris Mason 434d352ac68SChris Mason /* 435d352ac68SChris Mason * this is used to update the root pointer in the tree of tree roots. 436d352ac68SChris Mason * 437d352ac68SChris Mason * But, in the case of the extent allocation tree, updating the root 438d352ac68SChris Mason * pointer may allocate blocks which may change the root of the extent 439d352ac68SChris Mason * allocation tree. 440d352ac68SChris Mason * 441d352ac68SChris Mason * So, this loops and repeats and makes sure the cowonly root didn't 442d352ac68SChris Mason * change while the root pointer was being updated in the metadata. 
443d352ac68SChris Mason */ 4440b86a832SChris Mason static int update_cowonly_root(struct btrfs_trans_handle *trans, 44579154b1bSChris Mason struct btrfs_root *root) 44679154b1bSChris Mason { 44779154b1bSChris Mason int ret; 4480b86a832SChris Mason u64 old_root_bytenr; 4490b86a832SChris Mason struct btrfs_root *tree_root = root->fs_info->tree_root; 45079154b1bSChris Mason 4510b86a832SChris Mason btrfs_write_dirty_block_groups(trans, root); 45256bec294SChris Mason 45356bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 45456bec294SChris Mason BUG_ON(ret); 45587ef2bb4SChris Mason 45679154b1bSChris Mason while (1) { 4570b86a832SChris Mason old_root_bytenr = btrfs_root_bytenr(&root->root_item); 4580b86a832SChris Mason if (old_root_bytenr == root->node->start) 45979154b1bSChris Mason break; 4600b86a832SChris Mason btrfs_set_root_bytenr(&root->root_item, 4610b86a832SChris Mason root->node->start); 4620b86a832SChris Mason btrfs_set_root_level(&root->root_item, 4630b86a832SChris Mason btrfs_header_level(root->node)); 46484234f3aSYan Zheng btrfs_set_root_generation(&root->root_item, trans->transid); 46587ef2bb4SChris Mason 46679154b1bSChris Mason ret = btrfs_update_root(trans, tree_root, 4670b86a832SChris Mason &root->root_key, 4680b86a832SChris Mason &root->root_item); 46979154b1bSChris Mason BUG_ON(ret); 4700b86a832SChris Mason btrfs_write_dirty_block_groups(trans, root); 47156bec294SChris Mason 47256bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 47356bec294SChris Mason BUG_ON(ret); 4740b86a832SChris Mason } 4750b86a832SChris Mason return 0; 4760b86a832SChris Mason } 4770b86a832SChris Mason 478d352ac68SChris Mason /* 479d352ac68SChris Mason * update all the cowonly tree roots on disk 480d352ac68SChris Mason */ 4810b86a832SChris Mason int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 4820b86a832SChris Mason struct btrfs_root *root) 4830b86a832SChris Mason { 4840b86a832SChris Mason struct 
btrfs_fs_info *fs_info = root->fs_info; 4850b86a832SChris Mason struct list_head *next; 48684234f3aSYan Zheng struct extent_buffer *eb; 48756bec294SChris Mason int ret; 48884234f3aSYan Zheng 48956bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 49056bec294SChris Mason BUG_ON(ret); 49187ef2bb4SChris Mason 49284234f3aSYan Zheng eb = btrfs_lock_root_node(fs_info->tree_root); 4939fa8cfe7SChris Mason btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); 49484234f3aSYan Zheng btrfs_tree_unlock(eb); 49584234f3aSYan Zheng free_extent_buffer(eb); 4960b86a832SChris Mason 49756bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 49856bec294SChris Mason BUG_ON(ret); 49987ef2bb4SChris Mason 5000b86a832SChris Mason while (!list_empty(&fs_info->dirty_cowonly_roots)) { 5010b86a832SChris Mason next = fs_info->dirty_cowonly_roots.next; 5020b86a832SChris Mason list_del_init(next); 5030b86a832SChris Mason root = list_entry(next, struct btrfs_root, dirty_list); 50487ef2bb4SChris Mason 5050b86a832SChris Mason update_cowonly_root(trans, root); 50656bec294SChris Mason 50756bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 50856bec294SChris Mason BUG_ON(ret); 50979154b1bSChris Mason } 51079154b1bSChris Mason return 0; 51179154b1bSChris Mason } 51279154b1bSChris Mason 513d352ac68SChris Mason /* 514d352ac68SChris Mason * dead roots are old snapshots that need to be deleted. 
This allocates 515d352ac68SChris Mason * a dirty root struct and adds it into the list of dead roots that need to 516d352ac68SChris Mason * be deleted 517d352ac68SChris Mason */ 518b48652c1SYan Zheng int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) 5195eda7b5eSChris Mason { 520f321e491SYan Zheng struct btrfs_dirty_root *dirty; 5215eda7b5eSChris Mason 5225eda7b5eSChris Mason dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 5235eda7b5eSChris Mason if (!dirty) 5245eda7b5eSChris Mason return -ENOMEM; 5255eda7b5eSChris Mason dirty->root = root; 5265ce14bbcSChris Mason dirty->latest_root = latest; 527b48652c1SYan Zheng 528b48652c1SYan Zheng mutex_lock(&root->fs_info->trans_mutex); 529b48652c1SYan Zheng list_add(&dirty->list, &latest->fs_info->dead_roots); 530b48652c1SYan Zheng mutex_unlock(&root->fs_info->trans_mutex); 5315eda7b5eSChris Mason return 0; 5325eda7b5eSChris Mason } 5335eda7b5eSChris Mason 534d352ac68SChris Mason /* 535d352ac68SChris Mason * at transaction commit time we need to schedule the old roots for 536d352ac68SChris Mason * deletion via btrfs_drop_snapshot. 
This runs through all the 537d352ac68SChris Mason * reference counted roots that were modified in the current 538d352ac68SChris Mason * transaction and puts them into the drop list 539d352ac68SChris Mason */ 54080b6794dSChris Mason static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, 54135b7e476SChris Mason struct radix_tree_root *radix, 54235b7e476SChris Mason struct list_head *list) 5430f7d52f4SChris Mason { 544f321e491SYan Zheng struct btrfs_dirty_root *dirty; 5450f7d52f4SChris Mason struct btrfs_root *gang[8]; 5460f7d52f4SChris Mason struct btrfs_root *root; 5470f7d52f4SChris Mason int i; 5480f7d52f4SChris Mason int ret; 54954aa1f4dSChris Mason int err = 0; 5505eda7b5eSChris Mason u32 refs; 55154aa1f4dSChris Mason 5520f7d52f4SChris Mason while (1) { 5530f7d52f4SChris Mason ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, 5540f7d52f4SChris Mason ARRAY_SIZE(gang), 5550f7d52f4SChris Mason BTRFS_ROOT_TRANS_TAG); 5560f7d52f4SChris Mason if (ret == 0) 5570f7d52f4SChris Mason break; 5580f7d52f4SChris Mason for (i = 0; i < ret; i++) { 5590f7d52f4SChris Mason root = gang[i]; 5602619ba1fSChris Mason radix_tree_tag_clear(radix, 5612619ba1fSChris Mason (unsigned long)root->root_key.objectid, 5620f7d52f4SChris Mason BTRFS_ROOT_TRANS_TAG); 56331153d81SYan Zheng 56431153d81SYan Zheng BUG_ON(!root->ref_tree); 565017e5369SChris Mason dirty = root->dirty_root; 56631153d81SYan Zheng 567e02119d5SChris Mason btrfs_free_log(trans, root); 568f82d02d9SYan Zheng btrfs_free_reloc_root(trans, root); 569e02119d5SChris Mason 5700f7d52f4SChris Mason if (root->commit_root == root->node) { 571db94535dSChris Mason WARN_ON(root->node->start != 572db94535dSChris Mason btrfs_root_bytenr(&root->root_item)); 57331153d81SYan Zheng 5745f39d397SChris Mason free_extent_buffer(root->commit_root); 5750f7d52f4SChris Mason root->commit_root = NULL; 5767ea394f1SYan Zheng root->dirty_root = NULL; 57731153d81SYan Zheng 578bcc63abbSYan spin_lock(&root->list_lock); 579bcc63abbSYan 
list_del_init(&dirty->root->dead_list); 580bcc63abbSYan spin_unlock(&root->list_lock); 581bcc63abbSYan 58231153d81SYan Zheng kfree(dirty->root); 58331153d81SYan Zheng kfree(dirty); 58458176a96SJosef Bacik 58558176a96SJosef Bacik /* make sure to update the root on disk 58658176a96SJosef Bacik * so we get any updates to the block used 58758176a96SJosef Bacik * counts 58858176a96SJosef Bacik */ 58958176a96SJosef Bacik err = btrfs_update_root(trans, 59058176a96SJosef Bacik root->fs_info->tree_root, 59158176a96SJosef Bacik &root->root_key, 59258176a96SJosef Bacik &root->root_item); 5930f7d52f4SChris Mason continue; 5940f7d52f4SChris Mason } 5959f3a7427SChris Mason 5969f3a7427SChris Mason memset(&root->root_item.drop_progress, 0, 5979f3a7427SChris Mason sizeof(struct btrfs_disk_key)); 5989f3a7427SChris Mason root->root_item.drop_level = 0; 5990f7d52f4SChris Mason root->commit_root = NULL; 6007ea394f1SYan Zheng root->dirty_root = NULL; 6010f7d52f4SChris Mason root->root_key.offset = root->fs_info->generation; 602db94535dSChris Mason btrfs_set_root_bytenr(&root->root_item, 603db94535dSChris Mason root->node->start); 604db94535dSChris Mason btrfs_set_root_level(&root->root_item, 605db94535dSChris Mason btrfs_header_level(root->node)); 60684234f3aSYan Zheng btrfs_set_root_generation(&root->root_item, 60784234f3aSYan Zheng root->root_key.offset); 60884234f3aSYan Zheng 6090f7d52f4SChris Mason err = btrfs_insert_root(trans, root->fs_info->tree_root, 6100f7d52f4SChris Mason &root->root_key, 6110f7d52f4SChris Mason &root->root_item); 61254aa1f4dSChris Mason if (err) 61354aa1f4dSChris Mason break; 6149f3a7427SChris Mason 6159f3a7427SChris Mason refs = btrfs_root_refs(&dirty->root->root_item); 6169f3a7427SChris Mason btrfs_set_root_refs(&dirty->root->root_item, refs - 1); 6175eda7b5eSChris Mason err = btrfs_update_root(trans, root->fs_info->tree_root, 6189f3a7427SChris Mason &dirty->root->root_key, 6199f3a7427SChris Mason &dirty->root->root_item); 6205eda7b5eSChris Mason 
6215eda7b5eSChris Mason BUG_ON(err); 6229f3a7427SChris Mason if (refs == 1) { 6230f7d52f4SChris Mason list_add(&dirty->list, list); 6249f3a7427SChris Mason } else { 6259f3a7427SChris Mason WARN_ON(1); 62631153d81SYan Zheng free_extent_buffer(dirty->root->node); 6279f3a7427SChris Mason kfree(dirty->root); 6285eda7b5eSChris Mason kfree(dirty); 6290f7d52f4SChris Mason } 6300f7d52f4SChris Mason } 6319f3a7427SChris Mason } 63254aa1f4dSChris Mason return err; 6330f7d52f4SChris Mason } 6340f7d52f4SChris Mason 635d352ac68SChris Mason /* 636d352ac68SChris Mason * defrag a given btree. If cacheonly == 1, this won't read from the disk, 637d352ac68SChris Mason * otherwise every leaf in the btree is read and defragged. 638d352ac68SChris Mason */ 639e9d0b13bSChris Mason int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) 640e9d0b13bSChris Mason { 641e9d0b13bSChris Mason struct btrfs_fs_info *info = root->fs_info; 642e9d0b13bSChris Mason int ret; 643e9d0b13bSChris Mason struct btrfs_trans_handle *trans; 644d3c2fdcfSChris Mason unsigned long nr; 645e9d0b13bSChris Mason 646a2135011SChris Mason smp_mb(); 647e9d0b13bSChris Mason if (root->defrag_running) 648e9d0b13bSChris Mason return 0; 649e9d0b13bSChris Mason trans = btrfs_start_transaction(root, 1); 6506b80053dSChris Mason while (1) { 651e9d0b13bSChris Mason root->defrag_running = 1; 652e9d0b13bSChris Mason ret = btrfs_defrag_leaves(trans, root, cacheonly); 653d3c2fdcfSChris Mason nr = trans->blocks_used; 654e9d0b13bSChris Mason btrfs_end_transaction(trans, root); 655d3c2fdcfSChris Mason btrfs_btree_balance_dirty(info->tree_root, nr); 656e9d0b13bSChris Mason cond_resched(); 657e9d0b13bSChris Mason 658e9d0b13bSChris Mason trans = btrfs_start_transaction(root, 1); 6593f157a2fSChris Mason if (root->fs_info->closing || ret != -EAGAIN) 660e9d0b13bSChris Mason break; 661e9d0b13bSChris Mason } 662e9d0b13bSChris Mason root->defrag_running = 0; 663a2135011SChris Mason smp_mb(); 664e9d0b13bSChris Mason btrfs_end_transaction(trans, 
root); 665e9d0b13bSChris Mason return 0; 666e9d0b13bSChris Mason } 667e9d0b13bSChris Mason 668d352ac68SChris Mason /* 669d352ac68SChris Mason * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 670d352ac68SChris Mason * all of them 671d352ac68SChris Mason */ 67280b6794dSChris Mason static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 67335b7e476SChris Mason struct list_head *list) 6740f7d52f4SChris Mason { 675f321e491SYan Zheng struct btrfs_dirty_root *dirty; 6760f7d52f4SChris Mason struct btrfs_trans_handle *trans; 677d3c2fdcfSChris Mason unsigned long nr; 678db94535dSChris Mason u64 num_bytes; 679db94535dSChris Mason u64 bytes_used; 680bcc63abbSYan u64 max_useless; 68154aa1f4dSChris Mason int ret = 0; 6829f3a7427SChris Mason int err; 6839f3a7427SChris Mason 6840f7d52f4SChris Mason while (!list_empty(list)) { 68558176a96SJosef Bacik struct btrfs_root *root; 68658176a96SJosef Bacik 687f321e491SYan Zheng dirty = list_entry(list->prev, struct btrfs_dirty_root, list); 6880f7d52f4SChris Mason list_del_init(&dirty->list); 6895eda7b5eSChris Mason 690db94535dSChris Mason num_bytes = btrfs_root_used(&dirty->root->root_item); 69158176a96SJosef Bacik root = dirty->latest_root; 692a2135011SChris Mason atomic_inc(&root->fs_info->throttles); 69358176a96SJosef Bacik 6949f3a7427SChris Mason while (1) { 6950f7d52f4SChris Mason trans = btrfs_start_transaction(tree_root, 1); 6965b21f2edSZheng Yan mutex_lock(&root->fs_info->drop_mutex); 6979f3a7427SChris Mason ret = btrfs_drop_snapshot(trans, dirty->root); 698d397712bSChris Mason if (ret != -EAGAIN) 6999f3a7427SChris Mason break; 7005b21f2edSZheng Yan mutex_unlock(&root->fs_info->drop_mutex); 70158176a96SJosef Bacik 7029f3a7427SChris Mason err = btrfs_update_root(trans, 7039f3a7427SChris Mason tree_root, 7049f3a7427SChris Mason &dirty->root->root_key, 7059f3a7427SChris Mason &dirty->root->root_item); 7069f3a7427SChris Mason if (err) 7079f3a7427SChris Mason ret = err; 708d3c2fdcfSChris Mason nr = 
trans->blocks_used; 709017e5369SChris Mason ret = btrfs_end_transaction(trans, tree_root); 7100f7d52f4SChris Mason BUG_ON(ret); 711a2135011SChris Mason 712d3c2fdcfSChris Mason btrfs_btree_balance_dirty(tree_root, nr); 7134dc11904SChris Mason cond_resched(); 7149f3a7427SChris Mason } 7159f3a7427SChris Mason BUG_ON(ret); 716a2135011SChris Mason atomic_dec(&root->fs_info->throttles); 717017e5369SChris Mason wake_up(&root->fs_info->transaction_throttle); 71858176a96SJosef Bacik 719db94535dSChris Mason num_bytes -= btrfs_root_used(&dirty->root->root_item); 720db94535dSChris Mason bytes_used = btrfs_root_used(&root->root_item); 721db94535dSChris Mason if (num_bytes) { 72224562425SYan Zheng mutex_lock(&root->fs_info->trans_mutex); 723e02119d5SChris Mason btrfs_record_root_in_trans(root); 72424562425SYan Zheng mutex_unlock(&root->fs_info->trans_mutex); 7255f39d397SChris Mason btrfs_set_root_used(&root->root_item, 726db94535dSChris Mason bytes_used - num_bytes); 72758176a96SJosef Bacik } 728a2135011SChris Mason 7299f3a7427SChris Mason ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); 73058176a96SJosef Bacik if (ret) { 73158176a96SJosef Bacik BUG(); 73254aa1f4dSChris Mason break; 73358176a96SJosef Bacik } 734a2135011SChris Mason mutex_unlock(&root->fs_info->drop_mutex); 735a2135011SChris Mason 736bcc63abbSYan spin_lock(&root->list_lock); 737bcc63abbSYan list_del_init(&dirty->root->dead_list); 738bcc63abbSYan if (!list_empty(&root->dead_list)) { 739bcc63abbSYan struct btrfs_root *oldest; 740bcc63abbSYan oldest = list_entry(root->dead_list.prev, 741bcc63abbSYan struct btrfs_root, dead_list); 742bcc63abbSYan max_useless = oldest->root_key.offset - 1; 743bcc63abbSYan } else { 744bcc63abbSYan max_useless = root->root_key.offset - 1; 745bcc63abbSYan } 746bcc63abbSYan spin_unlock(&root->list_lock); 747bcc63abbSYan 748d3c2fdcfSChris Mason nr = trans->blocks_used; 7490f7d52f4SChris Mason ret = btrfs_end_transaction(trans, tree_root); 7500f7d52f4SChris Mason BUG_ON(ret); 
7515eda7b5eSChris Mason 752e4657689SZheng Yan ret = btrfs_remove_leaf_refs(root, max_useless, 0); 753bcc63abbSYan BUG_ON(ret); 754bcc63abbSYan 755f510cfecSChris Mason free_extent_buffer(dirty->root->node); 7565eda7b5eSChris Mason kfree(dirty->root); 7570f7d52f4SChris Mason kfree(dirty); 758d3c2fdcfSChris Mason 759d3c2fdcfSChris Mason btrfs_btree_balance_dirty(tree_root, nr); 7604dc11904SChris Mason cond_resched(); 7610f7d52f4SChris Mason } 76254aa1f4dSChris Mason return ret; 7630f7d52f4SChris Mason } 7640f7d52f4SChris Mason 765d352ac68SChris Mason /* 766d352ac68SChris Mason * new snapshots need to be created at a very specific time in the 767d352ac68SChris Mason * transaction commit. This does the actual creation 768d352ac68SChris Mason */ 76980b6794dSChris Mason static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 7703063d29fSChris Mason struct btrfs_fs_info *fs_info, 7713063d29fSChris Mason struct btrfs_pending_snapshot *pending) 7723063d29fSChris Mason { 7733063d29fSChris Mason struct btrfs_key key; 77480b6794dSChris Mason struct btrfs_root_item *new_root_item; 7753063d29fSChris Mason struct btrfs_root *tree_root = fs_info->tree_root; 7763063d29fSChris Mason struct btrfs_root *root = pending->root; 7773063d29fSChris Mason struct extent_buffer *tmp; 778925baeddSChris Mason struct extent_buffer *old; 7793063d29fSChris Mason int ret; 7803063d29fSChris Mason u64 objectid; 7813063d29fSChris Mason 78280b6794dSChris Mason new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 78380b6794dSChris Mason if (!new_root_item) { 78480b6794dSChris Mason ret = -ENOMEM; 78580b6794dSChris Mason goto fail; 78680b6794dSChris Mason } 7873063d29fSChris Mason ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 7883063d29fSChris Mason if (ret) 7893063d29fSChris Mason goto fail; 7903063d29fSChris Mason 79180ff3856SYan Zheng btrfs_record_root_in_trans(root); 79280ff3856SYan Zheng btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 
79380b6794dSChris Mason memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 7943063d29fSChris Mason 7953063d29fSChris Mason key.objectid = objectid; 7965b21f2edSZheng Yan key.offset = trans->transid; 7973063d29fSChris Mason btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 7983063d29fSChris Mason 799925baeddSChris Mason old = btrfs_lock_root_node(root); 8009fa8cfe7SChris Mason btrfs_cow_block(trans, root, old, NULL, 0, &old); 8013063d29fSChris Mason 802925baeddSChris Mason btrfs_copy_root(trans, root, old, &tmp, objectid); 803925baeddSChris Mason btrfs_tree_unlock(old); 804925baeddSChris Mason free_extent_buffer(old); 8053063d29fSChris Mason 80680b6794dSChris Mason btrfs_set_root_bytenr(new_root_item, tmp->start); 80780b6794dSChris Mason btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); 80884234f3aSYan Zheng btrfs_set_root_generation(new_root_item, trans->transid); 8093063d29fSChris Mason ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 81080b6794dSChris Mason new_root_item); 811925baeddSChris Mason btrfs_tree_unlock(tmp); 8123063d29fSChris Mason free_extent_buffer(tmp); 8133063d29fSChris Mason if (ret) 8143063d29fSChris Mason goto fail; 8153063d29fSChris Mason 8163de4586cSChris Mason key.offset = (u64)-1; 8173de4586cSChris Mason memcpy(&pending->root_key, &key, sizeof(key)); 8183de4586cSChris Mason fail: 8193de4586cSChris Mason kfree(new_root_item); 8203de4586cSChris Mason return ret; 8213de4586cSChris Mason } 8223de4586cSChris Mason 8233de4586cSChris Mason static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, 8243de4586cSChris Mason struct btrfs_pending_snapshot *pending) 8253de4586cSChris Mason { 8263de4586cSChris Mason int ret; 8273de4586cSChris Mason int namelen; 8283de4586cSChris Mason u64 index = 0; 8293de4586cSChris Mason struct btrfs_trans_handle *trans; 8303de4586cSChris Mason struct inode *parent_inode; 8313de4586cSChris Mason struct inode *inode; 8320660b5afSChris Mason struct btrfs_root 
*parent_root; 8333de4586cSChris Mason 8343394e160SChris Mason parent_inode = pending->dentry->d_parent->d_inode; 8350660b5afSChris Mason parent_root = BTRFS_I(parent_inode)->root; 836180591bcSYan Zheng trans = btrfs_join_transaction(parent_root, 1); 8373de4586cSChris Mason 8383063d29fSChris Mason /* 8393063d29fSChris Mason * insert the directory item 8403063d29fSChris Mason */ 8413b96362cSSven Wegener namelen = strlen(pending->name); 8423de4586cSChris Mason ret = btrfs_set_inode_index(parent_inode, &index); 8430660b5afSChris Mason ret = btrfs_insert_dir_item(trans, parent_root, 8443b96362cSSven Wegener pending->name, namelen, 8453de4586cSChris Mason parent_inode->i_ino, 8463de4586cSChris Mason &pending->root_key, BTRFS_FT_DIR, index); 8473063d29fSChris Mason 8483063d29fSChris Mason if (ret) 8493063d29fSChris Mason goto fail; 8500660b5afSChris Mason 85152c26179SYan Zheng btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); 85252c26179SYan Zheng ret = btrfs_update_inode(trans, parent_root, parent_inode); 85352c26179SYan Zheng BUG_ON(ret); 85452c26179SYan Zheng 8550660b5afSChris Mason /* add the backref first */ 8560660b5afSChris Mason ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 8570660b5afSChris Mason pending->root_key.objectid, 8580660b5afSChris Mason BTRFS_ROOT_BACKREF_KEY, 8590660b5afSChris Mason parent_root->root_key.objectid, 8600660b5afSChris Mason parent_inode->i_ino, index, pending->name, 8610660b5afSChris Mason namelen); 8620660b5afSChris Mason 8630660b5afSChris Mason BUG_ON(ret); 8640660b5afSChris Mason 8650660b5afSChris Mason /* now add the forward ref */ 8660660b5afSChris Mason ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 8670660b5afSChris Mason parent_root->root_key.objectid, 8680660b5afSChris Mason BTRFS_ROOT_REF_KEY, 8690660b5afSChris Mason pending->root_key.objectid, 8700660b5afSChris Mason parent_inode->i_ino, index, pending->name, 8710660b5afSChris Mason namelen); 8720660b5afSChris Mason 
8733de4586cSChris Mason inode = btrfs_lookup_dentry(parent_inode, pending->dentry); 8743de4586cSChris Mason d_instantiate(pending->dentry, inode); 8753063d29fSChris Mason fail: 8763de4586cSChris Mason btrfs_end_transaction(trans, fs_info->fs_root); 8773063d29fSChris Mason return ret; 8783063d29fSChris Mason } 8793063d29fSChris Mason 880d352ac68SChris Mason /* 881d352ac68SChris Mason * create all the snapshots we've scheduled for creation 882d352ac68SChris Mason */ 88380b6794dSChris Mason static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, 8843063d29fSChris Mason struct btrfs_fs_info *fs_info) 8853063d29fSChris Mason { 8863063d29fSChris Mason struct btrfs_pending_snapshot *pending; 8873063d29fSChris Mason struct list_head *head = &trans->transaction->pending_snapshots; 8883de4586cSChris Mason int ret; 8893de4586cSChris Mason 890c6e30871SQinghuang Feng list_for_each_entry(pending, head, list) { 8913de4586cSChris Mason ret = create_pending_snapshot(trans, fs_info, pending); 8923de4586cSChris Mason BUG_ON(ret); 8933de4586cSChris Mason } 8943de4586cSChris Mason return 0; 8953de4586cSChris Mason } 8963de4586cSChris Mason 8973de4586cSChris Mason static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, 8983de4586cSChris Mason struct btrfs_fs_info *fs_info) 8993de4586cSChris Mason { 9003de4586cSChris Mason struct btrfs_pending_snapshot *pending; 9013de4586cSChris Mason struct list_head *head = &trans->transaction->pending_snapshots; 9023063d29fSChris Mason int ret; 9033063d29fSChris Mason 9043063d29fSChris Mason while (!list_empty(head)) { 9053063d29fSChris Mason pending = list_entry(head->next, 9063063d29fSChris Mason struct btrfs_pending_snapshot, list); 9073de4586cSChris Mason ret = finish_pending_snapshot(fs_info, pending); 9083063d29fSChris Mason BUG_ON(ret); 9093063d29fSChris Mason list_del(&pending->list); 9103063d29fSChris Mason kfree(pending->name); 9113063d29fSChris Mason kfree(pending); 9123063d29fSChris Mason } 
913dc17ff8fSChris Mason return 0; 914dc17ff8fSChris Mason } 915dc17ff8fSChris Mason 91679154b1bSChris Mason int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 91779154b1bSChris Mason struct btrfs_root *root) 91879154b1bSChris Mason { 91915ee9bc7SJosef Bacik unsigned long joined = 0; 92015ee9bc7SJosef Bacik unsigned long timeout = 1; 92179154b1bSChris Mason struct btrfs_transaction *cur_trans; 9228fd17795SChris Mason struct btrfs_transaction *prev_trans = NULL; 9230b86a832SChris Mason struct btrfs_root *chunk_root = root->fs_info->chunk_root; 9240f7d52f4SChris Mason struct list_head dirty_fs_roots; 925d1310b2eSChris Mason struct extent_io_tree *pinned_copy; 92679154b1bSChris Mason DEFINE_WAIT(wait); 92715ee9bc7SJosef Bacik int ret; 92879154b1bSChris Mason 92956bec294SChris Mason /* make a pass through all the delayed refs we have so far 93056bec294SChris Mason * any runnings procs may add more while we are here 93156bec294SChris Mason */ 93256bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, 0); 93356bec294SChris Mason BUG_ON(ret); 93456bec294SChris Mason 93556bec294SChris Mason /* 93656bec294SChris Mason * set the flushing flag so procs in this transaction have to 93756bec294SChris Mason * start sending their work down. 
93856bec294SChris Mason */ 93956bec294SChris Mason trans->transaction->delayed_refs.flushing = 1; 94056bec294SChris Mason 941*c3e69d58SChris Mason ret = btrfs_run_delayed_refs(trans, root, 0); 94256bec294SChris Mason BUG_ON(ret); 94356bec294SChris Mason 9440f7d52f4SChris Mason INIT_LIST_HEAD(&dirty_fs_roots); 94579154b1bSChris Mason mutex_lock(&root->fs_info->trans_mutex); 94679154b1bSChris Mason if (trans->transaction->in_commit) { 94779154b1bSChris Mason cur_trans = trans->transaction; 94879154b1bSChris Mason trans->transaction->use_count++; 949ccd467d6SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 95079154b1bSChris Mason btrfs_end_transaction(trans, root); 951ccd467d6SChris Mason 95279154b1bSChris Mason ret = wait_for_commit(root, cur_trans); 95379154b1bSChris Mason BUG_ON(ret); 95415ee9bc7SJosef Bacik 95515ee9bc7SJosef Bacik mutex_lock(&root->fs_info->trans_mutex); 95679154b1bSChris Mason put_transaction(cur_trans); 95715ee9bc7SJosef Bacik mutex_unlock(&root->fs_info->trans_mutex); 95815ee9bc7SJosef Bacik 95979154b1bSChris Mason return 0; 96079154b1bSChris Mason } 9614313b399SChris Mason 9624313b399SChris Mason pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); 9634313b399SChris Mason if (!pinned_copy) 9644313b399SChris Mason return -ENOMEM; 9654313b399SChris Mason 966d1310b2eSChris Mason extent_io_tree_init(pinned_copy, 9674313b399SChris Mason root->fs_info->btree_inode->i_mapping, GFP_NOFS); 9684313b399SChris Mason 9692c90e5d6SChris Mason trans->transaction->in_commit = 1; 970f9295749SChris Mason trans->transaction->blocked = 1; 971ccd467d6SChris Mason cur_trans = trans->transaction; 972ccd467d6SChris Mason if (cur_trans->list.prev != &root->fs_info->trans_list) { 973ccd467d6SChris Mason prev_trans = list_entry(cur_trans->list.prev, 974ccd467d6SChris Mason struct btrfs_transaction, list); 975ccd467d6SChris Mason if (!prev_trans->commit_done) { 976ccd467d6SChris Mason prev_trans->use_count++; 977ccd467d6SChris Mason 
mutex_unlock(&root->fs_info->trans_mutex); 978ccd467d6SChris Mason 979ccd467d6SChris Mason wait_for_commit(root, prev_trans); 980ccd467d6SChris Mason 981ccd467d6SChris Mason mutex_lock(&root->fs_info->trans_mutex); 98215ee9bc7SJosef Bacik put_transaction(prev_trans); 983ccd467d6SChris Mason } 984ccd467d6SChris Mason } 98515ee9bc7SJosef Bacik 98615ee9bc7SJosef Bacik do { 9877ea394f1SYan Zheng int snap_pending = 0; 98815ee9bc7SJosef Bacik joined = cur_trans->num_joined; 9897ea394f1SYan Zheng if (!list_empty(&trans->transaction->pending_snapshots)) 9907ea394f1SYan Zheng snap_pending = 1; 9917ea394f1SYan Zheng 9922c90e5d6SChris Mason WARN_ON(cur_trans != trans->transaction); 99315ee9bc7SJosef Bacik prepare_to_wait(&cur_trans->writer_wait, &wait, 99479154b1bSChris Mason TASK_UNINTERRUPTIBLE); 99515ee9bc7SJosef Bacik 99615ee9bc7SJosef Bacik if (cur_trans->num_writers > 1) 99715ee9bc7SJosef Bacik timeout = MAX_SCHEDULE_TIMEOUT; 99815ee9bc7SJosef Bacik else 99915ee9bc7SJosef Bacik timeout = 1; 100015ee9bc7SJosef Bacik 100179154b1bSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 100215ee9bc7SJosef Bacik 10037ea394f1SYan Zheng if (snap_pending) { 10047ea394f1SYan Zheng ret = btrfs_wait_ordered_extents(root, 1); 10057ea394f1SYan Zheng BUG_ON(ret); 10067ea394f1SYan Zheng } 10077ea394f1SYan Zheng 100815ee9bc7SJosef Bacik schedule_timeout(timeout); 100915ee9bc7SJosef Bacik 101079154b1bSChris Mason mutex_lock(&root->fs_info->trans_mutex); 101115ee9bc7SJosef Bacik finish_wait(&cur_trans->writer_wait, &wait); 101215ee9bc7SJosef Bacik } while (cur_trans->num_writers > 1 || 101315ee9bc7SJosef Bacik (cur_trans->num_joined != joined)); 101415ee9bc7SJosef Bacik 10153063d29fSChris Mason ret = create_pending_snapshots(trans, root->fs_info); 10163063d29fSChris Mason BUG_ON(ret); 10173063d29fSChris Mason 101856bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 101956bec294SChris Mason BUG_ON(ret); 102056bec294SChris Mason 10212c90e5d6SChris Mason 
WARN_ON(cur_trans != trans->transaction); 1022dc17ff8fSChris Mason 1023e02119d5SChris Mason /* btrfs_commit_tree_roots is responsible for getting the 1024e02119d5SChris Mason * various roots consistent with each other. Every pointer 1025e02119d5SChris Mason * in the tree of tree roots has to point to the most up to date 1026e02119d5SChris Mason * root for every subvolume and other tree. So, we have to keep 1027e02119d5SChris Mason * the tree logging code from jumping in and changing any 1028e02119d5SChris Mason * of the trees. 1029e02119d5SChris Mason * 1030e02119d5SChris Mason * At this point in the commit, there can't be any tree-log 1031e02119d5SChris Mason * writers, but a little lower down we drop the trans mutex 1032e02119d5SChris Mason * and let new people in. By holding the tree_log_mutex 1033e02119d5SChris Mason * from now until after the super is written, we avoid races 1034e02119d5SChris Mason * with the tree-log code. 1035e02119d5SChris Mason */ 1036e02119d5SChris Mason mutex_lock(&root->fs_info->tree_log_mutex); 10371a40e23bSZheng Yan /* 10381a40e23bSZheng Yan * keep tree reloc code from adding new reloc trees 10391a40e23bSZheng Yan */ 10401a40e23bSZheng Yan mutex_lock(&root->fs_info->tree_reloc_mutex); 10411a40e23bSZheng Yan 1042e02119d5SChris Mason 104354aa1f4dSChris Mason ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 104454aa1f4dSChris Mason &dirty_fs_roots); 104554aa1f4dSChris Mason BUG_ON(ret); 104654aa1f4dSChris Mason 1047e02119d5SChris Mason /* add_dirty_roots gets rid of all the tree log roots, it is now 1048e02119d5SChris Mason * safe to free the root of tree log roots 1049e02119d5SChris Mason */ 1050e02119d5SChris Mason btrfs_free_log_root_tree(trans, root->fs_info); 1051e02119d5SChris Mason 105279154b1bSChris Mason ret = btrfs_commit_tree_roots(trans, root); 105379154b1bSChris Mason BUG_ON(ret); 105454aa1f4dSChris Mason 105578fae27eSChris Mason cur_trans = root->fs_info->running_transaction; 1056cee36a03SChris Mason 
spin_lock(&root->fs_info->new_trans_lock); 105778fae27eSChris Mason root->fs_info->running_transaction = NULL; 1058cee36a03SChris Mason spin_unlock(&root->fs_info->new_trans_lock); 10594b52dff6SChris Mason btrfs_set_super_generation(&root->fs_info->super_copy, 10604b52dff6SChris Mason cur_trans->transid); 10614b52dff6SChris Mason btrfs_set_super_root(&root->fs_info->super_copy, 1062db94535dSChris Mason root->fs_info->tree_root->node->start); 1063db94535dSChris Mason btrfs_set_super_root_level(&root->fs_info->super_copy, 1064db94535dSChris Mason btrfs_header_level(root->fs_info->tree_root->node)); 10655f39d397SChris Mason 10660b86a832SChris Mason btrfs_set_super_chunk_root(&root->fs_info->super_copy, 10670b86a832SChris Mason chunk_root->node->start); 10680b86a832SChris Mason btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, 10690b86a832SChris Mason btrfs_header_level(chunk_root->node)); 107084234f3aSYan Zheng btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, 107184234f3aSYan Zheng btrfs_header_generation(chunk_root->node)); 1072e02119d5SChris Mason 1073e02119d5SChris Mason if (!root->fs_info->log_root_recovering) { 1074e02119d5SChris Mason btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1075e02119d5SChris Mason btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); 1076e02119d5SChris Mason } 1077e02119d5SChris Mason 1078a061fc8dSChris Mason memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, 10794b52dff6SChris Mason sizeof(root->fs_info->super_copy)); 1080ccd467d6SChris Mason 10814313b399SChris Mason btrfs_copy_pinned(root, pinned_copy); 1082ccd467d6SChris Mason 1083f9295749SChris Mason trans->transaction->blocked = 0; 1084e6dcd2dcSChris Mason wake_up(&root->fs_info->transaction_throttle); 1085f9295749SChris Mason wake_up(&root->fs_info->transaction_wait); 1086e6dcd2dcSChris Mason 108778fae27eSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 108879154b1bSChris Mason ret = 
btrfs_write_and_wait_transaction(trans, root); 108979154b1bSChris Mason BUG_ON(ret); 1090a512bbf8SYan Zheng write_ctree_super(trans, root, 0); 10914313b399SChris Mason 1092e02119d5SChris Mason /* 1093e02119d5SChris Mason * the super is written, we can safely allow the tree-loggers 1094e02119d5SChris Mason * to go about their business 1095e02119d5SChris Mason */ 1096e02119d5SChris Mason mutex_unlock(&root->fs_info->tree_log_mutex); 1097e02119d5SChris Mason 10984313b399SChris Mason btrfs_finish_extent_commit(trans, root, pinned_copy); 10994313b399SChris Mason kfree(pinned_copy); 11004313b399SChris Mason 11011a40e23bSZheng Yan btrfs_drop_dead_reloc_roots(root); 11021a40e23bSZheng Yan mutex_unlock(&root->fs_info->tree_reloc_mutex); 11031a40e23bSZheng Yan 11043de4586cSChris Mason /* do the directory inserts of any pending snapshot creations */ 11053de4586cSChris Mason finish_pending_snapshots(trans, root->fs_info); 11063de4586cSChris Mason 11071a40e23bSZheng Yan mutex_lock(&root->fs_info->trans_mutex); 11081a40e23bSZheng Yan 11092c90e5d6SChris Mason cur_trans->commit_done = 1; 111015ee9bc7SJosef Bacik root->fs_info->last_trans_committed = cur_trans->transid; 11112c90e5d6SChris Mason wake_up(&cur_trans->commit_wait); 11123de4586cSChris Mason 111379154b1bSChris Mason put_transaction(cur_trans); 111478fae27eSChris Mason put_transaction(cur_trans); 111558176a96SJosef Bacik 1116bcc63abbSYan list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); 1117facda1e7SChris Mason if (root->fs_info->closing) 1118facda1e7SChris Mason list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); 111958176a96SJosef Bacik 112078fae27eSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 11213de4586cSChris Mason 11222c90e5d6SChris Mason kmem_cache_free(btrfs_trans_handle_cachep, trans); 112379154b1bSChris Mason 1124d397712bSChris Mason if (root->fs_info->closing) 11250f7d52f4SChris Mason drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); 112679154b1bSChris Mason return 
ret; 112779154b1bSChris Mason } 112879154b1bSChris Mason 1129d352ac68SChris Mason /* 1130d352ac68SChris Mason * interface function to delete all the snapshots we have scheduled for deletion 1131d352ac68SChris Mason */ 1132e9d0b13bSChris Mason int btrfs_clean_old_snapshots(struct btrfs_root *root) 1133e9d0b13bSChris Mason { 1134e9d0b13bSChris Mason struct list_head dirty_roots; 1135e9d0b13bSChris Mason INIT_LIST_HEAD(&dirty_roots); 1136a74a4b97SChris Mason again: 1137e9d0b13bSChris Mason mutex_lock(&root->fs_info->trans_mutex); 1138e9d0b13bSChris Mason list_splice_init(&root->fs_info->dead_roots, &dirty_roots); 1139e9d0b13bSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 1140e9d0b13bSChris Mason 1141e9d0b13bSChris Mason if (!list_empty(&dirty_roots)) { 1142e9d0b13bSChris Mason drop_dirty_roots(root, &dirty_roots); 1143a74a4b97SChris Mason goto again; 1144e9d0b13bSChris Mason } 1145e9d0b13bSChris Mason return 0; 1146e9d0b13bSChris Mason } 1147