/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
176cbd5570SChris Mason */ 186cbd5570SChris Mason 1979154b1bSChris Mason #include <linux/fs.h> 2034088780SChris Mason #include <linux/sched.h> 21d3c2fdcfSChris Mason #include <linux/writeback.h> 225f39d397SChris Mason #include <linux/pagemap.h> 235f2cc086SChris Mason #include <linux/blkdev.h> 2479154b1bSChris Mason #include "ctree.h" 2579154b1bSChris Mason #include "disk-io.h" 2679154b1bSChris Mason #include "transaction.h" 27925baeddSChris Mason #include "locking.h" 2831153d81SYan Zheng #include "ref-cache.h" 29e02119d5SChris Mason #include "tree-log.h" 3079154b1bSChris Mason 310f7d52f4SChris Mason #define BTRFS_ROOT_TRANS_TAG 0 320f7d52f4SChris Mason 3380b6794dSChris Mason static noinline void put_transaction(struct btrfs_transaction *transaction) 3479154b1bSChris Mason { 352c90e5d6SChris Mason WARN_ON(transaction->use_count == 0); 3679154b1bSChris Mason transaction->use_count--; 3778fae27eSChris Mason if (transaction->use_count == 0) { 388fd17795SChris Mason list_del_init(&transaction->list); 392c90e5d6SChris Mason memset(transaction, 0, sizeof(*transaction)); 402c90e5d6SChris Mason kmem_cache_free(btrfs_transaction_cachep, transaction); 4179154b1bSChris Mason } 4278fae27eSChris Mason } 4379154b1bSChris Mason 44d352ac68SChris Mason /* 45d352ac68SChris Mason * either allocate a new transaction or hop into the existing one 46d352ac68SChris Mason */ 4780b6794dSChris Mason static noinline int join_transaction(struct btrfs_root *root) 4879154b1bSChris Mason { 4979154b1bSChris Mason struct btrfs_transaction *cur_trans; 5079154b1bSChris Mason cur_trans = root->fs_info->running_transaction; 5179154b1bSChris Mason if (!cur_trans) { 522c90e5d6SChris Mason cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 532c90e5d6SChris Mason GFP_NOFS); 5479154b1bSChris Mason BUG_ON(!cur_trans); 550f7d52f4SChris Mason root->fs_info->generation++; 56e18e4809SChris Mason root->fs_info->last_alloc = 0; 574529ba49SChris Mason root->fs_info->last_data_alloc = 0; 5815ee9bc7SJosef Bacik 
cur_trans->num_writers = 1; 5915ee9bc7SJosef Bacik cur_trans->num_joined = 0; 600f7d52f4SChris Mason cur_trans->transid = root->fs_info->generation; 6179154b1bSChris Mason init_waitqueue_head(&cur_trans->writer_wait); 6279154b1bSChris Mason init_waitqueue_head(&cur_trans->commit_wait); 6379154b1bSChris Mason cur_trans->in_commit = 0; 64f9295749SChris Mason cur_trans->blocked = 0; 65d5719762SChris Mason cur_trans->use_count = 1; 6679154b1bSChris Mason cur_trans->commit_done = 0; 6708607c1bSChris Mason cur_trans->start_time = get_seconds(); 6856bec294SChris Mason 6956bec294SChris Mason cur_trans->delayed_refs.root.rb_node = NULL; 7056bec294SChris Mason cur_trans->delayed_refs.num_entries = 0; 71c3e69d58SChris Mason cur_trans->delayed_refs.num_heads_ready = 0; 72c3e69d58SChris Mason cur_trans->delayed_refs.num_heads = 0; 7356bec294SChris Mason cur_trans->delayed_refs.flushing = 0; 74c3e69d58SChris Mason cur_trans->delayed_refs.run_delayed_start = 0; 7556bec294SChris Mason spin_lock_init(&cur_trans->delayed_refs.lock); 7656bec294SChris Mason 773063d29fSChris Mason INIT_LIST_HEAD(&cur_trans->pending_snapshots); 788fd17795SChris Mason list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 79d1310b2eSChris Mason extent_io_tree_init(&cur_trans->dirty_pages, 805f39d397SChris Mason root->fs_info->btree_inode->i_mapping, 815f39d397SChris Mason GFP_NOFS); 8248ec2cf8SChris Mason spin_lock(&root->fs_info->new_trans_lock); 8348ec2cf8SChris Mason root->fs_info->running_transaction = cur_trans; 8448ec2cf8SChris Mason spin_unlock(&root->fs_info->new_trans_lock); 8515ee9bc7SJosef Bacik } else { 8679154b1bSChris Mason cur_trans->num_writers++; 8715ee9bc7SJosef Bacik cur_trans->num_joined++; 8815ee9bc7SJosef Bacik } 8915ee9bc7SJosef Bacik 9079154b1bSChris Mason return 0; 9179154b1bSChris Mason } 9279154b1bSChris Mason 93d352ac68SChris Mason /* 94d397712bSChris Mason * this does all the record keeping required to make sure that a reference 95d397712bSChris Mason * counted root is 
properly recorded in a given transaction. This is required 96d397712bSChris Mason * to make sure the old root from before we joined the transaction is deleted 97d397712bSChris Mason * when the transaction commits 98d352ac68SChris Mason */ 99e02119d5SChris Mason noinline int btrfs_record_root_in_trans(struct btrfs_root *root) 1006702ed49SChris Mason { 101f321e491SYan Zheng struct btrfs_dirty_root *dirty; 1026702ed49SChris Mason u64 running_trans_id = root->fs_info->running_transaction->transid; 1036702ed49SChris Mason if (root->ref_cows && root->last_trans < running_trans_id) { 1046702ed49SChris Mason WARN_ON(root == root->fs_info->extent_root); 1056702ed49SChris Mason if (root->root_item.refs != 0) { 1066702ed49SChris Mason radix_tree_tag_set(&root->fs_info->fs_roots_radix, 1076702ed49SChris Mason (unsigned long)root->root_key.objectid, 1086702ed49SChris Mason BTRFS_ROOT_TRANS_TAG); 10931153d81SYan Zheng 11031153d81SYan Zheng dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 11131153d81SYan Zheng BUG_ON(!dirty); 11231153d81SYan Zheng dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); 11331153d81SYan Zheng BUG_ON(!dirty->root); 11431153d81SYan Zheng dirty->latest_root = root; 11531153d81SYan Zheng INIT_LIST_HEAD(&dirty->list); 11631153d81SYan Zheng 117925baeddSChris Mason root->commit_root = btrfs_root_node(root); 11831153d81SYan Zheng 11931153d81SYan Zheng memcpy(dirty->root, root, sizeof(*root)); 12031153d81SYan Zheng spin_lock_init(&dirty->root->node_lock); 121bcc63abbSYan spin_lock_init(&dirty->root->list_lock); 12231153d81SYan Zheng mutex_init(&dirty->root->objectid_mutex); 1235b21f2edSZheng Yan mutex_init(&dirty->root->log_mutex); 124bcc63abbSYan INIT_LIST_HEAD(&dirty->root->dead_list); 12531153d81SYan Zheng dirty->root->node = root->commit_root; 12631153d81SYan Zheng dirty->root->commit_root = NULL; 127bcc63abbSYan 128bcc63abbSYan spin_lock(&root->list_lock); 129bcc63abbSYan list_add(&dirty->root->dead_list, &root->dead_list); 130bcc63abbSYan 
spin_unlock(&root->list_lock); 131bcc63abbSYan 132bcc63abbSYan root->dirty_root = dirty; 1336702ed49SChris Mason } else { 1346702ed49SChris Mason WARN_ON(1); 1356702ed49SChris Mason } 1366702ed49SChris Mason root->last_trans = running_trans_id; 1376702ed49SChris Mason } 1386702ed49SChris Mason return 0; 1396702ed49SChris Mason } 1406702ed49SChris Mason 141d352ac68SChris Mason /* wait for commit against the current transaction to become unblocked 142d352ac68SChris Mason * when this is done, it is safe to start a new transaction, but the current 143d352ac68SChris Mason * transaction might not be fully on disk. 144d352ac68SChris Mason */ 14537d1aeeeSChris Mason static void wait_current_trans(struct btrfs_root *root) 14679154b1bSChris Mason { 147f9295749SChris Mason struct btrfs_transaction *cur_trans; 14879154b1bSChris Mason 149f9295749SChris Mason cur_trans = root->fs_info->running_transaction; 15037d1aeeeSChris Mason if (cur_trans && cur_trans->blocked) { 151f9295749SChris Mason DEFINE_WAIT(wait); 152f9295749SChris Mason cur_trans->use_count++; 153f9295749SChris Mason while (1) { 154f9295749SChris Mason prepare_to_wait(&root->fs_info->transaction_wait, &wait, 155f9295749SChris Mason TASK_UNINTERRUPTIBLE); 156f9295749SChris Mason if (cur_trans->blocked) { 157f9295749SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 158f9295749SChris Mason schedule(); 159f9295749SChris Mason mutex_lock(&root->fs_info->trans_mutex); 160f9295749SChris Mason finish_wait(&root->fs_info->transaction_wait, 161f9295749SChris Mason &wait); 162f9295749SChris Mason } else { 163f9295749SChris Mason finish_wait(&root->fs_info->transaction_wait, 164f9295749SChris Mason &wait); 165f9295749SChris Mason break; 166f9295749SChris Mason } 167f9295749SChris Mason } 168f9295749SChris Mason put_transaction(cur_trans); 169f9295749SChris Mason } 17037d1aeeeSChris Mason } 17137d1aeeeSChris Mason 172e02119d5SChris Mason static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, 
1739ca9ee09SSage Weil int num_blocks, int wait) 17437d1aeeeSChris Mason { 17537d1aeeeSChris Mason struct btrfs_trans_handle *h = 17637d1aeeeSChris Mason kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 17737d1aeeeSChris Mason int ret; 17837d1aeeeSChris Mason 17937d1aeeeSChris Mason mutex_lock(&root->fs_info->trans_mutex); 1804bef0848SChris Mason if (!root->fs_info->log_root_recovering && 1814bef0848SChris Mason ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) 18237d1aeeeSChris Mason wait_current_trans(root); 18379154b1bSChris Mason ret = join_transaction(root); 18479154b1bSChris Mason BUG_ON(ret); 1850f7d52f4SChris Mason 186e02119d5SChris Mason btrfs_record_root_in_trans(root); 1876702ed49SChris Mason h->transid = root->fs_info->running_transaction->transid; 18879154b1bSChris Mason h->transaction = root->fs_info->running_transaction; 18979154b1bSChris Mason h->blocks_reserved = num_blocks; 19079154b1bSChris Mason h->blocks_used = 0; 191d2fb3437SYan Zheng h->block_group = 0; 19226b8003fSChris Mason h->alloc_exclude_nr = 0; 19326b8003fSChris Mason h->alloc_exclude_start = 0; 19456bec294SChris Mason h->delayed_ref_updates = 0; 195*b7ec40d7SChris Mason 19679154b1bSChris Mason root->fs_info->running_transaction->use_count++; 19779154b1bSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 19879154b1bSChris Mason return h; 19979154b1bSChris Mason } 20079154b1bSChris Mason 201f9295749SChris Mason struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 202f9295749SChris Mason int num_blocks) 203f9295749SChris Mason { 2049ca9ee09SSage Weil return start_transaction(root, num_blocks, 1); 205f9295749SChris Mason } 206f9295749SChris Mason struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 207f9295749SChris Mason int num_blocks) 208f9295749SChris Mason { 2099ca9ee09SSage Weil return start_transaction(root, num_blocks, 0); 210f9295749SChris Mason } 211f9295749SChris Mason 2129ca9ee09SSage Weil struct 
btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 2139ca9ee09SSage Weil int num_blocks) 2149ca9ee09SSage Weil { 2159ca9ee09SSage Weil return start_transaction(r, num_blocks, 2); 2169ca9ee09SSage Weil } 2179ca9ee09SSage Weil 218d352ac68SChris Mason /* wait for a transaction commit to be fully complete */ 21989ce8a63SChris Mason static noinline int wait_for_commit(struct btrfs_root *root, 22089ce8a63SChris Mason struct btrfs_transaction *commit) 22189ce8a63SChris Mason { 22289ce8a63SChris Mason DEFINE_WAIT(wait); 22389ce8a63SChris Mason mutex_lock(&root->fs_info->trans_mutex); 22489ce8a63SChris Mason while (!commit->commit_done) { 22589ce8a63SChris Mason prepare_to_wait(&commit->commit_wait, &wait, 22689ce8a63SChris Mason TASK_UNINTERRUPTIBLE); 22789ce8a63SChris Mason if (commit->commit_done) 22889ce8a63SChris Mason break; 22989ce8a63SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 23089ce8a63SChris Mason schedule(); 23189ce8a63SChris Mason mutex_lock(&root->fs_info->trans_mutex); 23289ce8a63SChris Mason } 23389ce8a63SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 23489ce8a63SChris Mason finish_wait(&commit->commit_wait, &wait); 23589ce8a63SChris Mason return 0; 23689ce8a63SChris Mason } 23789ce8a63SChris Mason 238d352ac68SChris Mason /* 239d397712bSChris Mason * rate limit against the drop_snapshot code. This helps to slow down new 240d397712bSChris Mason * operations if the drop_snapshot code isn't able to keep up. 
241d352ac68SChris Mason */ 24237d1aeeeSChris Mason static void throttle_on_drops(struct btrfs_root *root) 243ab78c84dSChris Mason { 244ab78c84dSChris Mason struct btrfs_fs_info *info = root->fs_info; 2452dd3e67bSChris Mason int harder_count = 0; 246ab78c84dSChris Mason 2472dd3e67bSChris Mason harder: 248ab78c84dSChris Mason if (atomic_read(&info->throttles)) { 249ab78c84dSChris Mason DEFINE_WAIT(wait); 250ab78c84dSChris Mason int thr; 251ab78c84dSChris Mason thr = atomic_read(&info->throttle_gen); 252ab78c84dSChris Mason 253ab78c84dSChris Mason do { 254ab78c84dSChris Mason prepare_to_wait(&info->transaction_throttle, 255ab78c84dSChris Mason &wait, TASK_UNINTERRUPTIBLE); 256ab78c84dSChris Mason if (!atomic_read(&info->throttles)) { 257ab78c84dSChris Mason finish_wait(&info->transaction_throttle, &wait); 258ab78c84dSChris Mason break; 259ab78c84dSChris Mason } 260ab78c84dSChris Mason schedule(); 261ab78c84dSChris Mason finish_wait(&info->transaction_throttle, &wait); 262ab78c84dSChris Mason } while (thr == atomic_read(&info->throttle_gen)); 2632dd3e67bSChris Mason harder_count++; 2642dd3e67bSChris Mason 2652dd3e67bSChris Mason if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 && 2662dd3e67bSChris Mason harder_count < 2) 2672dd3e67bSChris Mason goto harder; 2682dd3e67bSChris Mason 2692dd3e67bSChris Mason if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 && 2702dd3e67bSChris Mason harder_count < 10) 2712dd3e67bSChris Mason goto harder; 2722dd3e67bSChris Mason 2732dd3e67bSChris Mason if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 && 2742dd3e67bSChris Mason harder_count < 20) 2752dd3e67bSChris Mason goto harder; 276ab78c84dSChris Mason } 277ab78c84dSChris Mason } 278ab78c84dSChris Mason 27937d1aeeeSChris Mason void btrfs_throttle(struct btrfs_root *root) 28037d1aeeeSChris Mason { 28137d1aeeeSChris Mason mutex_lock(&root->fs_info->trans_mutex); 2829ca9ee09SSage Weil if (!root->fs_info->open_ioctl_trans) 28337d1aeeeSChris Mason 
wait_current_trans(root); 28437d1aeeeSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 28537d1aeeeSChris Mason throttle_on_drops(root); 28637d1aeeeSChris Mason } 28737d1aeeeSChris Mason 28889ce8a63SChris Mason static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 28989ce8a63SChris Mason struct btrfs_root *root, int throttle) 29079154b1bSChris Mason { 29179154b1bSChris Mason struct btrfs_transaction *cur_trans; 292ab78c84dSChris Mason struct btrfs_fs_info *info = root->fs_info; 293c3e69d58SChris Mason int count = 0; 294d6e4a428SChris Mason 295c3e69d58SChris Mason while (count < 4) { 296c3e69d58SChris Mason unsigned long cur = trans->delayed_ref_updates; 297c3e69d58SChris Mason trans->delayed_ref_updates = 0; 298c3e69d58SChris Mason if (cur && 299c3e69d58SChris Mason trans->transaction->delayed_refs.num_heads_ready > 64) { 300c3e69d58SChris Mason trans->delayed_ref_updates = 0; 301*b7ec40d7SChris Mason 302*b7ec40d7SChris Mason /* 303*b7ec40d7SChris Mason * do a full flush if the transaction is trying 304*b7ec40d7SChris Mason * to close 305*b7ec40d7SChris Mason */ 306*b7ec40d7SChris Mason if (trans->transaction->delayed_refs.flushing) 307*b7ec40d7SChris Mason cur = 0; 308c3e69d58SChris Mason btrfs_run_delayed_refs(trans, root, cur); 309c3e69d58SChris Mason } else { 310c3e69d58SChris Mason break; 311c3e69d58SChris Mason } 312c3e69d58SChris Mason count++; 31356bec294SChris Mason } 31456bec294SChris Mason 315ab78c84dSChris Mason mutex_lock(&info->trans_mutex); 316ab78c84dSChris Mason cur_trans = info->running_transaction; 317ccd467d6SChris Mason WARN_ON(cur_trans != trans->transaction); 318d5719762SChris Mason WARN_ON(cur_trans->num_writers < 1); 319ccd467d6SChris Mason cur_trans->num_writers--; 32089ce8a63SChris Mason 32179154b1bSChris Mason if (waitqueue_active(&cur_trans->writer_wait)) 32279154b1bSChris Mason wake_up(&cur_trans->writer_wait); 32379154b1bSChris Mason put_transaction(cur_trans); 324ab78c84dSChris Mason 
mutex_unlock(&info->trans_mutex); 325d6025579SChris Mason memset(trans, 0, sizeof(*trans)); 3262c90e5d6SChris Mason kmem_cache_free(btrfs_trans_handle_cachep, trans); 327ab78c84dSChris Mason 328ab78c84dSChris Mason if (throttle) 32937d1aeeeSChris Mason throttle_on_drops(root); 330ab78c84dSChris Mason 33179154b1bSChris Mason return 0; 33279154b1bSChris Mason } 33379154b1bSChris Mason 33489ce8a63SChris Mason int btrfs_end_transaction(struct btrfs_trans_handle *trans, 33589ce8a63SChris Mason struct btrfs_root *root) 33689ce8a63SChris Mason { 33789ce8a63SChris Mason return __btrfs_end_transaction(trans, root, 0); 33889ce8a63SChris Mason } 33989ce8a63SChris Mason 34089ce8a63SChris Mason int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 34189ce8a63SChris Mason struct btrfs_root *root) 34289ce8a63SChris Mason { 34389ce8a63SChris Mason return __btrfs_end_transaction(trans, root, 1); 34489ce8a63SChris Mason } 34589ce8a63SChris Mason 346d352ac68SChris Mason /* 347d352ac68SChris Mason * when btree blocks are allocated, they have some corresponding bits set for 348d352ac68SChris Mason * them in one of two extent_io trees. 
This is used to make sure all of 349d352ac68SChris Mason * those extents are on disk for transaction or log commit 350d352ac68SChris Mason */ 351d0c803c4SChris Mason int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 352d0c803c4SChris Mason struct extent_io_tree *dirty_pages) 35379154b1bSChris Mason { 3547c4452b9SChris Mason int ret; 355777e6bd7SChris Mason int err = 0; 3567c4452b9SChris Mason int werr = 0; 3577c4452b9SChris Mason struct page *page; 3587c4452b9SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 359777e6bd7SChris Mason u64 start = 0; 3605f39d397SChris Mason u64 end; 3615f39d397SChris Mason unsigned long index; 3627c4452b9SChris Mason 3637c4452b9SChris Mason while (1) { 364777e6bd7SChris Mason ret = find_first_extent_bit(dirty_pages, start, &start, &end, 3655f39d397SChris Mason EXTENT_DIRTY); 3665f39d397SChris Mason if (ret) 3677c4452b9SChris Mason break; 3685f39d397SChris Mason while (start <= end) { 369777e6bd7SChris Mason cond_resched(); 370777e6bd7SChris Mason 3715f39d397SChris Mason index = start >> PAGE_CACHE_SHIFT; 37235ebb934SChris Mason start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 3734bef0848SChris Mason page = find_get_page(btree_inode->i_mapping, index); 3747c4452b9SChris Mason if (!page) 3757c4452b9SChris Mason continue; 3764bef0848SChris Mason 3774bef0848SChris Mason btree_lock_page_hook(page); 3784bef0848SChris Mason if (!page->mapping) { 3794bef0848SChris Mason unlock_page(page); 3804bef0848SChris Mason page_cache_release(page); 3814bef0848SChris Mason continue; 3824bef0848SChris Mason } 3834bef0848SChris Mason 3846702ed49SChris Mason if (PageWriteback(page)) { 3856702ed49SChris Mason if (PageDirty(page)) 3866702ed49SChris Mason wait_on_page_writeback(page); 3876702ed49SChris Mason else { 3886702ed49SChris Mason unlock_page(page); 3896702ed49SChris Mason page_cache_release(page); 3906702ed49SChris Mason continue; 3916702ed49SChris Mason } 3926702ed49SChris Mason } 3937c4452b9SChris Mason err = 
write_one_page(page, 0); 3947c4452b9SChris Mason if (err) 3957c4452b9SChris Mason werr = err; 3967c4452b9SChris Mason page_cache_release(page); 3977c4452b9SChris Mason } 3987c4452b9SChris Mason } 399777e6bd7SChris Mason while (1) { 400777e6bd7SChris Mason ret = find_first_extent_bit(dirty_pages, 0, &start, &end, 401777e6bd7SChris Mason EXTENT_DIRTY); 402777e6bd7SChris Mason if (ret) 403777e6bd7SChris Mason break; 404777e6bd7SChris Mason 405777e6bd7SChris Mason clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); 406777e6bd7SChris Mason while (start <= end) { 407777e6bd7SChris Mason index = start >> PAGE_CACHE_SHIFT; 408777e6bd7SChris Mason start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 409777e6bd7SChris Mason page = find_get_page(btree_inode->i_mapping, index); 410777e6bd7SChris Mason if (!page) 411777e6bd7SChris Mason continue; 412777e6bd7SChris Mason if (PageDirty(page)) { 4134bef0848SChris Mason btree_lock_page_hook(page); 4144bef0848SChris Mason wait_on_page_writeback(page); 415777e6bd7SChris Mason err = write_one_page(page, 0); 416777e6bd7SChris Mason if (err) 417777e6bd7SChris Mason werr = err; 418777e6bd7SChris Mason } 419777e6bd7SChris Mason wait_on_page_writeback(page); 420777e6bd7SChris Mason page_cache_release(page); 421777e6bd7SChris Mason cond_resched(); 422777e6bd7SChris Mason } 423777e6bd7SChris Mason } 4247c4452b9SChris Mason if (err) 4257c4452b9SChris Mason werr = err; 4267c4452b9SChris Mason return werr; 42779154b1bSChris Mason } 42879154b1bSChris Mason 429d0c803c4SChris Mason int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 430d0c803c4SChris Mason struct btrfs_root *root) 431d0c803c4SChris Mason { 432d0c803c4SChris Mason if (!trans || !trans->transaction) { 433d0c803c4SChris Mason struct inode *btree_inode; 434d0c803c4SChris Mason btree_inode = root->fs_info->btree_inode; 435d0c803c4SChris Mason return filemap_write_and_wait(btree_inode->i_mapping); 436d0c803c4SChris Mason } 437d0c803c4SChris Mason return 
btrfs_write_and_wait_marked_extents(root, 438d0c803c4SChris Mason &trans->transaction->dirty_pages); 439d0c803c4SChris Mason } 440d0c803c4SChris Mason 441d352ac68SChris Mason /* 442d352ac68SChris Mason * this is used to update the root pointer in the tree of tree roots. 443d352ac68SChris Mason * 444d352ac68SChris Mason * But, in the case of the extent allocation tree, updating the root 445d352ac68SChris Mason * pointer may allocate blocks which may change the root of the extent 446d352ac68SChris Mason * allocation tree. 447d352ac68SChris Mason * 448d352ac68SChris Mason * So, this loops and repeats and makes sure the cowonly root didn't 449d352ac68SChris Mason * change while the root pointer was being updated in the metadata. 450d352ac68SChris Mason */ 4510b86a832SChris Mason static int update_cowonly_root(struct btrfs_trans_handle *trans, 45279154b1bSChris Mason struct btrfs_root *root) 45379154b1bSChris Mason { 45479154b1bSChris Mason int ret; 4550b86a832SChris Mason u64 old_root_bytenr; 4560b86a832SChris Mason struct btrfs_root *tree_root = root->fs_info->tree_root; 45779154b1bSChris Mason 4580b86a832SChris Mason btrfs_write_dirty_block_groups(trans, root); 45956bec294SChris Mason 46056bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 46156bec294SChris Mason BUG_ON(ret); 46287ef2bb4SChris Mason 46379154b1bSChris Mason while (1) { 4640b86a832SChris Mason old_root_bytenr = btrfs_root_bytenr(&root->root_item); 4650b86a832SChris Mason if (old_root_bytenr == root->node->start) 46679154b1bSChris Mason break; 4670b86a832SChris Mason btrfs_set_root_bytenr(&root->root_item, 4680b86a832SChris Mason root->node->start); 4690b86a832SChris Mason btrfs_set_root_level(&root->root_item, 4700b86a832SChris Mason btrfs_header_level(root->node)); 47184234f3aSYan Zheng btrfs_set_root_generation(&root->root_item, trans->transid); 47287ef2bb4SChris Mason 47379154b1bSChris Mason ret = btrfs_update_root(trans, tree_root, 4740b86a832SChris Mason 
&root->root_key, 4750b86a832SChris Mason &root->root_item); 47679154b1bSChris Mason BUG_ON(ret); 4770b86a832SChris Mason btrfs_write_dirty_block_groups(trans, root); 47856bec294SChris Mason 47956bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 48056bec294SChris Mason BUG_ON(ret); 4810b86a832SChris Mason } 4820b86a832SChris Mason return 0; 4830b86a832SChris Mason } 4840b86a832SChris Mason 485d352ac68SChris Mason /* 486d352ac68SChris Mason * update all the cowonly tree roots on disk 487d352ac68SChris Mason */ 4880b86a832SChris Mason int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 4890b86a832SChris Mason struct btrfs_root *root) 4900b86a832SChris Mason { 4910b86a832SChris Mason struct btrfs_fs_info *fs_info = root->fs_info; 4920b86a832SChris Mason struct list_head *next; 49384234f3aSYan Zheng struct extent_buffer *eb; 49456bec294SChris Mason int ret; 49584234f3aSYan Zheng 49656bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 49756bec294SChris Mason BUG_ON(ret); 49887ef2bb4SChris Mason 49984234f3aSYan Zheng eb = btrfs_lock_root_node(fs_info->tree_root); 5009fa8cfe7SChris Mason btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); 50184234f3aSYan Zheng btrfs_tree_unlock(eb); 50284234f3aSYan Zheng free_extent_buffer(eb); 5030b86a832SChris Mason 50456bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 50556bec294SChris Mason BUG_ON(ret); 50687ef2bb4SChris Mason 5070b86a832SChris Mason while (!list_empty(&fs_info->dirty_cowonly_roots)) { 5080b86a832SChris Mason next = fs_info->dirty_cowonly_roots.next; 5090b86a832SChris Mason list_del_init(next); 5100b86a832SChris Mason root = list_entry(next, struct btrfs_root, dirty_list); 51187ef2bb4SChris Mason 5120b86a832SChris Mason update_cowonly_root(trans, root); 51356bec294SChris Mason 51456bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 51556bec294SChris Mason BUG_ON(ret); 
51679154b1bSChris Mason } 51779154b1bSChris Mason return 0; 51879154b1bSChris Mason } 51979154b1bSChris Mason 520d352ac68SChris Mason /* 521d352ac68SChris Mason * dead roots are old snapshots that need to be deleted. This allocates 522d352ac68SChris Mason * a dirty root struct and adds it into the list of dead roots that need to 523d352ac68SChris Mason * be deleted 524d352ac68SChris Mason */ 525b48652c1SYan Zheng int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) 5265eda7b5eSChris Mason { 527f321e491SYan Zheng struct btrfs_dirty_root *dirty; 5285eda7b5eSChris Mason 5295eda7b5eSChris Mason dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 5305eda7b5eSChris Mason if (!dirty) 5315eda7b5eSChris Mason return -ENOMEM; 5325eda7b5eSChris Mason dirty->root = root; 5335ce14bbcSChris Mason dirty->latest_root = latest; 534b48652c1SYan Zheng 535b48652c1SYan Zheng mutex_lock(&root->fs_info->trans_mutex); 536b48652c1SYan Zheng list_add(&dirty->list, &latest->fs_info->dead_roots); 537b48652c1SYan Zheng mutex_unlock(&root->fs_info->trans_mutex); 5385eda7b5eSChris Mason return 0; 5395eda7b5eSChris Mason } 5405eda7b5eSChris Mason 541d352ac68SChris Mason /* 542d352ac68SChris Mason * at transaction commit time we need to schedule the old roots for 543d352ac68SChris Mason * deletion via btrfs_drop_snapshot. 
This runs through all the 544d352ac68SChris Mason * reference counted roots that were modified in the current 545d352ac68SChris Mason * transaction and puts them into the drop list 546d352ac68SChris Mason */ 54780b6794dSChris Mason static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, 54835b7e476SChris Mason struct radix_tree_root *radix, 54935b7e476SChris Mason struct list_head *list) 5500f7d52f4SChris Mason { 551f321e491SYan Zheng struct btrfs_dirty_root *dirty; 5520f7d52f4SChris Mason struct btrfs_root *gang[8]; 5530f7d52f4SChris Mason struct btrfs_root *root; 5540f7d52f4SChris Mason int i; 5550f7d52f4SChris Mason int ret; 55654aa1f4dSChris Mason int err = 0; 5575eda7b5eSChris Mason u32 refs; 55854aa1f4dSChris Mason 5590f7d52f4SChris Mason while (1) { 5600f7d52f4SChris Mason ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, 5610f7d52f4SChris Mason ARRAY_SIZE(gang), 5620f7d52f4SChris Mason BTRFS_ROOT_TRANS_TAG); 5630f7d52f4SChris Mason if (ret == 0) 5640f7d52f4SChris Mason break; 5650f7d52f4SChris Mason for (i = 0; i < ret; i++) { 5660f7d52f4SChris Mason root = gang[i]; 5672619ba1fSChris Mason radix_tree_tag_clear(radix, 5682619ba1fSChris Mason (unsigned long)root->root_key.objectid, 5690f7d52f4SChris Mason BTRFS_ROOT_TRANS_TAG); 57031153d81SYan Zheng 57131153d81SYan Zheng BUG_ON(!root->ref_tree); 572017e5369SChris Mason dirty = root->dirty_root; 57331153d81SYan Zheng 574e02119d5SChris Mason btrfs_free_log(trans, root); 575f82d02d9SYan Zheng btrfs_free_reloc_root(trans, root); 576e02119d5SChris Mason 5770f7d52f4SChris Mason if (root->commit_root == root->node) { 578db94535dSChris Mason WARN_ON(root->node->start != 579db94535dSChris Mason btrfs_root_bytenr(&root->root_item)); 58031153d81SYan Zheng 5815f39d397SChris Mason free_extent_buffer(root->commit_root); 5820f7d52f4SChris Mason root->commit_root = NULL; 5837ea394f1SYan Zheng root->dirty_root = NULL; 58431153d81SYan Zheng 585bcc63abbSYan spin_lock(&root->list_lock); 586bcc63abbSYan 
list_del_init(&dirty->root->dead_list); 587bcc63abbSYan spin_unlock(&root->list_lock); 588bcc63abbSYan 58931153d81SYan Zheng kfree(dirty->root); 59031153d81SYan Zheng kfree(dirty); 59158176a96SJosef Bacik 59258176a96SJosef Bacik /* make sure to update the root on disk 59358176a96SJosef Bacik * so we get any updates to the block used 59458176a96SJosef Bacik * counts 59558176a96SJosef Bacik */ 59658176a96SJosef Bacik err = btrfs_update_root(trans, 59758176a96SJosef Bacik root->fs_info->tree_root, 59858176a96SJosef Bacik &root->root_key, 59958176a96SJosef Bacik &root->root_item); 6000f7d52f4SChris Mason continue; 6010f7d52f4SChris Mason } 6029f3a7427SChris Mason 6039f3a7427SChris Mason memset(&root->root_item.drop_progress, 0, 6049f3a7427SChris Mason sizeof(struct btrfs_disk_key)); 6059f3a7427SChris Mason root->root_item.drop_level = 0; 6060f7d52f4SChris Mason root->commit_root = NULL; 6077ea394f1SYan Zheng root->dirty_root = NULL; 6080f7d52f4SChris Mason root->root_key.offset = root->fs_info->generation; 609db94535dSChris Mason btrfs_set_root_bytenr(&root->root_item, 610db94535dSChris Mason root->node->start); 611db94535dSChris Mason btrfs_set_root_level(&root->root_item, 612db94535dSChris Mason btrfs_header_level(root->node)); 61384234f3aSYan Zheng btrfs_set_root_generation(&root->root_item, 61484234f3aSYan Zheng root->root_key.offset); 61584234f3aSYan Zheng 6160f7d52f4SChris Mason err = btrfs_insert_root(trans, root->fs_info->tree_root, 6170f7d52f4SChris Mason &root->root_key, 6180f7d52f4SChris Mason &root->root_item); 61954aa1f4dSChris Mason if (err) 62054aa1f4dSChris Mason break; 6219f3a7427SChris Mason 6229f3a7427SChris Mason refs = btrfs_root_refs(&dirty->root->root_item); 6239f3a7427SChris Mason btrfs_set_root_refs(&dirty->root->root_item, refs - 1); 6245eda7b5eSChris Mason err = btrfs_update_root(trans, root->fs_info->tree_root, 6259f3a7427SChris Mason &dirty->root->root_key, 6269f3a7427SChris Mason &dirty->root->root_item); 6275eda7b5eSChris Mason 
6285eda7b5eSChris Mason BUG_ON(err); 6299f3a7427SChris Mason if (refs == 1) { 6300f7d52f4SChris Mason list_add(&dirty->list, list); 6319f3a7427SChris Mason } else { 6329f3a7427SChris Mason WARN_ON(1); 63331153d81SYan Zheng free_extent_buffer(dirty->root->node); 6349f3a7427SChris Mason kfree(dirty->root); 6355eda7b5eSChris Mason kfree(dirty); 6360f7d52f4SChris Mason } 6370f7d52f4SChris Mason } 6389f3a7427SChris Mason } 63954aa1f4dSChris Mason return err; 6400f7d52f4SChris Mason } 6410f7d52f4SChris Mason 642d352ac68SChris Mason /* 643d352ac68SChris Mason * defrag a given btree. If cacheonly == 1, this won't read from the disk, 644d352ac68SChris Mason * otherwise every leaf in the btree is read and defragged. 645d352ac68SChris Mason */ 646e9d0b13bSChris Mason int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) 647e9d0b13bSChris Mason { 648e9d0b13bSChris Mason struct btrfs_fs_info *info = root->fs_info; 649e9d0b13bSChris Mason int ret; 650e9d0b13bSChris Mason struct btrfs_trans_handle *trans; 651d3c2fdcfSChris Mason unsigned long nr; 652e9d0b13bSChris Mason 653a2135011SChris Mason smp_mb(); 654e9d0b13bSChris Mason if (root->defrag_running) 655e9d0b13bSChris Mason return 0; 656e9d0b13bSChris Mason trans = btrfs_start_transaction(root, 1); 6576b80053dSChris Mason while (1) { 658e9d0b13bSChris Mason root->defrag_running = 1; 659e9d0b13bSChris Mason ret = btrfs_defrag_leaves(trans, root, cacheonly); 660d3c2fdcfSChris Mason nr = trans->blocks_used; 661e9d0b13bSChris Mason btrfs_end_transaction(trans, root); 662d3c2fdcfSChris Mason btrfs_btree_balance_dirty(info->tree_root, nr); 663e9d0b13bSChris Mason cond_resched(); 664e9d0b13bSChris Mason 665e9d0b13bSChris Mason trans = btrfs_start_transaction(root, 1); 6663f157a2fSChris Mason if (root->fs_info->closing || ret != -EAGAIN) 667e9d0b13bSChris Mason break; 668e9d0b13bSChris Mason } 669e9d0b13bSChris Mason root->defrag_running = 0; 670a2135011SChris Mason smp_mb(); 671e9d0b13bSChris Mason btrfs_end_transaction(trans, 
root); 672e9d0b13bSChris Mason return 0; 673e9d0b13bSChris Mason } 674e9d0b13bSChris Mason 675d352ac68SChris Mason /* 676*b7ec40d7SChris Mason * when dropping snapshots, we generate a ton of delayed refs, and it makes 677*b7ec40d7SChris Mason * sense not to join the transaction while it is trying to flush the current 678*b7ec40d7SChris Mason * queue of delayed refs out. 679*b7ec40d7SChris Mason * 680*b7ec40d7SChris Mason * This is used by the drop snapshot code only 681*b7ec40d7SChris Mason */ 682*b7ec40d7SChris Mason static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) 683*b7ec40d7SChris Mason { 684*b7ec40d7SChris Mason DEFINE_WAIT(wait); 685*b7ec40d7SChris Mason 686*b7ec40d7SChris Mason mutex_lock(&info->trans_mutex); 687*b7ec40d7SChris Mason while (info->running_transaction && 688*b7ec40d7SChris Mason info->running_transaction->delayed_refs.flushing) { 689*b7ec40d7SChris Mason prepare_to_wait(&info->transaction_wait, &wait, 690*b7ec40d7SChris Mason TASK_UNINTERRUPTIBLE); 691*b7ec40d7SChris Mason mutex_unlock(&info->trans_mutex); 692*b7ec40d7SChris Mason schedule(); 693*b7ec40d7SChris Mason mutex_lock(&info->trans_mutex); 694*b7ec40d7SChris Mason finish_wait(&info->transaction_wait, &wait); 695*b7ec40d7SChris Mason } 696*b7ec40d7SChris Mason mutex_unlock(&info->trans_mutex); 697*b7ec40d7SChris Mason return 0; 698*b7ec40d7SChris Mason } 699*b7ec40d7SChris Mason 700*b7ec40d7SChris Mason /* 701d352ac68SChris Mason * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 702d352ac68SChris Mason * all of them 703d352ac68SChris Mason */ 70480b6794dSChris Mason static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 70535b7e476SChris Mason struct list_head *list) 7060f7d52f4SChris Mason { 707f321e491SYan Zheng struct btrfs_dirty_root *dirty; 7080f7d52f4SChris Mason struct btrfs_trans_handle *trans; 709d3c2fdcfSChris Mason unsigned long nr; 710db94535dSChris Mason u64 num_bytes; 711db94535dSChris Mason u64 bytes_used; 
712bcc63abbSYan u64 max_useless; 71354aa1f4dSChris Mason int ret = 0; 7149f3a7427SChris Mason int err; 7159f3a7427SChris Mason 7160f7d52f4SChris Mason while (!list_empty(list)) { 71758176a96SJosef Bacik struct btrfs_root *root; 71858176a96SJosef Bacik 719f321e491SYan Zheng dirty = list_entry(list->prev, struct btrfs_dirty_root, list); 7200f7d52f4SChris Mason list_del_init(&dirty->list); 7215eda7b5eSChris Mason 722db94535dSChris Mason num_bytes = btrfs_root_used(&dirty->root->root_item); 72358176a96SJosef Bacik root = dirty->latest_root; 724a2135011SChris Mason atomic_inc(&root->fs_info->throttles); 72558176a96SJosef Bacik 7269f3a7427SChris Mason while (1) { 727*b7ec40d7SChris Mason /* 728*b7ec40d7SChris Mason * we don't want to jump in and create a bunch of 729*b7ec40d7SChris Mason * delayed refs if the transaction is starting to close 730*b7ec40d7SChris Mason */ 731*b7ec40d7SChris Mason wait_transaction_pre_flush(tree_root->fs_info); 7320f7d52f4SChris Mason trans = btrfs_start_transaction(tree_root, 1); 733*b7ec40d7SChris Mason 734*b7ec40d7SChris Mason /* 735*b7ec40d7SChris Mason * we've joined a transaction, make sure it isn't 736*b7ec40d7SChris Mason * closing right now 737*b7ec40d7SChris Mason */ 738*b7ec40d7SChris Mason if (trans->transaction->delayed_refs.flushing) { 739*b7ec40d7SChris Mason btrfs_end_transaction(trans, tree_root); 740*b7ec40d7SChris Mason continue; 741*b7ec40d7SChris Mason } 742*b7ec40d7SChris Mason 7435b21f2edSZheng Yan mutex_lock(&root->fs_info->drop_mutex); 7449f3a7427SChris Mason ret = btrfs_drop_snapshot(trans, dirty->root); 745d397712bSChris Mason if (ret != -EAGAIN) 7469f3a7427SChris Mason break; 7475b21f2edSZheng Yan mutex_unlock(&root->fs_info->drop_mutex); 74858176a96SJosef Bacik 7499f3a7427SChris Mason err = btrfs_update_root(trans, 7509f3a7427SChris Mason tree_root, 7519f3a7427SChris Mason &dirty->root->root_key, 7529f3a7427SChris Mason &dirty->root->root_item); 7539f3a7427SChris Mason if (err) 7549f3a7427SChris Mason ret = err; 
755d3c2fdcfSChris Mason nr = trans->blocks_used; 756017e5369SChris Mason ret = btrfs_end_transaction(trans, tree_root); 7570f7d52f4SChris Mason BUG_ON(ret); 758a2135011SChris Mason 759d3c2fdcfSChris Mason btrfs_btree_balance_dirty(tree_root, nr); 7604dc11904SChris Mason cond_resched(); 7619f3a7427SChris Mason } 7629f3a7427SChris Mason BUG_ON(ret); 763a2135011SChris Mason atomic_dec(&root->fs_info->throttles); 764017e5369SChris Mason wake_up(&root->fs_info->transaction_throttle); 76558176a96SJosef Bacik 766db94535dSChris Mason num_bytes -= btrfs_root_used(&dirty->root->root_item); 767db94535dSChris Mason bytes_used = btrfs_root_used(&root->root_item); 768db94535dSChris Mason if (num_bytes) { 76924562425SYan Zheng mutex_lock(&root->fs_info->trans_mutex); 770e02119d5SChris Mason btrfs_record_root_in_trans(root); 77124562425SYan Zheng mutex_unlock(&root->fs_info->trans_mutex); 7725f39d397SChris Mason btrfs_set_root_used(&root->root_item, 773db94535dSChris Mason bytes_used - num_bytes); 77458176a96SJosef Bacik } 775a2135011SChris Mason 7769f3a7427SChris Mason ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); 77758176a96SJosef Bacik if (ret) { 77858176a96SJosef Bacik BUG(); 77954aa1f4dSChris Mason break; 78058176a96SJosef Bacik } 781a2135011SChris Mason mutex_unlock(&root->fs_info->drop_mutex); 782a2135011SChris Mason 783bcc63abbSYan spin_lock(&root->list_lock); 784bcc63abbSYan list_del_init(&dirty->root->dead_list); 785bcc63abbSYan if (!list_empty(&root->dead_list)) { 786bcc63abbSYan struct btrfs_root *oldest; 787bcc63abbSYan oldest = list_entry(root->dead_list.prev, 788bcc63abbSYan struct btrfs_root, dead_list); 789bcc63abbSYan max_useless = oldest->root_key.offset - 1; 790bcc63abbSYan } else { 791bcc63abbSYan max_useless = root->root_key.offset - 1; 792bcc63abbSYan } 793bcc63abbSYan spin_unlock(&root->list_lock); 794bcc63abbSYan 795d3c2fdcfSChris Mason nr = trans->blocks_used; 7960f7d52f4SChris Mason ret = btrfs_end_transaction(trans, tree_root); 
7970f7d52f4SChris Mason BUG_ON(ret); 7985eda7b5eSChris Mason 799e4657689SZheng Yan ret = btrfs_remove_leaf_refs(root, max_useless, 0); 800bcc63abbSYan BUG_ON(ret); 801bcc63abbSYan 802f510cfecSChris Mason free_extent_buffer(dirty->root->node); 8035eda7b5eSChris Mason kfree(dirty->root); 8040f7d52f4SChris Mason kfree(dirty); 805d3c2fdcfSChris Mason 806d3c2fdcfSChris Mason btrfs_btree_balance_dirty(tree_root, nr); 8074dc11904SChris Mason cond_resched(); 8080f7d52f4SChris Mason } 80954aa1f4dSChris Mason return ret; 8100f7d52f4SChris Mason } 8110f7d52f4SChris Mason 812d352ac68SChris Mason /* 813d352ac68SChris Mason * new snapshots need to be created at a very specific time in the 814d352ac68SChris Mason * transaction commit. This does the actual creation 815d352ac68SChris Mason */ 81680b6794dSChris Mason static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 8173063d29fSChris Mason struct btrfs_fs_info *fs_info, 8183063d29fSChris Mason struct btrfs_pending_snapshot *pending) 8193063d29fSChris Mason { 8203063d29fSChris Mason struct btrfs_key key; 82180b6794dSChris Mason struct btrfs_root_item *new_root_item; 8223063d29fSChris Mason struct btrfs_root *tree_root = fs_info->tree_root; 8233063d29fSChris Mason struct btrfs_root *root = pending->root; 8243063d29fSChris Mason struct extent_buffer *tmp; 825925baeddSChris Mason struct extent_buffer *old; 8263063d29fSChris Mason int ret; 8273063d29fSChris Mason u64 objectid; 8283063d29fSChris Mason 82980b6794dSChris Mason new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 83080b6794dSChris Mason if (!new_root_item) { 83180b6794dSChris Mason ret = -ENOMEM; 83280b6794dSChris Mason goto fail; 83380b6794dSChris Mason } 8343063d29fSChris Mason ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); 8353063d29fSChris Mason if (ret) 8363063d29fSChris Mason goto fail; 8373063d29fSChris Mason 83880ff3856SYan Zheng btrfs_record_root_in_trans(root); 83980ff3856SYan Zheng 
btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 84080b6794dSChris Mason memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 8413063d29fSChris Mason 8423063d29fSChris Mason key.objectid = objectid; 8435b21f2edSZheng Yan key.offset = trans->transid; 8443063d29fSChris Mason btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 8453063d29fSChris Mason 846925baeddSChris Mason old = btrfs_lock_root_node(root); 8479fa8cfe7SChris Mason btrfs_cow_block(trans, root, old, NULL, 0, &old); 8483063d29fSChris Mason 849925baeddSChris Mason btrfs_copy_root(trans, root, old, &tmp, objectid); 850925baeddSChris Mason btrfs_tree_unlock(old); 851925baeddSChris Mason free_extent_buffer(old); 8523063d29fSChris Mason 85380b6794dSChris Mason btrfs_set_root_bytenr(new_root_item, tmp->start); 85480b6794dSChris Mason btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); 85584234f3aSYan Zheng btrfs_set_root_generation(new_root_item, trans->transid); 8563063d29fSChris Mason ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 85780b6794dSChris Mason new_root_item); 858925baeddSChris Mason btrfs_tree_unlock(tmp); 8593063d29fSChris Mason free_extent_buffer(tmp); 8603063d29fSChris Mason if (ret) 8613063d29fSChris Mason goto fail; 8623063d29fSChris Mason 8633de4586cSChris Mason key.offset = (u64)-1; 8643de4586cSChris Mason memcpy(&pending->root_key, &key, sizeof(key)); 8653de4586cSChris Mason fail: 8663de4586cSChris Mason kfree(new_root_item); 8673de4586cSChris Mason return ret; 8683de4586cSChris Mason } 8693de4586cSChris Mason 8703de4586cSChris Mason static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, 8713de4586cSChris Mason struct btrfs_pending_snapshot *pending) 8723de4586cSChris Mason { 8733de4586cSChris Mason int ret; 8743de4586cSChris Mason int namelen; 8753de4586cSChris Mason u64 index = 0; 8763de4586cSChris Mason struct btrfs_trans_handle *trans; 8773de4586cSChris Mason struct inode *parent_inode; 8783de4586cSChris Mason struct 
inode *inode; 8790660b5afSChris Mason struct btrfs_root *parent_root; 8803de4586cSChris Mason 8813394e160SChris Mason parent_inode = pending->dentry->d_parent->d_inode; 8820660b5afSChris Mason parent_root = BTRFS_I(parent_inode)->root; 883180591bcSYan Zheng trans = btrfs_join_transaction(parent_root, 1); 8843de4586cSChris Mason 8853063d29fSChris Mason /* 8863063d29fSChris Mason * insert the directory item 8873063d29fSChris Mason */ 8883b96362cSSven Wegener namelen = strlen(pending->name); 8893de4586cSChris Mason ret = btrfs_set_inode_index(parent_inode, &index); 8900660b5afSChris Mason ret = btrfs_insert_dir_item(trans, parent_root, 8913b96362cSSven Wegener pending->name, namelen, 8923de4586cSChris Mason parent_inode->i_ino, 8933de4586cSChris Mason &pending->root_key, BTRFS_FT_DIR, index); 8943063d29fSChris Mason 8953063d29fSChris Mason if (ret) 8963063d29fSChris Mason goto fail; 8970660b5afSChris Mason 89852c26179SYan Zheng btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); 89952c26179SYan Zheng ret = btrfs_update_inode(trans, parent_root, parent_inode); 90052c26179SYan Zheng BUG_ON(ret); 90152c26179SYan Zheng 9020660b5afSChris Mason /* add the backref first */ 9030660b5afSChris Mason ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 9040660b5afSChris Mason pending->root_key.objectid, 9050660b5afSChris Mason BTRFS_ROOT_BACKREF_KEY, 9060660b5afSChris Mason parent_root->root_key.objectid, 9070660b5afSChris Mason parent_inode->i_ino, index, pending->name, 9080660b5afSChris Mason namelen); 9090660b5afSChris Mason 9100660b5afSChris Mason BUG_ON(ret); 9110660b5afSChris Mason 9120660b5afSChris Mason /* now add the forward ref */ 9130660b5afSChris Mason ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 9140660b5afSChris Mason parent_root->root_key.objectid, 9150660b5afSChris Mason BTRFS_ROOT_REF_KEY, 9160660b5afSChris Mason pending->root_key.objectid, 9170660b5afSChris Mason parent_inode->i_ino, index, pending->name, 
9180660b5afSChris Mason namelen); 9190660b5afSChris Mason 9203de4586cSChris Mason inode = btrfs_lookup_dentry(parent_inode, pending->dentry); 9213de4586cSChris Mason d_instantiate(pending->dentry, inode); 9223063d29fSChris Mason fail: 9233de4586cSChris Mason btrfs_end_transaction(trans, fs_info->fs_root); 9243063d29fSChris Mason return ret; 9253063d29fSChris Mason } 9263063d29fSChris Mason 927d352ac68SChris Mason /* 928d352ac68SChris Mason * create all the snapshots we've scheduled for creation 929d352ac68SChris Mason */ 93080b6794dSChris Mason static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, 9313063d29fSChris Mason struct btrfs_fs_info *fs_info) 9323063d29fSChris Mason { 9333063d29fSChris Mason struct btrfs_pending_snapshot *pending; 9343063d29fSChris Mason struct list_head *head = &trans->transaction->pending_snapshots; 9353de4586cSChris Mason int ret; 9363de4586cSChris Mason 937c6e30871SQinghuang Feng list_for_each_entry(pending, head, list) { 9383de4586cSChris Mason ret = create_pending_snapshot(trans, fs_info, pending); 9393de4586cSChris Mason BUG_ON(ret); 9403de4586cSChris Mason } 9413de4586cSChris Mason return 0; 9423de4586cSChris Mason } 9433de4586cSChris Mason 9443de4586cSChris Mason static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, 9453de4586cSChris Mason struct btrfs_fs_info *fs_info) 9463de4586cSChris Mason { 9473de4586cSChris Mason struct btrfs_pending_snapshot *pending; 9483de4586cSChris Mason struct list_head *head = &trans->transaction->pending_snapshots; 9493063d29fSChris Mason int ret; 9503063d29fSChris Mason 9513063d29fSChris Mason while (!list_empty(head)) { 9523063d29fSChris Mason pending = list_entry(head->next, 9533063d29fSChris Mason struct btrfs_pending_snapshot, list); 9543de4586cSChris Mason ret = finish_pending_snapshot(fs_info, pending); 9553063d29fSChris Mason BUG_ON(ret); 9563063d29fSChris Mason list_del(&pending->list); 9573063d29fSChris Mason kfree(pending->name); 
9583063d29fSChris Mason kfree(pending); 9593063d29fSChris Mason } 960dc17ff8fSChris Mason return 0; 961dc17ff8fSChris Mason } 962dc17ff8fSChris Mason 96379154b1bSChris Mason int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 96479154b1bSChris Mason struct btrfs_root *root) 96579154b1bSChris Mason { 96615ee9bc7SJosef Bacik unsigned long joined = 0; 96715ee9bc7SJosef Bacik unsigned long timeout = 1; 96879154b1bSChris Mason struct btrfs_transaction *cur_trans; 9698fd17795SChris Mason struct btrfs_transaction *prev_trans = NULL; 9700b86a832SChris Mason struct btrfs_root *chunk_root = root->fs_info->chunk_root; 9710f7d52f4SChris Mason struct list_head dirty_fs_roots; 972d1310b2eSChris Mason struct extent_io_tree *pinned_copy; 97379154b1bSChris Mason DEFINE_WAIT(wait); 97415ee9bc7SJosef Bacik int ret; 97579154b1bSChris Mason 97656bec294SChris Mason /* make a pass through all the delayed refs we have so far 97756bec294SChris Mason * any runnings procs may add more while we are here 97856bec294SChris Mason */ 97956bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, 0); 98056bec294SChris Mason BUG_ON(ret); 98156bec294SChris Mason 982*b7ec40d7SChris Mason cur_trans = trans->transaction; 98356bec294SChris Mason /* 98456bec294SChris Mason * set the flushing flag so procs in this transaction have to 98556bec294SChris Mason * start sending their work down. 
98656bec294SChris Mason */ 987*b7ec40d7SChris Mason cur_trans->delayed_refs.flushing = 1; 98856bec294SChris Mason 989c3e69d58SChris Mason ret = btrfs_run_delayed_refs(trans, root, 0); 99056bec294SChris Mason BUG_ON(ret); 99156bec294SChris Mason 99279154b1bSChris Mason mutex_lock(&root->fs_info->trans_mutex); 993*b7ec40d7SChris Mason INIT_LIST_HEAD(&dirty_fs_roots); 994*b7ec40d7SChris Mason if (cur_trans->in_commit) { 995*b7ec40d7SChris Mason cur_trans->use_count++; 996ccd467d6SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 99779154b1bSChris Mason btrfs_end_transaction(trans, root); 998ccd467d6SChris Mason 99979154b1bSChris Mason ret = wait_for_commit(root, cur_trans); 100079154b1bSChris Mason BUG_ON(ret); 100115ee9bc7SJosef Bacik 100215ee9bc7SJosef Bacik mutex_lock(&root->fs_info->trans_mutex); 100379154b1bSChris Mason put_transaction(cur_trans); 100415ee9bc7SJosef Bacik mutex_unlock(&root->fs_info->trans_mutex); 100515ee9bc7SJosef Bacik 100679154b1bSChris Mason return 0; 100779154b1bSChris Mason } 10084313b399SChris Mason 10094313b399SChris Mason pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); 10104313b399SChris Mason if (!pinned_copy) 10114313b399SChris Mason return -ENOMEM; 10124313b399SChris Mason 1013d1310b2eSChris Mason extent_io_tree_init(pinned_copy, 10144313b399SChris Mason root->fs_info->btree_inode->i_mapping, GFP_NOFS); 10154313b399SChris Mason 10162c90e5d6SChris Mason trans->transaction->in_commit = 1; 1017f9295749SChris Mason trans->transaction->blocked = 1; 1018ccd467d6SChris Mason if (cur_trans->list.prev != &root->fs_info->trans_list) { 1019ccd467d6SChris Mason prev_trans = list_entry(cur_trans->list.prev, 1020ccd467d6SChris Mason struct btrfs_transaction, list); 1021ccd467d6SChris Mason if (!prev_trans->commit_done) { 1022ccd467d6SChris Mason prev_trans->use_count++; 1023ccd467d6SChris Mason mutex_unlock(&root->fs_info->trans_mutex); 1024ccd467d6SChris Mason 1025ccd467d6SChris Mason wait_for_commit(root, prev_trans); 
1026ccd467d6SChris Mason 1027ccd467d6SChris Mason mutex_lock(&root->fs_info->trans_mutex); 102815ee9bc7SJosef Bacik put_transaction(prev_trans); 1029ccd467d6SChris Mason } 1030ccd467d6SChris Mason } 103115ee9bc7SJosef Bacik 103215ee9bc7SJosef Bacik do { 10337ea394f1SYan Zheng int snap_pending = 0; 103415ee9bc7SJosef Bacik joined = cur_trans->num_joined; 10357ea394f1SYan Zheng if (!list_empty(&trans->transaction->pending_snapshots)) 10367ea394f1SYan Zheng snap_pending = 1; 10377ea394f1SYan Zheng 10382c90e5d6SChris Mason WARN_ON(cur_trans != trans->transaction); 103915ee9bc7SJosef Bacik prepare_to_wait(&cur_trans->writer_wait, &wait, 104079154b1bSChris Mason TASK_UNINTERRUPTIBLE); 104115ee9bc7SJosef Bacik 104215ee9bc7SJosef Bacik if (cur_trans->num_writers > 1) 104315ee9bc7SJosef Bacik timeout = MAX_SCHEDULE_TIMEOUT; 104415ee9bc7SJosef Bacik else 104515ee9bc7SJosef Bacik timeout = 1; 104615ee9bc7SJosef Bacik 104779154b1bSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 104815ee9bc7SJosef Bacik 10497ea394f1SYan Zheng if (snap_pending) { 10507ea394f1SYan Zheng ret = btrfs_wait_ordered_extents(root, 1); 10517ea394f1SYan Zheng BUG_ON(ret); 10527ea394f1SYan Zheng } 10537ea394f1SYan Zheng 105415ee9bc7SJosef Bacik schedule_timeout(timeout); 105515ee9bc7SJosef Bacik 105679154b1bSChris Mason mutex_lock(&root->fs_info->trans_mutex); 105715ee9bc7SJosef Bacik finish_wait(&cur_trans->writer_wait, &wait); 105815ee9bc7SJosef Bacik } while (cur_trans->num_writers > 1 || 105915ee9bc7SJosef Bacik (cur_trans->num_joined != joined)); 106015ee9bc7SJosef Bacik 10613063d29fSChris Mason ret = create_pending_snapshots(trans, root->fs_info); 10623063d29fSChris Mason BUG_ON(ret); 10633063d29fSChris Mason 106456bec294SChris Mason ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 106556bec294SChris Mason BUG_ON(ret); 106656bec294SChris Mason 10672c90e5d6SChris Mason WARN_ON(cur_trans != trans->transaction); 1068dc17ff8fSChris Mason 1069e02119d5SChris Mason /* 
btrfs_commit_tree_roots is responsible for getting the 1070e02119d5SChris Mason * various roots consistent with each other. Every pointer 1071e02119d5SChris Mason * in the tree of tree roots has to point to the most up to date 1072e02119d5SChris Mason * root for every subvolume and other tree. So, we have to keep 1073e02119d5SChris Mason * the tree logging code from jumping in and changing any 1074e02119d5SChris Mason * of the trees. 1075e02119d5SChris Mason * 1076e02119d5SChris Mason * At this point in the commit, there can't be any tree-log 1077e02119d5SChris Mason * writers, but a little lower down we drop the trans mutex 1078e02119d5SChris Mason * and let new people in. By holding the tree_log_mutex 1079e02119d5SChris Mason * from now until after the super is written, we avoid races 1080e02119d5SChris Mason * with the tree-log code. 1081e02119d5SChris Mason */ 1082e02119d5SChris Mason mutex_lock(&root->fs_info->tree_log_mutex); 10831a40e23bSZheng Yan /* 10841a40e23bSZheng Yan * keep tree reloc code from adding new reloc trees 10851a40e23bSZheng Yan */ 10861a40e23bSZheng Yan mutex_lock(&root->fs_info->tree_reloc_mutex); 10871a40e23bSZheng Yan 1088e02119d5SChris Mason 108954aa1f4dSChris Mason ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 109054aa1f4dSChris Mason &dirty_fs_roots); 109154aa1f4dSChris Mason BUG_ON(ret); 109254aa1f4dSChris Mason 1093e02119d5SChris Mason /* add_dirty_roots gets rid of all the tree log roots, it is now 1094e02119d5SChris Mason * safe to free the root of tree log roots 1095e02119d5SChris Mason */ 1096e02119d5SChris Mason btrfs_free_log_root_tree(trans, root->fs_info); 1097e02119d5SChris Mason 109879154b1bSChris Mason ret = btrfs_commit_tree_roots(trans, root); 109979154b1bSChris Mason BUG_ON(ret); 110054aa1f4dSChris Mason 110178fae27eSChris Mason cur_trans = root->fs_info->running_transaction; 1102cee36a03SChris Mason spin_lock(&root->fs_info->new_trans_lock); 110378fae27eSChris Mason root->fs_info->running_transaction = 
NULL; 1104cee36a03SChris Mason spin_unlock(&root->fs_info->new_trans_lock); 11054b52dff6SChris Mason btrfs_set_super_generation(&root->fs_info->super_copy, 11064b52dff6SChris Mason cur_trans->transid); 11074b52dff6SChris Mason btrfs_set_super_root(&root->fs_info->super_copy, 1108db94535dSChris Mason root->fs_info->tree_root->node->start); 1109db94535dSChris Mason btrfs_set_super_root_level(&root->fs_info->super_copy, 1110db94535dSChris Mason btrfs_header_level(root->fs_info->tree_root->node)); 11115f39d397SChris Mason 11120b86a832SChris Mason btrfs_set_super_chunk_root(&root->fs_info->super_copy, 11130b86a832SChris Mason chunk_root->node->start); 11140b86a832SChris Mason btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, 11150b86a832SChris Mason btrfs_header_level(chunk_root->node)); 111684234f3aSYan Zheng btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, 111784234f3aSYan Zheng btrfs_header_generation(chunk_root->node)); 1118e02119d5SChris Mason 1119e02119d5SChris Mason if (!root->fs_info->log_root_recovering) { 1120e02119d5SChris Mason btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1121e02119d5SChris Mason btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); 1122e02119d5SChris Mason } 1123e02119d5SChris Mason 1124a061fc8dSChris Mason memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, 11254b52dff6SChris Mason sizeof(root->fs_info->super_copy)); 1126ccd467d6SChris Mason 11274313b399SChris Mason btrfs_copy_pinned(root, pinned_copy); 1128ccd467d6SChris Mason 1129f9295749SChris Mason trans->transaction->blocked = 0; 1130*b7ec40d7SChris Mason 1131e6dcd2dcSChris Mason wake_up(&root->fs_info->transaction_throttle); 1132f9295749SChris Mason wake_up(&root->fs_info->transaction_wait); 1133e6dcd2dcSChris Mason 113478fae27eSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 113579154b1bSChris Mason ret = btrfs_write_and_wait_transaction(trans, root); 113679154b1bSChris Mason BUG_ON(ret); 1137a512bbf8SYan Zheng 
write_ctree_super(trans, root, 0); 11384313b399SChris Mason 1139e02119d5SChris Mason /* 1140e02119d5SChris Mason * the super is written, we can safely allow the tree-loggers 1141e02119d5SChris Mason * to go about their business 1142e02119d5SChris Mason */ 1143e02119d5SChris Mason mutex_unlock(&root->fs_info->tree_log_mutex); 1144e02119d5SChris Mason 11454313b399SChris Mason btrfs_finish_extent_commit(trans, root, pinned_copy); 11464313b399SChris Mason kfree(pinned_copy); 11474313b399SChris Mason 11481a40e23bSZheng Yan btrfs_drop_dead_reloc_roots(root); 11491a40e23bSZheng Yan mutex_unlock(&root->fs_info->tree_reloc_mutex); 11501a40e23bSZheng Yan 11513de4586cSChris Mason /* do the directory inserts of any pending snapshot creations */ 11523de4586cSChris Mason finish_pending_snapshots(trans, root->fs_info); 11533de4586cSChris Mason 11541a40e23bSZheng Yan mutex_lock(&root->fs_info->trans_mutex); 11551a40e23bSZheng Yan 11562c90e5d6SChris Mason cur_trans->commit_done = 1; 1157*b7ec40d7SChris Mason 115815ee9bc7SJosef Bacik root->fs_info->last_trans_committed = cur_trans->transid; 11592c90e5d6SChris Mason wake_up(&cur_trans->commit_wait); 11603de4586cSChris Mason 116179154b1bSChris Mason put_transaction(cur_trans); 116278fae27eSChris Mason put_transaction(cur_trans); 116358176a96SJosef Bacik 1164bcc63abbSYan list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); 1165facda1e7SChris Mason if (root->fs_info->closing) 1166facda1e7SChris Mason list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); 116758176a96SJosef Bacik 116878fae27eSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 11693de4586cSChris Mason 11702c90e5d6SChris Mason kmem_cache_free(btrfs_trans_handle_cachep, trans); 117179154b1bSChris Mason 1172d397712bSChris Mason if (root->fs_info->closing) 11730f7d52f4SChris Mason drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); 117479154b1bSChris Mason return ret; 117579154b1bSChris Mason } 117679154b1bSChris Mason 1177d352ac68SChris Mason 
/* 1178d352ac68SChris Mason * interface function to delete all the snapshots we have scheduled for deletion 1179d352ac68SChris Mason */ 1180e9d0b13bSChris Mason int btrfs_clean_old_snapshots(struct btrfs_root *root) 1181e9d0b13bSChris Mason { 1182e9d0b13bSChris Mason struct list_head dirty_roots; 1183e9d0b13bSChris Mason INIT_LIST_HEAD(&dirty_roots); 1184a74a4b97SChris Mason again: 1185e9d0b13bSChris Mason mutex_lock(&root->fs_info->trans_mutex); 1186e9d0b13bSChris Mason list_splice_init(&root->fs_info->dead_roots, &dirty_roots); 1187e9d0b13bSChris Mason mutex_unlock(&root->fs_info->trans_mutex); 1188e9d0b13bSChris Mason 1189e9d0b13bSChris Mason if (!list_empty(&dirty_roots)) { 1190e9d0b13bSChris Mason drop_dirty_roots(root, &dirty_roots); 1191a74a4b97SChris Mason goto again; 1192e9d0b13bSChris Mason } 1193e9d0b13bSChris Mason return 0; 1194e9d0b13bSChris Mason } 1195