/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
176cbd5570SChris Mason */ 186cbd5570SChris Mason 19e20d96d6SChris Mason #include <linux/fs.h> 20d98237b3SChris Mason #include <linux/blkdev.h> 2187cbda5cSChris Mason #include <linux/scatterlist.h> 2222b0ebdaSChris Mason #include <linux/swap.h> 230f7d52f4SChris Mason #include <linux/radix-tree.h> 2435b7e476SChris Mason #include <linux/writeback.h> 25d397712bSChris Mason #include <linux/buffer_head.h> 26ce9adaa5SChris Mason #include <linux/workqueue.h> 27a74a4b97SChris Mason #include <linux/kthread.h> 28a74a4b97SChris Mason #include <linux/freezer.h> 29163e783eSDavid Woodhouse #include <linux/crc32c.h> 305a0e3ad6STejun Heo #include <linux/slab.h> 31784b4e29SChris Mason #include <linux/migrate.h> 327a36ddecSDavid Sterba #include <linux/ratelimit.h> 337e75bf3fSDavid Sterba #include <asm/unaligned.h> 344b4e25f2SChris Mason #include "compat.h" 35eb60ceacSChris Mason #include "ctree.h" 36eb60ceacSChris Mason #include "disk-io.h" 37e089f05cSChris Mason #include "transaction.h" 380f7d52f4SChris Mason #include "btrfs_inode.h" 390b86a832SChris Mason #include "volumes.h" 40db94535dSChris Mason #include "print-tree.h" 418b712842SChris Mason #include "async-thread.h" 42925baeddSChris Mason #include "locking.h" 43e02119d5SChris Mason #include "tree-log.h" 44fa9c0d79SChris Mason #include "free-space-cache.h" 45eb60ceacSChris Mason 46d1310b2eSChris Mason static struct extent_io_ops btree_extent_io_ops; 478b712842SChris Mason static void end_workqueue_fn(struct btrfs_work *work); 484df27c4dSYan, Zheng static void free_fs_root(struct btrfs_root *root); 49acce952bSliubo static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 50acce952bSliubo int read_only); 51acce952bSliubo static int btrfs_destroy_ordered_operations(struct btrfs_root *root); 52acce952bSliubo static int btrfs_destroy_ordered_extents(struct btrfs_root *root); 53acce952bSliubo static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 54acce952bSliubo struct btrfs_root *root); 55acce952bSliubo 
static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); 56acce952bSliubo static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); 57acce952bSliubo static int btrfs_destroy_marked_extents(struct btrfs_root *root, 58acce952bSliubo struct extent_io_tree *dirty_pages, 59acce952bSliubo int mark); 60acce952bSliubo static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 61acce952bSliubo struct extent_io_tree *pinned_extents); 62acce952bSliubo static int btrfs_cleanup_transaction(struct btrfs_root *root); 63ce9adaa5SChris Mason 64d352ac68SChris Mason /* 65d352ac68SChris Mason * end_io_wq structs are used to do processing in task context when an IO is 66d352ac68SChris Mason * complete. This is used during reads to verify checksums, and it is used 67d352ac68SChris Mason * by writes to insert metadata for new file extents after IO is complete. 68d352ac68SChris Mason */ 69ce9adaa5SChris Mason struct end_io_wq { 70ce9adaa5SChris Mason struct bio *bio; 71ce9adaa5SChris Mason bio_end_io_t *end_io; 72ce9adaa5SChris Mason void *private; 73ce9adaa5SChris Mason struct btrfs_fs_info *info; 74ce9adaa5SChris Mason int error; 7522c59948SChris Mason int metadata; 76ce9adaa5SChris Mason struct list_head list; 778b712842SChris Mason struct btrfs_work work; 78ce9adaa5SChris Mason }; 790da5468fSChris Mason 80d352ac68SChris Mason /* 81d352ac68SChris Mason * async submit bios are used to offload expensive checksumming 82d352ac68SChris Mason * onto the worker threads. They checksum file and metadata bios 83d352ac68SChris Mason * just before they are sent down the IO stack. 
84d352ac68SChris Mason */ 8544b8bd7eSChris Mason struct async_submit_bio { 8644b8bd7eSChris Mason struct inode *inode; 8744b8bd7eSChris Mason struct bio *bio; 8844b8bd7eSChris Mason struct list_head list; 894a69a410SChris Mason extent_submit_bio_hook_t *submit_bio_start; 904a69a410SChris Mason extent_submit_bio_hook_t *submit_bio_done; 9144b8bd7eSChris Mason int rw; 9244b8bd7eSChris Mason int mirror_num; 93c8b97818SChris Mason unsigned long bio_flags; 94eaf25d93SChris Mason /* 95eaf25d93SChris Mason * bio_offset is optional, can be used if the pages in the bio 96eaf25d93SChris Mason * can't tell us where in the file the bio should go 97eaf25d93SChris Mason */ 98eaf25d93SChris Mason u64 bio_offset; 998b712842SChris Mason struct btrfs_work work; 10044b8bd7eSChris Mason }; 10144b8bd7eSChris Mason 1024008c04aSChris Mason /* These are used to set the lockdep class on the extent buffer locks. 1034008c04aSChris Mason * The class is set by the readpage_end_io_hook after the buffer has 1044008c04aSChris Mason * passed csum validation but before the pages are unlocked. 1054008c04aSChris Mason * 1064008c04aSChris Mason * The lockdep class is also set by btrfs_init_new_buffer on freshly 1074008c04aSChris Mason * allocated blocks. 1084008c04aSChris Mason * 1094008c04aSChris Mason * The class is based on the level in the tree block, which allows lockdep 1104008c04aSChris Mason * to know that lower nodes nest inside the locks of higher nodes. 1114008c04aSChris Mason * 1124008c04aSChris Mason * We also add a check to make sure the highest level of the tree is 1134008c04aSChris Mason * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this 1144008c04aSChris Mason * code needs update as well. 
1154008c04aSChris Mason */ 1164008c04aSChris Mason #ifdef CONFIG_DEBUG_LOCK_ALLOC 1174008c04aSChris Mason # if BTRFS_MAX_LEVEL != 8 1184008c04aSChris Mason # error 1194008c04aSChris Mason # endif 1204008c04aSChris Mason static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; 1214008c04aSChris Mason static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { 1224008c04aSChris Mason /* leaf */ 1234008c04aSChris Mason "btrfs-extent-00", 1244008c04aSChris Mason "btrfs-extent-01", 1254008c04aSChris Mason "btrfs-extent-02", 1264008c04aSChris Mason "btrfs-extent-03", 1274008c04aSChris Mason "btrfs-extent-04", 1284008c04aSChris Mason "btrfs-extent-05", 1294008c04aSChris Mason "btrfs-extent-06", 1304008c04aSChris Mason "btrfs-extent-07", 1314008c04aSChris Mason /* highest possible level */ 1324008c04aSChris Mason "btrfs-extent-08", 1334008c04aSChris Mason }; 1344008c04aSChris Mason #endif 1354008c04aSChris Mason 136d352ac68SChris Mason /* 137d352ac68SChris Mason * extents on the btree inode are pretty simple, there's one extent 138d352ac68SChris Mason * that covers the entire device 139d352ac68SChris Mason */ 140b2950863SChristoph Hellwig static struct extent_map *btree_get_extent(struct inode *inode, 141306e16ceSDavid Sterba struct page *page, size_t pg_offset, u64 start, u64 len, 1425f39d397SChris Mason int create) 1435f39d397SChris Mason { 1445f39d397SChris Mason struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 1455f39d397SChris Mason struct extent_map *em; 1465f39d397SChris Mason int ret; 1475f39d397SChris Mason 148890871beSChris Mason read_lock(&em_tree->lock); 149d1310b2eSChris Mason em = lookup_extent_mapping(em_tree, start, len); 150a061fc8dSChris Mason if (em) { 151a061fc8dSChris Mason em->bdev = 152a061fc8dSChris Mason BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 153890871beSChris Mason read_unlock(&em_tree->lock); 1545f39d397SChris Mason goto out; 155a061fc8dSChris Mason } 156890871beSChris Mason read_unlock(&em_tree->lock); 
1577b13b7b1SChris Mason 158172ddd60SDavid Sterba em = alloc_extent_map(); 1595f39d397SChris Mason if (!em) { 1605f39d397SChris Mason em = ERR_PTR(-ENOMEM); 1615f39d397SChris Mason goto out; 1625f39d397SChris Mason } 1635f39d397SChris Mason em->start = 0; 1640afbaf8cSChris Mason em->len = (u64)-1; 165c8b97818SChris Mason em->block_len = (u64)-1; 1665f39d397SChris Mason em->block_start = 0; 167a061fc8dSChris Mason em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 168d1310b2eSChris Mason 169890871beSChris Mason write_lock(&em_tree->lock); 1705f39d397SChris Mason ret = add_extent_mapping(em_tree, em); 1715f39d397SChris Mason if (ret == -EEXIST) { 1720afbaf8cSChris Mason u64 failed_start = em->start; 1730afbaf8cSChris Mason u64 failed_len = em->len; 1740afbaf8cSChris Mason 1755f39d397SChris Mason free_extent_map(em); 1767b13b7b1SChris Mason em = lookup_extent_mapping(em_tree, start, len); 1770afbaf8cSChris Mason if (em) { 1787b13b7b1SChris Mason ret = 0; 1790afbaf8cSChris Mason } else { 1800afbaf8cSChris Mason em = lookup_extent_mapping(em_tree, failed_start, 1810afbaf8cSChris Mason failed_len); 1827b13b7b1SChris Mason ret = -EIO; 1830afbaf8cSChris Mason } 1845f39d397SChris Mason } else if (ret) { 1857b13b7b1SChris Mason free_extent_map(em); 1867b13b7b1SChris Mason em = NULL; 1875f39d397SChris Mason } 188890871beSChris Mason write_unlock(&em_tree->lock); 1897b13b7b1SChris Mason 1907b13b7b1SChris Mason if (ret) 1917b13b7b1SChris Mason em = ERR_PTR(ret); 1925f39d397SChris Mason out: 1935f39d397SChris Mason return em; 1945f39d397SChris Mason } 1955f39d397SChris Mason 19619c00ddcSChris Mason u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) 19719c00ddcSChris Mason { 198163e783eSDavid Woodhouse return crc32c(seed, data, len); 19919c00ddcSChris Mason } 20019c00ddcSChris Mason 20119c00ddcSChris Mason void btrfs_csum_final(u32 crc, char *result) 20219c00ddcSChris Mason { 2037e75bf3fSDavid Sterba put_unaligned_le32(~crc, result); 
20419c00ddcSChris Mason } 20519c00ddcSChris Mason 206d352ac68SChris Mason /* 207d352ac68SChris Mason * compute the csum for a btree block, and either verify it or write it 208d352ac68SChris Mason * into the csum field of the block. 209d352ac68SChris Mason */ 21019c00ddcSChris Mason static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, 21119c00ddcSChris Mason int verify) 21219c00ddcSChris Mason { 213607d432dSJosef Bacik u16 csum_size = 214607d432dSJosef Bacik btrfs_super_csum_size(&root->fs_info->super_copy); 215607d432dSJosef Bacik char *result = NULL; 21619c00ddcSChris Mason unsigned long len; 21719c00ddcSChris Mason unsigned long cur_len; 21819c00ddcSChris Mason unsigned long offset = BTRFS_CSUM_SIZE; 21919c00ddcSChris Mason char *map_token = NULL; 22019c00ddcSChris Mason char *kaddr; 22119c00ddcSChris Mason unsigned long map_start; 22219c00ddcSChris Mason unsigned long map_len; 22319c00ddcSChris Mason int err; 22419c00ddcSChris Mason u32 crc = ~(u32)0; 225607d432dSJosef Bacik unsigned long inline_result; 22619c00ddcSChris Mason 22719c00ddcSChris Mason len = buf->len - offset; 22819c00ddcSChris Mason while (len > 0) { 22919c00ddcSChris Mason err = map_private_extent_buffer(buf, offset, 32, 23019c00ddcSChris Mason &map_token, &kaddr, 23119c00ddcSChris Mason &map_start, &map_len, KM_USER0); 232d397712bSChris Mason if (err) 23319c00ddcSChris Mason return 1; 23419c00ddcSChris Mason cur_len = min(len, map_len - (offset - map_start)); 23519c00ddcSChris Mason crc = btrfs_csum_data(root, kaddr + offset - map_start, 23619c00ddcSChris Mason crc, cur_len); 23719c00ddcSChris Mason len -= cur_len; 23819c00ddcSChris Mason offset += cur_len; 23919c00ddcSChris Mason unmap_extent_buffer(buf, map_token, KM_USER0); 24019c00ddcSChris Mason } 241607d432dSJosef Bacik if (csum_size > sizeof(inline_result)) { 242607d432dSJosef Bacik result = kzalloc(csum_size * sizeof(char), GFP_NOFS); 243607d432dSJosef Bacik if (!result) 244607d432dSJosef Bacik return 1; 
245607d432dSJosef Bacik } else { 246607d432dSJosef Bacik result = (char *)&inline_result; 247607d432dSJosef Bacik } 248607d432dSJosef Bacik 24919c00ddcSChris Mason btrfs_csum_final(crc, result); 25019c00ddcSChris Mason 25119c00ddcSChris Mason if (verify) { 252607d432dSJosef Bacik if (memcmp_extent_buffer(buf, result, 0, csum_size)) { 253e4204dedSChris Mason u32 val; 254e4204dedSChris Mason u32 found = 0; 255607d432dSJosef Bacik memcpy(&found, result, csum_size); 256e4204dedSChris Mason 257607d432dSJosef Bacik read_extent_buffer(buf, &val, 0, csum_size); 2587a36ddecSDavid Sterba printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " 259193f284dSChris Mason "failed on %llu wanted %X found %X " 260193f284dSChris Mason "level %d\n", 26119c00ddcSChris Mason root->fs_info->sb->s_id, 262193f284dSChris Mason (unsigned long long)buf->start, val, found, 263193f284dSChris Mason btrfs_header_level(buf)); 264607d432dSJosef Bacik if (result != (char *)&inline_result) 265607d432dSJosef Bacik kfree(result); 26619c00ddcSChris Mason return 1; 26719c00ddcSChris Mason } 26819c00ddcSChris Mason } else { 269607d432dSJosef Bacik write_extent_buffer(buf, result, 0, csum_size); 27019c00ddcSChris Mason } 271607d432dSJosef Bacik if (result != (char *)&inline_result) 272607d432dSJosef Bacik kfree(result); 27319c00ddcSChris Mason return 0; 27419c00ddcSChris Mason } 27519c00ddcSChris Mason 276d352ac68SChris Mason /* 277d352ac68SChris Mason * we can't consider a given block up to date unless the transid of the 278d352ac68SChris Mason * block matches the transid in the parent node's pointer. This is how we 279d352ac68SChris Mason * detect blocks that either didn't get written at all or got written 280d352ac68SChris Mason * in the wrong place. 
281d352ac68SChris Mason */ 2821259ab75SChris Mason static int verify_parent_transid(struct extent_io_tree *io_tree, 2831259ab75SChris Mason struct extent_buffer *eb, u64 parent_transid) 2841259ab75SChris Mason { 2852ac55d41SJosef Bacik struct extent_state *cached_state = NULL; 2861259ab75SChris Mason int ret; 2871259ab75SChris Mason 2881259ab75SChris Mason if (!parent_transid || btrfs_header_generation(eb) == parent_transid) 2891259ab75SChris Mason return 0; 2901259ab75SChris Mason 2912ac55d41SJosef Bacik lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 2922ac55d41SJosef Bacik 0, &cached_state, GFP_NOFS); 2932ac55d41SJosef Bacik if (extent_buffer_uptodate(io_tree, eb, cached_state) && 2941259ab75SChris Mason btrfs_header_generation(eb) == parent_transid) { 2951259ab75SChris Mason ret = 0; 2961259ab75SChris Mason goto out; 2971259ab75SChris Mason } 2987a36ddecSDavid Sterba printk_ratelimited("parent transid verify failed on %llu wanted %llu " 299193f284dSChris Mason "found %llu\n", 3001259ab75SChris Mason (unsigned long long)eb->start, 3011259ab75SChris Mason (unsigned long long)parent_transid, 3021259ab75SChris Mason (unsigned long long)btrfs_header_generation(eb)); 3031259ab75SChris Mason ret = 1; 3042ac55d41SJosef Bacik clear_extent_buffer_uptodate(io_tree, eb, &cached_state); 30533958dc6SChris Mason out: 3062ac55d41SJosef Bacik unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, 3072ac55d41SJosef Bacik &cached_state, GFP_NOFS); 3081259ab75SChris Mason return ret; 3091259ab75SChris Mason } 3101259ab75SChris Mason 311d352ac68SChris Mason /* 312d352ac68SChris Mason * helper to read a given tree block, doing retries as required when 313d352ac68SChris Mason * the checksums don't match and we have alternate mirrors to try. 
314d352ac68SChris Mason */ 315f188591eSChris Mason static int btree_read_extent_buffer_pages(struct btrfs_root *root, 316f188591eSChris Mason struct extent_buffer *eb, 317ca7a79adSChris Mason u64 start, u64 parent_transid) 318f188591eSChris Mason { 319f188591eSChris Mason struct extent_io_tree *io_tree; 320f188591eSChris Mason int ret; 321f188591eSChris Mason int num_copies = 0; 322f188591eSChris Mason int mirror_num = 0; 323f188591eSChris Mason 324a826d6dcSJosef Bacik clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 325f188591eSChris Mason io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 326f188591eSChris Mason while (1) { 327f188591eSChris Mason ret = read_extent_buffer_pages(io_tree, eb, start, 1, 328f188591eSChris Mason btree_get_extent, mirror_num); 3291259ab75SChris Mason if (!ret && 3301259ab75SChris Mason !verify_parent_transid(io_tree, eb, parent_transid)) 331f188591eSChris Mason return ret; 332d397712bSChris Mason 333a826d6dcSJosef Bacik /* 334a826d6dcSJosef Bacik * This buffer's crc is fine, but its contents are corrupted, so 335a826d6dcSJosef Bacik * there is no reason to read the other copies, they won't be 336a826d6dcSJosef Bacik * any less wrong. 337a826d6dcSJosef Bacik */ 338a826d6dcSJosef Bacik if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) 339a826d6dcSJosef Bacik return ret; 340a826d6dcSJosef Bacik 341f188591eSChris Mason num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 342f188591eSChris Mason eb->start, eb->len); 3434235298eSChris Mason if (num_copies == 1) 344f188591eSChris Mason return ret; 3454235298eSChris Mason 346f188591eSChris Mason mirror_num++; 3474235298eSChris Mason if (mirror_num > num_copies) 348f188591eSChris Mason return ret; 349f188591eSChris Mason } 350f188591eSChris Mason return -EIO; 351f188591eSChris Mason } 35219c00ddcSChris Mason 353d352ac68SChris Mason /* 354d397712bSChris Mason * checksum a dirty tree block before IO. 
This has extra checks to make sure 355d397712bSChris Mason * we only fill in the checksum field in the first page of a multi-page block 356d352ac68SChris Mason */ 357d397712bSChris Mason 358b2950863SChristoph Hellwig static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 35919c00ddcSChris Mason { 360d1310b2eSChris Mason struct extent_io_tree *tree; 36135ebb934SChris Mason u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 36219c00ddcSChris Mason u64 found_start; 36319c00ddcSChris Mason unsigned long len; 36419c00ddcSChris Mason struct extent_buffer *eb; 365f188591eSChris Mason int ret; 366f188591eSChris Mason 367d1310b2eSChris Mason tree = &BTRFS_I(page->mapping->host)->io_tree; 36819c00ddcSChris Mason 369eb14ab8eSChris Mason if (page->private == EXTENT_PAGE_PRIVATE) { 370eb14ab8eSChris Mason WARN_ON(1); 37119c00ddcSChris Mason goto out; 372eb14ab8eSChris Mason } 373eb14ab8eSChris Mason if (!page->private) { 374eb14ab8eSChris Mason WARN_ON(1); 37519c00ddcSChris Mason goto out; 376eb14ab8eSChris Mason } 37719c00ddcSChris Mason len = page->private >> 2; 378d397712bSChris Mason WARN_ON(len == 0); 379d397712bSChris Mason 380ba144192SDavid Sterba eb = alloc_extent_buffer(tree, start, len, page); 38191ca338dSTsutomu Itoh if (eb == NULL) { 38291ca338dSTsutomu Itoh WARN_ON(1); 38391ca338dSTsutomu Itoh goto out; 38491ca338dSTsutomu Itoh } 385ca7a79adSChris Mason ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, 386ca7a79adSChris Mason btrfs_header_generation(eb)); 387f188591eSChris Mason BUG_ON(ret); 388784b4e29SChris Mason WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); 389784b4e29SChris Mason 39019c00ddcSChris Mason found_start = btrfs_header_bytenr(eb); 39119c00ddcSChris Mason if (found_start != start) { 39255c69072SChris Mason WARN_ON(1); 39355c69072SChris Mason goto err; 39455c69072SChris Mason } 39555c69072SChris Mason if (eb->first_page != page) { 39655c69072SChris Mason WARN_ON(1); 39755c69072SChris Mason goto err; 
39855c69072SChris Mason } 39955c69072SChris Mason if (!PageUptodate(page)) { 40055c69072SChris Mason WARN_ON(1); 40155c69072SChris Mason goto err; 40219c00ddcSChris Mason } 40319c00ddcSChris Mason csum_tree_block(root, eb, 0); 40455c69072SChris Mason err: 40519c00ddcSChris Mason free_extent_buffer(eb); 40619c00ddcSChris Mason out: 40719c00ddcSChris Mason return 0; 40819c00ddcSChris Mason } 40919c00ddcSChris Mason 4102b82032cSYan Zheng static int check_tree_block_fsid(struct btrfs_root *root, 4112b82032cSYan Zheng struct extent_buffer *eb) 4122b82032cSYan Zheng { 4132b82032cSYan Zheng struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 4142b82032cSYan Zheng u8 fsid[BTRFS_UUID_SIZE]; 4152b82032cSYan Zheng int ret = 1; 4162b82032cSYan Zheng 4172b82032cSYan Zheng read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), 4182b82032cSYan Zheng BTRFS_FSID_SIZE); 4192b82032cSYan Zheng while (fs_devices) { 4202b82032cSYan Zheng if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { 4212b82032cSYan Zheng ret = 0; 4222b82032cSYan Zheng break; 4232b82032cSYan Zheng } 4242b82032cSYan Zheng fs_devices = fs_devices->seed; 4252b82032cSYan Zheng } 4262b82032cSYan Zheng return ret; 4272b82032cSYan Zheng } 4282b82032cSYan Zheng 429a826d6dcSJosef Bacik #define CORRUPT(reason, eb, root, slot) \ 430a826d6dcSJosef Bacik printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ 431a826d6dcSJosef Bacik "root=%llu, slot=%d\n", reason, \ 432a826d6dcSJosef Bacik (unsigned long long)btrfs_header_bytenr(eb), \ 433a826d6dcSJosef Bacik (unsigned long long)root->objectid, slot) 434a826d6dcSJosef Bacik 435a826d6dcSJosef Bacik static noinline int check_leaf(struct btrfs_root *root, 436a826d6dcSJosef Bacik struct extent_buffer *leaf) 437a826d6dcSJosef Bacik { 438a826d6dcSJosef Bacik struct btrfs_key key; 439a826d6dcSJosef Bacik struct btrfs_key leaf_key; 440a826d6dcSJosef Bacik u32 nritems = btrfs_header_nritems(leaf); 441a826d6dcSJosef Bacik int slot; 442a826d6dcSJosef Bacik 
443a826d6dcSJosef Bacik if (nritems == 0) 444a826d6dcSJosef Bacik return 0; 445a826d6dcSJosef Bacik 446a826d6dcSJosef Bacik /* Check the 0 item */ 447a826d6dcSJosef Bacik if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != 448a826d6dcSJosef Bacik BTRFS_LEAF_DATA_SIZE(root)) { 449a826d6dcSJosef Bacik CORRUPT("invalid item offset size pair", leaf, root, 0); 450a826d6dcSJosef Bacik return -EIO; 451a826d6dcSJosef Bacik } 452a826d6dcSJosef Bacik 453a826d6dcSJosef Bacik /* 454a826d6dcSJosef Bacik * Check to make sure each items keys are in the correct order and their 455a826d6dcSJosef Bacik * offsets make sense. We only have to loop through nritems-1 because 456a826d6dcSJosef Bacik * we check the current slot against the next slot, which verifies the 457a826d6dcSJosef Bacik * next slot's offset+size makes sense and that the current's slot 458a826d6dcSJosef Bacik * offset is correct. 459a826d6dcSJosef Bacik */ 460a826d6dcSJosef Bacik for (slot = 0; slot < nritems - 1; slot++) { 461a826d6dcSJosef Bacik btrfs_item_key_to_cpu(leaf, &leaf_key, slot); 462a826d6dcSJosef Bacik btrfs_item_key_to_cpu(leaf, &key, slot + 1); 463a826d6dcSJosef Bacik 464a826d6dcSJosef Bacik /* Make sure the keys are in the right order */ 465a826d6dcSJosef Bacik if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { 466a826d6dcSJosef Bacik CORRUPT("bad key order", leaf, root, slot); 467a826d6dcSJosef Bacik return -EIO; 468a826d6dcSJosef Bacik } 469a826d6dcSJosef Bacik 470a826d6dcSJosef Bacik /* 471a826d6dcSJosef Bacik * Make sure the offset and ends are right, remember that the 472a826d6dcSJosef Bacik * item data starts at the end of the leaf and grows towards the 473a826d6dcSJosef Bacik * front. 
474a826d6dcSJosef Bacik */ 475a826d6dcSJosef Bacik if (btrfs_item_offset_nr(leaf, slot) != 476a826d6dcSJosef Bacik btrfs_item_end_nr(leaf, slot + 1)) { 477a826d6dcSJosef Bacik CORRUPT("slot offset bad", leaf, root, slot); 478a826d6dcSJosef Bacik return -EIO; 479a826d6dcSJosef Bacik } 480a826d6dcSJosef Bacik 481a826d6dcSJosef Bacik /* 482a826d6dcSJosef Bacik * Check to make sure that we don't point outside of the leaf, 483a826d6dcSJosef Bacik * just incase all the items are consistent to eachother, but 484a826d6dcSJosef Bacik * all point outside of the leaf. 485a826d6dcSJosef Bacik */ 486a826d6dcSJosef Bacik if (btrfs_item_end_nr(leaf, slot) > 487a826d6dcSJosef Bacik BTRFS_LEAF_DATA_SIZE(root)) { 488a826d6dcSJosef Bacik CORRUPT("slot end outside of leaf", leaf, root, slot); 489a826d6dcSJosef Bacik return -EIO; 490a826d6dcSJosef Bacik } 491a826d6dcSJosef Bacik } 492a826d6dcSJosef Bacik 493a826d6dcSJosef Bacik return 0; 494a826d6dcSJosef Bacik } 495a826d6dcSJosef Bacik 4964008c04aSChris Mason #ifdef CONFIG_DEBUG_LOCK_ALLOC 4974008c04aSChris Mason void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) 4984008c04aSChris Mason { 4994008c04aSChris Mason lockdep_set_class_and_name(&eb->lock, 5004008c04aSChris Mason &btrfs_eb_class[level], 5014008c04aSChris Mason btrfs_eb_name[level]); 5024008c04aSChris Mason } 5034008c04aSChris Mason #endif 5044008c04aSChris Mason 505b2950863SChristoph Hellwig static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 506ce9adaa5SChris Mason struct extent_state *state) 507ce9adaa5SChris Mason { 508ce9adaa5SChris Mason struct extent_io_tree *tree; 509ce9adaa5SChris Mason u64 found_start; 510ce9adaa5SChris Mason int found_level; 511ce9adaa5SChris Mason unsigned long len; 512ce9adaa5SChris Mason struct extent_buffer *eb; 513ce9adaa5SChris Mason struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 514f188591eSChris Mason int ret = 0; 515ce9adaa5SChris Mason 516ce9adaa5SChris Mason tree = 
&BTRFS_I(page->mapping->host)->io_tree; 517ce9adaa5SChris Mason if (page->private == EXTENT_PAGE_PRIVATE) 518ce9adaa5SChris Mason goto out; 519ce9adaa5SChris Mason if (!page->private) 520ce9adaa5SChris Mason goto out; 521d397712bSChris Mason 522ce9adaa5SChris Mason len = page->private >> 2; 523d397712bSChris Mason WARN_ON(len == 0); 524d397712bSChris Mason 525ba144192SDavid Sterba eb = alloc_extent_buffer(tree, start, len, page); 52691ca338dSTsutomu Itoh if (eb == NULL) { 52791ca338dSTsutomu Itoh ret = -EIO; 52891ca338dSTsutomu Itoh goto out; 52991ca338dSTsutomu Itoh } 530f188591eSChris Mason 531ce9adaa5SChris Mason found_start = btrfs_header_bytenr(eb); 53223a07867SChris Mason if (found_start != start) { 5337a36ddecSDavid Sterba printk_ratelimited(KERN_INFO "btrfs bad tree block start " 534193f284dSChris Mason "%llu %llu\n", 535a1b32a59SChris Mason (unsigned long long)found_start, 536a1b32a59SChris Mason (unsigned long long)eb->start); 537f188591eSChris Mason ret = -EIO; 538ce9adaa5SChris Mason goto err; 539ce9adaa5SChris Mason } 540ce9adaa5SChris Mason if (eb->first_page != page) { 541d397712bSChris Mason printk(KERN_INFO "btrfs bad first page %lu %lu\n", 542d397712bSChris Mason eb->first_page->index, page->index); 543ce9adaa5SChris Mason WARN_ON(1); 544f188591eSChris Mason ret = -EIO; 545ce9adaa5SChris Mason goto err; 546ce9adaa5SChris Mason } 5472b82032cSYan Zheng if (check_tree_block_fsid(root, eb)) { 5487a36ddecSDavid Sterba printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 549d397712bSChris Mason (unsigned long long)eb->start); 5501259ab75SChris Mason ret = -EIO; 5511259ab75SChris Mason goto err; 5521259ab75SChris Mason } 553ce9adaa5SChris Mason found_level = btrfs_header_level(eb); 554ce9adaa5SChris Mason 5554008c04aSChris Mason btrfs_set_buffer_lockdep_class(eb, found_level); 5564008c04aSChris Mason 557ce9adaa5SChris Mason ret = csum_tree_block(root, eb, 1); 558a826d6dcSJosef Bacik if (ret) { 559f188591eSChris Mason ret = -EIO; 
560a826d6dcSJosef Bacik goto err; 561a826d6dcSJosef Bacik } 562a826d6dcSJosef Bacik 563a826d6dcSJosef Bacik /* 564a826d6dcSJosef Bacik * If this is a leaf block and it is corrupt, set the corrupt bit so 565a826d6dcSJosef Bacik * that we don't try and read the other copies of this block, just 566a826d6dcSJosef Bacik * return -EIO. 567a826d6dcSJosef Bacik */ 568a826d6dcSJosef Bacik if (found_level == 0 && check_leaf(root, eb)) { 569a826d6dcSJosef Bacik set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 570a826d6dcSJosef Bacik ret = -EIO; 571a826d6dcSJosef Bacik } 572ce9adaa5SChris Mason 573ce9adaa5SChris Mason end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 574ce9adaa5SChris Mason end = eb->start + end - 1; 575ce9adaa5SChris Mason err: 576ce9adaa5SChris Mason free_extent_buffer(eb); 577ce9adaa5SChris Mason out: 578f188591eSChris Mason return ret; 579ce9adaa5SChris Mason } 580ce9adaa5SChris Mason 581ce9adaa5SChris Mason static void end_workqueue_bio(struct bio *bio, int err) 582ce9adaa5SChris Mason { 583ce9adaa5SChris Mason struct end_io_wq *end_io_wq = bio->bi_private; 584ce9adaa5SChris Mason struct btrfs_fs_info *fs_info; 585ce9adaa5SChris Mason 586ce9adaa5SChris Mason fs_info = end_io_wq->info; 587ce9adaa5SChris Mason end_io_wq->error = err; 5888b712842SChris Mason end_io_wq->work.func = end_workqueue_fn; 5898b712842SChris Mason end_io_wq->work.flags = 0; 590d20f7043SChris Mason 5917b6d91daSChristoph Hellwig if (bio->bi_rw & REQ_WRITE) { 5920cb59c99SJosef Bacik if (end_io_wq->metadata == 1) 593cad321adSChris Mason btrfs_queue_worker(&fs_info->endio_meta_write_workers, 594cad321adSChris Mason &end_io_wq->work); 5950cb59c99SJosef Bacik else if (end_io_wq->metadata == 2) 5960cb59c99SJosef Bacik btrfs_queue_worker(&fs_info->endio_freespace_worker, 5970cb59c99SJosef Bacik &end_io_wq->work); 598cad321adSChris Mason else 599e6dcd2dcSChris Mason btrfs_queue_worker(&fs_info->endio_write_workers, 600e6dcd2dcSChris Mason &end_io_wq->work); 601d20f7043SChris Mason } else { 
602d20f7043SChris Mason if (end_io_wq->metadata) 603d20f7043SChris Mason btrfs_queue_worker(&fs_info->endio_meta_workers, 604d20f7043SChris Mason &end_io_wq->work); 605e6dcd2dcSChris Mason else 606d20f7043SChris Mason btrfs_queue_worker(&fs_info->endio_workers, 607d20f7043SChris Mason &end_io_wq->work); 608d20f7043SChris Mason } 609ce9adaa5SChris Mason } 610ce9adaa5SChris Mason 6110cb59c99SJosef Bacik /* 6120cb59c99SJosef Bacik * For the metadata arg you want 6130cb59c99SJosef Bacik * 6140cb59c99SJosef Bacik * 0 - if data 6150cb59c99SJosef Bacik * 1 - if normal metadta 6160cb59c99SJosef Bacik * 2 - if writing to the free space cache area 6170cb59c99SJosef Bacik */ 61822c59948SChris Mason int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 61922c59948SChris Mason int metadata) 6200b86a832SChris Mason { 621ce9adaa5SChris Mason struct end_io_wq *end_io_wq; 622ce9adaa5SChris Mason end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); 623ce9adaa5SChris Mason if (!end_io_wq) 624ce9adaa5SChris Mason return -ENOMEM; 625ce9adaa5SChris Mason 626ce9adaa5SChris Mason end_io_wq->private = bio->bi_private; 627ce9adaa5SChris Mason end_io_wq->end_io = bio->bi_end_io; 62822c59948SChris Mason end_io_wq->info = info; 629ce9adaa5SChris Mason end_io_wq->error = 0; 630ce9adaa5SChris Mason end_io_wq->bio = bio; 63122c59948SChris Mason end_io_wq->metadata = metadata; 632ce9adaa5SChris Mason 633ce9adaa5SChris Mason bio->bi_private = end_io_wq; 634ce9adaa5SChris Mason bio->bi_end_io = end_workqueue_bio; 63522c59948SChris Mason return 0; 63622c59948SChris Mason } 63722c59948SChris Mason 638b64a2851SChris Mason unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) 6394854ddd0SChris Mason { 6404854ddd0SChris Mason unsigned long limit = min_t(unsigned long, 6414854ddd0SChris Mason info->workers.max_workers, 6424854ddd0SChris Mason info->fs_devices->open_devices); 6434854ddd0SChris Mason return 256 * limit; 6444854ddd0SChris Mason } 6454854ddd0SChris Mason 
6464a69a410SChris Mason static void run_one_async_start(struct btrfs_work *work) 6474a69a410SChris Mason { 6484a69a410SChris Mason struct async_submit_bio *async; 6494a69a410SChris Mason 6504a69a410SChris Mason async = container_of(work, struct async_submit_bio, work); 6514a69a410SChris Mason async->submit_bio_start(async->inode, async->rw, async->bio, 652eaf25d93SChris Mason async->mirror_num, async->bio_flags, 653eaf25d93SChris Mason async->bio_offset); 6544a69a410SChris Mason } 6554a69a410SChris Mason 6564a69a410SChris Mason static void run_one_async_done(struct btrfs_work *work) 6578b712842SChris Mason { 6588b712842SChris Mason struct btrfs_fs_info *fs_info; 6598b712842SChris Mason struct async_submit_bio *async; 6604854ddd0SChris Mason int limit; 6618b712842SChris Mason 6628b712842SChris Mason async = container_of(work, struct async_submit_bio, work); 6638b712842SChris Mason fs_info = BTRFS_I(async->inode)->root->fs_info; 6644854ddd0SChris Mason 665b64a2851SChris Mason limit = btrfs_async_submit_limit(fs_info); 6664854ddd0SChris Mason limit = limit * 2 / 3; 6674854ddd0SChris Mason 6688b712842SChris Mason atomic_dec(&fs_info->nr_async_submits); 6690986fe9eSChris Mason 670b64a2851SChris Mason if (atomic_read(&fs_info->nr_async_submits) < limit && 671b64a2851SChris Mason waitqueue_active(&fs_info->async_submit_wait)) 6724854ddd0SChris Mason wake_up(&fs_info->async_submit_wait); 6734854ddd0SChris Mason 6744a69a410SChris Mason async->submit_bio_done(async->inode, async->rw, async->bio, 675eaf25d93SChris Mason async->mirror_num, async->bio_flags, 676eaf25d93SChris Mason async->bio_offset); 6774a69a410SChris Mason } 6784a69a410SChris Mason 6794a69a410SChris Mason static void run_one_async_free(struct btrfs_work *work) 6804a69a410SChris Mason { 6814a69a410SChris Mason struct async_submit_bio *async; 6824a69a410SChris Mason 6834a69a410SChris Mason async = container_of(work, struct async_submit_bio, work); 6848b712842SChris Mason kfree(async); 6858b712842SChris Mason } 
6868b712842SChris Mason 68744b8bd7eSChris Mason int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 68844b8bd7eSChris Mason int rw, struct bio *bio, int mirror_num, 689c8b97818SChris Mason unsigned long bio_flags, 690eaf25d93SChris Mason u64 bio_offset, 6914a69a410SChris Mason extent_submit_bio_hook_t *submit_bio_start, 6924a69a410SChris Mason extent_submit_bio_hook_t *submit_bio_done) 69344b8bd7eSChris Mason { 69444b8bd7eSChris Mason struct async_submit_bio *async; 69544b8bd7eSChris Mason 69644b8bd7eSChris Mason async = kmalloc(sizeof(*async), GFP_NOFS); 69744b8bd7eSChris Mason if (!async) 69844b8bd7eSChris Mason return -ENOMEM; 69944b8bd7eSChris Mason 70044b8bd7eSChris Mason async->inode = inode; 70144b8bd7eSChris Mason async->rw = rw; 70244b8bd7eSChris Mason async->bio = bio; 70344b8bd7eSChris Mason async->mirror_num = mirror_num; 7044a69a410SChris Mason async->submit_bio_start = submit_bio_start; 7054a69a410SChris Mason async->submit_bio_done = submit_bio_done; 7064a69a410SChris Mason 7074a69a410SChris Mason async->work.func = run_one_async_start; 7084a69a410SChris Mason async->work.ordered_func = run_one_async_done; 7094a69a410SChris Mason async->work.ordered_free = run_one_async_free; 7104a69a410SChris Mason 7118b712842SChris Mason async->work.flags = 0; 712c8b97818SChris Mason async->bio_flags = bio_flags; 713eaf25d93SChris Mason async->bio_offset = bio_offset; 7148c8bee1dSChris Mason 715cb03c743SChris Mason atomic_inc(&fs_info->nr_async_submits); 716d313d7a3SChris Mason 7177b6d91daSChristoph Hellwig if (rw & REQ_SYNC) 718d313d7a3SChris Mason btrfs_set_work_high_prio(&async->work); 719d313d7a3SChris Mason 7208b712842SChris Mason btrfs_queue_worker(&fs_info->workers, &async->work); 7219473f16cSChris Mason 722771ed689SChris Mason while (atomic_read(&fs_info->async_submit_draining) && 723771ed689SChris Mason atomic_read(&fs_info->nr_async_submits)) { 724771ed689SChris Mason wait_event(fs_info->async_submit_wait, 725771ed689SChris Mason 
(atomic_read(&fs_info->nr_async_submits) == 0)); 726771ed689SChris Mason } 727771ed689SChris Mason 72844b8bd7eSChris Mason return 0; 72944b8bd7eSChris Mason } 73044b8bd7eSChris Mason 731ce3ed71aSChris Mason static int btree_csum_one_bio(struct bio *bio) 732ce3ed71aSChris Mason { 733ce3ed71aSChris Mason struct bio_vec *bvec = bio->bi_io_vec; 734ce3ed71aSChris Mason int bio_index = 0; 735ce3ed71aSChris Mason struct btrfs_root *root; 736ce3ed71aSChris Mason 737ce3ed71aSChris Mason WARN_ON(bio->bi_vcnt <= 0); 738ce3ed71aSChris Mason while (bio_index < bio->bi_vcnt) { 739ce3ed71aSChris Mason root = BTRFS_I(bvec->bv_page->mapping->host)->root; 740ce3ed71aSChris Mason csum_dirty_buffer(root, bvec->bv_page); 741ce3ed71aSChris Mason bio_index++; 742ce3ed71aSChris Mason bvec++; 743ce3ed71aSChris Mason } 744ce3ed71aSChris Mason return 0; 745ce3ed71aSChris Mason } 746ce3ed71aSChris Mason 7474a69a410SChris Mason static int __btree_submit_bio_start(struct inode *inode, int rw, 7484a69a410SChris Mason struct bio *bio, int mirror_num, 749eaf25d93SChris Mason unsigned long bio_flags, 750eaf25d93SChris Mason u64 bio_offset) 75122c59948SChris Mason { 7528b712842SChris Mason /* 7538b712842SChris Mason * when we're called for a write, we're already in the async 7545443be45SChris Mason * submission context. Just jump into btrfs_map_bio 7558b712842SChris Mason */ 756ce3ed71aSChris Mason btree_csum_one_bio(bio); 7574a69a410SChris Mason return 0; 75822c59948SChris Mason } 75922c59948SChris Mason 7604a69a410SChris Mason static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, 761eaf25d93SChris Mason int mirror_num, unsigned long bio_flags, 762eaf25d93SChris Mason u64 bio_offset) 7634a69a410SChris Mason { 7648b712842SChris Mason /* 7654a69a410SChris Mason * when we're called for a write, we're already in the async 7664a69a410SChris Mason * submission context. 
Just jump into btrfs_map_bio 7678b712842SChris Mason */ 7688b712842SChris Mason return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); 7690b86a832SChris Mason } 7700b86a832SChris Mason 77144b8bd7eSChris Mason static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, 772eaf25d93SChris Mason int mirror_num, unsigned long bio_flags, 773eaf25d93SChris Mason u64 bio_offset) 77444b8bd7eSChris Mason { 7754a69a410SChris Mason int ret; 776cad321adSChris Mason 7774a69a410SChris Mason ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, 7784a69a410SChris Mason bio, 1); 7794a69a410SChris Mason BUG_ON(ret); 7804a69a410SChris Mason 7817b6d91daSChristoph Hellwig if (!(rw & REQ_WRITE)) { 782cad321adSChris Mason /* 783cad321adSChris Mason * called for a read, do the setup so that checksum validation 784cad321adSChris Mason * can happen in the async kernel threads 785cad321adSChris Mason */ 7864a69a410SChris Mason return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 7876f3577bdSChris Mason mirror_num, 0); 78844b8bd7eSChris Mason } 789d313d7a3SChris Mason 790cad321adSChris Mason /* 791cad321adSChris Mason * kthread helpers are used to submit writes so that checksumming 792cad321adSChris Mason * can happen in parallel across all CPUs 793cad321adSChris Mason */ 79444b8bd7eSChris Mason return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, 795c8b97818SChris Mason inode, rw, bio, mirror_num, 0, 796eaf25d93SChris Mason bio_offset, 7974a69a410SChris Mason __btree_submit_bio_start, 7984a69a410SChris Mason __btree_submit_bio_done); 79944b8bd7eSChris Mason } 80044b8bd7eSChris Mason 8013dd1462eSJan Beulich #ifdef CONFIG_MIGRATION 802784b4e29SChris Mason static int btree_migratepage(struct address_space *mapping, 803784b4e29SChris Mason struct page *newpage, struct page *page) 804784b4e29SChris Mason { 805784b4e29SChris Mason /* 806784b4e29SChris Mason * we can't safely write a btree page from here, 807784b4e29SChris Mason * we haven't done the locking 
hook 808784b4e29SChris Mason */ 809784b4e29SChris Mason if (PageDirty(page)) 810784b4e29SChris Mason return -EAGAIN; 811784b4e29SChris Mason /* 812784b4e29SChris Mason * Buffers may be managed in a filesystem specific way. 813784b4e29SChris Mason * We must have no buffers or drop them. 814784b4e29SChris Mason */ 815784b4e29SChris Mason if (page_has_private(page) && 816784b4e29SChris Mason !try_to_release_page(page, GFP_KERNEL)) 817784b4e29SChris Mason return -EAGAIN; 818784b4e29SChris Mason return migrate_page(mapping, newpage, page); 819784b4e29SChris Mason } 8203dd1462eSJan Beulich #endif 821784b4e29SChris Mason 8225f39d397SChris Mason static int btree_writepage(struct page *page, struct writeback_control *wbc) 8235f39d397SChris Mason { 824d1310b2eSChris Mason struct extent_io_tree *tree; 825b9473439SChris Mason struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 826b9473439SChris Mason struct extent_buffer *eb; 827b9473439SChris Mason int was_dirty; 8285443be45SChris Mason 829b9473439SChris Mason tree = &BTRFS_I(page->mapping->host)->io_tree; 830b9473439SChris Mason if (!(current->flags & PF_MEMALLOC)) { 831b9473439SChris Mason return extent_write_full_page(tree, page, 832b9473439SChris Mason btree_get_extent, wbc); 833b9473439SChris Mason } 834b9473439SChris Mason 8355443be45SChris Mason redirty_page_for_writepage(wbc, page); 836784b4e29SChris Mason eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); 837b9473439SChris Mason WARN_ON(!eb); 838b9473439SChris Mason 839b9473439SChris Mason was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 840b9473439SChris Mason if (!was_dirty) { 841b9473439SChris Mason spin_lock(&root->fs_info->delalloc_lock); 842b9473439SChris Mason root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; 843b9473439SChris Mason spin_unlock(&root->fs_info->delalloc_lock); 844b9473439SChris Mason } 845b9473439SChris Mason free_extent_buffer(eb); 846b9473439SChris Mason 8475443be45SChris Mason 
unlock_page(page); 8485443be45SChris Mason return 0; 8495443be45SChris Mason } 8500da5468fSChris Mason 8510da5468fSChris Mason static int btree_writepages(struct address_space *mapping, 8520da5468fSChris Mason struct writeback_control *wbc) 8530da5468fSChris Mason { 854d1310b2eSChris Mason struct extent_io_tree *tree; 855d1310b2eSChris Mason tree = &BTRFS_I(mapping->host)->io_tree; 856d8d5f3e1SChris Mason if (wbc->sync_mode == WB_SYNC_NONE) { 857b9473439SChris Mason struct btrfs_root *root = BTRFS_I(mapping->host)->root; 858793955bcSChris Mason u64 num_dirty; 85924ab9cd8SChris Mason unsigned long thresh = 32 * 1024 * 1024; 860448d640bSChris Mason 861448d640bSChris Mason if (wbc->for_kupdate) 862448d640bSChris Mason return 0; 863448d640bSChris Mason 864b9473439SChris Mason /* this is a bit racy, but that's ok */ 865b9473439SChris Mason num_dirty = root->fs_info->dirty_metadata_bytes; 866d397712bSChris Mason if (num_dirty < thresh) 867793955bcSChris Mason return 0; 868793955bcSChris Mason } 8690da5468fSChris Mason return extent_writepages(tree, mapping, btree_get_extent, wbc); 8700da5468fSChris Mason } 8710da5468fSChris Mason 872b2950863SChristoph Hellwig static int btree_readpage(struct file *file, struct page *page) 8735f39d397SChris Mason { 874d1310b2eSChris Mason struct extent_io_tree *tree; 875d1310b2eSChris Mason tree = &BTRFS_I(page->mapping->host)->io_tree; 8765f39d397SChris Mason return extent_read_full_page(tree, page, btree_get_extent); 8775f39d397SChris Mason } 8785f39d397SChris Mason 87970dec807SChris Mason static int btree_releasepage(struct page *page, gfp_t gfp_flags) 8805f39d397SChris Mason { 881d1310b2eSChris Mason struct extent_io_tree *tree; 882d1310b2eSChris Mason struct extent_map_tree *map; 8835f39d397SChris Mason int ret; 8845f39d397SChris Mason 88598509cfcSChris Mason if (PageWriteback(page) || PageDirty(page)) 88698509cfcSChris Mason return 0; 88798509cfcSChris Mason 888d1310b2eSChris Mason tree = &BTRFS_I(page->mapping->host)->io_tree; 
889d1310b2eSChris Mason map = &BTRFS_I(page->mapping->host)->extent_tree; 8906af118ceSChris Mason 8917b13b7b1SChris Mason ret = try_release_extent_state(map, tree, page, gfp_flags); 892d397712bSChris Mason if (!ret) 8936af118ceSChris Mason return 0; 8946af118ceSChris Mason 8956af118ceSChris Mason ret = try_release_extent_buffer(tree, page); 8965f39d397SChris Mason if (ret == 1) { 8975f39d397SChris Mason ClearPagePrivate(page); 8985f39d397SChris Mason set_page_private(page, 0); 899d98237b3SChris Mason page_cache_release(page); 9005f39d397SChris Mason } 9016af118ceSChris Mason 902d98237b3SChris Mason return ret; 903d98237b3SChris Mason } 904d98237b3SChris Mason 9055f39d397SChris Mason static void btree_invalidatepage(struct page *page, unsigned long offset) 906d98237b3SChris Mason { 907d1310b2eSChris Mason struct extent_io_tree *tree; 908d1310b2eSChris Mason tree = &BTRFS_I(page->mapping->host)->io_tree; 9095f39d397SChris Mason extent_invalidatepage(tree, page, offset); 9105f39d397SChris Mason btree_releasepage(page, GFP_NOFS); 9119ad6b7bcSChris Mason if (PagePrivate(page)) { 912d397712bSChris Mason printk(KERN_WARNING "btrfs warning page private not zero " 913d397712bSChris Mason "on page %llu\n", (unsigned long long)page_offset(page)); 9149ad6b7bcSChris Mason ClearPagePrivate(page); 9159ad6b7bcSChris Mason set_page_private(page, 0); 9169ad6b7bcSChris Mason page_cache_release(page); 9179ad6b7bcSChris Mason } 918d98237b3SChris Mason } 919d98237b3SChris Mason 9207f09410bSAlexey Dobriyan static const struct address_space_operations btree_aops = { 921d98237b3SChris Mason .readpage = btree_readpage, 922d98237b3SChris Mason .writepage = btree_writepage, 9230da5468fSChris Mason .writepages = btree_writepages, 9245f39d397SChris Mason .releasepage = btree_releasepage, 9255f39d397SChris Mason .invalidatepage = btree_invalidatepage, 9265a92bc88SChris Mason #ifdef CONFIG_MIGRATION 927784b4e29SChris Mason .migratepage = btree_migratepage, 9285a92bc88SChris Mason #endif 
929d98237b3SChris Mason }; 930123abc88SChris Mason 931ca7a79adSChris Mason int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, 932ca7a79adSChris Mason u64 parent_transid) 933090d1875SChris Mason { 9345f39d397SChris Mason struct extent_buffer *buf = NULL; 9355f39d397SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 936de428b63SChris Mason int ret = 0; 937090d1875SChris Mason 938db94535dSChris Mason buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 9395f39d397SChris Mason if (!buf) 940090d1875SChris Mason return 0; 941d1310b2eSChris Mason read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, 942f188591eSChris Mason buf, 0, 0, btree_get_extent, 0); 9435f39d397SChris Mason free_extent_buffer(buf); 944de428b63SChris Mason return ret; 945090d1875SChris Mason } 946090d1875SChris Mason 9470999df54SChris Mason struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 9480999df54SChris Mason u64 bytenr, u32 blocksize) 9490999df54SChris Mason { 9500999df54SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 9510999df54SChris Mason struct extent_buffer *eb; 9520999df54SChris Mason eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 953f09d1f60SDavid Sterba bytenr, blocksize); 9540999df54SChris Mason return eb; 9550999df54SChris Mason } 9560999df54SChris Mason 9570999df54SChris Mason struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 9580999df54SChris Mason u64 bytenr, u32 blocksize) 9590999df54SChris Mason { 9600999df54SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 9610999df54SChris Mason struct extent_buffer *eb; 9620999df54SChris Mason 9630999df54SChris Mason eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, 964ba144192SDavid Sterba bytenr, blocksize, NULL); 9650999df54SChris Mason return eb; 9660999df54SChris Mason } 9670999df54SChris Mason 9680999df54SChris Mason 969e02119d5SChris Mason int btrfs_write_tree_block(struct 
extent_buffer *buf) 970e02119d5SChris Mason { 9718aa38c31SChristoph Hellwig return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, 9728aa38c31SChristoph Hellwig buf->start + buf->len - 1); 973e02119d5SChris Mason } 974e02119d5SChris Mason 975e02119d5SChris Mason int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) 976e02119d5SChris Mason { 9778aa38c31SChristoph Hellwig return filemap_fdatawait_range(buf->first_page->mapping, 9788aa38c31SChristoph Hellwig buf->start, buf->start + buf->len - 1); 979e02119d5SChris Mason } 980e02119d5SChris Mason 981db94535dSChris Mason struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, 982ca7a79adSChris Mason u32 blocksize, u64 parent_transid) 983e20d96d6SChris Mason { 9845f39d397SChris Mason struct extent_buffer *buf = NULL; 98519c00ddcSChris Mason int ret; 98619c00ddcSChris Mason 987db94535dSChris Mason buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 9885f39d397SChris Mason if (!buf) 989d98237b3SChris Mason return NULL; 990e4204dedSChris Mason 991ca7a79adSChris Mason ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 992ce9adaa5SChris Mason 993d397712bSChris Mason if (ret == 0) 994b4ce94deSChris Mason set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 9955f39d397SChris Mason return buf; 996ce9adaa5SChris Mason 997eb60ceacSChris Mason } 998eb60ceacSChris Mason 999e089f05cSChris Mason int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 10005f39d397SChris Mason struct extent_buffer *buf) 1001ed2ff2cbSChris Mason { 10025f39d397SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 100355c69072SChris Mason if (btrfs_header_generation(buf) == 1004925baeddSChris Mason root->fs_info->running_transaction->transid) { 1005b9447ef8SChris Mason btrfs_assert_tree_locked(buf); 1006b4ce94deSChris Mason 1007b9473439SChris Mason if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { 1008b9473439SChris Mason 
spin_lock(&root->fs_info->delalloc_lock); 1009b9473439SChris Mason if (root->fs_info->dirty_metadata_bytes >= buf->len) 1010b9473439SChris Mason root->fs_info->dirty_metadata_bytes -= buf->len; 1011b9473439SChris Mason else 1012b9473439SChris Mason WARN_ON(1); 1013b9473439SChris Mason spin_unlock(&root->fs_info->delalloc_lock); 1014b9473439SChris Mason } 1015b4ce94deSChris Mason 1016b9473439SChris Mason /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1017b9473439SChris Mason btrfs_set_lock_blocking(buf); 1018d1310b2eSChris Mason clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 101955c69072SChris Mason buf); 1020925baeddSChris Mason } 10215f39d397SChris Mason return 0; 10225f39d397SChris Mason } 10235f39d397SChris Mason 1024db94535dSChris Mason static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, 102587ee04ebSChris Mason u32 stripesize, struct btrfs_root *root, 10269f5fae2fSChris Mason struct btrfs_fs_info *fs_info, 1027e20d96d6SChris Mason u64 objectid) 1028d97e63b6SChris Mason { 1029cfaa7295SChris Mason root->node = NULL; 1030a28ec197SChris Mason root->commit_root = NULL; 1031db94535dSChris Mason root->sectorsize = sectorsize; 1032db94535dSChris Mason root->nodesize = nodesize; 1033db94535dSChris Mason root->leafsize = leafsize; 103487ee04ebSChris Mason root->stripesize = stripesize; 1035123abc88SChris Mason root->ref_cows = 0; 10360b86a832SChris Mason root->track_dirty = 0; 1037c71bf099SYan, Zheng root->in_radix = 0; 1038d68fc57bSYan, Zheng root->orphan_item_inserted = 0; 1039d68fc57bSYan, Zheng root->orphan_cleanup_state = 0; 10400b86a832SChris Mason 10419f5fae2fSChris Mason root->fs_info = fs_info; 10420f7d52f4SChris Mason root->objectid = objectid; 10430f7d52f4SChris Mason root->last_trans = 0; 104413a8a7c8SYan, Zheng root->highest_objectid = 0; 104558176a96SJosef Bacik root->name = NULL; 10464313b399SChris Mason root->in_sysfs = 0; 10476bef4d31SEric Paris root->inode_tree = RB_ROOT; 1048f0486c68SYan, Zheng root->block_rsv = 
NULL; 1049d68fc57bSYan, Zheng root->orphan_block_rsv = NULL; 10500b86a832SChris Mason 10510b86a832SChris Mason INIT_LIST_HEAD(&root->dirty_list); 10527b128766SJosef Bacik INIT_LIST_HEAD(&root->orphan_list); 10535d4f98a2SYan Zheng INIT_LIST_HEAD(&root->root_list); 1054925baeddSChris Mason spin_lock_init(&root->node_lock); 1055d68fc57bSYan, Zheng spin_lock_init(&root->orphan_lock); 10565d4f98a2SYan Zheng spin_lock_init(&root->inode_lock); 1057f0486c68SYan, Zheng spin_lock_init(&root->accounting_lock); 1058a2135011SChris Mason mutex_init(&root->objectid_mutex); 1059e02119d5SChris Mason mutex_init(&root->log_mutex); 10607237f183SYan Zheng init_waitqueue_head(&root->log_writer_wait); 10617237f183SYan Zheng init_waitqueue_head(&root->log_commit_wait[0]); 10627237f183SYan Zheng init_waitqueue_head(&root->log_commit_wait[1]); 10637237f183SYan Zheng atomic_set(&root->log_commit[0], 0); 10647237f183SYan Zheng atomic_set(&root->log_commit[1], 0); 10657237f183SYan Zheng atomic_set(&root->log_writers, 0); 10667237f183SYan Zheng root->log_batch = 0; 10677237f183SYan Zheng root->log_transid = 0; 1068257c62e1SChris Mason root->last_log_commit = 0; 1069d0c803c4SChris Mason extent_io_tree_init(&root->dirty_log_pages, 1070f993c883SDavid Sterba fs_info->btree_inode->i_mapping); 1071017e5369SChris Mason 10723768f368SChris Mason memset(&root->root_key, 0, sizeof(root->root_key)); 10733768f368SChris Mason memset(&root->root_item, 0, sizeof(root->root_item)); 10746702ed49SChris Mason memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 107558176a96SJosef Bacik memset(&root->root_kobj, 0, sizeof(root->root_kobj)); 10763f157a2fSChris Mason root->defrag_trans_start = fs_info->generation; 107758176a96SJosef Bacik init_completion(&root->kobj_unregister); 10786702ed49SChris Mason root->defrag_running = 0; 10794d775673SChris Mason root->root_key.objectid = objectid; 10803394e160SChris Mason root->anon_super.s_root = NULL; 10813394e160SChris Mason root->anon_super.s_dev = 0; 
10823394e160SChris Mason INIT_LIST_HEAD(&root->anon_super.s_list); 10833394e160SChris Mason INIT_LIST_HEAD(&root->anon_super.s_instances); 10843394e160SChris Mason init_rwsem(&root->anon_super.s_umount); 10853394e160SChris Mason 10863768f368SChris Mason return 0; 10873768f368SChris Mason } 10883768f368SChris Mason 1089db94535dSChris Mason static int find_and_setup_root(struct btrfs_root *tree_root, 10909f5fae2fSChris Mason struct btrfs_fs_info *fs_info, 10919f5fae2fSChris Mason u64 objectid, 1092e20d96d6SChris Mason struct btrfs_root *root) 10933768f368SChris Mason { 10943768f368SChris Mason int ret; 1095db94535dSChris Mason u32 blocksize; 109684234f3aSYan Zheng u64 generation; 10973768f368SChris Mason 1098db94535dSChris Mason __setup_root(tree_root->nodesize, tree_root->leafsize, 109987ee04ebSChris Mason tree_root->sectorsize, tree_root->stripesize, 110087ee04ebSChris Mason root, fs_info, objectid); 11013768f368SChris Mason ret = btrfs_find_last_root(tree_root, objectid, 11023768f368SChris Mason &root->root_item, &root->root_key); 11034df27c4dSYan, Zheng if (ret > 0) 11044df27c4dSYan, Zheng return -ENOENT; 11053768f368SChris Mason BUG_ON(ret); 11063768f368SChris Mason 110784234f3aSYan Zheng generation = btrfs_root_generation(&root->root_item); 1108db94535dSChris Mason blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1109db94535dSChris Mason root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 111084234f3aSYan Zheng blocksize, generation); 111168433b73SChris Mason if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { 111268433b73SChris Mason free_extent_buffer(root->node); 111368433b73SChris Mason return -EIO; 111468433b73SChris Mason } 11154df27c4dSYan, Zheng root->commit_root = btrfs_root_node(root); 1116d97e63b6SChris Mason return 0; 1117d97e63b6SChris Mason } 1118d97e63b6SChris Mason 11197237f183SYan Zheng static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, 1120e02119d5SChris Mason 
struct btrfs_fs_info *fs_info) 11210f7d52f4SChris Mason { 11220f7d52f4SChris Mason struct btrfs_root *root; 11230f7d52f4SChris Mason struct btrfs_root *tree_root = fs_info->tree_root; 11247237f183SYan Zheng struct extent_buffer *leaf; 1125e02119d5SChris Mason 1126e02119d5SChris Mason root = kzalloc(sizeof(*root), GFP_NOFS); 1127e02119d5SChris Mason if (!root) 11287237f183SYan Zheng return ERR_PTR(-ENOMEM); 1129e02119d5SChris Mason 1130e02119d5SChris Mason __setup_root(tree_root->nodesize, tree_root->leafsize, 1131e02119d5SChris Mason tree_root->sectorsize, tree_root->stripesize, 1132e02119d5SChris Mason root, fs_info, BTRFS_TREE_LOG_OBJECTID); 1133e02119d5SChris Mason 1134e02119d5SChris Mason root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; 1135e02119d5SChris Mason root->root_key.type = BTRFS_ROOT_ITEM_KEY; 1136e02119d5SChris Mason root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; 11377237f183SYan Zheng /* 11387237f183SYan Zheng * log trees do not get reference counted because they go away 11397237f183SYan Zheng * before a real commit is actually done. They do store pointers 11407237f183SYan Zheng * to file data extents, and those reference counts still get 11417237f183SYan Zheng * updated (along with back refs to the log tree). 
11427237f183SYan Zheng */ 1143e02119d5SChris Mason root->ref_cows = 0; 1144e02119d5SChris Mason 11455d4f98a2SYan Zheng leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 11465d4f98a2SYan Zheng BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); 11477237f183SYan Zheng if (IS_ERR(leaf)) { 11487237f183SYan Zheng kfree(root); 11497237f183SYan Zheng return ERR_CAST(leaf); 11507237f183SYan Zheng } 1151e02119d5SChris Mason 11525d4f98a2SYan Zheng memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 11535d4f98a2SYan Zheng btrfs_set_header_bytenr(leaf, leaf->start); 11545d4f98a2SYan Zheng btrfs_set_header_generation(leaf, trans->transid); 11555d4f98a2SYan Zheng btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 11565d4f98a2SYan Zheng btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); 11577237f183SYan Zheng root->node = leaf; 1158e02119d5SChris Mason 1159e02119d5SChris Mason write_extent_buffer(root->node, root->fs_info->fsid, 1160e02119d5SChris Mason (unsigned long)btrfs_header_fsid(root->node), 1161e02119d5SChris Mason BTRFS_FSID_SIZE); 1162e02119d5SChris Mason btrfs_mark_buffer_dirty(root->node); 1163e02119d5SChris Mason btrfs_tree_unlock(root->node); 11647237f183SYan Zheng return root; 11657237f183SYan Zheng } 11667237f183SYan Zheng 11677237f183SYan Zheng int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 11687237f183SYan Zheng struct btrfs_fs_info *fs_info) 11697237f183SYan Zheng { 11707237f183SYan Zheng struct btrfs_root *log_root; 11717237f183SYan Zheng 11727237f183SYan Zheng log_root = alloc_log_tree(trans, fs_info); 11737237f183SYan Zheng if (IS_ERR(log_root)) 11747237f183SYan Zheng return PTR_ERR(log_root); 11757237f183SYan Zheng WARN_ON(fs_info->log_root_tree); 11767237f183SYan Zheng fs_info->log_root_tree = log_root; 11777237f183SYan Zheng return 0; 11787237f183SYan Zheng } 11797237f183SYan Zheng 11807237f183SYan Zheng int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 11817237f183SYan Zheng struct btrfs_root *root) 
11827237f183SYan Zheng { 11837237f183SYan Zheng struct btrfs_root *log_root; 11847237f183SYan Zheng struct btrfs_inode_item *inode_item; 11857237f183SYan Zheng 11867237f183SYan Zheng log_root = alloc_log_tree(trans, root->fs_info); 11877237f183SYan Zheng if (IS_ERR(log_root)) 11887237f183SYan Zheng return PTR_ERR(log_root); 11897237f183SYan Zheng 11907237f183SYan Zheng log_root->last_trans = trans->transid; 11917237f183SYan Zheng log_root->root_key.offset = root->root_key.objectid; 11927237f183SYan Zheng 11937237f183SYan Zheng inode_item = &log_root->root_item.inode; 11947237f183SYan Zheng inode_item->generation = cpu_to_le64(1); 11957237f183SYan Zheng inode_item->size = cpu_to_le64(3); 11967237f183SYan Zheng inode_item->nlink = cpu_to_le32(1); 11977237f183SYan Zheng inode_item->nbytes = cpu_to_le64(root->leafsize); 11987237f183SYan Zheng inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 11997237f183SYan Zheng 12005d4f98a2SYan Zheng btrfs_set_root_node(&log_root->root_item, log_root->node); 12017237f183SYan Zheng 12027237f183SYan Zheng WARN_ON(root->log_root); 12037237f183SYan Zheng root->log_root = log_root; 12047237f183SYan Zheng root->log_transid = 0; 1205257c62e1SChris Mason root->last_log_commit = 0; 1206e02119d5SChris Mason return 0; 1207e02119d5SChris Mason } 1208e02119d5SChris Mason 1209e02119d5SChris Mason struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 1210e02119d5SChris Mason struct btrfs_key *location) 1211e02119d5SChris Mason { 1212e02119d5SChris Mason struct btrfs_root *root; 1213e02119d5SChris Mason struct btrfs_fs_info *fs_info = tree_root->fs_info; 12140f7d52f4SChris Mason struct btrfs_path *path; 12155f39d397SChris Mason struct extent_buffer *l; 121684234f3aSYan Zheng u64 generation; 1217db94535dSChris Mason u32 blocksize; 12180f7d52f4SChris Mason int ret = 0; 12190f7d52f4SChris Mason 12205eda7b5eSChris Mason root = kzalloc(sizeof(*root), GFP_NOFS); 12210cf6c620SChris Mason if (!root) 12220f7d52f4SChris Mason return 
ERR_PTR(-ENOMEM); 12230f7d52f4SChris Mason if (location->offset == (u64)-1) { 1224db94535dSChris Mason ret = find_and_setup_root(tree_root, fs_info, 12250f7d52f4SChris Mason location->objectid, root); 12260f7d52f4SChris Mason if (ret) { 12270f7d52f4SChris Mason kfree(root); 12280f7d52f4SChris Mason return ERR_PTR(ret); 12290f7d52f4SChris Mason } 123013a8a7c8SYan, Zheng goto out; 12310f7d52f4SChris Mason } 12320f7d52f4SChris Mason 1233db94535dSChris Mason __setup_root(tree_root->nodesize, tree_root->leafsize, 123487ee04ebSChris Mason tree_root->sectorsize, tree_root->stripesize, 123587ee04ebSChris Mason root, fs_info, location->objectid); 12360f7d52f4SChris Mason 12370f7d52f4SChris Mason path = btrfs_alloc_path(); 1238db5b493aSTsutomu Itoh if (!path) { 1239db5b493aSTsutomu Itoh kfree(root); 1240db5b493aSTsutomu Itoh return ERR_PTR(-ENOMEM); 1241db5b493aSTsutomu Itoh } 12420f7d52f4SChris Mason ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); 124313a8a7c8SYan, Zheng if (ret == 0) { 12445f39d397SChris Mason l = path->nodes[0]; 12455f39d397SChris Mason read_extent_buffer(l, &root->root_item, 12465f39d397SChris Mason btrfs_item_ptr_offset(l, path->slots[0]), 12470f7d52f4SChris Mason sizeof(root->root_item)); 124844b36eb2SYan Zheng memcpy(&root->root_key, location, sizeof(*location)); 124913a8a7c8SYan, Zheng } 12500f7d52f4SChris Mason btrfs_free_path(path); 12510f7d52f4SChris Mason if (ret) { 12525e540f77STsutomu Itoh kfree(root); 125313a8a7c8SYan, Zheng if (ret > 0) 125413a8a7c8SYan, Zheng ret = -ENOENT; 12550f7d52f4SChris Mason return ERR_PTR(ret); 12560f7d52f4SChris Mason } 125713a8a7c8SYan, Zheng 125884234f3aSYan Zheng generation = btrfs_root_generation(&root->root_item); 1259db94535dSChris Mason blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1260db94535dSChris Mason root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 126184234f3aSYan Zheng blocksize, generation); 12625d4f98a2SYan Zheng root->commit_root = 
btrfs_root_node(root); 12630f7d52f4SChris Mason BUG_ON(!root->node); 126413a8a7c8SYan, Zheng out: 126508fe4db1SLi Zefan if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 12660f7d52f4SChris Mason root->ref_cows = 1; 126708fe4db1SLi Zefan btrfs_check_and_init_root_item(&root->root_item); 126808fe4db1SLi Zefan } 126913a8a7c8SYan, Zheng 12705eda7b5eSChris Mason return root; 12715eda7b5eSChris Mason } 12725eda7b5eSChris Mason 1273edbd8d4eSChris Mason struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, 1274edbd8d4eSChris Mason struct btrfs_key *location) 12755eda7b5eSChris Mason { 12765eda7b5eSChris Mason struct btrfs_root *root; 12775eda7b5eSChris Mason int ret; 12785eda7b5eSChris Mason 1279edbd8d4eSChris Mason if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) 1280edbd8d4eSChris Mason return fs_info->tree_root; 1281edbd8d4eSChris Mason if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) 1282edbd8d4eSChris Mason return fs_info->extent_root; 12838f18cf13SChris Mason if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) 12848f18cf13SChris Mason return fs_info->chunk_root; 12858f18cf13SChris Mason if (location->objectid == BTRFS_DEV_TREE_OBJECTID) 12868f18cf13SChris Mason return fs_info->dev_root; 12870403e47eSYan Zheng if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) 12880403e47eSYan Zheng return fs_info->csum_root; 12894df27c4dSYan, Zheng again: 12904df27c4dSYan, Zheng spin_lock(&fs_info->fs_roots_radix_lock); 12915eda7b5eSChris Mason root = radix_tree_lookup(&fs_info->fs_roots_radix, 12925eda7b5eSChris Mason (unsigned long)location->objectid); 12934df27c4dSYan, Zheng spin_unlock(&fs_info->fs_roots_radix_lock); 12945eda7b5eSChris Mason if (root) 12955eda7b5eSChris Mason return root; 12965eda7b5eSChris Mason 1297e02119d5SChris Mason root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); 12985eda7b5eSChris Mason if (IS_ERR(root)) 12995eda7b5eSChris Mason return root; 13003394e160SChris Mason 13013394e160SChris Mason 
set_anon_super(&root->anon_super, NULL); 13023394e160SChris Mason 1303d68fc57bSYan, Zheng if (btrfs_root_refs(&root->root_item) == 0) { 1304d68fc57bSYan, Zheng ret = -ENOENT; 1305d68fc57bSYan, Zheng goto fail; 1306d68fc57bSYan, Zheng } 1307d68fc57bSYan, Zheng 1308d68fc57bSYan, Zheng ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); 1309d68fc57bSYan, Zheng if (ret < 0) 1310d68fc57bSYan, Zheng goto fail; 1311d68fc57bSYan, Zheng if (ret == 0) 1312d68fc57bSYan, Zheng root->orphan_item_inserted = 1; 1313d68fc57bSYan, Zheng 13144df27c4dSYan, Zheng ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 13154df27c4dSYan, Zheng if (ret) 13164df27c4dSYan, Zheng goto fail; 13174df27c4dSYan, Zheng 13184df27c4dSYan, Zheng spin_lock(&fs_info->fs_roots_radix_lock); 13192619ba1fSChris Mason ret = radix_tree_insert(&fs_info->fs_roots_radix, 13202619ba1fSChris Mason (unsigned long)root->root_key.objectid, 13210f7d52f4SChris Mason root); 1322d68fc57bSYan, Zheng if (ret == 0) 13234df27c4dSYan, Zheng root->in_radix = 1; 1324d68fc57bSYan, Zheng 13254df27c4dSYan, Zheng spin_unlock(&fs_info->fs_roots_radix_lock); 13264df27c4dSYan, Zheng radix_tree_preload_end(); 13270f7d52f4SChris Mason if (ret) { 13284df27c4dSYan, Zheng if (ret == -EEXIST) { 13294df27c4dSYan, Zheng free_fs_root(root); 13304df27c4dSYan, Zheng goto again; 13310f7d52f4SChris Mason } 13324df27c4dSYan, Zheng goto fail; 13334df27c4dSYan, Zheng } 13344df27c4dSYan, Zheng 1335edbd8d4eSChris Mason ret = btrfs_find_dead_roots(fs_info->tree_root, 13365d4f98a2SYan Zheng root->root_key.objectid); 13374df27c4dSYan, Zheng WARN_ON(ret); 1338edbd8d4eSChris Mason return root; 13394df27c4dSYan, Zheng fail: 13404df27c4dSYan, Zheng free_fs_root(root); 13414df27c4dSYan, Zheng return ERR_PTR(ret); 1342edbd8d4eSChris Mason } 1343edbd8d4eSChris Mason 134404160088SChris Mason static int btrfs_congested_fn(void *congested_data, int bdi_bits) 134504160088SChris Mason { 134604160088SChris Mason struct btrfs_fs_info *info = (struct 
btrfs_fs_info *)congested_data; 134704160088SChris Mason int ret = 0; 134804160088SChris Mason struct btrfs_device *device; 134904160088SChris Mason struct backing_dev_info *bdi; 1350b7967db7SChris Mason 1351c6e30871SQinghuang Feng list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1352dfe25020SChris Mason if (!device->bdev) 1353dfe25020SChris Mason continue; 135404160088SChris Mason bdi = blk_get_backing_dev_info(device->bdev); 135504160088SChris Mason if (bdi && bdi_congested(bdi, bdi_bits)) { 135604160088SChris Mason ret = 1; 135704160088SChris Mason break; 135804160088SChris Mason } 135904160088SChris Mason } 136004160088SChris Mason return ret; 136104160088SChris Mason } 136204160088SChris Mason 136338b66988SChris Mason /* 1364ad081f14SJens Axboe * If this fails, caller must call bdi_destroy() to get rid of the 1365ad081f14SJens Axboe * bdi again. 1366ad081f14SJens Axboe */ 136704160088SChris Mason static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) 136804160088SChris Mason { 1369ad081f14SJens Axboe int err; 1370ad081f14SJens Axboe 1371ad081f14SJens Axboe bdi->capabilities = BDI_CAP_MAP_COPY; 1372e6d086d8SJens Axboe err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); 1373ad081f14SJens Axboe if (err) 1374ad081f14SJens Axboe return err; 1375ad081f14SJens Axboe 13764575c9ccSChris Mason bdi->ra_pages = default_backing_dev_info.ra_pages; 137704160088SChris Mason bdi->congested_fn = btrfs_congested_fn; 137804160088SChris Mason bdi->congested_data = info; 137904160088SChris Mason return 0; 138004160088SChris Mason } 138104160088SChris Mason 1382ce9adaa5SChris Mason static int bio_ready_for_csum(struct bio *bio) 1383ce9adaa5SChris Mason { 1384ce9adaa5SChris Mason u64 length = 0; 1385ce9adaa5SChris Mason u64 buf_len = 0; 1386ce9adaa5SChris Mason u64 start = 0; 1387ce9adaa5SChris Mason struct page *page; 1388ce9adaa5SChris Mason struct extent_io_tree *io_tree = NULL; 1389ce9adaa5SChris Mason struct bio_vec *bvec; 
1390ce9adaa5SChris Mason int i; 1391ce9adaa5SChris Mason int ret; 1392ce9adaa5SChris Mason 1393ce9adaa5SChris Mason bio_for_each_segment(bvec, bio, i) { 1394ce9adaa5SChris Mason page = bvec->bv_page; 1395ce9adaa5SChris Mason if (page->private == EXTENT_PAGE_PRIVATE) { 1396ce9adaa5SChris Mason length += bvec->bv_len; 1397ce9adaa5SChris Mason continue; 1398ce9adaa5SChris Mason } 1399ce9adaa5SChris Mason if (!page->private) { 1400ce9adaa5SChris Mason length += bvec->bv_len; 1401ce9adaa5SChris Mason continue; 1402ce9adaa5SChris Mason } 1403ce9adaa5SChris Mason length = bvec->bv_len; 1404ce9adaa5SChris Mason buf_len = page->private >> 2; 1405ce9adaa5SChris Mason start = page_offset(page) + bvec->bv_offset; 1406ce9adaa5SChris Mason io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1407ce9adaa5SChris Mason } 1408ce9adaa5SChris Mason /* are we fully contained in this bio? */ 1409ce9adaa5SChris Mason if (buf_len <= length) 1410ce9adaa5SChris Mason return 1; 1411ce9adaa5SChris Mason 1412ce9adaa5SChris Mason ret = extent_range_uptodate(io_tree, start + length, 1413ce9adaa5SChris Mason start + buf_len - 1); 1414ce9adaa5SChris Mason return ret; 1415ce9adaa5SChris Mason } 1416ce9adaa5SChris Mason 14178b712842SChris Mason /* 14188b712842SChris Mason * called by the kthread helper functions to finally call the bio end_io 14198b712842SChris Mason * functions. 
This is where read checksum verification actually happens 14208b712842SChris Mason */ 14218b712842SChris Mason static void end_workqueue_fn(struct btrfs_work *work) 1422ce9adaa5SChris Mason { 1423ce9adaa5SChris Mason struct bio *bio; 14248b712842SChris Mason struct end_io_wq *end_io_wq; 14258b712842SChris Mason struct btrfs_fs_info *fs_info; 1426ce9adaa5SChris Mason int error; 1427ce9adaa5SChris Mason 14288b712842SChris Mason end_io_wq = container_of(work, struct end_io_wq, work); 1429ce9adaa5SChris Mason bio = end_io_wq->bio; 14308b712842SChris Mason fs_info = end_io_wq->info; 14318b712842SChris Mason 1432cad321adSChris Mason /* metadata bio reads are special because the whole tree block must 14338b712842SChris Mason * be checksummed at once. This makes sure the entire block is in 14348b712842SChris Mason * ram and up to date before trying to verify things. For 14358b712842SChris Mason * blocksize <= pagesize, it is basically a noop 14368b712842SChris Mason */ 14377b6d91daSChristoph Hellwig if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && 1438cad321adSChris Mason !bio_ready_for_csum(bio)) { 1439d20f7043SChris Mason btrfs_queue_worker(&fs_info->endio_meta_workers, 14408b712842SChris Mason &end_io_wq->work); 1441ce9adaa5SChris Mason return; 1442ce9adaa5SChris Mason } 1443ce9adaa5SChris Mason error = end_io_wq->error; 1444ce9adaa5SChris Mason bio->bi_private = end_io_wq->private; 1445ce9adaa5SChris Mason bio->bi_end_io = end_io_wq->end_io; 1446ce9adaa5SChris Mason kfree(end_io_wq); 1447ce9adaa5SChris Mason bio_endio(bio, error); 1448ce9adaa5SChris Mason } 144944b8bd7eSChris Mason 1450a74a4b97SChris Mason static int cleaner_kthread(void *arg) 1451a74a4b97SChris Mason { 1452a74a4b97SChris Mason struct btrfs_root *root = arg; 1453a74a4b97SChris Mason 1454a74a4b97SChris Mason do { 1455a74a4b97SChris Mason vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 145676dda93cSYan, Zheng 145776dda93cSYan, Zheng if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 
145876dda93cSYan, Zheng mutex_trylock(&root->fs_info->cleaner_mutex)) { 145924bbcf04SYan, Zheng btrfs_run_delayed_iputs(root); 1460a74a4b97SChris Mason btrfs_clean_old_snapshots(root); 1461a74a4b97SChris Mason mutex_unlock(&root->fs_info->cleaner_mutex); 146276dda93cSYan, Zheng } 1463a74a4b97SChris Mason 1464a74a4b97SChris Mason if (freezing(current)) { 1465a74a4b97SChris Mason refrigerator(); 1466a74a4b97SChris Mason } else { 1467a74a4b97SChris Mason set_current_state(TASK_INTERRUPTIBLE); 14688929ecfaSYan, Zheng if (!kthread_should_stop()) 1469a74a4b97SChris Mason schedule(); 1470a74a4b97SChris Mason __set_current_state(TASK_RUNNING); 1471a74a4b97SChris Mason } 1472a74a4b97SChris Mason } while (!kthread_should_stop()); 1473a74a4b97SChris Mason return 0; 1474a74a4b97SChris Mason } 1475a74a4b97SChris Mason 1476a74a4b97SChris Mason static int transaction_kthread(void *arg) 1477a74a4b97SChris Mason { 1478a74a4b97SChris Mason struct btrfs_root *root = arg; 1479a74a4b97SChris Mason struct btrfs_trans_handle *trans; 1480a74a4b97SChris Mason struct btrfs_transaction *cur; 14818929ecfaSYan, Zheng u64 transid; 1482a74a4b97SChris Mason unsigned long now; 1483a74a4b97SChris Mason unsigned long delay; 1484a74a4b97SChris Mason int ret; 1485a74a4b97SChris Mason 1486a74a4b97SChris Mason do { 1487a74a4b97SChris Mason delay = HZ * 30; 1488a74a4b97SChris Mason vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1489a74a4b97SChris Mason mutex_lock(&root->fs_info->transaction_kthread_mutex); 1490a74a4b97SChris Mason 14918929ecfaSYan, Zheng spin_lock(&root->fs_info->new_trans_lock); 1492a74a4b97SChris Mason cur = root->fs_info->running_transaction; 1493a74a4b97SChris Mason if (!cur) { 14948929ecfaSYan, Zheng spin_unlock(&root->fs_info->new_trans_lock); 1495a74a4b97SChris Mason goto sleep; 1496a74a4b97SChris Mason } 149731153d81SYan Zheng 1498a74a4b97SChris Mason now = get_seconds(); 14998929ecfaSYan, Zheng if (!cur->blocked && 15008929ecfaSYan, Zheng (now < cur->start_time || now - 
cur->start_time < 30)) { 15018929ecfaSYan, Zheng spin_unlock(&root->fs_info->new_trans_lock); 1502a74a4b97SChris Mason delay = HZ * 5; 1503a74a4b97SChris Mason goto sleep; 1504a74a4b97SChris Mason } 15058929ecfaSYan, Zheng transid = cur->transid; 15068929ecfaSYan, Zheng spin_unlock(&root->fs_info->new_trans_lock); 150756bec294SChris Mason 15088929ecfaSYan, Zheng trans = btrfs_join_transaction(root, 1); 15093612b495STsutomu Itoh BUG_ON(IS_ERR(trans)); 15108929ecfaSYan, Zheng if (transid == trans->transid) { 15118929ecfaSYan, Zheng ret = btrfs_commit_transaction(trans, root); 15128929ecfaSYan, Zheng BUG_ON(ret); 15138929ecfaSYan, Zheng } else { 15148929ecfaSYan, Zheng btrfs_end_transaction(trans, root); 15158929ecfaSYan, Zheng } 1516a74a4b97SChris Mason sleep: 1517a74a4b97SChris Mason wake_up_process(root->fs_info->cleaner_kthread); 1518a74a4b97SChris Mason mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1519a74a4b97SChris Mason 1520a74a4b97SChris Mason if (freezing(current)) { 1521a74a4b97SChris Mason refrigerator(); 1522a74a4b97SChris Mason } else { 1523a74a4b97SChris Mason set_current_state(TASK_INTERRUPTIBLE); 15248929ecfaSYan, Zheng if (!kthread_should_stop() && 15258929ecfaSYan, Zheng !btrfs_transaction_blocked(root->fs_info)) 1526a74a4b97SChris Mason schedule_timeout(delay); 1527a74a4b97SChris Mason __set_current_state(TASK_RUNNING); 1528a74a4b97SChris Mason } 1529a74a4b97SChris Mason } while (!kthread_should_stop()); 1530a74a4b97SChris Mason return 0; 1531a74a4b97SChris Mason } 1532a74a4b97SChris Mason 15338a4b83ccSChris Mason struct btrfs_root *open_ctree(struct super_block *sb, 1534dfe25020SChris Mason struct btrfs_fs_devices *fs_devices, 1535dfe25020SChris Mason char *options) 1536eb60ceacSChris Mason { 1537db94535dSChris Mason u32 sectorsize; 1538db94535dSChris Mason u32 nodesize; 1539db94535dSChris Mason u32 leafsize; 1540db94535dSChris Mason u32 blocksize; 154187ee04ebSChris Mason u32 stripesize; 154284234f3aSYan Zheng u64 generation; 
1543f2b636e8SJosef Bacik u64 features; 15443de4586cSChris Mason struct btrfs_key location; 1545a061fc8dSChris Mason struct buffer_head *bh; 1546e02119d5SChris Mason struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), 1547e20d96d6SChris Mason GFP_NOFS); 1548d20f7043SChris Mason struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1549d20f7043SChris Mason GFP_NOFS); 1550450ba0eaSJosef Bacik struct btrfs_root *tree_root = btrfs_sb(sb); 15514891aca2SDavid Sterba struct btrfs_fs_info *fs_info = NULL; 1552e02119d5SChris Mason struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 15530b86a832SChris Mason GFP_NOFS); 1554e02119d5SChris Mason struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 15550b86a832SChris Mason GFP_NOFS); 1556e02119d5SChris Mason struct btrfs_root *log_tree_root; 1557e02119d5SChris Mason 1558eb60ceacSChris Mason int ret; 1559e58ca020SYan int err = -EINVAL; 15604543df7eSChris Mason 15612c90e5d6SChris Mason struct btrfs_super_block *disk_super; 15628790d502SChris Mason 15634891aca2SDavid Sterba if (!extent_root || !tree_root || !tree_root->fs_info || 1564d20f7043SChris Mason !chunk_root || !dev_root || !csum_root) { 156539279cc3SChris Mason err = -ENOMEM; 156639279cc3SChris Mason goto fail; 156739279cc3SChris Mason } 15684891aca2SDavid Sterba fs_info = tree_root->fs_info; 156976dda93cSYan, Zheng 157076dda93cSYan, Zheng ret = init_srcu_struct(&fs_info->subvol_srcu); 157176dda93cSYan, Zheng if (ret) { 157276dda93cSYan, Zheng err = ret; 157376dda93cSYan, Zheng goto fail; 157476dda93cSYan, Zheng } 157576dda93cSYan, Zheng 157676dda93cSYan, Zheng ret = setup_bdi(fs_info, &fs_info->bdi); 157776dda93cSYan, Zheng if (ret) { 157876dda93cSYan, Zheng err = ret; 157976dda93cSYan, Zheng goto fail_srcu; 158076dda93cSYan, Zheng } 158176dda93cSYan, Zheng 158276dda93cSYan, Zheng fs_info->btree_inode = new_inode(sb); 158376dda93cSYan, Zheng if (!fs_info->btree_inode) { 158476dda93cSYan, Zheng err = -ENOMEM; 
158576dda93cSYan, Zheng goto fail_bdi; 158676dda93cSYan, Zheng } 158776dda93cSYan, Zheng 15881561dedaSMiao Xie fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; 15891561dedaSMiao Xie 159076dda93cSYan, Zheng INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 15918fd17795SChris Mason INIT_LIST_HEAD(&fs_info->trans_list); 1592facda1e7SChris Mason INIT_LIST_HEAD(&fs_info->dead_roots); 159324bbcf04SYan, Zheng INIT_LIST_HEAD(&fs_info->delayed_iputs); 159419c00ddcSChris Mason INIT_LIST_HEAD(&fs_info->hashers); 1595ea8c2819SChris Mason INIT_LIST_HEAD(&fs_info->delalloc_inodes); 15965a3f23d5SChris Mason INIT_LIST_HEAD(&fs_info->ordered_operations); 159711833d66SYan Zheng INIT_LIST_HEAD(&fs_info->caching_block_groups); 15981832a6d5SChris Mason spin_lock_init(&fs_info->delalloc_lock); 1599cee36a03SChris Mason spin_lock_init(&fs_info->new_trans_lock); 160031153d81SYan Zheng spin_lock_init(&fs_info->ref_cache_lock); 160176dda93cSYan, Zheng spin_lock_init(&fs_info->fs_roots_radix_lock); 160224bbcf04SYan, Zheng spin_lock_init(&fs_info->delayed_iput_lock); 160319c00ddcSChris Mason 160458176a96SJosef Bacik init_completion(&fs_info->kobj_unregister); 16059f5fae2fSChris Mason fs_info->tree_root = tree_root; 16069f5fae2fSChris Mason fs_info->extent_root = extent_root; 1607d20f7043SChris Mason fs_info->csum_root = csum_root; 16080b86a832SChris Mason fs_info->chunk_root = chunk_root; 16090b86a832SChris Mason fs_info->dev_root = dev_root; 16108a4b83ccSChris Mason fs_info->fs_devices = fs_devices; 16110b86a832SChris Mason INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); 16126324fbf3SChris Mason INIT_LIST_HEAD(&fs_info->space_info); 16130b86a832SChris Mason btrfs_mapping_init(&fs_info->mapping_tree); 1614f0486c68SYan, Zheng btrfs_init_block_rsv(&fs_info->global_block_rsv); 1615f0486c68SYan, Zheng btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); 1616f0486c68SYan, Zheng btrfs_init_block_rsv(&fs_info->trans_block_rsv); 1617f0486c68SYan, Zheng 
btrfs_init_block_rsv(&fs_info->chunk_block_rsv); 1618f0486c68SYan, Zheng btrfs_init_block_rsv(&fs_info->empty_block_rsv); 1619f0486c68SYan, Zheng INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); 1620f0486c68SYan, Zheng mutex_init(&fs_info->durable_block_rsv_mutex); 1621cb03c743SChris Mason atomic_set(&fs_info->nr_async_submits, 0); 1622771ed689SChris Mason atomic_set(&fs_info->async_delalloc_pages, 0); 16238c8bee1dSChris Mason atomic_set(&fs_info->async_submit_draining, 0); 16240986fe9eSChris Mason atomic_set(&fs_info->nr_async_bios, 0); 1625e20d96d6SChris Mason fs_info->sb = sb; 16266f568d35SChris Mason fs_info->max_inline = 8192 * 1024; 16279ed74f2dSJosef Bacik fs_info->metadata_ratio = 0; 1628c8b97818SChris Mason 1629b34b086cSChris Mason fs_info->thread_pool_size = min_t(unsigned long, 1630b34b086cSChris Mason num_online_cpus() + 2, 8); 16310afbaf8cSChris Mason 16323eaa2885SChris Mason INIT_LIST_HEAD(&fs_info->ordered_extents); 16333eaa2885SChris Mason spin_lock_init(&fs_info->ordered_extent_lock); 16343eaa2885SChris Mason 1635a061fc8dSChris Mason sb->s_blocksize = 4096; 1636a061fc8dSChris Mason sb->s_blocksize_bits = blksize_bits(4096); 163732a88aa1SJens Axboe sb->s_bdi = &fs_info->bdi; 1638a061fc8dSChris Mason 163976dda93cSYan, Zheng fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 164076dda93cSYan, Zheng fs_info->btree_inode->i_nlink = 1; 16410afbaf8cSChris Mason /* 16420afbaf8cSChris Mason * we set the i_size on the btree inode to the max possible int. 
16430afbaf8cSChris Mason * the real end of the address space is determined by all of 16440afbaf8cSChris Mason * the devices in the system 16450afbaf8cSChris Mason */ 16460afbaf8cSChris Mason fs_info->btree_inode->i_size = OFFSET_MAX; 1647d98237b3SChris Mason fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 164804160088SChris Mason fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; 164904160088SChris Mason 16505d4f98a2SYan Zheng RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); 1651d1310b2eSChris Mason extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1652f993c883SDavid Sterba fs_info->btree_inode->i_mapping); 1653a8067e02SDavid Sterba extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); 16540da5468fSChris Mason 1655d1310b2eSChris Mason BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; 1656d1310b2eSChris Mason 16570f7d52f4SChris Mason BTRFS_I(fs_info->btree_inode)->root = tree_root; 16580f7d52f4SChris Mason memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 16590f7d52f4SChris Mason sizeof(struct btrfs_key)); 166076dda93cSYan, Zheng BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; 166122b0ebdaSChris Mason insert_inode_hash(fs_info->btree_inode); 166239279cc3SChris Mason 1663e02119d5SChris Mason spin_lock_init(&fs_info->block_group_cache_lock); 16646bef4d31SEric Paris fs_info->block_group_cache_tree = RB_ROOT; 1665925baeddSChris Mason 166611833d66SYan Zheng extent_io_tree_init(&fs_info->freed_extents[0], 1667f993c883SDavid Sterba fs_info->btree_inode->i_mapping); 166811833d66SYan Zheng extent_io_tree_init(&fs_info->freed_extents[1], 1669f993c883SDavid Sterba fs_info->btree_inode->i_mapping); 167011833d66SYan Zheng fs_info->pinned_extents = &fs_info->freed_extents[0]; 1671e6dcd2dcSChris Mason fs_info->do_barriers = 1; 1672f9295749SChris Mason 1673d98237b3SChris Mason 1674509659cdSChris Mason mutex_init(&fs_info->trans_mutex); 16755a3f23d5SChris Mason mutex_init(&fs_info->ordered_operations_mutex); 
167630ae8467SChris Mason mutex_init(&fs_info->tree_log_mutex); 1677925baeddSChris Mason mutex_init(&fs_info->chunk_mutex); 1678925baeddSChris Mason mutex_init(&fs_info->transaction_kthread_mutex); 1679a74a4b97SChris Mason mutex_init(&fs_info->cleaner_mutex); 1680a74a4b97SChris Mason mutex_init(&fs_info->volume_mutex); 1681276e680dSYan Zheng init_rwsem(&fs_info->extent_commit_sem); 1682c71bf099SYan, Zheng init_rwsem(&fs_info->cleanup_work_sem); 168376dda93cSYan, Zheng init_rwsem(&fs_info->subvol_sem); 1684fa9c0d79SChris Mason 1685fa9c0d79SChris Mason btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1686fa9c0d79SChris Mason btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 1687fa9c0d79SChris Mason 16887d9eb12cSChris Mason init_waitqueue_head(&fs_info->transaction_throttle); 16893768f368SChris Mason init_waitqueue_head(&fs_info->transaction_wait); 1690bb9c12c9SSage Weil init_waitqueue_head(&fs_info->transaction_blocked_wait); 16914854ddd0SChris Mason init_waitqueue_head(&fs_info->async_submit_wait); 16929a8dd150SChris Mason 16930b86a832SChris Mason __setup_root(4096, 4096, 4096, 4096, tree_root, 16942c90e5d6SChris Mason fs_info, BTRFS_ROOT_TREE_OBJECTID); 16957eccb903SChris Mason 1696a512bbf8SYan Zheng bh = btrfs_read_dev_super(fs_devices->latest_bdev); 169720b45077SDave Young if (!bh) { 169820b45077SDave Young err = -EINVAL; 169939279cc3SChris Mason goto fail_iput; 170020b45077SDave Young } 170139279cc3SChris Mason 1702a061fc8dSChris Mason memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 17032d69a0f8SYan Zheng memcpy(&fs_info->super_for_commit, &fs_info->super_copy, 17042d69a0f8SYan Zheng sizeof(fs_info->super_for_commit)); 1705a061fc8dSChris Mason brelse(bh); 17065f39d397SChris Mason 1707a061fc8dSChris Mason memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); 17080b86a832SChris Mason 17095f39d397SChris Mason disk_super = &fs_info->super_copy; 17100f7d52f4SChris Mason if (!btrfs_super_root(disk_super)) 1711c6e2bac1SJosef 
Bacik goto fail_iput; 17120f7d52f4SChris Mason 1713acce952bSliubo /* check FS state, whether FS is broken. */ 1714acce952bSliubo fs_info->fs_state |= btrfs_super_flags(disk_super); 1715acce952bSliubo 1716acce952bSliubo btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 1717acce952bSliubo 171875e7cb7fSLiu Bo /* 171975e7cb7fSLiu Bo * In the long term, we'll store the compression type in the super 172075e7cb7fSLiu Bo * block, and it'll be used for per file compression control. 172175e7cb7fSLiu Bo */ 172275e7cb7fSLiu Bo fs_info->compress_type = BTRFS_COMPRESS_ZLIB; 172375e7cb7fSLiu Bo 17242b82032cSYan Zheng ret = btrfs_parse_options(tree_root, options); 17252b82032cSYan Zheng if (ret) { 17262b82032cSYan Zheng err = ret; 1727c6e2bac1SJosef Bacik goto fail_iput; 17282b82032cSYan Zheng } 1729dfe25020SChris Mason 1730f2b636e8SJosef Bacik features = btrfs_super_incompat_flags(disk_super) & 1731f2b636e8SJosef Bacik ~BTRFS_FEATURE_INCOMPAT_SUPP; 1732f2b636e8SJosef Bacik if (features) { 1733f2b636e8SJosef Bacik printk(KERN_ERR "BTRFS: couldn't mount because of " 1734f2b636e8SJosef Bacik "unsupported optional features (%Lx).\n", 173521380931SJoel Becker (unsigned long long)features); 1736f2b636e8SJosef Bacik err = -EINVAL; 1737c6e2bac1SJosef Bacik goto fail_iput; 1738f2b636e8SJosef Bacik } 1739f2b636e8SJosef Bacik 17405d4f98a2SYan Zheng features = btrfs_super_incompat_flags(disk_super); 17415d4f98a2SYan Zheng features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; 1742a6fa6faeSLi Zefan if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) 1743a6fa6faeSLi Zefan features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 17445d4f98a2SYan Zheng btrfs_set_super_incompat_flags(disk_super, features); 17455d4f98a2SYan Zheng 1746f2b636e8SJosef Bacik features = btrfs_super_compat_ro_flags(disk_super) & 1747f2b636e8SJosef Bacik ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1748f2b636e8SJosef Bacik if (!(sb->s_flags & MS_RDONLY) && features) { 1749f2b636e8SJosef Bacik printk(KERN_ERR "BTRFS: couldn't mount 
RDWR because of " 1750f2b636e8SJosef Bacik "unsupported option features (%Lx).\n", 175121380931SJoel Becker (unsigned long long)features); 1752f2b636e8SJosef Bacik err = -EINVAL; 1753c6e2bac1SJosef Bacik goto fail_iput; 1754f2b636e8SJosef Bacik } 175561d92c32SChris Mason 175661d92c32SChris Mason btrfs_init_workers(&fs_info->generic_worker, 175761d92c32SChris Mason "genwork", 1, NULL); 175861d92c32SChris Mason 17595443be45SChris Mason btrfs_init_workers(&fs_info->workers, "worker", 176061d92c32SChris Mason fs_info->thread_pool_size, 176161d92c32SChris Mason &fs_info->generic_worker); 1762c8b97818SChris Mason 1763771ed689SChris Mason btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", 176461d92c32SChris Mason fs_info->thread_pool_size, 176561d92c32SChris Mason &fs_info->generic_worker); 1766771ed689SChris Mason 17675443be45SChris Mason btrfs_init_workers(&fs_info->submit_workers, "submit", 1768b720d209SChris Mason min_t(u64, fs_devices->num_devices, 176961d92c32SChris Mason fs_info->thread_pool_size), 177061d92c32SChris Mason &fs_info->generic_worker); 177161b49440SChris Mason 177261b49440SChris Mason /* a higher idle thresh on the submit workers makes it much more 177361b49440SChris Mason * likely that bios will be send down in a sane order to the 177461b49440SChris Mason * devices 177561b49440SChris Mason */ 177661b49440SChris Mason fs_info->submit_workers.idle_thresh = 64; 177753863232SChris Mason 1778771ed689SChris Mason fs_info->workers.idle_thresh = 16; 17794a69a410SChris Mason fs_info->workers.ordered = 1; 178061b49440SChris Mason 1781771ed689SChris Mason fs_info->delalloc_workers.idle_thresh = 2; 1782771ed689SChris Mason fs_info->delalloc_workers.ordered = 1; 1783771ed689SChris Mason 178461d92c32SChris Mason btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, 178561d92c32SChris Mason &fs_info->generic_worker); 17865443be45SChris Mason btrfs_init_workers(&fs_info->endio_workers, "endio", 178761d92c32SChris Mason fs_info->thread_pool_size, 
178861d92c32SChris Mason &fs_info->generic_worker); 1789d20f7043SChris Mason btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", 179061d92c32SChris Mason fs_info->thread_pool_size, 179161d92c32SChris Mason &fs_info->generic_worker); 1792cad321adSChris Mason btrfs_init_workers(&fs_info->endio_meta_write_workers, 179361d92c32SChris Mason "endio-meta-write", fs_info->thread_pool_size, 179461d92c32SChris Mason &fs_info->generic_worker); 17955443be45SChris Mason btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 179661d92c32SChris Mason fs_info->thread_pool_size, 179761d92c32SChris Mason &fs_info->generic_worker); 17980cb59c99SJosef Bacik btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", 17990cb59c99SJosef Bacik 1, &fs_info->generic_worker); 180061b49440SChris Mason 180161b49440SChris Mason /* 180261b49440SChris Mason * endios are largely parallel and should have a very 180361b49440SChris Mason * low idle thresh 180461b49440SChris Mason */ 180561b49440SChris Mason fs_info->endio_workers.idle_thresh = 4; 1806b51912c9SChris Mason fs_info->endio_meta_workers.idle_thresh = 4; 1807b51912c9SChris Mason 18089042846bSChris Mason fs_info->endio_write_workers.idle_thresh = 2; 18099042846bSChris Mason fs_info->endio_meta_write_workers.idle_thresh = 2; 18109042846bSChris Mason 18114543df7eSChris Mason btrfs_start_workers(&fs_info->workers, 1); 181261d92c32SChris Mason btrfs_start_workers(&fs_info->generic_worker, 1); 18131cc127b5SChris Mason btrfs_start_workers(&fs_info->submit_workers, 1); 1814771ed689SChris Mason btrfs_start_workers(&fs_info->delalloc_workers, 1); 1815247e743cSChris Mason btrfs_start_workers(&fs_info->fixup_workers, 1); 18169042846bSChris Mason btrfs_start_workers(&fs_info->endio_workers, 1); 18179042846bSChris Mason btrfs_start_workers(&fs_info->endio_meta_workers, 1); 18189042846bSChris Mason btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 18199042846bSChris Mason 
btrfs_start_workers(&fs_info->endio_write_workers, 1); 18200cb59c99SJosef Bacik btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 18214543df7eSChris Mason 18224575c9ccSChris Mason fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1823c8b97818SChris Mason fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1824c8b97818SChris Mason 4 * 1024 * 1024 / PAGE_CACHE_SIZE); 18254575c9ccSChris Mason 1826db94535dSChris Mason nodesize = btrfs_super_nodesize(disk_super); 1827db94535dSChris Mason leafsize = btrfs_super_leafsize(disk_super); 1828db94535dSChris Mason sectorsize = btrfs_super_sectorsize(disk_super); 182987ee04ebSChris Mason stripesize = btrfs_super_stripesize(disk_super); 1830db94535dSChris Mason tree_root->nodesize = nodesize; 1831db94535dSChris Mason tree_root->leafsize = leafsize; 1832db94535dSChris Mason tree_root->sectorsize = sectorsize; 183387ee04ebSChris Mason tree_root->stripesize = stripesize; 1834a061fc8dSChris Mason 1835a061fc8dSChris Mason sb->s_blocksize = sectorsize; 1836a061fc8dSChris Mason sb->s_blocksize_bits = blksize_bits(sectorsize); 1837db94535dSChris Mason 183839279cc3SChris Mason if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, 183939279cc3SChris Mason sizeof(disk_super->magic))) { 1840d397712bSChris Mason printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 184139279cc3SChris Mason goto fail_sb_buffer; 184239279cc3SChris Mason } 184319c00ddcSChris Mason 1844925baeddSChris Mason mutex_lock(&fs_info->chunk_mutex); 1845e4404d6eSYan Zheng ret = btrfs_read_sys_array(tree_root); 1846925baeddSChris Mason mutex_unlock(&fs_info->chunk_mutex); 184784eed90fSChris Mason if (ret) { 1848d397712bSChris Mason printk(KERN_WARNING "btrfs: failed to read the system " 1849d397712bSChris Mason "array on %s\n", sb->s_id); 18505d4f98a2SYan Zheng goto fail_sb_buffer; 185184eed90fSChris Mason } 18520b86a832SChris Mason 18530b86a832SChris Mason blocksize = btrfs_level_size(tree_root, 18540b86a832SChris Mason 
btrfs_super_chunk_root_level(disk_super)); 185584234f3aSYan Zheng generation = btrfs_super_chunk_root_generation(disk_super); 18560b86a832SChris Mason 18570b86a832SChris Mason __setup_root(nodesize, leafsize, sectorsize, stripesize, 18580b86a832SChris Mason chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); 18590b86a832SChris Mason 18600b86a832SChris Mason chunk_root->node = read_tree_block(chunk_root, 18610b86a832SChris Mason btrfs_super_chunk_root(disk_super), 186284234f3aSYan Zheng blocksize, generation); 18630b86a832SChris Mason BUG_ON(!chunk_root->node); 186483121942SDavid Woodhouse if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 186583121942SDavid Woodhouse printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 186683121942SDavid Woodhouse sb->s_id); 186783121942SDavid Woodhouse goto fail_chunk_root; 186883121942SDavid Woodhouse } 18695d4f98a2SYan Zheng btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); 18705d4f98a2SYan Zheng chunk_root->commit_root = btrfs_root_node(chunk_root); 18710b86a832SChris Mason 1872e17cade2SChris Mason read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 1873e17cade2SChris Mason (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 1874e17cade2SChris Mason BTRFS_UUID_SIZE); 1875e17cade2SChris Mason 1876925baeddSChris Mason mutex_lock(&fs_info->chunk_mutex); 18770b86a832SChris Mason ret = btrfs_read_chunk_tree(chunk_root); 1878925baeddSChris Mason mutex_unlock(&fs_info->chunk_mutex); 18792b82032cSYan Zheng if (ret) { 1880d397712bSChris Mason printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", 1881d397712bSChris Mason sb->s_id); 18822b82032cSYan Zheng goto fail_chunk_root; 18832b82032cSYan Zheng } 18840b86a832SChris Mason 1885dfe25020SChris Mason btrfs_close_extra_devices(fs_devices); 1886dfe25020SChris Mason 1887db94535dSChris Mason blocksize = btrfs_level_size(tree_root, 1888db94535dSChris Mason btrfs_super_root_level(disk_super)); 188984234f3aSYan Zheng 
generation = btrfs_super_generation(disk_super); 18900b86a832SChris Mason 1891e20d96d6SChris Mason tree_root->node = read_tree_block(tree_root, 1892db94535dSChris Mason btrfs_super_root(disk_super), 189384234f3aSYan Zheng blocksize, generation); 189439279cc3SChris Mason if (!tree_root->node) 18952b82032cSYan Zheng goto fail_chunk_root; 189683121942SDavid Woodhouse if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { 189783121942SDavid Woodhouse printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", 189883121942SDavid Woodhouse sb->s_id); 189983121942SDavid Woodhouse goto fail_tree_root; 190083121942SDavid Woodhouse } 19015d4f98a2SYan Zheng btrfs_set_root_node(&tree_root->root_item, tree_root->node); 19025d4f98a2SYan Zheng tree_root->commit_root = btrfs_root_node(tree_root); 1903db94535dSChris Mason 1904db94535dSChris Mason ret = find_and_setup_root(tree_root, fs_info, 1905e20d96d6SChris Mason BTRFS_EXTENT_TREE_OBJECTID, extent_root); 19060b86a832SChris Mason if (ret) 190739279cc3SChris Mason goto fail_tree_root; 19080b86a832SChris Mason extent_root->track_dirty = 1; 19090b86a832SChris Mason 19100b86a832SChris Mason ret = find_and_setup_root(tree_root, fs_info, 19110b86a832SChris Mason BTRFS_DEV_TREE_OBJECTID, dev_root); 19120b86a832SChris Mason if (ret) 19130b86a832SChris Mason goto fail_extent_root; 19145d4f98a2SYan Zheng dev_root->track_dirty = 1; 19153768f368SChris Mason 1916d20f7043SChris Mason ret = find_and_setup_root(tree_root, fs_info, 1917d20f7043SChris Mason BTRFS_CSUM_TREE_OBJECTID, csum_root); 1918d20f7043SChris Mason if (ret) 19195d4f98a2SYan Zheng goto fail_dev_root; 1920d20f7043SChris Mason 1921d20f7043SChris Mason csum_root->track_dirty = 1; 1922d20f7043SChris Mason 19238929ecfaSYan, Zheng fs_info->generation = generation; 19248929ecfaSYan, Zheng fs_info->last_trans_committed = generation; 19258929ecfaSYan, Zheng fs_info->data_alloc_profile = (u64)-1; 19268929ecfaSYan, Zheng fs_info->metadata_alloc_profile = (u64)-1; 
19278929ecfaSYan, Zheng fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; 19288929ecfaSYan, Zheng 1929c59021f8Sliubo ret = btrfs_init_space_info(fs_info); 1930c59021f8Sliubo if (ret) { 1931c59021f8Sliubo printk(KERN_ERR "Failed to initial space info: %d\n", ret); 1932c59021f8Sliubo goto fail_block_groups; 1933c59021f8Sliubo } 1934c59021f8Sliubo 19351b1d1f66SJosef Bacik ret = btrfs_read_block_groups(extent_root); 19361b1d1f66SJosef Bacik if (ret) { 19371b1d1f66SJosef Bacik printk(KERN_ERR "Failed to read block groups: %d\n", ret); 19381b1d1f66SJosef Bacik goto fail_block_groups; 19391b1d1f66SJosef Bacik } 19409078a3e1SChris Mason 1941a74a4b97SChris Mason fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1942a74a4b97SChris Mason "btrfs-cleaner"); 194357506d50SQinghuang Feng if (IS_ERR(fs_info->cleaner_kthread)) 19441b1d1f66SJosef Bacik goto fail_block_groups; 1945a74a4b97SChris Mason 1946a74a4b97SChris Mason fs_info->transaction_kthread = kthread_run(transaction_kthread, 1947a74a4b97SChris Mason tree_root, 1948a74a4b97SChris Mason "btrfs-transaction"); 194957506d50SQinghuang Feng if (IS_ERR(fs_info->transaction_kthread)) 19503f157a2fSChris Mason goto fail_cleaner; 1951a74a4b97SChris Mason 1952c289811cSChris Mason if (!btrfs_test_opt(tree_root, SSD) && 1953c289811cSChris Mason !btrfs_test_opt(tree_root, NOSSD) && 1954c289811cSChris Mason !fs_info->fs_devices->rotating) { 1955c289811cSChris Mason printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD " 1956c289811cSChris Mason "mode\n"); 1957c289811cSChris Mason btrfs_set_opt(fs_info->mount_opt, SSD); 1958c289811cSChris Mason } 1959c289811cSChris Mason 1960acce952bSliubo /* do not make disk changes in broken FS */ 1961acce952bSliubo if (btrfs_super_log_root(disk_super) != 0 && 1962acce952bSliubo !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { 1963e02119d5SChris Mason u64 bytenr = btrfs_super_log_root(disk_super); 1964d18a2c44SChris Mason 19657c2ca468SChris Mason if 
(fs_devices->rw_devices == 0) { 1966d397712bSChris Mason printk(KERN_WARNING "Btrfs log replay required " 1967d397712bSChris Mason "on RO media\n"); 19687c2ca468SChris Mason err = -EIO; 19697c2ca468SChris Mason goto fail_trans_kthread; 19707c2ca468SChris Mason } 1971e02119d5SChris Mason blocksize = 1972e02119d5SChris Mason btrfs_level_size(tree_root, 1973e02119d5SChris Mason btrfs_super_log_root_level(disk_super)); 1974e02119d5SChris Mason 1975676e4c86SDan Carpenter log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); 1976676e4c86SDan Carpenter if (!log_tree_root) { 1977676e4c86SDan Carpenter err = -ENOMEM; 1978676e4c86SDan Carpenter goto fail_trans_kthread; 1979676e4c86SDan Carpenter } 1980e02119d5SChris Mason 1981e02119d5SChris Mason __setup_root(nodesize, leafsize, sectorsize, stripesize, 1982e02119d5SChris Mason log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); 1983e02119d5SChris Mason 1984e02119d5SChris Mason log_tree_root->node = read_tree_block(tree_root, bytenr, 198584234f3aSYan Zheng blocksize, 198684234f3aSYan Zheng generation + 1); 1987e02119d5SChris Mason ret = btrfs_recover_log_trees(log_tree_root); 1988e02119d5SChris Mason BUG_ON(ret); 1989e556ce2cSYan Zheng 1990e556ce2cSYan Zheng if (sb->s_flags & MS_RDONLY) { 1991e556ce2cSYan Zheng ret = btrfs_commit_super(tree_root); 1992e556ce2cSYan Zheng BUG_ON(ret); 1993e556ce2cSYan Zheng } 1994e02119d5SChris Mason } 19951a40e23bSZheng Yan 199676dda93cSYan, Zheng ret = btrfs_find_orphan_roots(tree_root); 199776dda93cSYan, Zheng BUG_ON(ret); 199876dda93cSYan, Zheng 19997c2ca468SChris Mason if (!(sb->s_flags & MS_RDONLY)) { 2000d68fc57bSYan, Zheng ret = btrfs_cleanup_fs_roots(fs_info); 2001d68fc57bSYan, Zheng BUG_ON(ret); 2002d68fc57bSYan, Zheng 20035d4f98a2SYan Zheng ret = btrfs_recover_relocation(tree_root); 2004d7ce5843SMiao Xie if (ret < 0) { 2005d7ce5843SMiao Xie printk(KERN_WARNING 2006d7ce5843SMiao Xie "btrfs: failed to recover relocation\n"); 2007d7ce5843SMiao Xie err = -EINVAL; 2008d7ce5843SMiao 
Xie goto fail_trans_kthread; 2009d7ce5843SMiao Xie } 20107c2ca468SChris Mason } 20111a40e23bSZheng Yan 20123de4586cSChris Mason location.objectid = BTRFS_FS_TREE_OBJECTID; 20133de4586cSChris Mason location.type = BTRFS_ROOT_ITEM_KEY; 20143de4586cSChris Mason location.offset = (u64)-1; 20153de4586cSChris Mason 20163de4586cSChris Mason fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 20173de4586cSChris Mason if (!fs_info->fs_root) 20187c2ca468SChris Mason goto fail_trans_kthread; 20193140c9a3SDan Carpenter if (IS_ERR(fs_info->fs_root)) { 20203140c9a3SDan Carpenter err = PTR_ERR(fs_info->fs_root); 20213140c9a3SDan Carpenter goto fail_trans_kthread; 20223140c9a3SDan Carpenter } 2023c289811cSChris Mason 2024e3acc2a6SJosef Bacik if (!(sb->s_flags & MS_RDONLY)) { 2025e3acc2a6SJosef Bacik down_read(&fs_info->cleanup_work_sem); 202666b4ffd1SJosef Bacik err = btrfs_orphan_cleanup(fs_info->fs_root); 202766b4ffd1SJosef Bacik if (!err) 202866b4ffd1SJosef Bacik err = btrfs_orphan_cleanup(fs_info->tree_root); 2029e3acc2a6SJosef Bacik up_read(&fs_info->cleanup_work_sem); 203066b4ffd1SJosef Bacik if (err) { 203166b4ffd1SJosef Bacik close_ctree(tree_root); 203266b4ffd1SJosef Bacik return ERR_PTR(err); 203366b4ffd1SJosef Bacik } 2034e3acc2a6SJosef Bacik } 2035e3acc2a6SJosef Bacik 20360f7d52f4SChris Mason return tree_root; 203739279cc3SChris Mason 20387c2ca468SChris Mason fail_trans_kthread: 20397c2ca468SChris Mason kthread_stop(fs_info->transaction_kthread); 20403f157a2fSChris Mason fail_cleaner: 2041a74a4b97SChris Mason kthread_stop(fs_info->cleaner_kthread); 20427c2ca468SChris Mason 20437c2ca468SChris Mason /* 20447c2ca468SChris Mason * make sure we're done with the btree inode before we stop our 20457c2ca468SChris Mason * kthreads 20467c2ca468SChris Mason */ 20477c2ca468SChris Mason filemap_write_and_wait(fs_info->btree_inode->i_mapping); 20487c2ca468SChris Mason invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 20497c2ca468SChris Mason 20501b1d1f66SJosef 
Bacik fail_block_groups: 20511b1d1f66SJosef Bacik btrfs_free_block_groups(fs_info); 2052d20f7043SChris Mason free_extent_buffer(csum_root->node); 20535d4f98a2SYan Zheng free_extent_buffer(csum_root->commit_root); 20545d4f98a2SYan Zheng fail_dev_root: 20555d4f98a2SYan Zheng free_extent_buffer(dev_root->node); 20565d4f98a2SYan Zheng free_extent_buffer(dev_root->commit_root); 20570b86a832SChris Mason fail_extent_root: 20580b86a832SChris Mason free_extent_buffer(extent_root->node); 20595d4f98a2SYan Zheng free_extent_buffer(extent_root->commit_root); 206039279cc3SChris Mason fail_tree_root: 20615f39d397SChris Mason free_extent_buffer(tree_root->node); 20625d4f98a2SYan Zheng free_extent_buffer(tree_root->commit_root); 20632b82032cSYan Zheng fail_chunk_root: 20642b82032cSYan Zheng free_extent_buffer(chunk_root->node); 20655d4f98a2SYan Zheng free_extent_buffer(chunk_root->commit_root); 206639279cc3SChris Mason fail_sb_buffer: 206761d92c32SChris Mason btrfs_stop_workers(&fs_info->generic_worker); 2068247e743cSChris Mason btrfs_stop_workers(&fs_info->fixup_workers); 2069771ed689SChris Mason btrfs_stop_workers(&fs_info->delalloc_workers); 20708b712842SChris Mason btrfs_stop_workers(&fs_info->workers); 20718b712842SChris Mason btrfs_stop_workers(&fs_info->endio_workers); 2072d20f7043SChris Mason btrfs_stop_workers(&fs_info->endio_meta_workers); 2073cad321adSChris Mason btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2074e6dcd2dcSChris Mason btrfs_stop_workers(&fs_info->endio_write_workers); 20750cb59c99SJosef Bacik btrfs_stop_workers(&fs_info->endio_freespace_worker); 20761cc127b5SChris Mason btrfs_stop_workers(&fs_info->submit_workers); 20774543df7eSChris Mason fail_iput: 20787c2ca468SChris Mason invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 20794543df7eSChris Mason iput(fs_info->btree_inode); 20807e662854SQinghuang Feng 2081dfe25020SChris Mason btrfs_close_devices(fs_info->fs_devices); 208284eed90fSChris Mason 
btrfs_mapping_tree_free(&fs_info->mapping_tree); 2083ad081f14SJens Axboe fail_bdi: 20847e662854SQinghuang Feng bdi_destroy(&fs_info->bdi); 208576dda93cSYan, Zheng fail_srcu: 208676dda93cSYan, Zheng cleanup_srcu_struct(&fs_info->subvol_srcu); 20877e662854SQinghuang Feng fail: 208839279cc3SChris Mason kfree(extent_root); 208939279cc3SChris Mason kfree(tree_root); 209039279cc3SChris Mason kfree(fs_info); 209183afeac4SJim Meyering kfree(chunk_root); 209283afeac4SJim Meyering kfree(dev_root); 2093d20f7043SChris Mason kfree(csum_root); 209439279cc3SChris Mason return ERR_PTR(err); 2095eb60ceacSChris Mason } 2096eb60ceacSChris Mason 2097f2984462SChris Mason static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 2098f2984462SChris Mason { 2099f2984462SChris Mason char b[BDEVNAME_SIZE]; 2100f2984462SChris Mason 2101f2984462SChris Mason if (uptodate) { 2102f2984462SChris Mason set_buffer_uptodate(bh); 2103f2984462SChris Mason } else { 21047a36ddecSDavid Sterba printk_ratelimited(KERN_WARNING "lost page write due to " 2105f2984462SChris Mason "I/O error on %s\n", 2106f2984462SChris Mason bdevname(bh->b_bdev, b)); 21071259ab75SChris Mason /* note, we dont' set_buffer_write_io_error because we have 21081259ab75SChris Mason * our own ways of dealing with the IO errors 21091259ab75SChris Mason */ 2110f2984462SChris Mason clear_buffer_uptodate(bh); 2111f2984462SChris Mason } 2112f2984462SChris Mason unlock_buffer(bh); 2113f2984462SChris Mason put_bh(bh); 2114f2984462SChris Mason } 2115f2984462SChris Mason 2116a512bbf8SYan Zheng struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) 2117a512bbf8SYan Zheng { 2118a512bbf8SYan Zheng struct buffer_head *bh; 2119a512bbf8SYan Zheng struct buffer_head *latest = NULL; 2120a512bbf8SYan Zheng struct btrfs_super_block *super; 2121a512bbf8SYan Zheng int i; 2122a512bbf8SYan Zheng u64 transid = 0; 2123a512bbf8SYan Zheng u64 bytenr; 2124a512bbf8SYan Zheng 2125a512bbf8SYan Zheng /* we would like to check all the 
supers, but that would make 2126a512bbf8SYan Zheng * a btrfs mount succeed after a mkfs from a different FS. 2127a512bbf8SYan Zheng * So, we need to add a special mount option to scan for 2128a512bbf8SYan Zheng * later supers, using BTRFS_SUPER_MIRROR_MAX instead 2129a512bbf8SYan Zheng */ 2130a512bbf8SYan Zheng for (i = 0; i < 1; i++) { 2131a512bbf8SYan Zheng bytenr = btrfs_sb_offset(i); 2132a512bbf8SYan Zheng if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) 2133a512bbf8SYan Zheng break; 2134a512bbf8SYan Zheng bh = __bread(bdev, bytenr / 4096, 4096); 2135a512bbf8SYan Zheng if (!bh) 2136a512bbf8SYan Zheng continue; 2137a512bbf8SYan Zheng 2138a512bbf8SYan Zheng super = (struct btrfs_super_block *)bh->b_data; 2139a512bbf8SYan Zheng if (btrfs_super_bytenr(super) != bytenr || 2140a512bbf8SYan Zheng strncmp((char *)(&super->magic), BTRFS_MAGIC, 2141a512bbf8SYan Zheng sizeof(super->magic))) { 2142a512bbf8SYan Zheng brelse(bh); 2143a512bbf8SYan Zheng continue; 2144a512bbf8SYan Zheng } 2145a512bbf8SYan Zheng 2146a512bbf8SYan Zheng if (!latest || btrfs_super_generation(super) > transid) { 2147a512bbf8SYan Zheng brelse(latest); 2148a512bbf8SYan Zheng latest = bh; 2149a512bbf8SYan Zheng transid = btrfs_super_generation(super); 2150a512bbf8SYan Zheng } else { 2151a512bbf8SYan Zheng brelse(bh); 2152a512bbf8SYan Zheng } 2153a512bbf8SYan Zheng } 2154a512bbf8SYan Zheng return latest; 2155a512bbf8SYan Zheng } 2156a512bbf8SYan Zheng 21574eedeb75SHisashi Hifumi /* 21584eedeb75SHisashi Hifumi * this should be called twice, once with wait == 0 and 21594eedeb75SHisashi Hifumi * once with wait == 1. When wait == 0 is done, all the buffer heads 21604eedeb75SHisashi Hifumi * we write are pinned. 21614eedeb75SHisashi Hifumi * 21624eedeb75SHisashi Hifumi * They are released when wait == 1 is done. 21634eedeb75SHisashi Hifumi * max_mirrors must be the same for both runs, and it indicates how 21644eedeb75SHisashi Hifumi * many supers on this one device should be written. 
21654eedeb75SHisashi Hifumi * 21664eedeb75SHisashi Hifumi * max_mirrors == 0 means to write them all. 21674eedeb75SHisashi Hifumi */ 2168a512bbf8SYan Zheng static int write_dev_supers(struct btrfs_device *device, 2169a512bbf8SYan Zheng struct btrfs_super_block *sb, 2170a512bbf8SYan Zheng int do_barriers, int wait, int max_mirrors) 2171a512bbf8SYan Zheng { 2172a512bbf8SYan Zheng struct buffer_head *bh; 2173a512bbf8SYan Zheng int i; 2174a512bbf8SYan Zheng int ret; 2175a512bbf8SYan Zheng int errors = 0; 2176a512bbf8SYan Zheng u32 crc; 2177a512bbf8SYan Zheng u64 bytenr; 2178a512bbf8SYan Zheng int last_barrier = 0; 2179a512bbf8SYan Zheng 2180a512bbf8SYan Zheng if (max_mirrors == 0) 2181a512bbf8SYan Zheng max_mirrors = BTRFS_SUPER_MIRROR_MAX; 2182a512bbf8SYan Zheng 2183a512bbf8SYan Zheng /* make sure only the last submit_bh does a barrier */ 2184a512bbf8SYan Zheng if (do_barriers) { 2185a512bbf8SYan Zheng for (i = 0; i < max_mirrors; i++) { 2186a512bbf8SYan Zheng bytenr = btrfs_sb_offset(i); 2187a512bbf8SYan Zheng if (bytenr + BTRFS_SUPER_INFO_SIZE >= 2188a512bbf8SYan Zheng device->total_bytes) 2189a512bbf8SYan Zheng break; 2190a512bbf8SYan Zheng last_barrier = i; 2191a512bbf8SYan Zheng } 2192a512bbf8SYan Zheng } 2193a512bbf8SYan Zheng 2194a512bbf8SYan Zheng for (i = 0; i < max_mirrors; i++) { 2195a512bbf8SYan Zheng bytenr = btrfs_sb_offset(i); 2196a512bbf8SYan Zheng if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 2197a512bbf8SYan Zheng break; 2198a512bbf8SYan Zheng 2199a512bbf8SYan Zheng if (wait) { 2200a512bbf8SYan Zheng bh = __find_get_block(device->bdev, bytenr / 4096, 2201a512bbf8SYan Zheng BTRFS_SUPER_INFO_SIZE); 2202a512bbf8SYan Zheng BUG_ON(!bh); 2203a512bbf8SYan Zheng wait_on_buffer(bh); 22044eedeb75SHisashi Hifumi if (!buffer_uptodate(bh)) 22054eedeb75SHisashi Hifumi errors++; 22064eedeb75SHisashi Hifumi 22074eedeb75SHisashi Hifumi /* drop our reference */ 22084eedeb75SHisashi Hifumi brelse(bh); 22094eedeb75SHisashi Hifumi 22104eedeb75SHisashi Hifumi 
/* drop the reference from the wait == 0 run */ 2211a512bbf8SYan Zheng brelse(bh); 2212a512bbf8SYan Zheng continue; 2213a512bbf8SYan Zheng } else { 2214a512bbf8SYan Zheng btrfs_set_super_bytenr(sb, bytenr); 2215a512bbf8SYan Zheng 2216a512bbf8SYan Zheng crc = ~(u32)0; 2217a512bbf8SYan Zheng crc = btrfs_csum_data(NULL, (char *)sb + 2218a512bbf8SYan Zheng BTRFS_CSUM_SIZE, crc, 2219a512bbf8SYan Zheng BTRFS_SUPER_INFO_SIZE - 2220a512bbf8SYan Zheng BTRFS_CSUM_SIZE); 2221a512bbf8SYan Zheng btrfs_csum_final(crc, sb->csum); 2222a512bbf8SYan Zheng 22234eedeb75SHisashi Hifumi /* 22244eedeb75SHisashi Hifumi * one reference for us, and we leave it for the 22254eedeb75SHisashi Hifumi * caller 22264eedeb75SHisashi Hifumi */ 2227a512bbf8SYan Zheng bh = __getblk(device->bdev, bytenr / 4096, 2228a512bbf8SYan Zheng BTRFS_SUPER_INFO_SIZE); 2229a512bbf8SYan Zheng memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 2230a512bbf8SYan Zheng 22314eedeb75SHisashi Hifumi /* one reference for submit_bh */ 2232a512bbf8SYan Zheng get_bh(bh); 22334eedeb75SHisashi Hifumi 22344eedeb75SHisashi Hifumi set_buffer_uptodate(bh); 2235a512bbf8SYan Zheng lock_buffer(bh); 2236a512bbf8SYan Zheng bh->b_end_io = btrfs_end_buffer_write_sync; 2237a512bbf8SYan Zheng } 2238a512bbf8SYan Zheng 2239c3b9a62cSChristoph Hellwig if (i == last_barrier && do_barriers) 2240c3b9a62cSChristoph Hellwig ret = submit_bh(WRITE_FLUSH_FUA, bh); 2241c3b9a62cSChristoph Hellwig else 2242ffbd517dSChris Mason ret = submit_bh(WRITE_SYNC, bh); 2243a512bbf8SYan Zheng 22444eedeb75SHisashi Hifumi if (ret) 2245a512bbf8SYan Zheng errors++; 2246a512bbf8SYan Zheng } 2247a512bbf8SYan Zheng return errors < i ? 
0 : -1; 2248a512bbf8SYan Zheng } 2249a512bbf8SYan Zheng 2250a512bbf8SYan Zheng int write_all_supers(struct btrfs_root *root, int max_mirrors) 2251f2984462SChris Mason { 2252e5e9a520SChris Mason struct list_head *head; 2253f2984462SChris Mason struct btrfs_device *dev; 2254a061fc8dSChris Mason struct btrfs_super_block *sb; 2255f2984462SChris Mason struct btrfs_dev_item *dev_item; 2256f2984462SChris Mason int ret; 2257f2984462SChris Mason int do_barriers; 2258a236aed1SChris Mason int max_errors; 2259a236aed1SChris Mason int total_errors = 0; 2260a061fc8dSChris Mason u64 flags; 2261f2984462SChris Mason 2262a236aed1SChris Mason max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; 2263f2984462SChris Mason do_barriers = !btrfs_test_opt(root, NOBARRIER); 2264f2984462SChris Mason 2265a061fc8dSChris Mason sb = &root->fs_info->super_for_commit; 2266a061fc8dSChris Mason dev_item = &sb->dev_item; 2267e5e9a520SChris Mason 2268e5e9a520SChris Mason mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2269e5e9a520SChris Mason head = &root->fs_info->fs_devices->devices; 2270c6e30871SQinghuang Feng list_for_each_entry(dev, head, dev_list) { 2271dfe25020SChris Mason if (!dev->bdev) { 2272dfe25020SChris Mason total_errors++; 2273dfe25020SChris Mason continue; 2274dfe25020SChris Mason } 22752b82032cSYan Zheng if (!dev->in_fs_metadata || !dev->writeable) 2276dfe25020SChris Mason continue; 2277dfe25020SChris Mason 22782b82032cSYan Zheng btrfs_set_stack_device_generation(dev_item, 0); 2279a061fc8dSChris Mason btrfs_set_stack_device_type(dev_item, dev->type); 2280a061fc8dSChris Mason btrfs_set_stack_device_id(dev_item, dev->devid); 2281a061fc8dSChris Mason btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); 2282a061fc8dSChris Mason btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); 2283a061fc8dSChris Mason btrfs_set_stack_device_io_align(dev_item, dev->io_align); 2284a061fc8dSChris Mason btrfs_set_stack_device_io_width(dev_item, dev->io_width); 
2285a061fc8dSChris Mason btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); 2286a061fc8dSChris Mason memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); 22872b82032cSYan Zheng memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); 2288a512bbf8SYan Zheng 2289a061fc8dSChris Mason flags = btrfs_super_flags(sb); 2290a061fc8dSChris Mason btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); 2291f2984462SChris Mason 2292a512bbf8SYan Zheng ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors); 2293a236aed1SChris Mason if (ret) 2294a236aed1SChris Mason total_errors++; 2295f2984462SChris Mason } 2296a236aed1SChris Mason if (total_errors > max_errors) { 2297d397712bSChris Mason printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2298d397712bSChris Mason total_errors); 2299a236aed1SChris Mason BUG(); 2300a236aed1SChris Mason } 2301f2984462SChris Mason 2302a512bbf8SYan Zheng total_errors = 0; 2303c6e30871SQinghuang Feng list_for_each_entry(dev, head, dev_list) { 2304dfe25020SChris Mason if (!dev->bdev) 2305dfe25020SChris Mason continue; 23062b82032cSYan Zheng if (!dev->in_fs_metadata || !dev->writeable) 2307dfe25020SChris Mason continue; 2308dfe25020SChris Mason 2309a512bbf8SYan Zheng ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors); 2310a512bbf8SYan Zheng if (ret) 23111259ab75SChris Mason total_errors++; 2312f2984462SChris Mason } 2313e5e9a520SChris Mason mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 2314a236aed1SChris Mason if (total_errors > max_errors) { 2315d397712bSChris Mason printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2316d397712bSChris Mason total_errors); 2317a236aed1SChris Mason BUG(); 2318a236aed1SChris Mason } 2319f2984462SChris Mason return 0; 2320f2984462SChris Mason } 2321f2984462SChris Mason 2322a512bbf8SYan Zheng int write_ctree_super(struct btrfs_trans_handle *trans, 2323a512bbf8SYan Zheng struct btrfs_root *root, int max_mirrors) 2324cfaa7295SChris Mason { 
2325e66f709bSChris Mason int ret; 23262c90e5d6SChris Mason 2327a512bbf8SYan Zheng ret = write_all_supers(root, max_mirrors); 23285f39d397SChris Mason return ret; 2329cfaa7295SChris Mason } 2330cfaa7295SChris Mason 23315eda7b5eSChris Mason int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 23322619ba1fSChris Mason { 23334df27c4dSYan, Zheng spin_lock(&fs_info->fs_roots_radix_lock); 23342619ba1fSChris Mason radix_tree_delete(&fs_info->fs_roots_radix, 23352619ba1fSChris Mason (unsigned long)root->root_key.objectid); 23364df27c4dSYan, Zheng spin_unlock(&fs_info->fs_roots_radix_lock); 233776dda93cSYan, Zheng 233876dda93cSYan, Zheng if (btrfs_root_refs(&root->root_item) == 0) 233976dda93cSYan, Zheng synchronize_srcu(&fs_info->subvol_srcu); 234076dda93cSYan, Zheng 23414df27c4dSYan, Zheng free_fs_root(root); 23424df27c4dSYan, Zheng return 0; 23434df27c4dSYan, Zheng } 23444df27c4dSYan, Zheng 23454df27c4dSYan, Zheng static void free_fs_root(struct btrfs_root *root) 23464df27c4dSYan, Zheng { 23474df27c4dSYan, Zheng WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 23483394e160SChris Mason if (root->anon_super.s_dev) { 23493394e160SChris Mason down_write(&root->anon_super.s_umount); 23503394e160SChris Mason kill_anon_super(&root->anon_super); 23513394e160SChris Mason } 23525f39d397SChris Mason free_extent_buffer(root->node); 23535f39d397SChris Mason free_extent_buffer(root->commit_root); 235458176a96SJosef Bacik kfree(root->name); 23552619ba1fSChris Mason kfree(root); 23562619ba1fSChris Mason } 23572619ba1fSChris Mason 235835b7e476SChris Mason static int del_fs_roots(struct btrfs_fs_info *fs_info) 23590f7d52f4SChris Mason { 23600f7d52f4SChris Mason int ret; 23610f7d52f4SChris Mason struct btrfs_root *gang[8]; 23620f7d52f4SChris Mason int i; 23630f7d52f4SChris Mason 236476dda93cSYan, Zheng while (!list_empty(&fs_info->dead_roots)) { 236576dda93cSYan, Zheng gang[0] = list_entry(fs_info->dead_roots.next, 236676dda93cSYan, Zheng struct btrfs_root, root_list); 
236776dda93cSYan, Zheng list_del(&gang[0]->root_list); 236876dda93cSYan, Zheng 236976dda93cSYan, Zheng if (gang[0]->in_radix) { 237076dda93cSYan, Zheng btrfs_free_fs_root(fs_info, gang[0]); 237176dda93cSYan, Zheng } else { 237276dda93cSYan, Zheng free_extent_buffer(gang[0]->node); 237376dda93cSYan, Zheng free_extent_buffer(gang[0]->commit_root); 237476dda93cSYan, Zheng kfree(gang[0]); 237576dda93cSYan, Zheng } 237676dda93cSYan, Zheng } 237776dda93cSYan, Zheng 23780f7d52f4SChris Mason while (1) { 23790f7d52f4SChris Mason ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, 23800f7d52f4SChris Mason (void **)gang, 0, 23810f7d52f4SChris Mason ARRAY_SIZE(gang)); 23820f7d52f4SChris Mason if (!ret) 23830f7d52f4SChris Mason break; 23842619ba1fSChris Mason for (i = 0; i < ret; i++) 23855eda7b5eSChris Mason btrfs_free_fs_root(fs_info, gang[i]); 23860f7d52f4SChris Mason } 23870f7d52f4SChris Mason return 0; 23880f7d52f4SChris Mason } 2389b4100d64SChris Mason 2390c146afadSYan Zheng int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 2391c146afadSYan Zheng { 2392c146afadSYan Zheng u64 root_objectid = 0; 2393c146afadSYan Zheng struct btrfs_root *gang[8]; 2394c146afadSYan Zheng int i; 2395c146afadSYan Zheng int ret; 2396c146afadSYan Zheng 2397c146afadSYan Zheng while (1) { 2398c146afadSYan Zheng ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, 2399c146afadSYan Zheng (void **)gang, root_objectid, 2400c146afadSYan Zheng ARRAY_SIZE(gang)); 2401c146afadSYan Zheng if (!ret) 2402c146afadSYan Zheng break; 24035d4f98a2SYan Zheng 24045d4f98a2SYan Zheng root_objectid = gang[ret - 1]->root_key.objectid + 1; 2405c146afadSYan Zheng for (i = 0; i < ret; i++) { 240666b4ffd1SJosef Bacik int err; 240766b4ffd1SJosef Bacik 2408c146afadSYan Zheng root_objectid = gang[i]->root_key.objectid; 240966b4ffd1SJosef Bacik err = btrfs_orphan_cleanup(gang[i]); 241066b4ffd1SJosef Bacik if (err) 241166b4ffd1SJosef Bacik return err; 2412c146afadSYan Zheng } 2413c146afadSYan Zheng root_objectid++; 
2414c146afadSYan Zheng } 2415c146afadSYan Zheng return 0; 2416c146afadSYan Zheng } 2417c146afadSYan Zheng 2418c146afadSYan Zheng int btrfs_commit_super(struct btrfs_root *root) 2419c146afadSYan Zheng { 2420c146afadSYan Zheng struct btrfs_trans_handle *trans; 2421c146afadSYan Zheng int ret; 2422c146afadSYan Zheng 2423c146afadSYan Zheng mutex_lock(&root->fs_info->cleaner_mutex); 242424bbcf04SYan, Zheng btrfs_run_delayed_iputs(root); 2425c146afadSYan Zheng btrfs_clean_old_snapshots(root); 2426c146afadSYan Zheng mutex_unlock(&root->fs_info->cleaner_mutex); 2427c71bf099SYan, Zheng 2428c71bf099SYan, Zheng /* wait until ongoing cleanup work done */ 2429c71bf099SYan, Zheng down_write(&root->fs_info->cleanup_work_sem); 2430c71bf099SYan, Zheng up_write(&root->fs_info->cleanup_work_sem); 2431c71bf099SYan, Zheng 2432a22285a6SYan, Zheng trans = btrfs_join_transaction(root, 1); 24333612b495STsutomu Itoh if (IS_ERR(trans)) 24343612b495STsutomu Itoh return PTR_ERR(trans); 2435c146afadSYan Zheng ret = btrfs_commit_transaction(trans, root); 2436c146afadSYan Zheng BUG_ON(ret); 2437c146afadSYan Zheng /* run commit again to drop the original snapshot */ 2438a22285a6SYan, Zheng trans = btrfs_join_transaction(root, 1); 24393612b495STsutomu Itoh if (IS_ERR(trans)) 24403612b495STsutomu Itoh return PTR_ERR(trans); 2441c146afadSYan Zheng btrfs_commit_transaction(trans, root); 2442c146afadSYan Zheng ret = btrfs_write_and_wait_transaction(NULL, root); 2443c146afadSYan Zheng BUG_ON(ret); 2444c146afadSYan Zheng 2445a512bbf8SYan Zheng ret = write_ctree_super(NULL, root, 0); 2446c146afadSYan Zheng return ret; 2447c146afadSYan Zheng } 2448c146afadSYan Zheng 2449e20d96d6SChris Mason int close_ctree(struct btrfs_root *root) 2450eb60ceacSChris Mason { 24510f7d52f4SChris Mason struct btrfs_fs_info *fs_info = root->fs_info; 2452c146afadSYan Zheng int ret; 2453e089f05cSChris Mason 2454facda1e7SChris Mason fs_info->closing = 1; 2455a2135011SChris Mason smp_mb(); 2456a2135011SChris Mason 24570af3d00bSJosef 
Bacik btrfs_put_block_group_cache(fs_info); 2458acce952bSliubo 2459acce952bSliubo /* 2460acce952bSliubo * Here come 2 situations when btrfs is broken to flip readonly: 2461acce952bSliubo * 2462acce952bSliubo * 1. when btrfs flips readonly somewhere else before 2463acce952bSliubo * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, 2464acce952bSliubo * and btrfs will skip to write sb directly to keep 2465acce952bSliubo * ERROR state on disk. 2466acce952bSliubo * 2467acce952bSliubo * 2. when btrfs flips readonly just in btrfs_commit_super, 2468ae0e47f0SJustin P. Mattock * and in such case, btrfs cannot write sb via btrfs_commit_super, 2469acce952bSliubo * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, 2470acce952bSliubo * btrfs will cleanup all FS resources first and write sb then. 2471acce952bSliubo */ 2472c146afadSYan Zheng if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2473c146afadSYan Zheng ret = btrfs_commit_super(root); 2474d397712bSChris Mason if (ret) 2475d397712bSChris Mason printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2476c146afadSYan Zheng } 2477ed2ff2cbSChris Mason 2478acce952bSliubo if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 2479acce952bSliubo ret = btrfs_error_commit_super(root); 2480acce952bSliubo if (ret) 2481acce952bSliubo printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2482acce952bSliubo } 2483acce952bSliubo 24848929ecfaSYan, Zheng kthread_stop(root->fs_info->transaction_kthread); 24858929ecfaSYan, Zheng kthread_stop(root->fs_info->cleaner_kthread); 24868929ecfaSYan, Zheng 2487f25784b3SYan Zheng fs_info->closing = 2; 2488f25784b3SYan Zheng smp_mb(); 2489f25784b3SYan Zheng 2490b0c68f8bSChris Mason if (fs_info->delalloc_bytes) { 2491d397712bSChris Mason printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 249221380931SJoel Becker (unsigned long long)fs_info->delalloc_bytes); 2493b0c68f8bSChris Mason } 249431153d81SYan Zheng if (fs_info->total_ref_cache_size) { 2495d397712bSChris Mason printk(KERN_INFO "btrfs: at 
umount reference cache size %llu\n", 2496d397712bSChris Mason (unsigned long long)fs_info->total_ref_cache_size); 249731153d81SYan Zheng } 249831153d81SYan Zheng 24995f39d397SChris Mason free_extent_buffer(fs_info->extent_root->node); 25005d4f98a2SYan Zheng free_extent_buffer(fs_info->extent_root->commit_root); 25015f39d397SChris Mason free_extent_buffer(fs_info->tree_root->node); 25025d4f98a2SYan Zheng free_extent_buffer(fs_info->tree_root->commit_root); 25030b86a832SChris Mason free_extent_buffer(root->fs_info->chunk_root->node); 25045d4f98a2SYan Zheng free_extent_buffer(root->fs_info->chunk_root->commit_root); 25050b86a832SChris Mason free_extent_buffer(root->fs_info->dev_root->node); 25065d4f98a2SYan Zheng free_extent_buffer(root->fs_info->dev_root->commit_root); 2507d20f7043SChris Mason free_extent_buffer(root->fs_info->csum_root->node); 25085d4f98a2SYan Zheng free_extent_buffer(root->fs_info->csum_root->commit_root); 2509d20f7043SChris Mason 25109078a3e1SChris Mason btrfs_free_block_groups(root->fs_info); 2511c146afadSYan Zheng 25120f7d52f4SChris Mason del_fs_roots(fs_info); 2513d10c5f31SChris Mason 2514c146afadSYan Zheng iput(fs_info->btree_inode); 25159ad6b7bcSChris Mason 251661d92c32SChris Mason btrfs_stop_workers(&fs_info->generic_worker); 2517247e743cSChris Mason btrfs_stop_workers(&fs_info->fixup_workers); 2518771ed689SChris Mason btrfs_stop_workers(&fs_info->delalloc_workers); 25198b712842SChris Mason btrfs_stop_workers(&fs_info->workers); 25208b712842SChris Mason btrfs_stop_workers(&fs_info->endio_workers); 2521d20f7043SChris Mason btrfs_stop_workers(&fs_info->endio_meta_workers); 2522cad321adSChris Mason btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2523e6dcd2dcSChris Mason btrfs_stop_workers(&fs_info->endio_write_workers); 25240cb59c99SJosef Bacik btrfs_stop_workers(&fs_info->endio_freespace_worker); 25251cc127b5SChris Mason btrfs_stop_workers(&fs_info->submit_workers); 2526d6bfde87SChris Mason 2527dfe25020SChris Mason 
btrfs_close_devices(fs_info->fs_devices); 25280b86a832SChris Mason btrfs_mapping_tree_free(&fs_info->mapping_tree); 2529b248a415SChris Mason 253004160088SChris Mason bdi_destroy(&fs_info->bdi); 253176dda93cSYan, Zheng cleanup_srcu_struct(&fs_info->subvol_srcu); 25320b86a832SChris Mason 25330f7d52f4SChris Mason kfree(fs_info->extent_root); 25340f7d52f4SChris Mason kfree(fs_info->tree_root); 25350b86a832SChris Mason kfree(fs_info->chunk_root); 25360b86a832SChris Mason kfree(fs_info->dev_root); 2537d20f7043SChris Mason kfree(fs_info->csum_root); 253883a4d548SLi Zefan kfree(fs_info); 253983a4d548SLi Zefan 2540eb60ceacSChris Mason return 0; 2541eb60ceacSChris Mason } 2542eb60ceacSChris Mason 25431259ab75SChris Mason int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) 2544ccd467d6SChris Mason { 25451259ab75SChris Mason int ret; 2546810191ffSChris Mason struct inode *btree_inode = buf->first_page->mapping->host; 25471259ab75SChris Mason 25482ac55d41SJosef Bacik ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, 25492ac55d41SJosef Bacik NULL); 25501259ab75SChris Mason if (!ret) 25511259ab75SChris Mason return ret; 25521259ab75SChris Mason 25531259ab75SChris Mason ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, 25541259ab75SChris Mason parent_transid); 25551259ab75SChris Mason return !ret; 25565f39d397SChris Mason } 25576702ed49SChris Mason 25585f39d397SChris Mason int btrfs_set_buffer_uptodate(struct extent_buffer *buf) 25595f39d397SChris Mason { 2560810191ffSChris Mason struct inode *btree_inode = buf->first_page->mapping->host; 2561d1310b2eSChris Mason return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, 25625f39d397SChris Mason buf); 25635f39d397SChris Mason } 25645f39d397SChris Mason 25655f39d397SChris Mason void btrfs_mark_buffer_dirty(struct extent_buffer *buf) 25665f39d397SChris Mason { 2567810191ffSChris Mason struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 25685f39d397SChris 
Mason u64 transid = btrfs_header_generation(buf); 25695f39d397SChris Mason struct inode *btree_inode = root->fs_info->btree_inode; 2570b9473439SChris Mason int was_dirty; 2571b4ce94deSChris Mason 2572b9447ef8SChris Mason btrfs_assert_tree_locked(buf); 2573ccd467d6SChris Mason if (transid != root->fs_info->generation) { 2574d397712bSChris Mason printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " 2575d397712bSChris Mason "found %llu running %llu\n", 2576db94535dSChris Mason (unsigned long long)buf->start, 2577d397712bSChris Mason (unsigned long long)transid, 2578d397712bSChris Mason (unsigned long long)root->fs_info->generation); 2579ccd467d6SChris Mason WARN_ON(1); 2580ccd467d6SChris Mason } 2581b9473439SChris Mason was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 2582b9473439SChris Mason buf); 2583b9473439SChris Mason if (!was_dirty) { 2584b9473439SChris Mason spin_lock(&root->fs_info->delalloc_lock); 2585b9473439SChris Mason root->fs_info->dirty_metadata_bytes += buf->len; 2586b9473439SChris Mason spin_unlock(&root->fs_info->delalloc_lock); 2587b9473439SChris Mason } 2588eb60ceacSChris Mason } 2589eb60ceacSChris Mason 2590d3c2fdcfSChris Mason void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) 259135b7e476SChris Mason { 2592188de649SChris Mason /* 2593188de649SChris Mason * looks as though older kernels can get into trouble with 2594188de649SChris Mason * this code, they end up stuck in balance_dirty_pages forever 2595188de649SChris Mason */ 2596d6bfde87SChris Mason u64 num_dirty; 2597771ed689SChris Mason unsigned long thresh = 32 * 1024 * 1024; 2598d6bfde87SChris Mason 25996933c02eSJens Axboe if (current->flags & PF_MEMALLOC) 2600d6bfde87SChris Mason return; 2601d6bfde87SChris Mason 2602585ad2c3SChris Mason num_dirty = root->fs_info->dirty_metadata_bytes; 2603585ad2c3SChris Mason 2604d6bfde87SChris Mason if (num_dirty > thresh) { 2605d3c2fdcfSChris Mason balance_dirty_pages_ratelimited_nr( 2606304fced6SChris Mason 
root->fs_info->btree_inode->i_mapping, 1); 260735b7e476SChris Mason } 2608188de649SChris Mason return; 2609d6bfde87SChris Mason } 26106b80053dSChris Mason 2611ca7a79adSChris Mason int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 26126b80053dSChris Mason { 2613810191ffSChris Mason struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 2614ce9adaa5SChris Mason int ret; 2615ca7a79adSChris Mason ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 2616d397712bSChris Mason if (ret == 0) 2617b4ce94deSChris Mason set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); 2618ce9adaa5SChris Mason return ret; 26196b80053dSChris Mason } 26200da5468fSChris Mason 26214bef0848SChris Mason int btree_lock_page_hook(struct page *page) 26224bef0848SChris Mason { 26234bef0848SChris Mason struct inode *inode = page->mapping->host; 2624b9473439SChris Mason struct btrfs_root *root = BTRFS_I(inode)->root; 26254bef0848SChris Mason struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 26264bef0848SChris Mason struct extent_buffer *eb; 26274bef0848SChris Mason unsigned long len; 26284bef0848SChris Mason u64 bytenr = page_offset(page); 26294bef0848SChris Mason 26304bef0848SChris Mason if (page->private == EXTENT_PAGE_PRIVATE) 26314bef0848SChris Mason goto out; 26324bef0848SChris Mason 26334bef0848SChris Mason len = page->private >> 2; 2634f09d1f60SDavid Sterba eb = find_extent_buffer(io_tree, bytenr, len); 26354bef0848SChris Mason if (!eb) 26364bef0848SChris Mason goto out; 26374bef0848SChris Mason 26384bef0848SChris Mason btrfs_tree_lock(eb); 26394bef0848SChris Mason btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 2640b9473439SChris Mason 2641b9473439SChris Mason if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 2642b9473439SChris Mason spin_lock(&root->fs_info->delalloc_lock); 2643b9473439SChris Mason if (root->fs_info->dirty_metadata_bytes >= eb->len) 2644b9473439SChris Mason root->fs_info->dirty_metadata_bytes -= eb->len; 
2645b9473439SChris Mason else 2646b9473439SChris Mason WARN_ON(1); 2647b9473439SChris Mason spin_unlock(&root->fs_info->delalloc_lock); 2648b9473439SChris Mason } 2649b9473439SChris Mason 26504bef0848SChris Mason btrfs_tree_unlock(eb); 26514bef0848SChris Mason free_extent_buffer(eb); 26524bef0848SChris Mason out: 26534bef0848SChris Mason lock_page(page); 26544bef0848SChris Mason return 0; 26554bef0848SChris Mason } 26564bef0848SChris Mason 2657acce952bSliubo static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 2658acce952bSliubo int read_only) 2659acce952bSliubo { 2660acce952bSliubo if (read_only) 2661acce952bSliubo return; 2662acce952bSliubo 2663acce952bSliubo if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 2664acce952bSliubo printk(KERN_WARNING "warning: mount fs with errors, " 2665acce952bSliubo "running btrfsck is recommended\n"); 2666acce952bSliubo } 2667acce952bSliubo 2668acce952bSliubo int btrfs_error_commit_super(struct btrfs_root *root) 2669acce952bSliubo { 2670acce952bSliubo int ret; 2671acce952bSliubo 2672acce952bSliubo mutex_lock(&root->fs_info->cleaner_mutex); 2673acce952bSliubo btrfs_run_delayed_iputs(root); 2674acce952bSliubo mutex_unlock(&root->fs_info->cleaner_mutex); 2675acce952bSliubo 2676acce952bSliubo down_write(&root->fs_info->cleanup_work_sem); 2677acce952bSliubo up_write(&root->fs_info->cleanup_work_sem); 2678acce952bSliubo 2679acce952bSliubo /* cleanup FS via transaction */ 2680acce952bSliubo btrfs_cleanup_transaction(root); 2681acce952bSliubo 2682acce952bSliubo ret = write_ctree_super(NULL, root, 0); 2683acce952bSliubo 2684acce952bSliubo return ret; 2685acce952bSliubo } 2686acce952bSliubo 2687acce952bSliubo static int btrfs_destroy_ordered_operations(struct btrfs_root *root) 2688acce952bSliubo { 2689acce952bSliubo struct btrfs_inode *btrfs_inode; 2690acce952bSliubo struct list_head splice; 2691acce952bSliubo 2692acce952bSliubo INIT_LIST_HEAD(&splice); 2693acce952bSliubo 2694acce952bSliubo 
mutex_lock(&root->fs_info->ordered_operations_mutex); 2695acce952bSliubo spin_lock(&root->fs_info->ordered_extent_lock); 2696acce952bSliubo 2697acce952bSliubo list_splice_init(&root->fs_info->ordered_operations, &splice); 2698acce952bSliubo while (!list_empty(&splice)) { 2699acce952bSliubo btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2700acce952bSliubo ordered_operations); 2701acce952bSliubo 2702acce952bSliubo list_del_init(&btrfs_inode->ordered_operations); 2703acce952bSliubo 2704acce952bSliubo btrfs_invalidate_inodes(btrfs_inode->root); 2705acce952bSliubo } 2706acce952bSliubo 2707acce952bSliubo spin_unlock(&root->fs_info->ordered_extent_lock); 2708acce952bSliubo mutex_unlock(&root->fs_info->ordered_operations_mutex); 2709acce952bSliubo 2710acce952bSliubo return 0; 2711acce952bSliubo } 2712acce952bSliubo 2713acce952bSliubo static int btrfs_destroy_ordered_extents(struct btrfs_root *root) 2714acce952bSliubo { 2715acce952bSliubo struct list_head splice; 2716acce952bSliubo struct btrfs_ordered_extent *ordered; 2717acce952bSliubo struct inode *inode; 2718acce952bSliubo 2719acce952bSliubo INIT_LIST_HEAD(&splice); 2720acce952bSliubo 2721acce952bSliubo spin_lock(&root->fs_info->ordered_extent_lock); 2722acce952bSliubo 2723acce952bSliubo list_splice_init(&root->fs_info->ordered_extents, &splice); 2724acce952bSliubo while (!list_empty(&splice)) { 2725acce952bSliubo ordered = list_entry(splice.next, struct btrfs_ordered_extent, 2726acce952bSliubo root_extent_list); 2727acce952bSliubo 2728acce952bSliubo list_del_init(&ordered->root_extent_list); 2729acce952bSliubo atomic_inc(&ordered->refs); 2730acce952bSliubo 2731acce952bSliubo /* the inode may be getting freed (in sys_unlink path). 
*/ 2732acce952bSliubo inode = igrab(ordered->inode); 2733acce952bSliubo 2734acce952bSliubo spin_unlock(&root->fs_info->ordered_extent_lock); 2735acce952bSliubo if (inode) 2736acce952bSliubo iput(inode); 2737acce952bSliubo 2738acce952bSliubo atomic_set(&ordered->refs, 1); 2739acce952bSliubo btrfs_put_ordered_extent(ordered); 2740acce952bSliubo 2741acce952bSliubo spin_lock(&root->fs_info->ordered_extent_lock); 2742acce952bSliubo } 2743acce952bSliubo 2744acce952bSliubo spin_unlock(&root->fs_info->ordered_extent_lock); 2745acce952bSliubo 2746acce952bSliubo return 0; 2747acce952bSliubo } 2748acce952bSliubo 2749acce952bSliubo static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 2750acce952bSliubo struct btrfs_root *root) 2751acce952bSliubo { 2752acce952bSliubo struct rb_node *node; 2753acce952bSliubo struct btrfs_delayed_ref_root *delayed_refs; 2754acce952bSliubo struct btrfs_delayed_ref_node *ref; 2755acce952bSliubo int ret = 0; 2756acce952bSliubo 2757acce952bSliubo delayed_refs = &trans->delayed_refs; 2758acce952bSliubo 2759acce952bSliubo spin_lock(&delayed_refs->lock); 2760acce952bSliubo if (delayed_refs->num_entries == 0) { 2761cfece4dbSDavid Sterba spin_unlock(&delayed_refs->lock); 2762acce952bSliubo printk(KERN_INFO "delayed_refs has NO entry\n"); 2763acce952bSliubo return ret; 2764acce952bSliubo } 2765acce952bSliubo 2766acce952bSliubo node = rb_first(&delayed_refs->root); 2767acce952bSliubo while (node) { 2768acce952bSliubo ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2769acce952bSliubo node = rb_next(node); 2770acce952bSliubo 2771acce952bSliubo ref->in_tree = 0; 2772acce952bSliubo rb_erase(&ref->rb_node, &delayed_refs->root); 2773acce952bSliubo delayed_refs->num_entries--; 2774acce952bSliubo 2775acce952bSliubo atomic_set(&ref->refs, 1); 2776acce952bSliubo if (btrfs_delayed_ref_is_head(ref)) { 2777acce952bSliubo struct btrfs_delayed_ref_head *head; 2778acce952bSliubo 2779acce952bSliubo head = btrfs_delayed_node_to_head(ref); 
2780acce952bSliubo mutex_lock(&head->mutex); 2781acce952bSliubo kfree(head->extent_op); 2782acce952bSliubo delayed_refs->num_heads--; 2783acce952bSliubo if (list_empty(&head->cluster)) 2784acce952bSliubo delayed_refs->num_heads_ready--; 2785acce952bSliubo list_del_init(&head->cluster); 2786acce952bSliubo mutex_unlock(&head->mutex); 2787acce952bSliubo } 2788acce952bSliubo 2789acce952bSliubo spin_unlock(&delayed_refs->lock); 2790acce952bSliubo btrfs_put_delayed_ref(ref); 2791acce952bSliubo 2792acce952bSliubo cond_resched(); 2793acce952bSliubo spin_lock(&delayed_refs->lock); 2794acce952bSliubo } 2795acce952bSliubo 2796acce952bSliubo spin_unlock(&delayed_refs->lock); 2797acce952bSliubo 2798acce952bSliubo return ret; 2799acce952bSliubo } 2800acce952bSliubo 2801acce952bSliubo static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) 2802acce952bSliubo { 2803acce952bSliubo struct btrfs_pending_snapshot *snapshot; 2804acce952bSliubo struct list_head splice; 2805acce952bSliubo 2806acce952bSliubo INIT_LIST_HEAD(&splice); 2807acce952bSliubo 2808acce952bSliubo list_splice_init(&t->pending_snapshots, &splice); 2809acce952bSliubo 2810acce952bSliubo while (!list_empty(&splice)) { 2811acce952bSliubo snapshot = list_entry(splice.next, 2812acce952bSliubo struct btrfs_pending_snapshot, 2813acce952bSliubo list); 2814acce952bSliubo 2815acce952bSliubo list_del_init(&snapshot->list); 2816acce952bSliubo 2817acce952bSliubo kfree(snapshot); 2818acce952bSliubo } 2819acce952bSliubo 2820acce952bSliubo return 0; 2821acce952bSliubo } 2822acce952bSliubo 2823acce952bSliubo static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) 2824acce952bSliubo { 2825acce952bSliubo struct btrfs_inode *btrfs_inode; 2826acce952bSliubo struct list_head splice; 2827acce952bSliubo 2828acce952bSliubo INIT_LIST_HEAD(&splice); 2829acce952bSliubo 2830acce952bSliubo list_splice_init(&root->fs_info->delalloc_inodes, &splice); 2831acce952bSliubo 2832acce952bSliubo 
spin_lock(&root->fs_info->delalloc_lock); 2833acce952bSliubo 2834acce952bSliubo while (!list_empty(&splice)) { 2835acce952bSliubo btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2836acce952bSliubo delalloc_inodes); 2837acce952bSliubo 2838acce952bSliubo list_del_init(&btrfs_inode->delalloc_inodes); 2839acce952bSliubo 2840acce952bSliubo btrfs_invalidate_inodes(btrfs_inode->root); 2841acce952bSliubo } 2842acce952bSliubo 2843acce952bSliubo spin_unlock(&root->fs_info->delalloc_lock); 2844acce952bSliubo 2845acce952bSliubo return 0; 2846acce952bSliubo } 2847acce952bSliubo 2848acce952bSliubo static int btrfs_destroy_marked_extents(struct btrfs_root *root, 2849acce952bSliubo struct extent_io_tree *dirty_pages, 2850acce952bSliubo int mark) 2851acce952bSliubo { 2852acce952bSliubo int ret; 2853acce952bSliubo struct page *page; 2854acce952bSliubo struct inode *btree_inode = root->fs_info->btree_inode; 2855acce952bSliubo struct extent_buffer *eb; 2856acce952bSliubo u64 start = 0; 2857acce952bSliubo u64 end; 2858acce952bSliubo u64 offset; 2859acce952bSliubo unsigned long index; 2860acce952bSliubo 2861acce952bSliubo while (1) { 2862acce952bSliubo ret = find_first_extent_bit(dirty_pages, start, &start, &end, 2863acce952bSliubo mark); 2864acce952bSliubo if (ret) 2865acce952bSliubo break; 2866acce952bSliubo 2867acce952bSliubo clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 2868acce952bSliubo while (start <= end) { 2869acce952bSliubo index = start >> PAGE_CACHE_SHIFT; 2870acce952bSliubo start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 2871acce952bSliubo page = find_get_page(btree_inode->i_mapping, index); 2872acce952bSliubo if (!page) 2873acce952bSliubo continue; 2874acce952bSliubo offset = page_offset(page); 2875acce952bSliubo 2876acce952bSliubo spin_lock(&dirty_pages->buffer_lock); 2877acce952bSliubo eb = radix_tree_lookup( 2878acce952bSliubo &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, 2879acce952bSliubo offset >> PAGE_CACHE_SHIFT); 2880acce952bSliubo 
spin_unlock(&dirty_pages->buffer_lock); 2881acce952bSliubo if (eb) { 2882acce952bSliubo ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, 2883acce952bSliubo &eb->bflags); 2884acce952bSliubo atomic_set(&eb->refs, 1); 2885acce952bSliubo } 2886acce952bSliubo if (PageWriteback(page)) 2887acce952bSliubo end_page_writeback(page); 2888acce952bSliubo 2889acce952bSliubo lock_page(page); 2890acce952bSliubo if (PageDirty(page)) { 2891acce952bSliubo clear_page_dirty_for_io(page); 2892acce952bSliubo spin_lock_irq(&page->mapping->tree_lock); 2893acce952bSliubo radix_tree_tag_clear(&page->mapping->page_tree, 2894acce952bSliubo page_index(page), 2895acce952bSliubo PAGECACHE_TAG_DIRTY); 2896acce952bSliubo spin_unlock_irq(&page->mapping->tree_lock); 2897acce952bSliubo } 2898acce952bSliubo 2899acce952bSliubo page->mapping->a_ops->invalidatepage(page, 0); 2900acce952bSliubo unlock_page(page); 2901acce952bSliubo } 2902acce952bSliubo } 2903acce952bSliubo 2904acce952bSliubo return ret; 2905acce952bSliubo } 2906acce952bSliubo 2907acce952bSliubo static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 2908acce952bSliubo struct extent_io_tree *pinned_extents) 2909acce952bSliubo { 2910acce952bSliubo struct extent_io_tree *unpin; 2911acce952bSliubo u64 start; 2912acce952bSliubo u64 end; 2913acce952bSliubo int ret; 2914acce952bSliubo 2915acce952bSliubo unpin = pinned_extents; 2916acce952bSliubo while (1) { 2917acce952bSliubo ret = find_first_extent_bit(unpin, 0, &start, &end, 2918acce952bSliubo EXTENT_DIRTY); 2919acce952bSliubo if (ret) 2920acce952bSliubo break; 2921acce952bSliubo 2922acce952bSliubo /* opt_discard */ 29235378e607SLi Dongyang if (btrfs_test_opt(root, DISCARD)) 29245378e607SLi Dongyang ret = btrfs_error_discard_extent(root, start, 29255378e607SLi Dongyang end + 1 - start, 29265378e607SLi Dongyang NULL); 2927acce952bSliubo 2928acce952bSliubo clear_extent_dirty(unpin, start, end, GFP_NOFS); 2929acce952bSliubo btrfs_error_unpin_extent_range(root, start, end); 2930acce952bSliubo 
cond_resched(); 2931acce952bSliubo } 2932acce952bSliubo 2933acce952bSliubo return 0; 2934acce952bSliubo } 2935acce952bSliubo 2936acce952bSliubo static int btrfs_cleanup_transaction(struct btrfs_root *root) 2937acce952bSliubo { 2938acce952bSliubo struct btrfs_transaction *t; 2939acce952bSliubo LIST_HEAD(list); 2940acce952bSliubo 2941acce952bSliubo WARN_ON(1); 2942acce952bSliubo 2943acce952bSliubo mutex_lock(&root->fs_info->trans_mutex); 2944acce952bSliubo mutex_lock(&root->fs_info->transaction_kthread_mutex); 2945acce952bSliubo 2946acce952bSliubo list_splice_init(&root->fs_info->trans_list, &list); 2947acce952bSliubo while (!list_empty(&list)) { 2948acce952bSliubo t = list_entry(list.next, struct btrfs_transaction, list); 2949acce952bSliubo if (!t) 2950acce952bSliubo break; 2951acce952bSliubo 2952acce952bSliubo btrfs_destroy_ordered_operations(root); 2953acce952bSliubo 2954acce952bSliubo btrfs_destroy_ordered_extents(root); 2955acce952bSliubo 2956acce952bSliubo btrfs_destroy_delayed_refs(t, root); 2957acce952bSliubo 2958acce952bSliubo btrfs_block_rsv_release(root, 2959acce952bSliubo &root->fs_info->trans_block_rsv, 2960acce952bSliubo t->dirty_pages.dirty_bytes); 2961acce952bSliubo 2962acce952bSliubo /* FIXME: cleanup wait for commit */ 2963acce952bSliubo t->in_commit = 1; 2964acce952bSliubo t->blocked = 1; 2965acce952bSliubo if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 2966acce952bSliubo wake_up(&root->fs_info->transaction_blocked_wait); 2967acce952bSliubo 2968acce952bSliubo t->blocked = 0; 2969acce952bSliubo if (waitqueue_active(&root->fs_info->transaction_wait)) 2970acce952bSliubo wake_up(&root->fs_info->transaction_wait); 2971acce952bSliubo mutex_unlock(&root->fs_info->trans_mutex); 2972acce952bSliubo 2973acce952bSliubo mutex_lock(&root->fs_info->trans_mutex); 2974acce952bSliubo t->commit_done = 1; 2975acce952bSliubo if (waitqueue_active(&t->commit_wait)) 2976acce952bSliubo wake_up(&t->commit_wait); 2977acce952bSliubo 
mutex_unlock(&root->fs_info->trans_mutex); 2978acce952bSliubo 2979acce952bSliubo mutex_lock(&root->fs_info->trans_mutex); 2980acce952bSliubo 2981acce952bSliubo btrfs_destroy_pending_snapshots(t); 2982acce952bSliubo 2983acce952bSliubo btrfs_destroy_delalloc_inodes(root); 2984acce952bSliubo 2985acce952bSliubo spin_lock(&root->fs_info->new_trans_lock); 2986acce952bSliubo root->fs_info->running_transaction = NULL; 2987acce952bSliubo spin_unlock(&root->fs_info->new_trans_lock); 2988acce952bSliubo 2989acce952bSliubo btrfs_destroy_marked_extents(root, &t->dirty_pages, 2990acce952bSliubo EXTENT_DIRTY); 2991acce952bSliubo 2992acce952bSliubo btrfs_destroy_pinned_extent(root, 2993acce952bSliubo root->fs_info->pinned_extents); 2994acce952bSliubo 299513c5a93eSJosef Bacik atomic_set(&t->use_count, 0); 2996acce952bSliubo list_del_init(&t->list); 2997acce952bSliubo memset(t, 0, sizeof(*t)); 2998acce952bSliubo kmem_cache_free(btrfs_transaction_cachep, t); 2999acce952bSliubo } 3000acce952bSliubo 3001acce952bSliubo mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3002acce952bSliubo mutex_unlock(&root->fs_info->trans_mutex); 3003acce952bSliubo 3004acce952bSliubo return 0; 3005acce952bSliubo } 3006acce952bSliubo 3007d1310b2eSChris Mason static struct extent_io_ops btree_extent_io_ops = { 30084bef0848SChris Mason .write_cache_pages_lock_hook = btree_lock_page_hook, 3009ce9adaa5SChris Mason .readpage_end_io_hook = btree_readpage_end_io_hook, 30100b86a832SChris Mason .submit_bio_hook = btree_submit_bio_hook, 3011239b14b3SChris Mason /* note we're sharing with inode.c for the merge bio hook */ 3012239b14b3SChris Mason .merge_bio_hook = btrfs_merge_bio_hook, 30130da5468fSChris Mason }; 3014