xref: /openbmc/linux/fs/btrfs/disk-io.c (revision 7e75bf3f)
16cbd5570SChris Mason /*
26cbd5570SChris Mason  * Copyright (C) 2007 Oracle.  All rights reserved.
36cbd5570SChris Mason  *
46cbd5570SChris Mason  * This program is free software; you can redistribute it and/or
56cbd5570SChris Mason  * modify it under the terms of the GNU General Public
66cbd5570SChris Mason  * License v2 as published by the Free Software Foundation.
76cbd5570SChris Mason  *
86cbd5570SChris Mason  * This program is distributed in the hope that it will be useful,
96cbd5570SChris Mason  * but WITHOUT ANY WARRANTY; without even the implied warranty of
106cbd5570SChris Mason  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
116cbd5570SChris Mason  * General Public License for more details.
126cbd5570SChris Mason  *
136cbd5570SChris Mason  * You should have received a copy of the GNU General Public
146cbd5570SChris Mason  * License along with this program; if not, write to the
156cbd5570SChris Mason  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
166cbd5570SChris Mason  * Boston, MA 021110-1307, USA.
176cbd5570SChris Mason  */
186cbd5570SChris Mason 
19e20d96d6SChris Mason #include <linux/fs.h>
20d98237b3SChris Mason #include <linux/blkdev.h>
2187cbda5cSChris Mason #include <linux/scatterlist.h>
2222b0ebdaSChris Mason #include <linux/swap.h>
230f7d52f4SChris Mason #include <linux/radix-tree.h>
2435b7e476SChris Mason #include <linux/writeback.h>
25d397712bSChris Mason #include <linux/buffer_head.h>
26ce9adaa5SChris Mason #include <linux/workqueue.h>
27a74a4b97SChris Mason #include <linux/kthread.h>
28a74a4b97SChris Mason #include <linux/freezer.h>
29163e783eSDavid Woodhouse #include <linux/crc32c.h>
305a0e3ad6STejun Heo #include <linux/slab.h>
31784b4e29SChris Mason #include <linux/migrate.h>
327e75bf3fSDavid Sterba #include <asm/unaligned.h>
334b4e25f2SChris Mason #include "compat.h"
34eb60ceacSChris Mason #include "ctree.h"
35eb60ceacSChris Mason #include "disk-io.h"
36e089f05cSChris Mason #include "transaction.h"
370f7d52f4SChris Mason #include "btrfs_inode.h"
380b86a832SChris Mason #include "volumes.h"
39db94535dSChris Mason #include "print-tree.h"
408b712842SChris Mason #include "async-thread.h"
41925baeddSChris Mason #include "locking.h"
42e02119d5SChris Mason #include "tree-log.h"
43fa9c0d79SChris Mason #include "free-space-cache.h"
44eb60ceacSChris Mason 
45d1310b2eSChris Mason static struct extent_io_ops btree_extent_io_ops;
468b712842SChris Mason static void end_workqueue_fn(struct btrfs_work *work);
474df27c4dSYan, Zheng static void free_fs_root(struct btrfs_root *root);
48acce952bSliubo static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
49acce952bSliubo 				    int read_only);
50acce952bSliubo static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
51acce952bSliubo static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
52acce952bSliubo static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
53acce952bSliubo 				      struct btrfs_root *root);
54acce952bSliubo static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
55acce952bSliubo static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
56acce952bSliubo static int btrfs_destroy_marked_extents(struct btrfs_root *root,
57acce952bSliubo 					struct extent_io_tree *dirty_pages,
58acce952bSliubo 					int mark);
59acce952bSliubo static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
60acce952bSliubo 				       struct extent_io_tree *pinned_extents);
61acce952bSliubo static int btrfs_cleanup_transaction(struct btrfs_root *root);
62ce9adaa5SChris Mason 
63d352ac68SChris Mason /*
64d352ac68SChris Mason  * end_io_wq structs are used to do processing in task context when an IO is
65d352ac68SChris Mason  * complete.  This is used during reads to verify checksums, and it is used
66d352ac68SChris Mason  * by writes to insert metadata for new file extents after IO is complete.
67d352ac68SChris Mason  */
68ce9adaa5SChris Mason struct end_io_wq {
69ce9adaa5SChris Mason 	struct bio *bio;
70ce9adaa5SChris Mason 	bio_end_io_t *end_io;
71ce9adaa5SChris Mason 	void *private;
72ce9adaa5SChris Mason 	struct btrfs_fs_info *info;
73ce9adaa5SChris Mason 	int error;
7422c59948SChris Mason 	int metadata;
75ce9adaa5SChris Mason 	struct list_head list;
768b712842SChris Mason 	struct btrfs_work work;
77ce9adaa5SChris Mason };
780da5468fSChris Mason 
79d352ac68SChris Mason /*
80d352ac68SChris Mason  * async submit bios are used to offload expensive checksumming
81d352ac68SChris Mason  * onto the worker threads.  They checksum file and metadata bios
82d352ac68SChris Mason  * just before they are sent down the IO stack.
83d352ac68SChris Mason  */
8444b8bd7eSChris Mason struct async_submit_bio {
8544b8bd7eSChris Mason 	struct inode *inode;
8644b8bd7eSChris Mason 	struct bio *bio;
8744b8bd7eSChris Mason 	struct list_head list;
884a69a410SChris Mason 	extent_submit_bio_hook_t *submit_bio_start;
894a69a410SChris Mason 	extent_submit_bio_hook_t *submit_bio_done;
9044b8bd7eSChris Mason 	int rw;
9144b8bd7eSChris Mason 	int mirror_num;
92c8b97818SChris Mason 	unsigned long bio_flags;
93eaf25d93SChris Mason 	/*
94eaf25d93SChris Mason 	 * bio_offset is optional, can be used if the pages in the bio
95eaf25d93SChris Mason 	 * can't tell us where in the file the bio should go
96eaf25d93SChris Mason 	 */
97eaf25d93SChris Mason 	u64 bio_offset;
988b712842SChris Mason 	struct btrfs_work work;
9944b8bd7eSChris Mason };
10044b8bd7eSChris Mason 
/*
 * Lockdep classes and names for extent buffer locks, one per tree level.
 *
 * Indexing by the level stored in the tree block lets lockdep know that
 * locks on lower nodes nest inside the locks of higher nodes.
 *
 * btree_readpage_end_io_hook() assigns the class once the block's start,
 * first page and fsid have been checked, before the pages are unlocked.
 * (The original note also mentions btrfs_init_new_buffer() doing the same
 * for freshly allocated blocks; that function is not in this file section.)
 *
 * The tables are written out for exactly nine levels (0..8).  The
 * preprocessor check below breaks the build if BTRFS_MAX_LEVEL changes so
 * that this code gets updated with it.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
#  error
# endif
static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
	[0] = "btrfs-extent-00",	/* leaf */
	[1] = "btrfs-extent-01",
	[2] = "btrfs-extent-02",
	[3] = "btrfs-extent-03",
	[4] = "btrfs-extent-04",
	[5] = "btrfs-extent-05",
	[6] = "btrfs-extent-06",
	[7] = "btrfs-extent-07",
	[8] = "btrfs-extent-08",	/* highest possible level */
};
#endif
1344008c04aSChris Mason 
135d352ac68SChris Mason /*
136d352ac68SChris Mason  * extents on the btree inode are pretty simple, there's one extent
137d352ac68SChris Mason  * that covers the entire device
138d352ac68SChris Mason  */
139b2950863SChristoph Hellwig static struct extent_map *btree_get_extent(struct inode *inode,
140b2950863SChristoph Hellwig 		struct page *page, size_t page_offset, u64 start, u64 len,
1415f39d397SChris Mason 		int create)
1425f39d397SChris Mason {
1435f39d397SChris Mason 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1445f39d397SChris Mason 	struct extent_map *em;
1455f39d397SChris Mason 	int ret;
1465f39d397SChris Mason 
147890871beSChris Mason 	read_lock(&em_tree->lock);
148d1310b2eSChris Mason 	em = lookup_extent_mapping(em_tree, start, len);
149a061fc8dSChris Mason 	if (em) {
150a061fc8dSChris Mason 		em->bdev =
151a061fc8dSChris Mason 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
152890871beSChris Mason 		read_unlock(&em_tree->lock);
1535f39d397SChris Mason 		goto out;
154a061fc8dSChris Mason 	}
155890871beSChris Mason 	read_unlock(&em_tree->lock);
1567b13b7b1SChris Mason 
1575f39d397SChris Mason 	em = alloc_extent_map(GFP_NOFS);
1585f39d397SChris Mason 	if (!em) {
1595f39d397SChris Mason 		em = ERR_PTR(-ENOMEM);
1605f39d397SChris Mason 		goto out;
1615f39d397SChris Mason 	}
1625f39d397SChris Mason 	em->start = 0;
1630afbaf8cSChris Mason 	em->len = (u64)-1;
164c8b97818SChris Mason 	em->block_len = (u64)-1;
1655f39d397SChris Mason 	em->block_start = 0;
166a061fc8dSChris Mason 	em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
167d1310b2eSChris Mason 
168890871beSChris Mason 	write_lock(&em_tree->lock);
1695f39d397SChris Mason 	ret = add_extent_mapping(em_tree, em);
1705f39d397SChris Mason 	if (ret == -EEXIST) {
1710afbaf8cSChris Mason 		u64 failed_start = em->start;
1720afbaf8cSChris Mason 		u64 failed_len = em->len;
1730afbaf8cSChris Mason 
1745f39d397SChris Mason 		free_extent_map(em);
1757b13b7b1SChris Mason 		em = lookup_extent_mapping(em_tree, start, len);
1760afbaf8cSChris Mason 		if (em) {
1777b13b7b1SChris Mason 			ret = 0;
1780afbaf8cSChris Mason 		} else {
1790afbaf8cSChris Mason 			em = lookup_extent_mapping(em_tree, failed_start,
1800afbaf8cSChris Mason 						   failed_len);
1817b13b7b1SChris Mason 			ret = -EIO;
1820afbaf8cSChris Mason 		}
1835f39d397SChris Mason 	} else if (ret) {
1847b13b7b1SChris Mason 		free_extent_map(em);
1857b13b7b1SChris Mason 		em = NULL;
1865f39d397SChris Mason 	}
187890871beSChris Mason 	write_unlock(&em_tree->lock);
1887b13b7b1SChris Mason 
1897b13b7b1SChris Mason 	if (ret)
1907b13b7b1SChris Mason 		em = ERR_PTR(ret);
1915f39d397SChris Mason out:
1925f39d397SChris Mason 	return em;
1935f39d397SChris Mason }
1945f39d397SChris Mason 
19519c00ddcSChris Mason u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
19619c00ddcSChris Mason {
197163e783eSDavid Woodhouse 	return crc32c(seed, data, len);
19819c00ddcSChris Mason }
19919c00ddcSChris Mason 
20019c00ddcSChris Mason void btrfs_csum_final(u32 crc, char *result)
20119c00ddcSChris Mason {
2027e75bf3fSDavid Sterba 	put_unaligned_le32(~crc, result);
20319c00ddcSChris Mason }
20419c00ddcSChris Mason 
205d352ac68SChris Mason /*
206d352ac68SChris Mason  * compute the csum for a btree block, and either verify it or write it
207d352ac68SChris Mason  * into the csum field of the block.
208d352ac68SChris Mason  */
20919c00ddcSChris Mason static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
21019c00ddcSChris Mason 			   int verify)
21119c00ddcSChris Mason {
212607d432dSJosef Bacik 	u16 csum_size =
213607d432dSJosef Bacik 		btrfs_super_csum_size(&root->fs_info->super_copy);
214607d432dSJosef Bacik 	char *result = NULL;
21519c00ddcSChris Mason 	unsigned long len;
21619c00ddcSChris Mason 	unsigned long cur_len;
21719c00ddcSChris Mason 	unsigned long offset = BTRFS_CSUM_SIZE;
21819c00ddcSChris Mason 	char *map_token = NULL;
21919c00ddcSChris Mason 	char *kaddr;
22019c00ddcSChris Mason 	unsigned long map_start;
22119c00ddcSChris Mason 	unsigned long map_len;
22219c00ddcSChris Mason 	int err;
22319c00ddcSChris Mason 	u32 crc = ~(u32)0;
224607d432dSJosef Bacik 	unsigned long inline_result;
22519c00ddcSChris Mason 
22619c00ddcSChris Mason 	len = buf->len - offset;
22719c00ddcSChris Mason 	while (len > 0) {
22819c00ddcSChris Mason 		err = map_private_extent_buffer(buf, offset, 32,
22919c00ddcSChris Mason 					&map_token, &kaddr,
23019c00ddcSChris Mason 					&map_start, &map_len, KM_USER0);
231d397712bSChris Mason 		if (err)
23219c00ddcSChris Mason 			return 1;
23319c00ddcSChris Mason 		cur_len = min(len, map_len - (offset - map_start));
23419c00ddcSChris Mason 		crc = btrfs_csum_data(root, kaddr + offset - map_start,
23519c00ddcSChris Mason 				      crc, cur_len);
23619c00ddcSChris Mason 		len -= cur_len;
23719c00ddcSChris Mason 		offset += cur_len;
23819c00ddcSChris Mason 		unmap_extent_buffer(buf, map_token, KM_USER0);
23919c00ddcSChris Mason 	}
240607d432dSJosef Bacik 	if (csum_size > sizeof(inline_result)) {
241607d432dSJosef Bacik 		result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
242607d432dSJosef Bacik 		if (!result)
243607d432dSJosef Bacik 			return 1;
244607d432dSJosef Bacik 	} else {
245607d432dSJosef Bacik 		result = (char *)&inline_result;
246607d432dSJosef Bacik 	}
247607d432dSJosef Bacik 
24819c00ddcSChris Mason 	btrfs_csum_final(crc, result);
24919c00ddcSChris Mason 
25019c00ddcSChris Mason 	if (verify) {
251607d432dSJosef Bacik 		if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
252e4204dedSChris Mason 			u32 val;
253e4204dedSChris Mason 			u32 found = 0;
254607d432dSJosef Bacik 			memcpy(&found, result, csum_size);
255e4204dedSChris Mason 
256607d432dSJosef Bacik 			read_extent_buffer(buf, &val, 0, csum_size);
257193f284dSChris Mason 			if (printk_ratelimit()) {
258193f284dSChris Mason 				printk(KERN_INFO "btrfs: %s checksum verify "
259193f284dSChris Mason 				       "failed on %llu wanted %X found %X "
260193f284dSChris Mason 				       "level %d\n",
26119c00ddcSChris Mason 				       root->fs_info->sb->s_id,
262193f284dSChris Mason 				       (unsigned long long)buf->start, val, found,
263193f284dSChris Mason 				       btrfs_header_level(buf));
264193f284dSChris Mason 			}
265607d432dSJosef Bacik 			if (result != (char *)&inline_result)
266607d432dSJosef Bacik 				kfree(result);
26719c00ddcSChris Mason 			return 1;
26819c00ddcSChris Mason 		}
26919c00ddcSChris Mason 	} else {
270607d432dSJosef Bacik 		write_extent_buffer(buf, result, 0, csum_size);
27119c00ddcSChris Mason 	}
272607d432dSJosef Bacik 	if (result != (char *)&inline_result)
273607d432dSJosef Bacik 		kfree(result);
27419c00ddcSChris Mason 	return 0;
27519c00ddcSChris Mason }
27619c00ddcSChris Mason 
277d352ac68SChris Mason /*
278d352ac68SChris Mason  * we can't consider a given block up to date unless the transid of the
279d352ac68SChris Mason  * block matches the transid in the parent node's pointer.  This is how we
280d352ac68SChris Mason  * detect blocks that either didn't get written at all or got written
281d352ac68SChris Mason  * in the wrong place.
282d352ac68SChris Mason  */
2831259ab75SChris Mason static int verify_parent_transid(struct extent_io_tree *io_tree,
2841259ab75SChris Mason 				 struct extent_buffer *eb, u64 parent_transid)
2851259ab75SChris Mason {
2862ac55d41SJosef Bacik 	struct extent_state *cached_state = NULL;
2871259ab75SChris Mason 	int ret;
2881259ab75SChris Mason 
2891259ab75SChris Mason 	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
2901259ab75SChris Mason 		return 0;
2911259ab75SChris Mason 
2922ac55d41SJosef Bacik 	lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
2932ac55d41SJosef Bacik 			 0, &cached_state, GFP_NOFS);
2942ac55d41SJosef Bacik 	if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
2951259ab75SChris Mason 	    btrfs_header_generation(eb) == parent_transid) {
2961259ab75SChris Mason 		ret = 0;
2971259ab75SChris Mason 		goto out;
2981259ab75SChris Mason 	}
299193f284dSChris Mason 	if (printk_ratelimit()) {
300193f284dSChris Mason 		printk("parent transid verify failed on %llu wanted %llu "
301193f284dSChris Mason 		       "found %llu\n",
3021259ab75SChris Mason 		       (unsigned long long)eb->start,
3031259ab75SChris Mason 		       (unsigned long long)parent_transid,
3041259ab75SChris Mason 		       (unsigned long long)btrfs_header_generation(eb));
305193f284dSChris Mason 	}
3061259ab75SChris Mason 	ret = 1;
3072ac55d41SJosef Bacik 	clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
30833958dc6SChris Mason out:
3092ac55d41SJosef Bacik 	unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
3102ac55d41SJosef Bacik 			     &cached_state, GFP_NOFS);
3111259ab75SChris Mason 	return ret;
3121259ab75SChris Mason }
3131259ab75SChris Mason 
314d352ac68SChris Mason /*
315d352ac68SChris Mason  * helper to read a given tree block, doing retries as required when
316d352ac68SChris Mason  * the checksums don't match and we have alternate mirrors to try.
317d352ac68SChris Mason  */
318f188591eSChris Mason static int btree_read_extent_buffer_pages(struct btrfs_root *root,
319f188591eSChris Mason 					  struct extent_buffer *eb,
320ca7a79adSChris Mason 					  u64 start, u64 parent_transid)
321f188591eSChris Mason {
322f188591eSChris Mason 	struct extent_io_tree *io_tree;
323f188591eSChris Mason 	int ret;
324f188591eSChris Mason 	int num_copies = 0;
325f188591eSChris Mason 	int mirror_num = 0;
326f188591eSChris Mason 
327a826d6dcSJosef Bacik 	clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
328f188591eSChris Mason 	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
329f188591eSChris Mason 	while (1) {
330f188591eSChris Mason 		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
331f188591eSChris Mason 					       btree_get_extent, mirror_num);
3321259ab75SChris Mason 		if (!ret &&
3331259ab75SChris Mason 		    !verify_parent_transid(io_tree, eb, parent_transid))
334f188591eSChris Mason 			return ret;
335d397712bSChris Mason 
336a826d6dcSJosef Bacik 		/*
337a826d6dcSJosef Bacik 		 * This buffer's crc is fine, but its contents are corrupted, so
338a826d6dcSJosef Bacik 		 * there is no reason to read the other copies, they won't be
339a826d6dcSJosef Bacik 		 * any less wrong.
340a826d6dcSJosef Bacik 		 */
341a826d6dcSJosef Bacik 		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
342a826d6dcSJosef Bacik 			return ret;
343a826d6dcSJosef Bacik 
344f188591eSChris Mason 		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
345f188591eSChris Mason 					      eb->start, eb->len);
3464235298eSChris Mason 		if (num_copies == 1)
347f188591eSChris Mason 			return ret;
3484235298eSChris Mason 
349f188591eSChris Mason 		mirror_num++;
3504235298eSChris Mason 		if (mirror_num > num_copies)
351f188591eSChris Mason 			return ret;
352f188591eSChris Mason 	}
353f188591eSChris Mason 	return -EIO;
354f188591eSChris Mason }
35519c00ddcSChris Mason 
356d352ac68SChris Mason /*
357d397712bSChris Mason  * checksum a dirty tree block before IO.  This has extra checks to make sure
358d397712bSChris Mason  * we only fill in the checksum field in the first page of a multi-page block
359d352ac68SChris Mason  */
360d397712bSChris Mason 
361b2950863SChristoph Hellwig static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
36219c00ddcSChris Mason {
363d1310b2eSChris Mason 	struct extent_io_tree *tree;
36435ebb934SChris Mason 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
36519c00ddcSChris Mason 	u64 found_start;
36619c00ddcSChris Mason 	unsigned long len;
36719c00ddcSChris Mason 	struct extent_buffer *eb;
368f188591eSChris Mason 	int ret;
369f188591eSChris Mason 
370d1310b2eSChris Mason 	tree = &BTRFS_I(page->mapping->host)->io_tree;
37119c00ddcSChris Mason 
372eb14ab8eSChris Mason 	if (page->private == EXTENT_PAGE_PRIVATE) {
373eb14ab8eSChris Mason 		WARN_ON(1);
37419c00ddcSChris Mason 		goto out;
375eb14ab8eSChris Mason 	}
376eb14ab8eSChris Mason 	if (!page->private) {
377eb14ab8eSChris Mason 		WARN_ON(1);
37819c00ddcSChris Mason 		goto out;
379eb14ab8eSChris Mason 	}
38019c00ddcSChris Mason 	len = page->private >> 2;
381d397712bSChris Mason 	WARN_ON(len == 0);
382d397712bSChris Mason 
38319c00ddcSChris Mason 	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
38491ca338dSTsutomu Itoh 	if (eb == NULL) {
38591ca338dSTsutomu Itoh 		WARN_ON(1);
38691ca338dSTsutomu Itoh 		goto out;
38791ca338dSTsutomu Itoh 	}
388ca7a79adSChris Mason 	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
389ca7a79adSChris Mason 					     btrfs_header_generation(eb));
390f188591eSChris Mason 	BUG_ON(ret);
391784b4e29SChris Mason 	WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
392784b4e29SChris Mason 
39319c00ddcSChris Mason 	found_start = btrfs_header_bytenr(eb);
39419c00ddcSChris Mason 	if (found_start != start) {
39555c69072SChris Mason 		WARN_ON(1);
39655c69072SChris Mason 		goto err;
39755c69072SChris Mason 	}
39855c69072SChris Mason 	if (eb->first_page != page) {
39955c69072SChris Mason 		WARN_ON(1);
40055c69072SChris Mason 		goto err;
40155c69072SChris Mason 	}
40255c69072SChris Mason 	if (!PageUptodate(page)) {
40355c69072SChris Mason 		WARN_ON(1);
40455c69072SChris Mason 		goto err;
40519c00ddcSChris Mason 	}
40619c00ddcSChris Mason 	csum_tree_block(root, eb, 0);
40755c69072SChris Mason err:
40819c00ddcSChris Mason 	free_extent_buffer(eb);
40919c00ddcSChris Mason out:
41019c00ddcSChris Mason 	return 0;
41119c00ddcSChris Mason }
41219c00ddcSChris Mason 
4132b82032cSYan Zheng static int check_tree_block_fsid(struct btrfs_root *root,
4142b82032cSYan Zheng 				 struct extent_buffer *eb)
4152b82032cSYan Zheng {
4162b82032cSYan Zheng 	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
4172b82032cSYan Zheng 	u8 fsid[BTRFS_UUID_SIZE];
4182b82032cSYan Zheng 	int ret = 1;
4192b82032cSYan Zheng 
4202b82032cSYan Zheng 	read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
4212b82032cSYan Zheng 			   BTRFS_FSID_SIZE);
4222b82032cSYan Zheng 	while (fs_devices) {
4232b82032cSYan Zheng 		if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
4242b82032cSYan Zheng 			ret = 0;
4252b82032cSYan Zheng 			break;
4262b82032cSYan Zheng 		}
4272b82032cSYan Zheng 		fs_devices = fs_devices->seed;
4282b82032cSYan Zheng 	}
4292b82032cSYan Zheng 	return ret;
4302b82032cSYan Zheng }
4312b82032cSYan Zheng 
432a826d6dcSJosef Bacik #define CORRUPT(reason, eb, root, slot)				\
433a826d6dcSJosef Bacik 	printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu,"	\
434a826d6dcSJosef Bacik 	       "root=%llu, slot=%d\n", reason,			\
435a826d6dcSJosef Bacik 	       (unsigned long long)btrfs_header_bytenr(eb),	\
436a826d6dcSJosef Bacik 	       (unsigned long long)root->objectid, slot)
437a826d6dcSJosef Bacik 
438a826d6dcSJosef Bacik static noinline int check_leaf(struct btrfs_root *root,
439a826d6dcSJosef Bacik 			       struct extent_buffer *leaf)
440a826d6dcSJosef Bacik {
441a826d6dcSJosef Bacik 	struct btrfs_key key;
442a826d6dcSJosef Bacik 	struct btrfs_key leaf_key;
443a826d6dcSJosef Bacik 	u32 nritems = btrfs_header_nritems(leaf);
444a826d6dcSJosef Bacik 	int slot;
445a826d6dcSJosef Bacik 
446a826d6dcSJosef Bacik 	if (nritems == 0)
447a826d6dcSJosef Bacik 		return 0;
448a826d6dcSJosef Bacik 
449a826d6dcSJosef Bacik 	/* Check the 0 item */
450a826d6dcSJosef Bacik 	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
451a826d6dcSJosef Bacik 	    BTRFS_LEAF_DATA_SIZE(root)) {
452a826d6dcSJosef Bacik 		CORRUPT("invalid item offset size pair", leaf, root, 0);
453a826d6dcSJosef Bacik 		return -EIO;
454a826d6dcSJosef Bacik 	}
455a826d6dcSJosef Bacik 
456a826d6dcSJosef Bacik 	/*
457a826d6dcSJosef Bacik 	 * Check to make sure each items keys are in the correct order and their
458a826d6dcSJosef Bacik 	 * offsets make sense.  We only have to loop through nritems-1 because
459a826d6dcSJosef Bacik 	 * we check the current slot against the next slot, which verifies the
460a826d6dcSJosef Bacik 	 * next slot's offset+size makes sense and that the current's slot
461a826d6dcSJosef Bacik 	 * offset is correct.
462a826d6dcSJosef Bacik 	 */
463a826d6dcSJosef Bacik 	for (slot = 0; slot < nritems - 1; slot++) {
464a826d6dcSJosef Bacik 		btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
465a826d6dcSJosef Bacik 		btrfs_item_key_to_cpu(leaf, &key, slot + 1);
466a826d6dcSJosef Bacik 
467a826d6dcSJosef Bacik 		/* Make sure the keys are in the right order */
468a826d6dcSJosef Bacik 		if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
469a826d6dcSJosef Bacik 			CORRUPT("bad key order", leaf, root, slot);
470a826d6dcSJosef Bacik 			return -EIO;
471a826d6dcSJosef Bacik 		}
472a826d6dcSJosef Bacik 
473a826d6dcSJosef Bacik 		/*
474a826d6dcSJosef Bacik 		 * Make sure the offset and ends are right, remember that the
475a826d6dcSJosef Bacik 		 * item data starts at the end of the leaf and grows towards the
476a826d6dcSJosef Bacik 		 * front.
477a826d6dcSJosef Bacik 		 */
478a826d6dcSJosef Bacik 		if (btrfs_item_offset_nr(leaf, slot) !=
479a826d6dcSJosef Bacik 			btrfs_item_end_nr(leaf, slot + 1)) {
480a826d6dcSJosef Bacik 			CORRUPT("slot offset bad", leaf, root, slot);
481a826d6dcSJosef Bacik 			return -EIO;
482a826d6dcSJosef Bacik 		}
483a826d6dcSJosef Bacik 
484a826d6dcSJosef Bacik 		/*
485a826d6dcSJosef Bacik 		 * Check to make sure that we don't point outside of the leaf,
486a826d6dcSJosef Bacik 		 * just incase all the items are consistent to eachother, but
487a826d6dcSJosef Bacik 		 * all point outside of the leaf.
488a826d6dcSJosef Bacik 		 */
489a826d6dcSJosef Bacik 		if (btrfs_item_end_nr(leaf, slot) >
490a826d6dcSJosef Bacik 		    BTRFS_LEAF_DATA_SIZE(root)) {
491a826d6dcSJosef Bacik 			CORRUPT("slot end outside of leaf", leaf, root, slot);
492a826d6dcSJosef Bacik 			return -EIO;
493a826d6dcSJosef Bacik 		}
494a826d6dcSJosef Bacik 	}
495a826d6dcSJosef Bacik 
496a826d6dcSJosef Bacik 	return 0;
497a826d6dcSJosef Bacik }
498a826d6dcSJosef Bacik 
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Give the lock of @eb the lockdep class and name that belong to tree
 * level @level.
 *
 * @level may come straight from a block header that has not been
 * checksummed yet (see btree_readpage_end_io_hook()), so a value outside
 * 0..BTRFS_MAX_LEVEL must not be used as an array index.  Such a buffer
 * simply keeps whatever class it already has.
 */
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
{
	if (level < 0 || level > BTRFS_MAX_LEVEL)
		return;

	lockdep_set_class_and_name(&eb->lock,
			   &btrfs_eb_class[level],
			   btrfs_eb_name[level]);
}
#endif
5074008c04aSChris Mason 
508b2950863SChristoph Hellwig static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
509ce9adaa5SChris Mason 			       struct extent_state *state)
510ce9adaa5SChris Mason {
511ce9adaa5SChris Mason 	struct extent_io_tree *tree;
512ce9adaa5SChris Mason 	u64 found_start;
513ce9adaa5SChris Mason 	int found_level;
514ce9adaa5SChris Mason 	unsigned long len;
515ce9adaa5SChris Mason 	struct extent_buffer *eb;
516ce9adaa5SChris Mason 	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
517f188591eSChris Mason 	int ret = 0;
518ce9adaa5SChris Mason 
519ce9adaa5SChris Mason 	tree = &BTRFS_I(page->mapping->host)->io_tree;
520ce9adaa5SChris Mason 	if (page->private == EXTENT_PAGE_PRIVATE)
521ce9adaa5SChris Mason 		goto out;
522ce9adaa5SChris Mason 	if (!page->private)
523ce9adaa5SChris Mason 		goto out;
524d397712bSChris Mason 
525ce9adaa5SChris Mason 	len = page->private >> 2;
526d397712bSChris Mason 	WARN_ON(len == 0);
527d397712bSChris Mason 
528ce9adaa5SChris Mason 	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
52991ca338dSTsutomu Itoh 	if (eb == NULL) {
53091ca338dSTsutomu Itoh 		ret = -EIO;
53191ca338dSTsutomu Itoh 		goto out;
53291ca338dSTsutomu Itoh 	}
533f188591eSChris Mason 
534ce9adaa5SChris Mason 	found_start = btrfs_header_bytenr(eb);
53523a07867SChris Mason 	if (found_start != start) {
536193f284dSChris Mason 		if (printk_ratelimit()) {
537193f284dSChris Mason 			printk(KERN_INFO "btrfs bad tree block start "
538193f284dSChris Mason 			       "%llu %llu\n",
539a1b32a59SChris Mason 			       (unsigned long long)found_start,
540a1b32a59SChris Mason 			       (unsigned long long)eb->start);
541193f284dSChris Mason 		}
542f188591eSChris Mason 		ret = -EIO;
543ce9adaa5SChris Mason 		goto err;
544ce9adaa5SChris Mason 	}
545ce9adaa5SChris Mason 	if (eb->first_page != page) {
546d397712bSChris Mason 		printk(KERN_INFO "btrfs bad first page %lu %lu\n",
547d397712bSChris Mason 		       eb->first_page->index, page->index);
548ce9adaa5SChris Mason 		WARN_ON(1);
549f188591eSChris Mason 		ret = -EIO;
550ce9adaa5SChris Mason 		goto err;
551ce9adaa5SChris Mason 	}
5522b82032cSYan Zheng 	if (check_tree_block_fsid(root, eb)) {
553193f284dSChris Mason 		if (printk_ratelimit()) {
554d397712bSChris Mason 			printk(KERN_INFO "btrfs bad fsid on block %llu\n",
555d397712bSChris Mason 			       (unsigned long long)eb->start);
556193f284dSChris Mason 		}
5571259ab75SChris Mason 		ret = -EIO;
5581259ab75SChris Mason 		goto err;
5591259ab75SChris Mason 	}
560ce9adaa5SChris Mason 	found_level = btrfs_header_level(eb);
561ce9adaa5SChris Mason 
5624008c04aSChris Mason 	btrfs_set_buffer_lockdep_class(eb, found_level);
5634008c04aSChris Mason 
564ce9adaa5SChris Mason 	ret = csum_tree_block(root, eb, 1);
565a826d6dcSJosef Bacik 	if (ret) {
566f188591eSChris Mason 		ret = -EIO;
567a826d6dcSJosef Bacik 		goto err;
568a826d6dcSJosef Bacik 	}
569a826d6dcSJosef Bacik 
570a826d6dcSJosef Bacik 	/*
571a826d6dcSJosef Bacik 	 * If this is a leaf block and it is corrupt, set the corrupt bit so
572a826d6dcSJosef Bacik 	 * that we don't try and read the other copies of this block, just
573a826d6dcSJosef Bacik 	 * return -EIO.
574a826d6dcSJosef Bacik 	 */
575a826d6dcSJosef Bacik 	if (found_level == 0 && check_leaf(root, eb)) {
576a826d6dcSJosef Bacik 		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
577a826d6dcSJosef Bacik 		ret = -EIO;
578a826d6dcSJosef Bacik 	}
579ce9adaa5SChris Mason 
580ce9adaa5SChris Mason 	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
581ce9adaa5SChris Mason 	end = eb->start + end - 1;
582ce9adaa5SChris Mason err:
583ce9adaa5SChris Mason 	free_extent_buffer(eb);
584ce9adaa5SChris Mason out:
585f188591eSChris Mason 	return ret;
586ce9adaa5SChris Mason }
587ce9adaa5SChris Mason 
588ce9adaa5SChris Mason static void end_workqueue_bio(struct bio *bio, int err)
589ce9adaa5SChris Mason {
590ce9adaa5SChris Mason 	struct end_io_wq *end_io_wq = bio->bi_private;
591ce9adaa5SChris Mason 	struct btrfs_fs_info *fs_info;
592ce9adaa5SChris Mason 
593ce9adaa5SChris Mason 	fs_info = end_io_wq->info;
594ce9adaa5SChris Mason 	end_io_wq->error = err;
5958b712842SChris Mason 	end_io_wq->work.func = end_workqueue_fn;
5968b712842SChris Mason 	end_io_wq->work.flags = 0;
597d20f7043SChris Mason 
5987b6d91daSChristoph Hellwig 	if (bio->bi_rw & REQ_WRITE) {
5990cb59c99SJosef Bacik 		if (end_io_wq->metadata == 1)
600cad321adSChris Mason 			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
601cad321adSChris Mason 					   &end_io_wq->work);
6020cb59c99SJosef Bacik 		else if (end_io_wq->metadata == 2)
6030cb59c99SJosef Bacik 			btrfs_queue_worker(&fs_info->endio_freespace_worker,
6040cb59c99SJosef Bacik 					   &end_io_wq->work);
605cad321adSChris Mason 		else
606e6dcd2dcSChris Mason 			btrfs_queue_worker(&fs_info->endio_write_workers,
607e6dcd2dcSChris Mason 					   &end_io_wq->work);
608d20f7043SChris Mason 	} else {
609d20f7043SChris Mason 		if (end_io_wq->metadata)
610d20f7043SChris Mason 			btrfs_queue_worker(&fs_info->endio_meta_workers,
611d20f7043SChris Mason 					   &end_io_wq->work);
612e6dcd2dcSChris Mason 		else
613d20f7043SChris Mason 			btrfs_queue_worker(&fs_info->endio_workers,
614d20f7043SChris Mason 					   &end_io_wq->work);
615d20f7043SChris Mason 	}
616ce9adaa5SChris Mason }
617ce9adaa5SChris Mason 
6180cb59c99SJosef Bacik /*
6190cb59c99SJosef Bacik  * For the metadata arg you want
6200cb59c99SJosef Bacik  *
6210cb59c99SJosef Bacik  * 0 - if data
6220cb59c99SJosef Bacik  * 1 - if normal metadta
6230cb59c99SJosef Bacik  * 2 - if writing to the free space cache area
6240cb59c99SJosef Bacik  */
62522c59948SChris Mason int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
62622c59948SChris Mason 			int metadata)
6270b86a832SChris Mason {
628ce9adaa5SChris Mason 	struct end_io_wq *end_io_wq;
629ce9adaa5SChris Mason 	end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
630ce9adaa5SChris Mason 	if (!end_io_wq)
631ce9adaa5SChris Mason 		return -ENOMEM;
632ce9adaa5SChris Mason 
633ce9adaa5SChris Mason 	end_io_wq->private = bio->bi_private;
634ce9adaa5SChris Mason 	end_io_wq->end_io = bio->bi_end_io;
63522c59948SChris Mason 	end_io_wq->info = info;
636ce9adaa5SChris Mason 	end_io_wq->error = 0;
637ce9adaa5SChris Mason 	end_io_wq->bio = bio;
63822c59948SChris Mason 	end_io_wq->metadata = metadata;
639ce9adaa5SChris Mason 
640ce9adaa5SChris Mason 	bio->bi_private = end_io_wq;
641ce9adaa5SChris Mason 	bio->bi_end_io = end_workqueue_bio;
64222c59948SChris Mason 	return 0;
64322c59948SChris Mason }
64422c59948SChris Mason 
645b64a2851SChris Mason unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
6464854ddd0SChris Mason {
6474854ddd0SChris Mason 	unsigned long limit = min_t(unsigned long,
6484854ddd0SChris Mason 				    info->workers.max_workers,
6494854ddd0SChris Mason 				    info->fs_devices->open_devices);
6504854ddd0SChris Mason 	return 256 * limit;
6514854ddd0SChris Mason }
6524854ddd0SChris Mason 
653777e6bd7SChris Mason int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
6540986fe9eSChris Mason {
655b64a2851SChris Mason 	return atomic_read(&info->nr_async_bios) >
656b64a2851SChris Mason 		btrfs_async_submit_limit(info);
6570986fe9eSChris Mason }
6580986fe9eSChris Mason 
6594a69a410SChris Mason static void run_one_async_start(struct btrfs_work *work)
6604a69a410SChris Mason {
6614a69a410SChris Mason 	struct async_submit_bio *async;
6624a69a410SChris Mason 
6634a69a410SChris Mason 	async = container_of(work, struct  async_submit_bio, work);
6644a69a410SChris Mason 	async->submit_bio_start(async->inode, async->rw, async->bio,
665eaf25d93SChris Mason 			       async->mirror_num, async->bio_flags,
666eaf25d93SChris Mason 			       async->bio_offset);
6674a69a410SChris Mason }
6684a69a410SChris Mason 
6694a69a410SChris Mason static void run_one_async_done(struct btrfs_work *work)
6708b712842SChris Mason {
6718b712842SChris Mason 	struct btrfs_fs_info *fs_info;
6728b712842SChris Mason 	struct async_submit_bio *async;
6734854ddd0SChris Mason 	int limit;
6748b712842SChris Mason 
6758b712842SChris Mason 	async = container_of(work, struct  async_submit_bio, work);
6768b712842SChris Mason 	fs_info = BTRFS_I(async->inode)->root->fs_info;
6774854ddd0SChris Mason 
678b64a2851SChris Mason 	limit = btrfs_async_submit_limit(fs_info);
6794854ddd0SChris Mason 	limit = limit * 2 / 3;
6804854ddd0SChris Mason 
6818b712842SChris Mason 	atomic_dec(&fs_info->nr_async_submits);
6820986fe9eSChris Mason 
683b64a2851SChris Mason 	if (atomic_read(&fs_info->nr_async_submits) < limit &&
684b64a2851SChris Mason 	    waitqueue_active(&fs_info->async_submit_wait))
6854854ddd0SChris Mason 		wake_up(&fs_info->async_submit_wait);
6864854ddd0SChris Mason 
6874a69a410SChris Mason 	async->submit_bio_done(async->inode, async->rw, async->bio,
688eaf25d93SChris Mason 			       async->mirror_num, async->bio_flags,
689eaf25d93SChris Mason 			       async->bio_offset);
6904a69a410SChris Mason }
6914a69a410SChris Mason 
6924a69a410SChris Mason static void run_one_async_free(struct btrfs_work *work)
6934a69a410SChris Mason {
6944a69a410SChris Mason 	struct async_submit_bio *async;
6954a69a410SChris Mason 
6964a69a410SChris Mason 	async = container_of(work, struct  async_submit_bio, work);
6978b712842SChris Mason 	kfree(async);
6988b712842SChris Mason }
6998b712842SChris Mason 
70044b8bd7eSChris Mason int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
70144b8bd7eSChris Mason 			int rw, struct bio *bio, int mirror_num,
702c8b97818SChris Mason 			unsigned long bio_flags,
703eaf25d93SChris Mason 			u64 bio_offset,
7044a69a410SChris Mason 			extent_submit_bio_hook_t *submit_bio_start,
7054a69a410SChris Mason 			extent_submit_bio_hook_t *submit_bio_done)
70644b8bd7eSChris Mason {
70744b8bd7eSChris Mason 	struct async_submit_bio *async;
70844b8bd7eSChris Mason 
70944b8bd7eSChris Mason 	async = kmalloc(sizeof(*async), GFP_NOFS);
71044b8bd7eSChris Mason 	if (!async)
71144b8bd7eSChris Mason 		return -ENOMEM;
71244b8bd7eSChris Mason 
71344b8bd7eSChris Mason 	async->inode = inode;
71444b8bd7eSChris Mason 	async->rw = rw;
71544b8bd7eSChris Mason 	async->bio = bio;
71644b8bd7eSChris Mason 	async->mirror_num = mirror_num;
7174a69a410SChris Mason 	async->submit_bio_start = submit_bio_start;
7184a69a410SChris Mason 	async->submit_bio_done = submit_bio_done;
7194a69a410SChris Mason 
7204a69a410SChris Mason 	async->work.func = run_one_async_start;
7214a69a410SChris Mason 	async->work.ordered_func = run_one_async_done;
7224a69a410SChris Mason 	async->work.ordered_free = run_one_async_free;
7234a69a410SChris Mason 
7248b712842SChris Mason 	async->work.flags = 0;
725c8b97818SChris Mason 	async->bio_flags = bio_flags;
726eaf25d93SChris Mason 	async->bio_offset = bio_offset;
7278c8bee1dSChris Mason 
728cb03c743SChris Mason 	atomic_inc(&fs_info->nr_async_submits);
729d313d7a3SChris Mason 
7307b6d91daSChristoph Hellwig 	if (rw & REQ_SYNC)
731d313d7a3SChris Mason 		btrfs_set_work_high_prio(&async->work);
732d313d7a3SChris Mason 
7338b712842SChris Mason 	btrfs_queue_worker(&fs_info->workers, &async->work);
7349473f16cSChris Mason 
735771ed689SChris Mason 	while (atomic_read(&fs_info->async_submit_draining) &&
736771ed689SChris Mason 	      atomic_read(&fs_info->nr_async_submits)) {
737771ed689SChris Mason 		wait_event(fs_info->async_submit_wait,
738771ed689SChris Mason 			   (atomic_read(&fs_info->nr_async_submits) == 0));
739771ed689SChris Mason 	}
740771ed689SChris Mason 
74144b8bd7eSChris Mason 	return 0;
74244b8bd7eSChris Mason }
74344b8bd7eSChris Mason 
744ce3ed71aSChris Mason static int btree_csum_one_bio(struct bio *bio)
745ce3ed71aSChris Mason {
746ce3ed71aSChris Mason 	struct bio_vec *bvec = bio->bi_io_vec;
747ce3ed71aSChris Mason 	int bio_index = 0;
748ce3ed71aSChris Mason 	struct btrfs_root *root;
749ce3ed71aSChris Mason 
750ce3ed71aSChris Mason 	WARN_ON(bio->bi_vcnt <= 0);
751ce3ed71aSChris Mason 	while (bio_index < bio->bi_vcnt) {
752ce3ed71aSChris Mason 		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
753ce3ed71aSChris Mason 		csum_dirty_buffer(root, bvec->bv_page);
754ce3ed71aSChris Mason 		bio_index++;
755ce3ed71aSChris Mason 		bvec++;
756ce3ed71aSChris Mason 	}
757ce3ed71aSChris Mason 	return 0;
758ce3ed71aSChris Mason }
759ce3ed71aSChris Mason 
7604a69a410SChris Mason static int __btree_submit_bio_start(struct inode *inode, int rw,
7614a69a410SChris Mason 				    struct bio *bio, int mirror_num,
762eaf25d93SChris Mason 				    unsigned long bio_flags,
763eaf25d93SChris Mason 				    u64 bio_offset)
76422c59948SChris Mason {
7658b712842SChris Mason 	/*
7668b712842SChris Mason 	 * when we're called for a write, we're already in the async
7675443be45SChris Mason 	 * submission context.  Just jump into btrfs_map_bio
7688b712842SChris Mason 	 */
769ce3ed71aSChris Mason 	btree_csum_one_bio(bio);
7704a69a410SChris Mason 	return 0;
77122c59948SChris Mason }
77222c59948SChris Mason 
7734a69a410SChris Mason static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
774eaf25d93SChris Mason 				 int mirror_num, unsigned long bio_flags,
775eaf25d93SChris Mason 				 u64 bio_offset)
7764a69a410SChris Mason {
7778b712842SChris Mason 	/*
7784a69a410SChris Mason 	 * when we're called for a write, we're already in the async
7794a69a410SChris Mason 	 * submission context.  Just jump into btrfs_map_bio
7808b712842SChris Mason 	 */
7818b712842SChris Mason 	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
7820b86a832SChris Mason }
7830b86a832SChris Mason 
78444b8bd7eSChris Mason static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
785eaf25d93SChris Mason 				 int mirror_num, unsigned long bio_flags,
786eaf25d93SChris Mason 				 u64 bio_offset)
78744b8bd7eSChris Mason {
7884a69a410SChris Mason 	int ret;
789cad321adSChris Mason 
7904a69a410SChris Mason 	ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
7914a69a410SChris Mason 					  bio, 1);
7924a69a410SChris Mason 	BUG_ON(ret);
7934a69a410SChris Mason 
7947b6d91daSChristoph Hellwig 	if (!(rw & REQ_WRITE)) {
795cad321adSChris Mason 		/*
796cad321adSChris Mason 		 * called for a read, do the setup so that checksum validation
797cad321adSChris Mason 		 * can happen in the async kernel threads
798cad321adSChris Mason 		 */
7994a69a410SChris Mason 		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
8006f3577bdSChris Mason 				     mirror_num, 0);
80144b8bd7eSChris Mason 	}
802d313d7a3SChris Mason 
803cad321adSChris Mason 	/*
804cad321adSChris Mason 	 * kthread helpers are used to submit writes so that checksumming
805cad321adSChris Mason 	 * can happen in parallel across all CPUs
806cad321adSChris Mason 	 */
80744b8bd7eSChris Mason 	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
808c8b97818SChris Mason 				   inode, rw, bio, mirror_num, 0,
809eaf25d93SChris Mason 				   bio_offset,
8104a69a410SChris Mason 				   __btree_submit_bio_start,
8114a69a410SChris Mason 				   __btree_submit_bio_done);
81244b8bd7eSChris Mason }
81344b8bd7eSChris Mason 
#ifdef CONFIG_MIGRATION
/*
 * migratepage callback for the btree inode.
 *
 * Returns -EAGAIN when the page is dirty or when its private data cannot
 * be released; otherwise returns the result of migrate_page().
 */
static int btree_migratepage(struct address_space *mapping,
			struct page *newpage, struct page *page)
{
	/*
	 * a dirty btree page can't be written safely from here because the
	 * locking hook has not been run
	 */
	if (PageDirty(page))
		return -EAGAIN;

	/*
	 * Private data is managed in a filesystem specific way, so it has
	 * to be absent or dropped before the page may move.
	 */
	if (page_has_private(page)) {
		if (!try_to_release_page(page, GFP_KERNEL))
			return -EAGAIN;
	}

	return migrate_page(mapping, newpage, page);
}
#endif
834784b4e29SChris Mason 
8355f39d397SChris Mason static int btree_writepage(struct page *page, struct writeback_control *wbc)
8365f39d397SChris Mason {
837d1310b2eSChris Mason 	struct extent_io_tree *tree;
838b9473439SChris Mason 	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
839b9473439SChris Mason 	struct extent_buffer *eb;
840b9473439SChris Mason 	int was_dirty;
8415443be45SChris Mason 
842b9473439SChris Mason 	tree = &BTRFS_I(page->mapping->host)->io_tree;
843b9473439SChris Mason 	if (!(current->flags & PF_MEMALLOC)) {
844b9473439SChris Mason 		return extent_write_full_page(tree, page,
845b9473439SChris Mason 					      btree_get_extent, wbc);
846b9473439SChris Mason 	}
847b9473439SChris Mason 
8485443be45SChris Mason 	redirty_page_for_writepage(wbc, page);
849784b4e29SChris Mason 	eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
850b9473439SChris Mason 	WARN_ON(!eb);
851b9473439SChris Mason 
852b9473439SChris Mason 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
853b9473439SChris Mason 	if (!was_dirty) {
854b9473439SChris Mason 		spin_lock(&root->fs_info->delalloc_lock);
855b9473439SChris Mason 		root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
856b9473439SChris Mason 		spin_unlock(&root->fs_info->delalloc_lock);
857b9473439SChris Mason 	}
858b9473439SChris Mason 	free_extent_buffer(eb);
859b9473439SChris Mason 
8605443be45SChris Mason 	unlock_page(page);
8615443be45SChris Mason 	return 0;
8625443be45SChris Mason }
8630da5468fSChris Mason 
8640da5468fSChris Mason static int btree_writepages(struct address_space *mapping,
8650da5468fSChris Mason 			    struct writeback_control *wbc)
8660da5468fSChris Mason {
867d1310b2eSChris Mason 	struct extent_io_tree *tree;
868d1310b2eSChris Mason 	tree = &BTRFS_I(mapping->host)->io_tree;
869d8d5f3e1SChris Mason 	if (wbc->sync_mode == WB_SYNC_NONE) {
870b9473439SChris Mason 		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
871793955bcSChris Mason 		u64 num_dirty;
87224ab9cd8SChris Mason 		unsigned long thresh = 32 * 1024 * 1024;
873448d640bSChris Mason 
874448d640bSChris Mason 		if (wbc->for_kupdate)
875448d640bSChris Mason 			return 0;
876448d640bSChris Mason 
877b9473439SChris Mason 		/* this is a bit racy, but that's ok */
878b9473439SChris Mason 		num_dirty = root->fs_info->dirty_metadata_bytes;
879d397712bSChris Mason 		if (num_dirty < thresh)
880793955bcSChris Mason 			return 0;
881793955bcSChris Mason 	}
8820da5468fSChris Mason 	return extent_writepages(tree, mapping, btree_get_extent, wbc);
8830da5468fSChris Mason }
8840da5468fSChris Mason 
885b2950863SChristoph Hellwig static int btree_readpage(struct file *file, struct page *page)
8865f39d397SChris Mason {
887d1310b2eSChris Mason 	struct extent_io_tree *tree;
888d1310b2eSChris Mason 	tree = &BTRFS_I(page->mapping->host)->io_tree;
8895f39d397SChris Mason 	return extent_read_full_page(tree, page, btree_get_extent);
8905f39d397SChris Mason }
8915f39d397SChris Mason 
89270dec807SChris Mason static int btree_releasepage(struct page *page, gfp_t gfp_flags)
8935f39d397SChris Mason {
894d1310b2eSChris Mason 	struct extent_io_tree *tree;
895d1310b2eSChris Mason 	struct extent_map_tree *map;
8965f39d397SChris Mason 	int ret;
8975f39d397SChris Mason 
89898509cfcSChris Mason 	if (PageWriteback(page) || PageDirty(page))
89998509cfcSChris Mason 		return 0;
90098509cfcSChris Mason 
901d1310b2eSChris Mason 	tree = &BTRFS_I(page->mapping->host)->io_tree;
902d1310b2eSChris Mason 	map = &BTRFS_I(page->mapping->host)->extent_tree;
9036af118ceSChris Mason 
9047b13b7b1SChris Mason 	ret = try_release_extent_state(map, tree, page, gfp_flags);
905d397712bSChris Mason 	if (!ret)
9066af118ceSChris Mason 		return 0;
9076af118ceSChris Mason 
9086af118ceSChris Mason 	ret = try_release_extent_buffer(tree, page);
9095f39d397SChris Mason 	if (ret == 1) {
9105f39d397SChris Mason 		ClearPagePrivate(page);
9115f39d397SChris Mason 		set_page_private(page, 0);
912d98237b3SChris Mason 		page_cache_release(page);
9135f39d397SChris Mason 	}
9146af118ceSChris Mason 
915d98237b3SChris Mason 	return ret;
916d98237b3SChris Mason }
917d98237b3SChris Mason 
9185f39d397SChris Mason static void btree_invalidatepage(struct page *page, unsigned long offset)
919d98237b3SChris Mason {
920d1310b2eSChris Mason 	struct extent_io_tree *tree;
921d1310b2eSChris Mason 	tree = &BTRFS_I(page->mapping->host)->io_tree;
9225f39d397SChris Mason 	extent_invalidatepage(tree, page, offset);
9235f39d397SChris Mason 	btree_releasepage(page, GFP_NOFS);
9249ad6b7bcSChris Mason 	if (PagePrivate(page)) {
925d397712bSChris Mason 		printk(KERN_WARNING "btrfs warning page private not zero "
926d397712bSChris Mason 		       "on page %llu\n", (unsigned long long)page_offset(page));
9279ad6b7bcSChris Mason 		ClearPagePrivate(page);
9289ad6b7bcSChris Mason 		set_page_private(page, 0);
9299ad6b7bcSChris Mason 		page_cache_release(page);
9309ad6b7bcSChris Mason 	}
931d98237b3SChris Mason }
932d98237b3SChris Mason 
9337f09410bSAlexey Dobriyan static const struct address_space_operations btree_aops = {
934d98237b3SChris Mason 	.readpage	= btree_readpage,
935d98237b3SChris Mason 	.writepage	= btree_writepage,
9360da5468fSChris Mason 	.writepages	= btree_writepages,
9375f39d397SChris Mason 	.releasepage	= btree_releasepage,
9385f39d397SChris Mason 	.invalidatepage = btree_invalidatepage,
939d98237b3SChris Mason 	.sync_page	= block_sync_page,
9405a92bc88SChris Mason #ifdef CONFIG_MIGRATION
941784b4e29SChris Mason 	.migratepage	= btree_migratepage,
9425a92bc88SChris Mason #endif
943d98237b3SChris Mason };
944123abc88SChris Mason 
945ca7a79adSChris Mason int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
946ca7a79adSChris Mason 			 u64 parent_transid)
947090d1875SChris Mason {
9485f39d397SChris Mason 	struct extent_buffer *buf = NULL;
9495f39d397SChris Mason 	struct inode *btree_inode = root->fs_info->btree_inode;
950de428b63SChris Mason 	int ret = 0;
951090d1875SChris Mason 
952db94535dSChris Mason 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
9535f39d397SChris Mason 	if (!buf)
954090d1875SChris Mason 		return 0;
955d1310b2eSChris Mason 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
956f188591eSChris Mason 				 buf, 0, 0, btree_get_extent, 0);
9575f39d397SChris Mason 	free_extent_buffer(buf);
958de428b63SChris Mason 	return ret;
959090d1875SChris Mason }
960090d1875SChris Mason 
9610999df54SChris Mason struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
9620999df54SChris Mason 					    u64 bytenr, u32 blocksize)
9630999df54SChris Mason {
9640999df54SChris Mason 	struct inode *btree_inode = root->fs_info->btree_inode;
9650999df54SChris Mason 	struct extent_buffer *eb;
9660999df54SChris Mason 	eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
9670999df54SChris Mason 				bytenr, blocksize, GFP_NOFS);
9680999df54SChris Mason 	return eb;
9690999df54SChris Mason }
9700999df54SChris Mason 
9710999df54SChris Mason struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
9720999df54SChris Mason 						 u64 bytenr, u32 blocksize)
9730999df54SChris Mason {
9740999df54SChris Mason 	struct inode *btree_inode = root->fs_info->btree_inode;
9750999df54SChris Mason 	struct extent_buffer *eb;
9760999df54SChris Mason 
9770999df54SChris Mason 	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
9780999df54SChris Mason 				 bytenr, blocksize, NULL, GFP_NOFS);
9790999df54SChris Mason 	return eb;
9800999df54SChris Mason }
9810999df54SChris Mason 
9820999df54SChris Mason 
983e02119d5SChris Mason int btrfs_write_tree_block(struct extent_buffer *buf)
984e02119d5SChris Mason {
9858aa38c31SChristoph Hellwig 	return filemap_fdatawrite_range(buf->first_page->mapping, buf->start,
9868aa38c31SChristoph Hellwig 					buf->start + buf->len - 1);
987e02119d5SChris Mason }
988e02119d5SChris Mason 
989e02119d5SChris Mason int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
990e02119d5SChris Mason {
9918aa38c31SChristoph Hellwig 	return filemap_fdatawait_range(buf->first_page->mapping,
9928aa38c31SChristoph Hellwig 				       buf->start, buf->start + buf->len - 1);
993e02119d5SChris Mason }
994e02119d5SChris Mason 
995db94535dSChris Mason struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
996ca7a79adSChris Mason 				      u32 blocksize, u64 parent_transid)
997e20d96d6SChris Mason {
9985f39d397SChris Mason 	struct extent_buffer *buf = NULL;
99919c00ddcSChris Mason 	int ret;
100019c00ddcSChris Mason 
1001db94535dSChris Mason 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
10025f39d397SChris Mason 	if (!buf)
1003d98237b3SChris Mason 		return NULL;
1004e4204dedSChris Mason 
1005ca7a79adSChris Mason 	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
1006ce9adaa5SChris Mason 
1007d397712bSChris Mason 	if (ret == 0)
1008b4ce94deSChris Mason 		set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
10095f39d397SChris Mason 	return buf;
1010ce9adaa5SChris Mason 
1011eb60ceacSChris Mason }
1012eb60ceacSChris Mason 
1013e089f05cSChris Mason int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
10145f39d397SChris Mason 		     struct extent_buffer *buf)
1015ed2ff2cbSChris Mason {
10165f39d397SChris Mason 	struct inode *btree_inode = root->fs_info->btree_inode;
101755c69072SChris Mason 	if (btrfs_header_generation(buf) ==
1018925baeddSChris Mason 	    root->fs_info->running_transaction->transid) {
1019b9447ef8SChris Mason 		btrfs_assert_tree_locked(buf);
1020b4ce94deSChris Mason 
1021b9473439SChris Mason 		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
1022b9473439SChris Mason 			spin_lock(&root->fs_info->delalloc_lock);
1023b9473439SChris Mason 			if (root->fs_info->dirty_metadata_bytes >= buf->len)
1024b9473439SChris Mason 				root->fs_info->dirty_metadata_bytes -= buf->len;
1025b9473439SChris Mason 			else
1026b9473439SChris Mason 				WARN_ON(1);
1027b9473439SChris Mason 			spin_unlock(&root->fs_info->delalloc_lock);
1028b9473439SChris Mason 		}
1029b4ce94deSChris Mason 
1030b9473439SChris Mason 		/* ugh, clear_extent_buffer_dirty needs to lock the page */
1031b9473439SChris Mason 		btrfs_set_lock_blocking(buf);
1032d1310b2eSChris Mason 		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
103355c69072SChris Mason 					  buf);
1034925baeddSChris Mason 	}
10355f39d397SChris Mason 	return 0;
10365f39d397SChris Mason }
10375f39d397SChris Mason 
1038db94535dSChris Mason static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
103987ee04ebSChris Mason 			u32 stripesize, struct btrfs_root *root,
10409f5fae2fSChris Mason 			struct btrfs_fs_info *fs_info,
1041e20d96d6SChris Mason 			u64 objectid)
1042d97e63b6SChris Mason {
1043cfaa7295SChris Mason 	root->node = NULL;
1044a28ec197SChris Mason 	root->commit_root = NULL;
1045db94535dSChris Mason 	root->sectorsize = sectorsize;
1046db94535dSChris Mason 	root->nodesize = nodesize;
1047db94535dSChris Mason 	root->leafsize = leafsize;
104887ee04ebSChris Mason 	root->stripesize = stripesize;
1049123abc88SChris Mason 	root->ref_cows = 0;
10500b86a832SChris Mason 	root->track_dirty = 0;
1051c71bf099SYan, Zheng 	root->in_radix = 0;
1052d68fc57bSYan, Zheng 	root->orphan_item_inserted = 0;
1053d68fc57bSYan, Zheng 	root->orphan_cleanup_state = 0;
10540b86a832SChris Mason 
10559f5fae2fSChris Mason 	root->fs_info = fs_info;
10560f7d52f4SChris Mason 	root->objectid = objectid;
10570f7d52f4SChris Mason 	root->last_trans = 0;
105813a8a7c8SYan, Zheng 	root->highest_objectid = 0;
105958176a96SJosef Bacik 	root->name = NULL;
10604313b399SChris Mason 	root->in_sysfs = 0;
10616bef4d31SEric Paris 	root->inode_tree = RB_ROOT;
1062f0486c68SYan, Zheng 	root->block_rsv = NULL;
1063d68fc57bSYan, Zheng 	root->orphan_block_rsv = NULL;
10640b86a832SChris Mason 
10650b86a832SChris Mason 	INIT_LIST_HEAD(&root->dirty_list);
10667b128766SJosef Bacik 	INIT_LIST_HEAD(&root->orphan_list);
10675d4f98a2SYan Zheng 	INIT_LIST_HEAD(&root->root_list);
1068925baeddSChris Mason 	spin_lock_init(&root->node_lock);
1069d68fc57bSYan, Zheng 	spin_lock_init(&root->orphan_lock);
10705d4f98a2SYan Zheng 	spin_lock_init(&root->inode_lock);
1071f0486c68SYan, Zheng 	spin_lock_init(&root->accounting_lock);
1072a2135011SChris Mason 	mutex_init(&root->objectid_mutex);
1073e02119d5SChris Mason 	mutex_init(&root->log_mutex);
10747237f183SYan Zheng 	init_waitqueue_head(&root->log_writer_wait);
10757237f183SYan Zheng 	init_waitqueue_head(&root->log_commit_wait[0]);
10767237f183SYan Zheng 	init_waitqueue_head(&root->log_commit_wait[1]);
10777237f183SYan Zheng 	atomic_set(&root->log_commit[0], 0);
10787237f183SYan Zheng 	atomic_set(&root->log_commit[1], 0);
10797237f183SYan Zheng 	atomic_set(&root->log_writers, 0);
10807237f183SYan Zheng 	root->log_batch = 0;
10817237f183SYan Zheng 	root->log_transid = 0;
1082257c62e1SChris Mason 	root->last_log_commit = 0;
1083d0c803c4SChris Mason 	extent_io_tree_init(&root->dirty_log_pages,
1084d0c803c4SChris Mason 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
1085017e5369SChris Mason 
10863768f368SChris Mason 	memset(&root->root_key, 0, sizeof(root->root_key));
10873768f368SChris Mason 	memset(&root->root_item, 0, sizeof(root->root_item));
10886702ed49SChris Mason 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
108958176a96SJosef Bacik 	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
10903f157a2fSChris Mason 	root->defrag_trans_start = fs_info->generation;
109158176a96SJosef Bacik 	init_completion(&root->kobj_unregister);
10926702ed49SChris Mason 	root->defrag_running = 0;
10934d775673SChris Mason 	root->root_key.objectid = objectid;
10943394e160SChris Mason 	root->anon_super.s_root = NULL;
10953394e160SChris Mason 	root->anon_super.s_dev = 0;
10963394e160SChris Mason 	INIT_LIST_HEAD(&root->anon_super.s_list);
10973394e160SChris Mason 	INIT_LIST_HEAD(&root->anon_super.s_instances);
10983394e160SChris Mason 	init_rwsem(&root->anon_super.s_umount);
10993394e160SChris Mason 
11003768f368SChris Mason 	return 0;
11013768f368SChris Mason }
11023768f368SChris Mason 
1103db94535dSChris Mason static int find_and_setup_root(struct btrfs_root *tree_root,
11049f5fae2fSChris Mason 			       struct btrfs_fs_info *fs_info,
11059f5fae2fSChris Mason 			       u64 objectid,
1106e20d96d6SChris Mason 			       struct btrfs_root *root)
11073768f368SChris Mason {
11083768f368SChris Mason 	int ret;
1109db94535dSChris Mason 	u32 blocksize;
111084234f3aSYan Zheng 	u64 generation;
11113768f368SChris Mason 
1112db94535dSChris Mason 	__setup_root(tree_root->nodesize, tree_root->leafsize,
111387ee04ebSChris Mason 		     tree_root->sectorsize, tree_root->stripesize,
111487ee04ebSChris Mason 		     root, fs_info, objectid);
11153768f368SChris Mason 	ret = btrfs_find_last_root(tree_root, objectid,
11163768f368SChris Mason 				   &root->root_item, &root->root_key);
11174df27c4dSYan, Zheng 	if (ret > 0)
11184df27c4dSYan, Zheng 		return -ENOENT;
11193768f368SChris Mason 	BUG_ON(ret);
11203768f368SChris Mason 
112184234f3aSYan Zheng 	generation = btrfs_root_generation(&root->root_item);
1122db94535dSChris Mason 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1123db94535dSChris Mason 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
112484234f3aSYan Zheng 				     blocksize, generation);
112568433b73SChris Mason 	if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
112668433b73SChris Mason 		free_extent_buffer(root->node);
112768433b73SChris Mason 		return -EIO;
112868433b73SChris Mason 	}
11294df27c4dSYan, Zheng 	root->commit_root = btrfs_root_node(root);
1130d97e63b6SChris Mason 	return 0;
1131d97e63b6SChris Mason }
1132d97e63b6SChris Mason 
11337237f183SYan Zheng static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1134e02119d5SChris Mason 					 struct btrfs_fs_info *fs_info)
11350f7d52f4SChris Mason {
11360f7d52f4SChris Mason 	struct btrfs_root *root;
11370f7d52f4SChris Mason 	struct btrfs_root *tree_root = fs_info->tree_root;
11387237f183SYan Zheng 	struct extent_buffer *leaf;
1139e02119d5SChris Mason 
1140e02119d5SChris Mason 	root = kzalloc(sizeof(*root), GFP_NOFS);
1141e02119d5SChris Mason 	if (!root)
11427237f183SYan Zheng 		return ERR_PTR(-ENOMEM);
1143e02119d5SChris Mason 
1144e02119d5SChris Mason 	__setup_root(tree_root->nodesize, tree_root->leafsize,
1145e02119d5SChris Mason 		     tree_root->sectorsize, tree_root->stripesize,
1146e02119d5SChris Mason 		     root, fs_info, BTRFS_TREE_LOG_OBJECTID);
1147e02119d5SChris Mason 
1148e02119d5SChris Mason 	root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
1149e02119d5SChris Mason 	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
1150e02119d5SChris Mason 	root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
11517237f183SYan Zheng 	/*
11527237f183SYan Zheng 	 * log trees do not get reference counted because they go away
11537237f183SYan Zheng 	 * before a real commit is actually done.  They do store pointers
11547237f183SYan Zheng 	 * to file data extents, and those reference counts still get
11557237f183SYan Zheng 	 * updated (along with back refs to the log tree).
11567237f183SYan Zheng 	 */
1157e02119d5SChris Mason 	root->ref_cows = 0;
1158e02119d5SChris Mason 
11595d4f98a2SYan Zheng 	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
11605d4f98a2SYan Zheng 				      BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
11617237f183SYan Zheng 	if (IS_ERR(leaf)) {
11627237f183SYan Zheng 		kfree(root);
11637237f183SYan Zheng 		return ERR_CAST(leaf);
11647237f183SYan Zheng 	}
1165e02119d5SChris Mason 
11665d4f98a2SYan Zheng 	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
11675d4f98a2SYan Zheng 	btrfs_set_header_bytenr(leaf, leaf->start);
11685d4f98a2SYan Zheng 	btrfs_set_header_generation(leaf, trans->transid);
11695d4f98a2SYan Zheng 	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
11705d4f98a2SYan Zheng 	btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
11717237f183SYan Zheng 	root->node = leaf;
1172e02119d5SChris Mason 
1173e02119d5SChris Mason 	write_extent_buffer(root->node, root->fs_info->fsid,
1174e02119d5SChris Mason 			    (unsigned long)btrfs_header_fsid(root->node),
1175e02119d5SChris Mason 			    BTRFS_FSID_SIZE);
1176e02119d5SChris Mason 	btrfs_mark_buffer_dirty(root->node);
1177e02119d5SChris Mason 	btrfs_tree_unlock(root->node);
11787237f183SYan Zheng 	return root;
11797237f183SYan Zheng }
11807237f183SYan Zheng 
11817237f183SYan Zheng int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
11827237f183SYan Zheng 			     struct btrfs_fs_info *fs_info)
11837237f183SYan Zheng {
11847237f183SYan Zheng 	struct btrfs_root *log_root;
11857237f183SYan Zheng 
11867237f183SYan Zheng 	log_root = alloc_log_tree(trans, fs_info);
11877237f183SYan Zheng 	if (IS_ERR(log_root))
11887237f183SYan Zheng 		return PTR_ERR(log_root);
11897237f183SYan Zheng 	WARN_ON(fs_info->log_root_tree);
11907237f183SYan Zheng 	fs_info->log_root_tree = log_root;
11917237f183SYan Zheng 	return 0;
11927237f183SYan Zheng }
11937237f183SYan Zheng 
11947237f183SYan Zheng int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
11957237f183SYan Zheng 		       struct btrfs_root *root)
11967237f183SYan Zheng {
11977237f183SYan Zheng 	struct btrfs_root *log_root;
11987237f183SYan Zheng 	struct btrfs_inode_item *inode_item;
11997237f183SYan Zheng 
12007237f183SYan Zheng 	log_root = alloc_log_tree(trans, root->fs_info);
12017237f183SYan Zheng 	if (IS_ERR(log_root))
12027237f183SYan Zheng 		return PTR_ERR(log_root);
12037237f183SYan Zheng 
12047237f183SYan Zheng 	log_root->last_trans = trans->transid;
12057237f183SYan Zheng 	log_root->root_key.offset = root->root_key.objectid;
12067237f183SYan Zheng 
12077237f183SYan Zheng 	inode_item = &log_root->root_item.inode;
12087237f183SYan Zheng 	inode_item->generation = cpu_to_le64(1);
12097237f183SYan Zheng 	inode_item->size = cpu_to_le64(3);
12107237f183SYan Zheng 	inode_item->nlink = cpu_to_le32(1);
12117237f183SYan Zheng 	inode_item->nbytes = cpu_to_le64(root->leafsize);
12127237f183SYan Zheng 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
12137237f183SYan Zheng 
12145d4f98a2SYan Zheng 	btrfs_set_root_node(&log_root->root_item, log_root->node);
12157237f183SYan Zheng 
12167237f183SYan Zheng 	WARN_ON(root->log_root);
12177237f183SYan Zheng 	root->log_root = log_root;
12187237f183SYan Zheng 	root->log_transid = 0;
1219257c62e1SChris Mason 	root->last_log_commit = 0;
1220e02119d5SChris Mason 	return 0;
1221e02119d5SChris Mason }
1222e02119d5SChris Mason 
1223e02119d5SChris Mason struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1224e02119d5SChris Mason 					       struct btrfs_key *location)
1225e02119d5SChris Mason {
1226e02119d5SChris Mason 	struct btrfs_root *root;
1227e02119d5SChris Mason 	struct btrfs_fs_info *fs_info = tree_root->fs_info;
12280f7d52f4SChris Mason 	struct btrfs_path *path;
12295f39d397SChris Mason 	struct extent_buffer *l;
123084234f3aSYan Zheng 	u64 generation;
1231db94535dSChris Mason 	u32 blocksize;
12320f7d52f4SChris Mason 	int ret = 0;
12330f7d52f4SChris Mason 
12345eda7b5eSChris Mason 	root = kzalloc(sizeof(*root), GFP_NOFS);
12350cf6c620SChris Mason 	if (!root)
12360f7d52f4SChris Mason 		return ERR_PTR(-ENOMEM);
12370f7d52f4SChris Mason 	if (location->offset == (u64)-1) {
1238db94535dSChris Mason 		ret = find_and_setup_root(tree_root, fs_info,
12390f7d52f4SChris Mason 					  location->objectid, root);
12400f7d52f4SChris Mason 		if (ret) {
12410f7d52f4SChris Mason 			kfree(root);
12420f7d52f4SChris Mason 			return ERR_PTR(ret);
12430f7d52f4SChris Mason 		}
124413a8a7c8SYan, Zheng 		goto out;
12450f7d52f4SChris Mason 	}
12460f7d52f4SChris Mason 
1247db94535dSChris Mason 	__setup_root(tree_root->nodesize, tree_root->leafsize,
124887ee04ebSChris Mason 		     tree_root->sectorsize, tree_root->stripesize,
124987ee04ebSChris Mason 		     root, fs_info, location->objectid);
12500f7d52f4SChris Mason 
12510f7d52f4SChris Mason 	path = btrfs_alloc_path();
1252db5b493aSTsutomu Itoh 	if (!path) {
1253db5b493aSTsutomu Itoh 		kfree(root);
1254db5b493aSTsutomu Itoh 		return ERR_PTR(-ENOMEM);
1255db5b493aSTsutomu Itoh 	}
12560f7d52f4SChris Mason 	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
125713a8a7c8SYan, Zheng 	if (ret == 0) {
12585f39d397SChris Mason 		l = path->nodes[0];
12595f39d397SChris Mason 		read_extent_buffer(l, &root->root_item,
12605f39d397SChris Mason 				btrfs_item_ptr_offset(l, path->slots[0]),
12610f7d52f4SChris Mason 				sizeof(root->root_item));
126244b36eb2SYan Zheng 		memcpy(&root->root_key, location, sizeof(*location));
126313a8a7c8SYan, Zheng 	}
12640f7d52f4SChris Mason 	btrfs_free_path(path);
12650f7d52f4SChris Mason 	if (ret) {
12665e540f77STsutomu Itoh 		kfree(root);
126713a8a7c8SYan, Zheng 		if (ret > 0)
126813a8a7c8SYan, Zheng 			ret = -ENOENT;
12690f7d52f4SChris Mason 		return ERR_PTR(ret);
12700f7d52f4SChris Mason 	}
127113a8a7c8SYan, Zheng 
127284234f3aSYan Zheng 	generation = btrfs_root_generation(&root->root_item);
1273db94535dSChris Mason 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1274db94535dSChris Mason 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
127584234f3aSYan Zheng 				     blocksize, generation);
12765d4f98a2SYan Zheng 	root->commit_root = btrfs_root_node(root);
12770f7d52f4SChris Mason 	BUG_ON(!root->node);
127813a8a7c8SYan, Zheng out:
127913a8a7c8SYan, Zheng 	if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
12800f7d52f4SChris Mason 		root->ref_cows = 1;
128113a8a7c8SYan, Zheng 
12825eda7b5eSChris Mason 	return root;
12835eda7b5eSChris Mason }
12845eda7b5eSChris Mason 
1285dc17ff8fSChris Mason struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1286dc17ff8fSChris Mason 					u64 root_objectid)
1287dc17ff8fSChris Mason {
1288dc17ff8fSChris Mason 	struct btrfs_root *root;
1289dc17ff8fSChris Mason 
1290dc17ff8fSChris Mason 	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
1291dc17ff8fSChris Mason 		return fs_info->tree_root;
1292dc17ff8fSChris Mason 	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
1293dc17ff8fSChris Mason 		return fs_info->extent_root;
1294dc17ff8fSChris Mason 
1295dc17ff8fSChris Mason 	root = radix_tree_lookup(&fs_info->fs_roots_radix,
1296dc17ff8fSChris Mason 				 (unsigned long)root_objectid);
1297dc17ff8fSChris Mason 	return root;
1298dc17ff8fSChris Mason }
1299dc17ff8fSChris Mason 
1300edbd8d4eSChris Mason struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1301edbd8d4eSChris Mason 					      struct btrfs_key *location)
13025eda7b5eSChris Mason {
13035eda7b5eSChris Mason 	struct btrfs_root *root;
13045eda7b5eSChris Mason 	int ret;
13055eda7b5eSChris Mason 
1306edbd8d4eSChris Mason 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1307edbd8d4eSChris Mason 		return fs_info->tree_root;
1308edbd8d4eSChris Mason 	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
1309edbd8d4eSChris Mason 		return fs_info->extent_root;
13108f18cf13SChris Mason 	if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
13118f18cf13SChris Mason 		return fs_info->chunk_root;
13128f18cf13SChris Mason 	if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
13138f18cf13SChris Mason 		return fs_info->dev_root;
13140403e47eSYan Zheng 	if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
13150403e47eSYan Zheng 		return fs_info->csum_root;
13164df27c4dSYan, Zheng again:
13174df27c4dSYan, Zheng 	spin_lock(&fs_info->fs_roots_radix_lock);
13185eda7b5eSChris Mason 	root = radix_tree_lookup(&fs_info->fs_roots_radix,
13195eda7b5eSChris Mason 				 (unsigned long)location->objectid);
13204df27c4dSYan, Zheng 	spin_unlock(&fs_info->fs_roots_radix_lock);
13215eda7b5eSChris Mason 	if (root)
13225eda7b5eSChris Mason 		return root;
13235eda7b5eSChris Mason 
1324e02119d5SChris Mason 	root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
13255eda7b5eSChris Mason 	if (IS_ERR(root))
13265eda7b5eSChris Mason 		return root;
13273394e160SChris Mason 
13283394e160SChris Mason 	set_anon_super(&root->anon_super, NULL);
13293394e160SChris Mason 
1330d68fc57bSYan, Zheng 	if (btrfs_root_refs(&root->root_item) == 0) {
1331d68fc57bSYan, Zheng 		ret = -ENOENT;
1332d68fc57bSYan, Zheng 		goto fail;
1333d68fc57bSYan, Zheng 	}
1334d68fc57bSYan, Zheng 
1335d68fc57bSYan, Zheng 	ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
1336d68fc57bSYan, Zheng 	if (ret < 0)
1337d68fc57bSYan, Zheng 		goto fail;
1338d68fc57bSYan, Zheng 	if (ret == 0)
1339d68fc57bSYan, Zheng 		root->orphan_item_inserted = 1;
1340d68fc57bSYan, Zheng 
13414df27c4dSYan, Zheng 	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
13424df27c4dSYan, Zheng 	if (ret)
13434df27c4dSYan, Zheng 		goto fail;
13444df27c4dSYan, Zheng 
13454df27c4dSYan, Zheng 	spin_lock(&fs_info->fs_roots_radix_lock);
13462619ba1fSChris Mason 	ret = radix_tree_insert(&fs_info->fs_roots_radix,
13472619ba1fSChris Mason 				(unsigned long)root->root_key.objectid,
13480f7d52f4SChris Mason 				root);
1349d68fc57bSYan, Zheng 	if (ret == 0)
13504df27c4dSYan, Zheng 		root->in_radix = 1;
1351d68fc57bSYan, Zheng 
13524df27c4dSYan, Zheng 	spin_unlock(&fs_info->fs_roots_radix_lock);
13534df27c4dSYan, Zheng 	radix_tree_preload_end();
13540f7d52f4SChris Mason 	if (ret) {
13554df27c4dSYan, Zheng 		if (ret == -EEXIST) {
13564df27c4dSYan, Zheng 			free_fs_root(root);
13574df27c4dSYan, Zheng 			goto again;
13580f7d52f4SChris Mason 		}
13594df27c4dSYan, Zheng 		goto fail;
13604df27c4dSYan, Zheng 	}
13614df27c4dSYan, Zheng 
1362edbd8d4eSChris Mason 	ret = btrfs_find_dead_roots(fs_info->tree_root,
13635d4f98a2SYan Zheng 				    root->root_key.objectid);
13644df27c4dSYan, Zheng 	WARN_ON(ret);
1365edbd8d4eSChris Mason 	return root;
13664df27c4dSYan, Zheng fail:
13674df27c4dSYan, Zheng 	free_fs_root(root);
13684df27c4dSYan, Zheng 	return ERR_PTR(ret);
1369edbd8d4eSChris Mason }
1370edbd8d4eSChris Mason 
/*
 * Compatibility wrapper around btrfs_read_fs_root_no_name().
 *
 * @name and @namelen are accepted for the benefit of existing callers but
 * are not used.  The code that once consumed them (setting the root name
 * and registering the root in sysfs) sat in an "#if 0" section after an
 * unconditional return and was never compiled, so it has been removed.
 *
 * Returns whatever btrfs_read_fs_root_no_name() returns.
 */
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location,
				      const char *name, int namelen)
{
	return btrfs_read_fs_root_no_name(fs_info, location);
}
140504160088SChris Mason 
140604160088SChris Mason static int btrfs_congested_fn(void *congested_data, int bdi_bits)
140704160088SChris Mason {
140804160088SChris Mason 	struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
140904160088SChris Mason 	int ret = 0;
141004160088SChris Mason 	struct btrfs_device *device;
141104160088SChris Mason 	struct backing_dev_info *bdi;
1412b7967db7SChris Mason 
1413c6e30871SQinghuang Feng 	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1414dfe25020SChris Mason 		if (!device->bdev)
1415dfe25020SChris Mason 			continue;
141604160088SChris Mason 		bdi = blk_get_backing_dev_info(device->bdev);
141704160088SChris Mason 		if (bdi && bdi_congested(bdi, bdi_bits)) {
141804160088SChris Mason 			ret = 1;
141904160088SChris Mason 			break;
142004160088SChris Mason 		}
142104160088SChris Mason 	}
142204160088SChris Mason 	return ret;
142304160088SChris Mason }
142404160088SChris Mason 
142538b66988SChris Mason /*
142638b66988SChris Mason  * this unplugs every device on the box, and it is only used when page
142738b66988SChris Mason  * is null
142838b66988SChris Mason  */
142938b66988SChris Mason static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
143038b66988SChris Mason {
143138b66988SChris Mason 	struct btrfs_device *device;
143238b66988SChris Mason 	struct btrfs_fs_info *info;
143338b66988SChris Mason 
143438b66988SChris Mason 	info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1435c6e30871SQinghuang Feng 	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1436d20f7043SChris Mason 		if (!device->bdev)
1437d20f7043SChris Mason 			continue;
1438d20f7043SChris Mason 
143938b66988SChris Mason 		bdi = blk_get_backing_dev_info(device->bdev);
1440d397712bSChris Mason 		if (bdi->unplug_io_fn)
144138b66988SChris Mason 			bdi->unplug_io_fn(bdi, page);
144238b66988SChris Mason 	}
144338b66988SChris Mason }
144438b66988SChris Mason 
/*
 * bdi unplug callback for the filesystem.
 *
 * Every request, with or without a @page, is forwarded to
 * __unplug_io_fn(), which unplugs all devices.
 *
 * A page-specific path used to follow (extent map lookup for the page's
 * offset, then btrfs_unplug_page() on the mapped block), but it was placed
 * behind an "if (1 || !page)" early return and could never run.  That
 * unreachable code and the locals it needed have been removed; behaviour
 * is unchanged.
 */
static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
	__unplug_io_fn(bdi, page);
}
150004160088SChris Mason 
1501ad081f14SJens Axboe /*
1502ad081f14SJens Axboe  * If this fails, caller must call bdi_destroy() to get rid of the
1503ad081f14SJens Axboe  * bdi again.
1504ad081f14SJens Axboe  */
150504160088SChris Mason static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
150604160088SChris Mason {
1507ad081f14SJens Axboe 	int err;
1508ad081f14SJens Axboe 
1509ad081f14SJens Axboe 	bdi->capabilities = BDI_CAP_MAP_COPY;
1510e6d086d8SJens Axboe 	err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
1511ad081f14SJens Axboe 	if (err)
1512ad081f14SJens Axboe 		return err;
1513ad081f14SJens Axboe 
15144575c9ccSChris Mason 	bdi->ra_pages	= default_backing_dev_info.ra_pages;
151504160088SChris Mason 	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
151604160088SChris Mason 	bdi->unplug_io_data	= info;
151704160088SChris Mason 	bdi->congested_fn	= btrfs_congested_fn;
151804160088SChris Mason 	bdi->congested_data	= info;
151904160088SChris Mason 	return 0;
152004160088SChris Mason }
152104160088SChris Mason 
1522ce9adaa5SChris Mason static int bio_ready_for_csum(struct bio *bio)
1523ce9adaa5SChris Mason {
1524ce9adaa5SChris Mason 	u64 length = 0;
1525ce9adaa5SChris Mason 	u64 buf_len = 0;
1526ce9adaa5SChris Mason 	u64 start = 0;
1527ce9adaa5SChris Mason 	struct page *page;
1528ce9adaa5SChris Mason 	struct extent_io_tree *io_tree = NULL;
1529ce9adaa5SChris Mason 	struct bio_vec *bvec;
1530ce9adaa5SChris Mason 	int i;
1531ce9adaa5SChris Mason 	int ret;
1532ce9adaa5SChris Mason 
1533ce9adaa5SChris Mason 	bio_for_each_segment(bvec, bio, i) {
1534ce9adaa5SChris Mason 		page = bvec->bv_page;
1535ce9adaa5SChris Mason 		if (page->private == EXTENT_PAGE_PRIVATE) {
1536ce9adaa5SChris Mason 			length += bvec->bv_len;
1537ce9adaa5SChris Mason 			continue;
1538ce9adaa5SChris Mason 		}
1539ce9adaa5SChris Mason 		if (!page->private) {
1540ce9adaa5SChris Mason 			length += bvec->bv_len;
1541ce9adaa5SChris Mason 			continue;
1542ce9adaa5SChris Mason 		}
1543ce9adaa5SChris Mason 		length = bvec->bv_len;
1544ce9adaa5SChris Mason 		buf_len = page->private >> 2;
1545ce9adaa5SChris Mason 		start = page_offset(page) + bvec->bv_offset;
1546ce9adaa5SChris Mason 		io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1547ce9adaa5SChris Mason 	}
1548ce9adaa5SChris Mason 	/* are we fully contained in this bio? */
1549ce9adaa5SChris Mason 	if (buf_len <= length)
1550ce9adaa5SChris Mason 		return 1;
1551ce9adaa5SChris Mason 
1552ce9adaa5SChris Mason 	ret = extent_range_uptodate(io_tree, start + length,
1553ce9adaa5SChris Mason 				    start + buf_len - 1);
1554ce9adaa5SChris Mason 	return ret;
1555ce9adaa5SChris Mason }
1556ce9adaa5SChris Mason 
15578b712842SChris Mason /*
15588b712842SChris Mason  * called by the kthread helper functions to finally call the bio end_io
15598b712842SChris Mason  * functions.  This is where read checksum verification actually happens
15608b712842SChris Mason  */
15618b712842SChris Mason static void end_workqueue_fn(struct btrfs_work *work)
1562ce9adaa5SChris Mason {
1563ce9adaa5SChris Mason 	struct bio *bio;
15648b712842SChris Mason 	struct end_io_wq *end_io_wq;
15658b712842SChris Mason 	struct btrfs_fs_info *fs_info;
1566ce9adaa5SChris Mason 	int error;
1567ce9adaa5SChris Mason 
15688b712842SChris Mason 	end_io_wq = container_of(work, struct end_io_wq, work);
1569ce9adaa5SChris Mason 	bio = end_io_wq->bio;
15708b712842SChris Mason 	fs_info = end_io_wq->info;
15718b712842SChris Mason 
1572cad321adSChris Mason 	/* metadata bio reads are special because the whole tree block must
15738b712842SChris Mason 	 * be checksummed at once.  This makes sure the entire block is in
15748b712842SChris Mason 	 * ram and up to date before trying to verify things.  For
15758b712842SChris Mason 	 * blocksize <= pagesize, it is basically a noop
15768b712842SChris Mason 	 */
15777b6d91daSChristoph Hellwig 	if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1578cad321adSChris Mason 	    !bio_ready_for_csum(bio)) {
1579d20f7043SChris Mason 		btrfs_queue_worker(&fs_info->endio_meta_workers,
15808b712842SChris Mason 				   &end_io_wq->work);
1581ce9adaa5SChris Mason 		return;
1582ce9adaa5SChris Mason 	}
1583ce9adaa5SChris Mason 	error = end_io_wq->error;
1584ce9adaa5SChris Mason 	bio->bi_private = end_io_wq->private;
1585ce9adaa5SChris Mason 	bio->bi_end_io = end_io_wq->end_io;
1586ce9adaa5SChris Mason 	kfree(end_io_wq);
1587ce9adaa5SChris Mason 	bio_endio(bio, error);
1588ce9adaa5SChris Mason }
158944b8bd7eSChris Mason 
1590a74a4b97SChris Mason static int cleaner_kthread(void *arg)
1591a74a4b97SChris Mason {
1592a74a4b97SChris Mason 	struct btrfs_root *root = arg;
1593a74a4b97SChris Mason 
1594a74a4b97SChris Mason 	do {
1595a74a4b97SChris Mason 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
159676dda93cSYan, Zheng 
159776dda93cSYan, Zheng 		if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
159876dda93cSYan, Zheng 		    mutex_trylock(&root->fs_info->cleaner_mutex)) {
159924bbcf04SYan, Zheng 			btrfs_run_delayed_iputs(root);
1600a74a4b97SChris Mason 			btrfs_clean_old_snapshots(root);
1601a74a4b97SChris Mason 			mutex_unlock(&root->fs_info->cleaner_mutex);
160276dda93cSYan, Zheng 		}
1603a74a4b97SChris Mason 
1604a74a4b97SChris Mason 		if (freezing(current)) {
1605a74a4b97SChris Mason 			refrigerator();
1606a74a4b97SChris Mason 		} else {
1607a74a4b97SChris Mason 			set_current_state(TASK_INTERRUPTIBLE);
16088929ecfaSYan, Zheng 			if (!kthread_should_stop())
1609a74a4b97SChris Mason 				schedule();
1610a74a4b97SChris Mason 			__set_current_state(TASK_RUNNING);
1611a74a4b97SChris Mason 		}
1612a74a4b97SChris Mason 	} while (!kthread_should_stop());
1613a74a4b97SChris Mason 	return 0;
1614a74a4b97SChris Mason }
1615a74a4b97SChris Mason 
1616a74a4b97SChris Mason static int transaction_kthread(void *arg)
1617a74a4b97SChris Mason {
1618a74a4b97SChris Mason 	struct btrfs_root *root = arg;
1619a74a4b97SChris Mason 	struct btrfs_trans_handle *trans;
1620a74a4b97SChris Mason 	struct btrfs_transaction *cur;
16218929ecfaSYan, Zheng 	u64 transid;
1622a74a4b97SChris Mason 	unsigned long now;
1623a74a4b97SChris Mason 	unsigned long delay;
1624a74a4b97SChris Mason 	int ret;
1625a74a4b97SChris Mason 
1626a74a4b97SChris Mason 	do {
1627a74a4b97SChris Mason 		delay = HZ * 30;
1628a74a4b97SChris Mason 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1629a74a4b97SChris Mason 		mutex_lock(&root->fs_info->transaction_kthread_mutex);
1630a74a4b97SChris Mason 
16318929ecfaSYan, Zheng 		spin_lock(&root->fs_info->new_trans_lock);
1632a74a4b97SChris Mason 		cur = root->fs_info->running_transaction;
1633a74a4b97SChris Mason 		if (!cur) {
16348929ecfaSYan, Zheng 			spin_unlock(&root->fs_info->new_trans_lock);
1635a74a4b97SChris Mason 			goto sleep;
1636a74a4b97SChris Mason 		}
163731153d81SYan Zheng 
1638a74a4b97SChris Mason 		now = get_seconds();
16398929ecfaSYan, Zheng 		if (!cur->blocked &&
16408929ecfaSYan, Zheng 		    (now < cur->start_time || now - cur->start_time < 30)) {
16418929ecfaSYan, Zheng 			spin_unlock(&root->fs_info->new_trans_lock);
1642a74a4b97SChris Mason 			delay = HZ * 5;
1643a74a4b97SChris Mason 			goto sleep;
1644a74a4b97SChris Mason 		}
16458929ecfaSYan, Zheng 		transid = cur->transid;
16468929ecfaSYan, Zheng 		spin_unlock(&root->fs_info->new_trans_lock);
164756bec294SChris Mason 
16488929ecfaSYan, Zheng 		trans = btrfs_join_transaction(root, 1);
16493612b495STsutomu Itoh 		BUG_ON(IS_ERR(trans));
16508929ecfaSYan, Zheng 		if (transid == trans->transid) {
16518929ecfaSYan, Zheng 			ret = btrfs_commit_transaction(trans, root);
16528929ecfaSYan, Zheng 			BUG_ON(ret);
16538929ecfaSYan, Zheng 		} else {
16548929ecfaSYan, Zheng 			btrfs_end_transaction(trans, root);
16558929ecfaSYan, Zheng 		}
1656a74a4b97SChris Mason sleep:
1657a74a4b97SChris Mason 		wake_up_process(root->fs_info->cleaner_kthread);
1658a74a4b97SChris Mason 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1659a74a4b97SChris Mason 
1660a74a4b97SChris Mason 		if (freezing(current)) {
1661a74a4b97SChris Mason 			refrigerator();
1662a74a4b97SChris Mason 		} else {
1663a74a4b97SChris Mason 			set_current_state(TASK_INTERRUPTIBLE);
16648929ecfaSYan, Zheng 			if (!kthread_should_stop() &&
16658929ecfaSYan, Zheng 			    !btrfs_transaction_blocked(root->fs_info))
1666a74a4b97SChris Mason 				schedule_timeout(delay);
1667a74a4b97SChris Mason 			__set_current_state(TASK_RUNNING);
1668a74a4b97SChris Mason 		}
1669a74a4b97SChris Mason 	} while (!kthread_should_stop());
1670a74a4b97SChris Mason 	return 0;
1671a74a4b97SChris Mason }
1672a74a4b97SChris Mason 
16738a4b83ccSChris Mason struct btrfs_root *open_ctree(struct super_block *sb,
1674dfe25020SChris Mason 			      struct btrfs_fs_devices *fs_devices,
1675dfe25020SChris Mason 			      char *options)
1676eb60ceacSChris Mason {
1677db94535dSChris Mason 	u32 sectorsize;
1678db94535dSChris Mason 	u32 nodesize;
1679db94535dSChris Mason 	u32 leafsize;
1680db94535dSChris Mason 	u32 blocksize;
168187ee04ebSChris Mason 	u32 stripesize;
168284234f3aSYan Zheng 	u64 generation;
1683f2b636e8SJosef Bacik 	u64 features;
16843de4586cSChris Mason 	struct btrfs_key location;
1685a061fc8dSChris Mason 	struct buffer_head *bh;
1686e02119d5SChris Mason 	struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
1687e20d96d6SChris Mason 						 GFP_NOFS);
1688d20f7043SChris Mason 	struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1689d20f7043SChris Mason 						 GFP_NOFS);
1690450ba0eaSJosef Bacik 	struct btrfs_root *tree_root = btrfs_sb(sb);
1691450ba0eaSJosef Bacik 	struct btrfs_fs_info *fs_info = tree_root->fs_info;
1692e02119d5SChris Mason 	struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
16930b86a832SChris Mason 						GFP_NOFS);
1694e02119d5SChris Mason 	struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
16950b86a832SChris Mason 					      GFP_NOFS);
1696e02119d5SChris Mason 	struct btrfs_root *log_tree_root;
1697e02119d5SChris Mason 
1698eb60ceacSChris Mason 	int ret;
1699e58ca020SYan 	int err = -EINVAL;
17004543df7eSChris Mason 
17012c90e5d6SChris Mason 	struct btrfs_super_block *disk_super;
17028790d502SChris Mason 
17030463bb4eSJim Meyering 	if (!extent_root || !tree_root || !fs_info ||
1704d20f7043SChris Mason 	    !chunk_root || !dev_root || !csum_root) {
170539279cc3SChris Mason 		err = -ENOMEM;
170639279cc3SChris Mason 		goto fail;
170739279cc3SChris Mason 	}
170876dda93cSYan, Zheng 
170976dda93cSYan, Zheng 	ret = init_srcu_struct(&fs_info->subvol_srcu);
171076dda93cSYan, Zheng 	if (ret) {
171176dda93cSYan, Zheng 		err = ret;
171276dda93cSYan, Zheng 		goto fail;
171376dda93cSYan, Zheng 	}
171476dda93cSYan, Zheng 
171576dda93cSYan, Zheng 	ret = setup_bdi(fs_info, &fs_info->bdi);
171676dda93cSYan, Zheng 	if (ret) {
171776dda93cSYan, Zheng 		err = ret;
171876dda93cSYan, Zheng 		goto fail_srcu;
171976dda93cSYan, Zheng 	}
172076dda93cSYan, Zheng 
172176dda93cSYan, Zheng 	fs_info->btree_inode = new_inode(sb);
172276dda93cSYan, Zheng 	if (!fs_info->btree_inode) {
172376dda93cSYan, Zheng 		err = -ENOMEM;
172476dda93cSYan, Zheng 		goto fail_bdi;
172576dda93cSYan, Zheng 	}
172676dda93cSYan, Zheng 
172776dda93cSYan, Zheng 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
17288fd17795SChris Mason 	INIT_LIST_HEAD(&fs_info->trans_list);
1729facda1e7SChris Mason 	INIT_LIST_HEAD(&fs_info->dead_roots);
173024bbcf04SYan, Zheng 	INIT_LIST_HEAD(&fs_info->delayed_iputs);
173119c00ddcSChris Mason 	INIT_LIST_HEAD(&fs_info->hashers);
1732ea8c2819SChris Mason 	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
17335a3f23d5SChris Mason 	INIT_LIST_HEAD(&fs_info->ordered_operations);
173411833d66SYan Zheng 	INIT_LIST_HEAD(&fs_info->caching_block_groups);
17351832a6d5SChris Mason 	spin_lock_init(&fs_info->delalloc_lock);
1736cee36a03SChris Mason 	spin_lock_init(&fs_info->new_trans_lock);
173731153d81SYan Zheng 	spin_lock_init(&fs_info->ref_cache_lock);
173876dda93cSYan, Zheng 	spin_lock_init(&fs_info->fs_roots_radix_lock);
173924bbcf04SYan, Zheng 	spin_lock_init(&fs_info->delayed_iput_lock);
174019c00ddcSChris Mason 
174158176a96SJosef Bacik 	init_completion(&fs_info->kobj_unregister);
17429f5fae2fSChris Mason 	fs_info->tree_root = tree_root;
17439f5fae2fSChris Mason 	fs_info->extent_root = extent_root;
1744d20f7043SChris Mason 	fs_info->csum_root = csum_root;
17450b86a832SChris Mason 	fs_info->chunk_root = chunk_root;
17460b86a832SChris Mason 	fs_info->dev_root = dev_root;
17478a4b83ccSChris Mason 	fs_info->fs_devices = fs_devices;
17480b86a832SChris Mason 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
17496324fbf3SChris Mason 	INIT_LIST_HEAD(&fs_info->space_info);
17500b86a832SChris Mason 	btrfs_mapping_init(&fs_info->mapping_tree);
1751f0486c68SYan, Zheng 	btrfs_init_block_rsv(&fs_info->global_block_rsv);
1752f0486c68SYan, Zheng 	btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
1753f0486c68SYan, Zheng 	btrfs_init_block_rsv(&fs_info->trans_block_rsv);
1754f0486c68SYan, Zheng 	btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
1755f0486c68SYan, Zheng 	btrfs_init_block_rsv(&fs_info->empty_block_rsv);
1756f0486c68SYan, Zheng 	INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
1757f0486c68SYan, Zheng 	mutex_init(&fs_info->durable_block_rsv_mutex);
1758cb03c743SChris Mason 	atomic_set(&fs_info->nr_async_submits, 0);
1759771ed689SChris Mason 	atomic_set(&fs_info->async_delalloc_pages, 0);
17608c8bee1dSChris Mason 	atomic_set(&fs_info->async_submit_draining, 0);
17610986fe9eSChris Mason 	atomic_set(&fs_info->nr_async_bios, 0);
1762e20d96d6SChris Mason 	fs_info->sb = sb;
17636f568d35SChris Mason 	fs_info->max_inline = 8192 * 1024;
17649ed74f2dSJosef Bacik 	fs_info->metadata_ratio = 0;
1765c8b97818SChris Mason 
1766b34b086cSChris Mason 	fs_info->thread_pool_size = min_t(unsigned long,
1767b34b086cSChris Mason 					  num_online_cpus() + 2, 8);
17680afbaf8cSChris Mason 
17693eaa2885SChris Mason 	INIT_LIST_HEAD(&fs_info->ordered_extents);
17703eaa2885SChris Mason 	spin_lock_init(&fs_info->ordered_extent_lock);
17713eaa2885SChris Mason 
1772a061fc8dSChris Mason 	sb->s_blocksize = 4096;
1773a061fc8dSChris Mason 	sb->s_blocksize_bits = blksize_bits(4096);
177432a88aa1SJens Axboe 	sb->s_bdi = &fs_info->bdi;
1775a061fc8dSChris Mason 
177676dda93cSYan, Zheng 	fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
177776dda93cSYan, Zheng 	fs_info->btree_inode->i_nlink = 1;
17780afbaf8cSChris Mason 	/*
17790afbaf8cSChris Mason 	 * we set the i_size on the btree inode to the max possible int.
17800afbaf8cSChris Mason 	 * the real end of the address space is determined by all of
17810afbaf8cSChris Mason 	 * the devices in the system
17820afbaf8cSChris Mason 	 */
17830afbaf8cSChris Mason 	fs_info->btree_inode->i_size = OFFSET_MAX;
1784d98237b3SChris Mason 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
178504160088SChris Mason 	fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
178604160088SChris Mason 
17875d4f98a2SYan Zheng 	RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1788d1310b2eSChris Mason 	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
17895f39d397SChris Mason 			     fs_info->btree_inode->i_mapping,
17905f39d397SChris Mason 			     GFP_NOFS);
1791d1310b2eSChris Mason 	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
1792d1310b2eSChris Mason 			     GFP_NOFS);
17930da5468fSChris Mason 
1794d1310b2eSChris Mason 	BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
1795d1310b2eSChris Mason 
17960f7d52f4SChris Mason 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
17970f7d52f4SChris Mason 	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
17980f7d52f4SChris Mason 	       sizeof(struct btrfs_key));
179976dda93cSYan, Zheng 	BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
180022b0ebdaSChris Mason 	insert_inode_hash(fs_info->btree_inode);
180139279cc3SChris Mason 
1802e02119d5SChris Mason 	spin_lock_init(&fs_info->block_group_cache_lock);
18036bef4d31SEric Paris 	fs_info->block_group_cache_tree = RB_ROOT;
1804925baeddSChris Mason 
180511833d66SYan Zheng 	extent_io_tree_init(&fs_info->freed_extents[0],
18067d9eb12cSChris Mason 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
180711833d66SYan Zheng 	extent_io_tree_init(&fs_info->freed_extents[1],
180811833d66SYan Zheng 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
180911833d66SYan Zheng 	fs_info->pinned_extents = &fs_info->freed_extents[0];
1810e6dcd2dcSChris Mason 	fs_info->do_barriers = 1;
1811f9295749SChris Mason 
1812d98237b3SChris Mason 
1813509659cdSChris Mason 	mutex_init(&fs_info->trans_mutex);
18145a3f23d5SChris Mason 	mutex_init(&fs_info->ordered_operations_mutex);
181530ae8467SChris Mason 	mutex_init(&fs_info->tree_log_mutex);
1816925baeddSChris Mason 	mutex_init(&fs_info->chunk_mutex);
1817925baeddSChris Mason 	mutex_init(&fs_info->transaction_kthread_mutex);
1818a74a4b97SChris Mason 	mutex_init(&fs_info->cleaner_mutex);
1819a74a4b97SChris Mason 	mutex_init(&fs_info->volume_mutex);
1820276e680dSYan Zheng 	init_rwsem(&fs_info->extent_commit_sem);
1821c71bf099SYan, Zheng 	init_rwsem(&fs_info->cleanup_work_sem);
182276dda93cSYan, Zheng 	init_rwsem(&fs_info->subvol_sem);
1823fa9c0d79SChris Mason 
1824fa9c0d79SChris Mason 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
1825fa9c0d79SChris Mason 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
1826fa9c0d79SChris Mason 
18273768f368SChris Mason 	init_waitqueue_head(&fs_info->transaction_throttle);
182819c00ddcSChris Mason 	init_waitqueue_head(&fs_info->transaction_wait);
1829bb9c12c9SSage Weil 	init_waitqueue_head(&fs_info->transaction_blocked_wait);
18304854ddd0SChris Mason 	init_waitqueue_head(&fs_info->async_submit_wait);
18319a8dd150SChris Mason 
18320b86a832SChris Mason 	__setup_root(4096, 4096, 4096, 4096, tree_root,
18332c90e5d6SChris Mason 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
18347eccb903SChris Mason 
1835a512bbf8SYan Zheng 	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
183620b45077SDave Young 	if (!bh) {
183720b45077SDave Young 		err = -EINVAL;
183839279cc3SChris Mason 		goto fail_iput;
183920b45077SDave Young 	}
184039279cc3SChris Mason 
1841a061fc8dSChris Mason 	memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
18422d69a0f8SYan Zheng 	memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
18432d69a0f8SYan Zheng 	       sizeof(fs_info->super_for_commit));
1844a061fc8dSChris Mason 	brelse(bh);
18455f39d397SChris Mason 
1846a061fc8dSChris Mason 	memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE);
18470b86a832SChris Mason 
18485f39d397SChris Mason 	disk_super = &fs_info->super_copy;
18490f7d52f4SChris Mason 	if (!btrfs_super_root(disk_super))
1850c6e2bac1SJosef Bacik 		goto fail_iput;
18510f7d52f4SChris Mason 
1852acce952bSliubo 	/* check FS state, whether FS is broken. */
1853acce952bSliubo 	fs_info->fs_state |= btrfs_super_flags(disk_super);
1854acce952bSliubo 
1855acce952bSliubo 	btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1856acce952bSliubo 
18572b82032cSYan Zheng 	ret = btrfs_parse_options(tree_root, options);
18582b82032cSYan Zheng 	if (ret) {
18592b82032cSYan Zheng 		err = ret;
1860c6e2bac1SJosef Bacik 		goto fail_iput;
18612b82032cSYan Zheng 	}
1862dfe25020SChris Mason 
1863f2b636e8SJosef Bacik 	features = btrfs_super_incompat_flags(disk_super) &
1864f2b636e8SJosef Bacik 		~BTRFS_FEATURE_INCOMPAT_SUPP;
1865f2b636e8SJosef Bacik 	if (features) {
1866f2b636e8SJosef Bacik 		printk(KERN_ERR "BTRFS: couldn't mount because of "
1867f2b636e8SJosef Bacik 		       "unsupported optional features (%Lx).\n",
186821380931SJoel Becker 		       (unsigned long long)features);
1869f2b636e8SJosef Bacik 		err = -EINVAL;
1870c6e2bac1SJosef Bacik 		goto fail_iput;
1871f2b636e8SJosef Bacik 	}
1872f2b636e8SJosef Bacik 
18735d4f98a2SYan Zheng 	features = btrfs_super_incompat_flags(disk_super);
18745d4f98a2SYan Zheng 	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1875a6fa6faeSLi Zefan 	if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
1876a6fa6faeSLi Zefan 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
18775d4f98a2SYan Zheng 	btrfs_set_super_incompat_flags(disk_super, features);
18785d4f98a2SYan Zheng 
1879f2b636e8SJosef Bacik 	features = btrfs_super_compat_ro_flags(disk_super) &
1880f2b636e8SJosef Bacik 		~BTRFS_FEATURE_COMPAT_RO_SUPP;
1881f2b636e8SJosef Bacik 	if (!(sb->s_flags & MS_RDONLY) && features) {
1882f2b636e8SJosef Bacik 		printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1883f2b636e8SJosef Bacik 		       "unsupported option features (%Lx).\n",
188421380931SJoel Becker 		       (unsigned long long)features);
1885f2b636e8SJosef Bacik 		err = -EINVAL;
1886c6e2bac1SJosef Bacik 		goto fail_iput;
1887f2b636e8SJosef Bacik 	}
188861d92c32SChris Mason 
188961d92c32SChris Mason 	btrfs_init_workers(&fs_info->generic_worker,
189061d92c32SChris Mason 			   "genwork", 1, NULL);
189161d92c32SChris Mason 
18925443be45SChris Mason 	btrfs_init_workers(&fs_info->workers, "worker",
189361d92c32SChris Mason 			   fs_info->thread_pool_size,
189461d92c32SChris Mason 			   &fs_info->generic_worker);
1895c8b97818SChris Mason 
1896771ed689SChris Mason 	btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
189761d92c32SChris Mason 			   fs_info->thread_pool_size,
189861d92c32SChris Mason 			   &fs_info->generic_worker);
1899771ed689SChris Mason 
19005443be45SChris Mason 	btrfs_init_workers(&fs_info->submit_workers, "submit",
1901b720d209SChris Mason 			   min_t(u64, fs_devices->num_devices,
190261d92c32SChris Mason 			   fs_info->thread_pool_size),
190361d92c32SChris Mason 			   &fs_info->generic_worker);
190461b49440SChris Mason 
190561b49440SChris Mason 	/* a higher idle thresh on the submit workers makes it much more
190661b49440SChris Mason 	 * likely that bios will be send down in a sane order to the
190761b49440SChris Mason 	 * devices
190861b49440SChris Mason 	 */
190961b49440SChris Mason 	fs_info->submit_workers.idle_thresh = 64;
191053863232SChris Mason 
1911771ed689SChris Mason 	fs_info->workers.idle_thresh = 16;
19124a69a410SChris Mason 	fs_info->workers.ordered = 1;
191361b49440SChris Mason 
1914771ed689SChris Mason 	fs_info->delalloc_workers.idle_thresh = 2;
1915771ed689SChris Mason 	fs_info->delalloc_workers.ordered = 1;
1916771ed689SChris Mason 
191761d92c32SChris Mason 	btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
191861d92c32SChris Mason 			   &fs_info->generic_worker);
19195443be45SChris Mason 	btrfs_init_workers(&fs_info->endio_workers, "endio",
192061d92c32SChris Mason 			   fs_info->thread_pool_size,
192161d92c32SChris Mason 			   &fs_info->generic_worker);
1922d20f7043SChris Mason 	btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
192361d92c32SChris Mason 			   fs_info->thread_pool_size,
192461d92c32SChris Mason 			   &fs_info->generic_worker);
1925cad321adSChris Mason 	btrfs_init_workers(&fs_info->endio_meta_write_workers,
192661d92c32SChris Mason 			   "endio-meta-write", fs_info->thread_pool_size,
192761d92c32SChris Mason 			   &fs_info->generic_worker);
19285443be45SChris Mason 	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
192961d92c32SChris Mason 			   fs_info->thread_pool_size,
193061d92c32SChris Mason 			   &fs_info->generic_worker);
19310cb59c99SJosef Bacik 	btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
19320cb59c99SJosef Bacik 			   1, &fs_info->generic_worker);
193361b49440SChris Mason 
193461b49440SChris Mason 	/*
193561b49440SChris Mason 	 * endios are largely parallel and should have a very
193661b49440SChris Mason 	 * low idle thresh
193761b49440SChris Mason 	 */
193861b49440SChris Mason 	fs_info->endio_workers.idle_thresh = 4;
1939b51912c9SChris Mason 	fs_info->endio_meta_workers.idle_thresh = 4;
1940b51912c9SChris Mason 
19419042846bSChris Mason 	fs_info->endio_write_workers.idle_thresh = 2;
19429042846bSChris Mason 	fs_info->endio_meta_write_workers.idle_thresh = 2;
19439042846bSChris Mason 
19444543df7eSChris Mason 	btrfs_start_workers(&fs_info->workers, 1);
194561d92c32SChris Mason 	btrfs_start_workers(&fs_info->generic_worker, 1);
19461cc127b5SChris Mason 	btrfs_start_workers(&fs_info->submit_workers, 1);
1947771ed689SChris Mason 	btrfs_start_workers(&fs_info->delalloc_workers, 1);
1948247e743cSChris Mason 	btrfs_start_workers(&fs_info->fixup_workers, 1);
19499042846bSChris Mason 	btrfs_start_workers(&fs_info->endio_workers, 1);
19509042846bSChris Mason 	btrfs_start_workers(&fs_info->endio_meta_workers, 1);
19519042846bSChris Mason 	btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
19529042846bSChris Mason 	btrfs_start_workers(&fs_info->endio_write_workers, 1);
19530cb59c99SJosef Bacik 	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
19544543df7eSChris Mason 
19554575c9ccSChris Mason 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1956c8b97818SChris Mason 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
1957c8b97818SChris Mason 				    4 * 1024 * 1024 / PAGE_CACHE_SIZE);
19584575c9ccSChris Mason 
1959db94535dSChris Mason 	nodesize = btrfs_super_nodesize(disk_super);
1960db94535dSChris Mason 	leafsize = btrfs_super_leafsize(disk_super);
1961db94535dSChris Mason 	sectorsize = btrfs_super_sectorsize(disk_super);
196287ee04ebSChris Mason 	stripesize = btrfs_super_stripesize(disk_super);
1963db94535dSChris Mason 	tree_root->nodesize = nodesize;
1964db94535dSChris Mason 	tree_root->leafsize = leafsize;
1965db94535dSChris Mason 	tree_root->sectorsize = sectorsize;
196687ee04ebSChris Mason 	tree_root->stripesize = stripesize;
1967a061fc8dSChris Mason 
1968a061fc8dSChris Mason 	sb->s_blocksize = sectorsize;
1969a061fc8dSChris Mason 	sb->s_blocksize_bits = blksize_bits(sectorsize);
1970db94535dSChris Mason 
197139279cc3SChris Mason 	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
197239279cc3SChris Mason 		    sizeof(disk_super->magic))) {
1973d397712bSChris Mason 		printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
197439279cc3SChris Mason 		goto fail_sb_buffer;
197539279cc3SChris Mason 	}
197619c00ddcSChris Mason 
1977925baeddSChris Mason 	mutex_lock(&fs_info->chunk_mutex);
1978e4404d6eSYan Zheng 	ret = btrfs_read_sys_array(tree_root);
1979925baeddSChris Mason 	mutex_unlock(&fs_info->chunk_mutex);
198084eed90fSChris Mason 	if (ret) {
1981d397712bSChris Mason 		printk(KERN_WARNING "btrfs: failed to read the system "
1982d397712bSChris Mason 		       "array on %s\n", sb->s_id);
19835d4f98a2SYan Zheng 		goto fail_sb_buffer;
198484eed90fSChris Mason 	}
19850b86a832SChris Mason 
19860b86a832SChris Mason 	blocksize = btrfs_level_size(tree_root,
19870b86a832SChris Mason 				     btrfs_super_chunk_root_level(disk_super));
198884234f3aSYan Zheng 	generation = btrfs_super_chunk_root_generation(disk_super);
19890b86a832SChris Mason 
19900b86a832SChris Mason 	__setup_root(nodesize, leafsize, sectorsize, stripesize,
19910b86a832SChris Mason 		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
19920b86a832SChris Mason 
19930b86a832SChris Mason 	chunk_root->node = read_tree_block(chunk_root,
19940b86a832SChris Mason 					   btrfs_super_chunk_root(disk_super),
199584234f3aSYan Zheng 					   blocksize, generation);
19960b86a832SChris Mason 	BUG_ON(!chunk_root->node);
199783121942SDavid Woodhouse 	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
199883121942SDavid Woodhouse 		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
199983121942SDavid Woodhouse 		       sb->s_id);
200083121942SDavid Woodhouse 		goto fail_chunk_root;
200183121942SDavid Woodhouse 	}
20025d4f98a2SYan Zheng 	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
20035d4f98a2SYan Zheng 	chunk_root->commit_root = btrfs_root_node(chunk_root);
20040b86a832SChris Mason 
2005e17cade2SChris Mason 	read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
2006e17cade2SChris Mason 	   (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
2007e17cade2SChris Mason 	   BTRFS_UUID_SIZE);
2008e17cade2SChris Mason 
2009925baeddSChris Mason 	mutex_lock(&fs_info->chunk_mutex);
20100b86a832SChris Mason 	ret = btrfs_read_chunk_tree(chunk_root);
2011925baeddSChris Mason 	mutex_unlock(&fs_info->chunk_mutex);
20122b82032cSYan Zheng 	if (ret) {
2013d397712bSChris Mason 		printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
2014d397712bSChris Mason 		       sb->s_id);
20152b82032cSYan Zheng 		goto fail_chunk_root;
20162b82032cSYan Zheng 	}
20170b86a832SChris Mason 
2018dfe25020SChris Mason 	btrfs_close_extra_devices(fs_devices);
2019dfe25020SChris Mason 
2020db94535dSChris Mason 	blocksize = btrfs_level_size(tree_root,
2021db94535dSChris Mason 				     btrfs_super_root_level(disk_super));
202284234f3aSYan Zheng 	generation = btrfs_super_generation(disk_super);
20230b86a832SChris Mason 
2024e20d96d6SChris Mason 	tree_root->node = read_tree_block(tree_root,
2025db94535dSChris Mason 					  btrfs_super_root(disk_super),
202684234f3aSYan Zheng 					  blocksize, generation);
202739279cc3SChris Mason 	if (!tree_root->node)
20282b82032cSYan Zheng 		goto fail_chunk_root;
202983121942SDavid Woodhouse 	if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
203083121942SDavid Woodhouse 		printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
203183121942SDavid Woodhouse 		       sb->s_id);
203283121942SDavid Woodhouse 		goto fail_tree_root;
203383121942SDavid Woodhouse 	}
20345d4f98a2SYan Zheng 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
20355d4f98a2SYan Zheng 	tree_root->commit_root = btrfs_root_node(tree_root);
2036db94535dSChris Mason 
2037db94535dSChris Mason 	ret = find_and_setup_root(tree_root, fs_info,
2038e20d96d6SChris Mason 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
20390b86a832SChris Mason 	if (ret)
204039279cc3SChris Mason 		goto fail_tree_root;
20410b86a832SChris Mason 	extent_root->track_dirty = 1;
20420b86a832SChris Mason 
20430b86a832SChris Mason 	ret = find_and_setup_root(tree_root, fs_info,
20440b86a832SChris Mason 				  BTRFS_DEV_TREE_OBJECTID, dev_root);
20450b86a832SChris Mason 	if (ret)
20460b86a832SChris Mason 		goto fail_extent_root;
20475d4f98a2SYan Zheng 	dev_root->track_dirty = 1;
20483768f368SChris Mason 
2049d20f7043SChris Mason 	ret = find_and_setup_root(tree_root, fs_info,
2050d20f7043SChris Mason 				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
2051d20f7043SChris Mason 	if (ret)
20525d4f98a2SYan Zheng 		goto fail_dev_root;
2053d20f7043SChris Mason 
2054d20f7043SChris Mason 	csum_root->track_dirty = 1;
2055d20f7043SChris Mason 
20568929ecfaSYan, Zheng 	fs_info->generation = generation;
20578929ecfaSYan, Zheng 	fs_info->last_trans_committed = generation;
20588929ecfaSYan, Zheng 	fs_info->data_alloc_profile = (u64)-1;
20598929ecfaSYan, Zheng 	fs_info->metadata_alloc_profile = (u64)-1;
20608929ecfaSYan, Zheng 	fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
20618929ecfaSYan, Zheng 
20621b1d1f66SJosef Bacik 	ret = btrfs_read_block_groups(extent_root);
20631b1d1f66SJosef Bacik 	if (ret) {
20641b1d1f66SJosef Bacik 		printk(KERN_ERR "Failed to read block groups: %d\n", ret);
20651b1d1f66SJosef Bacik 		goto fail_block_groups;
20661b1d1f66SJosef Bacik 	}
20679078a3e1SChris Mason 
2068a74a4b97SChris Mason 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
2069a74a4b97SChris Mason 					       "btrfs-cleaner");
207057506d50SQinghuang Feng 	if (IS_ERR(fs_info->cleaner_kthread))
20711b1d1f66SJosef Bacik 		goto fail_block_groups;
2072a74a4b97SChris Mason 
2073a74a4b97SChris Mason 	fs_info->transaction_kthread = kthread_run(transaction_kthread,
2074a74a4b97SChris Mason 						   tree_root,
2075a74a4b97SChris Mason 						   "btrfs-transaction");
207657506d50SQinghuang Feng 	if (IS_ERR(fs_info->transaction_kthread))
20773f157a2fSChris Mason 		goto fail_cleaner;
2078a74a4b97SChris Mason 
2079c289811cSChris Mason 	if (!btrfs_test_opt(tree_root, SSD) &&
2080c289811cSChris Mason 	    !btrfs_test_opt(tree_root, NOSSD) &&
2081c289811cSChris Mason 	    !fs_info->fs_devices->rotating) {
2082c289811cSChris Mason 		printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
2083c289811cSChris Mason 		       "mode\n");
2084c289811cSChris Mason 		btrfs_set_opt(fs_info->mount_opt, SSD);
2085c289811cSChris Mason 	}
2086c289811cSChris Mason 
2087acce952bSliubo 	/* do not make disk changes in broken FS */
2088acce952bSliubo 	if (btrfs_super_log_root(disk_super) != 0 &&
2089acce952bSliubo 	    !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
2090e02119d5SChris Mason 		u64 bytenr = btrfs_super_log_root(disk_super);
2091d18a2c44SChris Mason 
20927c2ca468SChris Mason 		if (fs_devices->rw_devices == 0) {
2093d397712bSChris Mason 			printk(KERN_WARNING "Btrfs log replay required "
2094d397712bSChris Mason 			       "on RO media\n");
20957c2ca468SChris Mason 			err = -EIO;
20967c2ca468SChris Mason 			goto fail_trans_kthread;
20977c2ca468SChris Mason 		}
2098e02119d5SChris Mason 		blocksize =
2099e02119d5SChris Mason 		     btrfs_level_size(tree_root,
2100e02119d5SChris Mason 				      btrfs_super_log_root_level(disk_super));
2101e02119d5SChris Mason 
2102676e4c86SDan Carpenter 		log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
2103676e4c86SDan Carpenter 		if (!log_tree_root) {
2104676e4c86SDan Carpenter 			err = -ENOMEM;
2105676e4c86SDan Carpenter 			goto fail_trans_kthread;
2106676e4c86SDan Carpenter 		}
2107e02119d5SChris Mason 
2108e02119d5SChris Mason 		__setup_root(nodesize, leafsize, sectorsize, stripesize,
2109e02119d5SChris Mason 			     log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
2110e02119d5SChris Mason 
2111e02119d5SChris Mason 		log_tree_root->node = read_tree_block(tree_root, bytenr,
211284234f3aSYan Zheng 						      blocksize,
211384234f3aSYan Zheng 						      generation + 1);
2114e02119d5SChris Mason 		ret = btrfs_recover_log_trees(log_tree_root);
2115e02119d5SChris Mason 		BUG_ON(ret);
2116e556ce2cSYan Zheng 
2117e556ce2cSYan Zheng 		if (sb->s_flags & MS_RDONLY) {
2118e556ce2cSYan Zheng 			ret =  btrfs_commit_super(tree_root);
2119e556ce2cSYan Zheng 			BUG_ON(ret);
2120e556ce2cSYan Zheng 		}
2121e02119d5SChris Mason 	}
21221a40e23bSZheng Yan 
212376dda93cSYan, Zheng 	ret = btrfs_find_orphan_roots(tree_root);
212476dda93cSYan, Zheng 	BUG_ON(ret);
212576dda93cSYan, Zheng 
21267c2ca468SChris Mason 	if (!(sb->s_flags & MS_RDONLY)) {
2127d68fc57bSYan, Zheng 		ret = btrfs_cleanup_fs_roots(fs_info);
2128d68fc57bSYan, Zheng 		BUG_ON(ret);
2129d68fc57bSYan, Zheng 
21305d4f98a2SYan Zheng 		ret = btrfs_recover_relocation(tree_root);
2131d7ce5843SMiao Xie 		if (ret < 0) {
2132d7ce5843SMiao Xie 			printk(KERN_WARNING
2133d7ce5843SMiao Xie 			       "btrfs: failed to recover relocation\n");
2134d7ce5843SMiao Xie 			err = -EINVAL;
2135d7ce5843SMiao Xie 			goto fail_trans_kthread;
2136d7ce5843SMiao Xie 		}
21377c2ca468SChris Mason 	}
21381a40e23bSZheng Yan 
21393de4586cSChris Mason 	location.objectid = BTRFS_FS_TREE_OBJECTID;
21403de4586cSChris Mason 	location.type = BTRFS_ROOT_ITEM_KEY;
21413de4586cSChris Mason 	location.offset = (u64)-1;
21423de4586cSChris Mason 
21433de4586cSChris Mason 	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
21443de4586cSChris Mason 	if (!fs_info->fs_root)
21457c2ca468SChris Mason 		goto fail_trans_kthread;
21463140c9a3SDan Carpenter 	if (IS_ERR(fs_info->fs_root)) {
21473140c9a3SDan Carpenter 		err = PTR_ERR(fs_info->fs_root);
21483140c9a3SDan Carpenter 		goto fail_trans_kthread;
21493140c9a3SDan Carpenter 	}
2150c289811cSChris Mason 
2151e3acc2a6SJosef Bacik 	if (!(sb->s_flags & MS_RDONLY)) {
2152e3acc2a6SJosef Bacik 		down_read(&fs_info->cleanup_work_sem);
215366b4ffd1SJosef Bacik 		err = btrfs_orphan_cleanup(fs_info->fs_root);
215466b4ffd1SJosef Bacik 		if (!err)
215566b4ffd1SJosef Bacik 			err = btrfs_orphan_cleanup(fs_info->tree_root);
2156e3acc2a6SJosef Bacik 		up_read(&fs_info->cleanup_work_sem);
215766b4ffd1SJosef Bacik 		if (err) {
215866b4ffd1SJosef Bacik 			close_ctree(tree_root);
215966b4ffd1SJosef Bacik 			return ERR_PTR(err);
216066b4ffd1SJosef Bacik 		}
2161e3acc2a6SJosef Bacik 	}
2162e3acc2a6SJosef Bacik 
21630f7d52f4SChris Mason 	return tree_root;
216439279cc3SChris Mason 
21657c2ca468SChris Mason fail_trans_kthread:
21667c2ca468SChris Mason 	kthread_stop(fs_info->transaction_kthread);
21673f157a2fSChris Mason fail_cleaner:
2168a74a4b97SChris Mason 	kthread_stop(fs_info->cleaner_kthread);
21697c2ca468SChris Mason 
21707c2ca468SChris Mason 	/*
21717c2ca468SChris Mason 	 * make sure we're done with the btree inode before we stop our
21727c2ca468SChris Mason 	 * kthreads
21737c2ca468SChris Mason 	 */
21747c2ca468SChris Mason 	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
21757c2ca468SChris Mason 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
21767c2ca468SChris Mason 
21771b1d1f66SJosef Bacik fail_block_groups:
21781b1d1f66SJosef Bacik 	btrfs_free_block_groups(fs_info);
2179d20f7043SChris Mason 	free_extent_buffer(csum_root->node);
21805d4f98a2SYan Zheng 	free_extent_buffer(csum_root->commit_root);
21815d4f98a2SYan Zheng fail_dev_root:
21825d4f98a2SYan Zheng 	free_extent_buffer(dev_root->node);
21835d4f98a2SYan Zheng 	free_extent_buffer(dev_root->commit_root);
21840b86a832SChris Mason fail_extent_root:
21850b86a832SChris Mason 	free_extent_buffer(extent_root->node);
21865d4f98a2SYan Zheng 	free_extent_buffer(extent_root->commit_root);
218739279cc3SChris Mason fail_tree_root:
21885f39d397SChris Mason 	free_extent_buffer(tree_root->node);
21895d4f98a2SYan Zheng 	free_extent_buffer(tree_root->commit_root);
21902b82032cSYan Zheng fail_chunk_root:
21912b82032cSYan Zheng 	free_extent_buffer(chunk_root->node);
21925d4f98a2SYan Zheng 	free_extent_buffer(chunk_root->commit_root);
219339279cc3SChris Mason fail_sb_buffer:
219461d92c32SChris Mason 	btrfs_stop_workers(&fs_info->generic_worker);
2195247e743cSChris Mason 	btrfs_stop_workers(&fs_info->fixup_workers);
2196771ed689SChris Mason 	btrfs_stop_workers(&fs_info->delalloc_workers);
21978b712842SChris Mason 	btrfs_stop_workers(&fs_info->workers);
21988b712842SChris Mason 	btrfs_stop_workers(&fs_info->endio_workers);
2199d20f7043SChris Mason 	btrfs_stop_workers(&fs_info->endio_meta_workers);
2200cad321adSChris Mason 	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2201e6dcd2dcSChris Mason 	btrfs_stop_workers(&fs_info->endio_write_workers);
22020cb59c99SJosef Bacik 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
22031cc127b5SChris Mason 	btrfs_stop_workers(&fs_info->submit_workers);
22044543df7eSChris Mason fail_iput:
22057c2ca468SChris Mason 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
22064543df7eSChris Mason 	iput(fs_info->btree_inode);
22077e662854SQinghuang Feng 
2208dfe25020SChris Mason 	btrfs_close_devices(fs_info->fs_devices);
220984eed90fSChris Mason 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
2210ad081f14SJens Axboe fail_bdi:
22117e662854SQinghuang Feng 	bdi_destroy(&fs_info->bdi);
221276dda93cSYan, Zheng fail_srcu:
221376dda93cSYan, Zheng 	cleanup_srcu_struct(&fs_info->subvol_srcu);
22147e662854SQinghuang Feng fail:
221539279cc3SChris Mason 	kfree(extent_root);
221639279cc3SChris Mason 	kfree(tree_root);
221739279cc3SChris Mason 	kfree(fs_info);
221883afeac4SJim Meyering 	kfree(chunk_root);
221983afeac4SJim Meyering 	kfree(dev_root);
2220d20f7043SChris Mason 	kfree(csum_root);
222139279cc3SChris Mason 	return ERR_PTR(err);
2222eb60ceacSChris Mason }
2223eb60ceacSChris Mason 
2224f2984462SChris Mason static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2225f2984462SChris Mason {
2226f2984462SChris Mason 	char b[BDEVNAME_SIZE];
2227f2984462SChris Mason 
2228f2984462SChris Mason 	if (uptodate) {
2229f2984462SChris Mason 		set_buffer_uptodate(bh);
2230f2984462SChris Mason 	} else {
2231c3b9a62cSChristoph Hellwig 		if (printk_ratelimit()) {
2232f2984462SChris Mason 			printk(KERN_WARNING "lost page write due to "
2233f2984462SChris Mason 					"I/O error on %s\n",
2234f2984462SChris Mason 				       bdevname(bh->b_bdev, b));
2235f2984462SChris Mason 		}
22361259ab75SChris Mason 		/* note, we dont' set_buffer_write_io_error because we have
22371259ab75SChris Mason 		 * our own ways of dealing with the IO errors
22381259ab75SChris Mason 		 */
2239f2984462SChris Mason 		clear_buffer_uptodate(bh);
2240f2984462SChris Mason 	}
2241f2984462SChris Mason 	unlock_buffer(bh);
2242f2984462SChris Mason 	put_bh(bh);
2243f2984462SChris Mason }
2244f2984462SChris Mason 
2245a512bbf8SYan Zheng struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2246a512bbf8SYan Zheng {
2247a512bbf8SYan Zheng 	struct buffer_head *bh;
2248a512bbf8SYan Zheng 	struct buffer_head *latest = NULL;
2249a512bbf8SYan Zheng 	struct btrfs_super_block *super;
2250a512bbf8SYan Zheng 	int i;
2251a512bbf8SYan Zheng 	u64 transid = 0;
2252a512bbf8SYan Zheng 	u64 bytenr;
2253a512bbf8SYan Zheng 
2254a512bbf8SYan Zheng 	/* we would like to check all the supers, but that would make
2255a512bbf8SYan Zheng 	 * a btrfs mount succeed after a mkfs from a different FS.
2256a512bbf8SYan Zheng 	 * So, we need to add a special mount option to scan for
2257a512bbf8SYan Zheng 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
2258a512bbf8SYan Zheng 	 */
2259a512bbf8SYan Zheng 	for (i = 0; i < 1; i++) {
2260a512bbf8SYan Zheng 		bytenr = btrfs_sb_offset(i);
2261a512bbf8SYan Zheng 		if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
2262a512bbf8SYan Zheng 			break;
2263a512bbf8SYan Zheng 		bh = __bread(bdev, bytenr / 4096, 4096);
2264a512bbf8SYan Zheng 		if (!bh)
2265a512bbf8SYan Zheng 			continue;
2266a512bbf8SYan Zheng 
2267a512bbf8SYan Zheng 		super = (struct btrfs_super_block *)bh->b_data;
2268a512bbf8SYan Zheng 		if (btrfs_super_bytenr(super) != bytenr ||
2269a512bbf8SYan Zheng 		    strncmp((char *)(&super->magic), BTRFS_MAGIC,
2270a512bbf8SYan Zheng 			    sizeof(super->magic))) {
2271a512bbf8SYan Zheng 			brelse(bh);
2272a512bbf8SYan Zheng 			continue;
2273a512bbf8SYan Zheng 		}
2274a512bbf8SYan Zheng 
2275a512bbf8SYan Zheng 		if (!latest || btrfs_super_generation(super) > transid) {
2276a512bbf8SYan Zheng 			brelse(latest);
2277a512bbf8SYan Zheng 			latest = bh;
2278a512bbf8SYan Zheng 			transid = btrfs_super_generation(super);
2279a512bbf8SYan Zheng 		} else {
2280a512bbf8SYan Zheng 			brelse(bh);
2281a512bbf8SYan Zheng 		}
2282a512bbf8SYan Zheng 	}
2283a512bbf8SYan Zheng 	return latest;
2284a512bbf8SYan Zheng }
2285a512bbf8SYan Zheng 
22864eedeb75SHisashi Hifumi /*
22874eedeb75SHisashi Hifumi  * this should be called twice, once with wait == 0 and
22884eedeb75SHisashi Hifumi  * once with wait == 1.  When wait == 0 is done, all the buffer heads
22894eedeb75SHisashi Hifumi  * we write are pinned.
22904eedeb75SHisashi Hifumi  *
22914eedeb75SHisashi Hifumi  * They are released when wait == 1 is done.
22924eedeb75SHisashi Hifumi  * max_mirrors must be the same for both runs, and it indicates how
22934eedeb75SHisashi Hifumi  * many supers on this one device should be written.
22944eedeb75SHisashi Hifumi  *
22954eedeb75SHisashi Hifumi  * max_mirrors == 0 means to write them all.
22964eedeb75SHisashi Hifumi  */
2297a512bbf8SYan Zheng static int write_dev_supers(struct btrfs_device *device,
2298a512bbf8SYan Zheng 			    struct btrfs_super_block *sb,
2299a512bbf8SYan Zheng 			    int do_barriers, int wait, int max_mirrors)
2300a512bbf8SYan Zheng {
2301a512bbf8SYan Zheng 	struct buffer_head *bh;
2302a512bbf8SYan Zheng 	int i;
2303a512bbf8SYan Zheng 	int ret;
2304a512bbf8SYan Zheng 	int errors = 0;
2305a512bbf8SYan Zheng 	u32 crc;
2306a512bbf8SYan Zheng 	u64 bytenr;
2307a512bbf8SYan Zheng 	int last_barrier = 0;
2308a512bbf8SYan Zheng 
2309a512bbf8SYan Zheng 	if (max_mirrors == 0)
2310a512bbf8SYan Zheng 		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
2311a512bbf8SYan Zheng 
2312a512bbf8SYan Zheng 	/* make sure only the last submit_bh does a barrier */
2313a512bbf8SYan Zheng 	if (do_barriers) {
2314a512bbf8SYan Zheng 		for (i = 0; i < max_mirrors; i++) {
2315a512bbf8SYan Zheng 			bytenr = btrfs_sb_offset(i);
2316a512bbf8SYan Zheng 			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
2317a512bbf8SYan Zheng 			    device->total_bytes)
2318a512bbf8SYan Zheng 				break;
2319a512bbf8SYan Zheng 			last_barrier = i;
2320a512bbf8SYan Zheng 		}
2321a512bbf8SYan Zheng 	}
2322a512bbf8SYan Zheng 
2323a512bbf8SYan Zheng 	for (i = 0; i < max_mirrors; i++) {
2324a512bbf8SYan Zheng 		bytenr = btrfs_sb_offset(i);
2325a512bbf8SYan Zheng 		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
2326a512bbf8SYan Zheng 			break;
2327a512bbf8SYan Zheng 
2328a512bbf8SYan Zheng 		if (wait) {
2329a512bbf8SYan Zheng 			bh = __find_get_block(device->bdev, bytenr / 4096,
2330a512bbf8SYan Zheng 					      BTRFS_SUPER_INFO_SIZE);
2331a512bbf8SYan Zheng 			BUG_ON(!bh);
2332a512bbf8SYan Zheng 			wait_on_buffer(bh);
23334eedeb75SHisashi Hifumi 			if (!buffer_uptodate(bh))
23344eedeb75SHisashi Hifumi 				errors++;
23354eedeb75SHisashi Hifumi 
23364eedeb75SHisashi Hifumi 			/* drop our reference */
23374eedeb75SHisashi Hifumi 			brelse(bh);
23384eedeb75SHisashi Hifumi 
23394eedeb75SHisashi Hifumi 			/* drop the reference from the wait == 0 run */
2340a512bbf8SYan Zheng 			brelse(bh);
2341a512bbf8SYan Zheng 			continue;
2342a512bbf8SYan Zheng 		} else {
2343a512bbf8SYan Zheng 			btrfs_set_super_bytenr(sb, bytenr);
2344a512bbf8SYan Zheng 
2345a512bbf8SYan Zheng 			crc = ~(u32)0;
2346a512bbf8SYan Zheng 			crc = btrfs_csum_data(NULL, (char *)sb +
2347a512bbf8SYan Zheng 					      BTRFS_CSUM_SIZE, crc,
2348a512bbf8SYan Zheng 					      BTRFS_SUPER_INFO_SIZE -
2349a512bbf8SYan Zheng 					      BTRFS_CSUM_SIZE);
2350a512bbf8SYan Zheng 			btrfs_csum_final(crc, sb->csum);
2351a512bbf8SYan Zheng 
23524eedeb75SHisashi Hifumi 			/*
23534eedeb75SHisashi Hifumi 			 * one reference for us, and we leave it for the
23544eedeb75SHisashi Hifumi 			 * caller
23554eedeb75SHisashi Hifumi 			 */
2356a512bbf8SYan Zheng 			bh = __getblk(device->bdev, bytenr / 4096,
2357a512bbf8SYan Zheng 				      BTRFS_SUPER_INFO_SIZE);
2358a512bbf8SYan Zheng 			memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
2359a512bbf8SYan Zheng 
23604eedeb75SHisashi Hifumi 			/* one reference for submit_bh */
2361a512bbf8SYan Zheng 			get_bh(bh);
23624eedeb75SHisashi Hifumi 
23634eedeb75SHisashi Hifumi 			set_buffer_uptodate(bh);
2364a512bbf8SYan Zheng 			lock_buffer(bh);
2365a512bbf8SYan Zheng 			bh->b_end_io = btrfs_end_buffer_write_sync;
2366a512bbf8SYan Zheng 		}
2367a512bbf8SYan Zheng 
2368c3b9a62cSChristoph Hellwig 		if (i == last_barrier && do_barriers)
2369c3b9a62cSChristoph Hellwig 			ret = submit_bh(WRITE_FLUSH_FUA, bh);
2370c3b9a62cSChristoph Hellwig 		else
2371ffbd517dSChris Mason 			ret = submit_bh(WRITE_SYNC, bh);
2372a512bbf8SYan Zheng 
23734eedeb75SHisashi Hifumi 		if (ret)
2374a512bbf8SYan Zheng 			errors++;
2375a512bbf8SYan Zheng 	}
2376a512bbf8SYan Zheng 	return errors < i ? 0 : -1;
2377a512bbf8SYan Zheng }
2378a512bbf8SYan Zheng 
2379a512bbf8SYan Zheng int write_all_supers(struct btrfs_root *root, int max_mirrors)
2380f2984462SChris Mason {
2381e5e9a520SChris Mason 	struct list_head *head;
2382f2984462SChris Mason 	struct btrfs_device *dev;
2383a061fc8dSChris Mason 	struct btrfs_super_block *sb;
2384f2984462SChris Mason 	struct btrfs_dev_item *dev_item;
2385f2984462SChris Mason 	int ret;
2386f2984462SChris Mason 	int do_barriers;
2387a236aed1SChris Mason 	int max_errors;
2388a236aed1SChris Mason 	int total_errors = 0;
2389a061fc8dSChris Mason 	u64 flags;
2390f2984462SChris Mason 
2391a236aed1SChris Mason 	max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
2392f2984462SChris Mason 	do_barriers = !btrfs_test_opt(root, NOBARRIER);
2393f2984462SChris Mason 
2394a061fc8dSChris Mason 	sb = &root->fs_info->super_for_commit;
2395a061fc8dSChris Mason 	dev_item = &sb->dev_item;
2396e5e9a520SChris Mason 
2397e5e9a520SChris Mason 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2398e5e9a520SChris Mason 	head = &root->fs_info->fs_devices->devices;
2399c6e30871SQinghuang Feng 	list_for_each_entry(dev, head, dev_list) {
2400dfe25020SChris Mason 		if (!dev->bdev) {
2401dfe25020SChris Mason 			total_errors++;
2402dfe25020SChris Mason 			continue;
2403dfe25020SChris Mason 		}
24042b82032cSYan Zheng 		if (!dev->in_fs_metadata || !dev->writeable)
2405dfe25020SChris Mason 			continue;
2406dfe25020SChris Mason 
24072b82032cSYan Zheng 		btrfs_set_stack_device_generation(dev_item, 0);
2408a061fc8dSChris Mason 		btrfs_set_stack_device_type(dev_item, dev->type);
2409a061fc8dSChris Mason 		btrfs_set_stack_device_id(dev_item, dev->devid);
2410a061fc8dSChris Mason 		btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
2411a061fc8dSChris Mason 		btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
2412a061fc8dSChris Mason 		btrfs_set_stack_device_io_align(dev_item, dev->io_align);
2413a061fc8dSChris Mason 		btrfs_set_stack_device_io_width(dev_item, dev->io_width);
2414a061fc8dSChris Mason 		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
2415a061fc8dSChris Mason 		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
24162b82032cSYan Zheng 		memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
2417a512bbf8SYan Zheng 
2418a061fc8dSChris Mason 		flags = btrfs_super_flags(sb);
2419a061fc8dSChris Mason 		btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
2420f2984462SChris Mason 
2421a512bbf8SYan Zheng 		ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
2422a236aed1SChris Mason 		if (ret)
2423a236aed1SChris Mason 			total_errors++;
2424f2984462SChris Mason 	}
2425a236aed1SChris Mason 	if (total_errors > max_errors) {
2426d397712bSChris Mason 		printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2427d397712bSChris Mason 		       total_errors);
2428a236aed1SChris Mason 		BUG();
2429a236aed1SChris Mason 	}
2430f2984462SChris Mason 
2431a512bbf8SYan Zheng 	total_errors = 0;
2432c6e30871SQinghuang Feng 	list_for_each_entry(dev, head, dev_list) {
2433dfe25020SChris Mason 		if (!dev->bdev)
2434dfe25020SChris Mason 			continue;
24352b82032cSYan Zheng 		if (!dev->in_fs_metadata || !dev->writeable)
2436dfe25020SChris Mason 			continue;
2437dfe25020SChris Mason 
2438a512bbf8SYan Zheng 		ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
2439a512bbf8SYan Zheng 		if (ret)
24401259ab75SChris Mason 			total_errors++;
2441f2984462SChris Mason 	}
2442e5e9a520SChris Mason 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2443a236aed1SChris Mason 	if (total_errors > max_errors) {
2444d397712bSChris Mason 		printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2445d397712bSChris Mason 		       total_errors);
2446a236aed1SChris Mason 		BUG();
2447a236aed1SChris Mason 	}
2448f2984462SChris Mason 	return 0;
2449f2984462SChris Mason }
2450f2984462SChris Mason 
/*
 * Thin wrapper around write_all_supers().
 *
 * @trans:       not used by this function
 * @root:        forwarded to write_all_supers()
 * @max_mirrors: forwarded to write_all_supers()
 *
 * Returns whatever write_all_supers() returns.
 */
int write_ctree_super(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, int max_mirrors)
{
	return write_all_supers(root, max_mirrors);
}
2459cfaa7295SChris Mason 
24605eda7b5eSChris Mason int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
24612619ba1fSChris Mason {
24624df27c4dSYan, Zheng 	spin_lock(&fs_info->fs_roots_radix_lock);
24632619ba1fSChris Mason 	radix_tree_delete(&fs_info->fs_roots_radix,
24642619ba1fSChris Mason 			  (unsigned long)root->root_key.objectid);
24654df27c4dSYan, Zheng 	spin_unlock(&fs_info->fs_roots_radix_lock);
246676dda93cSYan, Zheng 
246776dda93cSYan, Zheng 	if (btrfs_root_refs(&root->root_item) == 0)
246876dda93cSYan, Zheng 		synchronize_srcu(&fs_info->subvol_srcu);
246976dda93cSYan, Zheng 
24704df27c4dSYan, Zheng 	free_fs_root(root);
24714df27c4dSYan, Zheng 	return 0;
24724df27c4dSYan, Zheng }
24734df27c4dSYan, Zheng 
24744df27c4dSYan, Zheng static void free_fs_root(struct btrfs_root *root)
24754df27c4dSYan, Zheng {
24764df27c4dSYan, Zheng 	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
24773394e160SChris Mason 	if (root->anon_super.s_dev) {
24783394e160SChris Mason 		down_write(&root->anon_super.s_umount);
24793394e160SChris Mason 		kill_anon_super(&root->anon_super);
24803394e160SChris Mason 	}
24815f39d397SChris Mason 	free_extent_buffer(root->node);
24825f39d397SChris Mason 	free_extent_buffer(root->commit_root);
248358176a96SJosef Bacik 	kfree(root->name);
24842619ba1fSChris Mason 	kfree(root);
24852619ba1fSChris Mason }
24862619ba1fSChris Mason 
248735b7e476SChris Mason static int del_fs_roots(struct btrfs_fs_info *fs_info)
24880f7d52f4SChris Mason {
24890f7d52f4SChris Mason 	int ret;
24900f7d52f4SChris Mason 	struct btrfs_root *gang[8];
24910f7d52f4SChris Mason 	int i;
24920f7d52f4SChris Mason 
249376dda93cSYan, Zheng 	while (!list_empty(&fs_info->dead_roots)) {
249476dda93cSYan, Zheng 		gang[0] = list_entry(fs_info->dead_roots.next,
249576dda93cSYan, Zheng 				     struct btrfs_root, root_list);
249676dda93cSYan, Zheng 		list_del(&gang[0]->root_list);
249776dda93cSYan, Zheng 
249876dda93cSYan, Zheng 		if (gang[0]->in_radix) {
249976dda93cSYan, Zheng 			btrfs_free_fs_root(fs_info, gang[0]);
250076dda93cSYan, Zheng 		} else {
250176dda93cSYan, Zheng 			free_extent_buffer(gang[0]->node);
250276dda93cSYan, Zheng 			free_extent_buffer(gang[0]->commit_root);
250376dda93cSYan, Zheng 			kfree(gang[0]);
250476dda93cSYan, Zheng 		}
250576dda93cSYan, Zheng 	}
250676dda93cSYan, Zheng 
25070f7d52f4SChris Mason 	while (1) {
25080f7d52f4SChris Mason 		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
25090f7d52f4SChris Mason 					     (void **)gang, 0,
25100f7d52f4SChris Mason 					     ARRAY_SIZE(gang));
25110f7d52f4SChris Mason 		if (!ret)
25120f7d52f4SChris Mason 			break;
25132619ba1fSChris Mason 		for (i = 0; i < ret; i++)
25145eda7b5eSChris Mason 			btrfs_free_fs_root(fs_info, gang[i]);
25150f7d52f4SChris Mason 	}
25160f7d52f4SChris Mason 	return 0;
25170f7d52f4SChris Mason }
2518b4100d64SChris Mason 
2519c146afadSYan Zheng int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2520c146afadSYan Zheng {
2521c146afadSYan Zheng 	u64 root_objectid = 0;
2522c146afadSYan Zheng 	struct btrfs_root *gang[8];
2523c146afadSYan Zheng 	int i;
2524c146afadSYan Zheng 	int ret;
2525c146afadSYan Zheng 
2526c146afadSYan Zheng 	while (1) {
2527c146afadSYan Zheng 		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
2528c146afadSYan Zheng 					     (void **)gang, root_objectid,
2529c146afadSYan Zheng 					     ARRAY_SIZE(gang));
2530c146afadSYan Zheng 		if (!ret)
2531c146afadSYan Zheng 			break;
25325d4f98a2SYan Zheng 
25335d4f98a2SYan Zheng 		root_objectid = gang[ret - 1]->root_key.objectid + 1;
2534c146afadSYan Zheng 		for (i = 0; i < ret; i++) {
253566b4ffd1SJosef Bacik 			int err;
253666b4ffd1SJosef Bacik 
2537c146afadSYan Zheng 			root_objectid = gang[i]->root_key.objectid;
253866b4ffd1SJosef Bacik 			err = btrfs_orphan_cleanup(gang[i]);
253966b4ffd1SJosef Bacik 			if (err)
254066b4ffd1SJosef Bacik 				return err;
2541c146afadSYan Zheng 		}
2542c146afadSYan Zheng 		root_objectid++;
2543c146afadSYan Zheng 	}
2544c146afadSYan Zheng 	return 0;
2545c146afadSYan Zheng }
2546c146afadSYan Zheng 
2547c146afadSYan Zheng int btrfs_commit_super(struct btrfs_root *root)
2548c146afadSYan Zheng {
2549c146afadSYan Zheng 	struct btrfs_trans_handle *trans;
2550c146afadSYan Zheng 	int ret;
2551c146afadSYan Zheng 
2552c146afadSYan Zheng 	mutex_lock(&root->fs_info->cleaner_mutex);
255324bbcf04SYan, Zheng 	btrfs_run_delayed_iputs(root);
2554c146afadSYan Zheng 	btrfs_clean_old_snapshots(root);
2555c146afadSYan Zheng 	mutex_unlock(&root->fs_info->cleaner_mutex);
2556c71bf099SYan, Zheng 
2557c71bf099SYan, Zheng 	/* wait until ongoing cleanup work done */
2558c71bf099SYan, Zheng 	down_write(&root->fs_info->cleanup_work_sem);
2559c71bf099SYan, Zheng 	up_write(&root->fs_info->cleanup_work_sem);
2560c71bf099SYan, Zheng 
2561a22285a6SYan, Zheng 	trans = btrfs_join_transaction(root, 1);
25623612b495STsutomu Itoh 	if (IS_ERR(trans))
25633612b495STsutomu Itoh 		return PTR_ERR(trans);
2564c146afadSYan Zheng 	ret = btrfs_commit_transaction(trans, root);
2565c146afadSYan Zheng 	BUG_ON(ret);
2566c146afadSYan Zheng 	/* run commit again to drop the original snapshot */
2567a22285a6SYan, Zheng 	trans = btrfs_join_transaction(root, 1);
25683612b495STsutomu Itoh 	if (IS_ERR(trans))
25693612b495STsutomu Itoh 		return PTR_ERR(trans);
2570c146afadSYan Zheng 	btrfs_commit_transaction(trans, root);
2571c146afadSYan Zheng 	ret = btrfs_write_and_wait_transaction(NULL, root);
2572c146afadSYan Zheng 	BUG_ON(ret);
2573c146afadSYan Zheng 
2574a512bbf8SYan Zheng 	ret = write_ctree_super(NULL, root, 0);
2575c146afadSYan Zheng 	return ret;
2576c146afadSYan Zheng }
2577c146afadSYan Zheng 
2578e20d96d6SChris Mason int close_ctree(struct btrfs_root *root)
2579eb60ceacSChris Mason {
25800f7d52f4SChris Mason 	struct btrfs_fs_info *fs_info = root->fs_info;
2581c146afadSYan Zheng 	int ret;
2582e089f05cSChris Mason 
2583facda1e7SChris Mason 	fs_info->closing = 1;
2584a2135011SChris Mason 	smp_mb();
2585a2135011SChris Mason 
25860af3d00bSJosef Bacik 	btrfs_put_block_group_cache(fs_info);
2587acce952bSliubo 
2588acce952bSliubo 	/*
2589acce952bSliubo 	 * Here come 2 situations when btrfs is broken to flip readonly:
2590acce952bSliubo 	 *
2591acce952bSliubo 	 * 1. when btrfs flips readonly somewhere else before
2592acce952bSliubo 	 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
2593acce952bSliubo 	 * and btrfs will skip to write sb directly to keep
2594acce952bSliubo 	 * ERROR state on disk.
2595acce952bSliubo 	 *
2596acce952bSliubo 	 * 2. when btrfs flips readonly just in btrfs_commit_super,
2597acce952bSliubo 	 * and in such case, btrfs cannnot write sb via btrfs_commit_super,
2598acce952bSliubo 	 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
2599acce952bSliubo 	 * btrfs will cleanup all FS resources first and write sb then.
2600acce952bSliubo 	 */
2601c146afadSYan Zheng 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2602c146afadSYan Zheng 		ret = btrfs_commit_super(root);
2603d397712bSChris Mason 		if (ret)
2604d397712bSChris Mason 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2605c146afadSYan Zheng 	}
2606ed2ff2cbSChris Mason 
2607acce952bSliubo 	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
2608acce952bSliubo 		ret = btrfs_error_commit_super(root);
2609acce952bSliubo 		if (ret)
2610acce952bSliubo 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2611acce952bSliubo 	}
2612acce952bSliubo 
26138929ecfaSYan, Zheng 	kthread_stop(root->fs_info->transaction_kthread);
26148929ecfaSYan, Zheng 	kthread_stop(root->fs_info->cleaner_kthread);
26158929ecfaSYan, Zheng 
2616f25784b3SYan Zheng 	fs_info->closing = 2;
2617f25784b3SYan Zheng 	smp_mb();
2618f25784b3SYan Zheng 
2619b0c68f8bSChris Mason 	if (fs_info->delalloc_bytes) {
2620d397712bSChris Mason 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
262121380931SJoel Becker 		       (unsigned long long)fs_info->delalloc_bytes);
2622b0c68f8bSChris Mason 	}
262331153d81SYan Zheng 	if (fs_info->total_ref_cache_size) {
2624d397712bSChris Mason 		printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
2625d397712bSChris Mason 		       (unsigned long long)fs_info->total_ref_cache_size);
262631153d81SYan Zheng 	}
262731153d81SYan Zheng 
26285f39d397SChris Mason 	free_extent_buffer(fs_info->extent_root->node);
26295d4f98a2SYan Zheng 	free_extent_buffer(fs_info->extent_root->commit_root);
26305f39d397SChris Mason 	free_extent_buffer(fs_info->tree_root->node);
26315d4f98a2SYan Zheng 	free_extent_buffer(fs_info->tree_root->commit_root);
26320b86a832SChris Mason 	free_extent_buffer(root->fs_info->chunk_root->node);
26335d4f98a2SYan Zheng 	free_extent_buffer(root->fs_info->chunk_root->commit_root);
26340b86a832SChris Mason 	free_extent_buffer(root->fs_info->dev_root->node);
26355d4f98a2SYan Zheng 	free_extent_buffer(root->fs_info->dev_root->commit_root);
2636d20f7043SChris Mason 	free_extent_buffer(root->fs_info->csum_root->node);
26375d4f98a2SYan Zheng 	free_extent_buffer(root->fs_info->csum_root->commit_root);
2638d20f7043SChris Mason 
26399078a3e1SChris Mason 	btrfs_free_block_groups(root->fs_info);
2640c146afadSYan Zheng 
26410f7d52f4SChris Mason 	del_fs_roots(fs_info);
2642d10c5f31SChris Mason 
2643c146afadSYan Zheng 	iput(fs_info->btree_inode);
26449ad6b7bcSChris Mason 
264561d92c32SChris Mason 	btrfs_stop_workers(&fs_info->generic_worker);
2646247e743cSChris Mason 	btrfs_stop_workers(&fs_info->fixup_workers);
2647771ed689SChris Mason 	btrfs_stop_workers(&fs_info->delalloc_workers);
26488b712842SChris Mason 	btrfs_stop_workers(&fs_info->workers);
26498b712842SChris Mason 	btrfs_stop_workers(&fs_info->endio_workers);
2650d20f7043SChris Mason 	btrfs_stop_workers(&fs_info->endio_meta_workers);
2651cad321adSChris Mason 	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2652e6dcd2dcSChris Mason 	btrfs_stop_workers(&fs_info->endio_write_workers);
26530cb59c99SJosef Bacik 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
26541cc127b5SChris Mason 	btrfs_stop_workers(&fs_info->submit_workers);
2655d6bfde87SChris Mason 
2656dfe25020SChris Mason 	btrfs_close_devices(fs_info->fs_devices);
26570b86a832SChris Mason 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
2658b248a415SChris Mason 
265904160088SChris Mason 	bdi_destroy(&fs_info->bdi);
266076dda93cSYan, Zheng 	cleanup_srcu_struct(&fs_info->subvol_srcu);
26610b86a832SChris Mason 
26620f7d52f4SChris Mason 	kfree(fs_info->extent_root);
26630f7d52f4SChris Mason 	kfree(fs_info->tree_root);
26640b86a832SChris Mason 	kfree(fs_info->chunk_root);
26650b86a832SChris Mason 	kfree(fs_info->dev_root);
2666d20f7043SChris Mason 	kfree(fs_info->csum_root);
266783a4d548SLi Zefan 	kfree(fs_info);
266883a4d548SLi Zefan 
2669eb60ceacSChris Mason 	return 0;
2670eb60ceacSChris Mason }
2671eb60ceacSChris Mason 
26721259ab75SChris Mason int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
2673ccd467d6SChris Mason {
26741259ab75SChris Mason 	int ret;
2675810191ffSChris Mason 	struct inode *btree_inode = buf->first_page->mapping->host;
26761259ab75SChris Mason 
26772ac55d41SJosef Bacik 	ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
26782ac55d41SJosef Bacik 				     NULL);
26791259ab75SChris Mason 	if (!ret)
26801259ab75SChris Mason 		return ret;
26811259ab75SChris Mason 
26821259ab75SChris Mason 	ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
26831259ab75SChris Mason 				    parent_transid);
26841259ab75SChris Mason 	return !ret;
26855f39d397SChris Mason }
26866702ed49SChris Mason 
26875f39d397SChris Mason int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
26885f39d397SChris Mason {
2689810191ffSChris Mason 	struct inode *btree_inode = buf->first_page->mapping->host;
2690d1310b2eSChris Mason 	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
26915f39d397SChris Mason 					  buf);
26925f39d397SChris Mason }
26935f39d397SChris Mason 
26945f39d397SChris Mason void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
26955f39d397SChris Mason {
2696810191ffSChris Mason 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
26975f39d397SChris Mason 	u64 transid = btrfs_header_generation(buf);
26985f39d397SChris Mason 	struct inode *btree_inode = root->fs_info->btree_inode;
2699b9473439SChris Mason 	int was_dirty;
2700b4ce94deSChris Mason 
2701b9447ef8SChris Mason 	btrfs_assert_tree_locked(buf);
2702ccd467d6SChris Mason 	if (transid != root->fs_info->generation) {
2703d397712bSChris Mason 		printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
2704d397712bSChris Mason 		       "found %llu running %llu\n",
2705db94535dSChris Mason 			(unsigned long long)buf->start,
2706d397712bSChris Mason 			(unsigned long long)transid,
2707d397712bSChris Mason 			(unsigned long long)root->fs_info->generation);
2708ccd467d6SChris Mason 		WARN_ON(1);
2709ccd467d6SChris Mason 	}
2710b9473439SChris Mason 	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
2711b9473439SChris Mason 					    buf);
2712b9473439SChris Mason 	if (!was_dirty) {
2713b9473439SChris Mason 		spin_lock(&root->fs_info->delalloc_lock);
2714b9473439SChris Mason 		root->fs_info->dirty_metadata_bytes += buf->len;
2715b9473439SChris Mason 		spin_unlock(&root->fs_info->delalloc_lock);
2716b9473439SChris Mason 	}
2717eb60ceacSChris Mason }
2718eb60ceacSChris Mason 
2719d3c2fdcfSChris Mason void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
272035b7e476SChris Mason {
2721188de649SChris Mason 	/*
2722188de649SChris Mason 	 * looks as though older kernels can get into trouble with
2723188de649SChris Mason 	 * this code, they end up stuck in balance_dirty_pages forever
2724188de649SChris Mason 	 */
2725d6bfde87SChris Mason 	u64 num_dirty;
2726771ed689SChris Mason 	unsigned long thresh = 32 * 1024 * 1024;
2727d6bfde87SChris Mason 
27286933c02eSJens Axboe 	if (current->flags & PF_MEMALLOC)
2729d6bfde87SChris Mason 		return;
2730d6bfde87SChris Mason 
2731585ad2c3SChris Mason 	num_dirty = root->fs_info->dirty_metadata_bytes;
2732585ad2c3SChris Mason 
2733d6bfde87SChris Mason 	if (num_dirty > thresh) {
2734d3c2fdcfSChris Mason 		balance_dirty_pages_ratelimited_nr(
2735304fced6SChris Mason 				   root->fs_info->btree_inode->i_mapping, 1);
273635b7e476SChris Mason 	}
2737188de649SChris Mason 	return;
2738d6bfde87SChris Mason }
27396b80053dSChris Mason 
2740ca7a79adSChris Mason int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
27416b80053dSChris Mason {
2742810191ffSChris Mason 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
2743ce9adaa5SChris Mason 	int ret;
2744ca7a79adSChris Mason 	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
2745d397712bSChris Mason 	if (ret == 0)
2746b4ce94deSChris Mason 		set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
2747ce9adaa5SChris Mason 	return ret;
27486b80053dSChris Mason }
27490da5468fSChris Mason 
27504bef0848SChris Mason int btree_lock_page_hook(struct page *page)
27514bef0848SChris Mason {
27524bef0848SChris Mason 	struct inode *inode = page->mapping->host;
2753b9473439SChris Mason 	struct btrfs_root *root = BTRFS_I(inode)->root;
27544bef0848SChris Mason 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
27554bef0848SChris Mason 	struct extent_buffer *eb;
27564bef0848SChris Mason 	unsigned long len;
27574bef0848SChris Mason 	u64 bytenr = page_offset(page);
27584bef0848SChris Mason 
27594bef0848SChris Mason 	if (page->private == EXTENT_PAGE_PRIVATE)
27604bef0848SChris Mason 		goto out;
27614bef0848SChris Mason 
27624bef0848SChris Mason 	len = page->private >> 2;
27634bef0848SChris Mason 	eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS);
27644bef0848SChris Mason 	if (!eb)
27654bef0848SChris Mason 		goto out;
27664bef0848SChris Mason 
27674bef0848SChris Mason 	btrfs_tree_lock(eb);
27684bef0848SChris Mason 	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2769b9473439SChris Mason 
2770b9473439SChris Mason 	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
2771b9473439SChris Mason 		spin_lock(&root->fs_info->delalloc_lock);
2772b9473439SChris Mason 		if (root->fs_info->dirty_metadata_bytes >= eb->len)
2773b9473439SChris Mason 			root->fs_info->dirty_metadata_bytes -= eb->len;
2774b9473439SChris Mason 		else
2775b9473439SChris Mason 			WARN_ON(1);
2776b9473439SChris Mason 		spin_unlock(&root->fs_info->delalloc_lock);
2777b9473439SChris Mason 	}
2778b9473439SChris Mason 
27794bef0848SChris Mason 	btrfs_tree_unlock(eb);
27804bef0848SChris Mason 	free_extent_buffer(eb);
27814bef0848SChris Mason out:
27824bef0848SChris Mason 	lock_page(page);
27834bef0848SChris Mason 	return 0;
27844bef0848SChris Mason }
27854bef0848SChris Mason 
2786acce952bSliubo static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
2787acce952bSliubo 			      int read_only)
2788acce952bSliubo {
2789acce952bSliubo 	if (read_only)
2790acce952bSliubo 		return;
2791acce952bSliubo 
2792acce952bSliubo 	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2793acce952bSliubo 		printk(KERN_WARNING "warning: mount fs with errors, "
2794acce952bSliubo 		       "running btrfsck is recommended\n");
2795acce952bSliubo }
2796acce952bSliubo 
2797acce952bSliubo int btrfs_error_commit_super(struct btrfs_root *root)
2798acce952bSliubo {
2799acce952bSliubo 	int ret;
2800acce952bSliubo 
2801acce952bSliubo 	mutex_lock(&root->fs_info->cleaner_mutex);
2802acce952bSliubo 	btrfs_run_delayed_iputs(root);
2803acce952bSliubo 	mutex_unlock(&root->fs_info->cleaner_mutex);
2804acce952bSliubo 
2805acce952bSliubo 	down_write(&root->fs_info->cleanup_work_sem);
2806acce952bSliubo 	up_write(&root->fs_info->cleanup_work_sem);
2807acce952bSliubo 
2808acce952bSliubo 	/* cleanup FS via transaction */
2809acce952bSliubo 	btrfs_cleanup_transaction(root);
2810acce952bSliubo 
2811acce952bSliubo 	ret = write_ctree_super(NULL, root, 0);
2812acce952bSliubo 
2813acce952bSliubo 	return ret;
2814acce952bSliubo }
2815acce952bSliubo 
2816acce952bSliubo static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
2817acce952bSliubo {
2818acce952bSliubo 	struct btrfs_inode *btrfs_inode;
2819acce952bSliubo 	struct list_head splice;
2820acce952bSliubo 
2821acce952bSliubo 	INIT_LIST_HEAD(&splice);
2822acce952bSliubo 
2823acce952bSliubo 	mutex_lock(&root->fs_info->ordered_operations_mutex);
2824acce952bSliubo 	spin_lock(&root->fs_info->ordered_extent_lock);
2825acce952bSliubo 
2826acce952bSliubo 	list_splice_init(&root->fs_info->ordered_operations, &splice);
2827acce952bSliubo 	while (!list_empty(&splice)) {
2828acce952bSliubo 		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2829acce952bSliubo 					 ordered_operations);
2830acce952bSliubo 
2831acce952bSliubo 		list_del_init(&btrfs_inode->ordered_operations);
2832acce952bSliubo 
2833acce952bSliubo 		btrfs_invalidate_inodes(btrfs_inode->root);
2834acce952bSliubo 	}
2835acce952bSliubo 
2836acce952bSliubo 	spin_unlock(&root->fs_info->ordered_extent_lock);
2837acce952bSliubo 	mutex_unlock(&root->fs_info->ordered_operations_mutex);
2838acce952bSliubo 
2839acce952bSliubo 	return 0;
2840acce952bSliubo }
2841acce952bSliubo 
2842acce952bSliubo static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
2843acce952bSliubo {
2844acce952bSliubo 	struct list_head splice;
2845acce952bSliubo 	struct btrfs_ordered_extent *ordered;
2846acce952bSliubo 	struct inode *inode;
2847acce952bSliubo 
2848acce952bSliubo 	INIT_LIST_HEAD(&splice);
2849acce952bSliubo 
2850acce952bSliubo 	spin_lock(&root->fs_info->ordered_extent_lock);
2851acce952bSliubo 
2852acce952bSliubo 	list_splice_init(&root->fs_info->ordered_extents, &splice);
2853acce952bSliubo 	while (!list_empty(&splice)) {
2854acce952bSliubo 		ordered = list_entry(splice.next, struct btrfs_ordered_extent,
2855acce952bSliubo 				     root_extent_list);
2856acce952bSliubo 
2857acce952bSliubo 		list_del_init(&ordered->root_extent_list);
2858acce952bSliubo 		atomic_inc(&ordered->refs);
2859acce952bSliubo 
2860acce952bSliubo 		/* the inode may be getting freed (in sys_unlink path). */
2861acce952bSliubo 		inode = igrab(ordered->inode);
2862acce952bSliubo 
2863acce952bSliubo 		spin_unlock(&root->fs_info->ordered_extent_lock);
2864acce952bSliubo 		if (inode)
2865acce952bSliubo 			iput(inode);
2866acce952bSliubo 
2867acce952bSliubo 		atomic_set(&ordered->refs, 1);
2868acce952bSliubo 		btrfs_put_ordered_extent(ordered);
2869acce952bSliubo 
2870acce952bSliubo 		spin_lock(&root->fs_info->ordered_extent_lock);
2871acce952bSliubo 	}
2872acce952bSliubo 
2873acce952bSliubo 	spin_unlock(&root->fs_info->ordered_extent_lock);
2874acce952bSliubo 
2875acce952bSliubo 	return 0;
2876acce952bSliubo }
2877acce952bSliubo 
2878acce952bSliubo static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
2879acce952bSliubo 				      struct btrfs_root *root)
2880acce952bSliubo {
2881acce952bSliubo 	struct rb_node *node;
2882acce952bSliubo 	struct btrfs_delayed_ref_root *delayed_refs;
2883acce952bSliubo 	struct btrfs_delayed_ref_node *ref;
2884acce952bSliubo 	int ret = 0;
2885acce952bSliubo 
2886acce952bSliubo 	delayed_refs = &trans->delayed_refs;
2887acce952bSliubo 
2888acce952bSliubo 	spin_lock(&delayed_refs->lock);
2889acce952bSliubo 	if (delayed_refs->num_entries == 0) {
2890acce952bSliubo 		printk(KERN_INFO "delayed_refs has NO entry\n");
2891acce952bSliubo 		return ret;
2892acce952bSliubo 	}
2893acce952bSliubo 
2894acce952bSliubo 	node = rb_first(&delayed_refs->root);
2895acce952bSliubo 	while (node) {
2896acce952bSliubo 		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2897acce952bSliubo 		node = rb_next(node);
2898acce952bSliubo 
2899acce952bSliubo 		ref->in_tree = 0;
2900acce952bSliubo 		rb_erase(&ref->rb_node, &delayed_refs->root);
2901acce952bSliubo 		delayed_refs->num_entries--;
2902acce952bSliubo 
2903acce952bSliubo 		atomic_set(&ref->refs, 1);
2904acce952bSliubo 		if (btrfs_delayed_ref_is_head(ref)) {
2905acce952bSliubo 			struct btrfs_delayed_ref_head *head;
2906acce952bSliubo 
2907acce952bSliubo 			head = btrfs_delayed_node_to_head(ref);
2908acce952bSliubo 			mutex_lock(&head->mutex);
2909acce952bSliubo 			kfree(head->extent_op);
2910acce952bSliubo 			delayed_refs->num_heads--;
2911acce952bSliubo 			if (list_empty(&head->cluster))
2912acce952bSliubo 				delayed_refs->num_heads_ready--;
2913acce952bSliubo 			list_del_init(&head->cluster);
2914acce952bSliubo 			mutex_unlock(&head->mutex);
2915acce952bSliubo 		}
2916acce952bSliubo 
2917acce952bSliubo 		spin_unlock(&delayed_refs->lock);
2918acce952bSliubo 		btrfs_put_delayed_ref(ref);
2919acce952bSliubo 
2920acce952bSliubo 		cond_resched();
2921acce952bSliubo 		spin_lock(&delayed_refs->lock);
2922acce952bSliubo 	}
2923acce952bSliubo 
2924acce952bSliubo 	spin_unlock(&delayed_refs->lock);
2925acce952bSliubo 
2926acce952bSliubo 	return ret;
2927acce952bSliubo }
2928acce952bSliubo 
2929acce952bSliubo static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
2930acce952bSliubo {
2931acce952bSliubo 	struct btrfs_pending_snapshot *snapshot;
2932acce952bSliubo 	struct list_head splice;
2933acce952bSliubo 
2934acce952bSliubo 	INIT_LIST_HEAD(&splice);
2935acce952bSliubo 
2936acce952bSliubo 	list_splice_init(&t->pending_snapshots, &splice);
2937acce952bSliubo 
2938acce952bSliubo 	while (!list_empty(&splice)) {
2939acce952bSliubo 		snapshot = list_entry(splice.next,
2940acce952bSliubo 				      struct btrfs_pending_snapshot,
2941acce952bSliubo 				      list);
2942acce952bSliubo 
2943acce952bSliubo 		list_del_init(&snapshot->list);
2944acce952bSliubo 
2945acce952bSliubo 		kfree(snapshot);
2946acce952bSliubo 	}
2947acce952bSliubo 
2948acce952bSliubo 	return 0;
2949acce952bSliubo }
2950acce952bSliubo 
2951acce952bSliubo static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2952acce952bSliubo {
2953acce952bSliubo 	struct btrfs_inode *btrfs_inode;
2954acce952bSliubo 	struct list_head splice;
2955acce952bSliubo 
2956acce952bSliubo 	INIT_LIST_HEAD(&splice);
2957acce952bSliubo 
2958acce952bSliubo 	list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2959acce952bSliubo 
2960acce952bSliubo 	spin_lock(&root->fs_info->delalloc_lock);
2961acce952bSliubo 
2962acce952bSliubo 	while (!list_empty(&splice)) {
2963acce952bSliubo 		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
2964acce952bSliubo 				    delalloc_inodes);
2965acce952bSliubo 
2966acce952bSliubo 		list_del_init(&btrfs_inode->delalloc_inodes);
2967acce952bSliubo 
2968acce952bSliubo 		btrfs_invalidate_inodes(btrfs_inode->root);
2969acce952bSliubo 	}
2970acce952bSliubo 
2971acce952bSliubo 	spin_unlock(&root->fs_info->delalloc_lock);
2972acce952bSliubo 
2973acce952bSliubo 	return 0;
2974acce952bSliubo }
2975acce952bSliubo 
2976acce952bSliubo static int btrfs_destroy_marked_extents(struct btrfs_root *root,
2977acce952bSliubo 					struct extent_io_tree *dirty_pages,
2978acce952bSliubo 					int mark)
2979acce952bSliubo {
2980acce952bSliubo 	int ret;
2981acce952bSliubo 	struct page *page;
2982acce952bSliubo 	struct inode *btree_inode = root->fs_info->btree_inode;
2983acce952bSliubo 	struct extent_buffer *eb;
2984acce952bSliubo 	u64 start = 0;
2985acce952bSliubo 	u64 end;
2986acce952bSliubo 	u64 offset;
2987acce952bSliubo 	unsigned long index;
2988acce952bSliubo 
2989acce952bSliubo 	while (1) {
2990acce952bSliubo 		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
2991acce952bSliubo 					    mark);
2992acce952bSliubo 		if (ret)
2993acce952bSliubo 			break;
2994acce952bSliubo 
2995acce952bSliubo 		clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
2996acce952bSliubo 		while (start <= end) {
2997acce952bSliubo 			index = start >> PAGE_CACHE_SHIFT;
2998acce952bSliubo 			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
2999acce952bSliubo 			page = find_get_page(btree_inode->i_mapping, index);
3000acce952bSliubo 			if (!page)
3001acce952bSliubo 				continue;
3002acce952bSliubo 			offset = page_offset(page);
3003acce952bSliubo 
3004acce952bSliubo 			spin_lock(&dirty_pages->buffer_lock);
3005acce952bSliubo 			eb = radix_tree_lookup(
3006acce952bSliubo 			     &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
3007acce952bSliubo 					       offset >> PAGE_CACHE_SHIFT);
3008acce952bSliubo 			spin_unlock(&dirty_pages->buffer_lock);
3009acce952bSliubo 			if (eb) {
3010acce952bSliubo 				ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3011acce952bSliubo 							 &eb->bflags);
3012acce952bSliubo 				atomic_set(&eb->refs, 1);
3013acce952bSliubo 			}
3014acce952bSliubo 			if (PageWriteback(page))
3015acce952bSliubo 				end_page_writeback(page);
3016acce952bSliubo 
3017acce952bSliubo 			lock_page(page);
3018acce952bSliubo 			if (PageDirty(page)) {
3019acce952bSliubo 				clear_page_dirty_for_io(page);
3020acce952bSliubo 				spin_lock_irq(&page->mapping->tree_lock);
3021acce952bSliubo 				radix_tree_tag_clear(&page->mapping->page_tree,
3022acce952bSliubo 							page_index(page),
3023acce952bSliubo 							PAGECACHE_TAG_DIRTY);
3024acce952bSliubo 				spin_unlock_irq(&page->mapping->tree_lock);
3025acce952bSliubo 			}
3026acce952bSliubo 
3027acce952bSliubo 			page->mapping->a_ops->invalidatepage(page, 0);
3028acce952bSliubo 			unlock_page(page);
3029acce952bSliubo 		}
3030acce952bSliubo 	}
3031acce952bSliubo 
3032acce952bSliubo 	return ret;
3033acce952bSliubo }
3034acce952bSliubo 
3035acce952bSliubo static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
3036acce952bSliubo 				       struct extent_io_tree *pinned_extents)
3037acce952bSliubo {
3038acce952bSliubo 	struct extent_io_tree *unpin;
3039acce952bSliubo 	u64 start;
3040acce952bSliubo 	u64 end;
3041acce952bSliubo 	int ret;
3042acce952bSliubo 
3043acce952bSliubo 	unpin = pinned_extents;
3044acce952bSliubo 	while (1) {
3045acce952bSliubo 		ret = find_first_extent_bit(unpin, 0, &start, &end,
3046acce952bSliubo 					    EXTENT_DIRTY);
3047acce952bSliubo 		if (ret)
3048acce952bSliubo 			break;
3049acce952bSliubo 
3050acce952bSliubo 		/* opt_discard */
3051acce952bSliubo 		ret = btrfs_error_discard_extent(root, start, end + 1 - start);
3052acce952bSliubo 
3053acce952bSliubo 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
3054acce952bSliubo 		btrfs_error_unpin_extent_range(root, start, end);
3055acce952bSliubo 		cond_resched();
3056acce952bSliubo 	}
3057acce952bSliubo 
3058acce952bSliubo 	return 0;
3059acce952bSliubo }
3060acce952bSliubo 
3061acce952bSliubo static int btrfs_cleanup_transaction(struct btrfs_root *root)
3062acce952bSliubo {
3063acce952bSliubo 	struct btrfs_transaction *t;
3064acce952bSliubo 	LIST_HEAD(list);
3065acce952bSliubo 
3066acce952bSliubo 	WARN_ON(1);
3067acce952bSliubo 
3068acce952bSliubo 	mutex_lock(&root->fs_info->trans_mutex);
3069acce952bSliubo 	mutex_lock(&root->fs_info->transaction_kthread_mutex);
3070acce952bSliubo 
3071acce952bSliubo 	list_splice_init(&root->fs_info->trans_list, &list);
3072acce952bSliubo 	while (!list_empty(&list)) {
3073acce952bSliubo 		t = list_entry(list.next, struct btrfs_transaction, list);
3074acce952bSliubo 		if (!t)
3075acce952bSliubo 			break;
3076acce952bSliubo 
3077acce952bSliubo 		btrfs_destroy_ordered_operations(root);
3078acce952bSliubo 
3079acce952bSliubo 		btrfs_destroy_ordered_extents(root);
3080acce952bSliubo 
3081acce952bSliubo 		btrfs_destroy_delayed_refs(t, root);
3082acce952bSliubo 
3083acce952bSliubo 		btrfs_block_rsv_release(root,
3084acce952bSliubo 					&root->fs_info->trans_block_rsv,
3085acce952bSliubo 					t->dirty_pages.dirty_bytes);
3086acce952bSliubo 
3087acce952bSliubo 		/* FIXME: cleanup wait for commit */
3088acce952bSliubo 		t->in_commit = 1;
3089acce952bSliubo 		t->blocked = 1;
3090acce952bSliubo 		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3091acce952bSliubo 			wake_up(&root->fs_info->transaction_blocked_wait);
3092acce952bSliubo 
3093acce952bSliubo 		t->blocked = 0;
3094acce952bSliubo 		if (waitqueue_active(&root->fs_info->transaction_wait))
3095acce952bSliubo 			wake_up(&root->fs_info->transaction_wait);
3096acce952bSliubo 		mutex_unlock(&root->fs_info->trans_mutex);
3097acce952bSliubo 
3098acce952bSliubo 		mutex_lock(&root->fs_info->trans_mutex);
3099acce952bSliubo 		t->commit_done = 1;
3100acce952bSliubo 		if (waitqueue_active(&t->commit_wait))
3101acce952bSliubo 			wake_up(&t->commit_wait);
3102acce952bSliubo 		mutex_unlock(&root->fs_info->trans_mutex);
3103acce952bSliubo 
3104acce952bSliubo 		mutex_lock(&root->fs_info->trans_mutex);
3105acce952bSliubo 
3106acce952bSliubo 		btrfs_destroy_pending_snapshots(t);
3107acce952bSliubo 
3108acce952bSliubo 		btrfs_destroy_delalloc_inodes(root);
3109acce952bSliubo 
3110acce952bSliubo 		spin_lock(&root->fs_info->new_trans_lock);
3111acce952bSliubo 		root->fs_info->running_transaction = NULL;
3112acce952bSliubo 		spin_unlock(&root->fs_info->new_trans_lock);
3113acce952bSliubo 
3114acce952bSliubo 		btrfs_destroy_marked_extents(root, &t->dirty_pages,
3115acce952bSliubo 					     EXTENT_DIRTY);
3116acce952bSliubo 
3117acce952bSliubo 		btrfs_destroy_pinned_extent(root,
3118acce952bSliubo 					    root->fs_info->pinned_extents);
3119acce952bSliubo 
3120acce952bSliubo 		t->use_count = 0;
3121acce952bSliubo 		list_del_init(&t->list);
3122acce952bSliubo 		memset(t, 0, sizeof(*t));
3123acce952bSliubo 		kmem_cache_free(btrfs_transaction_cachep, t);
3124acce952bSliubo 	}
3125acce952bSliubo 
3126acce952bSliubo 	mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3127acce952bSliubo 	mutex_unlock(&root->fs_info->trans_mutex);
3128acce952bSliubo 
3129acce952bSliubo 	return 0;
3130acce952bSliubo }
3131acce952bSliubo 
3132d1310b2eSChris Mason static struct extent_io_ops btree_extent_io_ops = {
31334bef0848SChris Mason 	.write_cache_pages_lock_hook = btree_lock_page_hook,
3134ce9adaa5SChris Mason 	.readpage_end_io_hook = btree_readpage_end_io_hook,
31350b86a832SChris Mason 	.submit_bio_hook = btree_submit_bio_hook,
3136239b14b3SChris Mason 	/* note we're sharing with inode.c for the merge bio hook */
3137239b14b3SChris Mason 	.merge_bio_hook = btrfs_merge_bio_hook,
31380da5468fSChris Mason };
3139