xref: /openbmc/linux/fs/btrfs/disk-io.c (revision 21ad10cf3e9c1ef42e725e5c3a593c49f779a16b)
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/fs.h>
20 #include <linux/blkdev.h>
21 #include <linux/crc32c.h>
22 #include <linux/scatterlist.h>
23 #include <linux/swap.h>
24 #include <linux/radix-tree.h>
25 #include <linux/writeback.h>
26 #include <linux/buffer_head.h> // for block_sync_page
27 #include "ctree.h"
28 #include "disk-io.h"
29 #include "transaction.h"
30 #include "btrfs_inode.h"
31 #include "print-tree.h"
32 
#if 0
/*
 * Disabled sanity check: compare the block number reported for the extent
 * buffer with the one stored in its header.
 *
 * Returns 1 (after logging) when the two differ, 0 when they match.
 */
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
{
	if (extent_buffer_blocknr(buf) == btrfs_header_blocknr(buf))
		return 0;

	printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
	       (unsigned long long)extent_buffer_blocknr(buf),
	       (unsigned long long)btrfs_header_blocknr(buf));
	return 1;
}
#endif
45 
46 static struct extent_map_ops btree_extent_map_ops;
47 
48 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
49 					    u64 bytenr, u32 blocksize)
50 {
51 	struct inode *btree_inode = root->fs_info->btree_inode;
52 	struct extent_buffer *eb;
53 	eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
54 				bytenr, blocksize, GFP_NOFS);
55 	return eb;
56 }
57 
58 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
59 						 u64 bytenr, u32 blocksize)
60 {
61 	struct inode *btree_inode = root->fs_info->btree_inode;
62 	struct extent_buffer *eb;
63 
64 	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
65 				 bytenr, blocksize, NULL, GFP_NOFS);
66 	return eb;
67 }
68 
69 struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
70 				    size_t page_offset, u64 start, u64 end,
71 				    int create)
72 {
73 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
74 	struct extent_map *em;
75 	int ret;
76 
77 again:
78 	em = lookup_extent_mapping(em_tree, start, end);
79 	if (em) {
80 		goto out;
81 	}
82 	em = alloc_extent_map(GFP_NOFS);
83 	if (!em) {
84 		em = ERR_PTR(-ENOMEM);
85 		goto out;
86 	}
87 	em->start = 0;
88 	em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1;
89 	em->block_start = 0;
90 	em->block_end = em->end;
91 	em->bdev = inode->i_sb->s_bdev;
92 	ret = add_extent_mapping(em_tree, em);
93 	if (ret == -EEXIST) {
94 		free_extent_map(em);
95 		em = NULL;
96 		goto again;
97 	} else if (ret) {
98 		em = ERR_PTR(ret);
99 	}
100 out:
101 	return em;
102 }
103 
104 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
105 {
106 	return crc32c(seed, data, len);
107 }
108 
109 void btrfs_csum_final(u32 crc, char *result)
110 {
111 	*(__le32 *)result = ~cpu_to_le32(crc);
112 }
113 
114 static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
115 			   int verify)
116 {
117 	char result[BTRFS_CRC32_SIZE];
118 	unsigned long len;
119 	unsigned long cur_len;
120 	unsigned long offset = BTRFS_CSUM_SIZE;
121 	char *map_token = NULL;
122 	char *kaddr;
123 	unsigned long map_start;
124 	unsigned long map_len;
125 	int err;
126 	u32 crc = ~(u32)0;
127 
128 	len = buf->len - offset;
129 	while(len > 0) {
130 		err = map_private_extent_buffer(buf, offset, 32,
131 					&map_token, &kaddr,
132 					&map_start, &map_len, KM_USER0);
133 		if (err) {
134 			printk("failed to map extent buffer! %lu\n",
135 			       offset);
136 			return 1;
137 		}
138 		cur_len = min(len, map_len - (offset - map_start));
139 		crc = btrfs_csum_data(root, kaddr + offset - map_start,
140 				      crc, cur_len);
141 		len -= cur_len;
142 		offset += cur_len;
143 		unmap_extent_buffer(buf, map_token, KM_USER0);
144 	}
145 	btrfs_csum_final(crc, result);
146 
147 	if (verify) {
148 		int from_this_trans = 0;
149 
150 		if (root->fs_info->running_transaction &&
151 		    btrfs_header_generation(buf) ==
152 		    root->fs_info->running_transaction->transid)
153 			from_this_trans = 1;
154 
155 		/* FIXME, this is not good */
156 		if (from_this_trans == 0 &&
157 		    memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
158 			u32 val;
159 			u32 found = 0;
160 			memcpy(&found, result, BTRFS_CRC32_SIZE);
161 
162 			read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
163 			printk("btrfs: %s checksum verify failed on %llu "
164 			       "wanted %X found %X from_this_trans %d\n",
165 			       root->fs_info->sb->s_id,
166 			       buf->start, val, found, from_this_trans);
167 			return 1;
168 		}
169 	} else {
170 		write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
171 	}
172 	return 0;
173 }
174 
175 
176 int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
177 {
178 	struct extent_map_tree *tree;
179 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
180 	u64 found_start;
181 	int found_level;
182 	unsigned long len;
183 	struct extent_buffer *eb;
184 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
185 
186 	if (page->private == EXTENT_PAGE_PRIVATE)
187 		goto out;
188 	if (!page->private)
189 		goto out;
190 	len = page->private >> 2;
191 	if (len == 0) {
192 		WARN_ON(1);
193 	}
194 	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
195 	read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1);
196 	found_start = btrfs_header_bytenr(eb);
197 	if (found_start != start) {
198 		printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
199 		       start, found_start, len);
200 	}
201 	found_level = btrfs_header_level(eb);
202 	csum_tree_block(root, eb, 0);
203 	free_extent_buffer(eb);
204 out:
205 	return 0;
206 }
207 
208 static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
209 {
210 	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
211 
212 	csum_dirty_buffer(root, page);
213 	return 0;
214 }
215 
216 static int btree_writepage(struct page *page, struct writeback_control *wbc)
217 {
218 	struct extent_map_tree *tree;
219 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
220 	return extent_write_full_page(tree, page, btree_get_extent, wbc);
221 }
222 
223 static int btree_writepages(struct address_space *mapping,
224 			    struct writeback_control *wbc)
225 {
226 	struct extent_map_tree *tree;
227 	tree = &BTRFS_I(mapping->host)->extent_tree;
228 	if (wbc->sync_mode == WB_SYNC_NONE) {
229 		u64 num_dirty;
230 		u64 start = 0;
231 		unsigned long thresh = 96 * 1024 * 1024;
232 
233 		if (wbc->for_kupdate)
234 			return 0;
235 
236 		if (current_is_pdflush()) {
237 			thresh = 96 * 1024 * 1024;
238 		} else {
239 			thresh = 8 * 1024 * 1024;
240 		}
241 		num_dirty = count_range_bits(tree, &start, (u64)-1,
242 					     thresh, EXTENT_DIRTY);
243 		if (num_dirty < thresh) {
244 			return 0;
245 		}
246 	}
247 	return extent_writepages(tree, mapping, btree_get_extent, wbc);
248 }
249 
250 int btree_readpage(struct file *file, struct page *page)
251 {
252 	struct extent_map_tree *tree;
253 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
254 	return extent_read_full_page(tree, page, btree_get_extent);
255 }
256 
257 static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
258 {
259 	struct extent_map_tree *tree;
260 	int ret;
261 
262 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
263 	ret = try_release_extent_mapping(tree, page);
264 	if (ret == 1) {
265 		ClearPagePrivate(page);
266 		set_page_private(page, 0);
267 		page_cache_release(page);
268 	}
269 	return ret;
270 }
271 
272 static void btree_invalidatepage(struct page *page, unsigned long offset)
273 {
274 	struct extent_map_tree *tree;
275 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
276 	extent_invalidatepage(tree, page, offset);
277 	btree_releasepage(page, GFP_NOFS);
278 }
279 
#if 0
/*
 * Disabled buffer_head based writepage: attach buffers to the page if it has
 * none, checksum every dirty buffer, then write the page with
 * block_write_full_page().
 */
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *first;
	struct buffer_head *cur;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
					(1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	first = page_buffers(page);
	cur = first;
	do {
		if (buffer_dirty(cur))
			csum_tree_block(root, cur, 0);
		cur = cur->b_this_page;
	} while (cur != first);
	return block_write_full_page(page, btree_get_block, wbc);
}
#endif
300 
301 static struct address_space_operations btree_aops = {
302 	.readpage	= btree_readpage,
303 	.writepage	= btree_writepage,
304 	.writepages	= btree_writepages,
305 	.releasepage	= btree_releasepage,
306 	.invalidatepage = btree_invalidatepage,
307 	.sync_page	= block_sync_page,
308 };
309 
310 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
311 {
312 	struct extent_buffer *buf = NULL;
313 	struct inode *btree_inode = root->fs_info->btree_inode;
314 	int ret = 0;
315 
316 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
317 	if (!buf)
318 		return 0;
319 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
320 				 buf, 0, 0);
321 	free_extent_buffer(buf);
322 	return ret;
323 }
324 
325 struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
326 				      u32 blocksize)
327 {
328 	struct extent_buffer *buf = NULL;
329 	struct inode *btree_inode = root->fs_info->btree_inode;
330 	struct extent_map_tree *extent_tree;
331 	u64 end;
332 	int ret;
333 
334 	extent_tree = &BTRFS_I(btree_inode)->extent_tree;
335 
336 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
337 	if (!buf)
338 		return NULL;
339 	read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
340 				 buf, 0, 1);
341 
342 	if (buf->flags & EXTENT_CSUM)
343 		return buf;
344 
345 	end = buf->start + PAGE_CACHE_SIZE - 1;
346 	if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) {
347 		buf->flags |= EXTENT_CSUM;
348 		return buf;
349 	}
350 
351 	lock_extent(extent_tree, buf->start, end, GFP_NOFS);
352 
353 	if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) {
354 		buf->flags |= EXTENT_CSUM;
355 		goto out_unlock;
356 	}
357 
358 	ret = csum_tree_block(root, buf, 1);
359 	set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS);
360 	buf->flags |= EXTENT_CSUM;
361 
362 out_unlock:
363 	unlock_extent(extent_tree, buf->start, end, GFP_NOFS);
364 	return buf;
365 }
366 
367 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
368 		     struct extent_buffer *buf)
369 {
370 	struct inode *btree_inode = root->fs_info->btree_inode;
371 	clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
372 	return 0;
373 }
374 
375 int wait_on_tree_block_writeback(struct btrfs_root *root,
376 				 struct extent_buffer *buf)
377 {
378 	struct inode *btree_inode = root->fs_info->btree_inode;
379 	wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree,
380 					buf);
381 	return 0;
382 }
383 
384 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
385 			u32 stripesize, struct btrfs_root *root,
386 			struct btrfs_fs_info *fs_info,
387 			u64 objectid)
388 {
389 	root->node = NULL;
390 	root->inode = NULL;
391 	root->commit_root = NULL;
392 	root->sectorsize = sectorsize;
393 	root->nodesize = nodesize;
394 	root->leafsize = leafsize;
395 	root->stripesize = stripesize;
396 	root->ref_cows = 0;
397 	root->fs_info = fs_info;
398 	root->objectid = objectid;
399 	root->last_trans = 0;
400 	root->highest_inode = 0;
401 	root->last_inode_alloc = 0;
402 	root->name = NULL;
403 	root->in_sysfs = 0;
404 	memset(&root->root_key, 0, sizeof(root->root_key));
405 	memset(&root->root_item, 0, sizeof(root->root_item));
406 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
407 	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
408 	init_completion(&root->kobj_unregister);
409 	root->defrag_running = 0;
410 	root->defrag_level = 0;
411 	root->root_key.objectid = objectid;
412 	return 0;
413 }
414 
415 static int find_and_setup_root(struct btrfs_root *tree_root,
416 			       struct btrfs_fs_info *fs_info,
417 			       u64 objectid,
418 			       struct btrfs_root *root)
419 {
420 	int ret;
421 	u32 blocksize;
422 
423 	__setup_root(tree_root->nodesize, tree_root->leafsize,
424 		     tree_root->sectorsize, tree_root->stripesize,
425 		     root, fs_info, objectid);
426 	ret = btrfs_find_last_root(tree_root, objectid,
427 				   &root->root_item, &root->root_key);
428 	BUG_ON(ret);
429 
430 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
431 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
432 				     blocksize);
433 	BUG_ON(!root->node);
434 	return 0;
435 }
436 
437 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
438 					       struct btrfs_key *location)
439 {
440 	struct btrfs_root *root;
441 	struct btrfs_root *tree_root = fs_info->tree_root;
442 	struct btrfs_path *path;
443 	struct extent_buffer *l;
444 	u64 highest_inode;
445 	u32 blocksize;
446 	int ret = 0;
447 
448 	root = kzalloc(sizeof(*root), GFP_NOFS);
449 	if (!root)
450 		return ERR_PTR(-ENOMEM);
451 	if (location->offset == (u64)-1) {
452 		ret = find_and_setup_root(tree_root, fs_info,
453 					  location->objectid, root);
454 		if (ret) {
455 			kfree(root);
456 			return ERR_PTR(ret);
457 		}
458 		goto insert;
459 	}
460 
461 	__setup_root(tree_root->nodesize, tree_root->leafsize,
462 		     tree_root->sectorsize, tree_root->stripesize,
463 		     root, fs_info, location->objectid);
464 
465 	path = btrfs_alloc_path();
466 	BUG_ON(!path);
467 	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
468 	if (ret != 0) {
469 		if (ret > 0)
470 			ret = -ENOENT;
471 		goto out;
472 	}
473 	l = path->nodes[0];
474 	read_extent_buffer(l, &root->root_item,
475 	       btrfs_item_ptr_offset(l, path->slots[0]),
476 	       sizeof(root->root_item));
477 	memcpy(&root->root_key, location, sizeof(*location));
478 	ret = 0;
479 out:
480 	btrfs_release_path(root, path);
481 	btrfs_free_path(path);
482 	if (ret) {
483 		kfree(root);
484 		return ERR_PTR(ret);
485 	}
486 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
487 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
488 				     blocksize);
489 	BUG_ON(!root->node);
490 insert:
491 	root->ref_cows = 1;
492 	ret = btrfs_find_highest_inode(root, &highest_inode);
493 	if (ret == 0) {
494 		root->highest_inode = highest_inode;
495 		root->last_inode_alloc = highest_inode;
496 	}
497 	return root;
498 }
499 
500 struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
501 					u64 root_objectid)
502 {
503 	struct btrfs_root *root;
504 
505 	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
506 		return fs_info->tree_root;
507 	if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
508 		return fs_info->extent_root;
509 
510 	root = radix_tree_lookup(&fs_info->fs_roots_radix,
511 				 (unsigned long)root_objectid);
512 	return root;
513 }
514 
515 struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
516 					      struct btrfs_key *location)
517 {
518 	struct btrfs_root *root;
519 	int ret;
520 
521 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
522 		return fs_info->tree_root;
523 	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
524 		return fs_info->extent_root;
525 
526 	root = radix_tree_lookup(&fs_info->fs_roots_radix,
527 				 (unsigned long)location->objectid);
528 	if (root)
529 		return root;
530 
531 	root = btrfs_read_fs_root_no_radix(fs_info, location);
532 	if (IS_ERR(root))
533 		return root;
534 	ret = radix_tree_insert(&fs_info->fs_roots_radix,
535 				(unsigned long)root->root_key.objectid,
536 				root);
537 	if (ret) {
538 		free_extent_buffer(root->node);
539 		kfree(root);
540 		return ERR_PTR(ret);
541 	}
542 	ret = btrfs_find_dead_roots(fs_info->tree_root,
543 				    root->root_key.objectid, root);
544 	BUG_ON(ret);
545 
546 	return root;
547 }
548 
549 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
550 				      struct btrfs_key *location,
551 				      const char *name, int namelen)
552 {
553 	struct btrfs_root *root;
554 	int ret;
555 
556 	root = btrfs_read_fs_root_no_name(fs_info, location);
557 	if (!root)
558 		return NULL;
559 
560 	if (root->in_sysfs)
561 		return root;
562 
563 	ret = btrfs_set_root_name(root, name, namelen);
564 	if (ret) {
565 		free_extent_buffer(root->node);
566 		kfree(root);
567 		return ERR_PTR(ret);
568 	}
569 
570 	ret = btrfs_sysfs_add_root(root);
571 	if (ret) {
572 		free_extent_buffer(root->node);
573 		kfree(root->name);
574 		kfree(root);
575 		return ERR_PTR(ret);
576 	}
577 	root->in_sysfs = 1;
578 	return root;
579 }
#if 0
/*
 * Disabled: allocate a hasher of the given crypto @type and add it to
 * info->hashers under info->hash_lock.
 *
 * Returns 0, -ENOMEM if the hasher cannot be allocated, or -EINVAL if the
 * transform cannot be set up.
 */
static int add_hasher(struct btrfs_fs_info *info, char *type) {
	struct btrfs_hasher *new_hasher;

	new_hasher = kmalloc(sizeof(*new_hasher), GFP_NOFS);
	if (!new_hasher)
		return -ENOMEM;

	new_hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
	if (!new_hasher->hash_tfm) {
		kfree(new_hasher);
		return -EINVAL;
	}

	spin_lock(&info->hash_lock);
	list_add(&new_hasher->list, &info->hashers);
	spin_unlock(&info->hash_lock);

	return 0;
}
#endif
598 struct btrfs_root *open_ctree(struct super_block *sb)
599 {
600 	u32 sectorsize;
601 	u32 nodesize;
602 	u32 leafsize;
603 	u32 blocksize;
604 	u32 stripesize;
605 	struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
606 						 GFP_NOFS);
607 	struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
608 					       GFP_NOFS);
609 	struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
610 						GFP_NOFS);
611 	int ret;
612 	int err = -EIO;
613 	struct btrfs_super_block *disk_super;
614 
615 	if (!extent_root || !tree_root || !fs_info) {
616 		err = -ENOMEM;
617 		goto fail;
618 	}
619 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
620 	INIT_LIST_HEAD(&fs_info->trans_list);
621 	INIT_LIST_HEAD(&fs_info->dead_roots);
622 	INIT_LIST_HEAD(&fs_info->hashers);
623 	spin_lock_init(&fs_info->hash_lock);
624 	spin_lock_init(&fs_info->delalloc_lock);
625 
626 	memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
627 	init_completion(&fs_info->kobj_unregister);
628 	sb_set_blocksize(sb, 4096);
629 	fs_info->running_transaction = NULL;
630 	fs_info->last_trans_committed = 0;
631 	fs_info->tree_root = tree_root;
632 	fs_info->extent_root = extent_root;
633 	fs_info->sb = sb;
634 	fs_info->throttles = 0;
635 	fs_info->mount_opt = 0;
636 	fs_info->max_extent = (u64)-1;
637 	fs_info->delalloc_bytes = 0;
638 	fs_info->btree_inode = new_inode(sb);
639 	fs_info->btree_inode->i_ino = 1;
640 	fs_info->btree_inode->i_nlink = 1;
641 	fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
642 	fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
643 	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
644 			     fs_info->btree_inode->i_mapping,
645 			     GFP_NOFS);
646 	BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops;
647 
648 	extent_map_tree_init(&fs_info->free_space_cache,
649 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
650 	extent_map_tree_init(&fs_info->block_group_cache,
651 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
652 	extent_map_tree_init(&fs_info->pinned_extents,
653 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
654 	extent_map_tree_init(&fs_info->pending_del,
655 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
656 	extent_map_tree_init(&fs_info->extent_ins,
657 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
658 	fs_info->do_barriers = 1;
659 	fs_info->closing = 0;
660 	fs_info->total_pinned = 0;
661 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
662 	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
663 #else
664 	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
665 #endif
666 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
667 	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
668 	       sizeof(struct btrfs_key));
669 	insert_inode_hash(fs_info->btree_inode);
670 	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
671 
672 	mutex_init(&fs_info->trans_mutex);
673 	mutex_init(&fs_info->fs_mutex);
674 
675 #if 0
676 	ret = add_hasher(fs_info, "crc32c");
677 	if (ret) {
678 		printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
679 		err = -ENOMEM;
680 		goto fail_iput;
681 	}
682 #endif
683 	__setup_root(512, 512, 512, 512, tree_root,
684 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
685 
686 	fs_info->sb_buffer = read_tree_block(tree_root,
687 					     BTRFS_SUPER_INFO_OFFSET,
688 					     512);
689 
690 	if (!fs_info->sb_buffer)
691 		goto fail_iput;
692 
693 	read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
694 			   sizeof(fs_info->super_copy));
695 
696 	read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
697 			   (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
698 			   BTRFS_FSID_SIZE);
699 	disk_super = &fs_info->super_copy;
700 	if (!btrfs_super_root(disk_super))
701 		goto fail_sb_buffer;
702 
703 	nodesize = btrfs_super_nodesize(disk_super);
704 	leafsize = btrfs_super_leafsize(disk_super);
705 	sectorsize = btrfs_super_sectorsize(disk_super);
706 	stripesize = btrfs_super_stripesize(disk_super);
707 	tree_root->nodesize = nodesize;
708 	tree_root->leafsize = leafsize;
709 	tree_root->sectorsize = sectorsize;
710 	tree_root->stripesize = stripesize;
711 	sb_set_blocksize(sb, sectorsize);
712 
713 	i_size_write(fs_info->btree_inode,
714 		     btrfs_super_total_bytes(disk_super));
715 
716 	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
717 		    sizeof(disk_super->magic))) {
718 		printk("btrfs: valid FS not found on %s\n", sb->s_id);
719 		goto fail_sb_buffer;
720 	}
721 
722 	blocksize = btrfs_level_size(tree_root,
723 				     btrfs_super_root_level(disk_super));
724 
725 	tree_root->node = read_tree_block(tree_root,
726 					  btrfs_super_root(disk_super),
727 					  blocksize);
728 	if (!tree_root->node)
729 		goto fail_sb_buffer;
730 
731 	mutex_lock(&fs_info->fs_mutex);
732 
733 	ret = find_and_setup_root(tree_root, fs_info,
734 				  BTRFS_EXTENT_TREE_OBJECTID, extent_root);
735 	if (ret) {
736 		mutex_unlock(&fs_info->fs_mutex);
737 		goto fail_tree_root;
738 	}
739 
740 	btrfs_read_block_groups(extent_root);
741 
742 	fs_info->generation = btrfs_super_generation(disk_super) + 1;
743 	mutex_unlock(&fs_info->fs_mutex);
744 	return tree_root;
745 
746 fail_tree_root:
747 	free_extent_buffer(tree_root->node);
748 fail_sb_buffer:
749 	free_extent_buffer(fs_info->sb_buffer);
750 fail_iput:
751 	iput(fs_info->btree_inode);
752 fail:
753 	kfree(extent_root);
754 	kfree(tree_root);
755 	kfree(fs_info);
756 	return ERR_PTR(err);
757 }
758 
759 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
760 		      *root)
761 {
762 	int ret;
763 	struct extent_buffer *super = root->fs_info->sb_buffer;
764 	struct inode *btree_inode = root->fs_info->btree_inode;
765 	struct super_block *sb = root->fs_info->sb;
766 
767 	if (!btrfs_test_opt(root, NOBARRIER))
768 		blkdev_issue_flush(sb->s_bdev, NULL);
769 	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super);
770 	ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
771 				     super->start, super->len);
772 	if (!btrfs_test_opt(root, NOBARRIER))
773 		blkdev_issue_flush(sb->s_bdev, NULL);
774 	return ret;
775 }
776 
777 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
778 {
779 	radix_tree_delete(&fs_info->fs_roots_radix,
780 			  (unsigned long)root->root_key.objectid);
781 	btrfs_sysfs_del_root(root);
782 	if (root->inode)
783 		iput(root->inode);
784 	if (root->node)
785 		free_extent_buffer(root->node);
786 	if (root->commit_root)
787 		free_extent_buffer(root->commit_root);
788 	if (root->name)
789 		kfree(root->name);
790 	kfree(root);
791 	return 0;
792 }
793 
794 static int del_fs_roots(struct btrfs_fs_info *fs_info)
795 {
796 	int ret;
797 	struct btrfs_root *gang[8];
798 	int i;
799 
800 	while(1) {
801 		ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
802 					     (void **)gang, 0,
803 					     ARRAY_SIZE(gang));
804 		if (!ret)
805 			break;
806 		for (i = 0; i < ret; i++)
807 			btrfs_free_fs_root(fs_info, gang[i]);
808 	}
809 	return 0;
810 }
811 
812 int close_ctree(struct btrfs_root *root)
813 {
814 	int ret;
815 	struct btrfs_trans_handle *trans;
816 	struct btrfs_fs_info *fs_info = root->fs_info;
817 
818 	fs_info->closing = 1;
819 	btrfs_transaction_flush_work(root);
820 	mutex_lock(&fs_info->fs_mutex);
821 	btrfs_defrag_dirty_roots(root->fs_info);
822 	trans = btrfs_start_transaction(root, 1);
823 	ret = btrfs_commit_transaction(trans, root);
824 	/* run commit again to  drop the original snapshot */
825 	trans = btrfs_start_transaction(root, 1);
826 	btrfs_commit_transaction(trans, root);
827 	ret = btrfs_write_and_wait_transaction(NULL, root);
828 	BUG_ON(ret);
829 	write_ctree_super(NULL, root);
830 	mutex_unlock(&fs_info->fs_mutex);
831 
832 	if (fs_info->extent_root->node)
833 		free_extent_buffer(fs_info->extent_root->node);
834 
835 	if (fs_info->tree_root->node)
836 		free_extent_buffer(fs_info->tree_root->node);
837 
838 	free_extent_buffer(fs_info->sb_buffer);
839 
840 	btrfs_free_block_groups(root->fs_info);
841 	del_fs_roots(fs_info);
842 
843 	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
844 
845 	extent_map_tree_empty_lru(&fs_info->free_space_cache);
846 	extent_map_tree_empty_lru(&fs_info->block_group_cache);
847 	extent_map_tree_empty_lru(&fs_info->pinned_extents);
848 	extent_map_tree_empty_lru(&fs_info->pending_del);
849 	extent_map_tree_empty_lru(&fs_info->extent_ins);
850 	extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
851 
852 	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
853 
854 	iput(fs_info->btree_inode);
855 #if 0
856 	while(!list_empty(&fs_info->hashers)) {
857 		struct btrfs_hasher *hasher;
858 		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
859 				    hashers);
860 		list_del(&hasher->hashers);
861 		crypto_free_hash(&fs_info->hash_tfm);
862 		kfree(hasher);
863 	}
864 #endif
865 	kfree(fs_info->extent_root);
866 	kfree(fs_info->tree_root);
867 	return 0;
868 }
869 
870 int btrfs_buffer_uptodate(struct extent_buffer *buf)
871 {
872 	struct inode *btree_inode = buf->first_page->mapping->host;
873 	return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
874 }
875 
876 int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
877 {
878 	struct inode *btree_inode = buf->first_page->mapping->host;
879 	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
880 					  buf);
881 }
882 
883 void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
884 {
885 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
886 	u64 transid = btrfs_header_generation(buf);
887 	struct inode *btree_inode = root->fs_info->btree_inode;
888 
889 	if (transid != root->fs_info->generation) {
890 		printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
891 			(unsigned long long)buf->start,
892 			transid, root->fs_info->generation);
893 		WARN_ON(1);
894 	}
895 	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
896 }
897 
898 void btrfs_throttle(struct btrfs_root *root)
899 {
900 	if (root->fs_info->throttles)
901 		congestion_wait(WRITE, HZ/10);
902 }
903 
904 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
905 {
906 	balance_dirty_pages_ratelimited_nr(
907 			root->fs_info->btree_inode->i_mapping, 1);
908 }
909 
910 void btrfs_set_buffer_defrag(struct extent_buffer *buf)
911 {
912 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
913 	struct inode *btree_inode = root->fs_info->btree_inode;
914 	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
915 			buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
916 }
917 
918 void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
919 {
920 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
921 	struct inode *btree_inode = root->fs_info->btree_inode;
922 	set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
923 			buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
924 			GFP_NOFS);
925 }
926 
927 int btrfs_buffer_defrag(struct extent_buffer *buf)
928 {
929 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
930 	struct inode *btree_inode = root->fs_info->btree_inode;
931 	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
932 		     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
933 }
934 
935 int btrfs_buffer_defrag_done(struct extent_buffer *buf)
936 {
937 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
938 	struct inode *btree_inode = root->fs_info->btree_inode;
939 	return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
940 		     buf->start, buf->start + buf->len - 1,
941 		     EXTENT_DEFRAG_DONE, 0);
942 }
943 
944 int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
945 {
946 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
947 	struct inode *btree_inode = root->fs_info->btree_inode;
948 	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
949 		     buf->start, buf->start + buf->len - 1,
950 		     EXTENT_DEFRAG_DONE, GFP_NOFS);
951 }
952 
953 int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
954 {
955 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
956 	struct inode *btree_inode = root->fs_info->btree_inode;
957 	return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
958 		     buf->start, buf->start + buf->len - 1,
959 		     EXTENT_DEFRAG, GFP_NOFS);
960 }
961 
962 int btrfs_read_buffer(struct extent_buffer *buf)
963 {
964 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
965 	struct inode *btree_inode = root->fs_info->btree_inode;
966 	return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
967 					buf, 0, 1);
968 }
969 
970 static struct extent_map_ops btree_extent_map_ops = {
971 	.writepage_io_hook = btree_writepage_io_hook,
972 };
973