xref: /openbmc/linux/fs/btrfs/inode.c (revision 5f56406aabdf5444d040c5955effc665b1d0dbaf)
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include <linux/xattr.h>
36 #include "ctree.h"
37 #include "disk-io.h"
38 #include "transaction.h"
39 #include "btrfs_inode.h"
40 #include "ioctl.h"
41 #include "print-tree.h"
42 
/*
 * Bundles an inode number with the btrfs root it should be looked up in.
 * NOTE(review): the users of this struct are outside the visible part of
 * the file; presumably it is the argument handed to the iget helpers --
 * confirm against the callers.
 */
struct btrfs_iget_args {
	u64 ino;
	struct btrfs_root *root;
};
47 
/*
 * Operation tables that the inode setup code below points inodes at.
 * They are only declared here; their initializers are not in this part
 * of the file.
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct inode_operations btrfs_special_inode_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;
static struct extent_map_ops btrfs_extent_map_ops;

/*
 * kmem_cache handles.  Only btrfs_inode_cachep is private to this file;
 * the others have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
63 
/*
 * Table mapping the file-type bits of an inode mode to the matching
 * BTRFS_FT_* constant.  The index is (S_IFxxx >> S_SHIFT), i.e. the
 * S_IFMT bits shifted down by S_SHIFT; slots without an initializer
 * stay 0.
 */
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
};
74 
75 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
76 			   int for_del)
77 {
78 	u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
79 	u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
80 	u64 thresh;
81 	int ret = 0;
82 
83 	return 0;
84 
85 	if (for_del)
86 		thresh = total * 90;
87 	else
88 		thresh = total * 85;
89 
90 	do_div(thresh, 100);
91 
92 	spin_lock(&root->fs_info->delalloc_lock);
93 	if (used + root->fs_info->delalloc_bytes + num_required > thresh)
94 		ret = -ENOSPC;
95 	spin_unlock(&root->fs_info->delalloc_lock);
96 	return ret;
97 }
98 
99 static int cow_file_range(struct inode *inode, u64 start, u64 end)
100 {
101 	struct btrfs_root *root = BTRFS_I(inode)->root;
102 	struct btrfs_trans_handle *trans;
103 	u64 alloc_hint = 0;
104 	u64 num_bytes;
105 	u64 cur_alloc_size;
106 	u64 blocksize = root->sectorsize;
107 	struct btrfs_key ins;
108 	int ret;
109 
110 	trans = btrfs_start_transaction(root, 1);
111 	BUG_ON(!trans);
112 	btrfs_set_trans_block_group(trans, inode);
113 
114 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
115 	num_bytes = max(blocksize,  num_bytes);
116 	ret = btrfs_drop_extents(trans, root, inode,
117 				 start, start + num_bytes, start, &alloc_hint);
118 
119 	if (alloc_hint == EXTENT_MAP_INLINE)
120 		goto out;
121 
122 	while(num_bytes > 0) {
123 		cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
124 		ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
125 					 root->root_key.objectid,
126 					 trans->transid,
127 					 inode->i_ino, start, 0,
128 					 alloc_hint, (u64)-1, &ins, 1);
129 		if (ret) {
130 			WARN_ON(1);
131 			goto out;
132 		}
133 		ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
134 					       start, ins.objectid, ins.offset,
135 					       ins.offset);
136 		btrfs_check_file(root, inode);
137 		num_bytes -= cur_alloc_size;
138 		alloc_hint = ins.objectid + ins.offset;
139 		start += cur_alloc_size;
140 	}
141 	btrfs_add_ordered_inode(inode);
142 out:
143 	btrfs_end_transaction(trans, root);
144 	return ret;
145 }
146 
147 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
148 {
149 	u64 extent_start;
150 	u64 extent_end;
151 	u64 bytenr;
152 	u64 cow_end;
153 	u64 loops = 0;
154 	u64 total_fs_bytes;
155 	struct btrfs_root *root = BTRFS_I(inode)->root;
156 	struct extent_buffer *leaf;
157 	int found_type;
158 	struct btrfs_path *path;
159 	struct btrfs_file_extent_item *item;
160 	int ret;
161 	int err;
162 	struct btrfs_key found_key;
163 
164 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
165 	path = btrfs_alloc_path();
166 	BUG_ON(!path);
167 again:
168 	ret = btrfs_lookup_file_extent(NULL, root, path,
169 				       inode->i_ino, start, 0);
170 	if (ret < 0) {
171 		btrfs_free_path(path);
172 		return ret;
173 	}
174 
175 	cow_end = end;
176 	if (ret != 0) {
177 		if (path->slots[0] == 0)
178 			goto not_found;
179 		path->slots[0]--;
180 	}
181 
182 	leaf = path->nodes[0];
183 	item = btrfs_item_ptr(leaf, path->slots[0],
184 			      struct btrfs_file_extent_item);
185 
186 	/* are we inside the extent that was found? */
187 	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
188 	found_type = btrfs_key_type(&found_key);
189 	if (found_key.objectid != inode->i_ino ||
190 	    found_type != BTRFS_EXTENT_DATA_KEY) {
191 		goto not_found;
192 	}
193 
194 	found_type = btrfs_file_extent_type(leaf, item);
195 	extent_start = found_key.offset;
196 	if (found_type == BTRFS_FILE_EXTENT_REG) {
197 		u64 extent_num_bytes;
198 
199 		extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
200 		extent_end = extent_start + extent_num_bytes;
201 		err = 0;
202 
203 		if (loops && start != extent_start)
204 			goto not_found;
205 
206 		if (start < extent_start || start >= extent_end)
207 			goto not_found;
208 
209 		cow_end = min(end, extent_end - 1);
210 		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
211 		if (bytenr == 0)
212 			goto not_found;
213 
214 		/*
215 		 * we may be called by the resizer, make sure we're inside
216 		 * the limits of the FS
217 		 */
218 		if (bytenr + extent_num_bytes > total_fs_bytes)
219 			goto not_found;
220 
221 		if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
222 			goto not_found;
223 		}
224 
225 		start = extent_end;
226 	} else {
227 		goto not_found;
228 	}
229 loop:
230 	if (start > end) {
231 		btrfs_free_path(path);
232 		return 0;
233 	}
234 	btrfs_release_path(root, path);
235 	loops++;
236 	goto again;
237 
238 not_found:
239 	cow_file_range(inode, start, cow_end);
240 	start = cow_end + 1;
241 	goto loop;
242 }
243 
244 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
245 {
246 	struct btrfs_root *root = BTRFS_I(inode)->root;
247 	u64 num_bytes;
248 	int ret;
249 	mutex_lock(&root->fs_info->fs_mutex);
250 	if (btrfs_test_opt(root, NODATACOW) ||
251 	    btrfs_test_flag(inode, NODATACOW))
252 		ret = run_delalloc_nocow(inode, start, end);
253 	else
254 		ret = cow_file_range(inode, start, end);
255 
256 	spin_lock(&root->fs_info->delalloc_lock);
257 	num_bytes = end + 1 - start;
258 	if (root->fs_info->delalloc_bytes < num_bytes) {
259 		printk("delalloc accounting error total %llu sub %llu\n",
260 		       root->fs_info->delalloc_bytes, num_bytes);
261 	} else {
262 		root->fs_info->delalloc_bytes -= num_bytes;
263 	}
264 	spin_unlock(&root->fs_info->delalloc_lock);
265 
266 	mutex_unlock(&root->fs_info->fs_mutex);
267 	return ret;
268 }
269 
270 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
271 {
272 	struct inode *inode = page->mapping->host;
273 	struct btrfs_root *root = BTRFS_I(inode)->root;
274 	struct btrfs_trans_handle *trans;
275 	char *kaddr;
276 	int ret = 0;
277 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
278 	size_t offset = start - page_start;
279 	if (btrfs_test_opt(root, NODATASUM) ||
280 	    btrfs_test_flag(inode, NODATASUM))
281 		return 0;
282 	mutex_lock(&root->fs_info->fs_mutex);
283 	trans = btrfs_start_transaction(root, 1);
284 	btrfs_set_trans_block_group(trans, inode);
285 	kaddr = kmap(page);
286 	btrfs_csum_file_block(trans, root, inode, inode->i_ino,
287 			      start, kaddr + offset, end - start + 1);
288 	kunmap(page);
289 	ret = btrfs_end_transaction(trans, root);
290 	BUG_ON(ret);
291 	mutex_unlock(&root->fs_info->fs_mutex);
292 	return ret;
293 }
294 
295 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
296 {
297 	int ret = 0;
298 	struct inode *inode = page->mapping->host;
299 	struct btrfs_root *root = BTRFS_I(inode)->root;
300 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
301 	struct btrfs_csum_item *item;
302 	struct btrfs_path *path = NULL;
303 	u32 csum;
304 	if (btrfs_test_opt(root, NODATASUM) ||
305 	    btrfs_test_flag(inode, NODATASUM))
306 		return 0;
307 	mutex_lock(&root->fs_info->fs_mutex);
308 	path = btrfs_alloc_path();
309 	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
310 	if (IS_ERR(item)) {
311 		ret = PTR_ERR(item);
312 		/* a csum that isn't present is a preallocated region. */
313 		if (ret == -ENOENT || ret == -EFBIG)
314 			ret = 0;
315 		csum = 0;
316 		goto out;
317 	}
318 	read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
319 			   BTRFS_CRC32_SIZE);
320 	set_state_private(em_tree, start, csum);
321 out:
322 	if (path)
323 		btrfs_free_path(path);
324 	mutex_unlock(&root->fs_info->fs_mutex);
325 	return ret;
326 }
327 
328 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
329 {
330 	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
331 	struct inode *inode = page->mapping->host;
332 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
333 	char *kaddr;
334 	u64 private;
335 	int ret;
336 	struct btrfs_root *root = BTRFS_I(inode)->root;
337 	u32 csum = ~(u32)0;
338 	unsigned long flags;
339 	if (btrfs_test_opt(root, NODATASUM) ||
340 	    btrfs_test_flag(inode, NODATASUM))
341 		return 0;
342 	ret = get_state_private(em_tree, start, &private);
343 	local_irq_save(flags);
344 	kaddr = kmap_atomic(page, KM_IRQ0);
345 	if (ret) {
346 		goto zeroit;
347 	}
348 	csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
349 	btrfs_csum_final(csum, (char *)&csum);
350 	if (csum != private) {
351 		goto zeroit;
352 	}
353 	kunmap_atomic(kaddr, KM_IRQ0);
354 	local_irq_restore(flags);
355 	return 0;
356 
357 zeroit:
358 	printk("btrfs csum failed ino %lu off %llu\n",
359 	       page->mapping->host->i_ino, (unsigned long long)start);
360 	memset(kaddr + offset, 1, end - start + 1);
361 	flush_dcache_page(page);
362 	kunmap_atomic(kaddr, KM_IRQ0);
363 	local_irq_restore(flags);
364 	return 0;
365 }
366 
367 void btrfs_read_locked_inode(struct inode *inode)
368 {
369 	struct btrfs_path *path;
370 	struct extent_buffer *leaf;
371 	struct btrfs_inode_item *inode_item;
372 	struct btrfs_inode_timespec *tspec;
373 	struct btrfs_root *root = BTRFS_I(inode)->root;
374 	struct btrfs_key location;
375 	u64 alloc_group_block;
376 	u32 rdev;
377 	int ret;
378 
379 	path = btrfs_alloc_path();
380 	BUG_ON(!path);
381 	mutex_lock(&root->fs_info->fs_mutex);
382 	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
383 
384 	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
385 	if (ret)
386 		goto make_bad;
387 
388 	leaf = path->nodes[0];
389 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
390 				    struct btrfs_inode_item);
391 
392 	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
393 	inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
394 	inode->i_uid = btrfs_inode_uid(leaf, inode_item);
395 	inode->i_gid = btrfs_inode_gid(leaf, inode_item);
396 	inode->i_size = btrfs_inode_size(leaf, inode_item);
397 
398 	tspec = btrfs_inode_atime(inode_item);
399 	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
400 	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
401 
402 	tspec = btrfs_inode_mtime(inode_item);
403 	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
404 	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
405 
406 	tspec = btrfs_inode_ctime(inode_item);
407 	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
408 	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
409 
410 	inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
411 	inode->i_generation = btrfs_inode_generation(leaf, inode_item);
412 	inode->i_rdev = 0;
413 	rdev = btrfs_inode_rdev(leaf, inode_item);
414 
415 	alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
416 	BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
417 						       alloc_group_block);
418 	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
419 	if (!BTRFS_I(inode)->block_group) {
420 		BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
421 						         NULL, 0, 0, 0);
422 	}
423 	btrfs_free_path(path);
424 	inode_item = NULL;
425 
426 	mutex_unlock(&root->fs_info->fs_mutex);
427 
428 	switch (inode->i_mode & S_IFMT) {
429 	case S_IFREG:
430 		inode->i_mapping->a_ops = &btrfs_aops;
431 		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
432 		inode->i_fop = &btrfs_file_operations;
433 		inode->i_op = &btrfs_file_inode_operations;
434 		break;
435 	case S_IFDIR:
436 		inode->i_fop = &btrfs_dir_file_operations;
437 		if (root == root->fs_info->tree_root)
438 			inode->i_op = &btrfs_dir_ro_inode_operations;
439 		else
440 			inode->i_op = &btrfs_dir_inode_operations;
441 		break;
442 	case S_IFLNK:
443 		inode->i_op = &btrfs_symlink_inode_operations;
444 		inode->i_mapping->a_ops = &btrfs_symlink_aops;
445 		break;
446 	default:
447 		init_special_inode(inode, inode->i_mode, rdev);
448 		break;
449 	}
450 	return;
451 
452 make_bad:
453 	btrfs_release_path(root, path);
454 	btrfs_free_path(path);
455 	mutex_unlock(&root->fs_info->fs_mutex);
456 	make_bad_inode(inode);
457 }
458 
459 static void fill_inode_item(struct extent_buffer *leaf,
460 			    struct btrfs_inode_item *item,
461 			    struct inode *inode)
462 {
463 	btrfs_set_inode_uid(leaf, item, inode->i_uid);
464 	btrfs_set_inode_gid(leaf, item, inode->i_gid);
465 	btrfs_set_inode_size(leaf, item, inode->i_size);
466 	btrfs_set_inode_mode(leaf, item, inode->i_mode);
467 	btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
468 
469 	btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
470 			       inode->i_atime.tv_sec);
471 	btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
472 				inode->i_atime.tv_nsec);
473 
474 	btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
475 			       inode->i_mtime.tv_sec);
476 	btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
477 				inode->i_mtime.tv_nsec);
478 
479 	btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
480 			       inode->i_ctime.tv_sec);
481 	btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
482 				inode->i_ctime.tv_nsec);
483 
484 	btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
485 	btrfs_set_inode_generation(leaf, item, inode->i_generation);
486 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
487 	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
488 	btrfs_set_inode_block_group(leaf, item,
489 				    BTRFS_I(inode)->block_group->key.objectid);
490 }
491 
492 int btrfs_update_inode(struct btrfs_trans_handle *trans,
493 			      struct btrfs_root *root,
494 			      struct inode *inode)
495 {
496 	struct btrfs_inode_item *inode_item;
497 	struct btrfs_path *path;
498 	struct extent_buffer *leaf;
499 	int ret;
500 
501 	path = btrfs_alloc_path();
502 	BUG_ON(!path);
503 	ret = btrfs_lookup_inode(trans, root, path,
504 				 &BTRFS_I(inode)->location, 1);
505 	if (ret) {
506 		if (ret > 0)
507 			ret = -ENOENT;
508 		goto failed;
509 	}
510 
511 	leaf = path->nodes[0];
512 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
513 				  struct btrfs_inode_item);
514 
515 	fill_inode_item(leaf, inode_item, inode);
516 	btrfs_mark_buffer_dirty(leaf);
517 	btrfs_set_inode_last_trans(trans, inode);
518 	ret = 0;
519 failed:
520 	btrfs_release_path(root, path);
521 	btrfs_free_path(path);
522 	return ret;
523 }
524 
525 
526 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
527 			      struct btrfs_root *root,
528 			      struct inode *dir,
529 			      struct dentry *dentry)
530 {
531 	struct btrfs_path *path;
532 	const char *name = dentry->d_name.name;
533 	int name_len = dentry->d_name.len;
534 	int ret = 0;
535 	struct extent_buffer *leaf;
536 	struct btrfs_dir_item *di;
537 	struct btrfs_key key;
538 
539 	path = btrfs_alloc_path();
540 	if (!path) {
541 		ret = -ENOMEM;
542 		goto err;
543 	}
544 
545 	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
546 				    name, name_len, -1);
547 	if (IS_ERR(di)) {
548 		ret = PTR_ERR(di);
549 		goto err;
550 	}
551 	if (!di) {
552 		ret = -ENOENT;
553 		goto err;
554 	}
555 	leaf = path->nodes[0];
556 	btrfs_dir_item_key_to_cpu(leaf, di, &key);
557 	ret = btrfs_delete_one_dir_name(trans, root, path, di);
558 	if (ret)
559 		goto err;
560 	btrfs_release_path(root, path);
561 
562 	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
563 					 key.objectid, name, name_len, -1);
564 	if (IS_ERR(di)) {
565 		ret = PTR_ERR(di);
566 		goto err;
567 	}
568 	if (!di) {
569 		ret = -ENOENT;
570 		goto err;
571 	}
572 	ret = btrfs_delete_one_dir_name(trans, root, path, di);
573 
574 	dentry->d_inode->i_ctime = dir->i_ctime;
575 	ret = btrfs_del_inode_ref(trans, root, name, name_len,
576 				  dentry->d_inode->i_ino,
577 				  dentry->d_parent->d_inode->i_ino);
578 	if (ret) {
579 		printk("failed to delete reference to %.*s, "
580 		       "inode %lu parent %lu\n", name_len, name,
581 		       dentry->d_inode->i_ino,
582 		       dentry->d_parent->d_inode->i_ino);
583 	}
584 err:
585 	btrfs_free_path(path);
586 	if (!ret) {
587 		dir->i_size -= name_len * 2;
588 		dir->i_mtime = dir->i_ctime = CURRENT_TIME;
589 		btrfs_update_inode(trans, root, dir);
590 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
591 		dentry->d_inode->i_nlink--;
592 #else
593 		drop_nlink(dentry->d_inode);
594 #endif
595 		ret = btrfs_update_inode(trans, root, dentry->d_inode);
596 		dir->i_sb->s_dirt = 1;
597 	}
598 	return ret;
599 }
600 
601 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
602 {
603 	struct btrfs_root *root;
604 	struct btrfs_trans_handle *trans;
605 	struct inode *inode = dentry->d_inode;
606 	int ret;
607 	unsigned long nr = 0;
608 
609 	root = BTRFS_I(dir)->root;
610 	mutex_lock(&root->fs_info->fs_mutex);
611 
612 	ret = btrfs_check_free_space(root, 1, 1);
613 	if (ret)
614 		goto fail;
615 
616 	trans = btrfs_start_transaction(root, 1);
617 
618 	btrfs_set_trans_block_group(trans, dir);
619 	ret = btrfs_unlink_trans(trans, root, dir, dentry);
620 	nr = trans->blocks_used;
621 
622 	if (inode->i_nlink == 0) {
623 		int found;
624 		/* if the inode isn't linked anywhere,
625 		 * we don't need to worry about
626 		 * data=ordered
627 		 */
628 		found = btrfs_del_ordered_inode(inode);
629 		if (found == 1) {
630 			atomic_dec(&inode->i_count);
631 		}
632 	}
633 
634 	btrfs_end_transaction(trans, root);
635 fail:
636 	mutex_unlock(&root->fs_info->fs_mutex);
637 	btrfs_btree_balance_dirty(root, nr);
638 	btrfs_throttle(root);
639 	return ret;
640 }
641 
642 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
643 {
644 	struct inode *inode = dentry->d_inode;
645 	int err = 0;
646 	int ret;
647 	struct btrfs_root *root = BTRFS_I(dir)->root;
648 	struct btrfs_trans_handle *trans;
649 	unsigned long nr = 0;
650 
651 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
652 		return -ENOTEMPTY;
653 
654 	mutex_lock(&root->fs_info->fs_mutex);
655 	ret = btrfs_check_free_space(root, 1, 1);
656 	if (ret)
657 		goto fail;
658 
659 	trans = btrfs_start_transaction(root, 1);
660 	btrfs_set_trans_block_group(trans, dir);
661 
662 	/* now the directory is empty */
663 	err = btrfs_unlink_trans(trans, root, dir, dentry);
664 	if (!err) {
665 		inode->i_size = 0;
666 	}
667 
668 	nr = trans->blocks_used;
669 	ret = btrfs_end_transaction(trans, root);
670 fail:
671 	mutex_unlock(&root->fs_info->fs_mutex);
672 	btrfs_btree_balance_dirty(root, nr);
673 	btrfs_throttle(root);
674 
675 	if (ret && !err)
676 		err = ret;
677 	return err;
678 }
679 
680 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
681 			    struct btrfs_root *root,
682 			    struct inode *inode)
683 {
684 	struct btrfs_path *path;
685 	int ret;
686 
687 	clear_inode(inode);
688 
689 	path = btrfs_alloc_path();
690 	BUG_ON(!path);
691 	ret = btrfs_lookup_inode(trans, root, path,
692 				 &BTRFS_I(inode)->location, -1);
693 	if (ret > 0)
694 		ret = -ENOENT;
695 	if (!ret)
696 		ret = btrfs_del_item(trans, root, path);
697 	btrfs_free_path(path);
698 	return ret;
699 }
700 
701 /*
702  * this can truncate away extent items, csum items and directory items.
703  * It starts at a high offset and removes keys until it can't find
704  * any higher than i_size.
705  *
706  * csum items that cross the new i_size are truncated to the new size
707  * as well.
708  */
709 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
710 				   struct btrfs_root *root,
711 				   struct inode *inode)
712 {
713 	int ret;
714 	struct btrfs_path *path;
715 	struct btrfs_key key;
716 	struct btrfs_key found_key;
717 	u32 found_type;
718 	struct extent_buffer *leaf;
719 	struct btrfs_file_extent_item *fi;
720 	u64 extent_start = 0;
721 	u64 extent_num_bytes = 0;
722 	u64 item_end = 0;
723 	u64 root_gen = 0;
724 	u64 root_owner = 0;
725 	int found_extent;
726 	int del_item;
727 	int extent_type = -1;
728 
729 	btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
730 	path = btrfs_alloc_path();
731 	path->reada = -1;
732 	BUG_ON(!path);
733 
734 	/* FIXME, add redo link to tree so we don't leak on crash */
735 	key.objectid = inode->i_ino;
736 	key.offset = (u64)-1;
737 	key.type = (u8)-1;
738 
739 	while(1) {
740 		btrfs_init_path(path);
741 		fi = NULL;
742 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
743 		if (ret < 0) {
744 			goto error;
745 		}
746 		if (ret > 0) {
747 			BUG_ON(path->slots[0] == 0);
748 			path->slots[0]--;
749 		}
750 		leaf = path->nodes[0];
751 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
752 		found_type = btrfs_key_type(&found_key);
753 
754 		if (found_key.objectid != inode->i_ino)
755 			break;
756 
757 		if (found_type != BTRFS_CSUM_ITEM_KEY &&
758 		    found_type != BTRFS_DIR_ITEM_KEY &&
759 		    found_type != BTRFS_DIR_INDEX_KEY &&
760 		    found_type != BTRFS_EXTENT_DATA_KEY)
761 			break;
762 
763 		item_end = found_key.offset;
764 		if (found_type == BTRFS_EXTENT_DATA_KEY) {
765 			fi = btrfs_item_ptr(leaf, path->slots[0],
766 					    struct btrfs_file_extent_item);
767 			extent_type = btrfs_file_extent_type(leaf, fi);
768 			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
769 				item_end +=
770 				    btrfs_file_extent_num_bytes(leaf, fi);
771 			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
772 				struct btrfs_item *item = btrfs_item_nr(leaf,
773 							        path->slots[0]);
774 				item_end += btrfs_file_extent_inline_len(leaf,
775 									 item);
776 			}
777 			item_end--;
778 		}
779 		if (found_type == BTRFS_CSUM_ITEM_KEY) {
780 			ret = btrfs_csum_truncate(trans, root, path,
781 						  inode->i_size);
782 			BUG_ON(ret);
783 		}
784 		if (item_end < inode->i_size) {
785 			if (found_type == BTRFS_DIR_ITEM_KEY) {
786 				found_type = BTRFS_INODE_ITEM_KEY;
787 			} else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
788 				found_type = BTRFS_CSUM_ITEM_KEY;
789 			} else if (found_type) {
790 				found_type--;
791 			} else {
792 				break;
793 			}
794 			btrfs_set_key_type(&key, found_type);
795 			btrfs_release_path(root, path);
796 			continue;
797 		}
798 		if (found_key.offset >= inode->i_size)
799 			del_item = 1;
800 		else
801 			del_item = 0;
802 		found_extent = 0;
803 
804 		/* FIXME, shrink the extent if the ref count is only 1 */
805 		if (found_type != BTRFS_EXTENT_DATA_KEY)
806 			goto delete;
807 
808 		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
809 			u64 num_dec;
810 			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
811 			if (!del_item) {
812 				u64 orig_num_bytes =
813 					btrfs_file_extent_num_bytes(leaf, fi);
814 				extent_num_bytes = inode->i_size -
815 					found_key.offset + root->sectorsize - 1;
816 				btrfs_set_file_extent_num_bytes(leaf, fi,
817 							 extent_num_bytes);
818 				num_dec = (orig_num_bytes -
819 					   extent_num_bytes) >> 9;
820 				if (extent_start != 0) {
821 					inode->i_blocks -= num_dec;
822 				}
823 				btrfs_mark_buffer_dirty(leaf);
824 			} else {
825 				extent_num_bytes =
826 					btrfs_file_extent_disk_num_bytes(leaf,
827 									 fi);
828 				/* FIXME blocksize != 4096 */
829 				num_dec = btrfs_file_extent_num_bytes(leaf,
830 								       fi) >> 9;
831 				if (extent_start != 0) {
832 					found_extent = 1;
833 					inode->i_blocks -= num_dec;
834 				}
835 				root_gen = btrfs_header_generation(leaf);
836 				root_owner = btrfs_header_owner(leaf);
837 			}
838 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
839 			   !del_item) {
840 			u32 newsize = inode->i_size - found_key.offset;
841 			newsize = btrfs_file_extent_calc_inline_size(newsize);
842 			ret = btrfs_truncate_item(trans, root, path,
843 						  newsize, 1);
844 			BUG_ON(ret);
845 		}
846 delete:
847 		if (del_item) {
848 			ret = btrfs_del_item(trans, root, path);
849 			if (ret)
850 				goto error;
851 		} else {
852 			break;
853 		}
854 		btrfs_release_path(root, path);
855 		if (found_extent) {
856 			ret = btrfs_free_extent(trans, root, extent_start,
857 						extent_num_bytes,
858 						root_owner,
859 						root_gen, inode->i_ino,
860 						found_key.offset, 0);
861 			BUG_ON(ret);
862 		}
863 	}
864 	ret = 0;
865 error:
866 	btrfs_release_path(root, path);
867 	btrfs_free_path(path);
868 	inode->i_sb->s_dirt = 1;
869 	return ret;
870 }
871 
872 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
873 			      size_t zero_start)
874 {
875 	char *kaddr;
876 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
877 	struct btrfs_root *root = BTRFS_I(inode)->root;
878 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
879 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
880 	u64 existing_delalloc;
881 	u64 delalloc_start;
882 	int ret = 0;
883 
884 	WARN_ON(!PageLocked(page));
885 	set_page_extent_mapped(page);
886 
887 	lock_extent(em_tree, page_start, page_end, GFP_NOFS);
888 	delalloc_start = page_start;
889 	existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree,
890 					     &delalloc_start, page_end,
891 					     PAGE_CACHE_SIZE, EXTENT_DELALLOC);
892 	set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
893 			    page_end, GFP_NOFS);
894 
895 	spin_lock(&root->fs_info->delalloc_lock);
896 	root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc;
897 	spin_unlock(&root->fs_info->delalloc_lock);
898 
899 	if (zero_start != PAGE_CACHE_SIZE) {
900 		kaddr = kmap(page);
901 		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
902 		flush_dcache_page(page);
903 		kunmap(page);
904 	}
905 	set_page_dirty(page);
906 	unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
907 
908 	return ret;
909 }
910 
911 /*
912  * taken from block_truncate_page, but does cow as it zeros out
913  * any bytes left in the last page in the file.
914  */
915 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
916 {
917 	struct inode *inode = mapping->host;
918 	struct btrfs_root *root = BTRFS_I(inode)->root;
919 	u32 blocksize = root->sectorsize;
920 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
921 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
922 	struct page *page;
923 	int ret = 0;
924 	u64 page_start;
925 
926 	if ((offset & (blocksize - 1)) == 0)
927 		goto out;
928 
929 	ret = -ENOMEM;
930 	page = grab_cache_page(mapping, index);
931 	if (!page)
932 		goto out;
933 	if (!PageUptodate(page)) {
934 		ret = btrfs_readpage(NULL, page);
935 		lock_page(page);
936 		if (!PageUptodate(page)) {
937 			ret = -EIO;
938 			goto out;
939 		}
940 	}
941 	page_start = (u64)page->index << PAGE_CACHE_SHIFT;
942 
943 	ret = btrfs_cow_one_page(inode, page, offset);
944 
945 	unlock_page(page);
946 	page_cache_release(page);
947 out:
948 	return ret;
949 }
950 
/*
 * setattr entry point.
 *
 * After the generic permission check, growing a regular file gets
 * special treatment: the tail of the current last page is zeroed via
 * btrfs_truncate_page() and a hole extent (disk bytenr 0) is inserted
 * for the new range, unless btrfs_drop_extents() reports the range as
 * inline.  All other attribute changes, and the final size update, go
 * through inode_setattr().
 *
 * Returns 0 on success or a negative error.  A failing free-space check
 * or hole insertion returns before inode_setattr() is called.
 */
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int err;

	err = inode_change_ok(inode, attr);
	if (err)
		return err;

	if (S_ISREG(inode->i_mode) &&
	    attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
		struct btrfs_trans_handle *trans;
		struct btrfs_root *root = BTRFS_I(inode)->root;
		struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;

		u64 mask = root->sectorsize - 1;
		/* current size rounded up to the next sector boundary */
		u64 pos = (inode->i_size + mask) & ~mask;
		/* last byte of the sector that contains the new size */
		u64 block_end = attr->ia_size | mask;
		u64 hole_start;
		u64 hole_size;
		u64 alloc_hint = 0;

		/* the new size still fits in the last sector: no hole needed */
		if (attr->ia_size <= pos)
			goto out;

		/*
		 * NOTE(review): when i_size is not sector aligned the hole
		 * starts one full sector past the rounded-up position --
		 * confirm that skipping that sector is intended.
		 */
		if (pos != inode->i_size)
			hole_start = pos + root->sectorsize;
		else
			hole_start = pos;

		mutex_lock(&root->fs_info->fs_mutex);
		err = btrfs_check_free_space(root, 1, 0);
		mutex_unlock(&root->fs_info->fs_mutex);
		if (err)
			goto fail;

		/* the result of zeroing the old tail page is not checked */
		btrfs_truncate_page(inode->i_mapping, inode->i_size);

		lock_extent(em_tree, pos, block_end, GFP_NOFS);
		/*
		 * NOTE(review): block_end is an inclusive last byte, so this
		 * length looks one byte short of a sector multiple -- confirm.
		 */
		hole_size = block_end - hole_start;

		mutex_lock(&root->fs_info->fs_mutex);
		trans = btrfs_start_transaction(root, 1);
		btrfs_set_trans_block_group(trans, inode);
		/*
		 * NOTE(review): this err is overwritten by the insert below
		 * when the range is not inline.
		 */
		err = btrfs_drop_extents(trans, root, inode,
					 pos, block_end, pos,
					 &alloc_hint);

		if (alloc_hint != EXTENT_MAP_INLINE) {
			/* disk bytenr 0 / disk size 0 describes a hole */
			err = btrfs_insert_file_extent(trans, root,
						       inode->i_ino,
						       hole_start, 0, 0,
						       hole_size);
			btrfs_check_file(root, inode);
		}
		btrfs_end_transaction(trans, root);
		mutex_unlock(&root->fs_info->fs_mutex);
		unlock_extent(em_tree, pos, block_end, GFP_NOFS);
		if (err)
			return err;
	}
out:
	err = inode_setattr(inode, attr);
fail:
	return err;
}
1017 
1018 void btrfs_put_inode(struct inode *inode)
1019 {
1020 	int ret;
1021 
1022 	if (!BTRFS_I(inode)->ordered_trans) {
1023 		return;
1024 	}
1025 
1026 	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1027 	    mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1028 		return;
1029 
1030 	ret = btrfs_del_ordered_inode(inode);
1031 	if (ret == 1) {
1032 		atomic_dec(&inode->i_count);
1033 	}
1034 }
1035 
1036 void btrfs_delete_inode(struct inode *inode)
1037 {
1038 	struct btrfs_trans_handle *trans;
1039 	struct btrfs_root *root = BTRFS_I(inode)->root;
1040 	unsigned long nr;
1041 	int ret;
1042 
1043 	truncate_inode_pages(&inode->i_data, 0);
1044 	if (is_bad_inode(inode)) {
1045 		goto no_delete;
1046 	}
1047 
1048 	inode->i_size = 0;
1049 	mutex_lock(&root->fs_info->fs_mutex);
1050 	trans = btrfs_start_transaction(root, 1);
1051 
1052 	btrfs_set_trans_block_group(trans, inode);
1053 	ret = btrfs_truncate_in_trans(trans, root, inode);
1054 	if (ret)
1055 		goto no_delete_lock;
1056 	ret = btrfs_delete_xattrs(trans, root, inode);
1057 	if (ret)
1058 		goto no_delete_lock;
1059 	ret = btrfs_free_inode(trans, root, inode);
1060 	if (ret)
1061 		goto no_delete_lock;
1062 	nr = trans->blocks_used;
1063 
1064 	btrfs_end_transaction(trans, root);
1065 	mutex_unlock(&root->fs_info->fs_mutex);
1066 	btrfs_btree_balance_dirty(root, nr);
1067 	btrfs_throttle(root);
1068 	return;
1069 
1070 no_delete_lock:
1071 	nr = trans->blocks_used;
1072 	btrfs_end_transaction(trans, root);
1073 	mutex_unlock(&root->fs_info->fs_mutex);
1074 	btrfs_btree_balance_dirty(root, nr);
1075 	btrfs_throttle(root);
1076 no_delete:
1077 	clear_inode(inode);
1078 }
1079 
1080 /*
1081  * this returns the key found in the dir entry in the location pointer.
1082  * If no dir entries were found, location->objectid is 0.
1083  */
1084 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1085 			       struct btrfs_key *location)
1086 {
1087 	const char *name = dentry->d_name.name;
1088 	int namelen = dentry->d_name.len;
1089 	struct btrfs_dir_item *di;
1090 	struct btrfs_path *path;
1091 	struct btrfs_root *root = BTRFS_I(dir)->root;
1092 	int ret = 0;
1093 
1094 	if (namelen == 1 && strcmp(name, ".") == 0) {
1095 		location->objectid = dir->i_ino;
1096 		location->type = BTRFS_INODE_ITEM_KEY;
1097 		location->offset = 0;
1098 		return 0;
1099 	}
1100 	path = btrfs_alloc_path();
1101 	BUG_ON(!path);
1102 
1103 	if (namelen == 2 && strcmp(name, "..") == 0) {
1104 		struct btrfs_key key;
1105 		struct extent_buffer *leaf;
1106 		u32 nritems;
1107 		int slot;
1108 
1109 		key.objectid = dir->i_ino;
1110 		btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1111 		key.offset = 0;
1112 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1113 		BUG_ON(ret == 0);
1114 		ret = 0;
1115 
1116 		leaf = path->nodes[0];
1117 		slot = path->slots[0];
1118 		nritems = btrfs_header_nritems(leaf);
1119 		if (slot >= nritems)
1120 			goto out_err;
1121 
1122 		btrfs_item_key_to_cpu(leaf, &key, slot);
1123 		if (key.objectid != dir->i_ino ||
1124 		    key.type != BTRFS_INODE_REF_KEY) {
1125 			goto out_err;
1126 		}
1127 		location->objectid = key.offset;
1128 		location->type = BTRFS_INODE_ITEM_KEY;
1129 		location->offset = 0;
1130 		goto out;
1131 	}
1132 
1133 	di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1134 				    namelen, 0);
1135 	if (IS_ERR(di))
1136 		ret = PTR_ERR(di);
1137 	if (!di || IS_ERR(di)) {
1138 		goto out_err;
1139 	}
1140 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1141 out:
1142 	btrfs_free_path(path);
1143 	return ret;
1144 out_err:
1145 	location->objectid = 0;
1146 	goto out;
1147 }
1148 
1149 /*
1150  * when we hit a tree root in a directory, the btrfs part of the inode
1151  * needs to be changed to reflect the root directory of the tree root.  This
1152  * is kind of like crossing a mount point.
1153  */
1154 static int fixup_tree_root_location(struct btrfs_root *root,
1155 			     struct btrfs_key *location,
1156 			     struct btrfs_root **sub_root,
1157 			     struct dentry *dentry)
1158 {
1159 	struct btrfs_path *path;
1160 	struct btrfs_root_item *ri;
1161 
1162 	if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1163 		return 0;
1164 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1165 		return 0;
1166 
1167 	path = btrfs_alloc_path();
1168 	BUG_ON(!path);
1169 	mutex_lock(&root->fs_info->fs_mutex);
1170 
1171 	*sub_root = btrfs_read_fs_root(root->fs_info, location,
1172 					dentry->d_name.name,
1173 					dentry->d_name.len);
1174 	if (IS_ERR(*sub_root))
1175 		return PTR_ERR(*sub_root);
1176 
1177 	ri = &(*sub_root)->root_item;
1178 	location->objectid = btrfs_root_dirid(ri);
1179 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1180 	location->offset = 0;
1181 
1182 	btrfs_free_path(path);
1183 	mutex_unlock(&root->fs_info->fs_mutex);
1184 	return 0;
1185 }
1186 
1187 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1188 {
1189 	struct btrfs_iget_args *args = p;
1190 	inode->i_ino = args->ino;
1191 	BTRFS_I(inode)->root = args->root;
1192 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1193 			     inode->i_mapping, GFP_NOFS);
1194 	return 0;
1195 }
1196 
1197 static int btrfs_find_actor(struct inode *inode, void *opaque)
1198 {
1199 	struct btrfs_iget_args *args = opaque;
1200 	return (args->ino == inode->i_ino &&
1201 		args->root == BTRFS_I(inode)->root);
1202 }
1203 
1204 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1205 			    u64 root_objectid)
1206 {
1207 	struct btrfs_iget_args args;
1208 	args.ino = objectid;
1209 	args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1210 
1211 	if (!args.root)
1212 		return NULL;
1213 
1214 	return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1215 }
1216 
1217 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1218 				struct btrfs_root *root)
1219 {
1220 	struct inode *inode;
1221 	struct btrfs_iget_args args;
1222 	args.ino = objectid;
1223 	args.root = root;
1224 
1225 	inode = iget5_locked(s, objectid, btrfs_find_actor,
1226 			     btrfs_init_locked_inode,
1227 			     (void *)&args);
1228 	return inode;
1229 }
1230 
1231 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1232 				   struct nameidata *nd)
1233 {
1234 	struct inode * inode;
1235 	struct btrfs_inode *bi = BTRFS_I(dir);
1236 	struct btrfs_root *root = bi->root;
1237 	struct btrfs_root *sub_root = root;
1238 	struct btrfs_key location;
1239 	int ret;
1240 
1241 	if (dentry->d_name.len > BTRFS_NAME_LEN)
1242 		return ERR_PTR(-ENAMETOOLONG);
1243 
1244 	mutex_lock(&root->fs_info->fs_mutex);
1245 	ret = btrfs_inode_by_name(dir, dentry, &location);
1246 	mutex_unlock(&root->fs_info->fs_mutex);
1247 
1248 	if (ret < 0)
1249 		return ERR_PTR(ret);
1250 
1251 	inode = NULL;
1252 	if (location.objectid) {
1253 		ret = fixup_tree_root_location(root, &location, &sub_root,
1254 						dentry);
1255 		if (ret < 0)
1256 			return ERR_PTR(ret);
1257 		if (ret > 0)
1258 			return ERR_PTR(-ENOENT);
1259 		inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1260 					  sub_root);
1261 		if (!inode)
1262 			return ERR_PTR(-EACCES);
1263 		if (inode->i_state & I_NEW) {
1264 			/* the inode and parent dir are two different roots */
1265 			if (sub_root != root) {
1266 				igrab(inode);
1267 				sub_root->inode = inode;
1268 			}
1269 			BTRFS_I(inode)->root = sub_root;
1270 			memcpy(&BTRFS_I(inode)->location, &location,
1271 			       sizeof(location));
1272 			btrfs_read_locked_inode(inode);
1273 			unlock_new_inode(inode);
1274 		}
1275 	}
1276 	return d_splice_alias(inode, dentry);
1277 }
1278 
1279 static unsigned char btrfs_filetype_table[] = {
1280 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1281 };
1282 
1283 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1284 {
1285 	struct inode *inode = filp->f_dentry->d_inode;
1286 	struct btrfs_root *root = BTRFS_I(inode)->root;
1287 	struct btrfs_item *item;
1288 	struct btrfs_dir_item *di;
1289 	struct btrfs_key key;
1290 	struct btrfs_key found_key;
1291 	struct btrfs_path *path;
1292 	int ret;
1293 	u32 nritems;
1294 	struct extent_buffer *leaf;
1295 	int slot;
1296 	int advance;
1297 	unsigned char d_type;
1298 	int over = 0;
1299 	u32 di_cur;
1300 	u32 di_total;
1301 	u32 di_len;
1302 	int key_type = BTRFS_DIR_INDEX_KEY;
1303 	char tmp_name[32];
1304 	char *name_ptr;
1305 	int name_len;
1306 
1307 	/* FIXME, use a real flag for deciding about the key type */
1308 	if (root->fs_info->tree_root == root)
1309 		key_type = BTRFS_DIR_ITEM_KEY;
1310 
1311 	/* special case for "." */
1312 	if (filp->f_pos == 0) {
1313 		over = filldir(dirent, ".", 1,
1314 			       1, inode->i_ino,
1315 			       DT_DIR);
1316 		if (over)
1317 			return 0;
1318 		filp->f_pos = 1;
1319 	}
1320 
1321 	mutex_lock(&root->fs_info->fs_mutex);
1322 	key.objectid = inode->i_ino;
1323 	path = btrfs_alloc_path();
1324 	path->reada = 2;
1325 
1326 	/* special case for .., just use the back ref */
1327 	if (filp->f_pos == 1) {
1328 		btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1329 		key.offset = 0;
1330 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1331 		BUG_ON(ret == 0);
1332 		leaf = path->nodes[0];
1333 		slot = path->slots[0];
1334 		nritems = btrfs_header_nritems(leaf);
1335 		if (slot >= nritems) {
1336 			btrfs_release_path(root, path);
1337 			goto read_dir_items;
1338 		}
1339 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
1340 		btrfs_release_path(root, path);
1341 		if (found_key.objectid != key.objectid ||
1342 		    found_key.type != BTRFS_INODE_REF_KEY)
1343 			goto read_dir_items;
1344 		over = filldir(dirent, "..", 2,
1345 			       2, found_key.offset, DT_DIR);
1346 		if (over)
1347 			goto nopos;
1348 		filp->f_pos = 2;
1349 	}
1350 
1351 read_dir_items:
1352 	btrfs_set_key_type(&key, key_type);
1353 	key.offset = filp->f_pos;
1354 
1355 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1356 	if (ret < 0)
1357 		goto err;
1358 	advance = 0;
1359 	while(1) {
1360 		leaf = path->nodes[0];
1361 		nritems = btrfs_header_nritems(leaf);
1362 		slot = path->slots[0];
1363 		if (advance || slot >= nritems) {
1364 			if (slot >= nritems -1) {
1365 				ret = btrfs_next_leaf(root, path);
1366 				if (ret)
1367 					break;
1368 				leaf = path->nodes[0];
1369 				nritems = btrfs_header_nritems(leaf);
1370 				slot = path->slots[0];
1371 			} else {
1372 				slot++;
1373 				path->slots[0]++;
1374 			}
1375 		}
1376 		advance = 1;
1377 		item = btrfs_item_nr(leaf, slot);
1378 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
1379 
1380 		if (found_key.objectid != key.objectid)
1381 			break;
1382 		if (btrfs_key_type(&found_key) != key_type)
1383 			break;
1384 		if (found_key.offset < filp->f_pos)
1385 			continue;
1386 
1387 		filp->f_pos = found_key.offset;
1388 		advance = 1;
1389 		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1390 		di_cur = 0;
1391 		di_total = btrfs_item_size(leaf, item);
1392 		while(di_cur < di_total) {
1393 			struct btrfs_key location;
1394 
1395 			name_len = btrfs_dir_name_len(leaf, di);
1396 			if (name_len < 32) {
1397 				name_ptr = tmp_name;
1398 			} else {
1399 				name_ptr = kmalloc(name_len, GFP_NOFS);
1400 				BUG_ON(!name_ptr);
1401 			}
1402 			read_extent_buffer(leaf, name_ptr,
1403 					   (unsigned long)(di + 1), name_len);
1404 
1405 			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1406 			btrfs_dir_item_key_to_cpu(leaf, di, &location);
1407 			over = filldir(dirent, name_ptr, name_len,
1408 				       found_key.offset,
1409 				       location.objectid,
1410 				       d_type);
1411 
1412 			if (name_ptr != tmp_name)
1413 				kfree(name_ptr);
1414 
1415 			if (over)
1416 				goto nopos;
1417 			di_len = btrfs_dir_name_len(leaf, di) +
1418 				btrfs_dir_data_len(leaf, di) +sizeof(*di);
1419 			di_cur += di_len;
1420 			di = (struct btrfs_dir_item *)((char *)di + di_len);
1421 		}
1422 	}
1423 	filp->f_pos++;
1424 nopos:
1425 	ret = 0;
1426 err:
1427 	btrfs_release_path(root, path);
1428 	btrfs_free_path(path);
1429 	mutex_unlock(&root->fs_info->fs_mutex);
1430 	return ret;
1431 }
1432 
1433 int btrfs_write_inode(struct inode *inode, int wait)
1434 {
1435 	struct btrfs_root *root = BTRFS_I(inode)->root;
1436 	struct btrfs_trans_handle *trans;
1437 	int ret = 0;
1438 
1439 	if (wait) {
1440 		mutex_lock(&root->fs_info->fs_mutex);
1441 		trans = btrfs_start_transaction(root, 1);
1442 		btrfs_set_trans_block_group(trans, inode);
1443 		ret = btrfs_commit_transaction(trans, root);
1444 		mutex_unlock(&root->fs_info->fs_mutex);
1445 	}
1446 	return ret;
1447 }
1448 
1449 /*
1450  * This is somewhat expensive, updating the tree every time the
1451  * inode changes.  But, it is most likely to find the inode in cache.
1452  * FIXME, needs more benchmarking...there are no reasons other than performance
1453  * to keep or drop this code.
1454  */
1455 void btrfs_dirty_inode(struct inode *inode)
1456 {
1457 	struct btrfs_root *root = BTRFS_I(inode)->root;
1458 	struct btrfs_trans_handle *trans;
1459 
1460 	mutex_lock(&root->fs_info->fs_mutex);
1461 	trans = btrfs_start_transaction(root, 1);
1462 	btrfs_set_trans_block_group(trans, inode);
1463 	btrfs_update_inode(trans, root, inode);
1464 	btrfs_end_transaction(trans, root);
1465 	mutex_unlock(&root->fs_info->fs_mutex);
1466 }
1467 
1468 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1469 				     struct btrfs_root *root,
1470 				     u64 objectid,
1471 				     struct btrfs_block_group_cache *group,
1472 				     int mode)
1473 {
1474 	struct inode *inode;
1475 	struct btrfs_inode_item *inode_item;
1476 	struct btrfs_key *location;
1477 	struct btrfs_path *path;
1478 	int ret;
1479 	int owner;
1480 
1481 	path = btrfs_alloc_path();
1482 	BUG_ON(!path);
1483 
1484 	inode = new_inode(root->fs_info->sb);
1485 	if (!inode)
1486 		return ERR_PTR(-ENOMEM);
1487 
1488 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1489 			     inode->i_mapping, GFP_NOFS);
1490 	BTRFS_I(inode)->root = root;
1491 
1492 	if (mode & S_IFDIR)
1493 		owner = 0;
1494 	else
1495 		owner = 1;
1496 	group = btrfs_find_block_group(root, group, 0, 0, owner);
1497 	BTRFS_I(inode)->block_group = group;
1498 	BTRFS_I(inode)->flags = 0;
1499 	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
1500 	if (ret)
1501 		goto fail;
1502 
1503 	inode->i_uid = current->fsuid;
1504 	inode->i_gid = current->fsgid;
1505 	inode->i_mode = mode;
1506 	inode->i_ino = objectid;
1507 	inode->i_blocks = 0;
1508 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1509 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1510 				  struct btrfs_inode_item);
1511 	fill_inode_item(path->nodes[0], inode_item, inode);
1512 	btrfs_mark_buffer_dirty(path->nodes[0]);
1513 	btrfs_free_path(path);
1514 
1515 	location = &BTRFS_I(inode)->location;
1516 	location->objectid = objectid;
1517 	location->offset = 0;
1518 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1519 
1520 	insert_inode_hash(inode);
1521 	return inode;
1522 fail:
1523 	btrfs_free_path(path);
1524 	return ERR_PTR(ret);
1525 }
1526 
1527 static inline u8 btrfs_inode_type(struct inode *inode)
1528 {
1529 	return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1530 }
1531 
1532 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1533 			    struct dentry *dentry, struct inode *inode)
1534 {
1535 	int ret;
1536 	struct btrfs_key key;
1537 	struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1538 	struct inode *parent_inode;
1539 
1540 	key.objectid = inode->i_ino;
1541 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1542 	key.offset = 0;
1543 
1544 	ret = btrfs_insert_dir_item(trans, root,
1545 				    dentry->d_name.name, dentry->d_name.len,
1546 				    dentry->d_parent->d_inode->i_ino,
1547 				    &key, btrfs_inode_type(inode));
1548 	if (ret == 0) {
1549 		ret = btrfs_insert_inode_ref(trans, root,
1550 				     dentry->d_name.name,
1551 				     dentry->d_name.len,
1552 				     inode->i_ino,
1553 				     dentry->d_parent->d_inode->i_ino);
1554 		parent_inode = dentry->d_parent->d_inode;
1555 		parent_inode->i_size += dentry->d_name.len * 2;
1556 		parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1557 		ret = btrfs_update_inode(trans, root,
1558 					 dentry->d_parent->d_inode);
1559 	}
1560 	return ret;
1561 }
1562 
1563 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1564 			    struct dentry *dentry, struct inode *inode)
1565 {
1566 	int err = btrfs_add_link(trans, dentry, inode);
1567 	if (!err) {
1568 		d_instantiate(dentry, inode);
1569 		return 0;
1570 	}
1571 	if (err > 0)
1572 		err = -EEXIST;
1573 	return err;
1574 }
1575 
1576 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1577 			int mode, dev_t rdev)
1578 {
1579 	struct btrfs_trans_handle *trans;
1580 	struct btrfs_root *root = BTRFS_I(dir)->root;
1581 	struct inode *inode = NULL;
1582 	int err;
1583 	int drop_inode = 0;
1584 	u64 objectid;
1585 	unsigned long nr = 0;
1586 
1587 	if (!new_valid_dev(rdev))
1588 		return -EINVAL;
1589 
1590 	mutex_lock(&root->fs_info->fs_mutex);
1591 	err = btrfs_check_free_space(root, 1, 0);
1592 	if (err)
1593 		goto fail;
1594 
1595 	trans = btrfs_start_transaction(root, 1);
1596 	btrfs_set_trans_block_group(trans, dir);
1597 
1598 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1599 	if (err) {
1600 		err = -ENOSPC;
1601 		goto out_unlock;
1602 	}
1603 
1604 	inode = btrfs_new_inode(trans, root, objectid,
1605 				BTRFS_I(dir)->block_group, mode);
1606 	err = PTR_ERR(inode);
1607 	if (IS_ERR(inode))
1608 		goto out_unlock;
1609 
1610 	btrfs_set_trans_block_group(trans, inode);
1611 	err = btrfs_add_nondir(trans, dentry, inode);
1612 	if (err)
1613 		drop_inode = 1;
1614 	else {
1615 		inode->i_op = &btrfs_special_inode_operations;
1616 		init_special_inode(inode, inode->i_mode, rdev);
1617 		btrfs_update_inode(trans, root, inode);
1618 	}
1619 	dir->i_sb->s_dirt = 1;
1620 	btrfs_update_inode_block_group(trans, inode);
1621 	btrfs_update_inode_block_group(trans, dir);
1622 out_unlock:
1623 	nr = trans->blocks_used;
1624 	btrfs_end_transaction(trans, root);
1625 fail:
1626 	mutex_unlock(&root->fs_info->fs_mutex);
1627 
1628 	if (drop_inode) {
1629 		inode_dec_link_count(inode);
1630 		iput(inode);
1631 	}
1632 	btrfs_btree_balance_dirty(root, nr);
1633 	btrfs_throttle(root);
1634 	return err;
1635 }
1636 
1637 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1638 			int mode, struct nameidata *nd)
1639 {
1640 	struct btrfs_trans_handle *trans;
1641 	struct btrfs_root *root = BTRFS_I(dir)->root;
1642 	struct inode *inode = NULL;
1643 	int err;
1644 	int drop_inode = 0;
1645 	unsigned long nr = 0;
1646 	u64 objectid;
1647 
1648 	mutex_lock(&root->fs_info->fs_mutex);
1649 	err = btrfs_check_free_space(root, 1, 0);
1650 	if (err)
1651 		goto fail;
1652 	trans = btrfs_start_transaction(root, 1);
1653 	btrfs_set_trans_block_group(trans, dir);
1654 
1655 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1656 	if (err) {
1657 		err = -ENOSPC;
1658 		goto out_unlock;
1659 	}
1660 
1661 	inode = btrfs_new_inode(trans, root, objectid,
1662 				BTRFS_I(dir)->block_group, mode);
1663 	err = PTR_ERR(inode);
1664 	if (IS_ERR(inode))
1665 		goto out_unlock;
1666 
1667 	btrfs_set_trans_block_group(trans, inode);
1668 	err = btrfs_add_nondir(trans, dentry, inode);
1669 	if (err)
1670 		drop_inode = 1;
1671 	else {
1672 		inode->i_mapping->a_ops = &btrfs_aops;
1673 		inode->i_fop = &btrfs_file_operations;
1674 		inode->i_op = &btrfs_file_inode_operations;
1675 		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1676 				     inode->i_mapping, GFP_NOFS);
1677 		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
1678 	}
1679 	dir->i_sb->s_dirt = 1;
1680 	btrfs_update_inode_block_group(trans, inode);
1681 	btrfs_update_inode_block_group(trans, dir);
1682 out_unlock:
1683 	nr = trans->blocks_used;
1684 	btrfs_end_transaction(trans, root);
1685 fail:
1686 	mutex_unlock(&root->fs_info->fs_mutex);
1687 
1688 	if (drop_inode) {
1689 		inode_dec_link_count(inode);
1690 		iput(inode);
1691 	}
1692 	btrfs_btree_balance_dirty(root, nr);
1693 	btrfs_throttle(root);
1694 	return err;
1695 }
1696 
1697 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1698 		      struct dentry *dentry)
1699 {
1700 	struct btrfs_trans_handle *trans;
1701 	struct btrfs_root *root = BTRFS_I(dir)->root;
1702 	struct inode *inode = old_dentry->d_inode;
1703 	unsigned long nr = 0;
1704 	int err;
1705 	int drop_inode = 0;
1706 
1707 	if (inode->i_nlink == 0)
1708 		return -ENOENT;
1709 
1710 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1711 	inode->i_nlink++;
1712 #else
1713 	inc_nlink(inode);
1714 #endif
1715 	mutex_lock(&root->fs_info->fs_mutex);
1716 	err = btrfs_check_free_space(root, 1, 0);
1717 	if (err)
1718 		goto fail;
1719 	trans = btrfs_start_transaction(root, 1);
1720 
1721 	btrfs_set_trans_block_group(trans, dir);
1722 	atomic_inc(&inode->i_count);
1723 	err = btrfs_add_nondir(trans, dentry, inode);
1724 
1725 	if (err)
1726 		drop_inode = 1;
1727 
1728 	dir->i_sb->s_dirt = 1;
1729 	btrfs_update_inode_block_group(trans, dir);
1730 	err = btrfs_update_inode(trans, root, inode);
1731 
1732 	if (err)
1733 		drop_inode = 1;
1734 
1735 	nr = trans->blocks_used;
1736 	btrfs_end_transaction(trans, root);
1737 fail:
1738 	mutex_unlock(&root->fs_info->fs_mutex);
1739 
1740 	if (drop_inode) {
1741 		inode_dec_link_count(inode);
1742 		iput(inode);
1743 	}
1744 	btrfs_btree_balance_dirty(root, nr);
1745 	btrfs_throttle(root);
1746 	return err;
1747 }
1748 
1749 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1750 {
1751 	struct inode *inode;
1752 	struct btrfs_trans_handle *trans;
1753 	struct btrfs_root *root = BTRFS_I(dir)->root;
1754 	int err = 0;
1755 	int drop_on_err = 0;
1756 	u64 objectid;
1757 	unsigned long nr = 1;
1758 
1759 	mutex_lock(&root->fs_info->fs_mutex);
1760 	err = btrfs_check_free_space(root, 1, 0);
1761 	if (err)
1762 		goto out_unlock;
1763 
1764 	trans = btrfs_start_transaction(root, 1);
1765 	btrfs_set_trans_block_group(trans, dir);
1766 
1767 	if (IS_ERR(trans)) {
1768 		err = PTR_ERR(trans);
1769 		goto out_unlock;
1770 	}
1771 
1772 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1773 	if (err) {
1774 		err = -ENOSPC;
1775 		goto out_unlock;
1776 	}
1777 
1778 	inode = btrfs_new_inode(trans, root, objectid,
1779 				BTRFS_I(dir)->block_group, S_IFDIR | mode);
1780 	if (IS_ERR(inode)) {
1781 		err = PTR_ERR(inode);
1782 		goto out_fail;
1783 	}
1784 
1785 	drop_on_err = 1;
1786 	inode->i_op = &btrfs_dir_inode_operations;
1787 	inode->i_fop = &btrfs_dir_file_operations;
1788 	btrfs_set_trans_block_group(trans, inode);
1789 
1790 	inode->i_size = 0;
1791 	err = btrfs_update_inode(trans, root, inode);
1792 	if (err)
1793 		goto out_fail;
1794 
1795 	err = btrfs_add_link(trans, dentry, inode);
1796 	if (err)
1797 		goto out_fail;
1798 
1799 	d_instantiate(dentry, inode);
1800 	drop_on_err = 0;
1801 	dir->i_sb->s_dirt = 1;
1802 	btrfs_update_inode_block_group(trans, inode);
1803 	btrfs_update_inode_block_group(trans, dir);
1804 
1805 out_fail:
1806 	nr = trans->blocks_used;
1807 	btrfs_end_transaction(trans, root);
1808 
1809 out_unlock:
1810 	mutex_unlock(&root->fs_info->fs_mutex);
1811 	if (drop_on_err)
1812 		iput(inode);
1813 	btrfs_btree_balance_dirty(root, nr);
1814 	btrfs_throttle(root);
1815 	return err;
1816 }
1817 
1818 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1819 				    size_t page_offset, u64 start, u64 end,
1820 				    int create)
1821 {
1822 	int ret;
1823 	int err = 0;
1824 	u64 bytenr;
1825 	u64 extent_start = 0;
1826 	u64 extent_end = 0;
1827 	u64 objectid = inode->i_ino;
1828 	u32 found_type;
1829 	int failed_insert = 0;
1830 	struct btrfs_path *path;
1831 	struct btrfs_root *root = BTRFS_I(inode)->root;
1832 	struct btrfs_file_extent_item *item;
1833 	struct extent_buffer *leaf;
1834 	struct btrfs_key found_key;
1835 	struct extent_map *em = NULL;
1836 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1837 	struct btrfs_trans_handle *trans = NULL;
1838 
1839 	path = btrfs_alloc_path();
1840 	BUG_ON(!path);
1841 	mutex_lock(&root->fs_info->fs_mutex);
1842 
1843 again:
1844 	em = lookup_extent_mapping(em_tree, start, end);
1845 	if (em) {
1846 		if (em->start > start) {
1847 			printk("get_extent start %Lu em start %Lu\n",
1848 			       start, em->start);
1849 			WARN_ON(1);
1850 		}
1851 		goto out;
1852 	}
1853 	if (!em) {
1854 		em = alloc_extent_map(GFP_NOFS);
1855 		if (!em) {
1856 			err = -ENOMEM;
1857 			goto out;
1858 		}
1859 		em->start = EXTENT_MAP_HOLE;
1860 		em->end = EXTENT_MAP_HOLE;
1861 	}
1862 	em->bdev = inode->i_sb->s_bdev;
1863 	ret = btrfs_lookup_file_extent(trans, root, path,
1864 				       objectid, start, trans != NULL);
1865 	if (ret < 0) {
1866 		err = ret;
1867 		goto out;
1868 	}
1869 
1870 	if (ret != 0) {
1871 		if (path->slots[0] == 0)
1872 			goto not_found;
1873 		path->slots[0]--;
1874 	}
1875 
1876 	leaf = path->nodes[0];
1877 	item = btrfs_item_ptr(leaf, path->slots[0],
1878 			      struct btrfs_file_extent_item);
1879 	/* are we inside the extent that was found? */
1880 	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1881 	found_type = btrfs_key_type(&found_key);
1882 	if (found_key.objectid != objectid ||
1883 	    found_type != BTRFS_EXTENT_DATA_KEY) {
1884 		goto not_found;
1885 	}
1886 
1887 	found_type = btrfs_file_extent_type(leaf, item);
1888 	extent_start = found_key.offset;
1889 	if (found_type == BTRFS_FILE_EXTENT_REG) {
1890 		extent_end = extent_start +
1891 		       btrfs_file_extent_num_bytes(leaf, item);
1892 		err = 0;
1893 		if (start < extent_start || start >= extent_end) {
1894 			em->start = start;
1895 			if (start < extent_start) {
1896 				if (end < extent_start)
1897 					goto not_found;
1898 				em->end = extent_end - 1;
1899 			} else {
1900 				em->end = end;
1901 			}
1902 			goto not_found_em;
1903 		}
1904 		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
1905 		if (bytenr == 0) {
1906 			em->start = extent_start;
1907 			em->end = extent_end - 1;
1908 			em->block_start = EXTENT_MAP_HOLE;
1909 			em->block_end = EXTENT_MAP_HOLE;
1910 			goto insert;
1911 		}
1912 		bytenr += btrfs_file_extent_offset(leaf, item);
1913 		em->block_start = bytenr;
1914 		em->block_end = em->block_start +
1915 			btrfs_file_extent_num_bytes(leaf, item) - 1;
1916 		em->start = extent_start;
1917 		em->end = extent_end - 1;
1918 		goto insert;
1919 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1920 		unsigned long ptr;
1921 		char *map;
1922 		size_t size;
1923 		size_t extent_offset;
1924 		size_t copy_size;
1925 
1926 		size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
1927 						    path->slots[0]));
1928 		extent_end = (extent_start + size - 1) |
1929 			((u64)root->sectorsize - 1);
1930 		if (start < extent_start || start >= extent_end) {
1931 			em->start = start;
1932 			if (start < extent_start) {
1933 				if (end < extent_start)
1934 					goto not_found;
1935 				em->end = extent_end;
1936 			} else {
1937 				em->end = end;
1938 			}
1939 			goto not_found_em;
1940 		}
1941 		em->block_start = EXTENT_MAP_INLINE;
1942 		em->block_end = EXTENT_MAP_INLINE;
1943 
1944 		if (!page) {
1945 			em->start = extent_start;
1946 			em->end = extent_start + size - 1;
1947 			goto out;
1948 		}
1949 
1950 		extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) -
1951 			extent_start + page_offset;
1952 		copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
1953 				size - extent_offset);
1954 		em->start = extent_start + extent_offset;
1955 		em->end = (em->start + copy_size -1) |
1956 			((u64)root->sectorsize -1);
1957 		map = kmap(page);
1958 		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
1959 		if (create == 0 && !PageUptodate(page)) {
1960 			read_extent_buffer(leaf, map + page_offset, ptr,
1961 					   copy_size);
1962 			flush_dcache_page(page);
1963 		} else if (create && PageUptodate(page)) {
1964 			if (!trans) {
1965 				kunmap(page);
1966 				free_extent_map(em);
1967 				em = NULL;
1968 				btrfs_release_path(root, path);
1969 				trans = btrfs_start_transaction(root, 1);
1970 				goto again;
1971 			}
1972 			write_extent_buffer(leaf, map + page_offset, ptr,
1973 					    copy_size);
1974 			btrfs_mark_buffer_dirty(leaf);
1975 		}
1976 		kunmap(page);
1977 		set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS);
1978 		goto insert;
1979 	} else {
1980 		printk("unkknown found_type %d\n", found_type);
1981 		WARN_ON(1);
1982 	}
1983 not_found:
1984 	em->start = start;
1985 	em->end = end;
1986 not_found_em:
1987 	em->block_start = EXTENT_MAP_HOLE;
1988 	em->block_end = EXTENT_MAP_HOLE;
1989 insert:
1990 	btrfs_release_path(root, path);
1991 	if (em->start > start || em->end < start) {
1992 		printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end);
1993 		err = -EIO;
1994 		goto out;
1995 	}
1996 	ret = add_extent_mapping(em_tree, em);
1997 	if (ret == -EEXIST) {
1998 		free_extent_map(em);
1999 		em = NULL;
2000 		if (0 && failed_insert == 1) {
2001 			btrfs_drop_extent_cache(inode, start, end);
2002 		}
2003 		failed_insert++;
2004 		if (failed_insert > 5) {
2005 			printk("failing to insert %Lu %Lu\n", start, end);
2006 			err = -EIO;
2007 			goto out;
2008 		}
2009 		goto again;
2010 	}
2011 	err = 0;
2012 out:
2013 	btrfs_free_path(path);
2014 	if (trans) {
2015 		ret = btrfs_end_transaction(trans, root);
2016 		if (!err)
2017 			err = ret;
2018 	}
2019 	mutex_unlock(&root->fs_info->fs_mutex);
2020 	if (err) {
2021 		free_extent_map(em);
2022 		WARN_ON(1);
2023 		return ERR_PTR(err);
2024 	}
2025 	return em;
2026 }
2027 
2028 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2029 {
2030 	return extent_bmap(mapping, iblock, btrfs_get_extent);
2031 }
2032 
2033 int btrfs_readpage(struct file *file, struct page *page)
2034 {
2035 	struct extent_map_tree *tree;
2036 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
2037 	return extent_read_full_page(tree, page, btrfs_get_extent);
2038 }
2039 
2040 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2041 {
2042 	struct extent_map_tree *tree;
2043 
2044 
2045 	if (current->flags & PF_MEMALLOC) {
2046 		redirty_page_for_writepage(wbc, page);
2047 		unlock_page(page);
2048 		return 0;
2049 	}
2050 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
2051 	return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2052 }
2053 
2054 static int btrfs_writepages(struct address_space *mapping,
2055 			    struct writeback_control *wbc)
2056 {
2057 	struct extent_map_tree *tree;
2058 	tree = &BTRFS_I(mapping->host)->extent_tree;
2059 	return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2060 }
2061 
2062 static int
2063 btrfs_readpages(struct file *file, struct address_space *mapping,
2064 		struct list_head *pages, unsigned nr_pages)
2065 {
2066 	struct extent_map_tree *tree;
2067 	tree = &BTRFS_I(mapping->host)->extent_tree;
2068 	return extent_readpages(tree, mapping, pages, nr_pages,
2069 				btrfs_get_extent);
2070 }
2071 
2072 static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
2073 {
2074 	struct extent_map_tree *tree;
2075 	int ret;
2076 
2077 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
2078 	ret = try_release_extent_mapping(tree, page);
2079 	if (ret == 1) {
2080 		ClearPagePrivate(page);
2081 		set_page_private(page, 0);
2082 		page_cache_release(page);
2083 	}
2084 	return ret;
2085 }
2086 
2087 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2088 {
2089 	struct extent_map_tree *tree;
2090 
2091 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
2092 	extent_invalidatepage(tree, page, offset);
2093 	btrfs_releasepage(page, GFP_NOFS);
2094 }
2095 
2096 /*
2097  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2098  * called from a page fault handler when a page is first dirtied. Hence we must
2099  * be careful to check for EOF conditions here. We set the page up correctly
2100  * for a written page which means we get ENOSPC checking when writing into
2101  * holes and correct delalloc and unwritten extent mapping on filesystems that
2102  * support these features.
2103  *
2104  * We are not allowed to take the i_mutex here so we have to play games to
2105  * protect against truncate races as the page could now be beyond EOF.  Because
2106  * vmtruncate() writes the inode size before removing pages, once we have the
2107  * page lock we can determine safely if the page is beyond EOF. If it is not
2108  * beyond EOF, then the page is guaranteed safe against truncation until we
2109  * unlock the page.
2110  */
2111 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2112 {
2113 	struct inode *inode = fdentry(vma->vm_file)->d_inode;
2114 	struct btrfs_root *root = BTRFS_I(inode)->root;
2115 	unsigned long end;
2116 	loff_t size;
2117 	int ret;
2118 	u64 page_start;
2119 
2120 	mutex_lock(&root->fs_info->fs_mutex);
2121 	ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2122 	mutex_unlock(&root->fs_info->fs_mutex);
2123 	if (ret)
2124 		goto out;
2125 
2126 	ret = -EINVAL;
2127 
2128 	lock_page(page);
2129 	wait_on_page_writeback(page);
2130 	size = i_size_read(inode);
2131 	page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2132 
2133 	if ((page->mapping != inode->i_mapping) ||
2134 	    (page_start > size)) {
2135 		/* page got truncated out from underneath us */
2136 		goto out_unlock;
2137 	}
2138 
2139 	/* page is wholly or partially inside EOF */
2140 	if (page_start + PAGE_CACHE_SIZE > size)
2141 		end = size & ~PAGE_CACHE_MASK;
2142 	else
2143 		end = PAGE_CACHE_SIZE;
2144 
2145 	ret = btrfs_cow_one_page(inode, page, end);
2146 
2147 out_unlock:
2148 	unlock_page(page);
2149 out:
2150 	return ret;
2151 }
2152 
2153 static void btrfs_truncate(struct inode *inode)
2154 {
2155 	struct btrfs_root *root = BTRFS_I(inode)->root;
2156 	int ret;
2157 	struct btrfs_trans_handle *trans;
2158 	unsigned long nr;
2159 
2160 	if (!S_ISREG(inode->i_mode))
2161 		return;
2162 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2163 		return;
2164 
2165 	btrfs_truncate_page(inode->i_mapping, inode->i_size);
2166 
2167 	mutex_lock(&root->fs_info->fs_mutex);
2168 	trans = btrfs_start_transaction(root, 1);
2169 	btrfs_set_trans_block_group(trans, inode);
2170 
2171 	/* FIXME, add redo link to tree so we don't leak on crash */
2172 	ret = btrfs_truncate_in_trans(trans, root, inode);
2173 	btrfs_update_inode(trans, root, inode);
2174 	nr = trans->blocks_used;
2175 
2176 	ret = btrfs_end_transaction(trans, root);
2177 	BUG_ON(ret);
2178 	mutex_unlock(&root->fs_info->fs_mutex);
2179 	btrfs_btree_balance_dirty(root, nr);
2180 	btrfs_throttle(root);
2181 }
2182 
2183 static int noinline create_subvol(struct btrfs_root *root, char *name,
2184 				  int namelen)
2185 {
2186 	struct btrfs_trans_handle *trans;
2187 	struct btrfs_key key;
2188 	struct btrfs_root_item root_item;
2189 	struct btrfs_inode_item *inode_item;
2190 	struct extent_buffer *leaf;
2191 	struct btrfs_root *new_root = root;
2192 	struct inode *inode;
2193 	struct inode *dir;
2194 	int ret;
2195 	int err;
2196 	u64 objectid;
2197 	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2198 	unsigned long nr = 1;
2199 
2200 	mutex_lock(&root->fs_info->fs_mutex);
2201 	ret = btrfs_check_free_space(root, 1, 0);
2202 	if (ret)
2203 		goto fail_commit;
2204 
2205 	trans = btrfs_start_transaction(root, 1);
2206 	BUG_ON(!trans);
2207 
2208 	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2209 				       0, &objectid);
2210 	if (ret)
2211 		goto fail;
2212 
2213 	leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2214 					objectid, trans->transid, 0, 0,
2215 					0, 0);
2216 	if (IS_ERR(leaf))
2217 		return PTR_ERR(leaf);
2218 
2219 	btrfs_set_header_nritems(leaf, 0);
2220 	btrfs_set_header_level(leaf, 0);
2221 	btrfs_set_header_bytenr(leaf, leaf->start);
2222 	btrfs_set_header_generation(leaf, trans->transid);
2223 	btrfs_set_header_owner(leaf, objectid);
2224 
2225 	write_extent_buffer(leaf, root->fs_info->fsid,
2226 			    (unsigned long)btrfs_header_fsid(leaf),
2227 			    BTRFS_FSID_SIZE);
2228 	btrfs_mark_buffer_dirty(leaf);
2229 
2230 	inode_item = &root_item.inode;
2231 	memset(inode_item, 0, sizeof(*inode_item));
2232 	inode_item->generation = cpu_to_le64(1);
2233 	inode_item->size = cpu_to_le64(3);
2234 	inode_item->nlink = cpu_to_le32(1);
2235 	inode_item->nblocks = cpu_to_le64(1);
2236 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2237 
2238 	btrfs_set_root_bytenr(&root_item, leaf->start);
2239 	btrfs_set_root_level(&root_item, 0);
2240 	btrfs_set_root_refs(&root_item, 1);
2241 	btrfs_set_root_used(&root_item, 0);
2242 
2243 	memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2244 	root_item.drop_level = 0;
2245 
2246 	free_extent_buffer(leaf);
2247 	leaf = NULL;
2248 
2249 	btrfs_set_root_dirid(&root_item, new_dirid);
2250 
2251 	key.objectid = objectid;
2252 	key.offset = 1;
2253 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2254 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2255 				&root_item);
2256 	if (ret)
2257 		goto fail;
2258 
2259 	/*
2260 	 * insert the directory item
2261 	 */
2262 	key.offset = (u64)-1;
2263 	dir = root->fs_info->sb->s_root->d_inode;
2264 	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2265 				    name, namelen, dir->i_ino, &key,
2266 				    BTRFS_FT_DIR);
2267 	if (ret)
2268 		goto fail;
2269 
2270 	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2271 			     name, namelen, objectid,
2272 			     root->fs_info->sb->s_root->d_inode->i_ino);
2273 	if (ret)
2274 		goto fail;
2275 
2276 	ret = btrfs_commit_transaction(trans, root);
2277 	if (ret)
2278 		goto fail_commit;
2279 
2280 	new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2281 	BUG_ON(!new_root);
2282 
2283 	trans = btrfs_start_transaction(new_root, 1);
2284 	BUG_ON(!trans);
2285 
2286 	inode = btrfs_new_inode(trans, new_root, new_dirid,
2287 				BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2288 	if (IS_ERR(inode))
2289 		goto fail;
2290 	inode->i_op = &btrfs_dir_inode_operations;
2291 	inode->i_fop = &btrfs_dir_file_operations;
2292 	new_root->inode = inode;
2293 
2294 	ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2295 				     new_dirid);
2296 	inode->i_nlink = 1;
2297 	inode->i_size = 0;
2298 	ret = btrfs_update_inode(trans, new_root, inode);
2299 	if (ret)
2300 		goto fail;
2301 fail:
2302 	nr = trans->blocks_used;
2303 	err = btrfs_commit_transaction(trans, new_root);
2304 	if (err && !ret)
2305 		ret = err;
2306 fail_commit:
2307 	mutex_unlock(&root->fs_info->fs_mutex);
2308 	btrfs_btree_balance_dirty(root, nr);
2309 	btrfs_throttle(root);
2310 	return ret;
2311 }
2312 
2313 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2314 {
2315 	struct btrfs_pending_snapshot *pending_snapshot;
2316 	struct btrfs_trans_handle *trans;
2317 	int ret;
2318 	int err;
2319 	unsigned long nr = 0;
2320 
2321 	if (!root->ref_cows)
2322 		return -EINVAL;
2323 
2324 	mutex_lock(&root->fs_info->fs_mutex);
2325 	ret = btrfs_check_free_space(root, 1, 0);
2326 	if (ret)
2327 		goto fail_unlock;
2328 
2329 	pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2330 	if (!pending_snapshot) {
2331 		ret = -ENOMEM;
2332 		goto fail_unlock;
2333 	}
2334 	pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2335 	if (!pending_snapshot->name) {
2336 		ret = -ENOMEM;
2337 		kfree(pending_snapshot);
2338 		goto fail_unlock;
2339 	}
2340 	memcpy(pending_snapshot->name, name, namelen);
2341 	pending_snapshot->name[namelen] = '\0';
2342 	trans = btrfs_start_transaction(root, 1);
2343 	BUG_ON(!trans);
2344 	pending_snapshot->root = root;
2345 	list_add(&pending_snapshot->list,
2346 		 &trans->transaction->pending_snapshots);
2347 	ret = btrfs_update_inode(trans, root, root->inode);
2348 	err = btrfs_commit_transaction(trans, root);
2349 
2350 fail_unlock:
2351 	mutex_unlock(&root->fs_info->fs_mutex);
2352 	btrfs_btree_balance_dirty(root, nr);
2353 	btrfs_throttle(root);
2354 	return ret;
2355 }
2356 
2357 unsigned long btrfs_force_ra(struct address_space *mapping,
2358 			      struct file_ra_state *ra, struct file *file,
2359 			      pgoff_t offset, pgoff_t last_index)
2360 {
2361 	pgoff_t req_size;
2362 
2363 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2364 	req_size = last_index - offset + 1;
2365 	offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2366 	return offset;
2367 #else
2368 	req_size = min(last_index - offset + 1, (pgoff_t)128);
2369 	page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2370 	return offset + req_size;
2371 #endif
2372 }
2373 
2374 int btrfs_defrag_file(struct file *file) {
2375 	struct inode *inode = fdentry(file)->d_inode;
2376 	struct btrfs_root *root = BTRFS_I(inode)->root;
2377 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2378 	struct page *page;
2379 	unsigned long last_index;
2380 	unsigned long ra_index = 0;
2381 	u64 page_start;
2382 	u64 page_end;
2383 	u64 delalloc_start;
2384 	u64 existing_delalloc;
2385 	unsigned long i;
2386 	int ret;
2387 
2388 	mutex_lock(&root->fs_info->fs_mutex);
2389 	ret = btrfs_check_free_space(root, inode->i_size, 0);
2390 	mutex_unlock(&root->fs_info->fs_mutex);
2391 	if (ret)
2392 		return -ENOSPC;
2393 
2394 	mutex_lock(&inode->i_mutex);
2395 	last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2396 	for (i = 0; i <= last_index; i++) {
2397 		if (i == ra_index) {
2398 			ra_index = btrfs_force_ra(inode->i_mapping,
2399 						  &file->f_ra,
2400 						  file, ra_index, last_index);
2401 		}
2402 		page = grab_cache_page(inode->i_mapping, i);
2403 		if (!page)
2404 			goto out_unlock;
2405 		if (!PageUptodate(page)) {
2406 			btrfs_readpage(NULL, page);
2407 			lock_page(page);
2408 			if (!PageUptodate(page)) {
2409 				unlock_page(page);
2410 				page_cache_release(page);
2411 				goto out_unlock;
2412 			}
2413 		}
2414 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2415 		page_end = page_start + PAGE_CACHE_SIZE - 1;
2416 
2417 		lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2418 		delalloc_start = page_start;
2419 		existing_delalloc =
2420 			count_range_bits(&BTRFS_I(inode)->extent_tree,
2421 					 &delalloc_start, page_end,
2422 					 PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2423 		set_extent_delalloc(em_tree, page_start,
2424 				    page_end, GFP_NOFS);
2425 
2426 		spin_lock(&root->fs_info->delalloc_lock);
2427 		root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2428 						 existing_delalloc;
2429 		spin_unlock(&root->fs_info->delalloc_lock);
2430 
2431 		unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2432 		set_page_dirty(page);
2433 		unlock_page(page);
2434 		page_cache_release(page);
2435 		balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2436 	}
2437 
2438 out_unlock:
2439 	mutex_unlock(&inode->i_mutex);
2440 	return 0;
2441 }
2442 
2443 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2444 {
2445 	u64 new_size;
2446 	u64 old_size;
2447 	struct btrfs_ioctl_vol_args *vol_args;
2448 	struct btrfs_trans_handle *trans;
2449 	char *sizestr;
2450 	int ret = 0;
2451 	int namelen;
2452 	int mod = 0;
2453 
2454 	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2455 
2456 	if (!vol_args)
2457 		return -ENOMEM;
2458 
2459 	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2460 		ret = -EFAULT;
2461 		goto out;
2462 	}
2463 	namelen = strlen(vol_args->name);
2464 	if (namelen > BTRFS_VOL_NAME_MAX) {
2465 		ret = -EINVAL;
2466 		goto out;
2467 	}
2468 
2469 	sizestr = vol_args->name;
2470 	if (!strcmp(sizestr, "max"))
2471 		new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2472 	else {
2473 		if (sizestr[0] == '-') {
2474 			mod = -1;
2475 			sizestr++;
2476 		} else if (sizestr[0] == '+') {
2477 			mod = 1;
2478 			sizestr++;
2479 		}
2480 		new_size = btrfs_parse_size(sizestr);
2481 		if (new_size == 0) {
2482 			ret = -EINVAL;
2483 			goto out;
2484 		}
2485 	}
2486 
2487 	mutex_lock(&root->fs_info->fs_mutex);
2488 	old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2489 
2490 	if (mod < 0) {
2491 		if (new_size > old_size) {
2492 			ret = -EINVAL;
2493 			goto out_unlock;
2494 		}
2495 		new_size = old_size - new_size;
2496 	} else if (mod > 0) {
2497 		new_size = old_size + new_size;
2498 	}
2499 
2500 	if (new_size < 256 * 1024 * 1024) {
2501 		ret = -EINVAL;
2502 		goto out_unlock;
2503 	}
2504 	if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2505 		ret = -EFBIG;
2506 		goto out_unlock;
2507 	}
2508 
2509 	do_div(new_size, root->sectorsize);
2510 	new_size *= root->sectorsize;
2511 
2512 printk("new size is %Lu\n", new_size);
2513 	if (new_size > old_size) {
2514 		trans = btrfs_start_transaction(root, 1);
2515 		ret = btrfs_grow_extent_tree(trans, root, new_size);
2516 		btrfs_commit_transaction(trans, root);
2517 	} else {
2518 		ret = btrfs_shrink_extent_tree(root, new_size);
2519 	}
2520 
2521 out_unlock:
2522 	mutex_unlock(&root->fs_info->fs_mutex);
2523 out:
2524 	kfree(vol_args);
2525 	return ret;
2526 }
2527 
2528 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2529 					    void __user *arg)
2530 {
2531 	struct btrfs_ioctl_vol_args *vol_args;
2532 	struct btrfs_dir_item *di;
2533 	struct btrfs_path *path;
2534 	u64 root_dirid;
2535 	int namelen;
2536 	int ret;
2537 
2538 	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2539 
2540 	if (!vol_args)
2541 		return -ENOMEM;
2542 
2543 	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2544 		ret = -EFAULT;
2545 		goto out;
2546 	}
2547 
2548 	namelen = strlen(vol_args->name);
2549 	if (namelen > BTRFS_VOL_NAME_MAX) {
2550 		ret = -EINVAL;
2551 		goto out;
2552 	}
2553 	if (strchr(vol_args->name, '/')) {
2554 		ret = -EINVAL;
2555 		goto out;
2556 	}
2557 
2558 	path = btrfs_alloc_path();
2559 	if (!path) {
2560 		ret = -ENOMEM;
2561 		goto out;
2562 	}
2563 
2564 	root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2565 	mutex_lock(&root->fs_info->fs_mutex);
2566 	di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2567 			    path, root_dirid,
2568 			    vol_args->name, namelen, 0);
2569 	mutex_unlock(&root->fs_info->fs_mutex);
2570 	btrfs_free_path(path);
2571 
2572 	if (di && !IS_ERR(di)) {
2573 		ret = -EEXIST;
2574 		goto out;
2575 	}
2576 
2577 	if (IS_ERR(di)) {
2578 		ret = PTR_ERR(di);
2579 		goto out;
2580 	}
2581 
2582 	if (root == root->fs_info->tree_root)
2583 		ret = create_subvol(root, vol_args->name, namelen);
2584 	else
2585 		ret = create_snapshot(root, vol_args->name, namelen);
2586 out:
2587 	kfree(vol_args);
2588 	return ret;
2589 }
2590 
2591 static int btrfs_ioctl_defrag(struct file *file)
2592 {
2593 	struct inode *inode = fdentry(file)->d_inode;
2594 	struct btrfs_root *root = BTRFS_I(inode)->root;
2595 
2596 	switch (inode->i_mode & S_IFMT) {
2597 	case S_IFDIR:
2598 		mutex_lock(&root->fs_info->fs_mutex);
2599 		btrfs_defrag_root(root, 0);
2600 		btrfs_defrag_root(root->fs_info->extent_root, 0);
2601 		mutex_unlock(&root->fs_info->fs_mutex);
2602 		break;
2603 	case S_IFREG:
2604 		btrfs_defrag_file(file);
2605 		break;
2606 	}
2607 
2608 	return 0;
2609 }
2610 
2611 long btrfs_ioctl(struct file *file, unsigned int
2612 		cmd, unsigned long arg)
2613 {
2614 	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2615 
2616 	switch (cmd) {
2617 	case BTRFS_IOC_SNAP_CREATE:
2618 		return btrfs_ioctl_snap_create(root, (void __user *)arg);
2619 	case BTRFS_IOC_DEFRAG:
2620 		return btrfs_ioctl_defrag(file);
2621 	case BTRFS_IOC_RESIZE:
2622 		return btrfs_ioctl_resize(root, (void __user *)arg);
2623 	}
2624 
2625 	return -ENOTTY;
2626 }
2627 
2628 /*
2629  * Called inside transaction, so use GFP_NOFS
2630  */
2631 struct inode *btrfs_alloc_inode(struct super_block *sb)
2632 {
2633 	struct btrfs_inode *ei;
2634 
2635 	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2636 	if (!ei)
2637 		return NULL;
2638 	ei->last_trans = 0;
2639 	ei->ordered_trans = 0;
2640 	return &ei->vfs_inode;
2641 }
2642 
2643 void btrfs_destroy_inode(struct inode *inode)
2644 {
2645 	WARN_ON(!list_empty(&inode->i_dentry));
2646 	WARN_ON(inode->i_data.nrpages);
2647 
2648 	btrfs_drop_extent_cache(inode, 0, (u64)-1);
2649 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2650 }
2651 
2652 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2653 static void init_once(struct kmem_cache * cachep, void *foo)
2654 #else
2655 static void init_once(void * foo, struct kmem_cache * cachep,
2656 		      unsigned long flags)
2657 #endif
2658 {
2659 	struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2660 
2661 	inode_init_once(&ei->vfs_inode);
2662 }
2663 
2664 void btrfs_destroy_cachep(void)
2665 {
2666 	if (btrfs_inode_cachep)
2667 		kmem_cache_destroy(btrfs_inode_cachep);
2668 	if (btrfs_trans_handle_cachep)
2669 		kmem_cache_destroy(btrfs_trans_handle_cachep);
2670 	if (btrfs_transaction_cachep)
2671 		kmem_cache_destroy(btrfs_transaction_cachep);
2672 	if (btrfs_bit_radix_cachep)
2673 		kmem_cache_destroy(btrfs_bit_radix_cachep);
2674 	if (btrfs_path_cachep)
2675 		kmem_cache_destroy(btrfs_path_cachep);
2676 }
2677 
2678 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2679 				       unsigned long extra_flags,
2680 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2681 				       void (*ctor)(struct kmem_cache *, void *)
2682 #else
2683 				       void (*ctor)(void *, struct kmem_cache *,
2684 						    unsigned long)
2685 #endif
2686 				     )
2687 {
2688 	return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2689 				 SLAB_MEM_SPREAD | extra_flags), ctor
2690 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2691 				 ,NULL
2692 #endif
2693 				);
2694 }
2695 
2696 int btrfs_init_cachep(void)
2697 {
2698 	btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2699 					  sizeof(struct btrfs_inode),
2700 					  0, init_once);
2701 	if (!btrfs_inode_cachep)
2702 		goto fail;
2703 	btrfs_trans_handle_cachep =
2704 			btrfs_cache_create("btrfs_trans_handle_cache",
2705 					   sizeof(struct btrfs_trans_handle),
2706 					   0, NULL);
2707 	if (!btrfs_trans_handle_cachep)
2708 		goto fail;
2709 	btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2710 					     sizeof(struct btrfs_transaction),
2711 					     0, NULL);
2712 	if (!btrfs_transaction_cachep)
2713 		goto fail;
2714 	btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2715 					 sizeof(struct btrfs_path),
2716 					 0, NULL);
2717 	if (!btrfs_path_cachep)
2718 		goto fail;
2719 	btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2720 					      SLAB_DESTROY_BY_RCU, NULL);
2721 	if (!btrfs_bit_radix_cachep)
2722 		goto fail;
2723 	return 0;
2724 fail:
2725 	btrfs_destroy_cachep();
2726 	return -ENOMEM;
2727 }
2728 
2729 static int btrfs_getattr(struct vfsmount *mnt,
2730 			 struct dentry *dentry, struct kstat *stat)
2731 {
2732 	struct inode *inode = dentry->d_inode;
2733 	generic_fillattr(inode, stat);
2734 	stat->blksize = PAGE_CACHE_SIZE;
2735 	return 0;
2736 }
2737 
2738 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2739 			   struct inode * new_dir,struct dentry *new_dentry)
2740 {
2741 	struct btrfs_trans_handle *trans;
2742 	struct btrfs_root *root = BTRFS_I(old_dir)->root;
2743 	struct inode *new_inode = new_dentry->d_inode;
2744 	struct inode *old_inode = old_dentry->d_inode;
2745 	struct timespec ctime = CURRENT_TIME;
2746 	struct btrfs_path *path;
2747 	int ret;
2748 
2749 	if (S_ISDIR(old_inode->i_mode) && new_inode &&
2750 	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2751 		return -ENOTEMPTY;
2752 	}
2753 
2754 	mutex_lock(&root->fs_info->fs_mutex);
2755 	ret = btrfs_check_free_space(root, 1, 0);
2756 	if (ret)
2757 		goto out_unlock;
2758 
2759 	trans = btrfs_start_transaction(root, 1);
2760 
2761 	btrfs_set_trans_block_group(trans, new_dir);
2762 	path = btrfs_alloc_path();
2763 	if (!path) {
2764 		ret = -ENOMEM;
2765 		goto out_fail;
2766 	}
2767 
2768 	old_dentry->d_inode->i_nlink++;
2769 	old_dir->i_ctime = old_dir->i_mtime = ctime;
2770 	new_dir->i_ctime = new_dir->i_mtime = ctime;
2771 	old_inode->i_ctime = ctime;
2772 
2773 	ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2774 	if (ret)
2775 		goto out_fail;
2776 
2777 	if (new_inode) {
2778 		new_inode->i_ctime = CURRENT_TIME;
2779 		ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2780 		if (ret)
2781 			goto out_fail;
2782 	}
2783 	ret = btrfs_add_link(trans, new_dentry, old_inode);
2784 	if (ret)
2785 		goto out_fail;
2786 
2787 out_fail:
2788 	btrfs_free_path(path);
2789 	btrfs_end_transaction(trans, root);
2790 out_unlock:
2791 	mutex_unlock(&root->fs_info->fs_mutex);
2792 	return ret;
2793 }
2794 
2795 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2796 			 const char *symname)
2797 {
2798 	struct btrfs_trans_handle *trans;
2799 	struct btrfs_root *root = BTRFS_I(dir)->root;
2800 	struct btrfs_path *path;
2801 	struct btrfs_key key;
2802 	struct inode *inode = NULL;
2803 	int err;
2804 	int drop_inode = 0;
2805 	u64 objectid;
2806 	int name_len;
2807 	int datasize;
2808 	unsigned long ptr;
2809 	struct btrfs_file_extent_item *ei;
2810 	struct extent_buffer *leaf;
2811 	unsigned long nr = 0;
2812 
2813 	name_len = strlen(symname) + 1;
2814 	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2815 		return -ENAMETOOLONG;
2816 
2817 	mutex_lock(&root->fs_info->fs_mutex);
2818 	err = btrfs_check_free_space(root, 1, 0);
2819 	if (err)
2820 		goto out_fail;
2821 
2822 	trans = btrfs_start_transaction(root, 1);
2823 	btrfs_set_trans_block_group(trans, dir);
2824 
2825 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2826 	if (err) {
2827 		err = -ENOSPC;
2828 		goto out_unlock;
2829 	}
2830 
2831 	inode = btrfs_new_inode(trans, root, objectid,
2832 				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2833 	err = PTR_ERR(inode);
2834 	if (IS_ERR(inode))
2835 		goto out_unlock;
2836 
2837 	btrfs_set_trans_block_group(trans, inode);
2838 	err = btrfs_add_nondir(trans, dentry, inode);
2839 	if (err)
2840 		drop_inode = 1;
2841 	else {
2842 		inode->i_mapping->a_ops = &btrfs_aops;
2843 		inode->i_fop = &btrfs_file_operations;
2844 		inode->i_op = &btrfs_file_inode_operations;
2845 		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
2846 				     inode->i_mapping, GFP_NOFS);
2847 		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
2848 	}
2849 	dir->i_sb->s_dirt = 1;
2850 	btrfs_update_inode_block_group(trans, inode);
2851 	btrfs_update_inode_block_group(trans, dir);
2852 	if (drop_inode)
2853 		goto out_unlock;
2854 
2855 	path = btrfs_alloc_path();
2856 	BUG_ON(!path);
2857 	key.objectid = inode->i_ino;
2858 	key.offset = 0;
2859 	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2860 	datasize = btrfs_file_extent_calc_inline_size(name_len);
2861 	err = btrfs_insert_empty_item(trans, root, path, &key,
2862 				      datasize);
2863 	if (err) {
2864 		drop_inode = 1;
2865 		goto out_unlock;
2866 	}
2867 	leaf = path->nodes[0];
2868 	ei = btrfs_item_ptr(leaf, path->slots[0],
2869 			    struct btrfs_file_extent_item);
2870 	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
2871 	btrfs_set_file_extent_type(leaf, ei,
2872 				   BTRFS_FILE_EXTENT_INLINE);
2873 	ptr = btrfs_file_extent_inline_start(ei);
2874 	write_extent_buffer(leaf, symname, ptr, name_len);
2875 	btrfs_mark_buffer_dirty(leaf);
2876 	btrfs_free_path(path);
2877 
2878 	inode->i_op = &btrfs_symlink_inode_operations;
2879 	inode->i_mapping->a_ops = &btrfs_symlink_aops;
2880 	inode->i_size = name_len - 1;
2881 	err = btrfs_update_inode(trans, root, inode);
2882 	if (err)
2883 		drop_inode = 1;
2884 
2885 out_unlock:
2886 	nr = trans->blocks_used;
2887 	btrfs_end_transaction(trans, root);
2888 out_fail:
2889 	mutex_unlock(&root->fs_info->fs_mutex);
2890 	if (drop_inode) {
2891 		inode_dec_link_count(inode);
2892 		iput(inode);
2893 	}
2894 	btrfs_btree_balance_dirty(root, nr);
2895 	btrfs_throttle(root);
2896 	return err;
2897 }
2898 static int btrfs_permission(struct inode *inode, int mask,
2899 			    struct nameidata *nd)
2900 {
2901 	if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
2902 		return -EACCES;
2903 	return generic_permission(inode, mask, NULL);
2904 }
2905 
2906 static struct inode_operations btrfs_dir_inode_operations = {
2907 	.lookup		= btrfs_lookup,
2908 	.create		= btrfs_create,
2909 	.unlink		= btrfs_unlink,
2910 	.link		= btrfs_link,
2911 	.mkdir		= btrfs_mkdir,
2912 	.rmdir		= btrfs_rmdir,
2913 	.rename		= btrfs_rename,
2914 	.symlink	= btrfs_symlink,
2915 	.setattr	= btrfs_setattr,
2916 	.mknod		= btrfs_mknod,
2917 	.setxattr	= generic_setxattr,
2918 	.getxattr	= generic_getxattr,
2919 	.listxattr	= btrfs_listxattr,
2920 	.removexattr	= generic_removexattr,
2921 	.permission	= btrfs_permission,
2922 };
2923 static struct inode_operations btrfs_dir_ro_inode_operations = {
2924 	.lookup		= btrfs_lookup,
2925 	.permission	= btrfs_permission,
2926 };
2927 static struct file_operations btrfs_dir_file_operations = {
2928 	.llseek		= generic_file_llseek,
2929 	.read		= generic_read_dir,
2930 	.readdir	= btrfs_readdir,
2931 	.unlocked_ioctl	= btrfs_ioctl,
2932 #ifdef CONFIG_COMPAT
2933 	.compat_ioctl	= btrfs_ioctl,
2934 #endif
2935 };
2936 
2937 static struct extent_map_ops btrfs_extent_map_ops = {
2938 	.fill_delalloc = run_delalloc_range,
2939 	.writepage_io_hook = btrfs_writepage_io_hook,
2940 	.readpage_io_hook = btrfs_readpage_io_hook,
2941 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
2942 };
2943 
2944 static struct address_space_operations btrfs_aops = {
2945 	.readpage	= btrfs_readpage,
2946 	.writepage	= btrfs_writepage,
2947 	.writepages	= btrfs_writepages,
2948 	.readpages	= btrfs_readpages,
2949 	.sync_page	= block_sync_page,
2950 	.bmap		= btrfs_bmap,
2951 	.invalidatepage = btrfs_invalidatepage,
2952 	.releasepage	= btrfs_releasepage,
2953 	.set_page_dirty	= __set_page_dirty_nobuffers,
2954 };
2955 
2956 static struct address_space_operations btrfs_symlink_aops = {
2957 	.readpage	= btrfs_readpage,
2958 	.writepage	= btrfs_writepage,
2959 	.invalidatepage = btrfs_invalidatepage,
2960 	.releasepage	= btrfs_releasepage,
2961 };
2962 
2963 static struct inode_operations btrfs_file_inode_operations = {
2964 	.truncate	= btrfs_truncate,
2965 	.getattr	= btrfs_getattr,
2966 	.setattr	= btrfs_setattr,
2967 	.setxattr	= generic_setxattr,
2968 	.getxattr	= generic_getxattr,
2969 	.listxattr      = btrfs_listxattr,
2970 	.removexattr	= generic_removexattr,
2971 	.permission	= btrfs_permission,
2972 };
2973 static struct inode_operations btrfs_special_inode_operations = {
2974 	.getattr	= btrfs_getattr,
2975 	.setattr	= btrfs_setattr,
2976 	.permission	= btrfs_permission,
2977 };
2978 static struct inode_operations btrfs_symlink_inode_operations = {
2979 	.readlink	= generic_readlink,
2980 	.follow_link	= page_follow_link_light,
2981 	.put_link	= page_put_link,
2982 	.permission	= btrfs_permission,
2983 };
2984