xref: /openbmc/linux/fs/btrfs/inode.c (revision 879c1cfc3104b632dea85d07490f1b55e54275a9)
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include <linux/xattr.h>
36 #include "ctree.h"
37 #include "disk-io.h"
38 #include "transaction.h"
39 #include "btrfs_inode.h"
40 #include "ioctl.h"
41 #include "print-tree.h"
42 
/*
 * Lookup key handed to iget5_locked() by btrfs_iget_locked().  An in-memory
 * inode is matched on both members (see btrfs_find_actor()) and a freshly
 * allocated inode is initialised from them (see btrfs_init_locked_inode()).
 */
struct btrfs_iget_args {
	u64 ino;			/* inode number to look up */
	struct btrfs_root *root;	/* root the inode must belong to */
};
47 
/*
 * Forward declarations of the operation tables that the inode setup code
 * below installs (see btrfs_read_locked_inode()).  Their initialisers are
 * not part of this section -- presumably they follow later in the file.
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct inode_operations btrfs_special_inode_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;
static struct extent_map_ops btrfs_extent_map_ops;
57 
/*
 * Slab cache pointers.  Only btrfs_inode_cachep is private to this file; the
 * other four have external linkage so other btrfs source files can use them.
 * NOTE(review): judging by the names they back inodes, transaction handles,
 * transactions, bit-radix nodes and paths -- confirm where they are created.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
63 
/*
 * Translation table from the file-type bits of an inode mode to the
 * corresponding BTRFS_FT_* value.  The index is (mode & S_IFMT) >> S_SHIFT,
 * i.e. the file-type bits shifted down so they can be used as an array index.
 */
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
};
74 
75 static int cow_file_range(struct inode *inode, u64 start, u64 end)
76 {
77 	struct btrfs_root *root = BTRFS_I(inode)->root;
78 	struct btrfs_trans_handle *trans;
79 	u64 alloc_hint = 0;
80 	u64 num_bytes;
81 	u64 cur_alloc_size;
82 	u64 blocksize = root->sectorsize;
83 	struct btrfs_key ins;
84 	int ret;
85 
86 	trans = btrfs_start_transaction(root, 1);
87 	BUG_ON(!trans);
88 	btrfs_set_trans_block_group(trans, inode);
89 
90 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
91 	num_bytes = max(blocksize,  num_bytes);
92 	ret = btrfs_drop_extents(trans, root, inode,
93 				 start, start + num_bytes, start, &alloc_hint);
94 
95 	if (alloc_hint == EXTENT_MAP_INLINE)
96 		goto out;
97 
98 	while(num_bytes > 0) {
99 		cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
100 		ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
101 					 root->root_key.objectid,
102 					 trans->transid,
103 					 inode->i_ino, start, 0,
104 					 alloc_hint, (u64)-1, &ins, 1);
105 		if (ret) {
106 			WARN_ON(1);
107 			goto out;
108 		}
109 		ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
110 					       start, ins.objectid, ins.offset,
111 					       ins.offset);
112 		num_bytes -= cur_alloc_size;
113 		alloc_hint = ins.objectid + ins.offset;
114 		start += cur_alloc_size;
115 	}
116 out:
117 	btrfs_end_transaction(trans, root);
118 	return ret;
119 }
120 
121 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
122 {
123 	u64 extent_start;
124 	u64 extent_end;
125 	u64 bytenr;
126 	u64 cow_end;
127 	struct btrfs_root *root = BTRFS_I(inode)->root;
128 	struct extent_buffer *leaf;
129 	int found_type;
130 	struct btrfs_path *path;
131 	struct btrfs_file_extent_item *item;
132 	int ret;
133 	int err;
134 	struct btrfs_key found_key;
135 
136 	path = btrfs_alloc_path();
137 	BUG_ON(!path);
138 again:
139 	ret = btrfs_lookup_file_extent(NULL, root, path,
140 				       inode->i_ino, start, 0);
141 	if (ret < 0) {
142 		btrfs_free_path(path);
143 		return ret;
144 	}
145 
146 	cow_end = end;
147 	if (ret != 0) {
148 		if (path->slots[0] == 0)
149 			goto not_found;
150 		path->slots[0]--;
151 	}
152 
153 	leaf = path->nodes[0];
154 	item = btrfs_item_ptr(leaf, path->slots[0],
155 			      struct btrfs_file_extent_item);
156 
157 	/* are we inside the extent that was found? */
158 	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
159 	found_type = btrfs_key_type(&found_key);
160 	if (found_key.objectid != inode->i_ino ||
161 	    found_type != BTRFS_EXTENT_DATA_KEY) {
162 		goto not_found;
163 	}
164 
165 	found_type = btrfs_file_extent_type(leaf, item);
166 	extent_start = found_key.offset;
167 	if (found_type == BTRFS_FILE_EXTENT_REG) {
168 		extent_end = extent_start +
169 		       btrfs_file_extent_num_bytes(leaf, item);
170 		err = 0;
171 
172 		if (start < extent_start || start >= extent_end)
173 			goto not_found;
174 
175 		cow_end = min(end, extent_end - 1);
176 		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
177 		if (bytenr == 0)
178 			goto not_found;
179 
180 		if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
181 			goto not_found;
182 		}
183 
184 		start = extent_end;
185 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
186 		goto not_found;
187 	}
188 loop:
189 	if (start > end) {
190 		btrfs_free_path(path);
191 		return 0;
192 	}
193 	btrfs_release_path(root, path);
194 	goto again;
195 
196 not_found:
197 	cow_file_range(inode, start, cow_end);
198 	start = cow_end + 1;
199 	goto loop;
200 }
201 
202 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
203 {
204 	struct btrfs_root *root = BTRFS_I(inode)->root;
205 	int ret;
206 
207 	mutex_lock(&root->fs_info->fs_mutex);
208 	if (btrfs_test_opt(root, NODATACOW))
209 		ret = run_delalloc_nocow(inode, start, end);
210 	else
211 		ret = cow_file_range(inode, start, end);
212 	mutex_unlock(&root->fs_info->fs_mutex);
213 	return ret;
214 }
215 
216 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
217 {
218 	struct inode *inode = page->mapping->host;
219 	struct btrfs_root *root = BTRFS_I(inode)->root;
220 	struct btrfs_trans_handle *trans;
221 	char *kaddr;
222 	int ret = 0;
223 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
224 	size_t offset = start - page_start;
225 
226 	if (btrfs_test_opt(root, NODATASUM))
227 		return 0;
228 
229 	mutex_lock(&root->fs_info->fs_mutex);
230 	trans = btrfs_start_transaction(root, 1);
231 	btrfs_set_trans_block_group(trans, inode);
232 	kaddr = kmap(page);
233 	btrfs_csum_file_block(trans, root, inode, inode->i_ino,
234 			      start, kaddr + offset, end - start + 1);
235 	kunmap(page);
236 	ret = btrfs_end_transaction(trans, root);
237 	BUG_ON(ret);
238 	mutex_unlock(&root->fs_info->fs_mutex);
239 	return ret;
240 }
241 
242 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
243 {
244 	int ret = 0;
245 	struct inode *inode = page->mapping->host;
246 	struct btrfs_root *root = BTRFS_I(inode)->root;
247 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
248 	struct btrfs_csum_item *item;
249 	struct btrfs_path *path = NULL;
250 	u32 csum;
251 
252 	if (btrfs_test_opt(root, NODATASUM))
253 		return 0;
254 
255 	mutex_lock(&root->fs_info->fs_mutex);
256 	path = btrfs_alloc_path();
257 	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
258 	if (IS_ERR(item)) {
259 		ret = PTR_ERR(item);
260 		/* a csum that isn't present is a preallocated region. */
261 		if (ret == -ENOENT || ret == -EFBIG)
262 			ret = 0;
263 		csum = 0;
264 		goto out;
265 	}
266 	read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
267 			   BTRFS_CRC32_SIZE);
268 	set_state_private(em_tree, start, csum);
269 out:
270 	if (path)
271 		btrfs_free_path(path);
272 	mutex_unlock(&root->fs_info->fs_mutex);
273 	return ret;
274 }
275 
276 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
277 {
278 	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
279 	struct inode *inode = page->mapping->host;
280 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
281 	char *kaddr;
282 	u64 private;
283 	int ret;
284 	struct btrfs_root *root = BTRFS_I(inode)->root;
285 	u32 csum = ~(u32)0;
286 	unsigned long flags;
287 
288 	if (btrfs_test_opt(root, NODATASUM))
289 		return 0;
290 
291 	ret = get_state_private(em_tree, start, &private);
292 	local_irq_save(flags);
293 	kaddr = kmap_atomic(page, KM_IRQ0);
294 	if (ret) {
295 		goto zeroit;
296 	}
297 	csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
298 	btrfs_csum_final(csum, (char *)&csum);
299 	if (csum != private) {
300 		goto zeroit;
301 	}
302 	kunmap_atomic(kaddr, KM_IRQ0);
303 	local_irq_restore(flags);
304 	return 0;
305 
306 zeroit:
307 	printk("btrfs csum failed ino %lu off %llu\n",
308 	       page->mapping->host->i_ino, (unsigned long long)start);
309 	memset(kaddr + offset, 1, end - start + 1);
310 	flush_dcache_page(page);
311 	kunmap_atomic(kaddr, KM_IRQ0);
312 	local_irq_restore(flags);
313 	return 0;
314 }
315 
316 void btrfs_read_locked_inode(struct inode *inode)
317 {
318 	struct btrfs_path *path;
319 	struct extent_buffer *leaf;
320 	struct btrfs_inode_item *inode_item;
321 	struct btrfs_inode_timespec *tspec;
322 	struct btrfs_root *root = BTRFS_I(inode)->root;
323 	struct btrfs_key location;
324 	u64 alloc_group_block;
325 	u32 rdev;
326 	int ret;
327 
328 	path = btrfs_alloc_path();
329 	BUG_ON(!path);
330 	mutex_lock(&root->fs_info->fs_mutex);
331 
332 	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
333 	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
334 	if (ret)
335 		goto make_bad;
336 
337 	leaf = path->nodes[0];
338 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
339 				    struct btrfs_inode_item);
340 
341 	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
342 	inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
343 	inode->i_uid = btrfs_inode_uid(leaf, inode_item);
344 	inode->i_gid = btrfs_inode_gid(leaf, inode_item);
345 	inode->i_size = btrfs_inode_size(leaf, inode_item);
346 
347 	tspec = btrfs_inode_atime(inode_item);
348 	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
349 	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
350 
351 	tspec = btrfs_inode_mtime(inode_item);
352 	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
353 	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
354 
355 	tspec = btrfs_inode_ctime(inode_item);
356 	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
357 	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
358 
359 	inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
360 	inode->i_generation = btrfs_inode_generation(leaf, inode_item);
361 	inode->i_rdev = 0;
362 	rdev = btrfs_inode_rdev(leaf, inode_item);
363 
364 	alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
365 	BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
366 						       alloc_group_block);
367 
368 	btrfs_free_path(path);
369 	inode_item = NULL;
370 
371 	mutex_unlock(&root->fs_info->fs_mutex);
372 
373 	switch (inode->i_mode & S_IFMT) {
374 	case S_IFREG:
375 		inode->i_mapping->a_ops = &btrfs_aops;
376 		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
377 		inode->i_fop = &btrfs_file_operations;
378 		inode->i_op = &btrfs_file_inode_operations;
379 		break;
380 	case S_IFDIR:
381 		inode->i_fop = &btrfs_dir_file_operations;
382 		if (root == root->fs_info->tree_root)
383 			inode->i_op = &btrfs_dir_ro_inode_operations;
384 		else
385 			inode->i_op = &btrfs_dir_inode_operations;
386 		break;
387 	case S_IFLNK:
388 		inode->i_op = &btrfs_symlink_inode_operations;
389 		inode->i_mapping->a_ops = &btrfs_symlink_aops;
390 		break;
391 	default:
392 		init_special_inode(inode, inode->i_mode, rdev);
393 		break;
394 	}
395 	return;
396 
397 make_bad:
398 	btrfs_release_path(root, path);
399 	btrfs_free_path(path);
400 	mutex_unlock(&root->fs_info->fs_mutex);
401 	make_bad_inode(inode);
402 }
403 
404 static void fill_inode_item(struct extent_buffer *leaf,
405 			    struct btrfs_inode_item *item,
406 			    struct inode *inode)
407 {
408 	btrfs_set_inode_uid(leaf, item, inode->i_uid);
409 	btrfs_set_inode_gid(leaf, item, inode->i_gid);
410 	btrfs_set_inode_size(leaf, item, inode->i_size);
411 	btrfs_set_inode_mode(leaf, item, inode->i_mode);
412 	btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
413 
414 	btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
415 			       inode->i_atime.tv_sec);
416 	btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
417 				inode->i_atime.tv_nsec);
418 
419 	btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
420 			       inode->i_mtime.tv_sec);
421 	btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
422 				inode->i_mtime.tv_nsec);
423 
424 	btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
425 			       inode->i_ctime.tv_sec);
426 	btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
427 				inode->i_ctime.tv_nsec);
428 
429 	btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
430 	btrfs_set_inode_generation(leaf, item, inode->i_generation);
431 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
432 	btrfs_set_inode_block_group(leaf, item,
433 				    BTRFS_I(inode)->block_group->key.objectid);
434 }
435 
436 int btrfs_update_inode(struct btrfs_trans_handle *trans,
437 			      struct btrfs_root *root,
438 			      struct inode *inode)
439 {
440 	struct btrfs_inode_item *inode_item;
441 	struct btrfs_path *path;
442 	struct extent_buffer *leaf;
443 	int ret;
444 
445 	path = btrfs_alloc_path();
446 	BUG_ON(!path);
447 	ret = btrfs_lookup_inode(trans, root, path,
448 				 &BTRFS_I(inode)->location, 1);
449 	if (ret) {
450 		if (ret > 0)
451 			ret = -ENOENT;
452 		goto failed;
453 	}
454 
455 	leaf = path->nodes[0];
456 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
457 				  struct btrfs_inode_item);
458 
459 	fill_inode_item(leaf, inode_item, inode);
460 	btrfs_mark_buffer_dirty(leaf);
461 	btrfs_set_inode_last_trans(trans, inode);
462 	ret = 0;
463 failed:
464 	btrfs_release_path(root, path);
465 	btrfs_free_path(path);
466 	return ret;
467 }
468 
469 
470 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
471 			      struct btrfs_root *root,
472 			      struct inode *dir,
473 			      struct dentry *dentry)
474 {
475 	struct btrfs_path *path;
476 	const char *name = dentry->d_name.name;
477 	int name_len = dentry->d_name.len;
478 	int ret = 0;
479 	struct extent_buffer *leaf;
480 	struct btrfs_dir_item *di;
481 	struct btrfs_key key;
482 
483 	path = btrfs_alloc_path();
484 	if (!path) {
485 		ret = -ENOMEM;
486 		goto err;
487 	}
488 
489 	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
490 				    name, name_len, -1);
491 	if (IS_ERR(di)) {
492 		ret = PTR_ERR(di);
493 		goto err;
494 	}
495 	if (!di) {
496 		ret = -ENOENT;
497 		goto err;
498 	}
499 	leaf = path->nodes[0];
500 	btrfs_dir_item_key_to_cpu(leaf, di, &key);
501 	ret = btrfs_delete_one_dir_name(trans, root, path, di);
502 	if (ret)
503 		goto err;
504 	btrfs_release_path(root, path);
505 
506 	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
507 					 key.objectid, name, name_len, -1);
508 	if (IS_ERR(di)) {
509 		ret = PTR_ERR(di);
510 		goto err;
511 	}
512 	if (!di) {
513 		ret = -ENOENT;
514 		goto err;
515 	}
516 	ret = btrfs_delete_one_dir_name(trans, root, path, di);
517 
518 	dentry->d_inode->i_ctime = dir->i_ctime;
519 	ret = btrfs_del_inode_ref(trans, root, name, name_len,
520 				  dentry->d_inode->i_ino,
521 				  dentry->d_parent->d_inode->i_ino);
522 	if (ret) {
523 		printk("failed to delete reference to %.*s, "
524 		       "inode %lu parent %lu\n", name_len, name,
525 		       dentry->d_inode->i_ino,
526 		       dentry->d_parent->d_inode->i_ino);
527 	}
528 err:
529 	btrfs_free_path(path);
530 	if (!ret) {
531 		dir->i_size -= name_len * 2;
532 		dir->i_mtime = dir->i_ctime = CURRENT_TIME;
533 		btrfs_update_inode(trans, root, dir);
534 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
535 		dentry->d_inode->i_nlink--;
536 #else
537 		drop_nlink(dentry->d_inode);
538 #endif
539 		ret = btrfs_update_inode(trans, root, dentry->d_inode);
540 		dir->i_sb->s_dirt = 1;
541 	}
542 	return ret;
543 }
544 
545 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
546 {
547 	struct btrfs_root *root;
548 	struct btrfs_trans_handle *trans;
549 	int ret;
550 	unsigned long nr;
551 
552 	root = BTRFS_I(dir)->root;
553 	mutex_lock(&root->fs_info->fs_mutex);
554 	trans = btrfs_start_transaction(root, 1);
555 
556 	btrfs_set_trans_block_group(trans, dir);
557 	ret = btrfs_unlink_trans(trans, root, dir, dentry);
558 	nr = trans->blocks_used;
559 
560 	btrfs_end_transaction(trans, root);
561 	mutex_unlock(&root->fs_info->fs_mutex);
562 	btrfs_btree_balance_dirty(root, nr);
563 
564 	return ret;
565 }
566 
567 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
568 {
569 	struct inode *inode = dentry->d_inode;
570 	int err;
571 	int ret;
572 	struct btrfs_root *root = BTRFS_I(dir)->root;
573 	struct btrfs_trans_handle *trans;
574 	unsigned long nr;
575 
576 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
577 		return -ENOTEMPTY;
578 
579 	mutex_lock(&root->fs_info->fs_mutex);
580 	trans = btrfs_start_transaction(root, 1);
581 	btrfs_set_trans_block_group(trans, dir);
582 
583 	/* now the directory is empty */
584 	err = btrfs_unlink_trans(trans, root, dir, dentry);
585 	if (!err) {
586 		inode->i_size = 0;
587 	}
588 
589 	nr = trans->blocks_used;
590 	ret = btrfs_end_transaction(trans, root);
591 	mutex_unlock(&root->fs_info->fs_mutex);
592 	btrfs_btree_balance_dirty(root, nr);
593 
594 	if (ret && !err)
595 		err = ret;
596 	return err;
597 }
598 
599 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
600 			    struct btrfs_root *root,
601 			    struct inode *inode)
602 {
603 	struct btrfs_path *path;
604 	int ret;
605 
606 	clear_inode(inode);
607 
608 	path = btrfs_alloc_path();
609 	BUG_ON(!path);
610 	ret = btrfs_lookup_inode(trans, root, path,
611 				 &BTRFS_I(inode)->location, -1);
612 	if (ret > 0)
613 		ret = -ENOENT;
614 	if (!ret)
615 		ret = btrfs_del_item(trans, root, path);
616 	btrfs_free_path(path);
617 	return ret;
618 }
619 
620 /*
621  * this can truncate away extent items, csum items and directory items.
622  * It starts at a high offset and removes keys until it can't find
623  * any higher than i_size.
624  *
625  * csum items that cross the new i_size are truncated to the new size
626  * as well.
627  */
628 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
629 				   struct btrfs_root *root,
630 				   struct inode *inode)
631 {
632 	int ret;
633 	struct btrfs_path *path;
634 	struct btrfs_key key;
635 	struct btrfs_key found_key;
636 	u32 found_type;
637 	struct extent_buffer *leaf;
638 	struct btrfs_file_extent_item *fi;
639 	u64 extent_start = 0;
640 	u64 extent_num_bytes = 0;
641 	u64 item_end = 0;
642 	u64 root_gen = 0;
643 	u64 root_owner = 0;
644 	int found_extent;
645 	int del_item;
646 	int extent_type = -1;
647 
648 	btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
649 	path = btrfs_alloc_path();
650 	path->reada = -1;
651 	BUG_ON(!path);
652 
653 	/* FIXME, add redo link to tree so we don't leak on crash */
654 	key.objectid = inode->i_ino;
655 	key.offset = (u64)-1;
656 	key.type = (u8)-1;
657 
658 	while(1) {
659 		btrfs_init_path(path);
660 		fi = NULL;
661 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
662 		if (ret < 0) {
663 			goto error;
664 		}
665 		if (ret > 0) {
666 			BUG_ON(path->slots[0] == 0);
667 			path->slots[0]--;
668 		}
669 		leaf = path->nodes[0];
670 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
671 		found_type = btrfs_key_type(&found_key);
672 
673 		if (found_key.objectid != inode->i_ino)
674 			break;
675 
676 		if (found_type != BTRFS_CSUM_ITEM_KEY &&
677 		    found_type != BTRFS_DIR_ITEM_KEY &&
678 		    found_type != BTRFS_DIR_INDEX_KEY &&
679 		    found_type != BTRFS_EXTENT_DATA_KEY)
680 			break;
681 
682 		item_end = found_key.offset;
683 		if (found_type == BTRFS_EXTENT_DATA_KEY) {
684 			fi = btrfs_item_ptr(leaf, path->slots[0],
685 					    struct btrfs_file_extent_item);
686 			extent_type = btrfs_file_extent_type(leaf, fi);
687 			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
688 				item_end +=
689 				    btrfs_file_extent_num_bytes(leaf, fi);
690 			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
691 				struct btrfs_item *item = btrfs_item_nr(leaf,
692 							        path->slots[0]);
693 				item_end += btrfs_file_extent_inline_len(leaf,
694 									 item);
695 			}
696 			item_end--;
697 		}
698 		if (found_type == BTRFS_CSUM_ITEM_KEY) {
699 			ret = btrfs_csum_truncate(trans, root, path,
700 						  inode->i_size);
701 			BUG_ON(ret);
702 		}
703 		if (item_end < inode->i_size) {
704 			if (found_type == BTRFS_DIR_ITEM_KEY) {
705 				found_type = BTRFS_INODE_ITEM_KEY;
706 			} else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
707 				found_type = BTRFS_CSUM_ITEM_KEY;
708 			} else if (found_type) {
709 				found_type--;
710 			} else {
711 				break;
712 			}
713 			btrfs_set_key_type(&key, found_type);
714 			btrfs_release_path(root, path);
715 			continue;
716 		}
717 		if (found_key.offset >= inode->i_size)
718 			del_item = 1;
719 		else
720 			del_item = 0;
721 		found_extent = 0;
722 
723 		/* FIXME, shrink the extent if the ref count is only 1 */
724 		if (found_type != BTRFS_EXTENT_DATA_KEY)
725 			goto delete;
726 
727 		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
728 			u64 num_dec;
729 			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
730 			if (!del_item) {
731 				u64 orig_num_bytes =
732 					btrfs_file_extent_num_bytes(leaf, fi);
733 				extent_num_bytes = inode->i_size -
734 					found_key.offset + root->sectorsize - 1;
735 				btrfs_set_file_extent_num_bytes(leaf, fi,
736 							 extent_num_bytes);
737 				num_dec = (orig_num_bytes -
738 					   extent_num_bytes) >> 9;
739 				if (extent_start != 0) {
740 					inode->i_blocks -= num_dec;
741 				}
742 				btrfs_mark_buffer_dirty(leaf);
743 			} else {
744 				extent_num_bytes =
745 					btrfs_file_extent_disk_num_bytes(leaf,
746 									 fi);
747 				/* FIXME blocksize != 4096 */
748 				num_dec = btrfs_file_extent_num_bytes(leaf,
749 								       fi) >> 9;
750 				if (extent_start != 0) {
751 					found_extent = 1;
752 					inode->i_blocks -= num_dec;
753 				}
754 				root_gen = btrfs_header_generation(leaf);
755 				root_owner = btrfs_header_owner(leaf);
756 			}
757 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
758 			   !del_item) {
759 			u32 newsize = inode->i_size - found_key.offset;
760 			newsize = btrfs_file_extent_calc_inline_size(newsize);
761 			ret = btrfs_truncate_item(trans, root, path,
762 						  newsize, 1);
763 			BUG_ON(ret);
764 		}
765 delete:
766 		if (del_item) {
767 			ret = btrfs_del_item(trans, root, path);
768 			if (ret)
769 				goto error;
770 		} else {
771 			break;
772 		}
773 		btrfs_release_path(root, path);
774 		if (found_extent) {
775 			ret = btrfs_free_extent(trans, root, extent_start,
776 						extent_num_bytes,
777 						root_owner,
778 						root_gen, inode->i_ino,
779 						found_key.offset, 0);
780 			BUG_ON(ret);
781 		}
782 	}
783 	ret = 0;
784 error:
785 	btrfs_release_path(root, path);
786 	btrfs_free_path(path);
787 	inode->i_sb->s_dirt = 1;
788 	return ret;
789 }
790 
791 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
792 			      size_t zero_start)
793 {
794 	char *kaddr;
795 	int ret = 0;
796 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
797 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
798 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
799 
800 	WARN_ON(!PageLocked(page));
801 	set_page_extent_mapped(page);
802 
803 	lock_extent(em_tree, page_start, page_end, GFP_NOFS);
804 	set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
805 			    page_end, GFP_NOFS);
806 	if (zero_start != PAGE_CACHE_SIZE) {
807 		kaddr = kmap(page);
808 		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
809 		flush_dcache_page(page);
810 		kunmap(page);
811 	}
812 	set_page_dirty(page);
813 	unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
814 
815 	return ret;
816 }
817 
818 /*
819  * taken from block_truncate_page, but does cow as it zeros out
820  * any bytes left in the last page in the file.
821  */
822 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
823 {
824 	struct inode *inode = mapping->host;
825 	struct btrfs_root *root = BTRFS_I(inode)->root;
826 	u32 blocksize = root->sectorsize;
827 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
828 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
829 	struct page *page;
830 	int ret = 0;
831 	u64 page_start;
832 
833 	if ((offset & (blocksize - 1)) == 0)
834 		goto out;
835 
836 	down_read(&root->snap_sem);
837 	ret = -ENOMEM;
838 	page = grab_cache_page(mapping, index);
839 	if (!page)
840 		goto out;
841 	if (!PageUptodate(page)) {
842 		ret = btrfs_readpage(NULL, page);
843 		lock_page(page);
844 		if (!PageUptodate(page)) {
845 			ret = -EIO;
846 			goto out;
847 		}
848 	}
849 	page_start = (u64)page->index << PAGE_CACHE_SHIFT;
850 
851 	ret = btrfs_cow_one_page(inode, page, offset);
852 
853 	unlock_page(page);
854 	page_cache_release(page);
855 	up_read(&BTRFS_I(inode)->root->snap_sem);
856 out:
857 	return ret;
858 }
859 
860 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
861 {
862 	struct inode *inode = dentry->d_inode;
863 	int err;
864 
865 	err = inode_change_ok(inode, attr);
866 	if (err)
867 		return err;
868 
869 	if (S_ISREG(inode->i_mode) &&
870 	    attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
871 		struct btrfs_trans_handle *trans;
872 		struct btrfs_root *root = BTRFS_I(inode)->root;
873 		struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
874 
875 		u64 mask = root->sectorsize - 1;
876 		u64 pos = (inode->i_size + mask) & ~mask;
877 		u64 block_end = attr->ia_size | mask;
878 		u64 hole_size;
879 		u64 alloc_hint = 0;
880 
881 		if (attr->ia_size <= pos)
882 			goto out;
883 
884 		btrfs_truncate_page(inode->i_mapping, inode->i_size);
885 
886 		lock_extent(em_tree, pos, block_end, GFP_NOFS);
887 		hole_size = (attr->ia_size - pos + mask) & ~mask;
888 
889 		mutex_lock(&root->fs_info->fs_mutex);
890 		trans = btrfs_start_transaction(root, 1);
891 		btrfs_set_trans_block_group(trans, inode);
892 		err = btrfs_drop_extents(trans, root, inode,
893 					 pos, pos + hole_size, pos,
894 					 &alloc_hint);
895 
896 		if (alloc_hint != EXTENT_MAP_INLINE) {
897 			err = btrfs_insert_file_extent(trans, root,
898 						       inode->i_ino,
899 						       pos, 0, 0, hole_size);
900 		}
901 		btrfs_end_transaction(trans, root);
902 		mutex_unlock(&root->fs_info->fs_mutex);
903 		unlock_extent(em_tree, pos, block_end, GFP_NOFS);
904 		if (err)
905 			return err;
906 	}
907 out:
908 	err = inode_setattr(inode, attr);
909 
910 	return err;
911 }
912 void btrfs_delete_inode(struct inode *inode)
913 {
914 	struct btrfs_trans_handle *trans;
915 	struct btrfs_root *root = BTRFS_I(inode)->root;
916 	unsigned long nr;
917 	int ret;
918 
919 	truncate_inode_pages(&inode->i_data, 0);
920 	if (is_bad_inode(inode)) {
921 		goto no_delete;
922 	}
923 
924 	inode->i_size = 0;
925 	mutex_lock(&root->fs_info->fs_mutex);
926 	trans = btrfs_start_transaction(root, 1);
927 
928 	btrfs_set_trans_block_group(trans, inode);
929 	ret = btrfs_truncate_in_trans(trans, root, inode);
930 	if (ret)
931 		goto no_delete_lock;
932 	ret = btrfs_delete_xattrs(trans, root, inode);
933 	if (ret)
934 		goto no_delete_lock;
935 	ret = btrfs_free_inode(trans, root, inode);
936 	if (ret)
937 		goto no_delete_lock;
938 	nr = trans->blocks_used;
939 
940 	btrfs_end_transaction(trans, root);
941 	mutex_unlock(&root->fs_info->fs_mutex);
942 	btrfs_btree_balance_dirty(root, nr);
943 	return;
944 
945 no_delete_lock:
946 	nr = trans->blocks_used;
947 	btrfs_end_transaction(trans, root);
948 	mutex_unlock(&root->fs_info->fs_mutex);
949 	btrfs_btree_balance_dirty(root, nr);
950 no_delete:
951 	clear_inode(inode);
952 }
953 
954 /*
955  * this returns the key found in the dir entry in the location pointer.
956  * If no dir entries were found, location->objectid is 0.
957  */
958 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
959 			       struct btrfs_key *location)
960 {
961 	const char *name = dentry->d_name.name;
962 	int namelen = dentry->d_name.len;
963 	struct btrfs_dir_item *di;
964 	struct btrfs_path *path;
965 	struct btrfs_root *root = BTRFS_I(dir)->root;
966 	int ret = 0;
967 
968 	if (namelen == 1 && strcmp(name, ".") == 0) {
969 		location->objectid = dir->i_ino;
970 		location->type = BTRFS_INODE_ITEM_KEY;
971 		location->offset = 0;
972 		return 0;
973 	}
974 	path = btrfs_alloc_path();
975 	BUG_ON(!path);
976 
977 	if (namelen == 2 && strcmp(name, "..") == 0) {
978 		struct btrfs_key key;
979 		struct extent_buffer *leaf;
980 		u32 nritems;
981 		int slot;
982 
983 		key.objectid = dir->i_ino;
984 		btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
985 		key.offset = 0;
986 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
987 		BUG_ON(ret == 0);
988 		ret = 0;
989 
990 		leaf = path->nodes[0];
991 		slot = path->slots[0];
992 		nritems = btrfs_header_nritems(leaf);
993 		if (slot >= nritems)
994 			goto out_err;
995 
996 		btrfs_item_key_to_cpu(leaf, &key, slot);
997 		if (key.objectid != dir->i_ino ||
998 		    key.type != BTRFS_INODE_REF_KEY) {
999 			goto out_err;
1000 		}
1001 		location->objectid = key.offset;
1002 		location->type = BTRFS_INODE_ITEM_KEY;
1003 		location->offset = 0;
1004 		goto out;
1005 	}
1006 
1007 	di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1008 				    namelen, 0);
1009 	if (IS_ERR(di))
1010 		ret = PTR_ERR(di);
1011 	if (!di || IS_ERR(di)) {
1012 		goto out_err;
1013 	}
1014 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1015 out:
1016 	btrfs_free_path(path);
1017 	return ret;
1018 out_err:
1019 	location->objectid = 0;
1020 	goto out;
1021 }
1022 
1023 /*
1024  * when we hit a tree root in a directory, the btrfs part of the inode
1025  * needs to be changed to reflect the root directory of the tree root.  This
1026  * is kind of like crossing a mount point.
1027  */
1028 static int fixup_tree_root_location(struct btrfs_root *root,
1029 			     struct btrfs_key *location,
1030 			     struct btrfs_root **sub_root,
1031 			     struct dentry *dentry)
1032 {
1033 	struct btrfs_path *path;
1034 	struct btrfs_root_item *ri;
1035 
1036 	if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1037 		return 0;
1038 	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1039 		return 0;
1040 
1041 	path = btrfs_alloc_path();
1042 	BUG_ON(!path);
1043 	mutex_lock(&root->fs_info->fs_mutex);
1044 
1045 	*sub_root = btrfs_read_fs_root(root->fs_info, location,
1046 					dentry->d_name.name,
1047 					dentry->d_name.len);
1048 	if (IS_ERR(*sub_root))
1049 		return PTR_ERR(*sub_root);
1050 
1051 	ri = &(*sub_root)->root_item;
1052 	location->objectid = btrfs_root_dirid(ri);
1053 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1054 	location->offset = 0;
1055 
1056 	btrfs_free_path(path);
1057 	mutex_unlock(&root->fs_info->fs_mutex);
1058 	return 0;
1059 }
1060 
1061 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1062 {
1063 	struct btrfs_iget_args *args = p;
1064 	inode->i_ino = args->ino;
1065 	BTRFS_I(inode)->root = args->root;
1066 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1067 			     inode->i_mapping, GFP_NOFS);
1068 	return 0;
1069 }
1070 
1071 static int btrfs_find_actor(struct inode *inode, void *opaque)
1072 {
1073 	struct btrfs_iget_args *args = opaque;
1074 	return (args->ino == inode->i_ino &&
1075 		args->root == BTRFS_I(inode)->root);
1076 }
1077 
1078 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1079 				struct btrfs_root *root)
1080 {
1081 	struct inode *inode;
1082 	struct btrfs_iget_args args;
1083 	args.ino = objectid;
1084 	args.root = root;
1085 
1086 	inode = iget5_locked(s, objectid, btrfs_find_actor,
1087 			     btrfs_init_locked_inode,
1088 			     (void *)&args);
1089 	return inode;
1090 }
1091 
1092 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1093 				   struct nameidata *nd)
1094 {
1095 	struct inode * inode;
1096 	struct btrfs_inode *bi = BTRFS_I(dir);
1097 	struct btrfs_root *root = bi->root;
1098 	struct btrfs_root *sub_root = root;
1099 	struct btrfs_key location;
1100 	int ret;
1101 
1102 	if (dentry->d_name.len > BTRFS_NAME_LEN)
1103 		return ERR_PTR(-ENAMETOOLONG);
1104 
1105 	mutex_lock(&root->fs_info->fs_mutex);
1106 	ret = btrfs_inode_by_name(dir, dentry, &location);
1107 	mutex_unlock(&root->fs_info->fs_mutex);
1108 
1109 	if (ret < 0)
1110 		return ERR_PTR(ret);
1111 
1112 	inode = NULL;
1113 	if (location.objectid) {
1114 		ret = fixup_tree_root_location(root, &location, &sub_root,
1115 						dentry);
1116 		if (ret < 0)
1117 			return ERR_PTR(ret);
1118 		if (ret > 0)
1119 			return ERR_PTR(-ENOENT);
1120 		inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1121 					  sub_root);
1122 		if (!inode)
1123 			return ERR_PTR(-EACCES);
1124 		if (inode->i_state & I_NEW) {
1125 			/* the inode and parent dir are two different roots */
1126 			if (sub_root != root) {
1127 				igrab(inode);
1128 				sub_root->inode = inode;
1129 			}
1130 			BTRFS_I(inode)->root = sub_root;
1131 			memcpy(&BTRFS_I(inode)->location, &location,
1132 			       sizeof(location));
1133 			btrfs_read_locked_inode(inode);
1134 			unlock_new_inode(inode);
1135 		}
1136 	}
1137 	return d_splice_alias(inode, dentry);
1138 }
1139 
1140 static unsigned char btrfs_filetype_table[] = {
1141 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1142 };
1143 
1144 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1145 {
1146 	struct inode *inode = filp->f_dentry->d_inode;
1147 	struct btrfs_root *root = BTRFS_I(inode)->root;
1148 	struct btrfs_item *item;
1149 	struct btrfs_dir_item *di;
1150 	struct btrfs_key key;
1151 	struct btrfs_key found_key;
1152 	struct btrfs_path *path;
1153 	int ret;
1154 	u32 nritems;
1155 	struct extent_buffer *leaf;
1156 	int slot;
1157 	int advance;
1158 	unsigned char d_type;
1159 	int over = 0;
1160 	u32 di_cur;
1161 	u32 di_total;
1162 	u32 di_len;
1163 	int key_type = BTRFS_DIR_INDEX_KEY;
1164 	char tmp_name[32];
1165 	char *name_ptr;
1166 	int name_len;
1167 
1168 	/* FIXME, use a real flag for deciding about the key type */
1169 	if (root->fs_info->tree_root == root)
1170 		key_type = BTRFS_DIR_ITEM_KEY;
1171 
1172 	/* special case for "." */
1173 	if (filp->f_pos == 0) {
1174 		over = filldir(dirent, ".", 1,
1175 			       1, inode->i_ino,
1176 			       DT_DIR);
1177 		if (over)
1178 			return 0;
1179 		filp->f_pos = 1;
1180 	}
1181 
1182 	mutex_lock(&root->fs_info->fs_mutex);
1183 	key.objectid = inode->i_ino;
1184 	path = btrfs_alloc_path();
1185 	path->reada = 2;
1186 
1187 	/* special case for .., just use the back ref */
1188 	if (filp->f_pos == 1) {
1189 		btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1190 		key.offset = 0;
1191 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1192 		BUG_ON(ret == 0);
1193 		leaf = path->nodes[0];
1194 		slot = path->slots[0];
1195 		nritems = btrfs_header_nritems(leaf);
1196 		if (slot >= nritems) {
1197 			btrfs_release_path(root, path);
1198 			goto read_dir_items;
1199 		}
1200 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
1201 		btrfs_release_path(root, path);
1202 		if (found_key.objectid != key.objectid ||
1203 		    found_key.type != BTRFS_INODE_REF_KEY)
1204 			goto read_dir_items;
1205 		over = filldir(dirent, "..", 2,
1206 			       2, found_key.offset, DT_DIR);
1207 		if (over)
1208 			goto nopos;
1209 		filp->f_pos = 2;
1210 	}
1211 
1212 read_dir_items:
1213 	btrfs_set_key_type(&key, key_type);
1214 	key.offset = filp->f_pos;
1215 
1216 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1217 	if (ret < 0)
1218 		goto err;
1219 	advance = 0;
1220 	while(1) {
1221 		leaf = path->nodes[0];
1222 		nritems = btrfs_header_nritems(leaf);
1223 		slot = path->slots[0];
1224 		if (advance || slot >= nritems) {
1225 			if (slot >= nritems -1) {
1226 				ret = btrfs_next_leaf(root, path);
1227 				if (ret)
1228 					break;
1229 				leaf = path->nodes[0];
1230 				nritems = btrfs_header_nritems(leaf);
1231 				slot = path->slots[0];
1232 			} else {
1233 				slot++;
1234 				path->slots[0]++;
1235 			}
1236 		}
1237 		advance = 1;
1238 		item = btrfs_item_nr(leaf, slot);
1239 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
1240 
1241 		if (found_key.objectid != key.objectid)
1242 			break;
1243 		if (btrfs_key_type(&found_key) != key_type)
1244 			break;
1245 		if (found_key.offset < filp->f_pos)
1246 			continue;
1247 
1248 		filp->f_pos = found_key.offset;
1249 		advance = 1;
1250 		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1251 		di_cur = 0;
1252 		di_total = btrfs_item_size(leaf, item);
1253 		while(di_cur < di_total) {
1254 			struct btrfs_key location;
1255 
1256 			name_len = btrfs_dir_name_len(leaf, di);
1257 			if (name_len < 32) {
1258 				name_ptr = tmp_name;
1259 			} else {
1260 				name_ptr = kmalloc(name_len, GFP_NOFS);
1261 				BUG_ON(!name_ptr);
1262 			}
1263 			read_extent_buffer(leaf, name_ptr,
1264 					   (unsigned long)(di + 1), name_len);
1265 
1266 			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1267 			btrfs_dir_item_key_to_cpu(leaf, di, &location);
1268 
1269 			over = filldir(dirent, name_ptr, name_len,
1270 				       found_key.offset,
1271 				       location.objectid,
1272 				       d_type);
1273 
1274 			if (name_ptr != tmp_name)
1275 				kfree(name_ptr);
1276 
1277 			if (over)
1278 				goto nopos;
1279 			di_len = btrfs_dir_name_len(leaf, di) +
1280 				btrfs_dir_data_len(leaf, di) +sizeof(*di);
1281 			di_cur += di_len;
1282 			di = (struct btrfs_dir_item *)((char *)di + di_len);
1283 		}
1284 	}
1285 	filp->f_pos++;
1286 nopos:
1287 	ret = 0;
1288 err:
1289 	btrfs_release_path(root, path);
1290 	btrfs_free_path(path);
1291 	mutex_unlock(&root->fs_info->fs_mutex);
1292 	return ret;
1293 }
1294 
1295 int btrfs_write_inode(struct inode *inode, int wait)
1296 {
1297 	struct btrfs_root *root = BTRFS_I(inode)->root;
1298 	struct btrfs_trans_handle *trans;
1299 	int ret = 0;
1300 
1301 	if (wait) {
1302 		mutex_lock(&root->fs_info->fs_mutex);
1303 		trans = btrfs_start_transaction(root, 1);
1304 		btrfs_set_trans_block_group(trans, inode);
1305 		ret = btrfs_commit_transaction(trans, root);
1306 		mutex_unlock(&root->fs_info->fs_mutex);
1307 	}
1308 	return ret;
1309 }
1310 
1311 /*
1312  * This is somewhat expensive, updating the tree every time the
1313  * inode changes.  But, it is most likely to find the inode in cache.
1314  * FIXME, needs more benchmarking...there are no reasons other than performance
1315  * to keep or drop this code.
1316  */
1317 void btrfs_dirty_inode(struct inode *inode)
1318 {
1319 	struct btrfs_root *root = BTRFS_I(inode)->root;
1320 	struct btrfs_trans_handle *trans;
1321 
1322 	mutex_lock(&root->fs_info->fs_mutex);
1323 	trans = btrfs_start_transaction(root, 1);
1324 	btrfs_set_trans_block_group(trans, inode);
1325 	btrfs_update_inode(trans, root, inode);
1326 	btrfs_end_transaction(trans, root);
1327 	mutex_unlock(&root->fs_info->fs_mutex);
1328 }
1329 
1330 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1331 				     struct btrfs_root *root,
1332 				     u64 objectid,
1333 				     struct btrfs_block_group_cache *group,
1334 				     int mode)
1335 {
1336 	struct inode *inode;
1337 	struct btrfs_inode_item *inode_item;
1338 	struct btrfs_key *location;
1339 	struct btrfs_path *path;
1340 	int ret;
1341 	int owner;
1342 
1343 	path = btrfs_alloc_path();
1344 	BUG_ON(!path);
1345 
1346 	inode = new_inode(root->fs_info->sb);
1347 	if (!inode)
1348 		return ERR_PTR(-ENOMEM);
1349 
1350 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1351 			     inode->i_mapping, GFP_NOFS);
1352 	BTRFS_I(inode)->root = root;
1353 
1354 	if (mode & S_IFDIR)
1355 		owner = 0;
1356 	else
1357 		owner = 1;
1358 	group = btrfs_find_block_group(root, group, 0, 0, owner);
1359 	BTRFS_I(inode)->block_group = group;
1360 
1361 	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
1362 	if (ret)
1363 		goto fail;
1364 
1365 	inode->i_uid = current->fsuid;
1366 	inode->i_gid = current->fsgid;
1367 	inode->i_mode = mode;
1368 	inode->i_ino = objectid;
1369 	inode->i_blocks = 0;
1370 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1371 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1372 				  struct btrfs_inode_item);
1373 	fill_inode_item(path->nodes[0], inode_item, inode);
1374 	btrfs_mark_buffer_dirty(path->nodes[0]);
1375 	btrfs_free_path(path);
1376 
1377 	location = &BTRFS_I(inode)->location;
1378 	location->objectid = objectid;
1379 	location->offset = 0;
1380 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1381 
1382 	insert_inode_hash(inode);
1383 	return inode;
1384 fail:
1385 	btrfs_free_path(path);
1386 	return ERR_PTR(ret);
1387 }
1388 
1389 static inline u8 btrfs_inode_type(struct inode *inode)
1390 {
1391 	return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1392 }
1393 
1394 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1395 			    struct dentry *dentry, struct inode *inode)
1396 {
1397 	int ret;
1398 	struct btrfs_key key;
1399 	struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1400 	struct inode *parent_inode;
1401 
1402 	key.objectid = inode->i_ino;
1403 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1404 	key.offset = 0;
1405 
1406 	ret = btrfs_insert_dir_item(trans, root,
1407 				    dentry->d_name.name, dentry->d_name.len,
1408 				    dentry->d_parent->d_inode->i_ino,
1409 				    &key, btrfs_inode_type(inode));
1410 	if (ret == 0) {
1411 		ret = btrfs_insert_inode_ref(trans, root,
1412 				     dentry->d_name.name,
1413 				     dentry->d_name.len,
1414 				     inode->i_ino,
1415 				     dentry->d_parent->d_inode->i_ino);
1416 		parent_inode = dentry->d_parent->d_inode;
1417 		parent_inode->i_size += dentry->d_name.len * 2;
1418 		parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1419 		ret = btrfs_update_inode(trans, root,
1420 					 dentry->d_parent->d_inode);
1421 	}
1422 	return ret;
1423 }
1424 
1425 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1426 			    struct dentry *dentry, struct inode *inode)
1427 {
1428 	int err = btrfs_add_link(trans, dentry, inode);
1429 	if (!err) {
1430 		d_instantiate(dentry, inode);
1431 		return 0;
1432 	}
1433 	if (err > 0)
1434 		err = -EEXIST;
1435 	return err;
1436 }
1437 
1438 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1439 			int mode, dev_t rdev)
1440 {
1441 	struct btrfs_trans_handle *trans;
1442 	struct btrfs_root *root = BTRFS_I(dir)->root;
1443 	struct inode *inode;
1444 	int err;
1445 	int drop_inode = 0;
1446 	u64 objectid;
1447 	unsigned long nr;
1448 
1449 	if (!new_valid_dev(rdev))
1450 		return -EINVAL;
1451 
1452 	mutex_lock(&root->fs_info->fs_mutex);
1453 	trans = btrfs_start_transaction(root, 1);
1454 	btrfs_set_trans_block_group(trans, dir);
1455 
1456 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1457 	if (err) {
1458 		err = -ENOSPC;
1459 		goto out_unlock;
1460 	}
1461 
1462 	inode = btrfs_new_inode(trans, root, objectid,
1463 				BTRFS_I(dir)->block_group, mode);
1464 	err = PTR_ERR(inode);
1465 	if (IS_ERR(inode))
1466 		goto out_unlock;
1467 
1468 	btrfs_set_trans_block_group(trans, inode);
1469 	err = btrfs_add_nondir(trans, dentry, inode);
1470 	if (err)
1471 		drop_inode = 1;
1472 	else {
1473 		inode->i_op = &btrfs_special_inode_operations;
1474 		init_special_inode(inode, inode->i_mode, rdev);
1475 		btrfs_update_inode(trans, root, inode);
1476 	}
1477 	dir->i_sb->s_dirt = 1;
1478 	btrfs_update_inode_block_group(trans, inode);
1479 	btrfs_update_inode_block_group(trans, dir);
1480 out_unlock:
1481 	nr = trans->blocks_used;
1482 	btrfs_end_transaction(trans, root);
1483 	mutex_unlock(&root->fs_info->fs_mutex);
1484 
1485 	if (drop_inode) {
1486 		inode_dec_link_count(inode);
1487 		iput(inode);
1488 	}
1489 	btrfs_btree_balance_dirty(root, nr);
1490 	return err;
1491 }
1492 
1493 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1494 			int mode, struct nameidata *nd)
1495 {
1496 	struct btrfs_trans_handle *trans;
1497 	struct btrfs_root *root = BTRFS_I(dir)->root;
1498 	struct inode *inode;
1499 	int err;
1500 	int drop_inode = 0;
1501 	unsigned long nr;
1502 	u64 objectid;
1503 
1504 	mutex_lock(&root->fs_info->fs_mutex);
1505 	trans = btrfs_start_transaction(root, 1);
1506 	btrfs_set_trans_block_group(trans, dir);
1507 
1508 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1509 	if (err) {
1510 		err = -ENOSPC;
1511 		goto out_unlock;
1512 	}
1513 
1514 	inode = btrfs_new_inode(trans, root, objectid,
1515 				BTRFS_I(dir)->block_group, mode);
1516 	err = PTR_ERR(inode);
1517 	if (IS_ERR(inode))
1518 		goto out_unlock;
1519 
1520 	btrfs_set_trans_block_group(trans, inode);
1521 	err = btrfs_add_nondir(trans, dentry, inode);
1522 	if (err)
1523 		drop_inode = 1;
1524 	else {
1525 		inode->i_mapping->a_ops = &btrfs_aops;
1526 		inode->i_fop = &btrfs_file_operations;
1527 		inode->i_op = &btrfs_file_inode_operations;
1528 		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1529 				     inode->i_mapping, GFP_NOFS);
1530 		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
1531 	}
1532 	dir->i_sb->s_dirt = 1;
1533 	btrfs_update_inode_block_group(trans, inode);
1534 	btrfs_update_inode_block_group(trans, dir);
1535 out_unlock:
1536 	nr = trans->blocks_used;
1537 	btrfs_end_transaction(trans, root);
1538 	mutex_unlock(&root->fs_info->fs_mutex);
1539 
1540 	if (drop_inode) {
1541 		inode_dec_link_count(inode);
1542 		iput(inode);
1543 	}
1544 	btrfs_btree_balance_dirty(root, nr);
1545 	return err;
1546 }
1547 
1548 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1549 		      struct dentry *dentry)
1550 {
1551 	struct btrfs_trans_handle *trans;
1552 	struct btrfs_root *root = BTRFS_I(dir)->root;
1553 	struct inode *inode = old_dentry->d_inode;
1554 	unsigned long nr;
1555 	int err;
1556 	int drop_inode = 0;
1557 
1558 	if (inode->i_nlink == 0)
1559 		return -ENOENT;
1560 
1561 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1562 	inode->i_nlink++;
1563 #else
1564 	inc_nlink(inode);
1565 #endif
1566 	mutex_lock(&root->fs_info->fs_mutex);
1567 	trans = btrfs_start_transaction(root, 1);
1568 
1569 	btrfs_set_trans_block_group(trans, dir);
1570 	atomic_inc(&inode->i_count);
1571 	err = btrfs_add_nondir(trans, dentry, inode);
1572 
1573 	if (err)
1574 		drop_inode = 1;
1575 
1576 	dir->i_sb->s_dirt = 1;
1577 	btrfs_update_inode_block_group(trans, dir);
1578 	err = btrfs_update_inode(trans, root, inode);
1579 
1580 	if (err)
1581 		drop_inode = 1;
1582 
1583 	nr = trans->blocks_used;
1584 	btrfs_end_transaction(trans, root);
1585 	mutex_unlock(&root->fs_info->fs_mutex);
1586 
1587 	if (drop_inode) {
1588 		inode_dec_link_count(inode);
1589 		iput(inode);
1590 	}
1591 	btrfs_btree_balance_dirty(root, nr);
1592 	return err;
1593 }
1594 
1595 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1596 {
1597 	struct inode *inode;
1598 	struct btrfs_trans_handle *trans;
1599 	struct btrfs_root *root = BTRFS_I(dir)->root;
1600 	int err = 0;
1601 	int drop_on_err = 0;
1602 	u64 objectid;
1603 	unsigned long nr = 1;
1604 
1605 	mutex_lock(&root->fs_info->fs_mutex);
1606 	trans = btrfs_start_transaction(root, 1);
1607 	btrfs_set_trans_block_group(trans, dir);
1608 
1609 	if (IS_ERR(trans)) {
1610 		err = PTR_ERR(trans);
1611 		goto out_unlock;
1612 	}
1613 
1614 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1615 	if (err) {
1616 		err = -ENOSPC;
1617 		goto out_unlock;
1618 	}
1619 
1620 	inode = btrfs_new_inode(trans, root, objectid,
1621 				BTRFS_I(dir)->block_group, S_IFDIR | mode);
1622 	if (IS_ERR(inode)) {
1623 		err = PTR_ERR(inode);
1624 		goto out_fail;
1625 	}
1626 
1627 	drop_on_err = 1;
1628 	inode->i_op = &btrfs_dir_inode_operations;
1629 	inode->i_fop = &btrfs_dir_file_operations;
1630 	btrfs_set_trans_block_group(trans, inode);
1631 
1632 	inode->i_size = 0;
1633 	err = btrfs_update_inode(trans, root, inode);
1634 	if (err)
1635 		goto out_fail;
1636 
1637 	err = btrfs_add_link(trans, dentry, inode);
1638 	if (err)
1639 		goto out_fail;
1640 
1641 	d_instantiate(dentry, inode);
1642 	drop_on_err = 0;
1643 	dir->i_sb->s_dirt = 1;
1644 	btrfs_update_inode_block_group(trans, inode);
1645 	btrfs_update_inode_block_group(trans, dir);
1646 
1647 out_fail:
1648 	nr = trans->blocks_used;
1649 	btrfs_end_transaction(trans, root);
1650 
1651 out_unlock:
1652 	mutex_unlock(&root->fs_info->fs_mutex);
1653 	if (drop_on_err)
1654 		iput(inode);
1655 	btrfs_btree_balance_dirty(root, nr);
1656 	return err;
1657 }
1658 
/*
 * Return an extent_map describing where the byte range [start, end] of
 * @inode lives.
 *
 * @inode:	file being mapped
 * @page:	page the caller is working on, or NULL.  Only looked at for
 *		inline extents, where data is copied between the tree leaf
 *		and this page.
 * @page_offset: offset inside @page used for that copy
 * @start:	first byte of the range
 * @end:	last byte of the range
 * @create:	non-zero when called on behalf of a write.  Only matters
 *		for inline extents: with an up to date page the page
 *		contents are written into the leaf, which needs a
 *		transaction.
 *
 * The inode's extent map tree is consulted first.  On a miss the file
 * extent item is looked up in the btree and turned into a mapping:
 *   - regular extent with a disk bytenr: block_start/block_end are set
 *     from the item
 *   - regular extent with disk bytenr 0, or no item covering @start:
 *     block_start/block_end are EXTENT_MAP_HOLE
 *   - inline extent: block_start/block_end are EXTENT_MAP_INLINE
 * The new mapping is then added to the tree; if that reports -EEXIST the
 * whole lookup is retried, giving up with -EIO after more than five
 * failures.
 *
 * fs_mutex is held for the duration of the call.
 *
 * Returns the extent_map, or an ERR_PTR() on failure (a WARN_ON fires in
 * that case).
 */
1659 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1660 				    size_t page_offset, u64 start, u64 end,
1661 				    int create)
1662 {
1663 	int ret;
1664 	int err = 0;
1665 	u64 bytenr;
1666 	u64 extent_start = 0;
1667 	u64 extent_end = 0;
1668 	u64 objectid = inode->i_ino;
1669 	u32 found_type;
1670 	int failed_insert = 0;
1671 	struct btrfs_path *path;
1672 	struct btrfs_root *root = BTRFS_I(inode)->root;
1673 	struct btrfs_file_extent_item *item;
1674 	struct extent_buffer *leaf;
1675 	struct btrfs_key found_key;
1676 	struct extent_map *em = NULL;
1677 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1678 	struct btrfs_trans_handle *trans = NULL;
1679 
1680 	path = btrfs_alloc_path();
1681 	BUG_ON(!path);
1682 	mutex_lock(&root->fs_info->fs_mutex);
1683 
	/*
	 * Restart point: reached again after a failed insert (-EEXIST) and
	 * after a transaction had to be started for an inline write.
	 */
1684 again:
	/* fast path: the range is already in the extent map tree */
1685 	em = lookup_extent_mapping(em_tree, start, end);
1686 	if (em) {
1687 		goto out;
1688 	}
	/* em is always NULL here, so a fresh map is always allocated */
1689 	if (!em) {
1690 		em = alloc_extent_map(GFP_NOFS);
1691 		if (!em) {
1692 			err = -ENOMEM;
1693 			goto out;
1694 		}
1695 		em->start = EXTENT_MAP_HOLE;
1696 		em->end = EXTENT_MAP_HOLE;
1697 	}
1698 	em->bdev = inode->i_sb->s_bdev;
	/*
	 * trans is only non-NULL on the retry for an inline write; the last
	 * argument is non-zero exactly in that case.
	 */
1699 	ret = btrfs_lookup_file_extent(trans, root, path,
1700 				       objectid, start, trans != NULL);
1701 	if (ret < 0) {
1702 		err = ret;
1703 		goto out;
1704 	}
1705 
	/*
	 * Non-zero result: step back one slot and inspect the item before
	 * the returned position (presumably no exact match for @start was
	 * found -- confirm against btrfs_lookup_file_extent).
	 */
1706 	if (ret != 0) {
1707 		if (path->slots[0] == 0)
1708 			goto not_found;
1709 		path->slots[0]--;
1710 	}
1711 
1712 	leaf = path->nodes[0];
1713 	item = btrfs_item_ptr(leaf, path->slots[0],
1714 			      struct btrfs_file_extent_item);
1715 	/* are we inside the extent that was found? */
1716 	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1717 	found_type = btrfs_key_type(&found_key);
1718 	if (found_key.objectid != objectid ||
1719 	    found_type != BTRFS_EXTENT_DATA_KEY) {
1720 		goto not_found;
1721 	}
1722 
	/* found_type is reused: from here on it is the file extent type */
1723 	found_type = btrfs_file_extent_type(leaf, item);
1724 	extent_start = found_key.offset;
1725 	if (found_type == BTRFS_FILE_EXTENT_REG) {
		/* extent_end is exclusive for regular extents */
1726 		extent_end = extent_start +
1727 		       btrfs_file_extent_num_bytes(leaf, item);
1728 		err = 0;
		/* @start is outside the item found: map a hole instead */
1729 		if (start < extent_start || start >= extent_end) {
1730 			em->start = start;
1731 			if (start < extent_start) {
1732 				if (end < extent_start)
1733 					goto not_found;
1734 				em->end = extent_end - 1;
1735 			} else {
1736 				em->end = end;
1737 			}
1738 			goto not_found_em;
1739 		}
1740 		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
		/* a disk bytenr of 0 is mapped as a hole */
1741 		if (bytenr == 0) {
1742 			em->start = extent_start;
1743 			em->end = extent_end - 1;
1744 			em->block_start = EXTENT_MAP_HOLE;
1745 			em->block_end = EXTENT_MAP_HOLE;
1746 			goto insert;
1747 		}
1748 		bytenr += btrfs_file_extent_offset(leaf, item);
1749 		em->block_start = bytenr;
1750 		em->block_end = em->block_start +
1751 			btrfs_file_extent_num_bytes(leaf, item) - 1;
1752 		em->start = extent_start;
1753 		em->end = extent_end - 1;
1754 		goto insert;
1755 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1756 		unsigned long ptr;
1757 		char *map;
1758 		size_t size;
1759 		size_t extent_offset;
1760 		size_t copy_size;
1761 
1762 		size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
1763 						    path->slots[0]));
		/*
		 * Unlike the regular case, extent_end here is the last byte
		 * of the extent rounded up to the end of its sector.
		 */
1764 		extent_end = (extent_start + size - 1) |
1765 			((u64)root->sectorsize - 1);
1766 		if (start < extent_start || start >= extent_end) {
1767 			em->start = start;
1768 			if (start < extent_start) {
1769 				if (end < extent_start)
1770 					goto not_found;
1771 				em->end = extent_end;
1772 			} else {
1773 				em->end = end;
1774 			}
1775 			goto not_found_em;
1776 		}
1777 		em->block_start = EXTENT_MAP_INLINE;
1778 		em->block_end = EXTENT_MAP_INLINE;
1779 
		/*
		 * No page to copy into: return the mapping directly.  This
		 * path skips the insert into the extent map tree.
		 */
1780 		if (!page) {
1781 			em->start = extent_start;
1782 			em->end = extent_start + size - 1;
1783 			goto out;
1784 		}
1785 
		/* offset of the copy inside the inline data */
1786 		extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) -
1787 			extent_start + page_offset;
1788 		copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
1789 				size - extent_offset);
1790 		em->start = extent_start + extent_offset;
1791 		em->end = (em->start + copy_size -1) |
1792 			((u64)root->sectorsize -1);
1793 		map = kmap(page);
1794 		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
1795 		if (create == 0 && !PageUptodate(page)) {
			/* read: fill the page from the leaf */
1796 			read_extent_buffer(leaf, map + page_offset, ptr,
1797 					   copy_size);
1798 			flush_dcache_page(page);
1799 		} else if (create && PageUptodate(page)) {
			/*
			 * write: the leaf is about to be modified, which
			 * needs a transaction.  Start one, throw away the
			 * work done so far and redo the lookup.
			 */
1800 			if (!trans) {
1801 				kunmap(page);
1802 				free_extent_map(em);
1803 				em = NULL;
1804 				btrfs_release_path(root, path);
1805 				trans = btrfs_start_transaction(root, 1);
1806 				goto again;
1807 			}
1808 			write_extent_buffer(leaf, map + page_offset, ptr,
1809 					    copy_size);
1810 			btrfs_mark_buffer_dirty(leaf);
1811 		}
1812 		kunmap(page);
1813 		set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS);
1814 		goto insert;
1815 	} else {
		/* unknown type: warn, then fall through and map a hole */
1816 		printk("unkknown found_type %d\n", found_type);
1817 		WARN_ON(1);
1818 	}
	/* nothing covers the range: the hole spans exactly [start, end] */
1819 not_found:
1820 	em->start = start;
1821 	em->end = end;
	/* em->start / em->end already set by the caller of this label */
1822 not_found_em:
1823 	em->block_start = EXTENT_MAP_HOLE;
1824 	em->block_end = EXTENT_MAP_HOLE;
1825 insert:
1826 	btrfs_release_path(root, path);
	/* sanity check: the mapping must contain @start */
1827 	if (em->start > start || em->end < start) {
1828 		printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end);
1829 		err = -EIO;
1830 		goto out;
1831 	}
1832 	ret = add_extent_mapping(em_tree, em);
1833 	if (ret == -EEXIST) {
		/* insert reported -EEXIST: drop ours and retry the lookup */
1834 		free_extent_map(em);
1835 		em = NULL;
		/* disabled by the constant 0; never executes */
1836 		if (0 && failed_insert == 1) {
1837 			btrfs_drop_extent_cache(inode, start, end);
1838 		}
1839 		failed_insert++;
1840 		if (failed_insert > 5) {
1841 			printk("failing to insert %Lu %Lu\n", start, end);
1842 			err = -EIO;
1843 			goto out;
1844 		}
1845 		goto again;
1846 	}
1847 	err = 0;
1848 out:
1849 	btrfs_free_path(path);
	/* only set when the inline write path started a transaction */
1850 	if (trans) {
1851 		ret = btrfs_end_transaction(trans, root);
1852 		if (!err)
1853 			err = ret;
1854 	}
1855 	mutex_unlock(&root->fs_info->fs_mutex);
1856 	if (err) {
		/* NOTE(review): em can be NULL here -- confirm free_extent_map() tolerates that */
1857 		free_extent_map(em);
1858 		WARN_ON(1);
1859 		return ERR_PTR(err);
1860 	}
1861 	return em;
1862 }
1863 
1864 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
1865 {
1866 	return extent_bmap(mapping, iblock, btrfs_get_extent);
1867 }
1868 
1869 static int btrfs_prepare_write(struct file *file, struct page *page,
1870 			       unsigned from, unsigned to)
1871 {
1872 	return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree,
1873 				    page->mapping->host, page, from, to,
1874 				    btrfs_get_extent);
1875 }
1876 
1877 int btrfs_readpage(struct file *file, struct page *page)
1878 {
1879 	struct extent_map_tree *tree;
1880 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
1881 	return extent_read_full_page(tree, page, btrfs_get_extent);
1882 }
1883 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1884 {
1885 	struct extent_map_tree *tree;
1886 
1887 
1888 	if (current->flags & PF_MEMALLOC) {
1889 		redirty_page_for_writepage(wbc, page);
1890 		unlock_page(page);
1891 		return 0;
1892 	}
1893 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
1894 	return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
1895 }
1896 
1897 static int btrfs_writepages(struct address_space *mapping,
1898 			    struct writeback_control *wbc)
1899 {
1900 	struct extent_map_tree *tree;
1901 	tree = &BTRFS_I(mapping->host)->extent_tree;
1902 	return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
1903 }
1904 
1905 static int
1906 btrfs_readpages(struct file *file, struct address_space *mapping,
1907 		struct list_head *pages, unsigned nr_pages)
1908 {
1909 	struct extent_map_tree *tree;
1910 	tree = &BTRFS_I(mapping->host)->extent_tree;
1911 	return extent_readpages(tree, mapping, pages, nr_pages,
1912 				btrfs_get_extent);
1913 }
1914 
1915 static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1916 {
1917 	struct extent_map_tree *tree;
1918 	int ret;
1919 
1920 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
1921 	ret = try_release_extent_mapping(tree, page);
1922 	if (ret == 1) {
1923 		ClearPagePrivate(page);
1924 		set_page_private(page, 0);
1925 		page_cache_release(page);
1926 	}
1927 	return ret;
1928 }
1929 
1930 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
1931 {
1932 	struct extent_map_tree *tree;
1933 
1934 	tree = &BTRFS_I(page->mapping->host)->extent_tree;
1935 	extent_invalidatepage(tree, page, offset);
1936 	btrfs_releasepage(page, GFP_NOFS);
1937 }
1938 
1939 /*
1940  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
1941  * called from a page fault handler when a page is first dirtied. Hence we must
1942  * be careful to check for EOF conditions here. We set the page up correctly
1943  * for a written page which means we get ENOSPC checking when writing into
1944  * holes and correct delalloc and unwritten extent mapping on filesystems that
1945  * support these features.
1946  *
1947  * We are not allowed to take the i_mutex here so we have to play games to
1948  * protect against truncate races as the page could now be beyond EOF.  Because
1949  * vmtruncate() writes the inode size before removing pages, once we have the
1950  * page lock we can determine safely if the page is beyond EOF. If it is not
1951  * beyond EOF, then the page is guaranteed safe against truncation until we
1952  * unlock the page.
1953  */
1954 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1955 {
1956 	struct inode *inode = fdentry(vma->vm_file)->d_inode;
1957 	unsigned long end;
1958 	loff_t size;
1959 	int ret = -EINVAL;
1960 	u64 page_start;
1961 
1962 	down_read(&BTRFS_I(inode)->root->snap_sem);
1963 	lock_page(page);
1964 	wait_on_page_writeback(page);
1965 	size = i_size_read(inode);
1966 	page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1967 
1968 	if ((page->mapping != inode->i_mapping) ||
1969 	    (page_start > size)) {
1970 		/* page got truncated out from underneath us */
1971 		goto out_unlock;
1972 	}
1973 
1974 	/* page is wholly or partially inside EOF */
1975 	if (page_start + PAGE_CACHE_SIZE > size)
1976 		end = size & ~PAGE_CACHE_MASK;
1977 	else
1978 		end = PAGE_CACHE_SIZE;
1979 
1980 	ret = btrfs_cow_one_page(inode, page, end);
1981 
1982 out_unlock:
1983 	up_read(&BTRFS_I(inode)->root->snap_sem);
1984 	unlock_page(page);
1985 	return ret;
1986 }
1987 
1988 static void btrfs_truncate(struct inode *inode)
1989 {
1990 	struct btrfs_root *root = BTRFS_I(inode)->root;
1991 	int ret;
1992 	struct btrfs_trans_handle *trans;
1993 	unsigned long nr;
1994 
1995 	if (!S_ISREG(inode->i_mode))
1996 		return;
1997 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1998 		return;
1999 
2000 	btrfs_truncate_page(inode->i_mapping, inode->i_size);
2001 
2002 	mutex_lock(&root->fs_info->fs_mutex);
2003 	trans = btrfs_start_transaction(root, 1);
2004 	btrfs_set_trans_block_group(trans, inode);
2005 
2006 	/* FIXME, add redo link to tree so we don't leak on crash */
2007 	ret = btrfs_truncate_in_trans(trans, root, inode);
2008 	btrfs_update_inode(trans, root, inode);
2009 	nr = trans->blocks_used;
2010 
2011 	ret = btrfs_end_transaction(trans, root);
2012 	BUG_ON(ret);
2013 	mutex_unlock(&root->fs_info->fs_mutex);
2014 	btrfs_btree_balance_dirty(root, nr);
2015 }
2016 
2017 int btrfs_commit_write(struct file *file, struct page *page,
2018 		       unsigned from, unsigned to)
2019 {
2020 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2021 	struct inode *inode = page->mapping->host;
2022 
2023 	btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE);
2024 
2025 	if (pos > inode->i_size) {
2026 		i_size_write(inode, pos);
2027 		mark_inode_dirty(inode);
2028 	}
2029 	return 0;
2030 }
2031 
2032 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2033 {
2034 	struct btrfs_trans_handle *trans;
2035 	struct btrfs_key key;
2036 	struct btrfs_root_item root_item;
2037 	struct btrfs_inode_item *inode_item;
2038 	struct extent_buffer *leaf;
2039 	struct btrfs_root *new_root;
2040 	struct inode *inode;
2041 	struct inode *dir;
2042 	int ret;
2043 	int err;
2044 	u64 objectid;
2045 	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2046 	unsigned long nr = 1;
2047 
2048 	mutex_lock(&root->fs_info->fs_mutex);
2049 	trans = btrfs_start_transaction(root, 1);
2050 	BUG_ON(!trans);
2051 
2052 	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2053 				       0, &objectid);
2054 	if (ret)
2055 		goto fail;
2056 
2057 	leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2058 					objectid, trans->transid, 0, 0,
2059 					0, 0);
2060 	if (IS_ERR(leaf))
2061 		return PTR_ERR(leaf);
2062 
2063 	btrfs_set_header_nritems(leaf, 0);
2064 	btrfs_set_header_level(leaf, 0);
2065 	btrfs_set_header_bytenr(leaf, leaf->start);
2066 	btrfs_set_header_generation(leaf, trans->transid);
2067 	btrfs_set_header_owner(leaf, objectid);
2068 
2069 	write_extent_buffer(leaf, root->fs_info->fsid,
2070 			    (unsigned long)btrfs_header_fsid(leaf),
2071 			    BTRFS_FSID_SIZE);
2072 	btrfs_mark_buffer_dirty(leaf);
2073 
2074 	inode_item = &root_item.inode;
2075 	memset(inode_item, 0, sizeof(*inode_item));
2076 	inode_item->generation = cpu_to_le64(1);
2077 	inode_item->size = cpu_to_le64(3);
2078 	inode_item->nlink = cpu_to_le32(1);
2079 	inode_item->nblocks = cpu_to_le64(1);
2080 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2081 
2082 	btrfs_set_root_bytenr(&root_item, leaf->start);
2083 	btrfs_set_root_level(&root_item, 0);
2084 	btrfs_set_root_refs(&root_item, 1);
2085 	btrfs_set_root_used(&root_item, 0);
2086 
2087 	memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2088 	root_item.drop_level = 0;
2089 
2090 	free_extent_buffer(leaf);
2091 	leaf = NULL;
2092 
2093 	btrfs_set_root_dirid(&root_item, new_dirid);
2094 
2095 	key.objectid = objectid;
2096 	key.offset = 1;
2097 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2098 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2099 				&root_item);
2100 	if (ret)
2101 		goto fail;
2102 
2103 	/*
2104 	 * insert the directory item
2105 	 */
2106 	key.offset = (u64)-1;
2107 	dir = root->fs_info->sb->s_root->d_inode;
2108 	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2109 				    name, namelen, dir->i_ino, &key,
2110 				    BTRFS_FT_DIR);
2111 	if (ret)
2112 		goto fail;
2113 
2114 	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2115 			     name, namelen, objectid,
2116 			     root->fs_info->sb->s_root->d_inode->i_ino);
2117 	if (ret)
2118 		goto fail;
2119 
2120 	ret = btrfs_commit_transaction(trans, root);
2121 	if (ret)
2122 		goto fail_commit;
2123 
2124 	new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2125 	BUG_ON(!new_root);
2126 
2127 	trans = btrfs_start_transaction(new_root, 1);
2128 	BUG_ON(!trans);
2129 
2130 	inode = btrfs_new_inode(trans, new_root, new_dirid,
2131 				BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2132 	if (IS_ERR(inode))
2133 		goto fail;
2134 	inode->i_op = &btrfs_dir_inode_operations;
2135 	inode->i_fop = &btrfs_dir_file_operations;
2136 	new_root->inode = inode;
2137 
2138 	ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2139 				     new_dirid);
2140 	inode->i_nlink = 1;
2141 	inode->i_size = 0;
2142 	ret = btrfs_update_inode(trans, new_root, inode);
2143 	if (ret)
2144 		goto fail;
2145 fail:
2146 	nr = trans->blocks_used;
2147 	err = btrfs_commit_transaction(trans, root);
2148 	if (err && !ret)
2149 		ret = err;
2150 fail_commit:
2151 	mutex_unlock(&root->fs_info->fs_mutex);
2152 	btrfs_btree_balance_dirty(root, nr);
2153 	return ret;
2154 }
2155 
2156 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2157 {
2158 	struct btrfs_trans_handle *trans;
2159 	struct btrfs_key key;
2160 	struct btrfs_root_item new_root_item;
2161 	struct extent_buffer *tmp;
2162 	int ret;
2163 	int err;
2164 	u64 objectid;
2165 	unsigned long nr;
2166 
2167 	if (!root->ref_cows)
2168 		return -EINVAL;
2169 
2170 	down_write(&root->snap_sem);
2171 	freeze_bdev(root->fs_info->sb->s_bdev);
2172 	thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb);
2173 
2174 	mutex_lock(&root->fs_info->fs_mutex);
2175 	trans = btrfs_start_transaction(root, 1);
2176 	BUG_ON(!trans);
2177 
2178 	ret = btrfs_update_inode(trans, root, root->inode);
2179 	if (ret)
2180 		goto fail;
2181 
2182 	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2183 				       0, &objectid);
2184 	if (ret)
2185 		goto fail;
2186 
2187 	memcpy(&new_root_item, &root->root_item,
2188 	       sizeof(new_root_item));
2189 
2190 	key.objectid = objectid;
2191 	key.offset = 1;
2192 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2193 
2194 	extent_buffer_get(root->node);
2195 	btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
2196 	free_extent_buffer(tmp);
2197 
2198 	btrfs_copy_root(trans, root, root->node, &tmp, objectid);
2199 
2200 	btrfs_set_root_bytenr(&new_root_item, tmp->start);
2201 	btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp));
2202 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2203 				&new_root_item);
2204 	free_extent_buffer(tmp);
2205 	if (ret)
2206 		goto fail;
2207 
2208 	/*
2209 	 * insert the directory item
2210 	 */
2211 	key.offset = (u64)-1;
2212 	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2213 				    name, namelen,
2214 				    root->fs_info->sb->s_root->d_inode->i_ino,
2215 				    &key, BTRFS_FT_DIR);
2216 
2217 	if (ret)
2218 		goto fail;
2219 
2220 	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2221 			     name, namelen, objectid,
2222 			     root->fs_info->sb->s_root->d_inode->i_ino);
2223 
2224 	if (ret)
2225 		goto fail;
2226 fail:
2227 	nr = trans->blocks_used;
2228 	err = btrfs_commit_transaction(trans, root);
2229 
2230 	if (err && !ret)
2231 		ret = err;
2232 
2233 	mutex_unlock(&root->fs_info->fs_mutex);
2234 	up_write(&root->snap_sem);
2235 	btrfs_btree_balance_dirty(root, nr);
2236 	return ret;
2237 }
2238 
2239 static unsigned long force_ra(struct address_space *mapping,
2240 			      struct file_ra_state *ra, struct file *file,
2241 			      pgoff_t offset, pgoff_t last_index)
2242 {
2243 	pgoff_t req_size;
2244 
2245 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2246 	req_size = last_index - offset + 1;
2247 	offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2248 	return offset;
2249 #else
2250 	req_size = min(last_index - offset + 1, (pgoff_t)128);
2251 	page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2252 	return offset + req_size;
2253 #endif
2254 }
2255 
2256 int btrfs_defrag_file(struct file *file) {
2257 	struct inode *inode = fdentry(file)->d_inode;
2258 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2259 	struct page *page;
2260 	unsigned long last_index;
2261 	unsigned long ra_index = 0;
2262 	u64 page_start;
2263 	u64 page_end;
2264 	unsigned long i;
2265 
2266 	mutex_lock(&inode->i_mutex);
2267 	last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2268 	for (i = 0; i <= last_index; i++) {
2269 		if (i == ra_index) {
2270 			ra_index = force_ra(inode->i_mapping, &file->f_ra,
2271 					    file, ra_index, last_index);
2272 		}
2273 		page = grab_cache_page(inode->i_mapping, i);
2274 		if (!page)
2275 			goto out_unlock;
2276 		if (!PageUptodate(page)) {
2277 			btrfs_readpage(NULL, page);
2278 			lock_page(page);
2279 			if (!PageUptodate(page)) {
2280 				unlock_page(page);
2281 				page_cache_release(page);
2282 				goto out_unlock;
2283 			}
2284 		}
2285 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2286 		page_end = page_start + PAGE_CACHE_SIZE - 1;
2287 
2288 		lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2289 		set_extent_delalloc(em_tree, page_start,
2290 				    page_end, GFP_NOFS);
2291 		unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2292 		set_page_dirty(page);
2293 		unlock_page(page);
2294 		page_cache_release(page);
2295 		balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2296 	}
2297 
2298 out_unlock:
2299 	mutex_unlock(&inode->i_mutex);
2300 	return 0;
2301 }
2302 
2303 static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg)
2304 {
2305 	struct btrfs_ioctl_vol_args *vol_args;
2306 	struct btrfs_dir_item *di;
2307 	struct btrfs_path *path;
2308 	u64 root_dirid;
2309 	int namelen;
2310 	int ret;
2311 
2312 	vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2313 
2314 	if (!vol_args)
2315 		return -ENOMEM;
2316 
2317 	if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2318 		ret = -EFAULT;
2319 		goto out;
2320 	}
2321 
2322 	namelen = strlen(vol_args->name);
2323 	if (namelen > BTRFS_VOL_NAME_MAX) {
2324 		ret = -EINVAL;
2325 		goto out;
2326 	}
2327 	if (strchr(vol_args->name, '/')) {
2328 		ret = -EINVAL;
2329 		goto out;
2330 	}
2331 
2332 	path = btrfs_alloc_path();
2333 	if (!path) {
2334 		ret = -ENOMEM;
2335 		goto out;
2336 	}
2337 
2338 	root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2339 	mutex_lock(&root->fs_info->fs_mutex);
2340 	di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2341 			    path, root_dirid,
2342 			    vol_args->name, namelen, 0);
2343 	mutex_unlock(&root->fs_info->fs_mutex);
2344 	btrfs_free_path(path);
2345 
2346 	if (di && !IS_ERR(di)) {
2347 		ret = -EEXIST;
2348 		goto out;
2349 	}
2350 
2351 	if (IS_ERR(di)) {
2352 		ret = PTR_ERR(di);
2353 		goto out;
2354 	}
2355 
2356 	if (root == root->fs_info->tree_root)
2357 		ret = create_subvol(root, vol_args->name, namelen);
2358 	else
2359 		ret = create_snapshot(root, vol_args->name, namelen);
2360 out:
2361 	kfree(vol_args);
2362 	return ret;
2363 }
2364 
2365 static int btrfs_ioctl_defrag(struct file *file)
2366 {
2367 	struct inode *inode = fdentry(file)->d_inode;
2368 	struct btrfs_root *root = BTRFS_I(inode)->root;
2369 
2370 	switch (inode->i_mode & S_IFMT) {
2371 	case S_IFDIR:
2372 		mutex_lock(&root->fs_info->fs_mutex);
2373 		btrfs_defrag_root(root, 0);
2374 		btrfs_defrag_root(root->fs_info->extent_root, 0);
2375 		mutex_unlock(&root->fs_info->fs_mutex);
2376 		break;
2377 	case S_IFREG:
2378 		btrfs_defrag_file(file);
2379 		break;
2380 	}
2381 
2382 	return 0;
2383 }
2384 
2385 long btrfs_ioctl(struct file *file, unsigned int
2386 		cmd, unsigned long arg)
2387 {
2388 	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2389 
2390 	switch (cmd) {
2391 	case BTRFS_IOC_SNAP_CREATE:
2392 		return btrfs_ioctl_snap_create(root, (void __user *)arg);
2393 	case BTRFS_IOC_DEFRAG:
2394 		return btrfs_ioctl_defrag(file);
2395 	}
2396 
2397 	return -ENOTTY;
2398 }
2399 
2400 /*
2401  * Called inside transaction, so use GFP_NOFS
2402  */
2403 struct inode *btrfs_alloc_inode(struct super_block *sb)
2404 {
2405 	struct btrfs_inode *ei;
2406 
2407 	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2408 	if (!ei)
2409 		return NULL;
2410 	ei->last_trans = 0;
2411 	return &ei->vfs_inode;
2412 }
2413 
2414 void btrfs_destroy_inode(struct inode *inode)
2415 {
2416 	WARN_ON(!list_empty(&inode->i_dentry));
2417 	WARN_ON(inode->i_data.nrpages);
2418 
2419 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2420 }
2421 
2422 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2423 static void init_once(struct kmem_cache * cachep, void *foo)
2424 #else
2425 static void init_once(void * foo, struct kmem_cache * cachep,
2426 		      unsigned long flags)
2427 #endif
2428 {
2429 	struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2430 
2431 	inode_init_once(&ei->vfs_inode);
2432 }
2433 
2434 void btrfs_destroy_cachep(void)
2435 {
2436 	if (btrfs_inode_cachep)
2437 		kmem_cache_destroy(btrfs_inode_cachep);
2438 	if (btrfs_trans_handle_cachep)
2439 		kmem_cache_destroy(btrfs_trans_handle_cachep);
2440 	if (btrfs_transaction_cachep)
2441 		kmem_cache_destroy(btrfs_transaction_cachep);
2442 	if (btrfs_bit_radix_cachep)
2443 		kmem_cache_destroy(btrfs_bit_radix_cachep);
2444 	if (btrfs_path_cachep)
2445 		kmem_cache_destroy(btrfs_path_cachep);
2446 }
2447 
2448 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2449 				       unsigned long extra_flags,
2450 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2451 				       void (*ctor)(struct kmem_cache *, void *)
2452 #else
2453 				       void (*ctor)(void *, struct kmem_cache *,
2454 						    unsigned long)
2455 #endif
2456 				     )
2457 {
2458 	return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2459 				 SLAB_MEM_SPREAD | extra_flags), ctor
2460 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2461 				 ,NULL
2462 #endif
2463 				);
2464 }
2465 
2466 int btrfs_init_cachep(void)
2467 {
2468 	btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2469 					  sizeof(struct btrfs_inode),
2470 					  0, init_once);
2471 	if (!btrfs_inode_cachep)
2472 		goto fail;
2473 	btrfs_trans_handle_cachep =
2474 			btrfs_cache_create("btrfs_trans_handle_cache",
2475 					   sizeof(struct btrfs_trans_handle),
2476 					   0, NULL);
2477 	if (!btrfs_trans_handle_cachep)
2478 		goto fail;
2479 	btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2480 					     sizeof(struct btrfs_transaction),
2481 					     0, NULL);
2482 	if (!btrfs_transaction_cachep)
2483 		goto fail;
2484 	btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2485 					 sizeof(struct btrfs_path),
2486 					 0, NULL);
2487 	if (!btrfs_path_cachep)
2488 		goto fail;
2489 	btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2490 					      SLAB_DESTROY_BY_RCU, NULL);
2491 	if (!btrfs_bit_radix_cachep)
2492 		goto fail;
2493 	return 0;
2494 fail:
2495 	btrfs_destroy_cachep();
2496 	return -ENOMEM;
2497 }
2498 
2499 static int btrfs_getattr(struct vfsmount *mnt,
2500 			 struct dentry *dentry, struct kstat *stat)
2501 {
2502 	struct inode *inode = dentry->d_inode;
2503 	generic_fillattr(inode, stat);
2504 	stat->blksize = 256 * 1024;
2505 	return 0;
2506 }
2507 
2508 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2509 			   struct inode * new_dir,struct dentry *new_dentry)
2510 {
2511 	struct btrfs_trans_handle *trans;
2512 	struct btrfs_root *root = BTRFS_I(old_dir)->root;
2513 	struct inode *new_inode = new_dentry->d_inode;
2514 	struct inode *old_inode = old_dentry->d_inode;
2515 	struct timespec ctime = CURRENT_TIME;
2516 	struct btrfs_path *path;
2517 	int ret;
2518 
2519 	if (S_ISDIR(old_inode->i_mode) && new_inode &&
2520 	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2521 		return -ENOTEMPTY;
2522 	}
2523 
2524 	mutex_lock(&root->fs_info->fs_mutex);
2525 	trans = btrfs_start_transaction(root, 1);
2526 
2527 	btrfs_set_trans_block_group(trans, new_dir);
2528 	path = btrfs_alloc_path();
2529 	if (!path) {
2530 		ret = -ENOMEM;
2531 		goto out_fail;
2532 	}
2533 
2534 	old_dentry->d_inode->i_nlink++;
2535 	old_dir->i_ctime = old_dir->i_mtime = ctime;
2536 	new_dir->i_ctime = new_dir->i_mtime = ctime;
2537 	old_inode->i_ctime = ctime;
2538 
2539 	ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2540 	if (ret)
2541 		goto out_fail;
2542 
2543 	if (new_inode) {
2544 		new_inode->i_ctime = CURRENT_TIME;
2545 		ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2546 		if (ret)
2547 			goto out_fail;
2548 	}
2549 	ret = btrfs_add_link(trans, new_dentry, old_inode);
2550 	if (ret)
2551 		goto out_fail;
2552 
2553 out_fail:
2554 	btrfs_free_path(path);
2555 	btrfs_end_transaction(trans, root);
2556 	mutex_unlock(&root->fs_info->fs_mutex);
2557 	return ret;
2558 }
2559 
2560 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2561 			 const char *symname)
2562 {
2563 	struct btrfs_trans_handle *trans;
2564 	struct btrfs_root *root = BTRFS_I(dir)->root;
2565 	struct btrfs_path *path;
2566 	struct btrfs_key key;
2567 	struct inode *inode;
2568 	int err;
2569 	int drop_inode = 0;
2570 	u64 objectid;
2571 	int name_len;
2572 	int datasize;
2573 	unsigned long ptr;
2574 	struct btrfs_file_extent_item *ei;
2575 	struct extent_buffer *leaf;
2576 	unsigned long nr;
2577 
2578 	name_len = strlen(symname) + 1;
2579 	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2580 		return -ENAMETOOLONG;
2581 	mutex_lock(&root->fs_info->fs_mutex);
2582 	trans = btrfs_start_transaction(root, 1);
2583 	btrfs_set_trans_block_group(trans, dir);
2584 
2585 	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2586 	if (err) {
2587 		err = -ENOSPC;
2588 		goto out_unlock;
2589 	}
2590 
2591 	inode = btrfs_new_inode(trans, root, objectid,
2592 				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2593 	err = PTR_ERR(inode);
2594 	if (IS_ERR(inode))
2595 		goto out_unlock;
2596 
2597 	btrfs_set_trans_block_group(trans, inode);
2598 	err = btrfs_add_nondir(trans, dentry, inode);
2599 	if (err)
2600 		drop_inode = 1;
2601 	else {
2602 		inode->i_mapping->a_ops = &btrfs_aops;
2603 		inode->i_fop = &btrfs_file_operations;
2604 		inode->i_op = &btrfs_file_inode_operations;
2605 		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
2606 				     inode->i_mapping, GFP_NOFS);
2607 		BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
2608 	}
2609 	dir->i_sb->s_dirt = 1;
2610 	btrfs_update_inode_block_group(trans, inode);
2611 	btrfs_update_inode_block_group(trans, dir);
2612 	if (drop_inode)
2613 		goto out_unlock;
2614 
2615 	path = btrfs_alloc_path();
2616 	BUG_ON(!path);
2617 	key.objectid = inode->i_ino;
2618 	key.offset = 0;
2619 	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2620 	datasize = btrfs_file_extent_calc_inline_size(name_len);
2621 	err = btrfs_insert_empty_item(trans, root, path, &key,
2622 				      datasize);
2623 	if (err) {
2624 		drop_inode = 1;
2625 		goto out_unlock;
2626 	}
2627 	leaf = path->nodes[0];
2628 	ei = btrfs_item_ptr(leaf, path->slots[0],
2629 			    struct btrfs_file_extent_item);
2630 	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
2631 	btrfs_set_file_extent_type(leaf, ei,
2632 				   BTRFS_FILE_EXTENT_INLINE);
2633 	ptr = btrfs_file_extent_inline_start(ei);
2634 	write_extent_buffer(leaf, symname, ptr, name_len);
2635 	btrfs_mark_buffer_dirty(leaf);
2636 	btrfs_free_path(path);
2637 
2638 	inode->i_op = &btrfs_symlink_inode_operations;
2639 	inode->i_mapping->a_ops = &btrfs_symlink_aops;
2640 	inode->i_size = name_len - 1;
2641 	err = btrfs_update_inode(trans, root, inode);
2642 	if (err)
2643 		drop_inode = 1;
2644 
2645 out_unlock:
2646 	nr = trans->blocks_used;
2647 	btrfs_end_transaction(trans, root);
2648 	mutex_unlock(&root->fs_info->fs_mutex);
2649 	if (drop_inode) {
2650 		inode_dec_link_count(inode);
2651 		iput(inode);
2652 	}
2653 	btrfs_btree_balance_dirty(root, nr);
2654 	return err;
2655 }
2656 
2657 static struct inode_operations btrfs_dir_inode_operations = {
2658 	.lookup		= btrfs_lookup,
2659 	.create		= btrfs_create,
2660 	.unlink		= btrfs_unlink,
2661 	.link		= btrfs_link,
2662 	.mkdir		= btrfs_mkdir,
2663 	.rmdir		= btrfs_rmdir,
2664 	.rename		= btrfs_rename,
2665 	.symlink	= btrfs_symlink,
2666 	.setattr	= btrfs_setattr,
2667 	.mknod		= btrfs_mknod,
2668 	.setxattr	= generic_setxattr,
2669 	.getxattr	= generic_getxattr,
2670 	.listxattr	= btrfs_listxattr,
2671 	.removexattr	= generic_removexattr,
2672 };
2673 
2674 static struct inode_operations btrfs_dir_ro_inode_operations = {
2675 	.lookup		= btrfs_lookup,
2676 };
2677 
2678 static struct file_operations btrfs_dir_file_operations = {
2679 	.llseek		= generic_file_llseek,
2680 	.read		= generic_read_dir,
2681 	.readdir	= btrfs_readdir,
2682 	.unlocked_ioctl	= btrfs_ioctl,
2683 #ifdef CONFIG_COMPAT
2684 	.compat_ioctl	= btrfs_ioctl,
2685 #endif
2686 };
2687 
2688 static struct extent_map_ops btrfs_extent_map_ops = {
2689 	.fill_delalloc = run_delalloc_range,
2690 	.writepage_io_hook = btrfs_writepage_io_hook,
2691 	.readpage_io_hook = btrfs_readpage_io_hook,
2692 	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
2693 };
2694 
2695 static struct address_space_operations btrfs_aops = {
2696 	.readpage	= btrfs_readpage,
2697 	.writepage	= btrfs_writepage,
2698 	.writepages	= btrfs_writepages,
2699 	.readpages	= btrfs_readpages,
2700 	.sync_page	= block_sync_page,
2701 	.prepare_write	= btrfs_prepare_write,
2702 	.commit_write	= btrfs_commit_write,
2703 	.bmap		= btrfs_bmap,
2704 	.invalidatepage = btrfs_invalidatepage,
2705 	.releasepage	= btrfs_releasepage,
2706 	.set_page_dirty	= __set_page_dirty_nobuffers,
2707 };
2708 
2709 static struct address_space_operations btrfs_symlink_aops = {
2710 	.readpage	= btrfs_readpage,
2711 	.writepage	= btrfs_writepage,
2712 	.invalidatepage = btrfs_invalidatepage,
2713 	.releasepage	= btrfs_releasepage,
2714 };
2715 
2716 static struct inode_operations btrfs_file_inode_operations = {
2717 	.truncate	= btrfs_truncate,
2718 	.getattr	= btrfs_getattr,
2719 	.setattr	= btrfs_setattr,
2720 	.setxattr	= generic_setxattr,
2721 	.getxattr	= generic_getxattr,
2722 	.listxattr      = btrfs_listxattr,
2723 	.removexattr	= generic_removexattr,
2724 };
2725 
2726 static struct inode_operations btrfs_special_inode_operations = {
2727 	.getattr	= btrfs_getattr,
2728 	.setattr	= btrfs_setattr,
2729 };
2730 
2731 static struct inode_operations btrfs_symlink_inode_operations = {
2732 	.readlink	= generic_readlink,
2733 	.follow_link	= page_follow_link_light,
2734 	.put_link	= page_put_link,
2735 };
2736