xref: /openbmc/linux/fs/btrfs/extent-tree.c (revision e18e4809b10e6c9efb5fe10c1ddcb4ebb690d517)
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 
28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31 
32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33 				 btrfs_root *extent_root);
34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35 			       btrfs_root *extent_root);
36 static int find_previous_extent(struct btrfs_root *root,
37 				struct btrfs_path *path)
38 {
39 	struct btrfs_key found_key;
40 	struct extent_buffer *leaf;
41 	int ret;
42 
43 	while(1) {
44 		if (path->slots[0] == 0) {
45 			ret = btrfs_prev_leaf(root, path);
46 			if (ret != 0)
47 				return ret;
48 		} else {
49 			path->slots[0]--;
50 		}
51 		leaf = path->nodes[0];
52 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
53 		if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
54 			return 0;
55 	}
56 	return 1;
57 }
58 
59 static int cache_block_group(struct btrfs_root *root,
60 			     struct btrfs_block_group_cache *block_group)
61 {
62 	struct btrfs_path *path;
63 	int ret;
64 	struct btrfs_key key;
65 	struct extent_buffer *leaf;
66 	struct extent_map_tree *free_space_cache;
67 	int slot;
68 	u64 last = 0;
69 	u64 hole_size;
70 	u64 first_free;
71 	int found = 0;
72 
73 	if (!block_group)
74 		return 0;
75 
76 	root = root->fs_info->extent_root;
77 	free_space_cache = &root->fs_info->free_space_cache;
78 
79 	if (block_group->cached)
80 		return 0;
81 
82 	path = btrfs_alloc_path();
83 	if (!path)
84 		return -ENOMEM;
85 
86 	path->reada = 2;
87 	first_free = block_group->key.objectid;
88 	key.objectid = block_group->key.objectid;
89 	key.offset = 0;
90 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
91 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
92 	if (ret < 0)
93 		return ret;
94 	ret = find_previous_extent(root, path);
95 	if (ret < 0)
96 		return ret;
97 	if (ret == 0) {
98 		leaf = path->nodes[0];
99 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
100 		if (key.objectid + key.offset > first_free)
101 			first_free = key.objectid + key.offset;
102 	}
103 	while(1) {
104 		leaf = path->nodes[0];
105 		slot = path->slots[0];
106 		if (slot >= btrfs_header_nritems(leaf)) {
107 			ret = btrfs_next_leaf(root, path);
108 			if (ret < 0)
109 				goto err;
110 			if (ret == 0) {
111 				continue;
112 			} else {
113 				break;
114 			}
115 		}
116 		btrfs_item_key_to_cpu(leaf, &key, slot);
117 		if (key.objectid < block_group->key.objectid) {
118 			goto next;
119 		}
120 		if (key.objectid >= block_group->key.objectid +
121 		    block_group->key.offset) {
122 			break;
123 		}
124 
125 		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
126 			if (!found) {
127 				last = first_free;
128 				found = 1;
129 			}
130 			if (key.objectid > last) {
131 				hole_size = key.objectid - last;
132 				set_extent_dirty(free_space_cache, last,
133 						 last + hole_size - 1,
134 						 GFP_NOFS);
135 			}
136 			last = key.objectid + key.offset;
137 		}
138 next:
139 		path->slots[0]++;
140 	}
141 
142 	if (!found)
143 		last = first_free;
144 	if (block_group->key.objectid +
145 	    block_group->key.offset > last) {
146 		hole_size = block_group->key.objectid +
147 			block_group->key.offset - last;
148 		set_extent_dirty(free_space_cache, last,
149 				 last + hole_size - 1, GFP_NOFS);
150 	}
151 	block_group->cached = 1;
152 err:
153 	btrfs_free_path(path);
154 	return 0;
155 }
156 
157 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
158 							 btrfs_fs_info *info,
159 							 u64 bytenr)
160 {
161 	struct extent_map_tree *block_group_cache;
162 	struct btrfs_block_group_cache *block_group = NULL;
163 	u64 ptr;
164 	u64 start;
165 	u64 end;
166 	int ret;
167 
168 	block_group_cache = &info->block_group_cache;
169 	ret = find_first_extent_bit(block_group_cache,
170 				    bytenr, &start, &end,
171 				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
172 	if (ret) {
173 		return NULL;
174 	}
175 	ret = get_state_private(block_group_cache, start, &ptr);
176 	if (ret)
177 		return NULL;
178 
179 	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
180 	if (block_group->key.objectid <= bytenr && bytenr <
181 	    block_group->key.objectid + block_group->key.offset)
182 		return block_group;
183 	return NULL;
184 }
185 static u64 noinline find_search_start(struct btrfs_root *root,
186 			      struct btrfs_block_group_cache **cache_ret,
187 			      u64 search_start, int num, int data)
188 {
189 	int ret;
190 	struct btrfs_block_group_cache *cache = *cache_ret;
191 	u64 last;
192 	u64 start = 0;
193 	u64 end = 0;
194 	u64 cache_miss = 0;
195 	u64 total_fs_bytes;
196 	int wrapped = 0;
197 
198 	if (!cache) {
199 		goto out;
200 	}
201 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
202 again:
203 	ret = cache_block_group(root, cache);
204 	if (ret)
205 		goto out;
206 
207 	last = max(search_start, cache->key.objectid);
208 
209 	while(1) {
210 		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
211 					    last, &start, &end, EXTENT_DIRTY);
212 		if (ret) {
213 			if (!cache_miss)
214 				cache_miss = last;
215 			goto new_group;
216 		}
217 
218 		start = max(last, start);
219 		last = end + 1;
220 		if (last - start < num) {
221 			if (last == cache->key.objectid + cache->key.offset)
222 				cache_miss = start;
223 			continue;
224 		}
225 		if (data != BTRFS_BLOCK_GROUP_MIXED &&
226 		    start + num > cache->key.objectid + cache->key.offset)
227 			goto new_group;
228 		if (start + num  > total_fs_bytes)
229 			goto new_group;
230 		return start;
231 	}
232 out:
233 	cache = btrfs_lookup_block_group(root->fs_info, search_start);
234 	if (!cache) {
235 		printk("Unable to find block group for %Lu\n",
236 		       search_start);
237 		WARN_ON(1);
238 		return search_start;
239 	}
240 	return search_start;
241 
242 new_group:
243 	last = cache->key.objectid + cache->key.offset;
244 wrapped:
245 	cache = btrfs_lookup_block_group(root->fs_info, last);
246 	if (!cache || cache->key.objectid >= total_fs_bytes) {
247 no_cache:
248 		if (!wrapped) {
249 			wrapped = 1;
250 			last = search_start;
251 			data = BTRFS_BLOCK_GROUP_MIXED;
252 			goto wrapped;
253 		}
254 		goto out;
255 	}
256 	if (cache_miss && !cache->cached) {
257 		cache_block_group(root, cache);
258 		last = cache_miss;
259 		cache = btrfs_lookup_block_group(root->fs_info, last);
260 	}
261 	cache = btrfs_find_block_group(root, cache, last, data, 0);
262 	if (!cache)
263 		goto no_cache;
264 	*cache_ret = cache;
265 	cache_miss = 0;
266 	goto again;
267 }
268 
269 static u64 div_factor(u64 num, int factor)
270 {
271 	if (factor == 10)
272 		return num;
273 	num *= factor;
274 	do_div(num, 10);
275 	return num;
276 }
277 
278 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
279 						 struct btrfs_block_group_cache
280 						 *hint, u64 search_start,
281 						 int data, int owner)
282 {
283 	struct btrfs_block_group_cache *cache;
284 	struct extent_map_tree *block_group_cache;
285 	struct btrfs_block_group_cache *found_group = NULL;
286 	struct btrfs_fs_info *info = root->fs_info;
287 	u64 used;
288 	u64 last = 0;
289 	u64 hint_last;
290 	u64 start;
291 	u64 end;
292 	u64 free_check;
293 	u64 ptr;
294 	u64 total_fs_bytes;
295 	int bit;
296 	int ret;
297 	int full_search = 0;
298 	int factor = 8;
299 	int data_swap = 0;
300 
301 	block_group_cache = &info->block_group_cache;
302 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
303 
304 	if (!owner)
305 		factor = 8;
306 
307 	if (data == BTRFS_BLOCK_GROUP_MIXED) {
308 		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
309 		factor = 10;
310 	} else if (data)
311 		bit = BLOCK_GROUP_DATA;
312 	else
313 		bit = BLOCK_GROUP_METADATA;
314 
315 	if (search_start && search_start < total_fs_bytes) {
316 		struct btrfs_block_group_cache *shint;
317 		shint = btrfs_lookup_block_group(info, search_start);
318 		if (shint && (shint->data == data ||
319 			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
320 			used = btrfs_block_group_used(&shint->item);
321 			if (used + shint->pinned <
322 			    div_factor(shint->key.offset, factor)) {
323 				return shint;
324 			}
325 		}
326 	}
327 	if (hint && hint->key.objectid < total_fs_bytes &&
328 	    (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
329 		used = btrfs_block_group_used(&hint->item);
330 		if (used + hint->pinned <
331 		    div_factor(hint->key.offset, factor)) {
332 			return hint;
333 		}
334 		last = hint->key.objectid + hint->key.offset;
335 		hint_last = last;
336 	} else {
337 		if (hint)
338 			hint_last = max(hint->key.objectid, search_start);
339 		else
340 			hint_last = search_start;
341 
342 		if (hint_last >= total_fs_bytes)
343 			hint_last = search_start;
344 		last = hint_last;
345 	}
346 again:
347 	while(1) {
348 		ret = find_first_extent_bit(block_group_cache, last,
349 					    &start, &end, bit);
350 		if (ret)
351 			break;
352 
353 		ret = get_state_private(block_group_cache, start, &ptr);
354 		if (ret)
355 			break;
356 
357 		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
358 		last = cache->key.objectid + cache->key.offset;
359 		used = btrfs_block_group_used(&cache->item);
360 
361 		if (cache->key.objectid > total_fs_bytes)
362 			break;
363 
364 		if (full_search)
365 			free_check = cache->key.offset;
366 		else
367 			free_check = div_factor(cache->key.offset, factor);
368 		if (used + cache->pinned < free_check) {
369 			found_group = cache;
370 			goto found;
371 		}
372 		cond_resched();
373 	}
374 	if (!full_search) {
375 		last = search_start;
376 		full_search = 1;
377 		goto again;
378 	}
379 	if (!data_swap) {
380 		data_swap = 1;
381 		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
382 		last = search_start;
383 		goto again;
384 	}
385 found:
386 	return found_group;
387 }
388 
389 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
390 			   u64 owner, u64 owner_offset)
391 {
392 	u32 high_crc = ~(u32)0;
393 	u32 low_crc = ~(u32)0;
394 	__le64 lenum;
395 
396 	lenum = cpu_to_le64(root_objectid);
397 	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
398 	lenum = cpu_to_le64(ref_generation);
399 	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
400 
401 #if 0
402 	lenum = cpu_to_le64(owner);
403 	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
404 	lenum = cpu_to_le64(owner_offset);
405 	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
406 #endif
407 	return ((u64)high_crc << 32) | (u64)low_crc;
408 }
409 
410 static int match_extent_ref(struct extent_buffer *leaf,
411 			    struct btrfs_extent_ref *disk_ref,
412 			    struct btrfs_extent_ref *cpu_ref)
413 {
414 	int ret;
415 	int len;
416 
417 	if (cpu_ref->objectid)
418 		len = sizeof(*cpu_ref);
419 	else
420 		len = 2 * sizeof(u64);
421 	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
422 				   len);
423 	return ret == 0;
424 }
425 
426 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
427 					  struct btrfs_root *root,
428 					  struct btrfs_path *path, u64 bytenr,
429 					  u64 root_objectid,
430 					  u64 ref_generation, u64 owner,
431 					  u64 owner_offset, int del)
432 {
433 	u64 hash;
434 	struct btrfs_key key;
435 	struct btrfs_key found_key;
436 	struct btrfs_extent_ref ref;
437 	struct extent_buffer *leaf;
438 	struct btrfs_extent_ref *disk_ref;
439 	int ret;
440 	int ret2;
441 
442 	btrfs_set_stack_ref_root(&ref, root_objectid);
443 	btrfs_set_stack_ref_generation(&ref, ref_generation);
444 	btrfs_set_stack_ref_objectid(&ref, owner);
445 	btrfs_set_stack_ref_offset(&ref, owner_offset);
446 
447 	hash = hash_extent_ref(root_objectid, ref_generation, owner,
448 			       owner_offset);
449 	key.offset = hash;
450 	key.objectid = bytenr;
451 	key.type = BTRFS_EXTENT_REF_KEY;
452 
453 	while (1) {
454 		ret = btrfs_search_slot(trans, root, &key, path,
455 					del ? -1 : 0, del);
456 		if (ret < 0)
457 			goto out;
458 		leaf = path->nodes[0];
459 		if (ret != 0) {
460 			u32 nritems = btrfs_header_nritems(leaf);
461 			if (path->slots[0] >= nritems) {
462 				ret2 = btrfs_next_leaf(root, path);
463 				if (ret2)
464 					goto out;
465 				leaf = path->nodes[0];
466 			}
467 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
468 			if (found_key.objectid != bytenr ||
469 			    found_key.type != BTRFS_EXTENT_REF_KEY)
470 				goto out;
471 			key.offset = found_key.offset;
472 			if (del) {
473 				btrfs_release_path(root, path);
474 				continue;
475 			}
476 		}
477 		disk_ref = btrfs_item_ptr(path->nodes[0],
478 					  path->slots[0],
479 					  struct btrfs_extent_ref);
480 		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
481 			ret = 0;
482 			goto out;
483 		}
484 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
485 		key.offset = found_key.offset + 1;
486 		btrfs_release_path(root, path);
487 	}
488 out:
489 	return ret;
490 }
491 
492 /*
493  * Back reference rules.  Back refs have three main goals:
494  *
495  * 1) differentiate between all holders of references to an extent so that
496  *    when a reference is dropped we can make sure it was a valid reference
497  *    before freeing the extent.
498  *
499  * 2) Provide enough information to quickly find the holders of an extent
500  *    if we notice a given block is corrupted or bad.
501  *
502  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
503  *    maintenance.  This is actually the same as #2, but with a slightly
504  *    different use case.
505  *
506  * File extents can be referenced by:
507  *
508  * - multiple snapshots, subvolumes, or different generations in one subvol
509  * - different files inside a single subvolume (in theory, not implemented yet)
510  * - different offsets inside a file (bookend extents in file.c)
511  *
512  * The extent ref structure has fields for:
513  *
514  * - Objectid of the subvolume root
515  * - Generation number of the tree holding the reference
516  * - objectid of the file holding the reference
517  * - offset in the file corresponding to the key holding the reference
518  *
519  * When a file extent is allocated the fields are filled in:
520  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
521  *
522  * When a leaf is cow'd new references are added for every file extent found
523  * in the leaf.  It looks the same as the create case, but trans->transid
524  * will be different when the block is cow'd.
525  *
526  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
527  *
528  * When a file extent is removed either during snapshot deletion or file
529  * truncation, the corresponding back reference is found
530  * by searching for:
531  *
532  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
533  *      inode objectid, offset in file)
534  *
535  * Btree extents can be referenced by:
536  *
537  * - Different subvolumes
538  * - Different generations of the same subvolume
539  *
540  * Storing sufficient information for a full reverse mapping of a btree
541  * block would require storing the lowest key of the block in the backref,
542  * and it would require updating that lowest key either before write out or
543  * every time it changed.  Instead, the objectid of the lowest key is stored
544  * along with the level of the tree block.  This provides a hint
545  * about where in the btree the block can be found.  Searches through the
546  * btree only need to look for a pointer to that block, so they stop one
547  * level higher than the level recorded in the backref.
548  *
549  * Some btrees do not do reference counting on their extents.  These
550  * include the extent tree and the tree of tree roots.  Backrefs for these
551  * trees always have a generation of zero.
552  *
553  * When a tree block is created, back references are inserted:
554  *
555  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
556  *
557  * When a tree block is cow'd in a reference counted root,
558  * new back references are added for all the blocks it points to.
559  * These are of the form (trans->transid will have increased since creation):
560  *
561  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
562  *
563  * Because the lowest_key_objectid and the level are just hints
564  * they are not used when backrefs are deleted.  When a backref is deleted:
565  *
566  * if backref was for a tree root:
567  *     root_objectid = root->root_key.objectid
568  * else
569  *     root_objectid = btrfs_header_owner(parent)
570  *
571  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
572  *
573  * Back Reference Key hashing:
574  *
575  * Back references have four fields, each 64 bits long.  Unfortunately,
576  * This is hashed into a single 64 bit number and placed into the key offset.
577  * The key objectid corresponds to the first byte in the extent, and the
578  * key type is set to BTRFS_EXTENT_REF_KEY
579  */
580 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
581 				 struct btrfs_root *root,
582 				 struct btrfs_path *path, u64 bytenr,
583 				 u64 root_objectid, u64 ref_generation,
584 				 u64 owner, u64 owner_offset)
585 {
586 	u64 hash;
587 	struct btrfs_key key;
588 	struct btrfs_extent_ref ref;
589 	struct btrfs_extent_ref *disk_ref;
590 	int ret;
591 
592 	btrfs_set_stack_ref_root(&ref, root_objectid);
593 	btrfs_set_stack_ref_generation(&ref, ref_generation);
594 	btrfs_set_stack_ref_objectid(&ref, owner);
595 	btrfs_set_stack_ref_offset(&ref, owner_offset);
596 
597 	hash = hash_extent_ref(root_objectid, ref_generation, owner,
598 			       owner_offset);
599 	key.offset = hash;
600 	key.objectid = bytenr;
601 	key.type = BTRFS_EXTENT_REF_KEY;
602 
603 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
604 	while (ret == -EEXIST) {
605 		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
606 					  struct btrfs_extent_ref);
607 		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
608 			goto out;
609 		key.offset++;
610 		btrfs_release_path(root, path);
611 		ret = btrfs_insert_empty_item(trans, root, path, &key,
612 					      sizeof(ref));
613 	}
614 	if (ret)
615 		goto out;
616 	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
617 				  struct btrfs_extent_ref);
618 	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
619 			    sizeof(ref));
620 	btrfs_mark_buffer_dirty(path->nodes[0]);
621 out:
622 	btrfs_release_path(root, path);
623 	return ret;
624 }
625 
626 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
627 				struct btrfs_root *root,
628 				u64 bytenr, u64 num_bytes,
629 				u64 root_objectid, u64 ref_generation,
630 				u64 owner, u64 owner_offset)
631 {
632 	struct btrfs_path *path;
633 	int ret;
634 	struct btrfs_key key;
635 	struct extent_buffer *l;
636 	struct btrfs_extent_item *item;
637 	u32 refs;
638 
639 	WARN_ON(num_bytes < root->sectorsize);
640 	path = btrfs_alloc_path();
641 	if (!path)
642 		return -ENOMEM;
643 
644 	path->reada = 0;
645 	key.objectid = bytenr;
646 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
647 	key.offset = num_bytes;
648 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
649 				0, 1);
650 	if (ret < 0)
651 		return ret;
652 	if (ret != 0) {
653 		BUG();
654 	}
655 	BUG_ON(ret != 0);
656 	l = path->nodes[0];
657 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
658 	refs = btrfs_extent_refs(l, item);
659 	btrfs_set_extent_refs(l, item, refs + 1);
660 	btrfs_mark_buffer_dirty(path->nodes[0]);
661 
662 	btrfs_release_path(root->fs_info->extent_root, path);
663 
664 	path->reada = 0;
665 	ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
666 					  path, bytenr, root_objectid,
667 					  ref_generation, owner, owner_offset);
668 	BUG_ON(ret);
669 	finish_current_insert(trans, root->fs_info->extent_root);
670 	del_pending_extents(trans, root->fs_info->extent_root);
671 
672 	btrfs_free_path(path);
673 	return 0;
674 }
675 
676 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
677 			 struct btrfs_root *root)
678 {
679 	finish_current_insert(trans, root->fs_info->extent_root);
680 	del_pending_extents(trans, root->fs_info->extent_root);
681 	return 0;
682 }
683 
684 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
685 			     struct btrfs_root *root, u64 bytenr,
686 			     u64 num_bytes, u32 *refs)
687 {
688 	struct btrfs_path *path;
689 	int ret;
690 	struct btrfs_key key;
691 	struct extent_buffer *l;
692 	struct btrfs_extent_item *item;
693 
694 	WARN_ON(num_bytes < root->sectorsize);
695 	path = btrfs_alloc_path();
696 	path->reada = 0;
697 	key.objectid = bytenr;
698 	key.offset = num_bytes;
699 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
700 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
701 				0, 0);
702 	if (ret < 0)
703 		goto out;
704 	if (ret != 0) {
705 		btrfs_print_leaf(root, path->nodes[0]);
706 		printk("failed to find block number %Lu\n", bytenr);
707 		BUG();
708 	}
709 	l = path->nodes[0];
710 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
711 	*refs = btrfs_extent_refs(l, item);
712 out:
713 	btrfs_free_path(path);
714 	return 0;
715 }
716 
717 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
718 				  struct btrfs_path *count_path,
719 				  u64 first_extent)
720 {
721 	struct btrfs_root *extent_root = root->fs_info->extent_root;
722 	struct btrfs_path *path;
723 	u64 bytenr;
724 	u64 found_objectid;
725 	u64 root_objectid = root->root_key.objectid;
726 	u32 total_count = 0;
727 	u32 cur_count;
728 	u32 nritems;
729 	int ret;
730 	struct btrfs_key key;
731 	struct btrfs_key found_key;
732 	struct extent_buffer *l;
733 	struct btrfs_extent_item *item;
734 	struct btrfs_extent_ref *ref_item;
735 	int level = -1;
736 
737 	path = btrfs_alloc_path();
738 again:
739 	if (level == -1)
740 		bytenr = first_extent;
741 	else
742 		bytenr = count_path->nodes[level]->start;
743 
744 	cur_count = 0;
745 	key.objectid = bytenr;
746 	key.offset = 0;
747 
748 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
749 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
750 	if (ret < 0)
751 		goto out;
752 	BUG_ON(ret == 0);
753 
754 	l = path->nodes[0];
755 	btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
756 
757 	if (found_key.objectid != bytenr ||
758 	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
759 		goto out;
760 	}
761 
762 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
763 	while (1) {
764 		l = path->nodes[0];
765 		nritems = btrfs_header_nritems(l);
766 		if (path->slots[0] >= nritems) {
767 			ret = btrfs_next_leaf(extent_root, path);
768 			if (ret == 0)
769 				continue;
770 			break;
771 		}
772 		btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
773 		if (found_key.objectid != bytenr)
774 			break;
775 
776 		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
777 			path->slots[0]++;
778 			continue;
779 		}
780 
781 		cur_count++;
782 		ref_item = btrfs_item_ptr(l, path->slots[0],
783 					  struct btrfs_extent_ref);
784 		found_objectid = btrfs_ref_root(l, ref_item);
785 
786 		if (found_objectid != root_objectid) {
787 			total_count = 2;
788 			goto out;
789 		}
790 		total_count = 1;
791 		path->slots[0]++;
792 	}
793 	if (cur_count == 0) {
794 		total_count = 0;
795 		goto out;
796 	}
797 	if (level >= 0 && root->node == count_path->nodes[level])
798 		goto out;
799 	level++;
800 	btrfs_release_path(root, path);
801 	goto again;
802 
803 out:
804 	btrfs_free_path(path);
805 	return total_count;
806 }
807 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
808 		       struct btrfs_root *root, u64 owner_objectid)
809 {
810 	u64 generation;
811 	u64 key_objectid;
812 	u64 level;
813 	u32 nritems;
814 	struct btrfs_disk_key disk_key;
815 
816 	level = btrfs_header_level(root->node);
817 	generation = trans->transid;
818 	nritems = btrfs_header_nritems(root->node);
819 	if (nritems > 0) {
820 		if (level == 0)
821 			btrfs_item_key(root->node, &disk_key, 0);
822 		else
823 			btrfs_node_key(root->node, &disk_key, 0);
824 		key_objectid = btrfs_disk_key_objectid(&disk_key);
825 	} else {
826 		key_objectid = 0;
827 	}
828 	return btrfs_inc_extent_ref(trans, root, root->node->start,
829 				    root->node->len, owner_objectid,
830 				    generation, level, key_objectid);
831 }
832 
833 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
834 		  struct extent_buffer *buf)
835 {
836 	u64 bytenr;
837 	u32 nritems;
838 	struct btrfs_key key;
839 	struct btrfs_file_extent_item *fi;
840 	int i;
841 	int level;
842 	int ret;
843 	int faili;
844 
845 	if (!root->ref_cows)
846 		return 0;
847 
848 	level = btrfs_header_level(buf);
849 	nritems = btrfs_header_nritems(buf);
850 	for (i = 0; i < nritems; i++) {
851 		if (level == 0) {
852 			u64 disk_bytenr;
853 			btrfs_item_key_to_cpu(buf, &key, i);
854 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
855 				continue;
856 			fi = btrfs_item_ptr(buf, i,
857 					    struct btrfs_file_extent_item);
858 			if (btrfs_file_extent_type(buf, fi) ==
859 			    BTRFS_FILE_EXTENT_INLINE)
860 				continue;
861 			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
862 			if (disk_bytenr == 0)
863 				continue;
864 			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
865 				    btrfs_file_extent_disk_num_bytes(buf, fi),
866 				    root->root_key.objectid, trans->transid,
867 				    key.objectid, key.offset);
868 			if (ret) {
869 				faili = i;
870 				goto fail;
871 			}
872 		} else {
873 			bytenr = btrfs_node_blockptr(buf, i);
874 			btrfs_node_key_to_cpu(buf, &key, i);
875 			ret = btrfs_inc_extent_ref(trans, root, bytenr,
876 					   btrfs_level_size(root, level - 1),
877 					   root->root_key.objectid,
878 					   trans->transid,
879 					   level - 1, key.objectid);
880 			if (ret) {
881 				faili = i;
882 				goto fail;
883 			}
884 		}
885 	}
886 	return 0;
887 fail:
888 	WARN_ON(1);
889 #if 0
890 	for (i =0; i < faili; i++) {
891 		if (level == 0) {
892 			u64 disk_bytenr;
893 			btrfs_item_key_to_cpu(buf, &key, i);
894 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
895 				continue;
896 			fi = btrfs_item_ptr(buf, i,
897 					    struct btrfs_file_extent_item);
898 			if (btrfs_file_extent_type(buf, fi) ==
899 			    BTRFS_FILE_EXTENT_INLINE)
900 				continue;
901 			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
902 			if (disk_bytenr == 0)
903 				continue;
904 			err = btrfs_free_extent(trans, root, disk_bytenr,
905 				    btrfs_file_extent_disk_num_bytes(buf,
906 								      fi), 0);
907 			BUG_ON(err);
908 		} else {
909 			bytenr = btrfs_node_blockptr(buf, i);
910 			err = btrfs_free_extent(trans, root, bytenr,
911 					btrfs_level_size(root, level - 1), 0);
912 			BUG_ON(err);
913 		}
914 	}
915 #endif
916 	return ret;
917 }
918 
919 static int write_one_cache_group(struct btrfs_trans_handle *trans,
920 				 struct btrfs_root *root,
921 				 struct btrfs_path *path,
922 				 struct btrfs_block_group_cache *cache)
923 {
924 	int ret;
925 	int pending_ret;
926 	struct btrfs_root *extent_root = root->fs_info->extent_root;
927 	unsigned long bi;
928 	struct extent_buffer *leaf;
929 
930 	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
931 	if (ret < 0)
932 		goto fail;
933 	BUG_ON(ret);
934 
935 	leaf = path->nodes[0];
936 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
937 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
938 	btrfs_mark_buffer_dirty(leaf);
939 	btrfs_release_path(extent_root, path);
940 fail:
941 	finish_current_insert(trans, extent_root);
942 	pending_ret = del_pending_extents(trans, extent_root);
943 	if (ret)
944 		return ret;
945 	if (pending_ret)
946 		return pending_ret;
947 	return 0;
948 
949 }
950 
951 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
952 				   struct btrfs_root *root)
953 {
954 	struct extent_map_tree *block_group_cache;
955 	struct btrfs_block_group_cache *cache;
956 	int ret;
957 	int err = 0;
958 	int werr = 0;
959 	struct btrfs_path *path;
960 	u64 last = 0;
961 	u64 start;
962 	u64 end;
963 	u64 ptr;
964 
965 	block_group_cache = &root->fs_info->block_group_cache;
966 	path = btrfs_alloc_path();
967 	if (!path)
968 		return -ENOMEM;
969 
970 	while(1) {
971 		ret = find_first_extent_bit(block_group_cache, last,
972 					    &start, &end, BLOCK_GROUP_DIRTY);
973 		if (ret)
974 			break;
975 
976 		last = end + 1;
977 		ret = get_state_private(block_group_cache, start, &ptr);
978 		if (ret)
979 			break;
980 
981 		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
982 		err = write_one_cache_group(trans, root,
983 					    path, cache);
984 		/*
985 		 * if we fail to write the cache group, we want
986 		 * to keep it marked dirty in hopes that a later
987 		 * write will work
988 		 */
989 		if (err) {
990 			werr = err;
991 			continue;
992 		}
993 		clear_extent_bits(block_group_cache, start, end,
994 				  BLOCK_GROUP_DIRTY, GFP_NOFS);
995 	}
996 	btrfs_free_path(path);
997 	return werr;
998 }
999 
1000 static int update_block_group(struct btrfs_trans_handle *trans,
1001 			      struct btrfs_root *root,
1002 			      u64 bytenr, u64 num_bytes, int alloc,
1003 			      int mark_free, int data)
1004 {
1005 	struct btrfs_block_group_cache *cache;
1006 	struct btrfs_fs_info *info = root->fs_info;
1007 	u64 total = num_bytes;
1008 	u64 old_val;
1009 	u64 byte_in_group;
1010 	u64 start;
1011 	u64 end;
1012 
1013 	while(total) {
1014 		cache = btrfs_lookup_block_group(info, bytenr);
1015 		if (!cache) {
1016 			return -1;
1017 		}
1018 		byte_in_group = bytenr - cache->key.objectid;
1019 		WARN_ON(byte_in_group > cache->key.offset);
1020 		start = cache->key.objectid;
1021 		end = start + cache->key.offset - 1;
1022 		set_extent_bits(&info->block_group_cache, start, end,
1023 				BLOCK_GROUP_DIRTY, GFP_NOFS);
1024 
1025 		old_val = btrfs_block_group_used(&cache->item);
1026 		num_bytes = min(total, cache->key.offset - byte_in_group);
1027 		if (alloc) {
1028 			if (cache->data != data &&
1029 			    old_val < (cache->key.offset >> 1)) {
1030 				int bit_to_clear;
1031 				int bit_to_set;
1032 				cache->data = data;
1033 				if (data) {
1034 					bit_to_clear = BLOCK_GROUP_METADATA;
1035 					bit_to_set = BLOCK_GROUP_DATA;
1036 					cache->item.flags &=
1037 						~BTRFS_BLOCK_GROUP_MIXED;
1038 					cache->item.flags |=
1039 						BTRFS_BLOCK_GROUP_DATA;
1040 				} else {
1041 					bit_to_clear = BLOCK_GROUP_DATA;
1042 					bit_to_set = BLOCK_GROUP_METADATA;
1043 					cache->item.flags &=
1044 						~BTRFS_BLOCK_GROUP_MIXED;
1045 					cache->item.flags &=
1046 						~BTRFS_BLOCK_GROUP_DATA;
1047 				}
1048 				clear_extent_bits(&info->block_group_cache,
1049 						  start, end, bit_to_clear,
1050 						  GFP_NOFS);
1051 				set_extent_bits(&info->block_group_cache,
1052 						start, end, bit_to_set,
1053 						GFP_NOFS);
1054 			} else if (cache->data != data &&
1055 				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
1056 				cache->data = BTRFS_BLOCK_GROUP_MIXED;
1057 				set_extent_bits(&info->block_group_cache,
1058 						start, end,
1059 						BLOCK_GROUP_DATA |
1060 						BLOCK_GROUP_METADATA,
1061 						GFP_NOFS);
1062 			}
1063 			old_val += num_bytes;
1064 		} else {
1065 			old_val -= num_bytes;
1066 			if (mark_free) {
1067 				set_extent_dirty(&info->free_space_cache,
1068 						 bytenr, bytenr + num_bytes - 1,
1069 						 GFP_NOFS);
1070 			}
1071 		}
1072 		btrfs_set_block_group_used(&cache->item, old_val);
1073 		total -= num_bytes;
1074 		bytenr += num_bytes;
1075 	}
1076 	return 0;
1077 }
1078 static int update_pinned_extents(struct btrfs_root *root,
1079 				u64 bytenr, u64 num, int pin)
1080 {
1081 	u64 len;
1082 	struct btrfs_block_group_cache *cache;
1083 	struct btrfs_fs_info *fs_info = root->fs_info;
1084 
1085 	if (pin) {
1086 		set_extent_dirty(&fs_info->pinned_extents,
1087 				bytenr, bytenr + num - 1, GFP_NOFS);
1088 	} else {
1089 		clear_extent_dirty(&fs_info->pinned_extents,
1090 				bytenr, bytenr + num - 1, GFP_NOFS);
1091 	}
1092 	while (num > 0) {
1093 		cache = btrfs_lookup_block_group(fs_info, bytenr);
1094 		WARN_ON(!cache);
1095 		len = min(num, cache->key.offset -
1096 			  (bytenr - cache->key.objectid));
1097 		if (pin) {
1098 			cache->pinned += len;
1099 			fs_info->total_pinned += len;
1100 		} else {
1101 			cache->pinned -= len;
1102 			fs_info->total_pinned -= len;
1103 		}
1104 		bytenr += len;
1105 		num -= len;
1106 	}
1107 	return 0;
1108 }
1109 
1110 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
1111 {
1112 	u64 last = 0;
1113 	u64 start;
1114 	u64 end;
1115 	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
1116 	int ret;
1117 
1118 	while(1) {
1119 		ret = find_first_extent_bit(pinned_extents, last,
1120 					    &start, &end, EXTENT_DIRTY);
1121 		if (ret)
1122 			break;
1123 		set_extent_dirty(copy, start, end, GFP_NOFS);
1124 		last = end + 1;
1125 	}
1126 	return 0;
1127 }
1128 
1129 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1130 			       struct btrfs_root *root,
1131 			       struct extent_map_tree *unpin)
1132 {
1133 	u64 start;
1134 	u64 end;
1135 	int ret;
1136 	struct extent_map_tree *free_space_cache;
1137 	free_space_cache = &root->fs_info->free_space_cache;
1138 
1139 	while(1) {
1140 		ret = find_first_extent_bit(unpin, 0, &start, &end,
1141 					    EXTENT_DIRTY);
1142 		if (ret)
1143 			break;
1144 		update_pinned_extents(root, start, end + 1 - start, 0);
1145 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
1146 		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1147 	}
1148 	return 0;
1149 }
1150 
1151 static int finish_current_insert(struct btrfs_trans_handle *trans,
1152 				 struct btrfs_root *extent_root)
1153 {
1154 	u64 start;
1155 	u64 end;
1156 	struct btrfs_fs_info *info = extent_root->fs_info;
1157 	struct extent_buffer *eb;
1158 	struct btrfs_path *path;
1159 	struct btrfs_key ins;
1160 	struct btrfs_disk_key first;
1161 	struct btrfs_extent_item extent_item;
1162 	int ret;
1163 	int level;
1164 	int err = 0;
1165 
1166 	btrfs_set_stack_extent_refs(&extent_item, 1);
1167 	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1168 	path = btrfs_alloc_path();
1169 
1170 	while(1) {
1171 		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1172 					    &end, EXTENT_LOCKED);
1173 		if (ret)
1174 			break;
1175 
1176 		ins.objectid = start;
1177 		ins.offset = end + 1 - start;
1178 		err = btrfs_insert_item(trans, extent_root, &ins,
1179 					&extent_item, sizeof(extent_item));
1180 		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1181 				  GFP_NOFS);
1182 		eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1183 		level = btrfs_header_level(eb);
1184 		if (level == 0) {
1185 			btrfs_item_key(eb, &first, 0);
1186 		} else {
1187 			btrfs_node_key(eb, &first, 0);
1188 		}
1189 		err = btrfs_insert_extent_backref(trans, extent_root, path,
1190 					  start, extent_root->root_key.objectid,
1191 					  0, level,
1192 					  btrfs_disk_key_objectid(&first));
1193 		BUG_ON(err);
1194 		free_extent_buffer(eb);
1195 	}
1196 	btrfs_free_path(path);
1197 	return 0;
1198 }
1199 
1200 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1201 			  int pending)
1202 {
1203 	int err = 0;
1204 	struct extent_buffer *buf;
1205 
1206 	if (!pending) {
1207 		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1208 		if (buf) {
1209 			if (btrfs_buffer_uptodate(buf)) {
1210 				u64 transid =
1211 				    root->fs_info->running_transaction->transid;
1212 				u64 header_transid =
1213 					btrfs_header_generation(buf);
1214 				if (header_transid == transid) {
1215 					clean_tree_block(NULL, root, buf);
1216 					free_extent_buffer(buf);
1217 					return 1;
1218 				}
1219 			}
1220 			free_extent_buffer(buf);
1221 		}
1222 		update_pinned_extents(root, bytenr, num_bytes, 1);
1223 	} else {
1224 		set_extent_bits(&root->fs_info->pending_del,
1225 				bytenr, bytenr + num_bytes - 1,
1226 				EXTENT_LOCKED, GFP_NOFS);
1227 	}
1228 	BUG_ON(err < 0);
1229 	return 0;
1230 }
1231 
1232 /*
1233  * remove an extent from the root, returns 0 on success
1234  */
1235 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1236 			 *root, u64 bytenr, u64 num_bytes,
1237 			 u64 root_objectid, u64 ref_generation,
1238 			 u64 owner_objectid, u64 owner_offset, int pin,
1239 			 int mark_free)
1240 {
1241 	struct btrfs_path *path;
1242 	struct btrfs_key key;
1243 	struct btrfs_fs_info *info = root->fs_info;
1244 	struct btrfs_root *extent_root = info->extent_root;
1245 	struct extent_buffer *leaf;
1246 	int ret;
1247 	struct btrfs_extent_item *ei;
1248 	u32 refs;
1249 
1250 	key.objectid = bytenr;
1251 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1252 	key.offset = num_bytes;
1253 	path = btrfs_alloc_path();
1254 	if (!path)
1255 		return -ENOMEM;
1256 
1257 	path->reada = 0;
1258 	ret = lookup_extent_backref(trans, extent_root, path,
1259 				    bytenr, root_objectid,
1260 				    ref_generation,
1261 				    owner_objectid, owner_offset, 1);
1262 	if (ret == 0) {
1263 		ret = btrfs_del_item(trans, extent_root, path);
1264 	} else {
1265 		btrfs_print_leaf(extent_root, path->nodes[0]);
1266 		WARN_ON(1);
1267 		printk("Unable to find ref byte nr %Lu root %Lu "
1268 		       " gen %Lu owner %Lu offset %Lu\n", bytenr,
1269 		       root_objectid, ref_generation, owner_objectid,
1270 		       owner_offset);
1271 	}
1272 	btrfs_release_path(extent_root, path);
1273 	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1274 	if (ret < 0)
1275 		return ret;
1276 	BUG_ON(ret);
1277 
1278 	leaf = path->nodes[0];
1279 	ei = btrfs_item_ptr(leaf, path->slots[0],
1280 			    struct btrfs_extent_item);
1281 	refs = btrfs_extent_refs(leaf, ei);
1282 	BUG_ON(refs == 0);
1283 	refs -= 1;
1284 	btrfs_set_extent_refs(leaf, ei, refs);
1285 	btrfs_mark_buffer_dirty(leaf);
1286 
1287 	if (refs == 0) {
1288 		u64 super_used;
1289 		u64 root_used;
1290 
1291 		if (pin) {
1292 			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1293 			if (ret > 0)
1294 				mark_free = 1;
1295 			BUG_ON(ret < 0);
1296 		}
1297 
1298 		/* block accounting for super block */
1299 		super_used = btrfs_super_bytes_used(&info->super_copy);
1300 		btrfs_set_super_bytes_used(&info->super_copy,
1301 					   super_used - num_bytes);
1302 
1303 		/* block accounting for root item */
1304 		root_used = btrfs_root_used(&root->root_item);
1305 		btrfs_set_root_used(&root->root_item,
1306 					   root_used - num_bytes);
1307 
1308 		ret = btrfs_del_item(trans, extent_root, path);
1309 		if (ret) {
1310 			return ret;
1311 		}
1312 		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1313 					 mark_free, 0);
1314 		BUG_ON(ret);
1315 	}
1316 	btrfs_free_path(path);
1317 	finish_current_insert(trans, extent_root);
1318 	return ret;
1319 }
1320 
1321 /*
1322  * find all the blocks marked as pending in the radix tree and remove
1323  * them from the extent map
1324  */
1325 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1326 			       btrfs_root *extent_root)
1327 {
1328 	int ret;
1329 	int err = 0;
1330 	u64 start;
1331 	u64 end;
1332 	struct extent_map_tree *pending_del;
1333 	struct extent_map_tree *pinned_extents;
1334 
1335 	pending_del = &extent_root->fs_info->pending_del;
1336 	pinned_extents = &extent_root->fs_info->pinned_extents;
1337 
1338 	while(1) {
1339 		ret = find_first_extent_bit(pending_del, 0, &start, &end,
1340 					    EXTENT_LOCKED);
1341 		if (ret)
1342 			break;
1343 		update_pinned_extents(extent_root, start, end + 1 - start, 1);
1344 		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1345 				  GFP_NOFS);
1346 		ret = __free_extent(trans, extent_root,
1347 				     start, end + 1 - start,
1348 				     extent_root->root_key.objectid,
1349 				     0, 0, 0, 0, 0);
1350 		if (ret)
1351 			err = ret;
1352 	}
1353 	return err;
1354 }
1355 
1356 /*
1357  * remove an extent from the root, returns 0 on success
1358  */
1359 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1360 		      *root, u64 bytenr, u64 num_bytes,
1361 		      u64 root_objectid, u64 ref_generation,
1362 		      u64 owner_objectid, u64 owner_offset, int pin)
1363 {
1364 	struct btrfs_root *extent_root = root->fs_info->extent_root;
1365 	int pending_ret;
1366 	int ret;
1367 
1368 	WARN_ON(num_bytes < root->sectorsize);
1369 	if (!root->ref_cows)
1370 		ref_generation = 0;
1371 
1372 	if (root == extent_root) {
1373 		pin_down_bytes(root, bytenr, num_bytes, 1);
1374 		return 0;
1375 	}
1376 	ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1377 			    ref_generation, owner_objectid, owner_offset,
1378 			    pin, pin == 0);
1379 	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1380 	return ret ? ret : pending_ret;
1381 }
1382 
1383 static u64 stripe_align(struct btrfs_root *root, u64 val)
1384 {
1385 	u64 mask = ((u64)root->stripesize - 1);
1386 	u64 ret = (val + mask) & ~mask;
1387 	return ret;
1388 }
1389 
1390 /*
1391  * walks the btree of allocated extents and find a hole of a given size.
1392  * The key ins is changed to record the hole:
1393  * ins->objectid == block start
1394  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1395  * ins->offset == number of blocks
1396  * Any available blocks before search_start are skipped.
1397  */
1398 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1399 				     struct btrfs_root *orig_root,
1400 				     u64 num_bytes, u64 empty_size,
1401 				     u64 search_start, u64 search_end,
1402 				     u64 hint_byte, struct btrfs_key *ins,
1403 				     u64 exclude_start, u64 exclude_nr,
1404 				     int data)
1405 {
1406 	struct btrfs_path *path;
1407 	struct btrfs_key key;
1408 	u64 hole_size = 0;
1409 	u64 aligned;
1410 	int ret;
1411 	int slot = 0;
1412 	u64 last_byte = 0;
1413 	u64 orig_search_start = search_start;
1414 	int start_found;
1415 	struct extent_buffer *l;
1416 	struct btrfs_root * root = orig_root->fs_info->extent_root;
1417 	struct btrfs_fs_info *info = root->fs_info;
1418 	u64 total_needed = num_bytes;
1419 	int level;
1420 	struct btrfs_block_group_cache *block_group;
1421 	int full_scan = 0;
1422 	int wrapped = 0;
1423 	u64 cached_start;
1424 
1425 	WARN_ON(num_bytes < root->sectorsize);
1426 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1427 
1428 	level = btrfs_header_level(root->node);
1429 
1430 	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1431 		data = BTRFS_BLOCK_GROUP_MIXED;
1432 	}
1433 
1434 	/* for SSD, cluster allocations together as much as possible */
1435 	if (btrfs_test_opt(root, SSD)) {
1436 		if (!data) {
1437 			if (root->fs_info->last_alloc)
1438 				hint_byte = root->fs_info->last_alloc;
1439 			else {
1440 				hint_byte = hint_byte &
1441 					~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
1442 				empty_size += 16 * 1024 * 1024;
1443 			}
1444 		}
1445 	}
1446 
1447 	search_end = min(search_end,
1448 			 btrfs_super_total_bytes(&info->super_copy));
1449 	if (hint_byte) {
1450 		block_group = btrfs_lookup_block_group(info, hint_byte);
1451 		if (!block_group)
1452 			hint_byte = search_start;
1453 		block_group = btrfs_find_block_group(root, block_group,
1454 						     hint_byte, data, 1);
1455 	} else {
1456 		block_group = btrfs_find_block_group(root,
1457 						     trans->block_group,
1458 						     search_start, data, 1);
1459 	}
1460 
1461 	total_needed += empty_size;
1462 	path = btrfs_alloc_path();
1463 check_failed:
1464 	if (!block_group) {
1465 		block_group = btrfs_lookup_block_group(info, search_start);
1466 		if (!block_group)
1467 			block_group = btrfs_lookup_block_group(info,
1468 						       orig_search_start);
1469 	}
1470 	search_start = find_search_start(root, &block_group, search_start,
1471 					 total_needed, data);
1472 
1473 	if (!data && btrfs_test_opt(root, SSD) && info->last_alloc &&
1474 	    search_start != info->last_alloc) {
1475 		info->last_alloc = 0;
1476 		if (!empty_size) {
1477 			empty_size += 16 * 1024 * 1024;
1478 			total_needed += empty_size;
1479 		}
1480 		search_start = find_search_start(root, &block_group,
1481 						 search_start, total_needed,
1482 						 data);
1483 	}
1484 
1485 	search_start = stripe_align(root, search_start);
1486 	cached_start = search_start;
1487 	btrfs_init_path(path);
1488 	ins->objectid = search_start;
1489 	ins->offset = 0;
1490 	start_found = 0;
1491 	path->reada = 2;
1492 
1493 	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1494 	if (ret < 0)
1495 		goto error;
1496 	ret = find_previous_extent(root, path);
1497 	if (ret < 0)
1498 		goto error;
1499 	l = path->nodes[0];
1500 	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1501 	while (1) {
1502 		l = path->nodes[0];
1503 		slot = path->slots[0];
1504 		if (slot >= btrfs_header_nritems(l)) {
1505 			ret = btrfs_next_leaf(root, path);
1506 			if (ret == 0)
1507 				continue;
1508 			if (ret < 0)
1509 				goto error;
1510 
1511 			search_start = max(search_start,
1512 					   block_group->key.objectid);
1513 			if (!start_found) {
1514 				aligned = stripe_align(root, search_start);
1515 				ins->objectid = aligned;
1516 				if (aligned >= search_end) {
1517 					ret = -ENOSPC;
1518 					goto error;
1519 				}
1520 				ins->offset = search_end - aligned;
1521 				start_found = 1;
1522 				goto check_pending;
1523 			}
1524 			ins->objectid = stripe_align(root,
1525 						     last_byte > search_start ?
1526 						     last_byte : search_start);
1527 			if (search_end <= ins->objectid) {
1528 				ret = -ENOSPC;
1529 				goto error;
1530 			}
1531 			ins->offset = search_end - ins->objectid;
1532 			BUG_ON(ins->objectid >= search_end);
1533 			goto check_pending;
1534 		}
1535 		btrfs_item_key_to_cpu(l, &key, slot);
1536 
1537 		if (key.objectid >= search_start && key.objectid > last_byte &&
1538 		    start_found) {
1539 			if (last_byte < search_start)
1540 				last_byte = search_start;
1541 			aligned = stripe_align(root, last_byte);
1542 			hole_size = key.objectid - aligned;
1543 			if (key.objectid > aligned && hole_size >= num_bytes) {
1544 				ins->objectid = aligned;
1545 				ins->offset = hole_size;
1546 				goto check_pending;
1547 			}
1548 		}
1549 		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1550 			if (!start_found && btrfs_key_type(&key) ==
1551 			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
1552 				last_byte = key.objectid;
1553 				start_found = 1;
1554 			}
1555 			goto next;
1556 		}
1557 
1558 
1559 		start_found = 1;
1560 		last_byte = key.objectid + key.offset;
1561 
1562 		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1563 		    last_byte >= block_group->key.objectid +
1564 		    block_group->key.offset) {
1565 			btrfs_release_path(root, path);
1566 			search_start = block_group->key.objectid +
1567 				block_group->key.offset;
1568 			goto new_group;
1569 		}
1570 next:
1571 		path->slots[0]++;
1572 		cond_resched();
1573 	}
1574 check_pending:
1575 	/* we have to make sure we didn't find an extent that has already
1576 	 * been allocated by the map tree or the original allocation
1577 	 */
1578 	btrfs_release_path(root, path);
1579 	BUG_ON(ins->objectid < search_start);
1580 
1581 	if (ins->objectid + num_bytes >= search_end)
1582 		goto enospc;
1583 	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1584 	    ins->objectid + num_bytes > block_group->
1585 	    key.objectid + block_group->key.offset) {
1586 		search_start = block_group->key.objectid +
1587 			block_group->key.offset;
1588 		goto new_group;
1589 	}
1590 	if (test_range_bit(&info->extent_ins, ins->objectid,
1591 			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1592 		search_start = ins->objectid + num_bytes;
1593 		goto new_group;
1594 	}
1595 	if (test_range_bit(&info->pinned_extents, ins->objectid,
1596 			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1597 		search_start = ins->objectid + num_bytes;
1598 		goto new_group;
1599 	}
1600 	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1601 	    ins->objectid < exclude_start + exclude_nr)) {
1602 		search_start = exclude_start + exclude_nr;
1603 		goto new_group;
1604 	}
1605 	if (!data) {
1606 		block_group = btrfs_lookup_block_group(info, ins->objectid);
1607 		if (block_group)
1608 			trans->block_group = block_group;
1609 	}
1610 	ins->offset = num_bytes;
1611 	btrfs_free_path(path);
1612 	return 0;
1613 
1614 new_group:
1615 	if (search_start + num_bytes >= search_end) {
1616 enospc:
1617 		search_start = orig_search_start;
1618 		if (full_scan) {
1619 			ret = -ENOSPC;
1620 			goto error;
1621 		}
1622 		if (wrapped) {
1623 			if (!full_scan)
1624 				total_needed -= empty_size;
1625 			full_scan = 1;
1626 			data = BTRFS_BLOCK_GROUP_MIXED;
1627 		} else
1628 			wrapped = 1;
1629 	}
1630 	block_group = btrfs_lookup_block_group(info, search_start);
1631 	cond_resched();
1632 	block_group = btrfs_find_block_group(root, block_group,
1633 					     search_start, data, 0);
1634 	goto check_failed;
1635 
1636 error:
1637 	btrfs_release_path(root, path);
1638 	btrfs_free_path(path);
1639 	if (btrfs_test_opt(root, SSD) && !ret && !data)
1640 		info->last_alloc = ins->objectid + ins->offset;
1641 	return ret;
1642 }
1643 /*
1644  * finds a free extent and does all the dirty work required for allocation
1645  * returns the key for the extent through ins, and a tree buffer for
1646  * the first block of the extent through buf.
1647  *
1648  * returns 0 if everything worked, non-zero otherwise.
1649  */
1650 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1651 		       struct btrfs_root *root,
1652 		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
1653 		       u64 owner, u64 owner_offset,
1654 		       u64 empty_size, u64 hint_byte,
1655 		       u64 search_end, struct btrfs_key *ins, int data)
1656 {
1657 	int ret;
1658 	int pending_ret;
1659 	u64 super_used;
1660 	u64 root_used;
1661 	u64 search_start = 0;
1662 	u64 new_hint;
1663 	struct btrfs_fs_info *info = root->fs_info;
1664 	struct btrfs_root *extent_root = info->extent_root;
1665 	struct btrfs_extent_item extent_item;
1666 	struct btrfs_path *path;
1667 
1668 	btrfs_set_stack_extent_refs(&extent_item, 1);
1669 
1670 	new_hint = max(hint_byte, root->fs_info->alloc_start);
1671 	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1672 		hint_byte = new_hint;
1673 
1674 	WARN_ON(num_bytes < root->sectorsize);
1675 	ret = find_free_extent(trans, root, num_bytes, empty_size,
1676 			       search_start, search_end, hint_byte, ins,
1677 			       trans->alloc_exclude_start,
1678 			       trans->alloc_exclude_nr, data);
1679 	BUG_ON(ret);
1680 	if (ret)
1681 		return ret;
1682 
1683 	/* block accounting for super block */
1684 	super_used = btrfs_super_bytes_used(&info->super_copy);
1685 	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1686 
1687 	/* block accounting for root item */
1688 	root_used = btrfs_root_used(&root->root_item);
1689 	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1690 
1691 	clear_extent_dirty(&root->fs_info->free_space_cache,
1692 			   ins->objectid, ins->objectid + ins->offset - 1,
1693 			   GFP_NOFS);
1694 
1695 	if (root == extent_root) {
1696 		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1697 				ins->objectid + ins->offset - 1,
1698 				EXTENT_LOCKED, GFP_NOFS);
1699 		WARN_ON(data == 1);
1700 		goto update_block;
1701 	}
1702 
1703 	WARN_ON(trans->alloc_exclude_nr);
1704 	trans->alloc_exclude_start = ins->objectid;
1705 	trans->alloc_exclude_nr = ins->offset;
1706 	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1707 				sizeof(extent_item));
1708 
1709 	trans->alloc_exclude_start = 0;
1710 	trans->alloc_exclude_nr = 0;
1711 	BUG_ON(ret);
1712 
1713 	path = btrfs_alloc_path();
1714 	BUG_ON(!path);
1715 	ret = btrfs_insert_extent_backref(trans, extent_root, path,
1716 					  ins->objectid, root_objectid,
1717 					  ref_generation, owner, owner_offset);
1718 
1719 	BUG_ON(ret);
1720 	btrfs_free_path(path);
1721 	finish_current_insert(trans, extent_root);
1722 	pending_ret = del_pending_extents(trans, extent_root);
1723 
1724 	if (ret) {
1725 		return ret;
1726 	}
1727 	if (pending_ret) {
1728 		return pending_ret;
1729 	}
1730 
1731 update_block:
1732 	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1733 				 data);
1734 	BUG_ON(ret);
1735 	return 0;
1736 }
1737 
1738 /*
1739  * helper function to allocate a block for a given tree
1740  * returns the tree buffer or NULL.
1741  */
1742 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1743 					     struct btrfs_root *root,
1744 					     u32 blocksize,
1745 					     u64 root_objectid, u64 hint,
1746 					     u64 empty_size)
1747 {
1748 	u64 ref_generation;
1749 
1750 	if (root->ref_cows)
1751 		ref_generation = trans->transid;
1752 	else
1753 		ref_generation = 0;
1754 
1755 
1756 	return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1757 					ref_generation, 0, 0, hint, empty_size);
1758 }
1759 
1760 /*
1761  * helper function to allocate a block for a given tree
1762  * returns the tree buffer or NULL.
1763  */
1764 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1765 					     struct btrfs_root *root,
1766 					     u32 blocksize,
1767 					     u64 root_objectid,
1768 					     u64 ref_generation,
1769 					     u64 first_objectid,
1770 					     int level,
1771 					     u64 hint,
1772 					     u64 empty_size)
1773 {
1774 	struct btrfs_key ins;
1775 	int ret;
1776 	struct extent_buffer *buf;
1777 
1778 	ret = btrfs_alloc_extent(trans, root, blocksize,
1779 				 root_objectid, ref_generation,
1780 				 level, first_objectid, empty_size, hint,
1781 				 (u64)-1, &ins, 0);
1782 	if (ret) {
1783 		BUG_ON(ret > 0);
1784 		return ERR_PTR(ret);
1785 	}
1786 	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1787 	if (!buf) {
1788 		btrfs_free_extent(trans, root, ins.objectid, blocksize,
1789 				  root->root_key.objectid, ref_generation,
1790 				  0, 0, 0);
1791 		return ERR_PTR(-ENOMEM);
1792 	}
1793 	btrfs_set_header_generation(buf, trans->transid);
1794 	clean_tree_block(trans, root, buf);
1795 	wait_on_tree_block_writeback(root, buf);
1796 	btrfs_set_buffer_uptodate(buf);
1797 
1798 	if (PageDirty(buf->first_page)) {
1799 		printk("page %lu dirty\n", buf->first_page->index);
1800 		WARN_ON(1);
1801 	}
1802 
1803 	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1804 			 buf->start + buf->len - 1, GFP_NOFS);
1805 	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1806 			buf->start, buf->start + buf->len - 1,
1807 			EXTENT_CSUM, GFP_NOFS);
1808 	buf->flags |= EXTENT_CSUM;
1809 	if (!btrfs_test_opt(root, SSD))
1810 		btrfs_set_buffer_defrag(buf);
1811 	trans->blocks_used++;
1812 	return buf;
1813 }
1814 
1815 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1816 				  struct btrfs_root *root,
1817 				  struct extent_buffer *leaf)
1818 {
1819 	u64 leaf_owner;
1820 	u64 leaf_generation;
1821 	struct btrfs_key key;
1822 	struct btrfs_file_extent_item *fi;
1823 	int i;
1824 	int nritems;
1825 	int ret;
1826 
1827 	BUG_ON(!btrfs_is_leaf(leaf));
1828 	nritems = btrfs_header_nritems(leaf);
1829 	leaf_owner = btrfs_header_owner(leaf);
1830 	leaf_generation = btrfs_header_generation(leaf);
1831 
1832 	for (i = 0; i < nritems; i++) {
1833 		u64 disk_bytenr;
1834 
1835 		btrfs_item_key_to_cpu(leaf, &key, i);
1836 		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1837 			continue;
1838 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1839 		if (btrfs_file_extent_type(leaf, fi) ==
1840 		    BTRFS_FILE_EXTENT_INLINE)
1841 			continue;
1842 		/*
1843 		 * FIXME make sure to insert a trans record that
1844 		 * repeats the snapshot del on crash
1845 		 */
1846 		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1847 		if (disk_bytenr == 0)
1848 			continue;
1849 		ret = btrfs_free_extent(trans, root, disk_bytenr,
1850 				btrfs_file_extent_disk_num_bytes(leaf, fi),
1851 				leaf_owner, leaf_generation,
1852 				key.objectid, key.offset, 0);
1853 		BUG_ON(ret);
1854 	}
1855 	return 0;
1856 }
1857 
1858 static void noinline reada_walk_down(struct btrfs_root *root,
1859 				     struct extent_buffer *node)
1860 {
1861 	int i;
1862 	u32 nritems;
1863 	u64 bytenr;
1864 	int ret;
1865 	u32 refs;
1866 	int level;
1867 	u32 blocksize;
1868 
1869 	nritems = btrfs_header_nritems(node);
1870 	level = btrfs_header_level(node);
1871 	for (i = 0; i < nritems; i++) {
1872 		bytenr = btrfs_node_blockptr(node, i);
1873 		blocksize = btrfs_level_size(root, level - 1);
1874 		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1875 		BUG_ON(ret);
1876 		if (refs != 1)
1877 			continue;
1878 		mutex_unlock(&root->fs_info->fs_mutex);
1879 		ret = readahead_tree_block(root, bytenr, blocksize);
1880 		cond_resched();
1881 		mutex_lock(&root->fs_info->fs_mutex);
1882 		if (ret)
1883 			break;
1884 	}
1885 }
1886 
1887 /*
1888  * helper function for drop_snapshot, this walks down the tree dropping ref
1889  * counts as it goes.
1890  */
1891 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1892 				   struct btrfs_root *root,
1893 				   struct btrfs_path *path, int *level)
1894 {
1895 	u64 root_owner;
1896 	u64 root_gen;
1897 	u64 bytenr;
1898 	struct extent_buffer *next;
1899 	struct extent_buffer *cur;
1900 	struct extent_buffer *parent;
1901 	u32 blocksize;
1902 	int ret;
1903 	u32 refs;
1904 
1905 	WARN_ON(*level < 0);
1906 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1907 	ret = lookup_extent_ref(trans, root,
1908 				path->nodes[*level]->start,
1909 				path->nodes[*level]->len, &refs);
1910 	BUG_ON(ret);
1911 	if (refs > 1)
1912 		goto out;
1913 
1914 	/*
1915 	 * walk down to the last node level and free all the leaves
1916 	 */
1917 	while(*level >= 0) {
1918 		WARN_ON(*level < 0);
1919 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
1920 		cur = path->nodes[*level];
1921 
1922 		if (*level > 0 && path->slots[*level] == 0)
1923 			reada_walk_down(root, cur);
1924 
1925 		if (btrfs_header_level(cur) != *level)
1926 			WARN_ON(1);
1927 
1928 		if (path->slots[*level] >=
1929 		    btrfs_header_nritems(cur))
1930 			break;
1931 		if (*level == 0) {
1932 			ret = drop_leaf_ref(trans, root, cur);
1933 			BUG_ON(ret);
1934 			break;
1935 		}
1936 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1937 		blocksize = btrfs_level_size(root, *level - 1);
1938 		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1939 		BUG_ON(ret);
1940 		if (refs != 1) {
1941 			parent = path->nodes[*level];
1942 			root_owner = btrfs_header_owner(parent);
1943 			root_gen = btrfs_header_generation(parent);
1944 			path->slots[*level]++;
1945 			ret = btrfs_free_extent(trans, root, bytenr,
1946 						blocksize, root_owner,
1947 						root_gen, 0, 0, 1);
1948 			BUG_ON(ret);
1949 			continue;
1950 		}
1951 		next = btrfs_find_tree_block(root, bytenr, blocksize);
1952 		if (!next || !btrfs_buffer_uptodate(next)) {
1953 			free_extent_buffer(next);
1954 			mutex_unlock(&root->fs_info->fs_mutex);
1955 			next = read_tree_block(root, bytenr, blocksize);
1956 			mutex_lock(&root->fs_info->fs_mutex);
1957 
1958 			/* we dropped the lock, check one more time */
1959 			ret = lookup_extent_ref(trans, root, bytenr,
1960 						blocksize, &refs);
1961 			BUG_ON(ret);
1962 			if (refs != 1) {
1963 				parent = path->nodes[*level];
1964 				root_owner = btrfs_header_owner(parent);
1965 				root_gen = btrfs_header_generation(parent);
1966 
1967 				path->slots[*level]++;
1968 				free_extent_buffer(next);
1969 				ret = btrfs_free_extent(trans, root, bytenr,
1970 							blocksize,
1971 							root_owner,
1972 							root_gen, 0, 0, 1);
1973 				BUG_ON(ret);
1974 				continue;
1975 			}
1976 		}
1977 		WARN_ON(*level <= 0);
1978 		if (path->nodes[*level-1])
1979 			free_extent_buffer(path->nodes[*level-1]);
1980 		path->nodes[*level-1] = next;
1981 		*level = btrfs_header_level(next);
1982 		path->slots[*level] = 0;
1983 	}
1984 out:
1985 	WARN_ON(*level < 0);
1986 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1987 
1988 	if (path->nodes[*level] == root->node) {
1989 		root_owner = root->root_key.objectid;
1990 		parent = path->nodes[*level];
1991 	} else {
1992 		parent = path->nodes[*level + 1];
1993 		root_owner = btrfs_header_owner(parent);
1994 	}
1995 
1996 	root_gen = btrfs_header_generation(parent);
1997 	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1998 				path->nodes[*level]->len,
1999 				root_owner, root_gen, 0, 0, 1);
2000 	free_extent_buffer(path->nodes[*level]);
2001 	path->nodes[*level] = NULL;
2002 	*level += 1;
2003 	BUG_ON(ret);
2004 	return 0;
2005 }
2006 
2007 /*
2008  * helper for dropping snapshots.  This walks back up the tree in the path
2009  * to find the first node higher up where we haven't yet gone through
2010  * all the slots
2011  */
2012 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2013 				 struct btrfs_root *root,
2014 				 struct btrfs_path *path, int *level)
2015 {
2016 	u64 root_owner;
2017 	u64 root_gen;
2018 	struct btrfs_root_item *root_item = &root->root_item;
2019 	int i;
2020 	int slot;
2021 	int ret;
2022 
2023 	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2024 		slot = path->slots[i];
2025 		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2026 			struct extent_buffer *node;
2027 			struct btrfs_disk_key disk_key;
2028 			node = path->nodes[i];
2029 			path->slots[i]++;
2030 			*level = i;
2031 			WARN_ON(*level == 0);
2032 			btrfs_node_key(node, &disk_key, path->slots[i]);
2033 			memcpy(&root_item->drop_progress,
2034 			       &disk_key, sizeof(disk_key));
2035 			root_item->drop_level = i;
2036 			return 0;
2037 		} else {
2038 			if (path->nodes[*level] == root->node) {
2039 				root_owner = root->root_key.objectid;
2040 				root_gen =
2041 				   btrfs_header_generation(path->nodes[*level]);
2042 			} else {
2043 				struct extent_buffer *node;
2044 				node = path->nodes[*level + 1];
2045 				root_owner = btrfs_header_owner(node);
2046 				root_gen = btrfs_header_generation(node);
2047 			}
2048 			ret = btrfs_free_extent(trans, root,
2049 						path->nodes[*level]->start,
2050 						path->nodes[*level]->len,
2051 						root_owner, root_gen, 0, 0, 1);
2052 			BUG_ON(ret);
2053 			free_extent_buffer(path->nodes[*level]);
2054 			path->nodes[*level] = NULL;
2055 			*level = i + 1;
2056 		}
2057 	}
2058 	return 1;
2059 }
2060 
2061 /*
2062  * drop the reference count on the tree rooted at 'snap'.  This traverses
2063  * the tree freeing any blocks that have a ref count of zero after being
2064  * decremented.
2065  */
2066 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2067 			*root)
2068 {
2069 	int ret = 0;
2070 	int wret;
2071 	int level;
2072 	struct btrfs_path *path;
2073 	int i;
2074 	int orig_level;
2075 	struct btrfs_root_item *root_item = &root->root_item;
2076 
2077 	path = btrfs_alloc_path();
2078 	BUG_ON(!path);
2079 
2080 	level = btrfs_header_level(root->node);
2081 	orig_level = level;
2082 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2083 		path->nodes[level] = root->node;
2084 		extent_buffer_get(root->node);
2085 		path->slots[level] = 0;
2086 	} else {
2087 		struct btrfs_key key;
2088 		struct btrfs_disk_key found_key;
2089 		struct extent_buffer *node;
2090 
2091 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2092 		level = root_item->drop_level;
2093 		path->lowest_level = level;
2094 		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2095 		if (wret < 0) {
2096 			ret = wret;
2097 			goto out;
2098 		}
2099 		node = path->nodes[level];
2100 		btrfs_node_key(node, &found_key, path->slots[level]);
2101 		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2102 			       sizeof(found_key)));
2103 	}
2104 	while(1) {
2105 		wret = walk_down_tree(trans, root, path, &level);
2106 		if (wret > 0)
2107 			break;
2108 		if (wret < 0)
2109 			ret = wret;
2110 
2111 		wret = walk_up_tree(trans, root, path, &level);
2112 		if (wret > 0)
2113 			break;
2114 		if (wret < 0)
2115 			ret = wret;
2116 		ret = -EAGAIN;
2117 		break;
2118 	}
2119 	for (i = 0; i <= orig_level; i++) {
2120 		if (path->nodes[i]) {
2121 			free_extent_buffer(path->nodes[i]);
2122 			path->nodes[i] = NULL;
2123 		}
2124 	}
2125 out:
2126 	btrfs_free_path(path);
2127 	return ret;
2128 }
2129 
2130 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2131 {
2132 	u64 start;
2133 	u64 end;
2134 	u64 ptr;
2135 	int ret;
2136 	while(1) {
2137 		ret = find_first_extent_bit(&info->block_group_cache, 0,
2138 					    &start, &end, (unsigned int)-1);
2139 		if (ret)
2140 			break;
2141 		ret = get_state_private(&info->block_group_cache, start, &ptr);
2142 		if (!ret)
2143 			kfree((void *)(unsigned long)ptr);
2144 		clear_extent_bits(&info->block_group_cache, start,
2145 				  end, (unsigned int)-1, GFP_NOFS);
2146 	}
2147 	while(1) {
2148 		ret = find_first_extent_bit(&info->free_space_cache, 0,
2149 					    &start, &end, EXTENT_DIRTY);
2150 		if (ret)
2151 			break;
2152 		clear_extent_dirty(&info->free_space_cache, start,
2153 				   end, GFP_NOFS);
2154 	}
2155 	return 0;
2156 }
2157 
2158 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2159 					 u64 len)
2160 {
2161 	u64 page_start;
2162 	u64 page_end;
2163 	u64 delalloc_start;
2164 	u64 existing_delalloc;
2165 	unsigned long last_index;
2166 	unsigned long i;
2167 	struct page *page;
2168 	struct btrfs_root *root = BTRFS_I(inode)->root;
2169 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2170 	struct file_ra_state *ra;
2171 
2172 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
2173 
2174 	mutex_lock(&inode->i_mutex);
2175 	i = start >> PAGE_CACHE_SHIFT;
2176 	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2177 
2178 	file_ra_state_init(ra, inode->i_mapping);
2179 	btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2180 	kfree(ra);
2181 
2182 	for (; i <= last_index; i++) {
2183 		page = grab_cache_page(inode->i_mapping, i);
2184 		if (!page)
2185 			goto out_unlock;
2186 		if (!PageUptodate(page)) {
2187 			btrfs_readpage(NULL, page);
2188 			lock_page(page);
2189 			if (!PageUptodate(page)) {
2190 				unlock_page(page);
2191 				page_cache_release(page);
2192 				goto out_unlock;
2193 			}
2194 		}
2195 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2196 		page_end = page_start + PAGE_CACHE_SIZE - 1;
2197 
2198 		lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2199 
2200 		delalloc_start = page_start;
2201 		existing_delalloc =
2202 			count_range_bits(&BTRFS_I(inode)->extent_tree,
2203 					 &delalloc_start, page_end,
2204 					 PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2205 
2206 		set_extent_delalloc(em_tree, page_start,
2207 				    page_end, GFP_NOFS);
2208 
2209 		spin_lock(&root->fs_info->delalloc_lock);
2210 		root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2211 						 existing_delalloc;
2212 		spin_unlock(&root->fs_info->delalloc_lock);
2213 
2214 		unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2215 		set_page_dirty(page);
2216 		unlock_page(page);
2217 		page_cache_release(page);
2218 	}
2219 
2220 out_unlock:
2221 	mutex_unlock(&inode->i_mutex);
2222 	return 0;
2223 }
2224 
2225 /*
2226  * note, this releases the path
2227  */
2228 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2229 				  struct btrfs_path *path,
2230 				  struct btrfs_key *extent_key)
2231 {
2232 	struct inode *inode;
2233 	struct btrfs_root *found_root;
2234 	struct btrfs_key *root_location;
2235 	struct btrfs_extent_ref *ref;
2236 	u64 ref_root;
2237 	u64 ref_gen;
2238 	u64 ref_objectid;
2239 	u64 ref_offset;
2240 	int ret;
2241 
2242 	ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2243 			     struct btrfs_extent_ref);
2244 	ref_root = btrfs_ref_root(path->nodes[0], ref);
2245 	ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2246 	ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2247 	ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2248 	btrfs_release_path(extent_root, path);
2249 
2250 	root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2251 	root_location->objectid = ref_root;
2252 	if (ref_gen == 0)
2253 		root_location->offset = 0;
2254 	else
2255 		root_location->offset = (u64)-1;
2256 	root_location->type = BTRFS_ROOT_ITEM_KEY;
2257 
2258 	found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2259 						root_location);
2260 	BUG_ON(!found_root);
2261 	kfree(root_location);
2262 
2263 	if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2264 		mutex_unlock(&extent_root->fs_info->fs_mutex);
2265 		inode = btrfs_iget_locked(extent_root->fs_info->sb,
2266 					  ref_objectid, found_root);
2267 		if (inode->i_state & I_NEW) {
2268 			/* the inode and parent dir are two different roots */
2269 			BTRFS_I(inode)->root = found_root;
2270 			BTRFS_I(inode)->location.objectid = ref_objectid;
2271 			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2272 			BTRFS_I(inode)->location.offset = 0;
2273 			btrfs_read_locked_inode(inode);
2274 			unlock_new_inode(inode);
2275 
2276 		}
2277 		/* this can happen if the reference is not against
2278 		 * the latest version of the tree root
2279 		 */
2280 		if (is_bad_inode(inode)) {
2281 			mutex_lock(&extent_root->fs_info->fs_mutex);
2282 			goto out;
2283 		}
2284 		relocate_inode_pages(inode, ref_offset, extent_key->offset);
2285 		/* FIXME, data=ordered will help get rid of this */
2286 		filemap_fdatawrite(inode->i_mapping);
2287 		iput(inode);
2288 		mutex_lock(&extent_root->fs_info->fs_mutex);
2289 	} else {
2290 		struct btrfs_trans_handle *trans;
2291 		struct btrfs_key found_key;
2292 		struct extent_buffer *eb;
2293 		int level;
2294 		int i;
2295 
2296 		trans = btrfs_start_transaction(found_root, 1);
2297 		eb = read_tree_block(found_root, extent_key->objectid,
2298 				     extent_key->offset);
2299 		level = btrfs_header_level(eb);
2300 
2301 		if (level == 0)
2302 			btrfs_item_key_to_cpu(eb, &found_key, 0);
2303 		else
2304 			btrfs_node_key_to_cpu(eb, &found_key, 0);
2305 
2306 		free_extent_buffer(eb);
2307 
2308 		path->lowest_level = level;
2309 		path->reada = 2;
2310 		ret = btrfs_search_slot(trans, found_root, &found_key, path,
2311 					0, 1);
2312 		path->lowest_level = 0;
2313 		for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2314 			if (!path->nodes[i])
2315 				break;
2316 			free_extent_buffer(path->nodes[i]);
2317 			path->nodes[i] = NULL;
2318 		}
2319 		btrfs_release_path(found_root, path);
2320 		btrfs_end_transaction(trans, found_root);
2321 	}
2322 
2323 out:
2324 	return 0;
2325 }
2326 
2327 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2328 					struct btrfs_path *path,
2329 					struct btrfs_key *extent_key)
2330 {
2331 	struct btrfs_key key;
2332 	struct btrfs_key found_key;
2333 	struct extent_buffer *leaf;
2334 	u32 nritems;
2335 	u32 item_size;
2336 	int ret = 0;
2337 
2338 	key.objectid = extent_key->objectid;
2339 	key.type = BTRFS_EXTENT_REF_KEY;
2340 	key.offset = 0;
2341 
2342 	while(1) {
2343 		ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2344 
2345 		if (ret < 0)
2346 			goto out;
2347 
2348 		ret = 0;
2349 		leaf = path->nodes[0];
2350 		nritems = btrfs_header_nritems(leaf);
2351 		if (path->slots[0] == nritems)
2352 			goto out;
2353 
2354 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2355 		if (found_key.objectid != extent_key->objectid)
2356 			break;
2357 
2358 		if (found_key.type != BTRFS_EXTENT_REF_KEY)
2359 			break;
2360 
2361 		key.offset = found_key.offset + 1;
2362 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2363 
2364 		ret = relocate_one_reference(extent_root, path, extent_key);
2365 		if (ret)
2366 			goto out;
2367 	}
2368 	ret = 0;
2369 out:
2370 	btrfs_release_path(extent_root, path);
2371 	return ret;
2372 }
2373 
2374 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2375 {
2376 	struct btrfs_trans_handle *trans;
2377 	struct btrfs_root *tree_root = root->fs_info->tree_root;
2378 	struct btrfs_path *path;
2379 	u64 cur_byte;
2380 	u64 total_found;
2381 	struct btrfs_fs_info *info = root->fs_info;
2382 	struct extent_map_tree *block_group_cache;
2383 	struct btrfs_key key;
2384 	struct btrfs_key found_key;
2385 	struct extent_buffer *leaf;
2386 	u32 nritems;
2387 	int ret;
2388 	int progress = 0;
2389 
2390 	btrfs_set_super_total_bytes(&info->super_copy, new_size);
2391 	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2392 			   GFP_NOFS);
2393 	block_group_cache = &info->block_group_cache;
2394 	path = btrfs_alloc_path();
2395 	root = root->fs_info->extent_root;
2396 	path->reada = 2;
2397 
2398 again:
2399 	total_found = 0;
2400 	key.objectid = new_size;
2401 	key.offset = 0;
2402 	key.type = 0;
2403 	cur_byte = key.objectid;
2404 
2405 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2406 	if (ret < 0)
2407 		goto out;
2408 
2409 	ret = find_previous_extent(root, path);
2410 	if (ret < 0)
2411 		goto out;
2412 	if (ret == 0) {
2413 		leaf = path->nodes[0];
2414 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2415 		if (found_key.objectid + found_key.offset > new_size) {
2416 			cur_byte = found_key.objectid;
2417 			key.objectid = cur_byte;
2418 		}
2419 	}
2420 	btrfs_release_path(root, path);
2421 
2422 	while(1) {
2423 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2424 		if (ret < 0)
2425 			goto out;
2426 
2427 		leaf = path->nodes[0];
2428 		nritems = btrfs_header_nritems(leaf);
2429 next:
2430 		if (path->slots[0] >= nritems) {
2431 			ret = btrfs_next_leaf(root, path);
2432 			if (ret < 0)
2433 				goto out;
2434 			if (ret == 1) {
2435 				ret = 0;
2436 				break;
2437 			}
2438 			leaf = path->nodes[0];
2439 			nritems = btrfs_header_nritems(leaf);
2440 		}
2441 
2442 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2443 
2444 		if (progress && need_resched()) {
2445 			memcpy(&key, &found_key, sizeof(key));
2446 			mutex_unlock(&root->fs_info->fs_mutex);
2447 			cond_resched();
2448 			mutex_lock(&root->fs_info->fs_mutex);
2449 			btrfs_release_path(root, path);
2450 			btrfs_search_slot(NULL, root, &key, path, 0, 0);
2451 			progress = 0;
2452 			goto next;
2453 		}
2454 		progress = 1;
2455 
2456 		if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2457 		    found_key.objectid + found_key.offset <= cur_byte) {
2458 			path->slots[0]++;
2459 			goto next;
2460 		}
2461 
2462 		total_found++;
2463 		cur_byte = found_key.objectid + found_key.offset;
2464 		key.objectid = cur_byte;
2465 		btrfs_release_path(root, path);
2466 		ret = relocate_one_extent(root, path, &found_key);
2467 	}
2468 
2469 	btrfs_release_path(root, path);
2470 
2471 	if (total_found > 0) {
2472 		trans = btrfs_start_transaction(tree_root, 1);
2473 		btrfs_commit_transaction(trans, tree_root);
2474 
2475 		mutex_unlock(&root->fs_info->fs_mutex);
2476 		btrfs_clean_old_snapshots(tree_root);
2477 		mutex_lock(&root->fs_info->fs_mutex);
2478 
2479 		trans = btrfs_start_transaction(tree_root, 1);
2480 		btrfs_commit_transaction(trans, tree_root);
2481 		goto again;
2482 	}
2483 
2484 	trans = btrfs_start_transaction(root, 1);
2485 	key.objectid = new_size;
2486 	key.offset = 0;
2487 	key.type = 0;
2488 	while(1) {
2489 		u64 ptr;
2490 
2491 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2492 		if (ret < 0)
2493 			goto out;
2494 
2495 		leaf = path->nodes[0];
2496 		nritems = btrfs_header_nritems(leaf);
2497 bg_next:
2498 		if (path->slots[0] >= nritems) {
2499 			ret = btrfs_next_leaf(root, path);
2500 			if (ret < 0)
2501 				break;
2502 			if (ret == 1) {
2503 				ret = 0;
2504 				break;
2505 			}
2506 			leaf = path->nodes[0];
2507 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2508 
2509 			/*
2510 			 * btrfs_next_leaf doesn't cow buffers, we have to
2511 			 * do the search again
2512 			 */
2513 			memcpy(&key, &found_key, sizeof(key));
2514 			btrfs_release_path(root, path);
2515 			goto resched_check;
2516 		}
2517 
2518 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2519 		if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2520 			printk("shrinker found key %Lu %u %Lu\n",
2521 				found_key.objectid, found_key.type,
2522 				found_key.offset);
2523 			path->slots[0]++;
2524 			goto bg_next;
2525 		}
2526 		ret = get_state_private(&info->block_group_cache,
2527 					found_key.objectid, &ptr);
2528 		if (!ret)
2529 			kfree((void *)(unsigned long)ptr);
2530 
2531 		clear_extent_bits(&info->block_group_cache, found_key.objectid,
2532 				  found_key.objectid + found_key.offset - 1,
2533 				  (unsigned int)-1, GFP_NOFS);
2534 
2535 		key.objectid = found_key.objectid + 1;
2536 		btrfs_del_item(trans, root, path);
2537 		btrfs_release_path(root, path);
2538 resched_check:
2539 		if (need_resched()) {
2540 			mutex_unlock(&root->fs_info->fs_mutex);
2541 			cond_resched();
2542 			mutex_lock(&root->fs_info->fs_mutex);
2543 		}
2544 	}
2545 	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2546 			   GFP_NOFS);
2547 	btrfs_commit_transaction(trans, root);
2548 out:
2549 	btrfs_free_path(path);
2550 	return ret;
2551 }
2552 
2553 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2554 			   struct btrfs_root *root, u64 new_size)
2555 {
2556 	struct btrfs_path *path;
2557 	u64 nr = 0;
2558 	u64 cur_byte;
2559 	u64 old_size;
2560 	unsigned long rem;
2561 	struct btrfs_block_group_cache *cache;
2562 	struct btrfs_block_group_item *item;
2563 	struct btrfs_fs_info *info = root->fs_info;
2564 	struct extent_map_tree *block_group_cache;
2565 	struct btrfs_key key;
2566 	struct extent_buffer *leaf;
2567 	int ret;
2568 	int bit;
2569 
2570 	old_size = btrfs_super_total_bytes(&info->super_copy);
2571 	block_group_cache = &info->block_group_cache;
2572 
2573 	root = info->extent_root;
2574 
2575 	cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);
2576 
2577 	cur_byte = cache->key.objectid + cache->key.offset;
2578 	if (cur_byte >= new_size)
2579 		goto set_size;
2580 
2581 	key.offset = BTRFS_BLOCK_GROUP_SIZE;
2582 	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2583 
2584 	path = btrfs_alloc_path();
2585 	if (!path)
2586 		return -ENOMEM;
2587 
2588 	while(cur_byte < new_size) {
2589 		key.objectid = cur_byte;
2590 		ret = btrfs_insert_empty_item(trans, root, path, &key,
2591 				        sizeof(struct btrfs_block_group_item));
2592 		BUG_ON(ret);
2593 		leaf = path->nodes[0];
2594 		item = btrfs_item_ptr(leaf, path->slots[0],
2595 				      struct btrfs_block_group_item);
2596 
2597 		btrfs_set_disk_block_group_used(leaf, item, 0);
2598 		div_long_long_rem(nr, 3, &rem);
2599 		if (rem) {
2600 			btrfs_set_disk_block_group_flags(leaf, item,
2601 						 BTRFS_BLOCK_GROUP_DATA);
2602 		} else {
2603 			btrfs_set_disk_block_group_flags(leaf, item, 0);
2604 		}
2605 		nr++;
2606 
2607 		cache = kmalloc(sizeof(*cache), GFP_NOFS);
2608 		BUG_ON(!cache);
2609 
2610 		read_extent_buffer(leaf, &cache->item, (unsigned long)item,
2611 				   sizeof(cache->item));
2612 
2613 		memcpy(&cache->key, &key, sizeof(key));
2614 		cache->cached = 0;
2615 		cache->pinned = 0;
2616 		cur_byte = key.objectid + key.offset;
2617 		btrfs_release_path(root, path);
2618 
2619 		if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2620 			bit = BLOCK_GROUP_DATA;
2621 			cache->data = BTRFS_BLOCK_GROUP_DATA;
2622 		} else {
2623 			bit = BLOCK_GROUP_METADATA;
2624 			cache->data = 0;
2625 		}
2626 
2627 		/* use EXTENT_LOCKED to prevent merging */
2628 		set_extent_bits(block_group_cache, key.objectid,
2629 				key.objectid + key.offset - 1,
2630 				bit | EXTENT_LOCKED, GFP_NOFS);
2631 		set_state_private(block_group_cache, key.objectid,
2632 				  (unsigned long)cache);
2633 	}
2634 	btrfs_free_path(path);
2635 set_size:
2636 	btrfs_set_super_total_bytes(&info->super_copy, new_size);
2637 	return 0;
2638 }
2639 
2640 int btrfs_read_block_groups(struct btrfs_root *root)
2641 {
2642 	struct btrfs_path *path;
2643 	int ret;
2644 	int err = 0;
2645 	int bit;
2646 	struct btrfs_block_group_cache *cache;
2647 	struct btrfs_fs_info *info = root->fs_info;
2648 	struct extent_map_tree *block_group_cache;
2649 	struct btrfs_key key;
2650 	struct btrfs_key found_key;
2651 	struct extent_buffer *leaf;
2652 
2653 	block_group_cache = &info->block_group_cache;
2654 
2655 	root = info->extent_root;
2656 	key.objectid = 0;
2657 	key.offset = BTRFS_BLOCK_GROUP_SIZE;
2658 	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2659 
2660 	path = btrfs_alloc_path();
2661 	if (!path)
2662 		return -ENOMEM;
2663 
2664 	while(1) {
2665 		ret = btrfs_search_slot(NULL, info->extent_root,
2666 					&key, path, 0, 0);
2667 		if (ret != 0) {
2668 			err = ret;
2669 			break;
2670 		}
2671 		leaf = path->nodes[0];
2672 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2673 		cache = kmalloc(sizeof(*cache), GFP_NOFS);
2674 		if (!cache) {
2675 			err = -1;
2676 			break;
2677 		}
2678 
2679 		read_extent_buffer(leaf, &cache->item,
2680 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
2681 				   sizeof(cache->item));
2682 		memcpy(&cache->key, &found_key, sizeof(found_key));
2683 		cache->cached = 0;
2684 		cache->pinned = 0;
2685 		key.objectid = found_key.objectid + found_key.offset;
2686 		btrfs_release_path(root, path);
2687 
2688 		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
2689 			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
2690 			cache->data = BTRFS_BLOCK_GROUP_MIXED;
2691 		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2692 			bit = BLOCK_GROUP_DATA;
2693 			cache->data = BTRFS_BLOCK_GROUP_DATA;
2694 		} else {
2695 			bit = BLOCK_GROUP_METADATA;
2696 			cache->data = 0;
2697 		}
2698 
2699 		/* use EXTENT_LOCKED to prevent merging */
2700 		set_extent_bits(block_group_cache, found_key.objectid,
2701 				found_key.objectid + found_key.offset - 1,
2702 				bit | EXTENT_LOCKED, GFP_NOFS);
2703 		set_state_private(block_group_cache, found_key.objectid,
2704 				  (unsigned long)cache);
2705 
2706 		if (key.objectid >=
2707 		    btrfs_super_total_bytes(&info->super_copy))
2708 			break;
2709 	}
2710 
2711 	btrfs_free_path(path);
2712 	return 0;
2713 }
2714