xref: /openbmc/linux/fs/btrfs/extent-tree.c (revision c31f8830f0dfd31e7e196b85ca1b39aef8b77d57)
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 
28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
31 
32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
33 				 btrfs_root *extent_root);
34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
35 			       btrfs_root *extent_root);
36 static int find_previous_extent(struct btrfs_root *root,
37 				struct btrfs_path *path)
38 {
39 	struct btrfs_key found_key;
40 	struct extent_buffer *leaf;
41 	int ret;
42 
43 	while(1) {
44 		if (path->slots[0] == 0) {
45 			ret = btrfs_prev_leaf(root, path);
46 			if (ret != 0)
47 				return ret;
48 		} else {
49 			path->slots[0]--;
50 		}
51 		leaf = path->nodes[0];
52 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
53 		if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
54 			return 0;
55 	}
56 	return 1;
57 }
58 
59 static int cache_block_group(struct btrfs_root *root,
60 			     struct btrfs_block_group_cache *block_group)
61 {
62 	struct btrfs_path *path;
63 	int ret;
64 	struct btrfs_key key;
65 	struct extent_buffer *leaf;
66 	struct extent_map_tree *free_space_cache;
67 	int slot;
68 	u64 last = 0;
69 	u64 hole_size;
70 	u64 first_free;
71 	int found = 0;
72 
73 	if (!block_group)
74 		return 0;
75 
76 	root = root->fs_info->extent_root;
77 	free_space_cache = &root->fs_info->free_space_cache;
78 
79 	if (block_group->cached)
80 		return 0;
81 
82 	path = btrfs_alloc_path();
83 	if (!path)
84 		return -ENOMEM;
85 
86 	path->reada = 2;
87 	first_free = block_group->key.objectid;
88 	key.objectid = block_group->key.objectid;
89 	key.offset = 0;
90 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
91 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
92 	if (ret < 0)
93 		return ret;
94 	ret = find_previous_extent(root, path);
95 	if (ret < 0)
96 		return ret;
97 	if (ret == 0) {
98 		leaf = path->nodes[0];
99 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
100 		if (key.objectid + key.offset > first_free)
101 			first_free = key.objectid + key.offset;
102 	}
103 	while(1) {
104 		leaf = path->nodes[0];
105 		slot = path->slots[0];
106 		if (slot >= btrfs_header_nritems(leaf)) {
107 			ret = btrfs_next_leaf(root, path);
108 			if (ret < 0)
109 				goto err;
110 			if (ret == 0) {
111 				continue;
112 			} else {
113 				break;
114 			}
115 		}
116 		btrfs_item_key_to_cpu(leaf, &key, slot);
117 		if (key.objectid < block_group->key.objectid) {
118 			goto next;
119 		}
120 		if (key.objectid >= block_group->key.objectid +
121 		    block_group->key.offset) {
122 			break;
123 		}
124 
125 		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
126 			if (!found) {
127 				last = first_free;
128 				found = 1;
129 			}
130 			if (key.objectid > last) {
131 				hole_size = key.objectid - last;
132 				set_extent_dirty(free_space_cache, last,
133 						 last + hole_size - 1,
134 						 GFP_NOFS);
135 			}
136 			last = key.objectid + key.offset;
137 		}
138 next:
139 		path->slots[0]++;
140 	}
141 
142 	if (!found)
143 		last = first_free;
144 	if (block_group->key.objectid +
145 	    block_group->key.offset > last) {
146 		hole_size = block_group->key.objectid +
147 			block_group->key.offset - last;
148 		set_extent_dirty(free_space_cache, last,
149 				 last + hole_size - 1, GFP_NOFS);
150 	}
151 	block_group->cached = 1;
152 err:
153 	btrfs_free_path(path);
154 	return 0;
155 }
156 
157 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
158 							 btrfs_fs_info *info,
159 							 u64 bytenr)
160 {
161 	struct extent_map_tree *block_group_cache;
162 	struct btrfs_block_group_cache *block_group = NULL;
163 	u64 ptr;
164 	u64 start;
165 	u64 end;
166 	int ret;
167 
168 	block_group_cache = &info->block_group_cache;
169 	ret = find_first_extent_bit(block_group_cache,
170 				    bytenr, &start, &end,
171 				    BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
172 	if (ret) {
173 		return NULL;
174 	}
175 	ret = get_state_private(block_group_cache, start, &ptr);
176 	if (ret)
177 		return NULL;
178 
179 	block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
180 	if (block_group->key.objectid <= bytenr && bytenr <
181 	    block_group->key.objectid + block_group->key.offset)
182 		return block_group;
183 	return NULL;
184 }
/*
 * Find a free-space offset of at least @num bytes at or after
 * @search_start, preferring the block group in *cache_ret.
 *
 * On success the found offset is returned and *cache_ret may be
 * updated to the group finally used.  When nothing suitable exists,
 * search_start itself is returned (callers treat it as a hint).
 * @data selects data vs metadata groups; BTRFS_BLOCK_GROUP_MIXED
 * relaxes the group-boundary check.
 */
static u64 noinline find_search_start(struct btrfs_root *root,
			      struct btrfs_block_group_cache **cache_ret,
			      u64 search_start, int num, int data)
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	u64 last;
	u64 start = 0;
	u64 end = 0;
	u64 cache_miss = 0;	/* first offset the free-space cache had no data for */
	u64 total_fs_bytes;
	int wrapped = 0;	/* set once the scan restarted from search_start */

	if (!cache) {
		goto out;
	}
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
again:
	/* make sure the group's free extents are in the cache */
	ret = cache_block_group(root, cache);
	if (ret)
		goto out;

	last = max(search_start, cache->key.objectid);

	while(1) {
		ret = find_first_extent_bit(&root->fs_info->free_space_cache,
					    last, &start, &end, EXTENT_DIRTY);
		if (ret) {
			/* no free extent at or after last: try another group */
			if (!cache_miss)
				cache_miss = last;
			goto new_group;
		}

		start = max(last, start);
		last = end + 1;
		if (last - start < num) {
			/* free extent too small; remember when it ends the group */
			if (last == cache->key.objectid + cache->key.offset)
				cache_miss = start;
			continue;
		}
		/* a non-mixed allocation must not straddle the group end */
		if (data != BTRFS_BLOCK_GROUP_MIXED &&
		    start + num > cache->key.objectid + cache->key.offset)
			goto new_group;
		if (start + num  > total_fs_bytes)
			goto new_group;
		return start;
	}
out:
	/* fall back to returning the hint; warn if it maps to no group */
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
	return search_start;

new_group:
	last = cache->key.objectid + cache->key.offset;
wrapped:
	cache = btrfs_lookup_block_group(root->fs_info, last);
	if (!cache || cache->key.objectid >= total_fs_bytes) {
no_cache:
		/* ran off the end: restart once from search_start,
		 * accepting mixed groups on the second pass */
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
		goto out;
	}
	if (cache_miss && !cache->cached) {
		/* the earlier miss may have been uncached space */
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
	cache = btrfs_find_block_group(root, cache, last, data, 0);
	if (!cache)
		goto no_cache;
	*cache_ret = cache;
	cache_miss = 0;
	goto again;
}
268 
269 static u64 div_factor(u64 num, int factor)
270 {
271 	if (factor == 10)
272 		return num;
273 	num *= factor;
274 	do_div(num, 10);
275 	return num;
276 }
277 
/*
 * Pick a block group to allocate from.
 *
 * The group containing @search_start and then @hint are tried first;
 * after that the group cache is scanned for a group of the right
 * type (@data selects BLOCK_GROUP_DATA / BLOCK_GROUP_METADATA bits,
 * MIXED accepts both) that is less than factor/10 full.  A second
 * pass accepts any group with room, and a final pass drops the
 * data/metadata distinction.  Returns NULL only when every pass
 * fails.
 */
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						 struct btrfs_block_group_cache
						 *hint, u64 search_start,
						 int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 hint_last;
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	u64 total_fs_bytes;
	int bit;
	int ret;
	int full_search = 0;	/* second pass: accept any non-full group */
	int factor = 8;		/* group must be < factor/10 full */
	int data_swap = 0;	/* third pass: ignore the group type */

	block_group_cache = &info->block_group_cache;
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);

	/* NOTE(review): redundant -- factor already starts at 8 */
	if (!owner)
		factor = 8;

	if (data == BTRFS_BLOCK_GROUP_MIXED) {
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		factor = 10;
	} else if (data)
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;

	/* first choice: the group containing search_start, if compatible */
	if (search_start && search_start < total_fs_bytes) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_block_group(info, search_start);
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				return shint;
			}
		}
	}
	/* second choice: the caller's hint group */
	if (hint && hint->key.objectid < total_fs_bytes &&
	    (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			return hint;
		}
		last = hint->key.objectid + hint->key.offset;
		hint_last = last;
	} else {
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		if (hint_last >= total_fs_bytes)
			hint_last = search_start;
		last = hint_last;
	}
again:
	/* scan the group cache starting at last */
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
			break;

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (cache->key.objectid > total_fs_bytes)
			break;

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
		if (used + cache->pinned < free_check) {
			found_group = cache;
			goto found;
		}
		cond_resched();
	}
	if (!full_search) {
		/* retry from the start, accepting any group with room */
		last = search_start;
		full_search = 1;
		goto again;
	}
	if (!data_swap) {
		/* last resort: accept groups of either type */
		data_swap = 1;
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		last = search_start;
		goto again;
	}
found:
	return found_group;
}
388 
/*
 * Hash the backref fields into the 64 bit value stored in the key
 * offset of a BTRFS_EXTENT_REF_KEY item (see the "Back Reference Key
 * hashing" notes below).
 *
 * Only root_objectid and ref_generation currently feed the hash; the
 * owner fields are compiled out.  NOTE(review): this hash ends up in
 * on-disk keys, so the #if 0 block must not be re-enabled without a
 * format migration -- confirm before touching.
 */
static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
			   u64 owner, u64 owner_offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	/* hash the little-endian representation so the result is
	 * independent of host byte order */
	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(ref_generation);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

#if 0
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
#endif
	return ((u64)high_crc << 32) | (u64)low_crc;
}
409 
410 static int match_extent_ref(struct extent_buffer *leaf,
411 			    struct btrfs_extent_ref *disk_ref,
412 			    struct btrfs_extent_ref *cpu_ref)
413 {
414 	int ret;
415 	int len;
416 
417 	if (cpu_ref->objectid)
418 		len = sizeof(*cpu_ref);
419 	else
420 		len = 2 * sizeof(u64);
421 	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
422 				   len);
423 	return ret == 0;
424 }
425 
/*
 * Find the backref item on extent @bytenr for the holder described by
 * (root_objectid, ref_generation, owner, owner_offset).
 *
 * Backref keys store a hash of the ref fields in their offset, so on
 * a collision the search probes forward through consecutive
 * BTRFS_EXTENT_REF_KEY items until match_extent_ref() succeeds.
 * When @del is set, the search is redone with the exact offset so the
 * path is positioned for deletion (search with ins_len = -1).
 *
 * Returns 0 with the path pointing at the item, > 0 when no matching
 * ref exists, or a negative error.
 */
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 root_objectid,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	/* build the in-memory ref we are trying to match */
	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			/* no exact hit; look at the next item in key order */
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				/* redo the search with the exact offset so
				 * the path is set up for deletion */
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		/* hash collision: probe the next key offset */
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}
491 
492 /*
493  * Back reference rules.  Back refs have three main goals:
494  *
495  * 1) differentiate between all holders of references to an extent so that
496  *    when a reference is dropped we can make sure it was a valid reference
497  *    before freeing the extent.
498  *
499  * 2) Provide enough information to quickly find the holders of an extent
500  *    if we notice a given block is corrupted or bad.
501  *
502  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
503  *    maintenance.  This is actually the same as #2, but with a slightly
504  *    different use case.
505  *
506  * File extents can be referenced by:
507  *
508  * - multiple snapshots, subvolumes, or different generations in one subvol
509  * - different files inside a single subvolume (in theory, not implemented yet)
510  * - different offsets inside a file (bookend extents in file.c)
511  *
512  * The extent ref structure has fields for:
513  *
514  * - Objectid of the subvolume root
515  * - Generation number of the tree holding the reference
516  * - objectid of the file holding the reference
517  * - offset in the file corresponding to the key holding the reference
518  *
519  * When a file extent is allocated the fields are filled in:
520  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
521  *
522  * When a leaf is cow'd new references are added for every file extent found
523  * in the leaf.  It looks the same as the create case, but trans->transid
524  * will be different when the block is cow'd.
525  *
526  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
527  *
528  * When a file extent is removed either during snapshot deletion or file
529  * truncation, the corresponding back reference is found
530  * by searching for:
531  *
532  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
533  *      inode objectid, offset in file)
534  *
535  * Btree extents can be referenced by:
536  *
537  * - Different subvolumes
538  * - Different generations of the same subvolume
539  *
540  * Storing sufficient information for a full reverse mapping of a btree
541  * block would require storing the lowest key of the block in the backref,
542  * and it would require updating that lowest key either before write out or
543  * every time it changed.  Instead, the objectid of the lowest key is stored
544  * along with the level of the tree block.  This provides a hint
545  * about where in the btree the block can be found.  Searches through the
546  * btree only need to look for a pointer to that block, so they stop one
547  * level higher than the level recorded in the backref.
548  *
549  * Some btrees do not do reference counting on their extents.  These
550  * include the extent tree and the tree of tree roots.  Backrefs for these
551  * trees always have a generation of zero.
552  *
553  * When a tree block is created, back references are inserted:
554  *
555  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
556  *
557  * When a tree block is cow'd in a reference counted root,
558  * new back references are added for all the blocks it points to.
559  * These are of the form (trans->transid will have increased since creation):
560  *
561  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
562  *
563  * Because the lowest_key_objectid and the level are just hints
564  * they are not used when backrefs are deleted.  When a backref is deleted:
565  *
566  * if backref was for a tree root:
567  *     root_objectid = root->root_key.objectid
568  * else
569  *     root_objectid = btrfs_header_owner(parent)
570  *
571  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
572  *
573  * Back Reference Key hashing:
574  *
575  * Back references have four fields, each 64 bits long.  Unfortunately,
576  * this is hashed into a single 64 bit number and placed into the key offset.
577  * The key objectid corresponds to the first byte in the extent, and the
578  * key type is set to BTRFS_EXTENT_REF_KEY
579  */
/*
 * Insert a backref item for extent @bytenr describing the holder
 * (root_objectid, ref_generation, owner, owner_offset).
 *
 * The key offset is the hash of the ref fields; on -EEXIST the offset
 * is incremented until a free slot appears.  NOTE(review): when an
 * identical ref is already present, the loop exits with ret still
 * -EEXIST and that is what gets returned -- the caller in
 * btrfs_inc_extent_ref BUGs on any non-zero return, so confirm that
 * exact duplicates are impossible here.
 */
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, u64 bytenr,
				 u64 root_objectid, u64 ref_generation,
				 u64 owner, u64 owner_offset)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_extent_ref ref;
	struct btrfs_extent_ref *disk_ref;
	int ret;

	/* build the ref contents to be written into the new item */
	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
	while (ret == -EEXIST) {
		/* slot taken: either the same ref already exists, or a
		 * different ref hashed to this offset */
		disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref))
			goto out;
		key.offset++;
		btrfs_release_path(root, path);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      sizeof(ref));
	}
	if (ret)
		goto out;
	disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_extent_ref);
	write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
			    sizeof(ref));
	btrfs_mark_buffer_dirty(path->nodes[0]);
out:
	btrfs_release_path(root, path);
	return ret;
}
625 
626 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
627 				struct btrfs_root *root,
628 				u64 bytenr, u64 num_bytes,
629 				u64 root_objectid, u64 ref_generation,
630 				u64 owner, u64 owner_offset)
631 {
632 	struct btrfs_path *path;
633 	int ret;
634 	struct btrfs_key key;
635 	struct extent_buffer *l;
636 	struct btrfs_extent_item *item;
637 	u32 refs;
638 
639 	WARN_ON(num_bytes < root->sectorsize);
640 	path = btrfs_alloc_path();
641 	if (!path)
642 		return -ENOMEM;
643 
644 	path->reada = 0;
645 	key.objectid = bytenr;
646 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
647 	key.offset = num_bytes;
648 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
649 				0, 1);
650 	if (ret < 0)
651 		return ret;
652 	if (ret != 0) {
653 		BUG();
654 	}
655 	BUG_ON(ret != 0);
656 	l = path->nodes[0];
657 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
658 	refs = btrfs_extent_refs(l, item);
659 	btrfs_set_extent_refs(l, item, refs + 1);
660 	btrfs_mark_buffer_dirty(path->nodes[0]);
661 
662 	btrfs_release_path(root->fs_info->extent_root, path);
663 
664 	path->reada = 0;
665 	ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
666 					  path, bytenr, root_objectid,
667 					  ref_generation, owner, owner_offset);
668 	BUG_ON(ret);
669 	finish_current_insert(trans, root->fs_info->extent_root);
670 	del_pending_extents(trans, root->fs_info->extent_root);
671 
672 	btrfs_free_path(path);
673 	return 0;
674 }
675 
676 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
677 			 struct btrfs_root *root)
678 {
679 	finish_current_insert(trans, root->fs_info->extent_root);
680 	del_pending_extents(trans, root->fs_info->extent_root);
681 	return 0;
682 }
683 
684 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
685 			     struct btrfs_root *root, u64 bytenr,
686 			     u64 num_bytes, u32 *refs)
687 {
688 	struct btrfs_path *path;
689 	int ret;
690 	struct btrfs_key key;
691 	struct extent_buffer *l;
692 	struct btrfs_extent_item *item;
693 
694 	WARN_ON(num_bytes < root->sectorsize);
695 	path = btrfs_alloc_path();
696 	path->reada = 0;
697 	key.objectid = bytenr;
698 	key.offset = num_bytes;
699 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
700 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
701 				0, 0);
702 	if (ret < 0)
703 		goto out;
704 	if (ret != 0) {
705 		btrfs_print_leaf(root, path->nodes[0]);
706 		printk("failed to find block number %Lu\n", bytenr);
707 		BUG();
708 	}
709 	l = path->nodes[0];
710 	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
711 	*refs = btrfs_extent_refs(l, item);
712 out:
713 	btrfs_free_path(path);
714 	return 0;
715 }
716 
/*
 * Count how many roots reference the chain of blocks from
 * @first_extent up through the nodes recorded in @count_path.
 *
 * The walk climbs one level at a time; at each level it scans the
 * EXTENT_REF items on the block.  Any ref owned by a root other than
 * @root short-circuits the answer to 2 ("shared").  Returns 0 when a
 * level has no refs at all, 1 when every ref along the path belongs
 * to @root, or 2 as soon as sharing is detected.
 */
u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
				  struct btrfs_path *count_path,
				  u64 first_extent)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;
	u64 bytenr;
	u64 found_objectid;
	u64 root_objectid = root->root_key.objectid;
	u32 total_count = 0;
	u32 cur_count;
	u32 nritems;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	struct btrfs_extent_ref *ref_item;
	int level = -1;		/* -1 means "start at first_extent itself" */

	/* NOTE(review): btrfs_alloc_path() is used unchecked here; an
	 * allocation failure would oops -- confirm whether a guard is
	 * wanted */
	path = btrfs_alloc_path();
again:
	if (level == -1)
		bytenr = first_extent;
	else
		bytenr = count_path->nodes[level]->start;

	cur_count = 0;
	key.objectid = bytenr;
	key.offset = 0;

	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* offset 0 is never expected to hit an extent item exactly */
	BUG_ON(ret == 0);

	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);

	if (found_key.objectid != bytenr ||
	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
		goto out;
	}

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	/* walk the ref items that follow the extent item */
	while (1) {
		l = path->nodes[0];
		nritems = btrfs_header_nritems(l);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret == 0)
				continue;
			break;
		}
		btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
			path->slots[0]++;
			continue;
		}

		cur_count++;
		ref_item = btrfs_item_ptr(l, path->slots[0],
					  struct btrfs_extent_ref);
		found_objectid = btrfs_ref_root(l, ref_item);

		/* a ref from another root means the path is shared */
		if (found_objectid != root_objectid) {
			total_count = 2;
			goto out;
		}
		total_count = 1;
		path->slots[0]++;
	}
	if (cur_count == 0) {
		total_count = 0;
		goto out;
	}
	/* stop once the top of count_path has been processed */
	if (level >= 0 && root->node == count_path->nodes[level])
		goto out;
	level++;
	btrfs_release_path(root, path);
	goto again;

out:
	btrfs_free_path(path);
	return total_count;
}
807 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
808 		       struct btrfs_root *root, u64 owner_objectid)
809 {
810 	u64 generation;
811 	u64 key_objectid;
812 	u64 level;
813 	u32 nritems;
814 	struct btrfs_disk_key disk_key;
815 
816 	level = btrfs_header_level(root->node);
817 	generation = trans->transid;
818 	nritems = btrfs_header_nritems(root->node);
819 	if (nritems > 0) {
820 		if (level == 0)
821 			btrfs_item_key(root->node, &disk_key, 0);
822 		else
823 			btrfs_node_key(root->node, &disk_key, 0);
824 		key_objectid = btrfs_disk_key_objectid(&disk_key);
825 	} else {
826 		key_objectid = 0;
827 	}
828 	return btrfs_inc_extent_ref(trans, root, root->node->start,
829 				    root->node->len, owner_objectid,
830 				    generation, level, key_objectid);
831 }
832 
/*
 * Add one extent reference for everything @buf points to: the disk
 * blocks of file extents when @buf is a leaf, or the child blocks
 * when it is an interior node.
 *
 * Returns 0 on success.  On failure the refs taken so far are NOT
 * rolled back (the rollback code below is compiled out); the
 * function warns and returns the error.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf)
{
	u64 bytenr;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret;
	int faili;	/* index of the failing item; only consumed by
			 * the disabled rollback below */

	/* roots that don't do reference counting have nothing to do */
	if (!root->ref_cows)
		return 0;

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents live in the leaf itself and
			 * have no separate disk extent to reference */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* disk_bytenr 0 marks an unallocated (hole) extent */
			if (disk_bytenr == 0)
				continue;
			ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf, fi),
				    root->root_key.objectid, trans->transid,
				    key.objectid, key.offset);
			if (ret) {
				faili = i;
				goto fail;
			}
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			btrfs_node_key_to_cpu(buf, &key, i);
			ret = btrfs_inc_extent_ref(trans, root, bytenr,
					   btrfs_level_size(root, level - 1),
					   root->root_key.objectid,
					   trans->transid,
					   level - 1, key.objectid);
			if (ret) {
				faili = i;
				goto fail;
			}
		}
	}
	return 0;
fail:
	WARN_ON(1);
	/* disabled rollback; note it references an undeclared 'err'
	 * and would not compile if re-enabled as-is */
#if 0
	for (i =0; i < faili; i++) {
		if (level == 0) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
				continue;
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
								      fi), 0);
			BUG_ON(err);
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
			BUG_ON(err);
		}
	}
#endif
	return ret;
}
918 
/*
 * Write the in-memory block group item for @cache back into the
 * extent tree.
 *
 * The pending insert/delete queues are flushed even when the search
 * fails (the fail label sits above the flush calls on purpose).
 * Returns the search error first, then any error from the pending
 * deletions, else 0.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	/* the block group item must already exist */
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}
950 
951 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
952 				   struct btrfs_root *root)
953 {
954 	struct extent_map_tree *block_group_cache;
955 	struct btrfs_block_group_cache *cache;
956 	int ret;
957 	int err = 0;
958 	int werr = 0;
959 	struct btrfs_path *path;
960 	u64 last = 0;
961 	u64 start;
962 	u64 end;
963 	u64 ptr;
964 
965 	block_group_cache = &root->fs_info->block_group_cache;
966 	path = btrfs_alloc_path();
967 	if (!path)
968 		return -ENOMEM;
969 
970 	while(1) {
971 		ret = find_first_extent_bit(block_group_cache, last,
972 					    &start, &end, BLOCK_GROUP_DIRTY);
973 		if (ret)
974 			break;
975 
976 		last = end + 1;
977 		ret = get_state_private(block_group_cache, start, &ptr);
978 		if (ret)
979 			break;
980 
981 		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
982 		err = write_one_cache_group(trans, root,
983 					    path, cache);
984 		/*
985 		 * if we fail to write the cache group, we want
986 		 * to keep it marked dirty in hopes that a later
987 		 * write will work
988 		 */
989 		if (err) {
990 			werr = err;
991 			continue;
992 		}
993 		clear_extent_bits(block_group_cache, start, end,
994 				  BLOCK_GROUP_DIRTY, GFP_NOFS);
995 	}
996 	btrfs_free_path(path);
997 	return werr;
998 }
999 
/*
 * Adjust the used-byte accounting for [bytenr, bytenr + num_bytes)
 * across the block group(s) covering that range.
 *
 * @alloc non-zero adds the bytes to each group's used count (and may
 * retag a half-empty group's data/metadata type, or mark a fuller
 * one MIXED); zero subtracts them, optionally returning the space to
 * the free-space cache when @mark_free is set.  Every touched group
 * gets its DIRTY bit set so it is written out later.  Returns -1
 * when the range maps to no block group, 0 otherwise.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	u64 start;
	u64 end;

	/* the range may span several block groups */
	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			/* a less-than-half-full group of the wrong type
			 * is converted outright; a fuller one becomes
			 * MIXED in the else-if branch below */
			if (cache->data != data &&
			    old_val < (cache->key.offset >> 1)) {
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
				if (data) {
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				/* NOTE(review): only the in-memory type
				 * and the cache-tree bits change here;
				 * item.flags never gains
				 * BTRFS_BLOCK_GROUP_MIXED -- confirm
				 * that is intended */
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
			}
			old_val += num_bytes;
		} else {
			old_val -= num_bytes;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr, bytenr + num_bytes - 1,
						 GFP_NOFS);
			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
1078 static int update_pinned_extents(struct btrfs_root *root,
1079 				u64 bytenr, u64 num, int pin)
1080 {
1081 	u64 len;
1082 	struct btrfs_block_group_cache *cache;
1083 	struct btrfs_fs_info *fs_info = root->fs_info;
1084 
1085 	if (pin) {
1086 		set_extent_dirty(&fs_info->pinned_extents,
1087 				bytenr, bytenr + num - 1, GFP_NOFS);
1088 	} else {
1089 		clear_extent_dirty(&fs_info->pinned_extents,
1090 				bytenr, bytenr + num - 1, GFP_NOFS);
1091 	}
1092 	while (num > 0) {
1093 		cache = btrfs_lookup_block_group(fs_info, bytenr);
1094 		WARN_ON(!cache);
1095 		len = min(num, cache->key.offset -
1096 			  (bytenr - cache->key.objectid));
1097 		if (pin) {
1098 			cache->pinned += len;
1099 			fs_info->total_pinned += len;
1100 		} else {
1101 			cache->pinned -= len;
1102 			fs_info->total_pinned -= len;
1103 		}
1104 		bytenr += len;
1105 		num -= len;
1106 	}
1107 	return 0;
1108 }
1109 
1110 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
1111 {
1112 	u64 last = 0;
1113 	u64 start;
1114 	u64 end;
1115 	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
1116 	int ret;
1117 
1118 	while(1) {
1119 		ret = find_first_extent_bit(pinned_extents, last,
1120 					    &start, &end, EXTENT_DIRTY);
1121 		if (ret)
1122 			break;
1123 		set_extent_dirty(copy, start, end, GFP_NOFS);
1124 		last = end + 1;
1125 	}
1126 	return 0;
1127 }
1128 
1129 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1130 			       struct btrfs_root *root,
1131 			       struct extent_map_tree *unpin)
1132 {
1133 	u64 start;
1134 	u64 end;
1135 	int ret;
1136 	struct extent_map_tree *free_space_cache;
1137 	free_space_cache = &root->fs_info->free_space_cache;
1138 
1139 	while(1) {
1140 		ret = find_first_extent_bit(unpin, 0, &start, &end,
1141 					    EXTENT_DIRTY);
1142 		if (ret)
1143 			break;
1144 		update_pinned_extents(root, start, end + 1 - start, 0);
1145 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
1146 		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1147 	}
1148 	return 0;
1149 }
1150 
/*
 * Insert extent items for every range recorded in fs_info->extent_ins
 * while the extent root itself was being modified (ranges are tagged
 * EXTENT_LOCKED there).  Each range becomes an extent item keyed
 * (start, len) with a ref count of 1 plus a back reference to the
 * extent root.  Always returns 0.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root)
{
	u64 start;
	u64 end;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct extent_buffer *eb;
	struct btrfs_path *path;
	struct btrfs_key ins;
	struct btrfs_disk_key first;
	struct btrfs_extent_item extent_item;
	int ret;
	int level;
	int err = 0;

	/* every deferred insert starts life with a single reference */
	btrfs_set_stack_extent_refs(&extent_item, 1);
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
	path = btrfs_alloc_path();

	while(1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		ins.objectid = start;
		ins.offset = end + 1 - start;
		/* NOTE(review): err from btrfs_insert_item is overwritten
		 * below without being checked -- confirm failure here is
		 * impossible or handled elsewhere
		 */
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		/* the range is a tree block; read it back to find its first
		 * key for the back reference
		 */
		eb = read_tree_block(extent_root, ins.objectid, ins.offset);
		level = btrfs_header_level(eb);
		if (level == 0) {
			btrfs_item_key(eb, &first, 0);
		} else {
			btrfs_node_key(eb, &first, 0);
		}
		err = btrfs_insert_extent_backref(trans, extent_root, path,
					  start, extent_root->root_key.objectid,
					  0, level,
					  btrfs_disk_key_objectid(&first));
		BUG_ON(err);
		free_extent_buffer(eb);
	}
	btrfs_free_path(path);
	return 0;
}
1199 
1200 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1201 			  int pending)
1202 {
1203 	int err = 0;
1204 	struct extent_buffer *buf;
1205 
1206 	if (!pending) {
1207 		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1208 		if (buf) {
1209 			if (btrfs_buffer_uptodate(buf)) {
1210 				u64 transid =
1211 				    root->fs_info->running_transaction->transid;
1212 				u64 header_transid =
1213 					btrfs_header_generation(buf);
1214 				if (header_transid == transid) {
1215 					free_extent_buffer(buf);
1216 					return 1;
1217 				}
1218 			}
1219 			free_extent_buffer(buf);
1220 		}
1221 		update_pinned_extents(root, bytenr, num_bytes, 1);
1222 	} else {
1223 		set_extent_bits(&root->fs_info->pending_del,
1224 				bytenr, bytenr + num_bytes - 1,
1225 				EXTENT_LOCKED, GFP_NOFS);
1226 	}
1227 	BUG_ON(err < 0);
1228 	return 0;
1229 }
1230 
1231 /*
1232  * remove an extent from the root, returns 0 on success
1233  */
1234 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1235 			 *root, u64 bytenr, u64 num_bytes,
1236 			 u64 root_objectid, u64 ref_generation,
1237 			 u64 owner_objectid, u64 owner_offset, int pin,
1238 			 int mark_free)
1239 {
1240 	struct btrfs_path *path;
1241 	struct btrfs_key key;
1242 	struct btrfs_fs_info *info = root->fs_info;
1243 	struct btrfs_root *extent_root = info->extent_root;
1244 	struct extent_buffer *leaf;
1245 	int ret;
1246 	struct btrfs_extent_item *ei;
1247 	u32 refs;
1248 
1249 	key.objectid = bytenr;
1250 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1251 	key.offset = num_bytes;
1252 
1253 	path = btrfs_alloc_path();
1254 	if (!path)
1255 		return -ENOMEM;
1256 
1257 	path->reada = 0;
1258 	ret = lookup_extent_backref(trans, extent_root, path,
1259 				    bytenr, root_objectid,
1260 				    ref_generation,
1261 				    owner_objectid, owner_offset, 1);
1262 	if (ret == 0) {
1263 		ret = btrfs_del_item(trans, extent_root, path);
1264 	} else {
1265 		btrfs_print_leaf(extent_root, path->nodes[0]);
1266 		WARN_ON(1);
1267 		printk("Unable to find ref byte nr %Lu root %Lu "
1268 		       " gen %Lu owner %Lu offset %Lu\n", bytenr,
1269 		       root_objectid, ref_generation, owner_objectid,
1270 		       owner_offset);
1271 	}
1272 	btrfs_release_path(extent_root, path);
1273 	ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1274 	if (ret < 0)
1275 		return ret;
1276 	BUG_ON(ret);
1277 
1278 	leaf = path->nodes[0];
1279 	ei = btrfs_item_ptr(leaf, path->slots[0],
1280 			    struct btrfs_extent_item);
1281 	refs = btrfs_extent_refs(leaf, ei);
1282 	BUG_ON(refs == 0);
1283 	refs -= 1;
1284 	btrfs_set_extent_refs(leaf, ei, refs);
1285 	btrfs_mark_buffer_dirty(leaf);
1286 
1287 	if (refs == 0) {
1288 		u64 super_used;
1289 		u64 root_used;
1290 
1291 		if (pin) {
1292 			ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1293 			if (ret > 0)
1294 				mark_free = 1;
1295 			BUG_ON(ret < 0);
1296 		}
1297 
1298 		/* block accounting for super block */
1299 		super_used = btrfs_super_bytes_used(&info->super_copy);
1300 		btrfs_set_super_bytes_used(&info->super_copy,
1301 					   super_used - num_bytes);
1302 
1303 		/* block accounting for root item */
1304 		root_used = btrfs_root_used(&root->root_item);
1305 		btrfs_set_root_used(&root->root_item,
1306 					   root_used - num_bytes);
1307 
1308 		ret = btrfs_del_item(trans, extent_root, path);
1309 		if (ret) {
1310 			return ret;
1311 		}
1312 		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1313 					 mark_free, 0);
1314 		BUG_ON(ret);
1315 	}
1316 	btrfs_free_path(path);
1317 	finish_current_insert(trans, extent_root);
1318 	return ret;
1319 }
1320 
1321 /*
1322  * find all the blocks marked as pending in the radix tree and remove
1323  * them from the extent map
1324  */
1325 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1326 			       btrfs_root *extent_root)
1327 {
1328 	int ret;
1329 	int err = 0;
1330 	u64 start;
1331 	u64 end;
1332 	struct extent_map_tree *pending_del;
1333 	struct extent_map_tree *pinned_extents;
1334 
1335 	pending_del = &extent_root->fs_info->pending_del;
1336 	pinned_extents = &extent_root->fs_info->pinned_extents;
1337 
1338 	while(1) {
1339 		ret = find_first_extent_bit(pending_del, 0, &start, &end,
1340 					    EXTENT_LOCKED);
1341 		if (ret)
1342 			break;
1343 		update_pinned_extents(extent_root, start, end + 1 - start, 1);
1344 		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1345 				  GFP_NOFS);
1346 		ret = __free_extent(trans, extent_root,
1347 				     start, end + 1 - start,
1348 				     extent_root->root_key.objectid,
1349 				     0, 0, 0, 0, 0);
1350 		if (ret)
1351 			err = ret;
1352 	}
1353 	return err;
1354 }
1355 
1356 /*
1357  * remove an extent from the root, returns 0 on success
1358  */
1359 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1360 		      *root, u64 bytenr, u64 num_bytes,
1361 		      u64 root_objectid, u64 ref_generation,
1362 		      u64 owner_objectid, u64 owner_offset, int pin)
1363 {
1364 	struct btrfs_root *extent_root = root->fs_info->extent_root;
1365 	int pending_ret;
1366 	int ret;
1367 
1368 	WARN_ON(num_bytes < root->sectorsize);
1369 	if (!root->ref_cows)
1370 		ref_generation = 0;
1371 
1372 	if (root == extent_root) {
1373 		pin_down_bytes(root, bytenr, num_bytes, 1);
1374 		return 0;
1375 	}
1376 	ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1377 			    ref_generation, owner_objectid, owner_offset,
1378 			    pin, pin == 0);
1379 	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1380 	return ret ? ret : pending_ret;
1381 }
1382 
1383 static u64 stripe_align(struct btrfs_root *root, u64 val)
1384 {
1385 	u64 mask = ((u64)root->stripesize - 1);
1386 	u64 ret = (val + mask) & ~mask;
1387 	return ret;
1388 }
1389 
1390 /*
1391  * walks the btree of allocated extents and find a hole of a given size.
1392  * The key ins is changed to record the hole:
1393  * ins->objectid == block start
1394  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1395  * ins->offset == number of blocks
1396  * Any available blocks before search_start are skipped.
1397  */
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 search_start, u64 search_end,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 exclude_start, u64 exclude_nr,
				     int data)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	u64 hole_size = 0;
	u64 aligned;
	int ret;
	int slot = 0;
	u64 last_byte = 0;
	u64 orig_search_start = search_start;
	int start_found;
	struct extent_buffer *l;
	struct btrfs_root * root = orig_root->fs_info->extent_root;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total_needed = num_bytes;
	int level;
	struct btrfs_block_group_cache *block_group;
	int full_scan = 0;
	int wrapped = 0;
	u64 cached_start;

	WARN_ON(num_bytes < root->sectorsize);
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

	level = btrfs_header_level(root->node);

	/* NOTE(review): large hinted allocations are treated as mixed so
	 * any block group may satisfy them -- confirm the 32MB threshold
	 */
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

	search_end = min(search_end,
			 btrfs_super_total_bytes(&info->super_copy));
	/* pick a starting block group from the hint when one was given,
	 * otherwise from the transaction's current block group
	 */
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
		if (!block_group)
			hint_byte = search_start;
		block_group = btrfs_find_block_group(root, block_group,
						     hint_byte, data, 1);
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group,
						     search_start, data, 1);
	}

	total_needed += empty_size;
	path = btrfs_alloc_path();
check_failed:
	/* retried whenever the chosen spot turned out to be unusable */
	if (!block_group) {
		block_group = btrfs_lookup_block_group(info, search_start);
		if (!block_group)
			block_group = btrfs_lookup_block_group(info,
						       orig_search_start);
	}
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data);
	search_start = stripe_align(root, search_start);
	cached_start = search_start;
	btrfs_init_path(path);
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
	path->reada = 2;

	/* walk the extent tree looking for a hole of at least num_bytes
	 * starting at or after search_start
	 */
	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;

			/* past the last extent item: everything up to
			 * search_end is free
			 */
			search_start = max(search_start,
					   block_group->key.objectid);
			if (!start_found) {
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
			ins->offset = search_end - ins->objectid;
			BUG_ON(ins->objectid >= search_end);
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		/* gap between the previous extent's end and this key may
		 * be large enough once stripe-aligned
		 */
		if (key.objectid >= search_start && key.objectid > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
				ins->offset = hole_size;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
			if (!start_found && btrfs_key_type(&key) ==
			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
				last_byte = key.objectid;
				start_found = 1;
			}
			goto next;
		}


		start_found = 1;
		last_byte = key.objectid + key.offset;

		/* stay inside the chosen block group unless doing a full
		 * scan or mixed allocation
		 */
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset;
			goto new_group;
		}
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(ins->objectid < search_start);

	if (ins->objectid + num_bytes >= search_end)
		goto enospc;
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
	    ins->objectid + num_bytes > block_group->
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
	/* ranges with pending inserts or pinned extents are unusable */
	if (test_range_bit(&info->extent_ins, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}
	/* respect the caller's exclusion window */
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
	if (!data) {
		block_group = btrfs_lookup_block_group(info, ins->objectid);
		if (block_group)
			trans->block_group = block_group;
	}
	ins->offset = num_bytes;
	btrfs_free_path(path);
	return 0;

new_group:
	/* on wrap-around, first retry from the start; only after wrapping
	 * once do a full scan without the empty_size slack
	 */
	if (search_start + num_bytes >= search_end) {
enospc:
		search_start = orig_search_start;
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
			full_scan = 1;
			data = BTRFS_BLOCK_GROUP_MIXED;
		} else
			wrapped = 1;
	}
	block_group = btrfs_lookup_block_group(info, search_start);
	cond_resched();
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
	goto check_failed;

error:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	return ret;
}
1615 /*
1616  * finds a free extent and does all the dirty work required for allocation
1617  * returns the key for the extent through ins, and a tree buffer for
1618  * the first block of the extent through buf.
1619  *
1620  * returns 0 if everything worked, non-zero otherwise.
1621  */
1622 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1623 		       struct btrfs_root *root,
1624 		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
1625 		       u64 owner, u64 owner_offset,
1626 		       u64 empty_size, u64 hint_byte,
1627 		       u64 search_end, struct btrfs_key *ins, int data)
1628 {
1629 	int ret;
1630 	int pending_ret;
1631 	u64 super_used;
1632 	u64 root_used;
1633 	u64 search_start = 0;
1634 	u64 new_hint;
1635 	struct btrfs_fs_info *info = root->fs_info;
1636 	struct btrfs_root *extent_root = info->extent_root;
1637 	struct btrfs_extent_item extent_item;
1638 	struct btrfs_path *path;
1639 
1640 	btrfs_set_stack_extent_refs(&extent_item, 1);
1641 
1642 	new_hint = max(hint_byte, root->fs_info->alloc_start);
1643 	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1644 		hint_byte = new_hint;
1645 
1646 	WARN_ON(num_bytes < root->sectorsize);
1647 	ret = find_free_extent(trans, root, num_bytes, empty_size,
1648 			       search_start, search_end, hint_byte, ins,
1649 			       trans->alloc_exclude_start,
1650 			       trans->alloc_exclude_nr, data);
1651 if (ret)
1652 printk("find free extent returns %d\n", ret);
1653 	BUG_ON(ret);
1654 	if (ret)
1655 		return ret;
1656 
1657 	/* block accounting for super block */
1658 	super_used = btrfs_super_bytes_used(&info->super_copy);
1659 	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1660 
1661 	/* block accounting for root item */
1662 	root_used = btrfs_root_used(&root->root_item);
1663 	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1664 
1665 	clear_extent_dirty(&root->fs_info->free_space_cache,
1666 			   ins->objectid, ins->objectid + ins->offset - 1,
1667 			   GFP_NOFS);
1668 
1669 	if (root == extent_root) {
1670 		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1671 				ins->objectid + ins->offset - 1,
1672 				EXTENT_LOCKED, GFP_NOFS);
1673 		WARN_ON(data == 1);
1674 		goto update_block;
1675 	}
1676 
1677 	WARN_ON(trans->alloc_exclude_nr);
1678 	trans->alloc_exclude_start = ins->objectid;
1679 	trans->alloc_exclude_nr = ins->offset;
1680 	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1681 				sizeof(extent_item));
1682 
1683 	trans->alloc_exclude_start = 0;
1684 	trans->alloc_exclude_nr = 0;
1685 	BUG_ON(ret);
1686 
1687 	path = btrfs_alloc_path();
1688 	BUG_ON(!path);
1689 	ret = btrfs_insert_extent_backref(trans, extent_root, path,
1690 					  ins->objectid, root_objectid,
1691 					  ref_generation, owner, owner_offset);
1692 
1693 	BUG_ON(ret);
1694 	btrfs_free_path(path);
1695 	finish_current_insert(trans, extent_root);
1696 	pending_ret = del_pending_extents(trans, extent_root);
1697 
1698 	if (ret) {
1699 		return ret;
1700 	}
1701 	if (pending_ret) {
1702 		return pending_ret;
1703 	}
1704 
1705 update_block:
1706 	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1707 				 data);
1708 	BUG_ON(ret);
1709 	return 0;
1710 }
1711 
1712 /*
1713  * helper function to allocate a block for a given tree
1714  * returns the tree buffer or NULL.
1715  */
1716 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1717 					     struct btrfs_root *root,
1718 					     u32 blocksize,
1719 					     u64 root_objectid, u64 hint,
1720 					     u64 empty_size)
1721 {
1722 	u64 ref_generation;
1723 
1724 	if (root->ref_cows)
1725 		ref_generation = trans->transid;
1726 	else
1727 		ref_generation = 0;
1728 
1729 
1730 	return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1731 					ref_generation, 0, 0, hint, empty_size);
1732 }
1733 
1734 /*
1735  * helper function to allocate a block for a given tree
1736  * returns the tree buffer or NULL.
1737  */
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
					     u64 empty_size)
{
	struct btrfs_key ins;
	int ret;
	struct extent_buffer *buf;

	/* level and first_objectid ride in the owner / owner_offset slots
	 * of the back reference for this tree block
	 */
	ret = btrfs_alloc_extent(trans, root, blocksize,
				 root_objectid, ref_generation,
				 level, first_objectid, empty_size, hint,
				 (u64)-1, &ins, 0);
	if (ret) {
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
	if (!buf) {
		/* hand the extent back so it is not leaked */
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
		return ERR_PTR(-ENOMEM);
	}
	btrfs_set_buffer_uptodate(buf);
	/* dirty the block so commit writes it, and tag it for csumming */
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
	btrfs_set_buffer_defrag(buf);
	trans->blocks_used++;
	return buf;
}
1778 
/*
 * Drop the references held by every file extent item in @leaf.  Called
 * while tearing down a leaf so the data extents it points at get their
 * ref counts decremented too.  Always returns 0.
 */
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *leaf)
{
	u64 leaf_owner;
	u64 leaf_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

	for (i = 0; i < nritems; i++) {
		u64 disk_bytenr;

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		/* inline extents live inside the leaf, nothing to free */
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		/* a zero disk_bytenr marks a hole */
		if (disk_bytenr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
		BUG_ON(ret);
	}
	return 0;
}
1821 
1822 static void noinline reada_walk_down(struct btrfs_root *root,
1823 				     struct extent_buffer *node)
1824 {
1825 	int i;
1826 	u32 nritems;
1827 	u64 bytenr;
1828 	int ret;
1829 	u32 refs;
1830 	int level;
1831 	u32 blocksize;
1832 
1833 	nritems = btrfs_header_nritems(node);
1834 	level = btrfs_header_level(node);
1835 	for (i = 0; i < nritems; i++) {
1836 		bytenr = btrfs_node_blockptr(node, i);
1837 		blocksize = btrfs_level_size(root, level - 1);
1838 		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1839 		BUG_ON(ret);
1840 		if (refs != 1)
1841 			continue;
1842 		mutex_unlock(&root->fs_info->fs_mutex);
1843 		ret = readahead_tree_block(root, bytenr, blocksize);
1844 		cond_resched();
1845 		mutex_lock(&root->fs_info->fs_mutex);
1846 		if (ret)
1847 			break;
1848 	}
1849 }
1850 
1851 /*
1852  * helper function for drop_snapshot, this walks down the tree dropping ref
1853  * counts as it goes.
1854  */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	u32 blocksize;
	int ret;
	u32 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* a shared subtree is not descended into; only its ref is dropped */
	ret = lookup_extent_ref(trans, root,
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;

	/*
	 * walk down to the last node level and free all the leaves
	 */
	while(*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		/* at the start of a node, prefetch the children we will free */
		if (*level > 0 && path->slots[*level] == 0)
			reada_walk_down(root, cur);

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		/* done with this node: pop back up and free it (below out:) */
		if (path->slots[*level] >=
		    btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
		BUG_ON(ret);
		if (refs != 1) {
			/* shared child: just drop our reference and move on */
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			path->slots[*level]++;
			ret = btrfs_free_extent(trans, root, bytenr,
						blocksize, root_owner,
						root_gen, 0, 0, 1);
			BUG_ON(ret);
			continue;
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next)) {
			free_extent_buffer(next);
			/* reading from disk: drop fs_mutex around the I/O */
			mutex_unlock(&root->fs_info->fs_mutex);
			next = read_tree_block(root, bytenr, blocksize);
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
			BUG_ON(ret);
			if (refs != 1) {
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

				path->slots[*level]++;
				free_extent_buffer(next);
				ret = btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
				BUG_ON(ret);
				continue;
			}
		}
		/* descend into the exclusively-owned child */
		WARN_ON(*level <= 0);
		if (path->nodes[*level-1])
			free_extent_buffer(path->nodes[*level-1]);
		path->nodes[*level-1] = next;
		*level = btrfs_header_level(next);
		path->slots[*level] = 0;
	}
out:
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* free the block we stopped at and hand control back one level up */
	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
				path->nodes[*level]->len,
				root_owner, root_gen, 0, 0, 1);
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}
1970 
1971 /*
1972  * helper for dropping snapshots.  This walks back up the tree in the path
1973  * to find the first node higher up where we haven't yet gone through
1974  * all the slots
1975  */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			/* more slots remain at this level: record progress
			 * in the root item so the drop can resume after a
			 * transaction break, and return to walk_down_tree
			 */
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			/* level exhausted: free this node and keep climbing */
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
			BUG_ON(ret);
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	/* the whole tree has been consumed */
	return 1;
}
2024 
2025 /*
2026  * drop the reference count on the tree rooted at 'snap'.  This traverses
2027  * the tree freeing any blocks that have a ref count of zero after being
2028  * decremented.
2029  */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path *path;
	int i;
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	level = btrfs_header_level(root->node);
	orig_level = level;
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* fresh drop: start from the root node */
		path->nodes[level] = root->node;
		extent_buffer_get(root->node);
		path->slots[level] = 0;
	} else {
		/* resuming: re-seek to the key recorded by walk_up_tree */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path->lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (wret < 0) {
			ret = wret;
			goto out;
		}
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}
	/* one down/up pass per call; -EAGAIN tells the caller to commit
	 * the transaction and call again
	 */
	while(1) {
		wret = walk_down_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
		ret = -EAGAIN;
		break;
	}
	/* release any buffers still referenced by the path */
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}
2093 
2094 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2095 {
2096 	u64 start;
2097 	u64 end;
2098 	u64 ptr;
2099 	int ret;
2100 	while(1) {
2101 		ret = find_first_extent_bit(&info->block_group_cache, 0,
2102 					    &start, &end, (unsigned int)-1);
2103 		if (ret)
2104 			break;
2105 		ret = get_state_private(&info->block_group_cache, start, &ptr);
2106 		if (!ret)
2107 			kfree((void *)(unsigned long)ptr);
2108 		clear_extent_bits(&info->block_group_cache, start,
2109 				  end, (unsigned int)-1, GFP_NOFS);
2110 	}
2111 	while(1) {
2112 		ret = find_first_extent_bit(&info->free_space_cache, 0,
2113 					    &start, &end, EXTENT_DIRTY);
2114 		if (ret)
2115 			break;
2116 		clear_extent_dirty(&info->free_space_cache, start,
2117 				   end, GFP_NOFS);
2118 	}
2119 	return 0;
2120 }
2121 
2122 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2123 					 u64 len)
2124 {
2125 	u64 page_start;
2126 	u64 page_end;
2127 	u64 delalloc_start;
2128 	u64 existing_delalloc;
2129 	unsigned long last_index;
2130 	unsigned long i;
2131 	struct page *page;
2132 	struct btrfs_root *root = BTRFS_I(inode)->root;
2133 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2134 	struct file_ra_state *ra;
2135 
2136 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
2137 
2138 	mutex_lock(&inode->i_mutex);
2139 	i = start >> PAGE_CACHE_SHIFT;
2140 	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2141 
2142 	file_ra_state_init(ra, inode->i_mapping);
2143 	btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2144 	kfree(ra);
2145 
2146 	for (; i <= last_index; i++) {
2147 		page = grab_cache_page(inode->i_mapping, i);
2148 		if (!page)
2149 			goto out_unlock;
2150 		if (!PageUptodate(page)) {
2151 			btrfs_readpage(NULL, page);
2152 			lock_page(page);
2153 			if (!PageUptodate(page)) {
2154 				unlock_page(page);
2155 				page_cache_release(page);
2156 				goto out_unlock;
2157 			}
2158 		}
2159 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2160 		page_end = page_start + PAGE_CACHE_SIZE - 1;
2161 
2162 		lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2163 
2164 		delalloc_start = page_start;
2165 		existing_delalloc =
2166 			count_range_bits(&BTRFS_I(inode)->extent_tree,
2167 					 &delalloc_start, page_end,
2168 					 PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2169 
2170 		set_extent_delalloc(em_tree, page_start,
2171 				    page_end, GFP_NOFS);
2172 
2173 		spin_lock(&root->fs_info->delalloc_lock);
2174 		root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2175 						 existing_delalloc;
2176 		spin_unlock(&root->fs_info->delalloc_lock);
2177 
2178 		unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2179 		set_page_dirty(page);
2180 		unlock_page(page);
2181 		page_cache_release(page);
2182 	}
2183 
2184 out_unlock:
2185 	mutex_unlock(&inode->i_mutex);
2186 	return 0;
2187 }
2188 
2189 /*
2190  * note, this releases the path
2191  */
2192 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2193 				  struct btrfs_path *path,
2194 				  struct btrfs_key *extent_key)
2195 {
2196 	struct inode *inode;
2197 	struct btrfs_root *found_root;
2198 	struct btrfs_key *root_location;
2199 	struct btrfs_extent_ref *ref;
2200 	u64 ref_root;
2201 	u64 ref_gen;
2202 	u64 ref_objectid;
2203 	u64 ref_offset;
2204 	int ret;
2205 
2206 	ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2207 			     struct btrfs_extent_ref);
2208 	ref_root = btrfs_ref_root(path->nodes[0], ref);
2209 	ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2210 	ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2211 	ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2212 	btrfs_release_path(extent_root, path);
2213 
2214 	root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2215 	root_location->objectid = ref_root;
2216 	if (ref_gen == 0)
2217 		root_location->offset = 0;
2218 	else
2219 		root_location->offset = (u64)-1;
2220 	root_location->type = BTRFS_ROOT_ITEM_KEY;
2221 
2222 	found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2223 						root_location);
2224 	BUG_ON(!found_root);
2225 	kfree(root_location);
2226 
2227 	if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2228 		mutex_unlock(&extent_root->fs_info->fs_mutex);
2229 		inode = btrfs_iget_locked(extent_root->fs_info->sb,
2230 					  ref_objectid, found_root);
2231 		if (inode->i_state & I_NEW) {
2232 			/* the inode and parent dir are two different roots */
2233 			BTRFS_I(inode)->root = found_root;
2234 			BTRFS_I(inode)->location.objectid = ref_objectid;
2235 			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2236 			BTRFS_I(inode)->location.offset = 0;
2237 			btrfs_read_locked_inode(inode);
2238 			unlock_new_inode(inode);
2239 
2240 		}
2241 		/* this can happen if the reference is not against
2242 		 * the latest version of the tree root
2243 		 */
2244 		if (is_bad_inode(inode)) {
2245 			mutex_lock(&extent_root->fs_info->fs_mutex);
2246 			goto out;
2247 		}
2248 		relocate_inode_pages(inode, ref_offset, extent_key->offset);
2249 		/* FIXME, data=ordered will help get rid of this */
2250 		filemap_fdatawrite(inode->i_mapping);
2251 		iput(inode);
2252 		mutex_lock(&extent_root->fs_info->fs_mutex);
2253 	} else {
2254 		struct btrfs_trans_handle *trans;
2255 		struct btrfs_key found_key;
2256 		struct extent_buffer *eb;
2257 		int level;
2258 		int i;
2259 
2260 		trans = btrfs_start_transaction(found_root, 1);
2261 		eb = read_tree_block(found_root, extent_key->objectid,
2262 				     extent_key->offset);
2263 		level = btrfs_header_level(eb);
2264 
2265 		if (level == 0)
2266 			btrfs_item_key_to_cpu(eb, &found_key, 0);
2267 		else
2268 			btrfs_node_key_to_cpu(eb, &found_key, 0);
2269 
2270 		free_extent_buffer(eb);
2271 
2272 		path->lowest_level = level;
2273 		path->reada = 2;
2274 		ret = btrfs_search_slot(trans, found_root, &found_key, path,
2275 					0, 1);
2276 		path->lowest_level = 0;
2277 		for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2278 			if (!path->nodes[i])
2279 				break;
2280 			free_extent_buffer(path->nodes[i]);
2281 			path->nodes[i] = NULL;
2282 		}
2283 		btrfs_release_path(found_root, path);
2284 		btrfs_end_transaction(trans, found_root);
2285 	}
2286 
2287 out:
2288 	return 0;
2289 }
2290 
2291 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2292 					struct btrfs_path *path,
2293 					struct btrfs_key *extent_key)
2294 {
2295 	struct btrfs_key key;
2296 	struct btrfs_key found_key;
2297 	struct extent_buffer *leaf;
2298 	u32 nritems;
2299 	u32 item_size;
2300 	int ret = 0;
2301 
2302 	key.objectid = extent_key->objectid;
2303 	key.type = BTRFS_EXTENT_REF_KEY;
2304 	key.offset = 0;
2305 
2306 	while(1) {
2307 		ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2308 
2309 		if (ret < 0)
2310 			goto out;
2311 
2312 		ret = 0;
2313 		leaf = path->nodes[0];
2314 		nritems = btrfs_header_nritems(leaf);
2315 		if (path->slots[0] == nritems)
2316 			goto out;
2317 
2318 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2319 		if (found_key.objectid != extent_key->objectid)
2320 			break;
2321 
2322 		if (found_key.type != BTRFS_EXTENT_REF_KEY)
2323 			break;
2324 
2325 		key.offset = found_key.offset + 1;
2326 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2327 
2328 		ret = relocate_one_reference(extent_root, path, extent_key);
2329 		if (ret)
2330 			goto out;
2331 	}
2332 	ret = 0;
2333 out:
2334 	btrfs_release_path(extent_root, path);
2335 	return ret;
2336 }
2337 
/*
 * Shrink the filesystem to 'new_size' bytes: relocate every extent that
 * lives at or beyond the new end, then delete the block group items that
 * no longer fit.  Returns 0 on success or a negative error code.
 */
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_path *path;
	u64 cur_byte;
	u64 total_found;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_map_tree *block_group_cache;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int progress = 0;

	/* shrink the visible size first so no new allocations land past
	 * new_size, and drop the free space records beyond it
	 */
	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
			   GFP_NOFS);
	block_group_cache = &info->block_group_cache;
	path = btrfs_alloc_path();
	root = root->fs_info->extent_root;
	path->reada = 2;

again:
	total_found = 0;
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	/* step back to the last extent item before new_size; if it
	 * straddles the boundary it must be relocated too
	 */
	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto out;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid + found_key.offset > new_size) {
			cur_byte = found_key.objectid;
			key.objectid = cur_byte;
		}
	}
	btrfs_release_path(root, path);

	/* relocation pass: walk every extent item from cur_byte onward */
	while(1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* periodically drop fs_mutex so other work can run, then
		 * re-search since the tree may have changed underneath us
		 */
		if (progress && need_resched()) {
			memcpy(&key, &found_key, sizeof(key));
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
			btrfs_release_path(root, path);
			btrfs_search_slot(NULL, root, &key, path, 0, 0);
			progress = 0;
			goto next;
		}
		progress = 1;

		/* skip non-extent items and extents already processed */
		if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
		    found_key.objectid + found_key.offset <= cur_byte) {
			path->slots[0]++;
			goto next;
		}

		total_found++;
		cur_byte = found_key.objectid + found_key.offset;
		key.objectid = cur_byte;
		btrfs_release_path(root, path);
		ret = relocate_one_extent(root, path, &found_key);
	}

	btrfs_release_path(root, path);

	/* if anything moved, commit, reap old snapshots (which may free
	 * more extents) and rescan until a pass finds nothing
	 */
	if (total_found > 0) {
		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);

		mutex_unlock(&root->fs_info->fs_mutex);
		btrfs_clean_old_snapshots(tree_root);
		mutex_lock(&root->fs_info->fs_mutex);

		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);
		goto again;
	}

	/* deletion pass: remove the block group items past new_size */
	trans = btrfs_start_transaction(root, 1);
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
	while(1) {
		u64 ptr;

		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
bg_next:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

			/*
			 * btrfs_next_leaf doesn't cow buffers, we have to
			 * do the search again
			 */
			memcpy(&key, &found_key, sizeof(key));
			btrfs_release_path(root, path);
			goto resched_check;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
			printk("shrinker found key %Lu %u %Lu\n",
				found_key.objectid, found_key.type,
				found_key.offset);
			path->slots[0]++;
			goto bg_next;
		}
		/* free the in-memory cache struct for this block group */
		ret = get_state_private(&info->block_group_cache,
					found_key.objectid, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);

		clear_extent_bits(&info->block_group_cache, found_key.objectid,
				  found_key.objectid + found_key.offset - 1,
				  (unsigned int)-1, GFP_NOFS);

		key.objectid = found_key.objectid + 1;
		btrfs_del_item(trans, root, path);
		btrfs_release_path(root, path);
resched_check:
		if (need_resched()) {
			mutex_unlock(&root->fs_info->fs_mutex);
			cond_resched();
			mutex_lock(&root->fs_info->fs_mutex);
		}
	}
	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
			   GFP_NOFS);
	btrfs_commit_transaction(trans, root);
out:
	btrfs_free_path(path);
	return ret;
}
2516 
/*
 * Grow the filesystem to 'new_size' bytes by inserting new block group
 * items (and matching in-memory cache entries) to cover the added space.
 * Returns 0 on success or -ENOMEM if the path cannot be allocated.
 */
int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 new_size)
{
	struct btrfs_path *path;
	u64 nr = 0;
	u64 cur_byte;
	u64 old_size;
	unsigned long rem;
	struct btrfs_block_group_cache *cache;
	struct btrfs_block_group_item *item;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_map_tree *block_group_cache;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;
	int bit;

	old_size = btrfs_super_total_bytes(&info->super_copy);
	block_group_cache = &info->block_group_cache;

	root = info->extent_root;

	/* find the last existing block group; new groups start after it.
	 * NOTE(review): assumes the lookup at old_size - 1 always succeeds
	 * (cache is dereferenced without a NULL check) — confirm
	 */
	cache = btrfs_lookup_block_group(root->fs_info, old_size - 1);

	cur_byte = cache->key.objectid + cache->key.offset;
	if (cur_byte >= new_size)
		goto set_size;

	key.offset = BTRFS_BLOCK_GROUP_SIZE;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(cur_byte < new_size) {
		key.objectid = cur_byte;
		ret = btrfs_insert_empty_item(trans, root, path, &key,
				        sizeof(struct btrfs_block_group_item));
		BUG_ON(ret);
		leaf = path->nodes[0];
		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_block_group_item);

		btrfs_set_disk_block_group_used(leaf, item, 0);
		/* every third new group (nr % 3 == 0) is metadata, the
		 * other two are flagged as data groups
		 */
		div_long_long_rem(nr, 3, &rem);
		if (rem) {
			btrfs_set_disk_block_group_flags(leaf, item,
						 BTRFS_BLOCK_GROUP_DATA);
		} else {
			btrfs_set_disk_block_group_flags(leaf, item, 0);
		}
		nr++;

		/* build the in-memory cache entry mirroring the item */
		cache = kmalloc(sizeof(*cache), GFP_NOFS);
		BUG_ON(!cache);

		read_extent_buffer(leaf, &cache->item, (unsigned long)item,
				   sizeof(cache->item));

		memcpy(&cache->key, &key, sizeof(key));
		cache->cached = 0;
		cache->pinned = 0;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
			bit = BLOCK_GROUP_DATA;
			cache->data = BTRFS_BLOCK_GROUP_DATA;
		} else {
			bit = BLOCK_GROUP_METADATA;
			cache->data = 0;
		}

		/* use EXTENT_LOCKED to prevent merging */
		set_extent_bits(block_group_cache, key.objectid,
				key.objectid + key.offset - 1,
				bit | EXTENT_LOCKED, GFP_NOFS);
		set_state_private(block_group_cache, key.objectid,
				  (unsigned long)cache);
	}
	btrfs_free_path(path);
set_size:
	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	return 0;
}
2603 
2604 int btrfs_read_block_groups(struct btrfs_root *root)
2605 {
2606 	struct btrfs_path *path;
2607 	int ret;
2608 	int err = 0;
2609 	int bit;
2610 	struct btrfs_block_group_cache *cache;
2611 	struct btrfs_fs_info *info = root->fs_info;
2612 	struct extent_map_tree *block_group_cache;
2613 	struct btrfs_key key;
2614 	struct btrfs_key found_key;
2615 	struct extent_buffer *leaf;
2616 
2617 	block_group_cache = &info->block_group_cache;
2618 
2619 	root = info->extent_root;
2620 	key.objectid = 0;
2621 	key.offset = BTRFS_BLOCK_GROUP_SIZE;
2622 	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2623 
2624 	path = btrfs_alloc_path();
2625 	if (!path)
2626 		return -ENOMEM;
2627 
2628 	while(1) {
2629 		ret = btrfs_search_slot(NULL, info->extent_root,
2630 					&key, path, 0, 0);
2631 		if (ret != 0) {
2632 			err = ret;
2633 			break;
2634 		}
2635 		leaf = path->nodes[0];
2636 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2637 		cache = kmalloc(sizeof(*cache), GFP_NOFS);
2638 		if (!cache) {
2639 			err = -1;
2640 			break;
2641 		}
2642 
2643 		read_extent_buffer(leaf, &cache->item,
2644 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
2645 				   sizeof(cache->item));
2646 		memcpy(&cache->key, &found_key, sizeof(found_key));
2647 		cache->cached = 0;
2648 		cache->pinned = 0;
2649 		key.objectid = found_key.objectid + found_key.offset;
2650 		btrfs_release_path(root, path);
2651 
2652 		if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
2653 			bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
2654 			cache->data = BTRFS_BLOCK_GROUP_MIXED;
2655 		} else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
2656 			bit = BLOCK_GROUP_DATA;
2657 			cache->data = BTRFS_BLOCK_GROUP_DATA;
2658 		} else {
2659 			bit = BLOCK_GROUP_METADATA;
2660 			cache->data = 0;
2661 		}
2662 
2663 		/* use EXTENT_LOCKED to prevent merging */
2664 		set_extent_bits(block_group_cache, found_key.objectid,
2665 				found_key.objectid + found_key.offset - 1,
2666 				bit | EXTENT_LOCKED, GFP_NOFS);
2667 		set_state_private(block_group_cache, found_key.objectid,
2668 				  (unsigned long)cache);
2669 
2670 		if (key.objectid >=
2671 		    btrfs_super_total_bytes(&info->super_copy))
2672 			break;
2673 	}
2674 
2675 	btrfs_free_path(path);
2676 	return 0;
2677 }
2678