xref: /openbmc/linux/fs/btrfs/free-space-tree.c (revision 7939dd9f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Facebook.  All rights reserved.
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "ctree.h"
9 #include "disk-io.h"
10 #include "locking.h"
11 #include "free-space-tree.h"
12 #include "transaction.h"
13 #include "block-group.h"
14 
15 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
16 					struct btrfs_block_group *block_group,
17 					struct btrfs_path *path);
18 
19 static struct btrfs_root *btrfs_free_space_root(
20 				struct btrfs_block_group *block_group)
21 {
22 	return block_group->fs_info->_free_space_root;
23 }
24 
25 void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
26 {
27 	u32 bitmap_range;
28 	size_t bitmap_size;
29 	u64 num_bitmaps, total_bitmap_size;
30 
31 	if (WARN_ON(cache->length == 0))
32 		btrfs_warn(cache->fs_info, "block group %llu length is zero",
33 			   cache->start);
34 
35 	/*
36 	 * We convert to bitmaps when the disk space required for using extents
37 	 * exceeds that required for using bitmaps.
38 	 */
39 	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
40 	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
41 	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
42 	total_bitmap_size = num_bitmaps * bitmap_size;
43 	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
44 					    sizeof(struct btrfs_item));
45 
46 	/*
47 	 * We allow for a small buffer between the high threshold and low
48 	 * threshold to avoid thrashing back and forth between the two formats.
49 	 */
50 	if (cache->bitmap_high_thresh > 100)
51 		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
52 	else
53 		cache->bitmap_low_thresh = 0;
54 }
55 
56 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
57 				   struct btrfs_block_group *block_group,
58 				   struct btrfs_path *path)
59 {
60 	struct btrfs_root *root = btrfs_free_space_root(block_group);
61 	struct btrfs_free_space_info *info;
62 	struct btrfs_key key;
63 	struct extent_buffer *leaf;
64 	int ret;
65 
66 	key.objectid = block_group->start;
67 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
68 	key.offset = block_group->length;
69 
70 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
71 	if (ret)
72 		goto out;
73 
74 	leaf = path->nodes[0];
75 	info = btrfs_item_ptr(leaf, path->slots[0],
76 			      struct btrfs_free_space_info);
77 	btrfs_set_free_space_extent_count(leaf, info, 0);
78 	btrfs_set_free_space_flags(leaf, info, 0);
79 	btrfs_mark_buffer_dirty(leaf);
80 
81 	ret = 0;
82 out:
83 	btrfs_release_path(path);
84 	return ret;
85 }
86 
87 EXPORT_FOR_TESTS
88 struct btrfs_free_space_info *search_free_space_info(
89 		struct btrfs_trans_handle *trans,
90 		struct btrfs_block_group *block_group,
91 		struct btrfs_path *path, int cow)
92 {
93 	struct btrfs_fs_info *fs_info = block_group->fs_info;
94 	struct btrfs_root *root = btrfs_free_space_root(block_group);
95 	struct btrfs_key key;
96 	int ret;
97 
98 	key.objectid = block_group->start;
99 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
100 	key.offset = block_group->length;
101 
102 	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
103 	if (ret < 0)
104 		return ERR_PTR(ret);
105 	if (ret != 0) {
106 		btrfs_warn(fs_info, "missing free space info for %llu",
107 			   block_group->start);
108 		ASSERT(0);
109 		return ERR_PTR(-ENOENT);
110 	}
111 
112 	return btrfs_item_ptr(path->nodes[0], path->slots[0],
113 			      struct btrfs_free_space_info);
114 }
115 
116 /*
117  * btrfs_search_slot() but we're looking for the greatest key less than the
118  * passed key.
119  */
120 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
121 				  struct btrfs_root *root,
122 				  struct btrfs_key *key, struct btrfs_path *p,
123 				  int ins_len, int cow)
124 {
125 	int ret;
126 
127 	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
128 	if (ret < 0)
129 		return ret;
130 
131 	if (ret == 0) {
132 		ASSERT(0);
133 		return -EIO;
134 	}
135 
136 	if (p->slots[0] == 0) {
137 		ASSERT(0);
138 		return -EIO;
139 	}
140 	p->slots[0]--;
141 
142 	return 0;
143 }
144 
145 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
146 					 u64 size)
147 {
148 	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
149 }
150 
151 static unsigned long *alloc_bitmap(u32 bitmap_size)
152 {
153 	unsigned long *ret;
154 	unsigned int nofs_flag;
155 	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
156 
157 	/*
158 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
159 	 * into the filesystem as the free space bitmap can be modified in the
160 	 * critical section of a transaction commit.
161 	 *
162 	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
163 	 * know that recursion is unsafe.
164 	 */
165 	nofs_flag = memalloc_nofs_save();
166 	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
167 	memalloc_nofs_restore(nofs_flag);
168 	return ret;
169 }
170 
171 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
172 {
173 	u8 *p = ((u8 *)map) + BIT_BYTE(start);
174 	const unsigned int size = start + len;
175 	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
176 	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
177 
178 	while (len - bits_to_set >= 0) {
179 		*p |= mask_to_set;
180 		len -= bits_to_set;
181 		bits_to_set = BITS_PER_BYTE;
182 		mask_to_set = ~0;
183 		p++;
184 	}
185 	if (len) {
186 		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
187 		*p |= mask_to_set;
188 	}
189 }
190 
191 EXPORT_FOR_TESTS
192 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
193 				  struct btrfs_block_group *block_group,
194 				  struct btrfs_path *path)
195 {
196 	struct btrfs_fs_info *fs_info = trans->fs_info;
197 	struct btrfs_root *root = btrfs_free_space_root(block_group);
198 	struct btrfs_free_space_info *info;
199 	struct btrfs_key key, found_key;
200 	struct extent_buffer *leaf;
201 	unsigned long *bitmap;
202 	char *bitmap_cursor;
203 	u64 start, end;
204 	u64 bitmap_range, i;
205 	u32 bitmap_size, flags, expected_extent_count;
206 	u32 extent_count = 0;
207 	int done = 0, nr;
208 	int ret;
209 
210 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
211 	bitmap = alloc_bitmap(bitmap_size);
212 	if (!bitmap) {
213 		ret = -ENOMEM;
214 		goto out;
215 	}
216 
217 	start = block_group->start;
218 	end = block_group->start + block_group->length;
219 
220 	key.objectid = end - 1;
221 	key.type = (u8)-1;
222 	key.offset = (u64)-1;
223 
224 	while (!done) {
225 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
226 		if (ret)
227 			goto out;
228 
229 		leaf = path->nodes[0];
230 		nr = 0;
231 		path->slots[0]++;
232 		while (path->slots[0] > 0) {
233 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
234 
235 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
236 				ASSERT(found_key.objectid == block_group->start);
237 				ASSERT(found_key.offset == block_group->length);
238 				done = 1;
239 				break;
240 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
241 				u64 first, last;
242 
243 				ASSERT(found_key.objectid >= start);
244 				ASSERT(found_key.objectid < end);
245 				ASSERT(found_key.objectid + found_key.offset <= end);
246 
247 				first = div_u64(found_key.objectid - start,
248 						fs_info->sectorsize);
249 				last = div_u64(found_key.objectid + found_key.offset - start,
250 					       fs_info->sectorsize);
251 				le_bitmap_set(bitmap, first, last - first);
252 
253 				extent_count++;
254 				nr++;
255 				path->slots[0]--;
256 			} else {
257 				ASSERT(0);
258 			}
259 		}
260 
261 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
262 		if (ret)
263 			goto out;
264 		btrfs_release_path(path);
265 	}
266 
267 	info = search_free_space_info(trans, block_group, path, 1);
268 	if (IS_ERR(info)) {
269 		ret = PTR_ERR(info);
270 		goto out;
271 	}
272 	leaf = path->nodes[0];
273 	flags = btrfs_free_space_flags(leaf, info);
274 	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
275 	btrfs_set_free_space_flags(leaf, info, flags);
276 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
277 	btrfs_mark_buffer_dirty(leaf);
278 	btrfs_release_path(path);
279 
280 	if (extent_count != expected_extent_count) {
281 		btrfs_err(fs_info,
282 			  "incorrect extent count for %llu; counted %u, expected %u",
283 			  block_group->start, extent_count,
284 			  expected_extent_count);
285 		ASSERT(0);
286 		ret = -EIO;
287 		goto out;
288 	}
289 
290 	bitmap_cursor = (char *)bitmap;
291 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
292 	i = start;
293 	while (i < end) {
294 		unsigned long ptr;
295 		u64 extent_size;
296 		u32 data_size;
297 
298 		extent_size = min(end - i, bitmap_range);
299 		data_size = free_space_bitmap_size(fs_info, extent_size);
300 
301 		key.objectid = i;
302 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
303 		key.offset = extent_size;
304 
305 		ret = btrfs_insert_empty_item(trans, root, path, &key,
306 					      data_size);
307 		if (ret)
308 			goto out;
309 
310 		leaf = path->nodes[0];
311 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
312 		write_extent_buffer(leaf, bitmap_cursor, ptr,
313 				    data_size);
314 		btrfs_mark_buffer_dirty(leaf);
315 		btrfs_release_path(path);
316 
317 		i += extent_size;
318 		bitmap_cursor += data_size;
319 	}
320 
321 	ret = 0;
322 out:
323 	kvfree(bitmap);
324 	if (ret)
325 		btrfs_abort_transaction(trans, ret);
326 	return ret;
327 }
328 
329 EXPORT_FOR_TESTS
330 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
331 				  struct btrfs_block_group *block_group,
332 				  struct btrfs_path *path)
333 {
334 	struct btrfs_fs_info *fs_info = trans->fs_info;
335 	struct btrfs_root *root = btrfs_free_space_root(block_group);
336 	struct btrfs_free_space_info *info;
337 	struct btrfs_key key, found_key;
338 	struct extent_buffer *leaf;
339 	unsigned long *bitmap;
340 	u64 start, end;
341 	u32 bitmap_size, flags, expected_extent_count;
342 	unsigned long nrbits, start_bit, end_bit;
343 	u32 extent_count = 0;
344 	int done = 0, nr;
345 	int ret;
346 
347 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
348 	bitmap = alloc_bitmap(bitmap_size);
349 	if (!bitmap) {
350 		ret = -ENOMEM;
351 		goto out;
352 	}
353 
354 	start = block_group->start;
355 	end = block_group->start + block_group->length;
356 
357 	key.objectid = end - 1;
358 	key.type = (u8)-1;
359 	key.offset = (u64)-1;
360 
361 	while (!done) {
362 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
363 		if (ret)
364 			goto out;
365 
366 		leaf = path->nodes[0];
367 		nr = 0;
368 		path->slots[0]++;
369 		while (path->slots[0] > 0) {
370 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
371 
372 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
373 				ASSERT(found_key.objectid == block_group->start);
374 				ASSERT(found_key.offset == block_group->length);
375 				done = 1;
376 				break;
377 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
378 				unsigned long ptr;
379 				char *bitmap_cursor;
380 				u32 bitmap_pos, data_size;
381 
382 				ASSERT(found_key.objectid >= start);
383 				ASSERT(found_key.objectid < end);
384 				ASSERT(found_key.objectid + found_key.offset <= end);
385 
386 				bitmap_pos = div_u64(found_key.objectid - start,
387 						     fs_info->sectorsize *
388 						     BITS_PER_BYTE);
389 				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
390 				data_size = free_space_bitmap_size(fs_info,
391 								found_key.offset);
392 
393 				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
394 				read_extent_buffer(leaf, bitmap_cursor, ptr,
395 						   data_size);
396 
397 				nr++;
398 				path->slots[0]--;
399 			} else {
400 				ASSERT(0);
401 			}
402 		}
403 
404 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
405 		if (ret)
406 			goto out;
407 		btrfs_release_path(path);
408 	}
409 
410 	info = search_free_space_info(trans, block_group, path, 1);
411 	if (IS_ERR(info)) {
412 		ret = PTR_ERR(info);
413 		goto out;
414 	}
415 	leaf = path->nodes[0];
416 	flags = btrfs_free_space_flags(leaf, info);
417 	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
418 	btrfs_set_free_space_flags(leaf, info, flags);
419 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
420 	btrfs_mark_buffer_dirty(leaf);
421 	btrfs_release_path(path);
422 
423 	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
424 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
425 
426 	while (start_bit < nrbits) {
427 		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
428 		ASSERT(start_bit < end_bit);
429 
430 		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
431 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
432 		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
433 
434 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
435 		if (ret)
436 			goto out;
437 		btrfs_release_path(path);
438 
439 		extent_count++;
440 
441 		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
442 	}
443 
444 	if (extent_count != expected_extent_count) {
445 		btrfs_err(fs_info,
446 			  "incorrect extent count for %llu; counted %u, expected %u",
447 			  block_group->start, extent_count,
448 			  expected_extent_count);
449 		ASSERT(0);
450 		ret = -EIO;
451 		goto out;
452 	}
453 
454 	ret = 0;
455 out:
456 	kvfree(bitmap);
457 	if (ret)
458 		btrfs_abort_transaction(trans, ret);
459 	return ret;
460 }
461 
462 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
463 					  struct btrfs_block_group *block_group,
464 					  struct btrfs_path *path,
465 					  int new_extents)
466 {
467 	struct btrfs_free_space_info *info;
468 	u32 flags;
469 	u32 extent_count;
470 	int ret = 0;
471 
472 	if (new_extents == 0)
473 		return 0;
474 
475 	info = search_free_space_info(trans, block_group, path, 1);
476 	if (IS_ERR(info)) {
477 		ret = PTR_ERR(info);
478 		goto out;
479 	}
480 	flags = btrfs_free_space_flags(path->nodes[0], info);
481 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
482 
483 	extent_count += new_extents;
484 	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
485 	btrfs_mark_buffer_dirty(path->nodes[0]);
486 	btrfs_release_path(path);
487 
488 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
489 	    extent_count > block_group->bitmap_high_thresh) {
490 		ret = convert_free_space_to_bitmaps(trans, block_group, path);
491 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
492 		   extent_count < block_group->bitmap_low_thresh) {
493 		ret = convert_free_space_to_extents(trans, block_group, path);
494 	}
495 
496 out:
497 	return ret;
498 }
499 
500 EXPORT_FOR_TESTS
501 int free_space_test_bit(struct btrfs_block_group *block_group,
502 			struct btrfs_path *path, u64 offset)
503 {
504 	struct extent_buffer *leaf;
505 	struct btrfs_key key;
506 	u64 found_start, found_end;
507 	unsigned long ptr, i;
508 
509 	leaf = path->nodes[0];
510 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
511 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
512 
513 	found_start = key.objectid;
514 	found_end = key.objectid + key.offset;
515 	ASSERT(offset >= found_start && offset < found_end);
516 
517 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
518 	i = div_u64(offset - found_start,
519 		    block_group->fs_info->sectorsize);
520 	return !!extent_buffer_test_bit(leaf, ptr, i);
521 }
522 
523 static void free_space_set_bits(struct btrfs_block_group *block_group,
524 				struct btrfs_path *path, u64 *start, u64 *size,
525 				int bit)
526 {
527 	struct btrfs_fs_info *fs_info = block_group->fs_info;
528 	struct extent_buffer *leaf;
529 	struct btrfs_key key;
530 	u64 end = *start + *size;
531 	u64 found_start, found_end;
532 	unsigned long ptr, first, last;
533 
534 	leaf = path->nodes[0];
535 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
536 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
537 
538 	found_start = key.objectid;
539 	found_end = key.objectid + key.offset;
540 	ASSERT(*start >= found_start && *start < found_end);
541 	ASSERT(end > found_start);
542 
543 	if (end > found_end)
544 		end = found_end;
545 
546 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
547 	first = (*start - found_start) >> fs_info->sectorsize_bits;
548 	last = (end - found_start) >> fs_info->sectorsize_bits;
549 	if (bit)
550 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
551 	else
552 		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
553 	btrfs_mark_buffer_dirty(leaf);
554 
555 	*size -= end - *start;
556 	*start = end;
557 }
558 
559 /*
560  * We can't use btrfs_next_item() in modify_free_space_bitmap() because
561  * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
562  * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
563  * looking for.
564  */
565 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
566 				  struct btrfs_root *root, struct btrfs_path *p)
567 {
568 	struct btrfs_key key;
569 
570 	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
571 		p->slots[0]++;
572 		return 0;
573 	}
574 
575 	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
576 	btrfs_release_path(p);
577 
578 	key.objectid += key.offset;
579 	key.type = (u8)-1;
580 	key.offset = (u64)-1;
581 
582 	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
583 }
584 
585 /*
586  * If remove is 1, then we are removing free space, thus clearing bits in the
587  * bitmap. If remove is 0, then we are adding free space, thus setting bits in
588  * the bitmap.
589  */
590 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
591 				    struct btrfs_block_group *block_group,
592 				    struct btrfs_path *path,
593 				    u64 start, u64 size, int remove)
594 {
595 	struct btrfs_root *root = btrfs_free_space_root(block_group);
596 	struct btrfs_key key;
597 	u64 end = start + size;
598 	u64 cur_start, cur_size;
599 	int prev_bit, next_bit;
600 	int new_extents;
601 	int ret;
602 
603 	/*
604 	 * Read the bit for the block immediately before the extent of space if
605 	 * that block is within the block group.
606 	 */
607 	if (start > block_group->start) {
608 		u64 prev_block = start - block_group->fs_info->sectorsize;
609 
610 		key.objectid = prev_block;
611 		key.type = (u8)-1;
612 		key.offset = (u64)-1;
613 
614 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
615 		if (ret)
616 			goto out;
617 
618 		prev_bit = free_space_test_bit(block_group, path, prev_block);
619 
620 		/* The previous block may have been in the previous bitmap. */
621 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
622 		if (start >= key.objectid + key.offset) {
623 			ret = free_space_next_bitmap(trans, root, path);
624 			if (ret)
625 				goto out;
626 		}
627 	} else {
628 		key.objectid = start;
629 		key.type = (u8)-1;
630 		key.offset = (u64)-1;
631 
632 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
633 		if (ret)
634 			goto out;
635 
636 		prev_bit = -1;
637 	}
638 
639 	/*
640 	 * Iterate over all of the bitmaps overlapped by the extent of space,
641 	 * clearing/setting bits as required.
642 	 */
643 	cur_start = start;
644 	cur_size = size;
645 	while (1) {
646 		free_space_set_bits(block_group, path, &cur_start, &cur_size,
647 				    !remove);
648 		if (cur_size == 0)
649 			break;
650 		ret = free_space_next_bitmap(trans, root, path);
651 		if (ret)
652 			goto out;
653 	}
654 
655 	/*
656 	 * Read the bit for the block immediately after the extent of space if
657 	 * that block is within the block group.
658 	 */
659 	if (end < block_group->start + block_group->length) {
660 		/* The next block may be in the next bitmap. */
661 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
662 		if (end >= key.objectid + key.offset) {
663 			ret = free_space_next_bitmap(trans, root, path);
664 			if (ret)
665 				goto out;
666 		}
667 
668 		next_bit = free_space_test_bit(block_group, path, end);
669 	} else {
670 		next_bit = -1;
671 	}
672 
673 	if (remove) {
674 		new_extents = -1;
675 		if (prev_bit == 1) {
676 			/* Leftover on the left. */
677 			new_extents++;
678 		}
679 		if (next_bit == 1) {
680 			/* Leftover on the right. */
681 			new_extents++;
682 		}
683 	} else {
684 		new_extents = 1;
685 		if (prev_bit == 1) {
686 			/* Merging with neighbor on the left. */
687 			new_extents--;
688 		}
689 		if (next_bit == 1) {
690 			/* Merging with neighbor on the right. */
691 			new_extents--;
692 		}
693 	}
694 
695 	btrfs_release_path(path);
696 	ret = update_free_space_extent_count(trans, block_group, path,
697 					     new_extents);
698 
699 out:
700 	return ret;
701 }
702 
703 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
704 				    struct btrfs_block_group *block_group,
705 				    struct btrfs_path *path,
706 				    u64 start, u64 size)
707 {
708 	struct btrfs_root *root = btrfs_free_space_root(block_group);
709 	struct btrfs_key key;
710 	u64 found_start, found_end;
711 	u64 end = start + size;
712 	int new_extents = -1;
713 	int ret;
714 
715 	key.objectid = start;
716 	key.type = (u8)-1;
717 	key.offset = (u64)-1;
718 
719 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
720 	if (ret)
721 		goto out;
722 
723 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
724 
725 	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
726 
727 	found_start = key.objectid;
728 	found_end = key.objectid + key.offset;
729 	ASSERT(start >= found_start && end <= found_end);
730 
731 	/*
732 	 * Okay, now that we've found the free space extent which contains the
733 	 * free space that we are removing, there are four cases:
734 	 *
735 	 * 1. We're using the whole extent: delete the key we found and
736 	 * decrement the free space extent count.
737 	 * 2. We are using part of the extent starting at the beginning: delete
738 	 * the key we found and insert a new key representing the leftover at
739 	 * the end. There is no net change in the number of extents.
740 	 * 3. We are using part of the extent ending at the end: delete the key
741 	 * we found and insert a new key representing the leftover at the
742 	 * beginning. There is no net change in the number of extents.
743 	 * 4. We are using part of the extent in the middle: delete the key we
744 	 * found and insert two new keys representing the leftovers on each
745 	 * side. Where we used to have one extent, we now have two, so increment
746 	 * the extent count. We may need to convert the block group to bitmaps
747 	 * as a result.
748 	 */
749 
750 	/* Delete the existing key (cases 1-4). */
751 	ret = btrfs_del_item(trans, root, path);
752 	if (ret)
753 		goto out;
754 
755 	/* Add a key for leftovers at the beginning (cases 3 and 4). */
756 	if (start > found_start) {
757 		key.objectid = found_start;
758 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
759 		key.offset = start - found_start;
760 
761 		btrfs_release_path(path);
762 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
763 		if (ret)
764 			goto out;
765 		new_extents++;
766 	}
767 
768 	/* Add a key for leftovers at the end (cases 2 and 4). */
769 	if (end < found_end) {
770 		key.objectid = end;
771 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
772 		key.offset = found_end - end;
773 
774 		btrfs_release_path(path);
775 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
776 		if (ret)
777 			goto out;
778 		new_extents++;
779 	}
780 
781 	btrfs_release_path(path);
782 	ret = update_free_space_extent_count(trans, block_group, path,
783 					     new_extents);
784 
785 out:
786 	return ret;
787 }
788 
789 EXPORT_FOR_TESTS
790 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
791 				  struct btrfs_block_group *block_group,
792 				  struct btrfs_path *path, u64 start, u64 size)
793 {
794 	struct btrfs_free_space_info *info;
795 	u32 flags;
796 	int ret;
797 
798 	if (block_group->needs_free_space) {
799 		ret = __add_block_group_free_space(trans, block_group, path);
800 		if (ret)
801 			return ret;
802 	}
803 
804 	info = search_free_space_info(NULL, block_group, path, 0);
805 	if (IS_ERR(info))
806 		return PTR_ERR(info);
807 	flags = btrfs_free_space_flags(path->nodes[0], info);
808 	btrfs_release_path(path);
809 
810 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
811 		return modify_free_space_bitmap(trans, block_group, path,
812 						start, size, 1);
813 	} else {
814 		return remove_free_space_extent(trans, block_group, path,
815 						start, size);
816 	}
817 }
818 
819 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
820 				u64 start, u64 size)
821 {
822 	struct btrfs_block_group *block_group;
823 	struct btrfs_path *path;
824 	int ret;
825 
826 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
827 		return 0;
828 
829 	path = btrfs_alloc_path();
830 	if (!path) {
831 		ret = -ENOMEM;
832 		goto out;
833 	}
834 
835 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
836 	if (!block_group) {
837 		ASSERT(0);
838 		ret = -ENOENT;
839 		goto out;
840 	}
841 
842 	mutex_lock(&block_group->free_space_lock);
843 	ret = __remove_from_free_space_tree(trans, block_group, path, start,
844 					    size);
845 	mutex_unlock(&block_group->free_space_lock);
846 
847 	btrfs_put_block_group(block_group);
848 out:
849 	btrfs_free_path(path);
850 	if (ret)
851 		btrfs_abort_transaction(trans, ret);
852 	return ret;
853 }
854 
855 static int add_free_space_extent(struct btrfs_trans_handle *trans,
856 				 struct btrfs_block_group *block_group,
857 				 struct btrfs_path *path,
858 				 u64 start, u64 size)
859 {
860 	struct btrfs_root *root = btrfs_free_space_root(block_group);
861 	struct btrfs_key key, new_key;
862 	u64 found_start, found_end;
863 	u64 end = start + size;
864 	int new_extents = 1;
865 	int ret;
866 
867 	/*
868 	 * We are adding a new extent of free space, but we need to merge
869 	 * extents. There are four cases here:
870 	 *
871 	 * 1. The new extent does not have any immediate neighbors to merge
872 	 * with: add the new key and increment the free space extent count. We
873 	 * may need to convert the block group to bitmaps as a result.
874 	 * 2. The new extent has an immediate neighbor before it: remove the
875 	 * previous key and insert a new key combining both of them. There is no
876 	 * net change in the number of extents.
877 	 * 3. The new extent has an immediate neighbor after it: remove the next
878 	 * key and insert a new key combining both of them. There is no net
879 	 * change in the number of extents.
880 	 * 4. The new extent has immediate neighbors on both sides: remove both
881 	 * of the keys and insert a new key combining all of them. Where we used
882 	 * to have two extents, we now have one, so decrement the extent count.
883 	 */
884 
885 	new_key.objectid = start;
886 	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
887 	new_key.offset = size;
888 
889 	/* Search for a neighbor on the left. */
890 	if (start == block_group->start)
891 		goto right;
892 	key.objectid = start - 1;
893 	key.type = (u8)-1;
894 	key.offset = (u64)-1;
895 
896 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
897 	if (ret)
898 		goto out;
899 
900 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
901 
902 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
903 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
904 		btrfs_release_path(path);
905 		goto right;
906 	}
907 
908 	found_start = key.objectid;
909 	found_end = key.objectid + key.offset;
910 	ASSERT(found_start >= block_group->start &&
911 	       found_end > block_group->start);
912 	ASSERT(found_start < start && found_end <= start);
913 
914 	/*
915 	 * Delete the neighbor on the left and absorb it into the new key (cases
916 	 * 2 and 4).
917 	 */
918 	if (found_end == start) {
919 		ret = btrfs_del_item(trans, root, path);
920 		if (ret)
921 			goto out;
922 		new_key.objectid = found_start;
923 		new_key.offset += key.offset;
924 		new_extents--;
925 	}
926 	btrfs_release_path(path);
927 
928 right:
929 	/* Search for a neighbor on the right. */
930 	if (end == block_group->start + block_group->length)
931 		goto insert;
932 	key.objectid = end;
933 	key.type = (u8)-1;
934 	key.offset = (u64)-1;
935 
936 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
937 	if (ret)
938 		goto out;
939 
940 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
941 
942 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
943 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
944 		btrfs_release_path(path);
945 		goto insert;
946 	}
947 
948 	found_start = key.objectid;
949 	found_end = key.objectid + key.offset;
950 	ASSERT(found_start >= block_group->start &&
951 	       found_end > block_group->start);
952 	ASSERT((found_start < start && found_end <= start) ||
953 	       (found_start >= end && found_end > end));
954 
955 	/*
956 	 * Delete the neighbor on the right and absorb it into the new key
957 	 * (cases 3 and 4).
958 	 */
959 	if (found_start == end) {
960 		ret = btrfs_del_item(trans, root, path);
961 		if (ret)
962 			goto out;
963 		new_key.offset += key.offset;
964 		new_extents--;
965 	}
966 	btrfs_release_path(path);
967 
968 insert:
969 	/* Insert the new key (cases 1-4). */
970 	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
971 	if (ret)
972 		goto out;
973 
974 	btrfs_release_path(path);
975 	ret = update_free_space_extent_count(trans, block_group, path,
976 					     new_extents);
977 
978 out:
979 	return ret;
980 }
981 
982 EXPORT_FOR_TESTS
983 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
984 			     struct btrfs_block_group *block_group,
985 			     struct btrfs_path *path, u64 start, u64 size)
986 {
987 	struct btrfs_free_space_info *info;
988 	u32 flags;
989 	int ret;
990 
991 	if (block_group->needs_free_space) {
992 		ret = __add_block_group_free_space(trans, block_group, path);
993 		if (ret)
994 			return ret;
995 	}
996 
997 	info = search_free_space_info(NULL, block_group, path, 0);
998 	if (IS_ERR(info))
999 		return PTR_ERR(info);
1000 	flags = btrfs_free_space_flags(path->nodes[0], info);
1001 	btrfs_release_path(path);
1002 
1003 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1004 		return modify_free_space_bitmap(trans, block_group, path,
1005 						start, size, 0);
1006 	} else {
1007 		return add_free_space_extent(trans, block_group, path, start,
1008 					     size);
1009 	}
1010 }
1011 
1012 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1013 			   u64 start, u64 size)
1014 {
1015 	struct btrfs_block_group *block_group;
1016 	struct btrfs_path *path;
1017 	int ret;
1018 
1019 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1020 		return 0;
1021 
1022 	path = btrfs_alloc_path();
1023 	if (!path) {
1024 		ret = -ENOMEM;
1025 		goto out;
1026 	}
1027 
1028 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1029 	if (!block_group) {
1030 		ASSERT(0);
1031 		ret = -ENOENT;
1032 		goto out;
1033 	}
1034 
1035 	mutex_lock(&block_group->free_space_lock);
1036 	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1037 	mutex_unlock(&block_group->free_space_lock);
1038 
1039 	btrfs_put_block_group(block_group);
1040 out:
1041 	btrfs_free_path(path);
1042 	if (ret)
1043 		btrfs_abort_transaction(trans, ret);
1044 	return ret;
1045 }
1046 
1047 /*
1048  * Populate the free space tree by walking the extent tree. Operations on the
1049  * extent tree that happen as a result of writes to the free space tree will go
1050  * through the normal add/remove hooks.
1051  */
1052 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1053 				    struct btrfs_block_group *block_group)
1054 {
1055 	struct btrfs_root *extent_root;
1056 	struct btrfs_path *path, *path2;
1057 	struct btrfs_key key;
1058 	u64 start, end;
1059 	int ret;
1060 
1061 	path = btrfs_alloc_path();
1062 	if (!path)
1063 		return -ENOMEM;
1064 	path->reada = READA_FORWARD;
1065 
1066 	path2 = btrfs_alloc_path();
1067 	if (!path2) {
1068 		btrfs_free_path(path);
1069 		return -ENOMEM;
1070 	}
1071 
1072 	ret = add_new_free_space_info(trans, block_group, path2);
1073 	if (ret)
1074 		goto out;
1075 
1076 	mutex_lock(&block_group->free_space_lock);
1077 
1078 	/*
1079 	 * Iterate through all of the extent and metadata items in this block
1080 	 * group, adding the free space between them and the free space at the
1081 	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1082 	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1083 	 * contained in.
1084 	 */
1085 	key.objectid = block_group->start;
1086 	key.type = BTRFS_EXTENT_ITEM_KEY;
1087 	key.offset = 0;
1088 
1089 	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1090 	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1091 	if (ret < 0)
1092 		goto out_locked;
1093 	ASSERT(ret == 0);
1094 
1095 	start = block_group->start;
1096 	end = block_group->start + block_group->length;
1097 	while (1) {
1098 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1099 
1100 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1101 		    key.type == BTRFS_METADATA_ITEM_KEY) {
1102 			if (key.objectid >= end)
1103 				break;
1104 
1105 			if (start < key.objectid) {
1106 				ret = __add_to_free_space_tree(trans,
1107 							       block_group,
1108 							       path2, start,
1109 							       key.objectid -
1110 							       start);
1111 				if (ret)
1112 					goto out_locked;
1113 			}
1114 			start = key.objectid;
1115 			if (key.type == BTRFS_METADATA_ITEM_KEY)
1116 				start += trans->fs_info->nodesize;
1117 			else
1118 				start += key.offset;
1119 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1120 			if (key.objectid != block_group->start)
1121 				break;
1122 		}
1123 
1124 		ret = btrfs_next_item(extent_root, path);
1125 		if (ret < 0)
1126 			goto out_locked;
1127 		if (ret)
1128 			break;
1129 	}
1130 	if (start < end) {
1131 		ret = __add_to_free_space_tree(trans, block_group, path2,
1132 					       start, end - start);
1133 		if (ret)
1134 			goto out_locked;
1135 	}
1136 
1137 	ret = 0;
1138 out_locked:
1139 	mutex_unlock(&block_group->free_space_lock);
1140 out:
1141 	btrfs_free_path(path2);
1142 	btrfs_free_path(path);
1143 	return ret;
1144 }
1145 
1146 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1147 {
1148 	struct btrfs_trans_handle *trans;
1149 	struct btrfs_root *tree_root = fs_info->tree_root;
1150 	struct btrfs_root *free_space_root;
1151 	struct btrfs_block_group *block_group;
1152 	struct rb_node *node;
1153 	int ret;
1154 
1155 	trans = btrfs_start_transaction(tree_root, 0);
1156 	if (IS_ERR(trans))
1157 		return PTR_ERR(trans);
1158 
1159 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1160 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1161 	free_space_root = btrfs_create_tree(trans,
1162 					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1163 	if (IS_ERR(free_space_root)) {
1164 		ret = PTR_ERR(free_space_root);
1165 		goto abort;
1166 	}
1167 	fs_info->_free_space_root = free_space_root;
1168 
1169 	node = rb_first(&fs_info->block_group_cache_tree);
1170 	while (node) {
1171 		block_group = rb_entry(node, struct btrfs_block_group,
1172 				       cache_node);
1173 		ret = populate_free_space_tree(trans, block_group);
1174 		if (ret)
1175 			goto abort;
1176 		node = rb_next(node);
1177 	}
1178 
1179 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1180 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1181 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1182 	ret = btrfs_commit_transaction(trans);
1183 
1184 	/*
1185 	 * Now that we've committed the transaction any reading of our commit
1186 	 * root will be safe, so we can cache from the free space tree now.
1187 	 */
1188 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1189 	return ret;
1190 
1191 abort:
1192 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1193 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1194 	btrfs_abort_transaction(trans, ret);
1195 	btrfs_end_transaction(trans);
1196 	return ret;
1197 }
1198 
1199 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1200 				 struct btrfs_root *root)
1201 {
1202 	struct btrfs_path *path;
1203 	struct btrfs_key key;
1204 	int nr;
1205 	int ret;
1206 
1207 	path = btrfs_alloc_path();
1208 	if (!path)
1209 		return -ENOMEM;
1210 
1211 	key.objectid = 0;
1212 	key.type = 0;
1213 	key.offset = 0;
1214 
1215 	while (1) {
1216 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1217 		if (ret < 0)
1218 			goto out;
1219 
1220 		nr = btrfs_header_nritems(path->nodes[0]);
1221 		if (!nr)
1222 			break;
1223 
1224 		path->slots[0] = 0;
1225 		ret = btrfs_del_items(trans, root, path, 0, nr);
1226 		if (ret)
1227 			goto out;
1228 
1229 		btrfs_release_path(path);
1230 	}
1231 
1232 	ret = 0;
1233 out:
1234 	btrfs_free_path(path);
1235 	return ret;
1236 }
1237 
1238 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1239 {
1240 	struct btrfs_trans_handle *trans;
1241 	struct btrfs_root *tree_root = fs_info->tree_root;
1242 	struct btrfs_root *free_space_root = fs_info->_free_space_root;
1243 	int ret;
1244 
1245 	trans = btrfs_start_transaction(tree_root, 0);
1246 	if (IS_ERR(trans))
1247 		return PTR_ERR(trans);
1248 
1249 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1250 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1251 	fs_info->_free_space_root = NULL;
1252 
1253 	ret = clear_free_space_tree(trans, free_space_root);
1254 	if (ret)
1255 		goto abort;
1256 
1257 	ret = btrfs_del_root(trans, &free_space_root->root_key);
1258 	if (ret)
1259 		goto abort;
1260 
1261 	list_del(&free_space_root->dirty_list);
1262 
1263 	btrfs_tree_lock(free_space_root->node);
1264 	btrfs_clean_tree_block(free_space_root->node);
1265 	btrfs_tree_unlock(free_space_root->node);
1266 	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1267 			      free_space_root->node, 0, 1);
1268 
1269 	btrfs_put_root(free_space_root);
1270 
1271 	return btrfs_commit_transaction(trans);
1272 
1273 abort:
1274 	btrfs_abort_transaction(trans, ret);
1275 	btrfs_end_transaction(trans);
1276 	return ret;
1277 }
1278 
1279 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1280 					struct btrfs_block_group *block_group,
1281 					struct btrfs_path *path)
1282 {
1283 	int ret;
1284 
1285 	block_group->needs_free_space = 0;
1286 
1287 	ret = add_new_free_space_info(trans, block_group, path);
1288 	if (ret)
1289 		return ret;
1290 
1291 	return __add_to_free_space_tree(trans, block_group, path,
1292 					block_group->start,
1293 					block_group->length);
1294 }
1295 
1296 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1297 			       struct btrfs_block_group *block_group)
1298 {
1299 	struct btrfs_fs_info *fs_info = trans->fs_info;
1300 	struct btrfs_path *path = NULL;
1301 	int ret = 0;
1302 
1303 	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1304 		return 0;
1305 
1306 	mutex_lock(&block_group->free_space_lock);
1307 	if (!block_group->needs_free_space)
1308 		goto out;
1309 
1310 	path = btrfs_alloc_path();
1311 	if (!path) {
1312 		ret = -ENOMEM;
1313 		goto out;
1314 	}
1315 
1316 	ret = __add_block_group_free_space(trans, block_group, path);
1317 
1318 out:
1319 	btrfs_free_path(path);
1320 	mutex_unlock(&block_group->free_space_lock);
1321 	if (ret)
1322 		btrfs_abort_transaction(trans, ret);
1323 	return ret;
1324 }
1325 
1326 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1327 				  struct btrfs_block_group *block_group)
1328 {
1329 	struct btrfs_root *root = btrfs_free_space_root(block_group);
1330 	struct btrfs_path *path;
1331 	struct btrfs_key key, found_key;
1332 	struct extent_buffer *leaf;
1333 	u64 start, end;
1334 	int done = 0, nr;
1335 	int ret;
1336 
1337 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1338 		return 0;
1339 
1340 	if (block_group->needs_free_space) {
1341 		/* We never added this block group to the free space tree. */
1342 		return 0;
1343 	}
1344 
1345 	path = btrfs_alloc_path();
1346 	if (!path) {
1347 		ret = -ENOMEM;
1348 		goto out;
1349 	}
1350 
1351 	start = block_group->start;
1352 	end = block_group->start + block_group->length;
1353 
1354 	key.objectid = end - 1;
1355 	key.type = (u8)-1;
1356 	key.offset = (u64)-1;
1357 
1358 	while (!done) {
1359 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1360 		if (ret)
1361 			goto out;
1362 
1363 		leaf = path->nodes[0];
1364 		nr = 0;
1365 		path->slots[0]++;
1366 		while (path->slots[0] > 0) {
1367 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1368 
1369 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1370 				ASSERT(found_key.objectid == block_group->start);
1371 				ASSERT(found_key.offset == block_group->length);
1372 				done = 1;
1373 				nr++;
1374 				path->slots[0]--;
1375 				break;
1376 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1377 				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1378 				ASSERT(found_key.objectid >= start);
1379 				ASSERT(found_key.objectid < end);
1380 				ASSERT(found_key.objectid + found_key.offset <= end);
1381 				nr++;
1382 				path->slots[0]--;
1383 			} else {
1384 				ASSERT(0);
1385 			}
1386 		}
1387 
1388 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1389 		if (ret)
1390 			goto out;
1391 		btrfs_release_path(path);
1392 	}
1393 
1394 	ret = 0;
1395 out:
1396 	btrfs_free_path(path);
1397 	if (ret)
1398 		btrfs_abort_transaction(trans, ret);
1399 	return ret;
1400 }
1401 
1402 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1403 				   struct btrfs_path *path,
1404 				   u32 expected_extent_count)
1405 {
1406 	struct btrfs_block_group *block_group;
1407 	struct btrfs_fs_info *fs_info;
1408 	struct btrfs_root *root;
1409 	struct btrfs_key key;
1410 	int prev_bit = 0, bit;
1411 	/* Initialize to silence GCC. */
1412 	u64 extent_start = 0;
1413 	u64 end, offset;
1414 	u64 total_found = 0;
1415 	u32 extent_count = 0;
1416 	int ret;
1417 
1418 	block_group = caching_ctl->block_group;
1419 	fs_info = block_group->fs_info;
1420 	root = btrfs_free_space_root(block_group);
1421 
1422 	end = block_group->start + block_group->length;
1423 
1424 	while (1) {
1425 		ret = btrfs_next_item(root, path);
1426 		if (ret < 0)
1427 			goto out;
1428 		if (ret)
1429 			break;
1430 
1431 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1432 
1433 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1434 			break;
1435 
1436 		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1437 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1438 
1439 		caching_ctl->progress = key.objectid;
1440 
1441 		offset = key.objectid;
1442 		while (offset < key.objectid + key.offset) {
1443 			bit = free_space_test_bit(block_group, path, offset);
1444 			if (prev_bit == 0 && bit == 1) {
1445 				extent_start = offset;
1446 			} else if (prev_bit == 1 && bit == 0) {
1447 				total_found += add_new_free_space(block_group,
1448 								  extent_start,
1449 								  offset);
1450 				if (total_found > CACHING_CTL_WAKE_UP) {
1451 					total_found = 0;
1452 					wake_up(&caching_ctl->wait);
1453 				}
1454 				extent_count++;
1455 			}
1456 			prev_bit = bit;
1457 			offset += fs_info->sectorsize;
1458 		}
1459 	}
1460 	if (prev_bit == 1) {
1461 		total_found += add_new_free_space(block_group, extent_start,
1462 						  end);
1463 		extent_count++;
1464 	}
1465 
1466 	if (extent_count != expected_extent_count) {
1467 		btrfs_err(fs_info,
1468 			  "incorrect extent count for %llu; counted %u, expected %u",
1469 			  block_group->start, extent_count,
1470 			  expected_extent_count);
1471 		ASSERT(0);
1472 		ret = -EIO;
1473 		goto out;
1474 	}
1475 
1476 	caching_ctl->progress = (u64)-1;
1477 
1478 	ret = 0;
1479 out:
1480 	return ret;
1481 }
1482 
1483 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1484 				   struct btrfs_path *path,
1485 				   u32 expected_extent_count)
1486 {
1487 	struct btrfs_block_group *block_group;
1488 	struct btrfs_fs_info *fs_info;
1489 	struct btrfs_root *root;
1490 	struct btrfs_key key;
1491 	u64 end;
1492 	u64 total_found = 0;
1493 	u32 extent_count = 0;
1494 	int ret;
1495 
1496 	block_group = caching_ctl->block_group;
1497 	fs_info = block_group->fs_info;
1498 	root = btrfs_free_space_root(block_group);
1499 
1500 	end = block_group->start + block_group->length;
1501 
1502 	while (1) {
1503 		ret = btrfs_next_item(root, path);
1504 		if (ret < 0)
1505 			goto out;
1506 		if (ret)
1507 			break;
1508 
1509 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1510 
1511 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1512 			break;
1513 
1514 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1515 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1516 
1517 		caching_ctl->progress = key.objectid;
1518 
1519 		total_found += add_new_free_space(block_group, key.objectid,
1520 						  key.objectid + key.offset);
1521 		if (total_found > CACHING_CTL_WAKE_UP) {
1522 			total_found = 0;
1523 			wake_up(&caching_ctl->wait);
1524 		}
1525 		extent_count++;
1526 	}
1527 
1528 	if (extent_count != expected_extent_count) {
1529 		btrfs_err(fs_info,
1530 			  "incorrect extent count for %llu; counted %u, expected %u",
1531 			  block_group->start, extent_count,
1532 			  expected_extent_count);
1533 		ASSERT(0);
1534 		ret = -EIO;
1535 		goto out;
1536 	}
1537 
1538 	caching_ctl->progress = (u64)-1;
1539 
1540 	ret = 0;
1541 out:
1542 	return ret;
1543 }
1544 
1545 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1546 {
1547 	struct btrfs_block_group *block_group;
1548 	struct btrfs_free_space_info *info;
1549 	struct btrfs_path *path;
1550 	u32 extent_count, flags;
1551 	int ret;
1552 
1553 	block_group = caching_ctl->block_group;
1554 
1555 	path = btrfs_alloc_path();
1556 	if (!path)
1557 		return -ENOMEM;
1558 
1559 	/*
1560 	 * Just like caching_thread() doesn't want to deadlock on the extent
1561 	 * tree, we don't want to deadlock on the free space tree.
1562 	 */
1563 	path->skip_locking = 1;
1564 	path->search_commit_root = 1;
1565 	path->reada = READA_FORWARD;
1566 
1567 	info = search_free_space_info(NULL, block_group, path, 0);
1568 	if (IS_ERR(info)) {
1569 		ret = PTR_ERR(info);
1570 		goto out;
1571 	}
1572 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1573 	flags = btrfs_free_space_flags(path->nodes[0], info);
1574 
1575 	/*
1576 	 * We left path pointing to the free space info item, so now
1577 	 * load_free_space_foo can just iterate through the free space tree from
1578 	 * there.
1579 	 */
1580 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1581 		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1582 	else
1583 		ret = load_free_space_extents(caching_ctl, path, extent_count);
1584 
1585 out:
1586 	btrfs_free_path(path);
1587 	return ret;
1588 }
1589