xref: /openbmc/linux/fs/btrfs/free-space-tree.c (revision 07e81dc9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Facebook.  All rights reserved.
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 
18 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
19 					struct btrfs_block_group *block_group,
20 					struct btrfs_path *path);
21 
22 static struct btrfs_root *btrfs_free_space_root(
23 				struct btrfs_block_group *block_group)
24 {
25 	struct btrfs_key key = {
26 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
27 		.type = BTRFS_ROOT_ITEM_KEY,
28 		.offset = 0,
29 	};
30 
31 	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
32 		key.offset = block_group->global_root_id;
33 	return btrfs_global_root(block_group->fs_info, &key);
34 }
35 
36 void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
37 {
38 	u32 bitmap_range;
39 	size_t bitmap_size;
40 	u64 num_bitmaps, total_bitmap_size;
41 
42 	if (WARN_ON(cache->length == 0))
43 		btrfs_warn(cache->fs_info, "block group %llu length is zero",
44 			   cache->start);
45 
46 	/*
47 	 * We convert to bitmaps when the disk space required for using extents
48 	 * exceeds that required for using bitmaps.
49 	 */
50 	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
51 	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
52 	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
53 	total_bitmap_size = num_bitmaps * bitmap_size;
54 	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
55 					    sizeof(struct btrfs_item));
56 
57 	/*
58 	 * We allow for a small buffer between the high threshold and low
59 	 * threshold to avoid thrashing back and forth between the two formats.
60 	 */
61 	if (cache->bitmap_high_thresh > 100)
62 		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
63 	else
64 		cache->bitmap_low_thresh = 0;
65 }
66 
67 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
68 				   struct btrfs_block_group *block_group,
69 				   struct btrfs_path *path)
70 {
71 	struct btrfs_root *root = btrfs_free_space_root(block_group);
72 	struct btrfs_free_space_info *info;
73 	struct btrfs_key key;
74 	struct extent_buffer *leaf;
75 	int ret;
76 
77 	key.objectid = block_group->start;
78 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
79 	key.offset = block_group->length;
80 
81 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
82 	if (ret)
83 		goto out;
84 
85 	leaf = path->nodes[0];
86 	info = btrfs_item_ptr(leaf, path->slots[0],
87 			      struct btrfs_free_space_info);
88 	btrfs_set_free_space_extent_count(leaf, info, 0);
89 	btrfs_set_free_space_flags(leaf, info, 0);
90 	btrfs_mark_buffer_dirty(leaf);
91 
92 	ret = 0;
93 out:
94 	btrfs_release_path(path);
95 	return ret;
96 }
97 
98 EXPORT_FOR_TESTS
99 struct btrfs_free_space_info *search_free_space_info(
100 		struct btrfs_trans_handle *trans,
101 		struct btrfs_block_group *block_group,
102 		struct btrfs_path *path, int cow)
103 {
104 	struct btrfs_fs_info *fs_info = block_group->fs_info;
105 	struct btrfs_root *root = btrfs_free_space_root(block_group);
106 	struct btrfs_key key;
107 	int ret;
108 
109 	key.objectid = block_group->start;
110 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
111 	key.offset = block_group->length;
112 
113 	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
114 	if (ret < 0)
115 		return ERR_PTR(ret);
116 	if (ret != 0) {
117 		btrfs_warn(fs_info, "missing free space info for %llu",
118 			   block_group->start);
119 		ASSERT(0);
120 		return ERR_PTR(-ENOENT);
121 	}
122 
123 	return btrfs_item_ptr(path->nodes[0], path->slots[0],
124 			      struct btrfs_free_space_info);
125 }
126 
127 /*
128  * btrfs_search_slot() but we're looking for the greatest key less than the
129  * passed key.
130  */
131 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
132 				  struct btrfs_root *root,
133 				  struct btrfs_key *key, struct btrfs_path *p,
134 				  int ins_len, int cow)
135 {
136 	int ret;
137 
138 	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
139 	if (ret < 0)
140 		return ret;
141 
142 	if (ret == 0) {
143 		ASSERT(0);
144 		return -EIO;
145 	}
146 
147 	if (p->slots[0] == 0) {
148 		ASSERT(0);
149 		return -EIO;
150 	}
151 	p->slots[0]--;
152 
153 	return 0;
154 }
155 
156 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
157 					 u64 size)
158 {
159 	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
160 }
161 
162 static unsigned long *alloc_bitmap(u32 bitmap_size)
163 {
164 	unsigned long *ret;
165 	unsigned int nofs_flag;
166 	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
167 
168 	/*
169 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
170 	 * into the filesystem as the free space bitmap can be modified in the
171 	 * critical section of a transaction commit.
172 	 *
173 	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
174 	 * know that recursion is unsafe.
175 	 */
176 	nofs_flag = memalloc_nofs_save();
177 	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
178 	memalloc_nofs_restore(nofs_flag);
179 	return ret;
180 }
181 
182 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
183 {
184 	u8 *p = ((u8 *)map) + BIT_BYTE(start);
185 	const unsigned int size = start + len;
186 	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
187 	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
188 
189 	while (len - bits_to_set >= 0) {
190 		*p |= mask_to_set;
191 		len -= bits_to_set;
192 		bits_to_set = BITS_PER_BYTE;
193 		mask_to_set = ~0;
194 		p++;
195 	}
196 	if (len) {
197 		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
198 		*p |= mask_to_set;
199 	}
200 }
201 
202 EXPORT_FOR_TESTS
203 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
204 				  struct btrfs_block_group *block_group,
205 				  struct btrfs_path *path)
206 {
207 	struct btrfs_fs_info *fs_info = trans->fs_info;
208 	struct btrfs_root *root = btrfs_free_space_root(block_group);
209 	struct btrfs_free_space_info *info;
210 	struct btrfs_key key, found_key;
211 	struct extent_buffer *leaf;
212 	unsigned long *bitmap;
213 	char *bitmap_cursor;
214 	u64 start, end;
215 	u64 bitmap_range, i;
216 	u32 bitmap_size, flags, expected_extent_count;
217 	u32 extent_count = 0;
218 	int done = 0, nr;
219 	int ret;
220 
221 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
222 	bitmap = alloc_bitmap(bitmap_size);
223 	if (!bitmap) {
224 		ret = -ENOMEM;
225 		goto out;
226 	}
227 
228 	start = block_group->start;
229 	end = block_group->start + block_group->length;
230 
231 	key.objectid = end - 1;
232 	key.type = (u8)-1;
233 	key.offset = (u64)-1;
234 
235 	while (!done) {
236 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
237 		if (ret)
238 			goto out;
239 
240 		leaf = path->nodes[0];
241 		nr = 0;
242 		path->slots[0]++;
243 		while (path->slots[0] > 0) {
244 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
245 
246 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
247 				ASSERT(found_key.objectid == block_group->start);
248 				ASSERT(found_key.offset == block_group->length);
249 				done = 1;
250 				break;
251 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
252 				u64 first, last;
253 
254 				ASSERT(found_key.objectid >= start);
255 				ASSERT(found_key.objectid < end);
256 				ASSERT(found_key.objectid + found_key.offset <= end);
257 
258 				first = div_u64(found_key.objectid - start,
259 						fs_info->sectorsize);
260 				last = div_u64(found_key.objectid + found_key.offset - start,
261 					       fs_info->sectorsize);
262 				le_bitmap_set(bitmap, first, last - first);
263 
264 				extent_count++;
265 				nr++;
266 				path->slots[0]--;
267 			} else {
268 				ASSERT(0);
269 			}
270 		}
271 
272 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
273 		if (ret)
274 			goto out;
275 		btrfs_release_path(path);
276 	}
277 
278 	info = search_free_space_info(trans, block_group, path, 1);
279 	if (IS_ERR(info)) {
280 		ret = PTR_ERR(info);
281 		goto out;
282 	}
283 	leaf = path->nodes[0];
284 	flags = btrfs_free_space_flags(leaf, info);
285 	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
286 	btrfs_set_free_space_flags(leaf, info, flags);
287 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
288 	btrfs_mark_buffer_dirty(leaf);
289 	btrfs_release_path(path);
290 
291 	if (extent_count != expected_extent_count) {
292 		btrfs_err(fs_info,
293 			  "incorrect extent count for %llu; counted %u, expected %u",
294 			  block_group->start, extent_count,
295 			  expected_extent_count);
296 		ASSERT(0);
297 		ret = -EIO;
298 		goto out;
299 	}
300 
301 	bitmap_cursor = (char *)bitmap;
302 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
303 	i = start;
304 	while (i < end) {
305 		unsigned long ptr;
306 		u64 extent_size;
307 		u32 data_size;
308 
309 		extent_size = min(end - i, bitmap_range);
310 		data_size = free_space_bitmap_size(fs_info, extent_size);
311 
312 		key.objectid = i;
313 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
314 		key.offset = extent_size;
315 
316 		ret = btrfs_insert_empty_item(trans, root, path, &key,
317 					      data_size);
318 		if (ret)
319 			goto out;
320 
321 		leaf = path->nodes[0];
322 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
323 		write_extent_buffer(leaf, bitmap_cursor, ptr,
324 				    data_size);
325 		btrfs_mark_buffer_dirty(leaf);
326 		btrfs_release_path(path);
327 
328 		i += extent_size;
329 		bitmap_cursor += data_size;
330 	}
331 
332 	ret = 0;
333 out:
334 	kvfree(bitmap);
335 	if (ret)
336 		btrfs_abort_transaction(trans, ret);
337 	return ret;
338 }
339 
340 EXPORT_FOR_TESTS
341 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
342 				  struct btrfs_block_group *block_group,
343 				  struct btrfs_path *path)
344 {
345 	struct btrfs_fs_info *fs_info = trans->fs_info;
346 	struct btrfs_root *root = btrfs_free_space_root(block_group);
347 	struct btrfs_free_space_info *info;
348 	struct btrfs_key key, found_key;
349 	struct extent_buffer *leaf;
350 	unsigned long *bitmap;
351 	u64 start, end;
352 	u32 bitmap_size, flags, expected_extent_count;
353 	unsigned long nrbits, start_bit, end_bit;
354 	u32 extent_count = 0;
355 	int done = 0, nr;
356 	int ret;
357 
358 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
359 	bitmap = alloc_bitmap(bitmap_size);
360 	if (!bitmap) {
361 		ret = -ENOMEM;
362 		goto out;
363 	}
364 
365 	start = block_group->start;
366 	end = block_group->start + block_group->length;
367 
368 	key.objectid = end - 1;
369 	key.type = (u8)-1;
370 	key.offset = (u64)-1;
371 
372 	while (!done) {
373 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
374 		if (ret)
375 			goto out;
376 
377 		leaf = path->nodes[0];
378 		nr = 0;
379 		path->slots[0]++;
380 		while (path->slots[0] > 0) {
381 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
382 
383 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
384 				ASSERT(found_key.objectid == block_group->start);
385 				ASSERT(found_key.offset == block_group->length);
386 				done = 1;
387 				break;
388 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
389 				unsigned long ptr;
390 				char *bitmap_cursor;
391 				u32 bitmap_pos, data_size;
392 
393 				ASSERT(found_key.objectid >= start);
394 				ASSERT(found_key.objectid < end);
395 				ASSERT(found_key.objectid + found_key.offset <= end);
396 
397 				bitmap_pos = div_u64(found_key.objectid - start,
398 						     fs_info->sectorsize *
399 						     BITS_PER_BYTE);
400 				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
401 				data_size = free_space_bitmap_size(fs_info,
402 								found_key.offset);
403 
404 				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
405 				read_extent_buffer(leaf, bitmap_cursor, ptr,
406 						   data_size);
407 
408 				nr++;
409 				path->slots[0]--;
410 			} else {
411 				ASSERT(0);
412 			}
413 		}
414 
415 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
416 		if (ret)
417 			goto out;
418 		btrfs_release_path(path);
419 	}
420 
421 	info = search_free_space_info(trans, block_group, path, 1);
422 	if (IS_ERR(info)) {
423 		ret = PTR_ERR(info);
424 		goto out;
425 	}
426 	leaf = path->nodes[0];
427 	flags = btrfs_free_space_flags(leaf, info);
428 	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
429 	btrfs_set_free_space_flags(leaf, info, flags);
430 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
431 	btrfs_mark_buffer_dirty(leaf);
432 	btrfs_release_path(path);
433 
434 	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
435 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
436 
437 	while (start_bit < nrbits) {
438 		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
439 		ASSERT(start_bit < end_bit);
440 
441 		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
442 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
443 		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
444 
445 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
446 		if (ret)
447 			goto out;
448 		btrfs_release_path(path);
449 
450 		extent_count++;
451 
452 		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
453 	}
454 
455 	if (extent_count != expected_extent_count) {
456 		btrfs_err(fs_info,
457 			  "incorrect extent count for %llu; counted %u, expected %u",
458 			  block_group->start, extent_count,
459 			  expected_extent_count);
460 		ASSERT(0);
461 		ret = -EIO;
462 		goto out;
463 	}
464 
465 	ret = 0;
466 out:
467 	kvfree(bitmap);
468 	if (ret)
469 		btrfs_abort_transaction(trans, ret);
470 	return ret;
471 }
472 
473 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
474 					  struct btrfs_block_group *block_group,
475 					  struct btrfs_path *path,
476 					  int new_extents)
477 {
478 	struct btrfs_free_space_info *info;
479 	u32 flags;
480 	u32 extent_count;
481 	int ret = 0;
482 
483 	if (new_extents == 0)
484 		return 0;
485 
486 	info = search_free_space_info(trans, block_group, path, 1);
487 	if (IS_ERR(info)) {
488 		ret = PTR_ERR(info);
489 		goto out;
490 	}
491 	flags = btrfs_free_space_flags(path->nodes[0], info);
492 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
493 
494 	extent_count += new_extents;
495 	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
496 	btrfs_mark_buffer_dirty(path->nodes[0]);
497 	btrfs_release_path(path);
498 
499 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
500 	    extent_count > block_group->bitmap_high_thresh) {
501 		ret = convert_free_space_to_bitmaps(trans, block_group, path);
502 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
503 		   extent_count < block_group->bitmap_low_thresh) {
504 		ret = convert_free_space_to_extents(trans, block_group, path);
505 	}
506 
507 out:
508 	return ret;
509 }
510 
511 EXPORT_FOR_TESTS
512 int free_space_test_bit(struct btrfs_block_group *block_group,
513 			struct btrfs_path *path, u64 offset)
514 {
515 	struct extent_buffer *leaf;
516 	struct btrfs_key key;
517 	u64 found_start, found_end;
518 	unsigned long ptr, i;
519 
520 	leaf = path->nodes[0];
521 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
522 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
523 
524 	found_start = key.objectid;
525 	found_end = key.objectid + key.offset;
526 	ASSERT(offset >= found_start && offset < found_end);
527 
528 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
529 	i = div_u64(offset - found_start,
530 		    block_group->fs_info->sectorsize);
531 	return !!extent_buffer_test_bit(leaf, ptr, i);
532 }
533 
534 static void free_space_set_bits(struct btrfs_block_group *block_group,
535 				struct btrfs_path *path, u64 *start, u64 *size,
536 				int bit)
537 {
538 	struct btrfs_fs_info *fs_info = block_group->fs_info;
539 	struct extent_buffer *leaf;
540 	struct btrfs_key key;
541 	u64 end = *start + *size;
542 	u64 found_start, found_end;
543 	unsigned long ptr, first, last;
544 
545 	leaf = path->nodes[0];
546 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
547 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
548 
549 	found_start = key.objectid;
550 	found_end = key.objectid + key.offset;
551 	ASSERT(*start >= found_start && *start < found_end);
552 	ASSERT(end > found_start);
553 
554 	if (end > found_end)
555 		end = found_end;
556 
557 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
558 	first = (*start - found_start) >> fs_info->sectorsize_bits;
559 	last = (end - found_start) >> fs_info->sectorsize_bits;
560 	if (bit)
561 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
562 	else
563 		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
564 	btrfs_mark_buffer_dirty(leaf);
565 
566 	*size -= end - *start;
567 	*start = end;
568 }
569 
570 /*
571  * We can't use btrfs_next_item() in modify_free_space_bitmap() because
572  * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
573  * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
574  * looking for.
575  */
576 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
577 				  struct btrfs_root *root, struct btrfs_path *p)
578 {
579 	struct btrfs_key key;
580 
581 	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
582 		p->slots[0]++;
583 		return 0;
584 	}
585 
586 	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
587 	btrfs_release_path(p);
588 
589 	key.objectid += key.offset;
590 	key.type = (u8)-1;
591 	key.offset = (u64)-1;
592 
593 	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
594 }
595 
596 /*
597  * If remove is 1, then we are removing free space, thus clearing bits in the
598  * bitmap. If remove is 0, then we are adding free space, thus setting bits in
599  * the bitmap.
600  */
601 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
602 				    struct btrfs_block_group *block_group,
603 				    struct btrfs_path *path,
604 				    u64 start, u64 size, int remove)
605 {
606 	struct btrfs_root *root = btrfs_free_space_root(block_group);
607 	struct btrfs_key key;
608 	u64 end = start + size;
609 	u64 cur_start, cur_size;
610 	int prev_bit, next_bit;
611 	int new_extents;
612 	int ret;
613 
614 	/*
615 	 * Read the bit for the block immediately before the extent of space if
616 	 * that block is within the block group.
617 	 */
618 	if (start > block_group->start) {
619 		u64 prev_block = start - block_group->fs_info->sectorsize;
620 
621 		key.objectid = prev_block;
622 		key.type = (u8)-1;
623 		key.offset = (u64)-1;
624 
625 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
626 		if (ret)
627 			goto out;
628 
629 		prev_bit = free_space_test_bit(block_group, path, prev_block);
630 
631 		/* The previous block may have been in the previous bitmap. */
632 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
633 		if (start >= key.objectid + key.offset) {
634 			ret = free_space_next_bitmap(trans, root, path);
635 			if (ret)
636 				goto out;
637 		}
638 	} else {
639 		key.objectid = start;
640 		key.type = (u8)-1;
641 		key.offset = (u64)-1;
642 
643 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
644 		if (ret)
645 			goto out;
646 
647 		prev_bit = -1;
648 	}
649 
650 	/*
651 	 * Iterate over all of the bitmaps overlapped by the extent of space,
652 	 * clearing/setting bits as required.
653 	 */
654 	cur_start = start;
655 	cur_size = size;
656 	while (1) {
657 		free_space_set_bits(block_group, path, &cur_start, &cur_size,
658 				    !remove);
659 		if (cur_size == 0)
660 			break;
661 		ret = free_space_next_bitmap(trans, root, path);
662 		if (ret)
663 			goto out;
664 	}
665 
666 	/*
667 	 * Read the bit for the block immediately after the extent of space if
668 	 * that block is within the block group.
669 	 */
670 	if (end < block_group->start + block_group->length) {
671 		/* The next block may be in the next bitmap. */
672 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
673 		if (end >= key.objectid + key.offset) {
674 			ret = free_space_next_bitmap(trans, root, path);
675 			if (ret)
676 				goto out;
677 		}
678 
679 		next_bit = free_space_test_bit(block_group, path, end);
680 	} else {
681 		next_bit = -1;
682 	}
683 
684 	if (remove) {
685 		new_extents = -1;
686 		if (prev_bit == 1) {
687 			/* Leftover on the left. */
688 			new_extents++;
689 		}
690 		if (next_bit == 1) {
691 			/* Leftover on the right. */
692 			new_extents++;
693 		}
694 	} else {
695 		new_extents = 1;
696 		if (prev_bit == 1) {
697 			/* Merging with neighbor on the left. */
698 			new_extents--;
699 		}
700 		if (next_bit == 1) {
701 			/* Merging with neighbor on the right. */
702 			new_extents--;
703 		}
704 	}
705 
706 	btrfs_release_path(path);
707 	ret = update_free_space_extent_count(trans, block_group, path,
708 					     new_extents);
709 
710 out:
711 	return ret;
712 }
713 
714 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
715 				    struct btrfs_block_group *block_group,
716 				    struct btrfs_path *path,
717 				    u64 start, u64 size)
718 {
719 	struct btrfs_root *root = btrfs_free_space_root(block_group);
720 	struct btrfs_key key;
721 	u64 found_start, found_end;
722 	u64 end = start + size;
723 	int new_extents = -1;
724 	int ret;
725 
726 	key.objectid = start;
727 	key.type = (u8)-1;
728 	key.offset = (u64)-1;
729 
730 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
731 	if (ret)
732 		goto out;
733 
734 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
735 
736 	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
737 
738 	found_start = key.objectid;
739 	found_end = key.objectid + key.offset;
740 	ASSERT(start >= found_start && end <= found_end);
741 
742 	/*
743 	 * Okay, now that we've found the free space extent which contains the
744 	 * free space that we are removing, there are four cases:
745 	 *
746 	 * 1. We're using the whole extent: delete the key we found and
747 	 * decrement the free space extent count.
748 	 * 2. We are using part of the extent starting at the beginning: delete
749 	 * the key we found and insert a new key representing the leftover at
750 	 * the end. There is no net change in the number of extents.
751 	 * 3. We are using part of the extent ending at the end: delete the key
752 	 * we found and insert a new key representing the leftover at the
753 	 * beginning. There is no net change in the number of extents.
754 	 * 4. We are using part of the extent in the middle: delete the key we
755 	 * found and insert two new keys representing the leftovers on each
756 	 * side. Where we used to have one extent, we now have two, so increment
757 	 * the extent count. We may need to convert the block group to bitmaps
758 	 * as a result.
759 	 */
760 
761 	/* Delete the existing key (cases 1-4). */
762 	ret = btrfs_del_item(trans, root, path);
763 	if (ret)
764 		goto out;
765 
766 	/* Add a key for leftovers at the beginning (cases 3 and 4). */
767 	if (start > found_start) {
768 		key.objectid = found_start;
769 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
770 		key.offset = start - found_start;
771 
772 		btrfs_release_path(path);
773 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
774 		if (ret)
775 			goto out;
776 		new_extents++;
777 	}
778 
779 	/* Add a key for leftovers at the end (cases 2 and 4). */
780 	if (end < found_end) {
781 		key.objectid = end;
782 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
783 		key.offset = found_end - end;
784 
785 		btrfs_release_path(path);
786 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
787 		if (ret)
788 			goto out;
789 		new_extents++;
790 	}
791 
792 	btrfs_release_path(path);
793 	ret = update_free_space_extent_count(trans, block_group, path,
794 					     new_extents);
795 
796 out:
797 	return ret;
798 }
799 
800 EXPORT_FOR_TESTS
801 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
802 				  struct btrfs_block_group *block_group,
803 				  struct btrfs_path *path, u64 start, u64 size)
804 {
805 	struct btrfs_free_space_info *info;
806 	u32 flags;
807 	int ret;
808 
809 	if (block_group->needs_free_space) {
810 		ret = __add_block_group_free_space(trans, block_group, path);
811 		if (ret)
812 			return ret;
813 	}
814 
815 	info = search_free_space_info(NULL, block_group, path, 0);
816 	if (IS_ERR(info))
817 		return PTR_ERR(info);
818 	flags = btrfs_free_space_flags(path->nodes[0], info);
819 	btrfs_release_path(path);
820 
821 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
822 		return modify_free_space_bitmap(trans, block_group, path,
823 						start, size, 1);
824 	} else {
825 		return remove_free_space_extent(trans, block_group, path,
826 						start, size);
827 	}
828 }
829 
830 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
831 				u64 start, u64 size)
832 {
833 	struct btrfs_block_group *block_group;
834 	struct btrfs_path *path;
835 	int ret;
836 
837 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
838 		return 0;
839 
840 	path = btrfs_alloc_path();
841 	if (!path) {
842 		ret = -ENOMEM;
843 		goto out;
844 	}
845 
846 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
847 	if (!block_group) {
848 		ASSERT(0);
849 		ret = -ENOENT;
850 		goto out;
851 	}
852 
853 	mutex_lock(&block_group->free_space_lock);
854 	ret = __remove_from_free_space_tree(trans, block_group, path, start,
855 					    size);
856 	mutex_unlock(&block_group->free_space_lock);
857 
858 	btrfs_put_block_group(block_group);
859 out:
860 	btrfs_free_path(path);
861 	if (ret)
862 		btrfs_abort_transaction(trans, ret);
863 	return ret;
864 }
865 
866 static int add_free_space_extent(struct btrfs_trans_handle *trans,
867 				 struct btrfs_block_group *block_group,
868 				 struct btrfs_path *path,
869 				 u64 start, u64 size)
870 {
871 	struct btrfs_root *root = btrfs_free_space_root(block_group);
872 	struct btrfs_key key, new_key;
873 	u64 found_start, found_end;
874 	u64 end = start + size;
875 	int new_extents = 1;
876 	int ret;
877 
878 	/*
879 	 * We are adding a new extent of free space, but we need to merge
880 	 * extents. There are four cases here:
881 	 *
882 	 * 1. The new extent does not have any immediate neighbors to merge
883 	 * with: add the new key and increment the free space extent count. We
884 	 * may need to convert the block group to bitmaps as a result.
885 	 * 2. The new extent has an immediate neighbor before it: remove the
886 	 * previous key and insert a new key combining both of them. There is no
887 	 * net change in the number of extents.
888 	 * 3. The new extent has an immediate neighbor after it: remove the next
889 	 * key and insert a new key combining both of them. There is no net
890 	 * change in the number of extents.
891 	 * 4. The new extent has immediate neighbors on both sides: remove both
892 	 * of the keys and insert a new key combining all of them. Where we used
893 	 * to have two extents, we now have one, so decrement the extent count.
894 	 */
895 
896 	new_key.objectid = start;
897 	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
898 	new_key.offset = size;
899 
900 	/* Search for a neighbor on the left. */
901 	if (start == block_group->start)
902 		goto right;
903 	key.objectid = start - 1;
904 	key.type = (u8)-1;
905 	key.offset = (u64)-1;
906 
907 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
908 	if (ret)
909 		goto out;
910 
911 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
912 
913 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
914 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
915 		btrfs_release_path(path);
916 		goto right;
917 	}
918 
919 	found_start = key.objectid;
920 	found_end = key.objectid + key.offset;
921 	ASSERT(found_start >= block_group->start &&
922 	       found_end > block_group->start);
923 	ASSERT(found_start < start && found_end <= start);
924 
925 	/*
926 	 * Delete the neighbor on the left and absorb it into the new key (cases
927 	 * 2 and 4).
928 	 */
929 	if (found_end == start) {
930 		ret = btrfs_del_item(trans, root, path);
931 		if (ret)
932 			goto out;
933 		new_key.objectid = found_start;
934 		new_key.offset += key.offset;
935 		new_extents--;
936 	}
937 	btrfs_release_path(path);
938 
939 right:
940 	/* Search for a neighbor on the right. */
941 	if (end == block_group->start + block_group->length)
942 		goto insert;
943 	key.objectid = end;
944 	key.type = (u8)-1;
945 	key.offset = (u64)-1;
946 
947 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
948 	if (ret)
949 		goto out;
950 
951 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
952 
953 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
954 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
955 		btrfs_release_path(path);
956 		goto insert;
957 	}
958 
959 	found_start = key.objectid;
960 	found_end = key.objectid + key.offset;
961 	ASSERT(found_start >= block_group->start &&
962 	       found_end > block_group->start);
963 	ASSERT((found_start < start && found_end <= start) ||
964 	       (found_start >= end && found_end > end));
965 
966 	/*
967 	 * Delete the neighbor on the right and absorb it into the new key
968 	 * (cases 3 and 4).
969 	 */
970 	if (found_start == end) {
971 		ret = btrfs_del_item(trans, root, path);
972 		if (ret)
973 			goto out;
974 		new_key.offset += key.offset;
975 		new_extents--;
976 	}
977 	btrfs_release_path(path);
978 
979 insert:
980 	/* Insert the new key (cases 1-4). */
981 	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
982 	if (ret)
983 		goto out;
984 
985 	btrfs_release_path(path);
986 	ret = update_free_space_extent_count(trans, block_group, path,
987 					     new_extents);
988 
989 out:
990 	return ret;
991 }
992 
993 EXPORT_FOR_TESTS
994 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
995 			     struct btrfs_block_group *block_group,
996 			     struct btrfs_path *path, u64 start, u64 size)
997 {
998 	struct btrfs_free_space_info *info;
999 	u32 flags;
1000 	int ret;
1001 
1002 	if (block_group->needs_free_space) {
1003 		ret = __add_block_group_free_space(trans, block_group, path);
1004 		if (ret)
1005 			return ret;
1006 	}
1007 
1008 	info = search_free_space_info(NULL, block_group, path, 0);
1009 	if (IS_ERR(info))
1010 		return PTR_ERR(info);
1011 	flags = btrfs_free_space_flags(path->nodes[0], info);
1012 	btrfs_release_path(path);
1013 
1014 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1015 		return modify_free_space_bitmap(trans, block_group, path,
1016 						start, size, 0);
1017 	} else {
1018 		return add_free_space_extent(trans, block_group, path, start,
1019 					     size);
1020 	}
1021 }
1022 
1023 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1024 			   u64 start, u64 size)
1025 {
1026 	struct btrfs_block_group *block_group;
1027 	struct btrfs_path *path;
1028 	int ret;
1029 
1030 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1031 		return 0;
1032 
1033 	path = btrfs_alloc_path();
1034 	if (!path) {
1035 		ret = -ENOMEM;
1036 		goto out;
1037 	}
1038 
1039 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1040 	if (!block_group) {
1041 		ASSERT(0);
1042 		ret = -ENOENT;
1043 		goto out;
1044 	}
1045 
1046 	mutex_lock(&block_group->free_space_lock);
1047 	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1048 	mutex_unlock(&block_group->free_space_lock);
1049 
1050 	btrfs_put_block_group(block_group);
1051 out:
1052 	btrfs_free_path(path);
1053 	if (ret)
1054 		btrfs_abort_transaction(trans, ret);
1055 	return ret;
1056 }
1057 
1058 /*
1059  * Populate the free space tree by walking the extent tree. Operations on the
1060  * extent tree that happen as a result of writes to the free space tree will go
1061  * through the normal add/remove hooks.
1062  */
1063 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1064 				    struct btrfs_block_group *block_group)
1065 {
1066 	struct btrfs_root *extent_root;
1067 	struct btrfs_path *path, *path2;
1068 	struct btrfs_key key;
1069 	u64 start, end;
1070 	int ret;
1071 
1072 	path = btrfs_alloc_path();
1073 	if (!path)
1074 		return -ENOMEM;
1075 	path->reada = READA_FORWARD;
1076 
1077 	path2 = btrfs_alloc_path();
1078 	if (!path2) {
1079 		btrfs_free_path(path);
1080 		return -ENOMEM;
1081 	}
1082 
1083 	ret = add_new_free_space_info(trans, block_group, path2);
1084 	if (ret)
1085 		goto out;
1086 
1087 	mutex_lock(&block_group->free_space_lock);
1088 
1089 	/*
1090 	 * Iterate through all of the extent and metadata items in this block
1091 	 * group, adding the free space between them and the free space at the
1092 	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1093 	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1094 	 * contained in.
1095 	 */
1096 	key.objectid = block_group->start;
1097 	key.type = BTRFS_EXTENT_ITEM_KEY;
1098 	key.offset = 0;
1099 
1100 	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1101 	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1102 	if (ret < 0)
1103 		goto out_locked;
1104 	ASSERT(ret == 0);
1105 
1106 	start = block_group->start;
1107 	end = block_group->start + block_group->length;
1108 	while (1) {
1109 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1110 
1111 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1112 		    key.type == BTRFS_METADATA_ITEM_KEY) {
1113 			if (key.objectid >= end)
1114 				break;
1115 
1116 			if (start < key.objectid) {
1117 				ret = __add_to_free_space_tree(trans,
1118 							       block_group,
1119 							       path2, start,
1120 							       key.objectid -
1121 							       start);
1122 				if (ret)
1123 					goto out_locked;
1124 			}
1125 			start = key.objectid;
1126 			if (key.type == BTRFS_METADATA_ITEM_KEY)
1127 				start += trans->fs_info->nodesize;
1128 			else
1129 				start += key.offset;
1130 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1131 			if (key.objectid != block_group->start)
1132 				break;
1133 		}
1134 
1135 		ret = btrfs_next_item(extent_root, path);
1136 		if (ret < 0)
1137 			goto out_locked;
1138 		if (ret)
1139 			break;
1140 	}
1141 	if (start < end) {
1142 		ret = __add_to_free_space_tree(trans, block_group, path2,
1143 					       start, end - start);
1144 		if (ret)
1145 			goto out_locked;
1146 	}
1147 
1148 	ret = 0;
1149 out_locked:
1150 	mutex_unlock(&block_group->free_space_lock);
1151 out:
1152 	btrfs_free_path(path2);
1153 	btrfs_free_path(path);
1154 	return ret;
1155 }
1156 
1157 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1158 {
1159 	struct btrfs_trans_handle *trans;
1160 	struct btrfs_root *tree_root = fs_info->tree_root;
1161 	struct btrfs_root *free_space_root;
1162 	struct btrfs_block_group *block_group;
1163 	struct rb_node *node;
1164 	int ret;
1165 
1166 	trans = btrfs_start_transaction(tree_root, 0);
1167 	if (IS_ERR(trans))
1168 		return PTR_ERR(trans);
1169 
1170 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1171 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1172 	free_space_root = btrfs_create_tree(trans,
1173 					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1174 	if (IS_ERR(free_space_root)) {
1175 		ret = PTR_ERR(free_space_root);
1176 		goto abort;
1177 	}
1178 	ret = btrfs_global_root_insert(free_space_root);
1179 	if (ret) {
1180 		btrfs_put_root(free_space_root);
1181 		goto abort;
1182 	}
1183 
1184 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1185 	while (node) {
1186 		block_group = rb_entry(node, struct btrfs_block_group,
1187 				       cache_node);
1188 		ret = populate_free_space_tree(trans, block_group);
1189 		if (ret)
1190 			goto abort;
1191 		node = rb_next(node);
1192 	}
1193 
1194 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1195 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1196 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1197 	ret = btrfs_commit_transaction(trans);
1198 
1199 	/*
1200 	 * Now that we've committed the transaction any reading of our commit
1201 	 * root will be safe, so we can cache from the free space tree now.
1202 	 */
1203 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1204 	return ret;
1205 
1206 abort:
1207 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1208 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1209 	btrfs_abort_transaction(trans, ret);
1210 	btrfs_end_transaction(trans);
1211 	return ret;
1212 }
1213 
1214 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1215 				 struct btrfs_root *root)
1216 {
1217 	struct btrfs_path *path;
1218 	struct btrfs_key key;
1219 	int nr;
1220 	int ret;
1221 
1222 	path = btrfs_alloc_path();
1223 	if (!path)
1224 		return -ENOMEM;
1225 
1226 	key.objectid = 0;
1227 	key.type = 0;
1228 	key.offset = 0;
1229 
1230 	while (1) {
1231 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1232 		if (ret < 0)
1233 			goto out;
1234 
1235 		nr = btrfs_header_nritems(path->nodes[0]);
1236 		if (!nr)
1237 			break;
1238 
1239 		path->slots[0] = 0;
1240 		ret = btrfs_del_items(trans, root, path, 0, nr);
1241 		if (ret)
1242 			goto out;
1243 
1244 		btrfs_release_path(path);
1245 	}
1246 
1247 	ret = 0;
1248 out:
1249 	btrfs_free_path(path);
1250 	return ret;
1251 }
1252 
1253 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1254 {
1255 	struct btrfs_trans_handle *trans;
1256 	struct btrfs_root *tree_root = fs_info->tree_root;
1257 	struct btrfs_key key = {
1258 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1259 		.type = BTRFS_ROOT_ITEM_KEY,
1260 		.offset = 0,
1261 	};
1262 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1263 	int ret;
1264 
1265 	trans = btrfs_start_transaction(tree_root, 0);
1266 	if (IS_ERR(trans))
1267 		return PTR_ERR(trans);
1268 
1269 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1270 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1271 
1272 	ret = clear_free_space_tree(trans, free_space_root);
1273 	if (ret)
1274 		goto abort;
1275 
1276 	ret = btrfs_del_root(trans, &free_space_root->root_key);
1277 	if (ret)
1278 		goto abort;
1279 
1280 	btrfs_global_root_delete(free_space_root);
1281 	list_del(&free_space_root->dirty_list);
1282 
1283 	btrfs_tree_lock(free_space_root->node);
1284 	btrfs_clean_tree_block(free_space_root->node);
1285 	btrfs_tree_unlock(free_space_root->node);
1286 	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1287 			      free_space_root->node, 0, 1);
1288 
1289 	btrfs_put_root(free_space_root);
1290 
1291 	return btrfs_commit_transaction(trans);
1292 
1293 abort:
1294 	btrfs_abort_transaction(trans, ret);
1295 	btrfs_end_transaction(trans);
1296 	return ret;
1297 }
1298 
1299 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1300 					struct btrfs_block_group *block_group,
1301 					struct btrfs_path *path)
1302 {
1303 	int ret;
1304 
1305 	block_group->needs_free_space = 0;
1306 
1307 	ret = add_new_free_space_info(trans, block_group, path);
1308 	if (ret)
1309 		return ret;
1310 
1311 	return __add_to_free_space_tree(trans, block_group, path,
1312 					block_group->start,
1313 					block_group->length);
1314 }
1315 
1316 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1317 			       struct btrfs_block_group *block_group)
1318 {
1319 	struct btrfs_fs_info *fs_info = trans->fs_info;
1320 	struct btrfs_path *path = NULL;
1321 	int ret = 0;
1322 
1323 	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1324 		return 0;
1325 
1326 	mutex_lock(&block_group->free_space_lock);
1327 	if (!block_group->needs_free_space)
1328 		goto out;
1329 
1330 	path = btrfs_alloc_path();
1331 	if (!path) {
1332 		ret = -ENOMEM;
1333 		goto out;
1334 	}
1335 
1336 	ret = __add_block_group_free_space(trans, block_group, path);
1337 
1338 out:
1339 	btrfs_free_path(path);
1340 	mutex_unlock(&block_group->free_space_lock);
1341 	if (ret)
1342 		btrfs_abort_transaction(trans, ret);
1343 	return ret;
1344 }
1345 
1346 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1347 				  struct btrfs_block_group *block_group)
1348 {
1349 	struct btrfs_root *root = btrfs_free_space_root(block_group);
1350 	struct btrfs_path *path;
1351 	struct btrfs_key key, found_key;
1352 	struct extent_buffer *leaf;
1353 	u64 start, end;
1354 	int done = 0, nr;
1355 	int ret;
1356 
1357 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1358 		return 0;
1359 
1360 	if (block_group->needs_free_space) {
1361 		/* We never added this block group to the free space tree. */
1362 		return 0;
1363 	}
1364 
1365 	path = btrfs_alloc_path();
1366 	if (!path) {
1367 		ret = -ENOMEM;
1368 		goto out;
1369 	}
1370 
1371 	start = block_group->start;
1372 	end = block_group->start + block_group->length;
1373 
1374 	key.objectid = end - 1;
1375 	key.type = (u8)-1;
1376 	key.offset = (u64)-1;
1377 
1378 	while (!done) {
1379 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1380 		if (ret)
1381 			goto out;
1382 
1383 		leaf = path->nodes[0];
1384 		nr = 0;
1385 		path->slots[0]++;
1386 		while (path->slots[0] > 0) {
1387 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1388 
1389 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1390 				ASSERT(found_key.objectid == block_group->start);
1391 				ASSERT(found_key.offset == block_group->length);
1392 				done = 1;
1393 				nr++;
1394 				path->slots[0]--;
1395 				break;
1396 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1397 				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1398 				ASSERT(found_key.objectid >= start);
1399 				ASSERT(found_key.objectid < end);
1400 				ASSERT(found_key.objectid + found_key.offset <= end);
1401 				nr++;
1402 				path->slots[0]--;
1403 			} else {
1404 				ASSERT(0);
1405 			}
1406 		}
1407 
1408 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1409 		if (ret)
1410 			goto out;
1411 		btrfs_release_path(path);
1412 	}
1413 
1414 	ret = 0;
1415 out:
1416 	btrfs_free_path(path);
1417 	if (ret)
1418 		btrfs_abort_transaction(trans, ret);
1419 	return ret;
1420 }
1421 
1422 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1423 				   struct btrfs_path *path,
1424 				   u32 expected_extent_count)
1425 {
1426 	struct btrfs_block_group *block_group;
1427 	struct btrfs_fs_info *fs_info;
1428 	struct btrfs_root *root;
1429 	struct btrfs_key key;
1430 	int prev_bit = 0, bit;
1431 	/* Initialize to silence GCC. */
1432 	u64 extent_start = 0;
1433 	u64 end, offset;
1434 	u64 total_found = 0;
1435 	u32 extent_count = 0;
1436 	int ret;
1437 
1438 	block_group = caching_ctl->block_group;
1439 	fs_info = block_group->fs_info;
1440 	root = btrfs_free_space_root(block_group);
1441 
1442 	end = block_group->start + block_group->length;
1443 
1444 	while (1) {
1445 		ret = btrfs_next_item(root, path);
1446 		if (ret < 0)
1447 			goto out;
1448 		if (ret)
1449 			break;
1450 
1451 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1452 
1453 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1454 			break;
1455 
1456 		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1457 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1458 
1459 		offset = key.objectid;
1460 		while (offset < key.objectid + key.offset) {
1461 			bit = free_space_test_bit(block_group, path, offset);
1462 			if (prev_bit == 0 && bit == 1) {
1463 				extent_start = offset;
1464 			} else if (prev_bit == 1 && bit == 0) {
1465 				total_found += add_new_free_space(block_group,
1466 								  extent_start,
1467 								  offset);
1468 				if (total_found > CACHING_CTL_WAKE_UP) {
1469 					total_found = 0;
1470 					wake_up(&caching_ctl->wait);
1471 				}
1472 				extent_count++;
1473 			}
1474 			prev_bit = bit;
1475 			offset += fs_info->sectorsize;
1476 		}
1477 	}
1478 	if (prev_bit == 1) {
1479 		total_found += add_new_free_space(block_group, extent_start,
1480 						  end);
1481 		extent_count++;
1482 	}
1483 
1484 	if (extent_count != expected_extent_count) {
1485 		btrfs_err(fs_info,
1486 			  "incorrect extent count for %llu; counted %u, expected %u",
1487 			  block_group->start, extent_count,
1488 			  expected_extent_count);
1489 		ASSERT(0);
1490 		ret = -EIO;
1491 		goto out;
1492 	}
1493 
1494 	ret = 0;
1495 out:
1496 	return ret;
1497 }
1498 
1499 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1500 				   struct btrfs_path *path,
1501 				   u32 expected_extent_count)
1502 {
1503 	struct btrfs_block_group *block_group;
1504 	struct btrfs_fs_info *fs_info;
1505 	struct btrfs_root *root;
1506 	struct btrfs_key key;
1507 	u64 end;
1508 	u64 total_found = 0;
1509 	u32 extent_count = 0;
1510 	int ret;
1511 
1512 	block_group = caching_ctl->block_group;
1513 	fs_info = block_group->fs_info;
1514 	root = btrfs_free_space_root(block_group);
1515 
1516 	end = block_group->start + block_group->length;
1517 
1518 	while (1) {
1519 		ret = btrfs_next_item(root, path);
1520 		if (ret < 0)
1521 			goto out;
1522 		if (ret)
1523 			break;
1524 
1525 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1526 
1527 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1528 			break;
1529 
1530 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1531 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1532 
1533 		total_found += add_new_free_space(block_group, key.objectid,
1534 						  key.objectid + key.offset);
1535 		if (total_found > CACHING_CTL_WAKE_UP) {
1536 			total_found = 0;
1537 			wake_up(&caching_ctl->wait);
1538 		}
1539 		extent_count++;
1540 	}
1541 
1542 	if (extent_count != expected_extent_count) {
1543 		btrfs_err(fs_info,
1544 			  "incorrect extent count for %llu; counted %u, expected %u",
1545 			  block_group->start, extent_count,
1546 			  expected_extent_count);
1547 		ASSERT(0);
1548 		ret = -EIO;
1549 		goto out;
1550 	}
1551 
1552 	ret = 0;
1553 out:
1554 	return ret;
1555 }
1556 
1557 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1558 {
1559 	struct btrfs_block_group *block_group;
1560 	struct btrfs_free_space_info *info;
1561 	struct btrfs_path *path;
1562 	u32 extent_count, flags;
1563 	int ret;
1564 
1565 	block_group = caching_ctl->block_group;
1566 
1567 	path = btrfs_alloc_path();
1568 	if (!path)
1569 		return -ENOMEM;
1570 
1571 	/*
1572 	 * Just like caching_thread() doesn't want to deadlock on the extent
1573 	 * tree, we don't want to deadlock on the free space tree.
1574 	 */
1575 	path->skip_locking = 1;
1576 	path->search_commit_root = 1;
1577 	path->reada = READA_FORWARD;
1578 
1579 	info = search_free_space_info(NULL, block_group, path, 0);
1580 	if (IS_ERR(info)) {
1581 		ret = PTR_ERR(info);
1582 		goto out;
1583 	}
1584 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1585 	flags = btrfs_free_space_flags(path->nodes[0], info);
1586 
1587 	/*
1588 	 * We left path pointing to the free space info item, so now
1589 	 * load_free_space_foo can just iterate through the free space tree from
1590 	 * there.
1591 	 */
1592 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1593 		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1594 	else
1595 		ret = load_free_space_extents(caching_ctl, path, extent_count);
1596 
1597 out:
1598 	btrfs_free_path(path);
1599 	return ret;
1600 }
1601