xref: /openbmc/linux/fs/btrfs/free-space-tree.c (revision c7f13d42)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Facebook.  All rights reserved.
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "ctree.h"
9 #include "disk-io.h"
10 #include "locking.h"
11 #include "free-space-tree.h"
12 #include "transaction.h"
13 #include "block-group.h"
14 #include "fs.h"
15 
16 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
17 					struct btrfs_block_group *block_group,
18 					struct btrfs_path *path);
19 
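/*
 * Return the free space tree root that tracks this block group. Without the
 * EXTENT_TREE_V2 incompat bit there is a single free space tree (offset 0);
 * with it, the root is selected by the block group's global_root_id.
 */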
20 static struct btrfs_root *btrfs_free_space_root(
21 				struct btrfs_block_group *block_group)
22 {
23 	struct btrfs_key key = {
24 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
25 		.type = BTRFS_ROOT_ITEM_KEY,
26 		.offset = 0,
27 	};
28 
29 	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
30 		key.offset = block_group->global_root_id;
31 	return btrfs_global_root(block_group->fs_info, &key);
32 }
33 
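/*
 * Compute the extent count thresholds at which this block group's free space
 * is converted between the extent and bitmap representations. The high
 * threshold is the number of (payload-less) extent items whose item headers
 * would take as much leaf space as storing bitmaps for the whole block group.
 */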
34 void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
35 {
36 	u32 bitmap_range;
37 	size_t bitmap_size;
38 	u64 num_bitmaps, total_bitmap_size;
39 
40 	if (WARN_ON(cache->length == 0))
41 		btrfs_warn(cache->fs_info, "block group %llu length is zero",
42 			   cache->start);
43 
44 	/*
45 	 * We convert to bitmaps when the disk space required for using extents
46 	 * exceeds that required for using bitmaps.
47 	 */
48 	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
49 	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
50 	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
51 	total_bitmap_size = num_bitmaps * bitmap_size;
52 	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
53 					    sizeof(struct btrfs_item));
54 
55 	/*
56 	 * We allow for a small buffer between the high threshold and low
57 	 * threshold to avoid thrashing back and forth between the two formats.
58 	 */
59 	if (cache->bitmap_high_thresh > 100)
60 		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
61 	else
62 		cache->bitmap_low_thresh = 0;
63 }
64 
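/*
 * Insert the FREE_SPACE_INFO item for this block group, keyed by the block
 * group's start offset and length, with an extent count of zero and no flags.
 */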
65 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
66 				   struct btrfs_block_group *block_group,
67 				   struct btrfs_path *path)
68 {
69 	struct btrfs_root *root = btrfs_free_space_root(block_group);
70 	struct btrfs_free_space_info *info;
71 	struct btrfs_key key;
72 	struct extent_buffer *leaf;
73 	int ret;
74 
75 	key.objectid = block_group->start;
76 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
77 	key.offset = block_group->length;
78 
79 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
80 	if (ret)
81 		goto out;
82 
83 	leaf = path->nodes[0];
84 	info = btrfs_item_ptr(leaf, path->slots[0],
85 			      struct btrfs_free_space_info);
86 	btrfs_set_free_space_extent_count(leaf, info, 0);
87 	btrfs_set_free_space_flags(leaf, info, 0);
88 	btrfs_mark_buffer_dirty(leaf);
89 
90 	ret = 0;
91 out:
92 	btrfs_release_path(path);
93 	return ret;
94 }
95 
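/*
 * Look up the FREE_SPACE_INFO item for this block group. @cow decides whether
 * the path is set up for modification. Returns a pointer into the leaf, or an
 * ERR_PTR() if the item is missing.
 */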
96 EXPORT_FOR_TESTS
97 struct btrfs_free_space_info *search_free_space_info(
98 		struct btrfs_trans_handle *trans,
99 		struct btrfs_block_group *block_group,
100 		struct btrfs_path *path, int cow)
101 {
102 	struct btrfs_fs_info *fs_info = block_group->fs_info;
103 	struct btrfs_root *root = btrfs_free_space_root(block_group);
104 	struct btrfs_key key;
105 	int ret;
106 
107 	key.objectid = block_group->start;
108 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
109 	key.offset = block_group->length;
110 
111 	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
112 	if (ret < 0)
113 		return ERR_PTR(ret);
114 	if (ret != 0) {
115 		btrfs_warn(fs_info, "missing free space info for %llu",
116 			   block_group->start);
117 		ASSERT(0);
118 		return ERR_PTR(-ENOENT);
119 	}
120 
121 	return btrfs_item_ptr(path->nodes[0], path->slots[0],
122 			      struct btrfs_free_space_info);
123 }
124 
125 /*
126  * btrfs_search_slot() but we're looking for the greatest key less than the
127  * passed key.
128  */
129 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
130 				  struct btrfs_root *root,
131 				  struct btrfs_key *key, struct btrfs_path *p,
132 				  int ins_len, int cow)
133 {
134 	int ret;
135 
136 	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
137 	if (ret < 0)
138 		return ret;
139 
140 	if (ret == 0) {
141 		ASSERT(0);
142 		return -EIO;
143 	}
144 
145 	if (p->slots[0] == 0) {
146 		ASSERT(0);
147 		return -EIO;
148 	}
149 	p->slots[0]--;
150 
151 	return 0;
152 }
153 
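/*
 * Number of bitmap bytes needed to cover @size bytes of the block group at
 * one bit per sector.
 */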
154 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
155 					 u64 size)
156 {
157 	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
158 }
159 
160 static unsigned long *alloc_bitmap(u32 bitmap_size)
161 {
162 	unsigned long *ret;
163 	unsigned int nofs_flag;
164 	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
165 
166 	/*
167 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
168 	 * into the filesystem as the free space bitmap can be modified in the
169 	 * critical section of a transaction commit.
170 	 *
171 	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
172 	 * know that recursion is unsafe.
173 	 */
174 	nofs_flag = memalloc_nofs_save();
175 	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
176 	memalloc_nofs_restore(nofs_flag);
177 	return ret;
178 }
179 
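/* Set @len bits starting at bit @start in a bitmap stored in little-endian byte order. */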
180 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
181 {
182 	u8 *p = ((u8 *)map) + BIT_BYTE(start);
183 	const unsigned int size = start + len;
184 	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
185 	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
186 
187 	while (len - bits_to_set >= 0) {
188 		*p |= mask_to_set;
189 		len -= bits_to_set;
190 		bits_to_set = BITS_PER_BYTE;
191 		mask_to_set = ~0;
192 		p++;
193 	}
194 	if (len) {
195 		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
196 		*p |= mask_to_set;
197 	}
198 }
199 
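/*
 * Convert a block group's free space from extent items to bitmaps: collect
 * all FREE_SPACE_EXTENT items into an in-memory bitmap, delete them, set
 * BTRFS_FREE_SPACE_USING_BITMAPS in the info item, and write the bitmap back
 * out as FREE_SPACE_BITMAP items, each covering
 * sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS bytes of the block group.
 */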
200 EXPORT_FOR_TESTS
201 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
202 				  struct btrfs_block_group *block_group,
203 				  struct btrfs_path *path)
204 {
205 	struct btrfs_fs_info *fs_info = trans->fs_info;
206 	struct btrfs_root *root = btrfs_free_space_root(block_group);
207 	struct btrfs_free_space_info *info;
208 	struct btrfs_key key, found_key;
209 	struct extent_buffer *leaf;
210 	unsigned long *bitmap;
211 	char *bitmap_cursor;
212 	u64 start, end;
213 	u64 bitmap_range, i;
214 	u32 bitmap_size, flags, expected_extent_count;
215 	u32 extent_count = 0;
216 	int done = 0, nr;
217 	int ret;
218 
219 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
220 	bitmap = alloc_bitmap(bitmap_size);
221 	if (!bitmap) {
222 		ret = -ENOMEM;
223 		goto out;
224 	}
225 
226 	start = block_group->start;
227 	end = block_group->start + block_group->length;
228 
229 	key.objectid = end - 1;
230 	key.type = (u8)-1;
231 	key.offset = (u64)-1;
232 
233 	while (!done) {
234 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
235 		if (ret)
236 			goto out;
237 
238 		leaf = path->nodes[0];
239 		nr = 0;
240 		path->slots[0]++;
241 		while (path->slots[0] > 0) {
242 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
243 
244 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
245 				ASSERT(found_key.objectid == block_group->start);
246 				ASSERT(found_key.offset == block_group->length);
247 				done = 1;
248 				break;
249 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
250 				u64 first, last;
251 
252 				ASSERT(found_key.objectid >= start);
253 				ASSERT(found_key.objectid < end);
254 				ASSERT(found_key.objectid + found_key.offset <= end);
255 
256 				first = div_u64(found_key.objectid - start,
257 						fs_info->sectorsize);
258 				last = div_u64(found_key.objectid + found_key.offset - start,
259 					       fs_info->sectorsize);
260 				le_bitmap_set(bitmap, first, last - first);
261 
262 				extent_count++;
263 				nr++;
264 				path->slots[0]--;
265 			} else {
266 				ASSERT(0);
267 			}
268 		}
269 
270 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
271 		if (ret)
272 			goto out;
273 		btrfs_release_path(path);
274 	}
275 
276 	info = search_free_space_info(trans, block_group, path, 1);
277 	if (IS_ERR(info)) {
278 		ret = PTR_ERR(info);
279 		goto out;
280 	}
281 	leaf = path->nodes[0];
282 	flags = btrfs_free_space_flags(leaf, info);
283 	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
284 	btrfs_set_free_space_flags(leaf, info, flags);
285 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
286 	btrfs_mark_buffer_dirty(leaf);
287 	btrfs_release_path(path);
288 
289 	if (extent_count != expected_extent_count) {
290 		btrfs_err(fs_info,
291 			  "incorrect extent count for %llu; counted %u, expected %u",
292 			  block_group->start, extent_count,
293 			  expected_extent_count);
294 		ASSERT(0);
295 		ret = -EIO;
296 		goto out;
297 	}
298 
299 	bitmap_cursor = (char *)bitmap;
300 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
301 	i = start;
302 	while (i < end) {
303 		unsigned long ptr;
304 		u64 extent_size;
305 		u32 data_size;
306 
307 		extent_size = min(end - i, bitmap_range);
308 		data_size = free_space_bitmap_size(fs_info, extent_size);
309 
310 		key.objectid = i;
311 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
312 		key.offset = extent_size;
313 
314 		ret = btrfs_insert_empty_item(trans, root, path, &key,
315 					      data_size);
316 		if (ret)
317 			goto out;
318 
319 		leaf = path->nodes[0];
320 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
321 		write_extent_buffer(leaf, bitmap_cursor, ptr,
322 				    data_size);
323 		btrfs_mark_buffer_dirty(leaf);
324 		btrfs_release_path(path);
325 
326 		i += extent_size;
327 		bitmap_cursor += data_size;
328 	}
329 
330 	ret = 0;
331 out:
332 	kvfree(bitmap);
333 	if (ret)
334 		btrfs_abort_transaction(trans, ret);
335 	return ret;
336 }
337 
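/*
 * Convert a block group's free space from bitmaps back to extent items: read
 * all FREE_SPACE_BITMAP items into memory, delete them, clear
 * BTRFS_FREE_SPACE_USING_BITMAPS, and insert one FREE_SPACE_EXTENT item per
 * run of set bits.
 */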
338 EXPORT_FOR_TESTS
339 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
340 				  struct btrfs_block_group *block_group,
341 				  struct btrfs_path *path)
342 {
343 	struct btrfs_fs_info *fs_info = trans->fs_info;
344 	struct btrfs_root *root = btrfs_free_space_root(block_group);
345 	struct btrfs_free_space_info *info;
346 	struct btrfs_key key, found_key;
347 	struct extent_buffer *leaf;
348 	unsigned long *bitmap;
349 	u64 start, end;
350 	u32 bitmap_size, flags, expected_extent_count;
351 	unsigned long nrbits, start_bit, end_bit;
352 	u32 extent_count = 0;
353 	int done = 0, nr;
354 	int ret;
355 
356 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
357 	bitmap = alloc_bitmap(bitmap_size);
358 	if (!bitmap) {
359 		ret = -ENOMEM;
360 		goto out;
361 	}
362 
363 	start = block_group->start;
364 	end = block_group->start + block_group->length;
365 
366 	key.objectid = end - 1;
367 	key.type = (u8)-1;
368 	key.offset = (u64)-1;
369 
370 	while (!done) {
371 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
372 		if (ret)
373 			goto out;
374 
375 		leaf = path->nodes[0];
376 		nr = 0;
377 		path->slots[0]++;
378 		while (path->slots[0] > 0) {
379 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
380 
381 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
382 				ASSERT(found_key.objectid == block_group->start);
383 				ASSERT(found_key.offset == block_group->length);
384 				done = 1;
385 				break;
386 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
387 				unsigned long ptr;
388 				char *bitmap_cursor;
389 				u32 bitmap_pos, data_size;
390 
391 				ASSERT(found_key.objectid >= start);
392 				ASSERT(found_key.objectid < end);
393 				ASSERT(found_key.objectid + found_key.offset <= end);
394 
395 				bitmap_pos = div_u64(found_key.objectid - start,
396 						     fs_info->sectorsize *
397 						     BITS_PER_BYTE);
398 				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
399 				data_size = free_space_bitmap_size(fs_info,
400 								found_key.offset);
401 
402 				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
403 				read_extent_buffer(leaf, bitmap_cursor, ptr,
404 						   data_size);
405 
406 				nr++;
407 				path->slots[0]--;
408 			} else {
409 				ASSERT(0);
410 			}
411 		}
412 
413 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
414 		if (ret)
415 			goto out;
416 		btrfs_release_path(path);
417 	}
418 
419 	info = search_free_space_info(trans, block_group, path, 1);
420 	if (IS_ERR(info)) {
421 		ret = PTR_ERR(info);
422 		goto out;
423 	}
424 	leaf = path->nodes[0];
425 	flags = btrfs_free_space_flags(leaf, info);
426 	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
427 	btrfs_set_free_space_flags(leaf, info, flags);
428 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
429 	btrfs_mark_buffer_dirty(leaf);
430 	btrfs_release_path(path);
431 
432 	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
433 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
434 
435 	while (start_bit < nrbits) {
436 		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
437 		ASSERT(start_bit < end_bit);
438 
439 		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
440 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
441 		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
442 
443 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
444 		if (ret)
445 			goto out;
446 		btrfs_release_path(path);
447 
448 		extent_count++;
449 
450 		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
451 	}
452 
453 	if (extent_count != expected_extent_count) {
454 		btrfs_err(fs_info,
455 			  "incorrect extent count for %llu; counted %u, expected %u",
456 			  block_group->start, extent_count,
457 			  expected_extent_count);
458 		ASSERT(0);
459 		ret = -EIO;
460 		goto out;
461 	}
462 
463 	ret = 0;
464 out:
465 	kvfree(bitmap);
466 	if (ret)
467 		btrfs_abort_transaction(trans, ret);
468 	return ret;
469 }
470 
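/*
 * Apply a delta to the extent count in the FREE_SPACE_INFO item and switch
 * the block group between the extent and bitmap formats if the new count
 * crosses one of the thresholds set up by set_free_space_tree_thresholds().
 */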
471 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
472 					  struct btrfs_block_group *block_group,
473 					  struct btrfs_path *path,
474 					  int new_extents)
475 {
476 	struct btrfs_free_space_info *info;
477 	u32 flags;
478 	u32 extent_count;
479 	int ret = 0;
480 
481 	if (new_extents == 0)
482 		return 0;
483 
484 	info = search_free_space_info(trans, block_group, path, 1);
485 	if (IS_ERR(info)) {
486 		ret = PTR_ERR(info);
487 		goto out;
488 	}
489 	flags = btrfs_free_space_flags(path->nodes[0], info);
490 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
491 
492 	extent_count += new_extents;
493 	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
494 	btrfs_mark_buffer_dirty(path->nodes[0]);
495 	btrfs_release_path(path);
496 
497 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
498 	    extent_count > block_group->bitmap_high_thresh) {
499 		ret = convert_free_space_to_bitmaps(trans, block_group, path);
500 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
501 		   extent_count < block_group->bitmap_low_thresh) {
502 		ret = convert_free_space_to_extents(trans, block_group, path);
503 	}
504 
505 out:
506 	return ret;
507 }
508 
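/*
 * Test the bit for the sector at @offset in the FREE_SPACE_BITMAP item that
 * @path currently points to.
 */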
509 EXPORT_FOR_TESTS
510 int free_space_test_bit(struct btrfs_block_group *block_group,
511 			struct btrfs_path *path, u64 offset)
512 {
513 	struct extent_buffer *leaf;
514 	struct btrfs_key key;
515 	u64 found_start, found_end;
516 	unsigned long ptr, i;
517 
518 	leaf = path->nodes[0];
519 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
520 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
521 
522 	found_start = key.objectid;
523 	found_end = key.objectid + key.offset;
524 	ASSERT(offset >= found_start && offset < found_end);
525 
526 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
527 	i = div_u64(offset - found_start,
528 		    block_group->fs_info->sectorsize);
529 	return !!extent_buffer_test_bit(leaf, ptr, i);
530 }
531 
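/*
 * Set or clear the bits for the part of [*start, *start + *size) covered by
 * the FREE_SPACE_BITMAP item that @path points to, then advance *start and
 * shrink *size by the range that was handled.
 */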
532 static void free_space_set_bits(struct btrfs_block_group *block_group,
533 				struct btrfs_path *path, u64 *start, u64 *size,
534 				int bit)
535 {
536 	struct btrfs_fs_info *fs_info = block_group->fs_info;
537 	struct extent_buffer *leaf;
538 	struct btrfs_key key;
539 	u64 end = *start + *size;
540 	u64 found_start, found_end;
541 	unsigned long ptr, first, last;
542 
543 	leaf = path->nodes[0];
544 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
545 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
546 
547 	found_start = key.objectid;
548 	found_end = key.objectid + key.offset;
549 	ASSERT(*start >= found_start && *start < found_end);
550 	ASSERT(end > found_start);
551 
552 	if (end > found_end)
553 		end = found_end;
554 
555 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
556 	first = (*start - found_start) >> fs_info->sectorsize_bits;
557 	last = (end - found_start) >> fs_info->sectorsize_bits;
558 	if (bit)
559 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
560 	else
561 		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
562 	btrfs_mark_buffer_dirty(leaf);
563 
564 	*size -= end - *start;
565 	*start = end;
566 }
567 
568 /*
569  * We can't use btrfs_next_item() in modify_free_space_bitmap() because
570  * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
571  * tree walking in btrfs_next_leaf() anyway because we know exactly what we're
572  * looking for.
573  */
574 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
575 				  struct btrfs_root *root, struct btrfs_path *p)
576 {
577 	struct btrfs_key key;
578 
579 	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
580 		p->slots[0]++;
581 		return 0;
582 	}
583 
584 	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
585 	btrfs_release_path(p);
586 
587 	key.objectid += key.offset;
588 	key.type = (u8)-1;
589 	key.offset = (u64)-1;
590 
591 	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
592 }
593 
594 /*
595  * If remove is 1, then we are removing free space, thus clearing bits in the
596  * bitmap. If remove is 0, then we are adding free space, thus setting bits in
597  * the bitmap.
598  */
599 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
600 				    struct btrfs_block_group *block_group,
601 				    struct btrfs_path *path,
602 				    u64 start, u64 size, int remove)
603 {
604 	struct btrfs_root *root = btrfs_free_space_root(block_group);
605 	struct btrfs_key key;
606 	u64 end = start + size;
607 	u64 cur_start, cur_size;
608 	int prev_bit, next_bit;
609 	int new_extents;
610 	int ret;
611 
612 	/*
613 	 * Read the bit for the block immediately before the extent of space if
614 	 * that block is within the block group.
615 	 */
616 	if (start > block_group->start) {
617 		u64 prev_block = start - block_group->fs_info->sectorsize;
618 
619 		key.objectid = prev_block;
620 		key.type = (u8)-1;
621 		key.offset = (u64)-1;
622 
623 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
624 		if (ret)
625 			goto out;
626 
627 		prev_bit = free_space_test_bit(block_group, path, prev_block);
628 
629 		/* The previous block may have been in the previous bitmap. */
630 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
631 		if (start >= key.objectid + key.offset) {
632 			ret = free_space_next_bitmap(trans, root, path);
633 			if (ret)
634 				goto out;
635 		}
636 	} else {
637 		key.objectid = start;
638 		key.type = (u8)-1;
639 		key.offset = (u64)-1;
640 
641 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
642 		if (ret)
643 			goto out;
644 
645 		prev_bit = -1;
646 	}
647 
648 	/*
649 	 * Iterate over all of the bitmaps overlapped by the extent of space,
650 	 * clearing/setting bits as required.
651 	 */
652 	cur_start = start;
653 	cur_size = size;
654 	while (1) {
655 		free_space_set_bits(block_group, path, &cur_start, &cur_size,
656 				    !remove);
657 		if (cur_size == 0)
658 			break;
659 		ret = free_space_next_bitmap(trans, root, path);
660 		if (ret)
661 			goto out;
662 	}
663 
664 	/*
665 	 * Read the bit for the block immediately after the extent of space if
666 	 * that block is within the block group.
667 	 */
668 	if (end < block_group->start + block_group->length) {
669 		/* The next block may be in the next bitmap. */
670 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
671 		if (end >= key.objectid + key.offset) {
672 			ret = free_space_next_bitmap(trans, root, path);
673 			if (ret)
674 				goto out;
675 		}
676 
677 		next_bit = free_space_test_bit(block_group, path, end);
678 	} else {
679 		next_bit = -1;
680 	}
681 
682 	if (remove) {
683 		new_extents = -1;
684 		if (prev_bit == 1) {
685 			/* Leftover on the left. */
686 			new_extents++;
687 		}
688 		if (next_bit == 1) {
689 			/* Leftover on the right. */
690 			new_extents++;
691 		}
692 	} else {
693 		new_extents = 1;
694 		if (prev_bit == 1) {
695 			/* Merging with neighbor on the left. */
696 			new_extents--;
697 		}
698 		if (next_bit == 1) {
699 			/* Merging with neighbor on the right. */
700 			new_extents--;
701 		}
702 	}
703 
704 	btrfs_release_path(path);
705 	ret = update_free_space_extent_count(trans, block_group, path,
706 					     new_extents);
707 
708 out:
709 	return ret;
710 }
711 
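/*
 * Remove [start, start + size) from a block group whose free space is stored
 * as extent items; the case analysis in the function body describes how the
 * containing extent is split up.
 */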
712 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
713 				    struct btrfs_block_group *block_group,
714 				    struct btrfs_path *path,
715 				    u64 start, u64 size)
716 {
717 	struct btrfs_root *root = btrfs_free_space_root(block_group);
718 	struct btrfs_key key;
719 	u64 found_start, found_end;
720 	u64 end = start + size;
721 	int new_extents = -1;
722 	int ret;
723 
724 	key.objectid = start;
725 	key.type = (u8)-1;
726 	key.offset = (u64)-1;
727 
728 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
729 	if (ret)
730 		goto out;
731 
732 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
733 
734 	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
735 
736 	found_start = key.objectid;
737 	found_end = key.objectid + key.offset;
738 	ASSERT(start >= found_start && end <= found_end);
739 
740 	/*
741 	 * Okay, now that we've found the free space extent which contains the
742 	 * free space that we are removing, there are four cases:
743 	 *
744 	 * 1. We're using the whole extent: delete the key we found and
745 	 * decrement the free space extent count.
746 	 * 2. We are using part of the extent starting at the beginning: delete
747 	 * the key we found and insert a new key representing the leftover at
748 	 * the end. There is no net change in the number of extents.
749 	 * 3. We are using part of the extent ending at the end: delete the key
750 	 * we found and insert a new key representing the leftover at the
751 	 * beginning. There is no net change in the number of extents.
752 	 * 4. We are using part of the extent in the middle: delete the key we
753 	 * found and insert two new keys representing the leftovers on each
754 	 * side. Where we used to have one extent, we now have two, so increment
755 	 * the extent count. We may need to convert the block group to bitmaps
756 	 * as a result.
757 	 */
758 
759 	/* Delete the existing key (cases 1-4). */
760 	ret = btrfs_del_item(trans, root, path);
761 	if (ret)
762 		goto out;
763 
764 	/* Add a key for leftovers at the beginning (cases 3 and 4). */
765 	if (start > found_start) {
766 		key.objectid = found_start;
767 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
768 		key.offset = start - found_start;
769 
770 		btrfs_release_path(path);
771 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
772 		if (ret)
773 			goto out;
774 		new_extents++;
775 	}
776 
777 	/* Add a key for leftovers at the end (cases 2 and 4). */
778 	if (end < found_end) {
779 		key.objectid = end;
780 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
781 		key.offset = found_end - end;
782 
783 		btrfs_release_path(path);
784 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
785 		if (ret)
786 			goto out;
787 		new_extents++;
788 	}
789 
790 	btrfs_release_path(path);
791 	ret = update_free_space_extent_count(trans, block_group, path,
792 					     new_extents);
793 
794 out:
795 	return ret;
796 }
797 
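/*
 * Remove a range of free space from the tree. If the block group has not been
 * added to the tree yet (needs_free_space), add it first, then dispatch to
 * the bitmap or extent handler depending on the current format.
 */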
798 EXPORT_FOR_TESTS
799 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
800 				  struct btrfs_block_group *block_group,
801 				  struct btrfs_path *path, u64 start, u64 size)
802 {
803 	struct btrfs_free_space_info *info;
804 	u32 flags;
805 	int ret;
806 
807 	if (block_group->needs_free_space) {
808 		ret = __add_block_group_free_space(trans, block_group, path);
809 		if (ret)
810 			return ret;
811 	}
812 
813 	info = search_free_space_info(NULL, block_group, path, 0);
814 	if (IS_ERR(info))
815 		return PTR_ERR(info);
816 	flags = btrfs_free_space_flags(path->nodes[0], info);
817 	btrfs_release_path(path);
818 
819 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
820 		return modify_free_space_bitmap(trans, block_group, path,
821 						start, size, 1);
822 	} else {
823 		return remove_free_space_extent(trans, block_group, path,
824 						start, size);
825 	}
826 }
827 
828 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
829 				u64 start, u64 size)
830 {
831 	struct btrfs_block_group *block_group;
832 	struct btrfs_path *path;
833 	int ret;
834 
835 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
836 		return 0;
837 
838 	path = btrfs_alloc_path();
839 	if (!path) {
840 		ret = -ENOMEM;
841 		goto out;
842 	}
843 
844 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
845 	if (!block_group) {
846 		ASSERT(0);
847 		ret = -ENOENT;
848 		goto out;
849 	}
850 
851 	mutex_lock(&block_group->free_space_lock);
852 	ret = __remove_from_free_space_tree(trans, block_group, path, start,
853 					    size);
854 	mutex_unlock(&block_group->free_space_lock);
855 
856 	btrfs_put_block_group(block_group);
857 out:
858 	btrfs_free_path(path);
859 	if (ret)
860 		btrfs_abort_transaction(trans, ret);
861 	return ret;
862 }
863 
864 static int add_free_space_extent(struct btrfs_trans_handle *trans,
865 				 struct btrfs_block_group *block_group,
866 				 struct btrfs_path *path,
867 				 u64 start, u64 size)
868 {
869 	struct btrfs_root *root = btrfs_free_space_root(block_group);
870 	struct btrfs_key key, new_key;
871 	u64 found_start, found_end;
872 	u64 end = start + size;
873 	int new_extents = 1;
874 	int ret;
875 
876 	/*
877 	 * We are adding a new extent of free space, but we need to merge
878 	 * extents. There are four cases here:
879 	 *
880 	 * 1. The new extent does not have any immediate neighbors to merge
881 	 * with: add the new key and increment the free space extent count. We
882 	 * may need to convert the block group to bitmaps as a result.
883 	 * 2. The new extent has an immediate neighbor before it: remove the
884 	 * previous key and insert a new key combining both of them. There is no
885 	 * net change in the number of extents.
886 	 * 3. The new extent has an immediate neighbor after it: remove the next
887 	 * key and insert a new key combining both of them. There is no net
888 	 * change in the number of extents.
889 	 * 4. The new extent has immediate neighbors on both sides: remove both
890 	 * of the keys and insert a new key combining all of them. Where we used
891 	 * to have two extents, we now have one, so decrement the extent count.
892 	 */
893 
894 	new_key.objectid = start;
895 	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
896 	new_key.offset = size;
897 
898 	/* Search for a neighbor on the left. */
899 	if (start == block_group->start)
900 		goto right;
901 	key.objectid = start - 1;
902 	key.type = (u8)-1;
903 	key.offset = (u64)-1;
904 
905 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
906 	if (ret)
907 		goto out;
908 
909 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
910 
911 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
912 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
913 		btrfs_release_path(path);
914 		goto right;
915 	}
916 
917 	found_start = key.objectid;
918 	found_end = key.objectid + key.offset;
919 	ASSERT(found_start >= block_group->start &&
920 	       found_end > block_group->start);
921 	ASSERT(found_start < start && found_end <= start);
922 
923 	/*
924 	 * Delete the neighbor on the left and absorb it into the new key (cases
925 	 * 2 and 4).
926 	 */
927 	if (found_end == start) {
928 		ret = btrfs_del_item(trans, root, path);
929 		if (ret)
930 			goto out;
931 		new_key.objectid = found_start;
932 		new_key.offset += key.offset;
933 		new_extents--;
934 	}
935 	btrfs_release_path(path);
936 
937 right:
938 	/* Search for a neighbor on the right. */
939 	if (end == block_group->start + block_group->length)
940 		goto insert;
941 	key.objectid = end;
942 	key.type = (u8)-1;
943 	key.offset = (u64)-1;
944 
945 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
946 	if (ret)
947 		goto out;
948 
949 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
950 
951 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
952 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
953 		btrfs_release_path(path);
954 		goto insert;
955 	}
956 
957 	found_start = key.objectid;
958 	found_end = key.objectid + key.offset;
959 	ASSERT(found_start >= block_group->start &&
960 	       found_end > block_group->start);
961 	ASSERT((found_start < start && found_end <= start) ||
962 	       (found_start >= end && found_end > end));
963 
964 	/*
965 	 * Delete the neighbor on the right and absorb it into the new key
966 	 * (cases 3 and 4).
967 	 */
968 	if (found_start == end) {
969 		ret = btrfs_del_item(trans, root, path);
970 		if (ret)
971 			goto out;
972 		new_key.offset += key.offset;
973 		new_extents--;
974 	}
975 	btrfs_release_path(path);
976 
977 insert:
978 	/* Insert the new key (cases 1-4). */
979 	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
980 	if (ret)
981 		goto out;
982 
983 	btrfs_release_path(path);
984 	ret = update_free_space_extent_count(trans, block_group, path,
985 					     new_extents);
986 
987 out:
988 	return ret;
989 }
990 
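/*
 * Add a range of free space to the tree, adding the block group's items first
 * if needed, then updating either the bitmaps or the extent items depending
 * on the block group's current format.
 */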
991 EXPORT_FOR_TESTS
992 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
993 			     struct btrfs_block_group *block_group,
994 			     struct btrfs_path *path, u64 start, u64 size)
995 {
996 	struct btrfs_free_space_info *info;
997 	u32 flags;
998 	int ret;
999 
1000 	if (block_group->needs_free_space) {
1001 		ret = __add_block_group_free_space(trans, block_group, path);
1002 		if (ret)
1003 			return ret;
1004 	}
1005 
1006 	info = search_free_space_info(NULL, block_group, path, 0);
1007 	if (IS_ERR(info))
1008 		return PTR_ERR(info);
1009 	flags = btrfs_free_space_flags(path->nodes[0], info);
1010 	btrfs_release_path(path);
1011 
1012 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1013 		return modify_free_space_bitmap(trans, block_group, path,
1014 						start, size, 0);
1015 	} else {
1016 		return add_free_space_extent(trans, block_group, path, start,
1017 					     size);
1018 	}
1019 }
1020 
1021 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1022 			   u64 start, u64 size)
1023 {
1024 	struct btrfs_block_group *block_group;
1025 	struct btrfs_path *path;
1026 	int ret;
1027 
1028 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1029 		return 0;
1030 
1031 	path = btrfs_alloc_path();
1032 	if (!path) {
1033 		ret = -ENOMEM;
1034 		goto out;
1035 	}
1036 
1037 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1038 	if (!block_group) {
1039 		ASSERT(0);
1040 		ret = -ENOENT;
1041 		goto out;
1042 	}
1043 
1044 	mutex_lock(&block_group->free_space_lock);
1045 	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1046 	mutex_unlock(&block_group->free_space_lock);
1047 
1048 	btrfs_put_block_group(block_group);
1049 out:
1050 	btrfs_free_path(path);
1051 	if (ret)
1052 		btrfs_abort_transaction(trans, ret);
1053 	return ret;
1054 }
1055 
1056 /*
1057  * Populate the free space tree by walking the extent tree. Operations on the
1058  * extent tree that happen as a result of writes to the free space tree will go
1059  * through the normal add/remove hooks.
1060  */
1061 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1062 				    struct btrfs_block_group *block_group)
1063 {
1064 	struct btrfs_root *extent_root;
1065 	struct btrfs_path *path, *path2;
1066 	struct btrfs_key key;
1067 	u64 start, end;
1068 	int ret;
1069 
1070 	path = btrfs_alloc_path();
1071 	if (!path)
1072 		return -ENOMEM;
1073 	path->reada = READA_FORWARD;
1074 
1075 	path2 = btrfs_alloc_path();
1076 	if (!path2) {
1077 		btrfs_free_path(path);
1078 		return -ENOMEM;
1079 	}
1080 
1081 	ret = add_new_free_space_info(trans, block_group, path2);
1082 	if (ret)
1083 		goto out;
1084 
1085 	mutex_lock(&block_group->free_space_lock);
1086 
1087 	/*
1088 	 * Iterate through all of the extent and metadata items in this block
1089 	 * group, adding the free space between them and the free space at the
1090 	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1091 	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1092 	 * contained in.
1093 	 */
1094 	key.objectid = block_group->start;
1095 	key.type = BTRFS_EXTENT_ITEM_KEY;
1096 	key.offset = 0;
1097 
1098 	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1099 	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1100 	if (ret < 0)
1101 		goto out_locked;
1102 	ASSERT(ret == 0);
1103 
1104 	start = block_group->start;
1105 	end = block_group->start + block_group->length;
1106 	while (1) {
1107 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1108 
1109 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1110 		    key.type == BTRFS_METADATA_ITEM_KEY) {
1111 			if (key.objectid >= end)
1112 				break;
1113 
1114 			if (start < key.objectid) {
1115 				ret = __add_to_free_space_tree(trans,
1116 							       block_group,
1117 							       path2, start,
1118 							       key.objectid -
1119 							       start);
1120 				if (ret)
1121 					goto out_locked;
1122 			}
1123 			start = key.objectid;
1124 			if (key.type == BTRFS_METADATA_ITEM_KEY)
1125 				start += trans->fs_info->nodesize;
1126 			else
1127 				start += key.offset;
1128 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1129 			if (key.objectid != block_group->start)
1130 				break;
1131 		}
1132 
1133 		ret = btrfs_next_item(extent_root, path);
1134 		if (ret < 0)
1135 			goto out_locked;
1136 		if (ret)
1137 			break;
1138 	}
1139 	if (start < end) {
1140 		ret = __add_to_free_space_tree(trans, block_group, path2,
1141 					       start, end - start);
1142 		if (ret)
1143 			goto out_locked;
1144 	}
1145 
1146 	ret = 0;
1147 out_locked:
1148 	mutex_unlock(&block_group->free_space_lock);
1149 out:
1150 	btrfs_free_path(path2);
1151 	btrfs_free_path(path);
1152 	return ret;
1153 }
1154 
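/*
 * Create the free space tree: allocate the root, populate it from every block
 * group by walking the extent tree, set the compat_ro bits and commit. The
 * FREE_SPACE_TREE_UNTRUSTED flag keeps the new tree from being used for
 * caching until the commit makes its commit root safe to read.
 */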
1155 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1156 {
1157 	struct btrfs_trans_handle *trans;
1158 	struct btrfs_root *tree_root = fs_info->tree_root;
1159 	struct btrfs_root *free_space_root;
1160 	struct btrfs_block_group *block_group;
1161 	struct rb_node *node;
1162 	int ret;
1163 
1164 	trans = btrfs_start_transaction(tree_root, 0);
1165 	if (IS_ERR(trans))
1166 		return PTR_ERR(trans);
1167 
1168 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1169 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1170 	free_space_root = btrfs_create_tree(trans,
1171 					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1172 	if (IS_ERR(free_space_root)) {
1173 		ret = PTR_ERR(free_space_root);
1174 		goto abort;
1175 	}
1176 	ret = btrfs_global_root_insert(free_space_root);
1177 	if (ret) {
1178 		btrfs_put_root(free_space_root);
1179 		goto abort;
1180 	}
1181 
1182 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1183 	while (node) {
1184 		block_group = rb_entry(node, struct btrfs_block_group,
1185 				       cache_node);
1186 		ret = populate_free_space_tree(trans, block_group);
1187 		if (ret)
1188 			goto abort;
1189 		node = rb_next(node);
1190 	}
1191 
1192 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1193 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1194 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1195 	ret = btrfs_commit_transaction(trans);
1196 
1197 	/*
1198 	 * Now that we've committed the transaction any reading of our commit
1199 	 * root will be safe, so we can cache from the free space tree now.
1200 	 */
1201 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1202 	return ret;
1203 
1204 abort:
1205 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1206 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1207 	btrfs_abort_transaction(trans, ret);
1208 	btrfs_end_transaction(trans);
1209 	return ret;
1210 }
1211 
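/*
 * Delete every item in the given free space tree root, one leaf at a time,
 * starting from the lowest possible key.
 */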
1212 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1213 				 struct btrfs_root *root)
1214 {
1215 	struct btrfs_path *path;
1216 	struct btrfs_key key;
1217 	int nr;
1218 	int ret;
1219 
1220 	path = btrfs_alloc_path();
1221 	if (!path)
1222 		return -ENOMEM;
1223 
1224 	key.objectid = 0;
1225 	key.type = 0;
1226 	key.offset = 0;
1227 
1228 	while (1) {
1229 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1230 		if (ret < 0)
1231 			goto out;
1232 
1233 		nr = btrfs_header_nritems(path->nodes[0]);
1234 		if (!nr)
1235 			break;
1236 
1237 		path->slots[0] = 0;
1238 		ret = btrfs_del_items(trans, root, path, 0, nr);
1239 		if (ret)
1240 			goto out;
1241 
1242 		btrfs_release_path(path);
1243 	}
1244 
1245 	ret = 0;
1246 out:
1247 	btrfs_free_path(path);
1248 	return ret;
1249 }
1250 
1251 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1252 {
1253 	struct btrfs_trans_handle *trans;
1254 	struct btrfs_root *tree_root = fs_info->tree_root;
1255 	struct btrfs_key key = {
1256 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1257 		.type = BTRFS_ROOT_ITEM_KEY,
1258 		.offset = 0,
1259 	};
1260 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1261 	int ret;
1262 
1263 	trans = btrfs_start_transaction(tree_root, 0);
1264 	if (IS_ERR(trans))
1265 		return PTR_ERR(trans);
1266 
1267 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1268 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1269 
1270 	ret = clear_free_space_tree(trans, free_space_root);
1271 	if (ret)
1272 		goto abort;
1273 
1274 	ret = btrfs_del_root(trans, &free_space_root->root_key);
1275 	if (ret)
1276 		goto abort;
1277 
1278 	btrfs_global_root_delete(free_space_root);
1279 	list_del(&free_space_root->dirty_list);
1280 
1281 	btrfs_tree_lock(free_space_root->node);
1282 	btrfs_clean_tree_block(free_space_root->node);
1283 	btrfs_tree_unlock(free_space_root->node);
1284 	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1285 			      free_space_root->node, 0, 1);
1286 
1287 	btrfs_put_root(free_space_root);
1288 
1289 	return btrfs_commit_transaction(trans);
1290 
1291 abort:
1292 	btrfs_abort_transaction(trans, ret);
1293 	btrfs_end_transaction(trans);
1294 	return ret;
1295 }
1296 
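/*
 * Add a block group to the free space tree for the first time: insert its
 * FREE_SPACE_INFO item and record the whole block group as free space.
 */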
1297 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1298 					struct btrfs_block_group *block_group,
1299 					struct btrfs_path *path)
1300 {
1301 	int ret;
1302 
1303 	block_group->needs_free_space = 0;
1304 
1305 	ret = add_new_free_space_info(trans, block_group, path);
1306 	if (ret)
1307 		return ret;
1308 
1309 	return __add_to_free_space_tree(trans, block_group, path,
1310 					block_group->start,
1311 					block_group->length);
1312 }
1313 
1314 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1315 			       struct btrfs_block_group *block_group)
1316 {
1317 	struct btrfs_fs_info *fs_info = trans->fs_info;
1318 	struct btrfs_path *path = NULL;
1319 	int ret = 0;
1320 
1321 	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1322 		return 0;
1323 
1324 	mutex_lock(&block_group->free_space_lock);
1325 	if (!block_group->needs_free_space)
1326 		goto out;
1327 
1328 	path = btrfs_alloc_path();
1329 	if (!path) {
1330 		ret = -ENOMEM;
1331 		goto out;
1332 	}
1333 
1334 	ret = __add_block_group_free_space(trans, block_group, path);
1335 
1336 out:
1337 	btrfs_free_path(path);
1338 	mutex_unlock(&block_group->free_space_lock);
1339 	if (ret)
1340 		btrfs_abort_transaction(trans, ret);
1341 	return ret;
1342 }
1343 
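/*
 * Remove all free space tree items belonging to a block group that is being
 * deleted: its extent or bitmap items and, last, its FREE_SPACE_INFO item.
 */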
1344 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1345 				  struct btrfs_block_group *block_group)
1346 {
1347 	struct btrfs_root *root = btrfs_free_space_root(block_group);
1348 	struct btrfs_path *path;
1349 	struct btrfs_key key, found_key;
1350 	struct extent_buffer *leaf;
1351 	u64 start, end;
1352 	int done = 0, nr;
1353 	int ret;
1354 
1355 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1356 		return 0;
1357 
1358 	if (block_group->needs_free_space) {
1359 		/* We never added this block group to the free space tree. */
1360 		return 0;
1361 	}
1362 
1363 	path = btrfs_alloc_path();
1364 	if (!path) {
1365 		ret = -ENOMEM;
1366 		goto out;
1367 	}
1368 
1369 	start = block_group->start;
1370 	end = block_group->start + block_group->length;
1371 
1372 	key.objectid = end - 1;
1373 	key.type = (u8)-1;
1374 	key.offset = (u64)-1;
1375 
1376 	while (!done) {
1377 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1378 		if (ret)
1379 			goto out;
1380 
1381 		leaf = path->nodes[0];
1382 		nr = 0;
1383 		path->slots[0]++;
1384 		while (path->slots[0] > 0) {
1385 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1386 
1387 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1388 				ASSERT(found_key.objectid == block_group->start);
1389 				ASSERT(found_key.offset == block_group->length);
1390 				done = 1;
1391 				nr++;
1392 				path->slots[0]--;
1393 				break;
1394 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1395 				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1396 				ASSERT(found_key.objectid >= start);
1397 				ASSERT(found_key.objectid < end);
1398 				ASSERT(found_key.objectid + found_key.offset <= end);
1399 				nr++;
1400 				path->slots[0]--;
1401 			} else {
1402 				ASSERT(0);
1403 			}
1404 		}
1405 
1406 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1407 		if (ret)
1408 			goto out;
1409 		btrfs_release_path(path);
1410 	}
1411 
1412 	ret = 0;
1413 out:
1414 	btrfs_free_path(path);
1415 	if (ret)
1416 		btrfs_abort_transaction(trans, ret);
1417 	return ret;
1418 }
1419 
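/*
 * Load free space for a block group stored in bitmap format during caching:
 * walk the FREE_SPACE_BITMAP items and turn every run of set bits into an
 * in-memory free space entry, waking up waiters periodically.
 */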
1420 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1421 				   struct btrfs_path *path,
1422 				   u32 expected_extent_count)
1423 {
1424 	struct btrfs_block_group *block_group;
1425 	struct btrfs_fs_info *fs_info;
1426 	struct btrfs_root *root;
1427 	struct btrfs_key key;
1428 	int prev_bit = 0, bit;
1429 	/* Initialize to silence GCC. */
1430 	u64 extent_start = 0;
1431 	u64 end, offset;
1432 	u64 total_found = 0;
1433 	u32 extent_count = 0;
1434 	int ret;
1435 
1436 	block_group = caching_ctl->block_group;
1437 	fs_info = block_group->fs_info;
1438 	root = btrfs_free_space_root(block_group);
1439 
1440 	end = block_group->start + block_group->length;
1441 
1442 	while (1) {
1443 		ret = btrfs_next_item(root, path);
1444 		if (ret < 0)
1445 			goto out;
1446 		if (ret)
1447 			break;
1448 
1449 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1450 
1451 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1452 			break;
1453 
1454 		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1455 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1456 
1457 		offset = key.objectid;
1458 		while (offset < key.objectid + key.offset) {
1459 			bit = free_space_test_bit(block_group, path, offset);
1460 			if (prev_bit == 0 && bit == 1) {
1461 				extent_start = offset;
1462 			} else if (prev_bit == 1 && bit == 0) {
1463 				total_found += add_new_free_space(block_group,
1464 								  extent_start,
1465 								  offset);
1466 				if (total_found > CACHING_CTL_WAKE_UP) {
1467 					total_found = 0;
1468 					wake_up(&caching_ctl->wait);
1469 				}
1470 				extent_count++;
1471 			}
1472 			prev_bit = bit;
1473 			offset += fs_info->sectorsize;
1474 		}
1475 	}
1476 	if (prev_bit == 1) {
1477 		total_found += add_new_free_space(block_group, extent_start,
1478 						  end);
1479 		extent_count++;
1480 	}
1481 
1482 	if (extent_count != expected_extent_count) {
1483 		btrfs_err(fs_info,
1484 			  "incorrect extent count for %llu; counted %u, expected %u",
1485 			  block_group->start, extent_count,
1486 			  expected_extent_count);
1487 		ASSERT(0);
1488 		ret = -EIO;
1489 		goto out;
1490 	}
1491 
1492 	ret = 0;
1493 out:
1494 	return ret;
1495 }
1496 
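/*
 * Load free space for a block group stored in extent format during caching:
 * each FREE_SPACE_EXTENT item becomes an in-memory free space entry.
 */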
1497 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1498 				   struct btrfs_path *path,
1499 				   u32 expected_extent_count)
1500 {
1501 	struct btrfs_block_group *block_group;
1502 	struct btrfs_fs_info *fs_info;
1503 	struct btrfs_root *root;
1504 	struct btrfs_key key;
1505 	u64 end;
1506 	u64 total_found = 0;
1507 	u32 extent_count = 0;
1508 	int ret;
1509 
1510 	block_group = caching_ctl->block_group;
1511 	fs_info = block_group->fs_info;
1512 	root = btrfs_free_space_root(block_group);
1513 
1514 	end = block_group->start + block_group->length;
1515 
1516 	while (1) {
1517 		ret = btrfs_next_item(root, path);
1518 		if (ret < 0)
1519 			goto out;
1520 		if (ret)
1521 			break;
1522 
1523 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1524 
1525 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1526 			break;
1527 
1528 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1529 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1530 
1531 		total_found += add_new_free_space(block_group, key.objectid,
1532 						  key.objectid + key.offset);
1533 		if (total_found > CACHING_CTL_WAKE_UP) {
1534 			total_found = 0;
1535 			wake_up(&caching_ctl->wait);
1536 		}
1537 		extent_count++;
1538 	}
1539 
1540 	if (extent_count != expected_extent_count) {
1541 		btrfs_err(fs_info,
1542 			  "incorrect extent count for %llu; counted %u, expected %u",
1543 			  block_group->start, extent_count,
1544 			  expected_extent_count);
1545 		ASSERT(0);
1546 		ret = -EIO;
1547 		goto out;
1548 	}
1549 
1550 	ret = 0;
1551 out:
1552 	return ret;
1553 }
1554 
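/*
 * Cache a block group's free space from the free space tree. Reads the
 * FREE_SPACE_INFO item from the commit root and then loads either the
 * bitmaps or the extents, depending on the flags.
 */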
1555 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1556 {
1557 	struct btrfs_block_group *block_group;
1558 	struct btrfs_free_space_info *info;
1559 	struct btrfs_path *path;
1560 	u32 extent_count, flags;
1561 	int ret;
1562 
1563 	block_group = caching_ctl->block_group;
1564 
1565 	path = btrfs_alloc_path();
1566 	if (!path)
1567 		return -ENOMEM;
1568 
1569 	/*
1570 	 * Just like caching_thread() doesn't want to deadlock on the extent
1571 	 * tree, we don't want to deadlock on the free space tree.
1572 	 */
1573 	path->skip_locking = 1;
1574 	path->search_commit_root = 1;
1575 	path->reada = READA_FORWARD;
1576 
1577 	info = search_free_space_info(NULL, block_group, path, 0);
1578 	if (IS_ERR(info)) {
1579 		ret = PTR_ERR(info);
1580 		goto out;
1581 	}
1582 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1583 	flags = btrfs_free_space_flags(path->nodes[0], info);
1584 
1585 	/*
1586 	 * We left path pointing to the free space info item, so now
1587 	 * load_free_space_foo can just iterate through the free space tree from
1588 	 * there.
1589 	 */
1590 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1591 		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1592 	else
1593 		ret = load_free_space_extents(caching_ctl, path, extent_count);
1594 
1595 out:
1596 	btrfs_free_path(path);
1597 	return ret;
1598 }
1599