xref: /openbmc/linux/fs/btrfs/free-space-tree.c (revision f0168042)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Facebook.  All rights reserved.
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 #include "extent-tree.h"
18 #include "root-tree.h"
19 
20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21 					struct btrfs_block_group *block_group,
22 					struct btrfs_path *path);
23 
24 static struct btrfs_root *btrfs_free_space_root(
25 				struct btrfs_block_group *block_group)
26 {
27 	struct btrfs_key key = {
28 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
29 		.type = BTRFS_ROOT_ITEM_KEY,
30 		.offset = 0,
31 	};
32 
33 	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
34 		key.offset = block_group->global_root_id;
35 	return btrfs_global_root(block_group->fs_info, &key);
36 }
37 
38 void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
39 {
40 	u32 bitmap_range;
41 	size_t bitmap_size;
42 	u64 num_bitmaps, total_bitmap_size;
43 
44 	if (WARN_ON(cache->length == 0))
45 		btrfs_warn(cache->fs_info, "block group %llu length is zero",
46 			   cache->start);
47 
48 	/*
49 	 * We convert to bitmaps when the disk space required for using extents
50 	 * exceeds that required for using bitmaps.
51 	 */
52 	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
53 	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
54 	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
55 	total_bitmap_size = num_bitmaps * bitmap_size;
56 	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
57 					    sizeof(struct btrfs_item));
58 
59 	/*
60 	 * We allow for a small buffer between the high threshold and low
61 	 * threshold to avoid thrashing back and forth between the two formats.
62 	 */
63 	if (cache->bitmap_high_thresh > 100)
64 		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
65 	else
66 		cache->bitmap_low_thresh = 0;
67 }
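
/*
 * Editorial note: a self-contained userspace sketch of the threshold math in
 * set_free_space_tree_thresholds() above, not part of this file.  The constant
 * values below (4K sectors, 256-byte bitmap payload, 25-byte item header) are
 * assumptions made for the example; the kernel uses fs_info->sectorsize and
 * the real BTRFS_FREE_SPACE_BITMAP_* / struct btrfs_item sizes.
 */
#include <stdint.h>
#include <stdio.h>

static void demo_thresholds(uint64_t length)
{
	const uint32_t sectorsize = 4096;		/* assumed */
	const uint32_t bitmap_bytes = 256;		/* assumed bitmap payload */
	const uint32_t bitmap_bits = bitmap_bytes * 8;
	const uint32_t item_hdr = 25;			/* assumed item overhead */
	uint64_t bitmap_range = (uint64_t)sectorsize * bitmap_bits;
	uint64_t num_bitmaps = (length + bitmap_range - 1) / bitmap_range;
	uint64_t total_bitmap_size = num_bitmaps * (item_hdr + bitmap_bytes);
	uint32_t high = total_bitmap_size / item_hdr;
	uint32_t low = high > 100 ? high - 100 : 0;

	printf("len=%llu high=%u low=%u\n",
	       (unsigned long long)length, high, low);
}

int main(void)
{
	demo_thresholds(1ULL << 30);	/* a 1 GiB block group */
	return 0;
}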
68 
69 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
70 				   struct btrfs_block_group *block_group,
71 				   struct btrfs_path *path)
72 {
73 	struct btrfs_root *root = btrfs_free_space_root(block_group);
74 	struct btrfs_free_space_info *info;
75 	struct btrfs_key key;
76 	struct extent_buffer *leaf;
77 	int ret;
78 
79 	key.objectid = block_group->start;
80 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
81 	key.offset = block_group->length;
82 
83 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
84 	if (ret)
85 		goto out;
86 
87 	leaf = path->nodes[0];
88 	info = btrfs_item_ptr(leaf, path->slots[0],
89 			      struct btrfs_free_space_info);
90 	btrfs_set_free_space_extent_count(leaf, info, 0);
91 	btrfs_set_free_space_flags(leaf, info, 0);
92 	btrfs_mark_buffer_dirty(leaf);
93 
94 	ret = 0;
95 out:
96 	btrfs_release_path(path);
97 	return ret;
98 }
99 
100 EXPORT_FOR_TESTS
101 struct btrfs_free_space_info *search_free_space_info(
102 		struct btrfs_trans_handle *trans,
103 		struct btrfs_block_group *block_group,
104 		struct btrfs_path *path, int cow)
105 {
106 	struct btrfs_fs_info *fs_info = block_group->fs_info;
107 	struct btrfs_root *root = btrfs_free_space_root(block_group);
108 	struct btrfs_key key;
109 	int ret;
110 
111 	key.objectid = block_group->start;
112 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
113 	key.offset = block_group->length;
114 
115 	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
116 	if (ret < 0)
117 		return ERR_PTR(ret);
118 	if (ret != 0) {
119 		btrfs_warn(fs_info, "missing free space info for %llu",
120 			   block_group->start);
121 		ASSERT(0);
122 		return ERR_PTR(-ENOENT);
123 	}
124 
125 	return btrfs_item_ptr(path->nodes[0], path->slots[0],
126 			      struct btrfs_free_space_info);
127 }
128 
129 /*
130  * btrfs_search_slot() but we're looking for the greatest key less than the
131  * passed key.
132  */
133 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
134 				  struct btrfs_root *root,
135 				  struct btrfs_key *key, struct btrfs_path *p,
136 				  int ins_len, int cow)
137 {
138 	int ret;
139 
140 	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
141 	if (ret < 0)
142 		return ret;
143 
144 	if (ret == 0) {
145 		ASSERT(0);
146 		return -EIO;
147 	}
148 
149 	if (p->slots[0] == 0) {
150 		ASSERT(0);
151 		return -EIO;
152 	}
153 	p->slots[0]--;
154 
155 	return 0;
156 }
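
/*
 * Editorial note: a minimal userspace analogue of btrfs_search_prev_slot()
 * above, assuming a plain sorted array instead of a btree leaf.  search()
 * stands in for btrfs_search_slot() by returning the slot where 'key' would
 * be inserted; stepping back one slot then yields the greatest key strictly
 * less than 'key'.  Slot 0 means no predecessor exists, and an exact match is
 * not expected because callers search with an impossible key (type (u8)-1) --
 * the two -EIO branches in the kernel helper.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static size_t search(const uint64_t *keys, size_t n, uint64_t key)
{
	size_t lo = 0, hi = n;

	while (lo < hi) {		/* first index with keys[i] >= key */
		size_t mid = lo + (hi - lo) / 2;

		if (keys[mid] < key)
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

/* Returns -1 when there is no key strictly smaller than 'key'. */
static int prev_slot(const uint64_t *keys, size_t n, uint64_t key)
{
	size_t slot = search(keys, n, key);

	return slot == 0 ? -1 : (int)(slot - 1);
}

int main(void)
{
	uint64_t keys[] = { 10, 20, 30 };

	printf("%d %d\n", prev_slot(keys, 3, 25), prev_slot(keys, 3, 5));
	return 0;
}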
157 
158 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
159 					 u64 size)
160 {
161 	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
162 }
163 
164 static unsigned long *alloc_bitmap(u32 bitmap_size)
165 {
166 	unsigned long *ret;
167 	unsigned int nofs_flag;
168 	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
169 
170 	/*
171 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
172 	 * into the filesystem as the free space bitmap can be modified in the
173 	 * critical section of a transaction commit.
174 	 *
175 	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
176 	 * know that recursion is unsafe.
177 	 */
178 	nofs_flag = memalloc_nofs_save();
179 	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
180 	memalloc_nofs_restore(nofs_flag);
181 	return ret;
182 }
183 
184 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
185 {
186 	u8 *p = ((u8 *)map) + BIT_BYTE(start);
187 	const unsigned int size = start + len;
188 	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
189 	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
190 
191 	while (len - bits_to_set >= 0) {
192 		*p |= mask_to_set;
193 		len -= bits_to_set;
194 		bits_to_set = BITS_PER_BYTE;
195 		mask_to_set = ~0;
196 		p++;
197 	}
198 	if (len) {
199 		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
200 		*p |= mask_to_set;
201 	}
202 }
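
/*
 * Editorial note: a standalone userspace sketch of le_bitmap_set() above.
 * The helper macros are written out explicitly so the snippet compiles on its
 * own; they mirror the kernel's BIT_BYTE()/BITMAP_FIRST_BYTE_MASK()/
 * BITMAP_LAST_BYTE_MASK() little-endian bitmap macros.  This is an
 * illustration, not a drop-in replacement.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_BITS_PER_BYTE	8
#define DEMO_BIT_BYTE(nr)	((nr) / DEMO_BITS_PER_BYTE)
/* Mask covering bit (start % 8) and everything above it within the byte. */
#define DEMO_FIRST_BYTE_MASK(start)	((uint8_t)(0xff << ((start) & 7)))
/* Mask covering everything up to and including bit ((end - 1) % 8). */
#define DEMO_LAST_BYTE_MASK(end)	((uint8_t)(0xff >> (-(end) & 7)))

static void demo_le_bitmap_set(uint8_t *map, unsigned int start, int len)
{
	uint8_t *p = map + DEMO_BIT_BYTE(start);
	const unsigned int size = start + len;
	int bits_to_set = DEMO_BITS_PER_BYTE - (start % DEMO_BITS_PER_BYTE);
	uint8_t mask_to_set = DEMO_FIRST_BYTE_MASK(start);

	/* Fill the partial first byte, then whole bytes, then the tail. */
	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = DEMO_BITS_PER_BYTE;
		mask_to_set = 0xff;
		p++;
	}
	if (len) {
		mask_to_set &= DEMO_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

int main(void)
{
	uint8_t map[4] = { 0 };

	demo_le_bitmap_set(map, 6, 13);		/* set bits 6..18 */
	printf("%02x %02x %02x %02x\n", map[0], map[1], map[2], map[3]);
	return 0;				/* prints: c0 ff 07 00 */
}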
203 
204 EXPORT_FOR_TESTS
205 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
206 				  struct btrfs_block_group *block_group,
207 				  struct btrfs_path *path)
208 {
209 	struct btrfs_fs_info *fs_info = trans->fs_info;
210 	struct btrfs_root *root = btrfs_free_space_root(block_group);
211 	struct btrfs_free_space_info *info;
212 	struct btrfs_key key, found_key;
213 	struct extent_buffer *leaf;
214 	unsigned long *bitmap;
215 	char *bitmap_cursor;
216 	u64 start, end;
217 	u64 bitmap_range, i;
218 	u32 bitmap_size, flags, expected_extent_count;
219 	u32 extent_count = 0;
220 	int done = 0, nr;
221 	int ret;
222 
223 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
224 	bitmap = alloc_bitmap(bitmap_size);
225 	if (!bitmap) {
226 		ret = -ENOMEM;
227 		goto out;
228 	}
229 
230 	start = block_group->start;
231 	end = block_group->start + block_group->length;
232 
233 	key.objectid = end - 1;
234 	key.type = (u8)-1;
235 	key.offset = (u64)-1;
236 
237 	while (!done) {
238 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
239 		if (ret)
240 			goto out;
241 
242 		leaf = path->nodes[0];
243 		nr = 0;
244 		path->slots[0]++;
245 		while (path->slots[0] > 0) {
246 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
247 
248 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
249 				ASSERT(found_key.objectid == block_group->start);
250 				ASSERT(found_key.offset == block_group->length);
251 				done = 1;
252 				break;
253 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
254 				u64 first, last;
255 
256 				ASSERT(found_key.objectid >= start);
257 				ASSERT(found_key.objectid < end);
258 				ASSERT(found_key.objectid + found_key.offset <= end);
259 
260 				first = div_u64(found_key.objectid - start,
261 						fs_info->sectorsize);
262 				last = div_u64(found_key.objectid + found_key.offset - start,
263 					       fs_info->sectorsize);
264 				le_bitmap_set(bitmap, first, last - first);
265 
266 				extent_count++;
267 				nr++;
268 				path->slots[0]--;
269 			} else {
270 				ASSERT(0);
271 			}
272 		}
273 
274 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
275 		if (ret)
276 			goto out;
277 		btrfs_release_path(path);
278 	}
279 
280 	info = search_free_space_info(trans, block_group, path, 1);
281 	if (IS_ERR(info)) {
282 		ret = PTR_ERR(info);
283 		goto out;
284 	}
285 	leaf = path->nodes[0];
286 	flags = btrfs_free_space_flags(leaf, info);
287 	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
288 	btrfs_set_free_space_flags(leaf, info, flags);
289 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
290 	btrfs_mark_buffer_dirty(leaf);
291 	btrfs_release_path(path);
292 
293 	if (extent_count != expected_extent_count) {
294 		btrfs_err(fs_info,
295 			  "incorrect extent count for %llu; counted %u, expected %u",
296 			  block_group->start, extent_count,
297 			  expected_extent_count);
298 		ASSERT(0);
299 		ret = -EIO;
300 		goto out;
301 	}
302 
303 	bitmap_cursor = (char *)bitmap;
304 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
305 	i = start;
306 	while (i < end) {
307 		unsigned long ptr;
308 		u64 extent_size;
309 		u32 data_size;
310 
311 		extent_size = min(end - i, bitmap_range);
312 		data_size = free_space_bitmap_size(fs_info, extent_size);
313 
314 		key.objectid = i;
315 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
316 		key.offset = extent_size;
317 
318 		ret = btrfs_insert_empty_item(trans, root, path, &key,
319 					      data_size);
320 		if (ret)
321 			goto out;
322 
323 		leaf = path->nodes[0];
324 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
325 		write_extent_buffer(leaf, bitmap_cursor, ptr,
326 				    data_size);
327 		btrfs_mark_buffer_dirty(leaf);
328 		btrfs_release_path(path);
329 
330 		i += extent_size;
331 		bitmap_cursor += data_size;
332 	}
333 
334 	ret = 0;
335 out:
336 	kvfree(bitmap);
337 	if (ret)
338 		btrfs_abort_transaction(trans, ret);
339 	return ret;
340 }
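
/*
 * Editorial note: a userspace sketch of the extent -> bitmap transformation
 * performed by convert_free_space_to_bitmaps() above, operating on an
 * in-memory array instead of free space tree items.  A 4K sector size is
 * assumed for the example, and the item deletion / USING_BITMAPS flag flip
 * done by the kernel function is omitted.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_extent { uint64_t offset; uint64_t len; };	/* bytes */

static void extents_to_bitmap(uint64_t bg_start, const struct demo_extent *ext,
			      int nr, uint8_t *bitmap)
{
	const uint32_t sectorsize = 4096;	/* assumed */
	int i;

	for (i = 0; i < nr; i++) {
		uint64_t first = (ext[i].offset - bg_start) / sectorsize;
		uint64_t last = (ext[i].offset + ext[i].len - bg_start) /
				sectorsize;
		uint64_t bit;

		for (bit = first; bit < last; bit++)
			bitmap[bit / 8] |= 1 << (bit % 8);
	}
}

int main(void)
{
	struct demo_extent ext[] = {
		{ 1024 * 4096, 2 * 4096 },	/* sectors 1024-1025 free */
		{ 4000 * 4096, 1 * 4096 },	/* sector 4000 free */
	};
	uint8_t bitmap[1024] = { 0 };		/* bits for 8192 sectors */

	extents_to_bitmap(0, ext, 2, bitmap);
	printf("byte 128 = %02x, byte 500 = %02x\n", bitmap[128], bitmap[500]);
	return 0;
}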
341 
342 EXPORT_FOR_TESTS
343 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
344 				  struct btrfs_block_group *block_group,
345 				  struct btrfs_path *path)
346 {
347 	struct btrfs_fs_info *fs_info = trans->fs_info;
348 	struct btrfs_root *root = btrfs_free_space_root(block_group);
349 	struct btrfs_free_space_info *info;
350 	struct btrfs_key key, found_key;
351 	struct extent_buffer *leaf;
352 	unsigned long *bitmap;
353 	u64 start, end;
354 	u32 bitmap_size, flags, expected_extent_count;
355 	unsigned long nrbits, start_bit, end_bit;
356 	u32 extent_count = 0;
357 	int done = 0, nr;
358 	int ret;
359 
360 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
361 	bitmap = alloc_bitmap(bitmap_size);
362 	if (!bitmap) {
363 		ret = -ENOMEM;
364 		goto out;
365 	}
366 
367 	start = block_group->start;
368 	end = block_group->start + block_group->length;
369 
370 	key.objectid = end - 1;
371 	key.type = (u8)-1;
372 	key.offset = (u64)-1;
373 
374 	while (!done) {
375 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
376 		if (ret)
377 			goto out;
378 
379 		leaf = path->nodes[0];
380 		nr = 0;
381 		path->slots[0]++;
382 		while (path->slots[0] > 0) {
383 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
384 
385 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
386 				ASSERT(found_key.objectid == block_group->start);
387 				ASSERT(found_key.offset == block_group->length);
388 				done = 1;
389 				break;
390 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
391 				unsigned long ptr;
392 				char *bitmap_cursor;
393 				u32 bitmap_pos, data_size;
394 
395 				ASSERT(found_key.objectid >= start);
396 				ASSERT(found_key.objectid < end);
397 				ASSERT(found_key.objectid + found_key.offset <= end);
398 
399 				bitmap_pos = div_u64(found_key.objectid - start,
400 						     fs_info->sectorsize *
401 						     BITS_PER_BYTE);
402 				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
403 				data_size = free_space_bitmap_size(fs_info,
404 								found_key.offset);
405 
406 				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
407 				read_extent_buffer(leaf, bitmap_cursor, ptr,
408 						   data_size);
409 
410 				nr++;
411 				path->slots[0]--;
412 			} else {
413 				ASSERT(0);
414 			}
415 		}
416 
417 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
418 		if (ret)
419 			goto out;
420 		btrfs_release_path(path);
421 	}
422 
423 	info = search_free_space_info(trans, block_group, path, 1);
424 	if (IS_ERR(info)) {
425 		ret = PTR_ERR(info);
426 		goto out;
427 	}
428 	leaf = path->nodes[0];
429 	flags = btrfs_free_space_flags(leaf, info);
430 	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
431 	btrfs_set_free_space_flags(leaf, info, flags);
432 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
433 	btrfs_mark_buffer_dirty(leaf);
434 	btrfs_release_path(path);
435 
436 	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
437 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
438 
439 	while (start_bit < nrbits) {
440 		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
441 		ASSERT(start_bit < end_bit);
442 
443 		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
444 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
445 		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
446 
447 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
448 		if (ret)
449 			goto out;
450 		btrfs_release_path(path);
451 
452 		extent_count++;
453 
454 		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
455 	}
456 
457 	if (extent_count != expected_extent_count) {
458 		btrfs_err(fs_info,
459 			  "incorrect extent count for %llu; counted %u, expected %u",
460 			  block_group->start, extent_count,
461 			  expected_extent_count);
462 		ASSERT(0);
463 		ret = -EIO;
464 		goto out;
465 	}
466 
467 	ret = 0;
468 out:
469 	kvfree(bitmap);
470 	if (ret)
471 		btrfs_abort_transaction(trans, ret);
472 	return ret;
473 }
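
/*
 * Editorial note: the reverse direction of the previous sketch -- a userspace
 * illustration of convert_free_space_to_extents() above.  Runs of set bits
 * become one free space extent each; the open-coded bit test stands in for
 * find_next_bit_le()/find_next_zero_bit_le(), and a 4K sector size is again
 * assumed.
 */
#include <stdint.h>
#include <stdio.h>

static int demo_test_bit(const uint8_t *bitmap, uint64_t nr)
{
	return (bitmap[nr / 8] >> (nr % 8)) & 1;
}

static unsigned int bitmap_to_extents(const uint8_t *bitmap, uint64_t nrbits,
				      uint64_t bg_start)
{
	const uint32_t sectorsize = 4096;	/* assumed */
	unsigned int extent_count = 0;
	uint64_t bit = 0;

	while (bit < nrbits) {
		uint64_t start_bit, end_bit;

		while (bit < nrbits && !demo_test_bit(bitmap, bit))
			bit++;			/* find_next_bit_le() */
		if (bit == nrbits)
			break;
		start_bit = bit;
		while (bit < nrbits && demo_test_bit(bitmap, bit))
			bit++;			/* find_next_zero_bit_le() */
		end_bit = bit;

		printf("extent: objectid=%llu offset=%llu\n",
		       (unsigned long long)(bg_start + start_bit * sectorsize),
		       (unsigned long long)((end_bit - start_bit) * sectorsize));
		extent_count++;
	}
	return extent_count;
}

int main(void)
{
	uint8_t bitmap[2] = { 0x03, 0x01 };	/* bits 0, 1 and 8 set */

	printf("extents: %u\n", bitmap_to_extents(bitmap, 16, 0));
	return 0;
}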
474 
475 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
476 					  struct btrfs_block_group *block_group,
477 					  struct btrfs_path *path,
478 					  int new_extents)
479 {
480 	struct btrfs_free_space_info *info;
481 	u32 flags;
482 	u32 extent_count;
483 	int ret = 0;
484 
485 	if (new_extents == 0)
486 		return 0;
487 
488 	info = search_free_space_info(trans, block_group, path, 1);
489 	if (IS_ERR(info)) {
490 		ret = PTR_ERR(info);
491 		goto out;
492 	}
493 	flags = btrfs_free_space_flags(path->nodes[0], info);
494 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
495 
496 	extent_count += new_extents;
497 	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
498 	btrfs_mark_buffer_dirty(path->nodes[0]);
499 	btrfs_release_path(path);
500 
501 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
502 	    extent_count > block_group->bitmap_high_thresh) {
503 		ret = convert_free_space_to_bitmaps(trans, block_group, path);
504 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
505 		   extent_count < block_group->bitmap_low_thresh) {
506 		ret = convert_free_space_to_extents(trans, block_group, path);
507 	}
508 
509 out:
510 	return ret;
511 }
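
/*
 * Editorial note: the format-switch policy driven by the thresholds, reduced
 * to a standalone predicate for illustration.  The threshold values are
 * whatever set_free_space_tree_thresholds() computed; the gap between high
 * and low is what keeps a block group from thrashing between formats.
 */
#include <stdint.h>

enum demo_format { DEMO_EXTENTS, DEMO_BITMAPS };

static enum demo_format demo_pick_format(enum demo_format cur,
					 uint32_t extent_count,
					 uint32_t high_thresh,
					 uint32_t low_thresh)
{
	if (cur == DEMO_EXTENTS && extent_count > high_thresh)
		return DEMO_BITMAPS;	/* convert_free_space_to_bitmaps() */
	if (cur == DEMO_BITMAPS && extent_count < low_thresh)
		return DEMO_EXTENTS;	/* convert_free_space_to_extents() */
	return cur;			/* stay in the current format */
}

int main(void)
{
	/* e.g. with high=1438, low=1338: 1500 extents switches to bitmaps. */
	return demo_pick_format(DEMO_EXTENTS, 1500, 1438, 1338) == DEMO_BITMAPS ? 0 : 1;
}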
512 
513 EXPORT_FOR_TESTS
514 int free_space_test_bit(struct btrfs_block_group *block_group,
515 			struct btrfs_path *path, u64 offset)
516 {
517 	struct extent_buffer *leaf;
518 	struct btrfs_key key;
519 	u64 found_start, found_end;
520 	unsigned long ptr, i;
521 
522 	leaf = path->nodes[0];
523 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
524 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
525 
526 	found_start = key.objectid;
527 	found_end = key.objectid + key.offset;
528 	ASSERT(offset >= found_start && offset < found_end);
529 
530 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
531 	i = div_u64(offset - found_start,
532 		    block_group->fs_info->sectorsize);
533 	return !!extent_buffer_test_bit(leaf, ptr, i);
534 }
535 
536 static void free_space_set_bits(struct btrfs_block_group *block_group,
537 				struct btrfs_path *path, u64 *start, u64 *size,
538 				int bit)
539 {
540 	struct btrfs_fs_info *fs_info = block_group->fs_info;
541 	struct extent_buffer *leaf;
542 	struct btrfs_key key;
543 	u64 end = *start + *size;
544 	u64 found_start, found_end;
545 	unsigned long ptr, first, last;
546 
547 	leaf = path->nodes[0];
548 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
549 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
550 
551 	found_start = key.objectid;
552 	found_end = key.objectid + key.offset;
553 	ASSERT(*start >= found_start && *start < found_end);
554 	ASSERT(end > found_start);
555 
556 	if (end > found_end)
557 		end = found_end;
558 
559 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
560 	first = (*start - found_start) >> fs_info->sectorsize_bits;
561 	last = (end - found_start) >> fs_info->sectorsize_bits;
562 	if (bit)
563 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
564 	else
565 		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
566 	btrfs_mark_buffer_dirty(leaf);
567 
568 	*size -= end - *start;
569 	*start = end;
570 }
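
/*
 * Editorial note: a sketch of the cursor contract used by
 * free_space_set_bits() above.  Each call handles only the part of
 * [start, start + size) that falls inside the current bitmap item, then
 * advances the cursor so the caller can step to the next item until size
 * reaches zero.  The item boundaries here are made up for the example.
 */
#include <stdint.h>
#include <stdio.h>

static void demo_set_bits(uint64_t item_start, uint64_t item_end,
			  uint64_t *start, uint64_t *size)
{
	uint64_t end = *start + *size;

	if (end > item_end)
		end = item_end;			/* clamp to this bitmap item */

	printf("set [%llu, %llu) in item [%llu, %llu)\n",
	       (unsigned long long)*start, (unsigned long long)end,
	       (unsigned long long)item_start, (unsigned long long)item_end);

	*size -= end - *start;			/* consume what we handled */
	*start = end;				/* the next call starts here */
}

int main(void)
{
	/* Two adjacent 8 MiB bitmap items; the range straddles both. */
	uint64_t start = 6 << 20, size = 4 << 20;

	demo_set_bits(0, 8 << 20, &start, &size);
	if (size)
		demo_set_bits(8 << 20, 16 << 20, &start, &size);
	return 0;
}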
571 
572 /*
573  * We can't use btrfs_next_item() in modify_free_space_bitmap() because
574  * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
575  * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
576  * looking for.
577  */
578 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
579 				  struct btrfs_root *root, struct btrfs_path *p)
580 {
581 	struct btrfs_key key;
582 
583 	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
584 		p->slots[0]++;
585 		return 0;
586 	}
587 
588 	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
589 	btrfs_release_path(p);
590 
591 	key.objectid += key.offset;
592 	key.type = (u8)-1;
593 	key.offset = (u64)-1;
594 
595 	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
596 }
597 
598 /*
599  * If remove is 1, then we are removing free space, thus clearing bits in the
600  * bitmap. If remove is 0, then we are adding free space, thus setting bits in
601  * the bitmap.
602  */
603 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
604 				    struct btrfs_block_group *block_group,
605 				    struct btrfs_path *path,
606 				    u64 start, u64 size, int remove)
607 {
608 	struct btrfs_root *root = btrfs_free_space_root(block_group);
609 	struct btrfs_key key;
610 	u64 end = start + size;
611 	u64 cur_start, cur_size;
612 	int prev_bit, next_bit;
613 	int new_extents;
614 	int ret;
615 
616 	/*
617 	 * Read the bit for the block immediately before the extent of space if
618 	 * that block is within the block group.
619 	 */
620 	if (start > block_group->start) {
621 		u64 prev_block = start - block_group->fs_info->sectorsize;
622 
623 		key.objectid = prev_block;
624 		key.type = (u8)-1;
625 		key.offset = (u64)-1;
626 
627 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
628 		if (ret)
629 			goto out;
630 
631 		prev_bit = free_space_test_bit(block_group, path, prev_block);
632 
633 		/* The previous block may have been in the previous bitmap. */
634 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
635 		if (start >= key.objectid + key.offset) {
636 			ret = free_space_next_bitmap(trans, root, path);
637 			if (ret)
638 				goto out;
639 		}
640 	} else {
641 		key.objectid = start;
642 		key.type = (u8)-1;
643 		key.offset = (u64)-1;
644 
645 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
646 		if (ret)
647 			goto out;
648 
649 		prev_bit = -1;
650 	}
651 
652 	/*
653 	 * Iterate over all of the bitmaps overlapped by the extent of space,
654 	 * clearing/setting bits as required.
655 	 */
656 	cur_start = start;
657 	cur_size = size;
658 	while (1) {
659 		free_space_set_bits(block_group, path, &cur_start, &cur_size,
660 				    !remove);
661 		if (cur_size == 0)
662 			break;
663 		ret = free_space_next_bitmap(trans, root, path);
664 		if (ret)
665 			goto out;
666 	}
667 
668 	/*
669 	 * Read the bit for the block immediately after the extent of space if
670 	 * that block is within the block group.
671 	 */
672 	if (end < block_group->start + block_group->length) {
673 		/* The next block may be in the next bitmap. */
674 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
675 		if (end >= key.objectid + key.offset) {
676 			ret = free_space_next_bitmap(trans, root, path);
677 			if (ret)
678 				goto out;
679 		}
680 
681 		next_bit = free_space_test_bit(block_group, path, end);
682 	} else {
683 		next_bit = -1;
684 	}
685 
686 	if (remove) {
687 		new_extents = -1;
688 		if (prev_bit == 1) {
689 			/* Leftover on the left. */
690 			new_extents++;
691 		}
692 		if (next_bit == 1) {
693 			/* Leftover on the right. */
694 			new_extents++;
695 		}
696 	} else {
697 		new_extents = 1;
698 		if (prev_bit == 1) {
699 			/* Merging with neighbor on the left. */
700 			new_extents--;
701 		}
702 		if (next_bit == 1) {
703 			/* Merging with neighbor on the right. */
704 			new_extents--;
705 		}
706 	}
707 
708 	btrfs_release_path(path);
709 	ret = update_free_space_extent_count(trans, block_group, path,
710 					     new_extents);
711 
712 out:
713 	return ret;
714 }
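
/*
 * Editorial note: the new_extents accounting at the end of
 * modify_free_space_bitmap(), isolated as a small pure function for
 * illustration.  prev_bit/next_bit are 1 if the neighbouring block is free,
 * 0 if it is allocated, and -1 if it lies outside the block group:
 *
 *   removing: start at -1, +1 for each free neighbour left behind
 *   adding:   start at +1, -1 for each free neighbour we merge with
 */
#include <assert.h>

static int demo_new_extents(int remove, int prev_bit, int next_bit)
{
	int new_extents;

	if (remove) {
		new_extents = -1;
		if (prev_bit == 1)	/* leftover on the left */
			new_extents++;
		if (next_bit == 1)	/* leftover on the right */
			new_extents++;
	} else {
		new_extents = 1;
		if (prev_bit == 1)	/* merging with the left neighbour */
			new_extents--;
		if (next_bit == 1)	/* merging with the right neighbour */
			new_extents--;
	}
	return new_extents;
}

int main(void)
{
	/* Removing from the middle of a free region splits it: one more extent. */
	assert(demo_new_extents(1, 1, 1) == 1);
	/* Adding space that bridges two free regions merges them: one fewer. */
	assert(demo_new_extents(0, 1, 1) == -1);
	return 0;
}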
715 
716 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
717 				    struct btrfs_block_group *block_group,
718 				    struct btrfs_path *path,
719 				    u64 start, u64 size)
720 {
721 	struct btrfs_root *root = btrfs_free_space_root(block_group);
722 	struct btrfs_key key;
723 	u64 found_start, found_end;
724 	u64 end = start + size;
725 	int new_extents = -1;
726 	int ret;
727 
728 	key.objectid = start;
729 	key.type = (u8)-1;
730 	key.offset = (u64)-1;
731 
732 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
733 	if (ret)
734 		goto out;
735 
736 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
737 
738 	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
739 
740 	found_start = key.objectid;
741 	found_end = key.objectid + key.offset;
742 	ASSERT(start >= found_start && end <= found_end);
743 
744 	/*
745 	 * Okay, now that we've found the free space extent which contains the
746 	 * free space that we are removing, there are four cases:
747 	 *
748 	 * 1. We're using the whole extent: delete the key we found and
749 	 * decrement the free space extent count.
750 	 * 2. We are using part of the extent starting at the beginning: delete
751 	 * the key we found and insert a new key representing the leftover at
752 	 * the end. There is no net change in the number of extents.
753 	 * 3. We are using part of the extent ending at the end: delete the key
754 	 * we found and insert a new key representing the leftover at the
755 	 * beginning. There is no net change in the number of extents.
756 	 * 4. We are using part of the extent in the middle: delete the key we
757 	 * found and insert two new keys representing the leftovers on each
758 	 * side. Where we used to have one extent, we now have two, so increment
759 	 * the extent count. We may need to convert the block group to bitmaps
760 	 * as a result.
761 	 */
762 
763 	/* Delete the existing key (cases 1-4). */
764 	ret = btrfs_del_item(trans, root, path);
765 	if (ret)
766 		goto out;
767 
768 	/* Add a key for leftovers at the beginning (cases 3 and 4). */
769 	if (start > found_start) {
770 		key.objectid = found_start;
771 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
772 		key.offset = start - found_start;
773 
774 		btrfs_release_path(path);
775 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
776 		if (ret)
777 			goto out;
778 		new_extents++;
779 	}
780 
781 	/* Add a key for leftovers at the end (cases 2 and 4). */
782 	if (end < found_end) {
783 		key.objectid = end;
784 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
785 		key.offset = found_end - end;
786 
787 		btrfs_release_path(path);
788 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
789 		if (ret)
790 			goto out;
791 		new_extents++;
792 	}
793 
794 	btrfs_release_path(path);
795 	ret = update_free_space_extent_count(trans, block_group, path,
796 					     new_extents);
797 
798 out:
799 	return ret;
800 }
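
/*
 * Editorial note: the four removal cases described above, condensed into a
 * userspace helper that "splits" a free extent [found_start, found_end)
 * around the removed range [start, end) and returns the net change in the
 * extent count (-1, 0 or +1).
 */
#include <assert.h>
#include <stdint.h>

static int demo_remove_from_extent(uint64_t found_start, uint64_t found_end,
				   uint64_t start, uint64_t end)
{
	int new_extents = -1;		/* the found extent is always deleted */

	assert(start >= found_start && end <= found_end);

	if (start > found_start)	/* leftover at the beginning (3, 4) */
		new_extents++;
	if (end < found_end)		/* leftover at the end (2, 4) */
		new_extents++;
	return new_extents;
}

int main(void)
{
	assert(demo_remove_from_extent(0, 100, 0, 100) == -1);	/* case 1 */
	assert(demo_remove_from_extent(0, 100, 0, 40) == 0);	/* case 2 */
	assert(demo_remove_from_extent(0, 100, 60, 100) == 0);	/* case 3 */
	assert(demo_remove_from_extent(0, 100, 40, 60) == 1);	/* case 4 */
	return 0;
}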
801 
802 EXPORT_FOR_TESTS
803 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
804 				  struct btrfs_block_group *block_group,
805 				  struct btrfs_path *path, u64 start, u64 size)
806 {
807 	struct btrfs_free_space_info *info;
808 	u32 flags;
809 	int ret;
810 
811 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
812 		ret = __add_block_group_free_space(trans, block_group, path);
813 		if (ret)
814 			return ret;
815 	}
816 
817 	info = search_free_space_info(NULL, block_group, path, 0);
818 	if (IS_ERR(info))
819 		return PTR_ERR(info);
820 	flags = btrfs_free_space_flags(path->nodes[0], info);
821 	btrfs_release_path(path);
822 
823 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
824 		return modify_free_space_bitmap(trans, block_group, path,
825 						start, size, 1);
826 	} else {
827 		return remove_free_space_extent(trans, block_group, path,
828 						start, size);
829 	}
830 }
831 
832 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
833 				u64 start, u64 size)
834 {
835 	struct btrfs_block_group *block_group;
836 	struct btrfs_path *path;
837 	int ret;
838 
839 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
840 		return 0;
841 
842 	path = btrfs_alloc_path();
843 	if (!path) {
844 		ret = -ENOMEM;
845 		goto out;
846 	}
847 
848 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
849 	if (!block_group) {
850 		ASSERT(0);
851 		ret = -ENOENT;
852 		goto out;
853 	}
854 
855 	mutex_lock(&block_group->free_space_lock);
856 	ret = __remove_from_free_space_tree(trans, block_group, path, start,
857 					    size);
858 	mutex_unlock(&block_group->free_space_lock);
859 
860 	btrfs_put_block_group(block_group);
861 out:
862 	btrfs_free_path(path);
863 	if (ret)
864 		btrfs_abort_transaction(trans, ret);
865 	return ret;
866 }
867 
868 static int add_free_space_extent(struct btrfs_trans_handle *trans,
869 				 struct btrfs_block_group *block_group,
870 				 struct btrfs_path *path,
871 				 u64 start, u64 size)
872 {
873 	struct btrfs_root *root = btrfs_free_space_root(block_group);
874 	struct btrfs_key key, new_key;
875 	u64 found_start, found_end;
876 	u64 end = start + size;
877 	int new_extents = 1;
878 	int ret;
879 
880 	/*
881 	 * We are adding a new extent of free space, but we need to merge
882 	 * extents. There are four cases here:
883 	 *
884 	 * 1. The new extent does not have any immediate neighbors to merge
885 	 * with: add the new key and increment the free space extent count. We
886 	 * may need to convert the block group to bitmaps as a result.
887 	 * 2. The new extent has an immediate neighbor before it: remove the
888 	 * previous key and insert a new key combining both of them. There is no
889 	 * net change in the number of extents.
890 	 * 3. The new extent has an immediate neighbor after it: remove the next
891 	 * key and insert a new key combining both of them. There is no net
892 	 * change in the number of extents.
893 	 * 4. The new extent has immediate neighbors on both sides: remove both
894 	 * of the keys and insert a new key combining all of them. Where we used
895 	 * to have two extents, we now have one, so decrement the extent count.
896 	 */
897 
898 	new_key.objectid = start;
899 	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
900 	new_key.offset = size;
901 
902 	/* Search for a neighbor on the left. */
903 	if (start == block_group->start)
904 		goto right;
905 	key.objectid = start - 1;
906 	key.type = (u8)-1;
907 	key.offset = (u64)-1;
908 
909 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
910 	if (ret)
911 		goto out;
912 
913 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
914 
915 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
916 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
917 		btrfs_release_path(path);
918 		goto right;
919 	}
920 
921 	found_start = key.objectid;
922 	found_end = key.objectid + key.offset;
923 	ASSERT(found_start >= block_group->start &&
924 	       found_end > block_group->start);
925 	ASSERT(found_start < start && found_end <= start);
926 
927 	/*
928 	 * Delete the neighbor on the left and absorb it into the new key (cases
929 	 * 2 and 4).
930 	 */
931 	if (found_end == start) {
932 		ret = btrfs_del_item(trans, root, path);
933 		if (ret)
934 			goto out;
935 		new_key.objectid = found_start;
936 		new_key.offset += key.offset;
937 		new_extents--;
938 	}
939 	btrfs_release_path(path);
940 
941 right:
942 	/* Search for a neighbor on the right. */
943 	if (end == block_group->start + block_group->length)
944 		goto insert;
945 	key.objectid = end;
946 	key.type = (u8)-1;
947 	key.offset = (u64)-1;
948 
949 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
950 	if (ret)
951 		goto out;
952 
953 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
954 
955 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
956 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
957 		btrfs_release_path(path);
958 		goto insert;
959 	}
960 
961 	found_start = key.objectid;
962 	found_end = key.objectid + key.offset;
963 	ASSERT(found_start >= block_group->start &&
964 	       found_end > block_group->start);
965 	ASSERT((found_start < start && found_end <= start) ||
966 	       (found_start >= end && found_end > end));
967 
968 	/*
969 	 * Delete the neighbor on the right and absorb it into the new key
970 	 * (cases 3 and 4).
971 	 */
972 	if (found_start == end) {
973 		ret = btrfs_del_item(trans, root, path);
974 		if (ret)
975 			goto out;
976 		new_key.offset += key.offset;
977 		new_extents--;
978 	}
979 	btrfs_release_path(path);
980 
981 insert:
982 	/* Insert the new key (cases 1-4). */
983 	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
984 	if (ret)
985 		goto out;
986 
987 	btrfs_release_path(path);
988 	ret = update_free_space_extent_count(trans, block_group, path,
989 					     new_extents);
990 
991 out:
992 	return ret;
993 }
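
/*
 * Editorial note: the four merge cases described above, as a userspace
 * helper.  Given the new range and the (optional) free extents that end
 * exactly at 'start' or begin exactly at 'end', it computes the merged key
 * and the change in extent count.  struct demo_key is a stand-in for the
 * (objectid, offset) part of a btrfs_key.
 */
#include <assert.h>
#include <stdint.h>
#include <stddef.h>

struct demo_key { uint64_t objectid; uint64_t offset; };

static int demo_add_extent(uint64_t start, uint64_t size,
			   const struct demo_key *left,		/* or NULL */
			   const struct demo_key *right,	/* or NULL */
			   struct demo_key *out)
{
	int new_extents = 1;

	out->objectid = start;
	out->offset = size;

	if (left && left->objectid + left->offset == start) {
		out->objectid = left->objectid;	/* absorb the left neighbour */
		out->offset += left->offset;
		new_extents--;
	}
	if (right && right->objectid == start + size) {
		out->offset += right->offset;	/* absorb the right neighbour */
		new_extents--;
	}
	return new_extents;
}

int main(void)
{
	struct demo_key left = { 0, 100 }, right = { 200, 50 }, out;

	/* Case 4: neighbours on both sides collapse into a single extent. */
	assert(demo_add_extent(100, 100, &left, &right, &out) == -1);
	assert(out.objectid == 0 && out.offset == 250);
	return 0;
}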
994 
995 EXPORT_FOR_TESTS
996 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
997 			     struct btrfs_block_group *block_group,
998 			     struct btrfs_path *path, u64 start, u64 size)
999 {
1000 	struct btrfs_free_space_info *info;
1001 	u32 flags;
1002 	int ret;
1003 
1004 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1005 		ret = __add_block_group_free_space(trans, block_group, path);
1006 		if (ret)
1007 			return ret;
1008 	}
1009 
1010 	info = search_free_space_info(NULL, block_group, path, 0);
1011 	if (IS_ERR(info))
1012 		return PTR_ERR(info);
1013 	flags = btrfs_free_space_flags(path->nodes[0], info);
1014 	btrfs_release_path(path);
1015 
1016 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1017 		return modify_free_space_bitmap(trans, block_group, path,
1018 						start, size, 0);
1019 	} else {
1020 		return add_free_space_extent(trans, block_group, path, start,
1021 					     size);
1022 	}
1023 }
1024 
1025 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1026 			   u64 start, u64 size)
1027 {
1028 	struct btrfs_block_group *block_group;
1029 	struct btrfs_path *path;
1030 	int ret;
1031 
1032 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1033 		return 0;
1034 
1035 	path = btrfs_alloc_path();
1036 	if (!path) {
1037 		ret = -ENOMEM;
1038 		goto out;
1039 	}
1040 
1041 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1042 	if (!block_group) {
1043 		ASSERT(0);
1044 		ret = -ENOENT;
1045 		goto out;
1046 	}
1047 
1048 	mutex_lock(&block_group->free_space_lock);
1049 	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1050 	mutex_unlock(&block_group->free_space_lock);
1051 
1052 	btrfs_put_block_group(block_group);
1053 out:
1054 	btrfs_free_path(path);
1055 	if (ret)
1056 		btrfs_abort_transaction(trans, ret);
1057 	return ret;
1058 }
1059 
1060 /*
1061  * Populate the free space tree by walking the extent tree. Operations on the
1062  * extent tree that happen as a result of writes to the free space tree will go
1063  * through the normal add/remove hooks.
1064  */
1065 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1066 				    struct btrfs_block_group *block_group)
1067 {
1068 	struct btrfs_root *extent_root;
1069 	struct btrfs_path *path, *path2;
1070 	struct btrfs_key key;
1071 	u64 start, end;
1072 	int ret;
1073 
1074 	path = btrfs_alloc_path();
1075 	if (!path)
1076 		return -ENOMEM;
1077 	path->reada = READA_FORWARD;
1078 
1079 	path2 = btrfs_alloc_path();
1080 	if (!path2) {
1081 		btrfs_free_path(path);
1082 		return -ENOMEM;
1083 	}
1084 
1085 	ret = add_new_free_space_info(trans, block_group, path2);
1086 	if (ret)
1087 		goto out;
1088 
1089 	mutex_lock(&block_group->free_space_lock);
1090 
1091 	/*
1092 	 * Iterate through all of the extent and metadata items in this block
1093 	 * group, adding the free space between them and the free space at the
1094 	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1095 	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1096 	 * contained in.
1097 	 */
1098 	key.objectid = block_group->start;
1099 	key.type = BTRFS_EXTENT_ITEM_KEY;
1100 	key.offset = 0;
1101 
1102 	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1103 	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1104 	if (ret < 0)
1105 		goto out_locked;
1106 	ASSERT(ret == 0);
1107 
1108 	start = block_group->start;
1109 	end = block_group->start + block_group->length;
1110 	while (1) {
1111 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1112 
1113 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1114 		    key.type == BTRFS_METADATA_ITEM_KEY) {
1115 			if (key.objectid >= end)
1116 				break;
1117 
1118 			if (start < key.objectid) {
1119 				ret = __add_to_free_space_tree(trans,
1120 							       block_group,
1121 							       path2, start,
1122 							       key.objectid -
1123 							       start);
1124 				if (ret)
1125 					goto out_locked;
1126 			}
1127 			start = key.objectid;
1128 			if (key.type == BTRFS_METADATA_ITEM_KEY)
1129 				start += trans->fs_info->nodesize;
1130 			else
1131 				start += key.offset;
1132 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1133 			if (key.objectid != block_group->start)
1134 				break;
1135 		}
1136 
1137 		ret = btrfs_next_item(extent_root, path);
1138 		if (ret < 0)
1139 			goto out_locked;
1140 		if (ret)
1141 			break;
1142 	}
1143 	if (start < end) {
1144 		ret = __add_to_free_space_tree(trans, block_group, path2,
1145 					       start, end - start);
1146 		if (ret)
1147 			goto out_locked;
1148 	}
1149 
1150 	ret = 0;
1151 out_locked:
1152 	mutex_unlock(&block_group->free_space_lock);
1153 out:
1154 	btrfs_free_path(path2);
1155 	btrfs_free_path(path);
1156 	return ret;
1157 }
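
/*
 * Editorial note: a userspace sketch of the population walk above.  Given the
 * allocated extents of a block group in key order, the free space is the gap
 * before each one plus the gap after the last one; metadata items contribute
 * nodesize bytes rather than their key offset.  A 16K nodesize is assumed for
 * the example.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_alloc {
	uint64_t objectid;	/* start of the allocated extent */
	uint64_t offset;	/* length; unused for metadata items */
	int is_metadata;	/* METADATA_ITEM vs EXTENT_ITEM */
};

static void demo_populate(uint64_t bg_start, uint64_t bg_len,
			  const struct demo_alloc *a, int nr)
{
	const uint64_t nodesize = 16384;	/* assumed */
	uint64_t start = bg_start, end = bg_start + bg_len;
	int i;

	for (i = 0; i < nr; i++) {
		if (start < a[i].objectid)
			printf("free: %llu +%llu\n",
			       (unsigned long long)start,
			       (unsigned long long)(a[i].objectid - start));
		start = a[i].objectid +
			(a[i].is_metadata ? nodesize : a[i].offset);
	}
	if (start < end)			/* free space at the tail */
		printf("free: %llu +%llu\n", (unsigned long long)start,
		       (unsigned long long)(end - start));
}

int main(void)
{
	struct demo_alloc a[] = {
		{ 1048576, 65536, 0 },		/* a data extent */
		{ 2097152, 0, 1 },		/* one metadata node */
	};

	demo_populate(1048576, 8388608, a, 2);
	return 0;
}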
1158 
1159 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1160 {
1161 	struct btrfs_trans_handle *trans;
1162 	struct btrfs_root *tree_root = fs_info->tree_root;
1163 	struct btrfs_root *free_space_root;
1164 	struct btrfs_block_group *block_group;
1165 	struct rb_node *node;
1166 	int ret;
1167 
1168 	trans = btrfs_start_transaction(tree_root, 0);
1169 	if (IS_ERR(trans))
1170 		return PTR_ERR(trans);
1171 
1172 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1173 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1174 	free_space_root = btrfs_create_tree(trans,
1175 					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1176 	if (IS_ERR(free_space_root)) {
1177 		ret = PTR_ERR(free_space_root);
1178 		goto abort;
1179 	}
1180 	ret = btrfs_global_root_insert(free_space_root);
1181 	if (ret) {
1182 		btrfs_put_root(free_space_root);
1183 		goto abort;
1184 	}
1185 
1186 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1187 	while (node) {
1188 		block_group = rb_entry(node, struct btrfs_block_group,
1189 				       cache_node);
1190 		ret = populate_free_space_tree(trans, block_group);
1191 		if (ret)
1192 			goto abort;
1193 		node = rb_next(node);
1194 	}
1195 
1196 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1197 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1198 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1199 	ret = btrfs_commit_transaction(trans);
1200 
1201 	/*
1202 	 * Now that we've committed the transaction any reading of our commit
1203 	 * root will be safe, so we can cache from the free space tree now.
1204 	 */
1205 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1206 	return ret;
1207 
1208 abort:
1209 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1210 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1211 	btrfs_abort_transaction(trans, ret);
1212 	btrfs_end_transaction(trans);
1213 	return ret;
1214 }
1215 
1216 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1217 				 struct btrfs_root *root)
1218 {
1219 	struct btrfs_path *path;
1220 	struct btrfs_key key;
1221 	int nr;
1222 	int ret;
1223 
1224 	path = btrfs_alloc_path();
1225 	if (!path)
1226 		return -ENOMEM;
1227 
1228 	key.objectid = 0;
1229 	key.type = 0;
1230 	key.offset = 0;
1231 
1232 	while (1) {
1233 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1234 		if (ret < 0)
1235 			goto out;
1236 
1237 		nr = btrfs_header_nritems(path->nodes[0]);
1238 		if (!nr)
1239 			break;
1240 
1241 		path->slots[0] = 0;
1242 		ret = btrfs_del_items(trans, root, path, 0, nr);
1243 		if (ret)
1244 			goto out;
1245 
1246 		btrfs_release_path(path);
1247 	}
1248 
1249 	ret = 0;
1250 out:
1251 	btrfs_free_path(path);
1252 	return ret;
1253 }
1254 
1255 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1256 {
1257 	struct btrfs_trans_handle *trans;
1258 	struct btrfs_root *tree_root = fs_info->tree_root;
1259 	struct btrfs_key key = {
1260 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1261 		.type = BTRFS_ROOT_ITEM_KEY,
1262 		.offset = 0,
1263 	};
1264 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1265 	int ret;
1266 
1267 	trans = btrfs_start_transaction(tree_root, 0);
1268 	if (IS_ERR(trans))
1269 		return PTR_ERR(trans);
1270 
1271 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1272 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1273 
1274 	ret = clear_free_space_tree(trans, free_space_root);
1275 	if (ret)
1276 		goto abort;
1277 
1278 	ret = btrfs_del_root(trans, &free_space_root->root_key);
1279 	if (ret)
1280 		goto abort;
1281 
1282 	btrfs_global_root_delete(free_space_root);
1283 
1284 	spin_lock(&fs_info->trans_lock);
1285 	list_del(&free_space_root->dirty_list);
1286 	spin_unlock(&fs_info->trans_lock);
1287 
1288 	btrfs_tree_lock(free_space_root->node);
1289 	btrfs_clear_buffer_dirty(trans, free_space_root->node);
1290 	btrfs_tree_unlock(free_space_root->node);
1291 	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1292 			      free_space_root->node, 0, 1);
1293 
1294 	btrfs_put_root(free_space_root);
1295 
1296 	return btrfs_commit_transaction(trans);
1297 
1298 abort:
1299 	btrfs_abort_transaction(trans, ret);
1300 	btrfs_end_transaction(trans);
1301 	return ret;
1302 }
1303 
1304 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1305 {
1306 	struct btrfs_trans_handle *trans;
1307 	struct btrfs_key key = {
1308 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1309 		.type = BTRFS_ROOT_ITEM_KEY,
1310 		.offset = 0,
1311 	};
1312 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1313 	struct rb_node *node;
1314 	int ret;
1315 
1316 	trans = btrfs_start_transaction(free_space_root, 1);
1317 	if (IS_ERR(trans))
1318 		return PTR_ERR(trans);
1319 
1320 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1321 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1322 
1323 	ret = clear_free_space_tree(trans, free_space_root);
1324 	if (ret)
1325 		goto abort;
1326 
1327 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1328 	while (node) {
1329 		struct btrfs_block_group *block_group;
1330 
1331 		block_group = rb_entry(node, struct btrfs_block_group,
1332 				       cache_node);
1333 		ret = populate_free_space_tree(trans, block_group);
1334 		if (ret)
1335 			goto abort;
1336 		node = rb_next(node);
1337 	}
1338 
1339 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1340 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1341 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1342 
1343 	ret = btrfs_commit_transaction(trans);
1344 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1345 	return ret;
1346 abort:
1347 	btrfs_abort_transaction(trans, ret);
1348 	btrfs_end_transaction(trans);
1349 	return ret;
1350 }
1351 
1352 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1353 					struct btrfs_block_group *block_group,
1354 					struct btrfs_path *path)
1355 {
1356 	int ret;
1357 
1358 	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
1359 
1360 	ret = add_new_free_space_info(trans, block_group, path);
1361 	if (ret)
1362 		return ret;
1363 
1364 	return __add_to_free_space_tree(trans, block_group, path,
1365 					block_group->start,
1366 					block_group->length);
1367 }
1368 
1369 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1370 			       struct btrfs_block_group *block_group)
1371 {
1372 	struct btrfs_fs_info *fs_info = trans->fs_info;
1373 	struct btrfs_path *path = NULL;
1374 	int ret = 0;
1375 
1376 	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1377 		return 0;
1378 
1379 	mutex_lock(&block_group->free_space_lock);
1380 	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
1381 		goto out;
1382 
1383 	path = btrfs_alloc_path();
1384 	if (!path) {
1385 		ret = -ENOMEM;
1386 		goto out;
1387 	}
1388 
1389 	ret = __add_block_group_free_space(trans, block_group, path);
1390 
1391 out:
1392 	btrfs_free_path(path);
1393 	mutex_unlock(&block_group->free_space_lock);
1394 	if (ret)
1395 		btrfs_abort_transaction(trans, ret);
1396 	return ret;
1397 }
1398 
1399 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1400 				  struct btrfs_block_group *block_group)
1401 {
1402 	struct btrfs_root *root = btrfs_free_space_root(block_group);
1403 	struct btrfs_path *path;
1404 	struct btrfs_key key, found_key;
1405 	struct extent_buffer *leaf;
1406 	u64 start, end;
1407 	int done = 0, nr;
1408 	int ret;
1409 
1410 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1411 		return 0;
1412 
1413 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1414 		/* We never added this block group to the free space tree. */
1415 		return 0;
1416 	}
1417 
1418 	path = btrfs_alloc_path();
1419 	if (!path) {
1420 		ret = -ENOMEM;
1421 		goto out;
1422 	}
1423 
1424 	start = block_group->start;
1425 	end = block_group->start + block_group->length;
1426 
1427 	key.objectid = end - 1;
1428 	key.type = (u8)-1;
1429 	key.offset = (u64)-1;
1430 
1431 	while (!done) {
1432 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1433 		if (ret)
1434 			goto out;
1435 
1436 		leaf = path->nodes[0];
1437 		nr = 0;
1438 		path->slots[0]++;
1439 		while (path->slots[0] > 0) {
1440 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1441 
1442 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1443 				ASSERT(found_key.objectid == block_group->start);
1444 				ASSERT(found_key.offset == block_group->length);
1445 				done = 1;
1446 				nr++;
1447 				path->slots[0]--;
1448 				break;
1449 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1450 				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1451 				ASSERT(found_key.objectid >= start);
1452 				ASSERT(found_key.objectid < end);
1453 				ASSERT(found_key.objectid + found_key.offset <= end);
1454 				nr++;
1455 				path->slots[0]--;
1456 			} else {
1457 				ASSERT(0);
1458 			}
1459 		}
1460 
1461 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1462 		if (ret)
1463 			goto out;
1464 		btrfs_release_path(path);
1465 	}
1466 
1467 	ret = 0;
1468 out:
1469 	btrfs_free_path(path);
1470 	if (ret)
1471 		btrfs_abort_transaction(trans, ret);
1472 	return ret;
1473 }
1474 
1475 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1476 				   struct btrfs_path *path,
1477 				   u32 expected_extent_count)
1478 {
1479 	struct btrfs_block_group *block_group;
1480 	struct btrfs_fs_info *fs_info;
1481 	struct btrfs_root *root;
1482 	struct btrfs_key key;
1483 	int prev_bit = 0, bit;
1484 	/* Initialize to silence GCC. */
1485 	u64 extent_start = 0;
1486 	u64 end, offset;
1487 	u64 total_found = 0;
1488 	u32 extent_count = 0;
1489 	int ret;
1490 
1491 	block_group = caching_ctl->block_group;
1492 	fs_info = block_group->fs_info;
1493 	root = btrfs_free_space_root(block_group);
1494 
1495 	end = block_group->start + block_group->length;
1496 
1497 	while (1) {
1498 		ret = btrfs_next_item(root, path);
1499 		if (ret < 0)
1500 			goto out;
1501 		if (ret)
1502 			break;
1503 
1504 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1505 
1506 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1507 			break;
1508 
1509 		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1510 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1511 
1512 		offset = key.objectid;
1513 		while (offset < key.objectid + key.offset) {
1514 			bit = free_space_test_bit(block_group, path, offset);
1515 			if (prev_bit == 0 && bit == 1) {
1516 				extent_start = offset;
1517 			} else if (prev_bit == 1 && bit == 0) {
1518 				u64 space_added;
1519 
1520 				ret = add_new_free_space(block_group, extent_start,
1521 							 offset, &space_added);
1522 				if (ret)
1523 					goto out;
1524 				total_found += space_added;
1525 				if (total_found > CACHING_CTL_WAKE_UP) {
1526 					total_found = 0;
1527 					wake_up(&caching_ctl->wait);
1528 				}
1529 				extent_count++;
1530 			}
1531 			prev_bit = bit;
1532 			offset += fs_info->sectorsize;
1533 		}
1534 	}
1535 	if (prev_bit == 1) {
1536 		ret = add_new_free_space(block_group, extent_start, end, NULL);
1537 		if (ret)
1538 			goto out;
1539 		extent_count++;
1540 	}
1541 
1542 	if (extent_count != expected_extent_count) {
1543 		btrfs_err(fs_info,
1544 			  "incorrect extent count for %llu; counted %u, expected %u",
1545 			  block_group->start, extent_count,
1546 			  expected_extent_count);
1547 		ASSERT(0);
1548 		ret = -EIO;
1549 		goto out;
1550 	}
1551 
1552 	ret = 0;
1553 out:
1554 	return ret;
1555 }
1556 
1557 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1558 				   struct btrfs_path *path,
1559 				   u32 expected_extent_count)
1560 {
1561 	struct btrfs_block_group *block_group;
1562 	struct btrfs_fs_info *fs_info;
1563 	struct btrfs_root *root;
1564 	struct btrfs_key key;
1565 	u64 end;
1566 	u64 total_found = 0;
1567 	u32 extent_count = 0;
1568 	int ret;
1569 
1570 	block_group = caching_ctl->block_group;
1571 	fs_info = block_group->fs_info;
1572 	root = btrfs_free_space_root(block_group);
1573 
1574 	end = block_group->start + block_group->length;
1575 
1576 	while (1) {
1577 		u64 space_added;
1578 
1579 		ret = btrfs_next_item(root, path);
1580 		if (ret < 0)
1581 			goto out;
1582 		if (ret)
1583 			break;
1584 
1585 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1586 
1587 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1588 			break;
1589 
1590 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1591 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1592 
1593 		ret = add_new_free_space(block_group, key.objectid,
1594 					 key.objectid + key.offset, &space_added);
1595 		if (ret)
1596 			goto out;
1597 		total_found += space_added;
1598 		if (total_found > CACHING_CTL_WAKE_UP) {
1599 			total_found = 0;
1600 			wake_up(&caching_ctl->wait);
1601 		}
1602 		extent_count++;
1603 	}
1604 
1605 	if (extent_count != expected_extent_count) {
1606 		btrfs_err(fs_info,
1607 			  "incorrect extent count for %llu; counted %u, expected %u",
1608 			  block_group->start, extent_count,
1609 			  expected_extent_count);
1610 		ASSERT(0);
1611 		ret = -EIO;
1612 		goto out;
1613 	}
1614 
1615 	ret = 0;
1616 out:
1617 	return ret;
1618 }
1619 
1620 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1621 {
1622 	struct btrfs_block_group *block_group;
1623 	struct btrfs_free_space_info *info;
1624 	struct btrfs_path *path;
1625 	u32 extent_count, flags;
1626 	int ret;
1627 
1628 	block_group = caching_ctl->block_group;
1629 
1630 	path = btrfs_alloc_path();
1631 	if (!path)
1632 		return -ENOMEM;
1633 
1634 	/*
1635 	 * Just like caching_thread() doesn't want to deadlock on the extent
1636 	 * tree, we don't want to deadlock on the free space tree.
1637 	 */
1638 	path->skip_locking = 1;
1639 	path->search_commit_root = 1;
1640 	path->reada = READA_FORWARD;
1641 
1642 	info = search_free_space_info(NULL, block_group, path, 0);
1643 	if (IS_ERR(info)) {
1644 		ret = PTR_ERR(info);
1645 		goto out;
1646 	}
1647 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1648 	flags = btrfs_free_space_flags(path->nodes[0], info);
1649 
1650 	/*
1651 	 * We left path pointing to the free space info item, so now
1652 	 * load_free_space_foo can just iterate through the free space tree from
1653 	 * there.
1654 	 */
1655 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1656 		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1657 	else
1658 		ret = load_free_space_extents(caching_ctl, path, extent_count);
1659 
1660 out:
1661 	btrfs_free_path(path);
1662 	return ret;
1663 }
1664