xref: /openbmc/linux/fs/btrfs/extent-tree.c (revision 9f5fae2fe6dc35b46bf56183f11398451851cb3f)
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8 #include "transaction.h"
9 
10 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
11 			    *orig_root, u64 num_blocks, u64 search_start, u64
12 			    search_end, struct btrfs_key *ins);
13 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
14 				 btrfs_root *extent_root);
15 static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
16 		       *extent_root);
17 
18 /*
19  * pending extents are blocks that we're trying to allocate in the extent
20  * map while trying to grow the map because of other allocations.  To avoid
21  * recursing, they are tagged in the radix tree and cleaned up after
22  * other allocations are done.  The pending tag is also used in the same
23  * manner for deletes.
24  */
25 #define CTREE_EXTENT_PENDING_DEL 0
26 
27 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
28 			 *root, u64 blocknr)
29 {
30 	struct btrfs_path path;
31 	int ret;
32 	struct btrfs_key key;
33 	struct btrfs_leaf *l;
34 	struct btrfs_extent_item *item;
35 	struct btrfs_key ins;
36 	u32 refs;
37 
38 	find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
39 			 &ins);
40 	btrfs_init_path(&path);
41 	key.objectid = blocknr;
42 	key.flags = 0;
43 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
44 	key.offset = 1;
45 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
46 				0, 1);
47 	if (ret != 0)
48 		BUG();
49 	BUG_ON(ret != 0);
50 	l = &path.nodes[0]->leaf;
51 	item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
52 	refs = btrfs_extent_refs(item);
53 	btrfs_set_extent_refs(item, refs + 1);
54 
55 	BUG_ON(list_empty(&path.nodes[0]->dirty));
56 	btrfs_release_path(root->fs_info->extent_root, &path);
57 	finish_current_insert(trans, root->fs_info->extent_root);
58 	run_pending(trans, root->fs_info->extent_root);
59 	return 0;
60 }
61 
62 static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
63 			    *root, u64 blocknr, u32 *refs)
64 {
65 	struct btrfs_path path;
66 	int ret;
67 	struct btrfs_key key;
68 	struct btrfs_leaf *l;
69 	struct btrfs_extent_item *item;
70 	btrfs_init_path(&path);
71 	key.objectid = blocknr;
72 	key.offset = 1;
73 	key.flags = 0;
74 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
75 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path,
76 				0, 0);
77 	if (ret != 0)
78 		BUG();
79 	l = &path.nodes[0]->leaf;
80 	item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
81 	*refs = btrfs_extent_refs(item);
82 	btrfs_release_path(root->fs_info->extent_root, &path);
83 	return 0;
84 }
85 
86 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
87 		  struct btrfs_buffer *buf)
88 {
89 	u64 blocknr;
90 	int i;
91 
92 	if (!root->ref_cows)
93 		return 0;
94 	if (btrfs_is_leaf(&buf->node))
95 		return 0;
96 
97 	for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
98 		blocknr = btrfs_node_blockptr(&buf->node, i);
99 		inc_block_ref(trans, root, blocknr);
100 	}
101 	return 0;
102 }
103 
104 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
105 			       btrfs_root *root)
106 {
107 	unsigned long gang[8];
108 	u64 first = 0;
109 	int ret;
110 	int i;
111 
112 	while(1) {
113 		ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix,
114 					     (void **)gang, 0,
115 					     ARRAY_SIZE(gang));
116 		if (!ret)
117 			break;
118 		if (!first)
119 			first = gang[0];
120 		for (i = 0; i < ret; i++) {
121 			radix_tree_delete(&root->fs_info->pinned_radix,
122 					  gang[i]);
123 		}
124 	}
125 	root->fs_info->last_insert.objectid = first;
126 	root->fs_info->last_insert.offset = 0;
127 	return 0;
128 }
129 
130 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
131 				 btrfs_root *extent_root)
132 {
133 	struct btrfs_key ins;
134 	struct btrfs_extent_item extent_item;
135 	int i;
136 	int ret;
137 
138 	btrfs_set_extent_refs(&extent_item, 1);
139 	btrfs_set_extent_owner(&extent_item,
140 		btrfs_header_parentid(&extent_root->node->node.header));
141 	ins.offset = 1;
142 	ins.flags = 0;
143 	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
144 
145 	for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) {
146 		ins.objectid = extent_root->fs_info->current_insert.objectid +
147 				i;
148 		ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
149 					sizeof(extent_item));
150 		BUG_ON(ret);
151 	}
152 	extent_root->fs_info->current_insert.offset = 0;
153 	return 0;
154 }
155 
156 /*
157  * remove an extent from the root, returns 0 on success
158  */
159 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
160 			 *root, u64 blocknr, u64 num_blocks, int pin)
161 {
162 	struct btrfs_path path;
163 	struct btrfs_key key;
164 	struct btrfs_root *extent_root = root->fs_info->extent_root;
165 	int ret;
166 	struct btrfs_extent_item *ei;
167 	struct btrfs_key ins;
168 	u32 refs;
169 
170 	BUG_ON(pin && num_blocks != 1);
171 	key.objectid = blocknr;
172 	key.flags = 0;
173 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
174 	key.offset = num_blocks;
175 
176 	find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
177 	btrfs_init_path(&path);
178 	ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1);
179 	if (ret) {
180 		printf("failed to find %Lu\n", key.objectid);
181 		btrfs_print_tree(extent_root, extent_root->node);
182 		printf("failed to find %Lu\n", key.objectid);
183 		BUG();
184 	}
185 	ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
186 			    struct btrfs_extent_item);
187 	BUG_ON(ei->refs == 0);
188 	refs = btrfs_extent_refs(ei) - 1;
189 	btrfs_set_extent_refs(ei, refs);
190 	if (refs == 0) {
191 		if (pin) {
192 			int err;
193 			radix_tree_preload(GFP_KERNEL);
194 			err = radix_tree_insert(
195 					&extent_root->fs_info->pinned_radix,
196 					blocknr, (void *)blocknr);
197 			BUG_ON(err);
198 			radix_tree_preload_end();
199 		}
200 		ret = btrfs_del_item(trans, extent_root, &path);
201 		if (!pin && extent_root->fs_info->last_insert.objectid >
202 		    blocknr)
203 			extent_root->fs_info->last_insert.objectid = blocknr;
204 		if (ret)
205 			BUG();
206 	}
207 	btrfs_release_path(extent_root, &path);
208 	finish_current_insert(trans, extent_root);
209 	return ret;
210 }
211 
212 /*
213  * find all the blocks marked as pending in the radix tree and remove
214  * them from the extent map
215  */
216 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
217 			       btrfs_root *extent_root)
218 {
219 	int ret;
220 	struct btrfs_buffer *gang[4];
221 	int i;
222 
223 	while(1) {
224 		ret = radix_tree_gang_lookup_tag(
225 					&extent_root->fs_info->cache_radix,
226 					(void **)gang, 0,
227 					ARRAY_SIZE(gang),
228 					CTREE_EXTENT_PENDING_DEL);
229 		if (!ret)
230 			break;
231 		for (i = 0; i < ret; i++) {
232 			ret = __free_extent(trans, extent_root,
233 					    gang[i]->blocknr, 1, 1);
234 			radix_tree_tag_clear(&extent_root->fs_info->cache_radix,
235 					     gang[i]->blocknr,
236 					     CTREE_EXTENT_PENDING_DEL);
237 			btrfs_block_release(extent_root, gang[i]);
238 		}
239 	}
240 	return 0;
241 }
242 
243 static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
244 		       *extent_root)
245 {
246 	while(radix_tree_tagged(&extent_root->fs_info->cache_radix,
247 				CTREE_EXTENT_PENDING_DEL))
248 		del_pending_extents(trans, extent_root);
249 	return 0;
250 }
251 
252 
253 /*
254  * remove an extent from the root, returns 0 on success
255  */
256 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
257 		      *root, u64 blocknr, u64 num_blocks, int pin)
258 {
259 	struct btrfs_root *extent_root = root->fs_info->extent_root;
260 	struct btrfs_buffer *t;
261 	int pending_ret;
262 	int ret;
263 
264 	if (root == extent_root) {
265 		t = find_tree_block(root, blocknr);
266 		radix_tree_tag_set(&root->fs_info->cache_radix, blocknr,
267 				   CTREE_EXTENT_PENDING_DEL);
268 		return 0;
269 	}
270 	ret = __free_extent(trans, root, blocknr, num_blocks, pin);
271 	pending_ret = run_pending(trans, root->fs_info->extent_root);
272 	return ret ? ret : pending_ret;
273 }
274 
275 /*
276  * walks the btree of allocated extents and find a hole of a given size.
277  * The key ins is changed to record the hole:
278  * ins->objectid == block start
279  * ins->flags = BTRFS_EXTENT_ITEM_KEY
280  * ins->offset == number of blocks
281  * Any available blocks before search_start are skipped.
282  */
283 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
284 			    *orig_root, u64 num_blocks, u64 search_start, u64
285 			    search_end, struct btrfs_key *ins)
286 {
287 	struct btrfs_path path;
288 	struct btrfs_key key;
289 	int ret;
290 	u64 hole_size = 0;
291 	int slot = 0;
292 	u64 last_block;
293 	u64 test_block;
294 	int start_found;
295 	struct btrfs_leaf *l;
296 	struct btrfs_root * root = orig_root->fs_info->extent_root;
297 	int total_needed = num_blocks;
298 
299 	total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
300 	if (root->fs_info->last_insert.objectid > search_start)
301 		search_start = root->fs_info->last_insert.objectid;
302 
303 	ins->flags = 0;
304 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
305 
306 check_failed:
307 	btrfs_init_path(&path);
308 	ins->objectid = search_start;
309 	ins->offset = 0;
310 	start_found = 0;
311 	ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
312 	if (ret < 0)
313 		goto error;
314 
315 	if (path.slots[0] > 0)
316 		path.slots[0]--;
317 
318 	while (1) {
319 		l = &path.nodes[0]->leaf;
320 		slot = path.slots[0];
321 		if (slot >= btrfs_header_nritems(&l->header)) {
322 			ret = btrfs_next_leaf(root, &path);
323 			if (ret == 0)
324 				continue;
325 			if (ret < 0)
326 				goto error;
327 			if (!start_found) {
328 				ins->objectid = search_start;
329 				ins->offset = (u64)-1;
330 				start_found = 1;
331 				goto check_pending;
332 			}
333 			ins->objectid = last_block > search_start ?
334 					last_block : search_start;
335 			ins->offset = (u64)-1;
336 			goto check_pending;
337 		}
338 		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
339 		if (key.objectid >= search_start) {
340 			if (start_found) {
341 				if (last_block < search_start)
342 					last_block = search_start;
343 				hole_size = key.objectid - last_block;
344 				if (hole_size > total_needed) {
345 					ins->objectid = last_block;
346 					ins->offset = hole_size;
347 					goto check_pending;
348 				}
349 			}
350 		}
351 		start_found = 1;
352 		last_block = key.objectid + key.offset;
353 		path.slots[0]++;
354 	}
355 	// FIXME -ENOSPC
356 check_pending:
357 	/* we have to make sure we didn't find an extent that has already
358 	 * been allocated by the map tree or the original allocation
359 	 */
360 	btrfs_release_path(root, &path);
361 	BUG_ON(ins->objectid < search_start);
362 	for (test_block = ins->objectid;
363 	     test_block < ins->objectid + total_needed; test_block++) {
364 		if (radix_tree_lookup(&root->fs_info->pinned_radix,
365 				      test_block)) {
366 			search_start = test_block + 1;
367 			goto check_failed;
368 		}
369 	}
370 	BUG_ON(root->fs_info->current_insert.offset);
371 	root->fs_info->current_insert.offset = total_needed - num_blocks;
372 	root->fs_info->current_insert.objectid = ins->objectid + num_blocks;
373 	root->fs_info->current_insert.flags = 0;
374 	root->fs_info->last_insert.objectid = ins->objectid;
375 	ins->offset = num_blocks;
376 	return 0;
377 error:
378 	btrfs_release_path(root, &path);
379 	return ret;
380 }
381 
382 /*
383  * finds a free extent and does all the dirty work required for allocation
384  * returns the key for the extent through ins, and a tree buffer for
385  * the first block of the extent through buf.
386  *
387  * returns 0 if everything worked, non-zero otherwise.
388  */
389 static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root
390 			*root, u64 num_blocks, u64 search_start, u64
391 			search_end, u64 owner, struct btrfs_key *ins)
392 {
393 	int ret;
394 	int pending_ret;
395 	struct btrfs_root *extent_root = root->fs_info->extent_root;
396 	struct btrfs_extent_item extent_item;
397 
398 	btrfs_set_extent_refs(&extent_item, 1);
399 	btrfs_set_extent_owner(&extent_item, owner);
400 
401 	if (root == extent_root) {
402 		BUG_ON(extent_root->fs_info->current_insert.offset == 0);
403 		BUG_ON(num_blocks != 1);
404 		BUG_ON(extent_root->fs_info->current_insert.flags ==
405 		       extent_root->fs_info->current_insert.offset);
406 		ins->offset = 1;
407 		ins->objectid = extent_root->fs_info->current_insert.objectid +
408 				extent_root->fs_info->current_insert.flags++;
409 		return 0;
410 	}
411 	ret = find_free_extent(trans, root, num_blocks, search_start,
412 			       search_end, ins);
413 	if (ret)
414 		return ret;
415 
416 	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
417 				sizeof(extent_item));
418 
419 	finish_current_insert(trans, extent_root);
420 	pending_ret = run_pending(trans, extent_root);
421 	if (ret)
422 		return ret;
423 	if (pending_ret)
424 		return pending_ret;
425 	return 0;
426 }
427 
428 /*
429  * helper function to allocate a block for a given tree
430  * returns the tree buffer or NULL.
431  */
432 struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
433 					    struct btrfs_root *root)
434 {
435 	struct btrfs_key ins;
436 	int ret;
437 	struct btrfs_buffer *buf;
438 
439 	ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1,
440 			   btrfs_header_parentid(&root->node->node.header),
441 			   &ins);
442 	if (ret) {
443 		BUG();
444 		return NULL;
445 	}
446 	buf = find_tree_block(root, ins.objectid);
447 	dirty_tree_block(trans, root, buf);
448 	return buf;
449 }
450 
451 /*
452  * helper function for drop_snapshot, this walks down the tree dropping ref
453  * counts as it goes.
454  */
455 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
456 			  *root, struct btrfs_path *path, int *level)
457 {
458 	struct btrfs_buffer *next;
459 	struct btrfs_buffer *cur;
460 	u64 blocknr;
461 	int ret;
462 	u32 refs;
463 
464 	ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr,
465 			       &refs);
466 	BUG_ON(ret);
467 	if (refs > 1)
468 		goto out;
469 	/*
470 	 * walk down to the last node level and free all the leaves
471 	 */
472 	while(*level > 0) {
473 		cur = path->nodes[*level];
474 		if (path->slots[*level] >=
475 		    btrfs_header_nritems(&cur->node.header))
476 			break;
477 		blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
478 		ret = lookup_block_ref(trans, root, blocknr, &refs);
479 		if (refs != 1 || *level == 1) {
480 			path->slots[*level]++;
481 			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
482 			BUG_ON(ret);
483 			continue;
484 		}
485 		BUG_ON(ret);
486 		next = read_tree_block(root, blocknr);
487 		if (path->nodes[*level-1])
488 			btrfs_block_release(root, path->nodes[*level-1]);
489 		path->nodes[*level-1] = next;
490 		*level = btrfs_header_level(&next->node.header);
491 		path->slots[*level] = 0;
492 	}
493 out:
494 	ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1,
495 				1);
496 	btrfs_block_release(root, path->nodes[*level]);
497 	path->nodes[*level] = NULL;
498 	*level += 1;
499 	BUG_ON(ret);
500 	return 0;
501 }
502 
503 /*
504  * helper for dropping snapshots.  This walks back up the tree in the path
505  * to find the first node higher up where we haven't yet gone through
506  * all the slots
507  */
508 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
509 			*root, struct btrfs_path *path, int *level)
510 {
511 	int i;
512 	int slot;
513 	int ret;
514 	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
515 		slot = path->slots[i];
516 		if (slot <
517 		    btrfs_header_nritems(&path->nodes[i]->node.header)- 1) {
518 			path->slots[i]++;
519 			*level = i;
520 			return 0;
521 		} else {
522 			ret = btrfs_free_extent(trans, root,
523 						path->nodes[*level]->blocknr,
524 						1, 1);
525 			btrfs_block_release(root, path->nodes[*level]);
526 			path->nodes[*level] = NULL;
527 			*level = i + 1;
528 			BUG_ON(ret);
529 		}
530 	}
531 	return 1;
532 }
533 
534 /*
535  * drop the reference count on the tree rooted at 'snap'.  This traverses
536  * the tree freeing any blocks that have a ref count of zero after being
537  * decremented.
538  */
539 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
540 			*root, struct btrfs_buffer *snap)
541 {
542 	int ret = 0;
543 	int wret;
544 	int level;
545 	struct btrfs_path path;
546 	int i;
547 	int orig_level;
548 
549 	btrfs_init_path(&path);
550 
551 	level = btrfs_header_level(&snap->node.header);
552 	orig_level = level;
553 	path.nodes[level] = snap;
554 	path.slots[level] = 0;
555 	while(1) {
556 		wret = walk_down_tree(trans, root, &path, &level);
557 		if (wret > 0)
558 			break;
559 		if (wret < 0)
560 			ret = wret;
561 
562 		wret = walk_up_tree(trans, root, &path, &level);
563 		if (wret > 0)
564 			break;
565 		if (wret < 0)
566 			ret = wret;
567 	}
568 	for (i = 0; i <= orig_level; i++) {
569 		if (path.nodes[i]) {
570 			btrfs_block_release(root, path.nodes[i]);
571 		}
572 	}
573 	return ret;
574 }
575