xref: /openbmc/linux/fs/btrfs/extent-tree.c (revision e089f05c18ab36ed5fa7e2319052e03ab800d518)
#include <stdio.h>
#include <stdlib.h>
#include "kerncompat.h"
#include "radix-tree.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"

static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
			    search_end, struct btrfs_key *ins);
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root);
static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
		       *extent_root);

/*
 * Pending extents are blocks that we're trying to allocate in the extent
 * map while we are in the middle of growing the map because of another
 * allocation.  To avoid recursing, they are tagged in the radix tree and
 * cleaned up once the other allocations are done.  The pending tag is
 * used in the same manner for deletes.
 */
#define CTREE_EXTENT_PENDING_DEL 0

static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
			 *root, u64 blocknr)
{
	struct btrfs_path path;
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
	struct btrfs_key ins;
	u32 refs;

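	/*
	 * reserve room in the extent tree for any blocks it may need to COW
	 * while we update it below; allocations for the extent tree itself
	 * are handed out of this reservation instead of recursing into the
	 * allocator
	 */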
	find_free_extent(trans, root->extent_root, 0, 0, (u64)-1, &ins);
	btrfs_init_path(&path);
	key.objectid = blocknr;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	key.offset = 1;
	ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 1);
	BUG_ON(ret != 0);
	l = &path.nodes[0]->leaf;
	item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(item);
	btrfs_set_extent_refs(item, refs + 1);

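	/* the COWing search above modified this leaf, so it must already be
	 * on the dirty list
	 */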
	BUG_ON(list_empty(&path.nodes[0]->dirty));
	btrfs_release_path(root->extent_root, &path);
	finish_current_insert(trans, root->extent_root);
	run_pending(trans, root->extent_root);
	return 0;
}

static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root
			    *root, u64 blocknr, u32 *refs)
{
	struct btrfs_path path;
	int ret;
	struct btrfs_key key;
	struct btrfs_leaf *l;
	struct btrfs_extent_item *item;
	btrfs_init_path(&path);
	key.objectid = blocknr;
	key.offset = 1;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(trans, root->extent_root, &key, &path, 0, 0);
	if (ret != 0)
		BUG();
	l = &path.nodes[0]->leaf;
	item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item);
	*refs = btrfs_extent_refs(item);
	btrfs_release_path(root->extent_root, &path);
	return 0;
}

int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct btrfs_buffer *buf)
{
	u64 blocknr;
	int i;

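	/*
	 * nothing to do unless this root reference counts its blocks and the
	 * buffer is a node: every block a node points to gains one reference
	 */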
	if (!root->ref_cows)
		return 0;
	if (btrfs_is_leaf(&buf->node))
		return 0;

	for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) {
		blocknr = btrfs_node_blockptr(&buf->node, i);
		inc_block_ref(trans, root, blocknr);
	}
	return 0;
}

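/*
 * once the transaction that freed them is safely on disk, pinned blocks can
 * be reused: drop the pinned records and point the allocator back at the
 * lowest one
 */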
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
			       btrfs_root *root)
{
	unsigned long gang[8];
	u64 first = 0;
	int ret;
	int i;

	while(1) {
		ret = radix_tree_gang_lookup(&root->pinned_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang));
		if (!ret)
			break;
		if (!first)
			first = gang[0];
		for (i = 0; i < ret; i++) {
			radix_tree_delete(&root->pinned_radix, gang[i]);
		}
	}
	root->last_insert.objectid = first;
	root->last_insert.offset = 0;
	return 0;
}

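/*
 * insert extent items for every block that alloc_extent() handed out of the
 * current_insert reservation; current_insert.flags counts how many blocks
 * have been handed out so far
 */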
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
				 btrfs_root *extent_root)
{
	struct btrfs_key ins;
	struct btrfs_extent_item extent_item;
	int i;
	int ret;

	btrfs_set_extent_refs(&extent_item, 1);
	btrfs_set_extent_owner(&extent_item,
		btrfs_header_parentid(&extent_root->node->node.header));
	ins.offset = 1;
	ins.flags = 0;
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);

	for (i = 0; i < extent_root->current_insert.flags; i++) {
		ins.objectid = extent_root->current_insert.objectid + i;
		ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
					sizeof(extent_item));
		BUG_ON(ret);
	}
	extent_root->current_insert.offset = 0;
	return 0;
}

/*
 * remove an extent from the root, returns 0 on success
 */
static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			 *root, u64 blocknr, u64 num_blocks, int pin)
{
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_root *extent_root = root->extent_root;
	int ret;
	struct btrfs_extent_item *ei;
	struct btrfs_key ins;
	u32 refs;

	BUG_ON(pin && num_blocks != 1);
	key.objectid = blocknr;
	key.flags = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	key.offset = num_blocks;

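	/* reserve blocks for the extent tree's own COWs before modifying it */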
	find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
	btrfs_init_path(&path);
	ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1);
	if (ret) {
		printf("failed to find %Lu\n", key.objectid);
		btrfs_print_tree(extent_root, extent_root->node);
		printf("failed to find %Lu\n", key.objectid);
		BUG();
	}
	ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0],
			    struct btrfs_extent_item);
	BUG_ON(ei->refs == 0);
	refs = btrfs_extent_refs(ei) - 1;
	btrfs_set_extent_refs(ei, refs);
	if (refs == 0) {
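		/*
		 * pinned blocks were freed during this transaction; record
		 * them in pinned_radix so the allocator skips them until the
		 * commit is done
		 */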
		if (pin) {
			int err;
			radix_tree_preload(GFP_KERNEL);
			err = radix_tree_insert(&extent_root->pinned_radix,
					  blocknr, (void *)blocknr);
			BUG_ON(err);
			radix_tree_preload_end();
		}
		ret = btrfs_del_item(trans, extent_root, &path);
		if (!pin && extent_root->last_insert.objectid > blocknr)
			extent_root->last_insert.objectid = blocknr;
		if (ret)
			BUG();
	}
	btrfs_release_path(extent_root, &path);
	finish_current_insert(trans, extent_root);
	return ret;
}

/*
 * find all the blocks marked as pending in the radix tree and remove
 * them from the extent map
 */
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
			       btrfs_root *extent_root)
{
	int ret;
	int err;
	struct btrfs_buffer *gang[4];
	int i;

	while(1) {
		ret = radix_tree_gang_lookup_tag(&extent_root->cache_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 CTREE_EXTENT_PENDING_DEL);
		if (!ret)
			break;
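		/*
		 * these are blocks from the extent tree itself: free them
		 * pinned so they are not reused before the commit, then drop
		 * the extra reference taken when they were tagged
		 */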
		for (i = 0; i < ret; i++) {
			err = __free_extent(trans, extent_root,
					    gang[i]->blocknr, 1, 1);
			BUG_ON(err);
			radix_tree_tag_clear(&extent_root->cache_radix,
						gang[i]->blocknr,
						CTREE_EXTENT_PENDING_DEL);
			btrfs_block_release(extent_root, gang[i]);
		}
	}
	return 0;
}

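/*
 * freeing pending extents can tag more blocks for deletion, so keep
 * sweeping until the pending tag is completely clear
 */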
static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root
		       *extent_root)
{
	while(radix_tree_tagged(&extent_root->cache_radix,
			        CTREE_EXTENT_PENDING_DEL))
		del_pending_extents(trans, extent_root);
	return 0;
}

/*
 * remove an extent from the root, returns 0 on success
 */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, u64 blocknr, u64 num_blocks, int pin)
{
	struct btrfs_root *extent_root = root->extent_root;
	struct btrfs_buffer *t;
	int pending_ret;
	int ret;

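	/*
	 * freeing a block that belongs to the extent tree would recurse into
	 * the code already modifying it, so just tag it as pending and hold a
	 * reference on the buffer; del_pending_extents() does the real free
	 * later
	 */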
	if (root == extent_root) {
		t = find_tree_block(root, blocknr);
		radix_tree_tag_set(&root->cache_radix, blocknr,
				   CTREE_EXTENT_PENDING_DEL);
		return 0;
	}
	ret = __free_extent(trans, root, blocknr, num_blocks, pin);
	pending_ret = run_pending(trans, root->extent_root);
	return ret ? ret : pending_ret;
}

/*
 * walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			    *orig_root, u64 num_blocks, u64 search_start, u64
			    search_end, struct btrfs_key *ins)
{
	struct btrfs_path path;
	struct btrfs_key key;
	int ret;
	u64 hole_size = 0;
	int slot = 0;
	u64 last_block;
	u64 test_block;
	int start_found;
	struct btrfs_leaf *l;
	struct btrfs_root *root = orig_root->extent_root;
	int total_needed = num_blocks;

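	/*
	 * ask for a few extra blocks per level of the extent tree beyond the
	 * caller's request, so the nodes that record this allocation can be
	 * COWed without running out of space
	 */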
	total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3;
	if (root->last_insert.objectid > search_start)
		search_start = root->last_insert.objectid;

	ins->flags = 0;
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

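	/* the search restarts here whenever the candidate range overlaps a
	 * pinned block
	 */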
check_failed:
	btrfs_init_path(&path);
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
	ret = btrfs_search_slot(trans, root, ins, &path, 0, 0);
	if (ret < 0)
		goto error;

	if (path.slots[0] > 0)
		path.slots[0]--;

	while (1) {
		l = &path.nodes[0]->leaf;
		slot = path.slots[0];
		if (slot >= btrfs_header_nritems(&l->header)) {
			ret = btrfs_next_leaf(root, &path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			if (!start_found) {
				ins->objectid = search_start;
				ins->offset = (u64)-1;
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = last_block > search_start ?
					last_block : search_start;
			ins->offset = (u64)-1;
			goto check_pending;
		}
		btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
		if (key.objectid >= search_start) {
			if (start_found) {
				if (last_block < search_start)
					last_block = search_start;
				hole_size = key.objectid - last_block;
				if (hole_size > total_needed) {
					ins->objectid = last_block;
					ins->offset = hole_size;
					goto check_pending;
				}
			}
		}
		start_found = 1;
		last_block = key.objectid + key.offset;
		path.slots[0]++;
	}
	/* FIXME -ENOSPC */
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, &path);
	BUG_ON(ins->objectid < search_start);
	for (test_block = ins->objectid;
	     test_block < ins->objectid + total_needed; test_block++) {
		if (radix_tree_lookup(&root->pinned_radix, test_block)) {
			search_start = test_block + 1;
			goto check_failed;
		}
	}
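	/*
	 * hand the caller the first num_blocks of the hole and keep the rest
	 * reserved in current_insert for the extent tree's own allocations
	 */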
	BUG_ON(root->current_insert.offset);
	root->current_insert.offset = total_needed - num_blocks;
	root->current_insert.objectid = ins->objectid + num_blocks;
	root->current_insert.flags = 0;
	root->last_insert.objectid = ins->objectid;
	ins->offset = num_blocks;
	return 0;
error:
	btrfs_release_path(root, &path);
	return ret;
}

/*
 * finds a free extent and does all the dirty work required for allocation;
 * returns the key for the new extent through ins.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, u64 num_blocks, u64 search_start, u64
			search_end, u64 owner, struct btrfs_key *ins)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->extent_root;
	struct btrfs_extent_item extent_item;

	btrfs_set_extent_refs(&extent_item, 1);
	btrfs_set_extent_owner(&extent_item, owner);

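	/*
	 * blocks for the extent tree itself come straight out of the
	 * current_insert reservation; finish_current_insert() adds their
	 * extent items once the tree is consistent again
	 */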
	if (root == extent_root) {
		BUG_ON(extent_root->current_insert.offset == 0);
		BUG_ON(num_blocks != 1);
		BUG_ON(extent_root->current_insert.flags ==
		       extent_root->current_insert.offset);
		ins->offset = 1;
		ins->objectid = extent_root->current_insert.objectid +
				extent_root->current_insert.flags++;
		return 0;
	}
	ret = find_free_extent(trans, root, num_blocks, search_start,
			       search_end, ins);
	if (ret)
		return ret;

	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));

	finish_current_insert(trans, extent_root);
	pending_ret = run_pending(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;
}

/*
 * helper function to allocate a block for a given tree; returns the tree
 * buffer or NULL.
 */
struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					    struct btrfs_root *root)
{
	struct btrfs_key ins;
	int ret;
	struct btrfs_buffer *buf;

	ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1,
			   btrfs_header_parentid(&root->node->node.header),
			   &ins);
	if (ret) {
		BUG();
		return NULL;
	}
	buf = find_tree_block(root, ins.objectid);
	dirty_tree_block(trans, root, buf);
	return buf;
}

/*
 * helper function for btrfs_drop_snapshot; this walks down the tree,
 * dropping ref counts as it goes.
 */
static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int *level)
{
	struct btrfs_buffer *next;
	struct btrfs_buffer *cur;
	u64 blocknr;
	int ret;
	u32 refs;

	ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr,
			       &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;
	/*
	 * walk down to the last node level and free all the leaves
	 */
	while(*level > 0) {
		cur = path->nodes[*level];
		if (path->slots[*level] >=
		    btrfs_header_nritems(&cur->node.header))
			break;
		blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]);
		ret = lookup_block_ref(trans, root, blocknr, &refs);
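		/*
		 * if the child block is shared, or it is a leaf (the children
		 * of a level 1 node are leaves), just drop one reference on
		 * it and move to the next slot instead of descending
		 */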
		if (refs != 1 || *level == 1) {
			path->slots[*level]++;
			ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
			BUG_ON(ret);
			continue;
		}
		BUG_ON(ret);
		next = read_tree_block(root, blocknr);
		if (path->nodes[*level-1])
			btrfs_block_release(root, path->nodes[*level-1]);
		path->nodes[*level-1] = next;
		*level = btrfs_header_level(&next->node.header);
		path->slots[*level] = 0;
	}
out:
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1,
				1);
	btrfs_block_release(root, path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through all
 * the slots.
 */
static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_path *path, int *level)
{
	int i;
	int slot;
	int ret;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot <
		    btrfs_header_nritems(&path->nodes[i]->node.header) - 1) {
			path->slots[i]++;
			*level = i;
			return 0;
		} else {
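			/*
			 * every slot in this node has been walked: drop the
			 * node's extent reference, release its buffer, and
			 * continue at the parent
			 */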
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->blocknr,
						1, 1);
			btrfs_block_release(root, path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
			BUG_ON(ret);
		}
	}
	return 1;
}

/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root, struct btrfs_buffer *snap)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path path;
	int i;
	int orig_level;

	btrfs_init_path(&path);

	level = btrfs_header_level(&snap->node.header);
	orig_level = level;
	path.nodes[level] = snap;
	path.slots[level] = 0;
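	/*
	 * alternate walking down the tree, dropping references as we go, and
	 * walking back up to the next unfinished node; walk_up_tree() returns
	 * 1 once the whole tree has been processed
	 */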
	while(1) {
		wret = walk_down_tree(trans, root, &path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, &path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
	}
	for (i = 0; i <= orig_level; i++) {
		if (path.nodes[i]) {
			btrfs_block_release(root, path.nodes[i]);
		}
	}
	return ret;
}