xref: /openbmc/linux/fs/btrfs/ctree.c (revision 74123bd7)
1be0e5c09SChris Mason #include <stdio.h>
2be0e5c09SChris Mason #include <stdlib.h>
3be0e5c09SChris Mason #include "kerncompat.h"
4eb60ceacSChris Mason #include "radix-tree.h"
5eb60ceacSChris Mason #include "ctree.h"
6eb60ceacSChris Mason #include "disk-io.h"
7be0e5c09SChris Mason 
8be0e5c09SChris Mason static inline void init_path(struct ctree_path *p)
9be0e5c09SChris Mason {
10be0e5c09SChris Mason 	memset(p, 0, sizeof(*p));
11be0e5c09SChris Mason }
12be0e5c09SChris Mason 
13eb60ceacSChris Mason static void release_path(struct ctree_root *root, struct ctree_path *p)
14eb60ceacSChris Mason {
15eb60ceacSChris Mason 	int i;
16eb60ceacSChris Mason 	for (i = 0; i < MAX_LEVEL; i++) {
17eb60ceacSChris Mason 		if (!p->nodes[i])
18eb60ceacSChris Mason 			break;
19eb60ceacSChris Mason 		tree_block_release(root, p->nodes[i]);
20eb60ceacSChris Mason 	}
21eb60ceacSChris Mason }
22eb60ceacSChris Mason 
2374123bd7SChris Mason /*
2474123bd7SChris Mason  * The leaf data grows from end-to-front in the node.
2574123bd7SChris Mason  * this returns the address of the start of the last item,
2674123bd7SChris Mason  * which is the stop of the leaf data stack
2774123bd7SChris Mason  */
28be0e5c09SChris Mason static inline unsigned int leaf_data_end(struct leaf *leaf)
29be0e5c09SChris Mason {
30be0e5c09SChris Mason 	unsigned int nr = leaf->header.nritems;
31be0e5c09SChris Mason 	if (nr == 0)
32be0e5c09SChris Mason 		return ARRAY_SIZE(leaf->data);
33be0e5c09SChris Mason 	return leaf->items[nr-1].offset;
34be0e5c09SChris Mason }
35be0e5c09SChris Mason 
3674123bd7SChris Mason /*
3774123bd7SChris Mason  * The space between the end of the leaf items and
3874123bd7SChris Mason  * the start of the leaf data.  IOW, how much room
3974123bd7SChris Mason  * the leaf has left for both items and data
4074123bd7SChris Mason  */
41be0e5c09SChris Mason static inline int leaf_free_space(struct leaf *leaf)
42be0e5c09SChris Mason {
43be0e5c09SChris Mason 	int data_end = leaf_data_end(leaf);
44be0e5c09SChris Mason 	int nritems = leaf->header.nritems;
45be0e5c09SChris Mason 	char *items_end = (char *)(leaf->items + nritems + 1);
46be0e5c09SChris Mason 	return (char *)(leaf->data + data_end) - (char *)items_end;
47be0e5c09SChris Mason }
48be0e5c09SChris Mason 
4974123bd7SChris Mason /*
5074123bd7SChris Mason  * compare two keys in a memcmp fashion
5174123bd7SChris Mason  */
52be0e5c09SChris Mason int comp_keys(struct key *k1, struct key *k2)
53be0e5c09SChris Mason {
54be0e5c09SChris Mason 	if (k1->objectid > k2->objectid)
55be0e5c09SChris Mason 		return 1;
56be0e5c09SChris Mason 	if (k1->objectid < k2->objectid)
57be0e5c09SChris Mason 		return -1;
58be0e5c09SChris Mason 	if (k1->flags > k2->flags)
59be0e5c09SChris Mason 		return 1;
60be0e5c09SChris Mason 	if (k1->flags < k2->flags)
61be0e5c09SChris Mason 		return -1;
62be0e5c09SChris Mason 	if (k1->offset > k2->offset)
63be0e5c09SChris Mason 		return 1;
64be0e5c09SChris Mason 	if (k1->offset < k2->offset)
65be0e5c09SChris Mason 		return -1;
66be0e5c09SChris Mason 	return 0;
67be0e5c09SChris Mason }
6874123bd7SChris Mason 
6974123bd7SChris Mason /*
7074123bd7SChris Mason  * search for key in the array p.  items p are item_size apart
7174123bd7SChris Mason  * and there are 'max' items in p
7274123bd7SChris Mason  * the slot in the array is returned via slot, and it points to
7374123bd7SChris Mason  * the place where you would insert key if it is not found in
7474123bd7SChris Mason  * the array.
7574123bd7SChris Mason  *
7674123bd7SChris Mason  * slot may point to max if the key is bigger than all of the keys
7774123bd7SChris Mason  */
78be0e5c09SChris Mason int generic_bin_search(char *p, int item_size, struct key *key,
79be0e5c09SChris Mason 		       int max, int *slot)
80be0e5c09SChris Mason {
81be0e5c09SChris Mason 	int low = 0;
82be0e5c09SChris Mason 	int high = max;
83be0e5c09SChris Mason 	int mid;
84be0e5c09SChris Mason 	int ret;
85be0e5c09SChris Mason 	struct key *tmp;
86be0e5c09SChris Mason 
87be0e5c09SChris Mason 	while(low < high) {
88be0e5c09SChris Mason 		mid = (low + high) / 2;
89be0e5c09SChris Mason 		tmp = (struct key *)(p + mid * item_size);
90be0e5c09SChris Mason 		ret = comp_keys(tmp, key);
91be0e5c09SChris Mason 
92be0e5c09SChris Mason 		if (ret < 0)
93be0e5c09SChris Mason 			low = mid + 1;
94be0e5c09SChris Mason 		else if (ret > 0)
95be0e5c09SChris Mason 			high = mid;
96be0e5c09SChris Mason 		else {
97be0e5c09SChris Mason 			*slot = mid;
98be0e5c09SChris Mason 			return 0;
99be0e5c09SChris Mason 		}
100be0e5c09SChris Mason 	}
101be0e5c09SChris Mason 	*slot = low;
102be0e5c09SChris Mason 	return 1;
103be0e5c09SChris Mason }
104be0e5c09SChris Mason 
105be0e5c09SChris Mason int bin_search(struct node *c, struct key *key, int *slot)
106be0e5c09SChris Mason {
107be0e5c09SChris Mason 	if (is_leaf(c->header.flags)) {
108be0e5c09SChris Mason 		struct leaf *l = (struct leaf *)c;
109be0e5c09SChris Mason 		return generic_bin_search((void *)l->items, sizeof(struct item),
110be0e5c09SChris Mason 					  key, c->header.nritems, slot);
111be0e5c09SChris Mason 	} else {
112be0e5c09SChris Mason 		return generic_bin_search((void *)c->keys, sizeof(struct key),
113be0e5c09SChris Mason 					  key, c->header.nritems, slot);
114be0e5c09SChris Mason 	}
115be0e5c09SChris Mason 	return -1;
116be0e5c09SChris Mason }
117be0e5c09SChris Mason 
11874123bd7SChris Mason /*
11974123bd7SChris Mason  * look for key in the tree.  path is filled in with nodes along the way
12074123bd7SChris Mason  * if key is found, we return zero and you can find the item in the leaf
12174123bd7SChris Mason  * level of the path (level 0)
12274123bd7SChris Mason  *
12374123bd7SChris Mason  * If the key isn't found, the path points to the slot where it should
12474123bd7SChris Mason  * be inserted.
12574123bd7SChris Mason  */
126be0e5c09SChris Mason int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
127be0e5c09SChris Mason {
128eb60ceacSChris Mason 	struct tree_buffer *b = root->node;
129eb60ceacSChris Mason 	struct node *c;
130eb60ceacSChris Mason 
131be0e5c09SChris Mason 	int slot;
132be0e5c09SChris Mason 	int ret;
133be0e5c09SChris Mason 	int level;
134eb60ceacSChris Mason 	b->count++;
135eb60ceacSChris Mason 	while (b) {
136eb60ceacSChris Mason 		c = &b->node;
137be0e5c09SChris Mason 		level = node_level(c->header.flags);
138eb60ceacSChris Mason 		p->nodes[level] = b;
139be0e5c09SChris Mason 		ret = bin_search(c, key, &slot);
140be0e5c09SChris Mason 		if (!is_leaf(c->header.flags)) {
141be0e5c09SChris Mason 			if (ret && slot > 0)
142be0e5c09SChris Mason 				slot -= 1;
143be0e5c09SChris Mason 			p->slots[level] = slot;
144eb60ceacSChris Mason 			b = read_tree_block(root, c->blockptrs[slot]);
145be0e5c09SChris Mason 			continue;
146be0e5c09SChris Mason 		} else {
147be0e5c09SChris Mason 			p->slots[level] = slot;
148be0e5c09SChris Mason 			return ret;
149be0e5c09SChris Mason 		}
150be0e5c09SChris Mason 	}
151be0e5c09SChris Mason 	return -1;
152be0e5c09SChris Mason }
153be0e5c09SChris Mason 
15474123bd7SChris Mason /*
15574123bd7SChris Mason  * adjust the pointers going up the tree, starting at level
15674123bd7SChris Mason  * making sure the right key of each node is points to 'key'.
15774123bd7SChris Mason  * This is used after shifting pointers to the left, so it stops
15874123bd7SChris Mason  * fixing up pointers when a given leaf/node is not in slot 0 of the
15974123bd7SChris Mason  * higher levels
16074123bd7SChris Mason  */
161eb60ceacSChris Mason static void fixup_low_keys(struct ctree_root *root,
162eb60ceacSChris Mason 			   struct ctree_path *path, struct key *key,
163be0e5c09SChris Mason 			   int level)
164be0e5c09SChris Mason {
165be0e5c09SChris Mason 	int i;
166be0e5c09SChris Mason 	for (i = level; i < MAX_LEVEL; i++) {
167eb60ceacSChris Mason 		struct node *t;
168be0e5c09SChris Mason 		int tslot = path->slots[i];
169eb60ceacSChris Mason 		if (!path->nodes[i])
170be0e5c09SChris Mason 			break;
171eb60ceacSChris Mason 		t = &path->nodes[i]->node;
172be0e5c09SChris Mason 		memcpy(t->keys + tslot, key, sizeof(*key));
173eb60ceacSChris Mason 		write_tree_block(root, path->nodes[i]);
174be0e5c09SChris Mason 		if (tslot != 0)
175be0e5c09SChris Mason 			break;
176be0e5c09SChris Mason 	}
177be0e5c09SChris Mason }
178be0e5c09SChris Mason 
17974123bd7SChris Mason /*
18074123bd7SChris Mason  * try to push data from one node into the next node left in the
18174123bd7SChris Mason  * tree.  The src node is found at specified level in the path.
18274123bd7SChris Mason  * If some bytes were pushed, return 0, otherwise return 1.
18374123bd7SChris Mason  *
18474123bd7SChris Mason  * Lower nodes/leaves in the path are not touched, higher nodes may
18574123bd7SChris Mason  * be modified to reflect the push.
18674123bd7SChris Mason  *
18774123bd7SChris Mason  * The path is altered to reflect the push.
18874123bd7SChris Mason  */
189be0e5c09SChris Mason int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
190be0e5c09SChris Mason {
191be0e5c09SChris Mason 	int slot;
192be0e5c09SChris Mason 	struct node *left;
193be0e5c09SChris Mason 	struct node *right;
194be0e5c09SChris Mason 	int push_items = 0;
195be0e5c09SChris Mason 	int left_nritems;
196be0e5c09SChris Mason 	int right_nritems;
197eb60ceacSChris Mason 	struct tree_buffer *t;
198eb60ceacSChris Mason 	struct tree_buffer *right_buf;
199be0e5c09SChris Mason 
200be0e5c09SChris Mason 	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
201be0e5c09SChris Mason 		return 1;
202be0e5c09SChris Mason 	slot = path->slots[level + 1];
203be0e5c09SChris Mason 	if (slot == 0)
204be0e5c09SChris Mason 		return 1;
205be0e5c09SChris Mason 
206eb60ceacSChris Mason 	t = read_tree_block(root,
207eb60ceacSChris Mason 		            path->nodes[level + 1]->node.blockptrs[slot - 1]);
208eb60ceacSChris Mason 	left = &t->node;
209eb60ceacSChris Mason 	right_buf = path->nodes[level];
210eb60ceacSChris Mason 	right = &right_buf->node;
211be0e5c09SChris Mason 	left_nritems = left->header.nritems;
212be0e5c09SChris Mason 	right_nritems = right->header.nritems;
213be0e5c09SChris Mason 	push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
214eb60ceacSChris Mason 	if (push_items <= 0) {
215eb60ceacSChris Mason 		tree_block_release(root, t);
216be0e5c09SChris Mason 		return 1;
217eb60ceacSChris Mason 	}
218be0e5c09SChris Mason 
219be0e5c09SChris Mason 	if (right_nritems < push_items)
220be0e5c09SChris Mason 		push_items = right_nritems;
221be0e5c09SChris Mason 	memcpy(left->keys + left_nritems, right->keys,
222be0e5c09SChris Mason 		push_items * sizeof(struct key));
223be0e5c09SChris Mason 	memcpy(left->blockptrs + left_nritems, right->blockptrs,
224be0e5c09SChris Mason 		push_items * sizeof(u64));
225be0e5c09SChris Mason 	memmove(right->keys, right->keys + push_items,
226be0e5c09SChris Mason 		(right_nritems - push_items) * sizeof(struct key));
227be0e5c09SChris Mason 	memmove(right->blockptrs, right->blockptrs + push_items,
228be0e5c09SChris Mason 		(right_nritems - push_items) * sizeof(u64));
229be0e5c09SChris Mason 	right->header.nritems -= push_items;
230be0e5c09SChris Mason 	left->header.nritems += push_items;
231be0e5c09SChris Mason 
232be0e5c09SChris Mason 	/* adjust the pointers going up the tree */
233eb60ceacSChris Mason 	fixup_low_keys(root, path, right->keys, level + 1);
234eb60ceacSChris Mason 
235eb60ceacSChris Mason 	write_tree_block(root, t);
236eb60ceacSChris Mason 	write_tree_block(root, right_buf);
237be0e5c09SChris Mason 
238be0e5c09SChris Mason 	/* then fixup the leaf pointer in the path */
239be0e5c09SChris Mason 	if (path->slots[level] < push_items) {
240be0e5c09SChris Mason 		path->slots[level] += left_nritems;
241eb60ceacSChris Mason 		tree_block_release(root, path->nodes[level]);
242eb60ceacSChris Mason 		path->nodes[level] = t;
243be0e5c09SChris Mason 		path->slots[level + 1] -= 1;
244be0e5c09SChris Mason 	} else {
245be0e5c09SChris Mason 		path->slots[level] -= push_items;
246eb60ceacSChris Mason 		tree_block_release(root, t);
247be0e5c09SChris Mason 	}
248be0e5c09SChris Mason 	return 0;
249be0e5c09SChris Mason }
250be0e5c09SChris Mason 
25174123bd7SChris Mason /*
25274123bd7SChris Mason  * try to push data from one node into the next node right in the
25374123bd7SChris Mason  * tree.  The src node is found at specified level in the path.
25474123bd7SChris Mason  * If some bytes were pushed, return 0, otherwise return 1.
25574123bd7SChris Mason  *
25674123bd7SChris Mason  * Lower nodes/leaves in the path are not touched, higher nodes may
25774123bd7SChris Mason  * be modified to reflect the push.
25874123bd7SChris Mason  *
25974123bd7SChris Mason  * The path is altered to reflect the push.
26074123bd7SChris Mason  */
261be0e5c09SChris Mason int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
262be0e5c09SChris Mason {
263be0e5c09SChris Mason 	int slot;
264eb60ceacSChris Mason 	struct tree_buffer *t;
265eb60ceacSChris Mason 	struct tree_buffer *src_buffer;
266be0e5c09SChris Mason 	struct node *dst;
267be0e5c09SChris Mason 	struct node *src;
268be0e5c09SChris Mason 	int push_items = 0;
269be0e5c09SChris Mason 	int dst_nritems;
270be0e5c09SChris Mason 	int src_nritems;
271be0e5c09SChris Mason 
27274123bd7SChris Mason 	/* can't push from the root */
273be0e5c09SChris Mason 	if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
274be0e5c09SChris Mason 		return 1;
27574123bd7SChris Mason 
27674123bd7SChris Mason 	/* only try to push inside the node higher up */
277be0e5c09SChris Mason 	slot = path->slots[level + 1];
278be0e5c09SChris Mason 	if (slot == NODEPTRS_PER_BLOCK - 1)
279be0e5c09SChris Mason 		return 1;
280be0e5c09SChris Mason 
281eb60ceacSChris Mason 	if (slot >= path->nodes[level + 1]->node.header.nritems -1)
282be0e5c09SChris Mason 		return 1;
283be0e5c09SChris Mason 
284eb60ceacSChris Mason 	t = read_tree_block(root,
285eb60ceacSChris Mason 			    path->nodes[level + 1]->node.blockptrs[slot + 1]);
286eb60ceacSChris Mason 	dst = &t->node;
287eb60ceacSChris Mason 	src_buffer = path->nodes[level];
288eb60ceacSChris Mason 	src = &src_buffer->node;
289be0e5c09SChris Mason 	dst_nritems = dst->header.nritems;
290be0e5c09SChris Mason 	src_nritems = src->header.nritems;
291be0e5c09SChris Mason 	push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
292eb60ceacSChris Mason 	if (push_items <= 0) {
293eb60ceacSChris Mason 		tree_block_release(root, t);
294be0e5c09SChris Mason 		return 1;
295eb60ceacSChris Mason 	}
296be0e5c09SChris Mason 
297be0e5c09SChris Mason 	if (src_nritems < push_items)
298be0e5c09SChris Mason 		push_items = src_nritems;
299be0e5c09SChris Mason 	memmove(dst->keys + push_items, dst->keys,
300be0e5c09SChris Mason 		dst_nritems * sizeof(struct key));
301be0e5c09SChris Mason 	memcpy(dst->keys, src->keys + src_nritems - push_items,
302be0e5c09SChris Mason 		push_items * sizeof(struct key));
303be0e5c09SChris Mason 
304be0e5c09SChris Mason 	memmove(dst->blockptrs + push_items, dst->blockptrs,
305be0e5c09SChris Mason 		dst_nritems * sizeof(u64));
306be0e5c09SChris Mason 	memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
307be0e5c09SChris Mason 		push_items * sizeof(u64));
308be0e5c09SChris Mason 
309be0e5c09SChris Mason 	src->header.nritems -= push_items;
310be0e5c09SChris Mason 	dst->header.nritems += push_items;
311be0e5c09SChris Mason 
312be0e5c09SChris Mason 	/* adjust the pointers going up the tree */
313eb60ceacSChris Mason 	memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
314be0e5c09SChris Mason 		dst->keys, sizeof(struct key));
315eb60ceacSChris Mason 
316eb60ceacSChris Mason 	write_tree_block(root, path->nodes[level + 1]);
317eb60ceacSChris Mason 	write_tree_block(root, t);
318eb60ceacSChris Mason 	write_tree_block(root, src_buffer);
319eb60ceacSChris Mason 
32074123bd7SChris Mason 	/* then fixup the pointers in the path */
321be0e5c09SChris Mason 	if (path->slots[level] >= src->header.nritems) {
322be0e5c09SChris Mason 		path->slots[level] -= src->header.nritems;
323eb60ceacSChris Mason 		tree_block_release(root, path->nodes[level]);
324eb60ceacSChris Mason 		path->nodes[level] = t;
325be0e5c09SChris Mason 		path->slots[level + 1] += 1;
326eb60ceacSChris Mason 	} else {
327eb60ceacSChris Mason 		tree_block_release(root, t);
328be0e5c09SChris Mason 	}
329be0e5c09SChris Mason 	return 0;
330be0e5c09SChris Mason }
331be0e5c09SChris Mason 
33274123bd7SChris Mason /*
33374123bd7SChris Mason  * worker function to insert a single pointer in a node.
33474123bd7SChris Mason  * the node should have enough room for the pointer already
33574123bd7SChris Mason  * slot and level indicate where you want the key to go, and
33674123bd7SChris Mason  * blocknr is the block the key points to.
33774123bd7SChris Mason  */
33874123bd7SChris Mason int __insert_ptr(struct ctree_root *root,
33974123bd7SChris Mason 		struct ctree_path *path, struct key *key,
34074123bd7SChris Mason 		u64 blocknr, int slot, int level)
34174123bd7SChris Mason {
34274123bd7SChris Mason 	struct node *c;
34374123bd7SChris Mason 	struct node *lower;
34474123bd7SChris Mason 	struct key *lower_key;
34574123bd7SChris Mason 	int nritems;
34674123bd7SChris Mason 	/* need a new root */
34774123bd7SChris Mason 	if (!path->nodes[level]) {
34874123bd7SChris Mason 		struct tree_buffer *t;
34974123bd7SChris Mason 		t = alloc_free_block(root);
35074123bd7SChris Mason 		c = &t->node;
35174123bd7SChris Mason 		memset(c, 0, sizeof(c));
35274123bd7SChris Mason 		c->header.nritems = 2;
35374123bd7SChris Mason 		c->header.flags = node_level(level);
35474123bd7SChris Mason 		c->header.blocknr = t->blocknr;
35574123bd7SChris Mason 		lower = &path->nodes[level-1]->node;
35674123bd7SChris Mason 		if (is_leaf(lower->header.flags))
35774123bd7SChris Mason 			lower_key = &((struct leaf *)lower)->items[0].key;
35874123bd7SChris Mason 		else
35974123bd7SChris Mason 			lower_key = lower->keys;
36074123bd7SChris Mason 		memcpy(c->keys, lower_key, sizeof(struct key));
36174123bd7SChris Mason 		memcpy(c->keys + 1, key, sizeof(struct key));
36274123bd7SChris Mason 		c->blockptrs[0] = path->nodes[level-1]->blocknr;
36374123bd7SChris Mason 		c->blockptrs[1] = blocknr;
36474123bd7SChris Mason 		/* the path has an extra ref to root->node */
36574123bd7SChris Mason 		tree_block_release(root, root->node);
36674123bd7SChris Mason 		root->node = t;
36774123bd7SChris Mason 		t->count++;
36874123bd7SChris Mason 		write_tree_block(root, t);
36974123bd7SChris Mason 		path->nodes[level] = t;
37074123bd7SChris Mason 		path->slots[level] = 0;
37174123bd7SChris Mason 		if (c->keys[1].objectid == 0)
37274123bd7SChris Mason 			BUG();
37374123bd7SChris Mason 		return 0;
37474123bd7SChris Mason 	}
37574123bd7SChris Mason 	lower = &path->nodes[level]->node;
37674123bd7SChris Mason 	nritems = lower->header.nritems;
37774123bd7SChris Mason 	if (slot > nritems)
37874123bd7SChris Mason 		BUG();
37974123bd7SChris Mason 	if (nritems == NODEPTRS_PER_BLOCK)
38074123bd7SChris Mason 		BUG();
38174123bd7SChris Mason 	if (slot != nritems) {
38274123bd7SChris Mason 		memmove(lower->keys + slot + 1, lower->keys + slot,
38374123bd7SChris Mason 			(nritems - slot) * sizeof(struct key));
38474123bd7SChris Mason 		memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
38574123bd7SChris Mason 			(nritems - slot) * sizeof(u64));
38674123bd7SChris Mason 	}
38774123bd7SChris Mason 	memcpy(lower->keys + slot, key, sizeof(struct key));
38874123bd7SChris Mason 	lower->blockptrs[slot] = blocknr;
38974123bd7SChris Mason 	lower->header.nritems++;
39074123bd7SChris Mason 	if (lower->keys[1].objectid == 0)
39174123bd7SChris Mason 			BUG();
39274123bd7SChris Mason 	write_tree_block(root, path->nodes[level]);
39374123bd7SChris Mason 	return 0;
39474123bd7SChris Mason }
39574123bd7SChris Mason 
39674123bd7SChris Mason 
39774123bd7SChris Mason /*
39874123bd7SChris Mason  * insert a key,blocknr pair into the tree at a given level
39974123bd7SChris Mason  * If the node at that level in the path doesn't have room,
40074123bd7SChris Mason  * it is split or shifted as appropriate.
40174123bd7SChris Mason  */
402be0e5c09SChris Mason int insert_ptr(struct ctree_root *root,
403be0e5c09SChris Mason 		struct ctree_path *path, struct key *key,
404be0e5c09SChris Mason 		u64 blocknr, int level)
405be0e5c09SChris Mason {
406eb60ceacSChris Mason 	struct tree_buffer *t = path->nodes[level];
407eb60ceacSChris Mason 	struct node *c = &path->nodes[level]->node;
408be0e5c09SChris Mason 	struct node *b;
409eb60ceacSChris Mason 	struct tree_buffer *b_buffer;
410eb60ceacSChris Mason 	struct tree_buffer *bal[MAX_LEVEL];
411be0e5c09SChris Mason 	int bal_level = level;
412be0e5c09SChris Mason 	int mid;
413be0e5c09SChris Mason 	int bal_start = -1;
414be0e5c09SChris Mason 
41574123bd7SChris Mason 	/*
41674123bd7SChris Mason 	 * check to see if we need to make room in the node for this
41774123bd7SChris Mason 	 * pointer.  If we do, keep walking the tree, making sure there
41874123bd7SChris Mason 	 * is enough room in each level for the required insertions.
41974123bd7SChris Mason 	 *
42074123bd7SChris Mason 	 * The bal array is filled in with any nodes to be inserted
42174123bd7SChris Mason 	 * due to splitting.  Once we've done all the splitting required
42274123bd7SChris Mason 	 * do the inserts based on the data in the bal array.
42374123bd7SChris Mason 	 */
424be0e5c09SChris Mason 	memset(bal, 0, ARRAY_SIZE(bal));
425eb60ceacSChris Mason 	while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
426eb60ceacSChris Mason 		c = &t->node;
427be0e5c09SChris Mason 		if (push_node_left(root, path,
428be0e5c09SChris Mason 		   node_level(c->header.flags)) == 0)
429be0e5c09SChris Mason 			break;
430be0e5c09SChris Mason 		if (push_node_right(root, path,
431be0e5c09SChris Mason 		   node_level(c->header.flags)) == 0)
432be0e5c09SChris Mason 			break;
433be0e5c09SChris Mason 		bal_start = bal_level;
434be0e5c09SChris Mason 		if (bal_level == MAX_LEVEL - 1)
435be0e5c09SChris Mason 			BUG();
436eb60ceacSChris Mason 		b_buffer = alloc_free_block(root);
437eb60ceacSChris Mason 		b = &b_buffer->node;
438be0e5c09SChris Mason 		b->header.flags = c->header.flags;
439eb60ceacSChris Mason 		b->header.blocknr = b_buffer->blocknr;
440be0e5c09SChris Mason 		mid = (c->header.nritems + 1) / 2;
441be0e5c09SChris Mason 		memcpy(b->keys, c->keys + mid,
442be0e5c09SChris Mason 			(c->header.nritems - mid) * sizeof(struct key));
443be0e5c09SChris Mason 		memcpy(b->blockptrs, c->blockptrs + mid,
444be0e5c09SChris Mason 			(c->header.nritems - mid) * sizeof(u64));
445be0e5c09SChris Mason 		b->header.nritems = c->header.nritems - mid;
446be0e5c09SChris Mason 		c->header.nritems = mid;
447eb60ceacSChris Mason 
448eb60ceacSChris Mason 		write_tree_block(root, t);
449eb60ceacSChris Mason 		write_tree_block(root, b_buffer);
450eb60ceacSChris Mason 
451eb60ceacSChris Mason 		bal[bal_level] = b_buffer;
452be0e5c09SChris Mason 		if (bal_level == MAX_LEVEL - 1)
453be0e5c09SChris Mason 			break;
454be0e5c09SChris Mason 		bal_level += 1;
455eb60ceacSChris Mason 		t = path->nodes[bal_level];
456be0e5c09SChris Mason 	}
45774123bd7SChris Mason 	/*
45874123bd7SChris Mason 	 * bal_start tells us the first level in the tree that needed to
45974123bd7SChris Mason 	 * be split.  Go through the bal array inserting the new nodes
46074123bd7SChris Mason 	 * as needed.  The path is fixed as we go.
46174123bd7SChris Mason 	 */
462be0e5c09SChris Mason 	while(bal_start > 0) {
463eb60ceacSChris Mason 		b_buffer = bal[bal_start];
464eb60ceacSChris Mason 		c = &path->nodes[bal_start]->node;
465eb60ceacSChris Mason 		__insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
466be0e5c09SChris Mason 				path->slots[bal_start + 1] + 1, bal_start + 1);
467be0e5c09SChris Mason 		if (path->slots[bal_start] >= c->header.nritems) {
468be0e5c09SChris Mason 			path->slots[bal_start] -= c->header.nritems;
469eb60ceacSChris Mason 			tree_block_release(root, path->nodes[bal_start]);
470eb60ceacSChris Mason 			path->nodes[bal_start] = b_buffer;
471be0e5c09SChris Mason 			path->slots[bal_start + 1] += 1;
472eb60ceacSChris Mason 		} else {
473eb60ceacSChris Mason 			tree_block_release(root, b_buffer);
474be0e5c09SChris Mason 		}
475be0e5c09SChris Mason 		bal_start--;
476be0e5c09SChris Mason 		if (!bal[bal_start])
477be0e5c09SChris Mason 			break;
478be0e5c09SChris Mason 	}
47974123bd7SChris Mason 	/* Now that the tree has room, insert the requested pointer */
480be0e5c09SChris Mason 	return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
481be0e5c09SChris Mason 			    level);
482be0e5c09SChris Mason }
483be0e5c09SChris Mason 
48474123bd7SChris Mason /*
48574123bd7SChris Mason  * how many bytes are required to store the items in a leaf.  start
48674123bd7SChris Mason  * and nr indicate which items in the leaf to check.  This totals up the
48774123bd7SChris Mason  * space used both by the item structs and the item data
48874123bd7SChris Mason  */
489be0e5c09SChris Mason int leaf_space_used(struct leaf *l, int start, int nr)
490be0e5c09SChris Mason {
491be0e5c09SChris Mason 	int data_len;
492be0e5c09SChris Mason 	int end = start + nr - 1;
493be0e5c09SChris Mason 
494be0e5c09SChris Mason 	if (!nr)
495be0e5c09SChris Mason 		return 0;
496be0e5c09SChris Mason 	data_len = l->items[start].offset + l->items[start].size;
497be0e5c09SChris Mason 	data_len = data_len - l->items[end].offset;
498be0e5c09SChris Mason 	data_len += sizeof(struct item) * nr;
499be0e5c09SChris Mason 	return data_len;
500be0e5c09SChris Mason }
501be0e5c09SChris Mason 
50274123bd7SChris Mason /*
50374123bd7SChris Mason  * push some data in the path leaf to the left, trying to free up at
50474123bd7SChris Mason  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
50574123bd7SChris Mason  */
506be0e5c09SChris Mason int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
507be0e5c09SChris Mason 		   int data_size)
508be0e5c09SChris Mason {
509eb60ceacSChris Mason 	struct tree_buffer *right_buf = path->nodes[0];
510eb60ceacSChris Mason 	struct leaf *right = &right_buf->leaf;
511eb60ceacSChris Mason 	struct tree_buffer *t;
512be0e5c09SChris Mason 	struct leaf *left;
513be0e5c09SChris Mason 	int slot;
514be0e5c09SChris Mason 	int i;
515be0e5c09SChris Mason 	int free_space;
516be0e5c09SChris Mason 	int push_space = 0;
517be0e5c09SChris Mason 	int push_items = 0;
518be0e5c09SChris Mason 	struct item *item;
519be0e5c09SChris Mason 	int old_left_nritems;
520be0e5c09SChris Mason 
521be0e5c09SChris Mason 	slot = path->slots[1];
522be0e5c09SChris Mason 	if (slot == 0) {
523be0e5c09SChris Mason 		return 1;
524be0e5c09SChris Mason 	}
525be0e5c09SChris Mason 	if (!path->nodes[1]) {
526be0e5c09SChris Mason 		return 1;
527be0e5c09SChris Mason 	}
528eb60ceacSChris Mason 	t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
529eb60ceacSChris Mason 	left = &t->leaf;
530be0e5c09SChris Mason 	free_space = leaf_free_space(left);
531be0e5c09SChris Mason 	if (free_space < data_size + sizeof(struct item)) {
532eb60ceacSChris Mason 		tree_block_release(root, t);
533be0e5c09SChris Mason 		return 1;
534be0e5c09SChris Mason 	}
535be0e5c09SChris Mason 	for (i = 0; i < right->header.nritems; i++) {
536be0e5c09SChris Mason 		item = right->items + i;
537be0e5c09SChris Mason 		if (path->slots[0] == i)
538be0e5c09SChris Mason 			push_space += data_size + sizeof(*item);
539be0e5c09SChris Mason 		if (item->size + sizeof(*item) + push_space > free_space)
540be0e5c09SChris Mason 			break;
541be0e5c09SChris Mason 		push_items++;
542be0e5c09SChris Mason 		push_space += item->size + sizeof(*item);
543be0e5c09SChris Mason 	}
544be0e5c09SChris Mason 	if (push_items == 0) {
545eb60ceacSChris Mason 		tree_block_release(root, t);
546be0e5c09SChris Mason 		return 1;
547be0e5c09SChris Mason 	}
548be0e5c09SChris Mason 	/* push data from right to left */
549be0e5c09SChris Mason 	memcpy(left->items + left->header.nritems,
550be0e5c09SChris Mason 		right->items, push_items * sizeof(struct item));
551be0e5c09SChris Mason 	push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
552be0e5c09SChris Mason 	memcpy(left->data + leaf_data_end(left) - push_space,
553be0e5c09SChris Mason 		right->data + right->items[push_items - 1].offset,
554be0e5c09SChris Mason 		push_space);
555be0e5c09SChris Mason 	old_left_nritems = left->header.nritems;
556eb60ceacSChris Mason 	BUG_ON(old_left_nritems < 0);
557eb60ceacSChris Mason 
558be0e5c09SChris Mason 	for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
559be0e5c09SChris Mason 		left->items[i].offset -= LEAF_DATA_SIZE -
560be0e5c09SChris Mason 			left->items[old_left_nritems -1].offset;
561be0e5c09SChris Mason 	}
562be0e5c09SChris Mason 	left->header.nritems += push_items;
563be0e5c09SChris Mason 
564be0e5c09SChris Mason 	/* fixup right node */
565be0e5c09SChris Mason 	push_space = right->items[push_items-1].offset - leaf_data_end(right);
566be0e5c09SChris Mason 	memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
567be0e5c09SChris Mason 		leaf_data_end(right), push_space);
568be0e5c09SChris Mason 	memmove(right->items, right->items + push_items,
569be0e5c09SChris Mason 		(right->header.nritems - push_items) * sizeof(struct item));
570be0e5c09SChris Mason 	right->header.nritems -= push_items;
571be0e5c09SChris Mason 	push_space = LEAF_DATA_SIZE;
572eb60ceacSChris Mason 
573be0e5c09SChris Mason 	for (i = 0; i < right->header.nritems; i++) {
574be0e5c09SChris Mason 		right->items[i].offset = push_space - right->items[i].size;
575be0e5c09SChris Mason 		push_space = right->items[i].offset;
576be0e5c09SChris Mason 	}
577eb60ceacSChris Mason 
578eb60ceacSChris Mason 	write_tree_block(root, t);
579eb60ceacSChris Mason 	write_tree_block(root, right_buf);
580eb60ceacSChris Mason 
581eb60ceacSChris Mason 	fixup_low_keys(root, path, &right->items[0].key, 1);
582be0e5c09SChris Mason 
583be0e5c09SChris Mason 	/* then fixup the leaf pointer in the path */
584be0e5c09SChris Mason 	if (path->slots[0] < push_items) {
585be0e5c09SChris Mason 		path->slots[0] += old_left_nritems;
586eb60ceacSChris Mason 		tree_block_release(root, path->nodes[0]);
587eb60ceacSChris Mason 		path->nodes[0] = t;
588be0e5c09SChris Mason 		path->slots[1] -= 1;
589be0e5c09SChris Mason 	} else {
590eb60ceacSChris Mason 		tree_block_release(root, t);
591be0e5c09SChris Mason 		path->slots[0] -= push_items;
592be0e5c09SChris Mason 	}
593eb60ceacSChris Mason 	BUG_ON(path->slots[0] < 0);
594be0e5c09SChris Mason 	return 0;
595be0e5c09SChris Mason }
596be0e5c09SChris Mason 
59774123bd7SChris Mason /*
59874123bd7SChris Mason  * split the path's leaf in two, making sure there is at least data_size
59974123bd7SChris Mason  * available for the resulting leaf level of the path.
60074123bd7SChris Mason  */
601be0e5c09SChris Mason int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
602be0e5c09SChris Mason {
603eb60ceacSChris Mason 	struct tree_buffer *l_buf = path->nodes[0];
604eb60ceacSChris Mason 	struct leaf *l = &l_buf->leaf;
605eb60ceacSChris Mason 	int nritems;
606eb60ceacSChris Mason 	int mid;
607eb60ceacSChris Mason 	int slot;
608be0e5c09SChris Mason 	struct leaf *right;
609eb60ceacSChris Mason 	struct tree_buffer *right_buffer;
610be0e5c09SChris Mason 	int space_needed = data_size + sizeof(struct item);
611be0e5c09SChris Mason 	int data_copy_size;
612be0e5c09SChris Mason 	int rt_data_off;
613be0e5c09SChris Mason 	int i;
614be0e5c09SChris Mason 	int ret;
615be0e5c09SChris Mason 
616be0e5c09SChris Mason 	if (push_leaf_left(root, path, data_size) == 0) {
617eb60ceacSChris Mason 		l_buf = path->nodes[0];
618eb60ceacSChris Mason 		l = &l_buf->leaf;
619eb60ceacSChris Mason 		if (leaf_free_space(l) >= sizeof(struct item) + data_size)
620be0e5c09SChris Mason 			return 0;
621be0e5c09SChris Mason 	}
622eb60ceacSChris Mason 	slot = path->slots[0];
623eb60ceacSChris Mason 	nritems = l->header.nritems;
624eb60ceacSChris Mason 	mid = (nritems + 1)/ 2;
625eb60ceacSChris Mason 
626eb60ceacSChris Mason 	right_buffer = alloc_free_block(root);
627eb60ceacSChris Mason 	BUG_ON(!right_buffer);
628eb60ceacSChris Mason 	BUG_ON(mid == nritems);
629eb60ceacSChris Mason 	right = &right_buffer->leaf;
630be0e5c09SChris Mason 	memset(right, 0, sizeof(*right));
631be0e5c09SChris Mason 	if (mid <= slot) {
632be0e5c09SChris Mason 		if (leaf_space_used(l, mid, nritems - mid) + space_needed >
633be0e5c09SChris Mason 			LEAF_DATA_SIZE)
634be0e5c09SChris Mason 			BUG();
635be0e5c09SChris Mason 	} else {
636be0e5c09SChris Mason 		if (leaf_space_used(l, 0, mid + 1) + space_needed >
637be0e5c09SChris Mason 			LEAF_DATA_SIZE)
638be0e5c09SChris Mason 			BUG();
639be0e5c09SChris Mason 	}
640be0e5c09SChris Mason 	right->header.nritems = nritems - mid;
641eb60ceacSChris Mason 	right->header.blocknr = right_buffer->blocknr;
642eb60ceacSChris Mason 	right->header.flags = node_level(0);
643be0e5c09SChris Mason 	data_copy_size = l->items[mid].offset + l->items[mid].size -
644be0e5c09SChris Mason 			 leaf_data_end(l);
645be0e5c09SChris Mason 	memcpy(right->items, l->items + mid,
646be0e5c09SChris Mason 	       (nritems - mid) * sizeof(struct item));
647be0e5c09SChris Mason 	memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
648be0e5c09SChris Mason 	       l->data + leaf_data_end(l), data_copy_size);
649be0e5c09SChris Mason 	rt_data_off = LEAF_DATA_SIZE -
650be0e5c09SChris Mason 		     (l->items[mid].offset + l->items[mid].size);
65174123bd7SChris Mason 
65274123bd7SChris Mason 	for (i = 0; i < right->header.nritems; i++)
653be0e5c09SChris Mason 		right->items[i].offset += rt_data_off;
65474123bd7SChris Mason 
655be0e5c09SChris Mason 	l->header.nritems = mid;
656be0e5c09SChris Mason 	ret = insert_ptr(root, path, &right->items[0].key,
657eb60ceacSChris Mason 			  right_buffer->blocknr, 1);
658eb60ceacSChris Mason 
659eb60ceacSChris Mason 	write_tree_block(root, right_buffer);
660eb60ceacSChris Mason 	write_tree_block(root, l_buf);
661eb60ceacSChris Mason 
662eb60ceacSChris Mason 	BUG_ON(path->slots[0] != slot);
663be0e5c09SChris Mason 	if (mid <= slot) {
664eb60ceacSChris Mason 		tree_block_release(root, path->nodes[0]);
665eb60ceacSChris Mason 		path->nodes[0] = right_buffer;
666be0e5c09SChris Mason 		path->slots[0] -= mid;
667be0e5c09SChris Mason 		path->slots[1] += 1;
668eb60ceacSChris Mason 	} else
669eb60ceacSChris Mason 		tree_block_release(root, right_buffer);
670eb60ceacSChris Mason 	BUG_ON(path->slots[0] < 0);
671be0e5c09SChris Mason 	return ret;
672be0e5c09SChris Mason }
673be0e5c09SChris Mason 
67474123bd7SChris Mason /*
67574123bd7SChris Mason  * Given a key and some data, insert an item into the tree.
67674123bd7SChris Mason  * This does all the path init required, making room in the tree if needed.
67774123bd7SChris Mason  */
678be0e5c09SChris Mason int insert_item(struct ctree_root *root, struct key *key,
679be0e5c09SChris Mason 			  void *data, int data_size)
680be0e5c09SChris Mason {
681be0e5c09SChris Mason 	int ret;
682be0e5c09SChris Mason 	int slot;
683eb60ceacSChris Mason 	int slot_orig;
684be0e5c09SChris Mason 	struct leaf *leaf;
685eb60ceacSChris Mason 	struct tree_buffer *leaf_buf;
686be0e5c09SChris Mason 	unsigned int nritems;
687be0e5c09SChris Mason 	unsigned int data_end;
688be0e5c09SChris Mason 	struct ctree_path path;
689be0e5c09SChris Mason 
69074123bd7SChris Mason 	/* create a root if there isn't one */
691eb60ceacSChris Mason 	if (!root->node) {
692eb60ceacSChris Mason 		struct tree_buffer *t;
693eb60ceacSChris Mason 		t = alloc_free_block(root);
694eb60ceacSChris Mason 		BUG_ON(!t);
695eb60ceacSChris Mason 		t->node.header.nritems = 0;
696eb60ceacSChris Mason 		t->node.header.flags = node_level(0);
697eb60ceacSChris Mason 		t->node.header.blocknr = t->blocknr;
698eb60ceacSChris Mason 		root->node = t;
699eb60ceacSChris Mason 		write_tree_block(root, t);
700eb60ceacSChris Mason 	}
701be0e5c09SChris Mason 	init_path(&path);
702be0e5c09SChris Mason 	ret = search_slot(root, key, &path);
703eb60ceacSChris Mason 	if (ret == 0) {
704eb60ceacSChris Mason 		release_path(root, &path);
705be0e5c09SChris Mason 		return -EEXIST;
706eb60ceacSChris Mason 	}
707be0e5c09SChris Mason 
708eb60ceacSChris Mason 	slot_orig = path.slots[0];
709eb60ceacSChris Mason 	leaf_buf = path.nodes[0];
710eb60ceacSChris Mason 	leaf = &leaf_buf->leaf;
71174123bd7SChris Mason 
71274123bd7SChris Mason 	/* make room if needed */
713eb60ceacSChris Mason 	if (leaf_free_space(leaf) <  sizeof(struct item) + data_size) {
714be0e5c09SChris Mason 		split_leaf(root, &path, data_size);
715eb60ceacSChris Mason 		leaf_buf = path.nodes[0];
716eb60ceacSChris Mason 		leaf = &path.nodes[0]->leaf;
717eb60ceacSChris Mason 	}
718be0e5c09SChris Mason 	nritems = leaf->header.nritems;
719be0e5c09SChris Mason 	data_end = leaf_data_end(leaf);
720eb60ceacSChris Mason 
721be0e5c09SChris Mason 	if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
722be0e5c09SChris Mason 		BUG();
723be0e5c09SChris Mason 
724be0e5c09SChris Mason 	slot = path.slots[0];
725eb60ceacSChris Mason 	BUG_ON(slot < 0);
726be0e5c09SChris Mason 	if (slot == 0)
727eb60ceacSChris Mason 		fixup_low_keys(root, &path, key, 1);
728be0e5c09SChris Mason 	if (slot != nritems) {
729be0e5c09SChris Mason 		int i;
730be0e5c09SChris Mason 		unsigned int old_data = leaf->items[slot].offset +
731be0e5c09SChris Mason 					leaf->items[slot].size;
732be0e5c09SChris Mason 
733be0e5c09SChris Mason 		/*
734be0e5c09SChris Mason 		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
735be0e5c09SChris Mason 		 */
736be0e5c09SChris Mason 		/* first correct the data pointers */
737be0e5c09SChris Mason 		for (i = slot; i < nritems; i++)
738be0e5c09SChris Mason 			leaf->items[i].offset -= data_size;
739be0e5c09SChris Mason 
740be0e5c09SChris Mason 		/* shift the items */
741be0e5c09SChris Mason 		memmove(leaf->items + slot + 1, leaf->items + slot,
742be0e5c09SChris Mason 		        (nritems - slot) * sizeof(struct item));
743be0e5c09SChris Mason 
744be0e5c09SChris Mason 		/* shift the data */
745be0e5c09SChris Mason 		memmove(leaf->data + data_end - data_size, leaf->data +
746be0e5c09SChris Mason 		        data_end, old_data - data_end);
747be0e5c09SChris Mason 		data_end = old_data;
748be0e5c09SChris Mason 	}
74974123bd7SChris Mason 	/* copy the new data in */
750be0e5c09SChris Mason 	memcpy(&leaf->items[slot].key, key, sizeof(struct key));
751be0e5c09SChris Mason 	leaf->items[slot].offset = data_end - data_size;
752be0e5c09SChris Mason 	leaf->items[slot].size = data_size;
753be0e5c09SChris Mason 	memcpy(leaf->data + data_end - data_size, data, data_size);
754be0e5c09SChris Mason 	leaf->header.nritems += 1;
755eb60ceacSChris Mason 	write_tree_block(root, leaf_buf);
756be0e5c09SChris Mason 	if (leaf_free_space(leaf) < 0)
757be0e5c09SChris Mason 		BUG();
758eb60ceacSChris Mason 	release_path(root, &path);
759be0e5c09SChris Mason 	return 0;
760be0e5c09SChris Mason }
761be0e5c09SChris Mason 
76274123bd7SChris Mason /*
76374123bd7SChris Mason  * delete the pointer from a given level in the path.  The path is not
76474123bd7SChris Mason  * fixed up, so after calling this it is not valid at that level.
76574123bd7SChris Mason  *
76674123bd7SChris Mason  * If the delete empties a node, the node is removed from the tree,
76774123bd7SChris Mason  * continuing all the way the root if required.  The root is converted into
76874123bd7SChris Mason  * a leaf if all the nodes are emptied.
76974123bd7SChris Mason  */
770be0e5c09SChris Mason int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
771be0e5c09SChris Mason {
772be0e5c09SChris Mason 	int slot;
773eb60ceacSChris Mason 	struct tree_buffer *t;
774be0e5c09SChris Mason 	struct node *node;
775be0e5c09SChris Mason 	int nritems;
776be0e5c09SChris Mason 
777be0e5c09SChris Mason 	while(1) {
778eb60ceacSChris Mason 		t = path->nodes[level];
779eb60ceacSChris Mason 		if (!t)
780be0e5c09SChris Mason 			break;
781eb60ceacSChris Mason 		node = &t->node;
782be0e5c09SChris Mason 		slot = path->slots[level];
783be0e5c09SChris Mason 		nritems = node->header.nritems;
784be0e5c09SChris Mason 
785be0e5c09SChris Mason 		if (slot != nritems -1) {
786be0e5c09SChris Mason 			memmove(node->keys + slot, node->keys + slot + 1,
787be0e5c09SChris Mason 				sizeof(struct key) * (nritems - slot - 1));
788be0e5c09SChris Mason 			memmove(node->blockptrs + slot,
789be0e5c09SChris Mason 				node->blockptrs + slot + 1,
790be0e5c09SChris Mason 				sizeof(u64) * (nritems - slot - 1));
791be0e5c09SChris Mason 		}
792be0e5c09SChris Mason 		node->header.nritems--;
793eb60ceacSChris Mason 		write_tree_block(root, t);
794be0e5c09SChris Mason 		if (node->header.nritems != 0) {
795be0e5c09SChris Mason 			int tslot;
796be0e5c09SChris Mason 			if (slot == 0)
797eb60ceacSChris Mason 				fixup_low_keys(root, path, node->keys,
798eb60ceacSChris Mason 					       level + 1);
799be0e5c09SChris Mason 			tslot = path->slots[level+1];
800eb60ceacSChris Mason 			t->count++;
801be0e5c09SChris Mason 			push_node_left(root, path, level);
802be0e5c09SChris Mason 			if (node->header.nritems) {
803be0e5c09SChris Mason 				push_node_right(root, path, level);
804be0e5c09SChris Mason 			}
805eb60ceacSChris Mason 			if (node->header.nritems) {
806eb60ceacSChris Mason 				tree_block_release(root, t);
807be0e5c09SChris Mason 				break;
808eb60ceacSChris Mason 			}
809eb60ceacSChris Mason 			tree_block_release(root, t);
8104920c9acSChris Mason 			path->slots[level+1] = tslot;
811be0e5c09SChris Mason 		}
812eb60ceacSChris Mason 		if (t == root->node) {
813eb60ceacSChris Mason 			/* just turn the root into a leaf and break */
814eb60ceacSChris Mason 			root->node->node.header.flags = node_level(0);
815eb60ceacSChris Mason 			write_tree_block(root, t);
816be0e5c09SChris Mason 			break;
817be0e5c09SChris Mason 		}
818be0e5c09SChris Mason 		level++;
819be0e5c09SChris Mason 		if (!path->nodes[level])
820be0e5c09SChris Mason 			BUG();
821be0e5c09SChris Mason 	}
822be0e5c09SChris Mason 	return 0;
823be0e5c09SChris Mason }
824be0e5c09SChris Mason 
82574123bd7SChris Mason /*
82674123bd7SChris Mason  * delete the item at the leaf level in path.  If that empties
82774123bd7SChris Mason  * the leaf, remove it from the tree
82874123bd7SChris Mason  */
8294920c9acSChris Mason int del_item(struct ctree_root *root, struct ctree_path *path)
830be0e5c09SChris Mason {
831be0e5c09SChris Mason 	int slot;
832be0e5c09SChris Mason 	struct leaf *leaf;
833eb60ceacSChris Mason 	struct tree_buffer *leaf_buf;
834be0e5c09SChris Mason 	int doff;
835be0e5c09SChris Mason 	int dsize;
836be0e5c09SChris Mason 
837eb60ceacSChris Mason 	leaf_buf = path->nodes[0];
838eb60ceacSChris Mason 	leaf = &leaf_buf->leaf;
8394920c9acSChris Mason 	slot = path->slots[0];
840be0e5c09SChris Mason 	doff = leaf->items[slot].offset;
841be0e5c09SChris Mason 	dsize = leaf->items[slot].size;
842be0e5c09SChris Mason 
843be0e5c09SChris Mason 	if (slot != leaf->header.nritems - 1) {
844be0e5c09SChris Mason 		int i;
845be0e5c09SChris Mason 		int data_end = leaf_data_end(leaf);
846be0e5c09SChris Mason 		memmove(leaf->data + data_end + dsize,
847be0e5c09SChris Mason 			leaf->data + data_end,
848be0e5c09SChris Mason 			doff - data_end);
849be0e5c09SChris Mason 		for (i = slot + 1; i < leaf->header.nritems; i++)
850be0e5c09SChris Mason 			leaf->items[i].offset += dsize;
851be0e5c09SChris Mason 		memmove(leaf->items + slot, leaf->items + slot + 1,
852be0e5c09SChris Mason 			sizeof(struct item) *
853be0e5c09SChris Mason 			(leaf->header.nritems - slot - 1));
854be0e5c09SChris Mason 	}
855be0e5c09SChris Mason 	leaf->header.nritems -= 1;
85674123bd7SChris Mason 	/* delete the leaf if we've emptied it */
857be0e5c09SChris Mason 	if (leaf->header.nritems == 0) {
858eb60ceacSChris Mason 		if (leaf_buf == root->node) {
859eb60ceacSChris Mason 			leaf->header.flags = node_level(0);
860eb60ceacSChris Mason 			write_tree_block(root, leaf_buf);
861eb60ceacSChris Mason 		} else
8624920c9acSChris Mason 			del_ptr(root, path, 1);
863be0e5c09SChris Mason 	} else {
864be0e5c09SChris Mason 		if (slot == 0)
865eb60ceacSChris Mason 			fixup_low_keys(root, path, &leaf->items[0].key, 1);
866eb60ceacSChris Mason 		write_tree_block(root, leaf_buf);
86774123bd7SChris Mason 		/* delete the leaf if it is mostly empty */
868be0e5c09SChris Mason 		if (leaf_space_used(leaf, 0, leaf->header.nritems) <
869be0e5c09SChris Mason 		    LEAF_DATA_SIZE / 4) {
870be0e5c09SChris Mason 			/* push_leaf_left fixes the path.
871be0e5c09SChris Mason 			 * make sure the path still points to our leaf
872be0e5c09SChris Mason 			 * for possible call to del_ptr below
873be0e5c09SChris Mason 			 */
8744920c9acSChris Mason 			slot = path->slots[1];
875eb60ceacSChris Mason 			leaf_buf->count++;
8764920c9acSChris Mason 			push_leaf_left(root, path, 1);
877be0e5c09SChris Mason 			if (leaf->header.nritems == 0) {
8784920c9acSChris Mason 				path->slots[1] = slot;
8794920c9acSChris Mason 				del_ptr(root, path, 1);
880be0e5c09SChris Mason 			}
881eb60ceacSChris Mason 			tree_block_release(root, leaf_buf);
882be0e5c09SChris Mason 		}
883be0e5c09SChris Mason 	}
884be0e5c09SChris Mason 	return 0;
885be0e5c09SChris Mason }
886be0e5c09SChris Mason 
887be0e5c09SChris Mason void print_leaf(struct leaf *l)
888be0e5c09SChris Mason {
889be0e5c09SChris Mason 	int i;
890be0e5c09SChris Mason 	int nr = l->header.nritems;
891be0e5c09SChris Mason 	struct item *item;
892eb60ceacSChris Mason 	printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
893be0e5c09SChris Mason 	       leaf_free_space(l));
894be0e5c09SChris Mason 	fflush(stdout);
895be0e5c09SChris Mason 	for (i = 0 ; i < nr ; i++) {
896be0e5c09SChris Mason 		item = l->items + i;
897be0e5c09SChris Mason 		printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
898be0e5c09SChris Mason 			i,
899be0e5c09SChris Mason 			item->key.objectid, item->key.flags, item->key.offset,
900be0e5c09SChris Mason 			item->offset, item->size);
901be0e5c09SChris Mason 		fflush(stdout);
902be0e5c09SChris Mason 		printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
903be0e5c09SChris Mason 		fflush(stdout);
904be0e5c09SChris Mason 	}
905be0e5c09SChris Mason }
906eb60ceacSChris Mason void print_tree(struct ctree_root *root, struct tree_buffer *t)
907be0e5c09SChris Mason {
908be0e5c09SChris Mason 	int i;
909be0e5c09SChris Mason 	int nr;
910eb60ceacSChris Mason 	struct node *c;
911be0e5c09SChris Mason 
912eb60ceacSChris Mason 	if (!t)
913be0e5c09SChris Mason 		return;
914eb60ceacSChris Mason 	c = &t->node;
915be0e5c09SChris Mason 	nr = c->header.nritems;
916eb60ceacSChris Mason 	if (c->header.blocknr != t->blocknr)
917eb60ceacSChris Mason 		BUG();
918be0e5c09SChris Mason 	if (is_leaf(c->header.flags)) {
919be0e5c09SChris Mason 		print_leaf((struct leaf *)c);
920be0e5c09SChris Mason 		return;
921be0e5c09SChris Mason 	}
922eb60ceacSChris Mason 	printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
923be0e5c09SChris Mason 	        node_level(c->header.flags), c->header.nritems,
924be0e5c09SChris Mason 		NODEPTRS_PER_BLOCK - c->header.nritems);
925be0e5c09SChris Mason 	fflush(stdout);
926be0e5c09SChris Mason 	for (i = 0; i < nr; i++) {
927eb60ceacSChris Mason 		printf("\tkey %d (%lu %u %lu) block %lu\n",
928be0e5c09SChris Mason 		       i,
929be0e5c09SChris Mason 		       c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
930be0e5c09SChris Mason 		       c->blockptrs[i]);
931be0e5c09SChris Mason 		fflush(stdout);
932be0e5c09SChris Mason 	}
933be0e5c09SChris Mason 	for (i = 0; i < nr; i++) {
934eb60ceacSChris Mason 		struct tree_buffer *next_buf = read_tree_block(root,
935eb60ceacSChris Mason 							    c->blockptrs[i]);
936eb60ceacSChris Mason 		struct node *next = &next_buf->node;
937be0e5c09SChris Mason 		if (is_leaf(next->header.flags) &&
938be0e5c09SChris Mason 		    node_level(c->header.flags) != 1)
939be0e5c09SChris Mason 			BUG();
940be0e5c09SChris Mason 		if (node_level(next->header.flags) !=
941be0e5c09SChris Mason 			node_level(c->header.flags) - 1)
942be0e5c09SChris Mason 			BUG();
943eb60ceacSChris Mason 		print_tree(root, next_buf);
944eb60ceacSChris Mason 		tree_block_release(root, next_buf);
945be0e5c09SChris Mason 	}
946be0e5c09SChris Mason 
947be0e5c09SChris Mason }
948be0e5c09SChris Mason 
949be0e5c09SChris Mason /* for testing only */
950be0e5c09SChris Mason int next_key(int i, int max_key) {
951be0e5c09SChris Mason 	return rand() % max_key;
952be0e5c09SChris Mason 	// return i;
953be0e5c09SChris Mason }
954be0e5c09SChris Mason 
955be0e5c09SChris Mason int main() {
956eb60ceacSChris Mason 	struct ctree_root *root;
957be0e5c09SChris Mason 	struct key ins;
9584920c9acSChris Mason 	struct key last = { (u64)-1, 0, 0};
959be0e5c09SChris Mason 	char *buf;
960be0e5c09SChris Mason 	int i;
961be0e5c09SChris Mason 	int num;
962be0e5c09SChris Mason 	int ret;
96374123bd7SChris Mason 	int run_size = 25000;
964be0e5c09SChris Mason 	int max_key = 100000000;
965be0e5c09SChris Mason 	int tree_size = 0;
966be0e5c09SChris Mason 	struct ctree_path path;
967be0e5c09SChris Mason 
968eb60ceacSChris Mason 	radix_tree_init();
969eb60ceacSChris Mason 
970eb60ceacSChris Mason 
971eb60ceacSChris Mason 	root = open_ctree("dbfile");
972be0e5c09SChris Mason 
973be0e5c09SChris Mason 	srand(55);
974be0e5c09SChris Mason 	for (i = 0; i < run_size; i++) {
975be0e5c09SChris Mason 		buf = malloc(64);
976be0e5c09SChris Mason 		num = next_key(i, max_key);
977be0e5c09SChris Mason 		// num = i;
978be0e5c09SChris Mason 		sprintf(buf, "string-%d", num);
979be0e5c09SChris Mason 		// printf("insert %d\n", num);
980be0e5c09SChris Mason 		ins.objectid = num;
981be0e5c09SChris Mason 		ins.offset = 0;
982be0e5c09SChris Mason 		ins.flags = 0;
983eb60ceacSChris Mason 		ret = insert_item(root, &ins, buf, strlen(buf));
984be0e5c09SChris Mason 		if (!ret)
985be0e5c09SChris Mason 			tree_size++;
986be0e5c09SChris Mason 	}
987eb60ceacSChris Mason 	close_ctree(root);
988eb60ceacSChris Mason 	root = open_ctree("dbfile");
989eb60ceacSChris Mason 	printf("starting search\n");
990be0e5c09SChris Mason 	srand(55);
991be0e5c09SChris Mason 	for (i = 0; i < run_size; i++) {
992be0e5c09SChris Mason 		num = next_key(i, max_key);
993be0e5c09SChris Mason 		ins.objectid = num;
994be0e5c09SChris Mason 		init_path(&path);
995eb60ceacSChris Mason 		ret = search_slot(root, &ins, &path);
996be0e5c09SChris Mason 		if (ret) {
997eb60ceacSChris Mason 			print_tree(root, root->node);
998be0e5c09SChris Mason 			printf("unable to find %d\n", num);
999be0e5c09SChris Mason 			exit(1);
1000be0e5c09SChris Mason 		}
1001eb60ceacSChris Mason 		release_path(root, &path);
1002be0e5c09SChris Mason 	}
1003eb60ceacSChris Mason 	close_ctree(root);
1004eb60ceacSChris Mason 	root = open_ctree("dbfile");
1005eb60ceacSChris Mason 	printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
1006eb60ceacSChris Mason 	        node_level(root->node->node.header.flags),
1007eb60ceacSChris Mason 		root->node->node.header.nritems,
1008eb60ceacSChris Mason 		NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
1009eb60ceacSChris Mason 	printf("all searches good, deleting some items\n");
1010be0e5c09SChris Mason 	i = 0;
1011be0e5c09SChris Mason 	srand(55);
10124920c9acSChris Mason 	for (i = 0 ; i < run_size/4; i++) {
1013be0e5c09SChris Mason 		num = next_key(i, max_key);
1014be0e5c09SChris Mason 		ins.objectid = num;
10154920c9acSChris Mason 		init_path(&path);
1016eb60ceacSChris Mason 		ret = search_slot(root, &ins, &path);
10174920c9acSChris Mason 		if (ret)
10184920c9acSChris Mason 			continue;
1019eb60ceacSChris Mason 		ret = del_item(root, &path);
10204920c9acSChris Mason 		if (ret != 0)
10214920c9acSChris Mason 			BUG();
1022eb60ceacSChris Mason 		release_path(root, &path);
10234920c9acSChris Mason 		tree_size--;
10244920c9acSChris Mason 	}
10254920c9acSChris Mason 	srand(128);
10264920c9acSChris Mason 	for (i = 0; i < run_size; i++) {
10274920c9acSChris Mason 		buf = malloc(64);
10284920c9acSChris Mason 		num = next_key(i, max_key);
10294920c9acSChris Mason 		sprintf(buf, "string-%d", num);
10304920c9acSChris Mason 		ins.objectid = num;
1031eb60ceacSChris Mason 		ret = insert_item(root, &ins, buf, strlen(buf));
10324920c9acSChris Mason 		if (!ret)
10334920c9acSChris Mason 			tree_size++;
10344920c9acSChris Mason 	}
1035eb60ceacSChris Mason 	close_ctree(root);
1036eb60ceacSChris Mason 	root = open_ctree("dbfile");
1037eb60ceacSChris Mason 	printf("starting search2\n");
1038eb60ceacSChris Mason 	srand(128);
1039eb60ceacSChris Mason 	for (i = 0; i < run_size; i++) {
1040eb60ceacSChris Mason 		num = next_key(i, max_key);
1041eb60ceacSChris Mason 		ins.objectid = num;
1042eb60ceacSChris Mason 		init_path(&path);
1043eb60ceacSChris Mason 		ret = search_slot(root, &ins, &path);
1044eb60ceacSChris Mason 		if (ret) {
1045eb60ceacSChris Mason 			print_tree(root, root->node);
1046eb60ceacSChris Mason 			printf("unable to find %d\n", num);
1047eb60ceacSChris Mason 			exit(1);
1048eb60ceacSChris Mason 		}
1049eb60ceacSChris Mason 		release_path(root, &path);
1050eb60ceacSChris Mason 	}
1051eb60ceacSChris Mason 	printf("starting big long delete run\n");
1052eb60ceacSChris Mason 	while(root->node && root->node->node.header.nritems > 0) {
10534920c9acSChris Mason 		struct leaf *leaf;
10544920c9acSChris Mason 		int slot;
10554920c9acSChris Mason 		ins.objectid = (u64)-1;
10564920c9acSChris Mason 		init_path(&path);
1057eb60ceacSChris Mason 		ret = search_slot(root, &ins, &path);
10584920c9acSChris Mason 		if (ret == 0)
10594920c9acSChris Mason 			BUG();
10604920c9acSChris Mason 
1061eb60ceacSChris Mason 		leaf = &path.nodes[0]->leaf;
10624920c9acSChris Mason 		slot = path.slots[0];
10634920c9acSChris Mason 		if (slot != leaf->header.nritems)
10644920c9acSChris Mason 			BUG();
10654920c9acSChris Mason 		while(path.slots[0] > 0) {
10664920c9acSChris Mason 			path.slots[0] -= 1;
10674920c9acSChris Mason 			slot = path.slots[0];
1068eb60ceacSChris Mason 			leaf = &path.nodes[0]->leaf;
10694920c9acSChris Mason 
10704920c9acSChris Mason 			if (comp_keys(&last, &leaf->items[slot].key) <= 0)
10714920c9acSChris Mason 				BUG();
10724920c9acSChris Mason 			memcpy(&last, &leaf->items[slot].key, sizeof(last));
1073eb60ceacSChris Mason 			ret = del_item(root, &path);
1074eb60ceacSChris Mason 			if (ret != 0) {
1075eb60ceacSChris Mason 				printf("del_item returned %d\n", ret);
10764920c9acSChris Mason 				BUG();
1077eb60ceacSChris Mason 			}
10784920c9acSChris Mason 			tree_size--;
10794920c9acSChris Mason 		}
1080eb60ceacSChris Mason 		release_path(root, &path);
1081be0e5c09SChris Mason 	}
1082eb60ceacSChris Mason 	close_ctree(root);
10834920c9acSChris Mason 	printf("tree size is now %d\n", tree_size);
1084be0e5c09SChris Mason 	return 0;
1085be0e5c09SChris Mason }
1086