// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-btree.h"
#include "dm-btree-internal.h"
#include "dm-transaction-manager.h"

#include <linux/export.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "btree"

/*
 * Removing an entry from a btree
 * ==============================
 *
 * A very important constraint for our btree is that no node, except the
 * root, may have fewer than a certain number of entries.
 * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
 *
 * Ensuring this is complicated by the way we want to only ever hold the
 * locks on 2 nodes concurrently, and only change nodes in a top to bottom
 * fashion.
 *
 * Each node may have a left or right sibling.  When descending the spine,
 * if a node contains only MIN_ENTRIES then we try to increase this to at
 * least MIN_ENTRIES + 1.  We do this in the following ways:
 *
 * [A] No siblings => this can only happen if the node is the root, in which
 *     case we copy the child's contents over the root.
 *
 * [B] No left sibling
 *     ==> rebalance(node, right sibling)
 *
 * [C] No right sibling
 *     ==> rebalance(left sibling, node)
 *
 * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
 *     ==> delete node adding its contents to left and right
 *
 * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
 *     ==> rebalance(left, node, right)
 *
 * After these operations it's possible that our original node no
 * longer contains the desired sub tree.  For this reason this rebalancing
 * is performed on the children of the current node.  This also avoids
 * having a special case for the root.
 *
 * Once this rebalancing has occurred we can then step into the child node
 * for internal nodes.  Or delete the entry for leaf nodes.
 */
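/*
 * A worked example (numbers for illustration only): with max_entries = 8,
 * merge_threshold() below gives 8 / 3 = 2.  rebalance2() merges two
 * siblings when they hold fewer than 2 * (2 + 1) = 6 entries between them,
 * and rebalance3() deletes the centre node when the three siblings hold
 * fewer than 4 * 2 + 1 = 9 entries in total; otherwise the entries are
 * redistributed between the siblings.
 */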

/*
 * Some little utilities for moving node data around.
 */
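/*
 * Moves entries within a single node.  A negative shift drops the first
 * |shift| entries by moving the remainder down to index 0; a positive
 * shift moves every entry up, opening a gap of 'shift' slots at the
 * front.  nr_entries is left for the caller to adjust.
 */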
static void node_shift(struct btree_node *n, int shift)
{
	uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
	uint32_t value_size = le32_to_cpu(n->header.value_size);

	if (shift < 0) {
		shift = -shift;
		BUG_ON(shift > nr_entries);
		BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift));
		memmove(key_ptr(n, 0),
			key_ptr(n, shift),
			(nr_entries - shift) * sizeof(__le64));
		memmove(value_ptr(n, 0),
			value_ptr(n, shift),
			(nr_entries - shift) * value_size);
	} else {
		BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
		memmove(key_ptr(n, shift),
			key_ptr(n, 0),
			nr_entries * sizeof(__le64));
		memmove(value_ptr(n, shift),
			value_ptr(n, 0),
			nr_entries * value_size);
	}
}

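/*
 * Copies |shift| entries between two siblings.  A negative shift appends
 * the first |shift| entries of 'right' to the end of 'left'; a positive
 * shift copies the last 'shift' entries of 'left' into the front of
 * 'right'.  Entry counts are left for the caller to adjust.
 */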
static int node_copy(struct btree_node *left, struct btree_node *right, int shift)
{
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t value_size = le32_to_cpu(left->header.value_size);

	if (value_size != le32_to_cpu(right->header.value_size)) {
		DMERR("mismatched value size");
		return -EILSEQ;
	}

	if (shift < 0) {
		shift = -shift;

		if (nr_left + shift > le32_to_cpu(left->header.max_entries)) {
			DMERR("bad shift");
			return -EINVAL;
		}

		memcpy(key_ptr(left, nr_left),
		       key_ptr(right, 0),
		       shift * sizeof(__le64));
		memcpy(value_ptr(left, nr_left),
		       value_ptr(right, 0),
		       shift * value_size);
	} else {
		if (shift > le32_to_cpu(right->header.max_entries)) {
			DMERR("bad shift");
			return -EINVAL;
		}

		memcpy(key_ptr(right, 0),
		       key_ptr(left, nr_left - shift),
		       shift * sizeof(__le64));
		memcpy(value_ptr(right, 0),
		       value_ptr(left, nr_left - shift),
		       shift * value_size);
	}
	return 0;
}

/*
 * Delete a specific entry from a node (used on both internal and leaf
 * nodes).
 */
static void delete_at(struct btree_node *n, unsigned int index)
{
	unsigned int nr_entries = le32_to_cpu(n->header.nr_entries);
	unsigned int nr_to_copy = nr_entries - (index + 1);
	uint32_t value_size = le32_to_cpu(n->header.value_size);

	BUG_ON(index >= nr_entries);

	if (nr_to_copy) {
		memmove(key_ptr(n, index),
			key_ptr(n, index + 1),
			nr_to_copy * sizeof(__le64));

		memmove(value_ptr(n, index),
			value_ptr(n, index + 1),
			nr_to_copy * value_size);
	}

	n->header.nr_entries = cpu_to_le32(nr_entries - 1);
}

static unsigned int merge_threshold(struct btree_node *n)
{
	return le32_to_cpu(n->header.max_entries) / 3;
}

struct child {
	unsigned int index;
	struct dm_block *block;
	struct btree_node *n;
};

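/*
 * Shadows the child block found at parent[index] and patches the parent
 * to reference the shadow's new location.  Pair every successful
 * init_child() with an exit_child() to drop the block lock.
 */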
static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
		      struct btree_node *parent,
		      unsigned int index, struct child *result)
{
	int r, inc;
	dm_block_t root;

	result->index = index;
	root = value64(parent, index);

	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
			       &result->block, &inc);
	if (r)
		return r;

	result->n = dm_block_data(result->block);

	if (inc)
		inc_children(info->tm, result->n, vt);

	*((__le64 *) value_ptr(parent, index)) =
		cpu_to_le64(dm_block_location(result->block));

	return 0;
}

static void exit_child(struct dm_btree_info *info, struct child *c)
{
	dm_tm_unlock(info->tm, c->block);
}

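/*
 * Moves 'count' entries from 'left' to 'right' (count > 0), or |count|
 * entries from 'right' to 'left' (count < 0), updating both entry counts.
 * The ordering matters: when moving into 'right' we open a gap there
 * first; when moving out of it we compact afterwards.
 */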
static int shift(struct btree_node *left, struct btree_node *right, int count)
{
	int r;
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);

	if (max_entries != r_max_entries) {
		DMERR("node max_entries mismatch");
		return -EILSEQ;
	}

	if (nr_left - count > max_entries) {
		DMERR("node shift out of bounds");
		return -EINVAL;
	}

	if (nr_right + count > max_entries) {
		DMERR("node shift out of bounds");
		return -EINVAL;
	}

	if (!count)
		return 0;

	if (count > 0) {
		node_shift(right, count);
		r = node_copy(left, right, count);
		if (r)
			return r;
	} else {
		r = node_copy(left, right, count);
		if (r)
			return r;
		node_shift(right, count);
	}

	left->header.nr_entries = cpu_to_le32(nr_left - count);
	right->header.nr_entries = cpu_to_le32(nr_right + count);

	return 0;
}

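/*
 * Rebalances two adjacent children: either merges 'right' into 'left' and
 * removes the right child from the parent, or redistributes the entries
 * evenly and updates the parent's key for the right child.
 */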
static int __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
			struct child *l, struct child *r)
{
	int ret;
	struct btree_node *left = l->n;
	struct btree_node *right = r->n;
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	/*
	 * Ensure the number of entries in each child will be greater
	 * than or equal to (max_entries / 3 + 1), so no matter which
	 * child is used for removal, the number will still be no
	 * less than (max_entries / 3).
	 */
	unsigned int threshold = 2 * (merge_threshold(left) + 1);

	if (nr_left + nr_right < threshold) {
		/*
		 * Merge
		 */
		node_copy(left, right, -nr_right);
		left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
		delete_at(parent, r->index);

		/*
		 * We need to decrement the right block, but not its
		 * children, since they're still referenced by left.
		 */
		dm_tm_dec(info->tm, dm_block_location(r->block));
	} else {
		/*
		 * Rebalance.
		 */
		unsigned int target_left = (nr_left + nr_right) / 2;

		ret = shift(left, right, nr_left - target_left);
		if (ret)
			return ret;
		*key_ptr(parent, r->index) = right->keys[0];
	}
	return 0;
}

static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned int left_index)
{
	int r;
	struct btree_node *parent;
	struct child left, right;

	parent = dm_block_data(shadow_current(s));

	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &right);
	if (r) {
		exit_child(info, &left);
		return r;
	}

	r = __rebalance2(info, parent, &left, &right);

	exit_child(info, &left);
	exit_child(info, &right);

	return r;
}

/*
 * We dump as many entries from center as possible into left, then the rest
 * in right, then rebalance2.  This wastes some cpu, but I want something
 * simple atm.
 */
static int delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
			      struct child *l, struct child *c, struct child *r,
			      struct btree_node *left, struct btree_node *center, struct btree_node *right,
			      uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	unsigned int shift = min(max_entries - nr_left, nr_center);

	if (nr_left + shift > max_entries) {
		DMERR("node shift out of bounds");
		return -EINVAL;
	}

	node_copy(left, center, -shift);
	left->header.nr_entries = cpu_to_le32(nr_left + shift);

	if (shift != nr_center) {
		shift = nr_center - shift;

		if ((nr_right + shift) > max_entries) {
			DMERR("node shift out of bounds");
			return -EINVAL;
		}

		node_shift(right, shift);
		node_copy(center, right, shift);
		right->header.nr_entries = cpu_to_le32(nr_right + shift);
	}
	*key_ptr(parent, r->index) = right->keys[0];

	delete_at(parent, c->index);
	r->index--;

	dm_tm_dec(info->tm, dm_block_location(c->block));
	return __rebalance2(info, parent, l, r);
}

/*
 * Redistributes entries among 3 sibling nodes.
 */
static int redistribute3(struct dm_btree_info *info, struct btree_node *parent,
			 struct child *l, struct child *c, struct child *r,
			 struct btree_node *left, struct btree_node *center, struct btree_node *right,
			 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	int s, ret;
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	unsigned int total = nr_left + nr_center + nr_right;
	unsigned int target_right = total / 3;
	unsigned int remainder = (target_right * 3) != total;
	unsigned int target_left = target_right + remainder;

	BUG_ON(target_left > max_entries);
	BUG_ON(target_right > max_entries);

	if (nr_left < nr_right) {
		s = nr_left - target_left;

		if (s < 0 && nr_center < -s) {
			/* not enough in central node */
			ret = shift(left, center, -nr_center);
			if (ret)
				return ret;

			s += nr_center;
			ret = shift(left, right, s);
			if (ret)
				return ret;

			nr_right += s;
		} else {
			ret = shift(left, center, s);
			if (ret)
				return ret;
		}

		ret = shift(center, right, target_right - nr_right);
		if (ret)
			return ret;
	} else {
		s = target_right - nr_right;
		if (s > 0 && nr_center < s) {
			/* not enough in central node */
			ret = shift(center, right, nr_center);
			if (ret)
				return ret;
			s -= nr_center;
			ret = shift(left, right, s);
			if (ret)
				return ret;
			nr_left -= s;
		} else {
			ret = shift(center, right, s);
			if (ret)
				return ret;
		}

		ret = shift(left, center, nr_left - target_left);
		if (ret)
			return ret;
	}

	*key_ptr(parent, c->index) = center->keys[0];
	*key_ptr(parent, r->index) = right->keys[0];
	return 0;
}

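/*
 * Rebalances three adjacent children: if the total entry count is low
 * enough, the centre node is deleted and its entries folded into its
 * siblings; otherwise the entries are redistributed across all three.
 */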
static int __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
			struct child *l, struct child *c, struct child *r)
{
	struct btree_node *left = l->n;
	struct btree_node *center = c->n;
	struct btree_node *right = r->n;

	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);

	unsigned int threshold = merge_threshold(left) * 4 + 1;

	if ((left->header.max_entries != center->header.max_entries) ||
	    (center->header.max_entries != right->header.max_entries)) {
		DMERR("bad btree metadata, max_entries differ");
		return -EILSEQ;
	}

	if ((nr_left + nr_center + nr_right) < threshold) {
		return delete_center_node(info, parent, l, c, r, left, center, right,
					  nr_left, nr_center, nr_right);
	}

	return redistribute3(info, parent, l, c, r, left, center, right,
			     nr_left, nr_center, nr_right);
}

static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned int left_index)
{
	int r;
	struct btree_node *parent = dm_block_data(shadow_current(s));
	struct child left, center, right;

	/*
	 * FIXME: fill out an array?
	 */
	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &center);
	if (r) {
		exit_child(info, &left);
		return r;
	}

	r = init_child(info, vt, parent, left_index + 2, &right);
	if (r) {
		exit_child(info, &left);
		exit_child(info, &center);
		return r;
	}

	r = __rebalance3(info, parent, &left, &center, &right);

	exit_child(info, &left);
	exit_child(info, &center);
	exit_child(info, &right);

	return r;
}

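/*
 * Implements the case analysis described at the top of the file: a root
 * with a single child is collapsed into its child (case [A]); otherwise
 * the child on the path towards 'key' is rebalanced with one sibling
 * (cases [B] and [C]) or with both (cases [D] and [E]).
 */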
static int rebalance_children(struct shadow_spine *s,
			      struct dm_btree_info *info,
			      struct dm_btree_value_type *vt, uint64_t key)
{
	int i, r, has_left_sibling, has_right_sibling;
	struct btree_node *n;

	n = dm_block_data(shadow_current(s));

	if (le32_to_cpu(n->header.nr_entries) == 1) {
		struct dm_block *child;
		dm_block_t b = value64(n, 0);

		r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
		if (r)
			return r;

		memcpy(n, dm_block_data(child),
		       dm_bm_block_size(dm_tm_get_bm(info->tm)));

		dm_tm_dec(info->tm, dm_block_location(child));
		dm_tm_unlock(info->tm, child);
		return 0;
	}

	i = lower_bound(n, key);
	if (i < 0)
		return -ENODATA;

	has_left_sibling = i > 0;
	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);

	if (!has_left_sibling)
		r = rebalance2(s, info, vt, i);

	else if (!has_right_sibling)
		r = rebalance2(s, info, vt, i - 1);

	else
		r = rebalance3(s, info, vt, i - 1);

	return r;
}

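/*
 * Looks up 'key' in a leaf node, filling in *index on an exact match and
 * returning -ENODATA otherwise.
 */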
static int do_leaf(struct btree_node *n, uint64_t key, unsigned int *index)
{
	int i = lower_bound(n, key);

	if ((i < 0) ||
	    (i >= le32_to_cpu(n->header.nr_entries)) ||
	    (le64_to_cpu(n->keys[i]) != key))
		return -ENODATA;

	*index = i;

	return 0;
}

/*
 * Prepares for removal from one level of the hierarchy.  The caller must
 * call delete_at() to remove the entry at index.
 */
static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, dm_block_t root,
		      uint64_t key, unsigned int *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		i = lower_bound(n, key);

		/*
		 * We know the key is present, or else
		 * rebalance_children would have returned
		 * -ENODATA
		 */
		root = value64(n, i);
	}

	return r;
}

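/*
 * Removes one key per level of a multi-level btree: the first levels index
 * nested btrees (64-bit root values), and only the entry at the last level
 * carries a caller-defined value, whose reference is dropped via
 * value_type.dec before the entry is deleted.
 */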
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, dm_block_t *new_root)
{
	unsigned int level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		r = remove_raw(&spine, info,
			       (level == last_level ?
				&info->value_type : &le64_vt),
			       root, keys[level], (unsigned int *)&index);
		if (r < 0)
			break;

		n = dm_block_data(shadow_current(&spine));
		if (level != last_level) {
			root = value64(n, index);
			continue;
		}

		BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));

		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index), 1);

		delete_at(n, index);
	}

	if (!r)
		*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_remove);

/*----------------------------------------------------------------*/

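/*
 * Like remove_raw(), but a leaf doesn't need to contain an exact match:
 * *index is set to the lower_bound() of 'key' in the leaf instead.
 */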
static int remove_nearest(struct shadow_spine *s, struct dm_btree_info *info,
			  struct dm_btree_value_type *vt, dm_block_t root,
			  uint64_t key, int *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
			*index = lower_bound(n, key);
			return 0;
		}

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
			*index = lower_bound(n, key);
			return 0;
		}

		i = lower_bound(n, key);

		/*
		 * We know the key is present, or else
		 * rebalance_children would have returned
		 * -ENODATA
		 */
		root = value64(n, i);
	}

	return r;
}

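/*
 * Removes a single leaf entry whose key lies in [keys[last_level],
 * end_key), then advances keys[last_level] past the removed key so the
 * caller can iterate.  Returns -ENODATA once no such entry remains.
 */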
static int remove_one(struct dm_btree_info *info, dm_block_t root,
		      uint64_t *keys, uint64_t end_key,
		      dm_block_t *new_root, unsigned int *nr_removed)
{
	unsigned int level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;
	uint64_t k;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);
	for (level = 0; level < last_level; level++) {
		r = remove_raw(&spine, info, &le64_vt,
			       root, keys[level], (unsigned int *) &index);
		if (r < 0)
			goto out;

		n = dm_block_data(shadow_current(&spine));
		root = value64(n, index);
	}

	r = remove_nearest(&spine, info, &info->value_type,
			   root, keys[last_level], &index);
	if (r < 0)
		goto out;

	n = dm_block_data(shadow_current(&spine));

	if (index < 0)
		index = 0;

	if (index >= le32_to_cpu(n->header.nr_entries)) {
		r = -ENODATA;
		goto out;
	}

	k = le64_to_cpu(n->keys[index]);
	if (k >= keys[last_level] && k < end_key) {
		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index), 1);

		delete_at(n, index);
		keys[last_level] = k + 1ull;

	} else
		r = -ENODATA;

out:
	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}

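/*
 * Removes leaf entries with keys in [*first_key, end_key) one at a time,
 * counting them in *nr_removed.  *first_key is advanced past each removed
 * key as the loop proceeds.
 */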
int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
			   uint64_t *first_key, uint64_t end_key,
			   dm_block_t *new_root, unsigned int *nr_removed)
{
	int r;

	*nr_removed = 0;
	do {
		r = remove_one(info, root, first_key, end_key, &root, nr_removed);
		if (!r)
			(*nr_removed)++;
	} while (!r);

	*new_root = root;
	return r == -ENODATA ? 0 : r;
}
EXPORT_SYMBOL_GPL(dm_btree_remove_leaves);