// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-btree.h"
#include "dm-btree-internal.h"
#include "dm-transaction-manager.h"

#include <linux/export.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "btree"

/*
 * Removing an entry from a btree
 * ==============================
 *
 * A very important constraint for our btree is that no node, except the
 * root, may have fewer than a certain number of entries.
 * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
 *
 * Ensuring this is complicated by the way we want to only ever hold the
 * locks on 2 nodes concurrently, and only change nodes in a top to bottom
 * fashion.
 *
 * Each node may have a left or right sibling.  When descending the spine,
 * if a node contains only MIN_ENTRIES then we try to increase this to at
 * least MIN_ENTRIES + 1.  We do this in the following ways:
 *
 * [A] No siblings => this can only happen if the node is the root, in which
 *     case we copy the child's contents over the root.
 *
 * [B] No left sibling
 *     ==> rebalance(node, right sibling)
 *
 * [C] No right sibling
 *     ==> rebalance(left sibling, node)
 *
 * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
 *     ==> delete node, adding its contents to left and right
 *
 * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
 *     ==> rebalance(left, node, right)
 *
 * After these operations it's possible that our original node no
 * longer contains the desired subtree.  For this reason this rebalancing
 * is performed on the children of the current node.  This also avoids
 * having a special case for the root.
 *
 * Once this rebalancing has occurred we can then step into the child node
 * for internal nodes, or delete the entry for leaf nodes.
 */
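
/*
 * Illustrative numbers (an example only, derived from the helpers
 * below): merge_threshold() is max_entries / 3, so with max_entries = 64
 * the threshold is 21.  Two siblings are then merged when they hold
 * fewer than 2 * (21 + 1) = 44 entries between them (__rebalance2()),
 * and with three siblings the center node is deleted when the three
 * hold fewer than 4 * 21 + 1 = 85 entries in total (__rebalance3());
 * otherwise entries are redistributed between the siblings.
 */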

/*
 * Some little utilities for moving node data around.
 */
static void node_shift(struct btree_node *n, int shift)
{
	uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
	uint32_t value_size = le32_to_cpu(n->header.value_size);

	if (shift < 0) {
		shift = -shift;
		BUG_ON(shift > nr_entries);
		BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift));
		memmove(key_ptr(n, 0),
			key_ptr(n, shift),
			(nr_entries - shift) * sizeof(__le64));
		memmove(value_ptr(n, 0),
			value_ptr(n, shift),
			(nr_entries - shift) * value_size);
	} else {
		BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
		memmove(key_ptr(n, shift),
			key_ptr(n, 0),
			nr_entries * sizeof(__le64));
		memmove(value_ptr(n, shift),
			value_ptr(n, 0),
			nr_entries * value_size);
	}
}

static int node_copy(struct btree_node *left, struct btree_node *right, int shift)
{
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t value_size = le32_to_cpu(left->header.value_size);

	if (value_size != le32_to_cpu(right->header.value_size)) {
		DMERR("mismatched value size");
		return -EILSEQ;
	}

	if (shift < 0) {
		shift = -shift;

		if (nr_left + shift > le32_to_cpu(left->header.max_entries)) {
			DMERR("bad shift");
			return -EINVAL;
		}

		memcpy(key_ptr(left, nr_left),
		       key_ptr(right, 0),
		       shift * sizeof(__le64));
		memcpy(value_ptr(left, nr_left),
		       value_ptr(right, 0),
		       shift * value_size);
	} else {
		if (shift > le32_to_cpu(right->header.max_entries)) {
			DMERR("bad shift");
			return -EINVAL;
		}

		memcpy(key_ptr(right, 0),
		       key_ptr(left, nr_left - shift),
		       shift * sizeof(__le64));
		memcpy(value_ptr(right, 0),
		       value_ptr(left, nr_left - shift),
		       shift * value_size);
	}
	return 0;
}

/*
 * Delete a specific entry from a leaf node.
 */
static void delete_at(struct btree_node *n, unsigned int index)
{
	unsigned int nr_entries = le32_to_cpu(n->header.nr_entries);
	unsigned int nr_to_copy = nr_entries - (index + 1);
	uint32_t value_size = le32_to_cpu(n->header.value_size);

	BUG_ON(index >= nr_entries);

	if (nr_to_copy) {
		memmove(key_ptr(n, index),
			key_ptr(n, index + 1),
			nr_to_copy * sizeof(__le64));

		memmove(value_ptr(n, index),
			value_ptr(n, index + 1),
			nr_to_copy * value_size);
	}

	n->header.nr_entries = cpu_to_le32(nr_entries - 1);
}

static unsigned int merge_threshold(struct btree_node *n)
{
	return le32_to_cpu(n->header.max_entries) / 3;
}

struct child {
	unsigned int index;
	struct dm_block *block;
	struct btree_node *n;
};

static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
		      struct btree_node *parent,
		      unsigned int index, struct child *result)
{
	int r, inc;
	dm_block_t root;

	result->index = index;
	root = value64(parent, index);

	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
			       &result->block, &inc);
	if (r)
		return r;

	result->n = dm_block_data(result->block);

	if (inc)
		inc_children(info->tm, result->n, vt);

	*((__le64 *) value_ptr(parent, index)) =
		cpu_to_le64(dm_block_location(result->block));

	return 0;
}

static void exit_child(struct dm_btree_info *info, struct child *c)
{
	dm_tm_unlock(info->tm, c->block);
}

static int shift(struct btree_node *left, struct btree_node *right, int count)
{
	int r;
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);

	if (max_entries != r_max_entries) {
		DMERR("node max_entries mismatch");
		return -EILSEQ;
	}

	if (nr_left - count > max_entries) {
		DMERR("node shift out of bounds");
		return -EINVAL;
	}

	if (nr_right + count > max_entries) {
		DMERR("node shift out of bounds");
		return -EINVAL;
	}

	if (!count)
		return 0;

	if (count > 0) {
		node_shift(right, count);
		r = node_copy(left, right, count);
		if (r)
			return r;
	} else {
		r = node_copy(left, right, count);
		if (r)
			return r;
		node_shift(right, count);
	}

	left->header.nr_entries = cpu_to_le32(nr_left - count);
	right->header.nr_entries = cpu_to_le32(nr_right + count);

	return 0;
}

static int __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
			struct child *l, struct child *r)
{
	int ret;
	struct btree_node *left = l->n;
	struct btree_node *right = r->n;
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	/*
	 * Ensure the number of entries in each child will be greater
	 * than or equal to (max_entries / 3 + 1), so no matter which
	 * child is used for removal, the number will still be not
	 * less than (max_entries / 3).
	 */
	unsigned int threshold = 2 * (merge_threshold(left) + 1);

	if (nr_left + nr_right < threshold) {
		/*
		 * Merge
		 */
		node_copy(left, right, -nr_right);
		left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
		delete_at(parent, r->index);

		/*
		 * We need to decrement the right block, but not its
		 * children, since they're still referenced by left.
		 */
		dm_tm_dec(info->tm, dm_block_location(r->block));
	} else {
		/*
		 * Rebalance.
		 */
		unsigned int target_left = (nr_left + nr_right) / 2;

		ret = shift(left, right, nr_left - target_left);
		if (ret)
			return ret;
		*key_ptr(parent, r->index) = right->keys[0];
	}
	return 0;
}

static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned int left_index)
{
	int r;
	struct btree_node *parent;
	struct child left, right;

	parent = dm_block_data(shadow_current(s));

	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &right);
	if (r) {
		exit_child(info, &left);
		return r;
	}

	r = __rebalance2(info, parent, &left, &right);

	exit_child(info, &left);
	exit_child(info, &right);

	return r;
}

/*
 * We dump as many entries from center as possible into left, then the rest
 * into right, then rebalance2.  This wastes some cpu, but I want something
 * simple atm.
 */
static int delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
			      struct child *l, struct child *c, struct child *r,
			      struct btree_node *left, struct btree_node *center, struct btree_node *right,
			      uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	unsigned int shift = min(max_entries - nr_left, nr_center);

	if (nr_left + shift > max_entries) {
		DMERR("node shift out of bounds");
		return -EINVAL;
	}

	node_copy(left, center, -shift);
	left->header.nr_entries = cpu_to_le32(nr_left + shift);

	if (shift != nr_center) {
		shift = nr_center - shift;

		if ((nr_right + shift) > max_entries) {
			DMERR("node shift out of bounds");
			return -EINVAL;
		}

		node_shift(right, shift);
		node_copy(center, right, shift);
		right->header.nr_entries = cpu_to_le32(nr_right + shift);
	}
	*key_ptr(parent, r->index) = right->keys[0];

	delete_at(parent, c->index);
	r->index--;

	dm_tm_dec(info->tm, dm_block_location(c->block));
	return __rebalance2(info, parent, l, r);
}


/*
 * Redistributes entries among 3 sibling nodes.
 */
static int redistribute3(struct dm_btree_info *info, struct btree_node *parent,
			 struct child *l, struct child *c, struct child *r,
			 struct btree_node *left, struct btree_node *center, struct btree_node *right,
			 uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	int s, ret;
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	unsigned int total = nr_left + nr_center + nr_right;
	unsigned int target_right = total / 3;
	unsigned int remainder = (target_right * 3) != total;
	unsigned int target_left = target_right + remainder;

	BUG_ON(target_left > max_entries);
	BUG_ON(target_right > max_entries);

	if (nr_left < nr_right) {
		s = nr_left - target_left;

		if (s < 0 && nr_center < -s) {
			/* not enough in central node */
			ret = shift(left, center, -nr_center);
			if (ret)
				return ret;

			s += nr_center;
			ret = shift(left, right, s);
			if (ret)
				return ret;

			nr_right += s;
		} else {
			ret = shift(left, center, s);
			if (ret)
				return ret;
		}

		ret = shift(center, right, target_right - nr_right);
		if (ret)
			return ret;
	} else {
		s = target_right - nr_right;
		if (s > 0 && nr_center < s) {
			/* not enough in central node */
			ret = shift(center, right, nr_center);
			if (ret)
				return ret;
			s -= nr_center;
			ret = shift(left, right, s);
			if (ret)
				return ret;
			nr_left -= s;
		} else {
			ret = shift(center, right, s);
			if (ret)
				return ret;
		}

		ret = shift(left, center, nr_left - target_left);
		if (ret)
			return ret;
	}

	*key_ptr(parent, c->index) = center->keys[0];
	*key_ptr(parent, r->index) = right->keys[0];
	return 0;
}


static int __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
			struct child *l, struct child *c, struct child *r)
{
	struct btree_node *left = l->n;
	struct btree_node *center = c->n;
	struct btree_node *right = r->n;

	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);

	unsigned int threshold = merge_threshold(left) * 4 + 1;

	if ((left->header.max_entries != center->header.max_entries) ||
	    (center->header.max_entries != right->header.max_entries)) {
		DMERR("bad btree metadata, max_entries differ");
		return -EILSEQ;
	}

	if ((nr_left + nr_center + nr_right) < threshold) {
		return delete_center_node(info, parent, l, c, r, left, center, right,
					  nr_left, nr_center, nr_right);
	}

	return redistribute3(info, parent, l, c, r, left, center, right,
			     nr_left, nr_center, nr_right);
}

static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned int left_index)
{
	int r;
	struct btree_node *parent = dm_block_data(shadow_current(s));
	struct child left, center, right;

	/*
	 * FIXME: fill out an array?
	 */
	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &center);
	if (r) {
		exit_child(info, &left);
		return r;
	}

	r = init_child(info, vt, parent, left_index + 2, &right);
	if (r) {
		exit_child(info, &left);
		exit_child(info, &center);
		return r;
	}

	r = __rebalance3(info, parent, &left, &center, &right);

	exit_child(info, &left);
	exit_child(info, &center);
	exit_child(info, &right);

	return r;
}


static int rebalance_children(struct shadow_spine *s,
			      struct dm_btree_info *info,
			      struct dm_btree_value_type *vt, uint64_t key)
{
	int i, r, has_left_sibling, has_right_sibling;
	struct btree_node *n;

	n = dm_block_data(shadow_current(s));

	if (le32_to_cpu(n->header.nr_entries) == 1) {
		struct dm_block *child;
		dm_block_t b = value64(n, 0);

		r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
		if (r)
			return r;

		memcpy(n, dm_block_data(child),
		       dm_bm_block_size(dm_tm_get_bm(info->tm)));

		dm_tm_dec(info->tm, dm_block_location(child));
		dm_tm_unlock(info->tm, child);
		return 0;
	}

	i = lower_bound(n, key);
	if (i < 0)
		return -ENODATA;

	has_left_sibling = i > 0;
	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);

	if (!has_left_sibling)
		r = rebalance2(s, info, vt, i);

	else if (!has_right_sibling)
		r = rebalance2(s, info, vt, i - 1);

	else
		r = rebalance3(s, info, vt, i - 1);

	return r;
}

static int do_leaf(struct btree_node *n, uint64_t key, unsigned int *index)
{
	int i = lower_bound(n, key);

	if ((i < 0) ||
	    (i >= le32_to_cpu(n->header.nr_entries)) ||
	    (le64_to_cpu(n->keys[i]) != key))
		return -ENODATA;

	*index = i;

	return 0;
}


/*
 * Prepares for removal from one level of the hierarchy.  The caller must
 * call delete_at() to remove the entry at index.
 */
static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, dm_block_t root,
		      uint64_t key, unsigned int *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
			return do_leaf(n, key, index);

		i = lower_bound(n, key);

		/*
		 * We know the key is present, or else
		 * rebalance_children would have returned
		 * -ENODATA
		 */
		root = value64(n, i);
	}

	return r;
}

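/*
 * Illustrative note: remove the entry for @keys (one key per btree
 * level) from the tree at @root, returning the new root through
 * @new_root.  -ENODATA is returned if the key is not present.  A
 * minimal usage sketch for a one-level tree (hypothetical caller,
 * shown for illustration only):
 *
 *	uint64_t key = 1234;
 *	dm_block_t new_root;
 *
 *	r = dm_btree_remove(info, root, &key, &new_root);
 *	if (!r)
 *		root = new_root;
 */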
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, dm_block_t *new_root)
{
	unsigned int level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		r = remove_raw(&spine, info,
			       (level == last_level ?
				&info->value_type : &le64_vt),
			       root, keys[level], (unsigned int *)&index);
		if (r < 0)
			break;

		n = dm_block_data(shadow_current(&spine));
		if (level != last_level) {
			root = value64(n, index);
			continue;
		}

		BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));

		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index), 1);

		delete_at(n, index);
	}

	if (!r)
		*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_remove);

/*----------------------------------------------------------------*/

static int remove_nearest(struct shadow_spine *s, struct dm_btree_info *info,
			  struct dm_btree_value_type *vt, dm_block_t root,
			  uint64_t key, int *index)
{
	int i = *index, r;
	struct btree_node *n;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			break;

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s)) {
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
			       &location, sizeof(__le64));
		}

		n = dm_block_data(shadow_current(s));

		if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
			*index = lower_bound(n, key);
			return 0;
		}

		r = rebalance_children(s, info, vt, key);
		if (r)
			break;

		n = dm_block_data(shadow_current(s));
		if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
			*index = lower_bound(n, key);
			return 0;
		}

		i = lower_bound(n, key);

		/*
		 * We know the key is present, or else
		 * rebalance_children would have returned
		 * -ENODATA
		 */
		root = value64(n, i);
	}

	return r;
}

static int remove_one(struct dm_btree_info *info, dm_block_t root,
		      uint64_t *keys, uint64_t end_key,
		      dm_block_t *new_root, unsigned int *nr_removed)
{
	unsigned int level, last_level = info->levels - 1;
	int index = 0, r = 0;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_vt;
	uint64_t k;

	init_le64_type(info->tm, &le64_vt);
	init_shadow_spine(&spine, info);
	for (level = 0; level < last_level; level++) {
		r = remove_raw(&spine, info, &le64_vt,
			       root, keys[level], (unsigned int *) &index);
		if (r < 0)
			goto out;

		n = dm_block_data(shadow_current(&spine));
		root = value64(n, index);
	}

	r = remove_nearest(&spine, info, &info->value_type,
			   root, keys[last_level], &index);
	if (r < 0)
		goto out;

	n = dm_block_data(shadow_current(&spine));

	if (index < 0)
		index = 0;

	if (index >= le32_to_cpu(n->header.nr_entries)) {
		r = -ENODATA;
		goto out;
	}

	k = le64_to_cpu(n->keys[index]);
	if (k >= keys[last_level] && k < end_key) {
		if (info->value_type.dec)
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index), 1);

		delete_at(n, index);
		keys[last_level] = k + 1ull;

	} else
		r = -ENODATA;

out:
	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return r;
}

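/*
 * Illustrative note: removes leaf entries whose last-level keys lie in
 * [*first_key, end_key), one at a time, advancing the last-level key in
 * @first_key past each removed entry and counting removals in
 * *nr_removed.  Returns 0 once the range is exhausted (the internal
 * -ENODATA is mapped to 0), or a negative error code on failure.  A
 * minimal usage sketch for a one-level tree (hypothetical caller,
 * shown for illustration only):
 *
 *	uint64_t begin = 100;
 *	unsigned int nr_removed;
 *	dm_block_t new_root;
 *
 *	r = dm_btree_remove_leaves(info, root, &begin, 200, &new_root,
 *				   &nr_removed);
 */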
int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
			   uint64_t *first_key, uint64_t end_key,
			   dm_block_t *new_root, unsigned int *nr_removed)
{
	int r;

	*nr_removed = 0;
	do {
		r = remove_one(info, root, first_key, end_key, &root, nr_removed);
		if (!r)
			(*nr_removed)++;
	} while (!r);

	*new_root = root;
	return r == -ENODATA ? 0 : r;
}
EXPORT_SYMBOL_GPL(dm_btree_remove_leaves);