1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22 
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
    * @bp_ctxt: change key context for a cached btree node block
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43 	struct buffer_head *bp_bh;
44 	struct buffer_head *bp_sib_bh;
45 	int bp_index;
46 	union nilfs_bmap_ptr_req bp_oldreq;
47 	union nilfs_bmap_ptr_req bp_newreq;
48 	struct nilfs_btnode_chkey_ctxt bp_ctxt;
49 	void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50 		      int, __u64 *, __u64 *);
51 };
52 
53 /*
54  * B-tree path operations
55  */
56 
57 static struct kmem_cache *nilfs_btree_path_cache;
58 
59 int __init nilfs_btree_path_cache_init(void)
60 {
61 	nilfs_btree_path_cache =
62 		kmem_cache_create("nilfs2_btree_path_cache",
63 				  sizeof(struct nilfs_btree_path) *
64 				  NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65 	return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67 
68 void nilfs_btree_path_cache_destroy(void)
69 {
70 	kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72 
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76 	return (struct nilfs_btree_path *)
77 		kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79 
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81 					 struct nilfs_btree_path *path)
82 {
83 	kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85 
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87 				  struct nilfs_btree_path *path)
88 {
89 	int level;
90 
91 	for (level = NILFS_BTREE_LEVEL_DATA;
92 	     level < NILFS_BTREE_LEVEL_MAX;
93 	     level++) {
94 		path[level].bp_bh = NULL;
95 		path[level].bp_sib_bh = NULL;
96 		path[level].bp_index = 0;
97 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99 		path[level].bp_op = NULL;
100 	}
101 }
102 
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104 				   struct nilfs_btree_path *path)
105 {
106 	int level;
107 
108 	for (level = NILFS_BTREE_LEVEL_DATA;
109 	     level < NILFS_BTREE_LEVEL_MAX;
110 	     level++) {
111 		if (path[level].bp_bh != NULL) {
112 			nilfs_bmap_put_block(&btree->bt_bmap,
113 					     path[level].bp_bh);
114 			path[level].bp_bh = NULL;
115 		}
116 		/* sib_bh is released or deleted by prepare or commit
117 		 * operations. */
118 		path[level].bp_sib_bh = NULL;
119 		path[level].bp_index = 0;
120 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122 		path[level].bp_op = NULL;
123 	}
124 }
125 
126 
127 /*
128  * B-tree node operations
129  */
130 
131 static inline int
132 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
133 			   const struct nilfs_btree_node *node)
134 {
135 	return node->bn_flags;
136 }
137 
138 static inline void
139 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
140 			   struct nilfs_btree_node *node,
141 			   int flags)
142 {
143 	node->bn_flags = flags;
144 }
145 
146 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
147 					const struct nilfs_btree_node *node)
148 {
149 	return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
150 }
151 
152 static inline int
153 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
154 			   const struct nilfs_btree_node *node)
155 {
156 	return node->bn_level;
157 }
158 
159 static inline void
160 nilfs_btree_node_set_level(struct nilfs_btree *btree,
161 			   struct nilfs_btree_node *node,
162 			   int level)
163 {
164 	node->bn_level = level;
165 }
166 
167 static inline int
168 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
169 			       const struct nilfs_btree_node *node)
170 {
171 	return le16_to_cpu(node->bn_nchildren);
172 }
173 
174 static inline void
175 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
176 			       struct nilfs_btree_node *node,
177 			       int nchildren)
178 {
179 	node->bn_nchildren = cpu_to_le16(nchildren);
180 }
181 
182 static inline int
183 nilfs_btree_node_size(const struct nilfs_btree *btree)
184 {
185 	return 1 << btree->bt_bmap.b_inode->i_blkbits;
186 }
187 
188 static inline int
189 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
190 			       const struct nilfs_btree_node *node)
191 {
192 	return nilfs_btree_node_root(btree, node) ?
193 		NILFS_BTREE_ROOT_NCHILDREN_MIN :
194 		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
195 }
196 
197 static inline int
198 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
199 			       const struct nilfs_btree_node *node)
200 {
201 	return nilfs_btree_node_root(btree, node) ?
202 		NILFS_BTREE_ROOT_NCHILDREN_MAX :
203 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
204 }
205 
206 static inline __le64 *
207 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
208 		       const struct nilfs_btree_node *node)
209 {
210 	return (__le64 *)((char *)(node + 1) +
211 			  (nilfs_btree_node_root(btree, node) ?
212 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
213 }
214 
215 static inline __le64 *
216 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
217 		       const struct nilfs_btree_node *node)
218 {
219 	return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
220 			  nilfs_btree_node_nchildren_max(btree, node));
221 }
222 
223 static inline __u64
224 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
225 			 const struct nilfs_btree_node *node, int index)
226 {
227 	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
228 					index));
229 }
230 
231 static inline void
232 nilfs_btree_node_set_key(struct nilfs_btree *btree,
233 			 struct nilfs_btree_node *node, int index, __u64 key)
234 {
235 	*(nilfs_btree_node_dkeys(btree, node) + index) =
236 		nilfs_bmap_key_to_dkey(key);
237 }
238 
239 static inline __u64
240 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
241 			 const struct nilfs_btree_node *node,
242 			 int index)
243 {
244 	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
245 					index));
246 }
247 
248 static inline void
249 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
250 			 struct nilfs_btree_node *node,
251 			 int index,
252 			 __u64 ptr)
253 {
254 	*(nilfs_btree_node_dptrs(btree, node) + index) =
255 		nilfs_bmap_ptr_to_dptr(ptr);
256 }
257 
258 static void nilfs_btree_node_init(struct nilfs_btree *btree,
259 				  struct nilfs_btree_node *node,
260 				  int flags, int level, int nchildren,
261 				  const __u64 *keys, const __u64 *ptrs)
262 {
263 	__le64 *dkeys;
264 	__le64 *dptrs;
265 	int i;
266 
267 	nilfs_btree_node_set_flags(btree, node, flags);
268 	nilfs_btree_node_set_level(btree, node, level);
269 	nilfs_btree_node_set_nchildren(btree, node, nchildren);
270 
271 	dkeys = nilfs_btree_node_dkeys(btree, node);
272 	dptrs = nilfs_btree_node_dptrs(btree, node);
273 	for (i = 0; i < nchildren; i++) {
274 		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
275 		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
276 	}
277 }
278 
279 /* Assume the buffer heads corresponding to left and right are locked. */
280 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
281 				       struct nilfs_btree_node *left,
282 				       struct nilfs_btree_node *right,
283 				       int n)
284 {
285 	__le64 *ldkeys, *rdkeys;
286 	__le64 *ldptrs, *rdptrs;
287 	int lnchildren, rnchildren;
288 
289 	ldkeys = nilfs_btree_node_dkeys(btree, left);
290 	ldptrs = nilfs_btree_node_dptrs(btree, left);
291 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
292 
293 	rdkeys = nilfs_btree_node_dkeys(btree, right);
294 	rdptrs = nilfs_btree_node_dptrs(btree, right);
295 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
296 
297 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
298 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
299 	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
300 	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
301 
302 	lnchildren += n;
303 	rnchildren -= n;
304 	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
305 	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
306 }
307 
308 /* Assume that the buffer heads corresponding to left and right are locked. */
309 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
310 					struct nilfs_btree_node *left,
311 					struct nilfs_btree_node *right,
312 					int n)
313 {
314 	__le64 *ldkeys, *rdkeys;
315 	__le64 *ldptrs, *rdptrs;
316 	int lnchildren, rnchildren;
317 
318 	ldkeys = nilfs_btree_node_dkeys(btree, left);
319 	ldptrs = nilfs_btree_node_dptrs(btree, left);
320 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
321 
322 	rdkeys = nilfs_btree_node_dkeys(btree, right);
323 	rdptrs = nilfs_btree_node_dptrs(btree, right);
324 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
325 
326 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
327 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
328 	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
329 	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
330 
331 	lnchildren -= n;
332 	rnchildren += n;
333 	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
334 	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
335 }
336 
337 /* Assume that the buffer head corresponding to node is locked. */
338 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
339 				    struct nilfs_btree_node *node,
340 				    __u64 key, __u64 ptr, int index)
341 {
342 	__le64 *dkeys;
343 	__le64 *dptrs;
344 	int nchildren;
345 
346 	dkeys = nilfs_btree_node_dkeys(btree, node);
347 	dptrs = nilfs_btree_node_dptrs(btree, node);
348 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
349 	if (index < nchildren) {
350 		memmove(dkeys + index + 1, dkeys + index,
351 			(nchildren - index) * sizeof(*dkeys));
352 		memmove(dptrs + index + 1, dptrs + index,
353 			(nchildren - index) * sizeof(*dptrs));
354 	}
355 	dkeys[index] = nilfs_bmap_key_to_dkey(key);
356 	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
357 	nchildren++;
358 	nilfs_btree_node_set_nchildren(btree, node, nchildren);
359 }
360 
361 /* Assume that the buffer head corresponding to node is locked. */
362 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
363 				    struct nilfs_btree_node *node,
364 				    __u64 *keyp, __u64 *ptrp, int index)
365 {
366 	__u64 key;
367 	__u64 ptr;
368 	__le64 *dkeys;
369 	__le64 *dptrs;
370 	int nchildren;
371 
372 	dkeys = nilfs_btree_node_dkeys(btree, node);
373 	dptrs = nilfs_btree_node_dptrs(btree, node);
374 	key = nilfs_bmap_dkey_to_key(dkeys[index]);
375 	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
376 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
377 	if (keyp != NULL)
378 		*keyp = key;
379 	if (ptrp != NULL)
380 		*ptrp = ptr;
381 
382 	if (index < nchildren - 1) {
383 		memmove(dkeys + index, dkeys + index + 1,
384 			(nchildren - index - 1) * sizeof(*dkeys));
385 		memmove(dptrs + index, dptrs + index + 1,
386 			(nchildren - index - 1) * sizeof(*dptrs));
387 	}
388 	nchildren--;
389 	nilfs_btree_node_set_nchildren(btree, node, nchildren);
390 }
391 
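/*
 * nilfs_btree_node_lookup - binary-search @node for @key.  *indexp receives
 * the slot of the matching entry or, when there is no exact match, the child
 * slot to descend into (internal nodes) or the insertion position (lowest
 * level).  Returns 1 if an exact match was found, 0 otherwise.
 */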
392 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
393 				   const struct nilfs_btree_node *node,
394 				   __u64 key, int *indexp)
395 {
396 	__u64 nkey;
397 	int index, low, high, s;
398 
399 	/* binary search */
400 	low = 0;
401 	high = nilfs_btree_node_get_nchildren(btree, node) - 1;
402 	index = 0;
403 	s = 0;
404 	while (low <= high) {
405 		index = (low + high) / 2;
406 		nkey = nilfs_btree_node_get_key(btree, node, index);
407 		if (nkey == key) {
408 			s = 0;
409 			goto out;
410 		} else if (nkey < key) {
411 			low = index + 1;
412 			s = -1;
413 		} else {
414 			high = index - 1;
415 			s = 1;
416 		}
417 	}
418 
419 	/* adjust index */
420 	if (nilfs_btree_node_get_level(btree, node) >
421 	    NILFS_BTREE_LEVEL_NODE_MIN) {
422 		if ((s > 0) && (index > 0))
423 			index--;
424 	} else if (s < 0)
425 		index++;
426 
427  out:
428 	*indexp = index;
429 
430 	return s == 0;
431 }
432 
433 static inline struct nilfs_btree_node *
434 nilfs_btree_get_root(const struct nilfs_btree *btree)
435 {
436 	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
437 }
438 
439 static inline struct nilfs_btree_node *
440 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
441 			     const struct nilfs_btree_path *path,
442 			     int level)
443 {
444 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
445 }
446 
447 static inline struct nilfs_btree_node *
448 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
449 			 const struct nilfs_btree_path *path,
450 			 int level)
451 {
452 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
453 }
454 
455 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
456 {
457 	return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
458 		+ 1;
459 }
460 
461 static inline struct nilfs_btree_node *
462 nilfs_btree_get_node(const struct nilfs_btree *btree,
463 		     const struct nilfs_btree_path *path,
464 		     int level)
465 {
466 	return (level == nilfs_btree_height(btree) - 1) ?
467 		nilfs_btree_get_root(btree) :
468 		nilfs_btree_get_nonroot_node(btree, path, level);
469 }
470 
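/*
 * nilfs_btree_do_lookup - descend from the root down to @minlevel looking up
 * @key, recording the buffer head and child index of each visited node in
 * path[].  On success the pointer found at @minlevel is stored in *ptrp;
 * -ENOENT is returned if the key is not present.
 */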
471 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
472 				 struct nilfs_btree_path *path,
473 				 __u64 key, __u64 *ptrp, int minlevel)
474 {
475 	struct nilfs_btree_node *node;
476 	__u64 ptr;
477 	int level, index, found, ret;
478 
479 	node = nilfs_btree_get_root(btree);
480 	level = nilfs_btree_node_get_level(btree, node);
481 	if ((level < minlevel) ||
482 	    (nilfs_btree_node_get_nchildren(btree, node) <= 0))
483 		return -ENOENT;
484 
485 	found = nilfs_btree_node_lookup(btree, node, key, &index);
486 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
487 	path[level].bp_bh = NULL;
488 	path[level].bp_index = index;
489 
490 	for (level--; level >= minlevel; level--) {
491 		ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
492 					   &path[level].bp_bh);
493 		if (ret < 0)
494 			return ret;
495 		node = nilfs_btree_get_nonroot_node(btree, path, level);
496 		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
497 		if (!found)
498 			found = nilfs_btree_node_lookup(btree, node, key,
499 							&index);
500 		else
501 			index = 0;
502 		if (index < nilfs_btree_node_nchildren_max(btree, node))
503 			ptr = nilfs_btree_node_get_ptr(btree, node, index);
504 		else {
505 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
506 			/* insert */
507 			ptr = NILFS_BMAP_INVALID_PTR;
508 		}
509 		path[level].bp_index = index;
510 	}
511 	if (!found)
512 		return -ENOENT;
513 
514 	if (ptrp != NULL)
515 		*ptrp = ptr;
516 
517 	return 0;
518 }
519 
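/*
 * nilfs_btree_do_lookup_last - follow the rightmost child at every level to
 * locate the largest key in the tree and its pointer.
 */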
520 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
521 				      struct nilfs_btree_path *path,
522 				      __u64 *keyp, __u64 *ptrp)
523 {
524 	struct nilfs_btree_node *node;
525 	__u64 ptr;
526 	int index, level, ret;
527 
528 	node = nilfs_btree_get_root(btree);
529 	index = nilfs_btree_node_get_nchildren(btree, node) - 1;
530 	if (index < 0)
531 		return -ENOENT;
532 	level = nilfs_btree_node_get_level(btree, node);
533 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
534 	path[level].bp_bh = NULL;
535 	path[level].bp_index = index;
536 
537 	for (level--; level > 0; level--) {
538 		ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
539 					   &path[level].bp_bh);
540 		if (ret < 0)
541 			return ret;
542 		node = nilfs_btree_get_nonroot_node(btree, path, level);
543 		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
544 		index = nilfs_btree_node_get_nchildren(btree, node) - 1;
545 		ptr = nilfs_btree_node_get_ptr(btree, node, index);
546 		path[level].bp_index = index;
547 	}
548 
549 	if (keyp != NULL)
550 		*keyp = nilfs_btree_node_get_key(btree, node, index);
551 	if (ptrp != NULL)
552 		*ptrp = ptr;
553 
554 	return 0;
555 }
556 
557 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
558 			      __u64 key, int level, __u64 *ptrp)
559 {
560 	struct nilfs_btree *btree;
561 	struct nilfs_btree_path *path;
562 	__u64 ptr;
563 	int ret;
564 
565 	btree = (struct nilfs_btree *)bmap;
566 	path = nilfs_btree_alloc_path(btree);
567 	if (path == NULL)
568 		return -ENOMEM;
569 	nilfs_btree_init_path(btree, path);
570 
571 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572 
573 	if (ptrp != NULL)
574 		*ptrp = ptr;
575 
576 	nilfs_btree_clear_path(btree, path);
577 	nilfs_btree_free_path(btree, path);
578 
579 	return ret;
580 }
581 
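/*
 * nilfs_btree_promote_key - rewrite the key of the entry at
 * path[level].bp_index.  While the updated entry is the leftmost one of its
 * node, the separator in the parent must change as well, so the new key is
 * propagated upward, possibly all the way to the root.
 */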
582 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
583 				    struct nilfs_btree_path *path,
584 				    int level, __u64 key)
585 {
586 	if (level < nilfs_btree_height(btree) - 1) {
587 		do {
588 			lock_buffer(path[level].bp_bh);
589 			nilfs_btree_node_set_key(
590 				btree,
591 				nilfs_btree_get_nonroot_node(
592 					btree, path, level),
593 				path[level].bp_index, key);
594 			if (!buffer_dirty(path[level].bp_bh))
595 				nilfs_btnode_mark_dirty(path[level].bp_bh);
596 			unlock_buffer(path[level].bp_bh);
597 		} while ((path[level].bp_index == 0) &&
598 			 (++level < nilfs_btree_height(btree) - 1));
599 	}
600 
601 	/* root */
602 	if (level == nilfs_btree_height(btree) - 1) {
603 		nilfs_btree_node_set_key(btree,
604 					 nilfs_btree_get_root(btree),
605 					 path[level].bp_index, key);
606 	}
607 }
608 
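/*
 * nilfs_btree_do_insert - plain insertion into a node that still has room;
 * if the new entry becomes the leftmost one, the parent's separator key is
 * updated through nilfs_btree_promote_key().
 */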
609 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
610 				  struct nilfs_btree_path *path,
611 				  int level, __u64 *keyp, __u64 *ptrp)
612 {
613 	struct nilfs_btree_node *node;
614 
615 	if (level < nilfs_btree_height(btree) - 1) {
616 		lock_buffer(path[level].bp_bh);
617 		node = nilfs_btree_get_nonroot_node(btree, path, level);
618 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
619 					path[level].bp_index);
620 		if (!buffer_dirty(path[level].bp_bh))
621 			nilfs_btnode_mark_dirty(path[level].bp_bh);
622 		unlock_buffer(path[level].bp_bh);
623 
624 		if (path[level].bp_index == 0)
625 			nilfs_btree_promote_key(btree, path, level + 1,
626 						nilfs_btree_node_get_key(
627 							btree, node, 0));
628 	} else {
629 		node = nilfs_btree_get_root(btree);
630 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
631 					path[level].bp_index);
632 	}
633 }
634 
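/*
 * nilfs_btree_carry_left - make room for an insertion by shifting entries
 * into the left sibling so that the two nodes end up roughly balanced.  If
 * the insertion point itself moves to the sibling, the path is switched over
 * before the actual insert is done.
 */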
635 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
636 				   struct nilfs_btree_path *path,
637 				   int level, __u64 *keyp, __u64 *ptrp)
638 {
639 	struct nilfs_btree_node *node, *left;
640 	int nchildren, lnchildren, n, move;
641 
642 	lock_buffer(path[level].bp_bh);
643 	lock_buffer(path[level].bp_sib_bh);
644 
645 	node = nilfs_btree_get_nonroot_node(btree, path, level);
646 	left = nilfs_btree_get_sib_node(btree, path, level);
647 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
648 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
649 	move = 0;
650 
651 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
652 	if (n > path[level].bp_index) {
653 		/* move insert point */
654 		n--;
655 		move = 1;
656 	}
657 
658 	nilfs_btree_node_move_left(btree, left, node, n);
659 
660 	if (!buffer_dirty(path[level].bp_bh))
661 		nilfs_btnode_mark_dirty(path[level].bp_bh);
662 	if (!buffer_dirty(path[level].bp_sib_bh))
663 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
664 
665 	unlock_buffer(path[level].bp_bh);
666 	unlock_buffer(path[level].bp_sib_bh);
667 
668 	nilfs_btree_promote_key(btree, path, level + 1,
669 				nilfs_btree_node_get_key(btree, node, 0));
670 
671 	if (move) {
672 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
673 		path[level].bp_bh = path[level].bp_sib_bh;
674 		path[level].bp_sib_bh = NULL;
675 		path[level].bp_index += lnchildren;
676 		path[level + 1].bp_index--;
677 	} else {
678 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
679 		path[level].bp_sib_bh = NULL;
680 		path[level].bp_index -= n;
681 	}
682 
683 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
684 }
685 
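/*
 * nilfs_btree_carry_right - mirror of nilfs_btree_carry_left: shift entries
 * into the right sibling and then insert into whichever node now covers the
 * insertion point.
 */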
686 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
687 				    struct nilfs_btree_path *path,
688 				    int level, __u64 *keyp, __u64 *ptrp)
689 {
690 	struct nilfs_btree_node *node, *right;
691 	int nchildren, rnchildren, n, move;
692 
693 	lock_buffer(path[level].bp_bh);
694 	lock_buffer(path[level].bp_sib_bh);
695 
696 	node = nilfs_btree_get_nonroot_node(btree, path, level);
697 	right = nilfs_btree_get_sib_node(btree, path, level);
698 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
699 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
700 	move = 0;
701 
702 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
703 	if (n > nchildren - path[level].bp_index) {
704 		/* move insert point */
705 		n--;
706 		move = 1;
707 	}
708 
709 	nilfs_btree_node_move_right(btree, node, right, n);
710 
711 	if (!buffer_dirty(path[level].bp_bh))
712 		nilfs_btnode_mark_dirty(path[level].bp_bh);
713 	if (!buffer_dirty(path[level].bp_sib_bh))
714 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
715 
716 	unlock_buffer(path[level].bp_bh);
717 	unlock_buffer(path[level].bp_sib_bh);
718 
719 	path[level + 1].bp_index++;
720 	nilfs_btree_promote_key(btree, path, level + 1,
721 				nilfs_btree_node_get_key(btree, right, 0));
722 	path[level + 1].bp_index--;
723 
724 	if (move) {
725 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
726 		path[level].bp_bh = path[level].bp_sib_bh;
727 		path[level].bp_sib_bh = NULL;
728 		path[level].bp_index -=
729 			nilfs_btree_node_get_nchildren(btree, node);
730 		path[level + 1].bp_index++;
731 	} else {
732 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
733 		path[level].bp_sib_bh = NULL;
734 	}
735 
736 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
737 }
738 
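/*
 * nilfs_btree_split - split a full node, moving about half of its entries
 * into the newly allocated right sibling.  The first key of the new node and
 * its preallocated block pointer are returned through *keyp and *ptrp so the
 * caller can insert them into the parent level.
 */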
739 static void nilfs_btree_split(struct nilfs_btree *btree,
740 			      struct nilfs_btree_path *path,
741 			      int level, __u64 *keyp, __u64 *ptrp)
742 {
743 	struct nilfs_btree_node *node, *right;
744 	__u64 newkey;
745 	__u64 newptr;
746 	int nchildren, n, move;
747 
748 	lock_buffer(path[level].bp_bh);
749 	lock_buffer(path[level].bp_sib_bh);
750 
751 	node = nilfs_btree_get_nonroot_node(btree, path, level);
752 	right = nilfs_btree_get_sib_node(btree, path, level);
753 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
754 	move = 0;
755 
756 	n = (nchildren + 1) / 2;
757 	if (n > nchildren - path[level].bp_index) {
758 		n--;
759 		move = 1;
760 	}
761 
762 	nilfs_btree_node_move_right(btree, node, right, n);
763 
764 	if (!buffer_dirty(path[level].bp_bh))
765 		nilfs_btnode_mark_dirty(path[level].bp_bh);
766 	if (!buffer_dirty(path[level].bp_sib_bh))
767 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
768 
769 	unlock_buffer(path[level].bp_bh);
770 	unlock_buffer(path[level].bp_sib_bh);
771 
772 	newkey = nilfs_btree_node_get_key(btree, right, 0);
773 	newptr = path[level].bp_newreq.bpr_ptr;
774 
775 	if (move) {
776 		path[level].bp_index -=
777 			nilfs_btree_node_get_nchildren(btree, node);
778 		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
779 					path[level].bp_index);
780 
781 		*keyp = nilfs_btree_node_get_key(btree, right, 0);
782 		*ptrp = path[level].bp_newreq.bpr_ptr;
783 
784 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
785 		path[level].bp_bh = path[level].bp_sib_bh;
786 		path[level].bp_sib_bh = NULL;
787 	} else {
788 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
789 
790 		*keyp = nilfs_btree_node_get_key(btree, right, 0);
791 		*ptrp = path[level].bp_newreq.bpr_ptr;
792 
793 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
794 		path[level].bp_sib_bh = NULL;
795 	}
796 
797 	path[level + 1].bp_index++;
798 }
799 
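/*
 * nilfs_btree_grow - increase the tree height by one: move every entry of
 * the root into a new child node and raise the root's level.  The child's
 * first key and block pointer are returned so the caller reinserts them into
 * the now-empty root.
 */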
800 static void nilfs_btree_grow(struct nilfs_btree *btree,
801 			     struct nilfs_btree_path *path,
802 			     int level, __u64 *keyp, __u64 *ptrp)
803 {
804 	struct nilfs_btree_node *root, *child;
805 	int n;
806 
807 	lock_buffer(path[level].bp_sib_bh);
808 
809 	root = nilfs_btree_get_root(btree);
810 	child = nilfs_btree_get_sib_node(btree, path, level);
811 
812 	n = nilfs_btree_node_get_nchildren(btree, root);
813 
814 	nilfs_btree_node_move_right(btree, root, child, n);
815 	nilfs_btree_node_set_level(btree, root, level + 1);
816 
817 	if (!buffer_dirty(path[level].bp_sib_bh))
818 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
819 
820 	unlock_buffer(path[level].bp_sib_bh);
821 
822 	path[level].bp_bh = path[level].bp_sib_bh;
823 	path[level].bp_sib_bh = NULL;
824 
825 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
826 
827 	*keyp = nilfs_btree_node_get_key(btree, child, 0);
828 	*ptrp = path[level].bp_newreq.bpr_ptr;
829 }
830 
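/*
 * nilfs_btree_find_near - choose a disk allocation hint for a new block:
 * the pointer just to the left of the insertion point if one exists,
 * otherwise the pointer to the parent node block.
 */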
831 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
832 				   const struct nilfs_btree_path *path)
833 {
834 	struct nilfs_btree_node *node;
835 	int level;
836 
837 	if (path == NULL)
838 		return NILFS_BMAP_INVALID_PTR;
839 
840 	/* left sibling */
841 	level = NILFS_BTREE_LEVEL_NODE_MIN;
842 	if (path[level].bp_index > 0) {
843 		node = nilfs_btree_get_node(btree, path, level);
844 		return nilfs_btree_node_get_ptr(btree, node,
845 						path[level].bp_index - 1);
846 	}
847 
848 	/* parent */
849 	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
850 	if (level <= nilfs_btree_height(btree) - 1) {
851 		node = nilfs_btree_get_node(btree, path, level);
852 		return nilfs_btree_node_get_ptr(btree, node,
853 						path[level].bp_index);
854 	}
855 
856 	return NILFS_BMAP_INVALID_PTR;
857 }
858 
859 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
860 				       const struct nilfs_btree_path *path,
861 				       __u64 key)
862 {
863 	__u64 ptr;
864 
865 	ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
866 	if (ptr != NILFS_BMAP_INVALID_PTR)
867 		/* sequential access */
868 		return ptr;
869 	else {
870 		ptr = nilfs_btree_find_near(btree, path);
871 		if (ptr != NILFS_BMAP_INVALID_PTR)
872 			/* near */
873 			return ptr;
874 	}
875 	/* block group */
876 	return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
877 }
878 
879 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
880 				     __u64 ptr)
881 {
882 	btree->bt_bmap.b_last_allocated_key = key;
883 	btree->bt_bmap.b_last_allocated_ptr = ptr;
884 }
885 
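/*
 * nilfs_btree_prepare_insert - first phase of insertion.  Walking up from
 * the lowest node level, decide for each level whether a plain insert, a
 * carry into a sibling, a split, or a root grow is needed, preallocate the
 * pointers and sibling blocks this will require, and record the chosen
 * operation in bp_op.  stats->bs_nblocks is set to the number of blocks the
 * insertion will add.  On failure every preparation is rolled back.
 */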
886 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
887 				      struct nilfs_btree_path *path,
888 				      int *levelp, __u64 key, __u64 ptr,
889 				      struct nilfs_bmap_stats *stats)
890 {
891 	struct buffer_head *bh;
892 	struct nilfs_btree_node *node, *parent, *sib;
893 	__u64 sibptr;
894 	int pindex, level, ret;
895 
896 	stats->bs_nblocks = 0;
897 	level = NILFS_BTREE_LEVEL_DATA;
898 
899 	/* allocate a new ptr for data block */
900 	if (btree->bt_ops->btop_find_target != NULL)
901 		path[level].bp_newreq.bpr_ptr =
902 			btree->bt_ops->btop_find_target(btree, path, key);
903 
904 	ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
905 		&btree->bt_bmap, &path[level].bp_newreq);
906 	if (ret < 0)
907 		goto err_out_data;
908 
909 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
910 	     level < nilfs_btree_height(btree) - 1;
911 	     level++) {
912 		node = nilfs_btree_get_nonroot_node(btree, path, level);
913 		if (nilfs_btree_node_get_nchildren(btree, node) <
914 		    nilfs_btree_node_nchildren_max(btree, node)) {
915 			path[level].bp_op = nilfs_btree_do_insert;
916 			stats->bs_nblocks++;
917 			goto out;
918 		}
919 
920 		parent = nilfs_btree_get_node(btree, path, level + 1);
921 		pindex = path[level + 1].bp_index;
922 
923 		/* left sibling */
924 		if (pindex > 0) {
925 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
926 							  pindex - 1);
927 			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
928 						   &bh);
929 			if (ret < 0)
930 				goto err_out_child_node;
931 			sib = (struct nilfs_btree_node *)bh->b_data;
932 			if (nilfs_btree_node_get_nchildren(btree, sib) <
933 			    nilfs_btree_node_nchildren_max(btree, sib)) {
934 				path[level].bp_sib_bh = bh;
935 				path[level].bp_op = nilfs_btree_carry_left;
936 				stats->bs_nblocks++;
937 				goto out;
938 			} else
939 				nilfs_bmap_put_block(&btree->bt_bmap, bh);
940 		}
941 
942 		/* right sibling */
943 		if (pindex <
944 		    nilfs_btree_node_get_nchildren(btree, parent) - 1) {
945 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
946 							  pindex + 1);
947 			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
948 						   &bh);
949 			if (ret < 0)
950 				goto err_out_child_node;
951 			sib = (struct nilfs_btree_node *)bh->b_data;
952 			if (nilfs_btree_node_get_nchildren(btree, sib) <
953 			    nilfs_btree_node_nchildren_max(btree, sib)) {
954 				path[level].bp_sib_bh = bh;
955 				path[level].bp_op = nilfs_btree_carry_right;
956 				stats->bs_nblocks++;
957 				goto out;
958 			} else
959 				nilfs_bmap_put_block(&btree->bt_bmap, bh);
960 		}
961 
962 		/* split */
963 		path[level].bp_newreq.bpr_ptr =
964 			path[level - 1].bp_newreq.bpr_ptr + 1;
965 		ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
966 			&btree->bt_bmap, &path[level].bp_newreq);
967 		if (ret < 0)
968 			goto err_out_child_node;
969 		ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
970 					       path[level].bp_newreq.bpr_ptr,
971 					       &bh);
972 		if (ret < 0)
973 			goto err_out_curr_node;
974 
975 		stats->bs_nblocks++;
976 
977 		lock_buffer(bh);
978 		nilfs_btree_node_init(btree,
979 				      (struct nilfs_btree_node *)bh->b_data,
980 				      0, level, 0, NULL, NULL);
981 		unlock_buffer(bh);
982 		path[level].bp_sib_bh = bh;
983 		path[level].bp_op = nilfs_btree_split;
984 	}
985 
986 	/* root */
987 	node = nilfs_btree_get_root(btree);
988 	if (nilfs_btree_node_get_nchildren(btree, node) <
989 	    nilfs_btree_node_nchildren_max(btree, node)) {
990 		path[level].bp_op = nilfs_btree_do_insert;
991 		stats->bs_nblocks++;
992 		goto out;
993 	}
994 
995 	/* grow */
996 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
997 	ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
998 		&btree->bt_bmap, &path[level].bp_newreq);
999 	if (ret < 0)
1000 		goto err_out_child_node;
1001 	ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
1002 				       path[level].bp_newreq.bpr_ptr, &bh);
1003 	if (ret < 0)
1004 		goto err_out_curr_node;
1005 
1006 	lock_buffer(bh);
1007 	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1008 			      0, level, 0, NULL, NULL);
1009 	unlock_buffer(bh);
1010 	path[level].bp_sib_bh = bh;
1011 	path[level].bp_op = nilfs_btree_grow;
1012 
1013 	level++;
1014 	path[level].bp_op = nilfs_btree_do_insert;
1015 
1016 	/* a newly-created node block and a data block are added */
1017 	stats->bs_nblocks += 2;
1018 
1019 	/* success */
1020  out:
1021 	*levelp = level;
1022 	return ret;
1023 
1024 	/* error */
1025  err_out_curr_node:
1026 	btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1027 						    &path[level].bp_newreq);
1028  err_out_child_node:
1029 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1030 		nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1031 		btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1032 			&btree->bt_bmap, &path[level].bp_newreq);
1033 
1034 	}
1035 
1036 	btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1037 						       &path[level].bp_newreq);
1038  err_out_data:
1039 	*levelp = level;
1040 	stats->bs_nblocks = 0;
1041 	return ret;
1042 }
1043 
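/*
 * nilfs_btree_commit_insert - second phase of insertion: commit the
 * preallocated pointers and apply the bp_op recorded at each level, from
 * the bottom of the path up to @maxlevel.
 */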
1044 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1045 				      struct nilfs_btree_path *path,
1046 				      int maxlevel, __u64 key, __u64 ptr)
1047 {
1048 	int level;
1049 
1050 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1051 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1052 	if (btree->bt_ops->btop_set_target != NULL)
1053 		btree->bt_ops->btop_set_target(btree, key, ptr);
1054 
1055 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1056 		if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
1057 			btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1058 				&btree->bt_bmap, &path[level - 1].bp_newreq);
1059 		}
1060 		path[level].bp_op(btree, path, level, &key, &ptr);
1061 	}
1062 
1063 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1064 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1065 }
1066 
1067 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1068 {
1069 	struct nilfs_btree *btree;
1070 	struct nilfs_btree_path *path;
1071 	struct nilfs_bmap_stats stats;
1072 	int level, ret;
1073 
1074 	btree = (struct nilfs_btree *)bmap;
1075 	path = nilfs_btree_alloc_path(btree);
1076 	if (path == NULL)
1077 		return -ENOMEM;
1078 	nilfs_btree_init_path(btree, path);
1079 
1080 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1081 				    NILFS_BTREE_LEVEL_NODE_MIN);
1082 	if (ret != -ENOENT) {
1083 		if (ret == 0)
1084 			ret = -EEXIST;
1085 		goto out;
1086 	}
1087 
1088 	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1089 	if (ret < 0)
1090 		goto out;
1091 	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1092 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1093 
1094  out:
1095 	nilfs_btree_clear_path(btree, path);
1096 	nilfs_btree_free_path(btree, path);
1097 	return ret;
1098 }
1099 
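/*
 * nilfs_btree_do_delete - remove the entry at path[level].bp_index; if the
 * removed entry was the leftmost one, the node's new first key is promoted
 * to the parent.
 */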
1100 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1101 				  struct nilfs_btree_path *path,
1102 				  int level, __u64 *keyp, __u64 *ptrp)
1103 {
1104 	struct nilfs_btree_node *node;
1105 
1106 	if (level < nilfs_btree_height(btree) - 1) {
1107 		lock_buffer(path[level].bp_bh);
1108 		node = nilfs_btree_get_nonroot_node(btree, path, level);
1109 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1110 					path[level].bp_index);
1111 		if (!buffer_dirty(path[level].bp_bh))
1112 			nilfs_btnode_mark_dirty(path[level].bp_bh);
1113 		unlock_buffer(path[level].bp_bh);
1114 		if (path[level].bp_index == 0)
1115 			nilfs_btree_promote_key(btree, path, level + 1,
1116 				nilfs_btree_node_get_key(btree, node, 0));
1117 	} else {
1118 		node = nilfs_btree_get_root(btree);
1119 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1120 					path[level].bp_index);
1121 	}
1122 }
1123 
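/*
 * nilfs_btree_borrow_left - after deleting, refill an underfull node by
 * moving entries over from its richer left sibling and update the separator
 * key in the parent.
 */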
1124 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1125 				    struct nilfs_btree_path *path,
1126 				    int level, __u64 *keyp, __u64 *ptrp)
1127 {
1128 	struct nilfs_btree_node *node, *left;
1129 	int nchildren, lnchildren, n;
1130 
1131 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1132 
1133 	lock_buffer(path[level].bp_bh);
1134 	lock_buffer(path[level].bp_sib_bh);
1135 
1136 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1137 	left = nilfs_btree_get_sib_node(btree, path, level);
1138 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1139 	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1140 
1141 	n = (nchildren + lnchildren) / 2 - nchildren;
1142 
1143 	nilfs_btree_node_move_right(btree, left, node, n);
1144 
1145 	if (!buffer_dirty(path[level].bp_bh))
1146 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1147 	if (!buffer_dirty(path[level].bp_sib_bh))
1148 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1149 
1150 	unlock_buffer(path[level].bp_bh);
1151 	unlock_buffer(path[level].bp_sib_bh);
1152 
1153 	nilfs_btree_promote_key(btree, path, level + 1,
1154 				nilfs_btree_node_get_key(btree, node, 0));
1155 
1156 	nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1157 	path[level].bp_sib_bh = NULL;
1158 	path[level].bp_index += n;
1159 }
1160 
1161 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1162 				     struct nilfs_btree_path *path,
1163 				     int level, __u64 *keyp, __u64 *ptrp)
1164 {
1165 	struct nilfs_btree_node *node, *right;
1166 	int nchildren, rnchildren, n;
1167 
1168 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1169 
1170 	lock_buffer(path[level].bp_bh);
1171 	lock_buffer(path[level].bp_sib_bh);
1172 
1173 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1174 	right = nilfs_btree_get_sib_node(btree, path, level);
1175 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1176 	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1177 
1178 	n = (nchildren + rnchildren) / 2 - nchildren;
1179 
1180 	nilfs_btree_node_move_left(btree, node, right, n);
1181 
1182 	if (!buffer_dirty(path[level].bp_bh))
1183 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1184 	if (!buffer_dirty(path[level].bp_sib_bh))
1185 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1186 
1187 	unlock_buffer(path[level].bp_bh);
1188 	unlock_buffer(path[level].bp_sib_bh);
1189 
1190 	path[level + 1].bp_index++;
1191 	nilfs_btree_promote_key(btree, path, level + 1,
1192 				nilfs_btree_node_get_key(btree, right, 0));
1193 	path[level + 1].bp_index--;
1194 
1195 	nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1196 	path[level].bp_sib_bh = NULL;
1197 }
1198 
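/*
 * nilfs_btree_concat_left - the left sibling cannot lend any entries, so
 * merge this node into it and delete the now-empty node block.
 */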
1199 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1200 				    struct nilfs_btree_path *path,
1201 				    int level, __u64 *keyp, __u64 *ptrp)
1202 {
1203 	struct nilfs_btree_node *node, *left;
1204 	int n;
1205 
1206 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1207 
1208 	lock_buffer(path[level].bp_bh);
1209 	lock_buffer(path[level].bp_sib_bh);
1210 
1211 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1212 	left = nilfs_btree_get_sib_node(btree, path, level);
1213 
1214 	n = nilfs_btree_node_get_nchildren(btree, node);
1215 
1216 	nilfs_btree_node_move_left(btree, left, node, n);
1217 
1218 	if (!buffer_dirty(path[level].bp_sib_bh))
1219 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1220 
1221 	unlock_buffer(path[level].bp_bh);
1222 	unlock_buffer(path[level].bp_sib_bh);
1223 
1224 	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1225 	path[level].bp_bh = path[level].bp_sib_bh;
1226 	path[level].bp_sib_bh = NULL;
1227 	path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1228 }
1229 
1230 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1231 				     struct nilfs_btree_path *path,
1232 				     int level, __u64 *keyp, __u64 *ptrp)
1233 {
1234 	struct nilfs_btree_node *node, *right;
1235 	int n;
1236 
1237 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1238 
1239 	lock_buffer(path[level].bp_bh);
1240 	lock_buffer(path[level].bp_sib_bh);
1241 
1242 	node = nilfs_btree_get_nonroot_node(btree, path, level);
1243 	right = nilfs_btree_get_sib_node(btree, path, level);
1244 
1245 	n = nilfs_btree_node_get_nchildren(btree, right);
1246 
1247 	nilfs_btree_node_move_left(btree, node, right, n);
1248 
1249 	if (!buffer_dirty(path[level].bp_bh))
1250 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1251 
1252 	unlock_buffer(path[level].bp_bh);
1253 	unlock_buffer(path[level].bp_sib_bh);
1254 
1255 	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1256 	path[level].bp_sib_bh = NULL;
1257 	path[level + 1].bp_index++;
1258 }
1259 
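/*
 * nilfs_btree_shrink - the root is left with a single child whose entries
 * fit into the root itself: pull them up, delete the child block, and reduce
 * the tree height by one.
 */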
1260 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1261 			       struct nilfs_btree_path *path,
1262 			       int level, __u64 *keyp, __u64 *ptrp)
1263 {
1264 	struct nilfs_btree_node *root, *child;
1265 	int n;
1266 
1267 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1268 
1269 	lock_buffer(path[level].bp_bh);
1270 	root = nilfs_btree_get_root(btree);
1271 	child = nilfs_btree_get_nonroot_node(btree, path, level);
1272 
1273 	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1274 	nilfs_btree_node_set_level(btree, root, level);
1275 	n = nilfs_btree_node_get_nchildren(btree, child);
1276 	nilfs_btree_node_move_left(btree, root, child, n);
1277 	unlock_buffer(path[level].bp_bh);
1278 
1279 	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1280 	path[level].bp_bh = NULL;
1281 }
1282 
1283 
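/*
 * nilfs_btree_prepare_delete - first phase of deletion: for each level
 * choose between a plain delete, borrowing from a sibling, concatenating
 * with a sibling, or shrinking the root, prepare disposal of the pointers
 * being removed, and record the chosen operation in bp_op.
 */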
1284 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1285 				      struct nilfs_btree_path *path,
1286 				      int *levelp,
1287 				      struct nilfs_bmap_stats *stats)
1288 {
1289 	struct buffer_head *bh;
1290 	struct nilfs_btree_node *node, *parent, *sib;
1291 	__u64 sibptr;
1292 	int pindex, level, ret;
1293 
1294 	ret = 0;
1295 	stats->bs_nblocks = 0;
1296 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1297 	     level < nilfs_btree_height(btree) - 1;
1298 	     level++) {
1299 		node = nilfs_btree_get_nonroot_node(btree, path, level);
1300 		path[level].bp_oldreq.bpr_ptr =
1301 			nilfs_btree_node_get_ptr(btree, node,
1302 						 path[level].bp_index);
1303 		if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1304 			ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1305 				&btree->bt_bmap, &path[level].bp_oldreq);
1306 			if (ret < 0)
1307 				goto err_out_child_node;
1308 		}
1309 
1310 		if (nilfs_btree_node_get_nchildren(btree, node) >
1311 		    nilfs_btree_node_nchildren_min(btree, node)) {
1312 			path[level].bp_op = nilfs_btree_do_delete;
1313 			stats->bs_nblocks++;
1314 			goto out;
1315 		}
1316 
1317 		parent = nilfs_btree_get_node(btree, path, level + 1);
1318 		pindex = path[level + 1].bp_index;
1319 
1320 		if (pindex > 0) {
1321 			/* left sibling */
1322 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1323 							  pindex - 1);
1324 			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1325 						   &bh);
1326 			if (ret < 0)
1327 				goto err_out_curr_node;
1328 			sib = (struct nilfs_btree_node *)bh->b_data;
1329 			if (nilfs_btree_node_get_nchildren(btree, sib) >
1330 			    nilfs_btree_node_nchildren_min(btree, sib)) {
1331 				path[level].bp_sib_bh = bh;
1332 				path[level].bp_op = nilfs_btree_borrow_left;
1333 				stats->bs_nblocks++;
1334 				goto out;
1335 			} else {
1336 				path[level].bp_sib_bh = bh;
1337 				path[level].bp_op = nilfs_btree_concat_left;
1338 				stats->bs_nblocks++;
1339 				/* continue; */
1340 			}
1341 		} else if (pindex <
1342 			   nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1343 			/* right sibling */
1344 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1345 							  pindex + 1);
1346 			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1347 						   &bh);
1348 			if (ret < 0)
1349 				goto err_out_curr_node;
1350 			sib = (struct nilfs_btree_node *)bh->b_data;
1351 			if (nilfs_btree_node_get_nchildren(btree, sib) >
1352 			    nilfs_btree_node_nchildren_min(btree, sib)) {
1353 				path[level].bp_sib_bh = bh;
1354 				path[level].bp_op = nilfs_btree_borrow_right;
1355 				stats->bs_nblocks++;
1356 				goto out;
1357 			} else {
1358 				path[level].bp_sib_bh = bh;
1359 				path[level].bp_op = nilfs_btree_concat_right;
1360 				stats->bs_nblocks++;
1361 				/* continue; */
1362 			}
1363 		} else {
1364 			/* no siblings */
1365 			/* the only child of the root node */
1366 			WARN_ON(level != nilfs_btree_height(btree) - 2);
1367 			if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1368 			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1369 				path[level].bp_op = nilfs_btree_shrink;
1370 				stats->bs_nblocks += 2;
1371 			} else {
1372 				path[level].bp_op = nilfs_btree_do_delete;
1373 				stats->bs_nblocks++;
1374 			}
1375 
1376 			goto out;
1377 
1378 		}
1379 	}
1380 
1381 	node = nilfs_btree_get_root(btree);
1382 	path[level].bp_oldreq.bpr_ptr =
1383 		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1384 	if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1385 		ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1386 			&btree->bt_bmap, &path[level].bp_oldreq);
1387 		if (ret < 0)
1388 			goto err_out_child_node;
1389 	}
1390 	/* child of the root node is deleted */
1391 	path[level].bp_op = nilfs_btree_do_delete;
1392 	stats->bs_nblocks++;
1393 
1394 	/* success */
1395  out:
1396 	*levelp = level;
1397 	return ret;
1398 
1399 	/* error */
1400  err_out_curr_node:
1401 	if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1402 		btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1403 			&btree->bt_bmap, &path[level].bp_oldreq);
1404  err_out_child_node:
1405 	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1406 		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1407 		if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1408 			btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1409 				&btree->bt_bmap, &path[level].bp_oldreq);
1410 	}
1411 	*levelp = level;
1412 	stats->bs_nblocks = 0;
1413 	return ret;
1414 }
1415 
1416 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1417 				      struct nilfs_btree_path *path,
1418 				      int maxlevel)
1419 {
1420 	int level;
1421 
1422 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1423 		if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1424 			btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1425 				&btree->bt_bmap, &path[level].bp_oldreq);
1426 		path[level].bp_op(btree, path, level, NULL, NULL);
1427 	}
1428 
1429 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1430 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1431 }
1432 
1433 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1434 
1435 {
1436 	struct nilfs_btree *btree;
1437 	struct nilfs_btree_path *path;
1438 	struct nilfs_bmap_stats stats;
1439 	int level, ret;
1440 
1441 	btree = (struct nilfs_btree *)bmap;
1442 	path = nilfs_btree_alloc_path(btree);
1443 	if (path == NULL)
1444 		return -ENOMEM;
1445 	nilfs_btree_init_path(btree, path);
1446 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1447 				    NILFS_BTREE_LEVEL_NODE_MIN);
1448 	if (ret < 0)
1449 		goto out;
1450 
1451 	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1452 	if (ret < 0)
1453 		goto out;
1454 	nilfs_btree_commit_delete(btree, path, level);
1455 	nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1456 
1457 out:
1458 	nilfs_btree_clear_path(btree, path);
1459 	nilfs_btree_free_path(btree, path);
1460 	return ret;
1461 }
1462 
1463 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1464 {
1465 	struct nilfs_btree *btree;
1466 	struct nilfs_btree_path *path;
1467 	int ret;
1468 
1469 	btree = (struct nilfs_btree *)bmap;
1470 	path = nilfs_btree_alloc_path(btree);
1471 	if (path == NULL)
1472 		return -ENOMEM;
1473 	nilfs_btree_init_path(btree, path);
1474 
1475 	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1476 
1477 	nilfs_btree_clear_path(btree, path);
1478 	nilfs_btree_free_path(btree, path);
1479 
1480 	return ret;
1481 }
1482 
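/*
 * nilfs_btree_check_delete - returns nonzero when @key is the largest key in
 * the tree and the next largest key is below bmap->b_low; only a height-2
 * tree, or a height-3 tree with a single child, can qualify.
 */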
1483 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1484 {
1485 	struct buffer_head *bh;
1486 	struct nilfs_btree *btree;
1487 	struct nilfs_btree_node *root, *node;
1488 	__u64 maxkey, nextmaxkey;
1489 	__u64 ptr;
1490 	int nchildren, ret;
1491 
1492 	btree = (struct nilfs_btree *)bmap;
1493 	root = nilfs_btree_get_root(btree);
1494 	switch (nilfs_btree_height(btree)) {
1495 	case 2:
1496 		bh = NULL;
1497 		node = root;
1498 		break;
1499 	case 3:
1500 		nchildren = nilfs_btree_node_get_nchildren(btree, root);
1501 		if (nchildren > 1)
1502 			return 0;
1503 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1504 		ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1505 		if (ret < 0)
1506 			return ret;
1507 		node = (struct nilfs_btree_node *)bh->b_data;
1508 		break;
1509 	default:
1510 		return 0;
1511 	}
1512 
1513 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1514 	maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1515 	nextmaxkey = (nchildren > 1) ?
1516 		nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1517 	if (bh != NULL)
1518 		nilfs_bmap_put_block(bmap, bh);
1519 
1520 	return (maxkey == key) && (nextmaxkey < bmap->b_low);
1521 }
1522 
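/*
 * nilfs_btree_gather_data - copy up to @nitems key/pointer pairs out of the
 * sole bottom-level node of a small tree (height 2, or height 3 with a
 * single child) into @keys and @ptrs; returns the number of pairs copied.
 */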
1523 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1524 				   __u64 *keys, __u64 *ptrs, int nitems)
1525 {
1526 	struct buffer_head *bh;
1527 	struct nilfs_btree *btree;
1528 	struct nilfs_btree_node *node, *root;
1529 	__le64 *dkeys;
1530 	__le64 *dptrs;
1531 	__u64 ptr;
1532 	int nchildren, i, ret;
1533 
1534 	btree = (struct nilfs_btree *)bmap;
1535 	root = nilfs_btree_get_root(btree);
1536 	switch (nilfs_btree_height(btree)) {
1537 	case 2:
1538 		bh = NULL;
1539 		node = root;
1540 		break;
1541 	case 3:
1542 		nchildren = nilfs_btree_node_get_nchildren(btree, root);
1543 		WARN_ON(nchildren > 1);
1544 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1545 		ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1546 		if (ret < 0)
1547 			return ret;
1548 		node = (struct nilfs_btree_node *)bh->b_data;
1549 		break;
1550 	default:
1551 		node = NULL;
1552 		return -EINVAL;
1553 	}
1554 
1555 	nchildren = nilfs_btree_node_get_nchildren(btree, node);
1556 	if (nchildren < nitems)
1557 		nitems = nchildren;
1558 	dkeys = nilfs_btree_node_dkeys(btree, node);
1559 	dptrs = nilfs_btree_node_dptrs(btree, node);
1560 	for (i = 0; i < nitems; i++) {
1561 		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1562 		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1563 	}
1564 
1565 	if (bh != NULL)
1566 		nilfs_bmap_put_block(bmap, bh);
1567 
1568 	return nitems;
1569 }
1570 
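/*
 * nilfs_btree_prepare_convert_and_insert - preallocate the pointer for the
 * new data block and, when @nreq is given, a pointer and buffer for a
 * level-1 child node that will hold the converted entries.
 */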
1571 static int
1572 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1573 				       union nilfs_bmap_ptr_req *dreq,
1574 				       union nilfs_bmap_ptr_req *nreq,
1575 				       struct buffer_head **bhp,
1576 				       struct nilfs_bmap_stats *stats)
1577 {
1578 	struct buffer_head *bh;
1579 	struct nilfs_btree *btree;
1580 	int ret;
1581 
1582 	btree = (struct nilfs_btree *)bmap;
1583 	stats->bs_nblocks = 0;
1584 
1585 	/* for data */
1586 	/* cannot find near ptr */
1587 	if (btree->bt_ops->btop_find_target != NULL)
1588 		dreq->bpr_ptr
1589 			= btree->bt_ops->btop_find_target(btree, NULL, key);
1590 	ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1591 	if (ret < 0)
1592 		return ret;
1593 
1594 	*bhp = NULL;
1595 	stats->bs_nblocks++;
1596 	if (nreq != NULL) {
1597 		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1598 		ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1599 		if (ret < 0)
1600 			goto err_out_dreq;
1601 
1602 		ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
1603 		if (ret < 0)
1604 			goto err_out_nreq;
1605 
1606 		*bhp = bh;
1607 		stats->bs_nblocks++;
1608 	}
1609 
1610 	/* success */
1611 	return 0;
1612 
1613 	/* error */
1614  err_out_nreq:
1615 	bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1616  err_out_dreq:
1617 	bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1618 	stats->bs_nblocks = 0;
1619 	return ret;
1620 
1621 }
1622 
1623 static void
1624 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1625 				      __u64 key, __u64 ptr,
1626 				      const __u64 *keys, const __u64 *ptrs,
1627 				      int n, __u64 low, __u64 high,
1628 				      union nilfs_bmap_ptr_req *dreq,
1629 				      union nilfs_bmap_ptr_req *nreq,
1630 				      struct buffer_head *bh)
1631 {
1632 	struct nilfs_btree *btree;
1633 	struct nilfs_btree_node *node;
1634 	__u64 tmpptr;
1635 
1636 	/* free resources */
1637 	if (bmap->b_ops->bop_clear != NULL)
1638 		bmap->b_ops->bop_clear(bmap);
1639 
1640 	/* ptr must be a pointer to a buffer head. */
1641 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1642 
1643 	/* convert and insert */
1644 	btree = (struct nilfs_btree *)bmap;
1645 	nilfs_btree_init(bmap, low, high);
1646 	if (nreq != NULL) {
1647 		if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
1648 			bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1649 			bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1650 		}
1651 
1652 		/* create child node at level 1 */
1653 		lock_buffer(bh);
1654 		node = (struct nilfs_btree_node *)bh->b_data;
1655 		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1656 		nilfs_btree_node_insert(btree, node,
1657 					key, dreq->bpr_ptr, n);
1658 		if (!buffer_dirty(bh))
1659 			nilfs_btnode_mark_dirty(bh);
1660 		if (!nilfs_bmap_dirty(bmap))
1661 			nilfs_bmap_set_dirty(bmap);
1662 
1663 		unlock_buffer(bh);
1664 		nilfs_bmap_put_block(bmap, bh);
1665 
1666 		/* create root node at level 2 */
1667 		node = nilfs_btree_get_root(btree);
1668 		tmpptr = nreq->bpr_ptr;
1669 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1670 				      2, 1, &keys[0], &tmpptr);
1671 	} else {
1672 		if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
1673 			bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1674 
1675 		/* create root node at level 1 */
1676 		node = nilfs_btree_get_root(btree);
1677 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1678 				      1, n, keys, ptrs);
1679 		nilfs_btree_node_insert(btree, node,
1680 					key, dreq->bpr_ptr, n);
1681 		if (!nilfs_bmap_dirty(bmap))
1682 			nilfs_bmap_set_dirty(bmap);
1683 	}
1684 
1685 	if (btree->bt_ops->btop_set_target != NULL)
1686 		btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1687 }
1688 
1689 /**
1690  * nilfs_btree_convert_and_insert - convert the bmap to a B-tree and insert a new entry
1691  * @bmap: bmap structure to be converted
1692  * @key: key of the new entry
1693  * @ptr: block pointer of the new entry
1694  * @keys: keys gathered from the previous bmap representation
1695  * @ptrs: block pointers gathered from the previous bmap representation
1696  * @n: number of gathered key/pointer pairs
1697  * @low: lower bound of keys handled by this bmap
1698  * @high: upper bound of keys handled by this bmap
1699  */
1700 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1701 				   __u64 key, __u64 ptr,
1702 				   const __u64 *keys, const __u64 *ptrs,
1703 				   int n, __u64 low, __u64 high)
1704 {
1705 	struct buffer_head *bh;
1706 	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1707 	struct nilfs_bmap_stats stats;
1708 	int ret;
1709 
1710 	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1711 		di = &dreq;
1712 		ni = NULL;
1713 	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1714 			   1 << bmap->b_inode->i_blkbits)) {
1715 		di = &dreq;
1716 		ni = &nreq;
1717 	} else {
1718 		di = NULL;
1719 		ni = NULL;
1720 		BUG();
1721 	}
1722 
1723 	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1724 						     &stats);
1725 	if (ret < 0)
1726 		return ret;
1727 	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1728 					      low, high, di, ni, bh);
1729 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1730 	return 0;
1731 }
1732 
1733 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1734 				   struct nilfs_btree_path *path,
1735 				   int level,
1736 				   struct buffer_head *bh)
1737 {
1738 	while ((++level < nilfs_btree_height(btree) - 1) &&
1739 	       !buffer_dirty(path[level].bp_bh))
1740 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1741 
1742 	return 0;
1743 }
1744 
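/*
 * nilfs_btree_prepare_update_v - prepare to assign a new virtual block
 * address to the node block at @level: request end of the old pointer and
 * allocation of a new one, and, for a cached btree node buffer, prepare
 * moving it to the new key in the btnode cache.
 */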
1745 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1746 					struct nilfs_btree_path *path,
1747 					int level)
1748 {
1749 	struct nilfs_btree_node *parent;
1750 	int ret;
1751 
1752 	parent = nilfs_btree_get_node(btree, path, level + 1);
1753 	path[level].bp_oldreq.bpr_ptr =
1754 		nilfs_btree_node_get_ptr(btree, parent,
1755 					 path[level + 1].bp_index);
1756 	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1757 	ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1758 					&path[level].bp_oldreq,
1759 					&path[level].bp_newreq);
1760 	if (ret < 0)
1761 		return ret;
1762 
1763 	if (buffer_nilfs_node(path[level].bp_bh)) {
1764 		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1765 		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1766 		path[level].bp_ctxt.bh = path[level].bp_bh;
1767 		ret = nilfs_btnode_prepare_change_key(
1768 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1769 			&path[level].bp_ctxt);
1770 		if (ret < 0) {
1771 			nilfs_bmap_abort_update(&btree->bt_bmap,
1772 						&path[level].bp_oldreq,
1773 						&path[level].bp_newreq);
1774 			return ret;
1775 		}
1776 	}
1777 
1778 	return 0;
1779 }
1780 
1781 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1782 					struct nilfs_btree_path *path,
1783 					int level)
1784 {
1785 	struct nilfs_btree_node *parent;
1786 
1787 	nilfs_bmap_commit_update(&btree->bt_bmap,
1788 				 &path[level].bp_oldreq,
1789 				 &path[level].bp_newreq);
1790 
1791 	if (buffer_nilfs_node(path[level].bp_bh)) {
1792 		nilfs_btnode_commit_change_key(
1793 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1794 			&path[level].bp_ctxt);
1795 		path[level].bp_bh = path[level].bp_ctxt.bh;
1796 	}
1797 	set_buffer_nilfs_volatile(path[level].bp_bh);
1798 
1799 	parent = nilfs_btree_get_node(btree, path, level + 1);
1800 	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1801 				 path[level].bp_newreq.bpr_ptr);
1802 }
1803 
1804 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1805 				       struct nilfs_btree_path *path,
1806 				       int level)
1807 {
1808 	nilfs_bmap_abort_update(&btree->bt_bmap,
1809 				&path[level].bp_oldreq,
1810 				&path[level].bp_newreq);
1811 	if (buffer_nilfs_node(path[level].bp_bh))
1812 		nilfs_btnode_abort_change_key(
1813 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1814 			&path[level].bp_ctxt);
1815 }
1816 
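/*
 * Prepare pointer updates along the path from @minlevel upward, stopping at
 * the first level whose buffer is already dirty; @minlevel itself is skipped
 * if its buffer is already marked volatile.  The highest level actually
 * prepared is returned through @maxlevelp.  On failure every level prepared
 * so far is aborted.
 */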
1817 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1818 					   struct nilfs_btree_path *path,
1819 					   int minlevel,
1820 					   int *maxlevelp)
1821 {
1822 	int level, ret;
1823 
1824 	level = minlevel;
1825 	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1826 		ret = nilfs_btree_prepare_update_v(btree, path, level);
1827 		if (ret < 0)
1828 			return ret;
1829 	}
1830 	while ((++level < nilfs_btree_height(btree) - 1) &&
1831 	       !buffer_dirty(path[level].bp_bh)) {
1832 
1833 		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1834 		ret = nilfs_btree_prepare_update_v(btree, path, level);
1835 		if (ret < 0)
1836 			goto out;
1837 	}
1838 
1839 	/* success */
1840 	*maxlevelp = level - 1;
1841 	return 0;
1842 
1843 	/* error */
1844  out:
1845 	while (--level > minlevel)
1846 		nilfs_btree_abort_update_v(btree, path, level);
1847 	if (!buffer_nilfs_volatile(path[level].bp_bh))
1848 		nilfs_btree_abort_update_v(btree, path, level);
1849 	return ret;
1850 }
1851 
1852 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1853 					   struct nilfs_btree_path *path,
1854 					   int minlevel,
1855 					   int maxlevel,
1856 					   struct buffer_head *bh)
1857 {
1858 	int level;
1859 
1860 	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1861 		nilfs_btree_commit_update_v(btree, path, minlevel);
1862 
1863 	for (level = minlevel + 1; level <= maxlevel; level++)
1864 		nilfs_btree_commit_update_v(btree, path, level);
1865 }
1866 
1867 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1868 				   struct nilfs_btree_path *path,
1869 				   int level,
1870 				   struct buffer_head *bh)
1871 {
1872 	int maxlevel, ret;
1873 	struct nilfs_btree_node *parent;
1874 	__u64 ptr;
1875 
1876 	get_bh(bh);
1877 	path[level].bp_bh = bh;
1878 	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1879 	if (ret < 0)
1880 		goto out;
1881 
1882 	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1883 		parent = nilfs_btree_get_node(btree, path, level + 1);
1884 		ptr = nilfs_btree_node_get_ptr(btree, parent,
1885 					       path[level + 1].bp_index);
1886 		ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1887 		if (ret < 0)
1888 			goto out;
1889 	}
1890 
1891 	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1892 
1893  out:
1894 	brelse(path[level].bp_bh);
1895 	path[level].bp_bh = NULL;
1896 	return ret;
1897 }
1898 
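/*
 * bop_propagate callback: derive the first key and the level from the dirty
 * buffer (a btree node block or a data block), look up the path down to its
 * parent, and dispatch to the pointer-type specific btop_propagate
 * operation (nilfs_btree_propagate_p or nilfs_btree_propagate_v above).
 */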
1899 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1900 				 struct buffer_head *bh)
1901 {
1902 	struct nilfs_btree *btree;
1903 	struct nilfs_btree_path *path;
1904 	struct nilfs_btree_node *node;
1905 	__u64 key;
1906 	int level, ret;
1907 
1908 	WARN_ON(!buffer_dirty(bh));
1909 
1910 	btree = (struct nilfs_btree *)bmap;
1911 	path = nilfs_btree_alloc_path(btree);
1912 	if (path == NULL)
1913 		return -ENOMEM;
1914 	nilfs_btree_init_path(btree, path);
1915 
1916 	if (buffer_nilfs_node(bh)) {
1917 		node = (struct nilfs_btree_node *)bh->b_data;
1918 		key = nilfs_btree_node_get_key(btree, node, 0);
1919 		level = nilfs_btree_node_get_level(btree, node);
1920 	} else {
1921 		key = nilfs_bmap_data_get_key(bmap, bh);
1922 		level = NILFS_BTREE_LEVEL_DATA;
1923 	}
1924 
1925 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1926 	if (ret < 0) {
1927 		if (unlikely(ret == -ENOENT))
1928 			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1929 			       __func__, (unsigned long long)key, level);
1930 		goto out;
1931 	}
1932 
1933 	ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1934 
1935  out:
1936 	nilfs_btree_clear_path(btree, path);
1937 	nilfs_btree_free_path(btree, path);
1938 
1939 	return ret;
1940 }
1941 
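/*
 * GC variant of bop_propagate: blocks staged for garbage collection are not
 * relocated through the btree itself, so propagation reduces to
 * nilfs_bmap_mark_dirty() on the buffer's block number (presumably marking
 * the corresponding DAT entry dirty).
 */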
1942 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1943 				    struct buffer_head *bh)
1944 {
1945 	return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1946 }
1947 
1948 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1949 					 struct list_head *lists,
1950 					 struct buffer_head *bh)
1951 {
1952 	struct list_head *head;
1953 	struct buffer_head *cbh;
1954 	struct nilfs_btree_node *node, *cnode;
1955 	__u64 key, ckey;
1956 	int level;
1957 
1958 	get_bh(bh);
1959 	node = (struct nilfs_btree_node *)bh->b_data;
1960 	key = nilfs_btree_node_get_key(btree, node, 0);
1961 	level = nilfs_btree_node_get_level(btree, node);
1962 	list_for_each(head, &lists[level]) {
1963 		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1964 		cnode = (struct nilfs_btree_node *)cbh->b_data;
1965 		ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1966 		if (key < ckey)
1967 			break;
1968 	}
1969 	list_add_tail(&bh->b_assoc_buffers, head);
1970 }
1971 
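/*
 * Collect the dirty btree node buffers of this bmap from its btnode page
 * cache and append them to @listp, grouped by level (lower levels first)
 * and ordered by first key within each level, using
 * nilfs_btree_add_dirty_buffer() above.
 */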
1972 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1973 					     struct list_head *listp)
1974 {
1975 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1976 	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1977 	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1978 	struct pagevec pvec;
1979 	struct buffer_head *bh, *head;
1980 	pgoff_t index = 0;
1981 	int level, i;
1982 
1983 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1984 	     level < NILFS_BTREE_LEVEL_MAX;
1985 	     level++)
1986 		INIT_LIST_HEAD(&lists[level]);
1987 
1988 	pagevec_init(&pvec, 0);
1989 
1990 	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1991 				  PAGEVEC_SIZE)) {
1992 		for (i = 0; i < pagevec_count(&pvec); i++) {
1993 			bh = head = page_buffers(pvec.pages[i]);
1994 			do {
1995 				if (buffer_dirty(bh))
1996 					nilfs_btree_add_dirty_buffer(btree,
1997 								     lists, bh);
1998 			} while ((bh = bh->b_this_page) != head);
1999 		}
2000 		pagevec_release(&pvec);
2001 		cond_resched();
2002 	}
2003 
2004 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2005 	     level < NILFS_BTREE_LEVEL_MAX;
2006 	     level++)
2007 		list_splice(&lists[level], listp->prev);
2008 }
2009 
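/*
 * Assignment for a physical btree: store the real disk block number
 * @blocknr directly in the parent node (renaming the btnode cache entry
 * first if the buffer is a btree node block), and fill in the DAT-style
 * binfo with the block offset and level.
 */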
2010 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2011 				struct nilfs_btree_path *path,
2012 				int level,
2013 				struct buffer_head **bh,
2014 				sector_t blocknr,
2015 				union nilfs_binfo *binfo)
2016 {
2017 	struct nilfs_btree_node *parent;
2018 	__u64 key;
2019 	__u64 ptr;
2020 	int ret;
2021 
2022 	parent = nilfs_btree_get_node(btree, path, level + 1);
2023 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2024 				       path[level + 1].bp_index);
2025 	if (buffer_nilfs_node(*bh)) {
2026 		path[level].bp_ctxt.oldkey = ptr;
2027 		path[level].bp_ctxt.newkey = blocknr;
2028 		path[level].bp_ctxt.bh = *bh;
2029 		ret = nilfs_btnode_prepare_change_key(
2030 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2031 			&path[level].bp_ctxt);
2032 		if (ret < 0)
2033 			return ret;
2034 		nilfs_btnode_commit_change_key(
2035 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2036 			&path[level].bp_ctxt);
2037 		*bh = path[level].bp_ctxt.bh;
2038 	}
2039 
2040 	nilfs_btree_node_set_ptr(btree, parent,
2041 				 path[level + 1].bp_index, blocknr);
2042 
2043 	key = nilfs_btree_node_get_key(btree, parent,
2044 				       path[level + 1].bp_index);
2045 	/* on-disk format */
2046 	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2047 	binfo->bi_dat.bi_level = level;
2048 
2049 	return 0;
2050 }
2051 
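/*
 * Assignment for a virtual btree: bind the virtual block number taken from
 * the parent node to the real block number @blocknr via the bmap's
 * bpop_prepare_start_ptr/bpop_commit_start_ptr operations, and fill in the
 * virtual-style binfo with the virtual block number and block offset.
 */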
2052 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2053 				struct nilfs_btree_path *path,
2054 				int level,
2055 				struct buffer_head **bh,
2056 				sector_t blocknr,
2057 				union nilfs_binfo *binfo)
2058 {
2059 	struct nilfs_btree_node *parent;
2060 	__u64 key;
2061 	__u64 ptr;
2062 	union nilfs_bmap_ptr_req req;
2063 	int ret;
2064 
2065 	parent = nilfs_btree_get_node(btree, path, level + 1);
2066 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2067 				       path[level + 1].bp_index);
2068 	req.bpr_ptr = ptr;
2069 	ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
2070 							       &req);
2071 	if (ret < 0)
2072 		return ret;
2073 	btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
2074 							&req, blocknr);
2075 
2076 	key = nilfs_btree_node_get_key(btree, parent,
2077 				       path[level + 1].bp_index);
2078 	/* on-disk format */
2079 	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2080 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2081 
2082 	return 0;
2083 }
2084 
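/*
 * bop_assign callback: as in nilfs_btree_propagate(), derive the key and
 * level from the buffer, look up the path to its parent, and dispatch to
 * the pointer-type specific btop_assign operation to record the assigned
 * disk block number @blocknr in @binfo.
 */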
2085 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2086 			      struct buffer_head **bh,
2087 			      sector_t blocknr,
2088 			      union nilfs_binfo *binfo)
2089 {
2090 	struct nilfs_btree *btree;
2091 	struct nilfs_btree_path *path;
2092 	struct nilfs_btree_node *node;
2093 	__u64 key;
2094 	int level, ret;
2095 
2096 	btree = (struct nilfs_btree *)bmap;
2097 	path = nilfs_btree_alloc_path(btree);
2098 	if (path == NULL)
2099 		return -ENOMEM;
2100 	nilfs_btree_init_path(btree, path);
2101 
2102 	if (buffer_nilfs_node(*bh)) {
2103 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2104 		key = nilfs_btree_node_get_key(btree, node, 0);
2105 		level = nilfs_btree_node_get_level(btree, node);
2106 	} else {
2107 		key = nilfs_bmap_data_get_key(bmap, *bh);
2108 		level = NILFS_BTREE_LEVEL_DATA;
2109 	}
2110 
2111 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2112 	if (ret < 0) {
2113 		WARN_ON(ret == -ENOENT);
2114 		goto out;
2115 	}
2116 
2117 	ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2118 					    blocknr, binfo);
2119 
2120  out:
2121 	nilfs_btree_clear_path(btree, path);
2122 	nilfs_btree_free_path(btree, path);
2123 
2124 	return ret;
2125 }
2126 
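/*
 * GC variant of bop_assign: relocation of a GC'd block only requires moving
 * its DAT entry via nilfs_bmap_move_v(); the binfo is filled in from the
 * buffer's current block number and the node or data key.
 */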
2127 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2128 				 struct buffer_head **bh,
2129 				 sector_t blocknr,
2130 				 union nilfs_binfo *binfo)
2131 {
2132 	struct nilfs_btree *btree;
2133 	struct nilfs_btree_node *node;
2134 	__u64 key;
2135 	int ret;
2136 
2137 	btree = (struct nilfs_btree *)bmap;
2138 	ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2139 	if (ret < 0)
2140 		return ret;
2141 
2142 	if (buffer_nilfs_node(*bh)) {
2143 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2144 		key = nilfs_btree_node_get_key(btree, node, 0);
2145 	} else
2146 		key = nilfs_bmap_data_get_key(bmap, *bh);
2147 
2148 	/* on-disk format */
2149 	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2150 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2151 
2152 	return 0;
2153 }
2154 
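/*
 * bop_mark callback: look up the pointer to the block at (@key, @level)
 * from its parent, read the block in, and mark its buffer and the bmap
 * dirty so that the block is rewritten by the next segment construction.
 */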
2155 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2156 {
2157 	struct buffer_head *bh;
2158 	struct nilfs_btree *btree;
2159 	struct nilfs_btree_path *path;
2160 	__u64 ptr;
2161 	int ret;
2162 
2163 	btree = (struct nilfs_btree *)bmap;
2164 	path = nilfs_btree_alloc_path(btree);
2165 	if (path == NULL)
2166 		return -ENOMEM;
2167 	nilfs_btree_init_path(btree, path);
2168 
2169 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2170 	if (ret < 0) {
2171 		WARN_ON(ret == -ENOENT);
2172 		goto out;
2173 	}
2174 	ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
2175 	if (ret < 0) {
2176 		WARN_ON(ret == -ENOENT);
2177 		goto out;
2178 	}
2179 
2180 	if (!buffer_dirty(bh))
2181 		nilfs_btnode_mark_dirty(bh);
2182 	nilfs_bmap_put_block(&btree->bt_bmap, bh);
2183 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
2184 		nilfs_bmap_set_dirty(&btree->bt_bmap);
2185 
2186  out:
2187 	nilfs_btree_clear_path(btree, path);
2188 	nilfs_btree_free_path(btree, path);
2189 	return ret;
2190 }
2191 
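/*
 * Operation tables: nilfs_btree_ops is the regular bmap interface and
 * nilfs_btree_ops_gc the reduced set used for GC inodes, while
 * nilfs_btree_ops_v and nilfs_btree_ops_p select between virtual
 * (DAT-mapped) and physical (raw block number) pointer handling; the
 * physical variant is used for the DAT inode itself.
 */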
2192 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2193 	.bop_lookup		=	nilfs_btree_lookup,
2194 	.bop_insert		=	nilfs_btree_insert,
2195 	.bop_delete		=	nilfs_btree_delete,
2196 	.bop_clear		=	NULL,
2197 
2198 	.bop_propagate		=	nilfs_btree_propagate,
2199 
2200 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2201 
2202 	.bop_assign		=	nilfs_btree_assign,
2203 	.bop_mark		=	nilfs_btree_mark,
2204 
2205 	.bop_last_key		=	nilfs_btree_last_key,
2206 	.bop_check_insert	=	NULL,
2207 	.bop_check_delete	=	nilfs_btree_check_delete,
2208 	.bop_gather_data	=	nilfs_btree_gather_data,
2209 };
2210 
2211 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2212 	.bop_lookup		=	NULL,
2213 	.bop_insert		=	NULL,
2214 	.bop_delete		=	NULL,
2215 	.bop_clear		=	NULL,
2216 
2217 	.bop_propagate		=	nilfs_btree_propagate_gc,
2218 
2219 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2220 
2221 	.bop_assign		=	nilfs_btree_assign_gc,
2222 	.bop_mark		=	NULL,
2223 
2224 	.bop_last_key		=	NULL,
2225 	.bop_check_insert	=	NULL,
2226 	.bop_check_delete	=	NULL,
2227 	.bop_gather_data	=	NULL,
2228 };
2229 
2230 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2231 	.btop_find_target	=	nilfs_btree_find_target_v,
2232 	.btop_set_target	=	nilfs_btree_set_target_v,
2233 	.btop_propagate		=	nilfs_btree_propagate_v,
2234 	.btop_assign		=	nilfs_btree_assign_v,
2235 };
2236 
2237 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2238 	.btop_find_target	=	NULL,
2239 	.btop_set_target	=	NULL,
2240 	.btop_propagate		=	nilfs_btree_propagate_p,
2241 	.btop_assign		=	nilfs_btree_assign_p,
2242 };
2243 
2244 int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2245 {
2246 	struct nilfs_btree *btree;
2247 
2248 	btree = (struct nilfs_btree *)bmap;
2249 	bmap->b_ops = &nilfs_btree_ops;
2250 	bmap->b_low = low;
2251 	bmap->b_high = high;
2252 	switch (bmap->b_inode->i_ino) {
2253 	case NILFS_DAT_INO:
2254 		btree->bt_ops = &nilfs_btree_ops_p;
2255 		break;
2256 	default:
2257 		btree->bt_ops = &nilfs_btree_ops_v;
2258 		break;
2259 	}
2260 
2261 	return 0;
2262 }
2263 
2264 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2265 {
2266 	bmap->b_low = NILFS_BMAP_LARGE_LOW;
2267 	bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2268 	bmap->b_ops = &nilfs_btree_ops_gc;
2269 }
2270