xref: /openbmc/linux/fs/nilfs2/btree.c (revision a09d2831)
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22 
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33 
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
 * @bp_ctxt: context for changing the key of this node block in the btnode cache
 41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44 	struct buffer_head *bp_bh;
45 	struct buffer_head *bp_sib_bh;
46 	int bp_index;
47 	union nilfs_bmap_ptr_req bp_oldreq;
48 	union nilfs_bmap_ptr_req bp_newreq;
49 	struct nilfs_btnode_chkey_ctxt bp_ctxt;
50 	void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51 		      int, __u64 *, __u64 *);
52 };
53 
54 /*
55  * B-tree path operations
56  */
57 
58 static struct kmem_cache *nilfs_btree_path_cache;
59 
60 int __init nilfs_btree_path_cache_init(void)
61 {
62 	nilfs_btree_path_cache =
63 		kmem_cache_create("nilfs2_btree_path_cache",
64 				  sizeof(struct nilfs_btree_path) *
65 				  NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66 	return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68 
69 void nilfs_btree_path_cache_destroy(void)
70 {
71 	kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73 
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76 	return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78 
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81 	kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83 
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86 	int level;
87 
88 	for (level = NILFS_BTREE_LEVEL_DATA;
89 	     level < NILFS_BTREE_LEVEL_MAX;
90 	     level++) {
91 		path[level].bp_bh = NULL;
92 		path[level].bp_sib_bh = NULL;
93 		path[level].bp_index = 0;
94 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96 		path[level].bp_op = NULL;
97 	}
98 }
99 
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102 	int level;
103 
104 	for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105 	     level++)
106 		brelse(path[level].bp_bh);
107 }
108 
109 /*
110  * B-tree node operations
111  */
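/*
 * nilfs_btree_get_block - read in a B-tree node block
 *
 * Looks up the node block designated by the block address @ptr through the
 * btnode cache of the bmap's inode, reading it from disk if necessary.  An
 * -EEXIST result from nilfs_btnode_submit_block() means the block is already
 * available in the cache and is treated as success.
 */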
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113 				 struct buffer_head **bhp)
114 {
115 	struct address_space *btnc =
116 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117 	int err;
118 
119 	err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
120 	if (err)
121 		return err == -EEXIST ? 0 : err;
122 
123 	wait_on_buffer(*bhp);
124 	if (!buffer_uptodate(*bhp)) {
125 		brelse(*bhp);
126 		return -EIO;
127 	}
128 	return 0;
129 }
130 
131 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
132 				     __u64 ptr, struct buffer_head **bhp)
133 {
134 	struct address_space *btnc =
135 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
136 	struct buffer_head *bh;
137 
138 	bh = nilfs_btnode_create_block(btnc, ptr);
139 	if (!bh)
140 		return -ENOMEM;
141 
142 	set_buffer_nilfs_volatile(bh);
143 	*bhp = bh;
144 	return 0;
145 }
146 
147 static inline int
148 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
149 {
150 	return node->bn_flags;
151 }
152 
153 static inline void
154 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
155 {
156 	node->bn_flags = flags;
157 }
158 
159 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
160 {
161 	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
162 }
163 
164 static inline int
165 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
166 {
167 	return node->bn_level;
168 }
169 
170 static inline void
171 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
172 {
173 	node->bn_level = level;
174 }
175 
176 static inline int
177 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
178 {
179 	return le16_to_cpu(node->bn_nchildren);
180 }
181 
182 static inline void
183 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
184 {
185 	node->bn_nchildren = cpu_to_le16(nchildren);
186 }
187 
188 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
189 {
190 	return 1 << btree->bt_bmap.b_inode->i_blkbits;
191 }
192 
193 static inline int
194 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
195 			       const struct nilfs_btree *btree)
196 {
197 	return nilfs_btree_node_root(node) ?
198 		NILFS_BTREE_ROOT_NCHILDREN_MIN :
199 		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
200 }
201 
202 static inline int
203 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
204 			       const struct nilfs_btree *btree)
205 {
206 	return nilfs_btree_node_root(node) ?
207 		NILFS_BTREE_ROOT_NCHILDREN_MAX :
208 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
209 }
210 
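/*
 * On-block layout assumed by the accessors below: a struct nilfs_btree_node
 * header, an optional pad (present in on-disk nodes, absent in the root node
 * embedded in the bmap), then an array of 64-bit keys followed by an array
 * of 64-bit pointers, each array sized to the node's maximum child count.
 */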
211 static inline __le64 *
212 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
213 {
214 	return (__le64 *)((char *)(node + 1) +
215 			  (nilfs_btree_node_root(node) ?
216 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
217 }
218 
219 static inline __le64 *
220 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
221 		       const struct nilfs_btree *btree)
222 {
223 	return (__le64 *)(nilfs_btree_node_dkeys(node) +
224 			  nilfs_btree_node_nchildren_max(node, btree));
225 }
226 
227 static inline __u64
228 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
229 {
230 	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
231 }
232 
233 static inline void
234 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
235 {
236 	*(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
237 }
238 
239 static inline __u64
240 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
241 			 const struct nilfs_btree_node *node, int index)
242 {
243 	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
244 					index));
245 }
246 
247 static inline void
248 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
249 			 struct nilfs_btree_node *node, int index, __u64 ptr)
250 {
251 	*(nilfs_btree_node_dptrs(node, btree) + index) =
252 		nilfs_bmap_ptr_to_dptr(ptr);
253 }
254 
255 static void nilfs_btree_node_init(struct nilfs_btree *btree,
256 				  struct nilfs_btree_node *node,
257 				  int flags, int level, int nchildren,
258 				  const __u64 *keys, const __u64 *ptrs)
259 {
260 	__le64 *dkeys;
261 	__le64 *dptrs;
262 	int i;
263 
264 	nilfs_btree_node_set_flags(node, flags);
265 	nilfs_btree_node_set_level(node, level);
266 	nilfs_btree_node_set_nchildren(node, nchildren);
267 
268 	dkeys = nilfs_btree_node_dkeys(node);
269 	dptrs = nilfs_btree_node_dptrs(node, btree);
270 	for (i = 0; i < nchildren; i++) {
271 		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
272 		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
273 	}
274 }
275 
276 /* Assume the buffer heads corresponding to left and right are locked. */
277 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
278 				       struct nilfs_btree_node *left,
279 				       struct nilfs_btree_node *right,
280 				       int n)
281 {
282 	__le64 *ldkeys, *rdkeys;
283 	__le64 *ldptrs, *rdptrs;
284 	int lnchildren, rnchildren;
285 
286 	ldkeys = nilfs_btree_node_dkeys(left);
287 	ldptrs = nilfs_btree_node_dptrs(left, btree);
288 	lnchildren = nilfs_btree_node_get_nchildren(left);
289 
290 	rdkeys = nilfs_btree_node_dkeys(right);
291 	rdptrs = nilfs_btree_node_dptrs(right, btree);
292 	rnchildren = nilfs_btree_node_get_nchildren(right);
293 
294 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
295 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
296 	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
297 	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
298 
299 	lnchildren += n;
300 	rnchildren -= n;
301 	nilfs_btree_node_set_nchildren(left, lnchildren);
302 	nilfs_btree_node_set_nchildren(right, rnchildren);
303 }
304 
305 /* Assume that the buffer heads corresponding to left and right are locked. */
306 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
307 					struct nilfs_btree_node *left,
308 					struct nilfs_btree_node *right,
309 					int n)
310 {
311 	__le64 *ldkeys, *rdkeys;
312 	__le64 *ldptrs, *rdptrs;
313 	int lnchildren, rnchildren;
314 
315 	ldkeys = nilfs_btree_node_dkeys(left);
316 	ldptrs = nilfs_btree_node_dptrs(left, btree);
317 	lnchildren = nilfs_btree_node_get_nchildren(left);
318 
319 	rdkeys = nilfs_btree_node_dkeys(right);
320 	rdptrs = nilfs_btree_node_dptrs(right, btree);
321 	rnchildren = nilfs_btree_node_get_nchildren(right);
322 
323 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
324 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
325 	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
326 	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
327 
328 	lnchildren -= n;
329 	rnchildren += n;
330 	nilfs_btree_node_set_nchildren(left, lnchildren);
331 	nilfs_btree_node_set_nchildren(right, rnchildren);
332 }
333 
334 /* Assume that the buffer head corresponding to node is locked. */
335 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
336 				    struct nilfs_btree_node *node,
337 				    __u64 key, __u64 ptr, int index)
338 {
339 	__le64 *dkeys;
340 	__le64 *dptrs;
341 	int nchildren;
342 
343 	dkeys = nilfs_btree_node_dkeys(node);
344 	dptrs = nilfs_btree_node_dptrs(node, btree);
345 	nchildren = nilfs_btree_node_get_nchildren(node);
346 	if (index < nchildren) {
347 		memmove(dkeys + index + 1, dkeys + index,
348 			(nchildren - index) * sizeof(*dkeys));
349 		memmove(dptrs + index + 1, dptrs + index,
350 			(nchildren - index) * sizeof(*dptrs));
351 	}
352 	dkeys[index] = nilfs_bmap_key_to_dkey(key);
353 	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
354 	nchildren++;
355 	nilfs_btree_node_set_nchildren(node, nchildren);
356 }
357 
358 /* Assume that the buffer head corresponding to node is locked. */
359 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
360 				    struct nilfs_btree_node *node,
361 				    __u64 *keyp, __u64 *ptrp, int index)
362 {
363 	__u64 key;
364 	__u64 ptr;
365 	__le64 *dkeys;
366 	__le64 *dptrs;
367 	int nchildren;
368 
369 	dkeys = nilfs_btree_node_dkeys(node);
370 	dptrs = nilfs_btree_node_dptrs(node, btree);
371 	key = nilfs_bmap_dkey_to_key(dkeys[index]);
372 	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
373 	nchildren = nilfs_btree_node_get_nchildren(node);
374 	if (keyp != NULL)
375 		*keyp = key;
376 	if (ptrp != NULL)
377 		*ptrp = ptr;
378 
379 	if (index < nchildren - 1) {
380 		memmove(dkeys + index, dkeys + index + 1,
381 			(nchildren - index - 1) * sizeof(*dkeys));
382 		memmove(dptrs + index, dptrs + index + 1,
383 			(nchildren - index - 1) * sizeof(*dptrs));
384 	}
385 	nchildren--;
386 	nilfs_btree_node_set_nchildren(node, nchildren);
387 }
388 
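/*
 * nilfs_btree_node_lookup - binary-search @node for @key
 *
 * Returns nonzero if an exact match is found.  *@indexp receives the
 * matching slot; without a match it receives, for nodes above the lowest
 * level, the slot of the child subtree covering @key, and for the lowest
 * level, the position at which @key would be inserted.
 */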
389 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
390 				   __u64 key, int *indexp)
391 {
392 	__u64 nkey;
393 	int index, low, high, s;
394 
395 	/* binary search */
396 	low = 0;
397 	high = nilfs_btree_node_get_nchildren(node) - 1;
398 	index = 0;
399 	s = 0;
400 	while (low <= high) {
401 		index = (low + high) / 2;
402 		nkey = nilfs_btree_node_get_key(node, index);
403 		if (nkey == key) {
404 			s = 0;
405 			goto out;
406 		} else if (nkey < key) {
407 			low = index + 1;
408 			s = -1;
409 		} else {
410 			high = index - 1;
411 			s = 1;
412 		}
413 	}
414 
415 	/* adjust index */
416 	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
417 		if (s > 0 && index > 0)
418 			index--;
419 	} else if (s < 0)
420 		index++;
421 
422  out:
423 	*indexp = index;
424 
425 	return s == 0;
426 }
427 
428 static inline struct nilfs_btree_node *
429 nilfs_btree_get_root(const struct nilfs_btree *btree)
430 {
431 	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
432 }
433 
434 static inline struct nilfs_btree_node *
435 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
436 {
437 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
438 }
439 
440 static inline struct nilfs_btree_node *
441 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
442 {
443 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
444 }
445 
446 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
447 {
448 	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
449 }
450 
451 static inline struct nilfs_btree_node *
452 nilfs_btree_get_node(const struct nilfs_btree *btree,
453 		     const struct nilfs_btree_path *path,
454 		     int level)
455 {
456 	return (level == nilfs_btree_height(btree) - 1) ?
457 		nilfs_btree_get_root(btree) :
458 		nilfs_btree_get_nonroot_node(path, level);
459 }
460 
461 static inline int
462 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
463 {
464 	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
465 		dump_stack();
466 		printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
467 		       nilfs_btree_node_get_level(node), level);
468 		return 1;
469 	}
470 	return 0;
471 }
472 
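/*
 * nilfs_btree_do_lookup - descend from the root down to @minlevel looking up @key
 *
 * Records the buffer head and child index of each visited level in @path.
 * Returns 0 and stores the pointer found at @minlevel in *@ptrp, or -ENOENT
 * if @key is not present; in the latter case @path still describes where the
 * key would be inserted.
 */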
473 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
474 				 struct nilfs_btree_path *path,
475 				 __u64 key, __u64 *ptrp, int minlevel)
476 {
477 	struct nilfs_btree_node *node;
478 	__u64 ptr;
479 	int level, index, found, ret;
480 
481 	node = nilfs_btree_get_root(btree);
482 	level = nilfs_btree_node_get_level(node);
483 	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
484 		return -ENOENT;
485 
486 	found = nilfs_btree_node_lookup(node, key, &index);
487 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
488 	path[level].bp_bh = NULL;
489 	path[level].bp_index = index;
490 
491 	for (level--; level >= minlevel; level--) {
492 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
493 		if (ret < 0)
494 			return ret;
495 		node = nilfs_btree_get_nonroot_node(path, level);
496 		if (nilfs_btree_bad_node(node, level))
497 			return -EINVAL;
498 		if (!found)
499 			found = nilfs_btree_node_lookup(node, key, &index);
500 		else
501 			index = 0;
502 		if (index < nilfs_btree_node_nchildren_max(node, btree))
503 			ptr = nilfs_btree_node_get_ptr(btree, node, index);
504 		else {
505 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
506 			/* lookup for insertion; the child does not exist yet */
507 			ptr = NILFS_BMAP_INVALID_PTR;
508 		}
509 		path[level].bp_index = index;
510 	}
511 	if (!found)
512 		return -ENOENT;
513 
514 	if (ptrp != NULL)
515 		*ptrp = ptr;
516 
517 	return 0;
518 }
519 
520 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
521 				      struct nilfs_btree_path *path,
522 				      __u64 *keyp, __u64 *ptrp)
523 {
524 	struct nilfs_btree_node *node;
525 	__u64 ptr;
526 	int index, level, ret;
527 
528 	node = nilfs_btree_get_root(btree);
529 	index = nilfs_btree_node_get_nchildren(node) - 1;
530 	if (index < 0)
531 		return -ENOENT;
532 	level = nilfs_btree_node_get_level(node);
533 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
534 	path[level].bp_bh = NULL;
535 	path[level].bp_index = index;
536 
537 	for (level--; level > 0; level--) {
538 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
539 		if (ret < 0)
540 			return ret;
541 		node = nilfs_btree_get_nonroot_node(path, level);
542 		if (nilfs_btree_bad_node(node, level))
543 			return -EINVAL;
544 		index = nilfs_btree_node_get_nchildren(node) - 1;
545 		ptr = nilfs_btree_node_get_ptr(btree, node, index);
546 		path[level].bp_index = index;
547 	}
548 
549 	if (keyp != NULL)
550 		*keyp = nilfs_btree_node_get_key(node, index);
551 	if (ptrp != NULL)
552 		*ptrp = ptr;
553 
554 	return 0;
555 }
556 
557 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
558 			      __u64 key, int level, __u64 *ptrp)
559 {
560 	struct nilfs_btree *btree;
561 	struct nilfs_btree_path *path;
562 	__u64 ptr;
563 	int ret;
564 
565 	btree = (struct nilfs_btree *)bmap;
566 	path = nilfs_btree_alloc_path();
567 	if (path == NULL)
568 		return -ENOMEM;
569 	nilfs_btree_init_path(path);
570 
571 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572 
573 	if (ptrp != NULL)
574 		*ptrp = ptr;
575 
576 	nilfs_btree_release_path(path);
577 	nilfs_btree_free_path(path);
578 
579 	return ret;
580 }
581 
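/*
 * nilfs_btree_lookup_contig - look up a run of contiguous blocks
 *
 * Starting at @key, counts how many consecutive keys map to consecutive
 * block addresses, translating virtual block numbers through the DAT when
 * the bmap uses them.  Returns the length of the run (at most @maxblocks)
 * with the first block address stored in *@ptrp, or a negative error code.
 */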
582 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
583 				     __u64 key, __u64 *ptrp, unsigned maxblocks)
584 {
585 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
586 	struct nilfs_btree_path *path;
587 	struct nilfs_btree_node *node;
588 	struct inode *dat = NULL;
589 	__u64 ptr, ptr2;
590 	sector_t blocknr;
591 	int level = NILFS_BTREE_LEVEL_NODE_MIN;
592 	int ret, cnt, index, maxlevel;
593 
594 	path = nilfs_btree_alloc_path();
595 	if (path == NULL)
596 		return -ENOMEM;
597 	nilfs_btree_init_path(path);
598 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
599 	if (ret < 0)
600 		goto out;
601 
602 	if (NILFS_BMAP_USE_VBN(bmap)) {
603 		dat = nilfs_bmap_get_dat(bmap);
604 		ret = nilfs_dat_translate(dat, ptr, &blocknr);
605 		if (ret < 0)
606 			goto out;
607 		ptr = blocknr;
608 	}
609 	cnt = 1;
610 	if (cnt == maxblocks)
611 		goto end;
612 
613 	maxlevel = nilfs_btree_height(btree) - 1;
614 	node = nilfs_btree_get_node(btree, path, level);
615 	index = path[level].bp_index + 1;
616 	for (;;) {
617 		while (index < nilfs_btree_node_get_nchildren(node)) {
618 			if (nilfs_btree_node_get_key(node, index) !=
619 			    key + cnt)
620 				goto end;
621 			ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
622 			if (dat) {
623 				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
624 				if (ret < 0)
625 					goto out;
626 				ptr2 = blocknr;
627 			}
628 			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
629 				goto end;
630 			index++;
631 			continue;
632 		}
633 		if (level == maxlevel)
634 			break;
635 
636 		/* look up the right sibling node */
637 		node = nilfs_btree_get_node(btree, path, level + 1);
638 		index = path[level + 1].bp_index + 1;
639 		if (index >= nilfs_btree_node_get_nchildren(node) ||
640 		    nilfs_btree_node_get_key(node, index) != key + cnt)
641 			break;
642 		ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
643 		path[level + 1].bp_index = index;
644 
645 		brelse(path[level].bp_bh);
646 		path[level].bp_bh = NULL;
647 		ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
648 		if (ret < 0)
649 			goto out;
650 		node = nilfs_btree_get_nonroot_node(path, level);
651 		index = 0;
652 		path[level].bp_index = index;
653 	}
654  end:
655 	*ptrp = ptr;
656 	ret = cnt;
657  out:
658 	nilfs_btree_release_path(path);
659 	nilfs_btree_free_path(path);
660 	return ret;
661 }
662 
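/*
 * nilfs_btree_promote_key - propagate a changed minimum key to ancestors
 *
 * When the key at index 0 of a node changes, the matching key in its parent
 * must change as well.  This updates the key on each level of @path, walking
 * upward for as long as the path stays at index 0, and finally patches the
 * root if it is reached.
 */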
663 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
664 				    struct nilfs_btree_path *path,
665 				    int level, __u64 key)
666 {
667 	if (level < nilfs_btree_height(btree) - 1) {
668 		do {
669 			nilfs_btree_node_set_key(
670 				nilfs_btree_get_nonroot_node(path, level),
671 				path[level].bp_index, key);
672 			if (!buffer_dirty(path[level].bp_bh))
673 				nilfs_btnode_mark_dirty(path[level].bp_bh);
674 		} while ((path[level].bp_index == 0) &&
675 			 (++level < nilfs_btree_height(btree) - 1));
676 	}
677 
678 	/* root */
679 	if (level == nilfs_btree_height(btree) - 1) {
680 		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
681 					 path[level].bp_index, key);
682 	}
683 }
684 
685 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
686 				  struct nilfs_btree_path *path,
687 				  int level, __u64 *keyp, __u64 *ptrp)
688 {
689 	struct nilfs_btree_node *node;
690 
691 	if (level < nilfs_btree_height(btree) - 1) {
692 		node = nilfs_btree_get_nonroot_node(path, level);
693 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
694 					path[level].bp_index);
695 		if (!buffer_dirty(path[level].bp_bh))
696 			nilfs_btnode_mark_dirty(path[level].bp_bh);
697 
698 		if (path[level].bp_index == 0)
699 			nilfs_btree_promote_key(btree, path, level + 1,
700 						nilfs_btree_node_get_key(node,
701 									 0));
702 	} else {
703 		node = nilfs_btree_get_root(btree);
704 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
705 					path[level].bp_index);
706 	}
707 }
708 
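/*
 * nilfs_btree_carry_left - insert after redistributing children to the left
 *
 * Shifts children from the full node into its left sibling so that the new
 * entry fits, then performs the insertion.  If the insertion point itself
 * moves into the sibling, @path is switched over to the sibling block.
 */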
709 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
710 				   struct nilfs_btree_path *path,
711 				   int level, __u64 *keyp, __u64 *ptrp)
712 {
713 	struct nilfs_btree_node *node, *left;
714 	int nchildren, lnchildren, n, move;
715 
716 	node = nilfs_btree_get_nonroot_node(path, level);
717 	left = nilfs_btree_get_sib_node(path, level);
718 	nchildren = nilfs_btree_node_get_nchildren(node);
719 	lnchildren = nilfs_btree_node_get_nchildren(left);
720 	move = 0;
721 
722 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
723 	if (n > path[level].bp_index) {
724 		/* move insert point */
725 		n--;
726 		move = 1;
727 	}
728 
729 	nilfs_btree_node_move_left(btree, left, node, n);
730 
731 	if (!buffer_dirty(path[level].bp_bh))
732 		nilfs_btnode_mark_dirty(path[level].bp_bh);
733 	if (!buffer_dirty(path[level].bp_sib_bh))
734 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
735 
736 	nilfs_btree_promote_key(btree, path, level + 1,
737 				nilfs_btree_node_get_key(node, 0));
738 
739 	if (move) {
740 		brelse(path[level].bp_bh);
741 		path[level].bp_bh = path[level].bp_sib_bh;
742 		path[level].bp_sib_bh = NULL;
743 		path[level].bp_index += lnchildren;
744 		path[level + 1].bp_index--;
745 	} else {
746 		brelse(path[level].bp_sib_bh);
747 		path[level].bp_sib_bh = NULL;
748 		path[level].bp_index -= n;
749 	}
750 
751 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
752 }
753 
754 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
755 				    struct nilfs_btree_path *path,
756 				    int level, __u64 *keyp, __u64 *ptrp)
757 {
758 	struct nilfs_btree_node *node, *right;
759 	int nchildren, rnchildren, n, move;
760 
761 	node = nilfs_btree_get_nonroot_node(path, level);
762 	right = nilfs_btree_get_sib_node(path, level);
763 	nchildren = nilfs_btree_node_get_nchildren(node);
764 	rnchildren = nilfs_btree_node_get_nchildren(right);
765 	move = 0;
766 
767 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
768 	if (n > nchildren - path[level].bp_index) {
769 		/* move insert point */
770 		n--;
771 		move = 1;
772 	}
773 
774 	nilfs_btree_node_move_right(btree, node, right, n);
775 
776 	if (!buffer_dirty(path[level].bp_bh))
777 		nilfs_btnode_mark_dirty(path[level].bp_bh);
778 	if (!buffer_dirty(path[level].bp_sib_bh))
779 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
780 
781 	path[level + 1].bp_index++;
782 	nilfs_btree_promote_key(btree, path, level + 1,
783 				nilfs_btree_node_get_key(right, 0));
784 	path[level + 1].bp_index--;
785 
786 	if (move) {
787 		brelse(path[level].bp_bh);
788 		path[level].bp_bh = path[level].bp_sib_bh;
789 		path[level].bp_sib_bh = NULL;
790 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
791 		path[level + 1].bp_index++;
792 	} else {
793 		brelse(path[level].bp_sib_bh);
794 		path[level].bp_sib_bh = NULL;
795 	}
796 
797 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
798 }
799 
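/*
 * nilfs_btree_split - split a full node and insert the new entry
 *
 * Moves the upper half of the children into the new right sibling block
 * (path[level].bp_sib_bh), inserts the entry on whichever side it belongs,
 * and returns through *@keyp and *@ptrp the sibling's first key and newly
 * allocated block pointer so the caller can insert them into the parent.
 */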
800 static void nilfs_btree_split(struct nilfs_btree *btree,
801 			      struct nilfs_btree_path *path,
802 			      int level, __u64 *keyp, __u64 *ptrp)
803 {
804 	struct nilfs_btree_node *node, *right;
805 	__u64 newkey;
806 	__u64 newptr;
807 	int nchildren, n, move;
808 
809 	node = nilfs_btree_get_nonroot_node(path, level);
810 	right = nilfs_btree_get_sib_node(path, level);
811 	nchildren = nilfs_btree_node_get_nchildren(node);
812 	move = 0;
813 
814 	n = (nchildren + 1) / 2;
815 	if (n > nchildren - path[level].bp_index) {
816 		n--;
817 		move = 1;
818 	}
819 
820 	nilfs_btree_node_move_right(btree, node, right, n);
821 
822 	if (!buffer_dirty(path[level].bp_bh))
823 		nilfs_btnode_mark_dirty(path[level].bp_bh);
824 	if (!buffer_dirty(path[level].bp_sib_bh))
825 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
826 
827 	newkey = nilfs_btree_node_get_key(right, 0);
828 	newptr = path[level].bp_newreq.bpr_ptr;
829 
830 	if (move) {
831 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
832 		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
833 					path[level].bp_index);
834 
835 		*keyp = nilfs_btree_node_get_key(right, 0);
836 		*ptrp = path[level].bp_newreq.bpr_ptr;
837 
838 		brelse(path[level].bp_bh);
839 		path[level].bp_bh = path[level].bp_sib_bh;
840 		path[level].bp_sib_bh = NULL;
841 	} else {
842 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
843 
844 		*keyp = nilfs_btree_node_get_key(right, 0);
845 		*ptrp = path[level].bp_newreq.bpr_ptr;
846 
847 		brelse(path[level].bp_sib_bh);
848 		path[level].bp_sib_bh = NULL;
849 	}
850 
851 	path[level + 1].bp_index++;
852 }
853 
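/*
 * nilfs_btree_grow - increase the height of the B-tree by one
 *
 * Moves every child of the root into a newly allocated node block
 * (path[level].bp_sib_bh), raises the root's level, performs the pending
 * insertion, and returns the child's first key and block pointer so the
 * caller can install the child in the now-empty root.
 */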
854 static void nilfs_btree_grow(struct nilfs_btree *btree,
855 			     struct nilfs_btree_path *path,
856 			     int level, __u64 *keyp, __u64 *ptrp)
857 {
858 	struct nilfs_btree_node *root, *child;
859 	int n;
860 
861 	root = nilfs_btree_get_root(btree);
862 	child = nilfs_btree_get_sib_node(path, level);
863 
864 	n = nilfs_btree_node_get_nchildren(root);
865 
866 	nilfs_btree_node_move_right(btree, root, child, n);
867 	nilfs_btree_node_set_level(root, level + 1);
868 
869 	if (!buffer_dirty(path[level].bp_sib_bh))
870 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
871 
872 	path[level].bp_bh = path[level].bp_sib_bh;
873 	path[level].bp_sib_bh = NULL;
874 
875 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
876 
877 	*keyp = nilfs_btree_node_get_key(child, 0);
878 	*ptrp = path[level].bp_newreq.bpr_ptr;
879 }
880 
881 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
882 				   const struct nilfs_btree_path *path)
883 {
884 	struct nilfs_btree_node *node;
885 	int level;
886 
887 	if (path == NULL)
888 		return NILFS_BMAP_INVALID_PTR;
889 
890 	/* left sibling */
891 	level = NILFS_BTREE_LEVEL_NODE_MIN;
892 	if (path[level].bp_index > 0) {
893 		node = nilfs_btree_get_node(btree, path, level);
894 		return nilfs_btree_node_get_ptr(btree, node,
895 						path[level].bp_index - 1);
896 	}
897 
898 	/* parent */
899 	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
900 	if (level <= nilfs_btree_height(btree) - 1) {
901 		node = nilfs_btree_get_node(btree, path, level);
902 		return nilfs_btree_node_get_ptr(btree, node,
903 						path[level].bp_index);
904 	}
905 
906 	return NILFS_BMAP_INVALID_PTR;
907 }
908 
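/*
 * nilfs_btree_find_target_v - pick a hint address for a new virtual block:
 * prefer continuing a sequential write, then a block near the would-be
 * neighbours found by nilfs_btree_find_near(), and finally fall back to the
 * block group derived from the key.
 */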
909 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
910 				       const struct nilfs_btree_path *path,
911 				       __u64 key)
912 {
913 	__u64 ptr;
914 
915 	ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
916 	if (ptr != NILFS_BMAP_INVALID_PTR)
917 		/* sequential access */
918 		return ptr;
919 	else {
920 		ptr = nilfs_btree_find_near(btree, path);
921 		if (ptr != NILFS_BMAP_INVALID_PTR)
922 			/* near */
923 			return ptr;
924 	}
925 	/* block group */
926 	return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
927 }
928 
929 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
930 				     __u64 ptr)
931 {
932 	btree->bt_bmap.b_last_allocated_key = key;
933 	btree->bt_bmap.b_last_allocated_ptr = ptr;
934 }
935 
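/*
 * nilfs_btree_prepare_insert - plan an insertion without modifying the tree
 *
 * Walks up from the leaf level, choosing for each level that would overflow
 * a rebalance operation (carry into a sibling, split, or grow the tree) and
 * pre-allocating the block pointers it needs, so that the commit phase
 * cannot fail.  *@levelp receives the highest level to be touched and
 * @stats->bs_nblocks the number of blocks the insertion will add.
 */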
936 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
937 				      struct nilfs_btree_path *path,
938 				      int *levelp, __u64 key, __u64 ptr,
939 				      struct nilfs_bmap_stats *stats)
940 {
941 	struct buffer_head *bh;
942 	struct nilfs_btree_node *node, *parent, *sib;
943 	__u64 sibptr;
944 	int pindex, level, ret;
945 	struct inode *dat = NULL;
946 
947 	stats->bs_nblocks = 0;
948 	level = NILFS_BTREE_LEVEL_DATA;
949 
950 	/* allocate a new ptr for data block */
951 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
952 		path[level].bp_newreq.bpr_ptr =
953 			nilfs_btree_find_target_v(btree, path, key);
954 		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
955 	}
956 
957 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
958 					   &path[level].bp_newreq, dat);
959 	if (ret < 0)
960 		goto err_out_data;
961 
962 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
963 	     level < nilfs_btree_height(btree) - 1;
964 	     level++) {
965 		node = nilfs_btree_get_nonroot_node(path, level);
966 		if (nilfs_btree_node_get_nchildren(node) <
967 		    nilfs_btree_node_nchildren_max(node, btree)) {
968 			path[level].bp_op = nilfs_btree_do_insert;
969 			stats->bs_nblocks++;
970 			goto out;
971 		}
972 
973 		parent = nilfs_btree_get_node(btree, path, level + 1);
974 		pindex = path[level + 1].bp_index;
975 
976 		/* left sibling */
977 		if (pindex > 0) {
978 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
979 							  pindex - 1);
980 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
981 			if (ret < 0)
982 				goto err_out_child_node;
983 			sib = (struct nilfs_btree_node *)bh->b_data;
984 			if (nilfs_btree_node_get_nchildren(sib) <
985 			    nilfs_btree_node_nchildren_max(sib, btree)) {
986 				path[level].bp_sib_bh = bh;
987 				path[level].bp_op = nilfs_btree_carry_left;
988 				stats->bs_nblocks++;
989 				goto out;
990 			} else
991 				brelse(bh);
992 		}
993 
994 		/* right sibling */
995 		if (pindex <
996 		    nilfs_btree_node_get_nchildren(parent) - 1) {
997 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
998 							  pindex + 1);
999 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1000 			if (ret < 0)
1001 				goto err_out_child_node;
1002 			sib = (struct nilfs_btree_node *)bh->b_data;
1003 			if (nilfs_btree_node_get_nchildren(sib) <
1004 			    nilfs_btree_node_nchildren_max(sib, btree)) {
1005 				path[level].bp_sib_bh = bh;
1006 				path[level].bp_op = nilfs_btree_carry_right;
1007 				stats->bs_nblocks++;
1008 				goto out;
1009 			} else
1010 				brelse(bh);
1011 		}
1012 
1013 		/* split */
1014 		path[level].bp_newreq.bpr_ptr =
1015 			path[level - 1].bp_newreq.bpr_ptr + 1;
1016 		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1017 						   &path[level].bp_newreq, dat);
1018 		if (ret < 0)
1019 			goto err_out_child_node;
1020 		ret = nilfs_btree_get_new_block(btree,
1021 						path[level].bp_newreq.bpr_ptr,
1022 						&bh);
1023 		if (ret < 0)
1024 			goto err_out_curr_node;
1025 
1026 		stats->bs_nblocks++;
1027 
1028 		nilfs_btree_node_init(btree,
1029 				      (struct nilfs_btree_node *)bh->b_data,
1030 				      0, level, 0, NULL, NULL);
1031 		path[level].bp_sib_bh = bh;
1032 		path[level].bp_op = nilfs_btree_split;
1033 	}
1034 
1035 	/* root */
1036 	node = nilfs_btree_get_root(btree);
1037 	if (nilfs_btree_node_get_nchildren(node) <
1038 	    nilfs_btree_node_nchildren_max(node, btree)) {
1039 		path[level].bp_op = nilfs_btree_do_insert;
1040 		stats->bs_nblocks++;
1041 		goto out;
1042 	}
1043 
1044 	/* grow */
1045 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1046 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1047 					   &path[level].bp_newreq, dat);
1048 	if (ret < 0)
1049 		goto err_out_child_node;
1050 	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1051 					&bh);
1052 	if (ret < 0)
1053 		goto err_out_curr_node;
1054 
1055 	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1056 			      0, level, 0, NULL, NULL);
1057 	path[level].bp_sib_bh = bh;
1058 	path[level].bp_op = nilfs_btree_grow;
1059 
1060 	level++;
1061 	path[level].bp_op = nilfs_btree_do_insert;
1062 
1063 	/* a newly-created node block and a data block are added */
1064 	stats->bs_nblocks += 2;
1065 
1066 	/* success */
1067  out:
1068 	*levelp = level;
1069 	return ret;
1070 
1071 	/* error */
1072  err_out_curr_node:
1073 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1074 				   dat);
1075  err_out_child_node:
1076 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1077 		nilfs_btnode_delete(path[level].bp_sib_bh);
1078 		nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1079 					   &path[level].bp_newreq, dat);
1080 
1081 	}
1082 
1083 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1084 				   dat);
1085  err_out_data:
1086 	*levelp = level;
1087 	stats->bs_nblocks = 0;
1088 	return ret;
1089 }
1090 
1091 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1092 				      struct nilfs_btree_path *path,
1093 				      int maxlevel, __u64 key, __u64 ptr)
1094 {
1095 	struct inode *dat = NULL;
1096 	int level;
1097 
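	/*
	 * ptr is expected to hold the buffer head of the new data block
	 * (cf. nilfs_btree_commit_convert_and_insert); it is replaced below
	 * by the newly allocated block pointer.
	 */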
1098 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1099 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1100 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1101 		nilfs_btree_set_target_v(btree, key, ptr);
1102 		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1103 	}
1104 
1105 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1106 		nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1107 					    &path[level - 1].bp_newreq, dat);
1108 		path[level].bp_op(btree, path, level, &key, &ptr);
1109 	}
1110 
1111 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1112 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1113 }
1114 
1115 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1116 {
1117 	struct nilfs_btree *btree;
1118 	struct nilfs_btree_path *path;
1119 	struct nilfs_bmap_stats stats;
1120 	int level, ret;
1121 
1122 	btree = (struct nilfs_btree *)bmap;
1123 	path = nilfs_btree_alloc_path();
1124 	if (path == NULL)
1125 		return -ENOMEM;
1126 	nilfs_btree_init_path(path);
1127 
1128 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1129 				    NILFS_BTREE_LEVEL_NODE_MIN);
1130 	if (ret != -ENOENT) {
1131 		if (ret == 0)
1132 			ret = -EEXIST;
1133 		goto out;
1134 	}
1135 
1136 	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1137 	if (ret < 0)
1138 		goto out;
1139 	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1140 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1141 
1142  out:
1143 	nilfs_btree_release_path(path);
1144 	nilfs_btree_free_path(path);
1145 	return ret;
1146 }
1147 
1148 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1149 				  struct nilfs_btree_path *path,
1150 				  int level, __u64 *keyp, __u64 *ptrp)
1151 {
1152 	struct nilfs_btree_node *node;
1153 
1154 	if (level < nilfs_btree_height(btree) - 1) {
1155 		node = nilfs_btree_get_nonroot_node(path, level);
1156 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1157 					path[level].bp_index);
1158 		if (!buffer_dirty(path[level].bp_bh))
1159 			nilfs_btnode_mark_dirty(path[level].bp_bh);
1160 		if (path[level].bp_index == 0)
1161 			nilfs_btree_promote_key(btree, path, level + 1,
1162 				nilfs_btree_node_get_key(node, 0));
1163 	} else {
1164 		node = nilfs_btree_get_root(btree);
1165 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1166 					path[level].bp_index);
1167 	}
1168 }
1169 
1170 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1171 				    struct nilfs_btree_path *path,
1172 				    int level, __u64 *keyp, __u64 *ptrp)
1173 {
1174 	struct nilfs_btree_node *node, *left;
1175 	int nchildren, lnchildren, n;
1176 
1177 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1178 
1179 	node = nilfs_btree_get_nonroot_node(path, level);
1180 	left = nilfs_btree_get_sib_node(path, level);
1181 	nchildren = nilfs_btree_node_get_nchildren(node);
1182 	lnchildren = nilfs_btree_node_get_nchildren(left);
1183 
1184 	n = (nchildren + lnchildren) / 2 - nchildren;
1185 
1186 	nilfs_btree_node_move_right(btree, left, node, n);
1187 
1188 	if (!buffer_dirty(path[level].bp_bh))
1189 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1190 	if (!buffer_dirty(path[level].bp_sib_bh))
1191 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1192 
1193 	nilfs_btree_promote_key(btree, path, level + 1,
1194 				nilfs_btree_node_get_key(node, 0));
1195 
1196 	brelse(path[level].bp_sib_bh);
1197 	path[level].bp_sib_bh = NULL;
1198 	path[level].bp_index += n;
1199 }
1200 
1201 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1202 				     struct nilfs_btree_path *path,
1203 				     int level, __u64 *keyp, __u64 *ptrp)
1204 {
1205 	struct nilfs_btree_node *node, *right;
1206 	int nchildren, rnchildren, n;
1207 
1208 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1209 
1210 	node = nilfs_btree_get_nonroot_node(path, level);
1211 	right = nilfs_btree_get_sib_node(path, level);
1212 	nchildren = nilfs_btree_node_get_nchildren(node);
1213 	rnchildren = nilfs_btree_node_get_nchildren(right);
1214 
1215 	n = (nchildren + rnchildren) / 2 - nchildren;
1216 
1217 	nilfs_btree_node_move_left(btree, node, right, n);
1218 
1219 	if (!buffer_dirty(path[level].bp_bh))
1220 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1221 	if (!buffer_dirty(path[level].bp_sib_bh))
1222 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1223 
1224 	path[level + 1].bp_index++;
1225 	nilfs_btree_promote_key(btree, path, level + 1,
1226 				nilfs_btree_node_get_key(right, 0));
1227 	path[level + 1].bp_index--;
1228 
1229 	brelse(path[level].bp_sib_bh);
1230 	path[level].bp_sib_bh = NULL;
1231 }
1232 
1233 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1234 				    struct nilfs_btree_path *path,
1235 				    int level, __u64 *keyp, __u64 *ptrp)
1236 {
1237 	struct nilfs_btree_node *node, *left;
1238 	int n;
1239 
1240 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1241 
1242 	node = nilfs_btree_get_nonroot_node(path, level);
1243 	left = nilfs_btree_get_sib_node(path, level);
1244 
1245 	n = nilfs_btree_node_get_nchildren(node);
1246 
1247 	nilfs_btree_node_move_left(btree, left, node, n);
1248 
1249 	if (!buffer_dirty(path[level].bp_sib_bh))
1250 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1251 
1252 	nilfs_btnode_delete(path[level].bp_bh);
1253 	path[level].bp_bh = path[level].bp_sib_bh;
1254 	path[level].bp_sib_bh = NULL;
1255 	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1256 }
1257 
1258 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1259 				     struct nilfs_btree_path *path,
1260 				     int level, __u64 *keyp, __u64 *ptrp)
1261 {
1262 	struct nilfs_btree_node *node, *right;
1263 	int n;
1264 
1265 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1266 
1267 	node = nilfs_btree_get_nonroot_node(path, level);
1268 	right = nilfs_btree_get_sib_node(path, level);
1269 
1270 	n = nilfs_btree_node_get_nchildren(right);
1271 
1272 	nilfs_btree_node_move_left(btree, node, right, n);
1273 
1274 	if (!buffer_dirty(path[level].bp_bh))
1275 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1276 
1277 	nilfs_btnode_delete(path[level].bp_sib_bh);
1278 	path[level].bp_sib_bh = NULL;
1279 	path[level + 1].bp_index++;
1280 }
1281 
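/*
 * nilfs_btree_shrink - decrease the height of the B-tree by one
 *
 * Used when the root is left with a single child whose contents fit into
 * the root: the child's entries are pulled up into the root and the child's
 * node block is deleted.
 */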
1282 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1283 			       struct nilfs_btree_path *path,
1284 			       int level, __u64 *keyp, __u64 *ptrp)
1285 {
1286 	struct nilfs_btree_node *root, *child;
1287 	int n;
1288 
1289 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1290 
1291 	root = nilfs_btree_get_root(btree);
1292 	child = nilfs_btree_get_nonroot_node(path, level);
1293 
1294 	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1295 	nilfs_btree_node_set_level(root, level);
1296 	n = nilfs_btree_node_get_nchildren(child);
1297 	nilfs_btree_node_move_left(btree, root, child, n);
1298 
1299 	nilfs_btnode_delete(path[level].bp_bh);
1300 	path[level].bp_bh = NULL;
1301 }
1302 
1303 
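/*
 * nilfs_btree_prepare_delete - plan a deletion without modifying the tree
 *
 * The counterpart of nilfs_btree_prepare_insert(): for each level that would
 * underflow it selects borrowing from or concatenating with a sibling (or
 * shrinking the tree), and prepares the release of the block pointers that
 * will disappear, so that the commit phase cannot fail.
 */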
1304 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1305 				      struct nilfs_btree_path *path,
1306 				      int *levelp,
1307 				      struct nilfs_bmap_stats *stats,
1308 				      struct inode *dat)
1309 {
1310 	struct buffer_head *bh;
1311 	struct nilfs_btree_node *node, *parent, *sib;
1312 	__u64 sibptr;
1313 	int pindex, level, ret;
1314 
1315 	ret = 0;
1316 	stats->bs_nblocks = 0;
1317 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1318 	     level < nilfs_btree_height(btree) - 1;
1319 	     level++) {
1320 		node = nilfs_btree_get_nonroot_node(path, level);
1321 		path[level].bp_oldreq.bpr_ptr =
1322 			nilfs_btree_node_get_ptr(btree, node,
1323 						 path[level].bp_index);
1324 		ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1325 						 &path[level].bp_oldreq, dat);
1326 		if (ret < 0)
1327 			goto err_out_child_node;
1328 
1329 		if (nilfs_btree_node_get_nchildren(node) >
1330 		    nilfs_btree_node_nchildren_min(node, btree)) {
1331 			path[level].bp_op = nilfs_btree_do_delete;
1332 			stats->bs_nblocks++;
1333 			goto out;
1334 		}
1335 
1336 		parent = nilfs_btree_get_node(btree, path, level + 1);
1337 		pindex = path[level + 1].bp_index;
1338 
1339 		if (pindex > 0) {
1340 			/* left sibling */
1341 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1342 							  pindex - 1);
1343 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1344 			if (ret < 0)
1345 				goto err_out_curr_node;
1346 			sib = (struct nilfs_btree_node *)bh->b_data;
1347 			if (nilfs_btree_node_get_nchildren(sib) >
1348 			    nilfs_btree_node_nchildren_min(sib, btree)) {
1349 				path[level].bp_sib_bh = bh;
1350 				path[level].bp_op = nilfs_btree_borrow_left;
1351 				stats->bs_nblocks++;
1352 				goto out;
1353 			} else {
1354 				path[level].bp_sib_bh = bh;
1355 				path[level].bp_op = nilfs_btree_concat_left;
1356 				stats->bs_nblocks++;
1357 				/* continue; */
1358 			}
1359 		} else if (pindex <
1360 			   nilfs_btree_node_get_nchildren(parent) - 1) {
1361 			/* right sibling */
1362 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1363 							  pindex + 1);
1364 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1365 			if (ret < 0)
1366 				goto err_out_curr_node;
1367 			sib = (struct nilfs_btree_node *)bh->b_data;
1368 			if (nilfs_btree_node_get_nchildren(sib) >
1369 			    nilfs_btree_node_nchildren_min(sib, btree)) {
1370 				path[level].bp_sib_bh = bh;
1371 				path[level].bp_op = nilfs_btree_borrow_right;
1372 				stats->bs_nblocks++;
1373 				goto out;
1374 			} else {
1375 				path[level].bp_sib_bh = bh;
1376 				path[level].bp_op = nilfs_btree_concat_right;
1377 				stats->bs_nblocks++;
1378 				/* continue; */
1379 			}
1380 		} else {
1381 			/* no siblings */
1382 			/* the only child of the root node */
1383 			WARN_ON(level != nilfs_btree_height(btree) - 2);
1384 			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1385 			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1386 				path[level].bp_op = nilfs_btree_shrink;
1387 				stats->bs_nblocks += 2;
1388 			} else {
1389 				path[level].bp_op = nilfs_btree_do_delete;
1390 				stats->bs_nblocks++;
1391 			}
1392 
1393 			goto out;
1394 
1395 		}
1396 	}
1397 
1398 	node = nilfs_btree_get_root(btree);
1399 	path[level].bp_oldreq.bpr_ptr =
1400 		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1401 
1402 	ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1403 					 &path[level].bp_oldreq, dat);
1404 	if (ret < 0)
1405 		goto err_out_child_node;
1406 
1407 	/* child of the root node is deleted */
1408 	path[level].bp_op = nilfs_btree_do_delete;
1409 	stats->bs_nblocks++;
1410 
1411 	/* success */
1412  out:
1413 	*levelp = level;
1414 	return ret;
1415 
1416 	/* error */
1417  err_out_curr_node:
1418 	nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1419  err_out_child_node:
1420 	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1421 		brelse(path[level].bp_sib_bh);
1422 		nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1423 					 &path[level].bp_oldreq, dat);
1424 	}
1425 	*levelp = level;
1426 	stats->bs_nblocks = 0;
1427 	return ret;
1428 }
1429 
1430 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1431 				      struct nilfs_btree_path *path,
1432 				      int maxlevel, struct inode *dat)
1433 {
1434 	int level;
1435 
1436 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1437 		nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1438 					  &path[level].bp_oldreq, dat);
1439 		path[level].bp_op(btree, path, level, NULL, NULL);
1440 	}
1441 
1442 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1443 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1444 }
1445 
1446 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1448 {
1449 	struct nilfs_btree *btree;
1450 	struct nilfs_btree_path *path;
1451 	struct nilfs_bmap_stats stats;
1452 	struct inode *dat;
1453 	int level, ret;
1454 
1455 	btree = (struct nilfs_btree *)bmap;
1456 	path = nilfs_btree_alloc_path();
1457 	if (path == NULL)
1458 		return -ENOMEM;
1459 	nilfs_btree_init_path(path);
1460 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1461 				    NILFS_BTREE_LEVEL_NODE_MIN);
1462 	if (ret < 0)
1463 		goto out;
1464 
1466 	dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1467 		nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1468 
1469 	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1470 	if (ret < 0)
1471 		goto out;
1472 	nilfs_btree_commit_delete(btree, path, level, dat);
1473 	nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1474 
1475 out:
1476 	nilfs_btree_release_path(path);
1477 	nilfs_btree_free_path(path);
1478 	return ret;
1479 }
1480 
1481 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1482 {
1483 	struct nilfs_btree *btree;
1484 	struct nilfs_btree_path *path;
1485 	int ret;
1486 
1487 	btree = (struct nilfs_btree *)bmap;
1488 	path = nilfs_btree_alloc_path();
1489 	if (path == NULL)
1490 		return -ENOMEM;
1491 	nilfs_btree_init_path(path);
1492 
1493 	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1494 
1495 	nilfs_btree_release_path(path);
1496 	nilfs_btree_free_path(path);
1497 
1498 	return ret;
1499 }
1500 
1501 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1502 {
1503 	struct buffer_head *bh;
1504 	struct nilfs_btree *btree;
1505 	struct nilfs_btree_node *root, *node;
1506 	__u64 maxkey, nextmaxkey;
1507 	__u64 ptr;
1508 	int nchildren, ret;
1509 
1510 	btree = (struct nilfs_btree *)bmap;
1511 	root = nilfs_btree_get_root(btree);
1512 	switch (nilfs_btree_height(btree)) {
1513 	case 2:
1514 		bh = NULL;
1515 		node = root;
1516 		break;
1517 	case 3:
1518 		nchildren = nilfs_btree_node_get_nchildren(root);
1519 		if (nchildren > 1)
1520 			return 0;
1521 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1522 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1523 		if (ret < 0)
1524 			return ret;
1525 		node = (struct nilfs_btree_node *)bh->b_data;
1526 		break;
1527 	default:
1528 		return 0;
1529 	}
1530 
1531 	nchildren = nilfs_btree_node_get_nchildren(node);
1532 	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1533 	nextmaxkey = (nchildren > 1) ?
1534 		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1535 	if (bh != NULL)
1536 		brelse(bh);
1537 
1538 	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1539 }
1540 
1541 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1542 				   __u64 *keys, __u64 *ptrs, int nitems)
1543 {
1544 	struct buffer_head *bh;
1545 	struct nilfs_btree *btree;
1546 	struct nilfs_btree_node *node, *root;
1547 	__le64 *dkeys;
1548 	__le64 *dptrs;
1549 	__u64 ptr;
1550 	int nchildren, i, ret;
1551 
1552 	btree = (struct nilfs_btree *)bmap;
1553 	root = nilfs_btree_get_root(btree);
1554 	switch (nilfs_btree_height(btree)) {
1555 	case 2:
1556 		bh = NULL;
1557 		node = root;
1558 		break;
1559 	case 3:
1560 		nchildren = nilfs_btree_node_get_nchildren(root);
1561 		WARN_ON(nchildren > 1);
1562 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1563 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1564 		if (ret < 0)
1565 			return ret;
1566 		node = (struct nilfs_btree_node *)bh->b_data;
1567 		break;
1568 	default:
1569 		node = NULL;
1570 		return -EINVAL;
1571 	}
1572 
1573 	nchildren = nilfs_btree_node_get_nchildren(node);
1574 	if (nchildren < nitems)
1575 		nitems = nchildren;
1576 	dkeys = nilfs_btree_node_dkeys(node);
1577 	dptrs = nilfs_btree_node_dptrs(node, btree);
1578 	for (i = 0; i < nitems; i++) {
1579 		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1580 		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1581 	}
1582 
1583 	if (bh != NULL)
1584 		brelse(bh);
1585 
1586 	return nitems;
1587 }
1588 
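/*
 * Bmap conversion is done in a prepare/commit pair.  If the existing entries
 * plus the new one fit into the root node, only a pointer for the data block
 * (@dreq) is allocated; otherwise a single level-1 node block is allocated as
 * well (@nreq and *@bhp) to hold the entries below a two-level root.
 */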
1589 static int
1590 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1591 				       union nilfs_bmap_ptr_req *dreq,
1592 				       union nilfs_bmap_ptr_req *nreq,
1593 				       struct buffer_head **bhp,
1594 				       struct nilfs_bmap_stats *stats)
1595 {
1596 	struct buffer_head *bh;
1597 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1598 	struct inode *dat = NULL;
1599 	int ret;
1600 
1601 	stats->bs_nblocks = 0;
1602 
1603 	/* allocate a new ptr for the data block */
1604 	/* (a near ptr cannot be found because no lookup path exists yet) */
1605 	if (NILFS_BMAP_USE_VBN(bmap)) {
1606 		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1607 		dat = nilfs_bmap_get_dat(bmap);
1608 	}
1609 
1610 	ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1611 	if (ret < 0)
1612 		return ret;
1613 
1614 	*bhp = NULL;
1615 	stats->bs_nblocks++;
1616 	if (nreq != NULL) {
1617 		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1618 		ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1619 		if (ret < 0)
1620 			goto err_out_dreq;
1621 
1622 		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1623 		if (ret < 0)
1624 			goto err_out_nreq;
1625 
1626 		*bhp = bh;
1627 		stats->bs_nblocks++;
1628 	}
1629 
1630 	/* success */
1631 	return 0;
1632 
1633 	/* error */
1634  err_out_nreq:
1635 	nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1636  err_out_dreq:
1637 	nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1638 	stats->bs_nblocks = 0;
1639 	return ret;
1640 
1641 }
1642 
1643 static void
1644 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1645 				      __u64 key, __u64 ptr,
1646 				      const __u64 *keys, const __u64 *ptrs,
1647 				      int n,
1648 				      union nilfs_bmap_ptr_req *dreq,
1649 				      union nilfs_bmap_ptr_req *nreq,
1650 				      struct buffer_head *bh)
1651 {
1652 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1653 	struct nilfs_btree_node *node;
1654 	struct inode *dat;
1655 	__u64 tmpptr;
1656 
1657 	/* free resources */
1658 	if (bmap->b_ops->bop_clear != NULL)
1659 		bmap->b_ops->bop_clear(bmap);
1660 
1661 	/* ptr must be a pointer to a buffer head. */
1662 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1663 
1664 	/* convert and insert */
1665 	dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1666 	nilfs_btree_init(bmap);
1667 	if (nreq != NULL) {
1668 		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1669 		nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1670 
1671 		/* create child node at level 1 */
1672 		node = (struct nilfs_btree_node *)bh->b_data;
1673 		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1674 		nilfs_btree_node_insert(btree, node,
1675 					key, dreq->bpr_ptr, n);
1676 		if (!buffer_dirty(bh))
1677 			nilfs_btnode_mark_dirty(bh);
1678 		if (!nilfs_bmap_dirty(bmap))
1679 			nilfs_bmap_set_dirty(bmap);
1680 
1681 		brelse(bh);
1682 
1683 		/* create root node at level 2 */
1684 		node = nilfs_btree_get_root(btree);
1685 		tmpptr = nreq->bpr_ptr;
1686 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1687 				      2, 1, &keys[0], &tmpptr);
1688 	} else {
1689 		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1690 
1691 		/* create root node at level 1 */
1692 		node = nilfs_btree_get_root(btree);
1693 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1694 				      1, n, keys, ptrs);
1695 		nilfs_btree_node_insert(btree, node,
1696 					key, dreq->bpr_ptr, n);
1697 		if (!nilfs_bmap_dirty(bmap))
1698 			nilfs_bmap_set_dirty(bmap);
1699 	}
1700 
1701 	if (NILFS_BMAP_USE_VBN(bmap))
1702 		nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1703 }
1704 
1705 /**
1706  * nilfs_btree_convert_and_insert - convert the bmap to a B-tree and insert a new entry
1707  * @bmap: bmap structure to be converted to a B-tree
1708  * @key: key of the new entry to be inserted
1709  * @ptr: pointer of the new entry (the address of its buffer head, cast to __u64)
1710  * @keys: keys of the entries the bmap currently holds
1711  * @ptrs: pointers of the entries the bmap currently holds
1712  * @n: number of entries in @keys and @ptrs
1713  */
1714 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1715 				   __u64 key, __u64 ptr,
1716 				   const __u64 *keys, const __u64 *ptrs, int n)
1717 {
1718 	struct buffer_head *bh;
1719 	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1720 	struct nilfs_bmap_stats stats;
1721 	int ret;
1722 
1723 	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1724 		di = &dreq;
1725 		ni = NULL;
1726 	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1727 			   1 << bmap->b_inode->i_blkbits)) {
1728 		di = &dreq;
1729 		ni = &nreq;
1730 	} else {
1731 		di = NULL;
1732 		ni = NULL;
1733 		BUG();
1734 	}
1735 
1736 	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1737 						     &stats);
1738 	if (ret < 0)
1739 		return ret;
1740 	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1741 					      di, ni, bh);
1742 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1743 	return 0;
1744 }
1745 
1746 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1747 				   struct nilfs_btree_path *path,
1748 				   int level,
1749 				   struct buffer_head *bh)
1750 {
1751 	while ((++level < nilfs_btree_height(btree) - 1) &&
1752 	       !buffer_dirty(path[level].bp_bh))
1753 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1754 
1755 	return 0;
1756 }
1757 
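/*
 * nilfs_btree_prepare_update_v - prepare moving a node block to a new
 * virtual block address: a DAT update from the old address to the new one is
 * prepared, and if the buffer lives in the btnode cache, the change of its
 * cache key (block address) is prepared as well.  Both are undone on error.
 */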
1758 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1759 					struct nilfs_btree_path *path,
1760 					int level, struct inode *dat)
1761 {
1762 	struct nilfs_btree_node *parent;
1763 	int ret;
1764 
1765 	parent = nilfs_btree_get_node(btree, path, level + 1);
1766 	path[level].bp_oldreq.bpr_ptr =
1767 		nilfs_btree_node_get_ptr(btree, parent,
1768 					 path[level + 1].bp_index);
1769 	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1770 	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1771 				       &path[level].bp_newreq.bpr_req);
1772 	if (ret < 0)
1773 		return ret;
1774 
1775 	if (buffer_nilfs_node(path[level].bp_bh)) {
1776 		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1777 		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1778 		path[level].bp_ctxt.bh = path[level].bp_bh;
1779 		ret = nilfs_btnode_prepare_change_key(
1780 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1781 			&path[level].bp_ctxt);
1782 		if (ret < 0) {
1783 			nilfs_dat_abort_update(dat,
1784 					       &path[level].bp_oldreq.bpr_req,
1785 					       &path[level].bp_newreq.bpr_req);
1786 			return ret;
1787 		}
1788 	}
1789 
1790 	return 0;
1791 }
1792 
1793 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1794 					struct nilfs_btree_path *path,
1795 					int level, struct inode *dat)
1796 {
1797 	struct nilfs_btree_node *parent;
1798 
1799 	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1800 				&path[level].bp_newreq.bpr_req,
1801 				btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1802 
1803 	if (buffer_nilfs_node(path[level].bp_bh)) {
1804 		nilfs_btnode_commit_change_key(
1805 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1806 			&path[level].bp_ctxt);
1807 		path[level].bp_bh = path[level].bp_ctxt.bh;
1808 	}
1809 	set_buffer_nilfs_volatile(path[level].bp_bh);
1810 
1811 	parent = nilfs_btree_get_node(btree, path, level + 1);
1812 	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1813 				 path[level].bp_newreq.bpr_ptr);
1814 }
1815 
1816 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1817 				       struct nilfs_btree_path *path,
1818 				       int level, struct inode *dat)
1819 {
1820 	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1821 			       &path[level].bp_newreq.bpr_req);
1822 	if (buffer_nilfs_node(path[level].bp_bh))
1823 		nilfs_btnode_abort_change_key(
1824 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1825 			&path[level].bp_ctxt);
1826 }
1827 
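/*
 * Prepare virtual-address updates along the path: the buffer at @minlevel is
 * prepared unless it already carries a volatile address, and higher levels
 * are prepared until an already-dirty buffer or the root level is reached.
 * On success *maxlevelp receives the topmost level to be committed; on
 * failure every level prepared so far is aborted.
 */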
1828 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1829 					   struct nilfs_btree_path *path,
1830 					   int minlevel, int *maxlevelp,
1831 					   struct inode *dat)
1832 {
1833 	int level, ret;
1834 
1835 	level = minlevel;
1836 	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1837 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1838 		if (ret < 0)
1839 			return ret;
1840 	}
1841 	while ((++level < nilfs_btree_height(btree) - 1) &&
1842 	       !buffer_dirty(path[level].bp_bh)) {
1843 
1844 		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1845 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1846 		if (ret < 0)
1847 			goto out;
1848 	}
1849 
1850 	/* success */
1851 	*maxlevelp = level - 1;
1852 	return 0;
1853 
1854 	/* error */
1855  out:
1856 	while (--level > minlevel)
1857 		nilfs_btree_abort_update_v(btree, path, level, dat);
1858 	if (!buffer_nilfs_volatile(path[level].bp_bh))
1859 		nilfs_btree_abort_update_v(btree, path, level, dat);
1860 	return ret;
1861 }
1862 
1863 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1864 					   struct nilfs_btree_path *path,
1865 					   int minlevel, int maxlevel,
1866 					   struct buffer_head *bh,
1867 					   struct inode *dat)
1868 {
1869 	int level;
1870 
1871 	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1872 		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1873 
1874 	for (level = minlevel + 1; level <= maxlevel; level++)
1875 		nilfs_btree_commit_update_v(btree, path, level, dat);
1876 }
1877 
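/*
 * Virtual-address propagation: prepare updates along the path from @level
 * upward, mark the DAT entry of the starting block dirty if its address is
 * already volatile (i.e. it has already been reassigned), and commit the
 * prepared updates.
 */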
1878 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1879 				   struct nilfs_btree_path *path,
1880 				   int level, struct buffer_head *bh)
1881 {
1882 	int maxlevel, ret;
1883 	struct nilfs_btree_node *parent;
1884 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1885 	__u64 ptr;
1886 
1887 	get_bh(bh);
1888 	path[level].bp_bh = bh;
1889 	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1890 					      dat);
1891 	if (ret < 0)
1892 		goto out;
1893 
1894 	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1895 		parent = nilfs_btree_get_node(btree, path, level + 1);
1896 		ptr = nilfs_btree_node_get_ptr(btree, parent,
1897 					       path[level + 1].bp_index);
1898 		ret = nilfs_dat_mark_dirty(dat, ptr);
1899 		if (ret < 0)
1900 			goto out;
1901 	}
1902 
1903 	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1904 
1905  out:
1906 	brelse(path[level].bp_bh);
1907 	path[level].bp_bh = NULL;
1908 	return ret;
1909 }
1910 
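/*
 * bop_propagate entry point: find the path to the dirty buffer, which may be
 * either a data block or a btree node block, and propagate its dirty state
 * toward the root using the virtual or physical variant depending on the
 * bmap's pointer type.
 */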
1911 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1912 				 struct buffer_head *bh)
1913 {
1914 	struct nilfs_btree *btree;
1915 	struct nilfs_btree_path *path;
1916 	struct nilfs_btree_node *node;
1917 	__u64 key;
1918 	int level, ret;
1919 
1920 	WARN_ON(!buffer_dirty(bh));
1921 
1922 	btree = (struct nilfs_btree *)bmap;
1923 	path = nilfs_btree_alloc_path();
1924 	if (path == NULL)
1925 		return -ENOMEM;
1926 	nilfs_btree_init_path(path);
1927 
1928 	if (buffer_nilfs_node(bh)) {
1929 		node = (struct nilfs_btree_node *)bh->b_data;
1930 		key = nilfs_btree_node_get_key(node, 0);
1931 		level = nilfs_btree_node_get_level(node);
1932 	} else {
1933 		key = nilfs_bmap_data_get_key(bmap, bh);
1934 		level = NILFS_BTREE_LEVEL_DATA;
1935 	}
1936 
1937 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1938 	if (ret < 0) {
1939 		if (unlikely(ret == -ENOENT))
1940 			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1941 			       __func__, (unsigned long long)key, level);
1942 		goto out;
1943 	}
1944 
1945 	ret = NILFS_BMAP_USE_VBN(bmap) ?
1946 		nilfs_btree_propagate_v(btree, path, level, bh) :
1947 		nilfs_btree_propagate_p(btree, path, level, bh);
1948 
1949  out:
1950 	nilfs_btree_release_path(path);
1951 	nilfs_btree_free_path(path);
1952 
1953 	return ret;
1954 }
1955 
1956 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1957 				    struct buffer_head *bh)
1958 {
1959 	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1960 }
1961 
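/*
 * Queue a dirty node buffer on the list for its level, keeping each list
 * sorted in ascending order of the node's first key.  A reference to the
 * buffer is taken with get_bh() before it is queued.
 */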
1962 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1963 					 struct list_head *lists,
1964 					 struct buffer_head *bh)
1965 {
1966 	struct list_head *head;
1967 	struct buffer_head *cbh;
1968 	struct nilfs_btree_node *node, *cnode;
1969 	__u64 key, ckey;
1970 	int level;
1971 
1972 	get_bh(bh);
1973 	node = (struct nilfs_btree_node *)bh->b_data;
1974 	key = nilfs_btree_node_get_key(node, 0);
1975 	level = nilfs_btree_node_get_level(node);
1976 	list_for_each(head, &lists[level]) {
1977 		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1978 		cnode = (struct nilfs_btree_node *)cbh->b_data;
1979 		ckey = nilfs_btree_node_get_key(cnode, 0);
1980 		if (key < ckey)
1981 			break;
1982 	}
1983 	list_add_tail(&bh->b_assoc_buffers, head);
1984 }
1985 
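/*
 * bop_lookup_dirty_buffers entry point: scan the btnode cache for dirty node
 * buffers, bucket them by level in key order using the helper above, and
 * splice the buckets onto @listp, lowest level first.
 */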
1986 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1987 					     struct list_head *listp)
1988 {
1989 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1990 	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1991 	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1992 	struct pagevec pvec;
1993 	struct buffer_head *bh, *head;
1994 	pgoff_t index = 0;
1995 	int level, i;
1996 
1997 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1998 	     level < NILFS_BTREE_LEVEL_MAX;
1999 	     level++)
2000 		INIT_LIST_HEAD(&lists[level]);
2001 
2002 	pagevec_init(&pvec, 0);
2003 
2004 	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2005 				  PAGEVEC_SIZE)) {
2006 		for (i = 0; i < pagevec_count(&pvec); i++) {
2007 			bh = head = page_buffers(pvec.pages[i]);
2008 			do {
2009 				if (buffer_dirty(bh))
2010 					nilfs_btree_add_dirty_buffer(btree,
2011 								     lists, bh);
2012 			} while ((bh = bh->b_this_page) != head);
2013 		}
2014 		pagevec_release(&pvec);
2015 		cond_resched();
2016 	}
2017 
2018 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2019 	     level < NILFS_BTREE_LEVEL_MAX;
2020 	     level++)
2021 		list_splice_tail(&lists[level], listp);
2022 }
2023 
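/*
 * Block assignment at segment-write time.  The "_p" variant rewrites the
 * parent node's pointer (and, for a node buffer, its btnode cache key) to
 * the real disk block number @blocknr; the "_v" variant instead records
 * @blocknr in the block's DAT entry.  Both fill @binfo with the on-disk
 * block information.
 */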
2024 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2025 				struct nilfs_btree_path *path,
2026 				int level,
2027 				struct buffer_head **bh,
2028 				sector_t blocknr,
2029 				union nilfs_binfo *binfo)
2030 {
2031 	struct nilfs_btree_node *parent;
2032 	__u64 key;
2033 	__u64 ptr;
2034 	int ret;
2035 
2036 	parent = nilfs_btree_get_node(btree, path, level + 1);
2037 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2038 				       path[level + 1].bp_index);
2039 	if (buffer_nilfs_node(*bh)) {
2040 		path[level].bp_ctxt.oldkey = ptr;
2041 		path[level].bp_ctxt.newkey = blocknr;
2042 		path[level].bp_ctxt.bh = *bh;
2043 		ret = nilfs_btnode_prepare_change_key(
2044 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2045 			&path[level].bp_ctxt);
2046 		if (ret < 0)
2047 			return ret;
2048 		nilfs_btnode_commit_change_key(
2049 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2050 			&path[level].bp_ctxt);
2051 		*bh = path[level].bp_ctxt.bh;
2052 	}
2053 
2054 	nilfs_btree_node_set_ptr(btree, parent,
2055 				 path[level + 1].bp_index, blocknr);
2056 
2057 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2058 	/* on-disk format */
2059 	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2060 	binfo->bi_dat.bi_level = level;
2061 
2062 	return 0;
2063 }
2064 
2065 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2066 				struct nilfs_btree_path *path,
2067 				int level,
2068 				struct buffer_head **bh,
2069 				sector_t blocknr,
2070 				union nilfs_binfo *binfo)
2071 {
2072 	struct nilfs_btree_node *parent;
2073 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2074 	__u64 key;
2075 	__u64 ptr;
2076 	union nilfs_bmap_ptr_req req;
2077 	int ret;
2078 
2079 	parent = nilfs_btree_get_node(btree, path, level + 1);
2080 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2081 				       path[level + 1].bp_index);
2082 	req.bpr_ptr = ptr;
2083 	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2084 	if (ret < 0)
2085 		return ret;
2086 	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2087 
2088 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2089 	/* on-disk format */
2090 	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2091 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2092 
2093 	return 0;
2094 }
2095 
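/*
 * bop_assign entry point: look up the path to the buffer and dispatch to the
 * virtual or physical assignment routine above.
 */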
2096 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2097 			      struct buffer_head **bh,
2098 			      sector_t blocknr,
2099 			      union nilfs_binfo *binfo)
2100 {
2101 	struct nilfs_btree *btree;
2102 	struct nilfs_btree_path *path;
2103 	struct nilfs_btree_node *node;
2104 	__u64 key;
2105 	int level, ret;
2106 
2107 	btree = (struct nilfs_btree *)bmap;
2108 	path = nilfs_btree_alloc_path();
2109 	if (path == NULL)
2110 		return -ENOMEM;
2111 	nilfs_btree_init_path(path);
2112 
2113 	if (buffer_nilfs_node(*bh)) {
2114 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2115 		key = nilfs_btree_node_get_key(node, 0);
2116 		level = nilfs_btree_node_get_level(node);
2117 	} else {
2118 		key = nilfs_bmap_data_get_key(bmap, *bh);
2119 		level = NILFS_BTREE_LEVEL_DATA;
2120 	}
2121 
2122 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2123 	if (ret < 0) {
2124 		WARN_ON(ret == -ENOENT);
2125 		goto out;
2126 	}
2127 
2128 	ret = NILFS_BMAP_USE_VBN(bmap) ?
2129 		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2130 		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2131 
2132  out:
2133 	nilfs_btree_release_path(path);
2134 	nilfs_btree_free_path(path);
2135 
2136 	return ret;
2137 }
2138 
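/*
 * GC variant of bop_assign: a block moved by the cleaner keeps its virtual
 * block number, so only its DAT entry is relocated to @blocknr and the
 * existing virtual address is recorded in @binfo.
 */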
2139 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2140 				 struct buffer_head **bh,
2141 				 sector_t blocknr,
2142 				 union nilfs_binfo *binfo)
2143 {
2144 	struct nilfs_btree_node *node;
2145 	__u64 key;
2146 	int ret;
2147 
2148 	ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2149 			     blocknr);
2150 	if (ret < 0)
2151 		return ret;
2152 
2153 	if (buffer_nilfs_node(*bh)) {
2154 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2155 		key = nilfs_btree_node_get_key(node, 0);
2156 	} else
2157 		key = nilfs_bmap_data_get_key(bmap, *bh);
2158 
2159 	/* on-disk format */
2160 	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2161 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2162 
2163 	return 0;
2164 }
2165 
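/*
 * bop_mark entry point: look up the node block that covers @key at @level,
 * mark it dirty, and mark the bmap dirty as well.
 */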
2166 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2167 {
2168 	struct buffer_head *bh;
2169 	struct nilfs_btree *btree;
2170 	struct nilfs_btree_path *path;
2171 	__u64 ptr;
2172 	int ret;
2173 
2174 	btree = (struct nilfs_btree *)bmap;
2175 	path = nilfs_btree_alloc_path();
2176 	if (path == NULL)
2177 		return -ENOMEM;
2178 	nilfs_btree_init_path(path);
2179 
2180 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2181 	if (ret < 0) {
2182 		WARN_ON(ret == -ENOENT);
2183 		goto out;
2184 	}
2185 	ret = nilfs_btree_get_block(btree, ptr, &bh);
2186 	if (ret < 0) {
2187 		WARN_ON(ret == -ENOENT);
2188 		goto out;
2189 	}
2190 
2191 	if (!buffer_dirty(bh))
2192 		nilfs_btnode_mark_dirty(bh);
2193 	brelse(bh);
2194 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
2195 		nilfs_bmap_set_dirty(&btree->bt_bmap);
2196 
2197  out:
2198 	nilfs_btree_release_path(path);
2199 	nilfs_btree_free_path(path);
2200 	return ret;
2201 }
2202 
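/*
 * Operation tables.  The regular table serves ordinary btree bmaps; the GC
 * table is installed for inodes handled by the cleaner, which only need
 * propagation, dirty-buffer lookup and block assignment.
 */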
2203 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2204 	.bop_lookup		=	nilfs_btree_lookup,
2205 	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2206 	.bop_insert		=	nilfs_btree_insert,
2207 	.bop_delete		=	nilfs_btree_delete,
2208 	.bop_clear		=	NULL,
2209 
2210 	.bop_propagate		=	nilfs_btree_propagate,
2211 
2212 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2213 
2214 	.bop_assign		=	nilfs_btree_assign,
2215 	.bop_mark		=	nilfs_btree_mark,
2216 
2217 	.bop_last_key		=	nilfs_btree_last_key,
2218 	.bop_check_insert	=	NULL,
2219 	.bop_check_delete	=	nilfs_btree_check_delete,
2220 	.bop_gather_data	=	nilfs_btree_gather_data,
2221 };
2222 
2223 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2224 	.bop_lookup		=	NULL,
2225 	.bop_lookup_contig	=	NULL,
2226 	.bop_insert		=	NULL,
2227 	.bop_delete		=	NULL,
2228 	.bop_clear		=	NULL,
2229 
2230 	.bop_propagate		=	nilfs_btree_propagate_gc,
2231 
2232 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2233 
2234 	.bop_assign		=	nilfs_btree_assign_gc,
2235 	.bop_mark		=	NULL,
2236 
2237 	.bop_last_key		=	NULL,
2238 	.bop_check_insert	=	NULL,
2239 	.bop_check_delete	=	NULL,
2240 	.bop_gather_data	=	NULL,
2241 };
2242 
2243 int nilfs_btree_init(struct nilfs_bmap *bmap)
2244 {
2245 	bmap->b_ops = &nilfs_btree_ops;
2246 	return 0;
2247 }
2248 
2249 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2250 {
2251 	bmap->b_ops = &nilfs_btree_ops_gc;
2252 }
2253