xref: /openbmc/linux/fs/nilfs2/btree.c (revision fd589a8f)
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22 
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33 
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
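 * @bp_ctxt: change-key context used when a b-tree node block is relocated in the btnode cache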
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44 	struct buffer_head *bp_bh;
45 	struct buffer_head *bp_sib_bh;
46 	int bp_index;
47 	union nilfs_bmap_ptr_req bp_oldreq;
48 	union nilfs_bmap_ptr_req bp_newreq;
49 	struct nilfs_btnode_chkey_ctxt bp_ctxt;
50 	void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51 		      int, __u64 *, __u64 *);
52 };
53 
54 /*
55  * B-tree path operations
56  */
57 
58 static struct kmem_cache *nilfs_btree_path_cache;
59 
60 int __init nilfs_btree_path_cache_init(void)
61 {
62 	nilfs_btree_path_cache =
63 		kmem_cache_create("nilfs2_btree_path_cache",
64 				  sizeof(struct nilfs_btree_path) *
65 				  NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66 	return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68 
69 void nilfs_btree_path_cache_destroy(void)
70 {
71 	kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73 
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76 	return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78 
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81 	kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83 
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86 	int level;
87 
88 	for (level = NILFS_BTREE_LEVEL_DATA;
89 	     level < NILFS_BTREE_LEVEL_MAX;
90 	     level++) {
91 		path[level].bp_bh = NULL;
92 		path[level].bp_sib_bh = NULL;
93 		path[level].bp_index = 0;
94 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96 		path[level].bp_op = NULL;
97 	}
98 }
99 
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102 	int level;
103 
104 	for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105 	     level++)
106 		brelse(path[level].bp_bh);
107 }
108 
109 /*
110  * B-tree node operations
111  */
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113 				 struct buffer_head **bhp)
114 {
115 	struct address_space *btnc =
116 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117 	return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
118 }
119 
120 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
121 				     __u64 ptr, struct buffer_head **bhp)
122 {
123 	struct address_space *btnc =
124 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125 	int ret;
126 
127 	ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
128 	if (!ret)
129 		set_buffer_nilfs_volatile(*bhp);
130 	return ret;
131 }
132 
133 static inline int
134 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
135 {
136 	return node->bn_flags;
137 }
138 
139 static inline void
140 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
141 {
142 	node->bn_flags = flags;
143 }
144 
145 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
146 {
147 	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
148 }
149 
150 static inline int
151 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
152 {
153 	return node->bn_level;
154 }
155 
156 static inline void
157 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
158 {
159 	node->bn_level = level;
160 }
161 
162 static inline int
163 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
164 {
165 	return le16_to_cpu(node->bn_nchildren);
166 }
167 
168 static inline void
169 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
170 {
171 	node->bn_nchildren = cpu_to_le16(nchildren);
172 }
173 
174 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
175 {
176 	return 1 << btree->bt_bmap.b_inode->i_blkbits;
177 }
178 
179 static inline int
180 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
181 			       const struct nilfs_btree *btree)
182 {
183 	return nilfs_btree_node_root(node) ?
184 		NILFS_BTREE_ROOT_NCHILDREN_MIN :
185 		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
186 }
187 
188 static inline int
189 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
190 			       const struct nilfs_btree *btree)
191 {
192 	return nilfs_btree_node_root(node) ?
193 		NILFS_BTREE_ROOT_NCHILDREN_MAX :
194 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
195 }
196 
197 static inline __le64 *
198 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
199 {
200 	return (__le64 *)((char *)(node + 1) +
201 			  (nilfs_btree_node_root(node) ?
202 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
203 }
204 
205 static inline __le64 *
206 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
207 		       const struct nilfs_btree *btree)
208 {
209 	return (__le64 *)(nilfs_btree_node_dkeys(node) +
210 			  nilfs_btree_node_nchildren_max(node, btree));
211 }
212 
213 static inline __u64
214 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
215 {
216 	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
217 }
218 
219 static inline void
220 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
221 {
222 	*(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
223 }
224 
225 static inline __u64
226 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
227 			 const struct nilfs_btree_node *node, int index)
228 {
229 	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
230 					index));
231 }
232 
233 static inline void
234 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
235 			 struct nilfs_btree_node *node, int index, __u64 ptr)
236 {
237 	*(nilfs_btree_node_dptrs(node, btree) + index) =
238 		nilfs_bmap_ptr_to_dptr(ptr);
239 }
240 
241 static void nilfs_btree_node_init(struct nilfs_btree *btree,
242 				  struct nilfs_btree_node *node,
243 				  int flags, int level, int nchildren,
244 				  const __u64 *keys, const __u64 *ptrs)
245 {
246 	__le64 *dkeys;
247 	__le64 *dptrs;
248 	int i;
249 
250 	nilfs_btree_node_set_flags(node, flags);
251 	nilfs_btree_node_set_level(node, level);
252 	nilfs_btree_node_set_nchildren(node, nchildren);
253 
254 	dkeys = nilfs_btree_node_dkeys(node);
255 	dptrs = nilfs_btree_node_dptrs(node, btree);
256 	for (i = 0; i < nchildren; i++) {
257 		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
258 		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
259 	}
260 }
261 
262 /* Assume that the buffer heads corresponding to left and right are locked. */
263 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
264 				       struct nilfs_btree_node *left,
265 				       struct nilfs_btree_node *right,
266 				       int n)
267 {
268 	__le64 *ldkeys, *rdkeys;
269 	__le64 *ldptrs, *rdptrs;
270 	int lnchildren, rnchildren;
271 
272 	ldkeys = nilfs_btree_node_dkeys(left);
273 	ldptrs = nilfs_btree_node_dptrs(left, btree);
274 	lnchildren = nilfs_btree_node_get_nchildren(left);
275 
276 	rdkeys = nilfs_btree_node_dkeys(right);
277 	rdptrs = nilfs_btree_node_dptrs(right, btree);
278 	rnchildren = nilfs_btree_node_get_nchildren(right);
279 
280 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
281 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
282 	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
283 	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
284 
285 	lnchildren += n;
286 	rnchildren -= n;
287 	nilfs_btree_node_set_nchildren(left, lnchildren);
288 	nilfs_btree_node_set_nchildren(right, rnchildren);
289 }
290 
291 /* Assume that the buffer heads corresponding to left and right are locked. */
292 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
293 					struct nilfs_btree_node *left,
294 					struct nilfs_btree_node *right,
295 					int n)
296 {
297 	__le64 *ldkeys, *rdkeys;
298 	__le64 *ldptrs, *rdptrs;
299 	int lnchildren, rnchildren;
300 
301 	ldkeys = nilfs_btree_node_dkeys(left);
302 	ldptrs = nilfs_btree_node_dptrs(left, btree);
303 	lnchildren = nilfs_btree_node_get_nchildren(left);
304 
305 	rdkeys = nilfs_btree_node_dkeys(right);
306 	rdptrs = nilfs_btree_node_dptrs(right, btree);
307 	rnchildren = nilfs_btree_node_get_nchildren(right);
308 
309 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
310 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
311 	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
312 	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
313 
314 	lnchildren -= n;
315 	rnchildren += n;
316 	nilfs_btree_node_set_nchildren(left, lnchildren);
317 	nilfs_btree_node_set_nchildren(right, rnchildren);
318 }
319 
320 /* Assume that the buffer head corresponding to node is locked. */
321 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
322 				    struct nilfs_btree_node *node,
323 				    __u64 key, __u64 ptr, int index)
324 {
325 	__le64 *dkeys;
326 	__le64 *dptrs;
327 	int nchildren;
328 
329 	dkeys = nilfs_btree_node_dkeys(node);
330 	dptrs = nilfs_btree_node_dptrs(node, btree);
331 	nchildren = nilfs_btree_node_get_nchildren(node);
332 	if (index < nchildren) {
333 		memmove(dkeys + index + 1, dkeys + index,
334 			(nchildren - index) * sizeof(*dkeys));
335 		memmove(dptrs + index + 1, dptrs + index,
336 			(nchildren - index) * sizeof(*dptrs));
337 	}
338 	dkeys[index] = nilfs_bmap_key_to_dkey(key);
339 	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
340 	nchildren++;
341 	nilfs_btree_node_set_nchildren(node, nchildren);
342 }
343 
344 /* Assume that the buffer head corresponding to node is locked. */
345 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
346 				    struct nilfs_btree_node *node,
347 				    __u64 *keyp, __u64 *ptrp, int index)
348 {
349 	__u64 key;
350 	__u64 ptr;
351 	__le64 *dkeys;
352 	__le64 *dptrs;
353 	int nchildren;
354 
355 	dkeys = nilfs_btree_node_dkeys(node);
356 	dptrs = nilfs_btree_node_dptrs(node, btree);
357 	key = nilfs_bmap_dkey_to_key(dkeys[index]);
358 	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
359 	nchildren = nilfs_btree_node_get_nchildren(node);
360 	if (keyp != NULL)
361 		*keyp = key;
362 	if (ptrp != NULL)
363 		*ptrp = ptr;
364 
365 	if (index < nchildren - 1) {
366 		memmove(dkeys + index, dkeys + index + 1,
367 			(nchildren - index - 1) * sizeof(*dkeys));
368 		memmove(dptrs + index, dptrs + index + 1,
369 			(nchildren - index - 1) * sizeof(*dptrs));
370 	}
371 	nchildren--;
372 	nilfs_btree_node_set_nchildren(node, nchildren);
373 }
374 
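/*
 * nilfs_btree_node_lookup - binary search @node for @key.
 *
 * *indexp receives the matching slot or, when there is no exact match,
 * the child slot covering @key for internal nodes and the insertion
 * point for bottom-level nodes.  For example, a bottom-level node
 * holding keys {1, 4, 9} looked up with key 5 sets *indexp to 2 and
 * returns zero (not found).
 *
 * Returns nonzero if and only if @key was found.
 */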
375 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
376 				   __u64 key, int *indexp)
377 {
378 	__u64 nkey;
379 	int index, low, high, s;
380 
381 	/* binary search */
382 	low = 0;
383 	high = nilfs_btree_node_get_nchildren(node) - 1;
384 	index = 0;
385 	s = 0;
386 	while (low <= high) {
387 		index = (low + high) / 2;
388 		nkey = nilfs_btree_node_get_key(node, index);
389 		if (nkey == key) {
390 			s = 0;
391 			goto out;
392 		} else if (nkey < key) {
393 			low = index + 1;
394 			s = -1;
395 		} else {
396 			high = index - 1;
397 			s = 1;
398 		}
399 	}
400 
401 	/* adjust index */
402 	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
403 		if (s > 0 && index > 0)
404 			index--;
405 	} else if (s < 0)
406 		index++;
407 
408  out:
409 	*indexp = index;
410 
411 	return s == 0;
412 }
413 
414 static inline struct nilfs_btree_node *
415 nilfs_btree_get_root(const struct nilfs_btree *btree)
416 {
417 	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
418 }
419 
420 static inline struct nilfs_btree_node *
421 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
422 {
423 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
424 }
425 
426 static inline struct nilfs_btree_node *
427 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
428 {
429 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
430 }
431 
432 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
433 {
434 	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
435 }
436 
437 static inline struct nilfs_btree_node *
438 nilfs_btree_get_node(const struct nilfs_btree *btree,
439 		     const struct nilfs_btree_path *path,
440 		     int level)
441 {
442 	return (level == nilfs_btree_height(btree) - 1) ?
443 		nilfs_btree_get_root(btree) :
444 		nilfs_btree_get_nonroot_node(path, level);
445 }
446 
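/*
 * nilfs_btree_do_lookup - descend from the root towards @minlevel
 * following @key, recording the buffer head and child index of every
 * visited level in @path.  On success the pointer found at @minlevel is
 * stored in *ptrp; -ENOENT is returned if @key is not present.
 */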
447 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
448 				 struct nilfs_btree_path *path,
449 				 __u64 key, __u64 *ptrp, int minlevel)
450 {
451 	struct nilfs_btree_node *node;
452 	__u64 ptr;
453 	int level, index, found, ret;
454 
455 	node = nilfs_btree_get_root(btree);
456 	level = nilfs_btree_node_get_level(node);
457 	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
458 		return -ENOENT;
459 
460 	found = nilfs_btree_node_lookup(node, key, &index);
461 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
462 	path[level].bp_bh = NULL;
463 	path[level].bp_index = index;
464 
465 	for (level--; level >= minlevel; level--) {
466 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
467 		if (ret < 0)
468 			return ret;
469 		node = nilfs_btree_get_nonroot_node(path, level);
470 		BUG_ON(level != nilfs_btree_node_get_level(node));
471 		if (!found)
472 			found = nilfs_btree_node_lookup(node, key, &index);
473 		else
474 			index = 0;
475 		if (index < nilfs_btree_node_nchildren_max(node, btree))
476 			ptr = nilfs_btree_node_get_ptr(btree, node, index);
477 		else {
478 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
479 			/* insert */
480 			ptr = NILFS_BMAP_INVALID_PTR;
481 		}
482 		path[level].bp_index = index;
483 	}
484 	if (!found)
485 		return -ENOENT;
486 
487 	if (ptrp != NULL)
488 		*ptrp = ptr;
489 
490 	return 0;
491 }
492 
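/*
 * nilfs_btree_do_lookup_last - walk down the rightmost spine of the
 * tree and return the largest key currently stored (and its pointer)
 * through *keyp and *ptrp.
 */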
493 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
494 				      struct nilfs_btree_path *path,
495 				      __u64 *keyp, __u64 *ptrp)
496 {
497 	struct nilfs_btree_node *node;
498 	__u64 ptr;
499 	int index, level, ret;
500 
501 	node = nilfs_btree_get_root(btree);
502 	index = nilfs_btree_node_get_nchildren(node) - 1;
503 	if (index < 0)
504 		return -ENOENT;
505 	level = nilfs_btree_node_get_level(node);
506 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
507 	path[level].bp_bh = NULL;
508 	path[level].bp_index = index;
509 
510 	for (level--; level > 0; level--) {
511 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
512 		if (ret < 0)
513 			return ret;
514 		node = nilfs_btree_get_nonroot_node(path, level);
515 		BUG_ON(level != nilfs_btree_node_get_level(node));
516 		index = nilfs_btree_node_get_nchildren(node) - 1;
517 		ptr = nilfs_btree_node_get_ptr(btree, node, index);
518 		path[level].bp_index = index;
519 	}
520 
521 	if (keyp != NULL)
522 		*keyp = nilfs_btree_node_get_key(node, index);
523 	if (ptrp != NULL)
524 		*ptrp = ptr;
525 
526 	return 0;
527 }
528 
529 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
530 			      __u64 key, int level, __u64 *ptrp)
531 {
532 	struct nilfs_btree *btree;
533 	struct nilfs_btree_path *path;
534 	__u64 ptr;
535 	int ret;
536 
537 	btree = (struct nilfs_btree *)bmap;
538 	path = nilfs_btree_alloc_path();
539 	if (path == NULL)
540 		return -ENOMEM;
541 	nilfs_btree_init_path(path);
542 
543 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
544 
545 	if (ptrp != NULL)
546 		*ptrp = ptr;
547 
548 	nilfs_btree_release_path(path);
549 	nilfs_btree_free_path(path);
550 
551 	return ret;
552 }
553 
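/*
 * nilfs_btree_lookup_contig - look up @key and then scan forward,
 * counting how many following entries carry consecutive keys that also
 * map to consecutive disk blocks (translating through the DAT when
 * virtual block numbers are in use).  The scan continues into the right
 * sibling node via the parent and stops at @maxblocks.  Returns the
 * number of contiguous blocks found and stores the first block number
 * in *ptrp.
 */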
554 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
555 				     __u64 key, __u64 *ptrp, unsigned maxblocks)
556 {
557 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
558 	struct nilfs_btree_path *path;
559 	struct nilfs_btree_node *node;
560 	struct inode *dat = NULL;
561 	__u64 ptr, ptr2;
562 	sector_t blocknr;
563 	int level = NILFS_BTREE_LEVEL_NODE_MIN;
564 	int ret, cnt, index, maxlevel;
565 
566 	path = nilfs_btree_alloc_path();
567 	if (path == NULL)
568 		return -ENOMEM;
569 	nilfs_btree_init_path(path);
570 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
571 	if (ret < 0)
572 		goto out;
573 
574 	if (NILFS_BMAP_USE_VBN(bmap)) {
575 		dat = nilfs_bmap_get_dat(bmap);
576 		ret = nilfs_dat_translate(dat, ptr, &blocknr);
577 		if (ret < 0)
578 			goto out;
579 		ptr = blocknr;
580 	}
581 	cnt = 1;
582 	if (cnt == maxblocks)
583 		goto end;
584 
585 	maxlevel = nilfs_btree_height(btree) - 1;
586 	node = nilfs_btree_get_node(btree, path, level);
587 	index = path[level].bp_index + 1;
588 	for (;;) {
589 		while (index < nilfs_btree_node_get_nchildren(node)) {
590 			if (nilfs_btree_node_get_key(node, index) !=
591 			    key + cnt)
592 				goto end;
593 			ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
594 			if (dat) {
595 				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
596 				if (ret < 0)
597 					goto out;
598 				ptr2 = blocknr;
599 			}
600 			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
601 				goto end;
602 			index++;
603 			continue;
604 		}
605 		if (level == maxlevel)
606 			break;
607 
608 		/* look up the right sibling node */
609 		node = nilfs_btree_get_node(btree, path, level + 1);
610 		index = path[level + 1].bp_index + 1;
611 		if (index >= nilfs_btree_node_get_nchildren(node) ||
612 		    nilfs_btree_node_get_key(node, index) != key + cnt)
613 			break;
614 		ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
615 		path[level + 1].bp_index = index;
616 
617 		brelse(path[level].bp_bh);
618 		path[level].bp_bh = NULL;
619 		ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
620 		if (ret < 0)
621 			goto out;
622 		node = nilfs_btree_get_nonroot_node(path, level);
623 		index = 0;
624 		path[level].bp_index = index;
625 	}
626  end:
627 	*ptrp = ptr;
628 	ret = cnt;
629  out:
630 	nilfs_btree_release_path(path);
631 	nilfs_btree_free_path(path);
632 	return ret;
633 }
634 
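/*
 * nilfs_btree_promote_key - after the smallest key of a child has
 * changed, rewrite the corresponding key at this level and keep
 * propagating upwards for as long as the updated entry is the leftmost
 * one (bp_index == 0), updating the root key if it is reached.
 */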
635 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
636 				    struct nilfs_btree_path *path,
637 				    int level, __u64 key)
638 {
639 	if (level < nilfs_btree_height(btree) - 1) {
640 		do {
641 			lock_buffer(path[level].bp_bh);
642 			nilfs_btree_node_set_key(
643 				nilfs_btree_get_nonroot_node(path, level),
644 				path[level].bp_index, key);
645 			if (!buffer_dirty(path[level].bp_bh))
646 				nilfs_btnode_mark_dirty(path[level].bp_bh);
647 			unlock_buffer(path[level].bp_bh);
648 		} while ((path[level].bp_index == 0) &&
649 			 (++level < nilfs_btree_height(btree) - 1));
650 	}
651 
652 	/* root */
653 	if (level == nilfs_btree_height(btree) - 1) {
654 		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
655 					 path[level].bp_index, key);
656 	}
657 }
658 
659 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
660 				  struct nilfs_btree_path *path,
661 				  int level, __u64 *keyp, __u64 *ptrp)
662 {
663 	struct nilfs_btree_node *node;
664 
665 	if (level < nilfs_btree_height(btree) - 1) {
666 		lock_buffer(path[level].bp_bh);
667 		node = nilfs_btree_get_nonroot_node(path, level);
668 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
669 					path[level].bp_index);
670 		if (!buffer_dirty(path[level].bp_bh))
671 			nilfs_btnode_mark_dirty(path[level].bp_bh);
672 		unlock_buffer(path[level].bp_bh);
673 
674 		if (path[level].bp_index == 0)
675 			nilfs_btree_promote_key(btree, path, level + 1,
676 						nilfs_btree_node_get_key(node,
677 									 0));
678 	} else {
679 		node = nilfs_btree_get_root(btree);
680 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
681 					path[level].bp_index);
682 	}
683 }
684 
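/*
 * Insert-time rebalancing: move entries into the left sibling so that
 * the two nodes end up roughly half full, then perform the pending
 * insert.  If the insert point itself migrates into the sibling, the
 * path is switched to the sibling block and the parent index is
 * adjusted to match.
 */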
685 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
686 				   struct nilfs_btree_path *path,
687 				   int level, __u64 *keyp, __u64 *ptrp)
688 {
689 	struct nilfs_btree_node *node, *left;
690 	int nchildren, lnchildren, n, move;
691 
692 	lock_buffer(path[level].bp_bh);
693 	lock_buffer(path[level].bp_sib_bh);
694 
695 	node = nilfs_btree_get_nonroot_node(path, level);
696 	left = nilfs_btree_get_sib_node(path, level);
697 	nchildren = nilfs_btree_node_get_nchildren(node);
698 	lnchildren = nilfs_btree_node_get_nchildren(left);
699 	move = 0;
700 
701 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
702 	if (n > path[level].bp_index) {
703 		/* move insert point */
704 		n--;
705 		move = 1;
706 	}
707 
708 	nilfs_btree_node_move_left(btree, left, node, n);
709 
710 	if (!buffer_dirty(path[level].bp_bh))
711 		nilfs_btnode_mark_dirty(path[level].bp_bh);
712 	if (!buffer_dirty(path[level].bp_sib_bh))
713 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
714 
715 	unlock_buffer(path[level].bp_bh);
716 	unlock_buffer(path[level].bp_sib_bh);
717 
718 	nilfs_btree_promote_key(btree, path, level + 1,
719 				nilfs_btree_node_get_key(node, 0));
720 
721 	if (move) {
722 		brelse(path[level].bp_bh);
723 		path[level].bp_bh = path[level].bp_sib_bh;
724 		path[level].bp_sib_bh = NULL;
725 		path[level].bp_index += lnchildren;
726 		path[level + 1].bp_index--;
727 	} else {
728 		brelse(path[level].bp_sib_bh);
729 		path[level].bp_sib_bh = NULL;
730 		path[level].bp_index -= n;
731 	}
732 
733 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
734 }
735 
736 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
737 				    struct nilfs_btree_path *path,
738 				    int level, __u64 *keyp, __u64 *ptrp)
739 {
740 	struct nilfs_btree_node *node, *right;
741 	int nchildren, rnchildren, n, move;
742 
743 	lock_buffer(path[level].bp_bh);
744 	lock_buffer(path[level].bp_sib_bh);
745 
746 	node = nilfs_btree_get_nonroot_node(path, level);
747 	right = nilfs_btree_get_sib_node(path, level);
748 	nchildren = nilfs_btree_node_get_nchildren(node);
749 	rnchildren = nilfs_btree_node_get_nchildren(right);
750 	move = 0;
751 
752 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
753 	if (n > nchildren - path[level].bp_index) {
754 		/* move insert point */
755 		n--;
756 		move = 1;
757 	}
758 
759 	nilfs_btree_node_move_right(btree, node, right, n);
760 
761 	if (!buffer_dirty(path[level].bp_bh))
762 		nilfs_btnode_mark_dirty(path[level].bp_bh);
763 	if (!buffer_dirty(path[level].bp_sib_bh))
764 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
765 
766 	unlock_buffer(path[level].bp_bh);
767 	unlock_buffer(path[level].bp_sib_bh);
768 
769 	path[level + 1].bp_index++;
770 	nilfs_btree_promote_key(btree, path, level + 1,
771 				nilfs_btree_node_get_key(right, 0));
772 	path[level + 1].bp_index--;
773 
774 	if (move) {
775 		brelse(path[level].bp_bh);
776 		path[level].bp_bh = path[level].bp_sib_bh;
777 		path[level].bp_sib_bh = NULL;
778 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
779 		path[level + 1].bp_index++;
780 	} else {
781 		brelse(path[level].bp_sib_bh);
782 		path[level].bp_sib_bh = NULL;
783 	}
784 
785 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
786 }
787 
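/*
 * Insert-time split: move the upper half of the entries into the newly
 * allocated right sibling, insert the pending entry into whichever half
 * it now belongs to, and pass the sibling's first key and new block
 * pointer upwards so that the caller inserts them into the parent.
 */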
788 static void nilfs_btree_split(struct nilfs_btree *btree,
789 			      struct nilfs_btree_path *path,
790 			      int level, __u64 *keyp, __u64 *ptrp)
791 {
792 	struct nilfs_btree_node *node, *right;
793 	__u64 newkey;
794 	__u64 newptr;
795 	int nchildren, n, move;
796 
797 	lock_buffer(path[level].bp_bh);
798 	lock_buffer(path[level].bp_sib_bh);
799 
800 	node = nilfs_btree_get_nonroot_node(path, level);
801 	right = nilfs_btree_get_sib_node(path, level);
802 	nchildren = nilfs_btree_node_get_nchildren(node);
803 	move = 0;
804 
805 	n = (nchildren + 1) / 2;
806 	if (n > nchildren - path[level].bp_index) {
807 		n--;
808 		move = 1;
809 	}
810 
811 	nilfs_btree_node_move_right(btree, node, right, n);
812 
813 	if (!buffer_dirty(path[level].bp_bh))
814 		nilfs_btnode_mark_dirty(path[level].bp_bh);
815 	if (!buffer_dirty(path[level].bp_sib_bh))
816 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
817 
818 	unlock_buffer(path[level].bp_bh);
819 	unlock_buffer(path[level].bp_sib_bh);
820 
821 	newkey = nilfs_btree_node_get_key(right, 0);
822 	newptr = path[level].bp_newreq.bpr_ptr;
823 
824 	if (move) {
825 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
826 		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
827 					path[level].bp_index);
828 
829 		*keyp = nilfs_btree_node_get_key(right, 0);
830 		*ptrp = path[level].bp_newreq.bpr_ptr;
831 
832 		brelse(path[level].bp_bh);
833 		path[level].bp_bh = path[level].bp_sib_bh;
834 		path[level].bp_sib_bh = NULL;
835 	} else {
836 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
837 
838 		*keyp = nilfs_btree_node_get_key(right, 0);
839 		*ptrp = path[level].bp_newreq.bpr_ptr;
840 
841 		brelse(path[level].bp_sib_bh);
842 		path[level].bp_sib_bh = NULL;
843 	}
844 
845 	path[level + 1].bp_index++;
846 }
847 
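/*
 * Grow the tree by one level: move all entries of the root into the
 * newly allocated child block, raise the root's level, insert the
 * pending entry into that child, and hand the child's first key and
 * block pointer back to become the root's sole entry.
 */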
848 static void nilfs_btree_grow(struct nilfs_btree *btree,
849 			     struct nilfs_btree_path *path,
850 			     int level, __u64 *keyp, __u64 *ptrp)
851 {
852 	struct nilfs_btree_node *root, *child;
853 	int n;
854 
855 	lock_buffer(path[level].bp_sib_bh);
856 
857 	root = nilfs_btree_get_root(btree);
858 	child = nilfs_btree_get_sib_node(path, level);
859 
860 	n = nilfs_btree_node_get_nchildren(root);
861 
862 	nilfs_btree_node_move_right(btree, root, child, n);
863 	nilfs_btree_node_set_level(root, level + 1);
864 
865 	if (!buffer_dirty(path[level].bp_sib_bh))
866 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
867 
868 	unlock_buffer(path[level].bp_sib_bh);
869 
870 	path[level].bp_bh = path[level].bp_sib_bh;
871 	path[level].bp_sib_bh = NULL;
872 
873 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
874 
875 	*keyp = nilfs_btree_node_get_key(child, 0);
876 	*ptrp = path[level].bp_newreq.bpr_ptr;
877 }
878 
879 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
880 				   const struct nilfs_btree_path *path)
881 {
882 	struct nilfs_btree_node *node;
883 	int level;
884 
885 	if (path == NULL)
886 		return NILFS_BMAP_INVALID_PTR;
887 
888 	/* left sibling */
889 	level = NILFS_BTREE_LEVEL_NODE_MIN;
890 	if (path[level].bp_index > 0) {
891 		node = nilfs_btree_get_node(btree, path, level);
892 		return nilfs_btree_node_get_ptr(btree, node,
893 						path[level].bp_index - 1);
894 	}
895 
896 	/* parent */
897 	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
898 	if (level <= nilfs_btree_height(btree) - 1) {
899 		node = nilfs_btree_get_node(btree, path, level);
900 		return nilfs_btree_node_get_ptr(btree, node,
901 						path[level].bp_index);
902 	}
903 
904 	return NILFS_BMAP_INVALID_PTR;
905 }
906 
907 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
908 				       const struct nilfs_btree_path *path,
909 				       __u64 key)
910 {
911 	__u64 ptr;
912 
913 	ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
914 	if (ptr != NILFS_BMAP_INVALID_PTR)
915 		/* sequential access */
916 		return ptr;
917 	else {
918 		ptr = nilfs_btree_find_near(btree, path);
919 		if (ptr != NILFS_BMAP_INVALID_PTR)
920 			/* near */
921 			return ptr;
922 	}
923 	/* block group */
924 	return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
925 }
926 
927 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
928 				     __u64 ptr)
929 {
930 	btree->bt_bmap.b_last_allocated_key = key;
931 	btree->bt_bmap.b_last_allocated_ptr = ptr;
932 }
933 
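/*
 * nilfs_btree_prepare_insert - walk up from the bottom level and decide
 * how each node will absorb the new entry: insert in place when there
 * is room, otherwise carry entries to a left or right sibling, split
 * into a newly allocated block, or, at the root, grow the tree by one
 * level.  The chosen operation per level is recorded in bp_op and the
 * required block pointers are preallocated; nothing is modified until
 * nilfs_btree_commit_insert() replays the plan.
 */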
934 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
935 				      struct nilfs_btree_path *path,
936 				      int *levelp, __u64 key, __u64 ptr,
937 				      struct nilfs_bmap_stats *stats)
938 {
939 	struct buffer_head *bh;
940 	struct nilfs_btree_node *node, *parent, *sib;
941 	__u64 sibptr;
942 	int pindex, level, ret;
943 	struct inode *dat = NULL;
944 
945 	stats->bs_nblocks = 0;
946 	level = NILFS_BTREE_LEVEL_DATA;
947 
948 	/* allocate a new ptr for data block */
949 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
950 		path[level].bp_newreq.bpr_ptr =
951 			nilfs_btree_find_target_v(btree, path, key);
952 		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
953 	}
954 
955 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
956 					   &path[level].bp_newreq, dat);
957 	if (ret < 0)
958 		goto err_out_data;
959 
960 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
961 	     level < nilfs_btree_height(btree) - 1;
962 	     level++) {
963 		node = nilfs_btree_get_nonroot_node(path, level);
964 		if (nilfs_btree_node_get_nchildren(node) <
965 		    nilfs_btree_node_nchildren_max(node, btree)) {
966 			path[level].bp_op = nilfs_btree_do_insert;
967 			stats->bs_nblocks++;
968 			goto out;
969 		}
970 
971 		parent = nilfs_btree_get_node(btree, path, level + 1);
972 		pindex = path[level + 1].bp_index;
973 
974 		/* left sibling */
975 		if (pindex > 0) {
976 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
977 							  pindex - 1);
978 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
979 			if (ret < 0)
980 				goto err_out_child_node;
981 			sib = (struct nilfs_btree_node *)bh->b_data;
982 			if (nilfs_btree_node_get_nchildren(sib) <
983 			    nilfs_btree_node_nchildren_max(sib, btree)) {
984 				path[level].bp_sib_bh = bh;
985 				path[level].bp_op = nilfs_btree_carry_left;
986 				stats->bs_nblocks++;
987 				goto out;
988 			} else
989 				brelse(bh);
990 		}
991 
992 		/* right sibling */
993 		if (pindex <
994 		    nilfs_btree_node_get_nchildren(parent) - 1) {
995 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
996 							  pindex + 1);
997 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
998 			if (ret < 0)
999 				goto err_out_child_node;
1000 			sib = (struct nilfs_btree_node *)bh->b_data;
1001 			if (nilfs_btree_node_get_nchildren(sib) <
1002 			    nilfs_btree_node_nchildren_max(sib, btree)) {
1003 				path[level].bp_sib_bh = bh;
1004 				path[level].bp_op = nilfs_btree_carry_right;
1005 				stats->bs_nblocks++;
1006 				goto out;
1007 			} else
1008 				brelse(bh);
1009 		}
1010 
1011 		/* split */
1012 		path[level].bp_newreq.bpr_ptr =
1013 			path[level - 1].bp_newreq.bpr_ptr + 1;
1014 		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1015 						   &path[level].bp_newreq, dat);
1016 		if (ret < 0)
1017 			goto err_out_child_node;
1018 		ret = nilfs_btree_get_new_block(btree,
1019 						path[level].bp_newreq.bpr_ptr,
1020 						&bh);
1021 		if (ret < 0)
1022 			goto err_out_curr_node;
1023 
1024 		stats->bs_nblocks++;
1025 
1026 		lock_buffer(bh);
1027 		nilfs_btree_node_init(btree,
1028 				      (struct nilfs_btree_node *)bh->b_data,
1029 				      0, level, 0, NULL, NULL);
1030 		unlock_buffer(bh);
1031 		path[level].bp_sib_bh = bh;
1032 		path[level].bp_op = nilfs_btree_split;
1033 	}
1034 
1035 	/* root */
1036 	node = nilfs_btree_get_root(btree);
1037 	if (nilfs_btree_node_get_nchildren(node) <
1038 	    nilfs_btree_node_nchildren_max(node, btree)) {
1039 		path[level].bp_op = nilfs_btree_do_insert;
1040 		stats->bs_nblocks++;
1041 		goto out;
1042 	}
1043 
1044 	/* grow */
1045 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1046 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1047 					   &path[level].bp_newreq, dat);
1048 	if (ret < 0)
1049 		goto err_out_child_node;
1050 	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1051 					&bh);
1052 	if (ret < 0)
1053 		goto err_out_curr_node;
1054 
1055 	lock_buffer(bh);
1056 	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1057 			      0, level, 0, NULL, NULL);
1058 	unlock_buffer(bh);
1059 	path[level].bp_sib_bh = bh;
1060 	path[level].bp_op = nilfs_btree_grow;
1061 
1062 	level++;
1063 	path[level].bp_op = nilfs_btree_do_insert;
1064 
1065 	/* a newly-created node block and a data block are added */
1066 	stats->bs_nblocks += 2;
1067 
1068 	/* success */
1069  out:
1070 	*levelp = level;
1071 	return ret;
1072 
1073 	/* error */
1074  err_out_curr_node:
1075 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1076 				   dat);
1077  err_out_child_node:
1078 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1079 		nilfs_btnode_delete(path[level].bp_sib_bh);
1080 		nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1081 					   &path[level].bp_newreq, dat);
1082 
1083 	}
1084 
1085 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1086 				   dat);
1087  err_out_data:
1088 	*levelp = level;
1089 	stats->bs_nblocks = 0;
1090 	return ret;
1091 }
1092 
1093 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1094 				      struct nilfs_btree_path *path,
1095 				      int maxlevel, __u64 key, __u64 ptr)
1096 {
1097 	struct inode *dat = NULL;
1098 	int level;
1099 
1100 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1101 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1102 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1103 		nilfs_btree_set_target_v(btree, key, ptr);
1104 		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1105 	}
1106 
1107 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1108 		nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1109 					    &path[level - 1].bp_newreq, dat);
1110 		path[level].bp_op(btree, path, level, &key, &ptr);
1111 	}
1112 
1113 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1114 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1115 }
1116 
1117 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1118 {
1119 	struct nilfs_btree *btree;
1120 	struct nilfs_btree_path *path;
1121 	struct nilfs_bmap_stats stats;
1122 	int level, ret;
1123 
1124 	btree = (struct nilfs_btree *)bmap;
1125 	path = nilfs_btree_alloc_path();
1126 	if (path == NULL)
1127 		return -ENOMEM;
1128 	nilfs_btree_init_path(path);
1129 
1130 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1131 				    NILFS_BTREE_LEVEL_NODE_MIN);
1132 	if (ret != -ENOENT) {
1133 		if (ret == 0)
1134 			ret = -EEXIST;
1135 		goto out;
1136 	}
1137 
1138 	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1139 	if (ret < 0)
1140 		goto out;
1141 	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1142 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1143 
1144  out:
1145 	nilfs_btree_release_path(path);
1146 	nilfs_btree_free_path(path);
1147 	return ret;
1148 }
1149 
1150 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1151 				  struct nilfs_btree_path *path,
1152 				  int level, __u64 *keyp, __u64 *ptrp)
1153 {
1154 	struct nilfs_btree_node *node;
1155 
1156 	if (level < nilfs_btree_height(btree) - 1) {
1157 		lock_buffer(path[level].bp_bh);
1158 		node = nilfs_btree_get_nonroot_node(path, level);
1159 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1160 					path[level].bp_index);
1161 		if (!buffer_dirty(path[level].bp_bh))
1162 			nilfs_btnode_mark_dirty(path[level].bp_bh);
1163 		unlock_buffer(path[level].bp_bh);
1164 		if (path[level].bp_index == 0)
1165 			nilfs_btree_promote_key(btree, path, level + 1,
1166 				nilfs_btree_node_get_key(node, 0));
1167 	} else {
1168 		node = nilfs_btree_get_root(btree);
1169 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1170 					path[level].bp_index);
1171 	}
1172 }
1173 
1174 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1175 				    struct nilfs_btree_path *path,
1176 				    int level, __u64 *keyp, __u64 *ptrp)
1177 {
1178 	struct nilfs_btree_node *node, *left;
1179 	int nchildren, lnchildren, n;
1180 
1181 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1182 
1183 	lock_buffer(path[level].bp_bh);
1184 	lock_buffer(path[level].bp_sib_bh);
1185 
1186 	node = nilfs_btree_get_nonroot_node(path, level);
1187 	left = nilfs_btree_get_sib_node(path, level);
1188 	nchildren = nilfs_btree_node_get_nchildren(node);
1189 	lnchildren = nilfs_btree_node_get_nchildren(left);
1190 
1191 	n = (nchildren + lnchildren) / 2 - nchildren;
1192 
1193 	nilfs_btree_node_move_right(btree, left, node, n);
1194 
1195 	if (!buffer_dirty(path[level].bp_bh))
1196 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1197 	if (!buffer_dirty(path[level].bp_sib_bh))
1198 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1199 
1200 	unlock_buffer(path[level].bp_bh);
1201 	unlock_buffer(path[level].bp_sib_bh);
1202 
1203 	nilfs_btree_promote_key(btree, path, level + 1,
1204 				nilfs_btree_node_get_key(node, 0));
1205 
1206 	brelse(path[level].bp_sib_bh);
1207 	path[level].bp_sib_bh = NULL;
1208 	path[level].bp_index += n;
1209 }
1210 
1211 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1212 				     struct nilfs_btree_path *path,
1213 				     int level, __u64 *keyp, __u64 *ptrp)
1214 {
1215 	struct nilfs_btree_node *node, *right;
1216 	int nchildren, rnchildren, n;
1217 
1218 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219 
1220 	lock_buffer(path[level].bp_bh);
1221 	lock_buffer(path[level].bp_sib_bh);
1222 
1223 	node = nilfs_btree_get_nonroot_node(path, level);
1224 	right = nilfs_btree_get_sib_node(path, level);
1225 	nchildren = nilfs_btree_node_get_nchildren(node);
1226 	rnchildren = nilfs_btree_node_get_nchildren(right);
1227 
1228 	n = (nchildren + rnchildren) / 2 - nchildren;
1229 
1230 	nilfs_btree_node_move_left(btree, node, right, n);
1231 
1232 	if (!buffer_dirty(path[level].bp_bh))
1233 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1234 	if (!buffer_dirty(path[level].bp_sib_bh))
1235 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1236 
1237 	unlock_buffer(path[level].bp_bh);
1238 	unlock_buffer(path[level].bp_sib_bh);
1239 
1240 	path[level + 1].bp_index++;
1241 	nilfs_btree_promote_key(btree, path, level + 1,
1242 				nilfs_btree_node_get_key(right, 0));
1243 	path[level + 1].bp_index--;
1244 
1245 	brelse(path[level].bp_sib_bh);
1246 	path[level].bp_sib_bh = NULL;
1247 }
1248 
1249 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1250 				    struct nilfs_btree_path *path,
1251 				    int level, __u64 *keyp, __u64 *ptrp)
1252 {
1253 	struct nilfs_btree_node *node, *left;
1254 	int n;
1255 
1256 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1257 
1258 	lock_buffer(path[level].bp_bh);
1259 	lock_buffer(path[level].bp_sib_bh);
1260 
1261 	node = nilfs_btree_get_nonroot_node(path, level);
1262 	left = nilfs_btree_get_sib_node(path, level);
1263 
1264 	n = nilfs_btree_node_get_nchildren(node);
1265 
1266 	nilfs_btree_node_move_left(btree, left, node, n);
1267 
1268 	if (!buffer_dirty(path[level].bp_sib_bh))
1269 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1270 
1271 	unlock_buffer(path[level].bp_bh);
1272 	unlock_buffer(path[level].bp_sib_bh);
1273 
1274 	nilfs_btnode_delete(path[level].bp_bh);
1275 	path[level].bp_bh = path[level].bp_sib_bh;
1276 	path[level].bp_sib_bh = NULL;
1277 	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1278 }
1279 
1280 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1281 				     struct nilfs_btree_path *path,
1282 				     int level, __u64 *keyp, __u64 *ptrp)
1283 {
1284 	struct nilfs_btree_node *node, *right;
1285 	int n;
1286 
1287 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1288 
1289 	lock_buffer(path[level].bp_bh);
1290 	lock_buffer(path[level].bp_sib_bh);
1291 
1292 	node = nilfs_btree_get_nonroot_node(path, level);
1293 	right = nilfs_btree_get_sib_node(path, level);
1294 
1295 	n = nilfs_btree_node_get_nchildren(right);
1296 
1297 	nilfs_btree_node_move_left(btree, node, right, n);
1298 
1299 	if (!buffer_dirty(path[level].bp_bh))
1300 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1301 
1302 	unlock_buffer(path[level].bp_bh);
1303 	unlock_buffer(path[level].bp_sib_bh);
1304 
1305 	nilfs_btnode_delete(path[level].bp_sib_bh);
1306 	path[level].bp_sib_bh = NULL;
1307 	path[level + 1].bp_index++;
1308 }
1309 
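/*
 * Shrink the tree by one level: when the remaining entries of the
 * root's only child fit into the root, copy them into the root, lower
 * the root's level, and delete the now-unused child block.
 */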
1310 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1311 			       struct nilfs_btree_path *path,
1312 			       int level, __u64 *keyp, __u64 *ptrp)
1313 {
1314 	struct nilfs_btree_node *root, *child;
1315 	int n;
1316 
1317 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1318 
1319 	lock_buffer(path[level].bp_bh);
1320 	root = nilfs_btree_get_root(btree);
1321 	child = nilfs_btree_get_nonroot_node(path, level);
1322 
1323 	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1324 	nilfs_btree_node_set_level(root, level);
1325 	n = nilfs_btree_node_get_nchildren(child);
1326 	nilfs_btree_node_move_left(btree, root, child, n);
1327 	unlock_buffer(path[level].bp_bh);
1328 
1329 	nilfs_btnode_delete(path[level].bp_bh);
1330 	path[level].bp_bh = NULL;
1331 }
1332 
1333 
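/*
 * nilfs_btree_prepare_delete - deletion counterpart of
 * nilfs_btree_prepare_insert(): for each level, delete in place while
 * the node stays above its minimum occupancy, otherwise borrow entries
 * from a sibling, concatenate with a sibling, or shrink the tree height
 * when the node is the root's only child.  The per-level operation is
 * stored in bp_op and disposal of the removed pointers is prepared;
 * nilfs_btree_commit_delete() applies the plan.
 */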
1334 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1335 				      struct nilfs_btree_path *path,
1336 				      int *levelp,
1337 				      struct nilfs_bmap_stats *stats,
1338 				      struct inode *dat)
1339 {
1340 	struct buffer_head *bh;
1341 	struct nilfs_btree_node *node, *parent, *sib;
1342 	__u64 sibptr;
1343 	int pindex, level, ret;
1344 
1345 	ret = 0;
1346 	stats->bs_nblocks = 0;
1347 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1348 	     level < nilfs_btree_height(btree) - 1;
1349 	     level++) {
1350 		node = nilfs_btree_get_nonroot_node(path, level);
1351 		path[level].bp_oldreq.bpr_ptr =
1352 			nilfs_btree_node_get_ptr(btree, node,
1353 						 path[level].bp_index);
1354 		ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1355 						 &path[level].bp_oldreq, dat);
1356 		if (ret < 0)
1357 			goto err_out_child_node;
1358 
1359 		if (nilfs_btree_node_get_nchildren(node) >
1360 		    nilfs_btree_node_nchildren_min(node, btree)) {
1361 			path[level].bp_op = nilfs_btree_do_delete;
1362 			stats->bs_nblocks++;
1363 			goto out;
1364 		}
1365 
1366 		parent = nilfs_btree_get_node(btree, path, level + 1);
1367 		pindex = path[level + 1].bp_index;
1368 
1369 		if (pindex > 0) {
1370 			/* left sibling */
1371 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1372 							  pindex - 1);
1373 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1374 			if (ret < 0)
1375 				goto err_out_curr_node;
1376 			sib = (struct nilfs_btree_node *)bh->b_data;
1377 			if (nilfs_btree_node_get_nchildren(sib) >
1378 			    nilfs_btree_node_nchildren_min(sib, btree)) {
1379 				path[level].bp_sib_bh = bh;
1380 				path[level].bp_op = nilfs_btree_borrow_left;
1381 				stats->bs_nblocks++;
1382 				goto out;
1383 			} else {
1384 				path[level].bp_sib_bh = bh;
1385 				path[level].bp_op = nilfs_btree_concat_left;
1386 				stats->bs_nblocks++;
1387 				/* continue; */
1388 			}
1389 		} else if (pindex <
1390 			   nilfs_btree_node_get_nchildren(parent) - 1) {
1391 			/* right sibling */
1392 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1393 							  pindex + 1);
1394 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1395 			if (ret < 0)
1396 				goto err_out_curr_node;
1397 			sib = (struct nilfs_btree_node *)bh->b_data;
1398 			if (nilfs_btree_node_get_nchildren(sib) >
1399 			    nilfs_btree_node_nchildren_min(sib, btree)) {
1400 				path[level].bp_sib_bh = bh;
1401 				path[level].bp_op = nilfs_btree_borrow_right;
1402 				stats->bs_nblocks++;
1403 				goto out;
1404 			} else {
1405 				path[level].bp_sib_bh = bh;
1406 				path[level].bp_op = nilfs_btree_concat_right;
1407 				stats->bs_nblocks++;
1408 				/* continue; */
1409 			}
1410 		} else {
1411 			/* no siblings */
1412 			/* the only child of the root node */
1413 			WARN_ON(level != nilfs_btree_height(btree) - 2);
1414 			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1415 			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1416 				path[level].bp_op = nilfs_btree_shrink;
1417 				stats->bs_nblocks += 2;
1418 			} else {
1419 				path[level].bp_op = nilfs_btree_do_delete;
1420 				stats->bs_nblocks++;
1421 			}
1422 
1423 			goto out;
1424 
1425 		}
1426 	}
1427 
1428 	node = nilfs_btree_get_root(btree);
1429 	path[level].bp_oldreq.bpr_ptr =
1430 		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1431 
1432 	ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1433 					 &path[level].bp_oldreq, dat);
1434 	if (ret < 0)
1435 		goto err_out_child_node;
1436 
1437 	/* child of the root node is deleted */
1438 	path[level].bp_op = nilfs_btree_do_delete;
1439 	stats->bs_nblocks++;
1440 
1441 	/* success */
1442  out:
1443 	*levelp = level;
1444 	return ret;
1445 
1446 	/* error */
1447  err_out_curr_node:
1448 	nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1449  err_out_child_node:
1450 	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1451 		brelse(path[level].bp_sib_bh);
1452 		nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1453 					 &path[level].bp_oldreq, dat);
1454 	}
1455 	*levelp = level;
1456 	stats->bs_nblocks = 0;
1457 	return ret;
1458 }
1459 
1460 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1461 				      struct nilfs_btree_path *path,
1462 				      int maxlevel, struct inode *dat)
1463 {
1464 	int level;
1465 
1466 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1467 		nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1468 					  &path[level].bp_oldreq, dat);
1469 		path[level].bp_op(btree, path, level, NULL, NULL);
1470 	}
1471 
1472 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1473 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1474 }
1475 
1476 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1477 
1478 {
1479 	struct nilfs_btree *btree;
1480 	struct nilfs_btree_path *path;
1481 	struct nilfs_bmap_stats stats;
1482 	struct inode *dat;
1483 	int level, ret;
1484 
1485 	btree = (struct nilfs_btree *)bmap;
1486 	path = nilfs_btree_alloc_path();
1487 	if (path == NULL)
1488 		return -ENOMEM;
1489 	nilfs_btree_init_path(path);
1490 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1491 				    NILFS_BTREE_LEVEL_NODE_MIN);
1492 	if (ret < 0)
1493 		goto out;
1494 
1495 
1496 	dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1497 		nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1498 
1499 	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1500 	if (ret < 0)
1501 		goto out;
1502 	nilfs_btree_commit_delete(btree, path, level, dat);
1503 	nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1504 
1505 out:
1506 	nilfs_btree_release_path(path);
1507 	nilfs_btree_free_path(path);
1508 	return ret;
1509 }
1510 
1511 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1512 {
1513 	struct nilfs_btree *btree;
1514 	struct nilfs_btree_path *path;
1515 	int ret;
1516 
1517 	btree = (struct nilfs_btree *)bmap;
1518 	path = nilfs_btree_alloc_path();
1519 	if (path == NULL)
1520 		return -ENOMEM;
1521 	nilfs_btree_init_path(path);
1522 
1523 	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1524 
1525 	nilfs_btree_release_path(path);
1526 	nilfs_btree_free_path(path);
1527 
1528 	return ret;
1529 }
1530 
1531 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1532 {
1533 	struct buffer_head *bh;
1534 	struct nilfs_btree *btree;
1535 	struct nilfs_btree_node *root, *node;
1536 	__u64 maxkey, nextmaxkey;
1537 	__u64 ptr;
1538 	int nchildren, ret;
1539 
1540 	btree = (struct nilfs_btree *)bmap;
1541 	root = nilfs_btree_get_root(btree);
1542 	switch (nilfs_btree_height(btree)) {
1543 	case 2:
1544 		bh = NULL;
1545 		node = root;
1546 		break;
1547 	case 3:
1548 		nchildren = nilfs_btree_node_get_nchildren(root);
1549 		if (nchildren > 1)
1550 			return 0;
1551 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1552 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1553 		if (ret < 0)
1554 			return ret;
1555 		node = (struct nilfs_btree_node *)bh->b_data;
1556 		break;
1557 	default:
1558 		return 0;
1559 	}
1560 
1561 	nchildren = nilfs_btree_node_get_nchildren(node);
1562 	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1563 	nextmaxkey = (nchildren > 1) ?
1564 		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1565 	if (bh != NULL)
1566 		brelse(bh);
1567 
1568 	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1569 }
1570 
1571 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1572 				   __u64 *keys, __u64 *ptrs, int nitems)
1573 {
1574 	struct buffer_head *bh;
1575 	struct nilfs_btree *btree;
1576 	struct nilfs_btree_node *node, *root;
1577 	__le64 *dkeys;
1578 	__le64 *dptrs;
1579 	__u64 ptr;
1580 	int nchildren, i, ret;
1581 
1582 	btree = (struct nilfs_btree *)bmap;
1583 	root = nilfs_btree_get_root(btree);
1584 	switch (nilfs_btree_height(btree)) {
1585 	case 2:
1586 		bh = NULL;
1587 		node = root;
1588 		break;
1589 	case 3:
1590 		nchildren = nilfs_btree_node_get_nchildren(root);
1591 		WARN_ON(nchildren > 1);
1592 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1593 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1594 		if (ret < 0)
1595 			return ret;
1596 		node = (struct nilfs_btree_node *)bh->b_data;
1597 		break;
1598 	default:
1599 		node = NULL;
1600 		return -EINVAL;
1601 	}
1602 
1603 	nchildren = nilfs_btree_node_get_nchildren(node);
1604 	if (nchildren < nitems)
1605 		nitems = nchildren;
1606 	dkeys = nilfs_btree_node_dkeys(node);
1607 	dptrs = nilfs_btree_node_dptrs(node, btree);
1608 	for (i = 0; i < nitems; i++) {
1609 		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1610 		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1611 	}
1612 
1613 	if (bh != NULL)
1614 		brelse(bh);
1615 
1616 	return nitems;
1617 }
1618 
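/*
 * Conversion helpers: the prepare/commit pair below turns a bmap into
 * B-tree form while inserting one new entry.  The prepare step
 * allocates a pointer for the new data block and, when the carried-over
 * entries no longer fit into the root node, a second pointer plus a
 * block for a level-1 child node; the commit step builds the new root
 * (and optional child) from @keys/@ptrs and inserts the new entry.
 */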
1619 static int
1620 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1621 				       union nilfs_bmap_ptr_req *dreq,
1622 				       union nilfs_bmap_ptr_req *nreq,
1623 				       struct buffer_head **bhp,
1624 				       struct nilfs_bmap_stats *stats)
1625 {
1626 	struct buffer_head *bh;
1627 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1628 	struct inode *dat = NULL;
1629 	int ret;
1630 
1631 	stats->bs_nblocks = 0;
1632 
1633 	/* for the data block */
1634 	/* no b-tree path exists yet, so a near ptr cannot be found */
1635 	if (NILFS_BMAP_USE_VBN(bmap)) {
1636 		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1637 		dat = nilfs_bmap_get_dat(bmap);
1638 	}
1639 
1640 	ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1641 	if (ret < 0)
1642 		return ret;
1643 
1644 	*bhp = NULL;
1645 	stats->bs_nblocks++;
1646 	if (nreq != NULL) {
1647 		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1648 		ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1649 		if (ret < 0)
1650 			goto err_out_dreq;
1651 
1652 		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1653 		if (ret < 0)
1654 			goto err_out_nreq;
1655 
1656 		*bhp = bh;
1657 		stats->bs_nblocks++;
1658 	}
1659 
1660 	/* success */
1661 	return 0;
1662 
1663 	/* error */
1664  err_out_nreq:
1665 	nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1666  err_out_dreq:
1667 	nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1668 	stats->bs_nblocks = 0;
1669 	return ret;
1670 
1671 }
1672 
1673 static void
1674 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1675 				      __u64 key, __u64 ptr,
1676 				      const __u64 *keys, const __u64 *ptrs,
1677 				      int n,
1678 				      union nilfs_bmap_ptr_req *dreq,
1679 				      union nilfs_bmap_ptr_req *nreq,
1680 				      struct buffer_head *bh)
1681 {
1682 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1683 	struct nilfs_btree_node *node;
1684 	struct inode *dat;
1685 	__u64 tmpptr;
1686 
1687 	/* free resources */
1688 	if (bmap->b_ops->bop_clear != NULL)
1689 		bmap->b_ops->bop_clear(bmap);
1690 
1691 	/* ptr must be a pointer to a buffer head. */
1692 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1693 
1694 	/* convert and insert */
1695 	dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1696 	nilfs_btree_init(bmap);
1697 	if (nreq != NULL) {
1698 		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1699 		nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1700 
1701 		/* create child node at level 1 */
1702 		lock_buffer(bh);
1703 		node = (struct nilfs_btree_node *)bh->b_data;
1704 		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1705 		nilfs_btree_node_insert(btree, node,
1706 					key, dreq->bpr_ptr, n);
1707 		if (!buffer_dirty(bh))
1708 			nilfs_btnode_mark_dirty(bh);
1709 		if (!nilfs_bmap_dirty(bmap))
1710 			nilfs_bmap_set_dirty(bmap);
1711 
1712 		unlock_buffer(bh);
1713 		brelse(bh);
1714 
1715 		/* create root node at level 2 */
1716 		node = nilfs_btree_get_root(btree);
1717 		tmpptr = nreq->bpr_ptr;
1718 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1719 				      2, 1, &keys[0], &tmpptr);
1720 	} else {
1721 		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1722 
1723 		/* create root node at level 1 */
1724 		node = nilfs_btree_get_root(btree);
1725 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1726 				      1, n, keys, ptrs);
1727 		nilfs_btree_node_insert(btree, node,
1728 					key, dreq->bpr_ptr, n);
1729 		if (!nilfs_bmap_dirty(bmap))
1730 			nilfs_bmap_set_dirty(bmap);
1731 	}
1732 
1733 	if (NILFS_BMAP_USE_VBN(bmap))
1734 		nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1735 }
1736 
1737 /**
1738  * nilfs_btree_convert_and_insert - convert a bmap to a B-tree and insert a new key
1739  * @bmap: bmap structure to be converted to a B-tree
1740  * @key: key of the new entry to be inserted
1741  * @ptr: pointer associated with @key (here, a pointer to a buffer head)
1742  * @keys: array of keys to be carried over into the new B-tree
1743  * @ptrs: array of pointers associated with @keys
1744  * @n: number of entries in @keys and @ptrs
1745  */
1746 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1747 				   __u64 key, __u64 ptr,
1748 				   const __u64 *keys, const __u64 *ptrs, int n)
1749 {
1750 	struct buffer_head *bh;
1751 	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1752 	struct nilfs_bmap_stats stats;
1753 	int ret;
1754 
1755 	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1756 		di = &dreq;
1757 		ni = NULL;
1758 	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1759 			   1 << bmap->b_inode->i_blkbits)) {
1760 		di = &dreq;
1761 		ni = &nreq;
1762 	} else {
1763 		di = NULL;
1764 		ni = NULL;
1765 		BUG();
1766 	}
1767 
1768 	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1769 						     &stats);
1770 	if (ret < 0)
1771 		return ret;
1772 	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1773 					      di, ni, bh);
1774 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1775 	return 0;
1776 }
1777 
1778 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1779 				   struct nilfs_btree_path *path,
1780 				   int level,
1781 				   struct buffer_head *bh)
1782 {
1783 	while ((++level < nilfs_btree_height(btree) - 1) &&
1784 	       !buffer_dirty(path[level].bp_bh))
1785 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1786 
1787 	return 0;
1788 }
1789 
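/*
 * For bmaps that use virtual block addresses, propagating a dirty
 * buffer requires assigning a new virtual address to each node block
 * touched on the path: the prepare/commit/abort _update_v helpers below
 * register the change with the DAT, move the block to its new key in
 * the btnode cache, and rewrite the parent's pointer to the child.
 */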
1790 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1791 					struct nilfs_btree_path *path,
1792 					int level, struct inode *dat)
1793 {
1794 	struct nilfs_btree_node *parent;
1795 	int ret;
1796 
1797 	parent = nilfs_btree_get_node(btree, path, level + 1);
1798 	path[level].bp_oldreq.bpr_ptr =
1799 		nilfs_btree_node_get_ptr(btree, parent,
1800 					 path[level + 1].bp_index);
1801 	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1802 	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1803 				       &path[level].bp_newreq.bpr_req);
1804 	if (ret < 0)
1805 		return ret;
1806 
1807 	if (buffer_nilfs_node(path[level].bp_bh)) {
1808 		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1809 		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1810 		path[level].bp_ctxt.bh = path[level].bp_bh;
1811 		ret = nilfs_btnode_prepare_change_key(
1812 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1813 			&path[level].bp_ctxt);
1814 		if (ret < 0) {
1815 			nilfs_dat_abort_update(dat,
1816 					       &path[level].bp_oldreq.bpr_req,
1817 					       &path[level].bp_newreq.bpr_req);
1818 			return ret;
1819 		}
1820 	}
1821 
1822 	return 0;
1823 }
1824 
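/*
 * nilfs_btree_commit_update_v - commit the DAT update and btnode re-keying
 * prepared by nilfs_btree_prepare_update_v(), mark the buffer volatile, and
 * store the new virtual address in the parent node.
 */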
1825 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1826 					struct nilfs_btree_path *path,
1827 					int level, struct inode *dat)
1828 {
1829 	struct nilfs_btree_node *parent;
1830 
1831 	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1832 				&path[level].bp_newreq.bpr_req,
1833 				btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1834 
1835 	if (buffer_nilfs_node(path[level].bp_bh)) {
1836 		nilfs_btnode_commit_change_key(
1837 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1838 			&path[level].bp_ctxt);
1839 		path[level].bp_bh = path[level].bp_ctxt.bh;
1840 	}
1841 	set_buffer_nilfs_volatile(path[level].bp_bh);
1842 
1843 	parent = nilfs_btree_get_node(btree, path, level + 1);
1844 	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1845 				 path[level].bp_newreq.bpr_ptr);
1846 }
1847 
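/*
 * nilfs_btree_abort_update_v - cancel the DAT update and btnode re-keying
 * prepared by nilfs_btree_prepare_update_v().
 */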
1848 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1849 				       struct nilfs_btree_path *path,
1850 				       int level, struct inode *dat)
1851 {
1852 	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1853 			       &path[level].bp_newreq.bpr_req);
1854 	if (buffer_nilfs_node(path[level].bp_bh))
1855 		nilfs_btnode_abort_change_key(
1856 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1857 			&path[level].bp_ctxt);
1858 }
1859 
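/*
 * nilfs_btree_prepare_propagate_v - prepare virtual address renewal for the
 * block at @minlevel (unless its buffer is already volatile) and for each
 * clean ancestor, stopping at the first dirty one or at the level just
 * below the root.  On success the highest prepared level is returned
 * through @maxlevelp; on error every update prepared so far is aborted.
 */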
1860 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1861 					   struct nilfs_btree_path *path,
1862 					   int minlevel, int *maxlevelp,
1863 					   struct inode *dat)
1864 {
1865 	int level, ret;
1866 
1867 	level = minlevel;
1868 	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1869 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1870 		if (ret < 0)
1871 			return ret;
1872 	}
1873 	while ((++level < nilfs_btree_height(btree) - 1) &&
1874 	       !buffer_dirty(path[level].bp_bh)) {
1875 
1876 		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1877 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1878 		if (ret < 0)
1879 			goto out;
1880 	}
1881 
1882 	/* success */
1883 	*maxlevelp = level - 1;
1884 	return 0;
1885 
1886 	/* error */
1887  out:
1888 	while (--level > minlevel)
1889 		nilfs_btree_abort_update_v(btree, path, level, dat);
1890 	if (!buffer_nilfs_volatile(path[level].bp_bh))
1891 		nilfs_btree_abort_update_v(btree, path, level, dat);
1892 	return ret;
1893 }
1894 
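/*
 * nilfs_btree_commit_propagate_v - commit the virtual address updates
 * prepared for levels @minlevel through @maxlevel (@minlevel is skipped if
 * its buffer was already volatile).
 */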
1895 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1896 					   struct nilfs_btree_path *path,
1897 					   int minlevel, int maxlevel,
1898 					   struct buffer_head *bh,
1899 					   struct inode *dat)
1900 {
1901 	int level;
1902 
1903 	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1904 		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1905 
1906 	for (level = minlevel + 1; level <= maxlevel; level++)
1907 		nilfs_btree_commit_update_v(btree, path, level, dat);
1908 }
1909 
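/*
 * nilfs_btree_propagate_v - propagate dirtiness of @bh when virtual block
 * addresses are used: virtual address renewal is prepared and committed
 * for the buffer and its clean ancestors; if the buffer at @level is
 * already volatile, only its DAT entry is marked dirty instead.
 */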
1910 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1911 				   struct nilfs_btree_path *path,
1912 				   int level, struct buffer_head *bh)
1913 {
1914 	int maxlevel, ret;
1915 	struct nilfs_btree_node *parent;
1916 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1917 	__u64 ptr;
1918 
1919 	get_bh(bh);
1920 	path[level].bp_bh = bh;
1921 	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1922 					      dat);
1923 	if (ret < 0)
1924 		goto out;
1925 
1926 	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1927 		parent = nilfs_btree_get_node(btree, path, level + 1);
1928 		ptr = nilfs_btree_node_get_ptr(btree, parent,
1929 					       path[level + 1].bp_index);
1930 		ret = nilfs_dat_mark_dirty(dat, ptr);
1931 		if (ret < 0)
1932 			goto out;
1933 	}
1934 
1935 	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1936 
1937  out:
1938 	brelse(path[level].bp_bh);
1939 	path[level].bp_bh = NULL;
1940 	return ret;
1941 }
1942 
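/*
 * nilfs_btree_propagate - bop_propagate callback: determine the key and
 * level of the dirty buffer @bh, look up the path to it, and propagate the
 * dirty state to its ancestors using either the virtual or the physical
 * variant, depending on the pointer type of the bmap.
 */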
1943 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1944 				 struct buffer_head *bh)
1945 {
1946 	struct nilfs_btree *btree;
1947 	struct nilfs_btree_path *path;
1948 	struct nilfs_btree_node *node;
1949 	__u64 key;
1950 	int level, ret;
1951 
1952 	WARN_ON(!buffer_dirty(bh));
1953 
1954 	btree = (struct nilfs_btree *)bmap;
1955 	path = nilfs_btree_alloc_path();
1956 	if (path == NULL)
1957 		return -ENOMEM;
1958 	nilfs_btree_init_path(path);
1959 
1960 	if (buffer_nilfs_node(bh)) {
1961 		node = (struct nilfs_btree_node *)bh->b_data;
1962 		key = nilfs_btree_node_get_key(node, 0);
1963 		level = nilfs_btree_node_get_level(node);
1964 	} else {
1965 		key = nilfs_bmap_data_get_key(bmap, bh);
1966 		level = NILFS_BTREE_LEVEL_DATA;
1967 	}
1968 
1969 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1970 	if (ret < 0) {
1971 		if (unlikely(ret == -ENOENT))
1972 			printk(KERN_CRIT "%s: key = %llu, level = %d\n",
1973 			       __func__, (unsigned long long)key, level);
1974 		goto out;
1975 	}
1976 
1977 	ret = NILFS_BMAP_USE_VBN(bmap) ?
1978 		nilfs_btree_propagate_v(btree, path, level, bh) :
1979 		nilfs_btree_propagate_p(btree, path, level, bh);
1980 
1981  out:
1982 	nilfs_btree_release_path(path);
1983 	nilfs_btree_free_path(path);
1984 
1985 	return ret;
1986 }
1987 
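/*
 * nilfs_btree_propagate_gc - bop_propagate callback used for GC bmaps:
 * propagation reduces to marking the block's DAT entry dirty.
 */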
1988 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1989 				    struct buffer_head *bh)
1990 {
1991 	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1992 }
1993 
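/*
 * nilfs_btree_add_dirty_buffer - insert the dirty node buffer @bh into the
 * list for its level, keeping each list sorted by the first key of the node.
 */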
1994 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1995 					 struct list_head *lists,
1996 					 struct buffer_head *bh)
1997 {
1998 	struct list_head *head;
1999 	struct buffer_head *cbh;
2000 	struct nilfs_btree_node *node, *cnode;
2001 	__u64 key, ckey;
2002 	int level;
2003 
2004 	get_bh(bh);
2005 	node = (struct nilfs_btree_node *)bh->b_data;
2006 	key = nilfs_btree_node_get_key(node, 0);
2007 	level = nilfs_btree_node_get_level(node);
2008 	list_for_each(head, &lists[level]) {
2009 		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2010 		cnode = (struct nilfs_btree_node *)cbh->b_data;
2011 		ckey = nilfs_btree_node_get_key(cnode, 0);
2012 		if (key < ckey)
2013 			break;
2014 	}
2015 	list_add_tail(&bh->b_assoc_buffers, head);
2016 }
2017 
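/*
 * nilfs_btree_lookup_dirty_buffers - bop_lookup_dirty_buffers callback:
 * scan the btnode cache for dirty node buffers, sort them into per-level
 * lists ordered by key, and splice the lists onto the tail of @listp,
 * lowest level first.
 */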
2018 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2019 					     struct list_head *listp)
2020 {
2021 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
2022 	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
2023 	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2024 	struct pagevec pvec;
2025 	struct buffer_head *bh, *head;
2026 	pgoff_t index = 0;
2027 	int level, i;
2028 
2029 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2030 	     level < NILFS_BTREE_LEVEL_MAX;
2031 	     level++)
2032 		INIT_LIST_HEAD(&lists[level]);
2033 
2034 	pagevec_init(&pvec, 0);
2035 
2036 	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2037 				  PAGEVEC_SIZE)) {
2038 		for (i = 0; i < pagevec_count(&pvec); i++) {
2039 			bh = head = page_buffers(pvec.pages[i]);
2040 			do {
2041 				if (buffer_dirty(bh))
2042 					nilfs_btree_add_dirty_buffer(btree,
2043 								     lists, bh);
2044 			} while ((bh = bh->b_this_page) != head);
2045 		}
2046 		pagevec_release(&pvec);
2047 		cond_resched();
2048 	}
2049 
2050 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2051 	     level < NILFS_BTREE_LEVEL_MAX;
2052 	     level++)
2053 		list_splice(&lists[level], listp->prev);
2054 }
2055 
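/*
 * nilfs_btree_assign_p - finish assignment of the real block number
 * @blocknr to the block at @level when physical block addresses are used:
 * the pointer in the parent node is replaced by @blocknr, a node buffer is
 * re-keyed in the btnode cache accordingly, and the key and level are
 * recorded in @binfo in on-disk format.
 */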
2056 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2057 				struct nilfs_btree_path *path,
2058 				int level,
2059 				struct buffer_head **bh,
2060 				sector_t blocknr,
2061 				union nilfs_binfo *binfo)
2062 {
2063 	struct nilfs_btree_node *parent;
2064 	__u64 key;
2065 	__u64 ptr;
2066 	int ret;
2067 
2068 	parent = nilfs_btree_get_node(btree, path, level + 1);
2069 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2070 				       path[level + 1].bp_index);
2071 	if (buffer_nilfs_node(*bh)) {
2072 		path[level].bp_ctxt.oldkey = ptr;
2073 		path[level].bp_ctxt.newkey = blocknr;
2074 		path[level].bp_ctxt.bh = *bh;
2075 		ret = nilfs_btnode_prepare_change_key(
2076 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2077 			&path[level].bp_ctxt);
2078 		if (ret < 0)
2079 			return ret;
2080 		nilfs_btnode_commit_change_key(
2081 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2082 			&path[level].bp_ctxt);
2083 		*bh = path[level].bp_ctxt.bh;
2084 	}
2085 
2086 	nilfs_btree_node_set_ptr(btree, parent,
2087 				 path[level + 1].bp_index, blocknr);
2088 
2089 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2090 	/* on-disk format */
2091 	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2092 	binfo->bi_dat.bi_level = level;
2093 
2094 	return 0;
2095 }
2096 
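/*
 * nilfs_btree_assign_v - finish assignment of the real block number
 * @blocknr when virtual block addresses are used: the assignment is
 * registered in the DAT for the block's virtual address, and the virtual
 * block number and block offset (key) are recorded in @binfo in on-disk
 * format.
 */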
2097 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2098 				struct nilfs_btree_path *path,
2099 				int level,
2100 				struct buffer_head **bh,
2101 				sector_t blocknr,
2102 				union nilfs_binfo *binfo)
2103 {
2104 	struct nilfs_btree_node *parent;
2105 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2106 	__u64 key;
2107 	__u64 ptr;
2108 	union nilfs_bmap_ptr_req req;
2109 	int ret;
2110 
2111 	parent = nilfs_btree_get_node(btree, path, level + 1);
2112 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2113 				       path[level + 1].bp_index);
2114 	req.bpr_ptr = ptr;
2115 	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2116 	if (ret < 0)
2117 		return ret;
2118 	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2119 
2120 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2121 	/* on-disk format */
2122 	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2123 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2124 
2125 	return 0;
2126 }
2127 
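/*
 * nilfs_btree_assign - bop_assign callback: determine the key and level of
 * the block behind *@bh, look up the path to it, and delegate to the
 * virtual or physical variant depending on the pointer type of the bmap.
 */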
2128 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2129 			      struct buffer_head **bh,
2130 			      sector_t blocknr,
2131 			      union nilfs_binfo *binfo)
2132 {
2133 	struct nilfs_btree *btree;
2134 	struct nilfs_btree_path *path;
2135 	struct nilfs_btree_node *node;
2136 	__u64 key;
2137 	int level, ret;
2138 
2139 	btree = (struct nilfs_btree *)bmap;
2140 	path = nilfs_btree_alloc_path();
2141 	if (path == NULL)
2142 		return -ENOMEM;
2143 	nilfs_btree_init_path(path);
2144 
2145 	if (buffer_nilfs_node(*bh)) {
2146 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2147 		key = nilfs_btree_node_get_key(node, 0);
2148 		level = nilfs_btree_node_get_level(node);
2149 	} else {
2150 		key = nilfs_bmap_data_get_key(bmap, *bh);
2151 		level = NILFS_BTREE_LEVEL_DATA;
2152 	}
2153 
2154 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2155 	if (ret < 0) {
2156 		WARN_ON(ret == -ENOENT);
2157 		goto out;
2158 	}
2159 
2160 	ret = NILFS_BMAP_USE_VBN(bmap) ?
2161 		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2162 		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2163 
2164  out:
2165 	nilfs_btree_release_path(path);
2166 	nilfs_btree_free_path(path);
2167 
2168 	return ret;
2169 }
2170 
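/*
 * nilfs_btree_assign_gc - bop_assign callback used for GC bmaps: the DAT
 * entry of the block is moved to @blocknr, and the virtual block number
 * and block offset (key) are recorded in @binfo in on-disk format.
 */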
2171 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2172 				 struct buffer_head **bh,
2173 				 sector_t blocknr,
2174 				 union nilfs_binfo *binfo)
2175 {
2176 	struct nilfs_btree_node *node;
2177 	__u64 key;
2178 	int ret;
2179 
2180 	ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2181 			     blocknr);
2182 	if (ret < 0)
2183 		return ret;
2184 
2185 	if (buffer_nilfs_node(*bh)) {
2186 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2187 		key = nilfs_btree_node_get_key(node, 0);
2188 	} else
2189 		key = nilfs_bmap_data_get_key(bmap, *bh);
2190 
2191 	/* on-disk format */
2192 	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2193 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2194 
2195 	return 0;
2196 }
2197 
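/*
 * nilfs_btree_mark - bop_mark callback: look up the block at @key and
 * @level, and mark its buffer and the bmap dirty.
 */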
2198 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2199 {
2200 	struct buffer_head *bh;
2201 	struct nilfs_btree *btree;
2202 	struct nilfs_btree_path *path;
2203 	__u64 ptr;
2204 	int ret;
2205 
2206 	btree = (struct nilfs_btree *)bmap;
2207 	path = nilfs_btree_alloc_path();
2208 	if (path == NULL)
2209 		return -ENOMEM;
2210 	nilfs_btree_init_path(path);
2211 
2212 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2213 	if (ret < 0) {
2214 		WARN_ON(ret == -ENOENT);
2215 		goto out;
2216 	}
2217 	ret = nilfs_btree_get_block(btree, ptr, &bh);
2218 	if (ret < 0) {
2219 		WARN_ON(ret == -ENOENT);
2220 		goto out;
2221 	}
2222 
2223 	if (!buffer_dirty(bh))
2224 		nilfs_btnode_mark_dirty(bh);
2225 	brelse(bh);
2226 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
2227 		nilfs_bmap_set_dirty(&btree->bt_bmap);
2228 
2229  out:
2230 	nilfs_btree_release_path(path);
2231 	nilfs_btree_free_path(path);
2232 	return ret;
2233 }
2234 
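/*
 * B-tree bmap operation tables: nilfs_btree_ops is the full set installed
 * by nilfs_btree_init(), and nilfs_btree_ops_gc is the reduced set
 * installed by nilfs_btree_init_gc() for bmaps used during garbage
 * collection.
 */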
2235 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2236 	.bop_lookup		=	nilfs_btree_lookup,
2237 	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2238 	.bop_insert		=	nilfs_btree_insert,
2239 	.bop_delete		=	nilfs_btree_delete,
2240 	.bop_clear		=	NULL,
2241 
2242 	.bop_propagate		=	nilfs_btree_propagate,
2243 
2244 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2245 
2246 	.bop_assign		=	nilfs_btree_assign,
2247 	.bop_mark		=	nilfs_btree_mark,
2248 
2249 	.bop_last_key		=	nilfs_btree_last_key,
2250 	.bop_check_insert	=	NULL,
2251 	.bop_check_delete	=	nilfs_btree_check_delete,
2252 	.bop_gather_data	=	nilfs_btree_gather_data,
2253 };
2254 
2255 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2256 	.bop_lookup		=	NULL,
2257 	.bop_lookup_contig	=	NULL,
2258 	.bop_insert		=	NULL,
2259 	.bop_delete		=	NULL,
2260 	.bop_clear		=	NULL,
2261 
2262 	.bop_propagate		=	nilfs_btree_propagate_gc,
2263 
2264 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2265 
2266 	.bop_assign		=	nilfs_btree_assign_gc,
2267 	.bop_mark		=	NULL,
2268 
2269 	.bop_last_key		=	NULL,
2270 	.bop_check_insert	=	NULL,
2271 	.bop_check_delete	=	NULL,
2272 	.bop_gather_data	=	NULL,
2273 };
2274 
2275 int nilfs_btree_init(struct nilfs_bmap *bmap)
2276 {
2277 	bmap->b_ops = &nilfs_btree_ops;
2278 	return 0;
2279 }
2280 
2281 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2282 {
2283 	bmap->b_ops = &nilfs_btree_ops_gc;
2284 }
2285