xref: /openbmc/linux/fs/nilfs2/btree.c (revision 1fa6ac37)
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22 
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33 
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36 	struct nilfs_btree_path *path;
37 	int level = NILFS_BTREE_LEVEL_DATA;
38 
39 	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40 	if (path == NULL)
41 		goto out;
42 
43 	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44 		path[level].bp_bh = NULL;
45 		path[level].bp_sib_bh = NULL;
46 		path[level].bp_index = 0;
47 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49 		path[level].bp_op = NULL;
50 	}
51 
52 out:
53 	return path;
54 }
55 
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58 	int level = NILFS_BTREE_LEVEL_DATA;
59 
60 	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61 		brelse(path[level].bp_bh);
62 
63 	kmem_cache_free(nilfs_btree_path_cache, path);
64 }
65 
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
70 				 struct buffer_head **bhp)
71 {
72 	struct address_space *btnc =
73 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
74 	int err;
75 
76 	err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
77 	if (err)
78 		return err == -EEXIST ? 0 : err;
79 
80 	wait_on_buffer(*bhp);
81 	if (!buffer_uptodate(*bhp)) {
82 		brelse(*bhp);
83 		return -EIO;
84 	}
85 	return 0;
86 }
87 
88 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
89 				     __u64 ptr, struct buffer_head **bhp)
90 {
91 	struct address_space *btnc =
92 		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
93 	struct buffer_head *bh;
94 
95 	bh = nilfs_btnode_create_block(btnc, ptr);
96 	if (!bh)
97 		return -ENOMEM;
98 
99 	set_buffer_nilfs_volatile(bh);
100 	*bhp = bh;
101 	return 0;
102 }
103 
104 static inline int
105 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
106 {
107 	return node->bn_flags;
108 }
109 
110 static inline void
111 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
112 {
113 	node->bn_flags = flags;
114 }
115 
116 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
117 {
118 	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
119 }
120 
121 static inline int
122 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
123 {
124 	return node->bn_level;
125 }
126 
127 static inline void
128 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
129 {
130 	node->bn_level = level;
131 }
132 
133 static inline int
134 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
135 {
136 	return le16_to_cpu(node->bn_nchildren);
137 }
138 
139 static inline void
140 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
141 {
142 	node->bn_nchildren = cpu_to_le16(nchildren);
143 }
144 
145 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
146 {
147 	return 1 << btree->bt_bmap.b_inode->i_blkbits;
148 }
149 
150 static inline int
151 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
152 			       const struct nilfs_btree *btree)
153 {
154 	return nilfs_btree_node_root(node) ?
155 		NILFS_BTREE_ROOT_NCHILDREN_MIN :
156 		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
157 }
158 
159 static inline int
160 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
161 			       const struct nilfs_btree *btree)
162 {
163 	return nilfs_btree_node_root(node) ?
164 		NILFS_BTREE_ROOT_NCHILDREN_MAX :
165 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
166 }
167 
168 static inline __le64 *
169 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
170 {
171 	return (__le64 *)((char *)(node + 1) +
172 			  (nilfs_btree_node_root(node) ?
173 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
174 }
175 
176 static inline __le64 *
177 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
178 		       const struct nilfs_btree *btree)
179 {
180 	return (__le64 *)(nilfs_btree_node_dkeys(node) +
181 			  nilfs_btree_node_nchildren_max(node, btree));
182 }
183 
184 static inline __u64
185 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
186 {
187 	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
188 }
189 
190 static inline void
191 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
192 {
193 	*(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
194 }
195 
196 static inline __u64
197 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
198 			 const struct nilfs_btree_node *node, int index)
199 {
200 	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
201 					index));
202 }
203 
204 static inline void
205 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
206 			 struct nilfs_btree_node *node, int index, __u64 ptr)
207 {
208 	*(nilfs_btree_node_dptrs(node, btree) + index) =
209 		nilfs_bmap_ptr_to_dptr(ptr);
210 }
211 
212 static void nilfs_btree_node_init(struct nilfs_btree *btree,
213 				  struct nilfs_btree_node *node,
214 				  int flags, int level, int nchildren,
215 				  const __u64 *keys, const __u64 *ptrs)
216 {
217 	__le64 *dkeys;
218 	__le64 *dptrs;
219 	int i;
220 
221 	nilfs_btree_node_set_flags(node, flags);
222 	nilfs_btree_node_set_level(node, level);
223 	nilfs_btree_node_set_nchildren(node, nchildren);
224 
225 	dkeys = nilfs_btree_node_dkeys(node);
226 	dptrs = nilfs_btree_node_dptrs(node, btree);
227 	for (i = 0; i < nchildren; i++) {
228 		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
229 		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
230 	}
231 }
232 
233 /* Assume the buffer heads corresponding to left and right are locked. */
234 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
235 				       struct nilfs_btree_node *left,
236 				       struct nilfs_btree_node *right,
237 				       int n)
238 {
239 	__le64 *ldkeys, *rdkeys;
240 	__le64 *ldptrs, *rdptrs;
241 	int lnchildren, rnchildren;
242 
243 	ldkeys = nilfs_btree_node_dkeys(left);
244 	ldptrs = nilfs_btree_node_dptrs(left, btree);
245 	lnchildren = nilfs_btree_node_get_nchildren(left);
246 
247 	rdkeys = nilfs_btree_node_dkeys(right);
248 	rdptrs = nilfs_btree_node_dptrs(right, btree);
249 	rnchildren = nilfs_btree_node_get_nchildren(right);
250 
251 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
252 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
253 	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
254 	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
255 
256 	lnchildren += n;
257 	rnchildren -= n;
258 	nilfs_btree_node_set_nchildren(left, lnchildren);
259 	nilfs_btree_node_set_nchildren(right, rnchildren);
260 }
261 
262 /* Assume that the buffer heads corresponding to left and right are locked. */
263 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
264 					struct nilfs_btree_node *left,
265 					struct nilfs_btree_node *right,
266 					int n)
267 {
268 	__le64 *ldkeys, *rdkeys;
269 	__le64 *ldptrs, *rdptrs;
270 	int lnchildren, rnchildren;
271 
272 	ldkeys = nilfs_btree_node_dkeys(left);
273 	ldptrs = nilfs_btree_node_dptrs(left, btree);
274 	lnchildren = nilfs_btree_node_get_nchildren(left);
275 
276 	rdkeys = nilfs_btree_node_dkeys(right);
277 	rdptrs = nilfs_btree_node_dptrs(right, btree);
278 	rnchildren = nilfs_btree_node_get_nchildren(right);
279 
280 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
281 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
282 	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
283 	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
284 
285 	lnchildren -= n;
286 	rnchildren += n;
287 	nilfs_btree_node_set_nchildren(left, lnchildren);
288 	nilfs_btree_node_set_nchildren(right, rnchildren);
289 }
290 
291 /* Assume that the buffer head corresponding to node is locked. */
292 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
293 				    struct nilfs_btree_node *node,
294 				    __u64 key, __u64 ptr, int index)
295 {
296 	__le64 *dkeys;
297 	__le64 *dptrs;
298 	int nchildren;
299 
300 	dkeys = nilfs_btree_node_dkeys(node);
301 	dptrs = nilfs_btree_node_dptrs(node, btree);
302 	nchildren = nilfs_btree_node_get_nchildren(node);
303 	if (index < nchildren) {
304 		memmove(dkeys + index + 1, dkeys + index,
305 			(nchildren - index) * sizeof(*dkeys));
306 		memmove(dptrs + index + 1, dptrs + index,
307 			(nchildren - index) * sizeof(*dptrs));
308 	}
309 	dkeys[index] = nilfs_bmap_key_to_dkey(key);
310 	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
311 	nchildren++;
312 	nilfs_btree_node_set_nchildren(node, nchildren);
313 }
314 
315 /* Assume that the buffer head corresponding to node is locked. */
316 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
317 				    struct nilfs_btree_node *node,
318 				    __u64 *keyp, __u64 *ptrp, int index)
319 {
320 	__u64 key;
321 	__u64 ptr;
322 	__le64 *dkeys;
323 	__le64 *dptrs;
324 	int nchildren;
325 
326 	dkeys = nilfs_btree_node_dkeys(node);
327 	dptrs = nilfs_btree_node_dptrs(node, btree);
328 	key = nilfs_bmap_dkey_to_key(dkeys[index]);
329 	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
330 	nchildren = nilfs_btree_node_get_nchildren(node);
331 	if (keyp != NULL)
332 		*keyp = key;
333 	if (ptrp != NULL)
334 		*ptrp = ptr;
335 
336 	if (index < nchildren - 1) {
337 		memmove(dkeys + index, dkeys + index + 1,
338 			(nchildren - index - 1) * sizeof(*dkeys));
339 		memmove(dptrs + index, dptrs + index + 1,
340 			(nchildren - index - 1) * sizeof(*dptrs));
341 	}
342 	nchildren--;
343 	nilfs_btree_node_set_nchildren(node, nchildren);
344 }
345 
346 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
347 				   __u64 key, int *indexp)
348 {
349 	__u64 nkey;
350 	int index, low, high, s;
351 
352 	/* binary search */
353 	low = 0;
354 	high = nilfs_btree_node_get_nchildren(node) - 1;
355 	index = 0;
356 	s = 0;
357 	while (low <= high) {
358 		index = (low + high) / 2;
359 		nkey = nilfs_btree_node_get_key(node, index);
360 		if (nkey == key) {
361 			s = 0;
362 			goto out;
363 		} else if (nkey < key) {
364 			low = index + 1;
365 			s = -1;
366 		} else {
367 			high = index - 1;
368 			s = 1;
369 		}
370 	}
371 
372 	/* adjust index */
373 	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
374 		if (s > 0 && index > 0)
375 			index--;
376 	} else if (s < 0)
377 		index++;
378 
379  out:
380 	*indexp = index;
381 
382 	return s == 0;
383 }
384 
385 static inline struct nilfs_btree_node *
386 nilfs_btree_get_root(const struct nilfs_btree *btree)
387 {
388 	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
389 }
390 
391 static inline struct nilfs_btree_node *
392 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
393 {
394 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
395 }
396 
397 static inline struct nilfs_btree_node *
398 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
399 {
400 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
401 }
402 
403 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
404 {
405 	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
406 }
407 
408 static inline struct nilfs_btree_node *
409 nilfs_btree_get_node(const struct nilfs_btree *btree,
410 		     const struct nilfs_btree_path *path,
411 		     int level)
412 {
413 	return (level == nilfs_btree_height(btree) - 1) ?
414 		nilfs_btree_get_root(btree) :
415 		nilfs_btree_get_nonroot_node(path, level);
416 }
417 
418 static inline int
419 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
420 {
421 	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
422 		dump_stack();
423 		printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
424 		       nilfs_btree_node_get_level(node), level);
425 		return 1;
426 	}
427 	return 0;
428 }
429 
430 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
431 				 struct nilfs_btree_path *path,
432 				 __u64 key, __u64 *ptrp, int minlevel)
433 {
434 	struct nilfs_btree_node *node;
435 	__u64 ptr;
436 	int level, index, found, ret;
437 
438 	node = nilfs_btree_get_root(btree);
439 	level = nilfs_btree_node_get_level(node);
440 	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
441 		return -ENOENT;
442 
443 	found = nilfs_btree_node_lookup(node, key, &index);
444 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
445 	path[level].bp_bh = NULL;
446 	path[level].bp_index = index;
447 
448 	for (level--; level >= minlevel; level--) {
449 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
450 		if (ret < 0)
451 			return ret;
452 		node = nilfs_btree_get_nonroot_node(path, level);
453 		if (nilfs_btree_bad_node(node, level))
454 			return -EINVAL;
455 		if (!found)
456 			found = nilfs_btree_node_lookup(node, key, &index);
457 		else
458 			index = 0;
459 		if (index < nilfs_btree_node_nchildren_max(node, btree))
460 			ptr = nilfs_btree_node_get_ptr(btree, node, index);
461 		else {
462 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
463 			/* insert */
464 			ptr = NILFS_BMAP_INVALID_PTR;
465 		}
466 		path[level].bp_index = index;
467 	}
468 	if (!found)
469 		return -ENOENT;
470 
471 	if (ptrp != NULL)
472 		*ptrp = ptr;
473 
474 	return 0;
475 }
476 
477 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
478 				      struct nilfs_btree_path *path,
479 				      __u64 *keyp, __u64 *ptrp)
480 {
481 	struct nilfs_btree_node *node;
482 	__u64 ptr;
483 	int index, level, ret;
484 
485 	node = nilfs_btree_get_root(btree);
486 	index = nilfs_btree_node_get_nchildren(node) - 1;
487 	if (index < 0)
488 		return -ENOENT;
489 	level = nilfs_btree_node_get_level(node);
490 	ptr = nilfs_btree_node_get_ptr(btree, node, index);
491 	path[level].bp_bh = NULL;
492 	path[level].bp_index = index;
493 
494 	for (level--; level > 0; level--) {
495 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
496 		if (ret < 0)
497 			return ret;
498 		node = nilfs_btree_get_nonroot_node(path, level);
499 		if (nilfs_btree_bad_node(node, level))
500 			return -EINVAL;
501 		index = nilfs_btree_node_get_nchildren(node) - 1;
502 		ptr = nilfs_btree_node_get_ptr(btree, node, index);
503 		path[level].bp_index = index;
504 	}
505 
506 	if (keyp != NULL)
507 		*keyp = nilfs_btree_node_get_key(node, index);
508 	if (ptrp != NULL)
509 		*ptrp = ptr;
510 
511 	return 0;
512 }
513 
514 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
515 			      __u64 key, int level, __u64 *ptrp)
516 {
517 	struct nilfs_btree *btree;
518 	struct nilfs_btree_path *path;
519 	__u64 ptr;
520 	int ret;
521 
522 	btree = (struct nilfs_btree *)bmap;
523 	path = nilfs_btree_alloc_path();
524 	if (path == NULL)
525 		return -ENOMEM;
526 
527 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
528 
529 	if (ptrp != NULL)
530 		*ptrp = ptr;
531 
532 	nilfs_btree_free_path(path);
533 
534 	return ret;
535 }
536 
537 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
538 				     __u64 key, __u64 *ptrp, unsigned maxblocks)
539 {
540 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
541 	struct nilfs_btree_path *path;
542 	struct nilfs_btree_node *node;
543 	struct inode *dat = NULL;
544 	__u64 ptr, ptr2;
545 	sector_t blocknr;
546 	int level = NILFS_BTREE_LEVEL_NODE_MIN;
547 	int ret, cnt, index, maxlevel;
548 
549 	path = nilfs_btree_alloc_path();
550 	if (path == NULL)
551 		return -ENOMEM;
552 
553 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
554 	if (ret < 0)
555 		goto out;
556 
557 	if (NILFS_BMAP_USE_VBN(bmap)) {
558 		dat = nilfs_bmap_get_dat(bmap);
559 		ret = nilfs_dat_translate(dat, ptr, &blocknr);
560 		if (ret < 0)
561 			goto out;
562 		ptr = blocknr;
563 	}
564 	cnt = 1;
565 	if (cnt == maxblocks)
566 		goto end;
567 
568 	maxlevel = nilfs_btree_height(btree) - 1;
569 	node = nilfs_btree_get_node(btree, path, level);
570 	index = path[level].bp_index + 1;
571 	for (;;) {
572 		while (index < nilfs_btree_node_get_nchildren(node)) {
573 			if (nilfs_btree_node_get_key(node, index) !=
574 			    key + cnt)
575 				goto end;
576 			ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
577 			if (dat) {
578 				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
579 				if (ret < 0)
580 					goto out;
581 				ptr2 = blocknr;
582 			}
583 			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
584 				goto end;
585 			index++;
586 			continue;
587 		}
588 		if (level == maxlevel)
589 			break;
590 
591 		/* look-up right sibling node */
592 		node = nilfs_btree_get_node(btree, path, level + 1);
593 		index = path[level + 1].bp_index + 1;
594 		if (index >= nilfs_btree_node_get_nchildren(node) ||
595 		    nilfs_btree_node_get_key(node, index) != key + cnt)
596 			break;
597 		ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
598 		path[level + 1].bp_index = index;
599 
600 		brelse(path[level].bp_bh);
601 		path[level].bp_bh = NULL;
602 		ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
603 		if (ret < 0)
604 			goto out;
605 		node = nilfs_btree_get_nonroot_node(path, level);
606 		index = 0;
607 		path[level].bp_index = index;
608 	}
609  end:
610 	*ptrp = ptr;
611 	ret = cnt;
612  out:
613 	nilfs_btree_free_path(path);
614 	return ret;
615 }
616 
617 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
618 				    struct nilfs_btree_path *path,
619 				    int level, __u64 key)
620 {
621 	if (level < nilfs_btree_height(btree) - 1) {
622 		do {
623 			nilfs_btree_node_set_key(
624 				nilfs_btree_get_nonroot_node(path, level),
625 				path[level].bp_index, key);
626 			if (!buffer_dirty(path[level].bp_bh))
627 				nilfs_btnode_mark_dirty(path[level].bp_bh);
628 		} while ((path[level].bp_index == 0) &&
629 			 (++level < nilfs_btree_height(btree) - 1));
630 	}
631 
632 	/* root */
633 	if (level == nilfs_btree_height(btree) - 1) {
634 		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
635 					 path[level].bp_index, key);
636 	}
637 }
638 
639 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
640 				  struct nilfs_btree_path *path,
641 				  int level, __u64 *keyp, __u64 *ptrp)
642 {
643 	struct nilfs_btree_node *node;
644 
645 	if (level < nilfs_btree_height(btree) - 1) {
646 		node = nilfs_btree_get_nonroot_node(path, level);
647 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
648 					path[level].bp_index);
649 		if (!buffer_dirty(path[level].bp_bh))
650 			nilfs_btnode_mark_dirty(path[level].bp_bh);
651 
652 		if (path[level].bp_index == 0)
653 			nilfs_btree_promote_key(btree, path, level + 1,
654 						nilfs_btree_node_get_key(node,
655 									 0));
656 	} else {
657 		node = nilfs_btree_get_root(btree);
658 		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
659 					path[level].bp_index);
660 	}
661 }
662 
663 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
664 				   struct nilfs_btree_path *path,
665 				   int level, __u64 *keyp, __u64 *ptrp)
666 {
667 	struct nilfs_btree_node *node, *left;
668 	int nchildren, lnchildren, n, move;
669 
670 	node = nilfs_btree_get_nonroot_node(path, level);
671 	left = nilfs_btree_get_sib_node(path, level);
672 	nchildren = nilfs_btree_node_get_nchildren(node);
673 	lnchildren = nilfs_btree_node_get_nchildren(left);
674 	move = 0;
675 
676 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
677 	if (n > path[level].bp_index) {
678 		/* move insert point */
679 		n--;
680 		move = 1;
681 	}
682 
683 	nilfs_btree_node_move_left(btree, left, node, n);
684 
685 	if (!buffer_dirty(path[level].bp_bh))
686 		nilfs_btnode_mark_dirty(path[level].bp_bh);
687 	if (!buffer_dirty(path[level].bp_sib_bh))
688 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
689 
690 	nilfs_btree_promote_key(btree, path, level + 1,
691 				nilfs_btree_node_get_key(node, 0));
692 
693 	if (move) {
694 		brelse(path[level].bp_bh);
695 		path[level].bp_bh = path[level].bp_sib_bh;
696 		path[level].bp_sib_bh = NULL;
697 		path[level].bp_index += lnchildren;
698 		path[level + 1].bp_index--;
699 	} else {
700 		brelse(path[level].bp_sib_bh);
701 		path[level].bp_sib_bh = NULL;
702 		path[level].bp_index -= n;
703 	}
704 
705 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
706 }
707 
708 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
709 				    struct nilfs_btree_path *path,
710 				    int level, __u64 *keyp, __u64 *ptrp)
711 {
712 	struct nilfs_btree_node *node, *right;
713 	int nchildren, rnchildren, n, move;
714 
715 	node = nilfs_btree_get_nonroot_node(path, level);
716 	right = nilfs_btree_get_sib_node(path, level);
717 	nchildren = nilfs_btree_node_get_nchildren(node);
718 	rnchildren = nilfs_btree_node_get_nchildren(right);
719 	move = 0;
720 
721 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
722 	if (n > nchildren - path[level].bp_index) {
723 		/* move insert point */
724 		n--;
725 		move = 1;
726 	}
727 
728 	nilfs_btree_node_move_right(btree, node, right, n);
729 
730 	if (!buffer_dirty(path[level].bp_bh))
731 		nilfs_btnode_mark_dirty(path[level].bp_bh);
732 	if (!buffer_dirty(path[level].bp_sib_bh))
733 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
734 
735 	path[level + 1].bp_index++;
736 	nilfs_btree_promote_key(btree, path, level + 1,
737 				nilfs_btree_node_get_key(right, 0));
738 	path[level + 1].bp_index--;
739 
740 	if (move) {
741 		brelse(path[level].bp_bh);
742 		path[level].bp_bh = path[level].bp_sib_bh;
743 		path[level].bp_sib_bh = NULL;
744 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
745 		path[level + 1].bp_index++;
746 	} else {
747 		brelse(path[level].bp_sib_bh);
748 		path[level].bp_sib_bh = NULL;
749 	}
750 
751 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
752 }
753 
754 static void nilfs_btree_split(struct nilfs_btree *btree,
755 			      struct nilfs_btree_path *path,
756 			      int level, __u64 *keyp, __u64 *ptrp)
757 {
758 	struct nilfs_btree_node *node, *right;
759 	__u64 newkey;
760 	__u64 newptr;
761 	int nchildren, n, move;
762 
763 	node = nilfs_btree_get_nonroot_node(path, level);
764 	right = nilfs_btree_get_sib_node(path, level);
765 	nchildren = nilfs_btree_node_get_nchildren(node);
766 	move = 0;
767 
768 	n = (nchildren + 1) / 2;
769 	if (n > nchildren - path[level].bp_index) {
770 		n--;
771 		move = 1;
772 	}
773 
774 	nilfs_btree_node_move_right(btree, node, right, n);
775 
776 	if (!buffer_dirty(path[level].bp_bh))
777 		nilfs_btnode_mark_dirty(path[level].bp_bh);
778 	if (!buffer_dirty(path[level].bp_sib_bh))
779 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
780 
781 	newkey = nilfs_btree_node_get_key(right, 0);
782 	newptr = path[level].bp_newreq.bpr_ptr;
783 
784 	if (move) {
785 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
786 		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
787 					path[level].bp_index);
788 
789 		*keyp = nilfs_btree_node_get_key(right, 0);
790 		*ptrp = path[level].bp_newreq.bpr_ptr;
791 
792 		brelse(path[level].bp_bh);
793 		path[level].bp_bh = path[level].bp_sib_bh;
794 		path[level].bp_sib_bh = NULL;
795 	} else {
796 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
797 
798 		*keyp = nilfs_btree_node_get_key(right, 0);
799 		*ptrp = path[level].bp_newreq.bpr_ptr;
800 
801 		brelse(path[level].bp_sib_bh);
802 		path[level].bp_sib_bh = NULL;
803 	}
804 
805 	path[level + 1].bp_index++;
806 }
807 
808 static void nilfs_btree_grow(struct nilfs_btree *btree,
809 			     struct nilfs_btree_path *path,
810 			     int level, __u64 *keyp, __u64 *ptrp)
811 {
812 	struct nilfs_btree_node *root, *child;
813 	int n;
814 
815 	root = nilfs_btree_get_root(btree);
816 	child = nilfs_btree_get_sib_node(path, level);
817 
818 	n = nilfs_btree_node_get_nchildren(root);
819 
820 	nilfs_btree_node_move_right(btree, root, child, n);
821 	nilfs_btree_node_set_level(root, level + 1);
822 
823 	if (!buffer_dirty(path[level].bp_sib_bh))
824 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
825 
826 	path[level].bp_bh = path[level].bp_sib_bh;
827 	path[level].bp_sib_bh = NULL;
828 
829 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
830 
831 	*keyp = nilfs_btree_node_get_key(child, 0);
832 	*ptrp = path[level].bp_newreq.bpr_ptr;
833 }
834 
835 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
836 				   const struct nilfs_btree_path *path)
837 {
838 	struct nilfs_btree_node *node;
839 	int level;
840 
841 	if (path == NULL)
842 		return NILFS_BMAP_INVALID_PTR;
843 
844 	/* left sibling */
845 	level = NILFS_BTREE_LEVEL_NODE_MIN;
846 	if (path[level].bp_index > 0) {
847 		node = nilfs_btree_get_node(btree, path, level);
848 		return nilfs_btree_node_get_ptr(btree, node,
849 						path[level].bp_index - 1);
850 	}
851 
852 	/* parent */
853 	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
854 	if (level <= nilfs_btree_height(btree) - 1) {
855 		node = nilfs_btree_get_node(btree, path, level);
856 		return nilfs_btree_node_get_ptr(btree, node,
857 						path[level].bp_index);
858 	}
859 
860 	return NILFS_BMAP_INVALID_PTR;
861 }
862 
863 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
864 				       const struct nilfs_btree_path *path,
865 				       __u64 key)
866 {
867 	__u64 ptr;
868 
869 	ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
870 	if (ptr != NILFS_BMAP_INVALID_PTR)
871 		/* sequential access */
872 		return ptr;
873 	else {
874 		ptr = nilfs_btree_find_near(btree, path);
875 		if (ptr != NILFS_BMAP_INVALID_PTR)
876 			/* near */
877 			return ptr;
878 	}
879 	/* block group */
880 	return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
881 }
882 
883 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
884 				     __u64 ptr)
885 {
886 	btree->bt_bmap.b_last_allocated_key = key;
887 	btree->bt_bmap.b_last_allocated_ptr = ptr;
888 }
889 
890 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
891 				      struct nilfs_btree_path *path,
892 				      int *levelp, __u64 key, __u64 ptr,
893 				      struct nilfs_bmap_stats *stats)
894 {
895 	struct buffer_head *bh;
896 	struct nilfs_btree_node *node, *parent, *sib;
897 	__u64 sibptr;
898 	int pindex, level, ret;
899 	struct inode *dat = NULL;
900 
901 	stats->bs_nblocks = 0;
902 	level = NILFS_BTREE_LEVEL_DATA;
903 
904 	/* allocate a new ptr for data block */
905 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
906 		path[level].bp_newreq.bpr_ptr =
907 			nilfs_btree_find_target_v(btree, path, key);
908 		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
909 	}
910 
911 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
912 					   &path[level].bp_newreq, dat);
913 	if (ret < 0)
914 		goto err_out_data;
915 
916 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
917 	     level < nilfs_btree_height(btree) - 1;
918 	     level++) {
919 		node = nilfs_btree_get_nonroot_node(path, level);
920 		if (nilfs_btree_node_get_nchildren(node) <
921 		    nilfs_btree_node_nchildren_max(node, btree)) {
922 			path[level].bp_op = nilfs_btree_do_insert;
923 			stats->bs_nblocks++;
924 			goto out;
925 		}
926 
927 		parent = nilfs_btree_get_node(btree, path, level + 1);
928 		pindex = path[level + 1].bp_index;
929 
930 		/* left sibling */
931 		if (pindex > 0) {
932 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
933 							  pindex - 1);
934 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
935 			if (ret < 0)
936 				goto err_out_child_node;
937 			sib = (struct nilfs_btree_node *)bh->b_data;
938 			if (nilfs_btree_node_get_nchildren(sib) <
939 			    nilfs_btree_node_nchildren_max(sib, btree)) {
940 				path[level].bp_sib_bh = bh;
941 				path[level].bp_op = nilfs_btree_carry_left;
942 				stats->bs_nblocks++;
943 				goto out;
944 			} else
945 				brelse(bh);
946 		}
947 
948 		/* right sibling */
949 		if (pindex <
950 		    nilfs_btree_node_get_nchildren(parent) - 1) {
951 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
952 							  pindex + 1);
953 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
954 			if (ret < 0)
955 				goto err_out_child_node;
956 			sib = (struct nilfs_btree_node *)bh->b_data;
957 			if (nilfs_btree_node_get_nchildren(sib) <
958 			    nilfs_btree_node_nchildren_max(sib, btree)) {
959 				path[level].bp_sib_bh = bh;
960 				path[level].bp_op = nilfs_btree_carry_right;
961 				stats->bs_nblocks++;
962 				goto out;
963 			} else
964 				brelse(bh);
965 		}
966 
967 		/* split */
968 		path[level].bp_newreq.bpr_ptr =
969 			path[level - 1].bp_newreq.bpr_ptr + 1;
970 		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
971 						   &path[level].bp_newreq, dat);
972 		if (ret < 0)
973 			goto err_out_child_node;
974 		ret = nilfs_btree_get_new_block(btree,
975 						path[level].bp_newreq.bpr_ptr,
976 						&bh);
977 		if (ret < 0)
978 			goto err_out_curr_node;
979 
980 		stats->bs_nblocks++;
981 
982 		nilfs_btree_node_init(btree,
983 				      (struct nilfs_btree_node *)bh->b_data,
984 				      0, level, 0, NULL, NULL);
985 		path[level].bp_sib_bh = bh;
986 		path[level].bp_op = nilfs_btree_split;
987 	}
988 
989 	/* root */
990 	node = nilfs_btree_get_root(btree);
991 	if (nilfs_btree_node_get_nchildren(node) <
992 	    nilfs_btree_node_nchildren_max(node, btree)) {
993 		path[level].bp_op = nilfs_btree_do_insert;
994 		stats->bs_nblocks++;
995 		goto out;
996 	}
997 
998 	/* grow */
999 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1000 	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1001 					   &path[level].bp_newreq, dat);
1002 	if (ret < 0)
1003 		goto err_out_child_node;
1004 	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1005 					&bh);
1006 	if (ret < 0)
1007 		goto err_out_curr_node;
1008 
1009 	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1010 			      0, level, 0, NULL, NULL);
1011 	path[level].bp_sib_bh = bh;
1012 	path[level].bp_op = nilfs_btree_grow;
1013 
1014 	level++;
1015 	path[level].bp_op = nilfs_btree_do_insert;
1016 
1017 	/* a newly-created node block and a data block are added */
1018 	stats->bs_nblocks += 2;
1019 
1020 	/* success */
1021  out:
1022 	*levelp = level;
1023 	return ret;
1024 
1025 	/* error */
1026  err_out_curr_node:
1027 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1028 				   dat);
1029  err_out_child_node:
1030 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1031 		nilfs_btnode_delete(path[level].bp_sib_bh);
1032 		nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1033 					   &path[level].bp_newreq, dat);
1034 
1035 	}
1036 
1037 	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1038 				   dat);
1039  err_out_data:
1040 	*levelp = level;
1041 	stats->bs_nblocks = 0;
1042 	return ret;
1043 }
1044 
1045 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1046 				      struct nilfs_btree_path *path,
1047 				      int maxlevel, __u64 key, __u64 ptr)
1048 {
1049 	struct inode *dat = NULL;
1050 	int level;
1051 
1052 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1053 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1054 	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1055 		nilfs_btree_set_target_v(btree, key, ptr);
1056 		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1057 	}
1058 
1059 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1060 		nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1061 					    &path[level - 1].bp_newreq, dat);
1062 		path[level].bp_op(btree, path, level, &key, &ptr);
1063 	}
1064 
1065 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1066 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1067 }
1068 
1069 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1070 {
1071 	struct nilfs_btree *btree;
1072 	struct nilfs_btree_path *path;
1073 	struct nilfs_bmap_stats stats;
1074 	int level, ret;
1075 
1076 	btree = (struct nilfs_btree *)bmap;
1077 	path = nilfs_btree_alloc_path();
1078 	if (path == NULL)
1079 		return -ENOMEM;
1080 
1081 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1082 				    NILFS_BTREE_LEVEL_NODE_MIN);
1083 	if (ret != -ENOENT) {
1084 		if (ret == 0)
1085 			ret = -EEXIST;
1086 		goto out;
1087 	}
1088 
1089 	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1090 	if (ret < 0)
1091 		goto out;
1092 	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1093 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1094 
1095  out:
1096 	nilfs_btree_free_path(path);
1097 	return ret;
1098 }
1099 
1100 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1101 				  struct nilfs_btree_path *path,
1102 				  int level, __u64 *keyp, __u64 *ptrp)
1103 {
1104 	struct nilfs_btree_node *node;
1105 
1106 	if (level < nilfs_btree_height(btree) - 1) {
1107 		node = nilfs_btree_get_nonroot_node(path, level);
1108 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1109 					path[level].bp_index);
1110 		if (!buffer_dirty(path[level].bp_bh))
1111 			nilfs_btnode_mark_dirty(path[level].bp_bh);
1112 		if (path[level].bp_index == 0)
1113 			nilfs_btree_promote_key(btree, path, level + 1,
1114 				nilfs_btree_node_get_key(node, 0));
1115 	} else {
1116 		node = nilfs_btree_get_root(btree);
1117 		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1118 					path[level].bp_index);
1119 	}
1120 }
1121 
1122 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1123 				    struct nilfs_btree_path *path,
1124 				    int level, __u64 *keyp, __u64 *ptrp)
1125 {
1126 	struct nilfs_btree_node *node, *left;
1127 	int nchildren, lnchildren, n;
1128 
1129 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1130 
1131 	node = nilfs_btree_get_nonroot_node(path, level);
1132 	left = nilfs_btree_get_sib_node(path, level);
1133 	nchildren = nilfs_btree_node_get_nchildren(node);
1134 	lnchildren = nilfs_btree_node_get_nchildren(left);
1135 
1136 	n = (nchildren + lnchildren) / 2 - nchildren;
1137 
1138 	nilfs_btree_node_move_right(btree, left, node, n);
1139 
1140 	if (!buffer_dirty(path[level].bp_bh))
1141 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1142 	if (!buffer_dirty(path[level].bp_sib_bh))
1143 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1144 
1145 	nilfs_btree_promote_key(btree, path, level + 1,
1146 				nilfs_btree_node_get_key(node, 0));
1147 
1148 	brelse(path[level].bp_sib_bh);
1149 	path[level].bp_sib_bh = NULL;
1150 	path[level].bp_index += n;
1151 }
1152 
1153 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1154 				     struct nilfs_btree_path *path,
1155 				     int level, __u64 *keyp, __u64 *ptrp)
1156 {
1157 	struct nilfs_btree_node *node, *right;
1158 	int nchildren, rnchildren, n;
1159 
1160 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1161 
1162 	node = nilfs_btree_get_nonroot_node(path, level);
1163 	right = nilfs_btree_get_sib_node(path, level);
1164 	nchildren = nilfs_btree_node_get_nchildren(node);
1165 	rnchildren = nilfs_btree_node_get_nchildren(right);
1166 
1167 	n = (nchildren + rnchildren) / 2 - nchildren;
1168 
1169 	nilfs_btree_node_move_left(btree, node, right, n);
1170 
1171 	if (!buffer_dirty(path[level].bp_bh))
1172 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1173 	if (!buffer_dirty(path[level].bp_sib_bh))
1174 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1175 
1176 	path[level + 1].bp_index++;
1177 	nilfs_btree_promote_key(btree, path, level + 1,
1178 				nilfs_btree_node_get_key(right, 0));
1179 	path[level + 1].bp_index--;
1180 
1181 	brelse(path[level].bp_sib_bh);
1182 	path[level].bp_sib_bh = NULL;
1183 }
1184 
1185 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1186 				    struct nilfs_btree_path *path,
1187 				    int level, __u64 *keyp, __u64 *ptrp)
1188 {
1189 	struct nilfs_btree_node *node, *left;
1190 	int n;
1191 
1192 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1193 
1194 	node = nilfs_btree_get_nonroot_node(path, level);
1195 	left = nilfs_btree_get_sib_node(path, level);
1196 
1197 	n = nilfs_btree_node_get_nchildren(node);
1198 
1199 	nilfs_btree_node_move_left(btree, left, node, n);
1200 
1201 	if (!buffer_dirty(path[level].bp_sib_bh))
1202 		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1203 
1204 	nilfs_btnode_delete(path[level].bp_bh);
1205 	path[level].bp_bh = path[level].bp_sib_bh;
1206 	path[level].bp_sib_bh = NULL;
1207 	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1208 }
1209 
1210 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1211 				     struct nilfs_btree_path *path,
1212 				     int level, __u64 *keyp, __u64 *ptrp)
1213 {
1214 	struct nilfs_btree_node *node, *right;
1215 	int n;
1216 
1217 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1218 
1219 	node = nilfs_btree_get_nonroot_node(path, level);
1220 	right = nilfs_btree_get_sib_node(path, level);
1221 
1222 	n = nilfs_btree_node_get_nchildren(right);
1223 
1224 	nilfs_btree_node_move_left(btree, node, right, n);
1225 
1226 	if (!buffer_dirty(path[level].bp_bh))
1227 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1228 
1229 	nilfs_btnode_delete(path[level].bp_sib_bh);
1230 	path[level].bp_sib_bh = NULL;
1231 	path[level + 1].bp_index++;
1232 }
1233 
1234 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1235 			       struct nilfs_btree_path *path,
1236 			       int level, __u64 *keyp, __u64 *ptrp)
1237 {
1238 	struct nilfs_btree_node *root, *child;
1239 	int n;
1240 
1241 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1242 
1243 	root = nilfs_btree_get_root(btree);
1244 	child = nilfs_btree_get_nonroot_node(path, level);
1245 
1246 	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1247 	nilfs_btree_node_set_level(root, level);
1248 	n = nilfs_btree_node_get_nchildren(child);
1249 	nilfs_btree_node_move_left(btree, root, child, n);
1250 
1251 	nilfs_btnode_delete(path[level].bp_bh);
1252 	path[level].bp_bh = NULL;
1253 }
1254 
1255 
1256 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1257 				      struct nilfs_btree_path *path,
1258 				      int *levelp,
1259 				      struct nilfs_bmap_stats *stats,
1260 				      struct inode *dat)
1261 {
1262 	struct buffer_head *bh;
1263 	struct nilfs_btree_node *node, *parent, *sib;
1264 	__u64 sibptr;
1265 	int pindex, level, ret;
1266 
1267 	ret = 0;
1268 	stats->bs_nblocks = 0;
1269 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1270 	     level < nilfs_btree_height(btree) - 1;
1271 	     level++) {
1272 		node = nilfs_btree_get_nonroot_node(path, level);
1273 		path[level].bp_oldreq.bpr_ptr =
1274 			nilfs_btree_node_get_ptr(btree, node,
1275 						 path[level].bp_index);
1276 		ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1277 						 &path[level].bp_oldreq, dat);
1278 		if (ret < 0)
1279 			goto err_out_child_node;
1280 
1281 		if (nilfs_btree_node_get_nchildren(node) >
1282 		    nilfs_btree_node_nchildren_min(node, btree)) {
1283 			path[level].bp_op = nilfs_btree_do_delete;
1284 			stats->bs_nblocks++;
1285 			goto out;
1286 		}
1287 
1288 		parent = nilfs_btree_get_node(btree, path, level + 1);
1289 		pindex = path[level + 1].bp_index;
1290 
1291 		if (pindex > 0) {
1292 			/* left sibling */
1293 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1294 							  pindex - 1);
1295 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1296 			if (ret < 0)
1297 				goto err_out_curr_node;
1298 			sib = (struct nilfs_btree_node *)bh->b_data;
1299 			if (nilfs_btree_node_get_nchildren(sib) >
1300 			    nilfs_btree_node_nchildren_min(sib, btree)) {
1301 				path[level].bp_sib_bh = bh;
1302 				path[level].bp_op = nilfs_btree_borrow_left;
1303 				stats->bs_nblocks++;
1304 				goto out;
1305 			} else {
1306 				path[level].bp_sib_bh = bh;
1307 				path[level].bp_op = nilfs_btree_concat_left;
1308 				stats->bs_nblocks++;
1309 				/* continue; */
1310 			}
1311 		} else if (pindex <
1312 			   nilfs_btree_node_get_nchildren(parent) - 1) {
1313 			/* right sibling */
1314 			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1315 							  pindex + 1);
1316 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1317 			if (ret < 0)
1318 				goto err_out_curr_node;
1319 			sib = (struct nilfs_btree_node *)bh->b_data;
1320 			if (nilfs_btree_node_get_nchildren(sib) >
1321 			    nilfs_btree_node_nchildren_min(sib, btree)) {
1322 				path[level].bp_sib_bh = bh;
1323 				path[level].bp_op = nilfs_btree_borrow_right;
1324 				stats->bs_nblocks++;
1325 				goto out;
1326 			} else {
1327 				path[level].bp_sib_bh = bh;
1328 				path[level].bp_op = nilfs_btree_concat_right;
1329 				stats->bs_nblocks++;
1330 				/* continue; */
1331 			}
1332 		} else {
1333 			/* no siblings */
1334 			/* the only child of the root node */
1335 			WARN_ON(level != nilfs_btree_height(btree) - 2);
1336 			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1337 			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1338 				path[level].bp_op = nilfs_btree_shrink;
1339 				stats->bs_nblocks += 2;
1340 			} else {
1341 				path[level].bp_op = nilfs_btree_do_delete;
1342 				stats->bs_nblocks++;
1343 			}
1344 
1345 			goto out;
1346 
1347 		}
1348 	}
1349 
1350 	node = nilfs_btree_get_root(btree);
1351 	path[level].bp_oldreq.bpr_ptr =
1352 		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1353 
1354 	ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1355 					 &path[level].bp_oldreq, dat);
1356 	if (ret < 0)
1357 		goto err_out_child_node;
1358 
1359 	/* child of the root node is deleted */
1360 	path[level].bp_op = nilfs_btree_do_delete;
1361 	stats->bs_nblocks++;
1362 
1363 	/* success */
1364  out:
1365 	*levelp = level;
1366 	return ret;
1367 
1368 	/* error */
1369  err_out_curr_node:
1370 	nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1371  err_out_child_node:
1372 	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1373 		brelse(path[level].bp_sib_bh);
1374 		nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1375 					 &path[level].bp_oldreq, dat);
1376 	}
1377 	*levelp = level;
1378 	stats->bs_nblocks = 0;
1379 	return ret;
1380 }
1381 
1382 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1383 				      struct nilfs_btree_path *path,
1384 				      int maxlevel, struct inode *dat)
1385 {
1386 	int level;
1387 
1388 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1389 		nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1390 					  &path[level].bp_oldreq, dat);
1391 		path[level].bp_op(btree, path, level, NULL, NULL);
1392 	}
1393 
1394 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1395 		nilfs_bmap_set_dirty(&btree->bt_bmap);
1396 }
1397 
1398 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1399 
1400 {
1401 	struct nilfs_btree *btree;
1402 	struct nilfs_btree_path *path;
1403 	struct nilfs_bmap_stats stats;
1404 	struct inode *dat;
1405 	int level, ret;
1406 
1407 	btree = (struct nilfs_btree *)bmap;
1408 	path = nilfs_btree_alloc_path();
1409 	if (path == NULL)
1410 		return -ENOMEM;
1411 
1412 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1413 				    NILFS_BTREE_LEVEL_NODE_MIN);
1414 	if (ret < 0)
1415 		goto out;
1416 
1417 
1418 	dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1419 		nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1420 
1421 	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1422 	if (ret < 0)
1423 		goto out;
1424 	nilfs_btree_commit_delete(btree, path, level, dat);
1425 	nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1426 
1427 out:
1428 	nilfs_btree_free_path(path);
1429 	return ret;
1430 }
1431 
1432 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1433 {
1434 	struct nilfs_btree *btree;
1435 	struct nilfs_btree_path *path;
1436 	int ret;
1437 
1438 	btree = (struct nilfs_btree *)bmap;
1439 	path = nilfs_btree_alloc_path();
1440 	if (path == NULL)
1441 		return -ENOMEM;
1442 
1443 	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1444 
1445 	nilfs_btree_free_path(path);
1446 
1447 	return ret;
1448 }
1449 
1450 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1451 {
1452 	struct buffer_head *bh;
1453 	struct nilfs_btree *btree;
1454 	struct nilfs_btree_node *root, *node;
1455 	__u64 maxkey, nextmaxkey;
1456 	__u64 ptr;
1457 	int nchildren, ret;
1458 
1459 	btree = (struct nilfs_btree *)bmap;
1460 	root = nilfs_btree_get_root(btree);
1461 	switch (nilfs_btree_height(btree)) {
1462 	case 2:
1463 		bh = NULL;
1464 		node = root;
1465 		break;
1466 	case 3:
1467 		nchildren = nilfs_btree_node_get_nchildren(root);
1468 		if (nchildren > 1)
1469 			return 0;
1470 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1471 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1472 		if (ret < 0)
1473 			return ret;
1474 		node = (struct nilfs_btree_node *)bh->b_data;
1475 		break;
1476 	default:
1477 		return 0;
1478 	}
1479 
1480 	nchildren = nilfs_btree_node_get_nchildren(node);
1481 	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1482 	nextmaxkey = (nchildren > 1) ?
1483 		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1484 	if (bh != NULL)
1485 		brelse(bh);
1486 
1487 	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1488 }
1489 
1490 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1491 				   __u64 *keys, __u64 *ptrs, int nitems)
1492 {
1493 	struct buffer_head *bh;
1494 	struct nilfs_btree *btree;
1495 	struct nilfs_btree_node *node, *root;
1496 	__le64 *dkeys;
1497 	__le64 *dptrs;
1498 	__u64 ptr;
1499 	int nchildren, i, ret;
1500 
1501 	btree = (struct nilfs_btree *)bmap;
1502 	root = nilfs_btree_get_root(btree);
1503 	switch (nilfs_btree_height(btree)) {
1504 	case 2:
1505 		bh = NULL;
1506 		node = root;
1507 		break;
1508 	case 3:
1509 		nchildren = nilfs_btree_node_get_nchildren(root);
1510 		WARN_ON(nchildren > 1);
1511 		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1512 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1513 		if (ret < 0)
1514 			return ret;
1515 		node = (struct nilfs_btree_node *)bh->b_data;
1516 		break;
1517 	default:
1518 		node = NULL;
1519 		return -EINVAL;
1520 	}
1521 
1522 	nchildren = nilfs_btree_node_get_nchildren(node);
1523 	if (nchildren < nitems)
1524 		nitems = nchildren;
1525 	dkeys = nilfs_btree_node_dkeys(node);
1526 	dptrs = nilfs_btree_node_dptrs(node, btree);
1527 	for (i = 0; i < nitems; i++) {
1528 		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1529 		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1530 	}
1531 
1532 	if (bh != NULL)
1533 		brelse(bh);
1534 
1535 	return nitems;
1536 }
1537 
1538 static int
1539 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1540 				       union nilfs_bmap_ptr_req *dreq,
1541 				       union nilfs_bmap_ptr_req *nreq,
1542 				       struct buffer_head **bhp,
1543 				       struct nilfs_bmap_stats *stats)
1544 {
1545 	struct buffer_head *bh;
1546 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1547 	struct inode *dat = NULL;
1548 	int ret;
1549 
1550 	stats->bs_nblocks = 0;
1551 
1552 	/* for data */
1553 	/* cannot find near ptr */
1554 	if (NILFS_BMAP_USE_VBN(bmap)) {
1555 		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1556 		dat = nilfs_bmap_get_dat(bmap);
1557 	}
1558 
1559 	ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1560 	if (ret < 0)
1561 		return ret;
1562 
1563 	*bhp = NULL;
1564 	stats->bs_nblocks++;
1565 	if (nreq != NULL) {
1566 		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1567 		ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1568 		if (ret < 0)
1569 			goto err_out_dreq;
1570 
1571 		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1572 		if (ret < 0)
1573 			goto err_out_nreq;
1574 
1575 		*bhp = bh;
1576 		stats->bs_nblocks++;
1577 	}
1578 
1579 	/* success */
1580 	return 0;
1581 
1582 	/* error */
1583  err_out_nreq:
1584 	nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1585  err_out_dreq:
1586 	nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1587 	stats->bs_nblocks = 0;
1588 	return ret;
1589 
1590 }
1591 
1592 static void
1593 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1594 				      __u64 key, __u64 ptr,
1595 				      const __u64 *keys, const __u64 *ptrs,
1596 				      int n,
1597 				      union nilfs_bmap_ptr_req *dreq,
1598 				      union nilfs_bmap_ptr_req *nreq,
1599 				      struct buffer_head *bh)
1600 {
1601 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1602 	struct nilfs_btree_node *node;
1603 	struct inode *dat;
1604 	__u64 tmpptr;
1605 
1606 	/* free resources */
1607 	if (bmap->b_ops->bop_clear != NULL)
1608 		bmap->b_ops->bop_clear(bmap);
1609 
1610 	/* ptr must be a pointer to a buffer head. */
1611 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1612 
1613 	/* convert and insert */
1614 	dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1615 	nilfs_btree_init(bmap);
1616 	if (nreq != NULL) {
1617 		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1618 		nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1619 
1620 		/* create child node at level 1 */
1621 		node = (struct nilfs_btree_node *)bh->b_data;
1622 		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1623 		nilfs_btree_node_insert(btree, node,
1624 					key, dreq->bpr_ptr, n);
1625 		if (!buffer_dirty(bh))
1626 			nilfs_btnode_mark_dirty(bh);
1627 		if (!nilfs_bmap_dirty(bmap))
1628 			nilfs_bmap_set_dirty(bmap);
1629 
1630 		brelse(bh);
1631 
1632 		/* create root node at level 2 */
1633 		node = nilfs_btree_get_root(btree);
1634 		tmpptr = nreq->bpr_ptr;
1635 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1636 				      2, 1, &keys[0], &tmpptr);
1637 	} else {
1638 		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1639 
1640 		/* create root node at level 1 */
1641 		node = nilfs_btree_get_root(btree);
1642 		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1643 				      1, n, keys, ptrs);
1644 		nilfs_btree_node_insert(btree, node,
1645 					key, dreq->bpr_ptr, n);
1646 		if (!nilfs_bmap_dirty(bmap))
1647 			nilfs_bmap_set_dirty(bmap);
1648 	}
1649 
1650 	if (NILFS_BMAP_USE_VBN(bmap))
1651 		nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1652 }
1653 
1654 /**
1655  * nilfs_btree_convert_and_insert -
1656  * @bmap:
1657  * @key:
1658  * @ptr:
1659  * @keys:
1660  * @ptrs:
1661  * @n:
1662  */
1663 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1664 				   __u64 key, __u64 ptr,
1665 				   const __u64 *keys, const __u64 *ptrs, int n)
1666 {
1667 	struct buffer_head *bh;
1668 	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1669 	struct nilfs_bmap_stats stats;
1670 	int ret;
1671 
1672 	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1673 		di = &dreq;
1674 		ni = NULL;
1675 	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1676 			   1 << bmap->b_inode->i_blkbits)) {
1677 		di = &dreq;
1678 		ni = &nreq;
1679 	} else {
1680 		di = NULL;
1681 		ni = NULL;
1682 		BUG();
1683 	}
1684 
1685 	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1686 						     &stats);
1687 	if (ret < 0)
1688 		return ret;
1689 	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1690 					      di, ni, bh);
1691 	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1692 	return 0;
1693 }
1694 
1695 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1696 				   struct nilfs_btree_path *path,
1697 				   int level,
1698 				   struct buffer_head *bh)
1699 {
1700 	while ((++level < nilfs_btree_height(btree) - 1) &&
1701 	       !buffer_dirty(path[level].bp_bh))
1702 		nilfs_btnode_mark_dirty(path[level].bp_bh);
1703 
1704 	return 0;
1705 }
1706 
1707 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1708 					struct nilfs_btree_path *path,
1709 					int level, struct inode *dat)
1710 {
1711 	struct nilfs_btree_node *parent;
1712 	int ret;
1713 
1714 	parent = nilfs_btree_get_node(btree, path, level + 1);
1715 	path[level].bp_oldreq.bpr_ptr =
1716 		nilfs_btree_node_get_ptr(btree, parent,
1717 					 path[level + 1].bp_index);
1718 	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1719 	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1720 				       &path[level].bp_newreq.bpr_req);
1721 	if (ret < 0)
1722 		return ret;
1723 
1724 	if (buffer_nilfs_node(path[level].bp_bh)) {
1725 		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1726 		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1727 		path[level].bp_ctxt.bh = path[level].bp_bh;
1728 		ret = nilfs_btnode_prepare_change_key(
1729 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1730 			&path[level].bp_ctxt);
1731 		if (ret < 0) {
1732 			nilfs_dat_abort_update(dat,
1733 					       &path[level].bp_oldreq.bpr_req,
1734 					       &path[level].bp_newreq.bpr_req);
1735 			return ret;
1736 		}
1737 	}
1738 
1739 	return 0;
1740 }
1741 
1742 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1743 					struct nilfs_btree_path *path,
1744 					int level, struct inode *dat)
1745 {
1746 	struct nilfs_btree_node *parent;
1747 
1748 	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1749 				&path[level].bp_newreq.bpr_req,
1750 				btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1751 
1752 	if (buffer_nilfs_node(path[level].bp_bh)) {
1753 		nilfs_btnode_commit_change_key(
1754 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1755 			&path[level].bp_ctxt);
1756 		path[level].bp_bh = path[level].bp_ctxt.bh;
1757 	}
1758 	set_buffer_nilfs_volatile(path[level].bp_bh);
1759 
1760 	parent = nilfs_btree_get_node(btree, path, level + 1);
1761 	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1762 				 path[level].bp_newreq.bpr_ptr);
1763 }
1764 
1765 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1766 				       struct nilfs_btree_path *path,
1767 				       int level, struct inode *dat)
1768 {
1769 	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1770 			       &path[level].bp_newreq.bpr_req);
1771 	if (buffer_nilfs_node(path[level].bp_bh))
1772 		nilfs_btnode_abort_change_key(
1773 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1774 			&path[level].bp_ctxt);
1775 }
1776 
1777 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1778 					   struct nilfs_btree_path *path,
1779 					   int minlevel, int *maxlevelp,
1780 					   struct inode *dat)
1781 {
1782 	int level, ret;
1783 
1784 	level = minlevel;
1785 	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1786 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1787 		if (ret < 0)
1788 			return ret;
1789 	}
1790 	while ((++level < nilfs_btree_height(btree) - 1) &&
1791 	       !buffer_dirty(path[level].bp_bh)) {
1792 
1793 		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1794 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1795 		if (ret < 0)
1796 			goto out;
1797 	}
1798 
1799 	/* success */
1800 	*maxlevelp = level - 1;
1801 	return 0;
1802 
1803 	/* error */
1804  out:
1805 	while (--level > minlevel)
1806 		nilfs_btree_abort_update_v(btree, path, level, dat);
1807 	if (!buffer_nilfs_volatile(path[level].bp_bh))
1808 		nilfs_btree_abort_update_v(btree, path, level, dat);
1809 	return ret;
1810 }
1811 
1812 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1813 					   struct nilfs_btree_path *path,
1814 					   int minlevel, int maxlevel,
1815 					   struct buffer_head *bh,
1816 					   struct inode *dat)
1817 {
1818 	int level;
1819 
1820 	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1821 		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1822 
1823 	for (level = minlevel + 1; level <= maxlevel; level++)
1824 		nilfs_btree_commit_update_v(btree, path, level, dat);
1825 }
1826 
1827 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1828 				   struct nilfs_btree_path *path,
1829 				   int level, struct buffer_head *bh)
1830 {
1831 	int maxlevel = 0, ret;
1832 	struct nilfs_btree_node *parent;
1833 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1834 	__u64 ptr;
1835 
1836 	get_bh(bh);
1837 	path[level].bp_bh = bh;
1838 	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1839 					      dat);
1840 	if (ret < 0)
1841 		goto out;
1842 
1843 	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1844 		parent = nilfs_btree_get_node(btree, path, level + 1);
1845 		ptr = nilfs_btree_node_get_ptr(btree, parent,
1846 					       path[level + 1].bp_index);
1847 		ret = nilfs_dat_mark_dirty(dat, ptr);
1848 		if (ret < 0)
1849 			goto out;
1850 	}
1851 
1852 	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1853 
1854  out:
1855 	brelse(path[level].bp_bh);
1856 	path[level].bp_bh = NULL;
1857 	return ret;
1858 }
1859 
1860 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1861 				 struct buffer_head *bh)
1862 {
1863 	struct nilfs_btree *btree;
1864 	struct nilfs_btree_path *path;
1865 	struct nilfs_btree_node *node;
1866 	__u64 key;
1867 	int level, ret;
1868 
1869 	WARN_ON(!buffer_dirty(bh));
1870 
1871 	btree = (struct nilfs_btree *)bmap;
1872 	path = nilfs_btree_alloc_path();
1873 	if (path == NULL)
1874 		return -ENOMEM;
1875 
1876 	if (buffer_nilfs_node(bh)) {
1877 		node = (struct nilfs_btree_node *)bh->b_data;
1878 		key = nilfs_btree_node_get_key(node, 0);
1879 		level = nilfs_btree_node_get_level(node);
1880 	} else {
1881 		key = nilfs_bmap_data_get_key(bmap, bh);
1882 		level = NILFS_BTREE_LEVEL_DATA;
1883 	}
1884 
1885 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1886 	if (ret < 0) {
1887 		if (unlikely(ret == -ENOENT))
1888 			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1889 			       __func__, (unsigned long long)key, level);
1890 		goto out;
1891 	}
1892 
1893 	ret = NILFS_BMAP_USE_VBN(bmap) ?
1894 		nilfs_btree_propagate_v(btree, path, level, bh) :
1895 		nilfs_btree_propagate_p(btree, path, level, bh);
1896 
1897  out:
1898 	nilfs_btree_free_path(path);
1899 
1900 	return ret;
1901 }
1902 
1903 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1904 				    struct buffer_head *bh)
1905 {
1906 	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1907 }
1908 
1909 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1910 					 struct list_head *lists,
1911 					 struct buffer_head *bh)
1912 {
1913 	struct list_head *head;
1914 	struct buffer_head *cbh;
1915 	struct nilfs_btree_node *node, *cnode;
1916 	__u64 key, ckey;
1917 	int level;
1918 
1919 	get_bh(bh);
1920 	node = (struct nilfs_btree_node *)bh->b_data;
1921 	key = nilfs_btree_node_get_key(node, 0);
1922 	level = nilfs_btree_node_get_level(node);
1923 	list_for_each(head, &lists[level]) {
1924 		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1925 		cnode = (struct nilfs_btree_node *)cbh->b_data;
1926 		ckey = nilfs_btree_node_get_key(cnode, 0);
1927 		if (key < ckey)
1928 			break;
1929 	}
1930 	list_add_tail(&bh->b_assoc_buffers, head);
1931 }
1932 
1933 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1934 					     struct list_head *listp)
1935 {
1936 	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1937 	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1938 	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1939 	struct pagevec pvec;
1940 	struct buffer_head *bh, *head;
1941 	pgoff_t index = 0;
1942 	int level, i;
1943 
1944 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1945 	     level < NILFS_BTREE_LEVEL_MAX;
1946 	     level++)
1947 		INIT_LIST_HEAD(&lists[level]);
1948 
1949 	pagevec_init(&pvec, 0);
1950 
1951 	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1952 				  PAGEVEC_SIZE)) {
1953 		for (i = 0; i < pagevec_count(&pvec); i++) {
1954 			bh = head = page_buffers(pvec.pages[i]);
1955 			do {
1956 				if (buffer_dirty(bh))
1957 					nilfs_btree_add_dirty_buffer(btree,
1958 								     lists, bh);
1959 			} while ((bh = bh->b_this_page) != head);
1960 		}
1961 		pagevec_release(&pvec);
1962 		cond_resched();
1963 	}
1964 
1965 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1966 	     level < NILFS_BTREE_LEVEL_MAX;
1967 	     level++)
1968 		list_splice_tail(&lists[level], listp);
1969 }
1970 
1971 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
1972 				struct nilfs_btree_path *path,
1973 				int level,
1974 				struct buffer_head **bh,
1975 				sector_t blocknr,
1976 				union nilfs_binfo *binfo)
1977 {
1978 	struct nilfs_btree_node *parent;
1979 	__u64 key;
1980 	__u64 ptr;
1981 	int ret;
1982 
1983 	parent = nilfs_btree_get_node(btree, path, level + 1);
1984 	ptr = nilfs_btree_node_get_ptr(btree, parent,
1985 				       path[level + 1].bp_index);
1986 	if (buffer_nilfs_node(*bh)) {
1987 		path[level].bp_ctxt.oldkey = ptr;
1988 		path[level].bp_ctxt.newkey = blocknr;
1989 		path[level].bp_ctxt.bh = *bh;
1990 		ret = nilfs_btnode_prepare_change_key(
1991 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1992 			&path[level].bp_ctxt);
1993 		if (ret < 0)
1994 			return ret;
1995 		nilfs_btnode_commit_change_key(
1996 			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1997 			&path[level].bp_ctxt);
1998 		*bh = path[level].bp_ctxt.bh;
1999 	}
2000 
2001 	nilfs_btree_node_set_ptr(btree, parent,
2002 				 path[level + 1].bp_index, blocknr);
2003 
2004 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2005 	/* on-disk format */
2006 	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2007 	binfo->bi_dat.bi_level = level;
2008 
2009 	return 0;
2010 }
2011 
2012 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2013 				struct nilfs_btree_path *path,
2014 				int level,
2015 				struct buffer_head **bh,
2016 				sector_t blocknr,
2017 				union nilfs_binfo *binfo)
2018 {
2019 	struct nilfs_btree_node *parent;
2020 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2021 	__u64 key;
2022 	__u64 ptr;
2023 	union nilfs_bmap_ptr_req req;
2024 	int ret;
2025 
2026 	parent = nilfs_btree_get_node(btree, path, level + 1);
2027 	ptr = nilfs_btree_node_get_ptr(btree, parent,
2028 				       path[level + 1].bp_index);
2029 	req.bpr_ptr = ptr;
2030 	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2031 	if (ret < 0)
2032 		return ret;
2033 	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2034 
2035 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2036 	/* on-disk format */
2037 	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2038 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2039 
2040 	return 0;
2041 }
2042 
2043 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2044 			      struct buffer_head **bh,
2045 			      sector_t blocknr,
2046 			      union nilfs_binfo *binfo)
2047 {
2048 	struct nilfs_btree *btree;
2049 	struct nilfs_btree_path *path;
2050 	struct nilfs_btree_node *node;
2051 	__u64 key;
2052 	int level, ret;
2053 
2054 	btree = (struct nilfs_btree *)bmap;
2055 	path = nilfs_btree_alloc_path();
2056 	if (path == NULL)
2057 		return -ENOMEM;
2058 
2059 	if (buffer_nilfs_node(*bh)) {
2060 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2061 		key = nilfs_btree_node_get_key(node, 0);
2062 		level = nilfs_btree_node_get_level(node);
2063 	} else {
2064 		key = nilfs_bmap_data_get_key(bmap, *bh);
2065 		level = NILFS_BTREE_LEVEL_DATA;
2066 	}
2067 
2068 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2069 	if (ret < 0) {
2070 		WARN_ON(ret == -ENOENT);
2071 		goto out;
2072 	}
2073 
2074 	ret = NILFS_BMAP_USE_VBN(bmap) ?
2075 		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2076 		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2077 
2078  out:
2079 	nilfs_btree_free_path(path);
2080 
2081 	return ret;
2082 }
2083 
2084 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2085 				 struct buffer_head **bh,
2086 				 sector_t blocknr,
2087 				 union nilfs_binfo *binfo)
2088 {
2089 	struct nilfs_btree_node *node;
2090 	__u64 key;
2091 	int ret;
2092 
2093 	ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2094 			     blocknr);
2095 	if (ret < 0)
2096 		return ret;
2097 
2098 	if (buffer_nilfs_node(*bh)) {
2099 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2100 		key = nilfs_btree_node_get_key(node, 0);
2101 	} else
2102 		key = nilfs_bmap_data_get_key(bmap, *bh);
2103 
2104 	/* on-disk format */
2105 	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2106 	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2107 
2108 	return 0;
2109 }
2110 
2111 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2112 {
2113 	struct buffer_head *bh;
2114 	struct nilfs_btree *btree;
2115 	struct nilfs_btree_path *path;
2116 	__u64 ptr;
2117 	int ret;
2118 
2119 	btree = (struct nilfs_btree *)bmap;
2120 	path = nilfs_btree_alloc_path();
2121 	if (path == NULL)
2122 		return -ENOMEM;
2123 
2124 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2125 	if (ret < 0) {
2126 		WARN_ON(ret == -ENOENT);
2127 		goto out;
2128 	}
2129 	ret = nilfs_btree_get_block(btree, ptr, &bh);
2130 	if (ret < 0) {
2131 		WARN_ON(ret == -ENOENT);
2132 		goto out;
2133 	}
2134 
2135 	if (!buffer_dirty(bh))
2136 		nilfs_btnode_mark_dirty(bh);
2137 	brelse(bh);
2138 	if (!nilfs_bmap_dirty(&btree->bt_bmap))
2139 		nilfs_bmap_set_dirty(&btree->bt_bmap);
2140 
2141  out:
2142 	nilfs_btree_free_path(path);
2143 	return ret;
2144 }
2145 
2146 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2147 	.bop_lookup		=	nilfs_btree_lookup,
2148 	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2149 	.bop_insert		=	nilfs_btree_insert,
2150 	.bop_delete		=	nilfs_btree_delete,
2151 	.bop_clear		=	NULL,
2152 
2153 	.bop_propagate		=	nilfs_btree_propagate,
2154 
2155 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2156 
2157 	.bop_assign		=	nilfs_btree_assign,
2158 	.bop_mark		=	nilfs_btree_mark,
2159 
2160 	.bop_last_key		=	nilfs_btree_last_key,
2161 	.bop_check_insert	=	NULL,
2162 	.bop_check_delete	=	nilfs_btree_check_delete,
2163 	.bop_gather_data	=	nilfs_btree_gather_data,
2164 };
2165 
2166 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2167 	.bop_lookup		=	NULL,
2168 	.bop_lookup_contig	=	NULL,
2169 	.bop_insert		=	NULL,
2170 	.bop_delete		=	NULL,
2171 	.bop_clear		=	NULL,
2172 
2173 	.bop_propagate		=	nilfs_btree_propagate_gc,
2174 
2175 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2176 
2177 	.bop_assign		=	nilfs_btree_assign_gc,
2178 	.bop_mark		=	NULL,
2179 
2180 	.bop_last_key		=	NULL,
2181 	.bop_check_insert	=	NULL,
2182 	.bop_check_delete	=	NULL,
2183 	.bop_gather_data	=	NULL,
2184 };
2185 
2186 int nilfs_btree_init(struct nilfs_bmap *bmap)
2187 {
2188 	bmap->b_ops = &nilfs_btree_ops;
2189 	return 0;
2190 }
2191 
2192 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2193 {
2194 	bmap->b_ops = &nilfs_btree_ops_gc;
2195 }
2196