xref: /openbmc/linux/fs/xfs/libxfs/xfs_btree.c (revision 110e6f26)
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_inode.h"
27 #include "xfs_trans.h"
28 #include "xfs_inode_item.h"
29 #include "xfs_buf_item.h"
30 #include "xfs_btree.h"
31 #include "xfs_error.h"
32 #include "xfs_trace.h"
33 #include "xfs_cksum.h"
34 #include "xfs_alloc.h"
35 #include "xfs_log.h"
36 
37 /*
38  * Cursor allocation zone.
39  */
40 kmem_zone_t	*xfs_btree_cur_zone;
41 
42 /*
43  * Btree magic numbers.
44  */
45 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
46 	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
47 	  XFS_FIBT_MAGIC },
48 	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
49 	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
50 };
51 #define xfs_btree_magic(cur) \
52 	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
53 
54 
55 STATIC int				/* error (0 or EFSCORRUPTED) */
56 xfs_btree_check_lblock(
57 	struct xfs_btree_cur	*cur,	/* btree cursor */
58 	struct xfs_btree_block	*block,	/* btree long form block pointer */
59 	int			level,	/* level of the btree block */
60 	struct xfs_buf		*bp)	/* buffer for block, if any */
61 {
62 	int			lblock_ok = 1; /* block passes checks */
63 	struct xfs_mount	*mp;	/* file system mount point */
64 
65 	mp = cur->bc_mp;
66 
67 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
68 		lblock_ok = lblock_ok &&
69 			uuid_equal(&block->bb_u.l.bb_uuid,
70 				   &mp->m_sb.sb_meta_uuid) &&
71 			block->bb_u.l.bb_blkno == cpu_to_be64(
72 				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
73 	}
74 
75 	lblock_ok = lblock_ok &&
76 		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
77 		be16_to_cpu(block->bb_level) == level &&
78 		be16_to_cpu(block->bb_numrecs) <=
79 			cur->bc_ops->get_maxrecs(cur, level) &&
80 		block->bb_u.l.bb_leftsib &&
81 		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
82 		 XFS_FSB_SANITY_CHECK(mp,
83 			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
84 		block->bb_u.l.bb_rightsib &&
85 		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
86 		 XFS_FSB_SANITY_CHECK(mp,
87 			be64_to_cpu(block->bb_u.l.bb_rightsib)));
88 
89 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
90 			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
91 			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
92 		if (bp)
93 			trace_xfs_btree_corrupt(bp, _RET_IP_);
94 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
95 		return -EFSCORRUPTED;
96 	}
97 	return 0;
98 }
99 
100 STATIC int				/* error (0 or EFSCORRUPTED) */
101 xfs_btree_check_sblock(
102 	struct xfs_btree_cur	*cur,	/* btree cursor */
103 	struct xfs_btree_block	*block,	/* btree short form block pointer */
104 	int			level,	/* level of the btree block */
105 	struct xfs_buf		*bp)	/* buffer containing block */
106 {
107 	struct xfs_mount	*mp;	/* file system mount point */
108 	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
109 	struct xfs_agf		*agf;	/* ag. freespace structure */
110 	xfs_agblock_t		agflen;	/* native ag. freespace length */
111 	int			sblock_ok = 1; /* block passes checks */
112 
113 	mp = cur->bc_mp;
114 	agbp = cur->bc_private.a.agbp;
115 	agf = XFS_BUF_TO_AGF(agbp);
116 	agflen = be32_to_cpu(agf->agf_length);
117 
118 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
119 		sblock_ok = sblock_ok &&
120 			uuid_equal(&block->bb_u.s.bb_uuid,
121 				   &mp->m_sb.sb_meta_uuid) &&
122 			block->bb_u.s.bb_blkno == cpu_to_be64(
123 				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
124 	}
125 
126 	sblock_ok = sblock_ok &&
127 		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
128 		be16_to_cpu(block->bb_level) == level &&
129 		be16_to_cpu(block->bb_numrecs) <=
130 			cur->bc_ops->get_maxrecs(cur, level) &&
131 		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
132 		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
133 		block->bb_u.s.bb_leftsib &&
134 		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
135 		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
136 		block->bb_u.s.bb_rightsib;
137 
138 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
139 			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
140 			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
141 		if (bp)
142 			trace_xfs_btree_corrupt(bp, _RET_IP_);
143 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
144 		return -EFSCORRUPTED;
145 	}
146 	return 0;
147 }
148 
149 /*
150  * Debug routine: check that block header is ok.
151  */
152 int
153 xfs_btree_check_block(
154 	struct xfs_btree_cur	*cur,	/* btree cursor */
155 	struct xfs_btree_block	*block,	/* generic btree block pointer */
156 	int			level,	/* level of the btree block */
157 	struct xfs_buf		*bp)	/* buffer containing block, if any */
158 {
159 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
160 		return xfs_btree_check_lblock(cur, block, level, bp);
161 	else
162 		return xfs_btree_check_sblock(cur, block, level, bp);
163 }
164 
165 /*
166  * Check that (long) pointer is ok.
167  */
168 int					/* error (0 or EFSCORRUPTED) */
169 xfs_btree_check_lptr(
170 	struct xfs_btree_cur	*cur,	/* btree cursor */
171 	xfs_fsblock_t		bno,	/* btree block disk address */
172 	int			level)	/* btree block level */
173 {
174 	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
175 		level > 0 &&
176 		bno != NULLFSBLOCK &&
177 		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
178 	return 0;
179 }
180 
181 #ifdef DEBUG
182 /*
183  * Check that (short) pointer is ok.
184  */
185 STATIC int				/* error (0 or EFSCORRUPTED) */
186 xfs_btree_check_sptr(
187 	struct xfs_btree_cur	*cur,	/* btree cursor */
188 	xfs_agblock_t		bno,	/* btree block disk address */
189 	int			level)	/* btree block level */
190 {
191 	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;
192 
193 	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
194 		level > 0 &&
195 		bno != NULLAGBLOCK &&
196 		bno != 0 &&
197 		bno < agblocks);
198 	return 0;
199 }
200 
201 /*
202  * Check that block ptr is ok.
203  */
204 STATIC int				/* error (0 or EFSCORRUPTED) */
205 xfs_btree_check_ptr(
206 	struct xfs_btree_cur	*cur,	/* btree cursor */
207 	union xfs_btree_ptr	*ptr,	/* btree block disk address */
208 	int			index,	/* offset from ptr to check */
209 	int			level)	/* btree block level */
210 {
211 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
212 		return xfs_btree_check_lptr(cur,
213 				be64_to_cpu((&ptr->l)[index]), level);
214 	} else {
215 		return xfs_btree_check_sptr(cur,
216 				be32_to_cpu((&ptr->s)[index]), level);
217 	}
218 }
219 #endif
220 
221 /*
222  * Calculate CRC on the whole btree block and stuff it into the
223  * long-form btree header.
224  *
225  * Prior to calculating the CRC, pull the LSN out of the buffer log item and
226  * put it into the buffer so recovery knows what the last modification was
227  * that made it to disk.
228  */
229 void
230 xfs_btree_lblock_calc_crc(
231 	struct xfs_buf		*bp)
232 {
233 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
234 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
235 
236 	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
237 		return;
238 	if (bip)
239 		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
240 	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
241 }
242 
243 bool
244 xfs_btree_lblock_verify_crc(
245 	struct xfs_buf		*bp)
246 {
247 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
248 	struct xfs_mount	*mp = bp->b_target->bt_mount;
249 
250 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
251 		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
252 			return false;
253 		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
254 	}
255 
256 	return true;
257 }
258 
259 /*
260  * Calculate CRC on the whole btree block and stuff it into the
261  * short-form btree header.
262  *
263  * Prior to calculating the CRC, pull the LSN out of the buffer log item and
264  * put it into the buffer so recovery knows what the last modification was
265  * that made it to disk.
266  */
267 void
268 xfs_btree_sblock_calc_crc(
269 	struct xfs_buf		*bp)
270 {
271 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
272 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
273 
274 	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
275 		return;
276 	if (bip)
277 		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
278 	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
279 }
280 
281 bool
282 xfs_btree_sblock_verify_crc(
283 	struct xfs_buf		*bp)
284 {
285 	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
286 	struct xfs_mount	*mp = bp->b_target->bt_mount;
287 
288 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
289 		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
290 			return false;
291 		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
292 	}
293 
294 	return true;
295 }
296 
297 static int
298 xfs_btree_free_block(
299 	struct xfs_btree_cur	*cur,
300 	struct xfs_buf		*bp)
301 {
302 	int			error;
303 
304 	error = cur->bc_ops->free_block(cur, bp);
305 	if (!error) {
306 		xfs_trans_binval(cur->bc_tp, bp);
307 		XFS_BTREE_STATS_INC(cur, free);
308 	}
309 	return error;
310 }
311 
312 /*
313  * Delete the btree cursor.
314  */
315 void
316 xfs_btree_del_cursor(
317 	xfs_btree_cur_t	*cur,		/* btree cursor */
318 	int		error)		/* del because of error */
319 {
320 	int		i;		/* btree level */
321 
322 	/*
323 	 * Clear the buffer pointers, and release the buffers.
324 	 * If we're doing this in the face of an error, we
325 	 * need to make sure to inspect all of the entries
326 	 * in the bc_bufs array for buffers to be unlocked.
327 	 * This is because some of the btree code works from
328 	 * level n down to 0, and if we get an error along
329 	 * the way we won't have initialized all the entries
330 	 * down to 0.
331 	 */
332 	for (i = 0; i < cur->bc_nlevels; i++) {
333 		if (cur->bc_bufs[i])
334 			xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
335 		else if (!error)
336 			break;
337 	}
338 	/*
339 	 * Can't free a bmap cursor without having dealt with the
340 	 * allocated indirect blocks' accounting.
341 	 */
342 	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
343 	       cur->bc_private.b.allocated == 0);
344 	/*
345 	 * Free the cursor.
346 	 */
347 	kmem_zone_free(xfs_btree_cur_zone, cur);
348 }
349 
350 /*
351  * Duplicate the btree cursor.
352  * Allocate a new one, copy the record, re-get the buffers.
353  */
354 int					/* error */
355 xfs_btree_dup_cursor(
356 	xfs_btree_cur_t	*cur,		/* input cursor */
357 	xfs_btree_cur_t	**ncur)		/* output cursor */
358 {
359 	xfs_buf_t	*bp;		/* btree block's buffer pointer */
360 	int		error;		/* error return value */
361 	int		i;		/* level number of btree block */
362 	xfs_mount_t	*mp;		/* mount structure for filesystem */
363 	xfs_btree_cur_t	*new;		/* new cursor value */
364 	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */
365 
366 	tp = cur->bc_tp;
367 	mp = cur->bc_mp;
368 
369 	/*
370 	 * Allocate a new cursor like the old one.
371 	 */
372 	new = cur->bc_ops->dup_cursor(cur);
373 
374 	/*
375 	 * Copy the record currently in the cursor.
376 	 */
377 	new->bc_rec = cur->bc_rec;
378 
379 	/*
380 	 * For each level in the cursor, re-get the buffer and copy the ptr value.
381 	 */
382 	for (i = 0; i < new->bc_nlevels; i++) {
383 		new->bc_ptrs[i] = cur->bc_ptrs[i];
384 		new->bc_ra[i] = cur->bc_ra[i];
385 		bp = cur->bc_bufs[i];
386 		if (bp) {
387 			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
388 						   XFS_BUF_ADDR(bp), mp->m_bsize,
389 						   0, &bp,
390 						   cur->bc_ops->buf_ops);
391 			if (error) {
392 				xfs_btree_del_cursor(new, error);
393 				*ncur = NULL;
394 				return error;
395 			}
396 		}
397 		new->bc_bufs[i] = bp;
398 	}
399 	*ncur = new;
400 	return 0;
401 }
402 
403 /*
404  * XFS btree block layout and addressing:
405  *
406  * There are two types of blocks in the btree: leaf and non-leaf blocks.
407  *
408  * Leaf blocks start with a header, followed by records containing the
409  * values.  A non-leaf block also starts with the same header, and then
410  * contains lookup keys followed by an equal number of pointers to the
411  * btree blocks one level lower.
412  *
413  *		+--------+-------+-------+-------+-------+-------+-------+
414  * Leaf:	| header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
415  *		+--------+-------+-------+-------+-------+-------+-------+
416  *
417  *		+--------+-------+-------+-------+-------+-------+-------+
418  * Non-Leaf:	| header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
419  *		+--------+-------+-------+-------+-------+-------+-------+
420  *
421  * The header is called struct xfs_btree_block for reasons better left unknown
422  * and comes in different versions for short (32bit) and long (64bit) block
423  * pointers.  The record and key structures are defined by the btree instances
424  * and opaque to the btree core.  The block pointers are simple disk endian
425  * integers, available in a short (32bit) and long (64bit) variant.
426  *
427  * The helpers below calculate the offset of a given record, key or pointer
428  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
429  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
430  * inside the btree block is done using indices starting at one, not zero!
431  */
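
/*
 * Worked example (illustrative only; everything here follows from the
 * helpers below rather than introducing any new on-disk definition):
 * for the third record in a leaf block, xfs_btree_rec_offset() returns
 *
 *	xfs_btree_block_len(cur) + 2 * cur->bc_ops->rec_len
 *
 * i.e. the header plus two whole records, because indices are one-based.
 * Block pointers are additionally offset past get_maxrecs() keys, which
 * is why xfs_btree_ptr_offset() needs the level to know how many keys
 * can precede the pointers.
 */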
432 
433 /*
434  * Return size of the btree block header for this btree instance.
435  */
436 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
437 {
438 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
439 		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
440 			return XFS_BTREE_LBLOCK_CRC_LEN;
441 		return XFS_BTREE_LBLOCK_LEN;
442 	}
443 	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
444 		return XFS_BTREE_SBLOCK_CRC_LEN;
445 	return XFS_BTREE_SBLOCK_LEN;
446 }
447 
448 /*
449  * Return size of btree block pointers for this btree instance.
450  */
451 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
452 {
453 	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
454 		sizeof(__be64) : sizeof(__be32);
455 }
456 
457 /*
458  * Calculate offset of the n-th record in a btree block.
459  */
460 STATIC size_t
461 xfs_btree_rec_offset(
462 	struct xfs_btree_cur	*cur,
463 	int			n)
464 {
465 	return xfs_btree_block_len(cur) +
466 		(n - 1) * cur->bc_ops->rec_len;
467 }
468 
469 /*
470  * Calculate offset of the n-th key in a btree block.
471  */
472 STATIC size_t
473 xfs_btree_key_offset(
474 	struct xfs_btree_cur	*cur,
475 	int			n)
476 {
477 	return xfs_btree_block_len(cur) +
478 		(n - 1) * cur->bc_ops->key_len;
479 }
480 
481 /*
482  * Calculate offset of the n-th block pointer in a btree block.
483  */
484 STATIC size_t
485 xfs_btree_ptr_offset(
486 	struct xfs_btree_cur	*cur,
487 	int			n,
488 	int			level)
489 {
490 	return xfs_btree_block_len(cur) +
491 		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
492 		(n - 1) * xfs_btree_ptr_len(cur);
493 }
494 
495 /*
496  * Return a pointer to the n-th record in the btree block.
497  */
498 STATIC union xfs_btree_rec *
499 xfs_btree_rec_addr(
500 	struct xfs_btree_cur	*cur,
501 	int			n,
502 	struct xfs_btree_block	*block)
503 {
504 	return (union xfs_btree_rec *)
505 		((char *)block + xfs_btree_rec_offset(cur, n));
506 }
507 
508 /*
509  * Return a pointer to the n-th key in the btree block.
510  */
511 STATIC union xfs_btree_key *
512 xfs_btree_key_addr(
513 	struct xfs_btree_cur	*cur,
514 	int			n,
515 	struct xfs_btree_block	*block)
516 {
517 	return (union xfs_btree_key *)
518 		((char *)block + xfs_btree_key_offset(cur, n));
519 }
520 
521 /*
522  * Return a pointer to the n-th block pointer in the btree block.
523  */
524 STATIC union xfs_btree_ptr *
525 xfs_btree_ptr_addr(
526 	struct xfs_btree_cur	*cur,
527 	int			n,
528 	struct xfs_btree_block	*block)
529 {
530 	int			level = xfs_btree_get_level(block);
531 
532 	ASSERT(block->bb_level != 0);
533 
534 	return (union xfs_btree_ptr *)
535 		((char *)block + xfs_btree_ptr_offset(cur, n, level));
536 }
537 
538 /*
539  * Get the root block which is stored in the inode.
540  *
541  * For now this btree implementation assumes the btree root is always
542  * stored in the if_broot field of an inode fork.
543  */
544 STATIC struct xfs_btree_block *
545 xfs_btree_get_iroot(
546        struct xfs_btree_cur    *cur)
547 	struct xfs_btree_cur	*cur)
548 {
549 	struct xfs_ifork	*ifp;
550 
551 	ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
552 	return (struct xfs_btree_block *)ifp->if_broot;
553 
554 /*
555  * Retrieve the block pointer from the cursor at the given level.
556  * This may be an inode btree root or from a buffer.
557  */
558 STATIC struct xfs_btree_block *		/* generic btree block pointer */
559 xfs_btree_get_block(
560 	struct xfs_btree_cur	*cur,	/* btree cursor */
561 	int			level,	/* level in btree */
562 	struct xfs_buf		**bpp)	/* buffer containing the block */
563 {
564 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
565 	    (level == cur->bc_nlevels - 1)) {
566 		*bpp = NULL;
567 		return xfs_btree_get_iroot(cur);
568 	}
569 
570 	*bpp = cur->bc_bufs[level];
571 	return XFS_BUF_TO_BLOCK(*bpp);
572 }
573 
574 /*
575  * Get a buffer for the block, return it with no data read.
576  * Long-form addressing.
577  */
578 xfs_buf_t *				/* buffer for fsbno */
579 xfs_btree_get_bufl(
580 	xfs_mount_t	*mp,		/* file system mount point */
581 	xfs_trans_t	*tp,		/* transaction pointer */
582 	xfs_fsblock_t	fsbno,		/* file system block number */
583 	uint		lock)		/* lock flags for get_buf */
584 {
585 	xfs_daddr_t		d;		/* real disk block address */
586 
587 	ASSERT(fsbno != NULLFSBLOCK);
588 	d = XFS_FSB_TO_DADDR(mp, fsbno);
589 	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
590 }
591 
592 /*
593  * Get a buffer for the block, return it with no data read.
594  * Short-form addressing.
595  */
596 xfs_buf_t *				/* buffer for agno/agbno */
597 xfs_btree_get_bufs(
598 	xfs_mount_t	*mp,		/* file system mount point */
599 	xfs_trans_t	*tp,		/* transaction pointer */
600 	xfs_agnumber_t	agno,		/* allocation group number */
601 	xfs_agblock_t	agbno,		/* allocation group block number */
602 	uint		lock)		/* lock flags for get_buf */
603 {
604 	xfs_daddr_t		d;		/* real disk block address */
605 
606 	ASSERT(agno != NULLAGNUMBER);
607 	ASSERT(agbno != NULLAGBLOCK);
608 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
609 	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
610 }
611 
612 /*
613  * Check for the cursor referring to the last block at the given level.
614  */
615 int					/* 1=is last block, 0=not last block */
616 xfs_btree_islastblock(
617 	xfs_btree_cur_t		*cur,	/* btree cursor */
618 	int			level)	/* level to check */
619 {
620 	struct xfs_btree_block	*block;	/* generic btree block pointer */
621 	xfs_buf_t		*bp;	/* buffer containing block */
622 
623 	block = xfs_btree_get_block(cur, level, &bp);
624 	xfs_btree_check_block(cur, block, level, bp);
625 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
626 		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
627 	else
628 		return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
629 }
630 
631 /*
632  * Change the cursor to point to the first record at the given level.
633  * Other levels are unaffected.
634  */
635 STATIC int				/* success=1, failure=0 */
636 xfs_btree_firstrec(
637 	xfs_btree_cur_t		*cur,	/* btree cursor */
638 	int			level)	/* level to change */
639 {
640 	struct xfs_btree_block	*block;	/* generic btree block pointer */
641 	xfs_buf_t		*bp;	/* buffer containing block */
642 
643 	/*
644 	 * Get the block pointer for this level.
645 	 */
646 	block = xfs_btree_get_block(cur, level, &bp);
647 	xfs_btree_check_block(cur, block, level, bp);
648 	/*
649 	 * It's empty, there is no such record.
650 	 */
651 	if (!block->bb_numrecs)
652 		return 0;
653 	/*
654 	 * Set the ptr value to 1, that's the first record/key.
655 	 */
656 	cur->bc_ptrs[level] = 1;
657 	return 1;
658 }
659 
660 /*
661  * Change the cursor to point to the last record in the current block
662  * at the given level.  Other levels are unaffected.
663  */
664 STATIC int				/* success=1, failure=0 */
665 xfs_btree_lastrec(
666 	xfs_btree_cur_t		*cur,	/* btree cursor */
667 	int			level)	/* level to change */
668 {
669 	struct xfs_btree_block	*block;	/* generic btree block pointer */
670 	xfs_buf_t		*bp;	/* buffer containing block */
671 
672 	/*
673 	 * Get the block pointer for this level.
674 	 */
675 	block = xfs_btree_get_block(cur, level, &bp);
676 	xfs_btree_check_block(cur, block, level, bp);
677 	/*
678 	 * It's empty, there is no such record.
679 	 */
680 	if (!block->bb_numrecs)
681 		return 0;
682 	/*
683 	 * Set the ptr value to numrecs, that's the last record/key.
684 	 */
685 	cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
686 	return 1;
687 }
688 
689 /*
690  * Compute first and last byte offsets for the fields given.
691  * Interprets the offsets table, which contains struct field offsets.
692  */
693 void
694 xfs_btree_offsets(
695 	__int64_t	fields,		/* bitmask of fields */
696 	const short	*offsets,	/* table of field offsets */
697 	int		nbits,		/* number of bits to inspect */
698 	int		*first,		/* output: first byte offset */
699 	int		*last)		/* output: last byte offset */
700 {
701 	int		i;		/* current bit number */
702 	__int64_t	imask;		/* mask for current bit number */
703 
704 	ASSERT(fields != 0);
705 	/*
706 	 * Find the lowest bit, so the first byte offset.
707 	 */
708 	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
709 		if (imask & fields) {
710 			*first = offsets[i];
711 			break;
712 		}
713 	}
714 	/*
715 	 * Find the highest bit, so the last byte offset.
716 	 */
717 	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
718 		if (imask & fields) {
719 			*last = offsets[i + 1] - 1;
720 			break;
721 		}
722 	}
723 }
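
/*
 * Example (a sketch of how callers such as xfs_btree_log_block() below use
 * this; no new behaviour is implied): passing XFS_BB_LEVEL | XFS_BB_NUMRECS
 * with the short-form offsets table yields
 *
 *	*first = offsetof(struct xfs_btree_block, bb_level)
 *	*last  = offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib) - 1
 *
 * because the lowest set bit selects its own field offset and the highest
 * set bit selects the offset of the *next* field, minus one.
 */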
724 
725 /*
726  * Get a buffer for the block, return it read in.
727  * Long-form addressing.
728  */
729 int
730 xfs_btree_read_bufl(
731 	struct xfs_mount	*mp,		/* file system mount point */
732 	struct xfs_trans	*tp,		/* transaction pointer */
733 	xfs_fsblock_t		fsbno,		/* file system block number */
734 	uint			lock,		/* lock flags for read_buf */
735 	struct xfs_buf		**bpp,		/* buffer for fsbno */
736 	int			refval,		/* ref count value for buffer */
737 	const struct xfs_buf_ops *ops)
738 {
739 	struct xfs_buf		*bp;		/* return value */
740 	xfs_daddr_t		d;		/* real disk block address */
741 	int			error;
742 
743 	ASSERT(fsbno != NULLFSBLOCK);
744 	d = XFS_FSB_TO_DADDR(mp, fsbno);
745 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
746 				   mp->m_bsize, lock, &bp, ops);
747 	if (error)
748 		return error;
749 	if (bp)
750 		xfs_buf_set_ref(bp, refval);
751 	*bpp = bp;
752 	return 0;
753 }
754 
755 /*
756  * Read-ahead the block, don't wait for it, don't return a buffer.
757  * Long-form addressing.
758  */
759 /* ARGSUSED */
760 void
761 xfs_btree_reada_bufl(
762 	struct xfs_mount	*mp,		/* file system mount point */
763 	xfs_fsblock_t		fsbno,		/* file system block number */
764 	xfs_extlen_t		count,		/* count of filesystem blocks */
765 	const struct xfs_buf_ops *ops)
766 {
767 	xfs_daddr_t		d;
768 
769 	ASSERT(fsbno != NULLFSBLOCK);
770 	d = XFS_FSB_TO_DADDR(mp, fsbno);
771 	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
772 }
773 
774 /*
775  * Read-ahead the block, don't wait for it, don't return a buffer.
776  * Short-form addressing.
777  */
778 /* ARGSUSED */
779 void
780 xfs_btree_reada_bufs(
781 	struct xfs_mount	*mp,		/* file system mount point */
782 	xfs_agnumber_t		agno,		/* allocation group number */
783 	xfs_agblock_t		agbno,		/* allocation group block number */
784 	xfs_extlen_t		count,		/* count of filesystem blocks */
785 	const struct xfs_buf_ops *ops)
786 {
787 	xfs_daddr_t		d;
788 
789 	ASSERT(agno != NULLAGNUMBER);
790 	ASSERT(agbno != NULLAGBLOCK);
791 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
792 	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
793 }
794 
795 STATIC int
796 xfs_btree_readahead_lblock(
797 	struct xfs_btree_cur	*cur,
798 	int			lr,
799 	struct xfs_btree_block	*block)
800 {
801 	int			rval = 0;
802 	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
803 	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);
804 
805 	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
806 		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
807 				     cur->bc_ops->buf_ops);
808 		rval++;
809 	}
810 
811 	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
812 		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
813 				     cur->bc_ops->buf_ops);
814 		rval++;
815 	}
816 
817 	return rval;
818 }
819 
820 STATIC int
821 xfs_btree_readahead_sblock(
822 	struct xfs_btree_cur	*cur,
823 	int			lr,
824 	struct xfs_btree_block *block)
825 {
826 	int			rval = 0;
827 	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
828 	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);
829 
830 
831 	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
832 		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
833 				     left, 1, cur->bc_ops->buf_ops);
834 		rval++;
835 	}
836 
837 	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
838 		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
839 				     right, 1, cur->bc_ops->buf_ops);
840 		rval++;
841 	}
842 
843 	return rval;
844 }
845 
846 /*
847  * Read-ahead btree blocks, at the given level.
848  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
849  */
850 STATIC int
851 xfs_btree_readahead(
852 	struct xfs_btree_cur	*cur,		/* btree cursor */
853 	int			lev,		/* level in btree */
854 	int			lr)		/* left/right bits */
855 {
856 	struct xfs_btree_block	*block;
857 
858 	/*
859 	 * No readahead needed if we are at the root level and the
860 	 * btree root is stored in the inode.
861 	 */
862 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
863 	    (lev == cur->bc_nlevels - 1))
864 		return 0;
865 
866 	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
867 		return 0;
868 
869 	cur->bc_ra[lev] |= lr;
870 	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
871 
872 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
873 		return xfs_btree_readahead_lblock(cur, lr, block);
874 	return xfs_btree_readahead_sblock(cur, lr, block);
875 }
876 
877 STATIC xfs_daddr_t
878 xfs_btree_ptr_to_daddr(
879 	struct xfs_btree_cur	*cur,
880 	union xfs_btree_ptr	*ptr)
881 {
882 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
883 		ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
884 
885 		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
886 	} else {
887 		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
888 		ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
889 
890 		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
891 					be32_to_cpu(ptr->s));
892 	}
893 }
894 
895 /*
896  * Readahead @count btree blocks at the given @ptr location.
897  *
898  * We don't need to care about long or short form btrees here as we have a
899  * method of converting the ptr directly to a daddr available to us.
900  */
901 STATIC void
902 xfs_btree_readahead_ptr(
903 	struct xfs_btree_cur	*cur,
904 	union xfs_btree_ptr	*ptr,
905 	xfs_extlen_t		count)
906 {
907 	xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
908 			  xfs_btree_ptr_to_daddr(cur, ptr),
909 			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
910 }
911 
912 /*
913  * Set the buffer for level "lev" in the cursor to bp, releasing
914  * any previous buffer.
915  */
916 STATIC void
917 xfs_btree_setbuf(
918 	xfs_btree_cur_t		*cur,	/* btree cursor */
919 	int			lev,	/* level in btree */
920 	xfs_buf_t		*bp)	/* new buffer to set */
921 {
922 	struct xfs_btree_block	*b;	/* btree block */
923 
924 	if (cur->bc_bufs[lev])
925 		xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
926 	cur->bc_bufs[lev] = bp;
927 	cur->bc_ra[lev] = 0;
928 
929 	b = XFS_BUF_TO_BLOCK(bp);
930 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
931 		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
932 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
933 		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
934 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
935 	} else {
936 		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
937 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
938 		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
939 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
940 	}
941 }
942 
943 STATIC int
944 xfs_btree_ptr_is_null(
945 	struct xfs_btree_cur	*cur,
946 	union xfs_btree_ptr	*ptr)
947 {
948 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
949 		return ptr->l == cpu_to_be64(NULLFSBLOCK);
950 	else
951 		return ptr->s == cpu_to_be32(NULLAGBLOCK);
952 }
953 
954 STATIC void
955 xfs_btree_set_ptr_null(
956 	struct xfs_btree_cur	*cur,
957 	union xfs_btree_ptr	*ptr)
958 {
959 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960 		ptr->l = cpu_to_be64(NULLFSBLOCK);
961 	else
962 		ptr->s = cpu_to_be32(NULLAGBLOCK);
963 }
964 
965 /*
966  * Get/set/init sibling pointers
967  */
968 STATIC void
969 xfs_btree_get_sibling(
970 	struct xfs_btree_cur	*cur,
971 	struct xfs_btree_block	*block,
972 	union xfs_btree_ptr	*ptr,
973 	int			lr)
974 {
975 	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
976 
977 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
978 		if (lr == XFS_BB_RIGHTSIB)
979 			ptr->l = block->bb_u.l.bb_rightsib;
980 		else
981 			ptr->l = block->bb_u.l.bb_leftsib;
982 	} else {
983 		if (lr == XFS_BB_RIGHTSIB)
984 			ptr->s = block->bb_u.s.bb_rightsib;
985 		else
986 			ptr->s = block->bb_u.s.bb_leftsib;
987 	}
988 }
989 
990 STATIC void
991 xfs_btree_set_sibling(
992 	struct xfs_btree_cur	*cur,
993 	struct xfs_btree_block	*block,
994 	union xfs_btree_ptr	*ptr,
995 	int			lr)
996 {
997 	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
998 
999 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1000 		if (lr == XFS_BB_RIGHTSIB)
1001 			block->bb_u.l.bb_rightsib = ptr->l;
1002 		else
1003 			block->bb_u.l.bb_leftsib = ptr->l;
1004 	} else {
1005 		if (lr == XFS_BB_RIGHTSIB)
1006 			block->bb_u.s.bb_rightsib = ptr->s;
1007 		else
1008 			block->bb_u.s.bb_leftsib = ptr->s;
1009 	}
1010 }
1011 
1012 void
1013 xfs_btree_init_block_int(
1014 	struct xfs_mount	*mp,
1015 	struct xfs_btree_block	*buf,
1016 	xfs_daddr_t		blkno,
1017 	__u32			magic,
1018 	__u16			level,
1019 	__u16			numrecs,
1020 	__u64			owner,
1021 	unsigned int		flags)
1022 {
1023 	buf->bb_magic = cpu_to_be32(magic);
1024 	buf->bb_level = cpu_to_be16(level);
1025 	buf->bb_numrecs = cpu_to_be16(numrecs);
1026 
1027 	if (flags & XFS_BTREE_LONG_PTRS) {
1028 		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1029 		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1030 		if (flags & XFS_BTREE_CRC_BLOCKS) {
1031 			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1032 			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1033 			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1034 			buf->bb_u.l.bb_pad = 0;
1035 			buf->bb_u.l.bb_lsn = 0;
1036 		}
1037 	} else {
1038 		/* owner is a 32 bit value on short blocks */
1039 		__u32 __owner = (__u32)owner;
1040 
1041 		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1042 		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1043 		if (flags & XFS_BTREE_CRC_BLOCKS) {
1044 			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1045 			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1046 			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1047 			buf->bb_u.s.bb_lsn = 0;
1048 		}
1049 	}
1050 }
1051 
1052 void
1053 xfs_btree_init_block(
1054 	struct xfs_mount *mp,
1055 	struct xfs_buf	*bp,
1056 	__u32		magic,
1057 	__u16		level,
1058 	__u16		numrecs,
1059 	__u64		owner,
1060 	unsigned int	flags)
1061 {
1062 	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1063 				 magic, level, numrecs, owner, flags);
1064 }
1065 
1066 STATIC void
1067 xfs_btree_init_block_cur(
1068 	struct xfs_btree_cur	*cur,
1069 	struct xfs_buf		*bp,
1070 	int			level,
1071 	int			numrecs)
1072 {
1073 	__u64 owner;
1074 
1075 	/*
1076 	 * We can pull the owner from the cursor right now as the different
1077 	 * owners align directly with the pointer size of the btree. This may
1078 	 * change in the future, but is safe for current users of the generic
1079 	 * btree code.
1080 	 */
1081 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1082 		owner = cur->bc_private.b.ip->i_ino;
1083 	else
1084 		owner = cur->bc_private.a.agno;
1085 
1086 	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1087 				 xfs_btree_magic(cur), level, numrecs,
1088 				 owner, cur->bc_flags);
1089 }
1090 
1091 /*
1092  * Return true if ptr is the last record in the btree and
1093  * we need to track updates to this record.  The decision
1094  * will be further refined in the update_lastrec method.
1095  */
1096 STATIC int
1097 xfs_btree_is_lastrec(
1098 	struct xfs_btree_cur	*cur,
1099 	struct xfs_btree_block	*block,
1100 	int			level)
1101 {
1102 	union xfs_btree_ptr	ptr;
1103 
1104 	if (level > 0)
1105 		return 0;
1106 	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1107 		return 0;
1108 
1109 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1110 	if (!xfs_btree_ptr_is_null(cur, &ptr))
1111 		return 0;
1112 	return 1;
1113 }
1114 
1115 STATIC void
1116 xfs_btree_buf_to_ptr(
1117 	struct xfs_btree_cur	*cur,
1118 	struct xfs_buf		*bp,
1119 	union xfs_btree_ptr	*ptr)
1120 {
1121 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1122 		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1123 					XFS_BUF_ADDR(bp)));
1124 	else {
1125 		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1126 					XFS_BUF_ADDR(bp)));
1127 	}
1128 }
1129 
1130 STATIC void
1131 xfs_btree_set_refs(
1132 	struct xfs_btree_cur	*cur,
1133 	struct xfs_buf		*bp)
1134 {
1135 	switch (cur->bc_btnum) {
1136 	case XFS_BTNUM_BNO:
1137 	case XFS_BTNUM_CNT:
1138 		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1139 		break;
1140 	case XFS_BTNUM_INO:
1141 	case XFS_BTNUM_FINO:
1142 		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1143 		break;
1144 	case XFS_BTNUM_BMAP:
1145 		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1146 		break;
1147 	default:
1148 		ASSERT(0);
1149 	}
1150 }
1151 
1152 STATIC int
1153 xfs_btree_get_buf_block(
1154 	struct xfs_btree_cur	*cur,
1155 	union xfs_btree_ptr	*ptr,
1156 	int			flags,
1157 	struct xfs_btree_block	**block,
1158 	struct xfs_buf		**bpp)
1159 {
1160 	struct xfs_mount	*mp = cur->bc_mp;
1161 	xfs_daddr_t		d;
1162 
1163 	/* need to sort out how callers deal with failures first */
1164 	ASSERT(!(flags & XBF_TRYLOCK));
1165 
1166 	d = xfs_btree_ptr_to_daddr(cur, ptr);
1167 	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1168 				 mp->m_bsize, flags);
1169 
1170 	if (!*bpp)
1171 		return -ENOMEM;
1172 
1173 	(*bpp)->b_ops = cur->bc_ops->buf_ops;
1174 	*block = XFS_BUF_TO_BLOCK(*bpp);
1175 	return 0;
1176 }
1177 
1178 /*
1179  * Read in the buffer at the given ptr and return the buffer and
1180  * the block pointer within the buffer.
1181  */
1182 STATIC int
1183 xfs_btree_read_buf_block(
1184 	struct xfs_btree_cur	*cur,
1185 	union xfs_btree_ptr	*ptr,
1186 	int			flags,
1187 	struct xfs_btree_block	**block,
1188 	struct xfs_buf		**bpp)
1189 {
1190 	struct xfs_mount	*mp = cur->bc_mp;
1191 	xfs_daddr_t		d;
1192 	int			error;
1193 
1194 	/* need to sort out how callers deal with failures first */
1195 	ASSERT(!(flags & XBF_TRYLOCK));
1196 
1197 	d = xfs_btree_ptr_to_daddr(cur, ptr);
1198 	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1199 				   mp->m_bsize, flags, bpp,
1200 				   cur->bc_ops->buf_ops);
1201 	if (error)
1202 		return error;
1203 
1204 	xfs_btree_set_refs(cur, *bpp);
1205 	*block = XFS_BUF_TO_BLOCK(*bpp);
1206 	return 0;
1207 }
1208 
1209 /*
1210  * Copy keys from one btree block to another.
1211  */
1212 STATIC void
1213 xfs_btree_copy_keys(
1214 	struct xfs_btree_cur	*cur,
1215 	union xfs_btree_key	*dst_key,
1216 	union xfs_btree_key	*src_key,
1217 	int			numkeys)
1218 {
1219 	ASSERT(numkeys >= 0);
1220 	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1221 }
1222 
1223 /*
1224  * Copy records from one btree block to another.
1225  */
1226 STATIC void
1227 xfs_btree_copy_recs(
1228 	struct xfs_btree_cur	*cur,
1229 	union xfs_btree_rec	*dst_rec,
1230 	union xfs_btree_rec	*src_rec,
1231 	int			numrecs)
1232 {
1233 	ASSERT(numrecs >= 0);
1234 	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1235 }
1236 
1237 /*
1238  * Copy block pointers from one btree block to another.
1239  */
1240 STATIC void
1241 xfs_btree_copy_ptrs(
1242 	struct xfs_btree_cur	*cur,
1243 	union xfs_btree_ptr	*dst_ptr,
1244 	union xfs_btree_ptr	*src_ptr,
1245 	int			numptrs)
1246 {
1247 	ASSERT(numptrs >= 0);
1248 	memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1249 }
1250 
1251 /*
1252  * Shift keys one index left/right inside a single btree block.
1253  */
1254 STATIC void
1255 xfs_btree_shift_keys(
1256 	struct xfs_btree_cur	*cur,
1257 	union xfs_btree_key	*key,
1258 	int			dir,
1259 	int			numkeys)
1260 {
1261 	char			*dst_key;
1262 
1263 	ASSERT(numkeys >= 0);
1264 	ASSERT(dir == 1 || dir == -1);
1265 
1266 	dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1267 	memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1268 }
1269 
1270 /*
1271  * Shift records one index left/right inside a single btree block.
1272  */
1273 STATIC void
1274 xfs_btree_shift_recs(
1275 	struct xfs_btree_cur	*cur,
1276 	union xfs_btree_rec	*rec,
1277 	int			dir,
1278 	int			numrecs)
1279 {
1280 	char			*dst_rec;
1281 
1282 	ASSERT(numrecs >= 0);
1283 	ASSERT(dir == 1 || dir == -1);
1284 
1285 	dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1286 	memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1287 }
1288 
1289 /*
1290  * Shift block pointers one index left/right inside a single btree block.
1291  */
1292 STATIC void
1293 xfs_btree_shift_ptrs(
1294 	struct xfs_btree_cur	*cur,
1295 	union xfs_btree_ptr	*ptr,
1296 	int			dir,
1297 	int			numptrs)
1298 {
1299 	char			*dst_ptr;
1300 
1301 	ASSERT(numptrs >= 0);
1302 	ASSERT(dir == 1 || dir == -1);
1303 
1304 	dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1305 	memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1306 }
1307 
1308 /*
1309  * Log key values from the btree block.
1310  */
1311 STATIC void
1312 xfs_btree_log_keys(
1313 	struct xfs_btree_cur	*cur,
1314 	struct xfs_buf		*bp,
1315 	int			first,
1316 	int			last)
1317 {
1318 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1319 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1320 
1321 	if (bp) {
1322 		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1323 		xfs_trans_log_buf(cur->bc_tp, bp,
1324 				  xfs_btree_key_offset(cur, first),
1325 				  xfs_btree_key_offset(cur, last + 1) - 1);
1326 	} else {
1327 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1328 				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1329 	}
1330 
1331 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1332 }
1333 
1334 /*
1335  * Log record values from the btree block.
1336  */
1337 void
1338 xfs_btree_log_recs(
1339 	struct xfs_btree_cur	*cur,
1340 	struct xfs_buf		*bp,
1341 	int			first,
1342 	int			last)
1343 {
1344 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1345 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1346 
1347 	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1348 	xfs_trans_log_buf(cur->bc_tp, bp,
1349 			  xfs_btree_rec_offset(cur, first),
1350 			  xfs_btree_rec_offset(cur, last + 1) - 1);
1351 
1352 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1353 }
1354 
1355 /*
1356  * Log block pointer fields from a btree block (nonleaf).
1357  */
1358 STATIC void
1359 xfs_btree_log_ptrs(
1360 	struct xfs_btree_cur	*cur,	/* btree cursor */
1361 	struct xfs_buf		*bp,	/* buffer containing btree block */
1362 	int			first,	/* index of first pointer to log */
1363 	int			last)	/* index of last pointer to log */
1364 {
1365 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1366 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1367 
1368 	if (bp) {
1369 		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
1370 		int			level = xfs_btree_get_level(block);
1371 
1372 		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1373 		xfs_trans_log_buf(cur->bc_tp, bp,
1374 				xfs_btree_ptr_offset(cur, first, level),
1375 				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1376 	} else {
1377 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1378 			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1379 	}
1380 
1381 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1382 }
1383 
1384 /*
1385  * Log fields from a btree block header.
1386  */
1387 void
1388 xfs_btree_log_block(
1389 	struct xfs_btree_cur	*cur,	/* btree cursor */
1390 	struct xfs_buf		*bp,	/* buffer containing btree block */
1391 	int			fields)	/* mask of fields: XFS_BB_... */
1392 {
1393 	int			first;	/* first byte offset logged */
1394 	int			last;	/* last byte offset logged */
1395 	static const short	soffsets[] = {	/* table of offsets (short) */
1396 		offsetof(struct xfs_btree_block, bb_magic),
1397 		offsetof(struct xfs_btree_block, bb_level),
1398 		offsetof(struct xfs_btree_block, bb_numrecs),
1399 		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1400 		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1401 		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1402 		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1403 		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1404 		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1405 		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1406 		XFS_BTREE_SBLOCK_CRC_LEN
1407 	};
1408 	static const short	loffsets[] = {	/* table of offsets (long) */
1409 		offsetof(struct xfs_btree_block, bb_magic),
1410 		offsetof(struct xfs_btree_block, bb_level),
1411 		offsetof(struct xfs_btree_block, bb_numrecs),
1412 		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1413 		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1414 		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1415 		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1416 		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1417 		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1418 		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1419 		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1420 		XFS_BTREE_LBLOCK_CRC_LEN
1421 	};
1422 
1423 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1424 	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1425 
1426 	if (bp) {
1427 		int nbits;
1428 
1429 		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1430 			/*
1431 			 * We don't log the CRC when updating a btree
1432 			 * block but instead recreate it during log
1433 			 * recovery.  As the log buffers have checksums
1434 			 * of their own this is safe and avoids logging a crc
1435 			 * update in a lot of places.
1436 			 */
1437 			if (fields == XFS_BB_ALL_BITS)
1438 				fields = XFS_BB_ALL_BITS_CRC;
1439 			nbits = XFS_BB_NUM_BITS_CRC;
1440 		} else {
1441 			nbits = XFS_BB_NUM_BITS;
1442 		}
1443 		xfs_btree_offsets(fields,
1444 				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1445 					loffsets : soffsets,
1446 				  nbits, &first, &last);
1447 		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1448 		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1449 	} else {
1450 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1451 			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1452 	}
1453 
1454 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1455 }
1456 
1457 /*
1458  * Increment cursor by one record at the level.
1459  * For nonzero levels the leaf-ward information is untouched.
1460  */
1461 int						/* error */
1462 xfs_btree_increment(
1463 	struct xfs_btree_cur	*cur,
1464 	int			level,
1465 	int			*stat)		/* success/failure */
1466 {
1467 	struct xfs_btree_block	*block;
1468 	union xfs_btree_ptr	ptr;
1469 	struct xfs_buf		*bp;
1470 	int			error;		/* error return value */
1471 	int			lev;
1472 
1473 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1474 	XFS_BTREE_TRACE_ARGI(cur, level);
1475 
1476 	ASSERT(level < cur->bc_nlevels);
1477 
1478 	/* Read-ahead to the right at this level. */
1479 	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1480 
1481 	/* Get a pointer to the btree block. */
1482 	block = xfs_btree_get_block(cur, level, &bp);
1483 
1484 #ifdef DEBUG
1485 	error = xfs_btree_check_block(cur, block, level, bp);
1486 	if (error)
1487 		goto error0;
1488 #endif
1489 
1490 	/* We're done if we remain in the block after the increment. */
1491 	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1492 		goto out1;
1493 
1494 	/* Fail if we just went off the right edge of the tree. */
1495 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1496 	if (xfs_btree_ptr_is_null(cur, &ptr))
1497 		goto out0;
1498 
1499 	XFS_BTREE_STATS_INC(cur, increment);
1500 
1501 	/*
1502 	 * March up the tree incrementing pointers.
1503 	 * Stop when we don't go off the right edge of a block.
1504 	 */
1505 	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1506 		block = xfs_btree_get_block(cur, lev, &bp);
1507 
1508 #ifdef DEBUG
1509 		error = xfs_btree_check_block(cur, block, lev, bp);
1510 		if (error)
1511 			goto error0;
1512 #endif
1513 
1514 		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1515 			break;
1516 
1517 		/* Read-ahead the right block for the next loop. */
1518 		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1519 	}
1520 
1521 	/*
1522 	 * If we went off the root then we are either seriously
1523 	 * confused or have the tree root in an inode.
1524 	 */
1525 	if (lev == cur->bc_nlevels) {
1526 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1527 			goto out0;
1528 		ASSERT(0);
1529 		error = -EFSCORRUPTED;
1530 		goto error0;
1531 	}
1532 	ASSERT(lev < cur->bc_nlevels);
1533 
1534 	/*
1535 	 * Now walk back down the tree, fixing up the cursor's buffer
1536 	 * pointers and key numbers.
1537 	 */
1538 	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1539 		union xfs_btree_ptr	*ptrp;
1540 
1541 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1542 		--lev;
1543 		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1544 		if (error)
1545 			goto error0;
1546 
1547 		xfs_btree_setbuf(cur, lev, bp);
1548 		cur->bc_ptrs[lev] = 1;
1549 	}
1550 out1:
1551 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1552 	*stat = 1;
1553 	return 0;
1554 
1555 out0:
1556 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1557 	*stat = 0;
1558 	return 0;
1559 
1560 error0:
1561 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1562 	return error;
1563 }
1564 
1565 /*
1566  * Decrement cursor by one record at the level.
1567  * For nonzero levels the leaf-ward information is untouched.
1568  */
1569 int						/* error */
1570 xfs_btree_decrement(
1571 	struct xfs_btree_cur	*cur,
1572 	int			level,
1573 	int			*stat)		/* success/failure */
1574 {
1575 	struct xfs_btree_block	*block;
1576 	xfs_buf_t		*bp;
1577 	int			error;		/* error return value */
1578 	int			lev;
1579 	union xfs_btree_ptr	ptr;
1580 
1581 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1582 	XFS_BTREE_TRACE_ARGI(cur, level);
1583 
1584 	ASSERT(level < cur->bc_nlevels);
1585 
1586 	/* Read-ahead to the left at this level. */
1587 	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1588 
1589 	/* We're done if we remain in the block after the decrement. */
1590 	if (--cur->bc_ptrs[level] > 0)
1591 		goto out1;
1592 
1593 	/* Get a pointer to the btree block. */
1594 	block = xfs_btree_get_block(cur, level, &bp);
1595 
1596 #ifdef DEBUG
1597 	error = xfs_btree_check_block(cur, block, level, bp);
1598 	if (error)
1599 		goto error0;
1600 #endif
1601 
1602 	/* Fail if we just went off the left edge of the tree. */
1603 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1604 	if (xfs_btree_ptr_is_null(cur, &ptr))
1605 		goto out0;
1606 
1607 	XFS_BTREE_STATS_INC(cur, decrement);
1608 
1609 	/*
1610 	 * March up the tree decrementing pointers.
1611 	 * Stop when we don't go off the left edge of a block.
1612 	 */
1613 	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1614 		if (--cur->bc_ptrs[lev] > 0)
1615 			break;
1616 		/* Read-ahead the left block for the next loop. */
1617 		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1618 	}
1619 
1620 	/*
1621 	 * If we went off the root then we are either seriously
1622 	 * confused or have the tree root in an inode.
1623 	 */
1624 	if (lev == cur->bc_nlevels) {
1625 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1626 			goto out0;
1627 		ASSERT(0);
1628 		error = -EFSCORRUPTED;
1629 		goto error0;
1630 	}
1631 	ASSERT(lev < cur->bc_nlevels);
1632 
1633 	/*
1634 	 * Now walk back down the tree, fixing up the cursor's buffer
1635 	 * pointers and key numbers.
1636 	 */
1637 	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1638 		union xfs_btree_ptr	*ptrp;
1639 
1640 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1641 		--lev;
1642 		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
1643 		if (error)
1644 			goto error0;
1645 		xfs_btree_setbuf(cur, lev, bp);
1646 		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1647 	}
1648 out1:
1649 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1650 	*stat = 1;
1651 	return 0;
1652 
1653 out0:
1654 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1655 	*stat = 0;
1656 	return 0;
1657 
1658 error0:
1659 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1660 	return error;
1661 }
1662 
1663 STATIC int
1664 xfs_btree_lookup_get_block(
1665 	struct xfs_btree_cur	*cur,	/* btree cursor */
1666 	int			level,	/* level in the btree */
1667 	union xfs_btree_ptr	*pp,	/* ptr to btree block */
1668 	struct xfs_btree_block	**blkp) /* return btree block */
1669 {
1670 	struct xfs_buf		*bp;	/* buffer pointer for btree block */
1671 	int			error = 0;
1672 
1673 	/* special case the root block if in an inode */
1674 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1675 	    (level == cur->bc_nlevels - 1)) {
1676 		*blkp = xfs_btree_get_iroot(cur);
1677 		return 0;
1678 	}
1679 
1680 	/*
1681 	 * If the old buffer at this level is for the disk address we are
1682 	 * looking for, re-use it.
1683 	 *
1684 	 * Otherwise throw it away and get a new one.
1685 	 */
1686 	bp = cur->bc_bufs[level];
1687 	if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1688 		*blkp = XFS_BUF_TO_BLOCK(bp);
1689 		return 0;
1690 	}
1691 
1692 	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
1693 	if (error)
1694 		return error;
1695 
1696 	xfs_btree_setbuf(cur, level, bp);
1697 	return 0;
1698 }
1699 
1700 /*
1701  * Get current search key.  For level 0 we don't actually have a key
1702  * structure so we make one up from the record.  For all other levels
1703  * we just return the right key.
1704  */
1705 STATIC union xfs_btree_key *
1706 xfs_lookup_get_search_key(
1707 	struct xfs_btree_cur	*cur,
1708 	int			level,
1709 	int			keyno,
1710 	struct xfs_btree_block	*block,
1711 	union xfs_btree_key	*kp)
1712 {
1713 	if (level == 0) {
1714 		cur->bc_ops->init_key_from_rec(kp,
1715 				xfs_btree_rec_addr(cur, keyno, block));
1716 		return kp;
1717 	}
1718 
1719 	return xfs_btree_key_addr(cur, keyno, block);
1720 }
1721 
1722 /*
1723  * Lookup the record.  The cursor is made to point to it, based on dir.
1724  * stat is set to 0 if no such record can be found, 1 for success.
1725  */
1726 int					/* error */
1727 xfs_btree_lookup(
1728 	struct xfs_btree_cur	*cur,	/* btree cursor */
1729 	xfs_lookup_t		dir,	/* <=, ==, or >= */
1730 	int			*stat)	/* success/failure */
1731 {
1732 	struct xfs_btree_block	*block;	/* current btree block */
1733 	__int64_t		diff;	/* difference for the current key */
1734 	int			error;	/* error return value */
1735 	int			keyno;	/* current key number */
1736 	int			level;	/* level in the btree */
1737 	union xfs_btree_ptr	*pp;	/* ptr to btree block */
1738 	union xfs_btree_ptr	ptr;	/* ptr to btree block */
1739 
1740 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1741 	XFS_BTREE_TRACE_ARGI(cur, dir);
1742 
1743 	XFS_BTREE_STATS_INC(cur, lookup);
1744 
1745 	block = NULL;
1746 	keyno = 0;
1747 
1748 	/* initialise start pointer from cursor */
1749 	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1750 	pp = &ptr;
1751 
1752 	/*
1753 	 * Iterate over each level in the btree, starting at the root.
1754 	 * For each level above the leaves, find the key we need, based
1755 	 * on the lookup record, then follow the corresponding block
1756 	 * pointer down to the next level.
1757 	 */
1758 	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1759 		/* Get the block we need to do the lookup on. */
1760 		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1761 		if (error)
1762 			goto error0;
1763 
1764 		if (diff == 0) {
1765 			/*
1766 			 * If we already had a key match at a higher level, we
1767 			 * know we need to use the first entry in this block.
1768 			 */
1769 			keyno = 1;
1770 		} else {
1771 			/* Otherwise search this block. Do a binary search. */
1772 
1773 			int	high;	/* high entry number */
1774 			int	low;	/* low entry number */
1775 
1776 			/* Set low and high entry numbers, 1-based. */
1777 			low = 1;
1778 			high = xfs_btree_get_numrecs(block);
1779 			if (!high) {
1780 				/* Block is empty, must be an empty leaf. */
1781 				ASSERT(level == 0 && cur->bc_nlevels == 1);
1782 
1783 				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1784 				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1785 				*stat = 0;
1786 				return 0;
1787 			}
1788 
1789 			/* Binary search the block. */
1790 			while (low <= high) {
1791 				union xfs_btree_key	key;
1792 				union xfs_btree_key	*kp;
1793 
1794 				XFS_BTREE_STATS_INC(cur, compare);
1795 
1796 				/* keyno is average of low and high. */
1797 				keyno = (low + high) >> 1;
1798 
1799 				/* Get current search key */
1800 				kp = xfs_lookup_get_search_key(cur, level,
1801 						keyno, block, &key);
1802 
1803 				/*
1804 				 * Compute difference to get next direction:
1805 				 *  - less than, move right
1806 				 *  - greater than, move left
1807 				 *  - equal, we're done
1808 				 */
1809 				diff = cur->bc_ops->key_diff(cur, kp);
1810 				if (diff < 0)
1811 					low = keyno + 1;
1812 				else if (diff > 0)
1813 					high = keyno - 1;
1814 				else
1815 					break;
1816 			}
1817 		}
1818 
1819 		/*
1820 		 * If there are more levels, set up for the next level
1821 		 * by getting the block number and filling in the cursor.
1822 		 */
1823 		if (level > 0) {
1824 			/*
1825 			 * If we moved left, need the previous key number,
1826 			 * unless there isn't one.
1827 			 */
1828 			if (diff > 0 && --keyno < 1)
1829 				keyno = 1;
1830 			pp = xfs_btree_ptr_addr(cur, keyno, block);
1831 
1832 #ifdef DEBUG
1833 			error = xfs_btree_check_ptr(cur, pp, 0, level);
1834 			if (error)
1835 				goto error0;
1836 #endif
1837 			cur->bc_ptrs[level] = keyno;
1838 		}
1839 	}
1840 
1841 	/* Done with the search. See if we need to adjust the results. */
1842 	if (dir != XFS_LOOKUP_LE && diff < 0) {
1843 		keyno++;
1844 		/*
1845 		 * If ge search and we went off the end of the block, but it's
1846 		 * not the last block, we're in the wrong block.
1847 		 */
1848 		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1849 		if (dir == XFS_LOOKUP_GE &&
1850 		    keyno > xfs_btree_get_numrecs(block) &&
1851 		    !xfs_btree_ptr_is_null(cur, &ptr)) {
1852 			int	i;
1853 
1854 			cur->bc_ptrs[0] = keyno;
1855 			error = xfs_btree_increment(cur, 0, &i);
1856 			if (error)
1857 				goto error0;
1858 			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1859 			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1860 			*stat = 1;
1861 			return 0;
1862 		}
1863 	} else if (dir == XFS_LOOKUP_LE && diff > 0)
1864 		keyno--;
1865 	cur->bc_ptrs[0] = keyno;
1866 
1867 	/* Return if we succeeded or not. */
1868 	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1869 		*stat = 0;
1870 	else if (dir != XFS_LOOKUP_EQ || diff == 0)
1871 		*stat = 1;
1872 	else
1873 		*stat = 0;
1874 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1875 	return 0;
1876 
1877 error0:
1878 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1879 	return error;
1880 }
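
/*
 * Usage note (illustrative, in the spirit of existing callers elsewhere in
 * XFS; not a new interface): a caller typically fills cur->bc_rec with the
 * search key and then does
 *
 *	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
 *
 * treating stat == 0 as "no such record" rather than as an error.
 * XFS_LOOKUP_LE and XFS_LOOKUP_GE position the cursor at the nearest
 * record at or below, respectively at or above, the search key.
 */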
1881 
1882 /*
1883  * Update keys at all levels from here to the root along the cursor's path.
1884  */
1885 STATIC int
1886 xfs_btree_updkey(
1887 	struct xfs_btree_cur	*cur,
1888 	union xfs_btree_key	*keyp,
1889 	int			level)
1890 {
1891 	struct xfs_btree_block	*block;
1892 	struct xfs_buf		*bp;
1893 	union xfs_btree_key	*kp;
1894 	int			ptr;
1895 
1896 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1897 	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1898 
1899 	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1900 
1901 	/*
1902 	 * Go up the tree from this level toward the root.
1903 	 * At each level, update the key value to the value input.
1904 	 * Stop when we reach a level where the cursor isn't pointing
1905 	 * at the first entry in the block.
1906 	 */
1907 	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1908 #ifdef DEBUG
1909 		int		error;
1910 #endif
1911 		block = xfs_btree_get_block(cur, level, &bp);
1912 #ifdef DEBUG
1913 		error = xfs_btree_check_block(cur, block, level, bp);
1914 		if (error) {
1915 			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1916 			return error;
1917 		}
1918 #endif
1919 		ptr = cur->bc_ptrs[level];
1920 		kp = xfs_btree_key_addr(cur, ptr, block);
1921 		xfs_btree_copy_keys(cur, kp, keyp, 1);
1922 		xfs_btree_log_keys(cur, bp, ptr, ptr);
1923 	}
1924 
1925 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1926 	return 0;
1927 }
1928 
1929 /*
1930  * Update the record referred to by cur to the value in the
1931  * given record. This either works (return 0) or gets an
1932  * EFSCORRUPTED error.
1933  */
1934 int
1935 xfs_btree_update(
1936 	struct xfs_btree_cur	*cur,
1937 	union xfs_btree_rec	*rec)
1938 {
1939 	struct xfs_btree_block	*block;
1940 	struct xfs_buf		*bp;
1941 	int			error;
1942 	int			ptr;
1943 	union xfs_btree_rec	*rp;
1944 
1945 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1946 	XFS_BTREE_TRACE_ARGR(cur, rec);
1947 
1948 	/* Pick up the current block. */
1949 	block = xfs_btree_get_block(cur, 0, &bp);
1950 
1951 #ifdef DEBUG
1952 	error = xfs_btree_check_block(cur, block, 0, bp);
1953 	if (error)
1954 		goto error0;
1955 #endif
1956 	/* Get the address of the rec to be updated. */
1957 	ptr = cur->bc_ptrs[0];
1958 	rp = xfs_btree_rec_addr(cur, ptr, block);
1959 
1960 	/* Fill in the new contents and log them. */
1961 	xfs_btree_copy_recs(cur, rp, rec, 1);
1962 	xfs_btree_log_recs(cur, bp, ptr, ptr);
1963 
1964 	/*
1965 	 * If we are tracking the last record in the tree and
1966 	 * we are at the far right edge of the tree, update it.
1967 	 */
1968 	if (xfs_btree_is_lastrec(cur, block, 0)) {
1969 		cur->bc_ops->update_lastrec(cur, block, rec,
1970 					    ptr, LASTREC_UPDATE);
1971 	}
1972 
1973 	/* Updating first rec in leaf. Pass new key value up to our parent. */
1974 	if (ptr == 1) {
1975 		union xfs_btree_key	key;
1976 
1977 		cur->bc_ops->init_key_from_rec(&key, rec);
1978 		error = xfs_btree_updkey(cur, &key, 1);
1979 		if (error)
1980 			goto error0;
1981 	}
1982 
1983 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1984 	return 0;
1985 
1986 error0:
1987 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1988 	return error;
1989 }
1990 
1991 /*
1992  * Move 1 record left from cur/level if possible.
1993  * Update cur to reflect the new path.
1994  */
1995 STATIC int					/* error */
1996 xfs_btree_lshift(
1997 	struct xfs_btree_cur	*cur,
1998 	int			level,
1999 	int			*stat)		/* success/failure */
2000 {
2001 	union xfs_btree_key	key;		/* btree key */
2002 	struct xfs_buf		*lbp;		/* left buffer pointer */
2003 	struct xfs_btree_block	*left;		/* left btree block */
2004 	int			lrecs;		/* left record count */
2005 	struct xfs_buf		*rbp;		/* right buffer pointer */
2006 	struct xfs_btree_block	*right;		/* right btree block */
2007 	int			rrecs;		/* right record count */
2008 	union xfs_btree_ptr	lptr;		/* left btree pointer */
2009 	union xfs_btree_key	*rkp = NULL;	/* right btree key */
2010 	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
2011 	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
2012 	int			error;		/* error return value */
2013 
2014 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2015 	XFS_BTREE_TRACE_ARGI(cur, level);
2016 
2017 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2018 	    level == cur->bc_nlevels - 1)
2019 		goto out0;
2020 
2021 	/* Set up variables for this block as "right". */
2022 	right = xfs_btree_get_block(cur, level, &rbp);
2023 
2024 #ifdef DEBUG
2025 	error = xfs_btree_check_block(cur, right, level, rbp);
2026 	if (error)
2027 		goto error0;
2028 #endif
2029 
2030 	/* If we've got no left sibling then we can't shift an entry left. */
2031 	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2032 	if (xfs_btree_ptr_is_null(cur, &lptr))
2033 		goto out0;
2034 
2035 	/*
2036 	 * If the cursor entry is the one that would be moved, don't
2037 	 * do it... it's too complicated.
2038 	 */
2039 	if (cur->bc_ptrs[level] <= 1)
2040 		goto out0;
2041 
2042 	/* Set up the left neighbor as "left". */
2043 	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2044 	if (error)
2045 		goto error0;
2046 
2047 	/* If it's full, it can't take another entry. */
2048 	lrecs = xfs_btree_get_numrecs(left);
2049 	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2050 		goto out0;
2051 
2052 	rrecs = xfs_btree_get_numrecs(right);
2053 
2054 	/*
2055 	 * We add one entry to the left side and remove one for the right side.
2056 	 * Account for it here, the changes will be updated on disk and logged
2057 	 * later.
2058 	 */
2059 	lrecs++;
2060 	rrecs--;
2061 
2062 	XFS_BTREE_STATS_INC(cur, lshift);
2063 	XFS_BTREE_STATS_ADD(cur, moves, 1);
2064 
2065 	/*
2066 	 * If non-leaf, copy a key and a ptr to the left block.
2067 	 * Log the changes to the left block.
2068 	 */
2069 	if (level > 0) {
2070 		/* It's a non-leaf.  Move keys and pointers. */
2071 		union xfs_btree_key	*lkp;	/* left btree key */
2072 		union xfs_btree_ptr	*lpp;	/* left address pointer */
2073 
2074 		lkp = xfs_btree_key_addr(cur, lrecs, left);
2075 		rkp = xfs_btree_key_addr(cur, 1, right);
2076 
2077 		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2078 		rpp = xfs_btree_ptr_addr(cur, 1, right);
2079 #ifdef DEBUG
2080 		error = xfs_btree_check_ptr(cur, rpp, 0, level);
2081 		if (error)
2082 			goto error0;
2083 #endif
2084 		xfs_btree_copy_keys(cur, lkp, rkp, 1);
2085 		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2086 
2087 		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2088 		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2089 
2090 		ASSERT(cur->bc_ops->keys_inorder(cur,
2091 			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
2092 	} else {
2093 		/* It's a leaf.  Move records.  */
2094 		union xfs_btree_rec	*lrp;	/* left record pointer */
2095 
2096 		lrp = xfs_btree_rec_addr(cur, lrecs, left);
2097 		rrp = xfs_btree_rec_addr(cur, 1, right);
2098 
2099 		xfs_btree_copy_recs(cur, lrp, rrp, 1);
2100 		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2101 
2102 		ASSERT(cur->bc_ops->recs_inorder(cur,
2103 			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
2104 	}
2105 
2106 	xfs_btree_set_numrecs(left, lrecs);
2107 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2108 
2109 	xfs_btree_set_numrecs(right, rrecs);
2110 	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2111 
2112 	/*
2113 	 * Slide the contents of right down one entry.
2114 	 */
2115 	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2116 	if (level > 0) {
2117 		/* It's a nonleaf. operate on keys and ptrs */
2118 #ifdef DEBUG
2119 		int			i;		/* loop index */
2120 
2121 		for (i = 0; i < rrecs; i++) {
2122 			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2123 			if (error)
2124 				goto error0;
2125 		}
2126 #endif
2127 		xfs_btree_shift_keys(cur,
2128 				xfs_btree_key_addr(cur, 2, right),
2129 				-1, rrecs);
2130 		xfs_btree_shift_ptrs(cur,
2131 				xfs_btree_ptr_addr(cur, 2, right),
2132 				-1, rrecs);
2133 
2134 		xfs_btree_log_keys(cur, rbp, 1, rrecs);
2135 		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2136 	} else {
2137 		/* It's a leaf. operate on records */
2138 		xfs_btree_shift_recs(cur,
2139 			xfs_btree_rec_addr(cur, 2, right),
2140 			-1, rrecs);
2141 		xfs_btree_log_recs(cur, rbp, 1, rrecs);
2142 
2143 		/*
2144 		 * If it's the first record in the block, we'll need a key
2145 		 * structure to pass up to the next level (updkey).
2146 		 */
2147 		cur->bc_ops->init_key_from_rec(&key,
2148 			xfs_btree_rec_addr(cur, 1, right));
2149 		rkp = &key;
2150 	}
2151 
2152 	/* Update the parent key values of right. */
2153 	error = xfs_btree_updkey(cur, rkp, level + 1);
2154 	if (error)
2155 		goto error0;
2156 
2157 	/* Slide the cursor value left one. */
2158 	cur->bc_ptrs[level]--;
2159 
2160 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2161 	*stat = 1;
2162 	return 0;
2163 
2164 out0:
2165 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2166 	*stat = 0;
2167 	return 0;
2168 
2169 error0:
2170 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171 	return error;
2172 }
2173 
2174 /*
2175  * Move 1 record right from cur/level if possible.
2176  * Update cur to reflect the new path.
2177  */
2178 STATIC int					/* error */
2179 xfs_btree_rshift(
2180 	struct xfs_btree_cur	*cur,
2181 	int			level,
2182 	int			*stat)		/* success/failure */
2183 {
2184 	union xfs_btree_key	key;		/* btree key */
2185 	struct xfs_buf		*lbp;		/* left buffer pointer */
2186 	struct xfs_btree_block	*left;		/* left btree block */
2187 	struct xfs_buf		*rbp;		/* right buffer pointer */
2188 	struct xfs_btree_block	*right;		/* right btree block */
2189 	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
2190 	union xfs_btree_ptr	rptr;		/* right block pointer */
2191 	union xfs_btree_key	*rkp;		/* right btree key */
2192 	int			rrecs;		/* right record count */
2193 	int			lrecs;		/* left record count */
2194 	int			error;		/* error return value */
2195 	int			i;		/* loop counter */
2196 
2197 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2198 	XFS_BTREE_TRACE_ARGI(cur, level);
2199 
2200 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2201 	    (level == cur->bc_nlevels - 1))
2202 		goto out0;
2203 
2204 	/* Set up variables for this block as "left". */
2205 	left = xfs_btree_get_block(cur, level, &lbp);
2206 
2207 #ifdef DEBUG
2208 	error = xfs_btree_check_block(cur, left, level, lbp);
2209 	if (error)
2210 		goto error0;
2211 #endif
2212 
2213 	/* If we've got no right sibling then we can't shift an entry right. */
2214 	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2215 	if (xfs_btree_ptr_is_null(cur, &rptr))
2216 		goto out0;
2217 
2218 	/*
2219 	 * If the cursor entry is the one that would be moved, don't
2220 	 * do it... it's too complicated.
2221 	 */
2222 	lrecs = xfs_btree_get_numrecs(left);
2223 	if (cur->bc_ptrs[level] >= lrecs)
2224 		goto out0;
2225 
2226 	/* Set up the right neighbor as "right". */
2227 	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2228 	if (error)
2229 		goto error0;
2230 
2231 	/* If it's full, it can't take another entry. */
2232 	rrecs = xfs_btree_get_numrecs(right);
2233 	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2234 		goto out0;
2235 
2236 	XFS_BTREE_STATS_INC(cur, rshift);
2237 	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2238 
2239 	/*
2240 	 * Make a hole at the start of the right neighbor block, then
2241 	 * copy the last left block entry to the hole.
2242 	 */
2243 	if (level > 0) {
2244 		/* It's a nonleaf. make a hole in the keys and ptrs */
2245 		union xfs_btree_key	*lkp;
2246 		union xfs_btree_ptr	*lpp;
2247 		union xfs_btree_ptr	*rpp;
2248 
2249 		lkp = xfs_btree_key_addr(cur, lrecs, left);
2250 		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2251 		rkp = xfs_btree_key_addr(cur, 1, right);
2252 		rpp = xfs_btree_ptr_addr(cur, 1, right);
2253 
2254 #ifdef DEBUG
2255 		for (i = rrecs - 1; i >= 0; i--) {
2256 			error = xfs_btree_check_ptr(cur, rpp, i, level);
2257 			if (error)
2258 				goto error0;
2259 		}
2260 #endif
2261 
2262 		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2263 		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2264 
2265 #ifdef DEBUG
2266 		error = xfs_btree_check_ptr(cur, lpp, 0, level);
2267 		if (error)
2268 			goto error0;
2269 #endif
2270 
2271 		/* Now put the new data in, and log it. */
2272 		xfs_btree_copy_keys(cur, rkp, lkp, 1);
2273 		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2274 
2275 		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2276 		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2277 
2278 		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2279 			xfs_btree_key_addr(cur, 2, right)));
2280 	} else {
2281 		/* It's a leaf. make a hole in the records */
2282 		union xfs_btree_rec	*lrp;
2283 		union xfs_btree_rec	*rrp;
2284 
2285 		lrp = xfs_btree_rec_addr(cur, lrecs, left);
2286 		rrp = xfs_btree_rec_addr(cur, 1, right);
2287 
2288 		xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2289 
2290 		/* Now put the new data in, and log it. */
2291 		xfs_btree_copy_recs(cur, rrp, lrp, 1);
2292 		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2293 
2294 		cur->bc_ops->init_key_from_rec(&key, rrp);
2295 		rkp = &key;
2296 
2297 		ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2298 			xfs_btree_rec_addr(cur, 2, right)));
2299 	}
2300 
2301 	/*
2302 	 * Decrement and log left's numrecs, bump and log right's numrecs.
2303 	 */
2304 	xfs_btree_set_numrecs(left, --lrecs);
2305 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2306 
2307 	xfs_btree_set_numrecs(right, ++rrecs);
2308 	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2309 
2310 	/*
2311 	 * Using a temporary cursor, update the parent key values of the
2312 	 * block on the right.
2313 	 */
2314 	error = xfs_btree_dup_cursor(cur, &tcur);
2315 	if (error)
2316 		goto error0;
2317 	i = xfs_btree_lastrec(tcur, level);
2318 	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
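	/*
	 * tcur now sits on the last entry of this block; incrementing it
	 * moves it into the right sibling, so the updkey call below fixes
	 * the right sibling's parent keys rather than ours.
	 */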
2319 
2320 	error = xfs_btree_increment(tcur, level, &i);
2321 	if (error)
2322 		goto error1;
2323 
2324 	error = xfs_btree_updkey(tcur, rkp, level + 1);
2325 	if (error)
2326 		goto error1;
2327 
2328 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2329 
2330 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2331 	*stat = 1;
2332 	return 0;
2333 
2334 out0:
2335 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2336 	*stat = 0;
2337 	return 0;
2338 
2339 error0:
2340 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2341 	return error;
2342 
2343 error1:
2344 	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2345 	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2346 	return error;
2347 }
2348 
2349 /*
2350  * Split cur/level block in half.
2351  * Return new block number and the key to its first
2352  * record (to be inserted into parent).
2353  */
2354 STATIC int					/* error */
2355 __xfs_btree_split(
2356 	struct xfs_btree_cur	*cur,
2357 	int			level,
2358 	union xfs_btree_ptr	*ptrp,
2359 	union xfs_btree_key	*key,
2360 	struct xfs_btree_cur	**curp,
2361 	int			*stat)		/* success/failure */
2362 {
2363 	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
2364 	struct xfs_buf		*lbp;		/* left buffer pointer */
2365 	struct xfs_btree_block	*left;		/* left btree block */
2366 	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
2367 	struct xfs_buf		*rbp;		/* right buffer pointer */
2368 	struct xfs_btree_block	*right;		/* right btree block */
2369 	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
2370 	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
2371 	struct xfs_btree_block	*rrblock;	/* right-right btree block */
2372 	int			lrecs;
2373 	int			rrecs;
2374 	int			src_index;
2375 	int			error;		/* error return value */
2376 #ifdef DEBUG
2377 	int			i;
2378 #endif
2379 
2380 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2381 	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2382 
2383 	XFS_BTREE_STATS_INC(cur, split);
2384 
2385 	/* Set up left block (current one). */
2386 	left = xfs_btree_get_block(cur, level, &lbp);
2387 
2388 #ifdef DEBUG
2389 	error = xfs_btree_check_block(cur, left, level, lbp);
2390 	if (error)
2391 		goto error0;
2392 #endif
2393 
2394 	xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2395 
2396 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
2397 	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
2398 	if (error)
2399 		goto error0;
2400 	if (*stat == 0)
2401 		goto out0;
2402 	XFS_BTREE_STATS_INC(cur, alloc);
2403 
2404 	/* Set up the new block as "right". */
2405 	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2406 	if (error)
2407 		goto error0;
2408 
2409 	/* Fill in the btree header for the new right block. */
2410 	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
2411 
2412 	/*
2413 	 * Split the entries between the old and the new block evenly.
2414 	 * Make sure that if there's an odd number of entries now, both
2415 	 * blocks end up with the same number once the pending insert lands.
2416 	 */
2417 	lrecs = xfs_btree_get_numrecs(left);
2418 	rrecs = lrecs / 2;
2419 	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2420 		rrecs++;
2421 	src_index = (lrecs - rrecs + 1);
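	/*
	 * src_index is the 1-based index within the old (left) block of the
	 * first entry that will be copied to the new right block.
	 */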
2422 
2423 	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2424 
2425 	/*
2426 	 * Copy btree block entries from the left block over to the
2427 	 * new block, the right. Update the right block and log the
2428 	 * changes.
2429 	 */
2430 	if (level > 0) {
2431 		/* It's a non-leaf.  Move keys and pointers. */
2432 		union xfs_btree_key	*lkp;	/* left btree key */
2433 		union xfs_btree_ptr	*lpp;	/* left address pointer */
2434 		union xfs_btree_key	*rkp;	/* right btree key */
2435 		union xfs_btree_ptr	*rpp;	/* right address pointer */
2436 
2437 		lkp = xfs_btree_key_addr(cur, src_index, left);
2438 		lpp = xfs_btree_ptr_addr(cur, src_index, left);
2439 		rkp = xfs_btree_key_addr(cur, 1, right);
2440 		rpp = xfs_btree_ptr_addr(cur, 1, right);
2441 
2442 #ifdef DEBUG
2443 		for (i = src_index; i < rrecs; i++) {
2444 			error = xfs_btree_check_ptr(cur, lpp, i, level);
2445 			if (error)
2446 				goto error0;
2447 		}
2448 #endif
2449 
2450 		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2451 		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2452 
2453 		xfs_btree_log_keys(cur, rbp, 1, rrecs);
2454 		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2455 
2456 		/* Grab the keys to the entries moved to the right block */
2457 		xfs_btree_copy_keys(cur, key, rkp, 1);
2458 	} else {
2459 		/* It's a leaf.  Move records.  */
2460 		union xfs_btree_rec	*lrp;	/* left record pointer */
2461 		union xfs_btree_rec	*rrp;	/* right record pointer */
2462 
2463 		lrp = xfs_btree_rec_addr(cur, src_index, left);
2464 		rrp = xfs_btree_rec_addr(cur, 1, right);
2465 
2466 		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2467 		xfs_btree_log_recs(cur, rbp, 1, rrecs);
2468 
2469 		cur->bc_ops->init_key_from_rec(key,
2470 			xfs_btree_rec_addr(cur, 1, right));
2471 	}
2472 
2473 
2474 	/*
2475 	 * Find the left block number by looking in the buffer.
2476 	 * Adjust numrecs, sibling pointers.
2477 	 */
2478 	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2479 	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2480 	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2481 	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2482 
2483 	lrecs -= rrecs;
2484 	xfs_btree_set_numrecs(left, lrecs);
2485 	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2486 
2487 	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2488 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2489 
2490 	/*
2491 	 * If there's a block to the new block's right, make that block
2492 	 * point back to right instead of to left.
2493 	 */
2494 	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2495 		error = xfs_btree_read_buf_block(cur, &rrptr,
2496 							0, &rrblock, &rrbp);
2497 		if (error)
2498 			goto error0;
2499 		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2500 		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2501 	}
2502 	/*
2503 	 * If the cursor is really in the right block, move it there.
2504 	 * If it's just pointing past the last entry in left, then we'll
2505 	 * insert there, so don't change anything in that case.
2506 	 */
2507 	if (cur->bc_ptrs[level] > lrecs + 1) {
2508 		xfs_btree_setbuf(cur, level, rbp);
2509 		cur->bc_ptrs[level] -= lrecs;
2510 	}
2511 	/*
2512 	 * If there are more levels, we'll need another cursor which refers to
2513 	 * the right block, no matter where this cursor was.
2514 	 */
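	/*
	 * The duplicated cursor is advanced one entry at the next level up,
	 * which is where the new block's key and pointer will be inserted.
	 */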
2515 	if (level + 1 < cur->bc_nlevels) {
2516 		error = xfs_btree_dup_cursor(cur, curp);
2517 		if (error)
2518 			goto error0;
2519 		(*curp)->bc_ptrs[level + 1]++;
2520 	}
2521 	*ptrp = rptr;
2522 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2523 	*stat = 1;
2524 	return 0;
2525 out0:
2526 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2527 	*stat = 0;
2528 	return 0;
2529 
2530 error0:
2531 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2532 	return error;
2533 }
2534 
2535 struct xfs_btree_split_args {
2536 	struct xfs_btree_cur	*cur;
2537 	int			level;
2538 	union xfs_btree_ptr	*ptrp;
2539 	union xfs_btree_key	*key;
2540 	struct xfs_btree_cur	**curp;
2541 	int			*stat;		/* success/failure */
2542 	int			result;
2543 	bool			kswapd;	/* allocation in kswapd context */
2544 	struct completion	*done;
2545 	struct work_struct	work;
2546 };
2547 
2548 /*
2549  * Stack switching interfaces for allocation
2550  */
2551 static void
2552 xfs_btree_split_worker(
2553 	struct work_struct	*work)
2554 {
2555 	struct xfs_btree_split_args	*args = container_of(work,
2556 						struct xfs_btree_split_args, work);
2557 	unsigned long		pflags;
2558 	unsigned long		new_pflags = PF_FSTRANS;
2559 
2560 	/*
2561 	 * we are in a transaction context here, but may also be doing work
2562 	 * in kswapd context, and hence we may need to inherit that state
2563 	 * temporarily to ensure that we don't block waiting for memory reclaim
2564 	 * in any way.
2565 	 */
2566 	if (args->kswapd)
2567 		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2568 
2569 	current_set_flags_nested(&pflags, new_pflags);
2570 
2571 	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2572 					 args->key, args->curp, args->stat);
2573 	complete(args->done);
2574 
2575 	current_restore_flags_nested(&pflags, new_pflags);
2576 }
2577 
2578 /*
2579  * BMBT split requests often come in with little stack to work on. Push
2580  * them off to a worker thread so there is lots of stack to use. For the other
2581  * btree types, just call directly to avoid the context switch overhead here.
2582  */
2583 STATIC int					/* error */
2584 xfs_btree_split(
2585 	struct xfs_btree_cur	*cur,
2586 	int			level,
2587 	union xfs_btree_ptr	*ptrp,
2588 	union xfs_btree_key	*key,
2589 	struct xfs_btree_cur	**curp,
2590 	int			*stat)		/* success/failure */
2591 {
2592 	struct xfs_btree_split_args	args;
2593 	DECLARE_COMPLETION_ONSTACK(done);
2594 
2595 	if (cur->bc_btnum != XFS_BTNUM_BMAP)
2596 		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2597 
2598 	args.cur = cur;
2599 	args.level = level;
2600 	args.ptrp = ptrp;
2601 	args.key = key;
2602 	args.curp = curp;
2603 	args.stat = stat;
2604 	args.done = &done;
2605 	args.kswapd = current_is_kswapd();
2606 	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2607 	queue_work(xfs_alloc_wq, &args.work);
2608 	wait_for_completion(&done);
2609 	destroy_work_on_stack(&args.work);
2610 	return args.result;
2611 }
2612 
2613 
2614 /*
2615  * Copy the old inode root contents into a real block and make the
2616  * broot point to it.
2617  */
2618 int						/* error */
2619 xfs_btree_new_iroot(
2620 	struct xfs_btree_cur	*cur,		/* btree cursor */
2621 	int			*logflags,	/* logging flags for inode */
2622 	int			*stat)		/* return status - 0 fail */
2623 {
2624 	struct xfs_buf		*cbp;		/* buffer for cblock */
2625 	struct xfs_btree_block	*block;		/* btree block */
2626 	struct xfs_btree_block	*cblock;	/* child btree block */
2627 	union xfs_btree_key	*ckp;		/* child key pointer */
2628 	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
2629 	union xfs_btree_key	*kp;		/* pointer to btree key */
2630 	union xfs_btree_ptr	*pp;		/* pointer to block addr */
2631 	union xfs_btree_ptr	nptr;		/* new block addr */
2632 	int			level;		/* btree level */
2633 	int			error;		/* error return code */
2634 #ifdef DEBUG
2635 	int			i;		/* loop counter */
2636 #endif
2637 
2638 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2639 	XFS_BTREE_STATS_INC(cur, newroot);
2640 
2641 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2642 
2643 	level = cur->bc_nlevels - 1;
2644 
2645 	block = xfs_btree_get_iroot(cur);
2646 	pp = xfs_btree_ptr_addr(cur, 1, block);
2647 
2648 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
2649 	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
2650 	if (error)
2651 		goto error0;
2652 	if (*stat == 0) {
2653 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2654 		return 0;
2655 	}
2656 	XFS_BTREE_STATS_INC(cur, alloc);
2657 
2658 	/* Copy the root into a real block. */
2659 	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2660 	if (error)
2661 		goto error0;
2662 
2663 	/*
2664 	 * We can't just memcpy() the root in for CRC enabled btree blocks.
2665 	 * In that case we also have to ensure the blkno remains correct.
2666 	 */
2667 	memcpy(cblock, block, xfs_btree_block_len(cur));
2668 	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
2669 		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
2670 			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
2671 		else
2672 			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
2673 	}
2674 
2675 	be16_add_cpu(&block->bb_level, 1);
2676 	xfs_btree_set_numrecs(block, 1);
2677 	cur->bc_nlevels++;
2678 	cur->bc_ptrs[level + 1] = 1;
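	/*
	 * The inode root is now one level higher and will carry a single
	 * entry pointing at the new child block; aim the cursor's new top
	 * level at that entry.
	 */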
2679 
2680 	kp = xfs_btree_key_addr(cur, 1, block);
2681 	ckp = xfs_btree_key_addr(cur, 1, cblock);
2682 	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2683 
2684 	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2685 #ifdef DEBUG
2686 	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2687 		error = xfs_btree_check_ptr(cur, pp, i, level);
2688 		if (error)
2689 			goto error0;
2690 	}
2691 #endif
2692 	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2693 
2694 #ifdef DEBUG
2695 	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2696 	if (error)
2697 		goto error0;
2698 #endif
2699 	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2700 
2701 	xfs_iroot_realloc(cur->bc_private.b.ip,
2702 			  1 - xfs_btree_get_numrecs(cblock),
2703 			  cur->bc_private.b.whichfork);
2704 
2705 	xfs_btree_setbuf(cur, level, cbp);
2706 
2707 	/*
2708 	 * Do all this logging at the end so that
2709 	 * the root is at the right level.
2710 	 */
2711 	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2712 	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2713 	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2714 
2715 	*logflags |=
2716 		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2717 	*stat = 1;
2718 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2719 	return 0;
2720 error0:
2721 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2722 	return error;
2723 }
2724 
2725 /*
2726  * Allocate a new root block, fill it in.
2727  */
2728 STATIC int				/* error */
2729 xfs_btree_new_root(
2730 	struct xfs_btree_cur	*cur,	/* btree cursor */
2731 	int			*stat)	/* success/failure */
2732 {
2733 	struct xfs_btree_block	*block;	/* one half of the old root block */
2734 	struct xfs_buf		*bp;	/* buffer containing block */
2735 	int			error;	/* error return value */
2736 	struct xfs_buf		*lbp;	/* left buffer pointer */
2737 	struct xfs_btree_block	*left;	/* left btree block */
2738 	struct xfs_buf		*nbp;	/* new (root) buffer */
2739 	struct xfs_btree_block	*new;	/* new (root) btree block */
2740 	int			nptr;	/* new value for key index, 1 or 2 */
2741 	struct xfs_buf		*rbp;	/* right buffer pointer */
2742 	struct xfs_btree_block	*right;	/* right btree block */
2743 	union xfs_btree_ptr	rptr;
2744 	union xfs_btree_ptr	lptr;
2745 
2746 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2747 	XFS_BTREE_STATS_INC(cur, newroot);
2748 
2749 	/* initialise our start point from the cursor */
2750 	cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2751 
2752 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
2753 	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
2754 	if (error)
2755 		goto error0;
2756 	if (*stat == 0)
2757 		goto out0;
2758 	XFS_BTREE_STATS_INC(cur, alloc);
2759 
2760 	/* Set up the new block. */
2761 	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2762 	if (error)
2763 		goto error0;
2764 
2765 	/* Set the root in the holding structure, increasing the level by 1. */
2766 	cur->bc_ops->set_root(cur, &lptr, 1);
2767 
2768 	/*
2769 	 * At the previous root level there are now two blocks: the old root,
2770 	 * and the new block generated when it was split.  We don't know which
2771 	 * one the cursor is pointing at, so we set up variables "left" and
2772 	 * "right" for each case.
2773 	 */
2774 	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2775 
2776 #ifdef DEBUG
2777 	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2778 	if (error)
2779 		goto error0;
2780 #endif
2781 
2782 	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2783 	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2784 		/* Our block is left, pick up the right block. */
2785 		lbp = bp;
2786 		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2787 		left = block;
2788 		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
2789 		if (error)
2790 			goto error0;
2791 		bp = rbp;
2792 		nptr = 1;
2793 	} else {
2794 		/* Our block is right, pick up the left block. */
2795 		rbp = bp;
2796 		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2797 		right = block;
2798 		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2799 		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
2800 		if (error)
2801 			goto error0;
2802 		bp = lbp;
2803 		nptr = 2;
2804 	}
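	/*
	 * nptr records which entry of the new root (1 = left child,
	 * 2 = right child) the cursor's existing path descends through;
	 * it is stored in the cursor once the new root is filled in below.
	 */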
2805 	/* Fill in the new block's btree header and log it. */
2806 	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
2807 	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2808 	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2809 			!xfs_btree_ptr_is_null(cur, &rptr));
2810 
2811 	/* Fill in the key data in the new root. */
2812 	if (xfs_btree_get_level(left) > 0) {
2813 		xfs_btree_copy_keys(cur,
2814 				xfs_btree_key_addr(cur, 1, new),
2815 				xfs_btree_key_addr(cur, 1, left), 1);
2816 		xfs_btree_copy_keys(cur,
2817 				xfs_btree_key_addr(cur, 2, new),
2818 				xfs_btree_key_addr(cur, 1, right), 1);
2819 	} else {
2820 		cur->bc_ops->init_key_from_rec(
2821 				xfs_btree_key_addr(cur, 1, new),
2822 				xfs_btree_rec_addr(cur, 1, left));
2823 		cur->bc_ops->init_key_from_rec(
2824 				xfs_btree_key_addr(cur, 2, new),
2825 				xfs_btree_rec_addr(cur, 1, right));
2826 	}
2827 	xfs_btree_log_keys(cur, nbp, 1, 2);
2828 
2829 	/* Fill in the pointer data in the new root. */
2830 	xfs_btree_copy_ptrs(cur,
2831 		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2832 	xfs_btree_copy_ptrs(cur,
2833 		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2834 	xfs_btree_log_ptrs(cur, nbp, 1, 2);
2835 
2836 	/* Fix up the cursor. */
2837 	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2838 	cur->bc_ptrs[cur->bc_nlevels] = nptr;
2839 	cur->bc_nlevels++;
2840 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2841 	*stat = 1;
2842 	return 0;
2843 error0:
2844 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2845 	return error;
2846 out0:
2847 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2848 	*stat = 0;
2849 	return 0;
2850 }
2851 
2852 STATIC int
2853 xfs_btree_make_block_unfull(
2854 	struct xfs_btree_cur	*cur,	/* btree cursor */
2855 	int			level,	/* btree level */
2856 	int			numrecs,/* # of recs in block */
2857 	int			*oindex,/* old tree index */
2858 	int			*index,	/* new tree index */
2859 	union xfs_btree_ptr	*nptr,	/* new btree ptr */
2860 	struct xfs_btree_cur	**ncur,	/* new btree cursor */
2861 	union xfs_btree_rec	*nrec,	/* new record */
2862 	int			*stat)
2863 {
2864 	union xfs_btree_key	key;	/* new btree key value */
2865 	int			error = 0;
2866 
2867 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2868 	    level == cur->bc_nlevels - 1) {
2869 		struct xfs_inode *ip = cur->bc_private.b.ip;
2870 
2871 		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2872 			/* A root block that can be made bigger. */
2873 			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2874 		} else {
2875 			/* A root block that needs replacing */
2876 			int	logflags = 0;
2877 
2878 			error = xfs_btree_new_iroot(cur, &logflags, stat);
2879 			if (error || *stat == 0)
2880 				return error;
2881 
2882 			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2883 		}
2884 
2885 		return 0;
2886 	}
2887 
2888 	/* First, try shifting an entry to the right neighbor. */
2889 	error = xfs_btree_rshift(cur, level, stat);
2890 	if (error || *stat)
2891 		return error;
2892 
2893 	/* Next, try shifting an entry to the left neighbor. */
2894 	error = xfs_btree_lshift(cur, level, stat);
2895 	if (error)
2896 		return error;
2897 
2898 	if (*stat) {
2899 		*oindex = *index = cur->bc_ptrs[level];
2900 		return 0;
2901 	}
2902 
2903 	/*
2904 	 * Next, try splitting the current block in half.
2905 	 *
2906 	 * If this works we have to re-set our variables because we
2907 	 * could be in a different block now.
2908 	 */
2909 	error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2910 	if (error || *stat == 0)
2911 		return error;
2912 
2913 
2914 	*index = cur->bc_ptrs[level];
2915 	cur->bc_ops->init_rec_from_key(&key, nrec);
2916 	return 0;
2917 }
2918 
2919 /*
2920  * Insert one record/level.  Return information to the caller
2921  * allowing the next level up to proceed if necessary.
2922  */
2923 STATIC int
2924 xfs_btree_insrec(
2925 	struct xfs_btree_cur	*cur,	/* btree cursor */
2926 	int			level,	/* level to insert record at */
2927 	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
2928 	union xfs_btree_rec	*recp,	/* i/o: record data inserted */
2929 	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
2930 	int			*stat)	/* success/failure */
2931 {
2932 	struct xfs_btree_block	*block;	/* btree block */
2933 	struct xfs_buf		*bp;	/* buffer for block */
2934 	union xfs_btree_key	key;	/* btree key */
2935 	union xfs_btree_ptr	nptr;	/* new block ptr */
2936 	struct xfs_btree_cur	*ncur;	/* new btree cursor */
2937 	union xfs_btree_rec	nrec;	/* new record count */
2938 	int			optr;	/* old key/record index */
2939 	int			ptr;	/* key/record index */
2940 	int			numrecs;/* number of records */
2941 	int			error;	/* error return value */
2942 #ifdef DEBUG
2943 	int			i;
2944 #endif
2945 
2946 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2947 	XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2948 
2949 	ncur = NULL;
2950 
2951 	/*
2952 	 * If we have an external root pointer, and we've made it to the
2953 	 * root level, allocate a new root block and we're done.
2954 	 */
2955 	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2956 	    (level >= cur->bc_nlevels)) {
2957 		error = xfs_btree_new_root(cur, stat);
2958 		xfs_btree_set_ptr_null(cur, ptrp);
2959 
2960 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2961 		return error;
2962 	}
2963 
2964 	/* If we're off the left edge, return failure. */
2965 	ptr = cur->bc_ptrs[level];
2966 	if (ptr == 0) {
2967 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2968 		*stat = 0;
2969 		return 0;
2970 	}
2971 
2972 	/* Make a key out of the record data to be inserted, and save it. */
2973 	cur->bc_ops->init_key_from_rec(&key, recp);
2974 
2975 	optr = ptr;
2976 
2977 	XFS_BTREE_STATS_INC(cur, insrec);
2978 
2979 	/* Get pointers to the btree buffer and block. */
2980 	block = xfs_btree_get_block(cur, level, &bp);
2981 	numrecs = xfs_btree_get_numrecs(block);
2982 
2983 #ifdef DEBUG
2984 	error = xfs_btree_check_block(cur, block, level, bp);
2985 	if (error)
2986 		goto error0;
2987 
2988 	/* Check that the new entry is being inserted in the right place. */
2989 	if (ptr <= numrecs) {
2990 		if (level == 0) {
2991 			ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2992 				xfs_btree_rec_addr(cur, ptr, block)));
2993 		} else {
2994 			ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2995 				xfs_btree_key_addr(cur, ptr, block)));
2996 		}
2997 	}
2998 #endif
2999 
3000 	/*
3001 	 * If the block is full, we can't insert the new entry until we
3002 	 * make the block un-full.
3003 	 */
3004 	xfs_btree_set_ptr_null(cur, &nptr);
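	/*
	 * nptr stays null unless making room forces a split, in which case
	 * it is handed back to the caller along with a new record and
	 * cursor so the split can be propagated up a level.
	 */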
3005 	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3006 		error = xfs_btree_make_block_unfull(cur, level, numrecs,
3007 					&optr, &ptr, &nptr, &ncur, &nrec, stat);
3008 		if (error || *stat == 0)
3009 			goto error0;
3010 	}
3011 
3012 	/*
3013 	 * The current block may have changed if the block was
3014 	 * previously full and we have just made space in it.
3015 	 */
3016 	block = xfs_btree_get_block(cur, level, &bp);
3017 	numrecs = xfs_btree_get_numrecs(block);
3018 
3019 #ifdef DEBUG
3020 	error = xfs_btree_check_block(cur, block, level, bp);
3021 	if (error)
3022 		return error;
3023 #endif
3024 
3025 	/*
3026 	 * At this point we know there's room for our new entry in the block
3027 	 * we're pointing at.
3028 	 */
3029 	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3030 
3031 	if (level > 0) {
3032 		/* It's a nonleaf. make a hole in the keys and ptrs */
3033 		union xfs_btree_key	*kp;
3034 		union xfs_btree_ptr	*pp;
3035 
3036 		kp = xfs_btree_key_addr(cur, ptr, block);
3037 		pp = xfs_btree_ptr_addr(cur, ptr, block);
3038 
3039 #ifdef DEBUG
3040 		for (i = numrecs - ptr; i >= 0; i--) {
3041 			error = xfs_btree_check_ptr(cur, pp, i, level);
3042 			if (error)
3043 				return error;
3044 		}
3045 #endif
3046 
3047 		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3048 		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3049 
3050 #ifdef DEBUG
3051 		error = xfs_btree_check_ptr(cur, ptrp, 0, level);
3052 		if (error)
3053 			goto error0;
3054 #endif
3055 
3056 		/* Now put the new data in, bump numrecs and log it. */
3057 		xfs_btree_copy_keys(cur, kp, &key, 1);
3058 		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3059 		numrecs++;
3060 		xfs_btree_set_numrecs(block, numrecs);
3061 		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3062 		xfs_btree_log_keys(cur, bp, ptr, numrecs);
3063 #ifdef DEBUG
3064 		if (ptr < numrecs) {
3065 			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3066 				xfs_btree_key_addr(cur, ptr + 1, block)));
3067 		}
3068 #endif
3069 	} else {
3070 		/* It's a leaf. make a hole in the records */
3071 		union xfs_btree_rec             *rp;
3072 
3073 		rp = xfs_btree_rec_addr(cur, ptr, block);
3074 
3075 		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3076 
3077 		/* Now put the new data in, bump numrecs and log it. */
3078 		xfs_btree_copy_recs(cur, rp, recp, 1);
3079 		xfs_btree_set_numrecs(block, ++numrecs);
3080 		xfs_btree_log_recs(cur, bp, ptr, numrecs);
3081 #ifdef DEBUG
3082 		if (ptr < numrecs) {
3083 			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3084 				xfs_btree_rec_addr(cur, ptr + 1, block)));
3085 		}
3086 #endif
3087 	}
3088 
3089 	/* Log the new number of records in the btree header. */
3090 	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3091 
3092 	/* If we inserted at the start of a block, update the parents' keys. */
3093 	if (optr == 1) {
3094 		error = xfs_btree_updkey(cur, &key, level + 1);
3095 		if (error)
3096 			goto error0;
3097 	}
3098 
3099 	/*
3100 	 * If we are tracking the last record in the tree and
3101 	 * we are at the far right edge of the tree, update it.
3102 	 */
3103 	if (xfs_btree_is_lastrec(cur, block, level)) {
3104 		cur->bc_ops->update_lastrec(cur, block, recp,
3105 					    ptr, LASTREC_INSREC);
3106 	}
3107 
3108 	/*
3109 	 * Return the new block number, if any.
3110 	 * If there is one, give back a record value and a cursor too.
3111 	 */
3112 	*ptrp = nptr;
3113 	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3114 		*recp = nrec;
3115 		*curp = ncur;
3116 	}
3117 
3118 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3119 	*stat = 1;
3120 	return 0;
3121 
3122 error0:
3123 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3124 	return error;
3125 }
3126 
3127 /*
3128  * Insert the record at the point referenced by cur.
3129  *
3130  * A multi-level split of the tree on insert will invalidate the original
3131  * cursor.  All callers of this function should assume that the cursor is
3132  * no longer valid and revalidate it.
3133  */
3134 int
3135 xfs_btree_insert(
3136 	struct xfs_btree_cur	*cur,
3137 	int			*stat)
3138 {
3139 	int			error;	/* error return value */
3140 	int			i;	/* result value, 0 for failure */
3141 	int			level;	/* current level number in btree */
3142 	union xfs_btree_ptr	nptr;	/* new block number (split result) */
3143 	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
3144 	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
3145 	union xfs_btree_rec	rec;	/* record to insert */
3146 
3147 	level = 0;
3148 	ncur = NULL;
3149 	pcur = cur;
3150 
3151 	xfs_btree_set_ptr_null(cur, &nptr);
3152 	cur->bc_ops->init_rec_from_cur(cur, &rec);
3153 
3154 	/*
3155 	 * Loop going up the tree, starting at the leaf level.
3156 	 * Stop when we don't get a split block, that must mean that
3157 	 * the insert is finished with this level.
3158 	 */
3159 	do {
3160 		/*
3161 		 * Insert nrec/nptr into this level of the tree.
3162 		 * Note if we fail, nptr will be null.
3163 		 */
3164 		error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
3165 		if (error) {
3166 			if (pcur != cur)
3167 				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3168 			goto error0;
3169 		}
3170 
3171 		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3172 		level++;
3173 
3174 		/*
3175 		 * See if the cursor we just used is trash.
3176 		 * Can't trash the caller's cursor, but otherwise we should
3177 		 * if ncur is a new cursor or we're about to be done.
3178 		 */
3179 		if (pcur != cur &&
3180 		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3181 			/* Save the state from the cursor before we trash it */
3182 			if (cur->bc_ops->update_cursor)
3183 				cur->bc_ops->update_cursor(pcur, cur);
3184 			cur->bc_nlevels = pcur->bc_nlevels;
3185 			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3186 		}
3187 		/* If we got a new cursor, switch to it. */
3188 		if (ncur) {
3189 			pcur = ncur;
3190 			ncur = NULL;
3191 		}
3192 	} while (!xfs_btree_ptr_is_null(cur, &nptr));
3193 
3194 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3195 	*stat = i;
3196 	return 0;
3197 error0:
3198 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3199 	return error;
3200 }
3201 
3202 /*
3203  * Try to merge a non-leaf block back into the inode root.
3204  *
3205  * Note: the killroot name comes from the fact that we're effectively
3206  * killing the old root block.  But because we can't just delete the
3207  * inode we have to copy the single block it was pointing to into the
3208  * inode.
3209  */
3210 STATIC int
3211 xfs_btree_kill_iroot(
3212 	struct xfs_btree_cur	*cur)
3213 {
3214 	int			whichfork = cur->bc_private.b.whichfork;
3215 	struct xfs_inode	*ip = cur->bc_private.b.ip;
3216 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
3217 	struct xfs_btree_block	*block;
3218 	struct xfs_btree_block	*cblock;
3219 	union xfs_btree_key	*kp;
3220 	union xfs_btree_key	*ckp;
3221 	union xfs_btree_ptr	*pp;
3222 	union xfs_btree_ptr	*cpp;
3223 	struct xfs_buf		*cbp;
3224 	int			level;
3225 	int			index;
3226 	int			numrecs;
3227 	int			error;
3228 #ifdef DEBUG
3229 	union xfs_btree_ptr	ptr;
3230 	int			i;
3231 #endif
3232 
3233 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3234 
3235 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3236 	ASSERT(cur->bc_nlevels > 1);
3237 
3238 	/*
3239 	 * Don't deal with the case where the root block needs to be a leaf.
3240 	 * We're just going to turn the thing back into extents anyway.
3241 	 */
3242 	level = cur->bc_nlevels - 1;
3243 	if (level == 1)
3244 		goto out0;
3245 
3246 	/*
3247 	 * Give up if the root has multiple children.
3248 	 */
3249 	block = xfs_btree_get_iroot(cur);
3250 	if (xfs_btree_get_numrecs(block) != 1)
3251 		goto out0;
3252 
3253 	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3254 	numrecs = xfs_btree_get_numrecs(cblock);
3255 
3256 	/*
3257 	 * Only do this if the next level down will fit in the inode root.
3258 	 * Its contents are then copied up into the inode, and it is the
3259 	 * child block, not the root, that ends up being freed.
3260 	 */
3261 	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3262 		goto out0;
3263 
3264 	XFS_BTREE_STATS_INC(cur, killroot);
3265 
3266 #ifdef DEBUG
3267 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3268 	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3269 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3270 	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3271 #endif
3272 
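	/*
	 * Resize the inode root so it can hold the child's records; the
	 * broot may move when it is reallocated, so re-read the block
	 * pointer afterwards.
	 */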
3273 	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3274 	if (index) {
3275 		xfs_iroot_realloc(cur->bc_private.b.ip, index,
3276 				  cur->bc_private.b.whichfork);
3277 		block = ifp->if_broot;
3278 	}
3279 
3280 	be16_add_cpu(&block->bb_numrecs, index);
3281 	ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3282 
3283 	kp = xfs_btree_key_addr(cur, 1, block);
3284 	ckp = xfs_btree_key_addr(cur, 1, cblock);
3285 	xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3286 
3287 	pp = xfs_btree_ptr_addr(cur, 1, block);
3288 	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3289 #ifdef DEBUG
3290 	for (i = 0; i < numrecs; i++) {
3291 		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3292 		if (error) {
3293 			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3294 			return error;
3295 		}
3296 	}
3297 #endif
3298 	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3299 
3300 	error = xfs_btree_free_block(cur, cbp);
3301 	if (error) {
3302 		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3303 		return error;
3304 	}
3305 
3306 	cur->bc_bufs[level - 1] = NULL;
3307 	be16_add_cpu(&block->bb_level, -1);
3308 	xfs_trans_log_inode(cur->bc_tp, ip,
3309 		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3310 	cur->bc_nlevels--;
3311 out0:
3312 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3313 	return 0;
3314 }
3315 
3316 /*
3317  * Kill the current root node, and replace it with its only child node.
3318  */
3319 STATIC int
3320 xfs_btree_kill_root(
3321 	struct xfs_btree_cur	*cur,
3322 	struct xfs_buf		*bp,
3323 	int			level,
3324 	union xfs_btree_ptr	*newroot)
3325 {
3326 	int			error;
3327 
3328 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3329 	XFS_BTREE_STATS_INC(cur, killroot);
3330 
3331 	/*
3332 	 * Update the root pointer, decreasing the level by 1 and then
3333 	 * free the old root.
3334 	 */
3335 	cur->bc_ops->set_root(cur, newroot, -1);
3336 
3337 	error = xfs_btree_free_block(cur, bp);
3338 	if (error) {
3339 		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3340 		return error;
3341 	}
3342 
3343 	cur->bc_bufs[level] = NULL;
3344 	cur->bc_ra[level] = 0;
3345 	cur->bc_nlevels--;
3346 
3347 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3348 	return 0;
3349 }
3350 
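/*
 * Step the cursor back one entry at the given level (a no-op at the leaf
 * level) and report success to the caller.
 */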
3351 STATIC int
3352 xfs_btree_dec_cursor(
3353 	struct xfs_btree_cur	*cur,
3354 	int			level,
3355 	int			*stat)
3356 {
3357 	int			error;
3358 	int			i;
3359 
3360 	if (level > 0) {
3361 		error = xfs_btree_decrement(cur, level, &i);
3362 		if (error)
3363 			return error;
3364 	}
3365 
3366 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3367 	*stat = 1;
3368 	return 0;
3369 }
3370 
3371 /*
3372  * Single level of the btree record deletion routine.
3373  * Delete record pointed to by cur/level.
3374  * Remove the record from its block then rebalance the tree.
3375  * Return 0 for error, 1 for done, 2 to go on to the next level.
3376  */
3377 STATIC int					/* error */
3378 xfs_btree_delrec(
3379 	struct xfs_btree_cur	*cur,		/* btree cursor */
3380 	int			level,		/* level removing record from */
3381 	int			*stat)		/* fail/done/go-on */
3382 {
3383 	struct xfs_btree_block	*block;		/* btree block */
3384 	union xfs_btree_ptr	cptr;		/* current block ptr */
3385 	struct xfs_buf		*bp;		/* buffer for block */
3386 	int			error;		/* error return value */
3387 	int			i;		/* loop counter */
3388 	union xfs_btree_key	key;		/* storage for keyp */
3389 	union xfs_btree_key	*keyp = &key;	/* passed to the next level */
3390 	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
3391 	struct xfs_buf		*lbp;		/* left buffer pointer */
3392 	struct xfs_btree_block	*left;		/* left btree block */
3393 	int			lrecs = 0;	/* left record count */
3394 	int			ptr;		/* key/record index */
3395 	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
3396 	struct xfs_buf		*rbp;		/* right buffer pointer */
3397 	struct xfs_btree_block	*right;		/* right btree block */
3398 	struct xfs_btree_block	*rrblock;	/* right-right btree block */
3399 	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
3400 	int			rrecs = 0;	/* right record count */
3401 	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
3402 	int			numrecs;	/* temporary numrec count */
3403 
3404 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3405 	XFS_BTREE_TRACE_ARGI(cur, level);
3406 
3407 	tcur = NULL;
3408 
3409 	/* Get the index of the entry being deleted, check for nothing there. */
3410 	ptr = cur->bc_ptrs[level];
3411 	if (ptr == 0) {
3412 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3413 		*stat = 0;
3414 		return 0;
3415 	}
3416 
3417 	/* Get the buffer & block containing the record or key/ptr. */
3418 	block = xfs_btree_get_block(cur, level, &bp);
3419 	numrecs = xfs_btree_get_numrecs(block);
3420 
3421 #ifdef DEBUG
3422 	error = xfs_btree_check_block(cur, block, level, bp);
3423 	if (error)
3424 		goto error0;
3425 #endif
3426 
3427 	/* Fail if we're off the end of the block. */
3428 	if (ptr > numrecs) {
3429 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3430 		*stat = 0;
3431 		return 0;
3432 	}
3433 
3434 	XFS_BTREE_STATS_INC(cur, delrec);
3435 	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3436 
3437 	/* Excise the entries being deleted. */
3438 	if (level > 0) {
3439 		/* It's a nonleaf. operate on keys and ptrs */
3440 		union xfs_btree_key	*lkp;
3441 		union xfs_btree_ptr	*lpp;
3442 
3443 		lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3444 		lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3445 
3446 #ifdef DEBUG
3447 		for (i = 0; i < numrecs - ptr; i++) {
3448 			error = xfs_btree_check_ptr(cur, lpp, i, level);
3449 			if (error)
3450 				goto error0;
3451 		}
3452 #endif
3453 
3454 		if (ptr < numrecs) {
3455 			xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3456 			xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3457 			xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3458 			xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3459 		}
3460 
3461 		/*
3462 		 * If it's the first record in the block, we'll need to pass a
3463 		 * key up to the next level (updkey).
3464 		 */
3465 		if (ptr == 1)
3466 			keyp = xfs_btree_key_addr(cur, 1, block);
3467 	} else {
3468 		/* It's a leaf. operate on records */
3469 		if (ptr < numrecs) {
3470 			xfs_btree_shift_recs(cur,
3471 				xfs_btree_rec_addr(cur, ptr + 1, block),
3472 				-1, numrecs - ptr);
3473 			xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3474 		}
3475 
3476 		/*
3477 		 * If it's the first record in the block, we'll need a key
3478 		 * structure to pass up to the next level (updkey).
3479 		 */
3480 		if (ptr == 1) {
3481 			cur->bc_ops->init_key_from_rec(&key,
3482 					xfs_btree_rec_addr(cur, 1, block));
3483 			keyp = &key;
3484 		}
3485 	}
3486 
3487 	/*
3488 	 * Decrement and log the number of entries in the block.
3489 	 */
3490 	xfs_btree_set_numrecs(block, --numrecs);
3491 	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3492 
3493 	/*
3494 	 * If we are tracking the last record in the tree and
3495 	 * we are at the far right edge of the tree, update it.
3496 	 */
3497 	if (xfs_btree_is_lastrec(cur, block, level)) {
3498 		cur->bc_ops->update_lastrec(cur, block, NULL,
3499 					    ptr, LASTREC_DELREC);
3500 	}
3501 
3502 	/*
3503 	 * If we're at the root level, first shrink the root block in-memory.
3504 	 * Try to get rid of the next level down.  If we can't then there's
3505 	 * nothing left to do.
3506 	 */
3507 	if (level == cur->bc_nlevels - 1) {
3508 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3509 			xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3510 					  cur->bc_private.b.whichfork);
3511 
3512 			error = xfs_btree_kill_iroot(cur);
3513 			if (error)
3514 				goto error0;
3515 
3516 			error = xfs_btree_dec_cursor(cur, level, stat);
3517 			if (error)
3518 				goto error0;
3519 			*stat = 1;
3520 			return 0;
3521 		}
3522 
3523 		/*
3524 		 * If this is the root level, and there's only one entry left,
3525 		 * and it's NOT the leaf level, then we can get rid of this
3526 		 * level.
3527 		 */
3528 		if (numrecs == 1 && level > 0) {
3529 			union xfs_btree_ptr	*pp;
3530 			/*
3531 			 * pp is still set to the first pointer in the block.
3532 			 * pp points to the first pointer in the block.
3533 			 */
3534 			pp = xfs_btree_ptr_addr(cur, 1, block);
3535 			error = xfs_btree_kill_root(cur, bp, level, pp);
3536 			if (error)
3537 				goto error0;
3538 		} else if (level > 0) {
3539 			error = xfs_btree_dec_cursor(cur, level, stat);
3540 			if (error)
3541 				goto error0;
3542 		}
3543 		*stat = 1;
3544 		return 0;
3545 	}
3546 
3547 	/*
3548 	 * If we deleted the leftmost entry in the block, update the
3549 	 * key values above us in the tree.
3550 	 */
3551 	if (ptr == 1) {
3552 		error = xfs_btree_updkey(cur, keyp, level + 1);
3553 		if (error)
3554 			goto error0;
3555 	}
3556 
3557 	/*
3558 	 * If the number of records remaining in the block is at least
3559 	 * the minimum, we're done.
3560 	 */
3561 	if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3562 		error = xfs_btree_dec_cursor(cur, level, stat);
3563 		if (error)
3564 			goto error0;
3565 		return 0;
3566 	}
3567 
3568 	/*
3569 	 * Otherwise, we have to move some records around to keep the
3570 	 * tree balanced.  Look at the left and right sibling blocks to
3571 	 * see if we can re-balance by moving only one record.
3572 	 */
3573 	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3574 	xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3575 
3576 	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3577 		/*
3578 		 * One child of root, need to get a chance to copy its contents
3579 		 * into the root and delete it. Can't go up to next level,
3580 		 * there's nothing to delete there.
3581 		 */
3582 		if (xfs_btree_ptr_is_null(cur, &rptr) &&
3583 		    xfs_btree_ptr_is_null(cur, &lptr) &&
3584 		    level == cur->bc_nlevels - 2) {
3585 			error = xfs_btree_kill_iroot(cur);
3586 			if (!error)
3587 				error = xfs_btree_dec_cursor(cur, level, stat);
3588 			if (error)
3589 				goto error0;
3590 			return 0;
3591 		}
3592 	}
3593 
3594 	ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3595 	       !xfs_btree_ptr_is_null(cur, &lptr));
3596 
3597 	/*
3598 	 * Duplicate the cursor so our btree manipulations here won't
3599 	 * disrupt the next level up.
3600 	 */
3601 	error = xfs_btree_dup_cursor(cur, &tcur);
3602 	if (error)
3603 		goto error0;
3604 
3605 	/*
3606 	 * If there's a right sibling, see if it's ok to shift an entry
3607 	 * out of it.
3608 	 */
3609 	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3610 		/*
3611 		 * Move the temp cursor to the last entry in the next block.
3612 		 * Actually any entry but the first would suffice.
3613 		 */
3614 		i = xfs_btree_lastrec(tcur, level);
3615 		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3616 
3617 		error = xfs_btree_increment(tcur, level, &i);
3618 		if (error)
3619 			goto error0;
3620 		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3621 
3622 		i = xfs_btree_lastrec(tcur, level);
3623 		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3624 
3625 		/* Grab a pointer to the block. */
3626 		right = xfs_btree_get_block(tcur, level, &rbp);
3627 #ifdef DEBUG
3628 		error = xfs_btree_check_block(tcur, right, level, rbp);
3629 		if (error)
3630 			goto error0;
3631 #endif
3632 		/* Grab the current block number, for future use. */
3633 		xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3634 
3635 		/*
3636 		 * If right block is full enough so that removing one entry
3637 		 * won't make it too empty, and left-shifting an entry out
3638 		 * of right to us works, we're done.
3639 		 */
3640 		if (xfs_btree_get_numrecs(right) - 1 >=
3641 		    cur->bc_ops->get_minrecs(tcur, level)) {
3642 			error = xfs_btree_lshift(tcur, level, &i);
3643 			if (error)
3644 				goto error0;
3645 			if (i) {
3646 				ASSERT(xfs_btree_get_numrecs(block) >=
3647 				       cur->bc_ops->get_minrecs(tcur, level));
3648 
3649 				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3650 				tcur = NULL;
3651 
3652 				error = xfs_btree_dec_cursor(cur, level, stat);
3653 				if (error)
3654 					goto error0;
3655 				return 0;
3656 			}
3657 		}
3658 
3659 		/*
3660 		 * Otherwise, grab the number of records in right for
3661 		 * future reference, and fix up the temp cursor to point
3662 		 * to our block again (last record).
3663 		 */
3664 		rrecs = xfs_btree_get_numrecs(right);
3665 		if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3666 			i = xfs_btree_firstrec(tcur, level);
3667 			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3668 
3669 			error = xfs_btree_decrement(tcur, level, &i);
3670 			if (error)
3671 				goto error0;
3672 			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3673 		}
3674 	}
3675 
3676 	/*
3677 	 * If there's a left sibling, see if it's ok to shift an entry
3678 	 * out of it.
3679 	 */
3680 	if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3681 		/*
3682 		 * Move the temp cursor to the first entry in the
3683 		 * previous block.
3684 		 */
3685 		i = xfs_btree_firstrec(tcur, level);
3686 		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3687 
3688 		error = xfs_btree_decrement(tcur, level, &i);
3689 		if (error)
3690 			goto error0;
3691 		i = xfs_btree_firstrec(tcur, level);
3692 		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3693 
3694 		/* Grab a pointer to the block. */
3695 		left = xfs_btree_get_block(tcur, level, &lbp);
3696 #ifdef DEBUG
3697 		error = xfs_btree_check_block(cur, left, level, lbp);
3698 		if (error)
3699 			goto error0;
3700 #endif
3701 		/* Grab the current block number, for future use. */
3702 		xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3703 
3704 		/*
3705 		 * If left block is full enough so that removing one entry
3706 		 * won't make it too empty, and right-shifting an entry out
3707 		 * of left to us works, we're done.
3708 		 */
3709 		if (xfs_btree_get_numrecs(left) - 1 >=
3710 		    cur->bc_ops->get_minrecs(tcur, level)) {
3711 			error = xfs_btree_rshift(tcur, level, &i);
3712 			if (error)
3713 				goto error0;
3714 			if (i) {
3715 				ASSERT(xfs_btree_get_numrecs(block) >=
3716 				       cur->bc_ops->get_minrecs(tcur, level));
3717 				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3718 				tcur = NULL;
3719 				if (level == 0)
3720 					cur->bc_ptrs[0]++;
3721 				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3722 				*stat = 1;
3723 				return 0;
3724 			}
3725 		}
3726 
3727 		/*
3728 		 * Otherwise, grab the number of records in left for
3729 		 * future reference.
3730 		 */
3731 		lrecs = xfs_btree_get_numrecs(left);
3732 	}
3733 
3734 	/* Delete the temp cursor, we're done with it. */
3735 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3736 	tcur = NULL;
3737 
3738 	/* If here, we need to do a join to keep the tree balanced. */
3739 	ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3740 
3741 	if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3742 	    lrecs + xfs_btree_get_numrecs(block) <=
3743 			cur->bc_ops->get_maxrecs(cur, level)) {
3744 		/*
3745 		 * Set "right" to be the starting block,
3746 		 * "left" to be the left neighbor.
3747 		 */
3748 		rptr = cptr;
3749 		right = block;
3750 		rbp = bp;
3751 		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
3752 		if (error)
3753 			goto error0;
3754 
3755 	/*
3756 	 * If that won't work, see if we can join with the right neighbor block.
3757 	 */
3758 	} else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3759 		   rrecs + xfs_btree_get_numrecs(block) <=
3760 			cur->bc_ops->get_maxrecs(cur, level)) {
3761 		/*
3762 		 * Set "left" to be the starting block,
3763 		 * "right" to be the right neighbor.
3764 		 */
3765 		lptr = cptr;
3766 		left = block;
3767 		lbp = bp;
3768 		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
3769 		if (error)
3770 			goto error0;
3771 
3772 	/*
3773 	 * Otherwise, we can't fix the imbalance.
3774 	 * Just return.  This is probably a logic error, but it's not fatal.
3775 	 */
3776 	} else {
3777 		error = xfs_btree_dec_cursor(cur, level, stat);
3778 		if (error)
3779 			goto error0;
3780 		return 0;
3781 	}
3782 
3783 	rrecs = xfs_btree_get_numrecs(right);
3784 	lrecs = xfs_btree_get_numrecs(left);
3785 
3786 	/*
3787 	 * We're now going to join "left" and "right" by moving all the stuff
3788 	 * in "right" to "left" and deleting "right".
3789 	 */
3790 	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3791 	if (level > 0) {
3792 		/* It's a non-leaf.  Move keys and pointers. */
3793 		union xfs_btree_key	*lkp;	/* left btree key */
3794 		union xfs_btree_ptr	*lpp;	/* left address pointer */
3795 		union xfs_btree_key	*rkp;	/* right btree key */
3796 		union xfs_btree_ptr	*rpp;	/* right address pointer */
3797 
3798 		lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3799 		lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3800 		rkp = xfs_btree_key_addr(cur, 1, right);
3801 		rpp = xfs_btree_ptr_addr(cur, 1, right);
3802 #ifdef DEBUG
3803 		for (i = 1; i < rrecs; i++) {
3804 			error = xfs_btree_check_ptr(cur, rpp, i, level);
3805 			if (error)
3806 				goto error0;
3807 		}
3808 #endif
3809 		xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3810 		xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3811 
3812 		xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3813 		xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3814 	} else {
3815 		/* It's a leaf.  Move records.  */
3816 		union xfs_btree_rec	*lrp;	/* left record pointer */
3817 		union xfs_btree_rec	*rrp;	/* right record pointer */
3818 
3819 		lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3820 		rrp = xfs_btree_rec_addr(cur, 1, right);
3821 
3822 		xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3823 		xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3824 	}
3825 
3826 	XFS_BTREE_STATS_INC(cur, join);
3827 
3828 	/*
3829 	 * Fix up the number of records and right block pointer in the
3830 	 * surviving block, and log it.
3831 	 */
3832 	xfs_btree_set_numrecs(left, lrecs + rrecs);
3833 	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
3834 	xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3835 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3836 
3837 	/* If there is a right sibling, point it to the remaining block. */
3838 	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3839 	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3840 		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
3841 		if (error)
3842 			goto error0;
3843 		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3844 		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3845 	}
3846 
3847 	/* Free the deleted block. */
3848 	error = xfs_btree_free_block(cur, rbp);
3849 	if (error)
3850 		goto error0;
3851 
3852 	/*
3853 	 * If we joined with the left neighbor, set the buffer in the
3854 	 * cursor to the left block, and fix up the index.
3855 	 */
3856 	if (bp != lbp) {
3857 		cur->bc_bufs[level] = lbp;
3858 		cur->bc_ptrs[level] += lrecs;
3859 		cur->bc_ra[level] = 0;
3860 	}
3861 	/*
3862 	 * If we joined with the right neighbor and there's a level above
3863 	 * us, increment the cursor at that level.
3864 	 */
3865 	else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3866 		   (level + 1 < cur->bc_nlevels)) {
3867 		error = xfs_btree_increment(cur, level + 1, &i);
3868 		if (error)
3869 			goto error0;
3870 	}
3871 
3872 	/*
3873 	 * Readjust the ptr at this level if it's not a leaf, since it's
3874 	 * still pointing at the deletion point, which makes the cursor
3875 	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3876 	 * We can't use decrement because it would change the next level up.
3877 	 */
3878 	if (level > 0)
3879 		cur->bc_ptrs[level]--;
3880 
3881 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3882 	/* Return value means the next level up has something to do. */
3883 	*stat = 2;
3884 	return 0;
3885 
3886 error0:
3887 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3888 	if (tcur)
3889 		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3890 	return error;
3891 }
3892 
3893 /*
3894  * Delete the record pointed to by cur.
3895  * On return, the cursor refers to the place where the record was, i.e. the
3896  * position at which it could be re-inserted.
3897  */
3898 int					/* error */
3899 xfs_btree_delete(
3900 	struct xfs_btree_cur	*cur,
3901 	int			*stat)	/* success/failure */
3902 {
3903 	int			error;	/* error return value */
3904 	int			level;
3905 	int			i;
3906 
3907 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3908 
3909 	/*
3910 	 * Go up the tree, starting at leaf level.
3911 	 *
3912 	 * If 2 is returned then a join was done; go to the next level.
3913 	 * Otherwise we are done.
3914 	 */
3915 	for (level = 0, i = 2; i == 2; level++) {
3916 		error = xfs_btree_delrec(cur, level, &i);
3917 		if (error)
3918 			goto error0;
3919 	}
3920 
3921 	if (i == 0) {
3922 		for (level = 1; level < cur->bc_nlevels; level++) {
3923 			if (cur->bc_ptrs[level] == 0) {
3924 				error = xfs_btree_decrement(cur, level, &i);
3925 				if (error)
3926 					goto error0;
3927 				break;
3928 			}
3929 		}
3930 	}
3931 
3932 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3933 	*stat = i;
3934 	return 0;
3935 error0:
3936 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3937 	return error;
3938 }
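
/*
 * Illustrative sketch only (not part of the original source): a caller
 * typically loads the search key into the cursor (cur->bc_rec), positions
 * the cursor with xfs_btree_lookup() and then deletes the record, checking
 * the success flag returned through @stat at each step:
 *
 *	int	stat;
 *	int	error;
 *
 *	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
 *	if (error)
 *		return error;
 *	if (stat != 1)
 *		return -ENOENT;		// record not found
 *	error = xfs_btree_delete(cur, &stat);
 *	if (error)
 *		return error;
 *	ASSERT(stat == 1);		// deletion succeeded
 */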
3939 
3940 /*
3941  * Get the data from the pointed-to record.
3942  */
3943 int					/* error */
3944 xfs_btree_get_rec(
3945 	struct xfs_btree_cur	*cur,	/* btree cursor */
3946 	union xfs_btree_rec	**recp,	/* output: btree record */
3947 	int			*stat)	/* output: success/failure */
3948 {
3949 	struct xfs_btree_block	*block;	/* btree block */
3950 	struct xfs_buf		*bp;	/* buffer pointer */
3951 	int			ptr;	/* record number */
3952 #ifdef DEBUG
3953 	int			error;	/* error return value */
3954 #endif
3955 
3956 	ptr = cur->bc_ptrs[0];
3957 	block = xfs_btree_get_block(cur, 0, &bp);
3958 
3959 #ifdef DEBUG
3960 	error = xfs_btree_check_block(cur, block, 0, bp);
3961 	if (error)
3962 		return error;
3963 #endif
3964 
3965 	/*
3966 	 * Off the right end or left end, return failure.
3967 	 */
3968 	if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3969 		*stat = 0;
3970 		return 0;
3971 	}
3972 
3973 	/*
3974 	 * Point to the record and extract its data.
3975 	 */
3976 	*recp = xfs_btree_rec_addr(cur, ptr, block);
3977 	*stat = 1;
3978 	return 0;
3979 }
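
/*
 * Illustrative sketch only (not part of the original source): walking the
 * leaf records in key order combines xfs_btree_lookup(), xfs_btree_get_rec()
 * and xfs_btree_increment().  This assumes the caller has initialised @cur
 * and loaded a starting key into cur->bc_rec:
 *
 *	union xfs_btree_rec	*rec;
 *	int			stat;
 *	int			error;
 *
 *	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &stat);
 *	while (!error && stat == 1) {
 *		error = xfs_btree_get_rec(cur, &rec, &stat);
 *		if (error || stat == 0)
 *			break;
 *		// decode *rec with the btree-specific helpers here
 *		error = xfs_btree_increment(cur, 0, &stat);
 *	}
 */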
3980 
3981 /*
3982  * Change the owner of a btree.
3983  *
3984  * The mechanism we use here is ordered buffer logging. Because we don't know
3985  * how many buffers we are going to need to modify, we don't really want to
3986  * have to make transaction reservations for the worst case of every buffer in a
3987  * full size btree as that may be more space than we can fit in the log.
3988  *
3989  * We do the btree walk in the most efficient manner possible - we have sibling
3990  * pointers so we can just walk all the blocks on each level from left to right
3991  * in a single pass, and then move to the next level and do the same. We can
3992  * also do readahead on the sibling pointers to get IO moving more quickly,
3993  * though for slow disks this is unlikely to make much difference to performance
3994  * as the amount of CPU work we have to do before moving to the next block is
3995  * relatively small.
3996  *
3997  * For each btree block that we load, modify the owner appropriately, set the
3998  * buffer as an ordered buffer and log it. We need to ensure that we mark the
3999  * region we change dirty so that if the buffer is relogged in a subsequent
4000  * transaction the changes we make here as an ordered buffer are correctly
4001  * relogged in that transaction.  If we are in recovery context, then just
4002  * queue the modified buffer as a delayed write buffer so that transaction
4003  * recovery completion writes the changes to disk.
4004  */
4005 static int
4006 xfs_btree_block_change_owner(
4007 	struct xfs_btree_cur	*cur,
4008 	int			level,
4009 	__uint64_t		new_owner,
4010 	struct list_head	*buffer_list)
4011 {
4012 	struct xfs_btree_block	*block;
4013 	struct xfs_buf		*bp;
4014 	union xfs_btree_ptr     rptr;
4015 
4016 	/* do right sibling readahead */
4017 	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4018 
4019 	/* modify the owner */
4020 	block = xfs_btree_get_block(cur, level, &bp);
4021 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4022 		block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
4023 	else
4024 		block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
4025 
4026 	/*
4027 	 * If the block is a root block hosted in an inode, we might not have a
4028 	 * buffer pointer here and we shouldn't attempt to log the change as the
4029 	 * information is already held in the inode and discarded when the root
4030 	 * block is formatted into the on-disk inode fork. We still change it,
4031 	 * though, so everything is consistent in memory.
4032 	 */
4033 	if (bp) {
4034 		if (cur->bc_tp) {
4035 			xfs_trans_ordered_buf(cur->bc_tp, bp);
4036 			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4037 		} else {
4038 			xfs_buf_delwri_queue(bp, buffer_list);
4039 		}
4040 	} else {
4041 		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4042 		ASSERT(level == cur->bc_nlevels - 1);
4043 	}
4044 
4045 	/* now read the right sibling block for the next iteration */
4046 	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4047 	if (xfs_btree_ptr_is_null(cur, &rptr))
4048 		return -ENOENT;
4049 
4050 	return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4051 }
4052 
4053 int
4054 xfs_btree_change_owner(
4055 	struct xfs_btree_cur	*cur,
4056 	__uint64_t		new_owner,
4057 	struct list_head	*buffer_list)
4058 {
4059 	union xfs_btree_ptr     lptr;
4060 	int			level;
4061 	struct xfs_btree_block	*block = NULL;
4062 	int			error = 0;
4063 
4064 	cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4065 
4066 	/* for each level */
4067 	for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4068 		/* grab the left hand block */
4069 		error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4070 		if (error)
4071 			return error;
4072 
4073 		/* readahead the left most block for the next level down */
4074 		if (level > 0) {
4075 			union xfs_btree_ptr     *ptr;
4076 
4077 			ptr = xfs_btree_ptr_addr(cur, 1, block);
4078 			xfs_btree_readahead_ptr(cur, ptr, 1);
4079 
4080 			/* save for the next iteration of the loop */
4081 			lptr = *ptr;
4082 		}
4083 
4084 		/* for each buffer in the level */
4085 		do {
4086 			error = xfs_btree_block_change_owner(cur, level,
4087 							     new_owner,
4088 							     buffer_list);
4089 		} while (!error);
4090 
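		/*
		 * xfs_btree_block_change_owner() returns -ENOENT when it
		 * walks off the right hand edge of the level, so only
		 * propagate other errors.
		 */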
4091 		if (error != -ENOENT)
4092 			return error;
4093 	}
4094 
4095 	return 0;
4096 }
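
/*
 * Illustrative sketch only (not part of the original source): in a
 * non-transactional (log recovery) context the caller is expected to pass a
 * delwri list and submit it once the walk completes, e.g.:
 *
 *	LIST_HEAD(buffer_list);
 *	int	error;
 *
 *	error = xfs_btree_change_owner(cur, new_owner, &buffer_list);
 *	if (!error)
 *		error = xfs_buf_delwri_submit(&buffer_list);
 */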
4097 
4098 /**
4099  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4100  *				      btree block
4101  *
4102  * @bp: buffer containing the btree block
4105  */
4106 bool
4107 xfs_btree_sblock_v5hdr_verify(
4108 	struct xfs_buf		*bp)
4109 {
4110 	struct xfs_mount	*mp = bp->b_target->bt_mount;
4111 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
4112 	struct xfs_perag	*pag = bp->b_pag;
4113 
4114 	if (!xfs_sb_version_hascrc(&mp->m_sb))
4115 		return false;
4116 	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4117 		return false;
4118 	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4119 		return false;
4120 	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4121 		return false;
4122 	return true;
4123 }
4124 
4125 /**
4126  * xfs_btree_sblock_verify() -- verify a short-format btree block
4127  *
4128  * @bp: buffer containing the btree block
4129  * @max_recs: maximum records allowed in this btree node
4130  */
4131 bool
4132 xfs_btree_sblock_verify(
4133 	struct xfs_buf		*bp,
4134 	unsigned int		max_recs)
4135 {
4136 	struct xfs_mount	*mp = bp->b_target->bt_mount;
4137 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
4138 
4139 	/* numrecs verification */
4140 	if (be16_to_cpu(block->bb_numrecs) > max_recs)
4141 		return false;
4142 
4143 	/* sibling pointer verification */
4144 	if (!block->bb_u.s.bb_leftsib ||
4145 	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
4146 	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
4147 		return false;
4148 	if (!block->bb_u.s.bb_rightsib ||
4149 	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
4150 	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
4151 		return false;
4152 
4153 	return true;
4154 }
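
/*
 * Illustrative sketch only (not part of the original source; the xfs_foobt_*
 * names are hypothetical): a per-btree buffer verifier would typically
 * combine the two helpers above, checking the v5 header fields on CRC-enabled
 * filesystems and then the generic limits with that btree's own maxrecs:
 *
 *	static bool
 *	xfs_foobt_verify(
 *		struct xfs_buf		*bp)
 *	{
 *		struct xfs_mount	*mp = bp->b_target->bt_mount;
 *		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 *		int			level = be16_to_cpu(block->bb_level);
 *
 *		if (xfs_sb_version_hascrc(&mp->m_sb) &&
 *		    !xfs_btree_sblock_v5hdr_verify(bp))
 *			return false;
 *		return xfs_btree_sblock_verify(bp,
 *				xfs_foobt_maxrecs(mp, level == 0));
 *	}
 */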
4155