xref: /openbmc/linux/fs/xfs/scrub/btree.c (revision 53004ee7)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_btree.h"
14 #include "scrub/scrub.h"
15 #include "scrub/common.h"
16 #include "scrub/btree.h"
17 #include "scrub/trace.h"
18 
19 /* btree scrubbing */
20 
21 /*
22  * Check for btree operation errors.  See the section about handling
23  * operational errors in common.c.
24  */
25 static bool
26 __xchk_btree_process_error(
27 	struct xfs_scrub	*sc,
28 	struct xfs_btree_cur	*cur,
29 	int			level,
30 	int			*error,
31 	__u32			errflag,
32 	void			*ret_ip)
33 {
34 	if (*error == 0)
35 		return true;
36 
37 	switch (*error) {
38 	case -EDEADLOCK:
39 		/* Used to restart an op with deadlock avoidance. */
40 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
41 		break;
42 	case -EFSBADCRC:
43 	case -EFSCORRUPTED:
44 		/* Note the badness but don't abort. */
45 		sc->sm->sm_flags |= errflag;
46 		*error = 0;
47 		fallthrough;
48 	default:
49 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
50 			trace_xchk_ifork_btree_op_error(sc, cur, level,
51 					*error, ret_ip);
52 		else
53 			trace_xchk_btree_op_error(sc, cur, level,
54 					*error, ret_ip);
55 		break;
56 	}
57 	return false;
58 }
59 
60 bool
61 xchk_btree_process_error(
62 	struct xfs_scrub	*sc,
63 	struct xfs_btree_cur	*cur,
64 	int			level,
65 	int			*error)
66 {
67 	return __xchk_btree_process_error(sc, cur, level, error,
68 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
69 }
70 
71 bool
72 xchk_btree_xref_process_error(
73 	struct xfs_scrub	*sc,
74 	struct xfs_btree_cur	*cur,
75 	int			level,
76 	int			*error)
77 {
78 	return __xchk_btree_process_error(sc, cur, level, error,
79 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
80 }
81 
82 /* Record btree block corruption. */
83 static void
84 __xchk_btree_set_corrupt(
85 	struct xfs_scrub	*sc,
86 	struct xfs_btree_cur	*cur,
87 	int			level,
88 	__u32			errflag,
89 	void			*ret_ip)
90 {
91 	sc->sm->sm_flags |= errflag;
92 
93 	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
94 		trace_xchk_ifork_btree_error(sc, cur, level,
95 				ret_ip);
96 	else
97 		trace_xchk_btree_error(sc, cur, level,
98 				ret_ip);
99 }
100 
101 void
102 xchk_btree_set_corrupt(
103 	struct xfs_scrub	*sc,
104 	struct xfs_btree_cur	*cur,
105 	int			level)
106 {
107 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
108 			__return_address);
109 }
110 
111 void
112 xchk_btree_xref_set_corrupt(
113 	struct xfs_scrub	*sc,
114 	struct xfs_btree_cur	*cur,
115 	int			level)
116 {
117 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
118 			__return_address);
119 }
120 
121 /*
122  * Make sure this record is in order and doesn't stray outside of the parent
123  * keys.
124  */
125 STATIC void
126 xchk_btree_rec(
127 	struct xchk_btree	*bs)
128 {
129 	struct xfs_btree_cur	*cur = bs->cur;
130 	union xfs_btree_rec	*rec;
131 	union xfs_btree_key	key;
132 	union xfs_btree_key	hkey;
133 	union xfs_btree_key	*keyp;
134 	struct xfs_btree_block	*block;
135 	struct xfs_btree_block	*keyblock;
136 	struct xfs_buf		*bp;
137 
138 	block = xfs_btree_get_block(cur, 0, &bp);
139 	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
140 
141 	trace_xchk_btree_rec(bs->sc, cur, 0);
142 
143 	/* If this isn't the first record, are they in order? */
144 	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
145 		xchk_btree_set_corrupt(bs->sc, cur, 0);
146 	bs->firstrec = false;
147 	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
148 
149 	if (cur->bc_nlevels == 1)
150 		return;
151 
152 	/* Is this at least as large as the parent low key? */
153 	cur->bc_ops->init_key_from_rec(&key, rec);
154 	keyblock = xfs_btree_get_block(cur, 1, &bp);
155 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
156 	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
157 		xchk_btree_set_corrupt(bs->sc, cur, 1);
158 
159 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
160 		return;
161 
162 	/* Is this no larger than the parent high key? */
163 	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
164 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
165 	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
166 		xchk_btree_set_corrupt(bs->sc, cur, 1);
167 }
168 
169 /*
170  * Make sure this key is in order and doesn't stray outside of the parent
171  * keys.
172  */
173 STATIC void
174 xchk_btree_key(
175 	struct xchk_btree	*bs,
176 	int			level)
177 {
178 	struct xfs_btree_cur	*cur = bs->cur;
179 	union xfs_btree_key	*key;
180 	union xfs_btree_key	*keyp;
181 	struct xfs_btree_block	*block;
182 	struct xfs_btree_block	*keyblock;
183 	struct xfs_buf		*bp;
184 
185 	block = xfs_btree_get_block(cur, level, &bp);
186 	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
187 
188 	trace_xchk_btree_key(bs->sc, cur, level);
189 
190 	/* If this isn't the first key, are they in order? */
191 	if (!bs->firstkey[level] &&
192 	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
193 		xchk_btree_set_corrupt(bs->sc, cur, level);
194 	bs->firstkey[level] = false;
195 	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
196 
197 	if (level + 1 >= cur->bc_nlevels)
198 		return;
199 
200 	/* Is this at least as large as the parent low key? */
201 	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
202 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
203 	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
204 		xchk_btree_set_corrupt(bs->sc, cur, level);
205 
206 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
207 		return;
208 
209 	/* Is this no larger than the parent high key? */
210 	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
211 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
212 	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
213 		xchk_btree_set_corrupt(bs->sc, cur, level);
214 }
215 
216 /*
217  * Check a btree pointer.  Returns true if it's ok to use this pointer.
218  * Callers do not need to set the corrupt flag.
219  */
220 static bool
221 xchk_btree_ptr_ok(
222 	struct xchk_btree	*bs,
223 	int			level,
224 	union xfs_btree_ptr	*ptr)
225 {
226 	bool			res;
227 
228 	/* A btree rooted in an inode has no block pointer to the root. */
229 	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
230 	    level == bs->cur->bc_nlevels)
231 		return true;
232 
233 	/* Otherwise, check the pointers. */
234 	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
235 		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
236 	else
237 		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
238 	if (!res)
239 		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
240 
241 	return res;
242 }
243 
244 /* Check that a btree block's sibling matches what we expect it. */
245 STATIC int
246 xchk_btree_block_check_sibling(
247 	struct xchk_btree	*bs,
248 	int			level,
249 	int			direction,
250 	union xfs_btree_ptr	*sibling)
251 {
252 	struct xfs_btree_cur	*cur = bs->cur;
253 	struct xfs_btree_block	*pblock;
254 	struct xfs_buf		*pbp;
255 	struct xfs_btree_cur	*ncur = NULL;
256 	union xfs_btree_ptr	*pp;
257 	int			success;
258 	int			error;
259 
260 	error = xfs_btree_dup_cursor(cur, &ncur);
261 	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
262 	    !ncur)
263 		return error;
264 
265 	/*
266 	 * If the pointer is null, we shouldn't be able to move the upper
267 	 * level pointer anywhere.
268 	 */
269 	if (xfs_btree_ptr_is_null(cur, sibling)) {
270 		if (direction > 0)
271 			error = xfs_btree_increment(ncur, level + 1, &success);
272 		else
273 			error = xfs_btree_decrement(ncur, level + 1, &success);
274 		if (error == 0 && success)
275 			xchk_btree_set_corrupt(bs->sc, cur, level);
276 		error = 0;
277 		goto out;
278 	}
279 
280 	/* Increment upper level pointer. */
281 	if (direction > 0)
282 		error = xfs_btree_increment(ncur, level + 1, &success);
283 	else
284 		error = xfs_btree_decrement(ncur, level + 1, &success);
285 	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
286 		goto out;
287 	if (!success) {
288 		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
289 		goto out;
290 	}
291 
292 	/* Compare upper level pointer to sibling pointer. */
293 	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
294 	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
295 	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
296 		goto out;
297 	if (pbp)
298 		xchk_buffer_recheck(bs->sc, pbp);
299 
300 	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
301 		xchk_btree_set_corrupt(bs->sc, cur, level);
302 out:
303 	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
304 	return error;
305 }
306 
307 /* Check the siblings of a btree block. */
308 STATIC int
309 xchk_btree_block_check_siblings(
310 	struct xchk_btree	*bs,
311 	struct xfs_btree_block	*block)
312 {
313 	struct xfs_btree_cur	*cur = bs->cur;
314 	union xfs_btree_ptr	leftsib;
315 	union xfs_btree_ptr	rightsib;
316 	int			level;
317 	int			error = 0;
318 
319 	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
320 	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
321 	level = xfs_btree_get_level(block);
322 
323 	/* Root block should never have siblings. */
324 	if (level == cur->bc_nlevels - 1) {
325 		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
326 		    !xfs_btree_ptr_is_null(cur, &rightsib))
327 			xchk_btree_set_corrupt(bs->sc, cur, level);
328 		goto out;
329 	}
330 
331 	/*
332 	 * Does the left & right sibling pointers match the adjacent
333 	 * parent level pointers?
334 	 * (These function absorbs error codes for us.)
335 	 */
336 	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
337 	if (error)
338 		return error;
339 	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
340 	if (error)
341 		return error;
342 out:
343 	return error;
344 }
345 
346 struct check_owner {
347 	struct list_head	list;
348 	xfs_daddr_t		daddr;
349 	int			level;
350 };
351 
352 /*
353  * Make sure this btree block isn't in the free list and that there's
354  * an rmap record for it.
355  */
356 STATIC int
357 xchk_btree_check_block_owner(
358 	struct xchk_btree	*bs,
359 	int			level,
360 	xfs_daddr_t		daddr)
361 {
362 	xfs_agnumber_t		agno;
363 	xfs_agblock_t		agbno;
364 	xfs_btnum_t		btnum;
365 	bool			init_sa;
366 	int			error = 0;
367 
368 	if (!bs->cur)
369 		return 0;
370 
371 	btnum = bs->cur->bc_btnum;
372 	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
373 	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
374 
375 	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
376 	if (init_sa) {
377 		error = xchk_ag_init(bs->sc, agno, &bs->sc->sa);
378 		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
379 				level, &error))
380 			return error;
381 	}
382 
383 	xchk_xref_is_used_space(bs->sc, agbno, 1);
384 	/*
385 	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
386 	 * have to nullify it (to shut down further block owner checks) if
387 	 * self-xref encounters problems.
388 	 */
389 	if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
390 		bs->cur = NULL;
391 
392 	xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
393 	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
394 		bs->cur = NULL;
395 
396 	if (init_sa)
397 		xchk_ag_free(bs->sc, &bs->sc->sa);
398 
399 	return error;
400 }
401 
402 /* Check the owner of a btree block. */
403 STATIC int
404 xchk_btree_check_owner(
405 	struct xchk_btree	*bs,
406 	int			level,
407 	struct xfs_buf		*bp)
408 {
409 	struct xfs_btree_cur	*cur = bs->cur;
410 	struct check_owner	*co;
411 
412 	/*
413 	 * In theory, xfs_btree_get_block should only give us a null buffer
414 	 * pointer for the root of a root-in-inode btree type, but we need
415 	 * to check defensively here in case the cursor state is also screwed
416 	 * up.
417 	 */
418 	if (bp == NULL) {
419 		if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
420 			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
421 		return 0;
422 	}
423 
424 	/*
425 	 * We want to cross-reference each btree block with the bnobt
426 	 * and the rmapbt.  We cannot cross-reference the bnobt or
427 	 * rmapbt while scanning the bnobt or rmapbt, respectively,
428 	 * because we cannot alter the cursor and we'd prefer not to
429 	 * duplicate cursors.  Therefore, save the buffer daddr for
430 	 * later scanning.
431 	 */
432 	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
433 		co = kmem_alloc(sizeof(struct check_owner),
434 				KM_MAYFAIL);
435 		if (!co)
436 			return -ENOMEM;
437 		co->level = level;
438 		co->daddr = XFS_BUF_ADDR(bp);
439 		list_add_tail(&co->list, &bs->to_check);
440 		return 0;
441 	}
442 
443 	return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
444 }
445 
446 /* Decide if we want to check minrecs of a btree block in the inode root. */
447 static inline bool
448 xchk_btree_check_iroot_minrecs(
449 	struct xchk_btree	*bs)
450 {
451 	/*
452 	 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
453 	 * would miscalculate the space required for the data fork bmbt root
454 	 * when adding an attr fork, and promote the iroot contents to an
455 	 * external block unnecessarily.  This went unnoticed for many years
456 	 * until scrub found filesystems in this state.  Inode rooted btrees are
457 	 * not supposed to have immediate child blocks that are small enough
458 	 * that the contents could fit in the inode root, but we can't fail
459 	 * existing filesystems, so instead we disable the check for data fork
460 	 * bmap btrees when there's an attr fork.
461 	 */
462 	if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
463 	    bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
464 	    XFS_IFORK_Q(bs->sc->ip))
465 		return false;
466 
467 	return true;
468 }
469 
470 /*
471  * Check that this btree block has at least minrecs records or is one of the
472  * special blocks that don't require that.
473  */
474 STATIC void
475 xchk_btree_check_minrecs(
476 	struct xchk_btree	*bs,
477 	int			level,
478 	struct xfs_btree_block	*block)
479 {
480 	struct xfs_btree_cur	*cur = bs->cur;
481 	unsigned int		root_level = cur->bc_nlevels - 1;
482 	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);
483 
484 	/* More records than minrecs means the block is ok. */
485 	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
486 		return;
487 
488 	/*
489 	 * For btrees rooted in the inode, it's possible that the root block
490 	 * contents spilled into a regular ondisk block because there wasn't
491 	 * enough space in the inode root.  The number of records in that
492 	 * child block might be less than the standard minrecs, but that's ok
493 	 * provided that there's only one direct child of the root.
494 	 */
495 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
496 	    level == cur->bc_nlevels - 2) {
497 		struct xfs_btree_block	*root_block;
498 		struct xfs_buf		*root_bp;
499 		int			root_maxrecs;
500 
501 		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
502 		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
503 		if (xchk_btree_check_iroot_minrecs(bs) &&
504 		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
505 		     numrecs <= root_maxrecs))
506 			xchk_btree_set_corrupt(bs->sc, cur, level);
507 		return;
508 	}
509 
510 	/*
511 	 * Otherwise, only the root level is allowed to have fewer than minrecs
512 	 * records or keyptrs.
513 	 */
514 	if (level < root_level)
515 		xchk_btree_set_corrupt(bs->sc, cur, level);
516 }
517 
518 /*
519  * Grab and scrub a btree block given a btree pointer.  Returns block
520  * and buffer pointers (if applicable) if they're ok to use.
521  */
522 STATIC int
523 xchk_btree_get_block(
524 	struct xchk_btree	*bs,
525 	int			level,
526 	union xfs_btree_ptr	*pp,
527 	struct xfs_btree_block	**pblock,
528 	struct xfs_buf		**pbp)
529 {
530 	xfs_failaddr_t		failed_at;
531 	int			error;
532 
533 	*pblock = NULL;
534 	*pbp = NULL;
535 
536 	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
537 	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
538 	    !*pblock)
539 		return error;
540 
541 	xfs_btree_get_block(bs->cur, level, pbp);
542 	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
543 		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
544 				level, *pbp);
545 	else
546 		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
547 				 level, *pbp);
548 	if (failed_at) {
549 		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
550 		return 0;
551 	}
552 	if (*pbp)
553 		xchk_buffer_recheck(bs->sc, *pbp);
554 
555 	xchk_btree_check_minrecs(bs, level, *pblock);
556 
557 	/*
558 	 * Check the block's owner; this function absorbs error codes
559 	 * for us.
560 	 */
561 	error = xchk_btree_check_owner(bs, level, *pbp);
562 	if (error)
563 		return error;
564 
565 	/*
566 	 * Check the block's siblings; this function absorbs error codes
567 	 * for us.
568 	 */
569 	return xchk_btree_block_check_siblings(bs, *pblock);
570 }
571 
572 /*
573  * Check that the low and high keys of this block match the keys stored
574  * in the parent block.
575  */
576 STATIC void
577 xchk_btree_block_keys(
578 	struct xchk_btree	*bs,
579 	int			level,
580 	struct xfs_btree_block	*block)
581 {
582 	union xfs_btree_key	block_keys;
583 	struct xfs_btree_cur	*cur = bs->cur;
584 	union xfs_btree_key	*high_bk;
585 	union xfs_btree_key	*parent_keys;
586 	union xfs_btree_key	*high_pk;
587 	struct xfs_btree_block	*parent_block;
588 	struct xfs_buf		*bp;
589 
590 	if (level >= cur->bc_nlevels - 1)
591 		return;
592 
593 	/* Calculate the keys for this block. */
594 	xfs_btree_get_keys(cur, block, &block_keys);
595 
596 	/* Obtain the parent's copy of the keys for this block. */
597 	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
598 	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
599 			parent_block);
600 
601 	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
602 		xchk_btree_set_corrupt(bs->sc, cur, 1);
603 
604 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
605 		return;
606 
607 	/* Get high keys */
608 	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
609 	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
610 			parent_block);
611 
612 	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
613 		xchk_btree_set_corrupt(bs->sc, cur, 1);
614 }
615 
616 /*
617  * Visit all nodes and leaves of a btree.  Check that all pointers and
618  * records are in order, that the keys reflect the records, and use a callback
619  * so that the caller can verify individual records.
620  */
621 int
622 xchk_btree(
623 	struct xfs_scrub		*sc,
624 	struct xfs_btree_cur		*cur,
625 	xchk_btree_rec_fn		scrub_fn,
626 	const struct xfs_owner_info	*oinfo,
627 	void				*private)
628 {
629 	struct xchk_btree		bs = {
630 		.cur			= cur,
631 		.scrub_rec		= scrub_fn,
632 		.oinfo			= oinfo,
633 		.firstrec		= true,
634 		.private		= private,
635 		.sc			= sc,
636 	};
637 	union xfs_btree_ptr		ptr;
638 	union xfs_btree_ptr		*pp;
639 	union xfs_btree_rec		*recp;
640 	struct xfs_btree_block		*block;
641 	int				level;
642 	struct xfs_buf			*bp;
643 	struct check_owner		*co;
644 	struct check_owner		*n;
645 	int				i;
646 	int				error = 0;
647 
648 	/* Initialize scrub state */
649 	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
650 		bs.firstkey[i] = true;
651 	INIT_LIST_HEAD(&bs.to_check);
652 
653 	/* Don't try to check a tree with a height we can't handle. */
654 	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
655 		xchk_btree_set_corrupt(sc, cur, 0);
656 		goto out;
657 	}
658 
659 	/*
660 	 * Load the root of the btree.  The helper function absorbs
661 	 * error codes for us.
662 	 */
663 	level = cur->bc_nlevels - 1;
664 	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
665 	if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
666 		goto out;
667 	error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
668 	if (error || !block)
669 		goto out;
670 
671 	cur->bc_ptrs[level] = 1;
672 
673 	while (level < cur->bc_nlevels) {
674 		block = xfs_btree_get_block(cur, level, &bp);
675 
676 		if (level == 0) {
677 			/* End of leaf, pop back towards the root. */
678 			if (cur->bc_ptrs[level] >
679 			    be16_to_cpu(block->bb_numrecs)) {
680 				xchk_btree_block_keys(&bs, level, block);
681 				if (level < cur->bc_nlevels - 1)
682 					cur->bc_ptrs[level + 1]++;
683 				level++;
684 				continue;
685 			}
686 
687 			/* Records in order for scrub? */
688 			xchk_btree_rec(&bs);
689 
690 			/* Call out to the record checker. */
691 			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
692 			error = bs.scrub_rec(&bs, recp);
693 			if (error)
694 				break;
695 			if (xchk_should_terminate(sc, &error) ||
696 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
697 				break;
698 
699 			cur->bc_ptrs[level]++;
700 			continue;
701 		}
702 
703 		/* End of node, pop back towards the root. */
704 		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
705 			xchk_btree_block_keys(&bs, level, block);
706 			if (level < cur->bc_nlevels - 1)
707 				cur->bc_ptrs[level + 1]++;
708 			level++;
709 			continue;
710 		}
711 
712 		/* Keys in order for scrub? */
713 		xchk_btree_key(&bs, level);
714 
715 		/* Drill another level deeper. */
716 		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
717 		if (!xchk_btree_ptr_ok(&bs, level, pp)) {
718 			cur->bc_ptrs[level]++;
719 			continue;
720 		}
721 		level--;
722 		error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
723 		if (error || !block)
724 			goto out;
725 
726 		cur->bc_ptrs[level] = 1;
727 	}
728 
729 out:
730 	/* Process deferred owner checks on btree blocks. */
731 	list_for_each_entry_safe(co, n, &bs.to_check, list) {
732 		if (!error && bs.cur)
733 			error = xchk_btree_check_block_owner(&bs,
734 					co->level, co->daddr);
735 		list_del(&co->list);
736 		kmem_free(co);
737 	}
738 
739 	return error;
740 }
741