xref: /openbmc/linux/fs/xfs/scrub/btree.c (revision 6396bb221514d2876fd6dc0aa2a1f240d99b37bb)
1 /*
2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
28 #include "xfs_bit.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
31 #include "xfs_sb.h"
32 #include "xfs_inode.h"
33 #include "xfs_alloc.h"
34 #include "scrub/scrub.h"
35 #include "scrub/common.h"
36 #include "scrub/btree.h"
37 #include "scrub/trace.h"
38 
39 /* btree scrubbing */
40 
41 /*
42  * Check for btree operation errors.  See the section about handling
43  * operational errors in common.c.
44  */
45 static bool
46 __xfs_scrub_btree_process_error(
47 	struct xfs_scrub_context	*sc,
48 	struct xfs_btree_cur		*cur,
49 	int				level,
50 	int				*error,
51 	__u32				errflag,
52 	void				*ret_ip)
53 {
54 	if (*error == 0)
55 		return true;
56 
57 	switch (*error) {
58 	case -EDEADLOCK:
59 		/* Used to restart an op with deadlock avoidance. */
60 		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
61 		break;
62 	case -EFSBADCRC:
63 	case -EFSCORRUPTED:
64 		/* Note the badness but don't abort. */
65 		sc->sm->sm_flags |= errflag;
66 		*error = 0;
67 		/* fall through */
68 	default:
69 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
70 			trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
71 					*error, ret_ip);
72 		else
73 			trace_xfs_scrub_btree_op_error(sc, cur, level,
74 					*error, ret_ip);
75 		break;
76 	}
77 	return false;
78 }
79 
80 bool
81 xfs_scrub_btree_process_error(
82 	struct xfs_scrub_context	*sc,
83 	struct xfs_btree_cur		*cur,
84 	int				level,
85 	int				*error)
86 {
87 	return __xfs_scrub_btree_process_error(sc, cur, level, error,
88 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
89 }
90 
91 bool
92 xfs_scrub_btree_xref_process_error(
93 	struct xfs_scrub_context	*sc,
94 	struct xfs_btree_cur		*cur,
95 	int				level,
96 	int				*error)
97 {
98 	return __xfs_scrub_btree_process_error(sc, cur, level, error,
99 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
100 }
101 
102 /* Record btree block corruption. */
103 static void
104 __xfs_scrub_btree_set_corrupt(
105 	struct xfs_scrub_context	*sc,
106 	struct xfs_btree_cur		*cur,
107 	int				level,
108 	__u32				errflag,
109 	void				*ret_ip)
110 {
111 	sc->sm->sm_flags |= errflag;
112 
113 	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
114 		trace_xfs_scrub_ifork_btree_error(sc, cur, level,
115 				ret_ip);
116 	else
117 		trace_xfs_scrub_btree_error(sc, cur, level,
118 				ret_ip);
119 }
120 
121 void
122 xfs_scrub_btree_set_corrupt(
123 	struct xfs_scrub_context	*sc,
124 	struct xfs_btree_cur		*cur,
125 	int				level)
126 {
127 	__xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
128 			__return_address);
129 }
130 
131 void
132 xfs_scrub_btree_xref_set_corrupt(
133 	struct xfs_scrub_context	*sc,
134 	struct xfs_btree_cur		*cur,
135 	int				level)
136 {
137 	__xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
138 			__return_address);
139 }
140 
141 /*
142  * Make sure this record is in order and doesn't stray outside of the parent
143  * keys.
144  */
145 STATIC void
146 xfs_scrub_btree_rec(
147 	struct xfs_scrub_btree	*bs)
148 {
149 	struct xfs_btree_cur	*cur = bs->cur;
150 	union xfs_btree_rec	*rec;
151 	union xfs_btree_key	key;
152 	union xfs_btree_key	hkey;
153 	union xfs_btree_key	*keyp;
154 	struct xfs_btree_block	*block;
155 	struct xfs_btree_block	*keyblock;
156 	struct xfs_buf		*bp;
157 
158 	block = xfs_btree_get_block(cur, 0, &bp);
159 	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
160 
161 	trace_xfs_scrub_btree_rec(bs->sc, cur, 0);
162 
163 	/* If this isn't the first record, are they in order? */
164 	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
165 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
166 	bs->firstrec = false;
167 	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
168 
169 	if (cur->bc_nlevels == 1)
170 		return;
171 
172 	/* Is this at least as large as the parent low key? */
173 	cur->bc_ops->init_key_from_rec(&key, rec);
174 	keyblock = xfs_btree_get_block(cur, 1, &bp);
175 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
176 	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
177 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
178 
179 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
180 		return;
181 
182 	/* Is this no larger than the parent high key? */
183 	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
184 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
185 	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
186 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
187 }
188 
189 /*
190  * Make sure this key is in order and doesn't stray outside of the parent
191  * keys.
192  */
193 STATIC void
194 xfs_scrub_btree_key(
195 	struct xfs_scrub_btree	*bs,
196 	int			level)
197 {
198 	struct xfs_btree_cur	*cur = bs->cur;
199 	union xfs_btree_key	*key;
200 	union xfs_btree_key	*keyp;
201 	struct xfs_btree_block	*block;
202 	struct xfs_btree_block	*keyblock;
203 	struct xfs_buf		*bp;
204 
205 	block = xfs_btree_get_block(cur, level, &bp);
206 	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
207 
208 	trace_xfs_scrub_btree_key(bs->sc, cur, level);
209 
210 	/* If this isn't the first key, are they in order? */
211 	if (!bs->firstkey[level] &&
212 	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
213 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
214 	bs->firstkey[level] = false;
215 	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
216 
217 	if (level + 1 >= cur->bc_nlevels)
218 		return;
219 
220 	/* Is this at least as large as the parent low key? */
221 	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
222 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
223 	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
224 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
225 
226 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
227 		return;
228 
229 	/* Is this no larger than the parent high key? */
230 	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
231 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
232 	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
233 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
234 }
235 
236 /*
237  * Check a btree pointer.  Returns true if it's ok to use this pointer.
238  * Callers do not need to set the corrupt flag.
239  */
240 static bool
241 xfs_scrub_btree_ptr_ok(
242 	struct xfs_scrub_btree		*bs,
243 	int				level,
244 	union xfs_btree_ptr		*ptr)
245 {
246 	bool				res;
247 
248 	/* A btree rooted in an inode has no block pointer to the root. */
249 	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
250 	    level == bs->cur->bc_nlevels)
251 		return true;
252 
253 	/* Otherwise, check the pointers. */
254 	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
255 		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
256 	else
257 		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
258 	if (!res)
259 		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
260 
261 	return res;
262 }
263 
264 /* Check that a btree block's sibling matches what we expect it. */
265 STATIC int
266 xfs_scrub_btree_block_check_sibling(
267 	struct xfs_scrub_btree		*bs,
268 	int				level,
269 	int				direction,
270 	union xfs_btree_ptr		*sibling)
271 {
272 	struct xfs_btree_cur		*cur = bs->cur;
273 	struct xfs_btree_block		*pblock;
274 	struct xfs_buf			*pbp;
275 	struct xfs_btree_cur		*ncur = NULL;
276 	union xfs_btree_ptr		*pp;
277 	int				success;
278 	int				error;
279 
280 	error = xfs_btree_dup_cursor(cur, &ncur);
281 	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
282 	    !ncur)
283 		return error;
284 
285 	/*
286 	 * If the pointer is null, we shouldn't be able to move the upper
287 	 * level pointer anywhere.
288 	 */
289 	if (xfs_btree_ptr_is_null(cur, sibling)) {
290 		if (direction > 0)
291 			error = xfs_btree_increment(ncur, level + 1, &success);
292 		else
293 			error = xfs_btree_decrement(ncur, level + 1, &success);
294 		if (error == 0 && success)
295 			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
296 		error = 0;
297 		goto out;
298 	}
299 
300 	/* Increment upper level pointer. */
301 	if (direction > 0)
302 		error = xfs_btree_increment(ncur, level + 1, &success);
303 	else
304 		error = xfs_btree_decrement(ncur, level + 1, &success);
305 	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
306 		goto out;
307 	if (!success) {
308 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
309 		goto out;
310 	}
311 
312 	/* Compare upper level pointer to sibling pointer. */
313 	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
314 	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
315 	if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
316 		goto out;
317 	if (pbp)
318 		xfs_scrub_buffer_recheck(bs->sc, pbp);
319 
320 	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
321 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
322 out:
323 	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
324 	return error;
325 }
326 
327 /* Check the siblings of a btree block. */
328 STATIC int
329 xfs_scrub_btree_block_check_siblings(
330 	struct xfs_scrub_btree		*bs,
331 	struct xfs_btree_block		*block)
332 {
333 	struct xfs_btree_cur		*cur = bs->cur;
334 	union xfs_btree_ptr		leftsib;
335 	union xfs_btree_ptr		rightsib;
336 	int				level;
337 	int				error = 0;
338 
339 	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
340 	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
341 	level = xfs_btree_get_level(block);
342 
343 	/* Root block should never have siblings. */
344 	if (level == cur->bc_nlevels - 1) {
345 		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
346 		    !xfs_btree_ptr_is_null(cur, &rightsib))
347 			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
348 		goto out;
349 	}
350 
351 	/*
352 	 * Does the left & right sibling pointers match the adjacent
353 	 * parent level pointers?
354 	 * (These function absorbs error codes for us.)
355 	 */
356 	error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
357 	if (error)
358 		return error;
359 	error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
360 	if (error)
361 		return error;
362 out:
363 	return error;
364 }
365 
366 struct check_owner {
367 	struct list_head	list;
368 	xfs_daddr_t		daddr;
369 	int			level;
370 };
371 
372 /*
373  * Make sure this btree block isn't in the free list and that there's
374  * an rmap record for it.
375  */
376 STATIC int
377 xfs_scrub_btree_check_block_owner(
378 	struct xfs_scrub_btree		*bs,
379 	int				level,
380 	xfs_daddr_t			daddr)
381 {
382 	xfs_agnumber_t			agno;
383 	xfs_agblock_t			agbno;
384 	xfs_btnum_t			btnum;
385 	bool				init_sa;
386 	int				error = 0;
387 
388 	if (!bs->cur)
389 		return 0;
390 
391 	btnum = bs->cur->bc_btnum;
392 	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
393 	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
394 
395 	init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
396 	if (init_sa) {
397 		error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
398 		if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
399 				level, &error))
400 			return error;
401 	}
402 
403 	xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
404 	/*
405 	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
406 	 * have to nullify it (to shut down further block owner checks) if
407 	 * self-xref encounters problems.
408 	 */
409 	if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
410 		bs->cur = NULL;
411 
412 	xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
413 	if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
414 		bs->cur = NULL;
415 
416 	if (init_sa)
417 		xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
418 
419 	return error;
420 }
421 
422 /* Check the owner of a btree block. */
423 STATIC int
424 xfs_scrub_btree_check_owner(
425 	struct xfs_scrub_btree		*bs,
426 	int				level,
427 	struct xfs_buf			*bp)
428 {
429 	struct xfs_btree_cur		*cur = bs->cur;
430 	struct check_owner		*co;
431 
432 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
433 		return 0;
434 
435 	/*
436 	 * We want to cross-reference each btree block with the bnobt
437 	 * and the rmapbt.  We cannot cross-reference the bnobt or
438 	 * rmapbt while scanning the bnobt or rmapbt, respectively,
439 	 * because we cannot alter the cursor and we'd prefer not to
440 	 * duplicate cursors.  Therefore, save the buffer daddr for
441 	 * later scanning.
442 	 */
443 	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
444 		co = kmem_alloc(sizeof(struct check_owner),
445 				KM_MAYFAIL);
446 		if (!co)
447 			return -ENOMEM;
448 		co->level = level;
449 		co->daddr = XFS_BUF_ADDR(bp);
450 		list_add_tail(&co->list, &bs->to_check);
451 		return 0;
452 	}
453 
454 	return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
455 }
456 
457 /*
458  * Check that this btree block has at least minrecs records or is one of the
459  * special blocks that don't require that.
460  */
461 STATIC void
462 xfs_scrub_btree_check_minrecs(
463 	struct xfs_scrub_btree	*bs,
464 	int			level,
465 	struct xfs_btree_block	*block)
466 {
467 	unsigned int		numrecs;
468 	int			ok_level;
469 
470 	numrecs = be16_to_cpu(block->bb_numrecs);
471 
472 	/* More records than minrecs means the block is ok. */
473 	if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
474 		return;
475 
476 	/*
477 	 * Certain btree blocks /can/ have fewer than minrecs records.  Any
478 	 * level greater than or equal to the level of the highest dedicated
479 	 * btree block are allowed to violate this constraint.
480 	 *
481 	 * For a btree rooted in a block, the btree root can have fewer than
482 	 * minrecs records.  If the btree is rooted in an inode and does not
483 	 * store records in the root, the direct children of the root and the
484 	 * root itself can have fewer than minrecs records.
485 	 */
486 	ok_level = bs->cur->bc_nlevels - 1;
487 	if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
488 		ok_level--;
489 	if (level >= ok_level)
490 		return;
491 
492 	xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
493 }
494 
495 /*
496  * Grab and scrub a btree block given a btree pointer.  Returns block
497  * and buffer pointers (if applicable) if they're ok to use.
498  */
499 STATIC int
500 xfs_scrub_btree_get_block(
501 	struct xfs_scrub_btree		*bs,
502 	int				level,
503 	union xfs_btree_ptr		*pp,
504 	struct xfs_btree_block		**pblock,
505 	struct xfs_buf			**pbp)
506 {
507 	void				*failed_at;
508 	int				error;
509 
510 	*pblock = NULL;
511 	*pbp = NULL;
512 
513 	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
514 	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
515 	    !*pblock)
516 		return error;
517 
518 	xfs_btree_get_block(bs->cur, level, pbp);
519 	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
520 		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
521 				level, *pbp);
522 	else
523 		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
524 				 level, *pbp);
525 	if (failed_at) {
526 		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
527 		return 0;
528 	}
529 	if (*pbp)
530 		xfs_scrub_buffer_recheck(bs->sc, *pbp);
531 
532 	xfs_scrub_btree_check_minrecs(bs, level, *pblock);
533 
534 	/*
535 	 * Check the block's owner; this function absorbs error codes
536 	 * for us.
537 	 */
538 	error = xfs_scrub_btree_check_owner(bs, level, *pbp);
539 	if (error)
540 		return error;
541 
542 	/*
543 	 * Check the block's siblings; this function absorbs error codes
544 	 * for us.
545 	 */
546 	return xfs_scrub_btree_block_check_siblings(bs, *pblock);
547 }
548 
549 /*
550  * Check that the low and high keys of this block match the keys stored
551  * in the parent block.
552  */
553 STATIC void
554 xfs_scrub_btree_block_keys(
555 	struct xfs_scrub_btree		*bs,
556 	int				level,
557 	struct xfs_btree_block		*block)
558 {
559 	union xfs_btree_key		block_keys;
560 	struct xfs_btree_cur		*cur = bs->cur;
561 	union xfs_btree_key		*high_bk;
562 	union xfs_btree_key		*parent_keys;
563 	union xfs_btree_key		*high_pk;
564 	struct xfs_btree_block		*parent_block;
565 	struct xfs_buf			*bp;
566 
567 	if (level >= cur->bc_nlevels - 1)
568 		return;
569 
570 	/* Calculate the keys for this block. */
571 	xfs_btree_get_keys(cur, block, &block_keys);
572 
573 	/* Obtain the parent's copy of the keys for this block. */
574 	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
575 	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
576 			parent_block);
577 
578 	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
579 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
580 
581 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
582 		return;
583 
584 	/* Get high keys */
585 	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
586 	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
587 			parent_block);
588 
589 	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
590 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
591 }
592 
593 /*
594  * Visit all nodes and leaves of a btree.  Check that all pointers and
595  * records are in order, that the keys reflect the records, and use a callback
596  * so that the caller can verify individual records.
597  */
598 int
599 xfs_scrub_btree(
600 	struct xfs_scrub_context	*sc,
601 	struct xfs_btree_cur		*cur,
602 	xfs_scrub_btree_rec_fn		scrub_fn,
603 	struct xfs_owner_info		*oinfo,
604 	void				*private)
605 {
606 	struct xfs_scrub_btree		bs = { NULL };
607 	union xfs_btree_ptr		ptr;
608 	union xfs_btree_ptr		*pp;
609 	union xfs_btree_rec		*recp;
610 	struct xfs_btree_block		*block;
611 	int				level;
612 	struct xfs_buf			*bp;
613 	struct check_owner		*co;
614 	struct check_owner		*n;
615 	int				i;
616 	int				error = 0;
617 
618 	/* Initialize scrub state */
619 	bs.cur = cur;
620 	bs.scrub_rec = scrub_fn;
621 	bs.oinfo = oinfo;
622 	bs.firstrec = true;
623 	bs.private = private;
624 	bs.sc = sc;
625 	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
626 		bs.firstkey[i] = true;
627 	INIT_LIST_HEAD(&bs.to_check);
628 
629 	/* Don't try to check a tree with a height we can't handle. */
630 	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
631 		xfs_scrub_btree_set_corrupt(sc, cur, 0);
632 		goto out;
633 	}
634 
635 	/*
636 	 * Load the root of the btree.  The helper function absorbs
637 	 * error codes for us.
638 	 */
639 	level = cur->bc_nlevels - 1;
640 	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
641 	if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
642 		goto out;
643 	error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
644 	if (error || !block)
645 		goto out;
646 
647 	cur->bc_ptrs[level] = 1;
648 
649 	while (level < cur->bc_nlevels) {
650 		block = xfs_btree_get_block(cur, level, &bp);
651 
652 		if (level == 0) {
653 			/* End of leaf, pop back towards the root. */
654 			if (cur->bc_ptrs[level] >
655 			    be16_to_cpu(block->bb_numrecs)) {
656 				xfs_scrub_btree_block_keys(&bs, level, block);
657 				if (level < cur->bc_nlevels - 1)
658 					cur->bc_ptrs[level + 1]++;
659 				level++;
660 				continue;
661 			}
662 
663 			/* Records in order for scrub? */
664 			xfs_scrub_btree_rec(&bs);
665 
666 			/* Call out to the record checker. */
667 			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
668 			error = bs.scrub_rec(&bs, recp);
669 			if (error)
670 				break;
671 			if (xfs_scrub_should_terminate(sc, &error) ||
672 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
673 				break;
674 
675 			cur->bc_ptrs[level]++;
676 			continue;
677 		}
678 
679 		/* End of node, pop back towards the root. */
680 		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
681 			xfs_scrub_btree_block_keys(&bs, level, block);
682 			if (level < cur->bc_nlevels - 1)
683 				cur->bc_ptrs[level + 1]++;
684 			level++;
685 			continue;
686 		}
687 
688 		/* Keys in order for scrub? */
689 		xfs_scrub_btree_key(&bs, level);
690 
691 		/* Drill another level deeper. */
692 		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
693 		if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
694 			cur->bc_ptrs[level]++;
695 			continue;
696 		}
697 		level--;
698 		error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
699 		if (error || !block)
700 			goto out;
701 
702 		cur->bc_ptrs[level] = 1;
703 	}
704 
705 out:
706 	/* Process deferred owner checks on btree blocks. */
707 	list_for_each_entry_safe(co, n, &bs.to_check, list) {
708 		if (!error && bs.cur)
709 			error = xfs_scrub_btree_check_block_owner(&bs,
710 					co->level, co->daddr);
711 		list_del(&co->list);
712 		kmem_free(co);
713 	}
714 
715 	return error;
716 }
717