xref: /openbmc/linux/fs/xfs/scrub/dabtree.c (revision b868a02e)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22 
23 /* Directory/Attribute Btree */
24 
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31 	struct xchk_da_btree	*ds,
32 	int			level,
33 	int			*error)
34 {
35 	struct xfs_scrub	*sc = ds->sc;
36 
37 	if (*error == 0)
38 		return true;
39 
40 	switch (*error) {
41 	case -EDEADLOCK:
42 		/* Used to restart an op with deadlock avoidance. */
43 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
44 		break;
45 	case -EFSBADCRC:
46 	case -EFSCORRUPTED:
47 		/* Note the badness but don't abort. */
48 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
49 		*error = 0;
50 		fallthrough;
51 	default:
52 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
53 				xfs_dir2_da_to_db(ds->dargs.geo,
54 					ds->state->path.blk[level].blkno),
55 				*error, __return_address);
56 		break;
57 	}
58 	return false;
59 }
60 
61 /*
62  * Check for da btree corruption.  See the section about handling
63  * operational errors in common.c.
64  */
65 void
66 xchk_da_set_corrupt(
67 	struct xchk_da_btree	*ds,
68 	int			level)
69 {
70 	struct xfs_scrub	*sc = ds->sc;
71 
72 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
73 
74 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
75 			xfs_dir2_da_to_db(ds->dargs.geo,
76 				ds->state->path.blk[level].blkno),
77 			__return_address);
78 }
79 
80 static struct xfs_da_node_entry *
81 xchk_da_btree_node_entry(
82 	struct xchk_da_btree		*ds,
83 	int				level)
84 {
85 	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
86 	struct xfs_da3_icnode_hdr	hdr;
87 
88 	ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
89 
90 	xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
91 	return hdr.btree + blk->index;
92 }
93 
94 /* Scrub a da btree hash (key). */
95 int
96 xchk_da_btree_hash(
97 	struct xchk_da_btree		*ds,
98 	int				level,
99 	__be32				*hashp)
100 {
101 	struct xfs_da_node_entry	*entry;
102 	xfs_dahash_t			hash;
103 	xfs_dahash_t			parent_hash;
104 
105 	/* Is this hash in order? */
106 	hash = be32_to_cpu(*hashp);
107 	if (hash < ds->hashes[level])
108 		xchk_da_set_corrupt(ds, level);
109 	ds->hashes[level] = hash;
110 
111 	if (level == 0)
112 		return 0;
113 
114 	/* Is this hash no larger than the parent hash? */
115 	entry = xchk_da_btree_node_entry(ds, level - 1);
116 	parent_hash = be32_to_cpu(entry->hashval);
117 	if (parent_hash < hash)
118 		xchk_da_set_corrupt(ds, level);
119 
120 	return 0;
121 }
122 
123 /*
124  * Check a da btree pointer.  Returns true if it's ok to use this
125  * pointer.
126  */
127 STATIC bool
128 xchk_da_btree_ptr_ok(
129 	struct xchk_da_btree	*ds,
130 	int			level,
131 	xfs_dablk_t		blkno)
132 {
133 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
134 		xchk_da_set_corrupt(ds, level);
135 		return false;
136 	}
137 
138 	return true;
139 }
140 
141 /*
142  * The da btree scrubber can handle leaf1 blocks as a degenerate
143  * form of leafn blocks.  Since the regular da code doesn't handle
144  * leaf1, we must multiplex the verifiers.
145  */
146 static void
147 xchk_da_btree_read_verify(
148 	struct xfs_buf		*bp)
149 {
150 	struct xfs_da_blkinfo	*info = bp->b_addr;
151 
152 	switch (be16_to_cpu(info->magic)) {
153 	case XFS_DIR2_LEAF1_MAGIC:
154 	case XFS_DIR3_LEAF1_MAGIC:
155 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
156 		bp->b_ops->verify_read(bp);
157 		return;
158 	default:
159 		/*
160 		 * xfs_da3_node_buf_ops already know how to handle
161 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
162 		 */
163 		bp->b_ops = &xfs_da3_node_buf_ops;
164 		bp->b_ops->verify_read(bp);
165 		return;
166 	}
167 }
168 static void
169 xchk_da_btree_write_verify(
170 	struct xfs_buf		*bp)
171 {
172 	struct xfs_da_blkinfo	*info = bp->b_addr;
173 
174 	switch (be16_to_cpu(info->magic)) {
175 	case XFS_DIR2_LEAF1_MAGIC:
176 	case XFS_DIR3_LEAF1_MAGIC:
177 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
178 		bp->b_ops->verify_write(bp);
179 		return;
180 	default:
181 		/*
182 		 * xfs_da3_node_buf_ops already know how to handle
183 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
184 		 */
185 		bp->b_ops = &xfs_da3_node_buf_ops;
186 		bp->b_ops->verify_write(bp);
187 		return;
188 	}
189 }
190 static void *
191 xchk_da_btree_verify(
192 	struct xfs_buf		*bp)
193 {
194 	struct xfs_da_blkinfo	*info = bp->b_addr;
195 
196 	switch (be16_to_cpu(info->magic)) {
197 	case XFS_DIR2_LEAF1_MAGIC:
198 	case XFS_DIR3_LEAF1_MAGIC:
199 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
200 		return bp->b_ops->verify_struct(bp);
201 	default:
202 		bp->b_ops = &xfs_da3_node_buf_ops;
203 		return bp->b_ops->verify_struct(bp);
204 	}
205 }
206 
207 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
208 	.name = "xchk_da_btree",
209 	.verify_read = xchk_da_btree_read_verify,
210 	.verify_write = xchk_da_btree_write_verify,
211 	.verify_struct = xchk_da_btree_verify,
212 };
213 
214 /* Check a block's sibling. */
215 STATIC int
216 xchk_da_btree_block_check_sibling(
217 	struct xchk_da_btree	*ds,
218 	int			level,
219 	int			direction,
220 	xfs_dablk_t		sibling)
221 {
222 	struct xfs_da_state_path *path = &ds->state->path;
223 	struct xfs_da_state_path *altpath = &ds->state->altpath;
224 	int			retval;
225 	int			plevel;
226 	int			error;
227 
228 	memcpy(altpath, path, sizeof(ds->state->altpath));
229 
230 	/*
231 	 * If the pointer is null, we shouldn't be able to move the upper
232 	 * level pointer anywhere.
233 	 */
234 	if (sibling == 0) {
235 		error = xfs_da3_path_shift(ds->state, altpath, direction,
236 				false, &retval);
237 		if (error == 0 && retval == 0)
238 			xchk_da_set_corrupt(ds, level);
239 		error = 0;
240 		goto out;
241 	}
242 
243 	/* Move the alternate cursor one block in the direction given. */
244 	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
245 			&retval);
246 	if (!xchk_da_process_error(ds, level, &error))
247 		goto out;
248 	if (retval) {
249 		xchk_da_set_corrupt(ds, level);
250 		goto out;
251 	}
252 	if (altpath->blk[level].bp)
253 		xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
254 
255 	/* Compare upper level pointer to sibling pointer. */
256 	if (altpath->blk[level].blkno != sibling)
257 		xchk_da_set_corrupt(ds, level);
258 
259 out:
260 	/* Free all buffers in the altpath that aren't referenced from path. */
261 	for (plevel = 0; plevel < altpath->active; plevel++) {
262 		if (altpath->blk[plevel].bp == NULL ||
263 		    (plevel < path->active &&
264 		     altpath->blk[plevel].bp == path->blk[plevel].bp))
265 			continue;
266 
267 		xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
268 		altpath->blk[plevel].bp = NULL;
269 	}
270 
271 	return error;
272 }
273 
274 /* Check a block's sibling pointers. */
275 STATIC int
276 xchk_da_btree_block_check_siblings(
277 	struct xchk_da_btree	*ds,
278 	int			level,
279 	struct xfs_da_blkinfo	*hdr)
280 {
281 	xfs_dablk_t		forw;
282 	xfs_dablk_t		back;
283 	int			error = 0;
284 
285 	forw = be32_to_cpu(hdr->forw);
286 	back = be32_to_cpu(hdr->back);
287 
288 	/* Top level blocks should not have sibling pointers. */
289 	if (level == 0) {
290 		if (forw != 0 || back != 0)
291 			xchk_da_set_corrupt(ds, level);
292 		return 0;
293 	}
294 
295 	/*
296 	 * Check back (left) and forw (right) pointers.  These functions
297 	 * absorb error codes for us.
298 	 */
299 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
300 	if (error)
301 		goto out;
302 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
303 
304 out:
305 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
306 	return error;
307 }
308 
309 /* Load a dir/attribute block from a btree. */
310 STATIC int
311 xchk_da_btree_block(
312 	struct xchk_da_btree		*ds,
313 	int				level,
314 	xfs_dablk_t			blkno)
315 {
316 	struct xfs_da_state_blk		*blk;
317 	struct xfs_da_intnode		*node;
318 	struct xfs_da_node_entry	*btree;
319 	struct xfs_da3_blkinfo		*hdr3;
320 	struct xfs_da_args		*dargs = &ds->dargs;
321 	struct xfs_inode		*ip = ds->dargs.dp;
322 	xfs_ino_t			owner;
323 	int				*pmaxrecs;
324 	struct xfs_da3_icnode_hdr	nodehdr;
325 	int				error = 0;
326 
327 	blk = &ds->state->path.blk[level];
328 	ds->state->path.active = level + 1;
329 
330 	/* Release old block. */
331 	if (blk->bp) {
332 		xfs_trans_brelse(dargs->trans, blk->bp);
333 		blk->bp = NULL;
334 	}
335 
336 	/* Check the pointer. */
337 	blk->blkno = blkno;
338 	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
339 		goto out_nobuf;
340 
341 	/* Read the buffer. */
342 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
343 			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
344 			&xchk_da_btree_buf_ops);
345 	if (!xchk_da_process_error(ds, level, &error))
346 		goto out_nobuf;
347 	if (blk->bp)
348 		xchk_buffer_recheck(ds->sc, blk->bp);
349 
350 	/*
351 	 * We didn't find a dir btree root block, which means that
352 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
353 	 * to be), so jump out now.
354 	 */
355 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
356 			blk->bp == NULL)
357 		goto out_nobuf;
358 
359 	/* It's /not/ ok for attr trees not to have a da btree. */
360 	if (blk->bp == NULL) {
361 		xchk_da_set_corrupt(ds, level);
362 		goto out_nobuf;
363 	}
364 
365 	hdr3 = blk->bp->b_addr;
366 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
367 	pmaxrecs = &ds->maxrecs[level];
368 
369 	/* We only started zeroing the header on v5 filesystems. */
370 	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
371 		xchk_da_set_corrupt(ds, level);
372 
373 	/* Check the owner. */
374 	if (xfs_has_crc(ip->i_mount)) {
375 		owner = be64_to_cpu(hdr3->owner);
376 		if (owner != ip->i_ino)
377 			xchk_da_set_corrupt(ds, level);
378 	}
379 
380 	/* Check the siblings. */
381 	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
382 	if (error)
383 		goto out;
384 
385 	/* Interpret the buffer. */
386 	switch (blk->magic) {
387 	case XFS_ATTR_LEAF_MAGIC:
388 	case XFS_ATTR3_LEAF_MAGIC:
389 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
390 				XFS_BLFT_ATTR_LEAF_BUF);
391 		blk->magic = XFS_ATTR_LEAF_MAGIC;
392 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
393 		if (ds->tree_level != 0)
394 			xchk_da_set_corrupt(ds, level);
395 		break;
396 	case XFS_DIR2_LEAFN_MAGIC:
397 	case XFS_DIR3_LEAFN_MAGIC:
398 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
399 				XFS_BLFT_DIR_LEAFN_BUF);
400 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
401 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
402 		if (ds->tree_level != 0)
403 			xchk_da_set_corrupt(ds, level);
404 		break;
405 	case XFS_DIR2_LEAF1_MAGIC:
406 	case XFS_DIR3_LEAF1_MAGIC:
407 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
408 				XFS_BLFT_DIR_LEAF1_BUF);
409 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
410 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
411 		if (ds->tree_level != 0)
412 			xchk_da_set_corrupt(ds, level);
413 		break;
414 	case XFS_DA_NODE_MAGIC:
415 	case XFS_DA3_NODE_MAGIC:
416 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
417 				XFS_BLFT_DA_NODE_BUF);
418 		blk->magic = XFS_DA_NODE_MAGIC;
419 		node = blk->bp->b_addr;
420 		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
421 		btree = nodehdr.btree;
422 		*pmaxrecs = nodehdr.count;
423 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
424 		if (level == 0) {
425 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
426 				xchk_da_set_corrupt(ds, level);
427 				goto out_freebp;
428 			}
429 			ds->tree_level = nodehdr.level;
430 		} else {
431 			if (ds->tree_level != nodehdr.level) {
432 				xchk_da_set_corrupt(ds, level);
433 				goto out_freebp;
434 			}
435 		}
436 
437 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
438 		break;
439 	default:
440 		xchk_da_set_corrupt(ds, level);
441 		goto out_freebp;
442 	}
443 
444 	/*
445 	 * If we've been handed a block that is below the dabtree root, does
446 	 * its hashval match what the parent block expected to see?
447 	 */
448 	if (level > 0) {
449 		struct xfs_da_node_entry	*key;
450 
451 		key = xchk_da_btree_node_entry(ds, level - 1);
452 		if (be32_to_cpu(key->hashval) != blk->hashval) {
453 			xchk_da_set_corrupt(ds, level);
454 			goto out_freebp;
455 		}
456 	}
457 
458 out:
459 	return error;
460 out_freebp:
461 	xfs_trans_brelse(dargs->trans, blk->bp);
462 	blk->bp = NULL;
463 out_nobuf:
464 	blk->blkno = 0;
465 	return error;
466 }
467 
468 /* Visit all nodes and leaves of a da btree. */
469 int
470 xchk_da_btree(
471 	struct xfs_scrub		*sc,
472 	int				whichfork,
473 	xchk_da_btree_rec_fn		scrub_fn,
474 	void				*private)
475 {
476 	struct xchk_da_btree		*ds;
477 	struct xfs_mount		*mp = sc->mp;
478 	struct xfs_da_state_blk		*blks;
479 	struct xfs_da_node_entry	*key;
480 	xfs_dablk_t			blkno;
481 	int				level;
482 	int				error;
483 
484 	/* Skip short format data structures; no btree to scan. */
485 	if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
486 		return 0;
487 
488 	/* Set up initial da state. */
489 	ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL);
490 	if (!ds)
491 		return -ENOMEM;
492 	ds->dargs.dp = sc->ip;
493 	ds->dargs.whichfork = whichfork;
494 	ds->dargs.trans = sc->tp;
495 	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
496 	ds->state = xfs_da_state_alloc(&ds->dargs);
497 	ds->sc = sc;
498 	ds->private = private;
499 	if (whichfork == XFS_ATTR_FORK) {
500 		ds->dargs.geo = mp->m_attr_geo;
501 		ds->lowest = 0;
502 		ds->highest = 0;
503 	} else {
504 		ds->dargs.geo = mp->m_dir_geo;
505 		ds->lowest = ds->dargs.geo->leafblk;
506 		ds->highest = ds->dargs.geo->freeblk;
507 	}
508 	blkno = ds->lowest;
509 	level = 0;
510 
511 	/* Find the root of the da tree, if present. */
512 	blks = ds->state->path.blk;
513 	error = xchk_da_btree_block(ds, level, blkno);
514 	if (error)
515 		goto out_state;
516 	/*
517 	 * We didn't find a block at ds->lowest, which means that there's
518 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
519 	 * so jump out now.
520 	 */
521 	if (blks[level].bp == NULL)
522 		goto out_state;
523 
524 	blks[level].index = 0;
525 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
526 		/* Handle leaf block. */
527 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
528 			/* End of leaf, pop back towards the root. */
529 			if (blks[level].index >= ds->maxrecs[level]) {
530 				if (level > 0)
531 					blks[level - 1].index++;
532 				ds->tree_level++;
533 				level--;
534 				continue;
535 			}
536 
537 			/* Dispatch record scrubbing. */
538 			error = scrub_fn(ds, level);
539 			if (error)
540 				break;
541 			if (xchk_should_terminate(sc, &error) ||
542 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
543 				break;
544 
545 			blks[level].index++;
546 			continue;
547 		}
548 
549 
550 		/* End of node, pop back towards the root. */
551 		if (blks[level].index >= ds->maxrecs[level]) {
552 			if (level > 0)
553 				blks[level - 1].index++;
554 			ds->tree_level++;
555 			level--;
556 			continue;
557 		}
558 
559 		/* Hashes in order for scrub? */
560 		key = xchk_da_btree_node_entry(ds, level);
561 		error = xchk_da_btree_hash(ds, level, &key->hashval);
562 		if (error)
563 			goto out;
564 
565 		/* Drill another level deeper. */
566 		blkno = be32_to_cpu(key->before);
567 		level++;
568 		if (level >= XFS_DA_NODE_MAXDEPTH) {
569 			/* Too deep! */
570 			xchk_da_set_corrupt(ds, level - 1);
571 			break;
572 		}
573 		ds->tree_level--;
574 		error = xchk_da_btree_block(ds, level, blkno);
575 		if (error)
576 			goto out;
577 		if (blks[level].bp == NULL)
578 			goto out;
579 
580 		blks[level].index = 0;
581 	}
582 
583 out:
584 	/* Release all the buffers we're tracking. */
585 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
586 		if (blks[level].bp == NULL)
587 			continue;
588 		xfs_trans_brelse(sc->tp, blks[level].bp);
589 		blks[level].bp = NULL;
590 	}
591 
592 out_state:
593 	xfs_da_state_free(ds->state);
594 	kmem_free(ds);
595 	return error;
596 }
597