xref: /openbmc/linux/fs/xfs/scrub/dabtree.c (revision b593bce5)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22 
23 /* Directory/Attribute Btree */
24 
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31 	struct xchk_da_btree	*ds,
32 	int			level,
33 	int			*error)
34 {
35 	struct xfs_scrub	*sc = ds->sc;
36 
37 	if (*error == 0)
38 		return true;
39 
40 	switch (*error) {
41 	case -EDEADLOCK:
42 		/* Used to restart an op with deadlock avoidance. */
43 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
44 		break;
45 	case -EFSBADCRC:
46 	case -EFSCORRUPTED:
47 		/* Note the badness but don't abort. */
48 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
49 		*error = 0;
50 		/* fall through */
51 	default:
52 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
53 				xfs_dir2_da_to_db(ds->dargs.geo,
54 					ds->state->path.blk[level].blkno),
55 				*error, __return_address);
56 		break;
57 	}
58 	return false;
59 }
60 
61 /*
62  * Check for da btree corruption.  See the section about handling
63  * operational errors in common.c.
64  */
65 void
66 xchk_da_set_corrupt(
67 	struct xchk_da_btree	*ds,
68 	int			level)
69 {
70 	struct xfs_scrub	*sc = ds->sc;
71 
72 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
73 
74 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
75 			xfs_dir2_da_to_db(ds->dargs.geo,
76 				ds->state->path.blk[level].blkno),
77 			__return_address);
78 }
79 
80 /* Find an entry at a certain level in a da btree. */
81 STATIC void *
82 xchk_da_btree_entry(
83 	struct xchk_da_btree	*ds,
84 	int			level,
85 	int			rec)
86 {
87 	char			*ents;
88 	struct xfs_da_state_blk	*blk;
89 	void			*baddr;
90 
91 	/* Dispatch the entry finding function. */
92 	blk = &ds->state->path.blk[level];
93 	baddr = blk->bp->b_addr;
94 	switch (blk->magic) {
95 	case XFS_ATTR_LEAF_MAGIC:
96 	case XFS_ATTR3_LEAF_MAGIC:
97 		ents = (char *)xfs_attr3_leaf_entryp(baddr);
98 		return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
99 	case XFS_DIR2_LEAFN_MAGIC:
100 	case XFS_DIR3_LEAFN_MAGIC:
101 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
102 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
103 	case XFS_DIR2_LEAF1_MAGIC:
104 	case XFS_DIR3_LEAF1_MAGIC:
105 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
106 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
107 	case XFS_DA_NODE_MAGIC:
108 	case XFS_DA3_NODE_MAGIC:
109 		ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
110 		return ents + (rec * sizeof(struct xfs_da_node_entry));
111 	}
112 
113 	return NULL;
114 }
115 
116 /* Scrub a da btree hash (key). */
117 int
118 xchk_da_btree_hash(
119 	struct xchk_da_btree		*ds,
120 	int				level,
121 	__be32				*hashp)
122 {
123 	struct xfs_da_state_blk		*blks;
124 	struct xfs_da_node_entry	*entry;
125 	xfs_dahash_t			hash;
126 	xfs_dahash_t			parent_hash;
127 
128 	/* Is this hash in order? */
129 	hash = be32_to_cpu(*hashp);
130 	if (hash < ds->hashes[level])
131 		xchk_da_set_corrupt(ds, level);
132 	ds->hashes[level] = hash;
133 
134 	if (level == 0)
135 		return 0;
136 
137 	/* Is this hash no larger than the parent hash? */
138 	blks = ds->state->path.blk;
139 	entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index);
140 	parent_hash = be32_to_cpu(entry->hashval);
141 	if (parent_hash < hash)
142 		xchk_da_set_corrupt(ds, level);
143 
144 	return 0;
145 }
146 
147 /*
148  * Check a da btree pointer.  Returns true if it's ok to use this
149  * pointer.
150  */
151 STATIC bool
152 xchk_da_btree_ptr_ok(
153 	struct xchk_da_btree	*ds,
154 	int			level,
155 	xfs_dablk_t		blkno)
156 {
157 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
158 		xchk_da_set_corrupt(ds, level);
159 		return false;
160 	}
161 
162 	return true;
163 }
164 
165 /*
166  * The da btree scrubber can handle leaf1 blocks as a degenerate
167  * form of leafn blocks.  Since the regular da code doesn't handle
168  * leaf1, we must multiplex the verifiers.
169  */
170 static void
171 xchk_da_btree_read_verify(
172 	struct xfs_buf		*bp)
173 {
174 	struct xfs_da_blkinfo	*info = bp->b_addr;
175 
176 	switch (be16_to_cpu(info->magic)) {
177 	case XFS_DIR2_LEAF1_MAGIC:
178 	case XFS_DIR3_LEAF1_MAGIC:
179 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
180 		bp->b_ops->verify_read(bp);
181 		return;
182 	default:
183 		/*
184 		 * xfs_da3_node_buf_ops already know how to handle
185 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
186 		 */
187 		bp->b_ops = &xfs_da3_node_buf_ops;
188 		bp->b_ops->verify_read(bp);
189 		return;
190 	}
191 }
192 static void
193 xchk_da_btree_write_verify(
194 	struct xfs_buf		*bp)
195 {
196 	struct xfs_da_blkinfo	*info = bp->b_addr;
197 
198 	switch (be16_to_cpu(info->magic)) {
199 	case XFS_DIR2_LEAF1_MAGIC:
200 	case XFS_DIR3_LEAF1_MAGIC:
201 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
202 		bp->b_ops->verify_write(bp);
203 		return;
204 	default:
205 		/*
206 		 * xfs_da3_node_buf_ops already know how to handle
207 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
208 		 */
209 		bp->b_ops = &xfs_da3_node_buf_ops;
210 		bp->b_ops->verify_write(bp);
211 		return;
212 	}
213 }
214 static void *
215 xchk_da_btree_verify(
216 	struct xfs_buf		*bp)
217 {
218 	struct xfs_da_blkinfo	*info = bp->b_addr;
219 
220 	switch (be16_to_cpu(info->magic)) {
221 	case XFS_DIR2_LEAF1_MAGIC:
222 	case XFS_DIR3_LEAF1_MAGIC:
223 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
224 		return bp->b_ops->verify_struct(bp);
225 	default:
226 		bp->b_ops = &xfs_da3_node_buf_ops;
227 		return bp->b_ops->verify_struct(bp);
228 	}
229 }
230 
/*
 * Buffer ops handed to xfs_da_read_buf() by the scrubber.  Each hook
 * inspects the block's magic number and then replaces bp->b_ops with
 * the real leaf1 or da node ops before delegating to them.
 */
static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
	.name = "xchk_da_btree",
	.verify_read = xchk_da_btree_read_verify,
	.verify_write = xchk_da_btree_write_verify,
	.verify_struct = xchk_da_btree_verify,
};
237 
238 /* Check a block's sibling. */
239 STATIC int
240 xchk_da_btree_block_check_sibling(
241 	struct xchk_da_btree	*ds,
242 	int			level,
243 	int			direction,
244 	xfs_dablk_t		sibling)
245 {
246 	int			retval;
247 	int			error;
248 
249 	memcpy(&ds->state->altpath, &ds->state->path,
250 			sizeof(ds->state->altpath));
251 
252 	/*
253 	 * If the pointer is null, we shouldn't be able to move the upper
254 	 * level pointer anywhere.
255 	 */
256 	if (sibling == 0) {
257 		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
258 				direction, false, &retval);
259 		if (error == 0 && retval == 0)
260 			xchk_da_set_corrupt(ds, level);
261 		error = 0;
262 		goto out;
263 	}
264 
265 	/* Move the alternate cursor one block in the direction given. */
266 	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
267 			direction, false, &retval);
268 	if (!xchk_da_process_error(ds, level, &error))
269 		return error;
270 	if (retval) {
271 		xchk_da_set_corrupt(ds, level);
272 		return error;
273 	}
274 	if (ds->state->altpath.blk[level].bp)
275 		xchk_buffer_recheck(ds->sc,
276 				ds->state->altpath.blk[level].bp);
277 
278 	/* Compare upper level pointer to sibling pointer. */
279 	if (ds->state->altpath.blk[level].blkno != sibling)
280 		xchk_da_set_corrupt(ds, level);
281 	if (ds->state->altpath.blk[level].bp) {
282 		xfs_trans_brelse(ds->dargs.trans,
283 				ds->state->altpath.blk[level].bp);
284 		ds->state->altpath.blk[level].bp = NULL;
285 	}
286 out:
287 	return error;
288 }
289 
290 /* Check a block's sibling pointers. */
291 STATIC int
292 xchk_da_btree_block_check_siblings(
293 	struct xchk_da_btree	*ds,
294 	int			level,
295 	struct xfs_da_blkinfo	*hdr)
296 {
297 	xfs_dablk_t		forw;
298 	xfs_dablk_t		back;
299 	int			error = 0;
300 
301 	forw = be32_to_cpu(hdr->forw);
302 	back = be32_to_cpu(hdr->back);
303 
304 	/* Top level blocks should not have sibling pointers. */
305 	if (level == 0) {
306 		if (forw != 0 || back != 0)
307 			xchk_da_set_corrupt(ds, level);
308 		return 0;
309 	}
310 
311 	/*
312 	 * Check back (left) and forw (right) pointers.  These functions
313 	 * absorb error codes for us.
314 	 */
315 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
316 	if (error)
317 		goto out;
318 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
319 
320 out:
321 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
322 	return error;
323 }
324 
325 /* Load a dir/attribute block from a btree. */
326 STATIC int
327 xchk_da_btree_block(
328 	struct xchk_da_btree		*ds,
329 	int				level,
330 	xfs_dablk_t			blkno)
331 {
332 	struct xfs_da_state_blk		*blk;
333 	struct xfs_da_intnode		*node;
334 	struct xfs_da_node_entry	*btree;
335 	struct xfs_da3_blkinfo		*hdr3;
336 	struct xfs_da_args		*dargs = &ds->dargs;
337 	struct xfs_inode		*ip = ds->dargs.dp;
338 	xfs_ino_t			owner;
339 	int				*pmaxrecs;
340 	struct xfs_da3_icnode_hdr	nodehdr;
341 	int				error = 0;
342 
343 	blk = &ds->state->path.blk[level];
344 	ds->state->path.active = level + 1;
345 
346 	/* Release old block. */
347 	if (blk->bp) {
348 		xfs_trans_brelse(dargs->trans, blk->bp);
349 		blk->bp = NULL;
350 	}
351 
352 	/* Check the pointer. */
353 	blk->blkno = blkno;
354 	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
355 		goto out_nobuf;
356 
357 	/* Read the buffer. */
358 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
359 			&blk->bp, dargs->whichfork,
360 			&xchk_da_btree_buf_ops);
361 	if (!xchk_da_process_error(ds, level, &error))
362 		goto out_nobuf;
363 	if (blk->bp)
364 		xchk_buffer_recheck(ds->sc, blk->bp);
365 
366 	/*
367 	 * We didn't find a dir btree root block, which means that
368 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
369 	 * to be), so jump out now.
370 	 */
371 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
372 			blk->bp == NULL)
373 		goto out_nobuf;
374 
375 	/* It's /not/ ok for attr trees not to have a da btree. */
376 	if (blk->bp == NULL) {
377 		xchk_da_set_corrupt(ds, level);
378 		goto out_nobuf;
379 	}
380 
381 	hdr3 = blk->bp->b_addr;
382 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
383 	pmaxrecs = &ds->maxrecs[level];
384 
385 	/* We only started zeroing the header on v5 filesystems. */
386 	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
387 		xchk_da_set_corrupt(ds, level);
388 
389 	/* Check the owner. */
390 	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
391 		owner = be64_to_cpu(hdr3->owner);
392 		if (owner != ip->i_ino)
393 			xchk_da_set_corrupt(ds, level);
394 	}
395 
396 	/* Check the siblings. */
397 	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
398 	if (error)
399 		goto out;
400 
401 	/* Interpret the buffer. */
402 	switch (blk->magic) {
403 	case XFS_ATTR_LEAF_MAGIC:
404 	case XFS_ATTR3_LEAF_MAGIC:
405 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
406 				XFS_BLFT_ATTR_LEAF_BUF);
407 		blk->magic = XFS_ATTR_LEAF_MAGIC;
408 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
409 		if (ds->tree_level != 0)
410 			xchk_da_set_corrupt(ds, level);
411 		break;
412 	case XFS_DIR2_LEAFN_MAGIC:
413 	case XFS_DIR3_LEAFN_MAGIC:
414 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
415 				XFS_BLFT_DIR_LEAFN_BUF);
416 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
417 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
418 		if (ds->tree_level != 0)
419 			xchk_da_set_corrupt(ds, level);
420 		break;
421 	case XFS_DIR2_LEAF1_MAGIC:
422 	case XFS_DIR3_LEAF1_MAGIC:
423 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
424 				XFS_BLFT_DIR_LEAF1_BUF);
425 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
426 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
427 		if (ds->tree_level != 0)
428 			xchk_da_set_corrupt(ds, level);
429 		break;
430 	case XFS_DA_NODE_MAGIC:
431 	case XFS_DA3_NODE_MAGIC:
432 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
433 				XFS_BLFT_DA_NODE_BUF);
434 		blk->magic = XFS_DA_NODE_MAGIC;
435 		node = blk->bp->b_addr;
436 		ip->d_ops->node_hdr_from_disk(&nodehdr, node);
437 		btree = ip->d_ops->node_tree_p(node);
438 		*pmaxrecs = nodehdr.count;
439 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
440 		if (level == 0) {
441 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
442 				xchk_da_set_corrupt(ds, level);
443 				goto out_freebp;
444 			}
445 			ds->tree_level = nodehdr.level;
446 		} else {
447 			if (ds->tree_level != nodehdr.level) {
448 				xchk_da_set_corrupt(ds, level);
449 				goto out_freebp;
450 			}
451 		}
452 
453 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
454 		break;
455 	default:
456 		xchk_da_set_corrupt(ds, level);
457 		goto out_freebp;
458 	}
459 
460 out:
461 	return error;
462 out_freebp:
463 	xfs_trans_brelse(dargs->trans, blk->bp);
464 	blk->bp = NULL;
465 out_nobuf:
466 	blk->blkno = 0;
467 	return error;
468 }
469 
470 /* Visit all nodes and leaves of a da btree. */
471 int
472 xchk_da_btree(
473 	struct xfs_scrub		*sc,
474 	int				whichfork,
475 	xchk_da_btree_rec_fn		scrub_fn,
476 	void				*private)
477 {
478 	struct xchk_da_btree		ds = {};
479 	struct xfs_mount		*mp = sc->mp;
480 	struct xfs_da_state_blk		*blks;
481 	struct xfs_da_node_entry	*key;
482 	void				*rec;
483 	xfs_dablk_t			blkno;
484 	int				level;
485 	int				error;
486 
487 	/* Skip short format data structures; no btree to scan. */
488 	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
489 	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
490 		return 0;
491 
492 	/* Set up initial da state. */
493 	ds.dargs.dp = sc->ip;
494 	ds.dargs.whichfork = whichfork;
495 	ds.dargs.trans = sc->tp;
496 	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
497 	ds.state = xfs_da_state_alloc();
498 	ds.state->args = &ds.dargs;
499 	ds.state->mp = mp;
500 	ds.sc = sc;
501 	ds.private = private;
502 	if (whichfork == XFS_ATTR_FORK) {
503 		ds.dargs.geo = mp->m_attr_geo;
504 		ds.lowest = 0;
505 		ds.highest = 0;
506 	} else {
507 		ds.dargs.geo = mp->m_dir_geo;
508 		ds.lowest = ds.dargs.geo->leafblk;
509 		ds.highest = ds.dargs.geo->freeblk;
510 	}
511 	blkno = ds.lowest;
512 	level = 0;
513 
514 	/* Find the root of the da tree, if present. */
515 	blks = ds.state->path.blk;
516 	error = xchk_da_btree_block(&ds, level, blkno);
517 	if (error)
518 		goto out_state;
519 	/*
520 	 * We didn't find a block at ds.lowest, which means that there's
521 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
522 	 * so jump out now.
523 	 */
524 	if (blks[level].bp == NULL)
525 		goto out_state;
526 
527 	blks[level].index = 0;
528 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
529 		/* Handle leaf block. */
530 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
531 			/* End of leaf, pop back towards the root. */
532 			if (blks[level].index >= ds.maxrecs[level]) {
533 				if (level > 0)
534 					blks[level - 1].index++;
535 				ds.tree_level++;
536 				level--;
537 				continue;
538 			}
539 
540 			/* Dispatch record scrubbing. */
541 			rec = xchk_da_btree_entry(&ds, level,
542 					blks[level].index);
543 			error = scrub_fn(&ds, level, rec);
544 			if (error)
545 				break;
546 			if (xchk_should_terminate(sc, &error) ||
547 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
548 				break;
549 
550 			blks[level].index++;
551 			continue;
552 		}
553 
554 
555 		/* End of node, pop back towards the root. */
556 		if (blks[level].index >= ds.maxrecs[level]) {
557 			if (level > 0)
558 				blks[level - 1].index++;
559 			ds.tree_level++;
560 			level--;
561 			continue;
562 		}
563 
564 		/* Hashes in order for scrub? */
565 		key = xchk_da_btree_entry(&ds, level, blks[level].index);
566 		error = xchk_da_btree_hash(&ds, level, &key->hashval);
567 		if (error)
568 			goto out;
569 
570 		/* Drill another level deeper. */
571 		blkno = be32_to_cpu(key->before);
572 		level++;
573 		if (level >= XFS_DA_NODE_MAXDEPTH) {
574 			/* Too deep! */
575 			xchk_da_set_corrupt(&ds, level - 1);
576 			break;
577 		}
578 		ds.tree_level--;
579 		error = xchk_da_btree_block(&ds, level, blkno);
580 		if (error)
581 			goto out;
582 		if (blks[level].bp == NULL)
583 			goto out;
584 
585 		blks[level].index = 0;
586 	}
587 
588 out:
589 	/* Release all the buffers we're tracking. */
590 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
591 		if (blks[level].bp == NULL)
592 			continue;
593 		xfs_trans_brelse(sc->tp, blks[level].bp);
594 		blks[level].bp = NULL;
595 	}
596 
597 out_state:
598 	xfs_da_state_free(ds.state);
599 	return error;
600 }
601