xref: /openbmc/linux/fs/xfs/scrub/dabtree.c (revision 3557b3fd)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_inode_fork.h"
20 #include "xfs_da_format.h"
21 #include "xfs_da_btree.h"
22 #include "xfs_dir2.h"
23 #include "xfs_dir2_priv.h"
24 #include "xfs_attr_leaf.h"
25 #include "scrub/xfs_scrub.h"
26 #include "scrub/scrub.h"
27 #include "scrub/common.h"
28 #include "scrub/trace.h"
29 #include "scrub/dabtree.h"
30 
31 /* Directory/Attribute Btree */
32 
33 /*
34  * Check for da btree operation errors.  See the section about handling
35  * operational errors in common.c.
36  */
37 bool
38 xchk_da_process_error(
39 	struct xchk_da_btree	*ds,
40 	int			level,
41 	int			*error)
42 {
43 	struct xfs_scrub	*sc = ds->sc;
44 
45 	if (*error == 0)
46 		return true;
47 
48 	switch (*error) {
49 	case -EDEADLOCK:
50 		/* Used to restart an op with deadlock avoidance. */
51 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
52 		break;
53 	case -EFSBADCRC:
54 	case -EFSCORRUPTED:
55 		/* Note the badness but don't abort. */
56 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
57 		*error = 0;
58 		/* fall through */
59 	default:
60 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
61 				xfs_dir2_da_to_db(ds->dargs.geo,
62 					ds->state->path.blk[level].blkno),
63 				*error, __return_address);
64 		break;
65 	}
66 	return false;
67 }
68 
69 /*
70  * Check for da btree corruption.  See the section about handling
71  * operational errors in common.c.
72  */
73 void
74 xchk_da_set_corrupt(
75 	struct xchk_da_btree	*ds,
76 	int			level)
77 {
78 	struct xfs_scrub	*sc = ds->sc;
79 
80 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
81 
82 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
83 			xfs_dir2_da_to_db(ds->dargs.geo,
84 				ds->state->path.blk[level].blkno),
85 			__return_address);
86 }
87 
88 /* Find an entry at a certain level in a da btree. */
89 STATIC void *
90 xchk_da_btree_entry(
91 	struct xchk_da_btree	*ds,
92 	int			level,
93 	int			rec)
94 {
95 	char			*ents;
96 	struct xfs_da_state_blk	*blk;
97 	void			*baddr;
98 
99 	/* Dispatch the entry finding function. */
100 	blk = &ds->state->path.blk[level];
101 	baddr = blk->bp->b_addr;
102 	switch (blk->magic) {
103 	case XFS_ATTR_LEAF_MAGIC:
104 	case XFS_ATTR3_LEAF_MAGIC:
105 		ents = (char *)xfs_attr3_leaf_entryp(baddr);
106 		return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
107 	case XFS_DIR2_LEAFN_MAGIC:
108 	case XFS_DIR3_LEAFN_MAGIC:
109 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
110 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
111 	case XFS_DIR2_LEAF1_MAGIC:
112 	case XFS_DIR3_LEAF1_MAGIC:
113 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
114 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
115 	case XFS_DA_NODE_MAGIC:
116 	case XFS_DA3_NODE_MAGIC:
117 		ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
118 		return ents + (rec * sizeof(struct xfs_da_node_entry));
119 	}
120 
121 	return NULL;
122 }
123 
124 /* Scrub a da btree hash (key). */
125 int
126 xchk_da_btree_hash(
127 	struct xchk_da_btree		*ds,
128 	int				level,
129 	__be32				*hashp)
130 {
131 	struct xfs_da_state_blk		*blks;
132 	struct xfs_da_node_entry	*entry;
133 	xfs_dahash_t			hash;
134 	xfs_dahash_t			parent_hash;
135 
136 	/* Is this hash in order? */
137 	hash = be32_to_cpu(*hashp);
138 	if (hash < ds->hashes[level])
139 		xchk_da_set_corrupt(ds, level);
140 	ds->hashes[level] = hash;
141 
142 	if (level == 0)
143 		return 0;
144 
145 	/* Is this hash no larger than the parent hash? */
146 	blks = ds->state->path.blk;
147 	entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index);
148 	parent_hash = be32_to_cpu(entry->hashval);
149 	if (parent_hash < hash)
150 		xchk_da_set_corrupt(ds, level);
151 
152 	return 0;
153 }
154 
155 /*
156  * Check a da btree pointer.  Returns true if it's ok to use this
157  * pointer.
158  */
159 STATIC bool
160 xchk_da_btree_ptr_ok(
161 	struct xchk_da_btree	*ds,
162 	int			level,
163 	xfs_dablk_t		blkno)
164 {
165 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
166 		xchk_da_set_corrupt(ds, level);
167 		return false;
168 	}
169 
170 	return true;
171 }
172 
173 /*
174  * The da btree scrubber can handle leaf1 blocks as a degenerate
175  * form of leafn blocks.  Since the regular da code doesn't handle
176  * leaf1, we must multiplex the verifiers.
177  */
178 static void
179 xchk_da_btree_read_verify(
180 	struct xfs_buf		*bp)
181 {
182 	struct xfs_da_blkinfo	*info = bp->b_addr;
183 
184 	switch (be16_to_cpu(info->magic)) {
185 	case XFS_DIR2_LEAF1_MAGIC:
186 	case XFS_DIR3_LEAF1_MAGIC:
187 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
188 		bp->b_ops->verify_read(bp);
189 		return;
190 	default:
191 		/*
192 		 * xfs_da3_node_buf_ops already know how to handle
193 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
194 		 */
195 		bp->b_ops = &xfs_da3_node_buf_ops;
196 		bp->b_ops->verify_read(bp);
197 		return;
198 	}
199 }
200 static void
201 xchk_da_btree_write_verify(
202 	struct xfs_buf		*bp)
203 {
204 	struct xfs_da_blkinfo	*info = bp->b_addr;
205 
206 	switch (be16_to_cpu(info->magic)) {
207 	case XFS_DIR2_LEAF1_MAGIC:
208 	case XFS_DIR3_LEAF1_MAGIC:
209 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
210 		bp->b_ops->verify_write(bp);
211 		return;
212 	default:
213 		/*
214 		 * xfs_da3_node_buf_ops already know how to handle
215 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
216 		 */
217 		bp->b_ops = &xfs_da3_node_buf_ops;
218 		bp->b_ops->verify_write(bp);
219 		return;
220 	}
221 }
222 static void *
223 xchk_da_btree_verify(
224 	struct xfs_buf		*bp)
225 {
226 	struct xfs_da_blkinfo	*info = bp->b_addr;
227 
228 	switch (be16_to_cpu(info->magic)) {
229 	case XFS_DIR2_LEAF1_MAGIC:
230 	case XFS_DIR3_LEAF1_MAGIC:
231 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
232 		return bp->b_ops->verify_struct(bp);
233 	default:
234 		bp->b_ops = &xfs_da3_node_buf_ops;
235 		return bp->b_ops->verify_struct(bp);
236 	}
237 }
238 
239 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
240 	.name = "xchk_da_btree",
241 	.verify_read = xchk_da_btree_read_verify,
242 	.verify_write = xchk_da_btree_write_verify,
243 	.verify_struct = xchk_da_btree_verify,
244 };
245 
246 /* Check a block's sibling. */
247 STATIC int
248 xchk_da_btree_block_check_sibling(
249 	struct xchk_da_btree	*ds,
250 	int			level,
251 	int			direction,
252 	xfs_dablk_t		sibling)
253 {
254 	int			retval;
255 	int			error;
256 
257 	memcpy(&ds->state->altpath, &ds->state->path,
258 			sizeof(ds->state->altpath));
259 
260 	/*
261 	 * If the pointer is null, we shouldn't be able to move the upper
262 	 * level pointer anywhere.
263 	 */
264 	if (sibling == 0) {
265 		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
266 				direction, false, &retval);
267 		if (error == 0 && retval == 0)
268 			xchk_da_set_corrupt(ds, level);
269 		error = 0;
270 		goto out;
271 	}
272 
273 	/* Move the alternate cursor one block in the direction given. */
274 	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
275 			direction, false, &retval);
276 	if (!xchk_da_process_error(ds, level, &error))
277 		return error;
278 	if (retval) {
279 		xchk_da_set_corrupt(ds, level);
280 		return error;
281 	}
282 	if (ds->state->altpath.blk[level].bp)
283 		xchk_buffer_recheck(ds->sc,
284 				ds->state->altpath.blk[level].bp);
285 
286 	/* Compare upper level pointer to sibling pointer. */
287 	if (ds->state->altpath.blk[level].blkno != sibling)
288 		xchk_da_set_corrupt(ds, level);
289 	xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
290 out:
291 	return error;
292 }
293 
294 /* Check a block's sibling pointers. */
295 STATIC int
296 xchk_da_btree_block_check_siblings(
297 	struct xchk_da_btree	*ds,
298 	int			level,
299 	struct xfs_da_blkinfo	*hdr)
300 {
301 	xfs_dablk_t		forw;
302 	xfs_dablk_t		back;
303 	int			error = 0;
304 
305 	forw = be32_to_cpu(hdr->forw);
306 	back = be32_to_cpu(hdr->back);
307 
308 	/* Top level blocks should not have sibling pointers. */
309 	if (level == 0) {
310 		if (forw != 0 || back != 0)
311 			xchk_da_set_corrupt(ds, level);
312 		return 0;
313 	}
314 
315 	/*
316 	 * Check back (left) and forw (right) pointers.  These functions
317 	 * absorb error codes for us.
318 	 */
319 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
320 	if (error)
321 		goto out;
322 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
323 
324 out:
325 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
326 	return error;
327 }
328 
329 /* Load a dir/attribute block from a btree. */
330 STATIC int
331 xchk_da_btree_block(
332 	struct xchk_da_btree		*ds,
333 	int				level,
334 	xfs_dablk_t			blkno)
335 {
336 	struct xfs_da_state_blk		*blk;
337 	struct xfs_da_intnode		*node;
338 	struct xfs_da_node_entry	*btree;
339 	struct xfs_da3_blkinfo		*hdr3;
340 	struct xfs_da_args		*dargs = &ds->dargs;
341 	struct xfs_inode		*ip = ds->dargs.dp;
342 	xfs_ino_t			owner;
343 	int				*pmaxrecs;
344 	struct xfs_da3_icnode_hdr	nodehdr;
345 	int				error = 0;
346 
347 	blk = &ds->state->path.blk[level];
348 	ds->state->path.active = level + 1;
349 
350 	/* Release old block. */
351 	if (blk->bp) {
352 		xfs_trans_brelse(dargs->trans, blk->bp);
353 		blk->bp = NULL;
354 	}
355 
356 	/* Check the pointer. */
357 	blk->blkno = blkno;
358 	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
359 		goto out_nobuf;
360 
361 	/* Read the buffer. */
362 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
363 			&blk->bp, dargs->whichfork,
364 			&xchk_da_btree_buf_ops);
365 	if (!xchk_da_process_error(ds, level, &error))
366 		goto out_nobuf;
367 	if (blk->bp)
368 		xchk_buffer_recheck(ds->sc, blk->bp);
369 
370 	/*
371 	 * We didn't find a dir btree root block, which means that
372 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
373 	 * to be), so jump out now.
374 	 */
375 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
376 			blk->bp == NULL)
377 		goto out_nobuf;
378 
379 	/* It's /not/ ok for attr trees not to have a da btree. */
380 	if (blk->bp == NULL) {
381 		xchk_da_set_corrupt(ds, level);
382 		goto out_nobuf;
383 	}
384 
385 	hdr3 = blk->bp->b_addr;
386 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
387 	pmaxrecs = &ds->maxrecs[level];
388 
389 	/* We only started zeroing the header on v5 filesystems. */
390 	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
391 		xchk_da_set_corrupt(ds, level);
392 
393 	/* Check the owner. */
394 	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
395 		owner = be64_to_cpu(hdr3->owner);
396 		if (owner != ip->i_ino)
397 			xchk_da_set_corrupt(ds, level);
398 	}
399 
400 	/* Check the siblings. */
401 	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
402 	if (error)
403 		goto out;
404 
405 	/* Interpret the buffer. */
406 	switch (blk->magic) {
407 	case XFS_ATTR_LEAF_MAGIC:
408 	case XFS_ATTR3_LEAF_MAGIC:
409 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
410 				XFS_BLFT_ATTR_LEAF_BUF);
411 		blk->magic = XFS_ATTR_LEAF_MAGIC;
412 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
413 		if (ds->tree_level != 0)
414 			xchk_da_set_corrupt(ds, level);
415 		break;
416 	case XFS_DIR2_LEAFN_MAGIC:
417 	case XFS_DIR3_LEAFN_MAGIC:
418 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
419 				XFS_BLFT_DIR_LEAFN_BUF);
420 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
421 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
422 		if (ds->tree_level != 0)
423 			xchk_da_set_corrupt(ds, level);
424 		break;
425 	case XFS_DIR2_LEAF1_MAGIC:
426 	case XFS_DIR3_LEAF1_MAGIC:
427 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
428 				XFS_BLFT_DIR_LEAF1_BUF);
429 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
430 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
431 		if (ds->tree_level != 0)
432 			xchk_da_set_corrupt(ds, level);
433 		break;
434 	case XFS_DA_NODE_MAGIC:
435 	case XFS_DA3_NODE_MAGIC:
436 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
437 				XFS_BLFT_DA_NODE_BUF);
438 		blk->magic = XFS_DA_NODE_MAGIC;
439 		node = blk->bp->b_addr;
440 		ip->d_ops->node_hdr_from_disk(&nodehdr, node);
441 		btree = ip->d_ops->node_tree_p(node);
442 		*pmaxrecs = nodehdr.count;
443 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
444 		if (level == 0) {
445 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
446 				xchk_da_set_corrupt(ds, level);
447 				goto out_freebp;
448 			}
449 			ds->tree_level = nodehdr.level;
450 		} else {
451 			if (ds->tree_level != nodehdr.level) {
452 				xchk_da_set_corrupt(ds, level);
453 				goto out_freebp;
454 			}
455 		}
456 
457 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
458 		break;
459 	default:
460 		xchk_da_set_corrupt(ds, level);
461 		goto out_freebp;
462 	}
463 
464 out:
465 	return error;
466 out_freebp:
467 	xfs_trans_brelse(dargs->trans, blk->bp);
468 	blk->bp = NULL;
469 out_nobuf:
470 	blk->blkno = 0;
471 	return error;
472 }
473 
474 /* Visit all nodes and leaves of a da btree. */
475 int
476 xchk_da_btree(
477 	struct xfs_scrub		*sc,
478 	int				whichfork,
479 	xchk_da_btree_rec_fn		scrub_fn,
480 	void				*private)
481 {
482 	struct xchk_da_btree		ds = {};
483 	struct xfs_mount		*mp = sc->mp;
484 	struct xfs_da_state_blk		*blks;
485 	struct xfs_da_node_entry	*key;
486 	void				*rec;
487 	xfs_dablk_t			blkno;
488 	int				level;
489 	int				error;
490 
491 	/* Skip short format data structures; no btree to scan. */
492 	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
493 	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
494 		return 0;
495 
496 	/* Set up initial da state. */
497 	ds.dargs.dp = sc->ip;
498 	ds.dargs.whichfork = whichfork;
499 	ds.dargs.trans = sc->tp;
500 	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
501 	ds.state = xfs_da_state_alloc();
502 	ds.state->args = &ds.dargs;
503 	ds.state->mp = mp;
504 	ds.sc = sc;
505 	ds.private = private;
506 	if (whichfork == XFS_ATTR_FORK) {
507 		ds.dargs.geo = mp->m_attr_geo;
508 		ds.lowest = 0;
509 		ds.highest = 0;
510 	} else {
511 		ds.dargs.geo = mp->m_dir_geo;
512 		ds.lowest = ds.dargs.geo->leafblk;
513 		ds.highest = ds.dargs.geo->freeblk;
514 	}
515 	blkno = ds.lowest;
516 	level = 0;
517 
518 	/* Find the root of the da tree, if present. */
519 	blks = ds.state->path.blk;
520 	error = xchk_da_btree_block(&ds, level, blkno);
521 	if (error)
522 		goto out_state;
523 	/*
524 	 * We didn't find a block at ds.lowest, which means that there's
525 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
526 	 * so jump out now.
527 	 */
528 	if (blks[level].bp == NULL)
529 		goto out_state;
530 
531 	blks[level].index = 0;
532 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
533 		/* Handle leaf block. */
534 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
535 			/* End of leaf, pop back towards the root. */
536 			if (blks[level].index >= ds.maxrecs[level]) {
537 				if (level > 0)
538 					blks[level - 1].index++;
539 				ds.tree_level++;
540 				level--;
541 				continue;
542 			}
543 
544 			/* Dispatch record scrubbing. */
545 			rec = xchk_da_btree_entry(&ds, level,
546 					blks[level].index);
547 			error = scrub_fn(&ds, level, rec);
548 			if (error)
549 				break;
550 			if (xchk_should_terminate(sc, &error) ||
551 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
552 				break;
553 
554 			blks[level].index++;
555 			continue;
556 		}
557 
558 
559 		/* End of node, pop back towards the root. */
560 		if (blks[level].index >= ds.maxrecs[level]) {
561 			if (level > 0)
562 				blks[level - 1].index++;
563 			ds.tree_level++;
564 			level--;
565 			continue;
566 		}
567 
568 		/* Hashes in order for scrub? */
569 		key = xchk_da_btree_entry(&ds, level, blks[level].index);
570 		error = xchk_da_btree_hash(&ds, level, &key->hashval);
571 		if (error)
572 			goto out;
573 
574 		/* Drill another level deeper. */
575 		blkno = be32_to_cpu(key->before);
576 		level++;
577 		if (level >= XFS_DA_NODE_MAXDEPTH) {
578 			/* Too deep! */
579 			xchk_da_set_corrupt(&ds, level - 1);
580 			break;
581 		}
582 		ds.tree_level--;
583 		error = xchk_da_btree_block(&ds, level, blkno);
584 		if (error)
585 			goto out;
586 		if (blks[level].bp == NULL)
587 			goto out;
588 
589 		blks[level].index = 0;
590 	}
591 
592 out:
593 	/* Release all the buffers we're tracking. */
594 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
595 		if (blks[level].bp == NULL)
596 			continue;
597 		xfs_trans_brelse(sc->tp, blks[level].bp);
598 		blks[level].bp = NULL;
599 	}
600 
601 out_state:
602 	xfs_da_state_free(ds.state);
603 	return error;
604 }
605