xref: /openbmc/linux/fs/xfs/scrub/dabtree.c (revision ccd51b9f)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22 
23 /* Directory/Attribute Btree */
24 
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31 	struct xchk_da_btree	*ds,
32 	int			level,
33 	int			*error)
34 {
35 	struct xfs_scrub	*sc = ds->sc;
36 
37 	if (*error == 0)
38 		return true;
39 
40 	switch (*error) {
41 	case -EDEADLOCK:
42 		/* Used to restart an op with deadlock avoidance. */
43 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
44 		break;
45 	case -EFSBADCRC:
46 	case -EFSCORRUPTED:
47 		/* Note the badness but don't abort. */
48 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
49 		*error = 0;
50 		/* fall through */
51 	default:
52 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
53 				xfs_dir2_da_to_db(ds->dargs.geo,
54 					ds->state->path.blk[level].blkno),
55 				*error, __return_address);
56 		break;
57 	}
58 	return false;
59 }
60 
61 /*
62  * Check for da btree corruption.  See the section about handling
63  * operational errors in common.c.
64  */
65 void
66 xchk_da_set_corrupt(
67 	struct xchk_da_btree	*ds,
68 	int			level)
69 {
70 	struct xfs_scrub	*sc = ds->sc;
71 
72 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
73 
74 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
75 			xfs_dir2_da_to_db(ds->dargs.geo,
76 				ds->state->path.blk[level].blkno),
77 			__return_address);
78 }
79 
80 /* Find an entry at a certain level in a da btree. */
81 STATIC void *
82 xchk_da_btree_entry(
83 	struct xchk_da_btree	*ds,
84 	int			level,
85 	int			rec)
86 {
87 	char			*ents;
88 	struct xfs_da_state_blk	*blk;
89 	void			*baddr;
90 
91 	/* Dispatch the entry finding function. */
92 	blk = &ds->state->path.blk[level];
93 	baddr = blk->bp->b_addr;
94 	switch (blk->magic) {
95 	case XFS_ATTR_LEAF_MAGIC:
96 	case XFS_ATTR3_LEAF_MAGIC:
97 		ents = (char *)xfs_attr3_leaf_entryp(baddr);
98 		return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
99 	case XFS_DIR2_LEAFN_MAGIC:
100 	case XFS_DIR3_LEAFN_MAGIC:
101 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
102 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
103 	case XFS_DIR2_LEAF1_MAGIC:
104 	case XFS_DIR3_LEAF1_MAGIC:
105 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
106 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
107 	case XFS_DA_NODE_MAGIC:
108 	case XFS_DA3_NODE_MAGIC:
109 		ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
110 		return ents + (rec * sizeof(struct xfs_da_node_entry));
111 	}
112 
113 	return NULL;
114 }
115 
116 /* Scrub a da btree hash (key). */
117 int
118 xchk_da_btree_hash(
119 	struct xchk_da_btree		*ds,
120 	int				level,
121 	__be32				*hashp)
122 {
123 	struct xfs_da_state_blk		*blks;
124 	struct xfs_da_node_entry	*entry;
125 	xfs_dahash_t			hash;
126 	xfs_dahash_t			parent_hash;
127 
128 	/* Is this hash in order? */
129 	hash = be32_to_cpu(*hashp);
130 	if (hash < ds->hashes[level])
131 		xchk_da_set_corrupt(ds, level);
132 	ds->hashes[level] = hash;
133 
134 	if (level == 0)
135 		return 0;
136 
137 	/* Is this hash no larger than the parent hash? */
138 	blks = ds->state->path.blk;
139 	entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index);
140 	parent_hash = be32_to_cpu(entry->hashval);
141 	if (parent_hash < hash)
142 		xchk_da_set_corrupt(ds, level);
143 
144 	return 0;
145 }
146 
147 /*
148  * Check a da btree pointer.  Returns true if it's ok to use this
149  * pointer.
150  */
151 STATIC bool
152 xchk_da_btree_ptr_ok(
153 	struct xchk_da_btree	*ds,
154 	int			level,
155 	xfs_dablk_t		blkno)
156 {
157 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
158 		xchk_da_set_corrupt(ds, level);
159 		return false;
160 	}
161 
162 	return true;
163 }
164 
165 /*
166  * The da btree scrubber can handle leaf1 blocks as a degenerate
167  * form of leafn blocks.  Since the regular da code doesn't handle
168  * leaf1, we must multiplex the verifiers.
169  */
170 static void
171 xchk_da_btree_read_verify(
172 	struct xfs_buf		*bp)
173 {
174 	struct xfs_da_blkinfo	*info = bp->b_addr;
175 
176 	switch (be16_to_cpu(info->magic)) {
177 	case XFS_DIR2_LEAF1_MAGIC:
178 	case XFS_DIR3_LEAF1_MAGIC:
179 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
180 		bp->b_ops->verify_read(bp);
181 		return;
182 	default:
183 		/*
184 		 * xfs_da3_node_buf_ops already know how to handle
185 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
186 		 */
187 		bp->b_ops = &xfs_da3_node_buf_ops;
188 		bp->b_ops->verify_read(bp);
189 		return;
190 	}
191 }
192 static void
193 xchk_da_btree_write_verify(
194 	struct xfs_buf		*bp)
195 {
196 	struct xfs_da_blkinfo	*info = bp->b_addr;
197 
198 	switch (be16_to_cpu(info->magic)) {
199 	case XFS_DIR2_LEAF1_MAGIC:
200 	case XFS_DIR3_LEAF1_MAGIC:
201 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
202 		bp->b_ops->verify_write(bp);
203 		return;
204 	default:
205 		/*
206 		 * xfs_da3_node_buf_ops already know how to handle
207 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
208 		 */
209 		bp->b_ops = &xfs_da3_node_buf_ops;
210 		bp->b_ops->verify_write(bp);
211 		return;
212 	}
213 }
214 static void *
215 xchk_da_btree_verify(
216 	struct xfs_buf		*bp)
217 {
218 	struct xfs_da_blkinfo	*info = bp->b_addr;
219 
220 	switch (be16_to_cpu(info->magic)) {
221 	case XFS_DIR2_LEAF1_MAGIC:
222 	case XFS_DIR3_LEAF1_MAGIC:
223 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
224 		return bp->b_ops->verify_struct(bp);
225 	default:
226 		bp->b_ops = &xfs_da3_node_buf_ops;
227 		return bp->b_ops->verify_struct(bp);
228 	}
229 }
230 
231 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
232 	.name = "xchk_da_btree",
233 	.verify_read = xchk_da_btree_read_verify,
234 	.verify_write = xchk_da_btree_write_verify,
235 	.verify_struct = xchk_da_btree_verify,
236 };
237 
238 /* Check a block's sibling. */
239 STATIC int
240 xchk_da_btree_block_check_sibling(
241 	struct xchk_da_btree	*ds,
242 	int			level,
243 	int			direction,
244 	xfs_dablk_t		sibling)
245 {
246 	int			retval;
247 	int			error;
248 
249 	memcpy(&ds->state->altpath, &ds->state->path,
250 			sizeof(ds->state->altpath));
251 
252 	/*
253 	 * If the pointer is null, we shouldn't be able to move the upper
254 	 * level pointer anywhere.
255 	 */
256 	if (sibling == 0) {
257 		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
258 				direction, false, &retval);
259 		if (error == 0 && retval == 0)
260 			xchk_da_set_corrupt(ds, level);
261 		error = 0;
262 		goto out;
263 	}
264 
265 	/* Move the alternate cursor one block in the direction given. */
266 	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
267 			direction, false, &retval);
268 	if (!xchk_da_process_error(ds, level, &error))
269 		return error;
270 	if (retval) {
271 		xchk_da_set_corrupt(ds, level);
272 		return error;
273 	}
274 	if (ds->state->altpath.blk[level].bp)
275 		xchk_buffer_recheck(ds->sc,
276 				ds->state->altpath.blk[level].bp);
277 
278 	/* Compare upper level pointer to sibling pointer. */
279 	if (ds->state->altpath.blk[level].blkno != sibling)
280 		xchk_da_set_corrupt(ds, level);
281 	xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
282 out:
283 	return error;
284 }
285 
286 /* Check a block's sibling pointers. */
287 STATIC int
288 xchk_da_btree_block_check_siblings(
289 	struct xchk_da_btree	*ds,
290 	int			level,
291 	struct xfs_da_blkinfo	*hdr)
292 {
293 	xfs_dablk_t		forw;
294 	xfs_dablk_t		back;
295 	int			error = 0;
296 
297 	forw = be32_to_cpu(hdr->forw);
298 	back = be32_to_cpu(hdr->back);
299 
300 	/* Top level blocks should not have sibling pointers. */
301 	if (level == 0) {
302 		if (forw != 0 || back != 0)
303 			xchk_da_set_corrupt(ds, level);
304 		return 0;
305 	}
306 
307 	/*
308 	 * Check back (left) and forw (right) pointers.  These functions
309 	 * absorb error codes for us.
310 	 */
311 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
312 	if (error)
313 		goto out;
314 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
315 
316 out:
317 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
318 	return error;
319 }
320 
321 /* Load a dir/attribute block from a btree. */
322 STATIC int
323 xchk_da_btree_block(
324 	struct xchk_da_btree		*ds,
325 	int				level,
326 	xfs_dablk_t			blkno)
327 {
328 	struct xfs_da_state_blk		*blk;
329 	struct xfs_da_intnode		*node;
330 	struct xfs_da_node_entry	*btree;
331 	struct xfs_da3_blkinfo		*hdr3;
332 	struct xfs_da_args		*dargs = &ds->dargs;
333 	struct xfs_inode		*ip = ds->dargs.dp;
334 	xfs_ino_t			owner;
335 	int				*pmaxrecs;
336 	struct xfs_da3_icnode_hdr	nodehdr;
337 	int				error = 0;
338 
339 	blk = &ds->state->path.blk[level];
340 	ds->state->path.active = level + 1;
341 
342 	/* Release old block. */
343 	if (blk->bp) {
344 		xfs_trans_brelse(dargs->trans, blk->bp);
345 		blk->bp = NULL;
346 	}
347 
348 	/* Check the pointer. */
349 	blk->blkno = blkno;
350 	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
351 		goto out_nobuf;
352 
353 	/* Read the buffer. */
354 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
355 			&blk->bp, dargs->whichfork,
356 			&xchk_da_btree_buf_ops);
357 	if (!xchk_da_process_error(ds, level, &error))
358 		goto out_nobuf;
359 	if (blk->bp)
360 		xchk_buffer_recheck(ds->sc, blk->bp);
361 
362 	/*
363 	 * We didn't find a dir btree root block, which means that
364 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
365 	 * to be), so jump out now.
366 	 */
367 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
368 			blk->bp == NULL)
369 		goto out_nobuf;
370 
371 	/* It's /not/ ok for attr trees not to have a da btree. */
372 	if (blk->bp == NULL) {
373 		xchk_da_set_corrupt(ds, level);
374 		goto out_nobuf;
375 	}
376 
377 	hdr3 = blk->bp->b_addr;
378 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
379 	pmaxrecs = &ds->maxrecs[level];
380 
381 	/* We only started zeroing the header on v5 filesystems. */
382 	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
383 		xchk_da_set_corrupt(ds, level);
384 
385 	/* Check the owner. */
386 	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
387 		owner = be64_to_cpu(hdr3->owner);
388 		if (owner != ip->i_ino)
389 			xchk_da_set_corrupt(ds, level);
390 	}
391 
392 	/* Check the siblings. */
393 	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
394 	if (error)
395 		goto out;
396 
397 	/* Interpret the buffer. */
398 	switch (blk->magic) {
399 	case XFS_ATTR_LEAF_MAGIC:
400 	case XFS_ATTR3_LEAF_MAGIC:
401 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
402 				XFS_BLFT_ATTR_LEAF_BUF);
403 		blk->magic = XFS_ATTR_LEAF_MAGIC;
404 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
405 		if (ds->tree_level != 0)
406 			xchk_da_set_corrupt(ds, level);
407 		break;
408 	case XFS_DIR2_LEAFN_MAGIC:
409 	case XFS_DIR3_LEAFN_MAGIC:
410 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
411 				XFS_BLFT_DIR_LEAFN_BUF);
412 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
413 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
414 		if (ds->tree_level != 0)
415 			xchk_da_set_corrupt(ds, level);
416 		break;
417 	case XFS_DIR2_LEAF1_MAGIC:
418 	case XFS_DIR3_LEAF1_MAGIC:
419 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
420 				XFS_BLFT_DIR_LEAF1_BUF);
421 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
422 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
423 		if (ds->tree_level != 0)
424 			xchk_da_set_corrupt(ds, level);
425 		break;
426 	case XFS_DA_NODE_MAGIC:
427 	case XFS_DA3_NODE_MAGIC:
428 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
429 				XFS_BLFT_DA_NODE_BUF);
430 		blk->magic = XFS_DA_NODE_MAGIC;
431 		node = blk->bp->b_addr;
432 		ip->d_ops->node_hdr_from_disk(&nodehdr, node);
433 		btree = ip->d_ops->node_tree_p(node);
434 		*pmaxrecs = nodehdr.count;
435 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
436 		if (level == 0) {
437 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
438 				xchk_da_set_corrupt(ds, level);
439 				goto out_freebp;
440 			}
441 			ds->tree_level = nodehdr.level;
442 		} else {
443 			if (ds->tree_level != nodehdr.level) {
444 				xchk_da_set_corrupt(ds, level);
445 				goto out_freebp;
446 			}
447 		}
448 
449 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
450 		break;
451 	default:
452 		xchk_da_set_corrupt(ds, level);
453 		goto out_freebp;
454 	}
455 
456 out:
457 	return error;
458 out_freebp:
459 	xfs_trans_brelse(dargs->trans, blk->bp);
460 	blk->bp = NULL;
461 out_nobuf:
462 	blk->blkno = 0;
463 	return error;
464 }
465 
466 /* Visit all nodes and leaves of a da btree. */
467 int
468 xchk_da_btree(
469 	struct xfs_scrub		*sc,
470 	int				whichfork,
471 	xchk_da_btree_rec_fn		scrub_fn,
472 	void				*private)
473 {
474 	struct xchk_da_btree		ds = {};
475 	struct xfs_mount		*mp = sc->mp;
476 	struct xfs_da_state_blk		*blks;
477 	struct xfs_da_node_entry	*key;
478 	void				*rec;
479 	xfs_dablk_t			blkno;
480 	int				level;
481 	int				error;
482 
483 	/* Skip short format data structures; no btree to scan. */
484 	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
485 	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
486 		return 0;
487 
488 	/* Set up initial da state. */
489 	ds.dargs.dp = sc->ip;
490 	ds.dargs.whichfork = whichfork;
491 	ds.dargs.trans = sc->tp;
492 	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
493 	ds.state = xfs_da_state_alloc();
494 	ds.state->args = &ds.dargs;
495 	ds.state->mp = mp;
496 	ds.sc = sc;
497 	ds.private = private;
498 	if (whichfork == XFS_ATTR_FORK) {
499 		ds.dargs.geo = mp->m_attr_geo;
500 		ds.lowest = 0;
501 		ds.highest = 0;
502 	} else {
503 		ds.dargs.geo = mp->m_dir_geo;
504 		ds.lowest = ds.dargs.geo->leafblk;
505 		ds.highest = ds.dargs.geo->freeblk;
506 	}
507 	blkno = ds.lowest;
508 	level = 0;
509 
510 	/* Find the root of the da tree, if present. */
511 	blks = ds.state->path.blk;
512 	error = xchk_da_btree_block(&ds, level, blkno);
513 	if (error)
514 		goto out_state;
515 	/*
516 	 * We didn't find a block at ds.lowest, which means that there's
517 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
518 	 * so jump out now.
519 	 */
520 	if (blks[level].bp == NULL)
521 		goto out_state;
522 
523 	blks[level].index = 0;
524 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
525 		/* Handle leaf block. */
526 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
527 			/* End of leaf, pop back towards the root. */
528 			if (blks[level].index >= ds.maxrecs[level]) {
529 				if (level > 0)
530 					blks[level - 1].index++;
531 				ds.tree_level++;
532 				level--;
533 				continue;
534 			}
535 
536 			/* Dispatch record scrubbing. */
537 			rec = xchk_da_btree_entry(&ds, level,
538 					blks[level].index);
539 			error = scrub_fn(&ds, level, rec);
540 			if (error)
541 				break;
542 			if (xchk_should_terminate(sc, &error) ||
543 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
544 				break;
545 
546 			blks[level].index++;
547 			continue;
548 		}
549 
550 
551 		/* End of node, pop back towards the root. */
552 		if (blks[level].index >= ds.maxrecs[level]) {
553 			if (level > 0)
554 				blks[level - 1].index++;
555 			ds.tree_level++;
556 			level--;
557 			continue;
558 		}
559 
560 		/* Hashes in order for scrub? */
561 		key = xchk_da_btree_entry(&ds, level, blks[level].index);
562 		error = xchk_da_btree_hash(&ds, level, &key->hashval);
563 		if (error)
564 			goto out;
565 
566 		/* Drill another level deeper. */
567 		blkno = be32_to_cpu(key->before);
568 		level++;
569 		if (level >= XFS_DA_NODE_MAXDEPTH) {
570 			/* Too deep! */
571 			xchk_da_set_corrupt(&ds, level - 1);
572 			break;
573 		}
574 		ds.tree_level--;
575 		error = xchk_da_btree_block(&ds, level, blkno);
576 		if (error)
577 			goto out;
578 		if (blks[level].bp == NULL)
579 			goto out;
580 
581 		blks[level].index = 0;
582 	}
583 
584 out:
585 	/* Release all the buffers we're tracking. */
586 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
587 		if (blks[level].bp == NULL)
588 			continue;
589 		xfs_trans_brelse(sc->tp, blks[level].bp);
590 		blks[level].bp = NULL;
591 	}
592 
593 out_state:
594 	xfs_da_state_free(ds.state);
595 	return error;
596 }
597