xref: /openbmc/linux/fs/xfs/scrub/dabtree.c (revision 7f2e85840871f199057e65232ebde846192ed989)
1 /*
2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
28 #include "xfs_bit.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
31 #include "xfs_sb.h"
32 #include "xfs_inode.h"
33 #include "xfs_inode_fork.h"
34 #include "xfs_da_format.h"
35 #include "xfs_da_btree.h"
36 #include "xfs_dir2.h"
37 #include "xfs_dir2_priv.h"
38 #include "xfs_attr_leaf.h"
39 #include "scrub/xfs_scrub.h"
40 #include "scrub/scrub.h"
41 #include "scrub/common.h"
42 #include "scrub/trace.h"
43 #include "scrub/dabtree.h"
44 
45 /* Directory/Attribute Btree */
46 
47 /*
48  * Check for da btree operation errors.  See the section about handling
49  * operational errors in common.c.
50  */
51 bool
52 xfs_scrub_da_process_error(
53 	struct xfs_scrub_da_btree	*ds,
54 	int				level,
55 	int				*error)
56 {
57 	struct xfs_scrub_context	*sc = ds->sc;
58 
59 	if (*error == 0)
60 		return true;
61 
62 	switch (*error) {
63 	case -EDEADLOCK:
64 		/* Used to restart an op with deadlock avoidance. */
65 		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
66 		break;
67 	case -EFSBADCRC:
68 	case -EFSCORRUPTED:
69 		/* Note the badness but don't abort. */
70 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
71 		*error = 0;
72 		/* fall through */
73 	default:
74 		trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork,
75 				xfs_dir2_da_to_db(ds->dargs.geo,
76 					ds->state->path.blk[level].blkno),
77 				*error, __return_address);
78 		break;
79 	}
80 	return false;
81 }
82 
83 /*
84  * Check for da btree corruption.  See the section about handling
85  * operational errors in common.c.
86  */
87 void
88 xfs_scrub_da_set_corrupt(
89 	struct xfs_scrub_da_btree	*ds,
90 	int				level)
91 {
92 	struct xfs_scrub_context	*sc = ds->sc;
93 
94 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
95 
96 	trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork,
97 			xfs_dir2_da_to_db(ds->dargs.geo,
98 				ds->state->path.blk[level].blkno),
99 			__return_address);
100 }
101 
102 /* Find an entry at a certain level in a da btree. */
103 STATIC void *
104 xfs_scrub_da_btree_entry(
105 	struct xfs_scrub_da_btree	*ds,
106 	int				level,
107 	int				rec)
108 {
109 	char				*ents;
110 	struct xfs_da_state_blk		*blk;
111 	void				*baddr;
112 
113 	/* Dispatch the entry finding function. */
114 	blk = &ds->state->path.blk[level];
115 	baddr = blk->bp->b_addr;
116 	switch (blk->magic) {
117 	case XFS_ATTR_LEAF_MAGIC:
118 	case XFS_ATTR3_LEAF_MAGIC:
119 		ents = (char *)xfs_attr3_leaf_entryp(baddr);
120 		return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
121 	case XFS_DIR2_LEAFN_MAGIC:
122 	case XFS_DIR3_LEAFN_MAGIC:
123 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
124 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
125 	case XFS_DIR2_LEAF1_MAGIC:
126 	case XFS_DIR3_LEAF1_MAGIC:
127 		ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
128 		return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
129 	case XFS_DA_NODE_MAGIC:
130 	case XFS_DA3_NODE_MAGIC:
131 		ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
132 		return ents + (rec * sizeof(struct xfs_da_node_entry));
133 	}
134 
135 	return NULL;
136 }
137 
138 /* Scrub a da btree hash (key). */
139 int
140 xfs_scrub_da_btree_hash(
141 	struct xfs_scrub_da_btree	*ds,
142 	int				level,
143 	__be32				*hashp)
144 {
145 	struct xfs_da_state_blk		*blks;
146 	struct xfs_da_node_entry	*entry;
147 	xfs_dahash_t			hash;
148 	xfs_dahash_t			parent_hash;
149 
150 	/* Is this hash in order? */
151 	hash = be32_to_cpu(*hashp);
152 	if (hash < ds->hashes[level])
153 		xfs_scrub_da_set_corrupt(ds, level);
154 	ds->hashes[level] = hash;
155 
156 	if (level == 0)
157 		return 0;
158 
159 	/* Is this hash no larger than the parent hash? */
160 	blks = ds->state->path.blk;
161 	entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
162 	parent_hash = be32_to_cpu(entry->hashval);
163 	if (parent_hash < hash)
164 		xfs_scrub_da_set_corrupt(ds, level);
165 
166 	return 0;
167 }
168 
169 /*
170  * Check a da btree pointer.  Returns true if it's ok to use this
171  * pointer.
172  */
173 STATIC bool
174 xfs_scrub_da_btree_ptr_ok(
175 	struct xfs_scrub_da_btree	*ds,
176 	int				level,
177 	xfs_dablk_t			blkno)
178 {
179 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
180 		xfs_scrub_da_set_corrupt(ds, level);
181 		return false;
182 	}
183 
184 	return true;
185 }
186 
187 /*
188  * The da btree scrubber can handle leaf1 blocks as a degenerate
189  * form of leafn blocks.  Since the regular da code doesn't handle
190  * leaf1, we must multiplex the verifiers.
191  */
192 static void
193 xfs_scrub_da_btree_read_verify(
194 	struct xfs_buf		*bp)
195 {
196 	struct xfs_da_blkinfo	*info = bp->b_addr;
197 
198 	switch (be16_to_cpu(info->magic)) {
199 	case XFS_DIR2_LEAF1_MAGIC:
200 	case XFS_DIR3_LEAF1_MAGIC:
201 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
202 		bp->b_ops->verify_read(bp);
203 		return;
204 	default:
205 		/*
206 		 * xfs_da3_node_buf_ops already know how to handle
207 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
208 		 */
209 		bp->b_ops = &xfs_da3_node_buf_ops;
210 		bp->b_ops->verify_read(bp);
211 		return;
212 	}
213 }
214 static void
215 xfs_scrub_da_btree_write_verify(
216 	struct xfs_buf		*bp)
217 {
218 	struct xfs_da_blkinfo	*info = bp->b_addr;
219 
220 	switch (be16_to_cpu(info->magic)) {
221 	case XFS_DIR2_LEAF1_MAGIC:
222 	case XFS_DIR3_LEAF1_MAGIC:
223 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
224 		bp->b_ops->verify_write(bp);
225 		return;
226 	default:
227 		/*
228 		 * xfs_da3_node_buf_ops already know how to handle
229 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
230 		 */
231 		bp->b_ops = &xfs_da3_node_buf_ops;
232 		bp->b_ops->verify_write(bp);
233 		return;
234 	}
235 }
236 static void *
237 xfs_scrub_da_btree_verify(
238 	struct xfs_buf		*bp)
239 {
240 	struct xfs_da_blkinfo	*info = bp->b_addr;
241 
242 	switch (be16_to_cpu(info->magic)) {
243 	case XFS_DIR2_LEAF1_MAGIC:
244 	case XFS_DIR3_LEAF1_MAGIC:
245 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
246 		return bp->b_ops->verify_struct(bp);
247 	default:
248 		bp->b_ops = &xfs_da3_node_buf_ops;
249 		return bp->b_ops->verify_struct(bp);
250 	}
251 }
252 
253 static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
254 	.name = "xfs_scrub_da_btree",
255 	.verify_read = xfs_scrub_da_btree_read_verify,
256 	.verify_write = xfs_scrub_da_btree_write_verify,
257 	.verify_struct = xfs_scrub_da_btree_verify,
258 };
259 
260 /* Check a block's sibling. */
261 STATIC int
262 xfs_scrub_da_btree_block_check_sibling(
263 	struct xfs_scrub_da_btree	*ds,
264 	int				level,
265 	int				direction,
266 	xfs_dablk_t			sibling)
267 {
268 	int				retval;
269 	int				error;
270 
271 	memcpy(&ds->state->altpath, &ds->state->path,
272 			sizeof(ds->state->altpath));
273 
274 	/*
275 	 * If the pointer is null, we shouldn't be able to move the upper
276 	 * level pointer anywhere.
277 	 */
278 	if (sibling == 0) {
279 		error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
280 				direction, false, &retval);
281 		if (error == 0 && retval == 0)
282 			xfs_scrub_da_set_corrupt(ds, level);
283 		error = 0;
284 		goto out;
285 	}
286 
287 	/* Move the alternate cursor one block in the direction given. */
288 	error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
289 			direction, false, &retval);
290 	if (!xfs_scrub_da_process_error(ds, level, &error))
291 		return error;
292 	if (retval) {
293 		xfs_scrub_da_set_corrupt(ds, level);
294 		return error;
295 	}
296 	if (ds->state->altpath.blk[level].bp)
297 		xfs_scrub_buffer_recheck(ds->sc,
298 				ds->state->altpath.blk[level].bp);
299 
300 	/* Compare upper level pointer to sibling pointer. */
301 	if (ds->state->altpath.blk[level].blkno != sibling)
302 		xfs_scrub_da_set_corrupt(ds, level);
303 	xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
304 out:
305 	return error;
306 }
307 
308 /* Check a block's sibling pointers. */
309 STATIC int
310 xfs_scrub_da_btree_block_check_siblings(
311 	struct xfs_scrub_da_btree	*ds,
312 	int				level,
313 	struct xfs_da_blkinfo		*hdr)
314 {
315 	xfs_dablk_t			forw;
316 	xfs_dablk_t			back;
317 	int				error = 0;
318 
319 	forw = be32_to_cpu(hdr->forw);
320 	back = be32_to_cpu(hdr->back);
321 
322 	/* Top level blocks should not have sibling pointers. */
323 	if (level == 0) {
324 		if (forw != 0 || back != 0)
325 			xfs_scrub_da_set_corrupt(ds, level);
326 		return 0;
327 	}
328 
329 	/*
330 	 * Check back (left) and forw (right) pointers.  These functions
331 	 * absorb error codes for us.
332 	 */
333 	error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back);
334 	if (error)
335 		goto out;
336 	error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw);
337 
338 out:
339 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
340 	return error;
341 }
342 
343 /* Load a dir/attribute block from a btree. */
344 STATIC int
345 xfs_scrub_da_btree_block(
346 	struct xfs_scrub_da_btree	*ds,
347 	int				level,
348 	xfs_dablk_t			blkno)
349 {
350 	struct xfs_da_state_blk		*blk;
351 	struct xfs_da_intnode		*node;
352 	struct xfs_da_node_entry	*btree;
353 	struct xfs_da3_blkinfo		*hdr3;
354 	struct xfs_da_args		*dargs = &ds->dargs;
355 	struct xfs_inode		*ip = ds->dargs.dp;
356 	xfs_ino_t			owner;
357 	int				*pmaxrecs;
358 	struct xfs_da3_icnode_hdr	nodehdr;
359 	int				error = 0;
360 
361 	blk = &ds->state->path.blk[level];
362 	ds->state->path.active = level + 1;
363 
364 	/* Release old block. */
365 	if (blk->bp) {
366 		xfs_trans_brelse(dargs->trans, blk->bp);
367 		blk->bp = NULL;
368 	}
369 
370 	/* Check the pointer. */
371 	blk->blkno = blkno;
372 	if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno))
373 		goto out_nobuf;
374 
375 	/* Read the buffer. */
376 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
377 			&blk->bp, dargs->whichfork,
378 			&xfs_scrub_da_btree_buf_ops);
379 	if (!xfs_scrub_da_process_error(ds, level, &error))
380 		goto out_nobuf;
381 	if (blk->bp)
382 		xfs_scrub_buffer_recheck(ds->sc, blk->bp);
383 
384 	/*
385 	 * We didn't find a dir btree root block, which means that
386 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
387 	 * to be), so jump out now.
388 	 */
389 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
390 			blk->bp == NULL)
391 		goto out_nobuf;
392 
393 	/* It's /not/ ok for attr trees not to have a da btree. */
394 	if (blk->bp == NULL) {
395 		xfs_scrub_da_set_corrupt(ds, level);
396 		goto out_nobuf;
397 	}
398 
399 	hdr3 = blk->bp->b_addr;
400 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
401 	pmaxrecs = &ds->maxrecs[level];
402 
403 	/* We only started zeroing the header on v5 filesystems. */
404 	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
405 		xfs_scrub_da_set_corrupt(ds, level);
406 
407 	/* Check the owner. */
408 	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
409 		owner = be64_to_cpu(hdr3->owner);
410 		if (owner != ip->i_ino)
411 			xfs_scrub_da_set_corrupt(ds, level);
412 	}
413 
414 	/* Check the siblings. */
415 	error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
416 	if (error)
417 		goto out;
418 
419 	/* Interpret the buffer. */
420 	switch (blk->magic) {
421 	case XFS_ATTR_LEAF_MAGIC:
422 	case XFS_ATTR3_LEAF_MAGIC:
423 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
424 				XFS_BLFT_ATTR_LEAF_BUF);
425 		blk->magic = XFS_ATTR_LEAF_MAGIC;
426 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
427 		if (ds->tree_level != 0)
428 			xfs_scrub_da_set_corrupt(ds, level);
429 		break;
430 	case XFS_DIR2_LEAFN_MAGIC:
431 	case XFS_DIR3_LEAFN_MAGIC:
432 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
433 				XFS_BLFT_DIR_LEAFN_BUF);
434 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
435 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
436 		if (ds->tree_level != 0)
437 			xfs_scrub_da_set_corrupt(ds, level);
438 		break;
439 	case XFS_DIR2_LEAF1_MAGIC:
440 	case XFS_DIR3_LEAF1_MAGIC:
441 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
442 				XFS_BLFT_DIR_LEAF1_BUF);
443 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
444 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
445 		if (ds->tree_level != 0)
446 			xfs_scrub_da_set_corrupt(ds, level);
447 		break;
448 	case XFS_DA_NODE_MAGIC:
449 	case XFS_DA3_NODE_MAGIC:
450 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
451 				XFS_BLFT_DA_NODE_BUF);
452 		blk->magic = XFS_DA_NODE_MAGIC;
453 		node = blk->bp->b_addr;
454 		ip->d_ops->node_hdr_from_disk(&nodehdr, node);
455 		btree = ip->d_ops->node_tree_p(node);
456 		*pmaxrecs = nodehdr.count;
457 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
458 		if (level == 0) {
459 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
460 				xfs_scrub_da_set_corrupt(ds, level);
461 				goto out_freebp;
462 			}
463 			ds->tree_level = nodehdr.level;
464 		} else {
465 			if (ds->tree_level != nodehdr.level) {
466 				xfs_scrub_da_set_corrupt(ds, level);
467 				goto out_freebp;
468 			}
469 		}
470 
471 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
472 		break;
473 	default:
474 		xfs_scrub_da_set_corrupt(ds, level);
475 		goto out_freebp;
476 	}
477 
478 out:
479 	return error;
480 out_freebp:
481 	xfs_trans_brelse(dargs->trans, blk->bp);
482 	blk->bp = NULL;
483 out_nobuf:
484 	blk->blkno = 0;
485 	return error;
486 }
487 
488 /* Visit all nodes and leaves of a da btree. */
489 int
490 xfs_scrub_da_btree(
491 	struct xfs_scrub_context	*sc,
492 	int				whichfork,
493 	xfs_scrub_da_btree_rec_fn	scrub_fn,
494 	void				*private)
495 {
496 	struct xfs_scrub_da_btree	ds = {};
497 	struct xfs_mount		*mp = sc->mp;
498 	struct xfs_da_state_blk		*blks;
499 	struct xfs_da_node_entry	*key;
500 	void				*rec;
501 	xfs_dablk_t			blkno;
502 	int				level;
503 	int				error;
504 
505 	/* Skip short format data structures; no btree to scan. */
506 	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
507 	    XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
508 		return 0;
509 
510 	/* Set up initial da state. */
511 	ds.dargs.dp = sc->ip;
512 	ds.dargs.whichfork = whichfork;
513 	ds.dargs.trans = sc->tp;
514 	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
515 	ds.state = xfs_da_state_alloc();
516 	ds.state->args = &ds.dargs;
517 	ds.state->mp = mp;
518 	ds.sc = sc;
519 	ds.private = private;
520 	if (whichfork == XFS_ATTR_FORK) {
521 		ds.dargs.geo = mp->m_attr_geo;
522 		ds.lowest = 0;
523 		ds.highest = 0;
524 	} else {
525 		ds.dargs.geo = mp->m_dir_geo;
526 		ds.lowest = ds.dargs.geo->leafblk;
527 		ds.highest = ds.dargs.geo->freeblk;
528 	}
529 	blkno = ds.lowest;
530 	level = 0;
531 
532 	/* Find the root of the da tree, if present. */
533 	blks = ds.state->path.blk;
534 	error = xfs_scrub_da_btree_block(&ds, level, blkno);
535 	if (error)
536 		goto out_state;
537 	/*
538 	 * We didn't find a block at ds.lowest, which means that there's
539 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
540 	 * so jump out now.
541 	 */
542 	if (blks[level].bp == NULL)
543 		goto out_state;
544 
545 	blks[level].index = 0;
546 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
547 		/* Handle leaf block. */
548 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
549 			/* End of leaf, pop back towards the root. */
550 			if (blks[level].index >= ds.maxrecs[level]) {
551 				if (level > 0)
552 					blks[level - 1].index++;
553 				ds.tree_level++;
554 				level--;
555 				continue;
556 			}
557 
558 			/* Dispatch record scrubbing. */
559 			rec = xfs_scrub_da_btree_entry(&ds, level,
560 					blks[level].index);
561 			error = scrub_fn(&ds, level, rec);
562 			if (error)
563 				break;
564 			if (xfs_scrub_should_terminate(sc, &error) ||
565 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
566 				break;
567 
568 			blks[level].index++;
569 			continue;
570 		}
571 
572 
573 		/* End of node, pop back towards the root. */
574 		if (blks[level].index >= ds.maxrecs[level]) {
575 			if (level > 0)
576 				blks[level - 1].index++;
577 			ds.tree_level++;
578 			level--;
579 			continue;
580 		}
581 
582 		/* Hashes in order for scrub? */
583 		key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
584 		error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval);
585 		if (error)
586 			goto out;
587 
588 		/* Drill another level deeper. */
589 		blkno = be32_to_cpu(key->before);
590 		level++;
591 		ds.tree_level--;
592 		error = xfs_scrub_da_btree_block(&ds, level, blkno);
593 		if (error)
594 			goto out;
595 		if (blks[level].bp == NULL)
596 			goto out;
597 
598 		blks[level].index = 0;
599 	}
600 
601 out:
602 	/* Release all the buffers we're tracking. */
603 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
604 		if (blks[level].bp == NULL)
605 			continue;
606 		xfs_trans_brelse(sc->tp, blks[level].bp);
607 		blks[level].bp = NULL;
608 	}
609 
610 out_state:
611 	xfs_da_state_free(ds.state);
612 	return error;
613 }
614