xref: /openbmc/linux/fs/xfs/scrub/common.c (revision e00a844a)
1 /*
2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
28 #include "xfs_bit.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
31 #include "xfs_sb.h"
32 #include "xfs_inode.h"
33 #include "xfs_icache.h"
34 #include "xfs_itable.h"
35 #include "xfs_alloc.h"
36 #include "xfs_alloc_btree.h"
37 #include "xfs_bmap.h"
38 #include "xfs_bmap_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_ialloc_btree.h"
41 #include "xfs_refcount.h"
42 #include "xfs_refcount_btree.h"
43 #include "xfs_rmap.h"
44 #include "xfs_rmap_btree.h"
45 #include "xfs_log.h"
46 #include "xfs_trans_priv.h"
47 #include "scrub/xfs_scrub.h"
48 #include "scrub/scrub.h"
49 #include "scrub/common.h"
50 #include "scrub/trace.h"
51 #include "scrub/btree.h"
52 
53 /* Common code for the metadata scrubbers. */
54 
55 /*
56  * Handling operational errors.
57  *
58  * The *_process_error() family of functions are used to process error return
59  * codes from functions called as part of a scrub operation.
60  *
61  * If there's no error, we return true to tell the caller that it's ok
62  * to move on to the next check in its list.
63  *
64  * For non-verifier errors (e.g. ENOMEM) we return false to tell the
65  * caller that something bad happened, and we preserve *error so that
66  * the caller can return the *error up the stack to userspace.
67  *
68  * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
69  * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
70  * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
71  * not via return codes.  We return false to tell the caller that
72  * something bad happened.  Since the error has been cleared, the caller
73  * will (presumably) return that zero and scrubbing will move on to
74  * whatever's next.
75  *
76  * ftrace can be used to record the precise metadata location and the
77  * approximate code location of the failed operation.
78  */
79 
80 /* Check for operational errors. */
81 bool
82 xfs_scrub_process_error(
83 	struct xfs_scrub_context	*sc,
84 	xfs_agnumber_t			agno,
85 	xfs_agblock_t			bno,
86 	int				*error)
87 {
88 	switch (*error) {
89 	case 0:
90 		return true;
91 	case -EDEADLOCK:
92 		/* Used to restart an op with deadlock avoidance. */
93 		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
94 		break;
95 	case -EFSBADCRC:
96 	case -EFSCORRUPTED:
97 		/* Note the badness but don't abort. */
98 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
99 		*error = 0;
100 		/* fall through */
101 	default:
102 		trace_xfs_scrub_op_error(sc, agno, bno, *error,
103 				__return_address);
104 		break;
105 	}
106 	return false;
107 }
108 
109 /* Check for operational errors for a file offset. */
110 bool
111 xfs_scrub_fblock_process_error(
112 	struct xfs_scrub_context	*sc,
113 	int				whichfork,
114 	xfs_fileoff_t			offset,
115 	int				*error)
116 {
117 	switch (*error) {
118 	case 0:
119 		return true;
120 	case -EDEADLOCK:
121 		/* Used to restart an op with deadlock avoidance. */
122 		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
123 		break;
124 	case -EFSBADCRC:
125 	case -EFSCORRUPTED:
126 		/* Note the badness but don't abort. */
127 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
128 		*error = 0;
129 		/* fall through */
130 	default:
131 		trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
132 				__return_address);
133 		break;
134 	}
135 	return false;
136 }
137 
138 /*
139  * Handling scrub corruption/optimization/warning checks.
140  *
141  * The *_set_{corrupt,preen,warning}() family of functions are used to
142  * record the presence of metadata that is incorrect (corrupt), could be
143  * optimized somehow (preen), or should be flagged for administrative
144  * review but is not incorrect (warn).
145  *
146  * ftrace can be used to record the precise metadata location and
147  * approximate code location of the failed check.
148  */
149 
/*
 * Record a block which could be optimized.  Sets OFLAG_PREEN in the
 * scrub output flags and logs the block's daddr via tracepoint so the
 * exact location can be recovered with ftrace.
 */
void
xfs_scrub_block_set_preen(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address);
}
159 
/*
 * Record an inode which could be optimized.  The trace data will
 * include the block given by bp if bp is given; otherwise it will use
 * the block location of the inode record itself.
 */
void
xfs_scrub_ino_set_preen(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	/* bp may be NULL; trace a zero daddr in that case. */
	trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0,
			__return_address);
}
175 
/*
 * Record a corrupt block.  Sets OFLAG_CORRUPT in the scrub output flags
 * and logs the block's daddr via tracepoint for later diagnosis.
 */
void
xfs_scrub_block_set_corrupt(
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
}
185 
/*
 * Record a corrupt inode.  The trace data will include the block given
 * by bp if bp is given; otherwise it will use the block location of the
 * inode record itself.
 */
void
xfs_scrub_ino_set_corrupt(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	/* bp may be NULL; trace a zero daddr in that case. */
	trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
}
200 
/*
 * Record corruption in a block indexed by a file fork.  Sets
 * OFLAG_CORRUPT and logs the fork and file offset via tracepoint.
 */
void
xfs_scrub_fblock_set_corrupt(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
}
211 
/*
 * Warn about inodes that need administrative review but are not
 * incorrect.  Sets OFLAG_WARNING; the trace data will include the block
 * given by bp if bp is given, otherwise a zero daddr.
 */
void
xfs_scrub_ino_set_warning(
	struct xfs_scrub_context	*sc,
	xfs_ino_t			ino,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
			__return_address);
}
226 
/*
 * Warn about a block indexed by a file fork that needs review.  Sets
 * OFLAG_WARNING and logs the fork and file offset via tracepoint.
 */
void
xfs_scrub_fblock_set_warning(
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address);
}
237 
/*
 * Signal an incomplete scrub.  Sets OFLAG_INCOMPLETE so userspace knows
 * the check did not examine everything it was asked to.
 */
void
xfs_scrub_set_incomplete(
	struct xfs_scrub_context	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xfs_scrub_incomplete(sc, __return_address);
}
246 
247 /*
248  * AG scrubbing
249  *
250  * These helpers facilitate locking an allocation group's header
251  * buffers, setting up cursors for all btrees that are present, and
252  * cleaning everything up once we're through.
253  */
254 
255 /* Decide if we want to return an AG header read failure. */
256 static inline bool
257 want_ag_read_header_failure(
258 	struct xfs_scrub_context	*sc,
259 	unsigned int			type)
260 {
261 	/* Return all AG header read failures when scanning btrees. */
262 	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
263 	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
264 	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
265 		return true;
266 	/*
267 	 * If we're scanning a given type of AG header, we only want to
268 	 * see read failures from that specific header.  We'd like the
269 	 * other headers to cross-check them, but this isn't required.
270 	 */
271 	if (sc->sm->sm_type == type)
272 		return true;
273 	return false;
274 }
275 
276 /*
277  * Grab all the headers for an AG.
278  *
279  * The headers should be released by xfs_scrub_ag_free, but as a fail
280  * safe we attach all the buffers we grab to the scrub transaction so
281  * they'll all be freed when we cancel it.
282  */
283 int
284 xfs_scrub_ag_read_headers(
285 	struct xfs_scrub_context	*sc,
286 	xfs_agnumber_t			agno,
287 	struct xfs_buf			**agi,
288 	struct xfs_buf			**agf,
289 	struct xfs_buf			**agfl)
290 {
291 	struct xfs_mount		*mp = sc->mp;
292 	int				error;
293 
294 	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
295 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
296 		goto out;
297 
298 	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
299 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
300 		goto out;
301 
302 	error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
303 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
304 		goto out;
305 
306 out:
307 	return error;
308 }
309 
310 /* Release all the AG btree cursors. */
311 void
312 xfs_scrub_ag_btcur_free(
313 	struct xfs_scrub_ag		*sa)
314 {
315 	if (sa->refc_cur)
316 		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
317 	if (sa->rmap_cur)
318 		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
319 	if (sa->fino_cur)
320 		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
321 	if (sa->ino_cur)
322 		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
323 	if (sa->cnt_cur)
324 		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
325 	if (sa->bno_cur)
326 		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
327 
328 	sa->refc_cur = NULL;
329 	sa->rmap_cur = NULL;
330 	sa->fino_cur = NULL;
331 	sa->ino_cur = NULL;
332 	sa->bno_cur = NULL;
333 	sa->cnt_cur = NULL;
334 }
335 
336 /* Initialize all the btree cursors for an AG. */
337 int
338 xfs_scrub_ag_btcur_init(
339 	struct xfs_scrub_context	*sc,
340 	struct xfs_scrub_ag		*sa)
341 {
342 	struct xfs_mount		*mp = sc->mp;
343 	xfs_agnumber_t			agno = sa->agno;
344 
345 	if (sa->agf_bp) {
346 		/* Set up a bnobt cursor for cross-referencing. */
347 		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
348 				agno, XFS_BTNUM_BNO);
349 		if (!sa->bno_cur)
350 			goto err;
351 
352 		/* Set up a cntbt cursor for cross-referencing. */
353 		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
354 				agno, XFS_BTNUM_CNT);
355 		if (!sa->cnt_cur)
356 			goto err;
357 	}
358 
359 	/* Set up a inobt cursor for cross-referencing. */
360 	if (sa->agi_bp) {
361 		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
362 					agno, XFS_BTNUM_INO);
363 		if (!sa->ino_cur)
364 			goto err;
365 	}
366 
367 	/* Set up a finobt cursor for cross-referencing. */
368 	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
369 		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
370 				agno, XFS_BTNUM_FINO);
371 		if (!sa->fino_cur)
372 			goto err;
373 	}
374 
375 	/* Set up a rmapbt cursor for cross-referencing. */
376 	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
377 		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
378 				agno);
379 		if (!sa->rmap_cur)
380 			goto err;
381 	}
382 
383 	/* Set up a refcountbt cursor for cross-referencing. */
384 	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
385 		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
386 				sa->agf_bp, agno, NULL);
387 		if (!sa->refc_cur)
388 			goto err;
389 	}
390 
391 	return 0;
392 err:
393 	return -ENOMEM;
394 }
395 
396 /* Release the AG header context and btree cursors. */
397 void
398 xfs_scrub_ag_free(
399 	struct xfs_scrub_context	*sc,
400 	struct xfs_scrub_ag		*sa)
401 {
402 	xfs_scrub_ag_btcur_free(sa);
403 	if (sa->agfl_bp) {
404 		xfs_trans_brelse(sc->tp, sa->agfl_bp);
405 		sa->agfl_bp = NULL;
406 	}
407 	if (sa->agf_bp) {
408 		xfs_trans_brelse(sc->tp, sa->agf_bp);
409 		sa->agf_bp = NULL;
410 	}
411 	if (sa->agi_bp) {
412 		xfs_trans_brelse(sc->tp, sa->agi_bp);
413 		sa->agi_bp = NULL;
414 	}
415 	sa->agno = NULLAGNUMBER;
416 }
417 
/*
 * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 * order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.
 *
 * On success the sa structure holds the header buffers and btree
 * cursors; the caller releases them with xfs_scrub_ag_free.
 */
int
xfs_scrub_ag_init(
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	struct xfs_scrub_ag		*sa)
{
	int				error;

	sa->agno = agno;
	error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp,
			&sa->agf_bp, &sa->agfl_bp);
	if (error)
		return error;

	return xfs_scrub_ag_btcur_init(sc, sa);
}
441 
442 /* Per-scrubber setup functions */
443 
/*
 * Set us up with a transaction and an empty context.  The inode
 * argument is unused; this setup is for whole-fs metadata scrubbers.
 */
int
xfs_scrub_setup_fs(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp);
}
452 
/*
 * Set us up with AG headers and btree cursors.  Optionally forces the
 * log (and AIL) to disk first so that on-disk metadata is current.
 */
int
xfs_scrub_setup_ag_btree(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	bool				force_log)
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xfs_scrub_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xfs_scrub_setup_ag_header(sc, ip);
	if (error)
		return error;

	return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}
481 
482 /* Push everything out of the log onto disk. */
483 int
484 xfs_scrub_checkpoint_log(
485 	struct xfs_mount	*mp)
486 {
487 	int			error;
488 
489 	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
490 	if (error)
491 		return error;
492 	xfs_ail_push_all_sync(mp->m_ail);
493 	return 0;
494 }
495 
/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 *
 * Returns 0 with sc->ip set on success; -EINVAL for nonsense requests;
 * -ENOENT when the inode doesn't exist or its generation doesn't match.
 */
int
xfs_scrub_get_inode(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip_in)
{
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*ip = NULL;
	int				error;

	/*
	 * If userspace passed us an AG number or a generation number
	 * without an inode number, they haven't got a clue so bail out
	 * immediately.
	 */
	if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
		return -EINVAL;

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	/* UNTRUSTED: the inode number came from userspace and must be
	 * validated; DONTCACHE keeps scrub from polluting the cache. */
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
	if (error == -ENOENT || error == -EINVAL) {
		/* inode doesn't exist... */
		return -ENOENT;
	} else if (error) {
		/* Real operational error; record where it happened. */
		trace_xfs_scrub_op_error(sc,
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		/* Inode was reused; drop our reference and bail. */
		iput(VFS_I(ip));
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}
547 
548 /* Set us up to scrub a file's contents. */
549 int
550 xfs_scrub_setup_inode_contents(
551 	struct xfs_scrub_context	*sc,
552 	struct xfs_inode		*ip,
553 	unsigned int			resblks)
554 {
555 	struct xfs_mount		*mp = sc->mp;
556 	int				error;
557 
558 	error = xfs_scrub_get_inode(sc, ip);
559 	if (error)
560 		return error;
561 
562 	/* Got the inode, lock it and we're ready to go. */
563 	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
564 	xfs_ilock(sc->ip, sc->ilock_flags);
565 	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
566 	if (error)
567 		goto out;
568 	sc->ilock_flags |= XFS_ILOCK_EXCL;
569 	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
570 
571 out:
572 	/* scrub teardown will unlock and release the inode for us */
573 	return error;
574 }
575