xref: /openbmc/linux/fs/xfs/scrub/fscounters.c (revision 46eeaa11bdd1bc9e077bdf741d32ca7235d263c6)
1ce85a1e0SDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later
275efa57dSDarrick J. Wong /*
3ecc73f8aSDarrick J. Wong  * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
4739a2fe0SDarrick J. Wong  * Author: Darrick J. Wong <djwong@kernel.org>
575efa57dSDarrick J. Wong  */
675efa57dSDarrick J. Wong #include "xfs.h"
775efa57dSDarrick J. Wong #include "xfs_fs.h"
875efa57dSDarrick J. Wong #include "xfs_shared.h"
975efa57dSDarrick J. Wong #include "xfs_format.h"
1075efa57dSDarrick J. Wong #include "xfs_trans_resv.h"
11ce85a1e0SDarrick J. Wong #include "xfs_log_format.h"
12ce85a1e0SDarrick J. Wong #include "xfs_trans.h"
1375efa57dSDarrick J. Wong #include "xfs_mount.h"
1475efa57dSDarrick J. Wong #include "xfs_alloc.h"
1575efa57dSDarrick J. Wong #include "xfs_ialloc.h"
1675efa57dSDarrick J. Wong #include "xfs_health.h"
17e147a756SDarrick J. Wong #include "xfs_btree.h"
189bbafc71SDave Chinner #include "xfs_ag.h"
19*1a6d63f2SDarrick J. Wong #include "xfs_rtbitmap.h"
20e74331d6SDarrick J. Wong #include "xfs_inode.h"
21ce85a1e0SDarrick J. Wong #include "xfs_icache.h"
2275efa57dSDarrick J. Wong #include "scrub/scrub.h"
2375efa57dSDarrick J. Wong #include "scrub/common.h"
2475efa57dSDarrick J. Wong #include "scrub/trace.h"
2575efa57dSDarrick J. Wong 
2675efa57dSDarrick J. Wong /*
2775efa57dSDarrick J. Wong  * FS Summary Counters
2875efa57dSDarrick J. Wong  * ===================
2975efa57dSDarrick J. Wong  *
3075efa57dSDarrick J. Wong  * The basics of filesystem summary counter checking are that we iterate the
3175efa57dSDarrick J. Wong  * AGs counting the number of free blocks, free space btree blocks, per-AG
3275efa57dSDarrick J. Wong  * reservations, inodes, delayed allocation reservations, and free inodes.
3375efa57dSDarrick J. Wong  * Then we compare what we computed against the in-core counters.
3475efa57dSDarrick J. Wong  *
3575efa57dSDarrick J. Wong  * However, the reality is that summary counters are a tricky beast to check.
3675efa57dSDarrick J. Wong  * While we /could/ freeze the filesystem and scramble around the AGs counting
3775efa57dSDarrick J. Wong  * the free blocks, in practice we prefer not to do that for a scan because
3875efa57dSDarrick J. Wong  * freezing is costly.  To get around this, we added a per-cpu counter of the
3975efa57dSDarrick J. Wong  * delalloc reservations so that we can rotor around the AGs relatively
4075efa57dSDarrick J. Wong  * quickly, and we allow the counts to be slightly off because we're not taking
4175efa57dSDarrick J. Wong  * any locks while we do this.
4275efa57dSDarrick J. Wong  *
4375efa57dSDarrick J. Wong  * So the first thing we do is warm up the buffer cache in the setup routine by
4475efa57dSDarrick J. Wong  * walking all the AGs to make sure the incore per-AG structure has been
4575efa57dSDarrick J. Wong  * initialized.  The expected value calculation then iterates the incore per-AG
4675efa57dSDarrick J. Wong  * structures as quickly as it can.  We snapshot the percpu counters before and
4775efa57dSDarrick J. Wong  * after this operation and use the difference in counter values to guess at
4875efa57dSDarrick J. Wong  * our tolerance for mismatch between expected and actual counter values.
4975efa57dSDarrick J. Wong  */
5075efa57dSDarrick J. Wong 
/* Scratch state shared by the fscounters setup, check, and cleanup code. */
struct xchk_fscounters {
	/* Owning scrub context; read by the cleanup hook and the rt callback. */
	struct xfs_scrub	*sc;

	/* Expected counter values computed by walking the AG and rt metadata. */
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	uint64_t		frextents;

	/* Inode count bounds filled in by xfs_icount_range() during setup. */
	unsigned long long	icount_min;
	unsigned long long	icount_max;

	/* Set once we hold a kernel freeze; cleared again after a thaw. */
	bool			frozen;
};
61e74331d6SDarrick J. Wong 
/*
 * Arbitrary amount of slack, in inodes, by which the summed free inode counter
 * may exceed the summed inode counter before the scrubber flags the counters
 * as corrupt.
 */
#define XCHK_FSCOUNT_MIN_VARIANCE	512
6875efa57dSDarrick J. Wong 
6975efa57dSDarrick J. Wong /*
7075efa57dSDarrick J. Wong  * Make sure the per-AG structure has been initialized from the on-disk header
7175efa57dSDarrick J. Wong  * contents and trust that the incore counters match the ondisk counters.  (The
7275efa57dSDarrick J. Wong  * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
7375efa57dSDarrick J. Wong  * summary counters after checking all AG headers).  Do this from the setup
7475efa57dSDarrick J. Wong  * function so that the inner AG aggregation loop runs as quickly as possible.
7575efa57dSDarrick J. Wong  *
7675efa57dSDarrick J. Wong  * This function runs during the setup phase /before/ we start checking any
7775efa57dSDarrick J. Wong  * metadata.
7875efa57dSDarrick J. Wong  */
7975efa57dSDarrick J. Wong STATIC int
xchk_fscount_warmup(struct xfs_scrub * sc)8075efa57dSDarrick J. Wong xchk_fscount_warmup(
8175efa57dSDarrick J. Wong 	struct xfs_scrub	*sc)
8275efa57dSDarrick J. Wong {
8375efa57dSDarrick J. Wong 	struct xfs_mount	*mp = sc->mp;
8475efa57dSDarrick J. Wong 	struct xfs_buf		*agi_bp = NULL;
8575efa57dSDarrick J. Wong 	struct xfs_buf		*agf_bp = NULL;
8675efa57dSDarrick J. Wong 	struct xfs_perag	*pag = NULL;
8775efa57dSDarrick J. Wong 	xfs_agnumber_t		agno;
8875efa57dSDarrick J. Wong 	int			error = 0;
8975efa57dSDarrick J. Wong 
90f250eedcSDave Chinner 	for_each_perag(mp, agno, pag) {
91f250eedcSDave Chinner 		if (xchk_should_terminate(sc, &error))
92f250eedcSDave Chinner 			break;
937ac2ff8bSDave Chinner 		if (xfs_perag_initialised_agi(pag) &&
947ac2ff8bSDave Chinner 		    xfs_perag_initialised_agf(pag))
95f250eedcSDave Chinner 			continue;
9675efa57dSDarrick J. Wong 
9775efa57dSDarrick J. Wong 		/* Lock both AG headers. */
9899b13c7fSDave Chinner 		error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
9975efa57dSDarrick J. Wong 		if (error)
10075efa57dSDarrick J. Wong 			break;
10108d3e84fSDave Chinner 		error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
10275efa57dSDarrick J. Wong 		if (error)
10375efa57dSDarrick J. Wong 			break;
10475efa57dSDarrick J. Wong 
10575efa57dSDarrick J. Wong 		/*
10675efa57dSDarrick J. Wong 		 * These are supposed to be initialized by the header read
10775efa57dSDarrick J. Wong 		 * function.
10875efa57dSDarrick J. Wong 		 */
1097ac2ff8bSDave Chinner 		if (!xfs_perag_initialised_agi(pag) ||
1107ac2ff8bSDave Chinner 		    !xfs_perag_initialised_agf(pag)) {
11175efa57dSDarrick J. Wong 			error = -EFSCORRUPTED;
11275efa57dSDarrick J. Wong 			break;
113f250eedcSDave Chinner 		}
11475efa57dSDarrick J. Wong 
11575efa57dSDarrick J. Wong 		xfs_buf_relse(agf_bp);
11675efa57dSDarrick J. Wong 		agf_bp = NULL;
11775efa57dSDarrick J. Wong 		xfs_buf_relse(agi_bp);
11875efa57dSDarrick J. Wong 		agi_bp = NULL;
11975efa57dSDarrick J. Wong 	}
12075efa57dSDarrick J. Wong 
12175efa57dSDarrick J. Wong 	if (agf_bp)
12275efa57dSDarrick J. Wong 		xfs_buf_relse(agf_bp);
12375efa57dSDarrick J. Wong 	if (agi_bp)
12475efa57dSDarrick J. Wong 		xfs_buf_relse(agi_bp);
12575efa57dSDarrick J. Wong 	if (pag)
126c4d5660aSDave Chinner 		xfs_perag_rele(pag);
12775efa57dSDarrick J. Wong 	return error;
12875efa57dSDarrick J. Wong }
12975efa57dSDarrick J. Wong 
130ce85a1e0SDarrick J. Wong static inline int
xchk_fsfreeze(struct xfs_scrub * sc)131ce85a1e0SDarrick J. Wong xchk_fsfreeze(
132ce85a1e0SDarrick J. Wong 	struct xfs_scrub	*sc)
133ce85a1e0SDarrick J. Wong {
134ce85a1e0SDarrick J. Wong 	int			error;
135ce85a1e0SDarrick J. Wong 
136ce85a1e0SDarrick J. Wong 	error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
137ce85a1e0SDarrick J. Wong 	trace_xchk_fsfreeze(sc, error);
138ce85a1e0SDarrick J. Wong 	return error;
139ce85a1e0SDarrick J. Wong }
140ce85a1e0SDarrick J. Wong 
141ce85a1e0SDarrick J. Wong static inline int
xchk_fsthaw(struct xfs_scrub * sc)142ce85a1e0SDarrick J. Wong xchk_fsthaw(
143ce85a1e0SDarrick J. Wong 	struct xfs_scrub	*sc)
144ce85a1e0SDarrick J. Wong {
145ce85a1e0SDarrick J. Wong 	int			error;
146ce85a1e0SDarrick J. Wong 
147ce85a1e0SDarrick J. Wong 	/* This should always succeed, we have a kernel freeze */
148ce85a1e0SDarrick J. Wong 	error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
149ce85a1e0SDarrick J. Wong 	trace_xchk_fsthaw(sc, error);
150ce85a1e0SDarrick J. Wong 	return error;
151ce85a1e0SDarrick J. Wong }
152ce85a1e0SDarrick J. Wong 
153ce85a1e0SDarrick J. Wong /*
154ce85a1e0SDarrick J. Wong  * We couldn't stabilize the filesystem long enough to sample all the variables
155ce85a1e0SDarrick J. Wong  * that comprise the summary counters and compare them to the percpu counters.
156ce85a1e0SDarrick J. Wong  * We need to disable all writer threads, which means taking the first two
157ce85a1e0SDarrick J. Wong  * freeze levels to put userspace to sleep, and the third freeze level to
158ce85a1e0SDarrick J. Wong  * prevent background threads from starting new transactions.  Take one level
159ce85a1e0SDarrick J. Wong  * more to prevent other callers from unfreezing the filesystem while we run.
160ce85a1e0SDarrick J. Wong  */
161ce85a1e0SDarrick J. Wong STATIC int
xchk_fscounters_freeze(struct xfs_scrub * sc)162ce85a1e0SDarrick J. Wong xchk_fscounters_freeze(
163ce85a1e0SDarrick J. Wong 	struct xfs_scrub	*sc)
164ce85a1e0SDarrick J. Wong {
165ce85a1e0SDarrick J. Wong 	struct xchk_fscounters	*fsc = sc->buf;
166ce85a1e0SDarrick J. Wong 	int			error = 0;
167ce85a1e0SDarrick J. Wong 
168ce85a1e0SDarrick J. Wong 	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
169ce85a1e0SDarrick J. Wong 		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
170ce85a1e0SDarrick J. Wong 		mnt_drop_write_file(sc->file);
171ce85a1e0SDarrick J. Wong 	}
172ce85a1e0SDarrick J. Wong 
173ce85a1e0SDarrick J. Wong 	/* Try to grab a kernel freeze. */
174ce85a1e0SDarrick J. Wong 	while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
175ce85a1e0SDarrick J. Wong 		if (xchk_should_terminate(sc, &error))
176ce85a1e0SDarrick J. Wong 			return error;
177ce85a1e0SDarrick J. Wong 
178ce85a1e0SDarrick J. Wong 		delay(HZ / 10);
179ce85a1e0SDarrick J. Wong 	}
180ce85a1e0SDarrick J. Wong 	if (error)
181ce85a1e0SDarrick J. Wong 		return error;
182ce85a1e0SDarrick J. Wong 
183ce85a1e0SDarrick J. Wong 	fsc->frozen = true;
184ce85a1e0SDarrick J. Wong 	return 0;
185ce85a1e0SDarrick J. Wong }
186ce85a1e0SDarrick J. Wong 
187ce85a1e0SDarrick J. Wong /* Thaw the filesystem after checking or repairing fscounters. */
188ce85a1e0SDarrick J. Wong STATIC void
xchk_fscounters_cleanup(void * buf)189ce85a1e0SDarrick J. Wong xchk_fscounters_cleanup(
190ce85a1e0SDarrick J. Wong 	void			*buf)
191ce85a1e0SDarrick J. Wong {
192ce85a1e0SDarrick J. Wong 	struct xchk_fscounters	*fsc = buf;
193ce85a1e0SDarrick J. Wong 	struct xfs_scrub	*sc = fsc->sc;
194ce85a1e0SDarrick J. Wong 	int			error;
195ce85a1e0SDarrick J. Wong 
196ce85a1e0SDarrick J. Wong 	if (!fsc->frozen)
197ce85a1e0SDarrick J. Wong 		return;
198ce85a1e0SDarrick J. Wong 
199ce85a1e0SDarrick J. Wong 	error = xchk_fsthaw(sc);
200ce85a1e0SDarrick J. Wong 	if (error)
201ce85a1e0SDarrick J. Wong 		xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
202ce85a1e0SDarrick J. Wong 	else
203ce85a1e0SDarrick J. Wong 		fsc->frozen = false;
204ce85a1e0SDarrick J. Wong }
205ce85a1e0SDarrick J. Wong 
20675efa57dSDarrick J. Wong int
xchk_setup_fscounters(struct xfs_scrub * sc)20775efa57dSDarrick J. Wong xchk_setup_fscounters(
208026f57ebSDarrick J. Wong 	struct xfs_scrub	*sc)
20975efa57dSDarrick J. Wong {
21075efa57dSDarrick J. Wong 	struct xchk_fscounters	*fsc;
21175efa57dSDarrick J. Wong 	int			error;
21275efa57dSDarrick J. Wong 
213466c525dSDarrick J. Wong 	/*
214466c525dSDarrick J. Wong 	 * If the AGF doesn't track btreeblks, we have to lock the AGF to count
215466c525dSDarrick J. Wong 	 * btree block usage by walking the actual btrees.
216466c525dSDarrick J. Wong 	 */
217466c525dSDarrick J. Wong 	if (!xfs_has_lazysbcount(sc->mp))
218466c525dSDarrick J. Wong 		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
219466c525dSDarrick J. Wong 
220306195f3SDarrick J. Wong 	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
22175efa57dSDarrick J. Wong 	if (!sc->buf)
22275efa57dSDarrick J. Wong 		return -ENOMEM;
223ce85a1e0SDarrick J. Wong 	sc->buf_cleanup = xchk_fscounters_cleanup;
22475efa57dSDarrick J. Wong 	fsc = sc->buf;
225e74331d6SDarrick J. Wong 	fsc->sc = sc;
22675efa57dSDarrick J. Wong 
22775efa57dSDarrick J. Wong 	xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
22875efa57dSDarrick J. Wong 
22975efa57dSDarrick J. Wong 	/* We must get the incore counters set up before we can proceed. */
23075efa57dSDarrick J. Wong 	error = xchk_fscount_warmup(sc);
23175efa57dSDarrick J. Wong 	if (error)
23275efa57dSDarrick J. Wong 		return error;
23375efa57dSDarrick J. Wong 
234ce85a1e0SDarrick J. Wong 	/*
235ce85a1e0SDarrick J. Wong 	 * Pause all writer activity in the filesystem while we're scrubbing to
236ce85a1e0SDarrick J. Wong 	 * reduce the likelihood of background perturbations to the counters
237ce85a1e0SDarrick J. Wong 	 * throwing off our calculations.
238ce85a1e0SDarrick J. Wong 	 */
239ce85a1e0SDarrick J. Wong 	if (sc->flags & XCHK_TRY_HARDER) {
240ce85a1e0SDarrick J. Wong 		error = xchk_fscounters_freeze(sc);
241ce85a1e0SDarrick J. Wong 		if (error)
242ce85a1e0SDarrick J. Wong 			return error;
243ce85a1e0SDarrick J. Wong 	}
244ce85a1e0SDarrick J. Wong 
245ce85a1e0SDarrick J. Wong 	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
24675efa57dSDarrick J. Wong }
24775efa57dSDarrick J. Wong 
24811f97e68SDarrick J. Wong /*
24911f97e68SDarrick J. Wong  * Part 1: Collecting filesystem summary counts.  For each AG, we add its
25011f97e68SDarrick J. Wong  * summary counts (total inodes, free inodes, free data blocks) to an incore
25111f97e68SDarrick J. Wong  * copy of the overall filesystem summary counts.
25211f97e68SDarrick J. Wong  *
25311f97e68SDarrick J. Wong  * To avoid false corruption reports in part 2, any failure in this part must
25411f97e68SDarrick J. Wong  * set the INCOMPLETE flag even when a negative errno is returned.  This care
25511f97e68SDarrick J. Wong  * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
25611f97e68SDarrick J. Wong  * ECANCELED) that are absorbed into a scrub state flag update by
25711f97e68SDarrick J. Wong  * xchk_*_process_error.
25811f97e68SDarrick J. Wong  */
25911f97e68SDarrick J. Wong 
260e147a756SDarrick J. Wong /* Count free space btree blocks manually for pre-lazysbcount filesystems. */
261e147a756SDarrick J. Wong static int
xchk_fscount_btreeblks(struct xfs_scrub * sc,struct xchk_fscounters * fsc,xfs_agnumber_t agno)262e147a756SDarrick J. Wong xchk_fscount_btreeblks(
263e147a756SDarrick J. Wong 	struct xfs_scrub	*sc,
264e147a756SDarrick J. Wong 	struct xchk_fscounters	*fsc,
265e147a756SDarrick J. Wong 	xfs_agnumber_t		agno)
266e147a756SDarrick J. Wong {
267e147a756SDarrick J. Wong 	xfs_extlen_t		blocks;
268e147a756SDarrick J. Wong 	int			error;
269e147a756SDarrick J. Wong 
27048c6615cSDarrick J. Wong 	error = xchk_ag_init_existing(sc, agno, &sc->sa);
271e147a756SDarrick J. Wong 	if (error)
27261e0d0ccSDarrick J. Wong 		goto out_free;
273e147a756SDarrick J. Wong 
274e147a756SDarrick J. Wong 	error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
275e147a756SDarrick J. Wong 	if (error)
276e147a756SDarrick J. Wong 		goto out_free;
277e147a756SDarrick J. Wong 	fsc->fdblocks += blocks - 1;
278e147a756SDarrick J. Wong 
279e147a756SDarrick J. Wong 	error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
280e147a756SDarrick J. Wong 	if (error)
281e147a756SDarrick J. Wong 		goto out_free;
282e147a756SDarrick J. Wong 	fsc->fdblocks += blocks - 1;
283e147a756SDarrick J. Wong 
284e147a756SDarrick J. Wong out_free:
285e147a756SDarrick J. Wong 	xchk_ag_free(sc, &sc->sa);
286e147a756SDarrick J. Wong 	return error;
287e147a756SDarrick J. Wong }
288e147a756SDarrick J. Wong 
28975efa57dSDarrick J. Wong /*
29075efa57dSDarrick J. Wong  * Calculate what the global in-core counters ought to be from the incore
29175efa57dSDarrick J. Wong  * per-AG structure.  Callers can compare this to the actual in-core counters
29275efa57dSDarrick J. Wong  * to estimate by how much both in-core and on-disk counters need to be
29375efa57dSDarrick J. Wong  * adjusted.
29475efa57dSDarrick J. Wong  */
29575efa57dSDarrick J. Wong STATIC int
xchk_fscount_aggregate_agcounts(struct xfs_scrub * sc,struct xchk_fscounters * fsc)29675efa57dSDarrick J. Wong xchk_fscount_aggregate_agcounts(
29775efa57dSDarrick J. Wong 	struct xfs_scrub	*sc,
29875efa57dSDarrick J. Wong 	struct xchk_fscounters	*fsc)
29975efa57dSDarrick J. Wong {
30075efa57dSDarrick J. Wong 	struct xfs_mount	*mp = sc->mp;
30175efa57dSDarrick J. Wong 	struct xfs_perag	*pag;
30275efa57dSDarrick J. Wong 	uint64_t		delayed;
30375efa57dSDarrick J. Wong 	xfs_agnumber_t		agno;
30475efa57dSDarrick J. Wong 	int			tries = 8;
3058ef34723SDarrick J. Wong 	int			error = 0;
30675efa57dSDarrick J. Wong 
30775efa57dSDarrick J. Wong retry:
30875efa57dSDarrick J. Wong 	fsc->icount = 0;
30975efa57dSDarrick J. Wong 	fsc->ifree = 0;
31075efa57dSDarrick J. Wong 	fsc->fdblocks = 0;
31175efa57dSDarrick J. Wong 
312f250eedcSDave Chinner 	for_each_perag(mp, agno, pag) {
313f250eedcSDave Chinner 		if (xchk_should_terminate(sc, &error))
314f250eedcSDave Chinner 			break;
31575efa57dSDarrick J. Wong 
31675efa57dSDarrick J. Wong 		/* This somehow got unset since the warmup? */
3177ac2ff8bSDave Chinner 		if (!xfs_perag_initialised_agi(pag) ||
3187ac2ff8bSDave Chinner 		    !xfs_perag_initialised_agf(pag)) {
319f250eedcSDave Chinner 			error = -EFSCORRUPTED;
320f250eedcSDave Chinner 			break;
32175efa57dSDarrick J. Wong 		}
32275efa57dSDarrick J. Wong 
32375efa57dSDarrick J. Wong 		/* Count all the inodes */
32475efa57dSDarrick J. Wong 		fsc->icount += pag->pagi_count;
32575efa57dSDarrick J. Wong 		fsc->ifree += pag->pagi_freecount;
32675efa57dSDarrick J. Wong 
32775efa57dSDarrick J. Wong 		/* Add up the free/freelist/bnobt/cntbt blocks */
32875efa57dSDarrick J. Wong 		fsc->fdblocks += pag->pagf_freeblks;
32975efa57dSDarrick J. Wong 		fsc->fdblocks += pag->pagf_flcount;
330ebd9027dSDave Chinner 		if (xfs_has_lazysbcount(sc->mp)) {
33175efa57dSDarrick J. Wong 			fsc->fdblocks += pag->pagf_btreeblks;
332e147a756SDarrick J. Wong 		} else {
333e147a756SDarrick J. Wong 			error = xchk_fscount_btreeblks(sc, fsc, agno);
334f250eedcSDave Chinner 			if (error)
335e147a756SDarrick J. Wong 				break;
336e147a756SDarrick J. Wong 		}
33775efa57dSDarrick J. Wong 
33875efa57dSDarrick J. Wong 		/*
33975efa57dSDarrick J. Wong 		 * Per-AG reservations are taken out of the incore counters,
34075efa57dSDarrick J. Wong 		 * so they must be left out of the free blocks computation.
34175efa57dSDarrick J. Wong 		 */
34275efa57dSDarrick J. Wong 		fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
34375efa57dSDarrick J. Wong 		fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;
34475efa57dSDarrick J. Wong 
34575efa57dSDarrick J. Wong 	}
346f250eedcSDave Chinner 	if (pag)
347c4d5660aSDave Chinner 		xfs_perag_rele(pag);
34811f97e68SDarrick J. Wong 	if (error) {
34911f97e68SDarrick J. Wong 		xchk_set_incomplete(sc);
3508ef34723SDarrick J. Wong 		return error;
35111f97e68SDarrick J. Wong 	}
3528ef34723SDarrick J. Wong 
35375efa57dSDarrick J. Wong 	/*
35475efa57dSDarrick J. Wong 	 * The global incore space reservation is taken from the incore
35575efa57dSDarrick J. Wong 	 * counters, so leave that out of the computation.
35675efa57dSDarrick J. Wong 	 */
35775efa57dSDarrick J. Wong 	fsc->fdblocks -= mp->m_resblks_avail;
35875efa57dSDarrick J. Wong 
35975efa57dSDarrick J. Wong 	/*
36075efa57dSDarrick J. Wong 	 * Delayed allocation reservations are taken out of the incore counters
36175efa57dSDarrick J. Wong 	 * but not recorded on disk, so leave them and their indlen blocks out
36275efa57dSDarrick J. Wong 	 * of the computation.
36375efa57dSDarrick J. Wong 	 */
36475efa57dSDarrick J. Wong 	delayed = percpu_counter_sum(&mp->m_delalloc_blks);
36575efa57dSDarrick J. Wong 	fsc->fdblocks -= delayed;
36675efa57dSDarrick J. Wong 
36775efa57dSDarrick J. Wong 	trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
36875efa57dSDarrick J. Wong 			delayed);
36975efa57dSDarrick J. Wong 
37075efa57dSDarrick J. Wong 
37175efa57dSDarrick J. Wong 	/* Bail out if the values we compute are totally nonsense. */
37275efa57dSDarrick J. Wong 	if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
37375efa57dSDarrick J. Wong 	    fsc->fdblocks > mp->m_sb.sb_dblocks ||
37475efa57dSDarrick J. Wong 	    fsc->ifree > fsc->icount_max)
37575efa57dSDarrick J. Wong 		return -EFSCORRUPTED;
37675efa57dSDarrick J. Wong 
37775efa57dSDarrick J. Wong 	/*
37875efa57dSDarrick J. Wong 	 * If ifree > icount then we probably had some perturbation in the
37975efa57dSDarrick J. Wong 	 * counters while we were calculating things.  We'll try a few times
38075efa57dSDarrick J. Wong 	 * to maintain ifree <= icount before giving up.
38175efa57dSDarrick J. Wong 	 */
38275efa57dSDarrick J. Wong 	if (fsc->ifree > fsc->icount) {
38375efa57dSDarrick J. Wong 		if (tries--)
38475efa57dSDarrick J. Wong 			goto retry;
385ce85a1e0SDarrick J. Wong 		return -EDEADLOCK;
38675efa57dSDarrick J. Wong 	}
38775efa57dSDarrick J. Wong 
38875efa57dSDarrick J. Wong 	return 0;
38975efa57dSDarrick J. Wong }
39075efa57dSDarrick J. Wong 
391e74331d6SDarrick J. Wong #ifdef CONFIG_XFS_RT
392e74331d6SDarrick J. Wong STATIC int
xchk_fscount_add_frextent(struct xfs_mount * mp,struct xfs_trans * tp,const struct xfs_rtalloc_rec * rec,void * priv)393e74331d6SDarrick J. Wong xchk_fscount_add_frextent(
394e74331d6SDarrick J. Wong 	struct xfs_mount		*mp,
395e74331d6SDarrick J. Wong 	struct xfs_trans		*tp,
396e74331d6SDarrick J. Wong 	const struct xfs_rtalloc_rec	*rec,
397e74331d6SDarrick J. Wong 	void				*priv)
398e74331d6SDarrick J. Wong {
399e74331d6SDarrick J. Wong 	struct xchk_fscounters		*fsc = priv;
400e74331d6SDarrick J. Wong 	int				error = 0;
401e74331d6SDarrick J. Wong 
402e74331d6SDarrick J. Wong 	fsc->frextents += rec->ar_extcount;
403e74331d6SDarrick J. Wong 
404e74331d6SDarrick J. Wong 	xchk_should_terminate(fsc->sc, &error);
405e74331d6SDarrick J. Wong 	return error;
406e74331d6SDarrick J. Wong }
407e74331d6SDarrick J. Wong 
408e74331d6SDarrick J. Wong /* Calculate the number of free realtime extents from the realtime bitmap. */
409e74331d6SDarrick J. Wong STATIC int
xchk_fscount_count_frextents(struct xfs_scrub * sc,struct xchk_fscounters * fsc)410e74331d6SDarrick J. Wong xchk_fscount_count_frextents(
411e74331d6SDarrick J. Wong 	struct xfs_scrub	*sc,
412e74331d6SDarrick J. Wong 	struct xchk_fscounters	*fsc)
413e74331d6SDarrick J. Wong {
414e74331d6SDarrick J. Wong 	struct xfs_mount	*mp = sc->mp;
415e74331d6SDarrick J. Wong 	int			error;
416e74331d6SDarrick J. Wong 
417e74331d6SDarrick J. Wong 	fsc->frextents = 0;
418e74331d6SDarrick J. Wong 	if (!xfs_has_realtime(mp))
419e74331d6SDarrick J. Wong 		return 0;
420e74331d6SDarrick J. Wong 
421e74331d6SDarrick J. Wong 	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
422e74331d6SDarrick J. Wong 	error = xfs_rtalloc_query_all(sc->mp, sc->tp,
423e74331d6SDarrick J. Wong 			xchk_fscount_add_frextent, fsc);
424e74331d6SDarrick J. Wong 	if (error) {
425e74331d6SDarrick J. Wong 		xchk_set_incomplete(sc);
426e74331d6SDarrick J. Wong 		goto out_unlock;
427e74331d6SDarrick J. Wong 	}
428e74331d6SDarrick J. Wong 
429e74331d6SDarrick J. Wong out_unlock:
430e74331d6SDarrick J. Wong 	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
431e74331d6SDarrick J. Wong 	return error;
432e74331d6SDarrick J. Wong }
433e74331d6SDarrick J. Wong #else
434e74331d6SDarrick J. Wong STATIC int
xchk_fscount_count_frextents(struct xfs_scrub * sc,struct xchk_fscounters * fsc)435e74331d6SDarrick J. Wong xchk_fscount_count_frextents(
436e74331d6SDarrick J. Wong 	struct xfs_scrub	*sc,
437e74331d6SDarrick J. Wong 	struct xchk_fscounters	*fsc)
438e74331d6SDarrick J. Wong {
439e74331d6SDarrick J. Wong 	fsc->frextents = 0;
440e74331d6SDarrick J. Wong 	return 0;
441e74331d6SDarrick J. Wong }
442e74331d6SDarrick J. Wong #endif /* CONFIG_XFS_RT */
443e74331d6SDarrick J. Wong 
44475efa57dSDarrick J. Wong /*
44511f97e68SDarrick J. Wong  * Part 2: Comparing filesystem summary counters.  All we have to do here is
44611f97e68SDarrick J. Wong  * sum the percpu counters and compare them to what we've observed.
44711f97e68SDarrick J. Wong  */
44811f97e68SDarrick J. Wong 
44911f97e68SDarrick J. Wong /*
45075efa57dSDarrick J. Wong  * Is the @counter reasonably close to the @expected value?
45175efa57dSDarrick J. Wong  *
45275efa57dSDarrick J. Wong  * We neither locked nor froze anything in the filesystem while aggregating the
45375efa57dSDarrick J. Wong  * per-AG data to compute the @expected value, which means that the counter
45475efa57dSDarrick J. Wong  * could have changed.  We know the @old_value of the summation of the counter
45575efa57dSDarrick J. Wong  * before the aggregation, and we re-sum the counter now.  If the expected
45675efa57dSDarrick J. Wong  * value falls between the two summations, we're ok.
45775efa57dSDarrick J. Wong  *
45875efa57dSDarrick J. Wong  * Otherwise, we /might/ have a problem.  If the change in the summations is
45975efa57dSDarrick J. Wong  * more than we want to tolerate, the filesystem is probably busy and we should
46075efa57dSDarrick J. Wong  * just send back INCOMPLETE and see if userspace will try again.
461ce85a1e0SDarrick J. Wong  *
462ce85a1e0SDarrick J. Wong  * If we're repairing then we require an exact match.
46375efa57dSDarrick J. Wong  */
46475efa57dSDarrick J. Wong static inline bool
xchk_fscount_within_range(struct xfs_scrub * sc,const int64_t old_value,struct percpu_counter * counter,uint64_t expected)46575efa57dSDarrick J. Wong xchk_fscount_within_range(
46675efa57dSDarrick J. Wong 	struct xfs_scrub	*sc,
46775efa57dSDarrick J. Wong 	const int64_t		old_value,
46875efa57dSDarrick J. Wong 	struct percpu_counter	*counter,
46975efa57dSDarrick J. Wong 	uint64_t		expected)
47075efa57dSDarrick J. Wong {
47175efa57dSDarrick J. Wong 	int64_t			min_value, max_value;
47275efa57dSDarrick J. Wong 	int64_t			curr_value = percpu_counter_sum(counter);
47375efa57dSDarrick J. Wong 
47475efa57dSDarrick J. Wong 	trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
47575efa57dSDarrick J. Wong 			old_value);
47675efa57dSDarrick J. Wong 
47775efa57dSDarrick J. Wong 	/* Negative values are always wrong. */
47875efa57dSDarrick J. Wong 	if (curr_value < 0)
47975efa57dSDarrick J. Wong 		return false;
48075efa57dSDarrick J. Wong 
48175efa57dSDarrick J. Wong 	/* Exact matches are always ok. */
48275efa57dSDarrick J. Wong 	if (curr_value == expected)
48375efa57dSDarrick J. Wong 		return true;
48475efa57dSDarrick J. Wong 
48575efa57dSDarrick J. Wong 	min_value = min(old_value, curr_value);
48675efa57dSDarrick J. Wong 	max_value = max(old_value, curr_value);
48775efa57dSDarrick J. Wong 
48875efa57dSDarrick J. Wong 	/* Within the before-and-after range is ok. */
48975efa57dSDarrick J. Wong 	if (expected >= min_value && expected <= max_value)
49075efa57dSDarrick J. Wong 		return true;
49175efa57dSDarrick J. Wong 
492ce85a1e0SDarrick J. Wong 	/* Everything else is bad. */
49375efa57dSDarrick J. Wong 	return false;
49475efa57dSDarrick J. Wong }
49575efa57dSDarrick J. Wong 
49675efa57dSDarrick J. Wong /* Check the superblock counters. */
49775efa57dSDarrick J. Wong int
xchk_fscounters(struct xfs_scrub * sc)49875efa57dSDarrick J. Wong xchk_fscounters(
49975efa57dSDarrick J. Wong 	struct xfs_scrub	*sc)
50075efa57dSDarrick J. Wong {
50175efa57dSDarrick J. Wong 	struct xfs_mount	*mp = sc->mp;
50275efa57dSDarrick J. Wong 	struct xchk_fscounters	*fsc = sc->buf;
503e74331d6SDarrick J. Wong 	int64_t			icount, ifree, fdblocks, frextents;
504ce85a1e0SDarrick J. Wong 	bool			try_again = false;
50575efa57dSDarrick J. Wong 	int			error;
50675efa57dSDarrick J. Wong 
50775efa57dSDarrick J. Wong 	/* Snapshot the percpu counters. */
50875efa57dSDarrick J. Wong 	icount = percpu_counter_sum(&mp->m_icount);
50975efa57dSDarrick J. Wong 	ifree = percpu_counter_sum(&mp->m_ifree);
51075efa57dSDarrick J. Wong 	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
511e74331d6SDarrick J. Wong 	frextents = percpu_counter_sum(&mp->m_frextents);
51275efa57dSDarrick J. Wong 
51375efa57dSDarrick J. Wong 	/* No negative values, please! */
514ce85a1e0SDarrick J. Wong 	if (icount < 0 || ifree < 0)
51575efa57dSDarrick J. Wong 		xchk_set_corrupt(sc);
51675efa57dSDarrick J. Wong 
517ce85a1e0SDarrick J. Wong 	/*
518ce85a1e0SDarrick J. Wong 	 * If the filesystem is not frozen, the counter summation calls above
519ce85a1e0SDarrick J. Wong 	 * can race with xfs_mod_freecounter, which subtracts a requested space
520ce85a1e0SDarrick J. Wong 	 * reservation from the counter and undoes the subtraction if that made
521ce85a1e0SDarrick J. Wong 	 * the counter go negative.  Therefore, it's possible to see negative
522ce85a1e0SDarrick J. Wong 	 * values here, and we should only flag that as a corruption if we
523ce85a1e0SDarrick J. Wong 	 * froze the fs.  This is much more likely to happen with frextents
524ce85a1e0SDarrick J. Wong 	 * since there are no reserved pools.
525ce85a1e0SDarrick J. Wong 	 */
526ce85a1e0SDarrick J. Wong 	if (fdblocks < 0 || frextents < 0) {
527ce85a1e0SDarrick J. Wong 		if (!fsc->frozen)
528ce85a1e0SDarrick J. Wong 			return -EDEADLOCK;
529ce85a1e0SDarrick J. Wong 
530ce85a1e0SDarrick J. Wong 		xchk_set_corrupt(sc);
531ce85a1e0SDarrick J. Wong 		return 0;
532ce85a1e0SDarrick J. Wong 	}
533ce85a1e0SDarrick J. Wong 
53475efa57dSDarrick J. Wong 	/* See if icount is obviously wrong. */
53575efa57dSDarrick J. Wong 	if (icount < fsc->icount_min || icount > fsc->icount_max)
53675efa57dSDarrick J. Wong 		xchk_set_corrupt(sc);
53775efa57dSDarrick J. Wong 
53875efa57dSDarrick J. Wong 	/* See if fdblocks is obviously wrong. */
53975efa57dSDarrick J. Wong 	if (fdblocks > mp->m_sb.sb_dblocks)
54075efa57dSDarrick J. Wong 		xchk_set_corrupt(sc);
54175efa57dSDarrick J. Wong 
542e74331d6SDarrick J. Wong 	/* See if frextents is obviously wrong. */
543e74331d6SDarrick J. Wong 	if (frextents > mp->m_sb.sb_rextents)
544e74331d6SDarrick J. Wong 		xchk_set_corrupt(sc);
545e74331d6SDarrick J. Wong 
54675efa57dSDarrick J. Wong 	/*
54775efa57dSDarrick J. Wong 	 * If ifree exceeds icount by more than the minimum variance then
54875efa57dSDarrick J. Wong 	 * something's probably wrong with the counters.
54975efa57dSDarrick J. Wong 	 */
55075efa57dSDarrick J. Wong 	if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
55175efa57dSDarrick J. Wong 		xchk_set_corrupt(sc);
55275efa57dSDarrick J. Wong 
55375efa57dSDarrick J. Wong 	/* Walk the incore AG headers to calculate the expected counters. */
55475efa57dSDarrick J. Wong 	error = xchk_fscount_aggregate_agcounts(sc, fsc);
55575efa57dSDarrick J. Wong 	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
55675efa57dSDarrick J. Wong 		return error;
55775efa57dSDarrick J. Wong 
558e74331d6SDarrick J. Wong 	/* Count the free extents counter for rt volumes. */
559e74331d6SDarrick J. Wong 	error = xchk_fscount_count_frextents(sc, fsc);
560e74331d6SDarrick J. Wong 	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
561e74331d6SDarrick J. Wong 		return error;
562e74331d6SDarrick J. Wong 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
563e74331d6SDarrick J. Wong 		return 0;
564e74331d6SDarrick J. Wong 
565ce85a1e0SDarrick J. Wong 	/*
566ce85a1e0SDarrick J. Wong 	 * Compare the in-core counters with whatever we counted.  If the fs is
567ce85a1e0SDarrick J. Wong 	 * frozen, we treat the discrepancy as a corruption because the freeze
568ce85a1e0SDarrick J. Wong 	 * should have stabilized the counter values.  Otherwise, we need
569ce85a1e0SDarrick J. Wong 	 * userspace to call us back having granted us freeze permission.
570ce85a1e0SDarrick J. Wong 	 */
571ce85a1e0SDarrick J. Wong 	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
572ce85a1e0SDarrick J. Wong 				fsc->icount)) {
573ce85a1e0SDarrick J. Wong 		if (fsc->frozen)
57475efa57dSDarrick J. Wong 			xchk_set_corrupt(sc);
575ce85a1e0SDarrick J. Wong 		else
576ce85a1e0SDarrick J. Wong 			try_again = true;
577ce85a1e0SDarrick J. Wong 	}
57875efa57dSDarrick J. Wong 
579ce85a1e0SDarrick J. Wong 	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
580ce85a1e0SDarrick J. Wong 		if (fsc->frozen)
58175efa57dSDarrick J. Wong 			xchk_set_corrupt(sc);
582ce85a1e0SDarrick J. Wong 		else
583ce85a1e0SDarrick J. Wong 			try_again = true;
584ce85a1e0SDarrick J. Wong 	}
58575efa57dSDarrick J. Wong 
58675efa57dSDarrick J. Wong 	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
587ce85a1e0SDarrick J. Wong 			fsc->fdblocks)) {
588ce85a1e0SDarrick J. Wong 		if (fsc->frozen)
58975efa57dSDarrick J. Wong 			xchk_set_corrupt(sc);
590ce85a1e0SDarrick J. Wong 		else
591ce85a1e0SDarrick J. Wong 			try_again = true;
592ce85a1e0SDarrick J. Wong 	}
59375efa57dSDarrick J. Wong 
594e74331d6SDarrick J. Wong 	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
595ce85a1e0SDarrick J. Wong 			fsc->frextents)) {
596ce85a1e0SDarrick J. Wong 		if (fsc->frozen)
597e74331d6SDarrick J. Wong 			xchk_set_corrupt(sc);
598ce85a1e0SDarrick J. Wong 		else
599ce85a1e0SDarrick J. Wong 			try_again = true;
600ce85a1e0SDarrick J. Wong 	}
601ce85a1e0SDarrick J. Wong 
602ce85a1e0SDarrick J. Wong 	if (try_again)
603ce85a1e0SDarrick J. Wong 		return -EDEADLOCK;
604e74331d6SDarrick J. Wong 
60575efa57dSDarrick J. Wong 	return 0;
60675efa57dSDarrick J. Wong }
607