1ce85a1e0SDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later
275efa57dSDarrick J. Wong /*
3ecc73f8aSDarrick J. Wong * Copyright (C) 2019-2023 Oracle. All Rights Reserved.
4739a2fe0SDarrick J. Wong * Author: Darrick J. Wong <djwong@kernel.org>
575efa57dSDarrick J. Wong */
675efa57dSDarrick J. Wong #include "xfs.h"
775efa57dSDarrick J. Wong #include "xfs_fs.h"
875efa57dSDarrick J. Wong #include "xfs_shared.h"
975efa57dSDarrick J. Wong #include "xfs_format.h"
1075efa57dSDarrick J. Wong #include "xfs_trans_resv.h"
11ce85a1e0SDarrick J. Wong #include "xfs_log_format.h"
12ce85a1e0SDarrick J. Wong #include "xfs_trans.h"
1375efa57dSDarrick J. Wong #include "xfs_mount.h"
1475efa57dSDarrick J. Wong #include "xfs_alloc.h"
1575efa57dSDarrick J. Wong #include "xfs_ialloc.h"
1675efa57dSDarrick J. Wong #include "xfs_health.h"
17e147a756SDarrick J. Wong #include "xfs_btree.h"
189bbafc71SDave Chinner #include "xfs_ag.h"
19*1a6d63f2SDarrick J. Wong #include "xfs_rtbitmap.h"
20e74331d6SDarrick J. Wong #include "xfs_inode.h"
21ce85a1e0SDarrick J. Wong #include "xfs_icache.h"
2275efa57dSDarrick J. Wong #include "scrub/scrub.h"
2375efa57dSDarrick J. Wong #include "scrub/common.h"
2475efa57dSDarrick J. Wong #include "scrub/trace.h"
2575efa57dSDarrick J. Wong
/*
 * FS Summary Counters
 * ===================
 *
 * The basics of filesystem summary counter checking are that we iterate the
 * AGs counting the number of free blocks, free space btree blocks, per-AG
 * reservations, inodes, delayed allocation reservations, and free inodes.
 * Then we compare what we computed against the in-core counters.
 *
 * However, the reality is that summary counters are a tricky beast to check.
 * While we /could/ freeze the filesystem and scramble around the AGs counting
 * the free blocks, in practice we prefer not to do that for a scan because
 * freezing is costly.  To get around this, we added a per-cpu counter of the
 * delalloc reservations so that we can rotor around the AGs relatively
 * quickly, and we allow the counts to be slightly off because we're not taking
 * any locks while we do this.
 *
 * So the first thing we do is warm up the buffer cache in the setup routine by
 * walking all the AGs to make sure the incore per-AG structure has been
 * initialized.  The expected value calculation then iterates the incore per-AG
 * structures as quickly as it can.  We snapshot the percpu counters before and
 * after this operation and use the difference in counter values to guess at
 * our tolerance for mismatch between expected and actual counter values.
 */
5075efa57dSDarrick J. Wong
/* Scratch state shared by the fscounters setup, check, and cleanup routines. */
struct xchk_fscounters {
	/* Scrub context that owns this buffer; read back by the cleanup hook. */
	struct xfs_scrub	*sc;

	/* Expected counter values as computed by the scrubber. */
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	uint64_t		frextents;

	/* Bounds on icount, filled in by xfs_icount_range() during setup. */
	unsigned long long	icount_min;
	unsigned long long	icount_max;

	/* Set once xchk_fscounters_freeze() succeeds; cleared after thawing. */
	bool			frozen;
};
61e74331d6SDarrick J. Wong
6275efa57dSDarrick J. Wong /*
6375efa57dSDarrick J. Wong * Since the expected value computation is lockless but only browses incore
6475efa57dSDarrick J. Wong * values, the percpu counters should be fairly close to each other. However,
6575efa57dSDarrick J. Wong * we'll allow ourselves to be off by at least this (arbitrary) amount.
6675efa57dSDarrick J. Wong */
#define XCHK_FSCOUNT_MIN_VARIANCE	(512)	/* inodes; used by the ifree > icount check */
6875efa57dSDarrick J. Wong
6975efa57dSDarrick J. Wong /*
7075efa57dSDarrick J. Wong * Make sure the per-AG structure has been initialized from the on-disk header
7175efa57dSDarrick J. Wong * contents and trust that the incore counters match the ondisk counters. (The
7275efa57dSDarrick J. Wong * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
7375efa57dSDarrick J. Wong * summary counters after checking all AG headers). Do this from the setup
7475efa57dSDarrick J. Wong * function so that the inner AG aggregation loop runs as quickly as possible.
7575efa57dSDarrick J. Wong *
7675efa57dSDarrick J. Wong * This function runs during the setup phase /before/ we start checking any
7775efa57dSDarrick J. Wong * metadata.
7875efa57dSDarrick J. Wong */
7975efa57dSDarrick J. Wong STATIC int
xchk_fscount_warmup(struct xfs_scrub * sc)8075efa57dSDarrick J. Wong xchk_fscount_warmup(
8175efa57dSDarrick J. Wong struct xfs_scrub *sc)
8275efa57dSDarrick J. Wong {
8375efa57dSDarrick J. Wong struct xfs_mount *mp = sc->mp;
8475efa57dSDarrick J. Wong struct xfs_buf *agi_bp = NULL;
8575efa57dSDarrick J. Wong struct xfs_buf *agf_bp = NULL;
8675efa57dSDarrick J. Wong struct xfs_perag *pag = NULL;
8775efa57dSDarrick J. Wong xfs_agnumber_t agno;
8875efa57dSDarrick J. Wong int error = 0;
8975efa57dSDarrick J. Wong
90f250eedcSDave Chinner for_each_perag(mp, agno, pag) {
91f250eedcSDave Chinner if (xchk_should_terminate(sc, &error))
92f250eedcSDave Chinner break;
937ac2ff8bSDave Chinner if (xfs_perag_initialised_agi(pag) &&
947ac2ff8bSDave Chinner xfs_perag_initialised_agf(pag))
95f250eedcSDave Chinner continue;
9675efa57dSDarrick J. Wong
9775efa57dSDarrick J. Wong /* Lock both AG headers. */
9899b13c7fSDave Chinner error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
9975efa57dSDarrick J. Wong if (error)
10075efa57dSDarrick J. Wong break;
10108d3e84fSDave Chinner error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
10275efa57dSDarrick J. Wong if (error)
10375efa57dSDarrick J. Wong break;
10475efa57dSDarrick J. Wong
10575efa57dSDarrick J. Wong /*
10675efa57dSDarrick J. Wong * These are supposed to be initialized by the header read
10775efa57dSDarrick J. Wong * function.
10875efa57dSDarrick J. Wong */
1097ac2ff8bSDave Chinner if (!xfs_perag_initialised_agi(pag) ||
1107ac2ff8bSDave Chinner !xfs_perag_initialised_agf(pag)) {
11175efa57dSDarrick J. Wong error = -EFSCORRUPTED;
11275efa57dSDarrick J. Wong break;
113f250eedcSDave Chinner }
11475efa57dSDarrick J. Wong
11575efa57dSDarrick J. Wong xfs_buf_relse(agf_bp);
11675efa57dSDarrick J. Wong agf_bp = NULL;
11775efa57dSDarrick J. Wong xfs_buf_relse(agi_bp);
11875efa57dSDarrick J. Wong agi_bp = NULL;
11975efa57dSDarrick J. Wong }
12075efa57dSDarrick J. Wong
12175efa57dSDarrick J. Wong if (agf_bp)
12275efa57dSDarrick J. Wong xfs_buf_relse(agf_bp);
12375efa57dSDarrick J. Wong if (agi_bp)
12475efa57dSDarrick J. Wong xfs_buf_relse(agi_bp);
12575efa57dSDarrick J. Wong if (pag)
126c4d5660aSDave Chinner xfs_perag_rele(pag);
12775efa57dSDarrick J. Wong return error;
12875efa57dSDarrick J. Wong }
12975efa57dSDarrick J. Wong
130ce85a1e0SDarrick J. Wong static inline int
xchk_fsfreeze(struct xfs_scrub * sc)131ce85a1e0SDarrick J. Wong xchk_fsfreeze(
132ce85a1e0SDarrick J. Wong struct xfs_scrub *sc)
133ce85a1e0SDarrick J. Wong {
134ce85a1e0SDarrick J. Wong int error;
135ce85a1e0SDarrick J. Wong
136ce85a1e0SDarrick J. Wong error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
137ce85a1e0SDarrick J. Wong trace_xchk_fsfreeze(sc, error);
138ce85a1e0SDarrick J. Wong return error;
139ce85a1e0SDarrick J. Wong }
140ce85a1e0SDarrick J. Wong
141ce85a1e0SDarrick J. Wong static inline int
xchk_fsthaw(struct xfs_scrub * sc)142ce85a1e0SDarrick J. Wong xchk_fsthaw(
143ce85a1e0SDarrick J. Wong struct xfs_scrub *sc)
144ce85a1e0SDarrick J. Wong {
145ce85a1e0SDarrick J. Wong int error;
146ce85a1e0SDarrick J. Wong
147ce85a1e0SDarrick J. Wong /* This should always succeed, we have a kernel freeze */
148ce85a1e0SDarrick J. Wong error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
149ce85a1e0SDarrick J. Wong trace_xchk_fsthaw(sc, error);
150ce85a1e0SDarrick J. Wong return error;
151ce85a1e0SDarrick J. Wong }
152ce85a1e0SDarrick J. Wong
153ce85a1e0SDarrick J. Wong /*
154ce85a1e0SDarrick J. Wong * We couldn't stabilize the filesystem long enough to sample all the variables
155ce85a1e0SDarrick J. Wong * that comprise the summary counters and compare them to the percpu counters.
156ce85a1e0SDarrick J. Wong * We need to disable all writer threads, which means taking the first two
157ce85a1e0SDarrick J. Wong * freeze levels to put userspace to sleep, and the third freeze level to
158ce85a1e0SDarrick J. Wong * prevent background threads from starting new transactions. Take one level
159ce85a1e0SDarrick J. Wong * more to prevent other callers from unfreezing the filesystem while we run.
160ce85a1e0SDarrick J. Wong */
161ce85a1e0SDarrick J. Wong STATIC int
xchk_fscounters_freeze(struct xfs_scrub * sc)162ce85a1e0SDarrick J. Wong xchk_fscounters_freeze(
163ce85a1e0SDarrick J. Wong struct xfs_scrub *sc)
164ce85a1e0SDarrick J. Wong {
165ce85a1e0SDarrick J. Wong struct xchk_fscounters *fsc = sc->buf;
166ce85a1e0SDarrick J. Wong int error = 0;
167ce85a1e0SDarrick J. Wong
168ce85a1e0SDarrick J. Wong if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
169ce85a1e0SDarrick J. Wong sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
170ce85a1e0SDarrick J. Wong mnt_drop_write_file(sc->file);
171ce85a1e0SDarrick J. Wong }
172ce85a1e0SDarrick J. Wong
173ce85a1e0SDarrick J. Wong /* Try to grab a kernel freeze. */
174ce85a1e0SDarrick J. Wong while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
175ce85a1e0SDarrick J. Wong if (xchk_should_terminate(sc, &error))
176ce85a1e0SDarrick J. Wong return error;
177ce85a1e0SDarrick J. Wong
178ce85a1e0SDarrick J. Wong delay(HZ / 10);
179ce85a1e0SDarrick J. Wong }
180ce85a1e0SDarrick J. Wong if (error)
181ce85a1e0SDarrick J. Wong return error;
182ce85a1e0SDarrick J. Wong
183ce85a1e0SDarrick J. Wong fsc->frozen = true;
184ce85a1e0SDarrick J. Wong return 0;
185ce85a1e0SDarrick J. Wong }
186ce85a1e0SDarrick J. Wong
187ce85a1e0SDarrick J. Wong /* Thaw the filesystem after checking or repairing fscounters. */
188ce85a1e0SDarrick J. Wong STATIC void
xchk_fscounters_cleanup(void * buf)189ce85a1e0SDarrick J. Wong xchk_fscounters_cleanup(
190ce85a1e0SDarrick J. Wong void *buf)
191ce85a1e0SDarrick J. Wong {
192ce85a1e0SDarrick J. Wong struct xchk_fscounters *fsc = buf;
193ce85a1e0SDarrick J. Wong struct xfs_scrub *sc = fsc->sc;
194ce85a1e0SDarrick J. Wong int error;
195ce85a1e0SDarrick J. Wong
196ce85a1e0SDarrick J. Wong if (!fsc->frozen)
197ce85a1e0SDarrick J. Wong return;
198ce85a1e0SDarrick J. Wong
199ce85a1e0SDarrick J. Wong error = xchk_fsthaw(sc);
200ce85a1e0SDarrick J. Wong if (error)
201ce85a1e0SDarrick J. Wong xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
202ce85a1e0SDarrick J. Wong else
203ce85a1e0SDarrick J. Wong fsc->frozen = false;
204ce85a1e0SDarrick J. Wong }
205ce85a1e0SDarrick J. Wong
20675efa57dSDarrick J. Wong int
xchk_setup_fscounters(struct xfs_scrub * sc)20775efa57dSDarrick J. Wong xchk_setup_fscounters(
208026f57ebSDarrick J. Wong struct xfs_scrub *sc)
20975efa57dSDarrick J. Wong {
21075efa57dSDarrick J. Wong struct xchk_fscounters *fsc;
21175efa57dSDarrick J. Wong int error;
21275efa57dSDarrick J. Wong
213466c525dSDarrick J. Wong /*
214466c525dSDarrick J. Wong * If the AGF doesn't track btreeblks, we have to lock the AGF to count
215466c525dSDarrick J. Wong * btree block usage by walking the actual btrees.
216466c525dSDarrick J. Wong */
217466c525dSDarrick J. Wong if (!xfs_has_lazysbcount(sc->mp))
218466c525dSDarrick J. Wong xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
219466c525dSDarrick J. Wong
220306195f3SDarrick J. Wong sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
22175efa57dSDarrick J. Wong if (!sc->buf)
22275efa57dSDarrick J. Wong return -ENOMEM;
223ce85a1e0SDarrick J. Wong sc->buf_cleanup = xchk_fscounters_cleanup;
22475efa57dSDarrick J. Wong fsc = sc->buf;
225e74331d6SDarrick J. Wong fsc->sc = sc;
22675efa57dSDarrick J. Wong
22775efa57dSDarrick J. Wong xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
22875efa57dSDarrick J. Wong
22975efa57dSDarrick J. Wong /* We must get the incore counters set up before we can proceed. */
23075efa57dSDarrick J. Wong error = xchk_fscount_warmup(sc);
23175efa57dSDarrick J. Wong if (error)
23275efa57dSDarrick J. Wong return error;
23375efa57dSDarrick J. Wong
234ce85a1e0SDarrick J. Wong /*
235ce85a1e0SDarrick J. Wong * Pause all writer activity in the filesystem while we're scrubbing to
236ce85a1e0SDarrick J. Wong * reduce the likelihood of background perturbations to the counters
237ce85a1e0SDarrick J. Wong * throwing off our calculations.
238ce85a1e0SDarrick J. Wong */
239ce85a1e0SDarrick J. Wong if (sc->flags & XCHK_TRY_HARDER) {
240ce85a1e0SDarrick J. Wong error = xchk_fscounters_freeze(sc);
241ce85a1e0SDarrick J. Wong if (error)
242ce85a1e0SDarrick J. Wong return error;
243ce85a1e0SDarrick J. Wong }
244ce85a1e0SDarrick J. Wong
245ce85a1e0SDarrick J. Wong return xfs_trans_alloc_empty(sc->mp, &sc->tp);
24675efa57dSDarrick J. Wong }
24775efa57dSDarrick J. Wong
24811f97e68SDarrick J. Wong /*
24911f97e68SDarrick J. Wong * Part 1: Collecting filesystem summary counts. For each AG, we add its
25011f97e68SDarrick J. Wong * summary counts (total inodes, free inodes, free data blocks) to an incore
25111f97e68SDarrick J. Wong * copy of the overall filesystem summary counts.
25211f97e68SDarrick J. Wong *
25311f97e68SDarrick J. Wong * To avoid false corruption reports in part 2, any failure in this part must
25411f97e68SDarrick J. Wong * set the INCOMPLETE flag even when a negative errno is returned. This care
25511f97e68SDarrick J. Wong * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
25611f97e68SDarrick J. Wong * ECANCELED) that are absorbed into a scrub state flag update by
25711f97e68SDarrick J. Wong * xchk_*_process_error.
25811f97e68SDarrick J. Wong */
25911f97e68SDarrick J. Wong
260e147a756SDarrick J. Wong /* Count free space btree blocks manually for pre-lazysbcount filesystems. */
261e147a756SDarrick J. Wong static int
xchk_fscount_btreeblks(struct xfs_scrub * sc,struct xchk_fscounters * fsc,xfs_agnumber_t agno)262e147a756SDarrick J. Wong xchk_fscount_btreeblks(
263e147a756SDarrick J. Wong struct xfs_scrub *sc,
264e147a756SDarrick J. Wong struct xchk_fscounters *fsc,
265e147a756SDarrick J. Wong xfs_agnumber_t agno)
266e147a756SDarrick J. Wong {
267e147a756SDarrick J. Wong xfs_extlen_t blocks;
268e147a756SDarrick J. Wong int error;
269e147a756SDarrick J. Wong
27048c6615cSDarrick J. Wong error = xchk_ag_init_existing(sc, agno, &sc->sa);
271e147a756SDarrick J. Wong if (error)
27261e0d0ccSDarrick J. Wong goto out_free;
273e147a756SDarrick J. Wong
274e147a756SDarrick J. Wong error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
275e147a756SDarrick J. Wong if (error)
276e147a756SDarrick J. Wong goto out_free;
277e147a756SDarrick J. Wong fsc->fdblocks += blocks - 1;
278e147a756SDarrick J. Wong
279e147a756SDarrick J. Wong error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
280e147a756SDarrick J. Wong if (error)
281e147a756SDarrick J. Wong goto out_free;
282e147a756SDarrick J. Wong fsc->fdblocks += blocks - 1;
283e147a756SDarrick J. Wong
284e147a756SDarrick J. Wong out_free:
285e147a756SDarrick J. Wong xchk_ag_free(sc, &sc->sa);
286e147a756SDarrick J. Wong return error;
287e147a756SDarrick J. Wong }
288e147a756SDarrick J. Wong
28975efa57dSDarrick J. Wong /*
29075efa57dSDarrick J. Wong * Calculate what the global in-core counters ought to be from the incore
29175efa57dSDarrick J. Wong * per-AG structure. Callers can compare this to the actual in-core counters
29275efa57dSDarrick J. Wong * to estimate by how much both in-core and on-disk counters need to be
29375efa57dSDarrick J. Wong * adjusted.
29475efa57dSDarrick J. Wong */
29575efa57dSDarrick J. Wong STATIC int
xchk_fscount_aggregate_agcounts(struct xfs_scrub * sc,struct xchk_fscounters * fsc)29675efa57dSDarrick J. Wong xchk_fscount_aggregate_agcounts(
29775efa57dSDarrick J. Wong struct xfs_scrub *sc,
29875efa57dSDarrick J. Wong struct xchk_fscounters *fsc)
29975efa57dSDarrick J. Wong {
30075efa57dSDarrick J. Wong struct xfs_mount *mp = sc->mp;
30175efa57dSDarrick J. Wong struct xfs_perag *pag;
30275efa57dSDarrick J. Wong uint64_t delayed;
30375efa57dSDarrick J. Wong xfs_agnumber_t agno;
30475efa57dSDarrick J. Wong int tries = 8;
3058ef34723SDarrick J. Wong int error = 0;
30675efa57dSDarrick J. Wong
30775efa57dSDarrick J. Wong retry:
30875efa57dSDarrick J. Wong fsc->icount = 0;
30975efa57dSDarrick J. Wong fsc->ifree = 0;
31075efa57dSDarrick J. Wong fsc->fdblocks = 0;
31175efa57dSDarrick J. Wong
312f250eedcSDave Chinner for_each_perag(mp, agno, pag) {
313f250eedcSDave Chinner if (xchk_should_terminate(sc, &error))
314f250eedcSDave Chinner break;
31575efa57dSDarrick J. Wong
31675efa57dSDarrick J. Wong /* This somehow got unset since the warmup? */
3177ac2ff8bSDave Chinner if (!xfs_perag_initialised_agi(pag) ||
3187ac2ff8bSDave Chinner !xfs_perag_initialised_agf(pag)) {
319f250eedcSDave Chinner error = -EFSCORRUPTED;
320f250eedcSDave Chinner break;
32175efa57dSDarrick J. Wong }
32275efa57dSDarrick J. Wong
32375efa57dSDarrick J. Wong /* Count all the inodes */
32475efa57dSDarrick J. Wong fsc->icount += pag->pagi_count;
32575efa57dSDarrick J. Wong fsc->ifree += pag->pagi_freecount;
32675efa57dSDarrick J. Wong
32775efa57dSDarrick J. Wong /* Add up the free/freelist/bnobt/cntbt blocks */
32875efa57dSDarrick J. Wong fsc->fdblocks += pag->pagf_freeblks;
32975efa57dSDarrick J. Wong fsc->fdblocks += pag->pagf_flcount;
330ebd9027dSDave Chinner if (xfs_has_lazysbcount(sc->mp)) {
33175efa57dSDarrick J. Wong fsc->fdblocks += pag->pagf_btreeblks;
332e147a756SDarrick J. Wong } else {
333e147a756SDarrick J. Wong error = xchk_fscount_btreeblks(sc, fsc, agno);
334f250eedcSDave Chinner if (error)
335e147a756SDarrick J. Wong break;
336e147a756SDarrick J. Wong }
33775efa57dSDarrick J. Wong
33875efa57dSDarrick J. Wong /*
33975efa57dSDarrick J. Wong * Per-AG reservations are taken out of the incore counters,
34075efa57dSDarrick J. Wong * so they must be left out of the free blocks computation.
34175efa57dSDarrick J. Wong */
34275efa57dSDarrick J. Wong fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
34375efa57dSDarrick J. Wong fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;
34475efa57dSDarrick J. Wong
34575efa57dSDarrick J. Wong }
346f250eedcSDave Chinner if (pag)
347c4d5660aSDave Chinner xfs_perag_rele(pag);
34811f97e68SDarrick J. Wong if (error) {
34911f97e68SDarrick J. Wong xchk_set_incomplete(sc);
3508ef34723SDarrick J. Wong return error;
35111f97e68SDarrick J. Wong }
3528ef34723SDarrick J. Wong
35375efa57dSDarrick J. Wong /*
35475efa57dSDarrick J. Wong * The global incore space reservation is taken from the incore
35575efa57dSDarrick J. Wong * counters, so leave that out of the computation.
35675efa57dSDarrick J. Wong */
35775efa57dSDarrick J. Wong fsc->fdblocks -= mp->m_resblks_avail;
35875efa57dSDarrick J. Wong
35975efa57dSDarrick J. Wong /*
36075efa57dSDarrick J. Wong * Delayed allocation reservations are taken out of the incore counters
36175efa57dSDarrick J. Wong * but not recorded on disk, so leave them and their indlen blocks out
36275efa57dSDarrick J. Wong * of the computation.
36375efa57dSDarrick J. Wong */
36475efa57dSDarrick J. Wong delayed = percpu_counter_sum(&mp->m_delalloc_blks);
36575efa57dSDarrick J. Wong fsc->fdblocks -= delayed;
36675efa57dSDarrick J. Wong
36775efa57dSDarrick J. Wong trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
36875efa57dSDarrick J. Wong delayed);
36975efa57dSDarrick J. Wong
37075efa57dSDarrick J. Wong
37175efa57dSDarrick J. Wong /* Bail out if the values we compute are totally nonsense. */
37275efa57dSDarrick J. Wong if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
37375efa57dSDarrick J. Wong fsc->fdblocks > mp->m_sb.sb_dblocks ||
37475efa57dSDarrick J. Wong fsc->ifree > fsc->icount_max)
37575efa57dSDarrick J. Wong return -EFSCORRUPTED;
37675efa57dSDarrick J. Wong
37775efa57dSDarrick J. Wong /*
37875efa57dSDarrick J. Wong * If ifree > icount then we probably had some perturbation in the
37975efa57dSDarrick J. Wong * counters while we were calculating things. We'll try a few times
38075efa57dSDarrick J. Wong * to maintain ifree <= icount before giving up.
38175efa57dSDarrick J. Wong */
38275efa57dSDarrick J. Wong if (fsc->ifree > fsc->icount) {
38375efa57dSDarrick J. Wong if (tries--)
38475efa57dSDarrick J. Wong goto retry;
385ce85a1e0SDarrick J. Wong return -EDEADLOCK;
38675efa57dSDarrick J. Wong }
38775efa57dSDarrick J. Wong
38875efa57dSDarrick J. Wong return 0;
38975efa57dSDarrick J. Wong }
39075efa57dSDarrick J. Wong
391e74331d6SDarrick J. Wong #ifdef CONFIG_XFS_RT
392e74331d6SDarrick J. Wong STATIC int
xchk_fscount_add_frextent(struct xfs_mount * mp,struct xfs_trans * tp,const struct xfs_rtalloc_rec * rec,void * priv)393e74331d6SDarrick J. Wong xchk_fscount_add_frextent(
394e74331d6SDarrick J. Wong struct xfs_mount *mp,
395e74331d6SDarrick J. Wong struct xfs_trans *tp,
396e74331d6SDarrick J. Wong const struct xfs_rtalloc_rec *rec,
397e74331d6SDarrick J. Wong void *priv)
398e74331d6SDarrick J. Wong {
399e74331d6SDarrick J. Wong struct xchk_fscounters *fsc = priv;
400e74331d6SDarrick J. Wong int error = 0;
401e74331d6SDarrick J. Wong
402e74331d6SDarrick J. Wong fsc->frextents += rec->ar_extcount;
403e74331d6SDarrick J. Wong
404e74331d6SDarrick J. Wong xchk_should_terminate(fsc->sc, &error);
405e74331d6SDarrick J. Wong return error;
406e74331d6SDarrick J. Wong }
407e74331d6SDarrick J. Wong
408e74331d6SDarrick J. Wong /* Calculate the number of free realtime extents from the realtime bitmap. */
409e74331d6SDarrick J. Wong STATIC int
xchk_fscount_count_frextents(struct xfs_scrub * sc,struct xchk_fscounters * fsc)410e74331d6SDarrick J. Wong xchk_fscount_count_frextents(
411e74331d6SDarrick J. Wong struct xfs_scrub *sc,
412e74331d6SDarrick J. Wong struct xchk_fscounters *fsc)
413e74331d6SDarrick J. Wong {
414e74331d6SDarrick J. Wong struct xfs_mount *mp = sc->mp;
415e74331d6SDarrick J. Wong int error;
416e74331d6SDarrick J. Wong
417e74331d6SDarrick J. Wong fsc->frextents = 0;
418e74331d6SDarrick J. Wong if (!xfs_has_realtime(mp))
419e74331d6SDarrick J. Wong return 0;
420e74331d6SDarrick J. Wong
421e74331d6SDarrick J. Wong xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
422e74331d6SDarrick J. Wong error = xfs_rtalloc_query_all(sc->mp, sc->tp,
423e74331d6SDarrick J. Wong xchk_fscount_add_frextent, fsc);
424e74331d6SDarrick J. Wong if (error) {
425e74331d6SDarrick J. Wong xchk_set_incomplete(sc);
426e74331d6SDarrick J. Wong goto out_unlock;
427e74331d6SDarrick J. Wong }
428e74331d6SDarrick J. Wong
429e74331d6SDarrick J. Wong out_unlock:
430e74331d6SDarrick J. Wong xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
431e74331d6SDarrick J. Wong return error;
432e74331d6SDarrick J. Wong }
433e74331d6SDarrick J. Wong #else
434e74331d6SDarrick J. Wong STATIC int
xchk_fscount_count_frextents(struct xfs_scrub * sc,struct xchk_fscounters * fsc)435e74331d6SDarrick J. Wong xchk_fscount_count_frextents(
436e74331d6SDarrick J. Wong struct xfs_scrub *sc,
437e74331d6SDarrick J. Wong struct xchk_fscounters *fsc)
438e74331d6SDarrick J. Wong {
439e74331d6SDarrick J. Wong fsc->frextents = 0;
440e74331d6SDarrick J. Wong return 0;
441e74331d6SDarrick J. Wong }
442e74331d6SDarrick J. Wong #endif /* CONFIG_XFS_RT */
443e74331d6SDarrick J. Wong
44475efa57dSDarrick J. Wong /*
44511f97e68SDarrick J. Wong * Part 2: Comparing filesystem summary counters. All we have to do here is
44611f97e68SDarrick J. Wong * sum the percpu counters and compare them to what we've observed.
44711f97e68SDarrick J. Wong */
44811f97e68SDarrick J. Wong
44911f97e68SDarrick J. Wong /*
45075efa57dSDarrick J. Wong * Is the @counter reasonably close to the @expected value?
45175efa57dSDarrick J. Wong *
45275efa57dSDarrick J. Wong * We neither locked nor froze anything in the filesystem while aggregating the
45375efa57dSDarrick J. Wong * per-AG data to compute the @expected value, which means that the counter
45475efa57dSDarrick J. Wong * could have changed. We know the @old_value of the summation of the counter
45575efa57dSDarrick J. Wong * before the aggregation, and we re-sum the counter now. If the expected
45675efa57dSDarrick J. Wong * value falls between the two summations, we're ok.
45775efa57dSDarrick J. Wong *
45875efa57dSDarrick J. Wong * Otherwise, we /might/ have a problem. If the change in the summations is
45975efa57dSDarrick J. Wong * more than we want to tolerate, the filesystem is probably busy and we should
46075efa57dSDarrick J. Wong * just send back INCOMPLETE and see if userspace will try again.
461ce85a1e0SDarrick J. Wong *
462ce85a1e0SDarrick J. Wong * If we're repairing then we require an exact match.
46375efa57dSDarrick J. Wong */
46475efa57dSDarrick J. Wong static inline bool
xchk_fscount_within_range(struct xfs_scrub * sc,const int64_t old_value,struct percpu_counter * counter,uint64_t expected)46575efa57dSDarrick J. Wong xchk_fscount_within_range(
46675efa57dSDarrick J. Wong struct xfs_scrub *sc,
46775efa57dSDarrick J. Wong const int64_t old_value,
46875efa57dSDarrick J. Wong struct percpu_counter *counter,
46975efa57dSDarrick J. Wong uint64_t expected)
47075efa57dSDarrick J. Wong {
47175efa57dSDarrick J. Wong int64_t min_value, max_value;
47275efa57dSDarrick J. Wong int64_t curr_value = percpu_counter_sum(counter);
47375efa57dSDarrick J. Wong
47475efa57dSDarrick J. Wong trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
47575efa57dSDarrick J. Wong old_value);
47675efa57dSDarrick J. Wong
47775efa57dSDarrick J. Wong /* Negative values are always wrong. */
47875efa57dSDarrick J. Wong if (curr_value < 0)
47975efa57dSDarrick J. Wong return false;
48075efa57dSDarrick J. Wong
48175efa57dSDarrick J. Wong /* Exact matches are always ok. */
48275efa57dSDarrick J. Wong if (curr_value == expected)
48375efa57dSDarrick J. Wong return true;
48475efa57dSDarrick J. Wong
48575efa57dSDarrick J. Wong min_value = min(old_value, curr_value);
48675efa57dSDarrick J. Wong max_value = max(old_value, curr_value);
48775efa57dSDarrick J. Wong
48875efa57dSDarrick J. Wong /* Within the before-and-after range is ok. */
48975efa57dSDarrick J. Wong if (expected >= min_value && expected <= max_value)
49075efa57dSDarrick J. Wong return true;
49175efa57dSDarrick J. Wong
492ce85a1e0SDarrick J. Wong /* Everything else is bad. */
49375efa57dSDarrick J. Wong return false;
49475efa57dSDarrick J. Wong }
49575efa57dSDarrick J. Wong
49675efa57dSDarrick J. Wong /* Check the superblock counters. */
49775efa57dSDarrick J. Wong int
xchk_fscounters(struct xfs_scrub * sc)49875efa57dSDarrick J. Wong xchk_fscounters(
49975efa57dSDarrick J. Wong struct xfs_scrub *sc)
50075efa57dSDarrick J. Wong {
50175efa57dSDarrick J. Wong struct xfs_mount *mp = sc->mp;
50275efa57dSDarrick J. Wong struct xchk_fscounters *fsc = sc->buf;
503e74331d6SDarrick J. Wong int64_t icount, ifree, fdblocks, frextents;
504ce85a1e0SDarrick J. Wong bool try_again = false;
50575efa57dSDarrick J. Wong int error;
50675efa57dSDarrick J. Wong
50775efa57dSDarrick J. Wong /* Snapshot the percpu counters. */
50875efa57dSDarrick J. Wong icount = percpu_counter_sum(&mp->m_icount);
50975efa57dSDarrick J. Wong ifree = percpu_counter_sum(&mp->m_ifree);
51075efa57dSDarrick J. Wong fdblocks = percpu_counter_sum(&mp->m_fdblocks);
511e74331d6SDarrick J. Wong frextents = percpu_counter_sum(&mp->m_frextents);
51275efa57dSDarrick J. Wong
51375efa57dSDarrick J. Wong /* No negative values, please! */
514ce85a1e0SDarrick J. Wong if (icount < 0 || ifree < 0)
51575efa57dSDarrick J. Wong xchk_set_corrupt(sc);
51675efa57dSDarrick J. Wong
517ce85a1e0SDarrick J. Wong /*
518ce85a1e0SDarrick J. Wong * If the filesystem is not frozen, the counter summation calls above
519ce85a1e0SDarrick J. Wong * can race with xfs_mod_freecounter, which subtracts a requested space
520ce85a1e0SDarrick J. Wong * reservation from the counter and undoes the subtraction if that made
521ce85a1e0SDarrick J. Wong * the counter go negative. Therefore, it's possible to see negative
522ce85a1e0SDarrick J. Wong * values here, and we should only flag that as a corruption if we
523ce85a1e0SDarrick J. Wong * froze the fs. This is much more likely to happen with frextents
524ce85a1e0SDarrick J. Wong * since there are no reserved pools.
525ce85a1e0SDarrick J. Wong */
526ce85a1e0SDarrick J. Wong if (fdblocks < 0 || frextents < 0) {
527ce85a1e0SDarrick J. Wong if (!fsc->frozen)
528ce85a1e0SDarrick J. Wong return -EDEADLOCK;
529ce85a1e0SDarrick J. Wong
530ce85a1e0SDarrick J. Wong xchk_set_corrupt(sc);
531ce85a1e0SDarrick J. Wong return 0;
532ce85a1e0SDarrick J. Wong }
533ce85a1e0SDarrick J. Wong
53475efa57dSDarrick J. Wong /* See if icount is obviously wrong. */
53575efa57dSDarrick J. Wong if (icount < fsc->icount_min || icount > fsc->icount_max)
53675efa57dSDarrick J. Wong xchk_set_corrupt(sc);
53775efa57dSDarrick J. Wong
53875efa57dSDarrick J. Wong /* See if fdblocks is obviously wrong. */
53975efa57dSDarrick J. Wong if (fdblocks > mp->m_sb.sb_dblocks)
54075efa57dSDarrick J. Wong xchk_set_corrupt(sc);
54175efa57dSDarrick J. Wong
542e74331d6SDarrick J. Wong /* See if frextents is obviously wrong. */
543e74331d6SDarrick J. Wong if (frextents > mp->m_sb.sb_rextents)
544e74331d6SDarrick J. Wong xchk_set_corrupt(sc);
545e74331d6SDarrick J. Wong
54675efa57dSDarrick J. Wong /*
54775efa57dSDarrick J. Wong * If ifree exceeds icount by more than the minimum variance then
54875efa57dSDarrick J. Wong * something's probably wrong with the counters.
54975efa57dSDarrick J. Wong */
55075efa57dSDarrick J. Wong if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
55175efa57dSDarrick J. Wong xchk_set_corrupt(sc);
55275efa57dSDarrick J. Wong
55375efa57dSDarrick J. Wong /* Walk the incore AG headers to calculate the expected counters. */
55475efa57dSDarrick J. Wong error = xchk_fscount_aggregate_agcounts(sc, fsc);
55575efa57dSDarrick J. Wong if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
55675efa57dSDarrick J. Wong return error;
55775efa57dSDarrick J. Wong
558e74331d6SDarrick J. Wong /* Count the free extents counter for rt volumes. */
559e74331d6SDarrick J. Wong error = xchk_fscount_count_frextents(sc, fsc);
560e74331d6SDarrick J. Wong if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
561e74331d6SDarrick J. Wong return error;
562e74331d6SDarrick J. Wong if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
563e74331d6SDarrick J. Wong return 0;
564e74331d6SDarrick J. Wong
565ce85a1e0SDarrick J. Wong /*
566ce85a1e0SDarrick J. Wong * Compare the in-core counters with whatever we counted. If the fs is
567ce85a1e0SDarrick J. Wong * frozen, we treat the discrepancy as a corruption because the freeze
568ce85a1e0SDarrick J. Wong * should have stabilized the counter values. Otherwise, we need
569ce85a1e0SDarrick J. Wong * userspace to call us back having granted us freeze permission.
570ce85a1e0SDarrick J. Wong */
571ce85a1e0SDarrick J. Wong if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
572ce85a1e0SDarrick J. Wong fsc->icount)) {
573ce85a1e0SDarrick J. Wong if (fsc->frozen)
57475efa57dSDarrick J. Wong xchk_set_corrupt(sc);
575ce85a1e0SDarrick J. Wong else
576ce85a1e0SDarrick J. Wong try_again = true;
577ce85a1e0SDarrick J. Wong }
57875efa57dSDarrick J. Wong
579ce85a1e0SDarrick J. Wong if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
580ce85a1e0SDarrick J. Wong if (fsc->frozen)
58175efa57dSDarrick J. Wong xchk_set_corrupt(sc);
582ce85a1e0SDarrick J. Wong else
583ce85a1e0SDarrick J. Wong try_again = true;
584ce85a1e0SDarrick J. Wong }
58575efa57dSDarrick J. Wong
58675efa57dSDarrick J. Wong if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
587ce85a1e0SDarrick J. Wong fsc->fdblocks)) {
588ce85a1e0SDarrick J. Wong if (fsc->frozen)
58975efa57dSDarrick J. Wong xchk_set_corrupt(sc);
590ce85a1e0SDarrick J. Wong else
591ce85a1e0SDarrick J. Wong try_again = true;
592ce85a1e0SDarrick J. Wong }
59375efa57dSDarrick J. Wong
594e74331d6SDarrick J. Wong if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
595ce85a1e0SDarrick J. Wong fsc->frextents)) {
596ce85a1e0SDarrick J. Wong if (fsc->frozen)
597e74331d6SDarrick J. Wong xchk_set_corrupt(sc);
598ce85a1e0SDarrick J. Wong else
599ce85a1e0SDarrick J. Wong try_again = true;
600ce85a1e0SDarrick J. Wong }
601ce85a1e0SDarrick J. Wong
602ce85a1e0SDarrick J. Wong if (try_again)
603ce85a1e0SDarrick J. Wong return -EDEADLOCK;
604e74331d6SDarrick J. Wong
60575efa57dSDarrick J. Wong return 0;
60675efa57dSDarrick J. Wong }
607