// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_mount.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_health.h"
#include "xfs_btree.h"
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"

/*
 * FS Summary Counters
 * ===================
 *
 * The basics of filesystem summary counter checking are that we iterate the
 * AGs counting the number of free blocks, free space btree blocks, per-AG
 * reservations, inodes, delayed allocation reservations, and free inodes.
 * Then we compare what we computed against the in-core counters.
 *
 * However, the reality is that summary counters are a tricky beast to check.
 * While we /could/ freeze the filesystem and scramble around the AGs counting
 * the free blocks, in practice we prefer not do that for a scan because
 * freezing is costly.  To get around this, we added a per-cpu counter of the
 * delalloc reservations so that we can rotor around the AGs relatively
 * quickly, and we allow the counts to be slightly off because we're not taking
 * any locks while we do this.
 *
 * So the first thing we do is warm up the buffer cache in the setup routine by
 * walking all the AGs to make sure the incore per-AG structure has been
 * initialized.  The expected value calculation then iterates the incore per-AG
 * structures as quickly as it can.  We snapshot the percpu counters before and
 * after this operation and use the difference in counter values to guess at
 * our tolerance for mismatch between expected and actual counter values.
 */

/* Per-scrub context for the summary counter check. */
struct xchk_fscounters {
	/* Back pointer to the scrub context that owns this buffer. */
	struct xfs_scrub	*sc;

	/* Expected counter values, aggregated from the incore per-AG data. */
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	uint64_t		frextents;

	/* Sanity bounds on icount, from xfs_icount_range(). */
	unsigned long long	icount_min;
	unsigned long long	icount_max;

	/* True while we hold a kernel freeze on the filesystem. */
	bool			frozen;
};
/*
 * Since the expected value computation is lockless but only browses incore
 * values, the percpu counters should be fairly close to each other.  However,
 * we'll allow ourselves to be off by at least this (arbitrary) amount.
 */
#define XCHK_FSCOUNT_MIN_VARIANCE	(512)

/*
 * Make sure the per-AG structure has been initialized from the on-disk header
 * contents and trust that the incore counters match the ondisk counters.  (The
 * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
 * summary counters after checking all AG headers).  Do this from the setup
 * function so that the inner AG aggregation loop runs as quickly as possible.
 *
 * This function runs during the setup phase /before/ we start checking any
 * metadata.
 *
 * Returns 0 on success or a negative errno; -EFSCORRUPTED if an AG header
 * read did not leave the perag marked initialized.
 */
STATIC int
xchk_fscount_warmup(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp = NULL;
	struct xfs_buf		*agf_bp = NULL;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno;
	int			error = 0;

	for_each_perag(mp, agno, pag) {
		if (xchk_should_terminate(sc, &error))
			break;
		/* Already warmed up from a previous pass?  Skip the I/O. */
		if (xfs_perag_initialised_agi(pag) &&
		    xfs_perag_initialised_agf(pag))
			continue;

		/* Lock both AG headers. */
		error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
		if (error)
			break;
		error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
		if (error)
			break;

		/*
		 * These are supposed to be initialized by the header read
		 * function.
		 */
		if (!xfs_perag_initialised_agi(pag) ||
		    !xfs_perag_initialised_agf(pag)) {
			error = -EFSCORRUPTED;
			break;
		}

		xfs_buf_relse(agf_bp);
		agf_bp = NULL;
		xfs_buf_relse(agi_bp);
		agi_bp = NULL;
	}

	/* Release whatever we still hold after breaking out of the loop. */
	if (agf_bp)
		xfs_buf_relse(agf_bp);
	if (agi_bp)
		xfs_buf_relse(agi_bp);
	if (pag)
		xfs_perag_rele(pag);
	return error;
}

/* Freeze the filesystem as the kernel freeze holder; trace the result. */
static inline int
xchk_fsfreeze(
	struct xfs_scrub	*sc)
{
	int			error;

	error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
	trace_xchk_fsfreeze(sc, error);
	return error;
}

/* Undo xchk_fsfreeze; trace the result. */
static inline int
xchk_fsthaw(
	struct xfs_scrub	*sc)
{
	int			error;

	/* This should always succeed, we have a kernel freeze */
	error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
	trace_xchk_fsthaw(sc, error);
	return error;
}

/*
 * We couldn't stabilize the filesystem long enough to sample all the variables
 * that comprise the summary counters and compare them to the percpu counters.
 * We need to disable all writer threads, which means taking the first two
 * freeze levels to put userspace to sleep, and the third freeze level to
 * prevent background threads from starting new transactions.  Take one level
 * more to prevent other callers from unfreezing the filesystem while we run.
 */
STATIC int
xchk_fscounters_freeze(
	struct xfs_scrub	*sc)
{
	struct xchk_fscounters	*fsc = sc->buf;
	int			error = 0;

	/*
	 * Drop our own write access first; holding it would deadlock the
	 * freeze below against ourselves.
	 */
	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
		mnt_drop_write_file(sc->file);
	}

	/* Try to grab a kernel freeze. */
	while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
		if (xchk_should_terminate(sc, &error))
			return error;

		delay(HZ / 10);
	}
	if (error)
		return error;

	fsc->frozen = true;
	return 0;
}

/* Thaw the filesystem after checking or repairing fscounters. */
STATIC void
xchk_fscounters_cleanup(
	void			*buf)
{
	struct xchk_fscounters	*fsc = buf;
	struct xfs_scrub	*sc = fsc->sc;
	int			error;

	if (!fsc->frozen)
		return;

	error = xchk_fsthaw(sc);
	if (error)
		xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
	else
		fsc->frozen = false;
}

/*
 * Set up to scrub the filesystem summary counters: allocate the tracking
 * structure, warm up the per-AG data, optionally freeze the fs on a retry
 * (XCHK_TRY_HARDER), and allocate an empty transaction.
 */
int
xchk_setup_fscounters(
	struct xfs_scrub	*sc)
{
	struct xchk_fscounters	*fsc;
	int			error;

	/*
	 * If the AGF doesn't track btreeblks, we have to lock the AGF to count
	 * btree block usage by walking the actual btrees.
	 */
	if (!xfs_has_lazysbcount(sc->mp))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
	if (!sc->buf)
		return -ENOMEM;
	/* buf_cleanup thaws the fs if we froze it and then error out. */
	sc->buf_cleanup = xchk_fscounters_cleanup;
	fsc = sc->buf;
	fsc->sc = sc;

	xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);

	/* We must get the incore counters set up before we can proceed. */
	error = xchk_fscount_warmup(sc);
	if (error)
		return error;

	/*
	 * Pause all writer activity in the filesystem while we're scrubbing to
	 * reduce the likelihood of background perturbations to the counters
	 * throwing off our calculations.
	 */
	if (sc->flags & XCHK_TRY_HARDER) {
		error = xchk_fscounters_freeze(sc);
		if (error)
			return error;
	}

	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

/*
 * Part 1: Collecting filesystem summary counts.  For each AG, we add its
 * summary counts (total inodes, free inodes, free data blocks) to an incore
 * copy of the overall filesystem summary counts.
 *
 * To avoid false corruption reports in part 2, any failure in this part must
 * set the INCOMPLETE flag even when a negative errno is returned.  This care
 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
 * ECANCELED) that are absorbed into a scrub state flag update by
 * xchk_*_process_error.
 */
/* Count free space btree blocks manually for pre-lazysbcount filesystems. */
static int
xchk_fscount_btreeblks(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc,
	xfs_agnumber_t		agno)
{
	xfs_extlen_t		blocks;
	int			error;

	error = xchk_ag_init_existing(sc, agno, &sc->sa);
	if (error)
		goto out_free;

	/* The -1 excludes the btree root, which lives in the AGF. */
	error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
	if (error)
		goto out_free;
	fsc->fdblocks += blocks - 1;

	error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
	if (error)
		goto out_free;
	fsc->fdblocks += blocks - 1;

out_free:
	xchk_ag_free(sc, &sc->sa);
	return error;
}

/*
 * Calculate what the global in-core counters ought to be from the incore
 * per-AG structure.  Callers can compare this to the actual in-core counters
 * to estimate by how much both in-core and on-disk counters need to be
 * adjusted.
 *
 * Returns 0 on success; -EFSCORRUPTED if the computed values are nonsense;
 * -EDEADLOCK if ifree kept exceeding icount after several retries (ask
 * userspace to try again); other negative errnos mark the scan INCOMPLETE.
 */
STATIC int
xchk_fscount_aggregate_agcounts(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag;
	uint64_t		delayed;
	xfs_agnumber_t		agno;
	int			tries = 8;
	int			error = 0;

retry:
	fsc->icount = 0;
	fsc->ifree = 0;
	fsc->fdblocks = 0;

	for_each_perag(mp, agno, pag) {
		if (xchk_should_terminate(sc, &error))
			break;

		/* This somehow got unset since the warmup? */
		if (!xfs_perag_initialised_agi(pag) ||
		    !xfs_perag_initialised_agf(pag)) {
			error = -EFSCORRUPTED;
			break;
		}

		/* Count all the inodes */
		fsc->icount += pag->pagi_count;
		fsc->ifree += pag->pagi_freecount;

		/* Add up the free/freelist/bnobt/cntbt blocks */
		fsc->fdblocks += pag->pagf_freeblks;
		fsc->fdblocks += pag->pagf_flcount;
		if (xfs_has_lazysbcount(sc->mp)) {
			fsc->fdblocks += pag->pagf_btreeblks;
		} else {
			error = xchk_fscount_btreeblks(sc, fsc, agno);
			if (error)
				break;
		}

		/*
		 * Per-AG reservations are taken out of the incore counters,
		 * so they must be left out of the free blocks computation.
		 */
		fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
		fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;

	}
	/* Drop the perag reference if we broke out of the loop early. */
	if (pag)
		xfs_perag_rele(pag);
	if (error) {
		xchk_set_incomplete(sc);
		return error;
	}

	/*
	 * The global incore space reservation is taken from the incore
	 * counters, so leave that out of the computation.
	 */
	fsc->fdblocks -= mp->m_resblks_avail;

	/*
	 * Delayed allocation reservations are taken out of the incore counters
	 * but not recorded on disk, so leave them and their indlen blocks out
	 * of the computation.
	 */
	delayed = percpu_counter_sum(&mp->m_delalloc_blks);
	fsc->fdblocks -= delayed;

	trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
			delayed);


	/* Bail out if the values we compute are totally nonsense. */
	if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
	    fsc->fdblocks > mp->m_sb.sb_dblocks ||
	    fsc->ifree > fsc->icount_max)
		return -EFSCORRUPTED;

	/*
	 * If ifree > icount then we probably had some perturbation in the
	 * counters while we were calculating things.  We'll try a few times
	 * to maintain ifree <= icount before giving up.
	 */
	if (fsc->ifree > fsc->icount) {
		if (tries--)
			goto retry;
		return -EDEADLOCK;
	}

	return 0;
}

#ifdef CONFIG_XFS_RT
/*
 * Per-record callback for xfs_rtalloc_query_all: accumulate the free rt
 * extent count, bailing out early if the scrub was cancelled.
 */
STATIC int
xchk_fscount_add_frextent(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	const struct xfs_rtalloc_rec *rec,
	void			*priv)
{
	struct xchk_fscounters	*fsc = priv;
	int			error = 0;

	fsc->frextents += rec->ar_extcount;

	/* Sets error (e.g. -ECANCELED) if the scrub should stop. */
	xchk_should_terminate(fsc->sc, &error);
	return error;
}

/* Calculate the number of free realtime extents from the realtime bitmap. */
STATIC int
xchk_fscount_count_frextents(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	fsc->frextents = 0;
	if (!xfs_has_realtime(mp))
		return 0;

	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	error = xfs_rtalloc_query_all(sc->mp, sc->tp,
			xchk_fscount_add_frextent, fsc);
	if (error) {
		xchk_set_incomplete(sc);
		goto out_unlock;
	}

out_unlock:
	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	return error;
}
#else
/* No realtime support: the free rt extent count is trivially zero. */
STATIC int
xchk_fscount_count_frextents(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	fsc->frextents = 0;
	return 0;
}
#endif	/* CONFIG_XFS_RT */

/*
 * Part 2: Comparing filesystem summary counters.  All we have to do here is
 * sum the percpu counters and compare them to what we've observed.
 */
/*
 * Is the @counter reasonably close to the @expected value?
 *
 * We neither locked nor froze anything in the filesystem while aggregating the
 * per-AG data to compute the @expected value, which means that the counter
 * could have changed.  We know the @old_value of the summation of the counter
 * before the aggregation, and we re-sum the counter now.  If the expected
 * value falls between the two summations, we're ok.
 *
 * Otherwise, we /might/ have a problem.  If the change in the summations is
 * more than we want to tolerate, the filesystem is probably busy and we should
 * just send back INCOMPLETE and see if userspace will try again.
 *
 * If we're repairing then we require an exact match.
 */
static inline bool
xchk_fscount_within_range(
	struct xfs_scrub	*sc,
	const int64_t		old_value,
	struct percpu_counter	*counter,
	uint64_t		expected)
{
	int64_t			min_value, max_value;
	int64_t			curr_value = percpu_counter_sum(counter);

	trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
			old_value);

	/* Negative values are always wrong. */
	if (curr_value < 0)
		return false;

	/* Exact matches are always ok. */
	if (curr_value == expected)
		return true;

	min_value = min(old_value, curr_value);
	max_value = max(old_value, curr_value);

	/* Within the before-and-after range is ok. */
	if (expected >= min_value && expected <= max_value)
		return true;

	/* Everything else is bad. */
	return false;
}

/* Check the superblock counters. */
int
xchk_fscounters(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xchk_fscounters	*fsc = sc->buf;
	int64_t			icount, ifree, fdblocks, frextents;
	bool			try_again = false;
	int			error;

	/* Snapshot the percpu counters. */
	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
	frextents = percpu_counter_sum(&mp->m_frextents);

	/* No negative values, please! */
	if (icount < 0 || ifree < 0)
		xchk_set_corrupt(sc);

	/*
	 * If the filesystem is not frozen, the counter summation calls above
	 * can race with xfs_mod_freecounter, which subtracts a requested space
	 * reservation from the counter and undoes the subtraction if that made
	 * the counter go negative.  Therefore, it's possible to see negative
	 * values here, and we should only flag that as a corruption if we
	 * froze the fs.  This is much more likely to happen with frextents
	 * since there are no reserved pools.
	 */
	if (fdblocks < 0 || frextents < 0) {
		if (!fsc->frozen)
			return -EDEADLOCK;

		xchk_set_corrupt(sc);
		return 0;
	}

	/* See if icount is obviously wrong. */
	if (icount < fsc->icount_min || icount > fsc->icount_max)
		xchk_set_corrupt(sc);

	/* See if fdblocks is obviously wrong. */
	if (fdblocks > mp->m_sb.sb_dblocks)
		xchk_set_corrupt(sc);

	/* See if frextents is obviously wrong. */
	if (frextents > mp->m_sb.sb_rextents)
		xchk_set_corrupt(sc);

	/*
	 * If ifree exceeds icount by more than the minimum variance then
	 * something's probably wrong with the counters.
	 */
	if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
		xchk_set_corrupt(sc);

	/* Walk the incore AG headers to calculate the expected counters. */
	error = xchk_fscount_aggregate_agcounts(sc, fsc);
	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
		return error;

	/* Count the free extents counter for rt volumes. */
	error = xchk_fscount_count_frextents(sc, fsc);
	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
		return error;
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
		return 0;

	/*
	 * Compare the in-core counters with whatever we counted.  If the fs is
	 * frozen, we treat the discrepancy as a corruption because the freeze
	 * should have stabilized the counter values.  Otherwise, we need
	 * userspace to call us back having granted us freeze permission.
	 */
	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
			fsc->icount)) {
		if (fsc->frozen)
			xchk_set_corrupt(sc);
		else
			try_again = true;
	}

	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
		if (fsc->frozen)
			xchk_set_corrupt(sc);
		else
			try_again = true;
	}

	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
			fsc->fdblocks)) {
		if (fsc->frozen)
			xchk_set_corrupt(sc);
		else
			try_again = true;
	}

	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
			fsc->frextents)) {
		if (fsc->frozen)
			xchk_set_corrupt(sc);
		else
			try_again = true;
	}

	/* -EDEADLOCK asks userspace to retry with freeze permission. */
	if (try_again)
		return -EDEADLOCK;

	return 0;
}