// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2019 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_health.h"
#include "xfs_btree.h"
#include "xfs_ag.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"

/*
 * FS Summary Counters
 * ===================
 *
 * The basics of filesystem summary counter checking are that we iterate the
 * AGs counting the number of free blocks, free space btree blocks, per-AG
 * reservations, inodes, delayed allocation reservations, and free inodes.
 * Then we compare what we computed against the in-core counters.
 *
 * However, the reality is that summary counters are a tricky beast to check.
 * While we /could/ freeze the filesystem and scramble around the AGs counting
 * the free blocks, in practice we prefer not do that for a scan because
 * freezing is costly.  To get around this, we added a per-cpu counter of the
 * delalloc reservations so that we can rotor around the AGs relatively
 * quickly, and we allow the counts to be slightly off because we're not taking
 * any locks while we do this.
 *
 * So the first thing we do is warm up the buffer cache in the setup routine by
 * walking all the AGs to make sure the incore per-AG structure has been
 * initialized.  The expected value calculation then iterates the incore per-AG
 * structures as quickly as it can.  We snapshot the percpu counters before and
 * after this operation and use the difference in counter values to guess at
 * our tolerance for mismatch between expected and actual counter values.
 */

/*
 * Incore state for a summary-counter scrub.  Stashed in sc->buf by
 * xchk_setup_fscounters() and filled in by the aggregation routines below.
 */
struct xchk_fscounters {
	/* Back-pointer to the scrub context that owns this state. */
	struct xfs_scrub	*sc;
	/* Expected total inode count, summed from pagi_count. */
	uint64_t		icount;
	/* Expected free inode count, summed from pagi_freecount. */
	uint64_t		ifree;
	/* Expected free data blocks, summed from the per-AG counters. */
	uint64_t		fdblocks;
	/* Expected free realtime extents, counted from the rt bitmap. */
	uint64_t		frextents;
	/* Sane bounds for icount, from xfs_icount_range(). */
	unsigned long long	icount_min;
	unsigned long long	icount_max;
};

/*
 * Since the expected value computation is lockless but only browses incore
 * values, the percpu counters should be fairly close to each other.  However,
 * we'll allow ourselves to be off by at least this (arbitrary) amount.
 */
#define XCHK_FSCOUNT_MIN_VARIANCE	(512)

/*
 * Make sure the per-AG structure has been initialized from the on-disk header
 * contents and trust that the incore counters match the ondisk counters.  (The
 * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
 * summary counters after checking all AG headers).  Do this from the setup
 * function so that the inner AG aggregation loop runs as quickly as possible.
 *
 * This function runs during the setup phase /before/ we start checking any
 * metadata.
 */
STATIC int
xchk_fscount_warmup(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp = NULL;
	struct xfs_buf		*agf_bp = NULL;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno;
	int			error = 0;

	for_each_perag(mp, agno, pag) {
		if (xchk_should_terminate(sc, &error))
			break;
		/* Already initialized from a previous header read?  Skip. */
		if (pag->pagi_init && pag->pagf_init)
			continue;

		/* Lock both AG headers. */
		error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
		if (error)
			break;
		error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
		if (error)
			break;

		/*
		 * These are supposed to be initialized by the header read
		 * function.
		 */
		if (!pag->pagi_init || !pag->pagf_init) {
			error = -EFSCORRUPTED;
			break;
		}

		/* Release both buffers before moving on to the next AG. */
		xfs_buf_relse(agf_bp);
		agf_bp = NULL;
		xfs_buf_relse(agi_bp);
		agi_bp = NULL;
	}

	/*
	 * On an early break, the loop can leave buffers held and an active
	 * perag reference; drop whatever is still outstanding.
	 */
	if (agf_bp)
		xfs_buf_relse(agf_bp);
	if (agi_bp)
		xfs_buf_relse(agi_bp);
	if (pag)
		xfs_perag_put(pag);
	return error;
}

/*
 * Set up scrubbing of the summary counters: allocate the incore state,
 * compute sane inode-count bounds, warm up the per-AG structures, pause
 * background reclaim, and allocate an empty scrub transaction.
 *
 * NOTE(review): sc->buf is presumably freed by the generic scrub teardown —
 * confirm against scrub/scrub.c; nothing in this file frees it.
 */
int
xchk_setup_fscounters(
	struct xfs_scrub	*sc)
{
	struct xchk_fscounters	*fsc;
	int			error;

	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
	if (!sc->buf)
		return -ENOMEM;
	fsc = sc->buf;
	fsc->sc = sc;

	xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);

	/* We must get the incore counters set up before we can proceed. */
	error = xchk_fscount_warmup(sc);
	if (error)
		return error;

	/*
	 * Pause background reclaim while we're scrubbing to reduce the
	 * likelihood of background perturbations to the counters throwing off
	 * our calculations.
	 */
	xchk_stop_reaping(sc);

	return xchk_trans_alloc(sc, 0);
}

/*
 * Part 1: Collecting filesystem summary counts.  For each AG, we add its
 * summary counts (total inodes, free inodes, free data blocks) to an incore
 * copy of the overall filesystem summary counts.
 *
 * To avoid false corruption reports in part 2, any failure in this part must
 * set the INCOMPLETE flag even when a negative errno is returned.  This care
 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
 * ECANCELED) that are absorbed into a scrub state flag update by
 * xchk_*_process_error.
 */

/* Count free space btree blocks manually for pre-lazysbcount filesystems. */
static int
xchk_fscount_btreeblks(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc,
	xfs_agnumber_t		agno)
{
	xfs_extlen_t		blocks;
	int			error;

	error = xchk_ag_init_existing(sc, agno, &sc->sa);
	if (error)
		goto out_free;

	/*
	 * Add the bnobt and cntbt block counts, excluding each btree's root
	 * block (hence the "- 1"), which is accounted elsewhere.
	 */
	error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
	if (error)
		goto out_free;
	fsc->fdblocks += blocks - 1;

	error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
	if (error)
		goto out_free;
	fsc->fdblocks += blocks - 1;

out_free:
	xchk_ag_free(sc, &sc->sa);
	return error;
}

/*
 * Calculate what the global in-core counters ought to be from the incore
 * per-AG structure.  Callers can compare this to the actual in-core counters
 * to estimate by how much both in-core and on-disk counters need to be
 * adjusted.
 */
STATIC int
xchk_fscount_aggregate_agcounts(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag;
	uint64_t		delayed;
	xfs_agnumber_t		agno;
	int			tries = 8;
	int			error = 0;

retry:
	fsc->icount = 0;
	fsc->ifree = 0;
	fsc->fdblocks = 0;

	for_each_perag(mp, agno, pag) {
		if (xchk_should_terminate(sc, &error))
			break;

		/* This somehow got unset since the warmup? */
		if (!pag->pagi_init || !pag->pagf_init) {
			error = -EFSCORRUPTED;
			break;
		}

		/* Count all the inodes */
		fsc->icount += pag->pagi_count;
		fsc->ifree += pag->pagi_freecount;

		/* Add up the free/freelist/bnobt/cntbt blocks */
		fsc->fdblocks += pag->pagf_freeblks;
		fsc->fdblocks += pag->pagf_flcount;
		if (xfs_has_lazysbcount(sc->mp)) {
			fsc->fdblocks += pag->pagf_btreeblks;
		} else {
			error = xchk_fscount_btreeblks(sc, fsc, agno);
			if (error)
				break;
		}

		/*
		 * Per-AG reservations are taken out of the incore counters,
		 * so they must be left out of the free blocks computation.
		 */
		fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
		fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;

	}
	/* for_each_perag may break out with a live reference; drop it. */
	if (pag)
		xfs_perag_put(pag);
	if (error) {
		/* See the "Part 1" comment: mark INCOMPLETE on any failure. */
		xchk_set_incomplete(sc);
		return error;
	}

	/*
	 * The global incore space reservation is taken from the incore
	 * counters, so leave that out of the computation.
	 */
	fsc->fdblocks -= mp->m_resblks_avail;

	/*
	 * Delayed allocation reservations are taken out of the incore counters
	 * but not recorded on disk, so leave them and their indlen blocks out
	 * of the computation.
	 */
	delayed = percpu_counter_sum(&mp->m_delalloc_blks);
	fsc->fdblocks -= delayed;

	trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
			delayed);


	/* Bail out if the values we compute are totally nonsense. */
	if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
	    fsc->fdblocks > mp->m_sb.sb_dblocks ||
	    fsc->ifree > fsc->icount_max)
		return -EFSCORRUPTED;

	/*
	 * If ifree > icount then we probably had some perturbation in the
	 * counters while we were calculating things.  We'll try a few times
	 * to maintain ifree <= icount before giving up.
	 */
	if (fsc->ifree > fsc->icount) {
		if (tries--)
			goto retry;
		xchk_set_incomplete(sc);
		return 0;
	}

	return 0;
}

#ifdef CONFIG_XFS_RT
/*
 * xfs_rtalloc_query_all callback: accumulate the extent count of one free
 * rtbitmap record into the expected frextents total, bailing out early if
 * the scrub has been asked to terminate (xchk_should_terminate sets @error
 * through its pointer argument; its boolean return is deliberately unused
 * here).
 */
STATIC int
xchk_fscount_add_frextent(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	const struct xfs_rtalloc_rec	*rec,
	void				*priv)
{
	struct xchk_fscounters		*fsc = priv;
	int				error = 0;

	fsc->frextents += rec->ar_extcount;

	xchk_should_terminate(fsc->sc, &error);
	return error;
}

/* Calculate the number of free realtime extents from the realtime bitmap. */
STATIC int
xchk_fscount_count_frextents(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	fsc->frextents = 0;
	if (!xfs_has_realtime(mp))
		return 0;

	/* Hold the rtbitmap inode ILOCK while we walk the bitmap. */
	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	error = xfs_rtalloc_query_all(sc->mp, sc->tp,
			xchk_fscount_add_frextent, fsc);
	if (error) {
		/* See the "Part 1" comment: mark INCOMPLETE on any failure. */
		xchk_set_incomplete(sc);
		goto out_unlock;
	}

out_unlock:
	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	return error;
}
#else
/* Without CONFIG_XFS_RT there are no realtime extents to count. */
STATIC int
xchk_fscount_count_frextents(
	struct xfs_scrub	*sc,
	struct xchk_fscounters	*fsc)
{
	fsc->frextents = 0;
	return 0;
}
#endif /* CONFIG_XFS_RT */

/*
 * Part 2: Comparing filesystem summary counters.  All we have to do here is
 * sum the percpu counters and compare them to what we've observed.
 */

/*
 * Is the @counter reasonably close to the @expected value?
 *
 * We neither locked nor froze anything in the filesystem while aggregating the
 * per-AG data to compute the @expected value, which means that the counter
 * could have changed.  We know the @old_value of the summation of the counter
 * before the aggregation, and we re-sum the counter now.  If the expected
 * value falls between the two summations, we're ok.
 *
 * Otherwise, we /might/ have a problem.  If the change in the summations is
 * more than we want to tolerate, the filesystem is probably busy and we should
 * just send back INCOMPLETE and see if userspace will try again.
 */
static inline bool
xchk_fscount_within_range(
	struct xfs_scrub	*sc,
	const int64_t		old_value,
	struct percpu_counter	*counter,
	uint64_t		expected)
{
	int64_t			min_value, max_value;
	int64_t			curr_value = percpu_counter_sum(counter);

	trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
			old_value);

	/* Negative values are always wrong. */
	if (curr_value < 0)
		return false;

	/* Exact matches are always ok. */
	if (curr_value == expected)
		return true;

	min_value = min(old_value, curr_value);
	max_value = max(old_value, curr_value);

	/* Within the before-and-after range is ok. */
	if (expected >= min_value && expected <= max_value)
		return true;

	/*
	 * If the difference between the two summations is too large, the fs
	 * might just be busy and so we'll mark the scrub incomplete.  Return
	 * true here so that we don't mark the counter corrupt.
	 *
	 * XXX: In the future when userspace can grant scrub permission to
	 * quiesce the filesystem to solve the outsized variance problem, this
	 * check should be moved up and the return code changed to signal to
	 * userspace that we need quiesce permission.
	 */
	if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
		xchk_set_incomplete(sc);
		return true;
	}

	return false;
}

/* Check the superblock counters. */
int
xchk_fscounters(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xchk_fscounters	*fsc = sc->buf;
	int64_t			icount, ifree, fdblocks, frextents;
	int			error;

	/*
	 * Snapshot the percpu counters.  These "before" sums feed the
	 * before/after tolerance check in xchk_fscount_within_range().
	 */
	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
	frextents = percpu_counter_sum(&mp->m_frextents);

	/* No negative values, please! */
	if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
		xchk_set_corrupt(sc);

	/* See if icount is obviously wrong. */
	if (icount < fsc->icount_min || icount > fsc->icount_max)
		xchk_set_corrupt(sc);

	/* See if fdblocks is obviously wrong. */
	if (fdblocks > mp->m_sb.sb_dblocks)
		xchk_set_corrupt(sc);

	/* See if frextents is obviously wrong. */
	if (frextents > mp->m_sb.sb_rextents)
		xchk_set_corrupt(sc);

	/*
	 * If ifree exceeds icount by more than the minimum variance then
	 * something's probably wrong with the counters.
	 */
	if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
		xchk_set_corrupt(sc);

	/* Walk the incore AG headers to calculate the expected counters. */
	error = xchk_fscount_aggregate_agcounts(sc, fsc);
	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
		return error;
	/* Aggregation marked the scan incomplete; don't judge the counters. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
		return 0;

	/* Count the free extents counter for rt volumes. */
	error = xchk_fscount_count_frextents(sc, fsc);
	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
		return error;
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
		return 0;

	/* Compare the in-core counters with whatever we counted. */
	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
		xchk_set_corrupt(sc);

	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
		xchk_set_corrupt(sc);

	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
			fsc->fdblocks))
		xchk_set_corrupt(sc);

	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
			fsc->frextents))
		xchk_set_corrupt(sc);

	return 0;
}