175efa57dSDarrick J. Wong // SPDX-License-Identifier: GPL-2.0+ 275efa57dSDarrick J. Wong /* 3*ecc73f8aSDarrick J. Wong * Copyright (C) 2019-2023 Oracle. All Rights Reserved. 4739a2fe0SDarrick J. Wong * Author: Darrick J. Wong <djwong@kernel.org> 575efa57dSDarrick J. Wong */ 675efa57dSDarrick J. Wong #include "xfs.h" 775efa57dSDarrick J. Wong #include "xfs_fs.h" 875efa57dSDarrick J. Wong #include "xfs_shared.h" 975efa57dSDarrick J. Wong #include "xfs_format.h" 1075efa57dSDarrick J. Wong #include "xfs_trans_resv.h" 1175efa57dSDarrick J. Wong #include "xfs_mount.h" 1275efa57dSDarrick J. Wong #include "xfs_alloc.h" 1375efa57dSDarrick J. Wong #include "xfs_ialloc.h" 1475efa57dSDarrick J. Wong #include "xfs_health.h" 15e147a756SDarrick J. Wong #include "xfs_btree.h" 169bbafc71SDave Chinner #include "xfs_ag.h" 17e74331d6SDarrick J. Wong #include "xfs_rtalloc.h" 18e74331d6SDarrick J. Wong #include "xfs_inode.h" 1975efa57dSDarrick J. Wong #include "scrub/scrub.h" 2075efa57dSDarrick J. Wong #include "scrub/common.h" 2175efa57dSDarrick J. Wong #include "scrub/trace.h" 2275efa57dSDarrick J. Wong 2375efa57dSDarrick J. Wong /* 2475efa57dSDarrick J. Wong * FS Summary Counters 2575efa57dSDarrick J. Wong * =================== 2675efa57dSDarrick J. Wong * 2775efa57dSDarrick J. Wong * The basics of filesystem summary counter checking are that we iterate the 2875efa57dSDarrick J. Wong * AGs counting the number of free blocks, free space btree blocks, per-AG 2975efa57dSDarrick J. Wong * reservations, inodes, delayed allocation reservations, and free inodes. 3075efa57dSDarrick J. Wong * Then we compare what we computed against the in-core counters. 3175efa57dSDarrick J. Wong * 3275efa57dSDarrick J. Wong * However, the reality is that summary counters are a tricky beast to check. 3375efa57dSDarrick J. Wong * While we /could/ freeze the filesystem and scramble around the AGs counting 3475efa57dSDarrick J. Wong * the free blocks, in practice we prefer not do that for a scan because 3575efa57dSDarrick J. Wong * freezing is costly. To get around this, we added a per-cpu counter of the 3675efa57dSDarrick J. Wong * delalloc reservations so that we can rotor around the AGs relatively 3775efa57dSDarrick J. Wong * quickly, and we allow the counts to be slightly off because we're not taking 3875efa57dSDarrick J. Wong * any locks while we do this. 3975efa57dSDarrick J. Wong * 4075efa57dSDarrick J. Wong * So the first thing we do is warm up the buffer cache in the setup routine by 4175efa57dSDarrick J. Wong * walking all the AGs to make sure the incore per-AG structure has been 4275efa57dSDarrick J. Wong * initialized. The expected value calculation then iterates the incore per-AG 4375efa57dSDarrick J. Wong * structures as quickly as it can. We snapshot the percpu counters before and 4475efa57dSDarrick J. Wong * after this operation and use the difference in counter values to guess at 4575efa57dSDarrick J. Wong * our tolerance for mismatch between expected and actual counter values. 4675efa57dSDarrick J. Wong */ 4775efa57dSDarrick J. Wong 48e74331d6SDarrick J. Wong struct xchk_fscounters { 49e74331d6SDarrick J. Wong struct xfs_scrub *sc; 50e74331d6SDarrick J. Wong uint64_t icount; 51e74331d6SDarrick J. Wong uint64_t ifree; 52e74331d6SDarrick J. Wong uint64_t fdblocks; 53e74331d6SDarrick J. Wong uint64_t frextents; 54e74331d6SDarrick J. Wong unsigned long long icount_min; 55e74331d6SDarrick J. Wong unsigned long long icount_max; 56e74331d6SDarrick J. Wong }; 57e74331d6SDarrick J. Wong 5875efa57dSDarrick J. Wong /* 5975efa57dSDarrick J. Wong * Since the expected value computation is lockless but only browses incore 6075efa57dSDarrick J. Wong * values, the percpu counters should be fairly close to each other. However, 6175efa57dSDarrick J. Wong * we'll allow ourselves to be off by at least this (arbitrary) amount. 6275efa57dSDarrick J. Wong */ 6375efa57dSDarrick J. Wong #define XCHK_FSCOUNT_MIN_VARIANCE (512) 6475efa57dSDarrick J. Wong 6575efa57dSDarrick J. Wong /* 6675efa57dSDarrick J. Wong * Make sure the per-AG structure has been initialized from the on-disk header 6775efa57dSDarrick J. Wong * contents and trust that the incore counters match the ondisk counters. (The 6875efa57dSDarrick J. Wong * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the 6975efa57dSDarrick J. Wong * summary counters after checking all AG headers). Do this from the setup 7075efa57dSDarrick J. Wong * function so that the inner AG aggregation loop runs as quickly as possible. 7175efa57dSDarrick J. Wong * 7275efa57dSDarrick J. Wong * This function runs during the setup phase /before/ we start checking any 7375efa57dSDarrick J. Wong * metadata. 7475efa57dSDarrick J. Wong */ 7575efa57dSDarrick J. Wong STATIC int 7675efa57dSDarrick J. Wong xchk_fscount_warmup( 7775efa57dSDarrick J. Wong struct xfs_scrub *sc) 7875efa57dSDarrick J. Wong { 7975efa57dSDarrick J. Wong struct xfs_mount *mp = sc->mp; 8075efa57dSDarrick J. Wong struct xfs_buf *agi_bp = NULL; 8175efa57dSDarrick J. Wong struct xfs_buf *agf_bp = NULL; 8275efa57dSDarrick J. Wong struct xfs_perag *pag = NULL; 8375efa57dSDarrick J. Wong xfs_agnumber_t agno; 8475efa57dSDarrick J. Wong int error = 0; 8575efa57dSDarrick J. Wong 86f250eedcSDave Chinner for_each_perag(mp, agno, pag) { 87f250eedcSDave Chinner if (xchk_should_terminate(sc, &error)) 88f250eedcSDave Chinner break; 897ac2ff8bSDave Chinner if (xfs_perag_initialised_agi(pag) && 907ac2ff8bSDave Chinner xfs_perag_initialised_agf(pag)) 91f250eedcSDave Chinner continue; 9275efa57dSDarrick J. Wong 9375efa57dSDarrick J. Wong /* Lock both AG headers. */ 9499b13c7fSDave Chinner error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp); 9575efa57dSDarrick J. Wong if (error) 9675efa57dSDarrick J. Wong break; 9708d3e84fSDave Chinner error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp); 9875efa57dSDarrick J. Wong if (error) 9975efa57dSDarrick J. Wong break; 10075efa57dSDarrick J. Wong 10175efa57dSDarrick J. Wong /* 10275efa57dSDarrick J. Wong * These are supposed to be initialized by the header read 10375efa57dSDarrick J. Wong * function. 10475efa57dSDarrick J. Wong */ 1057ac2ff8bSDave Chinner if (!xfs_perag_initialised_agi(pag) || 1067ac2ff8bSDave Chinner !xfs_perag_initialised_agf(pag)) { 10775efa57dSDarrick J. Wong error = -EFSCORRUPTED; 10875efa57dSDarrick J. Wong break; 109f250eedcSDave Chinner } 11075efa57dSDarrick J. Wong 11175efa57dSDarrick J. Wong xfs_buf_relse(agf_bp); 11275efa57dSDarrick J. Wong agf_bp = NULL; 11375efa57dSDarrick J. Wong xfs_buf_relse(agi_bp); 11475efa57dSDarrick J. Wong agi_bp = NULL; 11575efa57dSDarrick J. Wong } 11675efa57dSDarrick J. Wong 11775efa57dSDarrick J. Wong if (agf_bp) 11875efa57dSDarrick J. Wong xfs_buf_relse(agf_bp); 11975efa57dSDarrick J. Wong if (agi_bp) 12075efa57dSDarrick J. Wong xfs_buf_relse(agi_bp); 12175efa57dSDarrick J. Wong if (pag) 122c4d5660aSDave Chinner xfs_perag_rele(pag); 12375efa57dSDarrick J. Wong return error; 12475efa57dSDarrick J. Wong } 12575efa57dSDarrick J. Wong 12675efa57dSDarrick J. Wong int 12775efa57dSDarrick J. Wong xchk_setup_fscounters( 128026f57ebSDarrick J. Wong struct xfs_scrub *sc) 12975efa57dSDarrick J. Wong { 13075efa57dSDarrick J. Wong struct xchk_fscounters *fsc; 13175efa57dSDarrick J. Wong int error; 13275efa57dSDarrick J. Wong 133306195f3SDarrick J. Wong sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS); 13475efa57dSDarrick J. Wong if (!sc->buf) 13575efa57dSDarrick J. Wong return -ENOMEM; 13675efa57dSDarrick J. Wong fsc = sc->buf; 137e74331d6SDarrick J. Wong fsc->sc = sc; 13875efa57dSDarrick J. Wong 13975efa57dSDarrick J. Wong xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max); 14075efa57dSDarrick J. Wong 14175efa57dSDarrick J. Wong /* We must get the incore counters set up before we can proceed. */ 14275efa57dSDarrick J. Wong error = xchk_fscount_warmup(sc); 14375efa57dSDarrick J. Wong if (error) 14475efa57dSDarrick J. Wong return error; 14575efa57dSDarrick J. Wong 14675efa57dSDarrick J. Wong /* 14775efa57dSDarrick J. Wong * Pause background reclaim while we're scrubbing to reduce the 14875efa57dSDarrick J. Wong * likelihood of background perturbations to the counters throwing off 14975efa57dSDarrick J. Wong * our calculations. 15075efa57dSDarrick J. Wong */ 15175efa57dSDarrick J. Wong xchk_stop_reaping(sc); 15275efa57dSDarrick J. Wong 15375efa57dSDarrick J. Wong return xchk_trans_alloc(sc, 0); 15475efa57dSDarrick J. Wong } 15575efa57dSDarrick J. Wong 15611f97e68SDarrick J. Wong /* 15711f97e68SDarrick J. Wong * Part 1: Collecting filesystem summary counts. For each AG, we add its 15811f97e68SDarrick J. Wong * summary counts (total inodes, free inodes, free data blocks) to an incore 15911f97e68SDarrick J. Wong * copy of the overall filesystem summary counts. 16011f97e68SDarrick J. Wong * 16111f97e68SDarrick J. Wong * To avoid false corruption reports in part 2, any failure in this part must 16211f97e68SDarrick J. Wong * set the INCOMPLETE flag even when a negative errno is returned. This care 16311f97e68SDarrick J. Wong * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, 16411f97e68SDarrick J. Wong * ECANCELED) that are absorbed into a scrub state flag update by 16511f97e68SDarrick J. Wong * xchk_*_process_error. 16611f97e68SDarrick J. Wong */ 16711f97e68SDarrick J. Wong 168e147a756SDarrick J. Wong /* Count free space btree blocks manually for pre-lazysbcount filesystems. */ 169e147a756SDarrick J. Wong static int 170e147a756SDarrick J. Wong xchk_fscount_btreeblks( 171e147a756SDarrick J. Wong struct xfs_scrub *sc, 172e147a756SDarrick J. Wong struct xchk_fscounters *fsc, 173e147a756SDarrick J. Wong xfs_agnumber_t agno) 174e147a756SDarrick J. Wong { 175e147a756SDarrick J. Wong xfs_extlen_t blocks; 176e147a756SDarrick J. Wong int error; 177e147a756SDarrick J. Wong 17848c6615cSDarrick J. Wong error = xchk_ag_init_existing(sc, agno, &sc->sa); 179e147a756SDarrick J. Wong if (error) 18061e0d0ccSDarrick J. Wong goto out_free; 181e147a756SDarrick J. Wong 182e147a756SDarrick J. Wong error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks); 183e147a756SDarrick J. Wong if (error) 184e147a756SDarrick J. Wong goto out_free; 185e147a756SDarrick J. Wong fsc->fdblocks += blocks - 1; 186e147a756SDarrick J. Wong 187e147a756SDarrick J. Wong error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks); 188e147a756SDarrick J. Wong if (error) 189e147a756SDarrick J. Wong goto out_free; 190e147a756SDarrick J. Wong fsc->fdblocks += blocks - 1; 191e147a756SDarrick J. Wong 192e147a756SDarrick J. Wong out_free: 193e147a756SDarrick J. Wong xchk_ag_free(sc, &sc->sa); 194e147a756SDarrick J. Wong return error; 195e147a756SDarrick J. Wong } 196e147a756SDarrick J. Wong 19775efa57dSDarrick J. Wong /* 19875efa57dSDarrick J. Wong * Calculate what the global in-core counters ought to be from the incore 19975efa57dSDarrick J. Wong * per-AG structure. Callers can compare this to the actual in-core counters 20075efa57dSDarrick J. Wong * to estimate by how much both in-core and on-disk counters need to be 20175efa57dSDarrick J. Wong * adjusted. 20275efa57dSDarrick J. Wong */ 20375efa57dSDarrick J. Wong STATIC int 20475efa57dSDarrick J. Wong xchk_fscount_aggregate_agcounts( 20575efa57dSDarrick J. Wong struct xfs_scrub *sc, 20675efa57dSDarrick J. Wong struct xchk_fscounters *fsc) 20775efa57dSDarrick J. Wong { 20875efa57dSDarrick J. Wong struct xfs_mount *mp = sc->mp; 20975efa57dSDarrick J. Wong struct xfs_perag *pag; 21075efa57dSDarrick J. Wong uint64_t delayed; 21175efa57dSDarrick J. Wong xfs_agnumber_t agno; 21275efa57dSDarrick J. Wong int tries = 8; 2138ef34723SDarrick J. Wong int error = 0; 21475efa57dSDarrick J. Wong 21575efa57dSDarrick J. Wong retry: 21675efa57dSDarrick J. Wong fsc->icount = 0; 21775efa57dSDarrick J. Wong fsc->ifree = 0; 21875efa57dSDarrick J. Wong fsc->fdblocks = 0; 21975efa57dSDarrick J. Wong 220f250eedcSDave Chinner for_each_perag(mp, agno, pag) { 221f250eedcSDave Chinner if (xchk_should_terminate(sc, &error)) 222f250eedcSDave Chinner break; 22375efa57dSDarrick J. Wong 22475efa57dSDarrick J. Wong /* This somehow got unset since the warmup? */ 2257ac2ff8bSDave Chinner if (!xfs_perag_initialised_agi(pag) || 2267ac2ff8bSDave Chinner !xfs_perag_initialised_agf(pag)) { 227f250eedcSDave Chinner error = -EFSCORRUPTED; 228f250eedcSDave Chinner break; 22975efa57dSDarrick J. Wong } 23075efa57dSDarrick J. Wong 23175efa57dSDarrick J. Wong /* Count all the inodes */ 23275efa57dSDarrick J. Wong fsc->icount += pag->pagi_count; 23375efa57dSDarrick J. Wong fsc->ifree += pag->pagi_freecount; 23475efa57dSDarrick J. Wong 23575efa57dSDarrick J. Wong /* Add up the free/freelist/bnobt/cntbt blocks */ 23675efa57dSDarrick J. Wong fsc->fdblocks += pag->pagf_freeblks; 23775efa57dSDarrick J. Wong fsc->fdblocks += pag->pagf_flcount; 238ebd9027dSDave Chinner if (xfs_has_lazysbcount(sc->mp)) { 23975efa57dSDarrick J. Wong fsc->fdblocks += pag->pagf_btreeblks; 240e147a756SDarrick J. Wong } else { 241e147a756SDarrick J. Wong error = xchk_fscount_btreeblks(sc, fsc, agno); 242f250eedcSDave Chinner if (error) 243e147a756SDarrick J. Wong break; 244e147a756SDarrick J. Wong } 24575efa57dSDarrick J. Wong 24675efa57dSDarrick J. Wong /* 24775efa57dSDarrick J. Wong * Per-AG reservations are taken out of the incore counters, 24875efa57dSDarrick J. Wong * so they must be left out of the free blocks computation. 24975efa57dSDarrick J. Wong */ 25075efa57dSDarrick J. Wong fsc->fdblocks -= pag->pag_meta_resv.ar_reserved; 25175efa57dSDarrick J. Wong fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved; 25275efa57dSDarrick J. Wong 25375efa57dSDarrick J. Wong } 254f250eedcSDave Chinner if (pag) 255c4d5660aSDave Chinner xfs_perag_rele(pag); 25611f97e68SDarrick J. Wong if (error) { 25711f97e68SDarrick J. Wong xchk_set_incomplete(sc); 2588ef34723SDarrick J. Wong return error; 25911f97e68SDarrick J. Wong } 2608ef34723SDarrick J. Wong 26175efa57dSDarrick J. Wong /* 26275efa57dSDarrick J. Wong * The global incore space reservation is taken from the incore 26375efa57dSDarrick J. Wong * counters, so leave that out of the computation. 26475efa57dSDarrick J. Wong */ 26575efa57dSDarrick J. Wong fsc->fdblocks -= mp->m_resblks_avail; 26675efa57dSDarrick J. Wong 26775efa57dSDarrick J. Wong /* 26875efa57dSDarrick J. Wong * Delayed allocation reservations are taken out of the incore counters 26975efa57dSDarrick J. Wong * but not recorded on disk, so leave them and their indlen blocks out 27075efa57dSDarrick J. Wong * of the computation. 27175efa57dSDarrick J. Wong */ 27275efa57dSDarrick J. Wong delayed = percpu_counter_sum(&mp->m_delalloc_blks); 27375efa57dSDarrick J. Wong fsc->fdblocks -= delayed; 27475efa57dSDarrick J. Wong 27575efa57dSDarrick J. Wong trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks, 27675efa57dSDarrick J. Wong delayed); 27775efa57dSDarrick J. Wong 27875efa57dSDarrick J. Wong 27975efa57dSDarrick J. Wong /* Bail out if the values we compute are totally nonsense. */ 28075efa57dSDarrick J. Wong if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max || 28175efa57dSDarrick J. Wong fsc->fdblocks > mp->m_sb.sb_dblocks || 28275efa57dSDarrick J. Wong fsc->ifree > fsc->icount_max) 28375efa57dSDarrick J. Wong return -EFSCORRUPTED; 28475efa57dSDarrick J. Wong 28575efa57dSDarrick J. Wong /* 28675efa57dSDarrick J. Wong * If ifree > icount then we probably had some perturbation in the 28775efa57dSDarrick J. Wong * counters while we were calculating things. We'll try a few times 28875efa57dSDarrick J. Wong * to maintain ifree <= icount before giving up. 28975efa57dSDarrick J. Wong */ 29075efa57dSDarrick J. Wong if (fsc->ifree > fsc->icount) { 29175efa57dSDarrick J. Wong if (tries--) 29275efa57dSDarrick J. Wong goto retry; 29375efa57dSDarrick J. Wong xchk_set_incomplete(sc); 29475efa57dSDarrick J. Wong return 0; 29575efa57dSDarrick J. Wong } 29675efa57dSDarrick J. Wong 29775efa57dSDarrick J. Wong return 0; 29875efa57dSDarrick J. Wong } 29975efa57dSDarrick J. Wong 300e74331d6SDarrick J. Wong #ifdef CONFIG_XFS_RT 301e74331d6SDarrick J. Wong STATIC int 302e74331d6SDarrick J. Wong xchk_fscount_add_frextent( 303e74331d6SDarrick J. Wong struct xfs_mount *mp, 304e74331d6SDarrick J. Wong struct xfs_trans *tp, 305e74331d6SDarrick J. Wong const struct xfs_rtalloc_rec *rec, 306e74331d6SDarrick J. Wong void *priv) 307e74331d6SDarrick J. Wong { 308e74331d6SDarrick J. Wong struct xchk_fscounters *fsc = priv; 309e74331d6SDarrick J. Wong int error = 0; 310e74331d6SDarrick J. Wong 311e74331d6SDarrick J. Wong fsc->frextents += rec->ar_extcount; 312e74331d6SDarrick J. Wong 313e74331d6SDarrick J. Wong xchk_should_terminate(fsc->sc, &error); 314e74331d6SDarrick J. Wong return error; 315e74331d6SDarrick J. Wong } 316e74331d6SDarrick J. Wong 317e74331d6SDarrick J. Wong /* Calculate the number of free realtime extents from the realtime bitmap. */ 318e74331d6SDarrick J. Wong STATIC int 319e74331d6SDarrick J. Wong xchk_fscount_count_frextents( 320e74331d6SDarrick J. Wong struct xfs_scrub *sc, 321e74331d6SDarrick J. Wong struct xchk_fscounters *fsc) 322e74331d6SDarrick J. Wong { 323e74331d6SDarrick J. Wong struct xfs_mount *mp = sc->mp; 324e74331d6SDarrick J. Wong int error; 325e74331d6SDarrick J. Wong 326e74331d6SDarrick J. Wong fsc->frextents = 0; 327e74331d6SDarrick J. Wong if (!xfs_has_realtime(mp)) 328e74331d6SDarrick J. Wong return 0; 329e74331d6SDarrick J. Wong 330e74331d6SDarrick J. Wong xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 331e74331d6SDarrick J. Wong error = xfs_rtalloc_query_all(sc->mp, sc->tp, 332e74331d6SDarrick J. Wong xchk_fscount_add_frextent, fsc); 333e74331d6SDarrick J. Wong if (error) { 334e74331d6SDarrick J. Wong xchk_set_incomplete(sc); 335e74331d6SDarrick J. Wong goto out_unlock; 336e74331d6SDarrick J. Wong } 337e74331d6SDarrick J. Wong 338e74331d6SDarrick J. Wong out_unlock: 339e74331d6SDarrick J. Wong xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 340e74331d6SDarrick J. Wong return error; 341e74331d6SDarrick J. Wong } 342e74331d6SDarrick J. Wong #else 343e74331d6SDarrick J. Wong STATIC int 344e74331d6SDarrick J. Wong xchk_fscount_count_frextents( 345e74331d6SDarrick J. Wong struct xfs_scrub *sc, 346e74331d6SDarrick J. Wong struct xchk_fscounters *fsc) 347e74331d6SDarrick J. Wong { 348e74331d6SDarrick J. Wong fsc->frextents = 0; 349e74331d6SDarrick J. Wong return 0; 350e74331d6SDarrick J. Wong } 351e74331d6SDarrick J. Wong #endif /* CONFIG_XFS_RT */ 352e74331d6SDarrick J. Wong 35375efa57dSDarrick J. Wong /* 35411f97e68SDarrick J. Wong * Part 2: Comparing filesystem summary counters. All we have to do here is 35511f97e68SDarrick J. Wong * sum the percpu counters and compare them to what we've observed. 35611f97e68SDarrick J. Wong */ 35711f97e68SDarrick J. Wong 35811f97e68SDarrick J. Wong /* 35975efa57dSDarrick J. Wong * Is the @counter reasonably close to the @expected value? 36075efa57dSDarrick J. Wong * 36175efa57dSDarrick J. Wong * We neither locked nor froze anything in the filesystem while aggregating the 36275efa57dSDarrick J. Wong * per-AG data to compute the @expected value, which means that the counter 36375efa57dSDarrick J. Wong * could have changed. We know the @old_value of the summation of the counter 36475efa57dSDarrick J. Wong * before the aggregation, and we re-sum the counter now. If the expected 36575efa57dSDarrick J. Wong * value falls between the two summations, we're ok. 36675efa57dSDarrick J. Wong * 36775efa57dSDarrick J. Wong * Otherwise, we /might/ have a problem. If the change in the summations is 36875efa57dSDarrick J. Wong * more than we want to tolerate, the filesystem is probably busy and we should 36975efa57dSDarrick J. Wong * just send back INCOMPLETE and see if userspace will try again. 37075efa57dSDarrick J. Wong */ 37175efa57dSDarrick J. Wong static inline bool 37275efa57dSDarrick J. Wong xchk_fscount_within_range( 37375efa57dSDarrick J. Wong struct xfs_scrub *sc, 37475efa57dSDarrick J. Wong const int64_t old_value, 37575efa57dSDarrick J. Wong struct percpu_counter *counter, 37675efa57dSDarrick J. Wong uint64_t expected) 37775efa57dSDarrick J. Wong { 37875efa57dSDarrick J. Wong int64_t min_value, max_value; 37975efa57dSDarrick J. Wong int64_t curr_value = percpu_counter_sum(counter); 38075efa57dSDarrick J. Wong 38175efa57dSDarrick J. Wong trace_xchk_fscounters_within_range(sc->mp, expected, curr_value, 38275efa57dSDarrick J. Wong old_value); 38375efa57dSDarrick J. Wong 38475efa57dSDarrick J. Wong /* Negative values are always wrong. */ 38575efa57dSDarrick J. Wong if (curr_value < 0) 38675efa57dSDarrick J. Wong return false; 38775efa57dSDarrick J. Wong 38875efa57dSDarrick J. Wong /* Exact matches are always ok. */ 38975efa57dSDarrick J. Wong if (curr_value == expected) 39075efa57dSDarrick J. Wong return true; 39175efa57dSDarrick J. Wong 39275efa57dSDarrick J. Wong min_value = min(old_value, curr_value); 39375efa57dSDarrick J. Wong max_value = max(old_value, curr_value); 39475efa57dSDarrick J. Wong 39575efa57dSDarrick J. Wong /* Within the before-and-after range is ok. */ 39675efa57dSDarrick J. Wong if (expected >= min_value && expected <= max_value) 39775efa57dSDarrick J. Wong return true; 39875efa57dSDarrick J. Wong 39975efa57dSDarrick J. Wong /* 40075efa57dSDarrick J. Wong * If the difference between the two summations is too large, the fs 40175efa57dSDarrick J. Wong * might just be busy and so we'll mark the scrub incomplete. Return 40275efa57dSDarrick J. Wong * true here so that we don't mark the counter corrupt. 40375efa57dSDarrick J. Wong * 40475efa57dSDarrick J. Wong * XXX: In the future when userspace can grant scrub permission to 40575efa57dSDarrick J. Wong * quiesce the filesystem to solve the outsized variance problem, this 40675efa57dSDarrick J. Wong * check should be moved up and the return code changed to signal to 40775efa57dSDarrick J. Wong * userspace that we need quiesce permission. 40875efa57dSDarrick J. Wong */ 40975efa57dSDarrick J. Wong if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) { 41075efa57dSDarrick J. Wong xchk_set_incomplete(sc); 41175efa57dSDarrick J. Wong return true; 41275efa57dSDarrick J. Wong } 41375efa57dSDarrick J. Wong 41475efa57dSDarrick J. Wong return false; 41575efa57dSDarrick J. Wong } 41675efa57dSDarrick J. Wong 41775efa57dSDarrick J. Wong /* Check the superblock counters. */ 41875efa57dSDarrick J. Wong int 41975efa57dSDarrick J. Wong xchk_fscounters( 42075efa57dSDarrick J. Wong struct xfs_scrub *sc) 42175efa57dSDarrick J. Wong { 42275efa57dSDarrick J. Wong struct xfs_mount *mp = sc->mp; 42375efa57dSDarrick J. Wong struct xchk_fscounters *fsc = sc->buf; 424e74331d6SDarrick J. Wong int64_t icount, ifree, fdblocks, frextents; 42575efa57dSDarrick J. Wong int error; 42675efa57dSDarrick J. Wong 42775efa57dSDarrick J. Wong /* Snapshot the percpu counters. */ 42875efa57dSDarrick J. Wong icount = percpu_counter_sum(&mp->m_icount); 42975efa57dSDarrick J. Wong ifree = percpu_counter_sum(&mp->m_ifree); 43075efa57dSDarrick J. Wong fdblocks = percpu_counter_sum(&mp->m_fdblocks); 431e74331d6SDarrick J. Wong frextents = percpu_counter_sum(&mp->m_frextents); 43275efa57dSDarrick J. Wong 43375efa57dSDarrick J. Wong /* No negative values, please! */ 434e74331d6SDarrick J. Wong if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0) 43575efa57dSDarrick J. Wong xchk_set_corrupt(sc); 43675efa57dSDarrick J. Wong 43775efa57dSDarrick J. Wong /* See if icount is obviously wrong. */ 43875efa57dSDarrick J. Wong if (icount < fsc->icount_min || icount > fsc->icount_max) 43975efa57dSDarrick J. Wong xchk_set_corrupt(sc); 44075efa57dSDarrick J. Wong 44175efa57dSDarrick J. Wong /* See if fdblocks is obviously wrong. */ 44275efa57dSDarrick J. Wong if (fdblocks > mp->m_sb.sb_dblocks) 44375efa57dSDarrick J. Wong xchk_set_corrupt(sc); 44475efa57dSDarrick J. Wong 445e74331d6SDarrick J. Wong /* See if frextents is obviously wrong. */ 446e74331d6SDarrick J. Wong if (frextents > mp->m_sb.sb_rextents) 447e74331d6SDarrick J. Wong xchk_set_corrupt(sc); 448e74331d6SDarrick J. Wong 44975efa57dSDarrick J. Wong /* 45075efa57dSDarrick J. Wong * If ifree exceeds icount by more than the minimum variance then 45175efa57dSDarrick J. Wong * something's probably wrong with the counters. 45275efa57dSDarrick J. Wong */ 45375efa57dSDarrick J. Wong if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE) 45475efa57dSDarrick J. Wong xchk_set_corrupt(sc); 45575efa57dSDarrick J. Wong 45675efa57dSDarrick J. Wong /* Walk the incore AG headers to calculate the expected counters. */ 45775efa57dSDarrick J. Wong error = xchk_fscount_aggregate_agcounts(sc, fsc); 45875efa57dSDarrick J. Wong if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error)) 45975efa57dSDarrick J. Wong return error; 46075efa57dSDarrick J. Wong if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) 46175efa57dSDarrick J. Wong return 0; 46275efa57dSDarrick J. Wong 463e74331d6SDarrick J. Wong /* Count the free extents counter for rt volumes. */ 464e74331d6SDarrick J. Wong error = xchk_fscount_count_frextents(sc, fsc); 465e74331d6SDarrick J. Wong if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error)) 466e74331d6SDarrick J. Wong return error; 467e74331d6SDarrick J. Wong if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) 468e74331d6SDarrick J. Wong return 0; 469e74331d6SDarrick J. Wong 47075efa57dSDarrick J. Wong /* Compare the in-core counters with whatever we counted. */ 47175efa57dSDarrick J. Wong if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount)) 47275efa57dSDarrick J. Wong xchk_set_corrupt(sc); 47375efa57dSDarrick J. Wong 47475efa57dSDarrick J. Wong if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) 47575efa57dSDarrick J. Wong xchk_set_corrupt(sc); 47675efa57dSDarrick J. Wong 47775efa57dSDarrick J. Wong if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks, 47875efa57dSDarrick J. Wong fsc->fdblocks)) 47975efa57dSDarrick J. Wong xchk_set_corrupt(sc); 48075efa57dSDarrick J. Wong 481e74331d6SDarrick J. Wong if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents, 482e74331d6SDarrick J. Wong fsc->frextents)) 483e74331d6SDarrick J. Wong xchk_set_corrupt(sc); 484e74331d6SDarrick J. Wong 48575efa57dSDarrick J. Wong return 0; 48675efa57dSDarrick J. Wong } 487