xref: /openbmc/linux/fs/xfs/scrub/reap.c (revision 46eeaa11bdd1bc9e077bdf741d32ca7235d263c6)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_bmap.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_remote.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/reap.h"

/*
 * Disposal of Blocks from Old Metadata
 *
 * Now that we've constructed a new btree to replace the damaged one, we want
 * to dispose of the blocks that (we think) the old btree was using.
 * Previously, we used the rmapbt to collect the extents (bitmap) with the
 * rmap owner corresponding to the tree we rebuilt, collected extents for any
 * blocks with the same rmap owner that are owned by another data structure
 * (sublist), and subtracted sublist from bitmap.  In theory the extents
 * remaining in bitmap are the old btree's blocks.
 *
 * Unfortunately, it's possible that the btree was crosslinked with other
 * blocks on disk.  The rmap data can tell us if there are multiple owners, so
 * if the rmapbt says there is an owner of this block other than @oinfo, then
 * the block is crosslinked.  Remove the reverse mapping and continue.
 *
 * If there is one rmap record, we can free the block, which removes the
 * reverse mapping but doesn't add the block to the free space.  Our repair
 * strategy is to hope the other metadata objects crosslinked on this block
 * will be rebuilt (atop different blocks), thereby removing all the cross
 * links.
 *
 * If there are no rmap records at all, we also free the block.  If the btree
 * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
 * supposed to be a rmap record and everything is ok.  For other btrees there
 * had to have been an rmap entry for the block to have ended up on @bitmap,
 * so if it's gone now there's something wrong and the fs will shut down.
 *
 * Note: If there are multiple rmap records with only the same rmap owner as
 * the btree we're trying to rebuild and the block is indeed owned by another
 * data structure with the same rmap owner, then the block will be in sublist
 * and therefore doesn't need disposal.  If there are multiple rmap records
 * with only the same rmap owner but the block is not owned by something with
 * the same rmap owner, the block will be freed.
 *
 * The caller is responsible for locking the AG headers for the entire rebuild
 * operation so that nothing else can sneak in and change the AG state while
 * we're not looking.  We must also invalidate any buffers associated with
 * @bitmap.
 */

82e06ef14bSDarrick J. Wong /* Information about reaping extents after a repair. */
831c7ce115SDarrick J. Wong struct xreap_state {
84e06ef14bSDarrick J. Wong 	struct xfs_scrub		*sc;
85e06ef14bSDarrick J. Wong 
86e06ef14bSDarrick J. Wong 	/* Reverse mapping owner and metadata reservation type. */
87e06ef14bSDarrick J. Wong 	const struct xfs_owner_info	*oinfo;
88e06ef14bSDarrick J. Wong 	enum xfs_ag_resv_type		resv;
895fee784eSDarrick J. Wong 
901c7ce115SDarrick J. Wong 	/* If true, roll the transaction before reaping the next extent. */
911c7ce115SDarrick J. Wong 	bool				force_roll;
921c7ce115SDarrick J. Wong 
935fee784eSDarrick J. Wong 	/* Number of deferred reaps attached to the current transaction. */
945fee784eSDarrick J. Wong 	unsigned int			deferred;
951c7ce115SDarrick J. Wong 
961c7ce115SDarrick J. Wong 	/* Number of invalidated buffers logged to the current transaction. */
971c7ce115SDarrick J. Wong 	unsigned int			invalidated;
981c7ce115SDarrick J. Wong 
991c7ce115SDarrick J. Wong 	/* Number of deferred reaps queued during the whole reap sequence. */
1001c7ce115SDarrick J. Wong 	unsigned long long		total_deferred;
101e06ef14bSDarrick J. Wong };
102e06ef14bSDarrick J. Wong 
1038e54e06bSDarrick J. Wong /* Put a block back on the AGFL. */
104e06ef14bSDarrick J. Wong STATIC int
xreap_put_freelist(struct xfs_scrub * sc,xfs_agblock_t agbno)1051c7ce115SDarrick J. Wong xreap_put_freelist(
106e06ef14bSDarrick J. Wong 	struct xfs_scrub	*sc,
107e06ef14bSDarrick J. Wong 	xfs_agblock_t		agbno)
108e06ef14bSDarrick J. Wong {
109e06ef14bSDarrick J. Wong 	struct xfs_buf		*agfl_bp;
110e06ef14bSDarrick J. Wong 	int			error;
111e06ef14bSDarrick J. Wong 
112e06ef14bSDarrick J. Wong 	/* Make sure there's space on the freelist. */
113e06ef14bSDarrick J. Wong 	error = xrep_fix_freelist(sc, true);
114e06ef14bSDarrick J. Wong 	if (error)
115e06ef14bSDarrick J. Wong 		return error;
116e06ef14bSDarrick J. Wong 
117e06ef14bSDarrick J. Wong 	/*
118e06ef14bSDarrick J. Wong 	 * Since we're "freeing" a lost block onto the AGFL, we have to
119e06ef14bSDarrick J. Wong 	 * create an rmap for the block prior to merging it or else other
120e06ef14bSDarrick J. Wong 	 * parts will break.
121e06ef14bSDarrick J. Wong 	 */
122e06ef14bSDarrick J. Wong 	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
123e06ef14bSDarrick J. Wong 			&XFS_RMAP_OINFO_AG);
124e06ef14bSDarrick J. Wong 	if (error)
125e06ef14bSDarrick J. Wong 		return error;
126e06ef14bSDarrick J. Wong 
127e06ef14bSDarrick J. Wong 	/* Put the block on the AGFL. */
128e06ef14bSDarrick J. Wong 	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
129e06ef14bSDarrick J. Wong 	if (error)
130e06ef14bSDarrick J. Wong 		return error;
131e06ef14bSDarrick J. Wong 
132e06ef14bSDarrick J. Wong 	error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
133e06ef14bSDarrick J. Wong 			agfl_bp, agbno, 0);
134e06ef14bSDarrick J. Wong 	if (error)
135e06ef14bSDarrick J. Wong 		return error;
136e06ef14bSDarrick J. Wong 	xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
137e06ef14bSDarrick J. Wong 			XFS_EXTENT_BUSY_SKIP_DISCARD);
138e06ef14bSDarrick J. Wong 
139e06ef14bSDarrick J. Wong 	return 0;
140e06ef14bSDarrick J. Wong }
141e06ef14bSDarrick J. Wong 
1421c7ce115SDarrick J. Wong /* Are there any uncommitted reap operations? */
xreap_dirty(const struct xreap_state * rs)1431c7ce115SDarrick J. Wong static inline bool xreap_dirty(const struct xreap_state *rs)
1448e54e06bSDarrick J. Wong {
1451c7ce115SDarrick J. Wong 	if (rs->force_roll)
1461c7ce115SDarrick J. Wong 		return true;
1471c7ce115SDarrick J. Wong 	if (rs->deferred)
1481c7ce115SDarrick J. Wong 		return true;
1491c7ce115SDarrick J. Wong 	if (rs->invalidated)
1501c7ce115SDarrick J. Wong 		return true;
1511c7ce115SDarrick J. Wong 	if (rs->total_deferred)
1521c7ce115SDarrick J. Wong 		return true;
1531c7ce115SDarrick J. Wong 	return false;
1541c7ce115SDarrick J. Wong }
1551c7ce115SDarrick J. Wong 
1561c7ce115SDarrick J. Wong #define XREAP_MAX_BINVAL	(2048)
1578e54e06bSDarrick J. Wong 
1588e54e06bSDarrick J. Wong /*
1591c7ce115SDarrick J. Wong  * Decide if we want to roll the transaction after reaping an extent.  We don't
1601c7ce115SDarrick J. Wong  * want to overrun the transaction reservation, so we prohibit more than
1611c7ce115SDarrick J. Wong  * 128 EFIs per transaction.  For the same reason, we limit the number
1621c7ce115SDarrick J. Wong  * of buffer invalidations to 2048.
1631c7ce115SDarrick J. Wong  */
xreap_want_roll(const struct xreap_state * rs)1641c7ce115SDarrick J. Wong static inline bool xreap_want_roll(const struct xreap_state *rs)
1651c7ce115SDarrick J. Wong {
1661c7ce115SDarrick J. Wong 	if (rs->force_roll)
1671c7ce115SDarrick J. Wong 		return true;
1681c7ce115SDarrick J. Wong 	if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
1691c7ce115SDarrick J. Wong 		return true;
1701c7ce115SDarrick J. Wong 	if (rs->invalidated > XREAP_MAX_BINVAL)
1711c7ce115SDarrick J. Wong 		return true;
1721c7ce115SDarrick J. Wong 	return false;
1731c7ce115SDarrick J. Wong }
1741c7ce115SDarrick J. Wong 
xreap_reset(struct xreap_state * rs)1751c7ce115SDarrick J. Wong static inline void xreap_reset(struct xreap_state *rs)
1761c7ce115SDarrick J. Wong {
1771c7ce115SDarrick J. Wong 	rs->total_deferred += rs->deferred;
1781c7ce115SDarrick J. Wong 	rs->deferred = 0;
1791c7ce115SDarrick J. Wong 	rs->invalidated = 0;
1801c7ce115SDarrick J. Wong 	rs->force_roll = false;
1811c7ce115SDarrick J. Wong }
1821c7ce115SDarrick J. Wong 
1831c7ce115SDarrick J. Wong #define XREAP_MAX_DEFER_CHAIN		(2048)
1841c7ce115SDarrick J. Wong 
1851c7ce115SDarrick J. Wong /*
1861c7ce115SDarrick J. Wong  * Decide if we want to finish the deferred ops that are attached to the scrub
1871c7ce115SDarrick J. Wong  * transaction.  We don't want to queue huge chains of deferred ops because
1881c7ce115SDarrick J. Wong  * that can consume a lot of log space and kernel memory.  Hence we trigger a
1891c7ce115SDarrick J. Wong  * xfs_defer_finish if there are more than 2048 deferred reap operations or the
1901c7ce115SDarrick J. Wong  * caller did some real work.
1911c7ce115SDarrick J. Wong  */
1921c7ce115SDarrick J. Wong static inline bool
xreap_want_defer_finish(const struct xreap_state * rs)1931c7ce115SDarrick J. Wong xreap_want_defer_finish(const struct xreap_state *rs)
1941c7ce115SDarrick J. Wong {
1951c7ce115SDarrick J. Wong 	if (rs->force_roll)
1961c7ce115SDarrick J. Wong 		return true;
1971c7ce115SDarrick J. Wong 	if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
1981c7ce115SDarrick J. Wong 		return true;
1991c7ce115SDarrick J. Wong 	return false;
2001c7ce115SDarrick J. Wong }
2011c7ce115SDarrick J. Wong 
xreap_defer_finish_reset(struct xreap_state * rs)2021c7ce115SDarrick J. Wong static inline void xreap_defer_finish_reset(struct xreap_state *rs)
2031c7ce115SDarrick J. Wong {
2041c7ce115SDarrick J. Wong 	rs->total_deferred = 0;
2051c7ce115SDarrick J. Wong 	rs->deferred = 0;
2061c7ce115SDarrick J. Wong 	rs->invalidated = 0;
2071c7ce115SDarrick J. Wong 	rs->force_roll = false;
2081c7ce115SDarrick J. Wong }
2091c7ce115SDarrick J. Wong 
2101c7ce115SDarrick J. Wong /* Try to invalidate the incore buffers for an extent that we're freeing. */
2111c7ce115SDarrick J. Wong STATIC void
xreap_agextent_binval(struct xreap_state * rs,xfs_agblock_t agbno,xfs_extlen_t * aglenp)2121c7ce115SDarrick J. Wong xreap_agextent_binval(
2131c7ce115SDarrick J. Wong 	struct xreap_state	*rs,
2141c7ce115SDarrick J. Wong 	xfs_agblock_t		agbno,
2151c7ce115SDarrick J. Wong 	xfs_extlen_t		*aglenp)
2161c7ce115SDarrick J. Wong {
2171c7ce115SDarrick J. Wong 	struct xfs_scrub	*sc = rs->sc;
2181c7ce115SDarrick J. Wong 	struct xfs_perag	*pag = sc->sa.pag;
2191c7ce115SDarrick J. Wong 	struct xfs_mount	*mp = sc->mp;
2201c7ce115SDarrick J. Wong 	xfs_agnumber_t		agno = sc->sa.pag->pag_agno;
2211c7ce115SDarrick J. Wong 	xfs_agblock_t		agbno_next = agbno + *aglenp;
2221c7ce115SDarrick J. Wong 	xfs_agblock_t		bno = agbno;
2231c7ce115SDarrick J. Wong 
2241c7ce115SDarrick J. Wong 	/*
2258e54e06bSDarrick J. Wong 	 * Avoid invalidating AG headers and post-EOFS blocks because we never
2268e54e06bSDarrick J. Wong 	 * own those.
2278e54e06bSDarrick J. Wong 	 */
2281c7ce115SDarrick J. Wong 	if (!xfs_verify_agbno(pag, agbno) ||
2291c7ce115SDarrick J. Wong 	    !xfs_verify_agbno(pag, agbno_next - 1))
2308e54e06bSDarrick J. Wong 		return;
2318e54e06bSDarrick J. Wong 
2328e54e06bSDarrick J. Wong 	/*
2331c7ce115SDarrick J. Wong 	 * If there are incore buffers for these blocks, invalidate them.  We
2341c7ce115SDarrick J. Wong 	 * assume that the lack of any other known owners means that the buffer
2351c7ce115SDarrick J. Wong 	 * can be locked without risk of deadlocking.  The buffer cache cannot
2361c7ce115SDarrick J. Wong 	 * detect aliasing, so employ nested loops to scan for incore buffers
2371c7ce115SDarrick J. Wong 	 * of any plausible size.
2388e54e06bSDarrick J. Wong 	 */
2391c7ce115SDarrick J. Wong 	while (bno < agbno_next) {
2401c7ce115SDarrick J. Wong 		xfs_agblock_t	fsbcount;
2411c7ce115SDarrick J. Wong 		xfs_agblock_t	max_fsbs;
2421c7ce115SDarrick J. Wong 
2431c7ce115SDarrick J. Wong 		/*
2441c7ce115SDarrick J. Wong 		 * Max buffer size is the max remote xattr buffer size, which
2451c7ce115SDarrick J. Wong 		 * is one fs block larger than 64k.
2461c7ce115SDarrick J. Wong 		 */
2471c7ce115SDarrick J. Wong 		max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
2481c7ce115SDarrick J. Wong 				xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
2491c7ce115SDarrick J. Wong 
250*7bc086bbSDarrick J. Wong 		for (fsbcount = 1; fsbcount <= max_fsbs; fsbcount++) {
2511c7ce115SDarrick J. Wong 			struct xfs_buf	*bp = NULL;
2521c7ce115SDarrick J. Wong 			xfs_daddr_t	daddr;
2531c7ce115SDarrick J. Wong 			int		error;
2541c7ce115SDarrick J. Wong 
2551c7ce115SDarrick J. Wong 			daddr = XFS_AGB_TO_DADDR(mp, agno, bno);
2561c7ce115SDarrick J. Wong 			error = xfs_buf_incore(mp->m_ddev_targp, daddr,
2571c7ce115SDarrick J. Wong 					XFS_FSB_TO_BB(mp, fsbcount),
2581c7ce115SDarrick J. Wong 					XBF_LIVESCAN, &bp);
2598e54e06bSDarrick J. Wong 			if (error)
2601c7ce115SDarrick J. Wong 				continue;
2618e54e06bSDarrick J. Wong 
2628e54e06bSDarrick J. Wong 			xfs_trans_bjoin(sc->tp, bp);
2638e54e06bSDarrick J. Wong 			xfs_trans_binval(sc->tp, bp);
2641c7ce115SDarrick J. Wong 			rs->invalidated++;
2651c7ce115SDarrick J. Wong 
2661c7ce115SDarrick J. Wong 			/*
2671c7ce115SDarrick J. Wong 			 * Stop invalidating if we've hit the limit; we should
2681c7ce115SDarrick J. Wong 			 * still have enough reservation left to free however
2691c7ce115SDarrick J. Wong 			 * far we've gotten.
2701c7ce115SDarrick J. Wong 			 */
2711c7ce115SDarrick J. Wong 			if (rs->invalidated > XREAP_MAX_BINVAL) {
2721c7ce115SDarrick J. Wong 				*aglenp -= agbno_next - bno;
2731c7ce115SDarrick J. Wong 				goto out;
2741c7ce115SDarrick J. Wong 			}
2758e54e06bSDarrick J. Wong 		}
2768e54e06bSDarrick J. Wong 
2771c7ce115SDarrick J. Wong 		bno++;
2781c7ce115SDarrick J. Wong 	}
2791c7ce115SDarrick J. Wong 
2801c7ce115SDarrick J. Wong out:
2811c7ce115SDarrick J. Wong 	trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
2821c7ce115SDarrick J. Wong }
2831c7ce115SDarrick J. Wong 
2841c7ce115SDarrick J. Wong /*
2851c7ce115SDarrick J. Wong  * Figure out the longest run of blocks that we can dispose of with a single
2861c7ce115SDarrick J. Wong  * call.  Cross-linked blocks should have their reverse mappings removed, but
2871c7ce115SDarrick J. Wong  * single-owner extents can be freed.  AGFL blocks can only be put back one at
2881c7ce115SDarrick J. Wong  * a time.
2891c7ce115SDarrick J. Wong  */
290e06ef14bSDarrick J. Wong STATIC int
xreap_agextent_select(struct xreap_state * rs,xfs_agblock_t agbno,xfs_agblock_t agbno_next,bool * crosslinked,xfs_extlen_t * aglenp)2911c7ce115SDarrick J. Wong xreap_agextent_select(
2921c7ce115SDarrick J. Wong 	struct xreap_state	*rs,
2931c7ce115SDarrick J. Wong 	xfs_agblock_t		agbno,
2941c7ce115SDarrick J. Wong 	xfs_agblock_t		agbno_next,
2951c7ce115SDarrick J. Wong 	bool			*crosslinked,
2961c7ce115SDarrick J. Wong 	xfs_extlen_t		*aglenp)
297e06ef14bSDarrick J. Wong {
298e06ef14bSDarrick J. Wong 	struct xfs_scrub	*sc = rs->sc;
299e06ef14bSDarrick J. Wong 	struct xfs_btree_cur	*cur;
3001c7ce115SDarrick J. Wong 	xfs_agblock_t		bno = agbno + 1;
3011c7ce115SDarrick J. Wong 	xfs_extlen_t		len = 1;
302e06ef14bSDarrick J. Wong 	int			error;
303e06ef14bSDarrick J. Wong 
3041c7ce115SDarrick J. Wong 	/*
3051c7ce115SDarrick J. Wong 	 * Determine if there are any other rmap records covering the first
3061c7ce115SDarrick J. Wong 	 * block of this extent.  If so, the block is crosslinked.
3071c7ce115SDarrick J. Wong 	 */
3081c7ce115SDarrick J. Wong 	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
3091c7ce115SDarrick J. Wong 			sc->sa.pag);
3101c7ce115SDarrick J. Wong 	error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
3111c7ce115SDarrick J. Wong 			crosslinked);
3121c7ce115SDarrick J. Wong 	if (error)
3131c7ce115SDarrick J. Wong 		goto out_cur;
314e06ef14bSDarrick J. Wong 
3151c7ce115SDarrick J. Wong 	/* AGFL blocks can only be deal with one at a time. */
3161c7ce115SDarrick J. Wong 	if (rs->resv == XFS_AG_RESV_AGFL)
3171c7ce115SDarrick J. Wong 		goto out_found;
3181c7ce115SDarrick J. Wong 
3191c7ce115SDarrick J. Wong 	/*
3201c7ce115SDarrick J. Wong 	 * Figure out how many of the subsequent blocks have the same crosslink
3211c7ce115SDarrick J. Wong 	 * status.
3221c7ce115SDarrick J. Wong 	 */
3231c7ce115SDarrick J. Wong 	while (bno < agbno_next) {
3241c7ce115SDarrick J. Wong 		bool		also_crosslinked;
3251c7ce115SDarrick J. Wong 
3261c7ce115SDarrick J. Wong 		error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
3271c7ce115SDarrick J. Wong 				&also_crosslinked);
3281c7ce115SDarrick J. Wong 		if (error)
3291c7ce115SDarrick J. Wong 			goto out_cur;
3301c7ce115SDarrick J. Wong 
3311c7ce115SDarrick J. Wong 		if (*crosslinked != also_crosslinked)
3321c7ce115SDarrick J. Wong 			break;
3331c7ce115SDarrick J. Wong 
3341c7ce115SDarrick J. Wong 		len++;
3351c7ce115SDarrick J. Wong 		bno++;
336e06ef14bSDarrick J. Wong 	}
337a55e0730SDarrick J. Wong 
3381c7ce115SDarrick J. Wong out_found:
3391c7ce115SDarrick J. Wong 	*aglenp = len;
3401c7ce115SDarrick J. Wong 	trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
3411c7ce115SDarrick J. Wong out_cur:
342e06ef14bSDarrick J. Wong 	xfs_btree_del_cursor(cur, error);
343a55e0730SDarrick J. Wong 	return error;
3441c7ce115SDarrick J. Wong }
3451c7ce115SDarrick J. Wong 
3461c7ce115SDarrick J. Wong /*
3471c7ce115SDarrick J. Wong  * Dispose of as much of the beginning of this AG extent as possible.  The
3481c7ce115SDarrick J. Wong  * number of blocks disposed of will be returned in @aglenp.
3491c7ce115SDarrick J. Wong  */
3501c7ce115SDarrick J. Wong STATIC int
xreap_agextent_iter(struct xreap_state * rs,xfs_agblock_t agbno,xfs_extlen_t * aglenp,bool crosslinked)3511c7ce115SDarrick J. Wong xreap_agextent_iter(
3521c7ce115SDarrick J. Wong 	struct xreap_state	*rs,
3531c7ce115SDarrick J. Wong 	xfs_agblock_t		agbno,
3541c7ce115SDarrick J. Wong 	xfs_extlen_t		*aglenp,
3551c7ce115SDarrick J. Wong 	bool			crosslinked)
3561c7ce115SDarrick J. Wong {
3571c7ce115SDarrick J. Wong 	struct xfs_scrub	*sc = rs->sc;
3581c7ce115SDarrick J. Wong 	xfs_fsblock_t		fsbno;
3591c7ce115SDarrick J. Wong 	int			error = 0;
3601c7ce115SDarrick J. Wong 
3611c7ce115SDarrick J. Wong 	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
362e06ef14bSDarrick J. Wong 
363e06ef14bSDarrick J. Wong 	/*
364e06ef14bSDarrick J. Wong 	 * If there are other rmappings, this block is cross linked and must
365e06ef14bSDarrick J. Wong 	 * not be freed.  Remove the reverse mapping and move on.  Otherwise,
366e06ef14bSDarrick J. Wong 	 * we were the only owner of the block, so free the extent, which will
367e06ef14bSDarrick J. Wong 	 * also remove the rmap.
368e06ef14bSDarrick J. Wong 	 *
369e06ef14bSDarrick J. Wong 	 * XXX: XFS doesn't support detecting the case where a single block
370e06ef14bSDarrick J. Wong 	 * metadata structure is crosslinked with a multi-block structure
371e06ef14bSDarrick J. Wong 	 * because the buffer cache doesn't detect aliasing problems, so we
372e06ef14bSDarrick J. Wong 	 * can't fix 100% of crosslinking problems (yet).  The verifiers will
373e06ef14bSDarrick J. Wong 	 * blow on writeout, the filesystem will shut down, and the admin gets
374e06ef14bSDarrick J. Wong 	 * to run xfs_repair.
375e06ef14bSDarrick J. Wong 	 */
3761c7ce115SDarrick J. Wong 	if (crosslinked) {
3771c7ce115SDarrick J. Wong 		trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
37877a1396fSDarrick J. Wong 
3791c7ce115SDarrick J. Wong 		rs->force_roll = true;
3801c7ce115SDarrick J. Wong 		return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
3811c7ce115SDarrick J. Wong 				*aglenp, rs->oinfo);
3821c7ce115SDarrick J. Wong 	}
3831c7ce115SDarrick J. Wong 
3841c7ce115SDarrick J. Wong 	trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
3851c7ce115SDarrick J. Wong 
3861c7ce115SDarrick J. Wong 	/*
3871c7ce115SDarrick J. Wong 	 * Invalidate as many buffers as we can, starting at agbno.  If this
3881c7ce115SDarrick J. Wong 	 * function sets *aglenp to zero, the transaction is full of logged
3891c7ce115SDarrick J. Wong 	 * buffer invalidations, so we need to return early so that we can
3901c7ce115SDarrick J. Wong 	 * roll and retry.
3911c7ce115SDarrick J. Wong 	 */
3921c7ce115SDarrick J. Wong 	xreap_agextent_binval(rs, agbno, aglenp);
3931c7ce115SDarrick J. Wong 	if (*aglenp == 0) {
3941c7ce115SDarrick J. Wong 		ASSERT(xreap_want_roll(rs));
3951c7ce115SDarrick J. Wong 		return 0;
3961c7ce115SDarrick J. Wong 	}
3971c7ce115SDarrick J. Wong 
3981c7ce115SDarrick J. Wong 	/* Put blocks back on the AGFL one at a time. */
3991c7ce115SDarrick J. Wong 	if (rs->resv == XFS_AG_RESV_AGFL) {
4001c7ce115SDarrick J. Wong 		ASSERT(*aglenp == 1);
4011c7ce115SDarrick J. Wong 		error = xreap_put_freelist(sc, agbno);
40277a1396fSDarrick J. Wong 		if (error)
40377a1396fSDarrick J. Wong 			return error;
40477a1396fSDarrick J. Wong 
4051c7ce115SDarrick J. Wong 		rs->force_roll = true;
4061c7ce115SDarrick J. Wong 		return 0;
40777a1396fSDarrick J. Wong 	}
40877a1396fSDarrick J. Wong 
4095fee784eSDarrick J. Wong 	/*
4101c7ce115SDarrick J. Wong 	 * Use deferred frees to get rid of the old btree blocks to try to
4111c7ce115SDarrick J. Wong 	 * minimize the window in which we could crash and lose the old blocks.
4125fee784eSDarrick J. Wong 	 */
4131c7ce115SDarrick J. Wong 	error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
4145fee784eSDarrick J. Wong 			rs->resv, true);
415e06ef14bSDarrick J. Wong 	if (error)
416e06ef14bSDarrick J. Wong 		return error;
4171c7ce115SDarrick J. Wong 
4185fee784eSDarrick J. Wong 	rs->deferred++;
4191c7ce115SDarrick J. Wong 	return 0;
4205fee784eSDarrick J. Wong }
4211c7ce115SDarrick J. Wong 
4221c7ce115SDarrick J. Wong /*
4231c7ce115SDarrick J. Wong  * Break an AG metadata extent into sub-extents by fate (crosslinked, not
4241c7ce115SDarrick J. Wong  * crosslinked), and dispose of each sub-extent separately.
4251c7ce115SDarrick J. Wong  */
4261c7ce115SDarrick J. Wong STATIC int
xreap_agmeta_extent(uint64_t fsbno,uint64_t len,void * priv)4271c7ce115SDarrick J. Wong xreap_agmeta_extent(
4281c7ce115SDarrick J. Wong 	uint64_t		fsbno,
4291c7ce115SDarrick J. Wong 	uint64_t		len,
4301c7ce115SDarrick J. Wong 	void			*priv)
4311c7ce115SDarrick J. Wong {
4321c7ce115SDarrick J. Wong 	struct xreap_state	*rs = priv;
4331c7ce115SDarrick J. Wong 	struct xfs_scrub	*sc = rs->sc;
434014ad537SDarrick J. Wong 	xfs_agblock_t		agbno = fsbno;
4351c7ce115SDarrick J. Wong 	xfs_agblock_t		agbno_next = agbno + len;
4361c7ce115SDarrick J. Wong 	int			error = 0;
4371c7ce115SDarrick J. Wong 
4381c7ce115SDarrick J. Wong 	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
4391c7ce115SDarrick J. Wong 	ASSERT(sc->ip == NULL);
4401c7ce115SDarrick J. Wong 
4411c7ce115SDarrick J. Wong 	while (agbno < agbno_next) {
4421c7ce115SDarrick J. Wong 		xfs_extlen_t	aglen;
4431c7ce115SDarrick J. Wong 		bool		crosslinked;
4441c7ce115SDarrick J. Wong 
4451c7ce115SDarrick J. Wong 		error = xreap_agextent_select(rs, agbno, agbno_next,
4461c7ce115SDarrick J. Wong 				&crosslinked, &aglen);
4471c7ce115SDarrick J. Wong 		if (error)
4485fee784eSDarrick J. Wong 			return error;
449e06ef14bSDarrick J. Wong 
4501c7ce115SDarrick J. Wong 		error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
4511c7ce115SDarrick J. Wong 		if (error)
4521c7ce115SDarrick J. Wong 			return error;
4531c7ce115SDarrick J. Wong 
4541c7ce115SDarrick J. Wong 		if (xreap_want_defer_finish(rs)) {
4551c7ce115SDarrick J. Wong 			error = xrep_defer_finish(sc);
4561c7ce115SDarrick J. Wong 			if (error)
4571c7ce115SDarrick J. Wong 				return error;
4581c7ce115SDarrick J. Wong 			xreap_defer_finish_reset(rs);
4591c7ce115SDarrick J. Wong 		} else if (xreap_want_roll(rs)) {
4601c7ce115SDarrick J. Wong 			error = xrep_roll_ag_trans(sc);
4611c7ce115SDarrick J. Wong 			if (error)
4621c7ce115SDarrick J. Wong 				return error;
4631c7ce115SDarrick J. Wong 			xreap_reset(rs);
464e06ef14bSDarrick J. Wong 		}
465e06ef14bSDarrick J. Wong 
4661c7ce115SDarrick J. Wong 		agbno += aglen;
4671c7ce115SDarrick J. Wong 	}
4681c7ce115SDarrick J. Wong 
4691c7ce115SDarrick J. Wong 	return 0;
4701c7ce115SDarrick J. Wong }
4711c7ce115SDarrick J. Wong 
4721c7ce115SDarrick J. Wong /* Dispose of every block of every AG metadata extent in the bitmap. */
473e06ef14bSDarrick J. Wong int
xrep_reap_agblocks(struct xfs_scrub * sc,struct xagb_bitmap * bitmap,const struct xfs_owner_info * oinfo,enum xfs_ag_resv_type type)474014ad537SDarrick J. Wong xrep_reap_agblocks(
475e06ef14bSDarrick J. Wong 	struct xfs_scrub		*sc,
476014ad537SDarrick J. Wong 	struct xagb_bitmap		*bitmap,
477e06ef14bSDarrick J. Wong 	const struct xfs_owner_info	*oinfo,
478e06ef14bSDarrick J. Wong 	enum xfs_ag_resv_type		type)
479e06ef14bSDarrick J. Wong {
4801c7ce115SDarrick J. Wong 	struct xreap_state		rs = {
481e06ef14bSDarrick J. Wong 		.sc			= sc,
482e06ef14bSDarrick J. Wong 		.oinfo			= oinfo,
483e06ef14bSDarrick J. Wong 		.resv			= type,
484e06ef14bSDarrick J. Wong 	};
4855fee784eSDarrick J. Wong 	int				error;
486e06ef14bSDarrick J. Wong 
487e06ef14bSDarrick J. Wong 	ASSERT(xfs_has_rmapbt(sc->mp));
4881c7ce115SDarrick J. Wong 	ASSERT(sc->ip == NULL);
489e06ef14bSDarrick J. Wong 
490014ad537SDarrick J. Wong 	error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
4911c7ce115SDarrick J. Wong 	if (error)
4925fee784eSDarrick J. Wong 		return error;
4935fee784eSDarrick J. Wong 
4941c7ce115SDarrick J. Wong 	if (xreap_dirty(&rs))
4951c7ce115SDarrick J. Wong 		return xrep_defer_finish(sc);
4961c7ce115SDarrick J. Wong 
4971c7ce115SDarrick J. Wong 	return 0;
498e06ef14bSDarrick J. Wong }
499