// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"

struct kmem_cache	*xfs_extfree_item_cache;

struct workqueue_struct *xfs_alloc_wq;

#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

/*
 * Size of the AGFL.  For CRC-enabled filesystems we steal a couple of slots at
 * the beginning of the block for a proper header with the location information
 * and CRC.
 */
unsigned int
xfs_agfl_size(
	struct xfs_mount	*mp)
{
	unsigned int		size = mp->m_sb.sb_sectsize;

	if (xfs_has_crc(mp))
		size -= sizeof(struct xfs_agfl);

	return size / sizeof(xfs_agblock_t);
}
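
/*
 * For example, with 512 byte sectors on a CRC-enabled (v5) filesystem,
 * the AGFL header (magic, seqno, uuid, lsn and crc fields) takes 36
 * bytes, leaving (512 - 36) / 4 = 119 xfs_agblock_t slots; without the
 * header the whole sector holds 512 / 4 = 128 slots.
 */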

unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	if (xfs_has_rmapbt(mp))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_has_finobt(mp))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}

xfs_extlen_t
xfs_prealloc_blocks(
	struct xfs_mount	*mp)
{
	if (xfs_has_reflink(mp))
		return xfs_refc_block(mp) + 1;
	if (xfs_has_rmapbt(mp))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_has_finobt(mp))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}
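
/*
 * For example, on a filesystem with 4096 byte blocks and 512 byte
 * sectors, the superblock, AGF, AGI and AGFL sectors all share AG block
 * 0, so the bnobt, cntbt and inobt roots sit in AG blocks 1, 2 and 3.
 * With the finobt, rmapbt and reflink features all enabled, the finobt,
 * rmapbt and refcountbt roots follow in blocks 4, 5 and 6, and
 * xfs_prealloc_blocks() returns 7.
 */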

/*
 * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
 * guarantee that we can refill the AGFL prior to allocating space in a nearly
 * full AG.  Although the space described by the free space btrees, the
 * blocks used by the freesp btrees themselves, and the blocks owned by the
 * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
 * free space in the AG drop so low that the free space btrees cannot refill an
 * empty AGFL up to the minimum level.  Rather than grind through empty AGs
 * until the fs goes down, we subtract this many AG blocks from the incore
 * fdblocks to ensure user allocation does not overcommit the space the
 * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
 * withhold space from xfs_mod_fdblocks, so we do not account for that here.
 */
#define XFS_ALLOCBT_AGFL_RESERVE	4

/*
 * Compute the number of blocks that we set aside to guarantee the ability to
 * refill the AGFL and handle a full bmap btree split.
 *
 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
 * the AGF buffer (PV 947395), we place constraints on the relationship among
 * actual allocations for data blocks, freelist blocks, and potential file data
 * bmap btree blocks. However, these restrictions may result in no actual space
 * being allocated for a delayed extent; for example, a data block in a certain
 * AG is allocated, but there is no additional block available for the bmap
 * btree block that a split of the file's bmap btree would require. This can
 * lead to an infinite loop when the file gets flushed to disk and all delayed
 * extents need to be actually allocated. To get around this, we explicitly set
 * aside a few blocks which will not be reserved in delayed allocation.
 *
 * For each AG, we need to reserve enough blocks to replenish a totally empty
 * AGFL and 4 more to handle a potential split of the file's bmap btree.
 */
unsigned int
xfs_alloc_set_aside(
	struct xfs_mount	*mp)
{
	return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
}
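
/*
 * For example, a filesystem with 32 AGs sets aside 32 * (4 + 4) = 256
 * blocks of incore fdblocks.
 */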

/*
 * When deciding how much space to allocate out of an AG, we limit the
 * allocation maximum size to the size of the AG. However, we cannot use all
 * the blocks in the AG - some are permanently used by metadata. These
 * blocks are generally:
 *	- the AG superblock, AGF, AGI and AGFL
 *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
 *	  the AGI free inode and rmap btree root blocks.
 *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
 *	- the rmapbt root block
 *
 * The AG headers are sector sized, so the amount of space they take up is
 * dependent on filesystem geometry. The others are all single blocks.
 */
unsigned int
xfs_alloc_ag_max_usable(
	struct xfs_mount	*mp)
{
	unsigned int		blocks;

	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
	blocks += XFS_ALLOCBT_AGFL_RESERVE;
	blocks += 3;			/* AGF, AGI btree root blocks */
	if (xfs_has_finobt(mp))
		blocks++;		/* finobt root block */
	if (xfs_has_rmapbt(mp))
		blocks++;		/* rmap root block */
	if (xfs_has_reflink(mp))
		blocks++;		/* refcount root block */

	return mp->m_sb.sb_agblocks - blocks;
}
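
/*
 * For example, with 4096 byte blocks and 512 byte sectors, the four
 * sector-sized headers round up to a single block, so with the finobt,
 * rmapbt and reflink features all enabled this adds up to
 * 1 + XFS_ALLOCBT_AGFL_RESERVE + 3 + 3 = 11 blocks, and the largest
 * allocation a full-sized AG can ever satisfy is sb_agblocks - 11.
 */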

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int				/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

static inline bool
xfs_alloc_cur_active(
	struct xfs_btree_cur	*cur)
{
	return cur && cur->bc_ag.abt.active;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/* Convert the ondisk btree record to its incore representation. */
void
xfs_alloc_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_alloc_rec_incore	*irec)
{
	irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
}

/* Simple checks for free space records. */
xfs_failaddr_t
xfs_alloc_check_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_perag		*pag = cur->bc_ag.pag;

	if (irec->ar_blockcount == 0)
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
		return __this_address;

	return NULL;
}

static inline int
xfs_alloc_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected at %pS!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
		cur->bc_ag.pag->pag_agno, fa);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", irec->ar_startblock,
		irec->ar_blockcount);
	return -EFSCORRUPTED;
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_alloc_rec_incore irec;
	union xfs_btree_rec	*rec;
	xfs_failaddr_t		fa;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !(*stat))
		return error;

	xfs_alloc_btrec_to_irec(rec, &irec);
	fa = xfs_alloc_check_irec(cur, &irec);
	if (fa)
		return xfs_alloc_complain_bad_rec(cur, fa, &irec);

	*bno = irec.ar_startblock;
	*len = irec.ar_blockcount;
	return 0;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
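
/*
 * For example, with args->alignment = 4 and a busy-trimmed extent of
 * bno = 6, len = 20 (and len >= args->minlen): aligned_bno =
 * roundup(6, 4) = 8, diff = 2, so *resbno = 8 and *reslen = 20 - 2 = 18.
 */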

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before the desired block. The second case is there to
	 * allow for contiguous allocation from the remaining free space if
	 * the file grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
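
/*
 * For example, with wantbno = 100, wantlen = 10 and alignment = 1: a
 * free extent starting at block 120 begins past the target, so we
 * allocate from its start (*newbnop = 120, diff = 20), while a free
 * extent covering blocks 80-119 contains the whole target range and
 * yields a perfect fit (*newbnop = 100, diff = 0).
 */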

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
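
/*
 * For example, with prod = 4, mod = 1 and an initial len of 11 (assuming
 * minlen <= 9 and maxlen > 12): k = 11 % 4 = 3 > mod, so rlen =
 * 11 - (3 - 1) = 9, and 9 % 4 == 1 as required.  Starting from len = 12
 * instead: k = 0 < mod, so rlen = 12 - 4 + (1 - 0) = 9.
 */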

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs))
			return -EFSCORRUPTED;
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
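	/*
	 * For example, carving the allocation [rbno = 12, rlen = 3] out of
	 * the free extent [fbno = 10, flen = 8] leaves freespace on both
	 * sides: nfbno1/nflen1 = [10, 2] in front and nfbno2/nflen2 =
	 * [15, 3] behind.  Carving [10, 3] out of the same extent leaves
	 * only [13, 5].
	 */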
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1))
		return -EFSCORRUPTED;
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * We do not verify the AGFL contents against AGF-based index counters here,
 * even though we may have access to the perag that contains shadow copies. We
 * don't know if the AGF-based counters have been checked, and even if they
 * have, they may still be inconsistent because they haven't yet been reset on
 * the first allocation after the AGF has been read in.
 *
 * This means we can only check that all agfl entries contain valid or null
 * values, because we can't reliably determine the active range to exclude
 * NULLAGBNO as a valid value.
 *
 * However, we can't even do that for v4 format filesystems because there are
 * old versions of mkfs out there that do not initialise the AGFL to known,
 * verifiable values. Hence we can't tell the difference between an AGFL block
 * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
 *
 * As a result, we can only fully validate AGFL block numbers when we pull them
 * from the freelist in xfs_alloc_get_freelist().
 */
static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	if (!xfs_has_crc(mp))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}

static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_failaddr_t	fa;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_has_crc(mp))
		return;

	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_agfl_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}

static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_has_crc(mp))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}

const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};

/*
 * Read in the allocation group free block array.
 */
int
xfs_alloc_read_agfl(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}

/*
 * Block allocation algorithm and data structures.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur		*cnt;	/* btree cursors */
	struct xfs_btree_cur		*bnolt;
	struct xfs_btree_cur		*bnogt;
	xfs_extlen_t			cur_len;/* current search length */
	xfs_agblock_t			rec_bno;/* extent startblock */
	xfs_extlen_t			rec_len;/* extent length */
	xfs_agblock_t			bno;	/* alloc bno */
	xfs_extlen_t			len;	/* alloc len */
	xfs_extlen_t			diff;	/* diff from search bno */
	unsigned int			busy_gen;/* busy state */
	bool				busy;
};

/*
 * Set up cursors, etc. in the extent allocation cursor. This function can be
 * called multiple times to reset an initialized structure without having to
 * reallocate cursors.
 */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}

static void
xfs_alloc_cur_close(
	struct xfs_alloc_cur	*acur,
	bool			error)
{
	int			cur_error = XFS_BTREE_NOERROR;

	if (error)
		cur_error = XFS_BTREE_ERROR;

	if (acur->cnt)
		xfs_btree_del_cursor(acur->cnt, cur_error);
	if (acur->bnolt)
		xfs_btree_del_cursor(acur->bnolt, cur_error);
	if (acur->bnogt)
		xfs_btree_del_cursor(acur->bnogt, cur_error);
	acur->cnt = acur->bnolt = acur->bnogt = NULL;
}

/*
 * Check an extent for allocation and track the best available candidate in the
 * allocation structure. The cursor is deactivated if it has entered an out of
 * range state based on allocation arguments. Optionally return the extent
 * geometry and allocation status if requested by the caller.
 */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	int			*new)
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1))
		return -EFSCORRUPTED;

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}

/*
 * Complete an allocation of a candidate extent. Remove the extent from both
 * trees and update the args structure.
 */
STATIC int
xfs_alloc_cur_finish(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	int			error;

	ASSERT(acur->cnt && acur->bnolt);
	ASSERT(acur->bno >= acur->rec_bno);
	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));

	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
				      acur->rec_len, acur->bno, acur->len, 0);
	if (error)
		return error;

	args->agbno = acur->bno;
	args->len = acur->len;
	args->wasfromfl = 0;

	trace_xfs_alloc_cur(args);
	return 0;
}

/*
 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
 * bno optimized lookup to search for extents with ideal size and locality.
 */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,
	struct xfs_alloc_cur		*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

1054dc8e69bdSBrian Foster 	/*
1055dc8e69bdSBrian Foster 	 * Increment the search key by one if we haven't yet found an
1056dc8e69bdSBrian Foster 	 * allocation candidate or if the extent we found was at least twice
1057dc8e69bdSBrian Foster 	 * the old key. Otherwise, double the search key to optimize the search.
1058dc8e69bdSBrian Foster 	 * Efficiency is more important here than absolute best locality.
1059dc8e69bdSBrian Foster 	 */
1060dc8e69bdSBrian Foster 	cur_len <<= 1;
1061dc8e69bdSBrian Foster 	if (!acur->len || acur->cur_len >= cur_len)
1062dc8e69bdSBrian Foster 		acur->cur_len++;
1063dc8e69bdSBrian Foster 	else
1064dc8e69bdSBrian Foster 		acur->cur_len = cur_len;
1065dc8e69bdSBrian Foster 
1066dc8e69bdSBrian Foster 	return error;
1067dc8e69bdSBrian Foster }
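/*
 * Search key progression for the lookup above, with hypothetical
 * numbers: a lookup at cur_len == 4 that lands on a record of length 5
 * first bumps acur->cur_len to 5. If no candidate has been found yet,
 * or the record was at least twice the old key, the next lookup asks
 * for 6, stepping just past the size class already examined. Otherwise
 * the key doubles to 8, so the scan converges in a logarithmic number
 * of lookups instead of visiting every size class in the cntbt.
 */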
1068dc8e69bdSBrian Foster 
1069dc8e69bdSBrian Foster /*
1070c63cdd4fSBrian Foster  * Deal with the case where only small freespaces remain. Either return the
1071c63cdd4fSBrian Foster  * contents of the last freespace record, or allocate space from the freelist if
1072c63cdd4fSBrian Foster  * there is nothing in the tree.
1073c63cdd4fSBrian Foster  */
1074c63cdd4fSBrian Foster STATIC int			/* error */
1075c63cdd4fSBrian Foster xfs_alloc_ag_vextent_small(
1076c63cdd4fSBrian Foster 	struct xfs_alloc_arg	*args,	/* allocation argument structure */
1077c63cdd4fSBrian Foster 	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
1078c63cdd4fSBrian Foster 	xfs_agblock_t		*fbnop,	/* result block number */
1079c63cdd4fSBrian Foster 	xfs_extlen_t		*flenp,	/* result length */
1080c63cdd4fSBrian Foster 	int			*stat)	/* status: 0-freelist, 1-normal/none */
1081c63cdd4fSBrian Foster {
10829798f615SChristoph Hellwig 	struct xfs_agf		*agf = args->agbp->b_addr;
1083c63cdd4fSBrian Foster 	int			error = 0;
1084c63cdd4fSBrian Foster 	xfs_agblock_t		fbno = NULLAGBLOCK;
1085c63cdd4fSBrian Foster 	xfs_extlen_t		flen = 0;
10866691cd92SBrian Foster 	int			i = 0;
1087c63cdd4fSBrian Foster 
10886691cd92SBrian Foster 	/*
10896691cd92SBrian Foster 	 * If a cntbt cursor is provided, try to allocate the largest record in
10906691cd92SBrian Foster 	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
10916691cd92SBrian Foster 	 * allocation. Make sure to respect minleft even when pulling from the
10926691cd92SBrian Foster 	 * freelist.
10936691cd92SBrian Foster 	 */
10946691cd92SBrian Foster 	if (ccur)
1095c63cdd4fSBrian Foster 		error = xfs_btree_decrement(ccur, 0, &i);
1096c63cdd4fSBrian Foster 	if (error)
1097c63cdd4fSBrian Foster 		goto error;
1098c63cdd4fSBrian Foster 	if (i) {
1099c63cdd4fSBrian Foster 		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1100c63cdd4fSBrian Foster 		if (error)
1101c63cdd4fSBrian Foster 			goto error;
1102f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1103f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
1104f9e03706SDarrick J. Wong 			goto error;
1105f9e03706SDarrick J. Wong 		}
1106c63cdd4fSBrian Foster 		goto out;
1107c63cdd4fSBrian Foster 	}
1108c63cdd4fSBrian Foster 
1109c63cdd4fSBrian Foster 	if (args->minlen != 1 || args->alignment != 1 ||
1110c63cdd4fSBrian Foster 	    args->resv == XFS_AG_RESV_AGFL ||
11119798f615SChristoph Hellwig 	    be32_to_cpu(agf->agf_flcount) <= args->minleft)
1112c63cdd4fSBrian Foster 		goto out;
1113c63cdd4fSBrian Foster 
111449f0d84eSDave Chinner 	error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
111549f0d84eSDave Chinner 			&fbno, 0);
1116c63cdd4fSBrian Foster 	if (error)
1117c63cdd4fSBrian Foster 		goto error;
1118c63cdd4fSBrian Foster 	if (fbno == NULLAGBLOCK)
1119c63cdd4fSBrian Foster 		goto out;
1120c63cdd4fSBrian Foster 
112145d06621SDave Chinner 	xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1122c34d570dSChristoph Hellwig 			      (args->datatype & XFS_ALLOC_NOBUSY));
1123c63cdd4fSBrian Foster 
1124c34d570dSChristoph Hellwig 	if (args->datatype & XFS_ALLOC_USERDATA) {
1125c63cdd4fSBrian Foster 		struct xfs_buf	*bp;
1126c63cdd4fSBrian Foster 
1127ee647f85SDarrick J. Wong 		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1128ee647f85SDarrick J. Wong 				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1129ee647f85SDarrick J. Wong 				args->mp->m_bsize, 0, &bp);
1130ee647f85SDarrick J. Wong 		if (error)
1131c63cdd4fSBrian Foster 			goto error;
1132c63cdd4fSBrian Foster 		xfs_trans_binval(args->tp, bp);
1133c63cdd4fSBrian Foster 	}
11347e36a3a6SBrian Foster 	*fbnop = args->agbno = fbno;
11357e36a3a6SBrian Foster 	*flenp = args->len = 1;
11369798f615SChristoph Hellwig 	if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1137f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
1138f9e03706SDarrick J. Wong 		goto error;
1139f9e03706SDarrick J. Wong 	}
1140c63cdd4fSBrian Foster 	args->wasfromfl = 1;
1141c63cdd4fSBrian Foster 	trace_xfs_alloc_small_freelist(args);
1142c63cdd4fSBrian Foster 
1143c63cdd4fSBrian Foster 	/*
1144c63cdd4fSBrian Foster 	 * If we're feeding an AGFL block to something that doesn't live in the
1145c63cdd4fSBrian Foster 	 * free space, we need to clear out the OWN_AG rmap.
1146c63cdd4fSBrian Foster 	 */
1147fa9c3c19SDave Chinner 	error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1148c63cdd4fSBrian Foster 			      &XFS_RMAP_OINFO_AG);
1149c63cdd4fSBrian Foster 	if (error)
1150c63cdd4fSBrian Foster 		goto error;
1151c63cdd4fSBrian Foster 
1152c63cdd4fSBrian Foster 	*stat = 0;
1153c63cdd4fSBrian Foster 	return 0;
1154c63cdd4fSBrian Foster 
1155c63cdd4fSBrian Foster out:
1156c63cdd4fSBrian Foster 	/*
1157c63cdd4fSBrian Foster 	 * Can't do the allocation, give up.
1158c63cdd4fSBrian Foster 	 */
1159c63cdd4fSBrian Foster 	if (flen < args->minlen) {
1160c63cdd4fSBrian Foster 		args->agbno = NULLAGBLOCK;
1161c63cdd4fSBrian Foster 		trace_xfs_alloc_small_notenough(args);
1162c63cdd4fSBrian Foster 		flen = 0;
1163c63cdd4fSBrian Foster 	}
1164c63cdd4fSBrian Foster 	*fbnop = fbno;
1165c63cdd4fSBrian Foster 	*flenp = flen;
1166c63cdd4fSBrian Foster 	*stat = 1;
1167c63cdd4fSBrian Foster 	trace_xfs_alloc_small_done(args);
1168c63cdd4fSBrian Foster 	return 0;
1169c63cdd4fSBrian Foster 
1170c63cdd4fSBrian Foster error:
1171c63cdd4fSBrian Foster 	trace_xfs_alloc_small_error(args);
1172c63cdd4fSBrian Foster 	return error;
1173c63cdd4fSBrian Foster }
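/*
 * The *stat convention above, by example: a one-block AGFL allocation
 * returns with *stat == 0 and args->wasfromfl == 1, telling the caller
 * to skip the free space btree fixup entirely; an allocation cut from
 * the last cntbt record, or a clean failure with fbno == NULLAGBLOCK,
 * returns *stat == 1 and the caller proceeds (or gives up) normally.
 */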
1174c63cdd4fSBrian Foster 
1175c63cdd4fSBrian Foster /*
117630f712c9SDave Chinner  * Allocate a variable extent at exactly agno/bno.
117730f712c9SDave Chinner  * Extent's length (returned in *len) will be between minlen and maxlen,
117830f712c9SDave Chinner  * and of the form k * prod + mod unless there's nothing that large.
117930f712c9SDave Chinner  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
118030f712c9SDave Chinner  */
118130f712c9SDave Chinner STATIC int			/* error */
118230f712c9SDave Chinner xfs_alloc_ag_vextent_exact(
118330f712c9SDave Chinner 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
118430f712c9SDave Chinner {
11859798f615SChristoph Hellwig 	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1186ae127f08SDarrick J. Wong 	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
1187ae127f08SDarrick J. Wong 	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
118830f712c9SDave Chinner 	int		error;
118930f712c9SDave Chinner 	xfs_agblock_t	fbno;	/* start block of found extent */
119030f712c9SDave Chinner 	xfs_extlen_t	flen;	/* length of found extent */
1191ebf55872SChristoph Hellwig 	xfs_agblock_t	tbno;	/* start block of busy extent */
1192ebf55872SChristoph Hellwig 	xfs_extlen_t	tlen;	/* length of busy extent */
1193ebf55872SChristoph Hellwig 	xfs_agblock_t	tend;	/* end block of busy extent */
119430f712c9SDave Chinner 	int		i;	/* success/failure of operation */
1195ebf55872SChristoph Hellwig 	unsigned	busy_gen;
119630f712c9SDave Chinner 
119730f712c9SDave Chinner 	ASSERT(args->alignment == 1);
119830f712c9SDave Chinner 
119930f712c9SDave Chinner 	/*
120030f712c9SDave Chinner 	 * Allocate/initialize a cursor for the by-number freespace btree.
120130f712c9SDave Chinner 	 */
120230f712c9SDave Chinner 	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1203289d38d2SDave Chinner 					  args->pag, XFS_BTNUM_BNO);
120430f712c9SDave Chinner 
120530f712c9SDave Chinner 	/*
120630f712c9SDave Chinner 	 * Look up bno and minlen in the btree (minlen is irrelevant, really).
120730f712c9SDave Chinner 	 * Look for the closest free block <= bno; it must contain bno
120830f712c9SDave Chinner 	 * if any free block does.
120930f712c9SDave Chinner 	 */
121030f712c9SDave Chinner 	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
121130f712c9SDave Chinner 	if (error)
121230f712c9SDave Chinner 		goto error0;
121330f712c9SDave Chinner 	if (!i)
121430f712c9SDave Chinner 		goto not_found;
121530f712c9SDave Chinner 
121630f712c9SDave Chinner 	/*
121730f712c9SDave Chinner 	 * Grab the freespace record.
121830f712c9SDave Chinner 	 */
121930f712c9SDave Chinner 	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
122030f712c9SDave Chinner 	if (error)
122130f712c9SDave Chinner 		goto error0;
1222f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1223f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
1224f9e03706SDarrick J. Wong 		goto error0;
1225f9e03706SDarrick J. Wong 	}
122630f712c9SDave Chinner 	ASSERT(fbno <= args->agbno);
122730f712c9SDave Chinner 
122830f712c9SDave Chinner 	/*
122930f712c9SDave Chinner 	 * Check for overlapping busy extents.
123030f712c9SDave Chinner 	 */
1231ebf55872SChristoph Hellwig 	tbno = fbno;
1232ebf55872SChristoph Hellwig 	tlen = flen;
1233ebf55872SChristoph Hellwig 	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
123430f712c9SDave Chinner 
123530f712c9SDave Chinner 	/*
123630f712c9SDave Chinner 	 * Give up if the start of the extent is busy, or the freespace isn't
123730f712c9SDave Chinner 	 * long enough for the minimum request.
123830f712c9SDave Chinner 	 */
123930f712c9SDave Chinner 	if (tbno > args->agbno)
124030f712c9SDave Chinner 		goto not_found;
124130f712c9SDave Chinner 	if (tlen < args->minlen)
124230f712c9SDave Chinner 		goto not_found;
124330f712c9SDave Chinner 	tend = tbno + tlen;
124430f712c9SDave Chinner 	if (tend < args->agbno + args->minlen)
124530f712c9SDave Chinner 		goto not_found;
124630f712c9SDave Chinner 
124730f712c9SDave Chinner 	/*
124830f712c9SDave Chinner 	 * End of extent will be the smaller of the freespace end and the
124930f712c9SDave Chinner 	 * maximal requested end.
125030f712c9SDave Chinner 	 *
125130f712c9SDave Chinner 	 * Fix the length according to mod and prod if given.
125230f712c9SDave Chinner 	 */
125330f712c9SDave Chinner 	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
125430f712c9SDave Chinner 						- args->agbno;
125530f712c9SDave Chinner 	xfs_alloc_fix_len(args);
125630f712c9SDave Chinner 	ASSERT(args->agbno + args->len <= tend);
125730f712c9SDave Chinner 
125830f712c9SDave Chinner 	/*
125930f712c9SDave Chinner 	 * We are allocating agbno for args->len
126030f712c9SDave Chinner 	 * Allocate/initialize a cursor for the by-size btree.
126130f712c9SDave Chinner 	 */
126230f712c9SDave Chinner 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1263289d38d2SDave Chinner 					args->pag, XFS_BTNUM_CNT);
12649798f615SChristoph Hellwig 	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
126530f712c9SDave Chinner 	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
126630f712c9SDave Chinner 				      args->len, XFSA_FIXUP_BNO_OK);
126730f712c9SDave Chinner 	if (error) {
126830f712c9SDave Chinner 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
126930f712c9SDave Chinner 		goto error0;
127030f712c9SDave Chinner 	}
127130f712c9SDave Chinner 
127230f712c9SDave Chinner 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
127330f712c9SDave Chinner 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
127430f712c9SDave Chinner 
127530f712c9SDave Chinner 	args->wasfromfl = 0;
127630f712c9SDave Chinner 	trace_xfs_alloc_exact_done(args);
127730f712c9SDave Chinner 	return 0;
127830f712c9SDave Chinner 
127930f712c9SDave Chinner not_found:
128030f712c9SDave Chinner 	/* Didn't find it, return null. */
128130f712c9SDave Chinner 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
128230f712c9SDave Chinner 	args->agbno = NULLAGBLOCK;
128330f712c9SDave Chinner 	trace_xfs_alloc_exact_notfound(args);
128430f712c9SDave Chinner 	return 0;
128530f712c9SDave Chinner 
128630f712c9SDave Chinner error0:
128730f712c9SDave Chinner 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
128830f712c9SDave Chinner 	trace_xfs_alloc_exact_error(args);
128930f712c9SDave Chinner 	return error;
129030f712c9SDave Chinner }
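/*
 * Worked example of the busy trim above (hypothetical numbers): for a
 * request at agbno 100 with minlen 4, the bnobt lookup may return the
 * free extent [96, 112) (fbno 96, flen 16). If blocks [96, 102) are
 * busy, trimming yields tbno = 102 and tlen = 10; since tbno > agbno,
 * the exact allocation bails out rather than handing out busy blocks.
 */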
129130f712c9SDave Chinner 
129230f712c9SDave Chinner /*
129378d7aabdSBrian Foster  * Search a given number of btree records in a given direction. Check each
129478d7aabdSBrian Foster  * record against the good extent we've already found.
129530f712c9SDave Chinner  */
129630f712c9SDave Chinner STATIC int
129778d7aabdSBrian Foster xfs_alloc_walk_iter(
1298fec0afdaSBrian Foster 	struct xfs_alloc_arg	*args,
1299fec0afdaSBrian Foster 	struct xfs_alloc_cur	*acur,
1300fec0afdaSBrian Foster 	struct xfs_btree_cur	*cur,
130178d7aabdSBrian Foster 	bool			increment,
130278d7aabdSBrian Foster 	bool			find_one, /* quit on first candidate */
130378d7aabdSBrian Foster 	int			count,    /* rec count (-1 for infinite) */
130478d7aabdSBrian Foster 	int			*stat)
130530f712c9SDave Chinner {
130630f712c9SDave Chinner 	int			error;
130730f712c9SDave Chinner 	int			i;
130830f712c9SDave Chinner 
130978d7aabdSBrian Foster 	*stat = 0;
131078d7aabdSBrian Foster 
131130f712c9SDave Chinner 	/*
1312fec0afdaSBrian Foster 	 * Search so long as the cursor is active or we find a better extent.
1313fec0afdaSBrian Foster 	 * The cursor is deactivated if it extends beyond the range of the
1314fec0afdaSBrian Foster 	 * current allocation candidate.
131530f712c9SDave Chinner 	 */
131678d7aabdSBrian Foster 	while (xfs_alloc_cur_active(cur) && count) {
1317fec0afdaSBrian Foster 		error = xfs_alloc_cur_check(args, acur, cur, &i);
131830f712c9SDave Chinner 		if (error)
131930f712c9SDave Chinner 			return error;
132078d7aabdSBrian Foster 		if (i == 1) {
132178d7aabdSBrian Foster 			*stat = 1;
132278d7aabdSBrian Foster 			if (find_one)
1323fec0afdaSBrian Foster 				break;
132478d7aabdSBrian Foster 		}
1325fec0afdaSBrian Foster 		if (!xfs_alloc_cur_active(cur))
1326fec0afdaSBrian Foster 			break;
1327fec0afdaSBrian Foster 
1328fec0afdaSBrian Foster 		if (increment)
1329fec0afdaSBrian Foster 			error = xfs_btree_increment(cur, 0, &i);
1330fec0afdaSBrian Foster 		else
1331fec0afdaSBrian Foster 			error = xfs_btree_decrement(cur, 0, &i);
1332fec0afdaSBrian Foster 		if (error)
1333fec0afdaSBrian Foster 			return error;
1334fec0afdaSBrian Foster 		if (i == 0)
1335c4aa10d0SDave Chinner 			cur->bc_ag.abt.active = false;
133678d7aabdSBrian Foster 
133778d7aabdSBrian Foster 		if (count > 0)
133878d7aabdSBrian Foster 			count--;
1339fec0afdaSBrian Foster 	}
1340fec0afdaSBrian Foster 
1341fec0afdaSBrian Foster 	return 0;
134230f712c9SDave Chinner }
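/*
 * Illustrative parameterizations of the walker above: the last-block
 * scan calls it with increment = true, find_one = false, count = -1 to
 * sweep every remaining cntbt record, while the locality search passes
 * find_one = true, count = 1 to test exactly one bnobt record per pass
 * in each direction before giving the other cursors a turn.
 */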
134330f712c9SDave Chinner 
134430f712c9SDave Chinner /*
1345dc8e69bdSBrian Foster  * Search the by-bno and by-size btrees in parallel in search of an extent with
1346dc8e69bdSBrian Foster  * ideal locality based on the NEAR mode ->agbno locality hint.
13470e26d5caSBrian Foster  */
13480e26d5caSBrian Foster STATIC int
1349dc8e69bdSBrian Foster xfs_alloc_ag_vextent_locality(
13500e26d5caSBrian Foster 	struct xfs_alloc_arg	*args,
13510e26d5caSBrian Foster 	struct xfs_alloc_cur	*acur,
13520e26d5caSBrian Foster 	int			*stat)
13530e26d5caSBrian Foster {
13540e26d5caSBrian Foster 	struct xfs_btree_cur	*fbcur = NULL;
13550e26d5caSBrian Foster 	int			error;
13560e26d5caSBrian Foster 	int			i;
13570e26d5caSBrian Foster 	bool			fbinc;
13580e26d5caSBrian Foster 
13590e26d5caSBrian Foster 	ASSERT(acur->len == 0);
13600e26d5caSBrian Foster 
13610e26d5caSBrian Foster 	*stat = 0;
13620e26d5caSBrian Foster 
1363dc8e69bdSBrian Foster 	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1364dc8e69bdSBrian Foster 	if (error)
1365dc8e69bdSBrian Foster 		return error;
13660e26d5caSBrian Foster 	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
13670e26d5caSBrian Foster 	if (error)
13680e26d5caSBrian Foster 		return error;
13690e26d5caSBrian Foster 	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
13700e26d5caSBrian Foster 	if (error)
13710e26d5caSBrian Foster 		return error;
13720e26d5caSBrian Foster 
13730e26d5caSBrian Foster 	/*
1374dc8e69bdSBrian Foster 	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
1375dc8e69bdSBrian Foster 	 * right and lookup the closest extent to the locality hint for each
1376dc8e69bdSBrian Foster 	 * extent size key in the cntbt. The entire search terminates
1377dc8e69bdSBrian Foster 	 * immediately on a bnobt hit because that means we've found best case
1378dc8e69bdSBrian Foster 	 * locality. Otherwise the search continues until the cntbt cursor runs
1379dc8e69bdSBrian Foster 	 * off the end of the tree. If no allocation candidate is found at this
1380dc8e69bdSBrian Foster 	 * point, give up on locality, walk backwards from the end of the cntbt
1381dc8e69bdSBrian Foster 	 * and take the first available extent.
1382dc8e69bdSBrian Foster 	 *
1383dc8e69bdSBrian Foster 	 * The parallel tree searches balance each other out to provide fairly
1384dc8e69bdSBrian Foster 	 * consistent performance for various situations. The bnobt search can
1385dc8e69bdSBrian Foster 	 * have pathological behavior in the worst case scenario of larger
1386dc8e69bdSBrian Foster 	 * allocation requests and fragmented free space. On the other hand, the
1387dc8e69bdSBrian Foster 	 * bnobt is able to satisfy most smaller allocation requests much more
1388dc8e69bdSBrian Foster 	 * quickly than the cntbt. The cntbt search can sift through fragmented
1389dc8e69bdSBrian Foster 	 * free space and sets of free extents for larger allocation requests
1390dc8e69bdSBrian Foster 	 * more quickly than the bnobt. Since the locality hint is just a hint
1391dc8e69bdSBrian Foster 	 * and we don't want to scan the entire bnobt for perfect locality, the
1392dc8e69bdSBrian Foster 	 * cntbt search essentially bounds the bnobt search such that we can
1393dc8e69bdSBrian Foster 	 * find good enough locality at reasonable performance in most cases.
13940e26d5caSBrian Foster 	 */
13950e26d5caSBrian Foster 	while (xfs_alloc_cur_active(acur->bnolt) ||
1396dc8e69bdSBrian Foster 	       xfs_alloc_cur_active(acur->bnogt) ||
1397dc8e69bdSBrian Foster 	       xfs_alloc_cur_active(acur->cnt)) {
1398dc8e69bdSBrian Foster 
1399dc8e69bdSBrian Foster 		trace_xfs_alloc_cur_lookup(args);
1400dc8e69bdSBrian Foster 
1401dc8e69bdSBrian Foster 		/*
1402dc8e69bdSBrian Foster 		 * Search the bnobt left and right. In the case of a hit, finish
1403dc8e69bdSBrian Foster 		 * the search in the opposite direction and we're done.
1404dc8e69bdSBrian Foster 		 */
14050e26d5caSBrian Foster 		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
14060e26d5caSBrian Foster 					    true, 1, &i);
14070e26d5caSBrian Foster 		if (error)
14080e26d5caSBrian Foster 			return error;
14090e26d5caSBrian Foster 		if (i == 1) {
14100e26d5caSBrian Foster 			trace_xfs_alloc_cur_left(args);
14110e26d5caSBrian Foster 			fbcur = acur->bnogt;
14120e26d5caSBrian Foster 			fbinc = true;
14130e26d5caSBrian Foster 			break;
14140e26d5caSBrian Foster 		}
14150e26d5caSBrian Foster 		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
14160e26d5caSBrian Foster 					    1, &i);
14170e26d5caSBrian Foster 		if (error)
14180e26d5caSBrian Foster 			return error;
14190e26d5caSBrian Foster 		if (i == 1) {
14200e26d5caSBrian Foster 			trace_xfs_alloc_cur_right(args);
14210e26d5caSBrian Foster 			fbcur = acur->bnolt;
14220e26d5caSBrian Foster 			fbinc = false;
14230e26d5caSBrian Foster 			break;
14240e26d5caSBrian Foster 		}
1425dc8e69bdSBrian Foster 
1426dc8e69bdSBrian Foster 		/*
1427dc8e69bdSBrian Foster 		 * Check the extent with best locality based on the current
1428dc8e69bdSBrian Foster 		 * extent size search key and keep track of the best candidate.
1429dc8e69bdSBrian Foster 		 */
1430dc8e69bdSBrian Foster 		error = xfs_alloc_cntbt_iter(args, acur);
1431dc8e69bdSBrian Foster 		if (error)
1432dc8e69bdSBrian Foster 			return error;
1433dc8e69bdSBrian Foster 		if (!xfs_alloc_cur_active(acur->cnt)) {
1434dc8e69bdSBrian Foster 			trace_xfs_alloc_cur_lookup_done(args);
1435dc8e69bdSBrian Foster 			break;
1436dc8e69bdSBrian Foster 		}
14370e26d5caSBrian Foster 	}
14380e26d5caSBrian Foster 
1439dc8e69bdSBrian Foster 	/*
1440dc8e69bdSBrian Foster 	 * If we failed to find anything due to busy extents, return empty
1441dc8e69bdSBrian Foster 	 * handed so the caller can flush and retry. If no busy extents were
1442dc8e69bdSBrian Foster 	 * found, walk backwards from the end of the cntbt as a last resort.
1443dc8e69bdSBrian Foster 	 */
1444dc8e69bdSBrian Foster 	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1445dc8e69bdSBrian Foster 		error = xfs_btree_decrement(acur->cnt, 0, &i);
1446dc8e69bdSBrian Foster 		if (error)
1447dc8e69bdSBrian Foster 			return error;
1448dc8e69bdSBrian Foster 		if (i) {
1449c4aa10d0SDave Chinner 			acur->cnt->bc_ag.abt.active = true;
1450dc8e69bdSBrian Foster 			fbcur = acur->cnt;
1451dc8e69bdSBrian Foster 			fbinc = false;
1452dc8e69bdSBrian Foster 		}
1453dc8e69bdSBrian Foster 	}
1454dc8e69bdSBrian Foster 
1455dc8e69bdSBrian Foster 	/*
1456dc8e69bdSBrian Foster 	 * Search in the opposite direction for a better entry in the case of
1457dc8e69bdSBrian Foster 	 * a bnobt hit or walk backwards from the end of the cntbt.
1458dc8e69bdSBrian Foster 	 */
14590e26d5caSBrian Foster 	if (fbcur) {
14600e26d5caSBrian Foster 		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
14610e26d5caSBrian Foster 					    &i);
14620e26d5caSBrian Foster 		if (error)
14630e26d5caSBrian Foster 			return error;
14640e26d5caSBrian Foster 	}
14650e26d5caSBrian Foster 
14660e26d5caSBrian Foster 	if (acur->len)
14670e26d5caSBrian Foster 		*stat = 1;
14680e26d5caSBrian Foster 
14690e26d5caSBrian Foster 	return 0;
14700e26d5caSBrian Foster }
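/*
 * One iteration of the parallel search above, sketched with a
 * hypothetical layout: with the hint at agbno 1000, the bnobt-left
 * cursor might sit on [900, 902) and the bnobt-right cursor on
 * [1100, 1102) while the cntbt cursor holds the closest extent of the
 * current size class. A hit on either bnobt cursor ends the loop at
 * once (best possible locality); a cntbt hit merely records a
 * candidate and raises the size key, bounding how far the bnobt
 * cursors have to walk before "good enough" wins.
 */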
14710e26d5caSBrian Foster 
14725113f8ecSDarrick J. Wong /* Check the last block of the cnt btree for allocations. */
14735113f8ecSDarrick J. Wong static int
14745113f8ecSDarrick J. Wong xfs_alloc_ag_vextent_lastblock(
14755113f8ecSDarrick J. Wong 	struct xfs_alloc_arg	*args,
14765113f8ecSDarrick J. Wong 	struct xfs_alloc_cur	*acur,
14775113f8ecSDarrick J. Wong 	xfs_agblock_t		*bno,
14785113f8ecSDarrick J. Wong 	xfs_extlen_t		*len,
14795113f8ecSDarrick J. Wong 	bool			*allocated)
14805113f8ecSDarrick J. Wong {
14815113f8ecSDarrick J. Wong 	int			error;
14825113f8ecSDarrick J. Wong 	int			i;
14835113f8ecSDarrick J. Wong 
14845113f8ecSDarrick J. Wong #ifdef DEBUG
14855113f8ecSDarrick J. Wong 	/* Randomly don't execute the first algorithm. */
14868032bf12SJason A. Donenfeld 	if (get_random_u32_below(2))
14875113f8ecSDarrick J. Wong 		return 0;
14885113f8ecSDarrick J. Wong #endif
14895113f8ecSDarrick J. Wong 
14905113f8ecSDarrick J. Wong 	/*
14915113f8ecSDarrick J. Wong 	 * Start from the entry that the lookup found and sequence through all
14925113f8ecSDarrick J. Wong 	 * larger free blocks.  If we're actually pointing at a record smaller
14935113f8ecSDarrick J. Wong 	 * than maxlen, go to the start of this block and skip all those smaller
14945113f8ecSDarrick J. Wong 	 * than minlen.
14955113f8ecSDarrick J. Wong 	 */
149677ca1eedSDarrick J. Wong 	if (*len || args->alignment > 1) {
14976ca444cfSDarrick J. Wong 		acur->cnt->bc_levels[0].ptr = 1;
14985113f8ecSDarrick J. Wong 		do {
14995113f8ecSDarrick J. Wong 			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
15005113f8ecSDarrick J. Wong 			if (error)
15015113f8ecSDarrick J. Wong 				return error;
1502f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(args->mp, i != 1))
1503f9e03706SDarrick J. Wong 				return -EFSCORRUPTED;
15045113f8ecSDarrick J. Wong 			if (*len >= args->minlen)
15055113f8ecSDarrick J. Wong 				break;
15065113f8ecSDarrick J. Wong 			error = xfs_btree_increment(acur->cnt, 0, &i);
15075113f8ecSDarrick J. Wong 			if (error)
15085113f8ecSDarrick J. Wong 				return error;
15095113f8ecSDarrick J. Wong 		} while (i);
15105113f8ecSDarrick J. Wong 		ASSERT(*len >= args->minlen);
15115113f8ecSDarrick J. Wong 		if (!i)
15125113f8ecSDarrick J. Wong 			return 0;
15135113f8ecSDarrick J. Wong 	}
15145113f8ecSDarrick J. Wong 
15155113f8ecSDarrick J. Wong 	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
15165113f8ecSDarrick J. Wong 	if (error)
15175113f8ecSDarrick J. Wong 		return error;
15185113f8ecSDarrick J. Wong 
15195113f8ecSDarrick J. Wong 	/*
15205113f8ecSDarrick J. Wong 	 * It didn't work.  We COULD be in a case where there's a good record
15215113f8ecSDarrick J. Wong 	 * somewhere, so try again.
15225113f8ecSDarrick J. Wong 	 */
15235113f8ecSDarrick J. Wong 	if (acur->len == 0)
15245113f8ecSDarrick J. Wong 		return 0;
15255113f8ecSDarrick J. Wong 
15265113f8ecSDarrick J. Wong 	trace_xfs_alloc_near_first(args);
15275113f8ecSDarrick J. Wong 	*allocated = true;
15285113f8ecSDarrick J. Wong 	return 0;
15295113f8ecSDarrick J. Wong }
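/*
 * Example of the record-skipping loop above (hypothetical leaf): if the
 * last cntbt block holds records of lengths {2, 3, 5, 9} and
 * minlen == 4, the cursor is reset to the first record of the block and
 * stepped past 2 and 3, so the candidate walk starts at length 5.
 */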
15305113f8ecSDarrick J. Wong 
15310e26d5caSBrian Foster /*
153230f712c9SDave Chinner  * Allocate a variable extent near bno in the allocation group agno.
153330f712c9SDave Chinner  * Extent's length (returned in len) will be between minlen and maxlen,
153430f712c9SDave Chinner  * and of the form k * prod + mod unless there's nothing that large.
153530f712c9SDave Chinner  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
153630f712c9SDave Chinner  */
1537f5e7dbeaSBrian Foster STATIC int
153830f712c9SDave Chinner xfs_alloc_ag_vextent_near(
15396a2a9d77SDave Chinner 	struct xfs_alloc_arg	*args,
15406a2a9d77SDave Chinner 	uint32_t		alloc_flags)
154130f712c9SDave Chinner {
1542f5e7dbeaSBrian Foster 	struct xfs_alloc_cur	acur = {};
154330f712c9SDave Chinner 	int			error;		/* error code */
154430f712c9SDave Chinner 	int			i;		/* result code, temporary */
1545fec0afdaSBrian Foster 	xfs_agblock_t		bno;
1546fec0afdaSBrian Foster 	xfs_extlen_t		len;
154730f712c9SDave Chinner 
1548cf085a1bSJoe Perches 	/* handle uninitialized agbno range so caller doesn't have to */
1549bfe46d4eSBrian Foster 	if (!args->min_agbno && !args->max_agbno)
1550bfe46d4eSBrian Foster 		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1551bfe46d4eSBrian Foster 	ASSERT(args->min_agbno <= args->max_agbno);
1552bfe46d4eSBrian Foster 
1553bfe46d4eSBrian Foster 	/* clamp agbno to the range if it's outside */
1554bfe46d4eSBrian Foster 	if (args->agbno < args->min_agbno)
1555bfe46d4eSBrian Foster 		args->agbno = args->min_agbno;
1556bfe46d4eSBrian Foster 	if (args->agbno > args->max_agbno)
1557bfe46d4eSBrian Foster 		args->agbno = args->max_agbno;
1558bfe46d4eSBrian Foster 
15598ebbf262SDave Chinner 	/* Retry once quickly if we find busy extents before blocking. */
15608ebbf262SDave Chinner 	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
156130f712c9SDave Chinner restart:
1562fec0afdaSBrian Foster 	len = 0;
156330f712c9SDave Chinner 
156430f712c9SDave Chinner 	/*
1565f5e7dbeaSBrian Foster 	 * Set up cursors and see if there are any free extents as big as
1566f5e7dbeaSBrian Foster 	 * maxlen. If not, pick the last entry in the tree unless the tree is
1567f5e7dbeaSBrian Foster 	 * empty.
156830f712c9SDave Chinner 	 */
1569f5e7dbeaSBrian Foster 	error = xfs_alloc_cur_setup(args, &acur);
1570f5e7dbeaSBrian Foster 	if (error == -ENOSPC) {
1571fec0afdaSBrian Foster 		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1572fec0afdaSBrian Foster 				&len, &i);
1573f5e7dbeaSBrian Foster 		if (error)
1574f5e7dbeaSBrian Foster 			goto out;
1575fec0afdaSBrian Foster 		if (i == 0 || len == 0) {
157630f712c9SDave Chinner 			trace_xfs_alloc_near_noentry(args);
1577f5e7dbeaSBrian Foster 			goto out;
157830f712c9SDave Chinner 		}
157930f712c9SDave Chinner 		ASSERT(i == 1);
1580f5e7dbeaSBrian Foster 	} else if (error) {
1581f5e7dbeaSBrian Foster 		goto out;
158230f712c9SDave Chinner 	}
158330f712c9SDave Chinner 
158430f712c9SDave Chinner 	/*
158530f712c9SDave Chinner 	 * First algorithm.
158630f712c9SDave Chinner 	 * If the requested extent is large relative to the freespaces available
158730f712c9SDave Chinner 	 * in this a.g., then the cursor will be pointing to a btree entry
158830f712c9SDave Chinner 	 * near the right edge of the tree.  If it's in the last btree leaf
158930f712c9SDave Chinner 	 * block, then we just examine all the entries in that block
159030f712c9SDave Chinner 	 * that are big enough, and pick the best one.
159130f712c9SDave Chinner 	 */
15925113f8ecSDarrick J. Wong 	if (xfs_btree_islastblock(acur.cnt, 0)) {
15935113f8ecSDarrick J. Wong 		bool		allocated = false;
15945113f8ecSDarrick J. Wong 
15955113f8ecSDarrick J. Wong 		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
15965113f8ecSDarrick J. Wong 				&allocated);
1597f5e7dbeaSBrian Foster 		if (error)
1598f5e7dbeaSBrian Foster 			goto out;
15995113f8ecSDarrick J. Wong 		if (allocated)
16005113f8ecSDarrick J. Wong 			goto alloc_finish;
160130f712c9SDave Chinner 	}
1602f5e7dbeaSBrian Foster 
160330f712c9SDave Chinner 	/*
1604dc8e69bdSBrian Foster 	 * Second algorithm. Combined cntbt and bnobt search to find ideal
1605dc8e69bdSBrian Foster 	 * locality.
160630f712c9SDave Chinner 	 */
1607dc8e69bdSBrian Foster 	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
1608f5e7dbeaSBrian Foster 	if (error)
1609f5e7dbeaSBrian Foster 		goto out;
161030f712c9SDave Chinner 
161130f712c9SDave Chinner 	/*
161230f712c9SDave Chinner 	 * If we couldn't get anything, give up.
161330f712c9SDave Chinner 	 */
1614fec0afdaSBrian Foster 	if (!acur.len) {
1615d6d3aff2SBrian Foster 		if (acur.busy) {
16168ebbf262SDave Chinner 			/*
16178ebbf262SDave Chinner 			 * Our only valid extents must have been busy. Flush and
16188ebbf262SDave Chinner 			 * retry the allocation again. If we get an -EAGAIN
16198ebbf262SDave Chinner 			 * error, we're being told that a deadlock was avoided
16208ebbf262SDave Chinner 			 * and the current transaction needs committing before
16218ebbf262SDave Chinner 			 * the allocation can be retried.
16228ebbf262SDave Chinner 			 */
162330f712c9SDave Chinner 			trace_xfs_alloc_near_busy(args);
16248ebbf262SDave Chinner 			error = xfs_extent_busy_flush(args->tp, args->pag,
16256a2a9d77SDave Chinner 					acur.busy_gen, alloc_flags);
16268ebbf262SDave Chinner 			if (error)
16278ebbf262SDave Chinner 				goto out;
16288ebbf262SDave Chinner 
16298ebbf262SDave Chinner 			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
163030f712c9SDave Chinner 			goto restart;
163130f712c9SDave Chinner 		}
163230f712c9SDave Chinner 		trace_xfs_alloc_size_neither(args);
163330f712c9SDave Chinner 		args->agbno = NULLAGBLOCK;
1634f5e7dbeaSBrian Foster 		goto out;
163530f712c9SDave Chinner 	}
163630f712c9SDave Chinner 
16375113f8ecSDarrick J. Wong alloc_finish:
1638d2968825SBrian Foster 	/* fix up btrees on a successful allocation */
1639d2968825SBrian Foster 	error = xfs_alloc_cur_finish(args, &acur);
164030f712c9SDave Chinner 
1641f5e7dbeaSBrian Foster out:
1642f5e7dbeaSBrian Foster 	xfs_alloc_cur_close(&acur, error);
164330f712c9SDave Chinner 	return error;
164430f712c9SDave Chinner }
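/*
 * The TRYFLUSH retry above, by example: if every usable extent was
 * busy, the first pass flushes the busy list without blocking and
 * restarts; XFS_ALLOC_FLAG_TRYFLUSH has been cleared by then, so a
 * second failure may block in the flush or surface -EAGAIN, forcing
 * the transaction to commit before the allocation is retried.
 */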
164530f712c9SDave Chinner 
164630f712c9SDave Chinner /*
164730f712c9SDave Chinner  * Allocate a variable extent anywhere in the allocation group agno.
164830f712c9SDave Chinner  * Extent's length (returned in len) will be between minlen and maxlen,
164930f712c9SDave Chinner  * and of the form k * prod + mod unless there's nothing that large.
165030f712c9SDave Chinner  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
165130f712c9SDave Chinner  */
16526a2a9d77SDave Chinner static int
165330f712c9SDave Chinner xfs_alloc_ag_vextent_size(
16546a2a9d77SDave Chinner 	struct xfs_alloc_arg	*args,
16556a2a9d77SDave Chinner 	uint32_t		alloc_flags)
165630f712c9SDave Chinner {
16579798f615SChristoph Hellwig 	struct xfs_agf		*agf = args->agbp->b_addr;
16586a2a9d77SDave Chinner 	struct xfs_btree_cur	*bno_cur;
16596a2a9d77SDave Chinner 	struct xfs_btree_cur	*cnt_cur;
166030f712c9SDave Chinner 	xfs_agblock_t		fbno;		/* start of found freespace */
166130f712c9SDave Chinner 	xfs_extlen_t		flen;		/* length of found freespace */
166230f712c9SDave Chinner 	xfs_agblock_t		rbno;		/* returned block number */
166330f712c9SDave Chinner 	xfs_extlen_t		rlen;		/* length of returned extent */
1664ebf55872SChristoph Hellwig 	bool			busy;
1665ebf55872SChristoph Hellwig 	unsigned		busy_gen;
16666a2a9d77SDave Chinner 	int			error;
16676a2a9d77SDave Chinner 	int			i;
166830f712c9SDave Chinner 
16698ebbf262SDave Chinner 	/* Retry once quickly if we find busy extents before blocking. */
16708ebbf262SDave Chinner 	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
167130f712c9SDave Chinner restart:
167230f712c9SDave Chinner 	/*
167330f712c9SDave Chinner 	 * Allocate and initialize a cursor for the by-size btree.
167430f712c9SDave Chinner 	 */
167530f712c9SDave Chinner 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1676289d38d2SDave Chinner 					args->pag, XFS_BTNUM_CNT);
167730f712c9SDave Chinner 	bno_cur = NULL;
167830f712c9SDave Chinner 
167930f712c9SDave Chinner 	/*
168030f712c9SDave Chinner 	 * Look for an entry >= maxlen+alignment-1 blocks.
168130f712c9SDave Chinner 	 */
168230f712c9SDave Chinner 	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
168330f712c9SDave Chinner 			args->maxlen + args->alignment - 1, &i)))
168430f712c9SDave Chinner 		goto error0;
168530f712c9SDave Chinner 
168630f712c9SDave Chinner 	/*
1687ebf55872SChristoph Hellwig 	 * If none then we have to settle for a smaller extent. In the case that
1688ebf55872SChristoph Hellwig 	 * there are no large extents, this will return the last entry in the
1689ebf55872SChristoph Hellwig 	 * tree unless the tree is empty. In the case that there are only busy
1690ebf55872SChristoph Hellwig 	 * large extents, this will return the largest small extent unless there
169130f712c9SDave Chinner 	 * are no smaller extents available.
169230f712c9SDave Chinner 	 */
1693ebf55872SChristoph Hellwig 	if (!i) {
169430f712c9SDave Chinner 		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
169530f712c9SDave Chinner 						   &fbno, &flen, &i);
169630f712c9SDave Chinner 		if (error)
169730f712c9SDave Chinner 			goto error0;
169830f712c9SDave Chinner 		if (i == 0 || flen == 0) {
169930f712c9SDave Chinner 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
170030f712c9SDave Chinner 			trace_xfs_alloc_size_noentry(args);
170130f712c9SDave Chinner 			return 0;
170230f712c9SDave Chinner 		}
170330f712c9SDave Chinner 		ASSERT(i == 1);
1704ebf55872SChristoph Hellwig 		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1705ebf55872SChristoph Hellwig 				&rlen, &busy_gen);
170630f712c9SDave Chinner 	} else {
170730f712c9SDave Chinner 		/*
170830f712c9SDave Chinner 		 * Search for a non-busy extent that is large enough.
170930f712c9SDave Chinner 		 */
171030f712c9SDave Chinner 		for (;;) {
171130f712c9SDave Chinner 			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
171230f712c9SDave Chinner 			if (error)
171330f712c9SDave Chinner 				goto error0;
1714f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1715f9e03706SDarrick J. Wong 				error = -EFSCORRUPTED;
1716f9e03706SDarrick J. Wong 				goto error0;
1717f9e03706SDarrick J. Wong 			}
171830f712c9SDave Chinner 
1719ebf55872SChristoph Hellwig 			busy = xfs_alloc_compute_aligned(args, fbno, flen,
1720ebf55872SChristoph Hellwig 					&rbno, &rlen, &busy_gen);
172130f712c9SDave Chinner 
172230f712c9SDave Chinner 			if (rlen >= args->maxlen)
172330f712c9SDave Chinner 				break;
172430f712c9SDave Chinner 
172530f712c9SDave Chinner 			error = xfs_btree_increment(cnt_cur, 0, &i);
172630f712c9SDave Chinner 			if (error)
172730f712c9SDave Chinner 				goto error0;
17288ebbf262SDave Chinner 			if (i)
17298ebbf262SDave Chinner 				continue;
17308ebbf262SDave Chinner 
173130f712c9SDave Chinner 			/*
17328ebbf262SDave Chinner 			 * Our only valid extents must have been busy. Flush and
17338ebbf262SDave Chinner 			 * retry the allocation again. If we get an -EAGAIN
17348ebbf262SDave Chinner 			 * error, we're being told that a deadlock was avoided
17358ebbf262SDave Chinner 			 * and the current transaction needs committing before
17368ebbf262SDave Chinner 			 * the allocation can be retried.
173730f712c9SDave Chinner 			 */
173830f712c9SDave Chinner 			trace_xfs_alloc_size_busy(args);
17398ebbf262SDave Chinner 			error = xfs_extent_busy_flush(args->tp, args->pag,
17406a2a9d77SDave Chinner 					busy_gen, alloc_flags);
17418ebbf262SDave Chinner 			if (error)
17428ebbf262SDave Chinner 				goto error0;
17438ebbf262SDave Chinner 
17448ebbf262SDave Chinner 			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
17458ebbf262SDave Chinner 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
174630f712c9SDave Chinner 			goto restart;
174730f712c9SDave Chinner 		}
174830f712c9SDave Chinner 	}
174930f712c9SDave Chinner 
175030f712c9SDave Chinner 	/*
175130f712c9SDave Chinner 	 * In the first case above, we got the last entry in the
175230f712c9SDave Chinner 	 * by-size btree.  Now we check to see if the space hits maxlen
175330f712c9SDave Chinner 	 * once aligned; if not, we search left for something better.
175430f712c9SDave Chinner 	 * This can't happen in the second case above.
175530f712c9SDave Chinner 	 */
175630f712c9SDave Chinner 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1757f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(args->mp,
1758f9e03706SDarrick J. Wong 			   rlen != 0 &&
1759f9e03706SDarrick J. Wong 			   (rlen > flen ||
1760f9e03706SDarrick J. Wong 			    rbno + rlen > fbno + flen))) {
1761f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
1762f9e03706SDarrick J. Wong 		goto error0;
1763f9e03706SDarrick J. Wong 	}
176430f712c9SDave Chinner 	if (rlen < args->maxlen) {
176530f712c9SDave Chinner 		xfs_agblock_t	bestfbno;
176630f712c9SDave Chinner 		xfs_extlen_t	bestflen;
176730f712c9SDave Chinner 		xfs_agblock_t	bestrbno;
176830f712c9SDave Chinner 		xfs_extlen_t	bestrlen;
176930f712c9SDave Chinner 
177030f712c9SDave Chinner 		bestrlen = rlen;
177130f712c9SDave Chinner 		bestrbno = rbno;
177230f712c9SDave Chinner 		bestflen = flen;
177330f712c9SDave Chinner 		bestfbno = fbno;
177430f712c9SDave Chinner 		for (;;) {
177530f712c9SDave Chinner 			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
177630f712c9SDave Chinner 				goto error0;
177730f712c9SDave Chinner 			if (i == 0)
177830f712c9SDave Chinner 				break;
177930f712c9SDave Chinner 			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
178030f712c9SDave Chinner 					&i)))
178130f712c9SDave Chinner 				goto error0;
1782f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1783f9e03706SDarrick J. Wong 				error = -EFSCORRUPTED;
1784f9e03706SDarrick J. Wong 				goto error0;
1785f9e03706SDarrick J. Wong 			}
1786*1fe5c2aaSChi Zhiling 			if (flen <= bestrlen)
178730f712c9SDave Chinner 				break;
1788ebf55872SChristoph Hellwig 			busy = xfs_alloc_compute_aligned(args, fbno, flen,
1789ebf55872SChristoph Hellwig 					&rbno, &rlen, &busy_gen);
179030f712c9SDave Chinner 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1791f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(args->mp,
1792f9e03706SDarrick J. Wong 					   rlen != 0 &&
1793f9e03706SDarrick J. Wong 					   (rlen > flen ||
1794f9e03706SDarrick J. Wong 					    rbno + rlen > fbno + flen))) {
1795f9e03706SDarrick J. Wong 				error = -EFSCORRUPTED;
1796f9e03706SDarrick J. Wong 				goto error0;
1797f9e03706SDarrick J. Wong 			}
179830f712c9SDave Chinner 			if (rlen > bestrlen) {
179930f712c9SDave Chinner 				bestrlen = rlen;
180030f712c9SDave Chinner 				bestrbno = rbno;
180130f712c9SDave Chinner 				bestflen = flen;
180230f712c9SDave Chinner 				bestfbno = fbno;
180330f712c9SDave Chinner 				if (rlen == args->maxlen)
180430f712c9SDave Chinner 					break;
180530f712c9SDave Chinner 			}
180630f712c9SDave Chinner 		}
180730f712c9SDave Chinner 		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
180830f712c9SDave Chinner 				&i)))
180930f712c9SDave Chinner 			goto error0;
1810f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1811f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
1812f9e03706SDarrick J. Wong 			goto error0;
1813f9e03706SDarrick J. Wong 		}
181430f712c9SDave Chinner 		rlen = bestrlen;
181530f712c9SDave Chinner 		rbno = bestrbno;
181630f712c9SDave Chinner 		flen = bestflen;
181730f712c9SDave Chinner 		fbno = bestfbno;
181830f712c9SDave Chinner 	}
181930f712c9SDave Chinner 	args->wasfromfl = 0;
182030f712c9SDave Chinner 	/*
182130f712c9SDave Chinner 	 * Fix up the length.
182230f712c9SDave Chinner 	 */
182330f712c9SDave Chinner 	args->len = rlen;
182430f712c9SDave Chinner 	if (rlen < args->minlen) {
1825ebf55872SChristoph Hellwig 		if (busy) {
18268ebbf262SDave Chinner 			/*
18278ebbf262SDave Chinner 			 * Our only valid extents must have been busy. Flush and
18288ebbf262SDave Chinner 			 * retry the allocation again. If we get an -EAGAIN
18298ebbf262SDave Chinner 			 * error, we're being told that a deadlock was avoided
18308ebbf262SDave Chinner 			 * and the current transaction needs committing before
18318ebbf262SDave Chinner 			 * the allocation can be retried.
18328ebbf262SDave Chinner 			 */
183330f712c9SDave Chinner 			trace_xfs_alloc_size_busy(args);
18348ebbf262SDave Chinner 			error = xfs_extent_busy_flush(args->tp, args->pag,
18358ebbf262SDave Chinner 					busy_gen, alloc_flags);
18368ebbf262SDave Chinner 			if (error)
18378ebbf262SDave Chinner 				goto error0;
18388ebbf262SDave Chinner 
18398ebbf262SDave Chinner 			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
18408ebbf262SDave Chinner 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
184130f712c9SDave Chinner 			goto restart;
184230f712c9SDave Chinner 		}
184330f712c9SDave Chinner 		goto out_nominleft;
184430f712c9SDave Chinner 	}
184530f712c9SDave Chinner 	xfs_alloc_fix_len(args);
184630f712c9SDave Chinner 
184730f712c9SDave Chinner 	rlen = args->len;
1848f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1849f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
1850f9e03706SDarrick J. Wong 		goto error0;
1851f9e03706SDarrick J. Wong 	}
185230f712c9SDave Chinner 	/*
185330f712c9SDave Chinner 	 * Allocate and initialize a cursor for the by-block tree.
185430f712c9SDave Chinner 	 */
185530f712c9SDave Chinner 	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1856289d38d2SDave Chinner 					args->pag, XFS_BTNUM_BNO);
185730f712c9SDave Chinner 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
185830f712c9SDave Chinner 			rbno, rlen, XFSA_FIXUP_CNT_OK)))
185930f712c9SDave Chinner 		goto error0;
186030f712c9SDave Chinner 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
186130f712c9SDave Chinner 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
186230f712c9SDave Chinner 	cnt_cur = bno_cur = NULL;
186330f712c9SDave Chinner 	args->len = rlen;
186430f712c9SDave Chinner 	args->agbno = rbno;
1865f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(args->mp,
1866f9e03706SDarrick J. Wong 			   args->agbno + args->len >
18679798f615SChristoph Hellwig 			   be32_to_cpu(agf->agf_length))) {
1868f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
1869f9e03706SDarrick J. Wong 		goto error0;
1870f9e03706SDarrick J. Wong 	}
187130f712c9SDave Chinner 	trace_xfs_alloc_size_done(args);
187230f712c9SDave Chinner 	return 0;
187330f712c9SDave Chinner 
187430f712c9SDave Chinner error0:
187530f712c9SDave Chinner 	trace_xfs_alloc_size_error(args);
187630f712c9SDave Chinner 	if (cnt_cur)
187730f712c9SDave Chinner 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
187830f712c9SDave Chinner 	if (bno_cur)
187930f712c9SDave Chinner 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
188030f712c9SDave Chinner 	return error;
188130f712c9SDave Chinner 
188230f712c9SDave Chinner out_nominleft:
188330f712c9SDave Chinner 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
188430f712c9SDave Chinner 	trace_xfs_alloc_size_nominleft(args);
188530f712c9SDave Chinner 	args->agbno = NULLAGBLOCK;
188630f712c9SDave Chinner 	return 0;
188730f712c9SDave Chinner }
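/*
 * Worked example of the leftward search above (hypothetical records):
 * with maxlen == 8 and the last cntbt entry trimmed by busy blocks to
 * rlen == 6, the loop steps left through progressively shorter records
 * and keeps the best aligned candidate, stopping as soon as
 * flen <= bestrlen, since a shorter raw record can never beat the best
 * usable length already in hand.
 */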
188830f712c9SDave Chinner 
188930f712c9SDave Chinner /*
189030f712c9SDave Chinner  * Free the extent starting at agno/bno for length.
189130f712c9SDave Chinner  */
1892340785ccSDarrick J. Wong STATIC int
189330f712c9SDave Chinner xfs_free_ag_extent(
189466e3237eSDarrick J. Wong 	struct xfs_trans		*tp,
189566e3237eSDarrick J. Wong 	struct xfs_buf			*agbp,
1896340785ccSDarrick J. Wong 	xfs_agnumber_t			agno,
1897340785ccSDarrick J. Wong 	xfs_agblock_t			bno,
1898340785ccSDarrick J. Wong 	xfs_extlen_t			len,
189966e3237eSDarrick J. Wong 	const struct xfs_owner_info	*oinfo,
19003fd129b6SDarrick J. Wong 	enum xfs_ag_resv_type		type)
190130f712c9SDave Chinner {
190266e3237eSDarrick J. Wong 	struct xfs_mount		*mp;
190366e3237eSDarrick J. Wong 	struct xfs_btree_cur		*bno_cur;
190466e3237eSDarrick J. Wong 	struct xfs_btree_cur		*cnt_cur;
190566e3237eSDarrick J. Wong 	xfs_agblock_t			gtbno; /* start of right neighbor */
190666e3237eSDarrick J. Wong 	xfs_extlen_t			gtlen; /* length of right neighbor */
190766e3237eSDarrick J. Wong 	xfs_agblock_t			ltbno; /* start of left neighbor */
190866e3237eSDarrick J. Wong 	xfs_extlen_t			ltlen; /* length of left neighbor */
190966e3237eSDarrick J. Wong 	xfs_agblock_t			nbno; /* new starting block of freesp */
191030f712c9SDave Chinner 	xfs_extlen_t			nlen; /* new length of freespace */
191166e3237eSDarrick J. Wong 	int				haveleft; /* have a left neighbor */
191266e3237eSDarrick J. Wong 	int				haveright; /* have a right neighbor */
191366e3237eSDarrick J. Wong 	int				i;
191466e3237eSDarrick J. Wong 	int				error;
1915fa9c3c19SDave Chinner 	struct xfs_perag		*pag = agbp->b_pag;
191630f712c9SDave Chinner 
1917673930c3SDarrick J. Wong 	bno_cur = cnt_cur = NULL;
191830f712c9SDave Chinner 	mp = tp->t_mountp;
1919673930c3SDarrick J. Wong 
192033df3a9cSDarrick J. Wong 	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1921fa9c3c19SDave Chinner 		error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
1922673930c3SDarrick J. Wong 		if (error)
1923673930c3SDarrick J. Wong 			goto error0;
1924673930c3SDarrick J. Wong 	}
1925673930c3SDarrick J. Wong 
192630f712c9SDave Chinner 	/*
192730f712c9SDave Chinner 	 * Allocate and initialize a cursor for the by-block btree.
192830f712c9SDave Chinner 	 */
1929289d38d2SDave Chinner 	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
193030f712c9SDave Chinner 	/*
193130f712c9SDave Chinner 	 * Look for a neighboring block on the left (lower block numbers)
193230f712c9SDave Chinner 	 * that is contiguous with this space.
193330f712c9SDave Chinner 	 */
193430f712c9SDave Chinner 	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
193530f712c9SDave Chinner 		goto error0;
193630f712c9SDave Chinner 	if (haveleft) {
193730f712c9SDave Chinner 		/*
193830f712c9SDave Chinner 		 * There is a block to our left.
193930f712c9SDave Chinner 		 */
194030f712c9SDave Chinner 		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
194130f712c9SDave Chinner 			goto error0;
1942f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1943f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
1944f9e03706SDarrick J. Wong 			goto error0;
1945f9e03706SDarrick J. Wong 		}
194630f712c9SDave Chinner 		/*
194730f712c9SDave Chinner 		 * It's not contiguous, though.
194830f712c9SDave Chinner 		 */
194930f712c9SDave Chinner 		if (ltbno + ltlen < bno)
195030f712c9SDave Chinner 			haveleft = 0;
195130f712c9SDave Chinner 		else {
195230f712c9SDave Chinner 			/*
195330f712c9SDave Chinner 			 * If this failure happens, the request to free this
195430f712c9SDave Chinner 			 * space was invalid: it's (partly) already free.
195530f712c9SDave Chinner 			 * Very bad.
195630f712c9SDave Chinner 			 */
1957f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1958f9e03706SDarrick J. Wong 				error = -EFSCORRUPTED;
1959f9e03706SDarrick J. Wong 				goto error0;
1960f9e03706SDarrick J. Wong 			}
196130f712c9SDave Chinner 		}
196230f712c9SDave Chinner 	}
196330f712c9SDave Chinner 	/*
196430f712c9SDave Chinner 	 * Look for a neighboring block on the right (higher block numbers)
196530f712c9SDave Chinner 	 * that is contiguous with this space.
196630f712c9SDave Chinner 	 */
196730f712c9SDave Chinner 	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
196830f712c9SDave Chinner 		goto error0;
196930f712c9SDave Chinner 	if (haveright) {
197030f712c9SDave Chinner 		/*
197130f712c9SDave Chinner 		 * There is a block to our right.
197230f712c9SDave Chinner 		 */
197330f712c9SDave Chinner 		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
197430f712c9SDave Chinner 			goto error0;
1975f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1976f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
1977f9e03706SDarrick J. Wong 			goto error0;
1978f9e03706SDarrick J. Wong 		}
197930f712c9SDave Chinner 		/*
198030f712c9SDave Chinner 		 * It's not contiguous, though.
198130f712c9SDave Chinner 		 */
198230f712c9SDave Chinner 		if (bno + len < gtbno)
198330f712c9SDave Chinner 			haveright = 0;
198430f712c9SDave Chinner 		else {
198530f712c9SDave Chinner 			/*
198630f712c9SDave Chinner 			 * If this failure happens, the request to free this
198730f712c9SDave Chinner 			 * space was invalid: it's (partly) already free.
198830f712c9SDave Chinner 			 * Very bad.
198930f712c9SDave Chinner 			 */
1990f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1991f9e03706SDarrick J. Wong 				error = -EFSCORRUPTED;
1992f9e03706SDarrick J. Wong 				goto error0;
1993f9e03706SDarrick J. Wong 			}
199430f712c9SDave Chinner 		}
199530f712c9SDave Chinner 	}
199630f712c9SDave Chinner 	/*
199730f712c9SDave Chinner 	 * Now allocate and initialize a cursor for the by-size tree.
199830f712c9SDave Chinner 	 */
1999289d38d2SDave Chinner 	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
200030f712c9SDave Chinner 	/*
200130f712c9SDave Chinner 	 * Have both left and right contiguous neighbors.
200230f712c9SDave Chinner 	 * Merge all three into a single free block.
200330f712c9SDave Chinner 	 */
200430f712c9SDave Chinner 	if (haveleft && haveright) {
200530f712c9SDave Chinner 		/*
200630f712c9SDave Chinner 		 * Delete the old by-size entry on the left.
200730f712c9SDave Chinner 		 */
200830f712c9SDave Chinner 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
200930f712c9SDave Chinner 			goto error0;
2010f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2011f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2012f9e03706SDarrick J. Wong 			goto error0;
2013f9e03706SDarrick J. Wong 		}
201430f712c9SDave Chinner 		if ((error = xfs_btree_delete(cnt_cur, &i)))
201530f712c9SDave Chinner 			goto error0;
2016f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2017f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2018f9e03706SDarrick J. Wong 			goto error0;
2019f9e03706SDarrick J. Wong 		}
202030f712c9SDave Chinner 		/*
202130f712c9SDave Chinner 		 * Delete the old by-size entry on the right.
202230f712c9SDave Chinner 		 */
202330f712c9SDave Chinner 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
202430f712c9SDave Chinner 			goto error0;
2025f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2026f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2027f9e03706SDarrick J. Wong 			goto error0;
2028f9e03706SDarrick J. Wong 		}
202930f712c9SDave Chinner 		if ((error = xfs_btree_delete(cnt_cur, &i)))
203030f712c9SDave Chinner 			goto error0;
2031f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2032f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2033f9e03706SDarrick J. Wong 			goto error0;
2034f9e03706SDarrick J. Wong 		}
203530f712c9SDave Chinner 		/*
203630f712c9SDave Chinner 		 * Delete the old by-block entry for the right block.
203730f712c9SDave Chinner 		 */
203830f712c9SDave Chinner 		if ((error = xfs_btree_delete(bno_cur, &i)))
203930f712c9SDave Chinner 			goto error0;
2040f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2041f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2042f9e03706SDarrick J. Wong 			goto error0;
2043f9e03706SDarrick J. Wong 		}
204430f712c9SDave Chinner 		/*
204530f712c9SDave Chinner 		 * Move the by-block cursor back to the left neighbor.
204630f712c9SDave Chinner 		 */
204730f712c9SDave Chinner 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
204830f712c9SDave Chinner 			goto error0;
2049f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2050f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2051f9e03706SDarrick J. Wong 			goto error0;
2052f9e03706SDarrick J. Wong 		}
205330f712c9SDave Chinner #ifdef DEBUG
205430f712c9SDave Chinner 		/*
205530f712c9SDave Chinner 		 * Check that this is the right record: delete didn't
205630f712c9SDave Chinner 		 * mangle the cursor.
205730f712c9SDave Chinner 		 */
205830f712c9SDave Chinner 		{
205930f712c9SDave Chinner 			xfs_agblock_t	xxbno;
206030f712c9SDave Chinner 			xfs_extlen_t	xxlen;
206130f712c9SDave Chinner 
206230f712c9SDave Chinner 			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
206330f712c9SDave Chinner 					&i)))
206430f712c9SDave Chinner 				goto error0;
2065f9e03706SDarrick J. Wong 			if (XFS_IS_CORRUPT(mp,
2066f9e03706SDarrick J. Wong 					   i != 1 ||
2067f9e03706SDarrick J. Wong 					   xxbno != ltbno ||
2068f9e03706SDarrick J. Wong 					   xxlen != ltlen)) {
2069f9e03706SDarrick J. Wong 				error = -EFSCORRUPTED;
2070f9e03706SDarrick J. Wong 				goto error0;
2071f9e03706SDarrick J. Wong 			}
207230f712c9SDave Chinner 		}
207330f712c9SDave Chinner #endif
207430f712c9SDave Chinner 		/*
207530f712c9SDave Chinner 		 * Update remaining by-block entry to the new, joined block.
207630f712c9SDave Chinner 		 */
207730f712c9SDave Chinner 		nbno = ltbno;
207830f712c9SDave Chinner 		nlen = len + ltlen + gtlen;
207930f712c9SDave Chinner 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
208030f712c9SDave Chinner 			goto error0;
208130f712c9SDave Chinner 	}
208230f712c9SDave Chinner 	/*
208330f712c9SDave Chinner 	 * Have only a left contiguous neighbor.
208430f712c9SDave Chinner 	 * Merge it together with the new freespace.
208530f712c9SDave Chinner 	 */
208630f712c9SDave Chinner 	else if (haveleft) {
208730f712c9SDave Chinner 		/*
208830f712c9SDave Chinner 		 * Delete the old by-size entry on the left.
208930f712c9SDave Chinner 		 */
209030f712c9SDave Chinner 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
209130f712c9SDave Chinner 			goto error0;
2092f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2093f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2094f9e03706SDarrick J. Wong 			goto error0;
2095f9e03706SDarrick J. Wong 		}
209630f712c9SDave Chinner 		if ((error = xfs_btree_delete(cnt_cur, &i)))
209730f712c9SDave Chinner 			goto error0;
2098f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2099f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2100f9e03706SDarrick J. Wong 			goto error0;
2101f9e03706SDarrick J. Wong 		}
210230f712c9SDave Chinner 		/*
210330f712c9SDave Chinner 		 * Back up the by-block cursor to the left neighbor, and
210430f712c9SDave Chinner 		 * update its length.
210530f712c9SDave Chinner 		 */
210630f712c9SDave Chinner 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
210730f712c9SDave Chinner 			goto error0;
2108f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2109f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2110f9e03706SDarrick J. Wong 			goto error0;
2111f9e03706SDarrick J. Wong 		}
211230f712c9SDave Chinner 		nbno = ltbno;
211330f712c9SDave Chinner 		nlen = len + ltlen;
211430f712c9SDave Chinner 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
211530f712c9SDave Chinner 			goto error0;
211630f712c9SDave Chinner 	}
211730f712c9SDave Chinner 	/*
211830f712c9SDave Chinner 	 * Have only a right contiguous neighbor.
211930f712c9SDave Chinner 	 * Merge it together with the new freespace.
212030f712c9SDave Chinner 	 */
212130f712c9SDave Chinner 	else if (haveright) {
212230f712c9SDave Chinner 		/*
212330f712c9SDave Chinner 		 * Delete the old by-size entry on the right.
212430f712c9SDave Chinner 		 */
212530f712c9SDave Chinner 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
212630f712c9SDave Chinner 			goto error0;
2127f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2128f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2129f9e03706SDarrick J. Wong 			goto error0;
2130f9e03706SDarrick J. Wong 		}
213130f712c9SDave Chinner 		if ((error = xfs_btree_delete(cnt_cur, &i)))
213230f712c9SDave Chinner 			goto error0;
2133f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2134f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2135f9e03706SDarrick J. Wong 			goto error0;
2136f9e03706SDarrick J. Wong 		}
213730f712c9SDave Chinner 		/*
213830f712c9SDave Chinner 		 * Update the starting block and length of the right
213930f712c9SDave Chinner 		 * neighbor in the by-block tree.
214030f712c9SDave Chinner 		 */
214130f712c9SDave Chinner 		nbno = bno;
214230f712c9SDave Chinner 		nlen = len + gtlen;
214330f712c9SDave Chinner 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
214430f712c9SDave Chinner 			goto error0;
214530f712c9SDave Chinner 	}
214630f712c9SDave Chinner 	/*
214730f712c9SDave Chinner 	 * No contiguous neighbors.
214830f712c9SDave Chinner 	 * Insert the new freespace into the by-block tree.
214930f712c9SDave Chinner 	 */
215030f712c9SDave Chinner 	else {
215130f712c9SDave Chinner 		nbno = bno;
215230f712c9SDave Chinner 		nlen = len;
215330f712c9SDave Chinner 		if ((error = xfs_btree_insert(bno_cur, &i)))
215430f712c9SDave Chinner 			goto error0;
2155f9e03706SDarrick J. Wong 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2156f9e03706SDarrick J. Wong 			error = -EFSCORRUPTED;
2157f9e03706SDarrick J. Wong 			goto error0;
2158f9e03706SDarrick J. Wong 		}
215930f712c9SDave Chinner 	}
216030f712c9SDave Chinner 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
216130f712c9SDave Chinner 	bno_cur = NULL;
216230f712c9SDave Chinner 	/*
216330f712c9SDave Chinner 	 * In all cases we need to insert the new freespace in the by-size tree.
216430f712c9SDave Chinner 	 */
216530f712c9SDave Chinner 	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
216630f712c9SDave Chinner 		goto error0;
2167f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(mp, i != 0)) {
2168f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
2169f9e03706SDarrick J. Wong 		goto error0;
2170f9e03706SDarrick J. Wong 	}
217130f712c9SDave Chinner 	if ((error = xfs_btree_insert(cnt_cur, &i)))
217230f712c9SDave Chinner 		goto error0;
2173f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(mp, i != 1)) {
2174f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
2175f9e03706SDarrick J. Wong 		goto error0;
2176f9e03706SDarrick J. Wong 	}
217730f712c9SDave Chinner 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
217830f712c9SDave Chinner 	cnt_cur = NULL;
217930f712c9SDave Chinner 
218030f712c9SDave Chinner 	/*
218130f712c9SDave Chinner 	 * Update the freespace totals in the ag and superblock.
218230f712c9SDave Chinner 	 */
218392a00544SGao Xiang 	error = xfs_alloc_update_counters(tp, agbp, len);
218492a00544SGao Xiang 	xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
218530f712c9SDave Chinner 	if (error)
218630f712c9SDave Chinner 		goto error0;
218730f712c9SDave Chinner 
2188ff6d6af2SBill O'Donnell 	XFS_STATS_INC(mp, xs_freex);
2189ff6d6af2SBill O'Donnell 	XFS_STATS_ADD(mp, xs_freeb, len);
219030f712c9SDave Chinner 
219121592863SBrian Foster 	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
219230f712c9SDave Chinner 
219330f712c9SDave Chinner 	return 0;
219430f712c9SDave Chinner 
219530f712c9SDave Chinner  error0:
219621592863SBrian Foster 	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
219730f712c9SDave Chinner 	if (bno_cur)
219830f712c9SDave Chinner 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
219930f712c9SDave Chinner 	if (cnt_cur)
220030f712c9SDave Chinner 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
220130f712c9SDave Chinner 	return error;
220230f712c9SDave Chinner }
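
/*
 * Worked example for the merge logic above (hypothetical numbers): freeing
 * [bno = 110, len = 10] with a left neighbor [ltbno = 100, ltlen = 10] and a
 * right neighbor [gtbno = 120, gtlen = 5] deletes both old by-size records
 * and the right neighbor's by-block record, rewrites the left by-block
 * record as [nbno = 100, nlen = 25], and finally inserts one by-size record
 * for the joined extent.
 */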
220330f712c9SDave Chinner 
220430f712c9SDave Chinner /*
220530f712c9SDave Chinner  * Visible (exported) allocation/free functions.
220630f712c9SDave Chinner  * Some of these are used just by xfs_alloc_btree.c and this file.
220730f712c9SDave Chinner  */
220830f712c9SDave Chinner 
220930f712c9SDave Chinner /*
22107cb3efb4SDarrick J. Wong  * Compute and fill in value of m_alloc_maxlevels.
221130f712c9SDave Chinner  */
221230f712c9SDave Chinner void
221330f712c9SDave Chinner xfs_alloc_compute_maxlevels(
221430f712c9SDave Chinner 	xfs_mount_t	*mp)	/* file system mount structure */
221530f712c9SDave Chinner {
22167cb3efb4SDarrick J. Wong 	mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
221719b54ee6SDarrick J. Wong 			(mp->m_sb.sb_agblocks + 1) / 2);
22180ed5f735SDarrick J. Wong 	ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
221930f712c9SDave Chinner }
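
/*
 * Sizing sketch for the computation above (hypothetical geometry): an AG of
 * sb_agblocks = 1048576 can hold at most (1048576 + 1) / 2 = 524288 free
 * extent records (alternating free/allocated single blocks), and
 * xfs_btree_compute_maxlevels() returns the smallest tree height whose
 * minimally-full nodes (m_alloc_mnr) can index that many records.
 */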
222030f712c9SDave Chinner 
222130f712c9SDave Chinner /*
22223fd129b6SDarrick J. Wong  * Find the length of the longest extent in an AG.  The 'need' parameter
22233fd129b6SDarrick J. Wong  * specifies how much space we're going to need for the AGFL and the
22243fd129b6SDarrick J. Wong  * 'reserved' parameter tells us how many blocks in this AG are reserved for
22253fd129b6SDarrick J. Wong  * other callers.
222630f712c9SDave Chinner  */
222730f712c9SDave Chinner xfs_extlen_t
222830f712c9SDave Chinner xfs_alloc_longest_free_extent(
222950adbcb4SDave Chinner 	struct xfs_perag	*pag,
22303fd129b6SDarrick J. Wong 	xfs_extlen_t		need,
22313fd129b6SDarrick J. Wong 	xfs_extlen_t		reserved)
223230f712c9SDave Chinner {
223350adbcb4SDave Chinner 	xfs_extlen_t		delta = 0;
223430f712c9SDave Chinner 
22353fd129b6SDarrick J. Wong 	/*
22363fd129b6SDarrick J. Wong 	 * If the AGFL needs a recharge, we'll have to subtract that from the
22373fd129b6SDarrick J. Wong 	 * longest extent.
22383fd129b6SDarrick J. Wong 	 */
223930f712c9SDave Chinner 	if (need > pag->pagf_flcount)
224030f712c9SDave Chinner 		delta = need - pag->pagf_flcount;
224130f712c9SDave Chinner 
22423fd129b6SDarrick J. Wong 	/*
22433fd129b6SDarrick J. Wong 	 * If we cannot maintain others' reservations with space from the
22443fd129b6SDarrick J. Wong 	 * not-longest freesp extents, we'll have to subtract /that/ from
22453fd129b6SDarrick J. Wong 	 * the longest extent too.
22463fd129b6SDarrick J. Wong 	 */
22473fd129b6SDarrick J. Wong 	if (pag->pagf_freeblks - pag->pagf_longest < reserved)
22483fd129b6SDarrick J. Wong 		delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
22493fd129b6SDarrick J. Wong 
22503fd129b6SDarrick J. Wong 	/*
22513fd129b6SDarrick J. Wong 	 * If the longest extent is long enough to satisfy all the
22523fd129b6SDarrick J. Wong 	 * reservations and AGFL rules in place, we can return this extent.
22533fd129b6SDarrick J. Wong 	 */
225430f712c9SDave Chinner 	if (pag->pagf_longest > delta)
22551c743574SDave Chinner 		return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
22561c743574SDave Chinner 				pag->pagf_longest - delta);
22573fd129b6SDarrick J. Wong 
22583fd129b6SDarrick J. Wong 	/* Otherwise, let the caller try for 1 block if there's space. */
225930f712c9SDave Chinner 	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
226030f712c9SDave Chinner }
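
/*
 * Worked example (hypothetical counters): with need = 6, pagf_flcount = 2,
 * pagf_freeblks = 120, pagf_longest = 100 and reserved = 30, delta starts
 * at 6 - 2 = 4; the non-longest free space (120 - 100 = 20) is 10 blocks
 * short of the reservation, so delta becomes 14 and the usable longest
 * extent is min(m_ag_max_usable, 100 - 14) = 86 blocks, assuming
 * m_ag_max_usable is at least that large.
 */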
226130f712c9SDave Chinner 
22621cac233cSDarrick J. Wong /*
22631cac233cSDarrick J. Wong  * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
22641cac233cSDarrick J. Wong  * return the largest possible minimum length.
22651cac233cSDarrick J. Wong  */
2266496817b4SDave Chinner unsigned int
2267496817b4SDave Chinner xfs_alloc_min_freelist(
2268496817b4SDave Chinner 	struct xfs_mount	*mp,
2269496817b4SDave Chinner 	struct xfs_perag	*pag)
2270496817b4SDave Chinner {
22711cac233cSDarrick J. Wong 	/* AG btrees have at least 1 level. */
22721cac233cSDarrick J. Wong 	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
22731cac233cSDarrick J. Wong 	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
2274496817b4SDave Chinner 	unsigned int		min_free;
2275496817b4SDave Chinner 
22767cb3efb4SDarrick J. Wong 	ASSERT(mp->m_alloc_maxlevels > 0);
22771cac233cSDarrick J. Wong 
22780838177bSOmar Sandoval 	/*
22790838177bSOmar Sandoval 	 * For a btree shorter than the maximum height, the worst case is that
22800838177bSOmar Sandoval 	 * every level gets split and a new level is added, then while inserting
22810838177bSOmar Sandoval 	 * another entry to refill the AGFL, every level under the old root gets
22820838177bSOmar Sandoval 	 * split again. This is:
22830838177bSOmar Sandoval 	 *
22840838177bSOmar Sandoval 	 *   (full height split reservation) + (AGFL refill split height)
22850838177bSOmar Sandoval 	 * = (current height + 1) + (current height - 1)
22860838177bSOmar Sandoval 	 * = (new height) + (new height - 2)
22870838177bSOmar Sandoval 	 * = 2 * new height - 2
22880838177bSOmar Sandoval 	 *
22890838177bSOmar Sandoval 	 * For a btree of maximum height, the worst case is that every level
22900838177bSOmar Sandoval 	 * under the root gets split, then while inserting another entry to
22910838177bSOmar Sandoval 	 * refill the AGFL, every level under the root gets split again. This is
22920838177bSOmar Sandoval 	 * also:
22930838177bSOmar Sandoval 	 *
22940838177bSOmar Sandoval 	 *   2 * (current height - 1)
22950838177bSOmar Sandoval 	 * = 2 * (new height - 1)
22960838177bSOmar Sandoval 	 * = 2 * new height - 2
22970838177bSOmar Sandoval 	 */
22980838177bSOmar Sandoval 
2299496817b4SDave Chinner 	/* space needed by-bno freespace btree */
23001cac233cSDarrick J. Wong 	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
23010838177bSOmar Sandoval 				       mp->m_alloc_maxlevels) * 2 - 2;
2302496817b4SDave Chinner 	/* space needed by-size freespace btree */
23031cac233cSDarrick J. Wong 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
23040838177bSOmar Sandoval 				       mp->m_alloc_maxlevels) * 2 - 2;
230552548852SDarrick J. Wong 	/* space needed by the reverse mapping btree */
2306ebd9027dSDave Chinner 	if (xfs_has_rmapbt(mp))
23071cac233cSDarrick J. Wong 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
23080838177bSOmar Sandoval 						mp->m_rmap_maxlevels) * 2 - 2;
2309496817b4SDave Chinner 
2310496817b4SDave Chinner 	return min_free;
2311496817b4SDave Chinner }
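
/*
 * Worked example (hypothetical heights): with m_alloc_maxlevels = 5 and
 * pagf_levels of 2 for both the bnobt and cntbt on a filesystem without an
 * rmapbt, each tree reserves min(2 + 1, 5) * 2 - 2 = 4 blocks, giving
 * min_free = 8.
 */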
2312496817b4SDave Chinner 
231330f712c9SDave Chinner /*
231472d55285SDave Chinner  * Check if the operation we are fixing up the freelist for should go ahead or
231572d55285SDave Chinner  * not. If we are freeing blocks, we always allow it, otherwise the allocation
231672d55285SDave Chinner  * is dependent on whether the size and shape of free space available will
231772d55285SDave Chinner  * permit the requested allocation to take place.
231872d55285SDave Chinner  */
231972d55285SDave Chinner static bool
232072d55285SDave Chinner xfs_alloc_space_available(
232172d55285SDave Chinner 	struct xfs_alloc_arg	*args,
232272d55285SDave Chinner 	xfs_extlen_t		min_free,
232372d55285SDave Chinner 	int			flags)
232472d55285SDave Chinner {
232572d55285SDave Chinner 	struct xfs_perag	*pag = args->pag;
232612ef8301SChristoph Hellwig 	xfs_extlen_t		alloc_len, longest;
23273fd129b6SDarrick J. Wong 	xfs_extlen_t		reservation; /* blocks that are still reserved */
232872d55285SDave Chinner 	int			available;
23291ca89fbcSBrian Foster 	xfs_extlen_t		agflcount;
233072d55285SDave Chinner 
233172d55285SDave Chinner 	if (flags & XFS_ALLOC_FLAG_FREEING)
233272d55285SDave Chinner 		return true;
233372d55285SDave Chinner 
23343fd129b6SDarrick J. Wong 	reservation = xfs_ag_resv_needed(pag, args->resv);
23353fd129b6SDarrick J. Wong 
233672d55285SDave Chinner 	/* do we have enough contiguous free space for the allocation? */
233712ef8301SChristoph Hellwig 	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2338a1f69417SEric Sandeen 	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
233912ef8301SChristoph Hellwig 	if (longest < alloc_len)
234072d55285SDave Chinner 		return false;
234172d55285SDave Chinner 
23421ca89fbcSBrian Foster 	/*
23431ca89fbcSBrian Foster 	 * Do we have enough free space remaining for the allocation? Don't
23441ca89fbcSBrian Foster 	 * account extra agfl blocks because we are about to defer free them,
23451ca89fbcSBrian Foster 	 * making them unavailable until the current transaction commits.
23461ca89fbcSBrian Foster 	 */
23471ca89fbcSBrian Foster 	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
23481ca89fbcSBrian Foster 	available = (int)(pag->pagf_freeblks + agflcount -
234954fee133SChristoph Hellwig 			  reservation - min_free - args->minleft);
235012ef8301SChristoph Hellwig 	if (available < (int)max(args->total, alloc_len))
235172d55285SDave Chinner 		return false;
235272d55285SDave Chinner 
235354fee133SChristoph Hellwig 	/*
235454fee133SChristoph Hellwig 	 * Clamp maxlen to the amount of free space available for the actual
235554fee133SChristoph Hellwig 	 * extent allocation.
235654fee133SChristoph Hellwig 	 */
235754fee133SChristoph Hellwig 	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
235854fee133SChristoph Hellwig 		args->maxlen = available;
235954fee133SChristoph Hellwig 		ASSERT(args->maxlen > 0);
236054fee133SChristoph Hellwig 		ASSERT(args->maxlen >= args->minlen);
236154fee133SChristoph Hellwig 	}
236254fee133SChristoph Hellwig 
236372d55285SDave Chinner 	return true;
236472d55285SDave Chinner }
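
/*
 * Worked example (hypothetical counters): with pagf_freeblks = 50,
 * pagf_flcount = 4, min_free = 8, reservation = 10 and args->minleft = 0,
 * agflcount = min(4, 8) = 4 and available = 50 + 4 - 10 - 8 = 36 blocks;
 * the allocation may proceed only if 36 covers max(args->total, alloc_len),
 * and a larger args->maxlen is clamped down to 36.
 */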
236572d55285SDave Chinner 
23664223f659SBrian Foster int
23674223f659SBrian Foster xfs_free_agfl_block(
23684223f659SBrian Foster 	struct xfs_trans	*tp,
23694223f659SBrian Foster 	xfs_agnumber_t		agno,
23704223f659SBrian Foster 	xfs_agblock_t		agbno,
23714223f659SBrian Foster 	struct xfs_buf		*agbp,
23724223f659SBrian Foster 	struct xfs_owner_info	*oinfo)
23734223f659SBrian Foster {
23744223f659SBrian Foster 	int			error;
23754223f659SBrian Foster 	struct xfs_buf		*bp;
23764223f659SBrian Foster 
23774223f659SBrian Foster 	error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
23784223f659SBrian Foster 				   XFS_AG_RESV_AGFL);
23794223f659SBrian Foster 	if (error)
23804223f659SBrian Foster 		return error;
23814223f659SBrian Foster 
2382ee647f85SDarrick J. Wong 	error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2383ee647f85SDarrick J. Wong 			XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2384ee647f85SDarrick J. Wong 			tp->t_mountp->m_bsize, 0, &bp);
2385ee647f85SDarrick J. Wong 	if (error)
2386ee647f85SDarrick J. Wong 		return error;
23874223f659SBrian Foster 	xfs_trans_binval(tp, bp);
23884223f659SBrian Foster 
23894223f659SBrian Foster 	return 0;
23904223f659SBrian Foster }
23914223f659SBrian Foster 
239230f712c9SDave Chinner /*
2393e0a8de7dSDave Chinner  * Check the agfl fields of the agf for inconsistency or corruption.
2394e0a8de7dSDave Chinner  *
2395e0a8de7dSDave Chinner  * The original purpose was to detect an agfl header padding mismatch between
2396e0a8de7dSDave Chinner  * current and early v5 kernels. This problem manifests as a 1-slot size
2397e0a8de7dSDave Chinner  * difference between the on-disk flcount and the active [first, last] range of
2398e0a8de7dSDave Chinner  * a wrapped agfl.
2399e0a8de7dSDave Chinner  *
2400e0a8de7dSDave Chinner  * However, we need to use these same checks to catch agfl count corruptions
2401e0a8de7dSDave Chinner  * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2402e0a8de7dSDave Chinner  * way, we need to reset the agfl and warn the user.
2403a27ba260SBrian Foster  *
2404a27ba260SBrian Foster  * Return true if a reset is required before the agfl can be used, false
2405a27ba260SBrian Foster  * otherwise.
2406a27ba260SBrian Foster  */
2407a27ba260SBrian Foster static bool
2408a27ba260SBrian Foster xfs_agfl_needs_reset(
2409a27ba260SBrian Foster 	struct xfs_mount	*mp,
2410a27ba260SBrian Foster 	struct xfs_agf		*agf)
2411a27ba260SBrian Foster {
2412a27ba260SBrian Foster 	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
2413a27ba260SBrian Foster 	uint32_t		l = be32_to_cpu(agf->agf_fllast);
2414a27ba260SBrian Foster 	uint32_t		c = be32_to_cpu(agf->agf_flcount);
2415a27ba260SBrian Foster 	int			agfl_size = xfs_agfl_size(mp);
2416a27ba260SBrian Foster 	int			active;
2417a27ba260SBrian Foster 
2418a27ba260SBrian Foster 	/*
2419a27ba260SBrian Foster 	 * The agf read verifier catches severe corruption of these fields.
2420a27ba260SBrian Foster 	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2421a27ba260SBrian Foster 	 * the verifier allows it.
2422a27ba260SBrian Foster 	 */
2423a27ba260SBrian Foster 	if (f >= agfl_size || l >= agfl_size)
2424a27ba260SBrian Foster 		return true;
2425a27ba260SBrian Foster 	if (c > agfl_size)
2426a27ba260SBrian Foster 		return true;
2427a27ba260SBrian Foster 
2428a27ba260SBrian Foster 	/*
2429a27ba260SBrian Foster 	 * Check consistency between the on-disk count and the active range. An
2430a27ba260SBrian Foster 	 * agfl padding mismatch manifests as an inconsistent flcount.
2431a27ba260SBrian Foster 	 */
2432a27ba260SBrian Foster 	if (c && l >= f)
2433a27ba260SBrian Foster 		active = l - f + 1;
2434a27ba260SBrian Foster 	else if (c)
2435a27ba260SBrian Foster 		active = agfl_size - f + l + 1;
2436a27ba260SBrian Foster 	else
2437a27ba260SBrian Foster 		active = 0;
2438a27ba260SBrian Foster 
2439a27ba260SBrian Foster 	return active != c;
2440a27ba260SBrian Foster }
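
/*
 * Worked example (hypothetical indexes): on a wrapped AGFL with
 * xfs_agfl_size() = 119, flfirst = 100 and fllast = 2 describe an active
 * range of 119 - 100 + 2 + 1 = 22 slots; an on-disk flcount of 21 or 23
 * (the classic one-slot padding mismatch) is inconsistent and forces a
 * reset.
 */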
2441a27ba260SBrian Foster 
2442a27ba260SBrian Foster /*
2443a27ba260SBrian Foster  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2444a27ba260SBrian Foster  * agfl content cannot be trusted. Warn the user that a repair is required to
2445a27ba260SBrian Foster  * recover leaked blocks.
2446a27ba260SBrian Foster  *
2447a27ba260SBrian Foster  * The purpose of this mechanism is to handle filesystems affected by the agfl
2448a27ba260SBrian Foster  * header padding mismatch problem. A reset keeps the filesystem online with a
2449a27ba260SBrian Foster  * relatively minor free space accounting inconsistency rather than suffer the
2450a27ba260SBrian Foster  * inevitable crash from use of an invalid agfl block.
2451a27ba260SBrian Foster  */
2452a27ba260SBrian Foster static void
2453a27ba260SBrian Foster xfs_agfl_reset(
2454a27ba260SBrian Foster 	struct xfs_trans	*tp,
2455a27ba260SBrian Foster 	struct xfs_buf		*agbp,
2456a27ba260SBrian Foster 	struct xfs_perag	*pag)
2457a27ba260SBrian Foster {
2458a27ba260SBrian Foster 	struct xfs_mount	*mp = tp->t_mountp;
24599798f615SChristoph Hellwig 	struct xfs_agf		*agf = agbp->b_addr;
2460a27ba260SBrian Foster 
24617ac2ff8bSDave Chinner 	ASSERT(xfs_perag_agfl_needs_reset(pag));
2462a27ba260SBrian Foster 	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2463a27ba260SBrian Foster 
2464a27ba260SBrian Foster 	xfs_warn(mp,
2465a27ba260SBrian Foster 	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2466a27ba260SBrian Foster 	       "Please unmount and run xfs_repair.",
2467a27ba260SBrian Foster 	         pag->pag_agno, pag->pagf_flcount);
2468a27ba260SBrian Foster 
2469a27ba260SBrian Foster 	agf->agf_flfirst = 0;
2470a27ba260SBrian Foster 	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2471a27ba260SBrian Foster 	agf->agf_flcount = 0;
2472a27ba260SBrian Foster 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2473a27ba260SBrian Foster 				    XFS_AGF_FLCOUNT);
2474a27ba260SBrian Foster 
2475a27ba260SBrian Foster 	pag->pagf_flcount = 0;
24767ac2ff8bSDave Chinner 	clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
2477a27ba260SBrian Foster }
2478a27ba260SBrian Foster 
2479a27ba260SBrian Foster /*
2480f8f2835aSBrian Foster  * Defer an AGFL block free. This is effectively equivalent to
2481c201d9caSDarrick J. Wong  * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2482f8f2835aSBrian Foster  *
2483f8f2835aSBrian Foster  * Deferring AGFL frees helps prevent log reservation overruns due to too many
2484f8f2835aSBrian Foster  * allocation operations in a transaction. AGFL frees are prone to this problem
2485f8f2835aSBrian Foster  * because, for one, they are always freed one at a time. Further, an immediate
2486f8f2835aSBrian Foster  * AGFL block free can cause a btree join and require another block free before
2487f8f2835aSBrian Foster  * the real allocation can proceed. Deferring the free disconnects freeing up
2488f8f2835aSBrian Foster  * the AGFL slot from freeing the block.
2489f8f2835aSBrian Foster  */
24907dfee17bSDave Chinner static int
2491f8f2835aSBrian Foster xfs_defer_agfl_block(
24920f37d178SBrian Foster 	struct xfs_trans		*tp,
2493f8f2835aSBrian Foster 	xfs_agnumber_t			agno,
24942bed0d82SDave Chinner 	xfs_agblock_t			agbno,
2495f8f2835aSBrian Foster 	struct xfs_owner_info		*oinfo)
2496f8f2835aSBrian Foster {
24970f37d178SBrian Foster 	struct xfs_mount		*mp = tp->t_mountp;
2498578c714bSDarrick J. Wong 	struct xfs_extent_free_item	*xefi;
24992bed0d82SDave Chinner 	xfs_fsblock_t			fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
2500f8f2835aSBrian Foster 
2501c201d9caSDarrick J. Wong 	ASSERT(xfs_extfree_item_cache != NULL);
2502f8f2835aSBrian Foster 	ASSERT(oinfo != NULL);
2503f8f2835aSBrian Foster 
25042bed0d82SDave Chinner 	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
25052bed0d82SDave Chinner 		return -EFSCORRUPTED;
25062bed0d82SDave Chinner 
2507578c714bSDarrick J. Wong 	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
25083050bd0bSCarlos Maiolino 			       GFP_KERNEL | __GFP_NOFAIL);
25092bed0d82SDave Chinner 	xefi->xefi_startblock = fsbno;
2510578c714bSDarrick J. Wong 	xefi->xefi_blockcount = 1;
2511578c714bSDarrick J. Wong 	xefi->xefi_owner = oinfo->oi_owner;
2512b742d7b4SDave Chinner 	xefi->xefi_agresv = XFS_AG_RESV_AGFL;
2513f8f2835aSBrian Foster 
2514f8f2835aSBrian Foster 	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2515f8f2835aSBrian Foster 
2516f6b38463SDarrick J. Wong 	xfs_extent_free_get_group(mp, xefi);
2517578c714bSDarrick J. Wong 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
25187dfee17bSDave Chinner 	return 0;
2519f8f2835aSBrian Foster }
2520f8f2835aSBrian Foster 
2521c201d9caSDarrick J. Wong /*
2522c201d9caSDarrick J. Wong  * Add the extent to the list of extents to be free at transaction end.
2523c201d9caSDarrick J. Wong  * The list is maintained sorted (by block number).
2524c201d9caSDarrick J. Wong  */
25257dfee17bSDave Chinner int
2526c201d9caSDarrick J. Wong __xfs_free_extent_later(
2527c201d9caSDarrick J. Wong 	struct xfs_trans		*tp,
2528c201d9caSDarrick J. Wong 	xfs_fsblock_t			bno,
2529c201d9caSDarrick J. Wong 	xfs_filblks_t			len,
2530c201d9caSDarrick J. Wong 	const struct xfs_owner_info	*oinfo,
2531b742d7b4SDave Chinner 	enum xfs_ag_resv_type		type,
2532c201d9caSDarrick J. Wong 	bool				skip_discard)
2533c201d9caSDarrick J. Wong {
2534578c714bSDarrick J. Wong 	struct xfs_extent_free_item	*xefi;
2535c201d9caSDarrick J. Wong 	struct xfs_mount		*mp = tp->t_mountp;
2536f6b38463SDarrick J. Wong #ifdef DEBUG
2537c201d9caSDarrick J. Wong 	xfs_agnumber_t			agno;
2538c201d9caSDarrick J. Wong 	xfs_agblock_t			agbno;
2539c201d9caSDarrick J. Wong 
2540c201d9caSDarrick J. Wong 	ASSERT(bno != NULLFSBLOCK);
2541c201d9caSDarrick J. Wong 	ASSERT(len > 0);
254295f0b95eSChandan Babu R 	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
2543c201d9caSDarrick J. Wong 	ASSERT(!isnullstartblock(bno));
2544c201d9caSDarrick J. Wong 	agno = XFS_FSB_TO_AGNO(mp, bno);
2545c201d9caSDarrick J. Wong 	agbno = XFS_FSB_TO_AGBNO(mp, bno);
2546c201d9caSDarrick J. Wong 	ASSERT(agno < mp->m_sb.sb_agcount);
2547c201d9caSDarrick J. Wong 	ASSERT(agbno < mp->m_sb.sb_agblocks);
2548c201d9caSDarrick J. Wong 	ASSERT(len < mp->m_sb.sb_agblocks);
2549c201d9caSDarrick J. Wong 	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
2550c201d9caSDarrick J. Wong #endif
2551c201d9caSDarrick J. Wong 	ASSERT(xfs_extfree_item_cache != NULL);
2552b742d7b4SDave Chinner 	ASSERT(type != XFS_AG_RESV_AGFL);
2553c201d9caSDarrick J. Wong 
25547dfee17bSDave Chinner 	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
25557dfee17bSDave Chinner 		return -EFSCORRUPTED;
25567dfee17bSDave Chinner 
2557578c714bSDarrick J. Wong 	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
2558c201d9caSDarrick J. Wong 			       GFP_KERNEL | __GFP_NOFAIL);
2559578c714bSDarrick J. Wong 	xefi->xefi_startblock = bno;
2560578c714bSDarrick J. Wong 	xefi->xefi_blockcount = (xfs_extlen_t)len;
2561b742d7b4SDave Chinner 	xefi->xefi_agresv = type;
2562b3b5ff41SDarrick J. Wong 	if (skip_discard)
2563578c714bSDarrick J. Wong 		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2564b3b5ff41SDarrick J. Wong 	if (oinfo) {
2565b3b5ff41SDarrick J. Wong 		ASSERT(oinfo->oi_offset == 0);
2566b3b5ff41SDarrick J. Wong 
2567b3b5ff41SDarrick J. Wong 		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2568578c714bSDarrick J. Wong 			xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
2569b3b5ff41SDarrick J. Wong 		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2570578c714bSDarrick J. Wong 			xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
2571578c714bSDarrick J. Wong 		xefi->xefi_owner = oinfo->oi_owner;
2572b3b5ff41SDarrick J. Wong 	} else {
2573578c714bSDarrick J. Wong 		xefi->xefi_owner = XFS_RMAP_OWN_NULL;
2574b3b5ff41SDarrick J. Wong 	}
2575f6b38463SDarrick J. Wong 	trace_xfs_bmap_free_defer(mp,
2576c201d9caSDarrick J. Wong 			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
2577c201d9caSDarrick J. Wong 			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
2578f6b38463SDarrick J. Wong 
2579f6b38463SDarrick J. Wong 	xfs_extent_free_get_group(mp, xefi);
2580578c714bSDarrick J. Wong 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
25817dfee17bSDave Chinner 	return 0;
2582c201d9caSDarrick J. Wong }
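
/*
 * Usage sketch (hypothetical caller): deferring the free of a 4-block
 * extent owned by AG metadata, drawn from no per-AG reservation:
 *
 *	error = __xfs_free_extent_later(tp, fsbno, 4, &XFS_RMAP_OINFO_AG,
 *			XFS_AG_RESV_NONE, false);
 *	if (error)
 *		return error;
 *
 * The extent is queued as XFS_DEFER_OPS_TYPE_FREE work and is not actually
 * freed until the transaction chain commits.
 */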
2583c201d9caSDarrick J. Wong 
258430151967SChandan Babu R /*
258530151967SChandan Babu R  * Check if an AGF has a free extent record whose length is equal to
258630151967SChandan Babu R  * args->minlen.
258730151967SChandan Babu R  */
258830151967SChandan Babu R STATIC int
258930151967SChandan Babu R xfs_exact_minlen_extent_available(
259030151967SChandan Babu R 	struct xfs_alloc_arg	*args,
259130151967SChandan Babu R 	struct xfs_buf		*agbp,
259230151967SChandan Babu R 	int			*stat)
259330151967SChandan Babu R {
259430151967SChandan Babu R 	struct xfs_btree_cur	*cnt_cur;
259530151967SChandan Babu R 	xfs_agblock_t		fbno;
259630151967SChandan Babu R 	xfs_extlen_t		flen;
259730151967SChandan Babu R 	int			error = 0;
259830151967SChandan Babu R 
259930151967SChandan Babu R 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2600289d38d2SDave Chinner 					args->pag, XFS_BTNUM_CNT);
260130151967SChandan Babu R 	error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
260230151967SChandan Babu R 	if (error)
260330151967SChandan Babu R 		goto out;
260430151967SChandan Babu R 
260530151967SChandan Babu R 	if (*stat == 0) {
260630151967SChandan Babu R 		error = -EFSCORRUPTED;
260730151967SChandan Babu R 		goto out;
260830151967SChandan Babu R 	}
260930151967SChandan Babu R 
261030151967SChandan Babu R 	error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
261130151967SChandan Babu R 	if (error)
261230151967SChandan Babu R 		goto out;
261330151967SChandan Babu R 
261430151967SChandan Babu R 	if (*stat == 1 && flen != args->minlen)
261530151967SChandan Babu R 		*stat = 0;
261630151967SChandan Babu R 
261730151967SChandan Babu R out:
261830151967SChandan Babu R 	xfs_btree_del_cursor(cnt_cur, error);
261930151967SChandan Babu R 
262030151967SChandan Babu R 	return error;
262130151967SChandan Babu R }
262230151967SChandan Babu R 
2623f8f2835aSBrian Foster /*
262430f712c9SDave Chinner  * Decide whether to use this allocation group for this allocation.
262530f712c9SDave Chinner  * If so, fix up the btree freelist's size.
262630f712c9SDave Chinner  */
26272e9101daSDarrick J. Wong int			/* error */
262830f712c9SDave Chinner xfs_alloc_fix_freelist(
2629396503fcSDave Chinner 	struct xfs_alloc_arg	*args,	/* allocation argument structure */
26306a2a9d77SDave Chinner 	uint32_t		alloc_flags)
263130f712c9SDave Chinner {
2632396503fcSDave Chinner 	struct xfs_mount	*mp = args->mp;
2633396503fcSDave Chinner 	struct xfs_perag	*pag = args->pag;
2634396503fcSDave Chinner 	struct xfs_trans	*tp = args->tp;
2635396503fcSDave Chinner 	struct xfs_buf		*agbp = NULL;
2636396503fcSDave Chinner 	struct xfs_buf		*agflbp = NULL;
2637396503fcSDave Chinner 	struct xfs_alloc_arg	targs;	/* local allocation arguments */
263830f712c9SDave Chinner 	xfs_agblock_t		bno;	/* freelist block */
263930f712c9SDave Chinner 	xfs_extlen_t		need;	/* total blocks needed in freelist */
2640c184f855SJan Kara 	int			error = 0;
264130f712c9SDave Chinner 
2642362f5e74SBrian Foster 	/* deferred ops (AGFL block frees) require permanent transactions */
2643362f5e74SBrian Foster 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2644362f5e74SBrian Foster 
26457ac2ff8bSDave Chinner 	if (!xfs_perag_initialised_agf(pag)) {
26466a2a9d77SDave Chinner 		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
2647f48e2df8SDarrick J. Wong 		if (error) {
2648f48e2df8SDarrick J. Wong 			/* Couldn't lock the AGF so skip this AG. */
2649f48e2df8SDarrick J. Wong 			if (error == -EAGAIN)
2650f48e2df8SDarrick J. Wong 				error = 0;
2651396503fcSDave Chinner 			goto out_no_agbp;
265230f712c9SDave Chinner 		}
2653396503fcSDave Chinner 	}
265430f712c9SDave Chinner 
265530f712c9SDave Chinner 	/*
2656396503fcSDave Chinner 	 * If this is a metadata-preferred pag and we are allocating user
2657396503fcSDave Chinner 	 * data, try somewhere else unless we are being asked to try harder
2658396503fcSDave Chinner 	 * at this point.
265930f712c9SDave Chinner 	 */
26607ac2ff8bSDave Chinner 	if (xfs_perag_prefers_metadata(pag) &&
26617ac2ff8bSDave Chinner 	    (args->datatype & XFS_ALLOC_USERDATA) &&
26626a2a9d77SDave Chinner 	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
26636a2a9d77SDave Chinner 		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
2664396503fcSDave Chinner 		goto out_agbp_relse;
266530f712c9SDave Chinner 	}
266630f712c9SDave Chinner 
2667496817b4SDave Chinner 	need = xfs_alloc_min_freelist(mp, pag);
26686a2a9d77SDave Chinner 	if (!xfs_alloc_space_available(args, need, alloc_flags |
266954fee133SChristoph Hellwig 			XFS_ALLOC_FLAG_CHECK))
2670396503fcSDave Chinner 		goto out_agbp_relse;
267130f712c9SDave Chinner 
267230f712c9SDave Chinner 	/*
267330f712c9SDave Chinner 	 * Get the a.g. freespace buffer.
267430f712c9SDave Chinner 	 * Can fail if we're not blocking on locks, and it's held.
267530f712c9SDave Chinner 	 */
2676396503fcSDave Chinner 	if (!agbp) {
26776a2a9d77SDave Chinner 		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
2678f48e2df8SDarrick J. Wong 		if (error) {
2679f48e2df8SDarrick J. Wong 			/* Couldn't lock the AGF so skip this AG. */
2680f48e2df8SDarrick J. Wong 			if (error == -EAGAIN)
2681f48e2df8SDarrick J. Wong 				error = 0;
2682396503fcSDave Chinner 			goto out_no_agbp;
268330f712c9SDave Chinner 		}
268430f712c9SDave Chinner 	}
268550adbcb4SDave Chinner 
2686a27ba260SBrian Foster 	/* reset a padding mismatched agfl before final free space check */
26877ac2ff8bSDave Chinner 	if (xfs_perag_agfl_needs_reset(pag))
2688a27ba260SBrian Foster 		xfs_agfl_reset(tp, agbp, pag);
2689a27ba260SBrian Foster 
269050adbcb4SDave Chinner 	/* If there isn't enough total space or a long-enough single extent, reject it. */
2691496817b4SDave Chinner 	need = xfs_alloc_min_freelist(mp, pag);
26926a2a9d77SDave Chinner 	if (!xfs_alloc_space_available(args, need, alloc_flags))
2693396503fcSDave Chinner 		goto out_agbp_relse;
269472d55285SDave Chinner 
2695479e112dSChristoph Hellwig 	if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
269630151967SChandan Babu R 		int stat;
269730151967SChandan Babu R 
269830151967SChandan Babu R 		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
269930151967SChandan Babu R 		if (error || !stat)
270030151967SChandan Babu R 			goto out_agbp_relse;
270130151967SChandan Babu R 	}
2702479e112dSChristoph Hellwig 
270330f712c9SDave Chinner 	/*
270430f712c9SDave Chinner 	 * Make the freelist shorter if it's too long.
270550adbcb4SDave Chinner 	 *
2706396503fcSDave Chinner 	 * Note that from this point onwards, we will always release the agf and
2707396503fcSDave Chinner 	 * agfl buffers on error. This handles the case where we error out and
2708396503fcSDave Chinner 	 * the buffers are clean or may not have been joined to the transaction
2709396503fcSDave Chinner 	 * and hence need to be released manually. If they have been joined to
2710396503fcSDave Chinner 	 * the transaction, then xfs_trans_brelse() will handle them
2711396503fcSDave Chinner 	 * appropriately based on the recursion count and dirty state of the
2712396503fcSDave Chinner 	 * buffer.
2713396503fcSDave Chinner 	 *
271450adbcb4SDave Chinner 	 * XXX (dgc): When we have lots of free space, does this buy us
271550adbcb4SDave Chinner 	 * anything other than extra overhead when we need to put more blocks
271650adbcb4SDave Chinner 	 * back on the free list? Maybe we should only do this when space is
271750adbcb4SDave Chinner 	 * getting low or the AGFL is more than half full?
271804f13060SDarrick J. Wong 	 *
271904f13060SDarrick J. Wong 	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
272004f13060SDarrick J. Wong 	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
272104f13060SDarrick J. Wong 	 * updating the rmapbt.  Both flags are used in xfs_repair while we're
272204f13060SDarrick J. Wong 	 * rebuilding the rmapbt, and neither are used by the kernel.  They're
272304f13060SDarrick J. Wong 	 * both required to ensure that rmaps are correctly recorded for the
272404f13060SDarrick J. Wong 	 * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
272504f13060SDarrick J. Wong 	 * repair/rmap.c in xfsprogs for details.
272630f712c9SDave Chinner 	 */
272704f13060SDarrick J. Wong 	memset(&targs, 0, sizeof(targs));
27287280fedaSDarrick J. Wong 	/* struct copy below */
27296a2a9d77SDave Chinner 	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
27307280fedaSDarrick J. Wong 		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
273104f13060SDarrick J. Wong 	else
27327280fedaSDarrick J. Wong 		targs.oinfo = XFS_RMAP_OINFO_AG;
27336a2a9d77SDave Chinner 	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
27346a2a9d77SDave Chinner 			pag->pagf_flcount > need) {
273549f0d84eSDave Chinner 		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
273630f712c9SDave Chinner 		if (error)
2737396503fcSDave Chinner 			goto out_agbp_relse;
27384223f659SBrian Foster 
2739c03edc9eSBrian Foster 		/* defer agfl frees */
27407dfee17bSDave Chinner 		error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
27417dfee17bSDave Chinner 		if (error)
27427dfee17bSDave Chinner 			goto out_agbp_relse;
2743f8f2835aSBrian Foster 	}
274450adbcb4SDave Chinner 
274530f712c9SDave Chinner 	targs.tp = tp;
274630f712c9SDave Chinner 	targs.mp = mp;
274730f712c9SDave Chinner 	targs.agbp = agbp;
274830f712c9SDave Chinner 	targs.agno = args->agno;
27493fd129b6SDarrick J. Wong 	targs.alignment = targs.minlen = targs.prod = 1;
275030f712c9SDave Chinner 	targs.pag = pag;
2751cec7bb7dSDave Chinner 	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
275250adbcb4SDave Chinner 	if (error)
2753396503fcSDave Chinner 		goto out_agbp_relse;
275450adbcb4SDave Chinner 
275550adbcb4SDave Chinner 	/* Make the freelist longer if it's too short. */
275650adbcb4SDave Chinner 	while (pag->pagf_flcount < need) {
275730f712c9SDave Chinner 		targs.agbno = 0;
275850adbcb4SDave Chinner 		targs.maxlen = need - pag->pagf_flcount;
27590ab32086SBrian Foster 		targs.resv = XFS_AG_RESV_AGFL;
276050adbcb4SDave Chinner 
276150adbcb4SDave Chinner 		/* Allocate as many blocks as possible at once. */
27626a2a9d77SDave Chinner 		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
2763396503fcSDave Chinner 		if (error)
2764396503fcSDave Chinner 			goto out_agflbp_relse;
2765396503fcSDave Chinner 
276630f712c9SDave Chinner 		/*
276730f712c9SDave Chinner 		 * Stop if we run out.  Won't happen if callers are obeying
276830f712c9SDave Chinner 		 * the restrictions correctly.  Can happen for free calls
276930f712c9SDave Chinner 		 * on a completely full ag.
277030f712c9SDave Chinner 		 */
277130f712c9SDave Chinner 		if (targs.agbno == NULLAGBLOCK) {
27726a2a9d77SDave Chinner 			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
277330f712c9SDave Chinner 				break;
2774396503fcSDave Chinner 			goto out_agflbp_relse;
277530f712c9SDave Chinner 		}
27764811c933SDave Chinner 
27774811c933SDave Chinner 		if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
27784811c933SDave Chinner 			error = xfs_rmap_alloc(tp, agbp, pag,
27794811c933SDave Chinner 				       targs.agbno, targs.len, &targs.oinfo);
27804811c933SDave Chinner 			if (error)
27814811c933SDave Chinner 				goto out_agflbp_relse;
27824811c933SDave Chinner 		}
27834811c933SDave Chinner 		error = xfs_alloc_update_counters(tp, agbp,
27844811c933SDave Chinner 						  -((long)(targs.len)));
27854811c933SDave Chinner 		if (error)
27864811c933SDave Chinner 			goto out_agflbp_relse;
27874811c933SDave Chinner 
278830f712c9SDave Chinner 		/*
278930f712c9SDave Chinner 		 * Put each allocated block on the list.
279030f712c9SDave Chinner 		 */
279130f712c9SDave Chinner 		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
27928c392eb2SDave Chinner 			error = xfs_alloc_put_freelist(pag, tp, agbp,
279330f712c9SDave Chinner 							agflbp, bno, 0);
279430f712c9SDave Chinner 			if (error)
2795396503fcSDave Chinner 				goto out_agflbp_relse;
279630f712c9SDave Chinner 		}
279730f712c9SDave Chinner 	}
279830f712c9SDave Chinner 	xfs_trans_brelse(tp, agflbp);
279930f712c9SDave Chinner 	args->agbp = agbp;
280030f712c9SDave Chinner 	return 0;
2801396503fcSDave Chinner 
2802396503fcSDave Chinner out_agflbp_relse:
2803396503fcSDave Chinner 	xfs_trans_brelse(tp, agflbp);
2804396503fcSDave Chinner out_agbp_relse:
2805396503fcSDave Chinner 	if (agbp)
2806396503fcSDave Chinner 		xfs_trans_brelse(tp, agbp);
2807396503fcSDave Chinner out_no_agbp:
2808396503fcSDave Chinner 	args->agbp = NULL;
2809396503fcSDave Chinner 	return error;
281030f712c9SDave Chinner }
281130f712c9SDave Chinner 
281230f712c9SDave Chinner /*
281330f712c9SDave Chinner  * Get a block from the freelist.
281430f712c9SDave Chinner  * Returns with the buffer for the block gotten.
281530f712c9SDave Chinner  */
281650920116SDave Chinner int
281730f712c9SDave Chinner xfs_alloc_get_freelist(
281849f0d84eSDave Chinner 	struct xfs_perag	*pag,
281950920116SDave Chinner 	struct xfs_trans	*tp,
282050920116SDave Chinner 	struct xfs_buf		*agbp,
282150920116SDave Chinner 	xfs_agblock_t		*bnop,
282250920116SDave Chinner 	int			btreeblk)
282330f712c9SDave Chinner {
28249798f615SChristoph Hellwig 	struct xfs_agf		*agf = agbp->b_addr;
282550920116SDave Chinner 	struct xfs_buf		*agflbp;
282650920116SDave Chinner 	xfs_agblock_t		bno;
282730f712c9SDave Chinner 	__be32			*agfl_bno;
282830f712c9SDave Chinner 	int			error;
2829f53dde11SDave Chinner 	uint32_t		logflags;
283050920116SDave Chinner 	struct xfs_mount	*mp = tp->t_mountp;
283130f712c9SDave Chinner 
283230f712c9SDave Chinner 	/*
283330f712c9SDave Chinner 	 * Freelist is empty, give up.
283430f712c9SDave Chinner 	 */
283530f712c9SDave Chinner 	if (!agf->agf_flcount) {
283630f712c9SDave Chinner 		*bnop = NULLAGBLOCK;
283730f712c9SDave Chinner 		return 0;
283830f712c9SDave Chinner 	}
283930f712c9SDave Chinner 	/*
284030f712c9SDave Chinner 	 * Read the array of free blocks.
284130f712c9SDave Chinner 	 */
2842cec7bb7dSDave Chinner 	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
284330f712c9SDave Chinner 	if (error)
284430f712c9SDave Chinner 		return error;
284530f712c9SDave Chinner 
284630f712c9SDave Chinner 
284730f712c9SDave Chinner 	/*
284830f712c9SDave Chinner 	 * Get the block number and update the data structures.
284930f712c9SDave Chinner 	 */
2850183606d8SChristoph Hellwig 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
285130f712c9SDave Chinner 	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
28523148ebf2SDave Chinner 	if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
28533148ebf2SDave Chinner 		return -EFSCORRUPTED;
28543148ebf2SDave Chinner 
285530f712c9SDave Chinner 	be32_add_cpu(&agf->agf_flfirst, 1);
285630f712c9SDave Chinner 	xfs_trans_brelse(tp, agflbp);
2857a78ee256SDave Chinner 	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
285830f712c9SDave Chinner 		agf->agf_flfirst = 0;
285930f712c9SDave Chinner 
28607ac2ff8bSDave Chinner 	ASSERT(!xfs_perag_agfl_needs_reset(pag));
286130f712c9SDave Chinner 	be32_add_cpu(&agf->agf_flcount, -1);
286230f712c9SDave Chinner 	pag->pagf_flcount--;
286330f712c9SDave Chinner 
286430f712c9SDave Chinner 	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
286530f712c9SDave Chinner 	if (btreeblk) {
286630f712c9SDave Chinner 		be32_add_cpu(&agf->agf_btreeblks, 1);
286730f712c9SDave Chinner 		pag->pagf_btreeblks++;
286830f712c9SDave Chinner 		logflags |= XFS_AGF_BTREEBLKS;
286930f712c9SDave Chinner 	}
287030f712c9SDave Chinner 
287130f712c9SDave Chinner 	xfs_alloc_log_agf(tp, agbp, logflags);
287230f712c9SDave Chinner 	*bnop = bno;
287330f712c9SDave Chinner 
287430f712c9SDave Chinner 	return 0;
287530f712c9SDave Chinner }
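
/*
 * Example of the wrap-around above (hypothetical state): if agf_flfirst is
 * xfs_agfl_size(mp) - 1, taking one more block advances agf_flfirst past
 * the end of the array and it is reset to slot 0, so [flfirst, fllast]
 * behaves as a circular buffer over the AGFL block.
 */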
287630f712c9SDave Chinner 
287730f712c9SDave Chinner /*
287830f712c9SDave Chinner  * Log the given fields from the agf structure.
287930f712c9SDave Chinner  */
288030f712c9SDave Chinner void
288130f712c9SDave Chinner xfs_alloc_log_agf(
2882f53dde11SDave Chinner 	struct xfs_trans	*tp,
2883f53dde11SDave Chinner 	struct xfs_buf		*bp,
2884f53dde11SDave Chinner 	uint32_t		fields)
288530f712c9SDave Chinner {
288630f712c9SDave Chinner 	int	first;		/* first byte offset */
288730f712c9SDave Chinner 	int	last;		/* last byte offset */
288830f712c9SDave Chinner 	static const short	offsets[] = {
288930f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_magicnum),
289030f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_versionnum),
289130f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_seqno),
289230f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_length),
289330f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_roots[0]),
289430f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_levels[0]),
289530f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_flfirst),
289630f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_fllast),
289730f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_flcount),
289830f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_freeblks),
289930f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_longest),
290030f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_btreeblks),
290130f712c9SDave Chinner 		offsetof(xfs_agf_t, agf_uuid),
2902f32866fdSDarrick J. Wong 		offsetof(xfs_agf_t, agf_rmap_blocks),
2903bdf28630SDarrick J. Wong 		offsetof(xfs_agf_t, agf_refcount_blocks),
2904bdf28630SDarrick J. Wong 		offsetof(xfs_agf_t, agf_refcount_root),
2905bdf28630SDarrick J. Wong 		offsetof(xfs_agf_t, agf_refcount_level),
2906da1f039dSDarrick J. Wong 		/* needed so that we don't log the whole rest of the structure: */
2907da1f039dSDarrick J. Wong 		offsetof(xfs_agf_t, agf_spare64),
290830f712c9SDave Chinner 		sizeof(xfs_agf_t)
290930f712c9SDave Chinner 	};
291030f712c9SDave Chinner 
29119798f615SChristoph Hellwig 	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
291230f712c9SDave Chinner 
291330f712c9SDave Chinner 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
291430f712c9SDave Chinner 
291530f712c9SDave Chinner 	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
291630f712c9SDave Chinner 	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
291730f712c9SDave Chinner }
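
/*
 * Example of the offset-table logging above: passing fields =
 * XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT makes xfs_btree_offsets() return the
 * byte range from offsetof(xfs_agf_t, agf_flfirst) through
 * offsetof(xfs_agf_t, agf_freeblks) - 1, so everything between the lowest
 * and highest dirty field is logged as one contiguous region.
 */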
291830f712c9SDave Chinner 
291930f712c9SDave Chinner /*
292030f712c9SDave Chinner  * Put the block on the freelist for the allocation group.
292130f712c9SDave Chinner  */
292250920116SDave Chinner int
292330f712c9SDave Chinner xfs_alloc_put_freelist(
29248c392eb2SDave Chinner 	struct xfs_perag	*pag,
292550920116SDave Chinner 	struct xfs_trans	*tp,
292650920116SDave Chinner 	struct xfs_buf		*agbp,
292750920116SDave Chinner 	struct xfs_buf		*agflbp,
292850920116SDave Chinner 	xfs_agblock_t		bno,
292950920116SDave Chinner 	int			btreeblk)
293030f712c9SDave Chinner {
29319798f615SChristoph Hellwig 	struct xfs_mount	*mp = tp->t_mountp;
29329798f615SChristoph Hellwig 	struct xfs_agf		*agf = agbp->b_addr;
293350920116SDave Chinner 	__be32			*blockp;
293430f712c9SDave Chinner 	int			error;
2935f53dde11SDave Chinner 	uint32_t		logflags;
293630f712c9SDave Chinner 	__be32			*agfl_bno;
293730f712c9SDave Chinner 	int			startoff;
293830f712c9SDave Chinner 
2939cec7bb7dSDave Chinner 	if (!agflbp) {
2940cec7bb7dSDave Chinner 		error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2941cec7bb7dSDave Chinner 		if (error)
294230f712c9SDave Chinner 			return error;
2943cec7bb7dSDave Chinner 	}
2944cec7bb7dSDave Chinner 
294530f712c9SDave Chinner 	be32_add_cpu(&agf->agf_fllast, 1);
2946a78ee256SDave Chinner 	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
294730f712c9SDave Chinner 		agf->agf_fllast = 0;
294830f712c9SDave Chinner 
29497ac2ff8bSDave Chinner 	ASSERT(!xfs_perag_agfl_needs_reset(pag));
295030f712c9SDave Chinner 	be32_add_cpu(&agf->agf_flcount, 1);
295130f712c9SDave Chinner 	pag->pagf_flcount++;
295230f712c9SDave Chinner 
295330f712c9SDave Chinner 	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
295430f712c9SDave Chinner 	if (btreeblk) {
295530f712c9SDave Chinner 		be32_add_cpu(&agf->agf_btreeblks, -1);
295630f712c9SDave Chinner 		pag->pagf_btreeblks--;
295730f712c9SDave Chinner 		logflags |= XFS_AGF_BTREEBLKS;
295830f712c9SDave Chinner 	}
295930f712c9SDave Chinner 
296030f712c9SDave Chinner 	xfs_alloc_log_agf(tp, agbp, logflags);
296130f712c9SDave Chinner 
2962a78ee256SDave Chinner 	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
296330f712c9SDave Chinner 
2964183606d8SChristoph Hellwig 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
296530f712c9SDave Chinner 	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
296630f712c9SDave Chinner 	*blockp = cpu_to_be32(bno);
296730f712c9SDave Chinner 	startoff = (char *)blockp - (char *)agflbp->b_addr;
296830f712c9SDave Chinner 
296930f712c9SDave Chinner 	xfs_alloc_log_agf(tp, agbp, logflags);
297030f712c9SDave Chinner 
297130f712c9SDave Chinner 	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
297230f712c9SDave Chinner 	xfs_trans_log_buf(tp, agflbp, startoff,
297330f712c9SDave Chinner 			  startoff + sizeof(xfs_agblock_t) - 1);
297430f712c9SDave Chinner 	return 0;
297530f712c9SDave Chinner }
297630f712c9SDave Chinner 
2977e0a8de7dSDave Chinner /*
29782d7d1e7eSDarrick J. Wong  * Check that this AGF/AGI header's sequence number and length matches the AG
29792d7d1e7eSDarrick J. Wong  * number and size in fsblocks.
29802d7d1e7eSDarrick J. Wong  */
29812d7d1e7eSDarrick J. Wong xfs_failaddr_t
29822d7d1e7eSDarrick J. Wong xfs_validate_ag_length(
29832d7d1e7eSDarrick J. Wong 	struct xfs_buf		*bp,
29842d7d1e7eSDarrick J. Wong 	uint32_t		seqno,
29852d7d1e7eSDarrick J. Wong 	uint32_t		length)
29862d7d1e7eSDarrick J. Wong {
29872d7d1e7eSDarrick J. Wong 	struct xfs_mount	*mp = bp->b_mount;
29882d7d1e7eSDarrick J. Wong 	/*
29892d7d1e7eSDarrick J. Wong 	 * During growfs operations, the perag is not fully initialised,
29902d7d1e7eSDarrick J. Wong 	 * so we can't use it for any useful checking. growfs ensures we can't
29902d7d1e7eSDarrick J. Wong 	 * so we can't use it for any useful checking. growfs avoids this by
29912d7d1e7eSDarrick J. Wong 	 * using uncached buffers that don't have the perag attached, which
29922d7d1e7eSDarrick J. Wong 	 * lets us detect that case and skip the perag-based checks.
29942d7d1e7eSDarrick J. Wong 	if (bp->b_pag && seqno != bp->b_pag->pag_agno)
29952d7d1e7eSDarrick J. Wong 		return __this_address;
29962d7d1e7eSDarrick J. Wong 
29972d7d1e7eSDarrick J. Wong 	/*
29982d7d1e7eSDarrick J. Wong 	 * Only the last AG in the filesystem is allowed to be shorter
29992d7d1e7eSDarrick J. Wong 	 * than the AG size recorded in the superblock.
30002d7d1e7eSDarrick J. Wong 	 */
30012d7d1e7eSDarrick J. Wong 	if (length != mp->m_sb.sb_agblocks) {
30022d7d1e7eSDarrick J. Wong 		/*
30032d7d1e7eSDarrick J. Wong 		 * During growfs, the new last AG can get here before we
30042d7d1e7eSDarrick J. Wong 		 * have updated the superblock. Give it a pass on the seqno
30052d7d1e7eSDarrick J. Wong 		 * check.
30062d7d1e7eSDarrick J. Wong 		 */
30072d7d1e7eSDarrick J. Wong 		if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
30082d7d1e7eSDarrick J. Wong 			return __this_address;
30092d7d1e7eSDarrick J. Wong 		if (length < XFS_MIN_AG_BLOCKS)
30102d7d1e7eSDarrick J. Wong 			return __this_address;
30112d7d1e7eSDarrick J. Wong 		if (length > mp->m_sb.sb_agblocks)
30122d7d1e7eSDarrick J. Wong 			return __this_address;
30132d7d1e7eSDarrick J. Wong 	}
30142d7d1e7eSDarrick J. Wong 
30152d7d1e7eSDarrick J. Wong 	return NULL;
30162d7d1e7eSDarrick J. Wong }
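
/*
 * Worked example (hypothetical geometry): with sb_agblocks = 16384 and
 * sb_agcount = 4, an AGF claiming agf_length = 12000 passes only if it
 * belongs to AG 3 (the runt last AG, assuming the buffer has its perag
 * attached) and 12000 >= XFS_MIN_AG_BLOCKS; every other AG must report
 * exactly 16384 blocks.
 */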
30172d7d1e7eSDarrick J. Wong 
30182d7d1e7eSDarrick J. Wong /*
3019e0a8de7dSDave Chinner  * Verify the AGF is consistent.
3020e0a8de7dSDave Chinner  *
3021e0a8de7dSDave Chinner  * We do not verify the AGFL indexes in the AGF are fully consistent here
3022e0a8de7dSDave Chinner  * because of issues with variable on-disk structure sizes. Instead, we check
3023e0a8de7dSDave Chinner  * the agfl indexes for consistency when we initialise the perag from the AGF
3024e0a8de7dSDave Chinner  * information after a read completes.
3025e0a8de7dSDave Chinner  *
3026e0a8de7dSDave Chinner  * If the index is inconsistent, then we mark the perag as needing an AGFL
3027e0a8de7dSDave Chinner  * reset. The first AGFL update performed then resets the AGFL indexes and
3028e0a8de7dSDave Chinner  * refills the AGFL with known good free blocks, allowing the filesystem to
3029e0a8de7dSDave Chinner  * continue operating normally at the cost of a few leaked free space blocks.
3030e0a8de7dSDave Chinner  */
3031a6a781a5SDarrick J. Wong static xfs_failaddr_t
303230f712c9SDave Chinner xfs_agf_verify(
303330f712c9SDave Chinner 	struct xfs_buf		*bp)
303430f712c9SDave Chinner {
3035dbd329f1SChristoph Hellwig 	struct xfs_mount	*mp = bp->b_mount;
30369798f615SChristoph Hellwig 	struct xfs_agf		*agf = bp->b_addr;
30372d7d1e7eSDarrick J. Wong 	xfs_failaddr_t		fa;
30382d7d1e7eSDarrick J. Wong 	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno);
3039edd8276dSDave Chinner 	uint32_t		agf_length = be32_to_cpu(agf->agf_length);
304030f712c9SDave Chinner 
304138c26bfdSDave Chinner 	if (xfs_has_crc(mp)) {
3042a45086e2SBrian Foster 		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
3043a6a781a5SDarrick J. Wong 			return __this_address;
30449798f615SChristoph Hellwig 		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
3045a6a781a5SDarrick J. Wong 			return __this_address;
3046a45086e2SBrian Foster 	}
304730f712c9SDave Chinner 
304839708c20SBrian Foster 	if (!xfs_verify_magic(bp, agf->agf_magicnum))
304939708c20SBrian Foster 		return __this_address;
305039708c20SBrian Foster 
3051edd8276dSDave Chinner 	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
3052a6a781a5SDarrick J. Wong 		return __this_address;
305330f712c9SDave Chinner 
3054edd8276dSDave Chinner 	/*
3055edd8276dSDave Chinner 	 * Both agf_seqno and agf_length need to validated before anything else
3056edd8276dSDave Chinner 	 * block number related in the AGF or AGFL can be checked.
3057edd8276dSDave Chinner 	 */
30582d7d1e7eSDarrick J. Wong 	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
30592d7d1e7eSDarrick J. Wong 	if (fa)
30602d7d1e7eSDarrick J. Wong 		return fa;
3061edd8276dSDave Chinner 
3062edd8276dSDave Chinner 	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
3063edd8276dSDave Chinner 		return __this_address;
3064edd8276dSDave Chinner 	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
3065edd8276dSDave Chinner 		return __this_address;
3066edd8276dSDave Chinner 	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
3067d0c7feafSZheng Bin 		return __this_address;
3068d0c7feafSZheng Bin 
3069d0c7feafSZheng Bin 	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
3070edd8276dSDave Chinner 	    be32_to_cpu(agf->agf_freeblks) > agf_length)
3071d0c7feafSZheng Bin 		return __this_address;
3072d0c7feafSZheng Bin 
3073d2a047f3SDarrick J. Wong 	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
3074d2a047f3SDarrick J. Wong 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
30757cb3efb4SDarrick J. Wong 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
30767cb3efb4SDarrick J. Wong 						mp->m_alloc_maxlevels ||
30777cb3efb4SDarrick J. Wong 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
30787cb3efb4SDarrick J. Wong 						mp->m_alloc_maxlevels)
3079a6a781a5SDarrick J. Wong 		return __this_address;
3080e1b05723SEric Sandeen 
3081ebd9027dSDave Chinner 	if (xfs_has_lazysbcount(mp) &&
3082edd8276dSDave Chinner 	    be32_to_cpu(agf->agf_btreeblks) > agf_length)
3083a6a781a5SDarrick J. Wong 		return __this_address;
308430f712c9SDave Chinner 
3085edd8276dSDave Chinner 	if (xfs_has_rmapbt(mp)) {
3086edd8276dSDave Chinner 		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
3087d0c7feafSZheng Bin 			return __this_address;
3088d0c7feafSZheng Bin 
3089edd8276dSDave Chinner 		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
3090edd8276dSDave Chinner 		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
3091edd8276dSDave Chinner 							mp->m_rmap_maxlevels)
3092a6a781a5SDarrick J. Wong 			return __this_address;
3093edd8276dSDave Chinner 	}
3094edd8276dSDave Chinner 
3095edd8276dSDave Chinner 	if (xfs_has_reflink(mp)) {
3096edd8276dSDave Chinner 		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
3097edd8276dSDave Chinner 			return __this_address;
3098edd8276dSDave Chinner 
3099edd8276dSDave Chinner 		if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
3100edd8276dSDave Chinner 		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
3101edd8276dSDave Chinner 			return __this_address;
3102edd8276dSDave Chinner 	}
310346eeb521SDarrick J. Wong 
3104a6a781a5SDarrick J. Wong 	return NULL;
310530f712c9SDave Chinner }
310630f712c9SDave Chinner 
310730f712c9SDave Chinner static void
xfs_agf_read_verify(struct xfs_buf * bp)310830f712c9SDave Chinner xfs_agf_read_verify(
310930f712c9SDave Chinner 	struct xfs_buf	*bp)
311030f712c9SDave Chinner {
3111dbd329f1SChristoph Hellwig 	struct xfs_mount *mp = bp->b_mount;
3112bc1a09b8SDarrick J. Wong 	xfs_failaddr_t	fa;
311330f712c9SDave Chinner 
311438c26bfdSDave Chinner 	if (xfs_has_crc(mp) &&
311530f712c9SDave Chinner 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3116bc1a09b8SDarrick J. Wong 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3117bc1a09b8SDarrick J. Wong 	else {
3118b5572597SDarrick J. Wong 		fa = xfs_agf_verify(bp);
3119bc1a09b8SDarrick J. Wong 		if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3120bc1a09b8SDarrick J. Wong 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3121bc1a09b8SDarrick J. Wong 	}
312230f712c9SDave Chinner }
312330f712c9SDave Chinner 
312430f712c9SDave Chinner static void
xfs_agf_write_verify(struct xfs_buf * bp)312530f712c9SDave Chinner xfs_agf_write_verify(
312630f712c9SDave Chinner 	struct xfs_buf	*bp)
312730f712c9SDave Chinner {
3128dbd329f1SChristoph Hellwig 	struct xfs_mount	*mp = bp->b_mount;
3129fb1755a6SCarlos Maiolino 	struct xfs_buf_log_item	*bip = bp->b_log_item;
31309798f615SChristoph Hellwig 	struct xfs_agf		*agf = bp->b_addr;
3131bc1a09b8SDarrick J. Wong 	xfs_failaddr_t		fa;
313230f712c9SDave Chinner 
3133b5572597SDarrick J. Wong 	fa = xfs_agf_verify(bp);
3134bc1a09b8SDarrick J. Wong 	if (fa) {
3135bc1a09b8SDarrick J. Wong 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
313630f712c9SDave Chinner 		return;
313730f712c9SDave Chinner 	}
313830f712c9SDave Chinner 
313938c26bfdSDave Chinner 	if (!xfs_has_crc(mp))
314030f712c9SDave Chinner 		return;
314130f712c9SDave Chinner 
314230f712c9SDave Chinner 	if (bip)
31439798f615SChristoph Hellwig 		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
314430f712c9SDave Chinner 
314530f712c9SDave Chinner 	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
314630f712c9SDave Chinner }
314730f712c9SDave Chinner 
314830f712c9SDave Chinner const struct xfs_buf_ops xfs_agf_buf_ops = {
3149233135b7SEric Sandeen 	.name = "xfs_agf",
315039708c20SBrian Foster 	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
315130f712c9SDave Chinner 	.verify_read = xfs_agf_read_verify,
315230f712c9SDave Chinner 	.verify_write = xfs_agf_write_verify,
3153b5572597SDarrick J. Wong 	.verify_struct = xfs_agf_verify,
315430f712c9SDave Chinner };
315530f712c9SDave Chinner 
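#if 0
	/*
	 * Illustrative sketch (hypothetical call site): ->verify_struct lets
	 * a caller re-check an already-read buffer without the error
	 * injection and I/O-path side effects of the read verifier.
	 */
	xfs_failaddr_t	fa = bp->b_ops->verify_struct(bp);

	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
#endif
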
315630f712c9SDave Chinner /*
315730f712c9SDave Chinner  * Read in the allocation group header (free/alloc section).
315830f712c9SDave Chinner  */
3159fa044ae7SDave Chinner int
xfs_read_agf(struct xfs_perag * pag,struct xfs_trans * tp,int flags,struct xfs_buf ** agfbpp)316030f712c9SDave Chinner xfs_read_agf(
3161fa044ae7SDave Chinner 	struct xfs_perag	*pag,
3162fa044ae7SDave Chinner 	struct xfs_trans	*tp,
3163fa044ae7SDave Chinner 	int			flags,
3164fa044ae7SDave Chinner 	struct xfs_buf		**agfbpp)
316530f712c9SDave Chinner {
3166fa044ae7SDave Chinner 	struct xfs_mount	*mp = pag->pag_mount;
316730f712c9SDave Chinner 	int			error;
316830f712c9SDave Chinner 
3169fa044ae7SDave Chinner 	trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
317030f712c9SDave Chinner 
31714ed8e27bSDarrick J. Wong 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3172fa044ae7SDave Chinner 			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
3173fa044ae7SDave Chinner 			XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
317430f712c9SDave Chinner 	if (error)
317530f712c9SDave Chinner 		return error;
317630f712c9SDave Chinner 
3177fa044ae7SDave Chinner 	xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
317830f712c9SDave Chinner 	return 0;
317930f712c9SDave Chinner }
318030f712c9SDave Chinner 
318130f712c9SDave Chinner /*
318276b47e52SDave Chinner  * Read in the allocation group header (free/alloc section) and initialise the
318376b47e52SDave Chinner  * perag structure if necessary. If the caller provides @agfbpp, then return the
318476b47e52SDave Chinner  * locked buffer to the caller, otherwise free it.
318530f712c9SDave Chinner  */
318608d3e84fSDave Chinner int
xfs_alloc_read_agf(struct xfs_perag * pag,struct xfs_trans * tp,int flags,struct xfs_buf ** agfbpp)318730f712c9SDave Chinner xfs_alloc_read_agf(
318808d3e84fSDave Chinner 	struct xfs_perag	*pag,
318908d3e84fSDave Chinner 	struct xfs_trans	*tp,
319008d3e84fSDave Chinner 	int			flags,
319176b47e52SDave Chinner 	struct xfs_buf		**agfbpp)
319230f712c9SDave Chinner {
319376b47e52SDave Chinner 	struct xfs_buf		*agfbp;
319408d3e84fSDave Chinner 	struct xfs_agf		*agf;
319530f712c9SDave Chinner 	int			error;
319616eaab83SBrian Foster 	int			allocbt_blks;
319730f712c9SDave Chinner 
319808d3e84fSDave Chinner 	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
319930f712c9SDave Chinner 
3200f48e2df8SDarrick J. Wong 	/* We don't support trylock when freeing. */
3201f48e2df8SDarrick J. Wong 	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
3202f48e2df8SDarrick J. Wong 			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
3203fa044ae7SDave Chinner 	error = xfs_read_agf(pag, tp,
320430f712c9SDave Chinner 			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
320576b47e52SDave Chinner 			&agfbp);
320630f712c9SDave Chinner 	if (error)
320730f712c9SDave Chinner 		return error;
320830f712c9SDave Chinner 
320976b47e52SDave Chinner 	agf = agfbp->b_addr;
32107ac2ff8bSDave Chinner 	if (!xfs_perag_initialised_agf(pag)) {
321130f712c9SDave Chinner 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
321230f712c9SDave Chinner 		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
321330f712c9SDave Chinner 		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
321430f712c9SDave Chinner 		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
321530f712c9SDave Chinner 		pag->pagf_levels[XFS_BTNUM_BNOi] =
321630f712c9SDave Chinner 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
321730f712c9SDave Chinner 		pag->pagf_levels[XFS_BTNUM_CNTi] =
321830f712c9SDave Chinner 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
3219b8704944SDarrick J. Wong 		pag->pagf_levels[XFS_BTNUM_RMAPi] =
3220b8704944SDarrick J. Wong 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
322146eeb521SDarrick J. Wong 		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
32227ac2ff8bSDave Chinner 		if (xfs_agfl_needs_reset(pag->pag_mount, agf))
32237ac2ff8bSDave Chinner 			set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
3224e2e63b07SDarrick J. Wong 		else
3225e2e63b07SDarrick J. Wong 			clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
322616eaab83SBrian Foster 
322716eaab83SBrian Foster 		/*
322816eaab83SBrian Foster 		 * Update the in-core allocbt counter. Filter out the rmapbt
322916eaab83SBrian Foster 		 * subset of the btreeblks counter because the rmapbt is managed
323016eaab83SBrian Foster 		 * by perag reservation. Subtract one for the rmapbt root block
323116eaab83SBrian Foster 		 * because the rmap counter includes it while the btreeblks
323216eaab83SBrian Foster 		 * counter only tracks non-root blocks.
323316eaab83SBrian Foster 		 */
323416eaab83SBrian Foster 		allocbt_blks = pag->pagf_btreeblks;
323508d3e84fSDave Chinner 		if (xfs_has_rmapbt(pag->pag_mount))
323616eaab83SBrian Foster 			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
323716eaab83SBrian Foster 		if (allocbt_blks > 0)
323808d3e84fSDave Chinner 			atomic64_add(allocbt_blks,
323908d3e84fSDave Chinner 					&pag->pag_mount->m_allocbt_blks);
32407ac2ff8bSDave Chinner 
32417ac2ff8bSDave Chinner 		set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
324230f712c9SDave Chinner 	}
324330f712c9SDave Chinner #ifdef DEBUG
324408d3e84fSDave Chinner 	else if (!xfs_is_shutdown(pag->pag_mount)) {
324530f712c9SDave Chinner 		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
324630f712c9SDave Chinner 		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
324730f712c9SDave Chinner 		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
324830f712c9SDave Chinner 		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
324930f712c9SDave Chinner 		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
325030f712c9SDave Chinner 		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
325130f712c9SDave Chinner 		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
325230f712c9SDave Chinner 		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
325330f712c9SDave Chinner 	}
325430f712c9SDave Chinner #endif
325576b47e52SDave Chinner 	if (agfbpp)
325676b47e52SDave Chinner 		*agfbpp = agfbp;
325776b47e52SDave Chinner 	else
325876b47e52SDave Chinner 		xfs_trans_brelse(tp, agfbp);
325930f712c9SDave Chinner 	return 0;
326030f712c9SDave Chinner }
326130f712c9SDave Chinner 
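#if 0
	/*
	 * Usage sketch (hypothetical call site; mp, tp and agno come from
	 * the caller): read and validate the AGF for one AG, initialising
	 * the in-core perag counters as a side effect. Passing a NULL
	 * agfbpp means the locked buffer is released for us.
	 */
	struct xfs_perag	*pag = xfs_perag_get(mp, agno);
	int			error;

	error = xfs_alloc_read_agf(pag, tp, 0, NULL);
	xfs_perag_put(pag);
#endif
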
326230f712c9SDave Chinner /*
3263ecd788a9SDave Chinner  * Pre-process allocation arguments to set initial state that we don't require
3264ecd788a9SDave Chinner  * callers to set up correctly, as well as bounds check the allocation args
3265ecd788a9SDave Chinner  * that are set up.
326630f712c9SDave Chinner  */
3267ecd788a9SDave Chinner static int
xfs_alloc_vextent_check_args(struct xfs_alloc_arg * args,xfs_fsblock_t target,xfs_agnumber_t * minimum_agno)3268ecd788a9SDave Chinner xfs_alloc_vextent_check_args(
3269319c9e87SDave Chinner 	struct xfs_alloc_arg	*args,
32708b813568SDave Chinner 	xfs_fsblock_t		target,
32718b813568SDave Chinner 	xfs_agnumber_t		*minimum_agno)
327230f712c9SDave Chinner {
3273ecd788a9SDave Chinner 	struct xfs_mount	*mp = args->mp;
3274ecd788a9SDave Chinner 	xfs_agblock_t		agsize;
327530f712c9SDave Chinner 
3276230e8fe8SDave Chinner 	args->fsbno = NULLFSBLOCK;
3277ecd788a9SDave Chinner 
32788b813568SDave Chinner 	*minimum_agno = 0;
32798b813568SDave Chinner 	if (args->tp->t_highest_agno != NULLAGNUMBER)
32808b813568SDave Chinner 		*minimum_agno = args->tp->t_highest_agno;
32818b813568SDave Chinner 
328230f712c9SDave Chinner 	/*
328330f712c9SDave Chinner 	 * Just fix this up, for the case where the last a.g. is shorter
328430f712c9SDave Chinner 	 * (or there's only one a.g.) and the caller couldn't easily figure
328530f712c9SDave Chinner 	 * that out (xfs_bmap_alloc).
328630f712c9SDave Chinner 	 */
328730f712c9SDave Chinner 	agsize = mp->m_sb.sb_agblocks;
328830f712c9SDave Chinner 	if (args->maxlen > agsize)
328930f712c9SDave Chinner 		args->maxlen = agsize;
329030f712c9SDave Chinner 	if (args->alignment == 0)
329130f712c9SDave Chinner 		args->alignment = 1;
329274b9aa63SDave Chinner 
329374b9aa63SDave Chinner 	ASSERT(args->minlen > 0);
329474b9aa63SDave Chinner 	ASSERT(args->maxlen > 0);
329574b9aa63SDave Chinner 	ASSERT(args->alignment > 0);
329674b9aa63SDave Chinner 	ASSERT(args->resv != XFS_AG_RESV_AGFL);
329774b9aa63SDave Chinner 
3298319c9e87SDave Chinner 	ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
3299319c9e87SDave Chinner 	ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
330030f712c9SDave Chinner 	ASSERT(args->minlen <= args->maxlen);
330130f712c9SDave Chinner 	ASSERT(args->minlen <= agsize);
330230f712c9SDave Chinner 	ASSERT(args->mod < args->prod);
330374b9aa63SDave Chinner 
3304319c9e87SDave Chinner 	if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
3305319c9e87SDave Chinner 	    XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
330630f712c9SDave Chinner 	    args->minlen > args->maxlen || args->minlen > agsize ||
330730f712c9SDave Chinner 	    args->mod >= args->prod) {
330830f712c9SDave Chinner 		trace_xfs_alloc_vextent_badargs(args);
3309ecd788a9SDave Chinner 		return -ENOSPC;
3310ecd788a9SDave Chinner 	}
33118b813568SDave Chinner 
33128b813568SDave Chinner 	if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
33138b813568SDave Chinner 		trace_xfs_alloc_vextent_skip_deadlock(args);
33148b813568SDave Chinner 		return -ENOSPC;
33158b813568SDave Chinner 	}
331630f712c9SDave Chinner 	return 0;
331830f712c9SDave Chinner }
331930f712c9SDave Chinner 
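#if 0
	/*
	 * Sketch of an args setup that passes the checks above (hypothetical
	 * values; tp and mp come from the caller): a 1-16 block allocation
	 * with no alignment or modulus constraints.
	 */
	struct xfs_alloc_arg	args = {
		.tp		= tp,
		.mp		= mp,
		.minlen		= 1,
		.maxlen		= 16,
		.prod		= 1,
		.mod		= 0,		/* must stay below .prod */
		.alignment	= 1,
		.resv		= XFS_AG_RESV_NONE,
		/* .oinfo and friends omitted for brevity */
	};
#endif
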
332030f712c9SDave Chinner /*
332174b9aa63SDave Chinner  * Prepare an AG for allocation. If the AG is not prepared to accept the
332274b9aa63SDave Chinner  * allocation, return failure.
332374b9aa63SDave Chinner  *
332474b9aa63SDave Chinner  * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
332574b9aa63SDave Chinner  * modified to hold their own perag references.
332674b9aa63SDave Chinner  */
332774b9aa63SDave Chinner static int
xfs_alloc_vextent_prepare_ag(struct xfs_alloc_arg * args,uint32_t alloc_flags)332874b9aa63SDave Chinner xfs_alloc_vextent_prepare_ag(
332900dcd17cSDave Chinner 	struct xfs_alloc_arg	*args,
33306a2a9d77SDave Chinner 	uint32_t		alloc_flags)
333174b9aa63SDave Chinner {
333274b9aa63SDave Chinner 	bool			need_pag = !args->pag;
333374b9aa63SDave Chinner 	int			error;
333474b9aa63SDave Chinner 
333574b9aa63SDave Chinner 	if (need_pag)
333674b9aa63SDave Chinner 		args->pag = xfs_perag_get(args->mp, args->agno);
333774b9aa63SDave Chinner 
33383432ef61SDave Chinner 	args->agbp = NULL;
33396a2a9d77SDave Chinner 	error = xfs_alloc_fix_freelist(args, alloc_flags);
334074b9aa63SDave Chinner 	if (error) {
334174b9aa63SDave Chinner 		trace_xfs_alloc_vextent_nofix(args);
334274b9aa63SDave Chinner 		if (need_pag)
334374b9aa63SDave Chinner 			xfs_perag_put(args->pag);
334474b9aa63SDave Chinner 		args->agbno = NULLAGBLOCK;
334574b9aa63SDave Chinner 		return error;
334674b9aa63SDave Chinner 	}
334774b9aa63SDave Chinner 	if (!args->agbp) {
334874b9aa63SDave Chinner 		/* cannot allocate in this AG at all */
334974b9aa63SDave Chinner 		trace_xfs_alloc_vextent_noagbp(args);
335074b9aa63SDave Chinner 		args->agbno = NULLAGBLOCK;
335174b9aa63SDave Chinner 		return 0;
335274b9aa63SDave Chinner 	}
335374b9aa63SDave Chinner 	args->wasfromfl = 0;
335474b9aa63SDave Chinner 	return 0;
335574b9aa63SDave Chinner }
335674b9aa63SDave Chinner 
335774b9aa63SDave Chinner /*
3358e4d17426SDave Chinner  * Post-process allocation results to account for the allocation if it succeeded
3359e4d17426SDave Chinner  * and set the allocated block number correctly for the caller.
3360ecd788a9SDave Chinner  *
3361e4d17426SDave Chinner  * XXX: we should really be returning ENOSPC for ENOSPC, not
3362ecd788a9SDave Chinner  * hiding it behind a "successful" NULLFSBLOCK allocation.
336330f712c9SDave Chinner  */
3364e4d17426SDave Chinner static int
xfs_alloc_vextent_finish(struct xfs_alloc_arg * args,xfs_agnumber_t minimum_agno,int alloc_error,bool drop_perag)3365e4d17426SDave Chinner xfs_alloc_vextent_finish(
3366ecd788a9SDave Chinner 	struct xfs_alloc_arg	*args,
3367e4d17426SDave Chinner 	xfs_agnumber_t		minimum_agno,
3368e4d17426SDave Chinner 	int			alloc_error,
3369e4d17426SDave Chinner 	bool			drop_perag)
3370ecd788a9SDave Chinner {
3371ecd788a9SDave Chinner 	struct xfs_mount	*mp = args->mp;
3372e4d17426SDave Chinner 	int			error = 0;
33731dd0510fSDave Chinner 
3374ecd788a9SDave Chinner 	/*
3375ecd788a9SDave Chinner 	 * We can end up here with a locked AGF. If we failed, the caller is
3376ecd788a9SDave Chinner 	 * likely going to try to allocate again with different parameters, and
3377ecd788a9SDave Chinner 	 * that can widen the AGs that are searched for free space. If we have
3378ecd788a9SDave Chinner 	 * to do BMBT block allocation, we have to do a new allocation.
3379ecd788a9SDave Chinner 	 *
3380ecd788a9SDave Chinner 	 * Hence leaving this function with the AGF locked opens up potential
3381ecd788a9SDave Chinner 	 * ABBA AGF deadlocks because a future allocation attempt in this
3382ecd788a9SDave Chinner 	 * transaction may attempt to lock a lower number AGF.
3383ecd788a9SDave Chinner 	 *
3384ecd788a9SDave Chinner 	 * We can't release the AGF until the transaction is committed, so at
3385ecd788a9SDave Chinner 	 * this point we must update the "first allocation" tracker to point at
3386ecd788a9SDave Chinner 	 * this AG if the tracker is empty or points to a lower AG. This allows
3387ecd788a9SDave Chinner 	 * the next allocation attempt to be modified appropriately to avoid
3388ecd788a9SDave Chinner 	 * deadlocks.
3389ecd788a9SDave Chinner 	 */
3390ecd788a9SDave Chinner 	if (args->agbp &&
3391ecd788a9SDave Chinner 	    (args->tp->t_highest_agno == NULLAGNUMBER ||
3392ecd788a9SDave Chinner 	     args->agno > minimum_agno))
3393ecd788a9SDave Chinner 		args->tp->t_highest_agno = args->agno;
3394ecd788a9SDave Chinner 
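	/*
	 * Worked example of the ordering rule above (hypothetical numbers):
	 * a transaction that has already locked AGF 3 carries
	 * t_highest_agno == 3, so later allocations are constrained to
	 * AGs >= 3; if one of them then locks AGF 5, the tracker moves to 5
	 * and AGF 4 can never be locked after AGF 5 in this transaction.
	 */
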
3395e4d17426SDave Chinner 	/*
3396e4d17426SDave Chinner 	 * If the allocation failed with an error or we had an ENOSPC result,
3397e4d17426SDave Chinner 	 * preserve the returned error whilst also marking the allocation result
3398e4d17426SDave Chinner 	 * as "no extent allocated". This ensures that callers that fail to
3399e4d17426SDave Chinner 	 * capture the error will still treat it as a failed allocation.
3400e4d17426SDave Chinner 	 */
3401e4d17426SDave Chinner 	if (alloc_error || args->agbno == NULLAGBLOCK) {
3402ecd788a9SDave Chinner 		args->fsbno = NULLFSBLOCK;
3403e4d17426SDave Chinner 		error = alloc_error;
3404e4d17426SDave Chinner 		goto out_drop_perag;
34051dd0510fSDave Chinner 	}
34061dd0510fSDave Chinner 
3407ecd788a9SDave Chinner 	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3408e4d17426SDave Chinner 
3409ecd788a9SDave Chinner 	ASSERT(args->len >= args->minlen);
3410ecd788a9SDave Chinner 	ASSERT(args->len <= args->maxlen);
3411ecd788a9SDave Chinner 	ASSERT(args->agbno % args->alignment == 0);
3412ecd788a9SDave Chinner 	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
3413e4d17426SDave Chinner 
3414e4d17426SDave Chinner 	/* if not file data, insert new block into the reverse map btree */
3415e4d17426SDave Chinner 	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
3416e4d17426SDave Chinner 		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
3417e4d17426SDave Chinner 				       args->agbno, args->len, &args->oinfo);
3418e4d17426SDave Chinner 		if (error)
3419e4d17426SDave Chinner 			goto out_drop_perag;
3420e4d17426SDave Chinner 	}
3421e4d17426SDave Chinner 
3422e4d17426SDave Chinner 	if (!args->wasfromfl) {
3423e4d17426SDave Chinner 		error = xfs_alloc_update_counters(args->tp, args->agbp,
3424e4d17426SDave Chinner 						  -((long)(args->len)));
3425e4d17426SDave Chinner 		if (error)
3426e4d17426SDave Chinner 			goto out_drop_perag;
3427e4d17426SDave Chinner 
3428e4d17426SDave Chinner 		ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
3429e4d17426SDave Chinner 				args->len));
3430e4d17426SDave Chinner 	}
3431e4d17426SDave Chinner 
3432e4d17426SDave Chinner 	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
3433e4d17426SDave Chinner 
3434e4d17426SDave Chinner 	XFS_STATS_INC(mp, xs_allocx);
3435e4d17426SDave Chinner 	XFS_STATS_ADD(mp, xs_allocb, args->len);
3436e4d17426SDave Chinner 
3437e6fbb716SDarrick J. Wong 	trace_xfs_alloc_vextent_finish(args);
3438e6fbb716SDarrick J. Wong 
3439e4d17426SDave Chinner out_drop_perag:
34403432ef61SDave Chinner 	if (drop_perag && args->pag) {
34413432ef61SDave Chinner 		xfs_perag_rele(args->pag);
3442e4d17426SDave Chinner 		args->pag = NULL;
3443e4d17426SDave Chinner 	}
3444e4d17426SDave Chinner 	return error;
3445ecd788a9SDave Chinner }
3446ecd788a9SDave Chinner 
3447ecd788a9SDave Chinner /*
3448230e8fe8SDave Chinner  * Allocate within a single AG only. This uses a best-fit length algorithm, so
3449230e8fe8SDave Chinner  * if you need an exact-sized allocation without locality constraints, this is
3450230e8fe8SDave Chinner  * the fastest way to do it.
3451230e8fe8SDave Chinner  *
3452230e8fe8SDave Chinner  * Caller is expected to hold a perag reference in args->pag.
3453ecd788a9SDave Chinner  */
345474c36a86SDave Chinner int
xfs_alloc_vextent_this_ag(struct xfs_alloc_arg * args,xfs_agnumber_t agno)3455ecd788a9SDave Chinner xfs_alloc_vextent_this_ag(
34565f36b2ceSDave Chinner 	struct xfs_alloc_arg	*args,
34575f36b2ceSDave Chinner 	xfs_agnumber_t		agno)
3458ecd788a9SDave Chinner {
3459ecd788a9SDave Chinner 	struct xfs_mount	*mp = args->mp;
34608b813568SDave Chinner 	xfs_agnumber_t		minimum_agno;
34616a2a9d77SDave Chinner 	uint32_t		alloc_flags = 0;
3462ecd788a9SDave Chinner 	int			error;
3463ecd788a9SDave Chinner 
34646de4b1abSDarrick J. Wong 	ASSERT(args->pag != NULL);
34656de4b1abSDarrick J. Wong 	ASSERT(args->pag->pag_agno == agno);
34666de4b1abSDarrick J. Wong 
34678b813568SDave Chinner 	args->agno = agno;
34688b813568SDave Chinner 	args->agbno = 0;
3469e6fbb716SDarrick J. Wong 
3470e6fbb716SDarrick J. Wong 	trace_xfs_alloc_vextent_this_ag(args);
3471e6fbb716SDarrick J. Wong 
34728b813568SDave Chinner 	error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
34738b813568SDave Chinner 			&minimum_agno);
3474ecd788a9SDave Chinner 	if (error) {
3475ecd788a9SDave Chinner 		if (error == -ENOSPC)
3476ecd788a9SDave Chinner 			return 0;
3477ecd788a9SDave Chinner 		return error;
3478ecd788a9SDave Chinner 	}
3479ecd788a9SDave Chinner 
34806a2a9d77SDave Chinner 	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
348174b9aa63SDave Chinner 	if (!error && args->agbp)
34826a2a9d77SDave Chinner 		error = xfs_alloc_ag_vextent_size(args, alloc_flags);
348374b9aa63SDave Chinner 
3484e4d17426SDave Chinner 	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
348530f712c9SDave Chinner }
3486ecd788a9SDave Chinner 
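#if 0
	/*
	 * Usage sketch (hypothetical call site): best-fit allocation from a
	 * single AG. The caller owns the perag reference for the whole call.
	 */
	args.pag = xfs_perag_grab(mp, agno);
	if (args.pag) {
		error = xfs_alloc_vextent_this_ag(&args, agno);
		xfs_perag_rele(args.pag);
		args.pag = NULL;
	}
#endif
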
348730f712c9SDave Chinner /*
3488ecd788a9SDave Chinner  * Iterate all AGs trying to allocate an extent starting from @start_ag.
3489ecd788a9SDave Chinner  *
3490ecd788a9SDave Chinner  * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
3491ecd788a9SDave Chinner  * allocation attempts in @start_agno have locality information. If we fail to
3492ecd788a9SDave Chinner  * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
3493ecd788a9SDave Chinner  * we attempt to allocate in, as there is no locality optimisation possible for
3494ecd788a9SDave Chinner  * those allocations.
3495ecd788a9SDave Chinner  *
34963432ef61SDave Chinner  * On return, args->pag may be left referenced if we finish before the "all
34973432ef61SDave Chinner  * failed" return point. The allocation finish still needs the perag, and
34983432ef61SDave Chinner  * so the caller will release it once they've finished the allocation.
34993432ef61SDave Chinner  *
3500ecd788a9SDave Chinner  * When we wrap the AG iteration at the end of the filesystem, we have to be
3501ecd788a9SDave Chinner  * careful not to wrap into AGs below ones we already have locked in the
3502ecd788a9SDave Chinner  * transaction if we are doing a blocking iteration. This will result in an
3503ecd788a9SDave Chinner  * out-of-order locking of AGFs and hence can cause deadlocks.
350430f712c9SDave Chinner  */
3505ecd788a9SDave Chinner static int
xfs_alloc_vextent_iterate_ags(struct xfs_alloc_arg * args,xfs_agnumber_t minimum_agno,xfs_agnumber_t start_agno,xfs_agblock_t target_agbno,uint32_t alloc_flags)3506ecd788a9SDave Chinner xfs_alloc_vextent_iterate_ags(
3507ecd788a9SDave Chinner 	struct xfs_alloc_arg	*args,
3508ecd788a9SDave Chinner 	xfs_agnumber_t		minimum_agno,
3509ecd788a9SDave Chinner 	xfs_agnumber_t		start_agno,
3510230e8fe8SDave Chinner 	xfs_agblock_t		target_agbno,
35116a2a9d77SDave Chinner 	uint32_t		alloc_flags)
3512ecd788a9SDave Chinner {
3513ecd788a9SDave Chinner 	struct xfs_mount	*mp = args->mp;
35149eb77596SDarrick J. Wong 	xfs_agnumber_t		restart_agno = minimum_agno;
35153432ef61SDave Chinner 	xfs_agnumber_t		agno;
3516ecd788a9SDave Chinner 	int			error = 0;
3517ecd788a9SDave Chinner 
35186a2a9d77SDave Chinner 	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
35199eb77596SDarrick J. Wong 		restart_agno = 0;
35203432ef61SDave Chinner restart:
35219eb77596SDarrick J. Wong 	for_each_perag_wrap_range(mp, start_agno, restart_agno,
35223432ef61SDave Chinner 			mp->m_sb.sb_agcount, agno, args->pag) {
35233432ef61SDave Chinner 		args->agno = agno;
35246a2a9d77SDave Chinner 		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
352574b9aa63SDave Chinner 		if (error)
352674b9aa63SDave Chinner 			break;
35273432ef61SDave Chinner 		if (!args->agbp) {
35283432ef61SDave Chinner 			trace_xfs_alloc_vextent_loopfailed(args);
35293432ef61SDave Chinner 			continue;
35303432ef61SDave Chinner 		}
353174b9aa63SDave Chinner 
353274b9aa63SDave Chinner 		/*
35333432ef61SDave Chinner 		 * Allocation is supposed to succeed now, so break out of the
35343432ef61SDave Chinner 		 * loop regardless of whether we succeed or not.
353574b9aa63SDave Chinner 		 */
3536230e8fe8SDave Chinner 		if (args->agno == start_agno && target_agbno) {
3537230e8fe8SDave Chinner 			args->agbno = target_agbno;
35386a2a9d77SDave Chinner 			error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3539230e8fe8SDave Chinner 		} else {
3540230e8fe8SDave Chinner 			args->agbno = 0;
35416a2a9d77SDave Chinner 			error = xfs_alloc_ag_vextent_size(args, alloc_flags);
3542230e8fe8SDave Chinner 		}
3543ecd788a9SDave Chinner 		break;
354430f712c9SDave Chinner 	}
35453432ef61SDave Chinner 	if (error) {
35463432ef61SDave Chinner 		xfs_perag_rele(args->pag);
3547ecd788a9SDave Chinner 		args->pag = NULL;
3548ecd788a9SDave Chinner 		return error;
3549ecd788a9SDave Chinner 	}
35503432ef61SDave Chinner 	if (args->agbp)
35513432ef61SDave Chinner 		return 0;
35523432ef61SDave Chinner 
35533432ef61SDave Chinner 	/*
35543432ef61SDave Chinner 	 * We didn't find an AG we can allocate from. If we were given
35553432ef61SDave Chinner 	 * constraining flags by the caller, drop them and retry the allocation
35563432ef61SDave Chinner 	 * without any constraints being set.
35573432ef61SDave Chinner 	 */
35586a2a9d77SDave Chinner 	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
35596a2a9d77SDave Chinner 		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
35609eb77596SDarrick J. Wong 		restart_agno = minimum_agno;
35613432ef61SDave Chinner 		goto restart;
35623432ef61SDave Chinner 	}
35633432ef61SDave Chinner 
35643432ef61SDave Chinner 	ASSERT(args->pag == NULL);
35653432ef61SDave Chinner 	trace_xfs_alloc_vextent_allfailed(args);
35663432ef61SDave Chinner 	return 0;
35673432ef61SDave Chinner }
3568ecd788a9SDave Chinner 
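/*
 * Worked example of the iteration above (hypothetical numbers): with
 * sb_agcount = 8, start_agno = 5 and minimum_agno = 2, the TRYLOCK pass wraps
 * fully and visits AGs 5,6,7,0,1,2,3,4; the blocking retry clamps the wrap
 * point to minimum_agno and visits 5,6,7,2,3,4, so no AGF below one already
 * locked by the transaction is ever taken.
 */
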
3569ecd788a9SDave Chinner /*
3570ecd788a9SDave Chinner  * Iterate through the AGs from the start AG to the end of the filesystem,
3571ecd788a9SDave Chinner  * to allocate blocks. It starts with a near allocation attempt in the initial
3572ecd788a9SDave Chinner  * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
3573ecd788a9SDave Chinner  * back to zero if allowed by previous allocations in this transaction,
3574ecd788a9SDave Chinner  * otherwise will wrap back to the start AG and run a second blocking pass to
3575ecd788a9SDave Chinner  * the end of the filesystem.
3576ecd788a9SDave Chinner  */
35772a7f6d41SDave Chinner int
xfs_alloc_vextent_start_ag(struct xfs_alloc_arg * args,xfs_fsblock_t target)3578ecd788a9SDave Chinner xfs_alloc_vextent_start_ag(
3579ecd788a9SDave Chinner 	struct xfs_alloc_arg	*args,
35802a7f6d41SDave Chinner 	xfs_fsblock_t		target)
3581ecd788a9SDave Chinner {
3582ecd788a9SDave Chinner 	struct xfs_mount	*mp = args->mp;
35838b813568SDave Chinner 	xfs_agnumber_t		minimum_agno;
3584ecd788a9SDave Chinner 	xfs_agnumber_t		start_agno;
3585ecd788a9SDave Chinner 	xfs_agnumber_t		rotorstep = xfs_rotorstep;
3586ecd788a9SDave Chinner 	bool			bump_rotor = false;
35876a2a9d77SDave Chinner 	uint32_t		alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3588ecd788a9SDave Chinner 	int			error;
3589ecd788a9SDave Chinner 
35906de4b1abSDarrick J. Wong 	ASSERT(args->pag == NULL);
35916de4b1abSDarrick J. Wong 
35928b813568SDave Chinner 	args->agno = NULLAGNUMBER;
35938b813568SDave Chinner 	args->agbno = NULLAGBLOCK;
3594e6fbb716SDarrick J. Wong 
35954dfb02d5SDarrick J. Wong 	trace_xfs_alloc_vextent_start_ag(args);
3596e6fbb716SDarrick J. Wong 
35978b813568SDave Chinner 	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3598ecd788a9SDave Chinner 	if (error) {
3599ecd788a9SDave Chinner 		if (error == -ENOSPC)
3600ecd788a9SDave Chinner 			return 0;
3601ecd788a9SDave Chinner 		return error;
3602ecd788a9SDave Chinner 	}
3603ecd788a9SDave Chinner 
3604ecd788a9SDave Chinner 	if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3605ecd788a9SDave Chinner 	    xfs_is_inode32(mp)) {
36062a7f6d41SDave Chinner 		target = XFS_AGB_TO_FSB(mp,
3607ecd788a9SDave Chinner 				((mp->m_agfrotor / rotorstep) %
3608ecd788a9SDave Chinner 				mp->m_sb.sb_agcount), 0);
3609ecd788a9SDave Chinner 		bump_rotor = 1;
3610ecd788a9SDave Chinner 	}
3611ecd788a9SDave Chinner 
3612230e8fe8SDave Chinner 	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3613ecd788a9SDave Chinner 	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
36146a2a9d77SDave Chinner 			XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3615230e8fe8SDave Chinner 
36168d242e93SChristoph Hellwig 	if (bump_rotor) {
3617ecd788a9SDave Chinner 		if (args->agno == start_agno)
361830f712c9SDave Chinner 			mp->m_agfrotor = (mp->m_agfrotor + 1) %
361930f712c9SDave Chinner 				(mp->m_sb.sb_agcount * rotorstep);
362030f712c9SDave Chinner 		else
362130f712c9SDave Chinner 			mp->m_agfrotor = (args->agno * rotorstep + 1) %
362230f712c9SDave Chinner 				(mp->m_sb.sb_agcount * rotorstep);
362330f712c9SDave Chinner 	}
3624ecd788a9SDave Chinner 
3625e4d17426SDave Chinner 	return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3626ecd788a9SDave Chinner }
3627ecd788a9SDave Chinner 
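/*
 * Worked example of the rotor above (hypothetical numbers): with
 * xfs_rotorstep = 1 and sb_agcount = 4, successive inode32 initial user data
 * allocations that succeed in their starting AG step m_agfrotor through
 * 0 -> 1 -> 2 -> 3 -> 0, spreading new file data across all the AGs.
 */
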
3628ecd788a9SDave Chinner /*
3629230e8fe8SDave Chinner  * Iterate from the agno indicated via @target through to the end of the
3630ecd788a9SDave Chinner  * filesystem attempting blocking allocation. This does not wrap or try a second
3631230e8fe8SDave Chinner  * pass, so will not recurse into AGs lower than indicated by the target.
3632ecd788a9SDave Chinner  */
3633319c9e87SDave Chinner int
xfs_alloc_vextent_first_ag(struct xfs_alloc_arg * args,xfs_fsblock_t target)3634ecd788a9SDave Chinner xfs_alloc_vextent_first_ag(
3635ecd788a9SDave Chinner 	struct xfs_alloc_arg	*args,
3636319c9e87SDave Chinner 	xfs_fsblock_t		target)
3637ecd788a9SDave Chinner {
3638ecd788a9SDave Chinner 	struct xfs_mount	*mp = args->mp;
36398b813568SDave Chinner 	xfs_agnumber_t		minimum_agno;
3640ecd788a9SDave Chinner 	xfs_agnumber_t		start_agno;
36416a2a9d77SDave Chinner 	uint32_t		alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3642ecd788a9SDave Chinner 	int			error;
3643ecd788a9SDave Chinner 
36446de4b1abSDarrick J. Wong 	ASSERT(args->pag == NULL);
36456de4b1abSDarrick J. Wong 
36468b813568SDave Chinner 	args->agno = NULLAGNUMBER;
36478b813568SDave Chinner 	args->agbno = NULLAGBLOCK;
3648e6fbb716SDarrick J. Wong 
36494dfb02d5SDarrick J. Wong 	trace_xfs_alloc_vextent_first_ag(args);
3650e6fbb716SDarrick J. Wong 
36518b813568SDave Chinner 	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3652ecd788a9SDave Chinner 	if (error) {
3653ecd788a9SDave Chinner 		if (error == -ENOSPC)
3654ecd788a9SDave Chinner 			return 0;
3655ecd788a9SDave Chinner 		return error;
3656ecd788a9SDave Chinner 	}
3657ecd788a9SDave Chinner 
3658319c9e87SDave Chinner 	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3659230e8fe8SDave Chinner 	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
36606a2a9d77SDave Chinner 			XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3661e4d17426SDave Chinner 	return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3662ecd788a9SDave Chinner }
3663ecd788a9SDave Chinner 
3664ecd788a9SDave Chinner /*
366574b9aa63SDave Chinner  * Allocate at the exact block target or fail. Caller is expected to hold a
366674b9aa63SDave Chinner  * perag reference in args->pag.
36675f36b2ceSDave Chinner  */
36685f36b2ceSDave Chinner int
xfs_alloc_vextent_exact_bno(struct xfs_alloc_arg * args,xfs_fsblock_t target)36695f36b2ceSDave Chinner xfs_alloc_vextent_exact_bno(
36705f36b2ceSDave Chinner 	struct xfs_alloc_arg	*args,
36715f36b2ceSDave Chinner 	xfs_fsblock_t		target)
36725f36b2ceSDave Chinner {
36735f36b2ceSDave Chinner 	struct xfs_mount	*mp = args->mp;
36748b813568SDave Chinner 	xfs_agnumber_t		minimum_agno;
36755f36b2ceSDave Chinner 	int			error;
36765f36b2ceSDave Chinner 
36776de4b1abSDarrick J. Wong 	ASSERT(args->pag != NULL);
36786de4b1abSDarrick J. Wong 	ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
36796de4b1abSDarrick J. Wong 
36808b813568SDave Chinner 	args->agno = XFS_FSB_TO_AGNO(mp, target);
36818b813568SDave Chinner 	args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3682e6fbb716SDarrick J. Wong 
36834dfb02d5SDarrick J. Wong 	trace_xfs_alloc_vextent_exact_bno(args);
3684e6fbb716SDarrick J. Wong 
36858b813568SDave Chinner 	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
36865f36b2ceSDave Chinner 	if (error) {
36875f36b2ceSDave Chinner 		if (error == -ENOSPC)
36885f36b2ceSDave Chinner 			return 0;
36895f36b2ceSDave Chinner 		return error;
36905f36b2ceSDave Chinner 	}
36915f36b2ceSDave Chinner 
369200dcd17cSDave Chinner 	error = xfs_alloc_vextent_prepare_ag(args, 0);
369374b9aa63SDave Chinner 	if (!error && args->agbp)
3694230e8fe8SDave Chinner 		error = xfs_alloc_ag_vextent_exact(args);
36955f36b2ceSDave Chinner 
3696e4d17426SDave Chinner 	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
36975f36b2ceSDave Chinner }
36985f36b2ceSDave Chinner 
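#if 0
	/*
	 * Usage sketch (hypothetical call site): take the exact physical
	 * extent starting at @target or report "no extent allocated" via
	 * args.fsbno == NULLFSBLOCK. The caller holds the perag reference.
	 */
	args.pag = xfs_perag_grab(mp, XFS_FSB_TO_AGNO(mp, target));
	if (args.pag) {
		error = xfs_alloc_vextent_exact_bno(&args, target);
		xfs_perag_rele(args.pag);
		args.pag = NULL;
	}
#endif
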
36995f36b2ceSDave Chinner /*
3700db4710fdSDave Chinner  * Allocate an extent as close to the target as possible. If there are no
3701db4710fdSDave Chinner  * viable candidates in the AG, then fail the allocation.
370274b9aa63SDave Chinner  *
370374b9aa63SDave Chinner  * Caller may or may not have a per-ag reference in args->pag.
3704ecd788a9SDave Chinner  */
3705ecd788a9SDave Chinner int
xfs_alloc_vextent_near_bno(struct xfs_alloc_arg * args,xfs_fsblock_t target)3706db4710fdSDave Chinner xfs_alloc_vextent_near_bno(
3707db4710fdSDave Chinner 	struct xfs_alloc_arg	*args,
3708db4710fdSDave Chinner 	xfs_fsblock_t		target)
3709ecd788a9SDave Chinner {
3710db4710fdSDave Chinner 	struct xfs_mount	*mp = args->mp;
37118b813568SDave Chinner 	xfs_agnumber_t		minimum_agno;
3712e4d17426SDave Chinner 	bool			needs_perag = args->pag == NULL;
37136a2a9d77SDave Chinner 	uint32_t		alloc_flags = 0;
371474c36a86SDave Chinner 	int			error;
3715ecd788a9SDave Chinner 
37166de4b1abSDarrick J. Wong 	if (!needs_perag)
37176de4b1abSDarrick J. Wong 		ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
37186de4b1abSDarrick J. Wong 
37198b813568SDave Chinner 	args->agno = XFS_FSB_TO_AGNO(mp, target);
37208b813568SDave Chinner 	args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3721e6fbb716SDarrick J. Wong 
37224dfb02d5SDarrick J. Wong 	trace_xfs_alloc_vextent_near_bno(args);
3723e6fbb716SDarrick J. Wong 
37248b813568SDave Chinner 	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3725db4710fdSDave Chinner 	if (error) {
3726db4710fdSDave Chinner 		if (error == -ENOSPC)
3727db4710fdSDave Chinner 			return 0;
372874c36a86SDave Chinner 		return error;
372930f712c9SDave Chinner 	}
373030f712c9SDave Chinner 
3731e4d17426SDave Chinner 	if (needs_perag)
37323432ef61SDave Chinner 		args->pag = xfs_perag_grab(mp, args->agno);
3733e4d17426SDave Chinner 
37346a2a9d77SDave Chinner 	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
373574b9aa63SDave Chinner 	if (!error && args->agbp)
37366a2a9d77SDave Chinner 		error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3737db4710fdSDave Chinner 
3738e4d17426SDave Chinner 	return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
3739db4710fdSDave Chinner }
3740db4710fdSDave Chinner 
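#if 0
	/*
	 * Usage sketch (hypothetical call site): locality allocation near
	 * @target with no perag held on entry; with args.pag == NULL the
	 * function grabs and releases the reference internally.
	 */
	args.pag = NULL;
	error = xfs_alloc_vextent_near_bno(&args, target);
#endif
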
37414d89e20bSDave Chinner /* Ensure that the freelist is at full capacity. */
37424d89e20bSDave Chinner int
xfs_free_extent_fix_freelist(struct xfs_trans * tp,struct xfs_perag * pag,struct xfs_buf ** agbp)37434d89e20bSDave Chinner xfs_free_extent_fix_freelist(
37444d89e20bSDave Chinner 	struct xfs_trans	*tp,
374545d06621SDave Chinner 	struct xfs_perag	*pag,
37464d89e20bSDave Chinner 	struct xfs_buf		**agbp)
374730f712c9SDave Chinner {
37484d89e20bSDave Chinner 	struct xfs_alloc_arg	args;
374930f712c9SDave Chinner 	int			error;
375030f712c9SDave Chinner 
37514d89e20bSDave Chinner 	memset(&args, 0, sizeof(struct xfs_alloc_arg));
375230f712c9SDave Chinner 	args.tp = tp;
375330f712c9SDave Chinner 	args.mp = tp->t_mountp;
375445d06621SDave Chinner 	args.agno = pag->pag_agno;
375545d06621SDave Chinner 	args.pag = pag;
375630f712c9SDave Chinner 
375730f712c9SDave Chinner 	/*
375830f712c9SDave Chinner 	 * validate that the block number is legal - this enables us to detect
375930f712c9SDave Chinner 	 * and handle a silent filesystem corruption rather than crashing.
376030f712c9SDave Chinner 	 */
376130f712c9SDave Chinner 	if (args.agno >= args.mp->m_sb.sb_agcount)
37622451337dSDave Chinner 		return -EFSCORRUPTED;
376330f712c9SDave Chinner 
376430f712c9SDave Chinner 	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
376530f712c9SDave Chinner 	if (error)
376645d06621SDave Chinner 		return error;
376730f712c9SDave Chinner 
37684d89e20bSDave Chinner 	*agbp = args.agbp;
376945d06621SDave Chinner 	return 0;
377030f712c9SDave Chinner }
377130f712c9SDave Chinner 
37724d89e20bSDave Chinner /*
37734d89e20bSDave Chinner  * Free an extent.
37744d89e20bSDave Chinner  * Just break up the extent address and hand off to xfs_free_ag_extent
37754d89e20bSDave Chinner  * after fixing up the freelist.
37764d89e20bSDave Chinner  */
377766e3237eSDarrick J. Wong int
__xfs_free_extent(struct xfs_trans * tp,struct xfs_perag * pag,xfs_agblock_t agbno,xfs_extlen_t len,const struct xfs_owner_info * oinfo,enum xfs_ag_resv_type type,bool skip_discard)3778fcb762f5SBrian Foster __xfs_free_extent(
377966e3237eSDarrick J. Wong 	struct xfs_trans		*tp,
3780b2ccab31SDarrick J. Wong 	struct xfs_perag		*pag,
3781b2ccab31SDarrick J. Wong 	xfs_agblock_t			agbno,
378266e3237eSDarrick J. Wong 	xfs_extlen_t			len,
378366e3237eSDarrick J. Wong 	const struct xfs_owner_info	*oinfo,
378466e3237eSDarrick J. Wong 	enum xfs_ag_resv_type		type,
3785fcb762f5SBrian Foster 	bool				skip_discard)
37864d89e20bSDave Chinner {
37874d89e20bSDave Chinner 	struct xfs_mount		*mp = tp->t_mountp;
37884d89e20bSDave Chinner 	struct xfs_buf			*agbp;
37899798f615SChristoph Hellwig 	struct xfs_agf			*agf;
37904d89e20bSDave Chinner 	int				error;
3791fcb762f5SBrian Foster 	unsigned int			busy_flags = 0;
37924d89e20bSDave Chinner 
37934d89e20bSDave Chinner 	ASSERT(len != 0);
37940ab32086SBrian Foster 	ASSERT(type != XFS_AG_RESV_AGFL);
37954d89e20bSDave Chinner 
3796ba9e7802SDarrick J. Wong 	if (XFS_TEST_ERROR(false, mp,
37979e24cfd0SDarrick J. Wong 			XFS_ERRTAG_FREE_EXTENT))
3798ba9e7802SDarrick J. Wong 		return -EIO;
3799ba9e7802SDarrick J. Wong 
380045d06621SDave Chinner 	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
38014d89e20bSDave Chinner 	if (error)
3802b2ccab31SDarrick J. Wong 		return error;
38039798f615SChristoph Hellwig 	agf = agbp->b_addr;
38044d89e20bSDave Chinner 
3805f9e03706SDarrick J. Wong 	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3806f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
380745d06621SDave Chinner 		goto err_release;
3808f9e03706SDarrick J. Wong 	}
38094d89e20bSDave Chinner 
38104d89e20bSDave Chinner 	/* validate the extent size is legal now that we have the agf locked */
38119798f615SChristoph Hellwig 	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
3812f9e03706SDarrick J. Wong 		error = -EFSCORRUPTED;
381345d06621SDave Chinner 		goto err_release;
3814f9e03706SDarrick J. Wong 	}
38154d89e20bSDave Chinner 
3816b2ccab31SDarrick J. Wong 	error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
3817b2ccab31SDarrick J. Wong 			type);
38184d89e20bSDave Chinner 	if (error)
381945d06621SDave Chinner 		goto err_release;
38204d89e20bSDave Chinner 
3821fcb762f5SBrian Foster 	if (skip_discard)
3822fcb762f5SBrian Foster 		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
382345d06621SDave Chinner 	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
38244d89e20bSDave Chinner 	return 0;
38254d89e20bSDave Chinner 
382645d06621SDave Chinner err_release:
38274d89e20bSDave Chinner 	xfs_trans_brelse(tp, agbp);
382830f712c9SDave Chinner 	return error;
382930f712c9SDave Chinner }
38302d520bfaSDarrick J. Wong 
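#if 0
	/*
	 * Usage sketch (hypothetical call site; fsbno and len come from the
	 * caller): return an extent to its AG without issuing a discard.
	 * XFS_RMAP_OINFO_SKIP_UPDATE assumes the rmap update is handled
	 * elsewhere, e.g. by a deferred rmap operation.
	 */
	struct xfs_perag	*pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsbno));

	error = __xfs_free_extent(tp, pag, XFS_FSB_TO_AGBNO(mp, fsbno), len,
			&XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE, true);
	xfs_perag_put(pag);
#endif
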
38312d520bfaSDarrick J. Wong struct xfs_alloc_query_range_info {
38322d520bfaSDarrick J. Wong 	xfs_alloc_query_range_fn	fn;
38332d520bfaSDarrick J. Wong 	void				*priv;
38342d520bfaSDarrick J. Wong };
38352d520bfaSDarrick J. Wong 
38362d520bfaSDarrick J. Wong /* Format btree record and pass to our callback. */
38372d520bfaSDarrick J. Wong STATIC int
xfs_alloc_query_range_helper(struct xfs_btree_cur * cur,const union xfs_btree_rec * rec,void * priv)38382d520bfaSDarrick J. Wong xfs_alloc_query_range_helper(
38392d520bfaSDarrick J. Wong 	struct xfs_btree_cur		*cur,
3840159eb69dSDarrick J. Wong 	const union xfs_btree_rec	*rec,
38412d520bfaSDarrick J. Wong 	void				*priv)
38422d520bfaSDarrick J. Wong {
38432d520bfaSDarrick J. Wong 	struct xfs_alloc_query_range_info	*query = priv;
38442d520bfaSDarrick J. Wong 	struct xfs_alloc_rec_incore		irec;
3845ee12eaaaSDarrick J. Wong 	xfs_failaddr_t				fa;
38462d520bfaSDarrick J. Wong 
384735e3b9a1SDarrick J. Wong 	xfs_alloc_btrec_to_irec(rec, &irec);
3848ee12eaaaSDarrick J. Wong 	fa = xfs_alloc_check_irec(cur, &irec);
3849ee12eaaaSDarrick J. Wong 	if (fa)
3850ee12eaaaSDarrick J. Wong 		return xfs_alloc_complain_bad_rec(cur, fa, &irec);
385135e3b9a1SDarrick J. Wong 
38522d520bfaSDarrick J. Wong 	return query->fn(cur, &irec, query->priv);
38532d520bfaSDarrick J. Wong }
38542d520bfaSDarrick J. Wong 
38552d520bfaSDarrick J. Wong /* Find all free space within a given range of blocks. */
38562d520bfaSDarrick J. Wong int
xfs_alloc_query_range(struct xfs_btree_cur * cur,const struct xfs_alloc_rec_incore * low_rec,const struct xfs_alloc_rec_incore * high_rec,xfs_alloc_query_range_fn fn,void * priv)38572d520bfaSDarrick J. Wong xfs_alloc_query_range(
38582d520bfaSDarrick J. Wong 	struct xfs_btree_cur			*cur,
385904dcb474SDarrick J. Wong 	const struct xfs_alloc_rec_incore	*low_rec,
386004dcb474SDarrick J. Wong 	const struct xfs_alloc_rec_incore	*high_rec,
38612d520bfaSDarrick J. Wong 	xfs_alloc_query_range_fn		fn,
38622d520bfaSDarrick J. Wong 	void					*priv)
38632d520bfaSDarrick J. Wong {
386475dc0345SDarrick J. Wong 	union xfs_btree_irec			low_brec = { .a = *low_rec };
386575dc0345SDarrick J. Wong 	union xfs_btree_irec			high_brec = { .a = *high_rec };
386675dc0345SDarrick J. Wong 	struct xfs_alloc_query_range_info	query = { .priv = priv, .fn = fn };
38672d520bfaSDarrick J. Wong 
38682d520bfaSDarrick J. Wong 	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
38692d520bfaSDarrick J. Wong 	return xfs_btree_query_range(cur, &low_brec, &high_brec,
38702d520bfaSDarrick J. Wong 			xfs_alloc_query_range_helper, &query);
38712d520bfaSDarrick J. Wong }
3872e9a2599aSDarrick J. Wong 
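#if 0
/*
 * Callback sketch (hypothetical helper): count the free space records that
 * xfs_alloc_query_range() passes back; the signature matches the
 * xfs_alloc_query_range_fn callbacks invoked by the helper above.
 */
STATIC int
xfs_count_free_helper(
	struct xfs_btree_cur			*cur,
	const struct xfs_alloc_rec_incore	*rec,
	void					*priv)
{
	unsigned int				*nr = priv;

	(*nr)++;
	return 0;
}
#endif
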
3873e9a2599aSDarrick J. Wong /* Find all free space records. */
3874e9a2599aSDarrick J. Wong int
xfs_alloc_query_all(struct xfs_btree_cur * cur,xfs_alloc_query_range_fn fn,void * priv)3875e9a2599aSDarrick J. Wong xfs_alloc_query_all(
3876e9a2599aSDarrick J. Wong 	struct xfs_btree_cur			*cur,
3877e9a2599aSDarrick J. Wong 	xfs_alloc_query_range_fn		fn,
3878e9a2599aSDarrick J. Wong 	void					*priv)
3879e9a2599aSDarrick J. Wong {
3880e9a2599aSDarrick J. Wong 	struct xfs_alloc_query_range_info	query;
3881e9a2599aSDarrick J. Wong 
3882e9a2599aSDarrick J. Wong 	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3883e9a2599aSDarrick J. Wong 	query.priv = priv;
3884e9a2599aSDarrick J. Wong 	query.fn = fn;
3885e9a2599aSDarrick J. Wong 	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3886e9a2599aSDarrick J. Wong }
388721ec5416SDarrick J. Wong 
38886abc7aefSDarrick J. Wong /*
38896abc7aefSDarrick J. Wong  * Scan part of the keyspace of the free space and tell us if the area has no
38906abc7aefSDarrick J. Wong  * records, is fully mapped by records, or is partially filled.
38916abc7aefSDarrick J. Wong  */
3892ce1d802eSDarrick J. Wong int
xfs_alloc_has_records(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,enum xbtree_recpacking * outcome)38936abc7aefSDarrick J. Wong xfs_alloc_has_records(
3894ce1d802eSDarrick J. Wong 	struct xfs_btree_cur	*cur,
3895ce1d802eSDarrick J. Wong 	xfs_agblock_t		bno,
3896ce1d802eSDarrick J. Wong 	xfs_extlen_t		len,
38976abc7aefSDarrick J. Wong 	enum xbtree_recpacking	*outcome)
3898ce1d802eSDarrick J. Wong {
3899ce1d802eSDarrick J. Wong 	union xfs_btree_irec	low;
3900ce1d802eSDarrick J. Wong 	union xfs_btree_irec	high;
3901ce1d802eSDarrick J. Wong 
3902ce1d802eSDarrick J. Wong 	memset(&low, 0, sizeof(low));
3903ce1d802eSDarrick J. Wong 	low.a.ar_startblock = bno;
3904ce1d802eSDarrick J. Wong 	memset(&high, 0xFF, sizeof(high));
3905ce1d802eSDarrick J. Wong 	high.a.ar_startblock = bno + len - 1;
3906ce1d802eSDarrick J. Wong 
39074a200a09SDarrick J. Wong 	return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
3908ce1d802eSDarrick J. Wong }
39099f3a080eSDarrick J. Wong 
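/*
 * Example (hypothetical numbers): for bno = 100 and len = 10 this asks
 * whether the range [100, 109] contains no free space records, is fully
 * covered by them, or is only partially covered.
 */
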
39109f3a080eSDarrick J. Wong /*
39119f3a080eSDarrick J. Wong  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
39125bb46e3eSDarrick J. Wong  * error code or XFS_ITER_*.
39139f3a080eSDarrick J. Wong  */
39149f3a080eSDarrick J. Wong int
xfs_agfl_walk(struct xfs_mount * mp,struct xfs_agf * agf,struct xfs_buf * agflbp,xfs_agfl_walk_fn walk_fn,void * priv)39159f3a080eSDarrick J. Wong xfs_agfl_walk(
39169f3a080eSDarrick J. Wong 	struct xfs_mount	*mp,
39179f3a080eSDarrick J. Wong 	struct xfs_agf		*agf,
39189f3a080eSDarrick J. Wong 	struct xfs_buf		*agflbp,
39199f3a080eSDarrick J. Wong 	xfs_agfl_walk_fn	walk_fn,
39209f3a080eSDarrick J. Wong 	void			*priv)
39219f3a080eSDarrick J. Wong {
39229f3a080eSDarrick J. Wong 	__be32			*agfl_bno;
39239f3a080eSDarrick J. Wong 	unsigned int		i;
39249f3a080eSDarrick J. Wong 	int			error;
39259f3a080eSDarrick J. Wong 
3926183606d8SChristoph Hellwig 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
39279f3a080eSDarrick J. Wong 	i = be32_to_cpu(agf->agf_flfirst);
39289f3a080eSDarrick J. Wong 
39299f3a080eSDarrick J. Wong 	/* Nothing to walk in an empty AGFL. */
39309f3a080eSDarrick J. Wong 	if (agf->agf_flcount == cpu_to_be32(0))
39319f3a080eSDarrick J. Wong 		return 0;
39329f3a080eSDarrick J. Wong 
39339f3a080eSDarrick J. Wong 	/* Otherwise, walk from first to last, wrapping as needed. */
39349f3a080eSDarrick J. Wong 	for (;;) {
39359f3a080eSDarrick J. Wong 		error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
39369f3a080eSDarrick J. Wong 		if (error)
39379f3a080eSDarrick J. Wong 			return error;
39389f3a080eSDarrick J. Wong 		if (i == be32_to_cpu(agf->agf_fllast))
39399f3a080eSDarrick J. Wong 			break;
39409f3a080eSDarrick J. Wong 		if (++i == xfs_agfl_size(mp))
39419f3a080eSDarrick J. Wong 			i = 0;
39429f3a080eSDarrick J. Wong 	}
39439f3a080eSDarrick J. Wong 
39449f3a080eSDarrick J. Wong 	return 0;
39459f3a080eSDarrick J. Wong }
3946c201d9caSDarrick J. Wong 
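#if 0
/*
 * walk_fn sketch (hypothetical helper): tally the AGFL blocks; a negative
 * errno or XFS_ITER_* return from the callback stops the walk above.
 */
STATIC int
xfs_agfl_count_helper(
	struct xfs_mount	*mp,
	xfs_agblock_t		bno,
	void			*priv)
{
	unsigned int		*nr = priv;

	(*nr)++;
	return 0;
}
#endif
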
3947c201d9caSDarrick J. Wong int __init
xfs_extfree_intent_init_cache(void)3948c201d9caSDarrick J. Wong xfs_extfree_intent_init_cache(void)
3949c201d9caSDarrick J. Wong {
3950c201d9caSDarrick J. Wong 	xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
3951c201d9caSDarrick J. Wong 			sizeof(struct xfs_extent_free_item),
3952c201d9caSDarrick J. Wong 			0, 0, NULL);
3953c201d9caSDarrick J. Wong 
3954c201d9caSDarrick J. Wong 	return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
3955c201d9caSDarrick J. Wong }
3956c201d9caSDarrick J. Wong 
3957c201d9caSDarrick J. Wong void
xfs_extfree_intent_destroy_cache(void)3958c201d9caSDarrick J. Wong xfs_extfree_intent_destroy_cache(void)
3959c201d9caSDarrick J. Wong {
3960c201d9caSDarrick J. Wong 	kmem_cache_destroy(xfs_extfree_item_cache);
3961c201d9caSDarrick J. Wong 	xfs_extfree_item_cache = NULL;
3962c201d9caSDarrick J. Wong }
3963