// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"

struct kmem_cache	*xfs_extfree_item_cache;

struct workqueue_struct *xfs_alloc_wq;

#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

/*
 * Size of the AGFL.  For CRC-enabled filesystems we steal a couple of slots in
 * the beginning of the block for a proper header with the location information
 * and CRC.
 */
unsigned int
xfs_agfl_size(
	struct xfs_mount	*mp)
{
	unsigned int		size = mp->m_sb.sb_sectsize;

	if (xfs_has_crc(mp))
		size -= sizeof(struct xfs_agfl);

	return size / sizeof(xfs_agblock_t);
}
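
/*
 * Worked example (illustrative only, assuming the on-disk AGFL header of
 * magic, seqno, uuid, lsn and crc totals 36 bytes): on a V5 filesystem with
 * 512-byte sectors, xfs_agfl_size() yields (512 - 36) / 4 = 119 free-list
 * slots per AGFL block.
 */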

unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	if (xfs_has_rmapbt(mp))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_has_finobt(mp))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}

xfs_extlen_t
xfs_prealloc_blocks(
	struct xfs_mount	*mp)
{
	if (xfs_has_reflink(mp))
		return xfs_refc_block(mp) + 1;
	if (xfs_has_rmapbt(mp))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_has_finobt(mp))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}
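
/*
 * Illustration (an inference from the helpers above, not a definitive layout
 * description): the static AG btree root blocks sit consecutively after the
 * sector-sized headers, so each helper returns "last static btree root + 1",
 * i.e. the first AG block that is eligible for normal allocation.
 */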

/*
 * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
 * guarantee that we can refill the AGFL prior to allocating space in a nearly
 * full AG.  Although the space described by the free space btrees, the
 * blocks used by the freesp btrees themselves, and the blocks owned by the
 * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
 * free space in the AG drop so low that the free space btrees cannot refill an
 * empty AGFL up to the minimum level.  Rather than grind through empty AGs
 * until the fs goes down, we subtract this many AG blocks from the incore
 * fdblocks to ensure user allocation does not overcommit the space the
 * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
 * withhold space from xfs_mod_fdblocks, so we do not account for that here.
 */
#define XFS_ALLOCBT_AGFL_RESERVE	4

/*
 * Compute the number of blocks that we set aside to guarantee the ability to
 * refill the AGFL and handle a full bmap btree split.
 *
 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
 * AGF buffer (PV 947395), we place constraints on the relationship among
 * actual allocations for data blocks, freelist blocks, and potential file data
 * bmap btree blocks.  However, these restrictions may result in no actual space
 * allocated for a delayed extent, for example, a data block in a certain AG is
 * allocated but there is no additional block for the additional bmap btree
 * block due to a split of the bmap btree of the file.  The result of this may
 * lead to an infinite loop when the file gets flushed to disk and all delayed
 * extents need to be actually allocated.  To get around this, we explicitly set
 * aside a few blocks which will not be reserved in delayed allocation.
 *
 * For each AG, we need to reserve enough blocks to replenish a totally empty
 * AGFL and 4 more to handle a potential split of the file's bmap btree.
 */
unsigned int
xfs_alloc_set_aside(
	struct xfs_mount	*mp)
{
	return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
}
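
/*
 * Worked example (illustrative only): with XFS_ALLOCBT_AGFL_RESERVE == 4 and
 * sb_agcount == 4, xfs_alloc_set_aside() withholds 4 * (4 + 4) = 32 blocks
 * from the incore free block count.
 */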

/*
 * When deciding how much space to allocate out of an AG, we limit the
 * allocation maximum size to the size of the AG. However, we cannot use all
 * the blocks in the AG - some are permanently used by metadata. These
 * blocks are generally:
 *	- the AG superblock, AGF, AGI and AGFL
 *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
 *	  the AGI free inode and rmap btree root blocks.
 *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
 *	- the rmapbt root block
 *
 * The AG headers are sector sized, so the amount of space they take up is
 * dependent on filesystem geometry. The others are all single blocks.
 */
unsigned int
xfs_alloc_ag_max_usable(
	struct xfs_mount	*mp)
{
	unsigned int		blocks;

	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
	blocks += XFS_ALLOCBT_AGFL_RESERVE;
	blocks += 3;			/* AGF, AGI btree root blocks */
	if (xfs_has_finobt(mp))
		blocks++;		/* finobt root block */
	if (xfs_has_rmapbt(mp))
		blocks++;		/* rmap root block */
	if (xfs_has_reflink(mp))
		blocks++;		/* refcount root block */

	return mp->m_sb.sb_agblocks - blocks;
}
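
/*
 * Worked example (illustrative, assuming 4096-byte blocks and 512-byte
 * sectors): the four sector-sized AG headers round up to one filesystem
 * block, so blocks = 1 + 4 (AGFL reserve) + 3 (btree roots) + up to 3
 * optional roots, i.e. at most 11 blocks deducted from sb_agblocks.
 */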

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

static inline bool
xfs_alloc_cur_active(
	struct xfs_btree_cur	*cur)
{
	return cur && cur->bc_ag.abt.active;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/* Convert the ondisk btree record to its incore representation. */
void
xfs_alloc_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_alloc_rec_incore	*irec)
{
	irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
}

/* Simple checks for free space records. */
xfs_failaddr_t
xfs_alloc_check_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_perag		*pag = cur->bc_ag.pag;

	if (irec->ar_blockcount == 0)
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
		return __this_address;

	return NULL;
}

static inline int
xfs_alloc_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected at %pS!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
		cur->bc_ag.pag->pag_agno, fa);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", irec->ar_startblock,
		irec->ar_blockcount);
	return -EFSCORRUPTED;
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_alloc_rec_incore irec;
	union xfs_btree_rec	*rec;
	xfs_failaddr_t		fa;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !(*stat))
		return error;

	xfs_alloc_btrec_to_irec(rec, &irec);
	fa = xfs_alloc_check_irec(cur, &irec);
	if (fa)
		return xfs_alloc_complain_bad_rec(cur, fa, &irec);

	*bno = irec.ar_startblock;
	*len = irec.ar_blockcount;
	return 0;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
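
/*
 * Worked example (illustrative only): for a found extent [bno=10, len=20]
 * with args->alignment == 8 and no busy trim, aligned_bno = roundup(10, 8)
 * = 16 and diff = 6, so the aligned result is [16, 14]; had diff been >=
 * len, the usable length would have dropped to zero.
 */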

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t		/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
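
/*
 * Worked example (illustrative only): with wantbno = 100, wantlen = 10 and
 * alignment == 1, a free extent [110, 30] starts past the desired block, so
 * we allocate from its start and the returned diff is |110 - 100| = 10; a
 * free extent [90, 30] covers the whole target, so newbno1 = wantbno = 100
 * and the diff is 0.
 */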

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
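
/*
 * Worked example (illustrative only): with prod = 4 and mod = 1, lengths
 * must satisfy len = k * 4 + 1.  For args->len = 14, k = 14 % 4 = 3 * 4
 * remainder 2 > mod, so the length is trimmed to 14 - (2 - 1) = 13 =
 * 3 * 4 + 1.  For args->len = 4, k = 0 < mod, so it would shrink to
 * 4 - 4 + 1 = 1, kept only if that still satisfies minlen.
 */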

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs))
			return -EFSCORRUPTED;
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1))
		return -EFSCORRUPTED;
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	return 0;
}
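
/*
 * Worked example of the four cases above (illustrative only): allocating
 * [rbno=44, rlen=4] out of free extent [fbno=40, flen=10] is the middle
 * case, leaving two new free extents [40, 4] and [48, 2]; allocating
 * [40, 10] consumes the record entirely, while [40, 4] or [46, 4] leave a
 * single remainder at the tail or head respectively.
 */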

/*
 * We do not verify the AGFL contents against AGF-based index counters here,
 * even though we may have access to the perag that contains shadow copies. We
 * don't know if the AGF based counters have been checked, and if they have they
 * still may be inconsistent because they haven't yet been reset on the first
 * allocation after the AGF has been read in.
 *
 * This means we can only check that all agfl entries contain valid or null
 * values because we can't reliably determine the active range to exclude
 * NULLAGBNO as a valid value.
 *
 * However, we can't even do that for v4 format filesystems because there are
 * old versions of mkfs out there that do not initialise the AGFL to known,
 * verifiable values. Hence we can't tell the difference between an AGFL block
 * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
 *
 * As a result, we can only fully validate AGFL block numbers when we pull them
 * from the freelist in xfs_alloc_get_freelist().
 */
static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	if (!xfs_has_crc(mp))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}

static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_failaddr_t	fa;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_has_crc(mp))
		return;

	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_agfl_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}

static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_buf_log_item *bip = bp->b_log_item;
	xfs_failaddr_t	fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_has_crc(mp))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}

const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};

/*
 * Read in the allocation group free block array.
 */
int
xfs_alloc_read_agfl(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}

/*
 * Block allocation algorithm and data structures.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur		*cnt;	/* btree cursors */
	struct xfs_btree_cur		*bnolt;
	struct xfs_btree_cur		*bnogt;
	xfs_extlen_t			cur_len;/* current search length */
	xfs_agblock_t			rec_bno;/* extent startblock */
	xfs_extlen_t			rec_len;/* extent length */
	xfs_agblock_t			bno;	/* alloc bno */
	xfs_extlen_t			len;	/* alloc len */
	xfs_extlen_t			diff;	/* diff from search bno */
	unsigned int			busy_gen;/* busy state */
	bool				busy;
};
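
/*
 * Usage sketch (summarising the helpers that follow): a caller initialises
 * the structure with xfs_alloc_cur_setup(), walks candidate extents via
 * helpers such as xfs_alloc_cur_check() and xfs_alloc_cntbt_iter() to track
 * the best [rec_bno, rec_len] seen so far, commits the winner with
 * xfs_alloc_cur_finish(), and releases the cursors with
 * xfs_alloc_cur_close().
 */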

/*
 * Set up cursors, etc. in the extent allocation cursor. This function can be
 * called multiple times to reset an initialized structure without having to
 * reallocate cursors.
 */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}

static void
xfs_alloc_cur_close(
	struct xfs_alloc_cur	*acur,
	bool			error)
{
	int			cur_error = XFS_BTREE_NOERROR;

	if (error)
		cur_error = XFS_BTREE_ERROR;

	if (acur->cnt)
		xfs_btree_del_cursor(acur->cnt, cur_error);
	if (acur->bnolt)
		xfs_btree_del_cursor(acur->bnolt, cur_error);
	if (acur->bnogt)
		xfs_btree_del_cursor(acur->bnogt, cur_error);
	acur->cnt = acur->bnolt = acur->bnogt = NULL;
}

/*
 * Check an extent for allocation and track the best available candidate in the
 * allocation structure. The cursor is deactivated if it has entered an out of
 * range state based on allocation arguments. Optionally return the extent
 * geometry and allocation status if requested by the caller.
 */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	int			*new)
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1))
		return -EFSCORRUPTED;

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}

/*
 * Complete an allocation of a candidate extent. Remove the extent from both
 * trees and update the args structure.
 */
STATIC int
xfs_alloc_cur_finish(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	int			error;

	ASSERT(acur->cnt && acur->bnolt);
	ASSERT(acur->bno >= acur->rec_bno);
	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));

	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
				      acur->rec_len, acur->bno, acur->len, 0);
	if (error)
		return error;

	args->agbno = acur->bno;
	args->len = acur->len;
	args->wasfromfl = 0;

	trace_xfs_alloc_cur(args);
	return 0;
}

999d2968825SBrian Foster /*
1000dc8e69bdSBrian Foster * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
1001dc8e69bdSBrian Foster * bno optimized lookup to search for extents with ideal size and locality.
1002dc8e69bdSBrian Foster */
1003dc8e69bdSBrian Foster STATIC int
xfs_alloc_cntbt_iter(struct xfs_alloc_arg * args,struct xfs_alloc_cur * acur)1004dc8e69bdSBrian Foster xfs_alloc_cntbt_iter(
1005dc8e69bdSBrian Foster struct xfs_alloc_arg *args,
1006dc8e69bdSBrian Foster struct xfs_alloc_cur *acur)
1007dc8e69bdSBrian Foster {
1008dc8e69bdSBrian Foster struct xfs_btree_cur *cur = acur->cnt;
1009dc8e69bdSBrian Foster xfs_agblock_t bno;
1010dc8e69bdSBrian Foster xfs_extlen_t len, cur_len;
1011dc8e69bdSBrian Foster int error;
1012dc8e69bdSBrian Foster int i;
1013dc8e69bdSBrian Foster
1014dc8e69bdSBrian Foster if (!xfs_alloc_cur_active(cur))
1015dc8e69bdSBrian Foster return 0;
1016dc8e69bdSBrian Foster
1017dc8e69bdSBrian Foster /* locality optimized lookup */
1018dc8e69bdSBrian Foster cur_len = acur->cur_len;
1019dc8e69bdSBrian Foster error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
1020dc8e69bdSBrian Foster if (error)
1021dc8e69bdSBrian Foster return error;
1022dc8e69bdSBrian Foster if (i == 0)
1023dc8e69bdSBrian Foster return 0;
1024dc8e69bdSBrian Foster error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1025dc8e69bdSBrian Foster if (error)
1026dc8e69bdSBrian Foster return error;
1027dc8e69bdSBrian Foster
1028dc8e69bdSBrian Foster /* check the current record and update search length from it */
1029dc8e69bdSBrian Foster error = xfs_alloc_cur_check(args, acur, cur, &i);
1030dc8e69bdSBrian Foster if (error)
1031dc8e69bdSBrian Foster return error;
1032dc8e69bdSBrian Foster ASSERT(len >= acur->cur_len);
1033dc8e69bdSBrian Foster acur->cur_len = len;
1034dc8e69bdSBrian Foster
1035dc8e69bdSBrian Foster /*
1036dc8e69bdSBrian Foster * We looked up the first record >= [agbno, len] above. The agbno is a
1037dc8e69bdSBrian Foster * secondary key and so the current record may lie just before or after
1038dc8e69bdSBrian Foster * agbno. If it is past agbno, check the previous record too so long as
1039dc8e69bdSBrian Foster * the length matches as it may be closer. Don't check a smaller record
1040dc8e69bdSBrian Foster * because that could deactivate our cursor.
1041dc8e69bdSBrian Foster */
1042dc8e69bdSBrian Foster if (bno > args->agbno) {
1043dc8e69bdSBrian Foster error = xfs_btree_decrement(cur, 0, &i);
1044dc8e69bdSBrian Foster if (!error && i) {
1045dc8e69bdSBrian Foster error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1046dc8e69bdSBrian Foster if (!error && i && len == acur->cur_len)
1047dc8e69bdSBrian Foster error = xfs_alloc_cur_check(args, acur, cur,
1048dc8e69bdSBrian Foster &i);
1049dc8e69bdSBrian Foster }
1050dc8e69bdSBrian Foster if (error)
1051dc8e69bdSBrian Foster return error;
1052dc8e69bdSBrian Foster }
1053dc8e69bdSBrian Foster
1054dc8e69bdSBrian Foster /*
1055dc8e69bdSBrian Foster * If we have not yet found an allocation candidate, or if the record we
1056dc8e69bdSBrian Foster * found already reaches the doubled key, advance the search key to just
1057dc8e69bdSBrian Foster * past that record. Otherwise, double the search key to speed up the
1058dc8e69bdSBrian Foster * search. Efficiency is more important here than absolute best locality.
1059dc8e69bdSBrian Foster */
1060dc8e69bdSBrian Foster cur_len <<= 1;
1061dc8e69bdSBrian Foster if (!acur->len || acur->cur_len >= cur_len)
1062dc8e69bdSBrian Foster acur->cur_len++;
1063dc8e69bdSBrian Foster else
1064dc8e69bdSBrian Foster acur->cur_len = cur_len;
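/*
 * For example, with a search key of 4 blocks: finding a 5-block record
 * while a candidate is already held doubles the key to 8; finding a
 * 20-block record advances the key to 21, since 20 already reaches the
 * doubled key; with no candidate held yet, the key only creeps to one
 * past the record just found.
 */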
1065dc8e69bdSBrian Foster
1066dc8e69bdSBrian Foster return error;
1067dc8e69bdSBrian Foster }
1068dc8e69bdSBrian Foster
1069dc8e69bdSBrian Foster /*
1070c63cdd4fSBrian Foster * Deal with the case where only small freespaces remain. Either return the
1071c63cdd4fSBrian Foster * contents of the last freespace record, or allocate space from the freelist if
1072c63cdd4fSBrian Foster * there is nothing in the tree.
1073c63cdd4fSBrian Foster */
1074c63cdd4fSBrian Foster STATIC int /* error */
1075c63cdd4fSBrian Foster xfs_alloc_ag_vextent_small(
1076c63cdd4fSBrian Foster struct xfs_alloc_arg *args, /* allocation argument structure */
1077c63cdd4fSBrian Foster struct xfs_btree_cur *ccur, /* optional by-size cursor */
1078c63cdd4fSBrian Foster xfs_agblock_t *fbnop, /* result block number */
1079c63cdd4fSBrian Foster xfs_extlen_t *flenp, /* result length */
1080c63cdd4fSBrian Foster int *stat) /* status: 0-freelist, 1-normal/none */
1081c63cdd4fSBrian Foster {
10829798f615SChristoph Hellwig struct xfs_agf *agf = args->agbp->b_addr;
1083c63cdd4fSBrian Foster int error = 0;
1084c63cdd4fSBrian Foster xfs_agblock_t fbno = NULLAGBLOCK;
1085c63cdd4fSBrian Foster xfs_extlen_t flen = 0;
10866691cd92SBrian Foster int i = 0;
1087c63cdd4fSBrian Foster
10886691cd92SBrian Foster /*
10896691cd92SBrian Foster * If a cntbt cursor is provided, try to allocate the largest record in
10906691cd92SBrian Foster * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
10916691cd92SBrian Foster * allocation. Make sure to respect minleft even when pulling from the
10926691cd92SBrian Foster * freelist.
10936691cd92SBrian Foster */
10946691cd92SBrian Foster if (ccur)
1095c63cdd4fSBrian Foster error = xfs_btree_decrement(ccur, 0, &i);
1096c63cdd4fSBrian Foster if (error)
1097c63cdd4fSBrian Foster goto error;
1098c63cdd4fSBrian Foster if (i) {
1099c63cdd4fSBrian Foster error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1100c63cdd4fSBrian Foster if (error)
1101c63cdd4fSBrian Foster goto error;
1102f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1103f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1104f9e03706SDarrick J. Wong goto error;
1105f9e03706SDarrick J. Wong }
1106c63cdd4fSBrian Foster goto out;
1107c63cdd4fSBrian Foster }
1108c63cdd4fSBrian Foster
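/*
 * The AGFL can only hand out a single unaligned block, so the request
 * must accept minlen == 1 and alignment == 1, must not be made on
 * behalf of the AGFL itself, and must leave at least minleft blocks on
 * the freelist.
 */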
1109c63cdd4fSBrian Foster if (args->minlen != 1 || args->alignment != 1 ||
1110c63cdd4fSBrian Foster args->resv == XFS_AG_RESV_AGFL ||
11119798f615SChristoph Hellwig be32_to_cpu(agf->agf_flcount) <= args->minleft)
1112c63cdd4fSBrian Foster goto out;
1113c63cdd4fSBrian Foster
111449f0d84eSDave Chinner error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
111549f0d84eSDave Chinner &fbno, 0);
1116c63cdd4fSBrian Foster if (error)
1117c63cdd4fSBrian Foster goto error;
1118c63cdd4fSBrian Foster if (fbno == NULLAGBLOCK)
1119c63cdd4fSBrian Foster goto out;
1120c63cdd4fSBrian Foster
112145d06621SDave Chinner xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1122c34d570dSChristoph Hellwig (args->datatype & XFS_ALLOC_NOBUSY));
1123c63cdd4fSBrian Foster
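/*
 * The block may still have a stale buffer around from its time as
 * metadata on the AGFL; invalidate it so a delayed write cannot land
 * on what is now user data.
 */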
1124c34d570dSChristoph Hellwig if (args->datatype & XFS_ALLOC_USERDATA) {
1125c63cdd4fSBrian Foster struct xfs_buf *bp;
1126c63cdd4fSBrian Foster
1127ee647f85SDarrick J. Wong error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1128ee647f85SDarrick J. Wong XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1129ee647f85SDarrick J. Wong args->mp->m_bsize, 0, &bp);
1130ee647f85SDarrick J. Wong if (error)
1131c63cdd4fSBrian Foster goto error;
1132c63cdd4fSBrian Foster xfs_trans_binval(args->tp, bp);
1133c63cdd4fSBrian Foster }
11347e36a3a6SBrian Foster *fbnop = args->agbno = fbno;
11357e36a3a6SBrian Foster *flenp = args->len = 1;
11369798f615SChristoph Hellwig if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1137f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1138f9e03706SDarrick J. Wong goto error;
1139f9e03706SDarrick J. Wong }
1140c63cdd4fSBrian Foster args->wasfromfl = 1;
1141c63cdd4fSBrian Foster trace_xfs_alloc_small_freelist(args);
1142c63cdd4fSBrian Foster
1143c63cdd4fSBrian Foster /*
1144c63cdd4fSBrian Foster * If we're feeding an AGFL block to something that doesn't live in the
1145c63cdd4fSBrian Foster * free space, we need to clear out the OWN_AG rmap.
1146c63cdd4fSBrian Foster */
1147fa9c3c19SDave Chinner error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1148c63cdd4fSBrian Foster &XFS_RMAP_OINFO_AG);
1149c63cdd4fSBrian Foster if (error)
1150c63cdd4fSBrian Foster goto error;
1151c63cdd4fSBrian Foster
1152c63cdd4fSBrian Foster *stat = 0;
1153c63cdd4fSBrian Foster return 0;
1154c63cdd4fSBrian Foster
1155c63cdd4fSBrian Foster out:
1156c63cdd4fSBrian Foster /*
1157c63cdd4fSBrian Foster * Can't do the allocation, give up.
1158c63cdd4fSBrian Foster */
1159c63cdd4fSBrian Foster if (flen < args->minlen) {
1160c63cdd4fSBrian Foster args->agbno = NULLAGBLOCK;
1161c63cdd4fSBrian Foster trace_xfs_alloc_small_notenough(args);
1162c63cdd4fSBrian Foster flen = 0;
1163c63cdd4fSBrian Foster }
1164c63cdd4fSBrian Foster *fbnop = fbno;
1165c63cdd4fSBrian Foster *flenp = flen;
1166c63cdd4fSBrian Foster *stat = 1;
1167c63cdd4fSBrian Foster trace_xfs_alloc_small_done(args);
1168c63cdd4fSBrian Foster return 0;
1169c63cdd4fSBrian Foster
1170c63cdd4fSBrian Foster error:
1171c63cdd4fSBrian Foster trace_xfs_alloc_small_error(args);
1172c63cdd4fSBrian Foster return error;
1173c63cdd4fSBrian Foster }
1174c63cdd4fSBrian Foster
1175c63cdd4fSBrian Foster /*
117630f712c9SDave Chinner * Allocate a variable extent at exactly agno/bno.
117730f712c9SDave Chinner * Extent's length (returned in *len) will be between minlen and maxlen,
117830f712c9SDave Chinner * and of the form k * prod + mod unless there's nothing that large.
117930f712c9SDave Chinner * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
118030f712c9SDave Chinner */
118130f712c9SDave Chinner STATIC int /* error */
118230f712c9SDave Chinner xfs_alloc_ag_vextent_exact(
118330f712c9SDave Chinner xfs_alloc_arg_t *args) /* allocation argument structure */
118430f712c9SDave Chinner {
11859798f615SChristoph Hellwig struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1186ae127f08SDarrick J. Wong struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
1187ae127f08SDarrick J. Wong struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
118830f712c9SDave Chinner int error;
118930f712c9SDave Chinner xfs_agblock_t fbno; /* start block of found extent */
119030f712c9SDave Chinner xfs_extlen_t flen; /* length of found extent */
1191ebf55872SChristoph Hellwig xfs_agblock_t tbno; /* start block of busy extent */
1192ebf55872SChristoph Hellwig xfs_extlen_t tlen; /* length of busy extent */
1193ebf55872SChristoph Hellwig xfs_agblock_t tend; /* end block of busy extent */
119430f712c9SDave Chinner int i; /* success/failure of operation */
1195ebf55872SChristoph Hellwig unsigned busy_gen;
119630f712c9SDave Chinner
119730f712c9SDave Chinner ASSERT(args->alignment == 1);
119830f712c9SDave Chinner
119930f712c9SDave Chinner /*
120030f712c9SDave Chinner * Allocate/initialize a cursor for the by-number freespace btree.
120130f712c9SDave Chinner */
120230f712c9SDave Chinner bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1203289d38d2SDave Chinner args->pag, XFS_BTNUM_BNO);
120430f712c9SDave Chinner
120530f712c9SDave Chinner /*
120630f712c9SDave Chinner * Look up bno and minlen in the btree (minlen is irrelevant, really).
120730f712c9SDave Chinner * Find the closest free block <= bno; it must contain bno
120830f712c9SDave Chinner * if any free block does.
120930f712c9SDave Chinner */
121030f712c9SDave Chinner error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
121130f712c9SDave Chinner if (error)
121230f712c9SDave Chinner goto error0;
121330f712c9SDave Chinner if (!i)
121430f712c9SDave Chinner goto not_found;
121530f712c9SDave Chinner
121630f712c9SDave Chinner /*
121730f712c9SDave Chinner * Grab the freespace record.
121830f712c9SDave Chinner */
121930f712c9SDave Chinner error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
122030f712c9SDave Chinner if (error)
122130f712c9SDave Chinner goto error0;
1222f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1223f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1224f9e03706SDarrick J. Wong goto error0;
1225f9e03706SDarrick J. Wong }
122630f712c9SDave Chinner ASSERT(fbno <= args->agbno);
122730f712c9SDave Chinner
122830f712c9SDave Chinner /*
122930f712c9SDave Chinner * Check for overlapping busy extents.
123030f712c9SDave Chinner */
1231ebf55872SChristoph Hellwig tbno = fbno;
1232ebf55872SChristoph Hellwig tlen = flen;
1233ebf55872SChristoph Hellwig xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
123430f712c9SDave Chinner
123530f712c9SDave Chinner /*
123630f712c9SDave Chinner * Give up if the start of the extent is busy, or the freespace isn't
123730f712c9SDave Chinner * long enough for the minimum request.
123830f712c9SDave Chinner */
123930f712c9SDave Chinner if (tbno > args->agbno)
124030f712c9SDave Chinner goto not_found;
124130f712c9SDave Chinner if (tlen < args->minlen)
124230f712c9SDave Chinner goto not_found;
124330f712c9SDave Chinner tend = tbno + tlen;
124430f712c9SDave Chinner if (tend < args->agbno + args->minlen)
124530f712c9SDave Chinner goto not_found;
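/*
 * For example, with a free record covering blocks 100-119, agbno 110
 * and minlen 4: an extent trimmed to start at 112 fails the first
 * check, while one trimmed to 105-119 passes all three because
 * 105 <= 110 and 110 + 4 <= 120.
 */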
124630f712c9SDave Chinner
124730f712c9SDave Chinner /*
124830f712c9SDave Chinner * End of extent will be smaller of the freespace end and the
124930f712c9SDave Chinner * maximal requested end.
125030f712c9SDave Chinner *
125130f712c9SDave Chinner * Fix the length according to mod and prod if given.
125230f712c9SDave Chinner */
125330f712c9SDave Chinner args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
125430f712c9SDave Chinner - args->agbno;
125530f712c9SDave Chinner xfs_alloc_fix_len(args);
125630f712c9SDave Chinner ASSERT(args->agbno + args->len <= tend);
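/*
 * For example, with prod 4 and mod 1, a 12-block candidate is trimmed
 * to 9 blocks (2 * 4 + 1) to satisfy the k * prod + mod contract.
 */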
125730f712c9SDave Chinner
125830f712c9SDave Chinner /*
125930f712c9SDave Chinner * We are allocating agbno for args->len
126030f712c9SDave Chinner * Allocate/initialize a cursor for the by-size btree.
126130f712c9SDave Chinner */
126230f712c9SDave Chinner cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1263289d38d2SDave Chinner args->pag, XFS_BTNUM_CNT);
12649798f615SChristoph Hellwig ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
126530f712c9SDave Chinner error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
126630f712c9SDave Chinner args->len, XFSA_FIXUP_BNO_OK);
126730f712c9SDave Chinner if (error) {
126830f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
126930f712c9SDave Chinner goto error0;
127030f712c9SDave Chinner }
127130f712c9SDave Chinner
127230f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
127330f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
127430f712c9SDave Chinner
127530f712c9SDave Chinner args->wasfromfl = 0;
127630f712c9SDave Chinner trace_xfs_alloc_exact_done(args);
127730f712c9SDave Chinner return 0;
127830f712c9SDave Chinner
127930f712c9SDave Chinner not_found:
128030f712c9SDave Chinner /* Didn't find it, return null. */
128130f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
128230f712c9SDave Chinner args->agbno = NULLAGBLOCK;
128330f712c9SDave Chinner trace_xfs_alloc_exact_notfound(args);
128430f712c9SDave Chinner return 0;
128530f712c9SDave Chinner
128630f712c9SDave Chinner error0:
128730f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
128830f712c9SDave Chinner trace_xfs_alloc_exact_error(args);
128930f712c9SDave Chinner return error;
129030f712c9SDave Chinner }
129130f712c9SDave Chinner
129230f712c9SDave Chinner /*
129378d7aabdSBrian Foster * Search a given number of btree records in a given direction. Check each
129478d7aabdSBrian Foster * record against the good extent we've already found.
129530f712c9SDave Chinner */
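/*
 * The caller controls the walk: @increment selects the direction,
 * @find_one stops at the first new allocation candidate, and @count
 * bounds the number of records visited (-1 means walk until the cursor
 * is deactivated). @stat is set to 1 if any candidate was found. The
 * NEAR algorithms use single-record probes of each tree while searching
 * and unbounded walks to finish off a search.
 */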
129630f712c9SDave Chinner STATIC int
129778d7aabdSBrian Foster xfs_alloc_walk_iter(
1298fec0afdaSBrian Foster struct xfs_alloc_arg *args,
1299fec0afdaSBrian Foster struct xfs_alloc_cur *acur,
1300fec0afdaSBrian Foster struct xfs_btree_cur *cur,
130178d7aabdSBrian Foster bool increment,
130278d7aabdSBrian Foster bool find_one, /* quit on first candidate */
130378d7aabdSBrian Foster int count, /* rec count (-1 for infinite) */
130478d7aabdSBrian Foster int *stat)
130530f712c9SDave Chinner {
130630f712c9SDave Chinner int error;
130730f712c9SDave Chinner int i;
130830f712c9SDave Chinner
130978d7aabdSBrian Foster *stat = 0;
131078d7aabdSBrian Foster
131130f712c9SDave Chinner /*
1312fec0afdaSBrian Foster * Search so long as the cursor is active or we find a better extent.
1313fec0afdaSBrian Foster * The cursor is deactivated if it extends beyond the range of the
1314fec0afdaSBrian Foster * current allocation candidate.
131530f712c9SDave Chinner */
131678d7aabdSBrian Foster while (xfs_alloc_cur_active(cur) && count) {
1317fec0afdaSBrian Foster error = xfs_alloc_cur_check(args, acur, cur, &i);
131830f712c9SDave Chinner if (error)
131930f712c9SDave Chinner return error;
132078d7aabdSBrian Foster if (i == 1) {
132178d7aabdSBrian Foster *stat = 1;
132278d7aabdSBrian Foster if (find_one)
1323fec0afdaSBrian Foster break;
132478d7aabdSBrian Foster }
1325fec0afdaSBrian Foster if (!xfs_alloc_cur_active(cur))
1326fec0afdaSBrian Foster break;
1327fec0afdaSBrian Foster
1328fec0afdaSBrian Foster if (increment)
1329fec0afdaSBrian Foster error = xfs_btree_increment(cur, 0, &i);
1330fec0afdaSBrian Foster else
1331fec0afdaSBrian Foster error = xfs_btree_decrement(cur, 0, &i);
1332fec0afdaSBrian Foster if (error)
1333fec0afdaSBrian Foster return error;
1334fec0afdaSBrian Foster if (i == 0)
1335c4aa10d0SDave Chinner cur->bc_ag.abt.active = false;
133678d7aabdSBrian Foster
133778d7aabdSBrian Foster if (count > 0)
133878d7aabdSBrian Foster count--;
1339fec0afdaSBrian Foster }
1340fec0afdaSBrian Foster
1341fec0afdaSBrian Foster return 0;
134230f712c9SDave Chinner }
134330f712c9SDave Chinner
134430f712c9SDave Chinner /*
1345dc8e69bdSBrian Foster * Search the by-bno and by-size btrees in parallel in search of an extent with
1346dc8e69bdSBrian Foster * ideal locality based on the NEAR mode ->agbno locality hint.
13470e26d5caSBrian Foster */
13480e26d5caSBrian Foster STATIC int
1349dc8e69bdSBrian Foster xfs_alloc_ag_vextent_locality(
13500e26d5caSBrian Foster struct xfs_alloc_arg *args,
13510e26d5caSBrian Foster struct xfs_alloc_cur *acur,
13520e26d5caSBrian Foster int *stat)
13530e26d5caSBrian Foster {
13540e26d5caSBrian Foster struct xfs_btree_cur *fbcur = NULL;
13550e26d5caSBrian Foster int error;
13560e26d5caSBrian Foster int i;
13570e26d5caSBrian Foster bool fbinc;
13580e26d5caSBrian Foster
13590e26d5caSBrian Foster ASSERT(acur->len == 0);
13600e26d5caSBrian Foster
13610e26d5caSBrian Foster *stat = 0;
13620e26d5caSBrian Foster
1363dc8e69bdSBrian Foster error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1364dc8e69bdSBrian Foster if (error)
1365dc8e69bdSBrian Foster return error;
13660e26d5caSBrian Foster error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
13670e26d5caSBrian Foster if (error)
13680e26d5caSBrian Foster return error;
13690e26d5caSBrian Foster error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
13700e26d5caSBrian Foster if (error)
13710e26d5caSBrian Foster return error;
13720e26d5caSBrian Foster
13730e26d5caSBrian Foster /*
1374dc8e69bdSBrian Foster * Search the bnobt and cntbt in parallel. Search the bnobt left and
1375dc8e69bdSBrian Foster * right and lookup the closest extent to the locality hint for each
1376dc8e69bdSBrian Foster * extent size key in the cntbt. The entire search terminates
1377dc8e69bdSBrian Foster * immediately on a bnobt hit because that means we've found best case
1378dc8e69bdSBrian Foster * locality. Otherwise the search continues until the cntbt cursor runs
1379dc8e69bdSBrian Foster * off the end of the tree. If no allocation candidate is found at this
1380dc8e69bdSBrian Foster * point, give up on locality, walk backwards from the end of the cntbt
1381dc8e69bdSBrian Foster * and take the first available extent.
1382dc8e69bdSBrian Foster *
1383dc8e69bdSBrian Foster * The parallel tree searches balance each other out to provide fairly
1384dc8e69bdSBrian Foster * consistent performance for various situations. The bnobt search can
1385dc8e69bdSBrian Foster * have pathological behavior in the worst case scenario of larger
1386dc8e69bdSBrian Foster * allocation requests and fragmented free space. On the other hand, the
1387dc8e69bdSBrian Foster * bnobt is able to satisfy most smaller allocation requests much more
1388dc8e69bdSBrian Foster * quickly than the cntbt. The cntbt search can sift through fragmented
1389dc8e69bdSBrian Foster * free space and sets of free extents for larger allocation requests
1390dc8e69bdSBrian Foster * more quickly than the bnobt. Since the locality hint is just a hint
1391dc8e69bdSBrian Foster * and we don't want to scan the entire bnobt for perfect locality, the
1392dc8e69bdSBrian Foster * cntbt search essentially bounds the bnobt search such that we can
1393dc8e69bdSBrian Foster * find good enough locality at reasonable performance in most cases.
13940e26d5caSBrian Foster */
13950e26d5caSBrian Foster while (xfs_alloc_cur_active(acur->bnolt) ||
1396dc8e69bdSBrian Foster xfs_alloc_cur_active(acur->bnogt) ||
1397dc8e69bdSBrian Foster xfs_alloc_cur_active(acur->cnt)) {
1398dc8e69bdSBrian Foster
1399dc8e69bdSBrian Foster trace_xfs_alloc_cur_lookup(args);
1400dc8e69bdSBrian Foster
1401dc8e69bdSBrian Foster /*
1402dc8e69bdSBrian Foster * Search the bnobt left and right. In the case of a hit, finish
1403dc8e69bdSBrian Foster * the search in the opposite direction and we're done.
1404dc8e69bdSBrian Foster */
14050e26d5caSBrian Foster error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
14060e26d5caSBrian Foster true, 1, &i);
14070e26d5caSBrian Foster if (error)
14080e26d5caSBrian Foster return error;
14090e26d5caSBrian Foster if (i == 1) {
14100e26d5caSBrian Foster trace_xfs_alloc_cur_left(args);
14110e26d5caSBrian Foster fbcur = acur->bnogt;
14120e26d5caSBrian Foster fbinc = true;
14130e26d5caSBrian Foster break;
14140e26d5caSBrian Foster }
14150e26d5caSBrian Foster error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
14160e26d5caSBrian Foster 1, &i);
14170e26d5caSBrian Foster if (error)
14180e26d5caSBrian Foster return error;
14190e26d5caSBrian Foster if (i == 1) {
14200e26d5caSBrian Foster trace_xfs_alloc_cur_right(args);
14210e26d5caSBrian Foster fbcur = acur->bnolt;
14220e26d5caSBrian Foster fbinc = false;
14230e26d5caSBrian Foster break;
14240e26d5caSBrian Foster }
1425dc8e69bdSBrian Foster
1426dc8e69bdSBrian Foster /*
1427dc8e69bdSBrian Foster * Check the extent with best locality based on the current
1428dc8e69bdSBrian Foster * extent size search key and keep track of the best candidate.
1429dc8e69bdSBrian Foster */
1430dc8e69bdSBrian Foster error = xfs_alloc_cntbt_iter(args, acur);
1431dc8e69bdSBrian Foster if (error)
1432dc8e69bdSBrian Foster return error;
1433dc8e69bdSBrian Foster if (!xfs_alloc_cur_active(acur->cnt)) {
1434dc8e69bdSBrian Foster trace_xfs_alloc_cur_lookup_done(args);
1435dc8e69bdSBrian Foster break;
1436dc8e69bdSBrian Foster }
14370e26d5caSBrian Foster }
14380e26d5caSBrian Foster
1439dc8e69bdSBrian Foster /*
1440dc8e69bdSBrian Foster * If we failed to find anything due to busy extents, return empty
1441dc8e69bdSBrian Foster * handed so the caller can flush and retry. If no busy extents were
1442dc8e69bdSBrian Foster * found, walk backwards from the end of the cntbt as a last resort.
1443dc8e69bdSBrian Foster */
1444dc8e69bdSBrian Foster if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1445dc8e69bdSBrian Foster error = xfs_btree_decrement(acur->cnt, 0, &i);
1446dc8e69bdSBrian Foster if (error)
1447dc8e69bdSBrian Foster return error;
1448dc8e69bdSBrian Foster if (i) {
1449c4aa10d0SDave Chinner acur->cnt->bc_ag.abt.active = true;
1450dc8e69bdSBrian Foster fbcur = acur->cnt;
1451dc8e69bdSBrian Foster fbinc = false;
1452dc8e69bdSBrian Foster }
1453dc8e69bdSBrian Foster }
1454dc8e69bdSBrian Foster
1455dc8e69bdSBrian Foster /*
1456dc8e69bdSBrian Foster * Search in the opposite direction for a better entry in the case of
1457dc8e69bdSBrian Foster * a bnobt hit or walk backwards from the end of the cntbt.
1458dc8e69bdSBrian Foster */
14590e26d5caSBrian Foster if (fbcur) {
14600e26d5caSBrian Foster error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
14610e26d5caSBrian Foster &i);
14620e26d5caSBrian Foster if (error)
14630e26d5caSBrian Foster return error;
14640e26d5caSBrian Foster }
14650e26d5caSBrian Foster
14660e26d5caSBrian Foster if (acur->len)
14670e26d5caSBrian Foster *stat = 1;
14680e26d5caSBrian Foster
14690e26d5caSBrian Foster return 0;
14700e26d5caSBrian Foster }
14710e26d5caSBrian Foster
14725113f8ecSDarrick J. Wong /* Check the last block of the cnt btree for allocations. */
14735113f8ecSDarrick J. Wong static int
14745113f8ecSDarrick J. Wong xfs_alloc_ag_vextent_lastblock(
14755113f8ecSDarrick J. Wong struct xfs_alloc_arg *args,
14765113f8ecSDarrick J. Wong struct xfs_alloc_cur *acur,
14775113f8ecSDarrick J. Wong xfs_agblock_t *bno,
14785113f8ecSDarrick J. Wong xfs_extlen_t *len,
14795113f8ecSDarrick J. Wong bool *allocated)
14805113f8ecSDarrick J. Wong {
14815113f8ecSDarrick J. Wong int error;
14825113f8ecSDarrick J. Wong int i;
14835113f8ecSDarrick J. Wong
14845113f8ecSDarrick J. Wong #ifdef DEBUG
14855113f8ecSDarrick J. Wong /* Randomly don't execute the first algorithm. */
14868032bf12SJason A. Donenfeld if (get_random_u32_below(2))
14875113f8ecSDarrick J. Wong return 0;
14885113f8ecSDarrick J. Wong #endif
14895113f8ecSDarrick J. Wong
14905113f8ecSDarrick J. Wong /*
14915113f8ecSDarrick J. Wong * Start from the entry that lookup found, sequence through all larger
14925113f8ecSDarrick J. Wong * free blocks. If we're actually pointing at a record smaller than
14935113f8ecSDarrick J. Wong * maxlen, go to the start of this block, and skip all those smaller
14945113f8ecSDarrick J. Wong * than minlen.
14955113f8ecSDarrick J. Wong */
149677ca1eedSDarrick J. Wong if (*len || args->alignment > 1) {
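/* Rewind to the first record of this (last) leaf block. */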
14976ca444cfSDarrick J. Wong acur->cnt->bc_levels[0].ptr = 1;
14985113f8ecSDarrick J. Wong do {
14995113f8ecSDarrick J. Wong error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
15005113f8ecSDarrick J. Wong if (error)
15015113f8ecSDarrick J. Wong return error;
1502f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1))
1503f9e03706SDarrick J. Wong return -EFSCORRUPTED;
15045113f8ecSDarrick J. Wong if (*len >= args->minlen)
15055113f8ecSDarrick J. Wong break;
15065113f8ecSDarrick J. Wong error = xfs_btree_increment(acur->cnt, 0, &i);
15075113f8ecSDarrick J. Wong if (error)
15085113f8ecSDarrick J. Wong return error;
15095113f8ecSDarrick J. Wong } while (i);
15105113f8ecSDarrick J. Wong ASSERT(*len >= args->minlen);
15115113f8ecSDarrick J. Wong if (!i)
15125113f8ecSDarrick J. Wong return 0;
15135113f8ecSDarrick J. Wong }
15145113f8ecSDarrick J. Wong
15155113f8ecSDarrick J. Wong error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
15165113f8ecSDarrick J. Wong if (error)
15175113f8ecSDarrick J. Wong return error;
15185113f8ecSDarrick J. Wong
15195113f8ecSDarrick J. Wong /*
15205113f8ecSDarrick J. Wong * It didn't work. We COULD be in a case where there's a good record
15215113f8ecSDarrick J. Wong * somewhere, so try again.
15225113f8ecSDarrick J. Wong */
15235113f8ecSDarrick J. Wong if (acur->len == 0)
15245113f8ecSDarrick J. Wong return 0;
15255113f8ecSDarrick J. Wong
15265113f8ecSDarrick J. Wong trace_xfs_alloc_near_first(args);
15275113f8ecSDarrick J. Wong *allocated = true;
15285113f8ecSDarrick J. Wong return 0;
15295113f8ecSDarrick J. Wong }
15305113f8ecSDarrick J. Wong
15310e26d5caSBrian Foster /*
153230f712c9SDave Chinner * Allocate a variable extent near bno in the allocation group agno.
153330f712c9SDave Chinner * Extent's length (returned in len) will be between minlen and maxlen,
153430f712c9SDave Chinner * and of the form k * prod + mod unless there's nothing that large.
153530f712c9SDave Chinner * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
153630f712c9SDave Chinner */
1537f5e7dbeaSBrian Foster STATIC int
153830f712c9SDave Chinner xfs_alloc_ag_vextent_near(
15396a2a9d77SDave Chinner struct xfs_alloc_arg *args,
15406a2a9d77SDave Chinner uint32_t alloc_flags)
154130f712c9SDave Chinner {
1542f5e7dbeaSBrian Foster struct xfs_alloc_cur acur = {};
154330f712c9SDave Chinner int error; /* error code */
154430f712c9SDave Chinner int i; /* result code, temporary */
1545fec0afdaSBrian Foster xfs_agblock_t bno;
1546fec0afdaSBrian Foster xfs_extlen_t len;
154730f712c9SDave Chinner
1548cf085a1bSJoe Perches /* handle uninitialized agbno range so caller doesn't have to */
1549bfe46d4eSBrian Foster if (!args->min_agbno && !args->max_agbno)
1550bfe46d4eSBrian Foster args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1551bfe46d4eSBrian Foster ASSERT(args->min_agbno <= args->max_agbno);
1552bfe46d4eSBrian Foster
1553bfe46d4eSBrian Foster /* clamp agbno to the range if it's outside */
1554bfe46d4eSBrian Foster if (args->agbno < args->min_agbno)
1555bfe46d4eSBrian Foster args->agbno = args->min_agbno;
1556bfe46d4eSBrian Foster if (args->agbno > args->max_agbno)
1557bfe46d4eSBrian Foster args->agbno = args->max_agbno;
1558bfe46d4eSBrian Foster
15598ebbf262SDave Chinner /* Retry once quickly if we find busy extents before blocking. */
15608ebbf262SDave Chinner alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
156130f712c9SDave Chinner restart:
1562fec0afdaSBrian Foster len = 0;
156330f712c9SDave Chinner
156430f712c9SDave Chinner /*
1565f5e7dbeaSBrian Foster * Set up cursors and see if there are any free extents as big as
1566f5e7dbeaSBrian Foster * maxlen. If not, pick the last entry in the tree unless the tree is
1567f5e7dbeaSBrian Foster * empty.
156830f712c9SDave Chinner */
1569f5e7dbeaSBrian Foster error = xfs_alloc_cur_setup(args, &acur);
1570f5e7dbeaSBrian Foster if (error == -ENOSPC) {
1571fec0afdaSBrian Foster error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1572fec0afdaSBrian Foster &len, &i);
1573f5e7dbeaSBrian Foster if (error)
1574f5e7dbeaSBrian Foster goto out;
1575fec0afdaSBrian Foster if (i == 0 || len == 0) {
157630f712c9SDave Chinner trace_xfs_alloc_near_noentry(args);
1577f5e7dbeaSBrian Foster goto out;
157830f712c9SDave Chinner }
157930f712c9SDave Chinner ASSERT(i == 1);
1580f5e7dbeaSBrian Foster } else if (error) {
1581f5e7dbeaSBrian Foster goto out;
158230f712c9SDave Chinner }
158330f712c9SDave Chinner
158430f712c9SDave Chinner /*
158530f712c9SDave Chinner * First algorithm.
158630f712c9SDave Chinner * If the requested extent is large with respect to the freespaces
158730f712c9SDave Chinner * available in this a.g., then the cursor will be pointing to a btree entry
158830f712c9SDave Chinner * near the right edge of the tree. If it's in the last btree leaf
158930f712c9SDave Chinner * block, then we just examine all the entries in that block
159030f712c9SDave Chinner * that are big enough, and pick the best one.
159130f712c9SDave Chinner */
15925113f8ecSDarrick J. Wong if (xfs_btree_islastblock(acur.cnt, 0)) {
15935113f8ecSDarrick J. Wong bool allocated = false;
15945113f8ecSDarrick J. Wong
15955113f8ecSDarrick J. Wong error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
15965113f8ecSDarrick J. Wong &allocated);
1597f5e7dbeaSBrian Foster if (error)
1598f5e7dbeaSBrian Foster goto out;
15995113f8ecSDarrick J. Wong if (allocated)
16005113f8ecSDarrick J. Wong goto alloc_finish;
160130f712c9SDave Chinner }
1602f5e7dbeaSBrian Foster
160330f712c9SDave Chinner /*
1604dc8e69bdSBrian Foster * Second algorithm. Combined cntbt and bnobt search to find ideal
1605dc8e69bdSBrian Foster * locality.
160630f712c9SDave Chinner */
1607dc8e69bdSBrian Foster error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
1608f5e7dbeaSBrian Foster if (error)
1609f5e7dbeaSBrian Foster goto out;
161030f712c9SDave Chinner
161130f712c9SDave Chinner /*
161230f712c9SDave Chinner * If we couldn't get anything, give up.
161330f712c9SDave Chinner */
1614fec0afdaSBrian Foster if (!acur.len) {
1615d6d3aff2SBrian Foster if (acur.busy) {
16168ebbf262SDave Chinner /*
16178ebbf262SDave Chinner * Our only valid extents must have been busy. Flush and
16188ebbf262SDave Chinner * retry the allocation again. If we get an -EAGAIN
16198ebbf262SDave Chinner * error, we're being told that a deadlock was avoided
16208ebbf262SDave Chinner * and the current transaction needs committing before
16218ebbf262SDave Chinner * the allocation can be retried.
16228ebbf262SDave Chinner */
162330f712c9SDave Chinner trace_xfs_alloc_near_busy(args);
16248ebbf262SDave Chinner error = xfs_extent_busy_flush(args->tp, args->pag,
16256a2a9d77SDave Chinner acur.busy_gen, alloc_flags);
16268ebbf262SDave Chinner if (error)
16278ebbf262SDave Chinner goto out;
16288ebbf262SDave Chinner
16298ebbf262SDave Chinner alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
163030f712c9SDave Chinner goto restart;
163130f712c9SDave Chinner }
163230f712c9SDave Chinner trace_xfs_alloc_size_neither(args);
163330f712c9SDave Chinner args->agbno = NULLAGBLOCK;
1634f5e7dbeaSBrian Foster goto out;
163530f712c9SDave Chinner }
163630f712c9SDave Chinner
16375113f8ecSDarrick J. Wong alloc_finish:
1638d2968825SBrian Foster /* fix up btrees on a successful allocation */
1639d2968825SBrian Foster error = xfs_alloc_cur_finish(args, &acur);
164030f712c9SDave Chinner
1641f5e7dbeaSBrian Foster out:
1642f5e7dbeaSBrian Foster xfs_alloc_cur_close(&acur, error);
164330f712c9SDave Chinner return error;
164430f712c9SDave Chinner }
164530f712c9SDave Chinner
164630f712c9SDave Chinner /*
164730f712c9SDave Chinner * Allocate a variable extent anywhere in the allocation group agno.
164830f712c9SDave Chinner * Extent's length (returned in len) will be between minlen and maxlen,
164930f712c9SDave Chinner * and of the form k * prod + mod unless there's nothing that large.
165030f712c9SDave Chinner * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
165130f712c9SDave Chinner */
16526a2a9d77SDave Chinner static int
165330f712c9SDave Chinner xfs_alloc_ag_vextent_size(
16546a2a9d77SDave Chinner struct xfs_alloc_arg *args,
16556a2a9d77SDave Chinner uint32_t alloc_flags)
165630f712c9SDave Chinner {
16579798f615SChristoph Hellwig struct xfs_agf *agf = args->agbp->b_addr;
16586a2a9d77SDave Chinner struct xfs_btree_cur *bno_cur;
16596a2a9d77SDave Chinner struct xfs_btree_cur *cnt_cur;
166030f712c9SDave Chinner xfs_agblock_t fbno; /* start of found freespace */
166130f712c9SDave Chinner xfs_extlen_t flen; /* length of found freespace */
166230f712c9SDave Chinner xfs_agblock_t rbno; /* returned block number */
166330f712c9SDave Chinner xfs_extlen_t rlen; /* length of returned extent */
1664ebf55872SChristoph Hellwig bool busy;
1665ebf55872SChristoph Hellwig unsigned busy_gen;
16666a2a9d77SDave Chinner int error;
16676a2a9d77SDave Chinner int i;
166830f712c9SDave Chinner
16698ebbf262SDave Chinner /* Retry once quickly if we find busy extents before blocking. */
16708ebbf262SDave Chinner alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
167130f712c9SDave Chinner restart:
167230f712c9SDave Chinner /*
167330f712c9SDave Chinner * Allocate and initialize a cursor for the by-size btree.
167430f712c9SDave Chinner */
167530f712c9SDave Chinner cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1676289d38d2SDave Chinner args->pag, XFS_BTNUM_CNT);
167730f712c9SDave Chinner bno_cur = NULL;
167830f712c9SDave Chinner
167930f712c9SDave Chinner /*
168030f712c9SDave Chinner * Look for an entry >= maxlen+alignment-1 blocks.
168130f712c9SDave Chinner */
168230f712c9SDave Chinner if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
168330f712c9SDave Chinner args->maxlen + args->alignment - 1, &i)))
168430f712c9SDave Chinner goto error0;
168530f712c9SDave Chinner
168630f712c9SDave Chinner /*
1687ebf55872SChristoph Hellwig * If none, we have to settle for a smaller extent. If there are no
1688ebf55872SChristoph Hellwig * large extents at all, this returns the last entry in the tree unless
1689ebf55872SChristoph Hellwig * the tree is empty. If the only large extents are busy, this returns
1690ebf55872SChristoph Hellwig * the largest small extent unless there are no smaller extents
169130f712c9SDave Chinner * available.
169230f712c9SDave Chinner */
1693ebf55872SChristoph Hellwig if (!i) {
169430f712c9SDave Chinner error = xfs_alloc_ag_vextent_small(args, cnt_cur,
169530f712c9SDave Chinner &fbno, &flen, &i);
169630f712c9SDave Chinner if (error)
169730f712c9SDave Chinner goto error0;
169830f712c9SDave Chinner if (i == 0 || flen == 0) {
169930f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
170030f712c9SDave Chinner trace_xfs_alloc_size_noentry(args);
170130f712c9SDave Chinner return 0;
170230f712c9SDave Chinner }
170330f712c9SDave Chinner ASSERT(i == 1);
1704ebf55872SChristoph Hellwig busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1705ebf55872SChristoph Hellwig &rlen, &busy_gen);
170630f712c9SDave Chinner } else {
170730f712c9SDave Chinner /*
170830f712c9SDave Chinner * Search for a non-busy extent that is large enough.
170930f712c9SDave Chinner */
171030f712c9SDave Chinner for (;;) {
171130f712c9SDave Chinner error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
171230f712c9SDave Chinner if (error)
171330f712c9SDave Chinner goto error0;
1714f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1715f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1716f9e03706SDarrick J. Wong goto error0;
1717f9e03706SDarrick J. Wong }
171830f712c9SDave Chinner
1719ebf55872SChristoph Hellwig busy = xfs_alloc_compute_aligned(args, fbno, flen,
1720ebf55872SChristoph Hellwig &rbno, &rlen, &busy_gen);
172130f712c9SDave Chinner
172230f712c9SDave Chinner if (rlen >= args->maxlen)
172330f712c9SDave Chinner break;
172430f712c9SDave Chinner
172530f712c9SDave Chinner error = xfs_btree_increment(cnt_cur, 0, &i);
172630f712c9SDave Chinner if (error)
172730f712c9SDave Chinner goto error0;
17288ebbf262SDave Chinner if (i)
17298ebbf262SDave Chinner continue;
17308ebbf262SDave Chinner
173130f712c9SDave Chinner /*
17328ebbf262SDave Chinner * Our only valid extents must have been busy. Flush and
17338ebbf262SDave Chinner * retry the allocation again. If we get an -EAGAIN
17348ebbf262SDave Chinner * error, we're being told that a deadlock was avoided
17358ebbf262SDave Chinner * and the current transaction needs committing before
17368ebbf262SDave Chinner * the allocation can be retried.
173730f712c9SDave Chinner */
173830f712c9SDave Chinner trace_xfs_alloc_size_busy(args);
17398ebbf262SDave Chinner error = xfs_extent_busy_flush(args->tp, args->pag,
17406a2a9d77SDave Chinner busy_gen, alloc_flags);
17418ebbf262SDave Chinner if (error)
17428ebbf262SDave Chinner goto error0;
17438ebbf262SDave Chinner
17448ebbf262SDave Chinner alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
17458ebbf262SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
174630f712c9SDave Chinner goto restart;
174730f712c9SDave Chinner }
174830f712c9SDave Chinner }
174930f712c9SDave Chinner
175030f712c9SDave Chinner /*
175130f712c9SDave Chinner * In the first case above, we got the last entry in the
175230f712c9SDave Chinner * by-size btree. Now we check to see if the space hits maxlen
175330f712c9SDave Chinner * once aligned; if not, we search left for something better.
175430f712c9SDave Chinner * This can't happen in the second case above.
175530f712c9SDave Chinner */
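/*
 * For example, if the last record trims to rlen 20 but maxlen is 25,
 * walk left while the records are still longer than the best trimmed
 * length; a record that trims to the full 25 blocks ends the scan
 * early.
 */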
175630f712c9SDave Chinner rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1757f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp,
1758f9e03706SDarrick J. Wong rlen != 0 &&
1759f9e03706SDarrick J. Wong (rlen > flen ||
1760f9e03706SDarrick J. Wong rbno + rlen > fbno + flen))) {
1761f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1762f9e03706SDarrick J. Wong goto error0;
1763f9e03706SDarrick J. Wong }
176430f712c9SDave Chinner if (rlen < args->maxlen) {
176530f712c9SDave Chinner xfs_agblock_t bestfbno;
176630f712c9SDave Chinner xfs_extlen_t bestflen;
176730f712c9SDave Chinner xfs_agblock_t bestrbno;
176830f712c9SDave Chinner xfs_extlen_t bestrlen;
176930f712c9SDave Chinner
177030f712c9SDave Chinner bestrlen = rlen;
177130f712c9SDave Chinner bestrbno = rbno;
177230f712c9SDave Chinner bestflen = flen;
177330f712c9SDave Chinner bestfbno = fbno;
177430f712c9SDave Chinner for (;;) {
177530f712c9SDave Chinner if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
177630f712c9SDave Chinner goto error0;
177730f712c9SDave Chinner if (i == 0)
177830f712c9SDave Chinner break;
177930f712c9SDave Chinner if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
178030f712c9SDave Chinner &i)))
178130f712c9SDave Chinner goto error0;
1782f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1783f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1784f9e03706SDarrick J. Wong goto error0;
1785f9e03706SDarrick J. Wong }
1786*1fe5c2aaSChi Zhiling if (flen <= bestrlen)
178730f712c9SDave Chinner break;
1788ebf55872SChristoph Hellwig busy = xfs_alloc_compute_aligned(args, fbno, flen,
1789ebf55872SChristoph Hellwig &rbno, &rlen, &busy_gen);
179030f712c9SDave Chinner rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1791f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp,
1792f9e03706SDarrick J. Wong rlen != 0 &&
1793f9e03706SDarrick J. Wong (rlen > flen ||
1794f9e03706SDarrick J. Wong rbno + rlen > fbno + flen))) {
1795f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1796f9e03706SDarrick J. Wong goto error0;
1797f9e03706SDarrick J. Wong }
179830f712c9SDave Chinner if (rlen > bestrlen) {
179930f712c9SDave Chinner bestrlen = rlen;
180030f712c9SDave Chinner bestrbno = rbno;
180130f712c9SDave Chinner bestflen = flen;
180230f712c9SDave Chinner bestfbno = fbno;
180330f712c9SDave Chinner if (rlen == args->maxlen)
180430f712c9SDave Chinner break;
180530f712c9SDave Chinner }
180630f712c9SDave Chinner }
180730f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
180830f712c9SDave Chinner &i)))
180930f712c9SDave Chinner goto error0;
1810f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1811f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1812f9e03706SDarrick J. Wong goto error0;
1813f9e03706SDarrick J. Wong }
181430f712c9SDave Chinner rlen = bestrlen;
181530f712c9SDave Chinner rbno = bestrbno;
181630f712c9SDave Chinner flen = bestflen;
181730f712c9SDave Chinner fbno = bestfbno;
181830f712c9SDave Chinner }
181930f712c9SDave Chinner args->wasfromfl = 0;
182030f712c9SDave Chinner /*
182130f712c9SDave Chinner * Fix up the length.
182230f712c9SDave Chinner */
182330f712c9SDave Chinner args->len = rlen;
182430f712c9SDave Chinner if (rlen < args->minlen) {
1825ebf55872SChristoph Hellwig if (busy) {
18268ebbf262SDave Chinner /*
18278ebbf262SDave Chinner * Our only valid extents must have been busy. Flush and
18288ebbf262SDave Chinner * retry the allocation again. If we get an -EAGAIN
18298ebbf262SDave Chinner * error, we're being told that a deadlock was avoided
18308ebbf262SDave Chinner * and the current transaction needs committing before
18318ebbf262SDave Chinner * the allocation can be retried.
18328ebbf262SDave Chinner */
183330f712c9SDave Chinner trace_xfs_alloc_size_busy(args);
18348ebbf262SDave Chinner error = xfs_extent_busy_flush(args->tp, args->pag,
18358ebbf262SDave Chinner busy_gen, alloc_flags);
18368ebbf262SDave Chinner if (error)
18378ebbf262SDave Chinner goto error0;
18388ebbf262SDave Chinner
18398ebbf262SDave Chinner alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
18408ebbf262SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
184130f712c9SDave Chinner goto restart;
184230f712c9SDave Chinner }
184330f712c9SDave Chinner goto out_nominleft;
184430f712c9SDave Chinner }
184530f712c9SDave Chinner xfs_alloc_fix_len(args);
184630f712c9SDave Chinner
184730f712c9SDave Chinner rlen = args->len;
1848f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1849f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1850f9e03706SDarrick J. Wong goto error0;
1851f9e03706SDarrick J. Wong }
185230f712c9SDave Chinner /*
185330f712c9SDave Chinner * Allocate and initialize a cursor for the by-block tree.
185430f712c9SDave Chinner */
185530f712c9SDave Chinner bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1856289d38d2SDave Chinner args->pag, XFS_BTNUM_BNO);
185730f712c9SDave Chinner if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
185830f712c9SDave Chinner rbno, rlen, XFSA_FIXUP_CNT_OK)))
185930f712c9SDave Chinner goto error0;
186030f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
186130f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
186230f712c9SDave Chinner cnt_cur = bno_cur = NULL;
186330f712c9SDave Chinner args->len = rlen;
186430f712c9SDave Chinner args->agbno = rbno;
1865f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp,
1866f9e03706SDarrick J. Wong args->agbno + args->len >
18679798f615SChristoph Hellwig be32_to_cpu(agf->agf_length))) {
1868f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1869f9e03706SDarrick J. Wong goto error0;
1870f9e03706SDarrick J. Wong }
187130f712c9SDave Chinner trace_xfs_alloc_size_done(args);
187230f712c9SDave Chinner return 0;
187330f712c9SDave Chinner
187430f712c9SDave Chinner error0:
187530f712c9SDave Chinner trace_xfs_alloc_size_error(args);
187630f712c9SDave Chinner if (cnt_cur)
187730f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
187830f712c9SDave Chinner if (bno_cur)
187930f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
188030f712c9SDave Chinner return error;
188130f712c9SDave Chinner
188230f712c9SDave Chinner out_nominleft:
188330f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
188430f712c9SDave Chinner trace_xfs_alloc_size_nominleft(args);
188530f712c9SDave Chinner args->agbno = NULLAGBLOCK;
188630f712c9SDave Chinner return 0;
188730f712c9SDave Chinner }
188830f712c9SDave Chinner
188930f712c9SDave Chinner /*
189030f712c9SDave Chinner * Free the extent starting at agno/bno for length.
189130f712c9SDave Chinner */
1892340785ccSDarrick J. Wong STATIC int
189330f712c9SDave Chinner xfs_free_ag_extent(
189466e3237eSDarrick J. Wong struct xfs_trans *tp,
189566e3237eSDarrick J. Wong struct xfs_buf *agbp,
1896340785ccSDarrick J. Wong xfs_agnumber_t agno,
1897340785ccSDarrick J. Wong xfs_agblock_t bno,
1898340785ccSDarrick J. Wong xfs_extlen_t len,
189966e3237eSDarrick J. Wong const struct xfs_owner_info *oinfo,
19003fd129b6SDarrick J. Wong enum xfs_ag_resv_type type)
190130f712c9SDave Chinner {
190266e3237eSDarrick J. Wong struct xfs_mount *mp;
190366e3237eSDarrick J. Wong struct xfs_btree_cur *bno_cur;
190466e3237eSDarrick J. Wong struct xfs_btree_cur *cnt_cur;
190566e3237eSDarrick J. Wong xfs_agblock_t gtbno; /* start of right neighbor */
190666e3237eSDarrick J. Wong xfs_extlen_t gtlen; /* length of right neighbor */
190766e3237eSDarrick J. Wong xfs_agblock_t ltbno; /* start of left neighbor */
190866e3237eSDarrick J. Wong xfs_extlen_t ltlen; /* length of left neighbor */
190966e3237eSDarrick J. Wong xfs_agblock_t nbno; /* new starting block of freesp */
191030f712c9SDave Chinner xfs_extlen_t nlen; /* new length of freespace */
191166e3237eSDarrick J. Wong int haveleft; /* have a left neighbor */
191266e3237eSDarrick J. Wong int haveright; /* have a right neighbor */
191366e3237eSDarrick J. Wong int i;
191466e3237eSDarrick J. Wong int error;
1915fa9c3c19SDave Chinner struct xfs_perag *pag = agbp->b_pag;
191630f712c9SDave Chinner
1917673930c3SDarrick J. Wong bno_cur = cnt_cur = NULL;
191830f712c9SDave Chinner mp = tp->t_mountp;
1919673930c3SDarrick J. Wong
192033df3a9cSDarrick J. Wong if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1921fa9c3c19SDave Chinner error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
1922673930c3SDarrick J. Wong if (error)
1923673930c3SDarrick J. Wong goto error0;
1924673930c3SDarrick J. Wong }
1925673930c3SDarrick J. Wong
192630f712c9SDave Chinner /*
192730f712c9SDave Chinner * Allocate and initialize a cursor for the by-block btree.
192830f712c9SDave Chinner */
1929289d38d2SDave Chinner bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
193030f712c9SDave Chinner /*
193130f712c9SDave Chinner * Look for a neighboring block on the left (lower block numbers)
193230f712c9SDave Chinner * that is contiguous with this space.
193330f712c9SDave Chinner */
193430f712c9SDave Chinner if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
193530f712c9SDave Chinner goto error0;
193630f712c9SDave Chinner if (haveleft) {
193730f712c9SDave Chinner /*
193830f712c9SDave Chinner * There is a block to our left.
193930f712c9SDave Chinner */
194030f712c9SDave Chinner if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i)))
194130f712c9SDave Chinner goto error0;
1942f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
1943f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1944f9e03706SDarrick J. Wong goto error0;
1945f9e03706SDarrick J. Wong }
194630f712c9SDave Chinner /*
194730f712c9SDave Chinner * It's not contiguous, though.
194830f712c9SDave Chinner */
194930f712c9SDave Chinner if (ltbno + ltlen < bno)
195030f712c9SDave Chinner haveleft = 0;
195130f712c9SDave Chinner else {
195230f712c9SDave Chinner /*
195330f712c9SDave Chinner * If this failure happens the request to free this
195430f712c9SDave Chinner * space was invalid, it's (partly) already free.
195530f712c9SDave Chinner * Very bad.
195630f712c9SDave Chinner */
1957f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1958f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1959f9e03706SDarrick J. Wong goto error0;
1960f9e03706SDarrick J. Wong }
196130f712c9SDave Chinner }
196230f712c9SDave Chinner }
196330f712c9SDave Chinner /*
196430f712c9SDave Chinner * Look for a neighboring block on the right (higher block numbers)
196530f712c9SDave Chinner * that is contiguous with this space.
196630f712c9SDave Chinner */
196730f712c9SDave Chinner if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
196830f712c9SDave Chinner goto error0;
196930f712c9SDave Chinner if (haveright) {
197030f712c9SDave Chinner /*
197130f712c9SDave Chinner * There is a block to our right.
197230f712c9SDave Chinner */
197330f712c9SDave Chinner if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i)))
197430f712c9SDave Chinner goto error0;
1975f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
1976f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1977f9e03706SDarrick J. Wong goto error0;
1978f9e03706SDarrick J. Wong }
197930f712c9SDave Chinner /*
198030f712c9SDave Chinner * It's not contiguous, though.
198130f712c9SDave Chinner */
198230f712c9SDave Chinner if (bno + len < gtbno)
198330f712c9SDave Chinner haveright = 0;
198430f712c9SDave Chinner else {
198530f712c9SDave Chinner /*
198630f712c9SDave Chinner * If this failure happens the request to free this
198730f712c9SDave Chinner * space was invalid, it's (partly) already free.
198830f712c9SDave Chinner * Very bad.
198930f712c9SDave Chinner */
1990f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1991f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1992f9e03706SDarrick J. Wong goto error0;
1993f9e03706SDarrick J. Wong }
199430f712c9SDave Chinner }
199530f712c9SDave Chinner }
199630f712c9SDave Chinner /*
199730f712c9SDave Chinner * Now allocate and initialize a cursor for the by-size tree.
199830f712c9SDave Chinner */
1999289d38d2SDave Chinner cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
200030f712c9SDave Chinner /*
200130f712c9SDave Chinner * Have both left and right contiguous neighbors.
200230f712c9SDave Chinner * Merge all three into a single free block.
200330f712c9SDave Chinner */
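/*
 * For example, freeing [bno 100, len 50] with a left neighbor at
 * [80, 20] and a right neighbor at [150, 30] deletes both neighbors'
 * by-size records and the right neighbor's by-block record, then
 * rewrites the left by-block record as the single extent [80, 100].
 */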
200430f712c9SDave Chinner if (haveleft && haveright) {
200530f712c9SDave Chinner /*
200630f712c9SDave Chinner * Delete the old by-size entry on the left.
200730f712c9SDave Chinner */
200830f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
200930f712c9SDave Chinner goto error0;
2010f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2011f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2012f9e03706SDarrick J. Wong goto error0;
2013f9e03706SDarrick J. Wong }
201430f712c9SDave Chinner if ((error = xfs_btree_delete(cnt_cur, &i)))
201530f712c9SDave Chinner goto error0;
2016f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2017f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2018f9e03706SDarrick J. Wong goto error0;
2019f9e03706SDarrick J. Wong }
202030f712c9SDave Chinner /*
202130f712c9SDave Chinner * Delete the old by-size entry on the right.
202230f712c9SDave Chinner */
202330f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
202430f712c9SDave Chinner goto error0;
2025f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2026f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2027f9e03706SDarrick J. Wong goto error0;
2028f9e03706SDarrick J. Wong }
202930f712c9SDave Chinner if ((error = xfs_btree_delete(cnt_cur, &i)))
203030f712c9SDave Chinner goto error0;
2031f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2032f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2033f9e03706SDarrick J. Wong goto error0;
2034f9e03706SDarrick J. Wong }
203530f712c9SDave Chinner /*
203630f712c9SDave Chinner * Delete the old by-block entry for the right block.
203730f712c9SDave Chinner */
203830f712c9SDave Chinner if ((error = xfs_btree_delete(bno_cur, &i)))
203930f712c9SDave Chinner goto error0;
2040f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2041f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2042f9e03706SDarrick J. Wong goto error0;
2043f9e03706SDarrick J. Wong }
204430f712c9SDave Chinner /*
204530f712c9SDave Chinner * Move the by-block cursor back to the left neighbor.
204630f712c9SDave Chinner */
204730f712c9SDave Chinner if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
204830f712c9SDave Chinner goto error0;
2049f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2050f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2051f9e03706SDarrick J. Wong goto error0;
2052f9e03706SDarrick J. Wong }
205330f712c9SDave Chinner #ifdef DEBUG
205430f712c9SDave Chinner /*
205530f712c9SDave Chinner * Check that this is the right record: delete didn't
205630f712c9SDave Chinner * mangle the cursor.
205730f712c9SDave Chinner */
205830f712c9SDave Chinner {
205930f712c9SDave Chinner xfs_agblock_t xxbno;
206030f712c9SDave Chinner xfs_extlen_t xxlen;
206130f712c9SDave Chinner
206230f712c9SDave Chinner if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
206330f712c9SDave Chinner &i)))
206430f712c9SDave Chinner goto error0;
2065f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp,
2066f9e03706SDarrick J. Wong i != 1 ||
2067f9e03706SDarrick J. Wong xxbno != ltbno ||
2068f9e03706SDarrick J. Wong xxlen != ltlen)) {
2069f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2070f9e03706SDarrick J. Wong goto error0;
2071f9e03706SDarrick J. Wong }
207230f712c9SDave Chinner }
207330f712c9SDave Chinner #endif
207430f712c9SDave Chinner /*
207530f712c9SDave Chinner * Update remaining by-block entry to the new, joined block.
207630f712c9SDave Chinner */
207730f712c9SDave Chinner nbno = ltbno;
207830f712c9SDave Chinner nlen = len + ltlen + gtlen;
207930f712c9SDave Chinner if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
208030f712c9SDave Chinner goto error0;
208130f712c9SDave Chinner }
208230f712c9SDave Chinner /*
208330f712c9SDave Chinner * Have only a left contiguous neighbor.
208430f712c9SDave Chinner * Merge it together with the new freespace.
208530f712c9SDave Chinner */
208630f712c9SDave Chinner else if (haveleft) {
208730f712c9SDave Chinner /*
208830f712c9SDave Chinner * Delete the old by-size entry on the left.
208930f712c9SDave Chinner */
209030f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
209130f712c9SDave Chinner goto error0;
2092f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2093f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2094f9e03706SDarrick J. Wong goto error0;
2095f9e03706SDarrick J. Wong }
209630f712c9SDave Chinner if ((error = xfs_btree_delete(cnt_cur, &i)))
209730f712c9SDave Chinner goto error0;
2098f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2099f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2100f9e03706SDarrick J. Wong goto error0;
2101f9e03706SDarrick J. Wong }
210230f712c9SDave Chinner /*
210330f712c9SDave Chinner * Back up the by-block cursor to the left neighbor, and
210430f712c9SDave Chinner * update its length.
210530f712c9SDave Chinner */
210630f712c9SDave Chinner if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
210730f712c9SDave Chinner goto error0;
2108f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2109f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2110f9e03706SDarrick J. Wong goto error0;
2111f9e03706SDarrick J. Wong }
211230f712c9SDave Chinner nbno = ltbno;
211330f712c9SDave Chinner nlen = len + ltlen;
211430f712c9SDave Chinner if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
211530f712c9SDave Chinner goto error0;
211630f712c9SDave Chinner }
211730f712c9SDave Chinner /*
211830f712c9SDave Chinner * Have only a right contiguous neighbor.
211930f712c9SDave Chinner * Merge it together with the new freespace.
212030f712c9SDave Chinner */
212130f712c9SDave Chinner else if (haveright) {
212230f712c9SDave Chinner /*
212330f712c9SDave Chinner * Delete the old by-size entry on the right.
212430f712c9SDave Chinner */
212530f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
212630f712c9SDave Chinner goto error0;
2127f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2128f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2129f9e03706SDarrick J. Wong goto error0;
2130f9e03706SDarrick J. Wong }
213130f712c9SDave Chinner if ((error = xfs_btree_delete(cnt_cur, &i)))
213230f712c9SDave Chinner goto error0;
2133f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2134f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2135f9e03706SDarrick J. Wong goto error0;
2136f9e03706SDarrick J. Wong }
213730f712c9SDave Chinner /*
213830f712c9SDave Chinner * Update the starting block and length of the right
213930f712c9SDave Chinner * neighbor in the by-block tree.
214030f712c9SDave Chinner */
214130f712c9SDave Chinner nbno = bno;
214230f712c9SDave Chinner nlen = len + gtlen;
214330f712c9SDave Chinner if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
214430f712c9SDave Chinner goto error0;
214530f712c9SDave Chinner }
214630f712c9SDave Chinner /*
214730f712c9SDave Chinner * No contiguous neighbors.
214830f712c9SDave Chinner * Insert the new freespace into the by-block tree.
214930f712c9SDave Chinner */
215030f712c9SDave Chinner else {
215130f712c9SDave Chinner nbno = bno;
215230f712c9SDave Chinner nlen = len;
215330f712c9SDave Chinner if ((error = xfs_btree_insert(bno_cur, &i)))
215430f712c9SDave Chinner goto error0;
2155f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2156f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2157f9e03706SDarrick J. Wong goto error0;
2158f9e03706SDarrick J. Wong }
215930f712c9SDave Chinner }
216030f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
216130f712c9SDave Chinner bno_cur = NULL;
216230f712c9SDave Chinner /*
216330f712c9SDave Chinner * In all cases we need to insert the new freespace in the by-size tree.
216430f712c9SDave Chinner */
216530f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
216630f712c9SDave Chinner goto error0;
2167f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 0)) {
2168f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2169f9e03706SDarrick J. Wong goto error0;
2170f9e03706SDarrick J. Wong }
217130f712c9SDave Chinner if ((error = xfs_btree_insert(cnt_cur, &i)))
217230f712c9SDave Chinner goto error0;
2173f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1)) {
2174f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
2175f9e03706SDarrick J. Wong goto error0;
2176f9e03706SDarrick J. Wong }
217730f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
217830f712c9SDave Chinner cnt_cur = NULL;
217930f712c9SDave Chinner
218030f712c9SDave Chinner /*
218130f712c9SDave Chinner * Update the freespace totals in the ag and superblock.
218230f712c9SDave Chinner */
218392a00544SGao Xiang error = xfs_alloc_update_counters(tp, agbp, len);
218492a00544SGao Xiang xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
218530f712c9SDave Chinner if (error)
218630f712c9SDave Chinner goto error0;
218730f712c9SDave Chinner
2188ff6d6af2SBill O'Donnell XFS_STATS_INC(mp, xs_freex);
2189ff6d6af2SBill O'Donnell XFS_STATS_ADD(mp, xs_freeb, len);
219030f712c9SDave Chinner
219121592863SBrian Foster trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
219230f712c9SDave Chinner
219330f712c9SDave Chinner return 0;
219430f712c9SDave Chinner
219530f712c9SDave Chinner error0:
219621592863SBrian Foster trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
219730f712c9SDave Chinner if (bno_cur)
219830f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
219930f712c9SDave Chinner if (cnt_cur)
220030f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
220130f712c9SDave Chinner return error;
220230f712c9SDave Chinner }
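/*
 * Worked example of the coalescing above (editorial illustration, not part
 * of the original source): freeing [bno = 100, len = 10) with a left
 * neighbor [90, 10) and a right neighbor [110, 5) takes the "both
 * neighbors" path: the two old by-size records are deleted, the right
 * neighbor's by-block record is removed, the left record is rewritten as
 * [90, 25), and a single by-size record of length 25 is inserted.
 */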
220330f712c9SDave Chinner
220430f712c9SDave Chinner /*
220530f712c9SDave Chinner * Visible (exported) allocation/free functions.
220630f712c9SDave Chinner * Some of these are used just by xfs_alloc_btree.c and this file.
220730f712c9SDave Chinner */
220830f712c9SDave Chinner
220930f712c9SDave Chinner /*
22107cb3efb4SDarrick J. Wong * Compute and fill in value of m_alloc_maxlevels.
221130f712c9SDave Chinner */
221230f712c9SDave Chinner void
221330f712c9SDave Chinner xfs_alloc_compute_maxlevels(
221430f712c9SDave Chinner xfs_mount_t *mp) /* file system mount structure */
221530f712c9SDave Chinner {
22167cb3efb4SDarrick J. Wong mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
221719b54ee6SDarrick J. Wong (mp->m_sb.sb_agblocks + 1) / 2);
22180ed5f735SDarrick J. Wong ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
221930f712c9SDave Chinner }
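/*
 * Editorial sketch, not part of the original source: a userspace model of
 * the height computation above, assuming xfs_btree_compute_maxlevels()
 * returns the minimum btree height needed to index the given number of
 * records when every block holds at least its minrecs (m_alloc_mnr[0] for
 * leaves, m_alloc_mnr[1] for interior nodes). The record count passed in
 * above, (sb_agblocks + 1) / 2, models the worst case of alternating
 * free and allocated single blocks.
 */
static inline unsigned int
model_compute_maxlevels(
	unsigned int		leaf_minrecs,
	unsigned int		node_minrecs,
	unsigned long long	records)
{
	unsigned long long	capacity = leaf_minrecs;
	unsigned int		height = 1;

	/* add a level until the minimally-filled tree can hold all records */
	while (records > capacity) {
		capacity *= node_minrecs;
		height++;
	}
	return height;
}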
222030f712c9SDave Chinner
222130f712c9SDave Chinner /*
22223fd129b6SDarrick J. Wong * Find the length of the longest extent in an AG. The 'need' parameter
22233fd129b6SDarrick J. Wong * specifies how much space we're going to need for the AGFL and the
22243fd129b6SDarrick J. Wong * 'reserved' parameter tells us how many blocks in this AG are reserved for
22253fd129b6SDarrick J. Wong * other callers.
222630f712c9SDave Chinner */
222730f712c9SDave Chinner xfs_extlen_t
222830f712c9SDave Chinner xfs_alloc_longest_free_extent(
222950adbcb4SDave Chinner struct xfs_perag *pag,
22303fd129b6SDarrick J. Wong xfs_extlen_t need,
22313fd129b6SDarrick J. Wong xfs_extlen_t reserved)
223230f712c9SDave Chinner {
223350adbcb4SDave Chinner xfs_extlen_t delta = 0;
223430f712c9SDave Chinner
22353fd129b6SDarrick J. Wong /*
22363fd129b6SDarrick J. Wong * If the AGFL needs a recharge, we'll have to subtract that from the
22373fd129b6SDarrick J. Wong * longest extent.
22383fd129b6SDarrick J. Wong */
223930f712c9SDave Chinner if (need > pag->pagf_flcount)
224030f712c9SDave Chinner delta = need - pag->pagf_flcount;
224130f712c9SDave Chinner
22423fd129b6SDarrick J. Wong /*
22433fd129b6SDarrick J. Wong * If we cannot maintain others' reservations with space from the
22443fd129b6SDarrick J. Wong * not-longest freesp extents, we'll have to subtract /that/ from
22453fd129b6SDarrick J. Wong * the longest extent too.
22463fd129b6SDarrick J. Wong */
22473fd129b6SDarrick J. Wong if (pag->pagf_freeblks - pag->pagf_longest < reserved)
22483fd129b6SDarrick J. Wong delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
22493fd129b6SDarrick J. Wong
22503fd129b6SDarrick J. Wong /*
22513fd129b6SDarrick J. Wong * If the longest extent is long enough to satisfy all the
22523fd129b6SDarrick J. Wong * reservations and AGFL rules in place, we can return this extent.
22533fd129b6SDarrick J. Wong */
225430f712c9SDave Chinner if (pag->pagf_longest > delta)
22551c743574SDave Chinner return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
22561c743574SDave Chinner pag->pagf_longest - delta);
22573fd129b6SDarrick J. Wong
22583fd129b6SDarrick J. Wong /* Otherwise, let the caller try for 1 block if there's space. */
225930f712c9SDave Chinner return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
226030f712c9SDave Chinner }
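/*
 * Editorial sketch, not part of the original source: a userspace model of
 * the function above, with the perag fields passed in as plain integers
 * and a ternary standing in for min_t().
 */
static inline unsigned int
model_longest_free_extent(
	unsigned int	pagf_freeblks,
	unsigned int	pagf_longest,
	unsigned int	pagf_flcount,
	unsigned int	ag_max_usable,
	unsigned int	need,
	unsigned int	reserved)
{
	unsigned int	delta = 0;

	/* blocks an AGFL refill would take out of the longest extent */
	if (need > pagf_flcount)
		delta = need - pagf_flcount;

	/* blocks the longest extent must donate to cover reservations */
	if (pagf_freeblks - pagf_longest < reserved)
		delta += reserved - (pagf_freeblks - pagf_longest);

	if (pagf_longest > delta)
		return pagf_longest - delta < ag_max_usable ?
				pagf_longest - delta : ag_max_usable;

	/* 1 if a single block may still be available, else 0 */
	return pagf_flcount > 0 || pagf_longest > 0;
}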
226130f712c9SDave Chinner
22621cac233cSDarrick J. Wong /*
22631cac233cSDarrick J. Wong * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
22641cac233cSDarrick J. Wong * return the largest possible minimum length.
22651cac233cSDarrick J. Wong */
2266496817b4SDave Chinner unsigned int
2267496817b4SDave Chinner xfs_alloc_min_freelist(
2268496817b4SDave Chinner struct xfs_mount *mp,
2269496817b4SDave Chinner struct xfs_perag *pag)
2270496817b4SDave Chinner {
22711cac233cSDarrick J. Wong /* AG btrees have at least 1 level. */
22721cac233cSDarrick J. Wong static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
22731cac233cSDarrick J. Wong const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
2274496817b4SDave Chinner unsigned int min_free;
2275496817b4SDave Chinner
22767cb3efb4SDarrick J. Wong ASSERT(mp->m_alloc_maxlevels > 0);
22771cac233cSDarrick J. Wong
22780838177bSOmar Sandoval /*
22790838177bSOmar Sandoval * For a btree shorter than the maximum height, the worst case is that
22800838177bSOmar Sandoval * every level gets split and a new level is added, then while inserting
22810838177bSOmar Sandoval * another entry to refill the AGFL, every level under the old root gets
22820838177bSOmar Sandoval * split again. This is:
22830838177bSOmar Sandoval *
22840838177bSOmar Sandoval * (full height split reservation) + (AGFL refill split height)
22850838177bSOmar Sandoval * = (current height + 1) + (current height - 1)
22860838177bSOmar Sandoval * = (new height) + (new height - 2)
22870838177bSOmar Sandoval * = 2 * new height - 2
22880838177bSOmar Sandoval *
22890838177bSOmar Sandoval * For a btree of maximum height, the worst case is that every level
22900838177bSOmar Sandoval * under the root gets split, then while inserting another entry to
22910838177bSOmar Sandoval * refill the AGFL, every level under the root gets split again. This is
22920838177bSOmar Sandoval * also:
22930838177bSOmar Sandoval *
22940838177bSOmar Sandoval * 2 * (current height - 1)
22950838177bSOmar Sandoval * = 2 * (new height - 1)
22960838177bSOmar Sandoval * = 2 * new height - 2
22970838177bSOmar Sandoval */
22980838177bSOmar Sandoval
2299496817b4SDave Chinner /* space needed by-bno freespace btree */
23001cac233cSDarrick J. Wong min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
23010838177bSOmar Sandoval mp->m_alloc_maxlevels) * 2 - 2;
2302496817b4SDave Chinner /* space needed by-size freespace btree */
23031cac233cSDarrick J. Wong min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
23040838177bSOmar Sandoval mp->m_alloc_maxlevels) * 2 - 2;
230552548852SDarrick J. Wong /* space needed by the reverse mapping btree */
2306ebd9027dSDave Chinner if (xfs_has_rmapbt(mp))
23071cac233cSDarrick J. Wong min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
23080838177bSOmar Sandoval mp->m_rmap_maxlevels) * 2 - 2;
2309496817b4SDave Chinner
2310496817b4SDave Chinner return min_free;
2311496817b4SDave Chinner }
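/*
 * Worked example (editorial illustration): with pagf_levels[BNOi] = 2,
 * pagf_levels[CNTi] = 2, m_alloc_maxlevels = 5 and no rmapbt, each
 * freespace btree needs min(2 + 1, 5) * 2 - 2 = 4 blocks, giving
 * min_free = 4 + 4 = 8 AGFL blocks.
 */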
2312496817b4SDave Chinner
231330f712c9SDave Chinner /*
231472d55285SDave Chinner * Check if the operation we are fixing up the freelist for should go ahead or
231572d55285SDave Chinner * not. If we are freeing blocks, we always allow it, otherwise the allocation
231672d55285SDave Chinner * is dependent on whether the size and shape of free space available will
231772d55285SDave Chinner * permit the requested allocation to take place.
231872d55285SDave Chinner */
231972d55285SDave Chinner static bool
232072d55285SDave Chinner xfs_alloc_space_available(
232172d55285SDave Chinner struct xfs_alloc_arg *args,
232272d55285SDave Chinner xfs_extlen_t min_free,
232372d55285SDave Chinner int flags)
232472d55285SDave Chinner {
232572d55285SDave Chinner struct xfs_perag *pag = args->pag;
232612ef8301SChristoph Hellwig xfs_extlen_t alloc_len, longest;
23273fd129b6SDarrick J. Wong xfs_extlen_t reservation; /* blocks that are still reserved */
232872d55285SDave Chinner int available;
23291ca89fbcSBrian Foster xfs_extlen_t agflcount;
233072d55285SDave Chinner
233172d55285SDave Chinner if (flags & XFS_ALLOC_FLAG_FREEING)
233272d55285SDave Chinner return true;
233372d55285SDave Chinner
23343fd129b6SDarrick J. Wong reservation = xfs_ag_resv_needed(pag, args->resv);
23353fd129b6SDarrick J. Wong
233672d55285SDave Chinner /* do we have enough contiguous free space for the allocation? */
233712ef8301SChristoph Hellwig alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2338a1f69417SEric Sandeen longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
233912ef8301SChristoph Hellwig if (longest < alloc_len)
234072d55285SDave Chinner return false;
234172d55285SDave Chinner
23421ca89fbcSBrian Foster /*
23431ca89fbcSBrian Foster * Do we have enough free space remaining for the allocation? Don't
23441ca89fbcSBrian Foster * account extra agfl blocks because we are about to defer freeing them,
23451ca89fbcSBrian Foster * making them unavailable until the current transaction commits.
23461ca89fbcSBrian Foster */
23471ca89fbcSBrian Foster agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
23481ca89fbcSBrian Foster available = (int)(pag->pagf_freeblks + agflcount -
234954fee133SChristoph Hellwig reservation - min_free - args->minleft);
235012ef8301SChristoph Hellwig if (available < (int)max(args->total, alloc_len))
235172d55285SDave Chinner return false;
235272d55285SDave Chinner
235354fee133SChristoph Hellwig /*
235454fee133SChristoph Hellwig * Clamp maxlen to the amount of free space available for the actual
235554fee133SChristoph Hellwig * extent allocation.
235654fee133SChristoph Hellwig */
235754fee133SChristoph Hellwig if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
235854fee133SChristoph Hellwig args->maxlen = available;
235954fee133SChristoph Hellwig ASSERT(args->maxlen > 0);
236054fee133SChristoph Hellwig ASSERT(args->maxlen >= args->minlen);
236154fee133SChristoph Hellwig }
236254fee133SChristoph Hellwig
236372d55285SDave Chinner return true;
236472d55285SDave Chinner }
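/*
 * Worked example (editorial illustration, hypothetical numbers): with
 * pagf_freeblks = 100, pagf_flcount = 4, min_free = 6, reservation = 10
 * and minleft = 0, agflcount = min(4, 6) = 4 and
 * available = 100 + 4 - 10 - 6 = 88, so the allocation proceeds provided
 * max(total, alloc_len) <= 88, with maxlen clamped down to 88 if larger.
 */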
236572d55285SDave Chinner
23664223f659SBrian Foster int
23674223f659SBrian Foster xfs_free_agfl_block(
23684223f659SBrian Foster struct xfs_trans *tp,
23694223f659SBrian Foster xfs_agnumber_t agno,
23704223f659SBrian Foster xfs_agblock_t agbno,
23714223f659SBrian Foster struct xfs_buf *agbp,
23724223f659SBrian Foster struct xfs_owner_info *oinfo)
23734223f659SBrian Foster {
23744223f659SBrian Foster int error;
23754223f659SBrian Foster struct xfs_buf *bp;
23764223f659SBrian Foster
23774223f659SBrian Foster error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
23784223f659SBrian Foster XFS_AG_RESV_AGFL);
23794223f659SBrian Foster if (error)
23804223f659SBrian Foster return error;
23814223f659SBrian Foster
2382ee647f85SDarrick J. Wong error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2383ee647f85SDarrick J. Wong XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2384ee647f85SDarrick J. Wong tp->t_mountp->m_bsize, 0, &bp);
2385ee647f85SDarrick J. Wong if (error)
2386ee647f85SDarrick J. Wong return error;
23874223f659SBrian Foster xfs_trans_binval(tp, bp);
23884223f659SBrian Foster
23894223f659SBrian Foster return 0;
23904223f659SBrian Foster }
23914223f659SBrian Foster
239230f712c9SDave Chinner /*
2393e0a8de7dSDave Chinner * Check the agfl fields of the agf for inconsistency or corruption.
2394e0a8de7dSDave Chinner *
2395e0a8de7dSDave Chinner * The original purpose was to detect an agfl header padding mismatch between
2396e0a8de7dSDave Chinner * current and early v5 kernels. This problem manifests as a 1-slot size
2397e0a8de7dSDave Chinner * difference between the on-disk flcount and the active [first, last] range of
2398e0a8de7dSDave Chinner * a wrapped agfl.
2399e0a8de7dSDave Chinner *
2400e0a8de7dSDave Chinner * However, we need to use these same checks to catch agfl count corruptions
2401e0a8de7dSDave Chinner * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2402e0a8de7dSDave Chinner * way, we need to reset the agfl and warn the user.
2403a27ba260SBrian Foster *
2404a27ba260SBrian Foster * Return true if a reset is required before the agfl can be used, false
2405a27ba260SBrian Foster * otherwise.
2406a27ba260SBrian Foster */
2407a27ba260SBrian Foster static bool
2408a27ba260SBrian Foster xfs_agfl_needs_reset(
2409a27ba260SBrian Foster struct xfs_mount *mp,
2410a27ba260SBrian Foster struct xfs_agf *agf)
2411a27ba260SBrian Foster {
2412a27ba260SBrian Foster uint32_t f = be32_to_cpu(agf->agf_flfirst);
2413a27ba260SBrian Foster uint32_t l = be32_to_cpu(agf->agf_fllast);
2414a27ba260SBrian Foster uint32_t c = be32_to_cpu(agf->agf_flcount);
2415a27ba260SBrian Foster int agfl_size = xfs_agfl_size(mp);
2416a27ba260SBrian Foster int active;
2417a27ba260SBrian Foster
2418a27ba260SBrian Foster /*
2419a27ba260SBrian Foster * The agf read verifier catches severe corruption of these fields.
2420a27ba260SBrian Foster * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2421a27ba260SBrian Foster * the verifier allows it.
2422a27ba260SBrian Foster */
2423a27ba260SBrian Foster if (f >= agfl_size || l >= agfl_size)
2424a27ba260SBrian Foster return true;
2425a27ba260SBrian Foster if (c > agfl_size)
2426a27ba260SBrian Foster return true;
2427a27ba260SBrian Foster
2428a27ba260SBrian Foster /*
2429a27ba260SBrian Foster * Check consistency between the on-disk count and the active range. An
2430a27ba260SBrian Foster * agfl padding mismatch manifests as an inconsistent flcount.
2431a27ba260SBrian Foster */
2432a27ba260SBrian Foster if (c && l >= f)
2433a27ba260SBrian Foster active = l - f + 1;
2434a27ba260SBrian Foster else if (c)
2435a27ba260SBrian Foster active = agfl_size - f + l + 1;
2436a27ba260SBrian Foster else
2437a27ba260SBrian Foster active = 0;
2438a27ba260SBrian Foster
2439a27ba260SBrian Foster return active != c;
2440a27ba260SBrian Foster }
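/*
 * Worked example (editorial illustration, hypothetical geometry): if
 * xfs_agfl_size() were 100 and the list wrapped with flfirst = 98 and
 * fllast = 1, the active slots are 98, 99, 0 and 1, so
 * active = 100 - 98 + 1 + 1 = 4. An on-disk flcount of 4 is consistent;
 * a padding mismatch that leaves flcount off by one slot forces a reset.
 */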
2441a27ba260SBrian Foster
2442a27ba260SBrian Foster /*
2443a27ba260SBrian Foster * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2444a27ba260SBrian Foster * agfl content cannot be trusted. Warn the user that a repair is required to
2445a27ba260SBrian Foster * recover leaked blocks.
2446a27ba260SBrian Foster *
2447a27ba260SBrian Foster * The purpose of this mechanism is to handle filesystems affected by the agfl
2448a27ba260SBrian Foster * header padding mismatch problem. A reset keeps the filesystem online with a
2449a27ba260SBrian Foster * relatively minor free space accounting inconsistency rather than suffer the
2450a27ba260SBrian Foster * inevitable crash from use of an invalid agfl block.
2451a27ba260SBrian Foster */
2452a27ba260SBrian Foster static void
2453a27ba260SBrian Foster xfs_agfl_reset(
2454a27ba260SBrian Foster struct xfs_trans *tp,
2455a27ba260SBrian Foster struct xfs_buf *agbp,
2456a27ba260SBrian Foster struct xfs_perag *pag)
2457a27ba260SBrian Foster {
2458a27ba260SBrian Foster struct xfs_mount *mp = tp->t_mountp;
24599798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr;
2460a27ba260SBrian Foster
24617ac2ff8bSDave Chinner ASSERT(xfs_perag_agfl_needs_reset(pag));
2462a27ba260SBrian Foster trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2463a27ba260SBrian Foster
2464a27ba260SBrian Foster xfs_warn(mp,
2465a27ba260SBrian Foster "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2466a27ba260SBrian Foster "Please unmount and run xfs_repair.",
2467a27ba260SBrian Foster pag->pag_agno, pag->pagf_flcount);
2468a27ba260SBrian Foster
2469a27ba260SBrian Foster agf->agf_flfirst = 0;
2470a27ba260SBrian Foster agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2471a27ba260SBrian Foster agf->agf_flcount = 0;
2472a27ba260SBrian Foster xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2473a27ba260SBrian Foster XFS_AGF_FLCOUNT);
2474a27ba260SBrian Foster
2475a27ba260SBrian Foster pag->pagf_flcount = 0;
24767ac2ff8bSDave Chinner clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
2477a27ba260SBrian Foster }
2478a27ba260SBrian Foster
2479a27ba260SBrian Foster /*
2480f8f2835aSBrian Foster * Defer an AGFL block free. This is effectively equivalent to
2481c201d9caSDarrick J. Wong * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2482f8f2835aSBrian Foster *
2483f8f2835aSBrian Foster * Deferring AGFL frees helps prevent log reservation overruns due to too many
2484f8f2835aSBrian Foster * allocation operations in a transaction. AGFL frees are prone to this problem
2485f8f2835aSBrian Foster * because they are always freed one at a time. Further, an immediate
2486f8f2835aSBrian Foster * AGFL block free can cause a btree join and require another block free before
2487f8f2835aSBrian Foster * the real allocation can proceed. Deferring the free disconnects freeing up
2488f8f2835aSBrian Foster * the AGFL slot from freeing the block.
2489f8f2835aSBrian Foster */
24907dfee17bSDave Chinner static int
2491f8f2835aSBrian Foster xfs_defer_agfl_block(
24920f37d178SBrian Foster struct xfs_trans *tp,
2493f8f2835aSBrian Foster xfs_agnumber_t agno,
24942bed0d82SDave Chinner xfs_agblock_t agbno,
2495f8f2835aSBrian Foster struct xfs_owner_info *oinfo)
2496f8f2835aSBrian Foster {
24970f37d178SBrian Foster struct xfs_mount *mp = tp->t_mountp;
2498578c714bSDarrick J. Wong struct xfs_extent_free_item *xefi;
24992bed0d82SDave Chinner xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
2500f8f2835aSBrian Foster
2501c201d9caSDarrick J. Wong ASSERT(xfs_extfree_item_cache != NULL);
2502f8f2835aSBrian Foster ASSERT(oinfo != NULL);
2503f8f2835aSBrian Foster
25042bed0d82SDave Chinner if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
25052bed0d82SDave Chinner return -EFSCORRUPTED;
25062bed0d82SDave Chinner
2507578c714bSDarrick J. Wong xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
25083050bd0bSCarlos Maiolino GFP_KERNEL | __GFP_NOFAIL);
25092bed0d82SDave Chinner xefi->xefi_startblock = fsbno;
2510578c714bSDarrick J. Wong xefi->xefi_blockcount = 1;
2511578c714bSDarrick J. Wong xefi->xefi_owner = oinfo->oi_owner;
2512b742d7b4SDave Chinner xefi->xefi_agresv = XFS_AG_RESV_AGFL;
2513f8f2835aSBrian Foster
2514f8f2835aSBrian Foster trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2515f8f2835aSBrian Foster
2516f6b38463SDarrick J. Wong xfs_extent_free_get_group(mp, xefi);
2517578c714bSDarrick J. Wong xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
25187dfee17bSDave Chinner return 0;
2519f8f2835aSBrian Foster }
2520f8f2835aSBrian Foster
2521c201d9caSDarrick J. Wong /*
2522c201d9caSDarrick J. Wong * Add the extent to the list of extents to be free at transaction end.
2523c201d9caSDarrick J. Wong * The list is maintained sorted (by block number).
2524c201d9caSDarrick J. Wong */
25257dfee17bSDave Chinner int
2526c201d9caSDarrick J. Wong __xfs_free_extent_later(
2527c201d9caSDarrick J. Wong struct xfs_trans *tp,
2528c201d9caSDarrick J. Wong xfs_fsblock_t bno,
2529c201d9caSDarrick J. Wong xfs_filblks_t len,
2530c201d9caSDarrick J. Wong const struct xfs_owner_info *oinfo,
2531b742d7b4SDave Chinner enum xfs_ag_resv_type type,
2532c201d9caSDarrick J. Wong bool skip_discard)
2533c201d9caSDarrick J. Wong {
2534578c714bSDarrick J. Wong struct xfs_extent_free_item *xefi;
2535c201d9caSDarrick J. Wong struct xfs_mount *mp = tp->t_mountp;
2536f6b38463SDarrick J. Wong #ifdef DEBUG
2537c201d9caSDarrick J. Wong xfs_agnumber_t agno;
2538c201d9caSDarrick J. Wong xfs_agblock_t agbno;
2539c201d9caSDarrick J. Wong
2540c201d9caSDarrick J. Wong ASSERT(bno != NULLFSBLOCK);
2541c201d9caSDarrick J. Wong ASSERT(len > 0);
254295f0b95eSChandan Babu R ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
2543c201d9caSDarrick J. Wong ASSERT(!isnullstartblock(bno));
2544c201d9caSDarrick J. Wong agno = XFS_FSB_TO_AGNO(mp, bno);
2545c201d9caSDarrick J. Wong agbno = XFS_FSB_TO_AGBNO(mp, bno);
2546c201d9caSDarrick J. Wong ASSERT(agno < mp->m_sb.sb_agcount);
2547c201d9caSDarrick J. Wong ASSERT(agbno < mp->m_sb.sb_agblocks);
2548c201d9caSDarrick J. Wong ASSERT(len < mp->m_sb.sb_agblocks);
2549c201d9caSDarrick J. Wong ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
2550c201d9caSDarrick J. Wong #endif
2551c201d9caSDarrick J. Wong ASSERT(xfs_extfree_item_cache != NULL);
2552b742d7b4SDave Chinner ASSERT(type != XFS_AG_RESV_AGFL);
2553c201d9caSDarrick J. Wong
25547dfee17bSDave Chinner if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
25557dfee17bSDave Chinner return -EFSCORRUPTED;
25567dfee17bSDave Chinner
2557578c714bSDarrick J. Wong xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
2558c201d9caSDarrick J. Wong GFP_KERNEL | __GFP_NOFAIL);
2559578c714bSDarrick J. Wong xefi->xefi_startblock = bno;
2560578c714bSDarrick J. Wong xefi->xefi_blockcount = (xfs_extlen_t)len;
2561b742d7b4SDave Chinner xefi->xefi_agresv = type;
2562b3b5ff41SDarrick J. Wong if (skip_discard)
2563578c714bSDarrick J. Wong xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2564b3b5ff41SDarrick J. Wong if (oinfo) {
2565b3b5ff41SDarrick J. Wong ASSERT(oinfo->oi_offset == 0);
2566b3b5ff41SDarrick J. Wong
2567b3b5ff41SDarrick J. Wong if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2568578c714bSDarrick J. Wong xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
2569b3b5ff41SDarrick J. Wong if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2570578c714bSDarrick J. Wong xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
2571578c714bSDarrick J. Wong xefi->xefi_owner = oinfo->oi_owner;
2572b3b5ff41SDarrick J. Wong } else {
2573578c714bSDarrick J. Wong xefi->xefi_owner = XFS_RMAP_OWN_NULL;
2574b3b5ff41SDarrick J. Wong }
2575f6b38463SDarrick J. Wong trace_xfs_bmap_free_defer(mp,
2576c201d9caSDarrick J. Wong XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
2577c201d9caSDarrick J. Wong XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
2578f6b38463SDarrick J. Wong
2579f6b38463SDarrick J. Wong xfs_extent_free_get_group(mp, xefi);
2580578c714bSDarrick J. Wong xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
25817dfee17bSDave Chinner return 0;
2582c201d9caSDarrick J. Wong }
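/*
 * Typical call pattern (editorial illustration; the caller and variable
 * names here are hypothetical):
 *
 *	error = __xfs_free_extent_later(tp, del->br_startblock,
 *			del->br_blockcount, &oinfo, XFS_AG_RESV_NONE, false);
 *	if (error)
 *		return error;
 *
 * The blocks do not return to the free space btrees until the deferred
 * free items are processed, after the current transaction commits.
 */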
2583c201d9caSDarrick J. Wong
258430151967SChandan Babu R /*
258530151967SChandan Babu R * Check if an AGF has a free extent record whose length is equal to
258630151967SChandan Babu R * args->minlen.
258730151967SChandan Babu R */
258830151967SChandan Babu R STATIC int
258930151967SChandan Babu R xfs_exact_minlen_extent_available(
259030151967SChandan Babu R struct xfs_alloc_arg *args,
259130151967SChandan Babu R struct xfs_buf *agbp,
259230151967SChandan Babu R int *stat)
259330151967SChandan Babu R {
259430151967SChandan Babu R struct xfs_btree_cur *cnt_cur;
259530151967SChandan Babu R xfs_agblock_t fbno;
259630151967SChandan Babu R xfs_extlen_t flen;
259730151967SChandan Babu R int error = 0;
259830151967SChandan Babu R
259930151967SChandan Babu R cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2600289d38d2SDave Chinner args->pag, XFS_BTNUM_CNT);
260130151967SChandan Babu R error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
260230151967SChandan Babu R if (error)
260330151967SChandan Babu R goto out;
260430151967SChandan Babu R
260530151967SChandan Babu R if (*stat == 0) {
260630151967SChandan Babu R error = -EFSCORRUPTED;
260730151967SChandan Babu R goto out;
260830151967SChandan Babu R }
260930151967SChandan Babu R
261030151967SChandan Babu R error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
261130151967SChandan Babu R if (error)
261230151967SChandan Babu R goto out;
261330151967SChandan Babu R
261430151967SChandan Babu R if (*stat == 1 && flen != args->minlen)
261530151967SChandan Babu R *stat = 0;
261630151967SChandan Babu R
261730151967SChandan Babu R out:
261830151967SChandan Babu R xfs_btree_del_cursor(cnt_cur, error);
261930151967SChandan Babu R
262030151967SChandan Babu R return error;
262130151967SChandan Babu R }
262230151967SChandan Babu R
2623f8f2835aSBrian Foster /*
262430f712c9SDave Chinner * Decide whether to use this allocation group for this allocation.
262530f712c9SDave Chinner * If so, fix up the btree freelist's size.
262630f712c9SDave Chinner */
26272e9101daSDarrick J. Wong int /* error */
262830f712c9SDave Chinner xfs_alloc_fix_freelist(
2629396503fcSDave Chinner struct xfs_alloc_arg *args, /* allocation argument structure */
26306a2a9d77SDave Chinner uint32_t alloc_flags)
263130f712c9SDave Chinner {
2632396503fcSDave Chinner struct xfs_mount *mp = args->mp;
2633396503fcSDave Chinner struct xfs_perag *pag = args->pag;
2634396503fcSDave Chinner struct xfs_trans *tp = args->tp;
2635396503fcSDave Chinner struct xfs_buf *agbp = NULL;
2636396503fcSDave Chinner struct xfs_buf *agflbp = NULL;
2637396503fcSDave Chinner struct xfs_alloc_arg targs; /* local allocation arguments */
263830f712c9SDave Chinner xfs_agblock_t bno; /* freelist block */
263930f712c9SDave Chinner xfs_extlen_t need; /* total blocks needed in freelist */
2640c184f855SJan Kara int error = 0;
264130f712c9SDave Chinner
2642362f5e74SBrian Foster /* deferred ops (AGFL block frees) require permanent transactions */
2643362f5e74SBrian Foster ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2644362f5e74SBrian Foster
26457ac2ff8bSDave Chinner if (!xfs_perag_initialised_agf(pag)) {
26466a2a9d77SDave Chinner error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
2647f48e2df8SDarrick J. Wong if (error) {
2648f48e2df8SDarrick J. Wong /* Couldn't lock the AGF so skip this AG. */
2649f48e2df8SDarrick J. Wong if (error == -EAGAIN)
2650f48e2df8SDarrick J. Wong error = 0;
2651396503fcSDave Chinner goto out_no_agbp;
265230f712c9SDave Chinner }
2653396503fcSDave Chinner }
265430f712c9SDave Chinner
265530f712c9SDave Chinner /*
2656396503fcSDave Chinner * If this is a metadata-preferred pag and we are allocating user
2657396503fcSDave Chinner * data, try somewhere else unless we are being asked to try harder
2658396503fcSDave Chinner * at this point.
265930f712c9SDave Chinner */
26607ac2ff8bSDave Chinner if (xfs_perag_prefers_metadata(pag) &&
26617ac2ff8bSDave Chinner (args->datatype & XFS_ALLOC_USERDATA) &&
26626a2a9d77SDave Chinner (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
26636a2a9d77SDave Chinner ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
2664396503fcSDave Chinner goto out_agbp_relse;
266530f712c9SDave Chinner }
266630f712c9SDave Chinner
2667496817b4SDave Chinner need = xfs_alloc_min_freelist(mp, pag);
26686a2a9d77SDave Chinner if (!xfs_alloc_space_available(args, need, alloc_flags |
266954fee133SChristoph Hellwig XFS_ALLOC_FLAG_CHECK))
2670396503fcSDave Chinner goto out_agbp_relse;
267130f712c9SDave Chinner
267230f712c9SDave Chinner /*
267330f712c9SDave Chinner * Get the AG freespace (AGF) buffer. This can fail if we're not
267430f712c9SDave Chinner * blocking on locks and the buffer is already held.
267530f712c9SDave Chinner */
2676396503fcSDave Chinner if (!agbp) {
26776a2a9d77SDave Chinner error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
2678f48e2df8SDarrick J. Wong if (error) {
2679f48e2df8SDarrick J. Wong /* Couldn't lock the AGF so skip this AG. */
2680f48e2df8SDarrick J. Wong if (error == -EAGAIN)
2681f48e2df8SDarrick J. Wong error = 0;
2682396503fcSDave Chinner goto out_no_agbp;
268330f712c9SDave Chinner }
268430f712c9SDave Chinner }
268550adbcb4SDave Chinner
2686a27ba260SBrian Foster /* reset a padding mismatched agfl before final free space check */
26877ac2ff8bSDave Chinner if (xfs_perag_agfl_needs_reset(pag))
2688a27ba260SBrian Foster xfs_agfl_reset(tp, agbp, pag);
2689a27ba260SBrian Foster
269050adbcb4SDave Chinner /* Reject if there isn't enough total space or a long enough single extent. */
2691496817b4SDave Chinner need = xfs_alloc_min_freelist(mp, pag);
26926a2a9d77SDave Chinner if (!xfs_alloc_space_available(args, need, alloc_flags))
2693396503fcSDave Chinner goto out_agbp_relse;
269472d55285SDave Chinner
2695479e112dSChristoph Hellwig if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
269630151967SChandan Babu R int stat;
269730151967SChandan Babu R
269830151967SChandan Babu R error = xfs_exact_minlen_extent_available(args, agbp, &stat);
269930151967SChandan Babu R if (error || !stat)
270030151967SChandan Babu R goto out_agbp_relse;
270130151967SChandan Babu R }
2702479e112dSChristoph Hellwig
270330f712c9SDave Chinner /*
270430f712c9SDave Chinner * Make the freelist shorter if it's too long.
270550adbcb4SDave Chinner *
2706396503fcSDave Chinner * Note that from this point onwards, we will always release the agf and
2707396503fcSDave Chinner * agfl buffers on error. This handles the case where we error out and
2708396503fcSDave Chinner * the buffers are clean or may not have been joined to the transaction
2709396503fcSDave Chinner * and hence need to be released manually. If they have been joined to
2710396503fcSDave Chinner * the transaction, then xfs_trans_brelse() will handle them
2711396503fcSDave Chinner * appropriately based on the recursion count and dirty state of the
2712396503fcSDave Chinner * buffer.
2713396503fcSDave Chinner *
271450adbcb4SDave Chinner * XXX (dgc): When we have lots of free space, does this buy us
271550adbcb4SDave Chinner * anything other than extra overhead when we need to put more blocks
271650adbcb4SDave Chinner * back on the free list? Maybe we should only do this when space is
271750adbcb4SDave Chinner * getting low or the AGFL is more than half full?
271804f13060SDarrick J. Wong *
271904f13060SDarrick J. Wong * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
272004f13060SDarrick J. Wong * big; the NORMAP flag prevents AGFL expand/shrink operations from
272104f13060SDarrick J. Wong * updating the rmapbt. Both flags are used in xfs_repair while we're
272204f13060SDarrick J. Wong * rebuilding the rmapbt, and neither are used by the kernel. They're
272304f13060SDarrick J. Wong * both required to ensure that rmaps are correctly recorded for the
272404f13060SDarrick J. Wong * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
272504f13060SDarrick J. Wong * repair/rmap.c in xfsprogs for details.
272630f712c9SDave Chinner */
272704f13060SDarrick J. Wong memset(&targs, 0, sizeof(targs));
27287280fedaSDarrick J. Wong /* struct copy below */
27296a2a9d77SDave Chinner if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
27307280fedaSDarrick J. Wong targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
273104f13060SDarrick J. Wong else
27327280fedaSDarrick J. Wong targs.oinfo = XFS_RMAP_OINFO_AG;
27336a2a9d77SDave Chinner while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
27346a2a9d77SDave Chinner pag->pagf_flcount > need) {
273549f0d84eSDave Chinner error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
273630f712c9SDave Chinner if (error)
2737396503fcSDave Chinner goto out_agbp_relse;
27384223f659SBrian Foster
2739c03edc9eSBrian Foster /* defer agfl frees */
27407dfee17bSDave Chinner error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
27417dfee17bSDave Chinner if (error)
27427dfee17bSDave Chinner goto out_agbp_relse;
2743f8f2835aSBrian Foster }
274450adbcb4SDave Chinner
274530f712c9SDave Chinner targs.tp = tp;
274630f712c9SDave Chinner targs.mp = mp;
274730f712c9SDave Chinner targs.agbp = agbp;
274830f712c9SDave Chinner targs.agno = args->agno;
27493fd129b6SDarrick J. Wong targs.alignment = targs.minlen = targs.prod = 1;
275030f712c9SDave Chinner targs.pag = pag;
2751cec7bb7dSDave Chinner error = xfs_alloc_read_agfl(pag, tp, &agflbp);
275250adbcb4SDave Chinner if (error)
2753396503fcSDave Chinner goto out_agbp_relse;
275450adbcb4SDave Chinner
275550adbcb4SDave Chinner /* Make the freelist longer if it's too short. */
275650adbcb4SDave Chinner while (pag->pagf_flcount < need) {
275730f712c9SDave Chinner targs.agbno = 0;
275850adbcb4SDave Chinner targs.maxlen = need - pag->pagf_flcount;
27590ab32086SBrian Foster targs.resv = XFS_AG_RESV_AGFL;
276050adbcb4SDave Chinner
276150adbcb4SDave Chinner /* Allocate as many blocks as possible at once. */
27626a2a9d77SDave Chinner error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
2763396503fcSDave Chinner if (error)
2764396503fcSDave Chinner goto out_agflbp_relse;
2765396503fcSDave Chinner
276630f712c9SDave Chinner /*
276730f712c9SDave Chinner * Stop if we run out. This shouldn't happen if callers obey
276830f712c9SDave Chinner * the restrictions correctly, but it can happen for free
276930f712c9SDave Chinner * calls on a completely full AG.
277030f712c9SDave Chinner */
277130f712c9SDave Chinner if (targs.agbno == NULLAGBLOCK) {
27726a2a9d77SDave Chinner if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
277330f712c9SDave Chinner break;
2774396503fcSDave Chinner goto out_agflbp_relse;
277530f712c9SDave Chinner }
27764811c933SDave Chinner
27774811c933SDave Chinner if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
27784811c933SDave Chinner error = xfs_rmap_alloc(tp, agbp, pag,
27794811c933SDave Chinner targs.agbno, targs.len, &targs.oinfo);
27804811c933SDave Chinner if (error)
27814811c933SDave Chinner goto out_agflbp_relse;
27824811c933SDave Chinner }
27834811c933SDave Chinner error = xfs_alloc_update_counters(tp, agbp,
27844811c933SDave Chinner -((long)(targs.len)));
27854811c933SDave Chinner if (error)
27864811c933SDave Chinner goto out_agflbp_relse;
27874811c933SDave Chinner
278830f712c9SDave Chinner /*
278930f712c9SDave Chinner * Put each allocated block on the list.
279030f712c9SDave Chinner */
279130f712c9SDave Chinner for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
27928c392eb2SDave Chinner error = xfs_alloc_put_freelist(pag, tp, agbp,
279330f712c9SDave Chinner agflbp, bno, 0);
279430f712c9SDave Chinner if (error)
2795396503fcSDave Chinner goto out_agflbp_relse;
279630f712c9SDave Chinner }
279730f712c9SDave Chinner }
279830f712c9SDave Chinner xfs_trans_brelse(tp, agflbp);
279930f712c9SDave Chinner args->agbp = agbp;
280030f712c9SDave Chinner return 0;
2801396503fcSDave Chinner
2802396503fcSDave Chinner out_agflbp_relse:
2803396503fcSDave Chinner xfs_trans_brelse(tp, agflbp);
2804396503fcSDave Chinner out_agbp_relse:
2805396503fcSDave Chinner if (agbp)
2806396503fcSDave Chinner xfs_trans_brelse(tp, agbp);
2807396503fcSDave Chinner out_no_agbp:
2808396503fcSDave Chinner args->agbp = NULL;
2809396503fcSDave Chinner return error;
281030f712c9SDave Chinner }
281130f712c9SDave Chinner
281230f712c9SDave Chinner /*
281330f712c9SDave Chinner * Get a block from the freelist.
281430f712c9SDave Chinner * Returns with the buffer for the block gotten.
281530f712c9SDave Chinner */
281650920116SDave Chinner int
281730f712c9SDave Chinner xfs_alloc_get_freelist(
281849f0d84eSDave Chinner struct xfs_perag *pag,
281950920116SDave Chinner struct xfs_trans *tp,
282050920116SDave Chinner struct xfs_buf *agbp,
282150920116SDave Chinner xfs_agblock_t *bnop,
282250920116SDave Chinner int btreeblk)
282330f712c9SDave Chinner {
28249798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr;
282550920116SDave Chinner struct xfs_buf *agflbp;
282650920116SDave Chinner xfs_agblock_t bno;
282730f712c9SDave Chinner __be32 *agfl_bno;
282830f712c9SDave Chinner int error;
2829f53dde11SDave Chinner uint32_t logflags;
283050920116SDave Chinner struct xfs_mount *mp = tp->t_mountp;
283130f712c9SDave Chinner
283230f712c9SDave Chinner /*
283330f712c9SDave Chinner * Freelist is empty, give up.
283430f712c9SDave Chinner */
283530f712c9SDave Chinner if (!agf->agf_flcount) {
283630f712c9SDave Chinner *bnop = NULLAGBLOCK;
283730f712c9SDave Chinner return 0;
283830f712c9SDave Chinner }
283930f712c9SDave Chinner /*
284030f712c9SDave Chinner * Read the array of free blocks.
284130f712c9SDave Chinner */
2842cec7bb7dSDave Chinner error = xfs_alloc_read_agfl(pag, tp, &agflbp);
284330f712c9SDave Chinner if (error)
284430f712c9SDave Chinner return error;
284530f712c9SDave Chinner
284730f712c9SDave Chinner /*
284830f712c9SDave Chinner * Get the block number and update the data structures.
284930f712c9SDave Chinner */
2850183606d8SChristoph Hellwig agfl_bno = xfs_buf_to_agfl_bno(agflbp);
285130f712c9SDave Chinner bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
28523148ebf2SDave Chinner if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
28533148ebf2SDave Chinner return -EFSCORRUPTED;
28543148ebf2SDave Chinner
285530f712c9SDave Chinner be32_add_cpu(&agf->agf_flfirst, 1);
285630f712c9SDave Chinner xfs_trans_brelse(tp, agflbp);
2857a78ee256SDave Chinner if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
285830f712c9SDave Chinner agf->agf_flfirst = 0;
285930f712c9SDave Chinner
28607ac2ff8bSDave Chinner ASSERT(!xfs_perag_agfl_needs_reset(pag));
286130f712c9SDave Chinner be32_add_cpu(&agf->agf_flcount, -1);
286230f712c9SDave Chinner pag->pagf_flcount--;
286330f712c9SDave Chinner
286430f712c9SDave Chinner logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
286530f712c9SDave Chinner if (btreeblk) {
286630f712c9SDave Chinner be32_add_cpu(&agf->agf_btreeblks, 1);
286730f712c9SDave Chinner pag->pagf_btreeblks++;
286830f712c9SDave Chinner logflags |= XFS_AGF_BTREEBLKS;
286930f712c9SDave Chinner }
287030f712c9SDave Chinner
287130f712c9SDave Chinner xfs_alloc_log_agf(tp, agbp, logflags);
287230f712c9SDave Chinner *bnop = bno;
287330f712c9SDave Chinner
287430f712c9SDave Chinner return 0;
287530f712c9SDave Chinner }
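/*
 * The AGFL is a circular array: agf_flfirst above wraps back to slot 0
 * once it reaches xfs_agfl_size(mp), and agf_fllast does the same on the
 * put side below. Editorial model of the wrap, not kernel code:
 */
static inline uint32_t
model_agfl_next_slot(
	uint32_t	slot,
	uint32_t	agfl_size)
{
	return (slot + 1 == agfl_size) ? 0 : slot + 1;
}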
287630f712c9SDave Chinner
287730f712c9SDave Chinner /*
287830f712c9SDave Chinner * Log the given fields from the agf structure.
287930f712c9SDave Chinner */
288030f712c9SDave Chinner void
288130f712c9SDave Chinner xfs_alloc_log_agf(
2882f53dde11SDave Chinner struct xfs_trans *tp,
2883f53dde11SDave Chinner struct xfs_buf *bp,
2884f53dde11SDave Chinner uint32_t fields)
288530f712c9SDave Chinner {
288630f712c9SDave Chinner int first; /* first byte offset */
288730f712c9SDave Chinner int last; /* last byte offset */
288830f712c9SDave Chinner static const short offsets[] = {
288930f712c9SDave Chinner offsetof(xfs_agf_t, agf_magicnum),
289030f712c9SDave Chinner offsetof(xfs_agf_t, agf_versionnum),
289130f712c9SDave Chinner offsetof(xfs_agf_t, agf_seqno),
289230f712c9SDave Chinner offsetof(xfs_agf_t, agf_length),
289330f712c9SDave Chinner offsetof(xfs_agf_t, agf_roots[0]),
289430f712c9SDave Chinner offsetof(xfs_agf_t, agf_levels[0]),
289530f712c9SDave Chinner offsetof(xfs_agf_t, agf_flfirst),
289630f712c9SDave Chinner offsetof(xfs_agf_t, agf_fllast),
289730f712c9SDave Chinner offsetof(xfs_agf_t, agf_flcount),
289830f712c9SDave Chinner offsetof(xfs_agf_t, agf_freeblks),
289930f712c9SDave Chinner offsetof(xfs_agf_t, agf_longest),
290030f712c9SDave Chinner offsetof(xfs_agf_t, agf_btreeblks),
290130f712c9SDave Chinner offsetof(xfs_agf_t, agf_uuid),
2902f32866fdSDarrick J. Wong offsetof(xfs_agf_t, agf_rmap_blocks),
2903bdf28630SDarrick J. Wong offsetof(xfs_agf_t, agf_refcount_blocks),
2904bdf28630SDarrick J. Wong offsetof(xfs_agf_t, agf_refcount_root),
2905bdf28630SDarrick J. Wong offsetof(xfs_agf_t, agf_refcount_level),
2906da1f039dSDarrick J. Wong /* needed so that we don't log the whole rest of the structure: */
2907da1f039dSDarrick J. Wong offsetof(xfs_agf_t, agf_spare64),
290830f712c9SDave Chinner sizeof(xfs_agf_t)
290930f712c9SDave Chinner };
291030f712c9SDave Chinner
29119798f615SChristoph Hellwig trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
291230f712c9SDave Chinner
291330f712c9SDave Chinner xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
291430f712c9SDave Chinner
291530f712c9SDave Chinner xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
291630f712c9SDave Chinner xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
291730f712c9SDave Chinner }
291830f712c9SDave Chinner
291930f712c9SDave Chinner /*
292030f712c9SDave Chinner * Put the block on the freelist for the allocation group.
292130f712c9SDave Chinner */
292250920116SDave Chinner int
292330f712c9SDave Chinner xfs_alloc_put_freelist(
29248c392eb2SDave Chinner struct xfs_perag *pag,
292550920116SDave Chinner struct xfs_trans *tp,
292650920116SDave Chinner struct xfs_buf *agbp,
292750920116SDave Chinner struct xfs_buf *agflbp,
292850920116SDave Chinner xfs_agblock_t bno,
292950920116SDave Chinner int btreeblk)
293030f712c9SDave Chinner {
29319798f615SChristoph Hellwig struct xfs_mount *mp = tp->t_mountp;
29329798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr;
293350920116SDave Chinner __be32 *blockp;
293430f712c9SDave Chinner int error;
2935f53dde11SDave Chinner uint32_t logflags;
293630f712c9SDave Chinner __be32 *agfl_bno;
293730f712c9SDave Chinner int startoff;
293830f712c9SDave Chinner
2939cec7bb7dSDave Chinner if (!agflbp) {
2940cec7bb7dSDave Chinner error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2941cec7bb7dSDave Chinner if (error)
294230f712c9SDave Chinner return error;
2943cec7bb7dSDave Chinner }
2944cec7bb7dSDave Chinner
294530f712c9SDave Chinner be32_add_cpu(&agf->agf_fllast, 1);
2946a78ee256SDave Chinner if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
294730f712c9SDave Chinner agf->agf_fllast = 0;
294830f712c9SDave Chinner
29497ac2ff8bSDave Chinner ASSERT(!xfs_perag_agfl_needs_reset(pag));
295030f712c9SDave Chinner be32_add_cpu(&agf->agf_flcount, 1);
295130f712c9SDave Chinner pag->pagf_flcount++;
295230f712c9SDave Chinner
295330f712c9SDave Chinner logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
295430f712c9SDave Chinner if (btreeblk) {
295530f712c9SDave Chinner be32_add_cpu(&agf->agf_btreeblks, -1);
295630f712c9SDave Chinner pag->pagf_btreeblks--;
295730f712c9SDave Chinner logflags |= XFS_AGF_BTREEBLKS;
295830f712c9SDave Chinner }
295930f712c9SDave Chinner
296030f712c9SDave Chinner xfs_alloc_log_agf(tp, agbp, logflags);
296130f712c9SDave Chinner
2962a78ee256SDave Chinner ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
296330f712c9SDave Chinner
2964183606d8SChristoph Hellwig agfl_bno = xfs_buf_to_agfl_bno(agflbp);
296530f712c9SDave Chinner blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
296630f712c9SDave Chinner *blockp = cpu_to_be32(bno);
296730f712c9SDave Chinner startoff = (char *)blockp - (char *)agflbp->b_addr;
296830f712c9SDave Chinner
296930f712c9SDave Chinner xfs_alloc_log_agf(tp, agbp, logflags);
297030f712c9SDave Chinner
297130f712c9SDave Chinner xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
297230f712c9SDave Chinner xfs_trans_log_buf(tp, agflbp, startoff,
297330f712c9SDave Chinner startoff + sizeof(xfs_agblock_t) - 1);
297430f712c9SDave Chinner return 0;
297530f712c9SDave Chinner }
297630f712c9SDave Chinner
2977e0a8de7dSDave Chinner /*
29782d7d1e7eSDarrick J. Wong * Check that this AGF/AGI header's sequence number and length matches the AG
29792d7d1e7eSDarrick J. Wong * number and size in fsblocks.
29802d7d1e7eSDarrick J. Wong */
29812d7d1e7eSDarrick J. Wong xfs_failaddr_t
29822d7d1e7eSDarrick J. Wong xfs_validate_ag_length(
29832d7d1e7eSDarrick J. Wong struct xfs_buf *bp,
29842d7d1e7eSDarrick J. Wong uint32_t seqno,
29852d7d1e7eSDarrick J. Wong uint32_t length)
29862d7d1e7eSDarrick J. Wong {
29872d7d1e7eSDarrick J. Wong struct xfs_mount *mp = bp->b_mount;
29882d7d1e7eSDarrick J. Wong /*
29892d7d1e7eSDarrick J. Wong * During growfs operations, the perag is not fully initialised,
29902d7d1e7eSDarrick J. Wong * so we can't use it for any useful checking. growfs ensures we can't
29912d7d1e7eSDarrick J. Wong * use it by using uncached buffers that don't have the perag attached
29922d7d1e7eSDarrick J. Wong * so we can detect and avoid this problem.
29932d7d1e7eSDarrick J. Wong */
29942d7d1e7eSDarrick J. Wong if (bp->b_pag && seqno != bp->b_pag->pag_agno)
29952d7d1e7eSDarrick J. Wong return __this_address;
29962d7d1e7eSDarrick J. Wong
29972d7d1e7eSDarrick J. Wong /*
29982d7d1e7eSDarrick J. Wong * Only the last AG in the filesystem is allowed to be shorter
29992d7d1e7eSDarrick J. Wong * than the AG size recorded in the superblock.
30002d7d1e7eSDarrick J. Wong */
30012d7d1e7eSDarrick J. Wong if (length != mp->m_sb.sb_agblocks) {
30022d7d1e7eSDarrick J. Wong /*
30032d7d1e7eSDarrick J. Wong * During growfs, the new last AG can get here before we
30042d7d1e7eSDarrick J. Wong * have updated the superblock. Give it a pass on the seqno
30052d7d1e7eSDarrick J. Wong * check.
30062d7d1e7eSDarrick J. Wong */
30072d7d1e7eSDarrick J. Wong if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
30082d7d1e7eSDarrick J. Wong return __this_address;
30092d7d1e7eSDarrick J. Wong if (length < XFS_MIN_AG_BLOCKS)
30102d7d1e7eSDarrick J. Wong return __this_address;
30112d7d1e7eSDarrick J. Wong if (length > mp->m_sb.sb_agblocks)
30122d7d1e7eSDarrick J. Wong return __this_address;
30132d7d1e7eSDarrick J. Wong }
30142d7d1e7eSDarrick J. Wong
30152d7d1e7eSDarrick J. Wong return NULL;
30162d7d1e7eSDarrick J. Wong }
30172d7d1e7eSDarrick J. Wong
30182d7d1e7eSDarrick J. Wong /*
3019e0a8de7dSDave Chinner * Verify the AGF is consistent.
3020e0a8de7dSDave Chinner *
3021e0a8de7dSDave Chinner * We do not verify the AGFL indexes in the AGF are fully consistent here
3022e0a8de7dSDave Chinner * because of issues with variable on-disk structure sizes. Instead, we check
3023e0a8de7dSDave Chinner * the agfl indexes for consistency when we initialise the perag from the AGF
3024e0a8de7dSDave Chinner * information after a read completes.
3025e0a8de7dSDave Chinner *
3026e0a8de7dSDave Chinner * If the index is inconsistent, then we mark the perag as needing an AGFL
3027e0a8de7dSDave Chinner * reset. The first AGFL update performed then resets the AGFL indexes and
3028e0a8de7dSDave Chinner * refills the AGFL with known good free blocks, allowing the filesystem to
3029e0a8de7dSDave Chinner * continue operating normally at the cost of a few leaked free space blocks.
3030e0a8de7dSDave Chinner */
3031a6a781a5SDarrick J. Wong static xfs_failaddr_t
303230f712c9SDave Chinner xfs_agf_verify(
303330f712c9SDave Chinner struct xfs_buf *bp)
303430f712c9SDave Chinner {
3035dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
30369798f615SChristoph Hellwig struct xfs_agf *agf = bp->b_addr;
30372d7d1e7eSDarrick J. Wong xfs_failaddr_t fa;
30382d7d1e7eSDarrick J. Wong uint32_t agf_seqno = be32_to_cpu(agf->agf_seqno);
3039edd8276dSDave Chinner uint32_t agf_length = be32_to_cpu(agf->agf_length);
304030f712c9SDave Chinner
304138c26bfdSDave Chinner if (xfs_has_crc(mp)) {
3042a45086e2SBrian Foster if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
3043a6a781a5SDarrick J. Wong return __this_address;
30449798f615SChristoph Hellwig if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
3045a6a781a5SDarrick J. Wong return __this_address;
3046a45086e2SBrian Foster }
304730f712c9SDave Chinner
304839708c20SBrian Foster if (!xfs_verify_magic(bp, agf->agf_magicnum))
304939708c20SBrian Foster return __this_address;
305039708c20SBrian Foster
3051edd8276dSDave Chinner if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
3052a6a781a5SDarrick J. Wong return __this_address;
305330f712c9SDave Chinner
3054edd8276dSDave Chinner /*
3055edd8276dSDave Chinner * Both agf_seqno and agf_length need to be validated before anything else
3056edd8276dSDave Chinner * block number related in the AGF or AGFL can be checked.
3057edd8276dSDave Chinner */
30582d7d1e7eSDarrick J. Wong fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
30592d7d1e7eSDarrick J. Wong if (fa)
30602d7d1e7eSDarrick J. Wong return fa;
3061edd8276dSDave Chinner
3062edd8276dSDave Chinner if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
3063edd8276dSDave Chinner return __this_address;
3064edd8276dSDave Chinner if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
3065edd8276dSDave Chinner return __this_address;
3066edd8276dSDave Chinner if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
3067d0c7feafSZheng Bin return __this_address;
3068d0c7feafSZheng Bin
3069d0c7feafSZheng Bin if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
3070edd8276dSDave Chinner be32_to_cpu(agf->agf_freeblks) > agf_length)
3071d0c7feafSZheng Bin return __this_address;
3072d0c7feafSZheng Bin
3073d2a047f3SDarrick J. Wong if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
3074d2a047f3SDarrick J. Wong be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
30757cb3efb4SDarrick J. Wong be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
30767cb3efb4SDarrick J. Wong mp->m_alloc_maxlevels ||
30777cb3efb4SDarrick J. Wong be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
30787cb3efb4SDarrick J. Wong mp->m_alloc_maxlevels)
3079a6a781a5SDarrick J. Wong return __this_address;
3080e1b05723SEric Sandeen
3081ebd9027dSDave Chinner if (xfs_has_lazysbcount(mp) &&
3082edd8276dSDave Chinner be32_to_cpu(agf->agf_btreeblks) > agf_length)
3083a6a781a5SDarrick J. Wong return __this_address;
308430f712c9SDave Chinner
3085edd8276dSDave Chinner if (xfs_has_rmapbt(mp)) {
3086edd8276dSDave Chinner if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
3087d0c7feafSZheng Bin return __this_address;
3088d0c7feafSZheng Bin
3089edd8276dSDave Chinner if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
3090edd8276dSDave Chinner be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
3091edd8276dSDave Chinner mp->m_rmap_maxlevels)
3092a6a781a5SDarrick J. Wong return __this_address;
3093edd8276dSDave Chinner }
3094edd8276dSDave Chinner
3095edd8276dSDave Chinner if (xfs_has_reflink(mp)) {
3096edd8276dSDave Chinner if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
3097edd8276dSDave Chinner return __this_address;
3098edd8276dSDave Chinner
3099edd8276dSDave Chinner if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
3100edd8276dSDave Chinner be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
3101edd8276dSDave Chinner return __this_address;
3102edd8276dSDave Chinner }
310346eeb521SDarrick J. Wong
3104a6a781a5SDarrick J. Wong return NULL;
310530f712c9SDave Chinner }
310630f712c9SDave Chinner
310730f712c9SDave Chinner static void
310830f712c9SDave Chinner xfs_agf_read_verify(
310930f712c9SDave Chinner struct xfs_buf *bp)
311030f712c9SDave Chinner {
3111dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
3112bc1a09b8SDarrick J. Wong xfs_failaddr_t fa;
311330f712c9SDave Chinner
311438c26bfdSDave Chinner if (xfs_has_crc(mp) &&
311530f712c9SDave Chinner !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3116bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3117bc1a09b8SDarrick J. Wong else {
3118b5572597SDarrick J. Wong fa = xfs_agf_verify(bp);
3119bc1a09b8SDarrick J. Wong if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3120bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3121bc1a09b8SDarrick J. Wong }
312230f712c9SDave Chinner }
312330f712c9SDave Chinner
312430f712c9SDave Chinner static void
312530f712c9SDave Chinner xfs_agf_write_verify(
312630f712c9SDave Chinner struct xfs_buf *bp)
312730f712c9SDave Chinner {
3128dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
3129fb1755a6SCarlos Maiolino struct xfs_buf_log_item *bip = bp->b_log_item;
31309798f615SChristoph Hellwig struct xfs_agf *agf = bp->b_addr;
3131bc1a09b8SDarrick J. Wong xfs_failaddr_t fa;
313230f712c9SDave Chinner
3133b5572597SDarrick J. Wong fa = xfs_agf_verify(bp);
3134bc1a09b8SDarrick J. Wong if (fa) {
3135bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa);
313630f712c9SDave Chinner return;
313730f712c9SDave Chinner }
313830f712c9SDave Chinner
313938c26bfdSDave Chinner if (!xfs_has_crc(mp))
314030f712c9SDave Chinner return;
314130f712c9SDave Chinner
314230f712c9SDave Chinner if (bip)
31439798f615SChristoph Hellwig agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
314430f712c9SDave Chinner
314530f712c9SDave Chinner xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
314630f712c9SDave Chinner }
314730f712c9SDave Chinner
314830f712c9SDave Chinner const struct xfs_buf_ops xfs_agf_buf_ops = {
3149233135b7SEric Sandeen .name = "xfs_agf",
315039708c20SBrian Foster .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
315130f712c9SDave Chinner .verify_read = xfs_agf_read_verify,
315230f712c9SDave Chinner .verify_write = xfs_agf_write_verify,
3153b5572597SDarrick J. Wong .verify_struct = xfs_agf_verify,
315430f712c9SDave Chinner };
315530f712c9SDave Chinner
315630f712c9SDave Chinner /*
315730f712c9SDave Chinner * Read in the allocation group header (free/alloc section).
315830f712c9SDave Chinner */
3159fa044ae7SDave Chinner int
316030f712c9SDave Chinner xfs_read_agf(
3161fa044ae7SDave Chinner struct xfs_perag *pag,
3162fa044ae7SDave Chinner struct xfs_trans *tp,
3163fa044ae7SDave Chinner int flags,
3164fa044ae7SDave Chinner struct xfs_buf **agfbpp)
316530f712c9SDave Chinner {
3166fa044ae7SDave Chinner struct xfs_mount *mp = pag->pag_mount;
316730f712c9SDave Chinner int error;
316830f712c9SDave Chinner
3169fa044ae7SDave Chinner trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
317030f712c9SDave Chinner
31714ed8e27bSDarrick J. Wong error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3172fa044ae7SDave Chinner XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
3173fa044ae7SDave Chinner XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
317430f712c9SDave Chinner if (error)
317530f712c9SDave Chinner return error;
317630f712c9SDave Chinner
3177fa044ae7SDave Chinner xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
317830f712c9SDave Chinner return 0;
317930f712c9SDave Chinner }
318030f712c9SDave Chinner
318130f712c9SDave Chinner /*
318276b47e52SDave Chinner * Read in the allocation group header (free/alloc section) and initialise the
318376b47e52SDave Chinner * perag structure if necessary. If the caller provides @agfbpp, then return the
318476b47e52SDave Chinner * locked buffer to the caller, otherwise free it.
318530f712c9SDave Chinner */
318608d3e84fSDave Chinner int
318730f712c9SDave Chinner xfs_alloc_read_agf(
318808d3e84fSDave Chinner struct xfs_perag *pag,
318908d3e84fSDave Chinner struct xfs_trans *tp,
319008d3e84fSDave Chinner int flags,
319176b47e52SDave Chinner struct xfs_buf **agfbpp)
319230f712c9SDave Chinner {
319376b47e52SDave Chinner struct xfs_buf *agfbp;
319408d3e84fSDave Chinner struct xfs_agf *agf;
319530f712c9SDave Chinner int error;
319616eaab83SBrian Foster int allocbt_blks;
319730f712c9SDave Chinner
319808d3e84fSDave Chinner trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
319930f712c9SDave Chinner
3200f48e2df8SDarrick J. Wong /* We don't support trylock when freeing. */
3201f48e2df8SDarrick J. Wong ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
3202f48e2df8SDarrick J. Wong (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
3203fa044ae7SDave Chinner error = xfs_read_agf(pag, tp,
320430f712c9SDave Chinner (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
320576b47e52SDave Chinner &agfbp);
320630f712c9SDave Chinner if (error)
320730f712c9SDave Chinner return error;
320830f712c9SDave Chinner
320976b47e52SDave Chinner agf = agfbp->b_addr;
32107ac2ff8bSDave Chinner if (!xfs_perag_initialised_agf(pag)) {
321130f712c9SDave Chinner pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
321230f712c9SDave Chinner pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
321330f712c9SDave Chinner pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
321430f712c9SDave Chinner pag->pagf_longest = be32_to_cpu(agf->agf_longest);
321530f712c9SDave Chinner pag->pagf_levels[XFS_BTNUM_BNOi] =
321630f712c9SDave Chinner be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
321730f712c9SDave Chinner pag->pagf_levels[XFS_BTNUM_CNTi] =
321830f712c9SDave Chinner be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
3219b8704944SDarrick J. Wong pag->pagf_levels[XFS_BTNUM_RMAPi] =
3220b8704944SDarrick J. Wong be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
322146eeb521SDarrick J. Wong pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
32227ac2ff8bSDave Chinner if (xfs_agfl_needs_reset(pag->pag_mount, agf))
32237ac2ff8bSDave Chinner set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
3224e2e63b07SDarrick J. Wong else
3225e2e63b07SDarrick J. Wong clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
322616eaab83SBrian Foster
322716eaab83SBrian Foster /*
322816eaab83SBrian Foster * Update the in-core allocbt counter. Filter out the rmapbt
322916eaab83SBrian Foster * subset of the btreeblks counter because the rmapbt is managed
323016eaab83SBrian Foster * by perag reservation. Subtract one for the rmapbt root block
323116eaab83SBrian Foster * because the rmap counter includes it while the btreeblks
323216eaab83SBrian Foster * counter only tracks non-root blocks.
323316eaab83SBrian Foster */
323416eaab83SBrian Foster allocbt_blks = pag->pagf_btreeblks;
323508d3e84fSDave Chinner if (xfs_has_rmapbt(pag->pag_mount))
323616eaab83SBrian Foster allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
323716eaab83SBrian Foster if (allocbt_blks > 0)
323808d3e84fSDave Chinner atomic64_add(allocbt_blks,
323908d3e84fSDave Chinner &pag->pag_mount->m_allocbt_blks);
32407ac2ff8bSDave Chinner
32417ac2ff8bSDave Chinner set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
324230f712c9SDave Chinner }
324330f712c9SDave Chinner #ifdef DEBUG
324408d3e84fSDave Chinner else if (!xfs_is_shutdown(pag->pag_mount)) {
324530f712c9SDave Chinner ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
324630f712c9SDave Chinner ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
324730f712c9SDave Chinner ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
324830f712c9SDave Chinner ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
324930f712c9SDave Chinner ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
325030f712c9SDave Chinner be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
325130f712c9SDave Chinner ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
325230f712c9SDave Chinner be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
325330f712c9SDave Chinner }
325430f712c9SDave Chinner #endif
325576b47e52SDave Chinner if (agfbpp)
325676b47e52SDave Chinner *agfbpp = agfbp;
325776b47e52SDave Chinner else
325876b47e52SDave Chinner xfs_trans_brelse(tp, agfbp);
325930f712c9SDave Chinner return 0;
326030f712c9SDave Chinner }
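/*
 * Worked example for the allocbt accounting above (numbers invented for
 * illustration): with pagf_btreeblks = 10 and agf_rmap_blocks = 4 on an
 * rmapbt filesystem, allocbt_blks = 10 - (4 - 1) = 7, i.e. only the seven
 * non-root bnobt/cntbt blocks are added to m_allocbt_blks.
 */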
326130f712c9SDave Chinner
326230f712c9SDave Chinner /*
3263ecd788a9SDave Chinner  * Pre-process allocation arguments to set initial state that we don't require
3264ecd788a9SDave Chinner * callers to set up correctly, as well as bounds check the allocation args
3265ecd788a9SDave Chinner * that are set up.
326630f712c9SDave Chinner */
3267ecd788a9SDave Chinner static int
3268ecd788a9SDave Chinner xfs_alloc_vextent_check_args(
3269319c9e87SDave Chinner struct xfs_alloc_arg *args,
32708b813568SDave Chinner xfs_fsblock_t target,
32718b813568SDave Chinner xfs_agnumber_t *minimum_agno)
327230f712c9SDave Chinner {
3273ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
3274ecd788a9SDave Chinner xfs_agblock_t agsize;
327530f712c9SDave Chinner
3276230e8fe8SDave Chinner args->fsbno = NULLFSBLOCK;
3277ecd788a9SDave Chinner
32788b813568SDave Chinner *minimum_agno = 0;
32798b813568SDave Chinner if (args->tp->t_highest_agno != NULLAGNUMBER)
32808b813568SDave Chinner *minimum_agno = args->tp->t_highest_agno;
32818b813568SDave Chinner
328230f712c9SDave Chinner /*
328330f712c9SDave Chinner * Just fix this up, for the case where the last a.g. is shorter
328430f712c9SDave Chinner * (or there's only one a.g.) and the caller couldn't easily figure
328530f712c9SDave Chinner * that out (xfs_bmap_alloc).
328630f712c9SDave Chinner */
328730f712c9SDave Chinner agsize = mp->m_sb.sb_agblocks;
328830f712c9SDave Chinner if (args->maxlen > agsize)
328930f712c9SDave Chinner args->maxlen = agsize;
329030f712c9SDave Chinner if (args->alignment == 0)
329130f712c9SDave Chinner args->alignment = 1;
329274b9aa63SDave Chinner
329374b9aa63SDave Chinner ASSERT(args->minlen > 0);
329474b9aa63SDave Chinner ASSERT(args->maxlen > 0);
329574b9aa63SDave Chinner ASSERT(args->alignment > 0);
329674b9aa63SDave Chinner ASSERT(args->resv != XFS_AG_RESV_AGFL);
329774b9aa63SDave Chinner
3298319c9e87SDave Chinner ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
3299319c9e87SDave Chinner ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
330030f712c9SDave Chinner ASSERT(args->minlen <= args->maxlen);
330130f712c9SDave Chinner ASSERT(args->minlen <= agsize);
330230f712c9SDave Chinner ASSERT(args->mod < args->prod);
330374b9aa63SDave Chinner
3304319c9e87SDave Chinner if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
3305319c9e87SDave Chinner XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
330630f712c9SDave Chinner args->minlen > args->maxlen || args->minlen > agsize ||
330730f712c9SDave Chinner args->mod >= args->prod) {
330830f712c9SDave Chinner trace_xfs_alloc_vextent_badargs(args);
3309ecd788a9SDave Chinner return -ENOSPC;
3310ecd788a9SDave Chinner }
33118b813568SDave Chinner
33128b813568SDave Chinner if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
33138b813568SDave Chinner trace_xfs_alloc_vextent_skip_deadlock(args);
33148b813568SDave Chinner return -ENOSPC;
33158b813568SDave Chinner }
331630f712c9SDave Chinner return 0;
331830f712c9SDave Chinner }
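/*
 * Illustrative sketch, not part of the original file: the minimal
 * xfs_alloc_arg setup that satisfies the bounds checks above. The field
 * values here are arbitrary examples.
 */
static void
xfs_example_init_alloc_args(
	struct xfs_alloc_arg	*args,
	struct xfs_trans	*tp)
{
	memset(args, 0, sizeof(*args));
	args->tp = tp;
	args->mp = tp->t_mountp;
	args->minlen = 1;		/* asserted to be > 0 */
	args->maxlen = 16;		/* >= minlen; clamped to the AG size */
	args->prod = 1;			/* args->mod < args->prod is checked */
	args->mod = 0;
	args->alignment = 1;		/* 0 would be fixed up to 1 anyway */
	args->resv = XFS_AG_RESV_NONE;	/* AGFL reservations are rejected */
	args->oinfo = XFS_RMAP_OINFO_ANY_OWNER;
}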
331930f712c9SDave Chinner
332030f712c9SDave Chinner /*
332174b9aa63SDave Chinner * Prepare an AG for allocation. If the AG is not prepared to accept the
332274b9aa63SDave Chinner * allocation, return failure.
332374b9aa63SDave Chinner *
332474b9aa63SDave Chinner * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
332574b9aa63SDave Chinner * modified to hold their own perag references.
332674b9aa63SDave Chinner */
332774b9aa63SDave Chinner static int
332874b9aa63SDave Chinner xfs_alloc_vextent_prepare_ag(
332900dcd17cSDave Chinner struct xfs_alloc_arg *args,
33306a2a9d77SDave Chinner uint32_t alloc_flags)
333174b9aa63SDave Chinner {
333274b9aa63SDave Chinner bool need_pag = !args->pag;
333374b9aa63SDave Chinner int error;
333474b9aa63SDave Chinner
333574b9aa63SDave Chinner if (need_pag)
333674b9aa63SDave Chinner args->pag = xfs_perag_get(args->mp, args->agno);
333774b9aa63SDave Chinner
33383432ef61SDave Chinner args->agbp = NULL;
33396a2a9d77SDave Chinner error = xfs_alloc_fix_freelist(args, alloc_flags);
334074b9aa63SDave Chinner if (error) {
334174b9aa63SDave Chinner trace_xfs_alloc_vextent_nofix(args);
334274b9aa63SDave Chinner if (need_pag)
334374b9aa63SDave Chinner xfs_perag_put(args->pag);
334474b9aa63SDave Chinner args->agbno = NULLAGBLOCK;
334574b9aa63SDave Chinner return error;
334674b9aa63SDave Chinner }
334774b9aa63SDave Chinner if (!args->agbp) {
334874b9aa63SDave Chinner /* cannot allocate in this AG at all */
334974b9aa63SDave Chinner trace_xfs_alloc_vextent_noagbp(args);
335074b9aa63SDave Chinner args->agbno = NULLAGBLOCK;
335174b9aa63SDave Chinner return 0;
335274b9aa63SDave Chinner }
335374b9aa63SDave Chinner args->wasfromfl = 0;
335474b9aa63SDave Chinner return 0;
335574b9aa63SDave Chinner }
335674b9aa63SDave Chinner
335774b9aa63SDave Chinner /*
3358e4d17426SDave Chinner  * Post-process allocation results to account for the allocation if it succeeded
3359e4d17426SDave Chinner * and set the allocated block number correctly for the caller.
3360ecd788a9SDave Chinner *
3361e4d17426SDave Chinner * XXX: we should really be returning ENOSPC for ENOSPC, not
3362ecd788a9SDave Chinner * hiding it behind a "successful" NULLFSBLOCK allocation.
336330f712c9SDave Chinner */
3364e4d17426SDave Chinner static int
3365e4d17426SDave Chinner xfs_alloc_vextent_finish(
3366ecd788a9SDave Chinner struct xfs_alloc_arg *args,
3367e4d17426SDave Chinner xfs_agnumber_t minimum_agno,
3368e4d17426SDave Chinner int alloc_error,
3369e4d17426SDave Chinner bool drop_perag)
3370ecd788a9SDave Chinner {
3371ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
3372e4d17426SDave Chinner int error = 0;
33731dd0510fSDave Chinner
3374ecd788a9SDave Chinner /*
3375ecd788a9SDave Chinner * We can end up here with a locked AGF. If we failed, the caller is
3376ecd788a9SDave Chinner * likely going to try to allocate again with different parameters, and
3377ecd788a9SDave Chinner * that can widen the AGs that are searched for free space. If we have
3378ecd788a9SDave Chinner * to do BMBT block allocation, we have to do a new allocation.
3379ecd788a9SDave Chinner *
3380ecd788a9SDave Chinner * Hence leaving this function with the AGF locked opens up potential
3381ecd788a9SDave Chinner * ABBA AGF deadlocks because a future allocation attempt in this
3382ecd788a9SDave Chinner * transaction may attempt to lock a lower number AGF.
3383ecd788a9SDave Chinner *
3384ecd788a9SDave Chinner 	 * We can't release the AGF until the transaction is committed, so at
3385ecd788a9SDave Chinner * this point we must update the "first allocation" tracker to point at
3386ecd788a9SDave Chinner * this AG if the tracker is empty or points to a lower AG. This allows
3387ecd788a9SDave Chinner * the next allocation attempt to be modified appropriately to avoid
3388ecd788a9SDave Chinner * deadlocks.
3389ecd788a9SDave Chinner */
3390ecd788a9SDave Chinner if (args->agbp &&
3391ecd788a9SDave Chinner (args->tp->t_highest_agno == NULLAGNUMBER ||
3392ecd788a9SDave Chinner args->agno > minimum_agno))
3393ecd788a9SDave Chinner args->tp->t_highest_agno = args->agno;
3394ecd788a9SDave Chinner
3395e4d17426SDave Chinner /*
3396e4d17426SDave Chinner * If the allocation failed with an error or we had an ENOSPC result,
3397e4d17426SDave Chinner * preserve the returned error whilst also marking the allocation result
3398e4d17426SDave Chinner * as "no extent allocated". This ensures that callers that fail to
3399e4d17426SDave Chinner * capture the error will still treat it as a failed allocation.
3400e4d17426SDave Chinner */
3401e4d17426SDave Chinner if (alloc_error || args->agbno == NULLAGBLOCK) {
3402ecd788a9SDave Chinner args->fsbno = NULLFSBLOCK;
3403e4d17426SDave Chinner error = alloc_error;
3404e4d17426SDave Chinner goto out_drop_perag;
34051dd0510fSDave Chinner }
34061dd0510fSDave Chinner
3407ecd788a9SDave Chinner args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3408e4d17426SDave Chinner
3409ecd788a9SDave Chinner ASSERT(args->len >= args->minlen);
3410ecd788a9SDave Chinner ASSERT(args->len <= args->maxlen);
3411ecd788a9SDave Chinner ASSERT(args->agbno % args->alignment == 0);
3412ecd788a9SDave Chinner XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
3413e4d17426SDave Chinner
3414e4d17426SDave Chinner /* if not file data, insert new block into the reverse map btree */
3415e4d17426SDave Chinner if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
3416e4d17426SDave Chinner error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
3417e4d17426SDave Chinner args->agbno, args->len, &args->oinfo);
3418e4d17426SDave Chinner if (error)
3419e4d17426SDave Chinner goto out_drop_perag;
3420e4d17426SDave Chinner }
3421e4d17426SDave Chinner
3422e4d17426SDave Chinner if (!args->wasfromfl) {
3423e4d17426SDave Chinner error = xfs_alloc_update_counters(args->tp, args->agbp,
3424e4d17426SDave Chinner -((long)(args->len)));
3425e4d17426SDave Chinner if (error)
3426e4d17426SDave Chinner goto out_drop_perag;
3427e4d17426SDave Chinner
3428e4d17426SDave Chinner ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
3429e4d17426SDave Chinner args->len));
3430e4d17426SDave Chinner }
3431e4d17426SDave Chinner
3432e4d17426SDave Chinner xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
3433e4d17426SDave Chinner
3434e4d17426SDave Chinner XFS_STATS_INC(mp, xs_allocx);
3435e4d17426SDave Chinner XFS_STATS_ADD(mp, xs_allocb, args->len);
3436e4d17426SDave Chinner
3437e6fbb716SDarrick J. Wong trace_xfs_alloc_vextent_finish(args);
3438e6fbb716SDarrick J. Wong
3439e4d17426SDave Chinner out_drop_perag:
34403432ef61SDave Chinner if (drop_perag && args->pag) {
34413432ef61SDave Chinner xfs_perag_rele(args->pag);
3442e4d17426SDave Chinner args->pag = NULL;
3443e4d17426SDave Chinner }
3444e4d17426SDave Chinner return error;
3445ecd788a9SDave Chinner }
3446ecd788a9SDave Chinner
3447ecd788a9SDave Chinner /*
3448230e8fe8SDave Chinner  * Allocate within a single AG only. This uses a best-fit length algorithm, so
3449230e8fe8SDave Chinner  * if you need an exact-sized allocation without locality constraints, this is
3450230e8fe8SDave Chinner  * the fastest way to do it.
3451230e8fe8SDave Chinner *
3452230e8fe8SDave Chinner * Caller is expected to hold a perag reference in args->pag.
3453ecd788a9SDave Chinner */
345474c36a86SDave Chinner int
3455ecd788a9SDave Chinner xfs_alloc_vextent_this_ag(
34565f36b2ceSDave Chinner struct xfs_alloc_arg *args,
34575f36b2ceSDave Chinner xfs_agnumber_t agno)
3458ecd788a9SDave Chinner {
3459ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
34608b813568SDave Chinner xfs_agnumber_t minimum_agno;
34616a2a9d77SDave Chinner uint32_t alloc_flags = 0;
3462ecd788a9SDave Chinner int error;
3463ecd788a9SDave Chinner
34646de4b1abSDarrick J. Wong ASSERT(args->pag != NULL);
34656de4b1abSDarrick J. Wong ASSERT(args->pag->pag_agno == agno);
34666de4b1abSDarrick J. Wong
34678b813568SDave Chinner args->agno = agno;
34688b813568SDave Chinner args->agbno = 0;
3469e6fbb716SDarrick J. Wong
3470e6fbb716SDarrick J. Wong trace_xfs_alloc_vextent_this_ag(args);
3471e6fbb716SDarrick J. Wong
34728b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
34738b813568SDave Chinner &minimum_agno);
3474ecd788a9SDave Chinner if (error) {
3475ecd788a9SDave Chinner if (error == -ENOSPC)
3476ecd788a9SDave Chinner return 0;
3477ecd788a9SDave Chinner return error;
3478ecd788a9SDave Chinner }
3479ecd788a9SDave Chinner
34806a2a9d77SDave Chinner error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
348174b9aa63SDave Chinner if (!error && args->agbp)
34826a2a9d77SDave Chinner error = xfs_alloc_ag_vextent_size(args, alloc_flags);
348374b9aa63SDave Chinner
3484e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
348530f712c9SDave Chinner }
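/*
 * Illustrative caller sketch (hypothetical helper): take the perag reference
 * that xfs_alloc_vextent_this_ag() expects, run the allocation, then drop
 * the reference again, since this path does not release it for the caller.
 */
static int
xfs_example_alloc_in_ag(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		agno)
{
	int			error;

	args->pag = xfs_perag_grab(args->mp, agno);
	if (!args->pag)
		return -EAGAIN;		/* example policy: AG being torn down */
	error = xfs_alloc_vextent_this_ag(args, agno);
	xfs_perag_rele(args->pag);
	args->pag = NULL;
	return error;
}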
3486ecd788a9SDave Chinner
348730f712c9SDave Chinner /*
3488ecd788a9SDave Chinner  * Iterate all AGs trying to allocate an extent starting from @start_agno.
3489ecd788a9SDave Chinner *
3490ecd788a9SDave Chinner  * If the incoming @target_agbno is non-zero, the initial allocation attempt in
3491ecd788a9SDave Chinner  * @start_agno has locality information. If we fail to allocate in that AG,
3492ecd788a9SDave Chinner  * then we revert to anywhere-in-AG for all the other AGs we attempt to
3493ecd788a9SDave Chinner  * allocate in, as there is no locality optimisation possible for those
3494ecd788a9SDave Chinner  * allocations.
3495ecd788a9SDave Chinner *
34963432ef61SDave Chinner * On return, args->pag may be left referenced if we finish before the "all
34973432ef61SDave Chinner * failed" return point. The allocation finish still needs the perag, and
34983432ef61SDave Chinner * so the caller will release it once they've finished the allocation.
34993432ef61SDave Chinner *
3500ecd788a9SDave Chinner * When we wrap the AG iteration at the end of the filesystem, we have to be
3501ecd788a9SDave Chinner * careful not to wrap into AGs below ones we already have locked in the
3502ecd788a9SDave Chinner * transaction if we are doing a blocking iteration. This will result in an
3503ecd788a9SDave Chinner * out-of-order locking of AGFs and hence can cause deadlocks.
350430f712c9SDave Chinner */
3505ecd788a9SDave Chinner static int
3506ecd788a9SDave Chinner xfs_alloc_vextent_iterate_ags(
3507ecd788a9SDave Chinner struct xfs_alloc_arg *args,
3508ecd788a9SDave Chinner xfs_agnumber_t minimum_agno,
3509ecd788a9SDave Chinner xfs_agnumber_t start_agno,
3510230e8fe8SDave Chinner xfs_agblock_t target_agbno,
35116a2a9d77SDave Chinner uint32_t alloc_flags)
3512ecd788a9SDave Chinner {
3513ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
35149eb77596SDarrick J. Wong xfs_agnumber_t restart_agno = minimum_agno;
35153432ef61SDave Chinner xfs_agnumber_t agno;
3516ecd788a9SDave Chinner int error = 0;
3517ecd788a9SDave Chinner
35186a2a9d77SDave Chinner if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
35199eb77596SDarrick J. Wong restart_agno = 0;
35203432ef61SDave Chinner restart:
35219eb77596SDarrick J. Wong for_each_perag_wrap_range(mp, start_agno, restart_agno,
35223432ef61SDave Chinner mp->m_sb.sb_agcount, agno, args->pag) {
35233432ef61SDave Chinner args->agno = agno;
35246a2a9d77SDave Chinner error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
352574b9aa63SDave Chinner if (error)
352674b9aa63SDave Chinner break;
35273432ef61SDave Chinner if (!args->agbp) {
35283432ef61SDave Chinner trace_xfs_alloc_vextent_loopfailed(args);
35293432ef61SDave Chinner continue;
35303432ef61SDave Chinner }
353174b9aa63SDave Chinner
353274b9aa63SDave Chinner /*
35333432ef61SDave Chinner * Allocation is supposed to succeed now, so break out of the
35343432ef61SDave Chinner * loop regardless of whether we succeed or not.
353574b9aa63SDave Chinner */
3536230e8fe8SDave Chinner if (args->agno == start_agno && target_agbno) {
3537230e8fe8SDave Chinner args->agbno = target_agbno;
35386a2a9d77SDave Chinner error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3539230e8fe8SDave Chinner } else {
3540230e8fe8SDave Chinner args->agbno = 0;
35416a2a9d77SDave Chinner error = xfs_alloc_ag_vextent_size(args, alloc_flags);
3542230e8fe8SDave Chinner }
3543ecd788a9SDave Chinner break;
354430f712c9SDave Chinner }
35453432ef61SDave Chinner if (error) {
35463432ef61SDave Chinner xfs_perag_rele(args->pag);
3547ecd788a9SDave Chinner args->pag = NULL;
3548ecd788a9SDave Chinner return error;
3549ecd788a9SDave Chinner }
35503432ef61SDave Chinner if (args->agbp)
35513432ef61SDave Chinner return 0;
35523432ef61SDave Chinner
35533432ef61SDave Chinner /*
35543432ef61SDave Chinner 	 * We didn't find an AG we can allocate from. If we were given
35553432ef61SDave Chinner * constraining flags by the caller, drop them and retry the allocation
35563432ef61SDave Chinner * without any constraints being set.
35573432ef61SDave Chinner */
35586a2a9d77SDave Chinner if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
35596a2a9d77SDave Chinner alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
35609eb77596SDarrick J. Wong restart_agno = minimum_agno;
35613432ef61SDave Chinner goto restart;
35623432ef61SDave Chinner }
35633432ef61SDave Chinner
35643432ef61SDave Chinner ASSERT(args->pag == NULL);
35653432ef61SDave Chinner trace_xfs_alloc_vextent_allfailed(args);
35663432ef61SDave Chinner return 0;
35673432ef61SDave Chinner }
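/*
 * Iteration order example (illustrative): with sb_agcount = 4,
 * start_agno = 2 and restart_agno = 0, the for_each_perag_wrap_range()
 * loop above visits AGs 2, 3, 0, 1. With restart_agno = minimum_agno = 1
 * the wrap stops short and only AGs 2, 3, 1 are visited, preserving the
 * AGF lock ordering described above.
 */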
3568ecd788a9SDave Chinner
3569ecd788a9SDave Chinner /*
3570ecd788a9SDave Chinner  * Iterate through the AGs from the start AG to the end of the filesystem, trying
3571ecd788a9SDave Chinner * to allocate blocks. It starts with a near allocation attempt in the initial
3572ecd788a9SDave Chinner * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
3573ecd788a9SDave Chinner * back to zero if allowed by previous allocations in this transaction,
3574ecd788a9SDave Chinner * otherwise will wrap back to the start AG and run a second blocking pass to
3575ecd788a9SDave Chinner * the end of the filesystem.
3576ecd788a9SDave Chinner */
35772a7f6d41SDave Chinner int
3578ecd788a9SDave Chinner xfs_alloc_vextent_start_ag(
3579ecd788a9SDave Chinner struct xfs_alloc_arg *args,
35802a7f6d41SDave Chinner xfs_fsblock_t target)
3581ecd788a9SDave Chinner {
3582ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
35838b813568SDave Chinner xfs_agnumber_t minimum_agno;
3584ecd788a9SDave Chinner xfs_agnumber_t start_agno;
3585ecd788a9SDave Chinner xfs_agnumber_t rotorstep = xfs_rotorstep;
3586ecd788a9SDave Chinner bool bump_rotor = false;
35876a2a9d77SDave Chinner uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3588ecd788a9SDave Chinner int error;
3589ecd788a9SDave Chinner
35906de4b1abSDarrick J. Wong ASSERT(args->pag == NULL);
35916de4b1abSDarrick J. Wong
35928b813568SDave Chinner args->agno = NULLAGNUMBER;
35938b813568SDave Chinner args->agbno = NULLAGBLOCK;
3594e6fbb716SDarrick J. Wong
35954dfb02d5SDarrick J. Wong trace_xfs_alloc_vextent_start_ag(args);
3596e6fbb716SDarrick J. Wong
35978b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3598ecd788a9SDave Chinner if (error) {
3599ecd788a9SDave Chinner if (error == -ENOSPC)
3600ecd788a9SDave Chinner return 0;
3601ecd788a9SDave Chinner return error;
3602ecd788a9SDave Chinner }
3603ecd788a9SDave Chinner
3604ecd788a9SDave Chinner if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3605ecd788a9SDave Chinner xfs_is_inode32(mp)) {
36062a7f6d41SDave Chinner target = XFS_AGB_TO_FSB(mp,
3607ecd788a9SDave Chinner ((mp->m_agfrotor / rotorstep) %
3608ecd788a9SDave Chinner mp->m_sb.sb_agcount), 0);
3609ecd788a9SDave Chinner bump_rotor = 1;
3610ecd788a9SDave Chinner }
3611ecd788a9SDave Chinner
3612230e8fe8SDave Chinner start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3613ecd788a9SDave Chinner error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
36146a2a9d77SDave Chinner XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3615230e8fe8SDave Chinner
36168d242e93SChristoph Hellwig if (bump_rotor) {
3617ecd788a9SDave Chinner if (args->agno == start_agno)
361830f712c9SDave Chinner mp->m_agfrotor = (mp->m_agfrotor + 1) %
361930f712c9SDave Chinner (mp->m_sb.sb_agcount * rotorstep);
362030f712c9SDave Chinner else
362130f712c9SDave Chinner mp->m_agfrotor = (args->agno * rotorstep + 1) %
362230f712c9SDave Chinner (mp->m_sb.sb_agcount * rotorstep);
362330f712c9SDave Chinner }
3624ecd788a9SDave Chinner
3625e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3626ecd788a9SDave Chinner }
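/*
 * Rotor arithmetic example (illustrative): with xfs_rotorstep = 3 and
 * sb_agcount = 4, m_agfrotor cycles through 0..11, so m_agfrotor / rotorstep
 * starts three consecutive inode32 initial data allocations in each AG
 * before the rotor moves on to the next one.
 */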
3627ecd788a9SDave Chinner
3628ecd788a9SDave Chinner /*
3629230e8fe8SDave Chinner * Iterate from the agno indicated via @target through to the end of the
3630ecd788a9SDave Chinner * filesystem attempting blocking allocation. This does not wrap or try a second
3631230e8fe8SDave Chinner  * pass, so will not fall back into AGs lower than indicated by the target.
3632ecd788a9SDave Chinner */
3633319c9e87SDave Chinner int
3634ecd788a9SDave Chinner xfs_alloc_vextent_first_ag(
3635ecd788a9SDave Chinner struct xfs_alloc_arg *args,
3636319c9e87SDave Chinner xfs_fsblock_t target)
3637ecd788a9SDave Chinner {
3638ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
36398b813568SDave Chinner xfs_agnumber_t minimum_agno;
3640ecd788a9SDave Chinner xfs_agnumber_t start_agno;
36416a2a9d77SDave Chinner uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3642ecd788a9SDave Chinner int error;
3643ecd788a9SDave Chinner
36446de4b1abSDarrick J. Wong ASSERT(args->pag == NULL);
36456de4b1abSDarrick J. Wong
36468b813568SDave Chinner args->agno = NULLAGNUMBER;
36478b813568SDave Chinner args->agbno = NULLAGBLOCK;
3648e6fbb716SDarrick J. Wong
36494dfb02d5SDarrick J. Wong trace_xfs_alloc_vextent_first_ag(args);
3650e6fbb716SDarrick J. Wong
36518b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3652ecd788a9SDave Chinner if (error) {
3653ecd788a9SDave Chinner if (error == -ENOSPC)
3654ecd788a9SDave Chinner return 0;
3655ecd788a9SDave Chinner return error;
3656ecd788a9SDave Chinner }
3657ecd788a9SDave Chinner
3658319c9e87SDave Chinner start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3659230e8fe8SDave Chinner error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
36606a2a9d77SDave Chinner XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3661e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3662ecd788a9SDave Chinner }
3663ecd788a9SDave Chinner
3664ecd788a9SDave Chinner /*
366574b9aa63SDave Chinner * Allocate at the exact block target or fail. Caller is expected to hold a
366674b9aa63SDave Chinner * perag reference in args->pag.
36675f36b2ceSDave Chinner */
36685f36b2ceSDave Chinner int
36695f36b2ceSDave Chinner xfs_alloc_vextent_exact_bno(
36705f36b2ceSDave Chinner struct xfs_alloc_arg *args,
36715f36b2ceSDave Chinner xfs_fsblock_t target)
36725f36b2ceSDave Chinner {
36735f36b2ceSDave Chinner struct xfs_mount *mp = args->mp;
36748b813568SDave Chinner xfs_agnumber_t minimum_agno;
36755f36b2ceSDave Chinner int error;
36765f36b2ceSDave Chinner
36776de4b1abSDarrick J. Wong ASSERT(args->pag != NULL);
36786de4b1abSDarrick J. Wong ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
36796de4b1abSDarrick J. Wong
36808b813568SDave Chinner args->agno = XFS_FSB_TO_AGNO(mp, target);
36818b813568SDave Chinner args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3682e6fbb716SDarrick J. Wong
36834dfb02d5SDarrick J. Wong trace_xfs_alloc_vextent_exact_bno(args);
3684e6fbb716SDarrick J. Wong
36858b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
36865f36b2ceSDave Chinner if (error) {
36875f36b2ceSDave Chinner if (error == -ENOSPC)
36885f36b2ceSDave Chinner return 0;
36895f36b2ceSDave Chinner return error;
36905f36b2ceSDave Chinner }
36915f36b2ceSDave Chinner
369200dcd17cSDave Chinner error = xfs_alloc_vextent_prepare_ag(args, 0);
369374b9aa63SDave Chinner if (!error && args->agbp)
3694230e8fe8SDave Chinner error = xfs_alloc_ag_vextent_exact(args);
36955f36b2ceSDave Chinner
3696e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
36975f36b2ceSDave Chinner }
36985f36b2ceSDave Chinner
36995f36b2ceSDave Chinner /*
3700db4710fdSDave Chinner  * Allocate an extent as close to the target as possible. If there are no
3701db4710fdSDave Chinner * viable candidates in the AG, then fail the allocation.
370274b9aa63SDave Chinner *
370374b9aa63SDave Chinner * Caller may or may not have a per-ag reference in args->pag.
3704ecd788a9SDave Chinner */
3705ecd788a9SDave Chinner int
3706db4710fdSDave Chinner xfs_alloc_vextent_near_bno(
3707db4710fdSDave Chinner struct xfs_alloc_arg *args,
3708db4710fdSDave Chinner xfs_fsblock_t target)
3709ecd788a9SDave Chinner {
3710db4710fdSDave Chinner struct xfs_mount *mp = args->mp;
37118b813568SDave Chinner xfs_agnumber_t minimum_agno;
3712e4d17426SDave Chinner bool needs_perag = args->pag == NULL;
37136a2a9d77SDave Chinner uint32_t alloc_flags = 0;
371474c36a86SDave Chinner int error;
3715ecd788a9SDave Chinner
37166de4b1abSDarrick J. Wong if (!needs_perag)
37176de4b1abSDarrick J. Wong ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
37186de4b1abSDarrick J. Wong
37198b813568SDave Chinner args->agno = XFS_FSB_TO_AGNO(mp, target);
37208b813568SDave Chinner args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3721e6fbb716SDarrick J. Wong
37224dfb02d5SDarrick J. Wong trace_xfs_alloc_vextent_near_bno(args);
3723e6fbb716SDarrick J. Wong
37248b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3725db4710fdSDave Chinner if (error) {
3726db4710fdSDave Chinner if (error == -ENOSPC)
3727db4710fdSDave Chinner return 0;
372874c36a86SDave Chinner return error;
372930f712c9SDave Chinner }
373030f712c9SDave Chinner
3731e4d17426SDave Chinner if (needs_perag)
37323432ef61SDave Chinner args->pag = xfs_perag_grab(mp, args->agno);
3733e4d17426SDave Chinner
37346a2a9d77SDave Chinner error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
373574b9aa63SDave Chinner if (!error && args->agbp)
37366a2a9d77SDave Chinner error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3737db4710fdSDave Chinner
3738e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
3739db4710fdSDave Chinner }
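/*
 * Illustrative sketch (hypothetical helper): a locality allocation where the
 * caller holds no perag reference. In that case xfs_alloc_vextent_near_bno()
 * grabs the perag for the target AG itself and releases it again in
 * xfs_alloc_vextent_finish().
 */
static int
xfs_example_alloc_near(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	args->pag = NULL;	/* needs_perag: the callee manages the perag */
	return xfs_alloc_vextent_near_bno(args, target);
}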
3740db4710fdSDave Chinner
37414d89e20bSDave Chinner /* Ensure that the freelist is at full capacity. */
37424d89e20bSDave Chinner int
37434d89e20bSDave Chinner xfs_free_extent_fix_freelist(
37444d89e20bSDave Chinner struct xfs_trans *tp,
374545d06621SDave Chinner struct xfs_perag *pag,
37464d89e20bSDave Chinner struct xfs_buf **agbp)
374730f712c9SDave Chinner {
37484d89e20bSDave Chinner struct xfs_alloc_arg args;
374930f712c9SDave Chinner int error;
375030f712c9SDave Chinner
37514d89e20bSDave Chinner memset(&args, 0, sizeof(struct xfs_alloc_arg));
375230f712c9SDave Chinner args.tp = tp;
375330f712c9SDave Chinner args.mp = tp->t_mountp;
375445d06621SDave Chinner args.agno = pag->pag_agno;
375545d06621SDave Chinner args.pag = pag;
375630f712c9SDave Chinner
375730f712c9SDave Chinner /*
375830f712c9SDave Chinner 	 * Validate that the block number is legal - this enables us to detect
375930f712c9SDave Chinner 	 * and handle silent filesystem corruption rather than crashing.
376030f712c9SDave Chinner */
376130f712c9SDave Chinner if (args.agno >= args.mp->m_sb.sb_agcount)
37622451337dSDave Chinner return -EFSCORRUPTED;
376330f712c9SDave Chinner
376430f712c9SDave Chinner error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
376530f712c9SDave Chinner if (error)
376645d06621SDave Chinner return error;
376730f712c9SDave Chinner
37684d89e20bSDave Chinner *agbp = args.agbp;
376945d06621SDave Chinner return 0;
377030f712c9SDave Chinner }
377130f712c9SDave Chinner
37724d89e20bSDave Chinner /*
37734d89e20bSDave Chinner * Free an extent.
37744d89e20bSDave Chinner * Just break up the extent address and hand off to xfs_free_ag_extent
37754d89e20bSDave Chinner * after fixing up the freelist.
37764d89e20bSDave Chinner */
377766e3237eSDarrick J. Wong int
3778fcb762f5SBrian Foster __xfs_free_extent(
377966e3237eSDarrick J. Wong struct xfs_trans *tp,
3780b2ccab31SDarrick J. Wong struct xfs_perag *pag,
3781b2ccab31SDarrick J. Wong xfs_agblock_t agbno,
378266e3237eSDarrick J. Wong xfs_extlen_t len,
378366e3237eSDarrick J. Wong const struct xfs_owner_info *oinfo,
378466e3237eSDarrick J. Wong enum xfs_ag_resv_type type,
3785fcb762f5SBrian Foster bool skip_discard)
37864d89e20bSDave Chinner {
37874d89e20bSDave Chinner struct xfs_mount *mp = tp->t_mountp;
37884d89e20bSDave Chinner struct xfs_buf *agbp;
37899798f615SChristoph Hellwig struct xfs_agf *agf;
37904d89e20bSDave Chinner int error;
3791fcb762f5SBrian Foster unsigned int busy_flags = 0;
37924d89e20bSDave Chinner
37934d89e20bSDave Chinner ASSERT(len != 0);
37940ab32086SBrian Foster ASSERT(type != XFS_AG_RESV_AGFL);
37954d89e20bSDave Chinner
3796ba9e7802SDarrick J. Wong if (XFS_TEST_ERROR(false, mp,
37979e24cfd0SDarrick J. Wong XFS_ERRTAG_FREE_EXTENT))
3798ba9e7802SDarrick J. Wong return -EIO;
3799ba9e7802SDarrick J. Wong
380045d06621SDave Chinner error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
38014d89e20bSDave Chinner if (error)
3802b2ccab31SDarrick J. Wong return error;
38039798f615SChristoph Hellwig agf = agbp->b_addr;
38044d89e20bSDave Chinner
3805f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3806f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
380745d06621SDave Chinner goto err_release;
3808f9e03706SDarrick J. Wong }
38094d89e20bSDave Chinner
38104d89e20bSDave Chinner 	/* validate the extent size is legal now that we have the AGF locked */
38119798f615SChristoph Hellwig if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
3812f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
381345d06621SDave Chinner goto err_release;
3814f9e03706SDarrick J. Wong }
38154d89e20bSDave Chinner
3816b2ccab31SDarrick J. Wong error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
3817b2ccab31SDarrick J. Wong type);
38184d89e20bSDave Chinner if (error)
381945d06621SDave Chinner goto err_release;
38204d89e20bSDave Chinner
3821fcb762f5SBrian Foster if (skip_discard)
3822fcb762f5SBrian Foster busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
382345d06621SDave Chinner xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
38244d89e20bSDave Chinner return 0;
38254d89e20bSDave Chinner
382645d06621SDave Chinner err_release:
38274d89e20bSDave Chinner xfs_trans_brelse(tp, agbp);
382830f712c9SDave Chinner return error;
382930f712c9SDave Chinner }
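/*
 * Illustrative call sketch, not part of the original file: free a two-block
 * extent with no specific rmap owner, allowing discard. The helper is
 * hypothetical; the perag reference and the agbno/length pair are assumed
 * to be valid for this transaction.
 */
static int
xfs_example_free_two_blocks(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno)
{
	return __xfs_free_extent(tp, pag, agbno, 2,
			&XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_NONE, false);
}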
38302d520bfaSDarrick J. Wong
38312d520bfaSDarrick J. Wong struct xfs_alloc_query_range_info {
38322d520bfaSDarrick J. Wong xfs_alloc_query_range_fn fn;
38332d520bfaSDarrick J. Wong void *priv;
38342d520bfaSDarrick J. Wong };
38352d520bfaSDarrick J. Wong
38362d520bfaSDarrick J. Wong /* Format btree record and pass to our callback. */
38372d520bfaSDarrick J. Wong STATIC int
38382d520bfaSDarrick J. Wong xfs_alloc_query_range_helper(
38392d520bfaSDarrick J. Wong struct xfs_btree_cur *cur,
3840159eb69dSDarrick J. Wong const union xfs_btree_rec *rec,
38412d520bfaSDarrick J. Wong void *priv)
38422d520bfaSDarrick J. Wong {
38432d520bfaSDarrick J. Wong struct xfs_alloc_query_range_info *query = priv;
38442d520bfaSDarrick J. Wong struct xfs_alloc_rec_incore irec;
3845ee12eaaaSDarrick J. Wong xfs_failaddr_t fa;
38462d520bfaSDarrick J. Wong
384735e3b9a1SDarrick J. Wong xfs_alloc_btrec_to_irec(rec, &irec);
3848ee12eaaaSDarrick J. Wong fa = xfs_alloc_check_irec(cur, &irec);
3849ee12eaaaSDarrick J. Wong if (fa)
3850ee12eaaaSDarrick J. Wong return xfs_alloc_complain_bad_rec(cur, fa, &irec);
385135e3b9a1SDarrick J. Wong
38522d520bfaSDarrick J. Wong return query->fn(cur, &irec, query->priv);
38532d520bfaSDarrick J. Wong }
38542d520bfaSDarrick J. Wong
38552d520bfaSDarrick J. Wong /* Find all free space within a given range of blocks. */
38562d520bfaSDarrick J. Wong int
38572d520bfaSDarrick J. Wong xfs_alloc_query_range(
38582d520bfaSDarrick J. Wong struct xfs_btree_cur *cur,
385904dcb474SDarrick J. Wong const struct xfs_alloc_rec_incore *low_rec,
386004dcb474SDarrick J. Wong const struct xfs_alloc_rec_incore *high_rec,
38612d520bfaSDarrick J. Wong xfs_alloc_query_range_fn fn,
38622d520bfaSDarrick J. Wong void *priv)
38632d520bfaSDarrick J. Wong {
386475dc0345SDarrick J. Wong union xfs_btree_irec low_brec = { .a = *low_rec };
386575dc0345SDarrick J. Wong union xfs_btree_irec high_brec = { .a = *high_rec };
386675dc0345SDarrick J. Wong struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
38672d520bfaSDarrick J. Wong
38682d520bfaSDarrick J. Wong ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
38692d520bfaSDarrick J. Wong return xfs_btree_query_range(cur, &low_brec, &high_brec,
38702d520bfaSDarrick J. Wong xfs_alloc_query_range_helper, &query);
38712d520bfaSDarrick J. Wong }
3872e9a2599aSDarrick J. Wong
3873e9a2599aSDarrick J. Wong /* Find all free space records. */
3874e9a2599aSDarrick J. Wong int
3875e9a2599aSDarrick J. Wong xfs_alloc_query_all(
3876e9a2599aSDarrick J. Wong struct xfs_btree_cur *cur,
3877e9a2599aSDarrick J. Wong xfs_alloc_query_range_fn fn,
3878e9a2599aSDarrick J. Wong void *priv)
3879e9a2599aSDarrick J. Wong {
3880e9a2599aSDarrick J. Wong struct xfs_alloc_query_range_info query;
3881e9a2599aSDarrick J. Wong
3882e9a2599aSDarrick J. Wong ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3883e9a2599aSDarrick J. Wong query.priv = priv;
3884e9a2599aSDarrick J. Wong query.fn = fn;
3885e9a2599aSDarrick J. Wong return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3886e9a2599aSDarrick J. Wong }
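/*
 * Illustrative sketch (hypothetical helper and priv structure): a callback
 * for xfs_alloc_query_all()/xfs_alloc_query_range() that simply counts the
 * free extents it is handed.
 */
struct xfs_example_count {
	uint64_t			nr;
};

static int
xfs_example_count_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *rec,
	void				*priv)
{
	struct xfs_example_count	*ec = priv;

	ec->nr++;
	return 0;
}

/* Usage: error = xfs_alloc_query_all(cur, xfs_example_count_helper, &ec); */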
388721ec5416SDarrick J. Wong
38886abc7aefSDarrick J. Wong /*
38896abc7aefSDarrick J. Wong * Scan part of the keyspace of the free space and tell us if the area has no
38906abc7aefSDarrick J. Wong * records, is fully mapped by records, or is partially filled.
38916abc7aefSDarrick J. Wong */
3892ce1d802eSDarrick J. Wong int
38936abc7aefSDarrick J. Wong xfs_alloc_has_records(
3894ce1d802eSDarrick J. Wong struct xfs_btree_cur *cur,
3895ce1d802eSDarrick J. Wong xfs_agblock_t bno,
3896ce1d802eSDarrick J. Wong xfs_extlen_t len,
38976abc7aefSDarrick J. Wong enum xbtree_recpacking *outcome)
3898ce1d802eSDarrick J. Wong {
3899ce1d802eSDarrick J. Wong union xfs_btree_irec low;
3900ce1d802eSDarrick J. Wong union xfs_btree_irec high;
3901ce1d802eSDarrick J. Wong
3902ce1d802eSDarrick J. Wong memset(&low, 0, sizeof(low));
3903ce1d802eSDarrick J. Wong low.a.ar_startblock = bno;
3904ce1d802eSDarrick J. Wong memset(&high, 0xFF, sizeof(high));
3905ce1d802eSDarrick J. Wong high.a.ar_startblock = bno + len - 1;
3906ce1d802eSDarrick J. Wong
39074a200a09SDarrick J. Wong return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
3908ce1d802eSDarrick J. Wong }
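/*
 * Example outcomes (illustrative): if [bno, bno + len) lies entirely within
 * free space records the result is XBTREE_RECPACKING_FULL; if no free space
 * record overlaps the range it is XBTREE_RECPACKING_EMPTY; partial overlap
 * yields XBTREE_RECPACKING_SPARSE.
 */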
39099f3a080eSDarrick J. Wong
39109f3a080eSDarrick J. Wong /*
39119f3a080eSDarrick J. Wong * Walk all the blocks in the AGFL. The @walk_fn can return any negative
39125bb46e3eSDarrick J. Wong * error code or XFS_ITER_*.
39139f3a080eSDarrick J. Wong */
39149f3a080eSDarrick J. Wong int
39159f3a080eSDarrick J. Wong xfs_agfl_walk(
39169f3a080eSDarrick J. Wong struct xfs_mount *mp,
39179f3a080eSDarrick J. Wong struct xfs_agf *agf,
39189f3a080eSDarrick J. Wong struct xfs_buf *agflbp,
39199f3a080eSDarrick J. Wong xfs_agfl_walk_fn walk_fn,
39209f3a080eSDarrick J. Wong void *priv)
39219f3a080eSDarrick J. Wong {
39229f3a080eSDarrick J. Wong __be32 *agfl_bno;
39239f3a080eSDarrick J. Wong unsigned int i;
39249f3a080eSDarrick J. Wong int error;
39259f3a080eSDarrick J. Wong
3926183606d8SChristoph Hellwig agfl_bno = xfs_buf_to_agfl_bno(agflbp);
39279f3a080eSDarrick J. Wong i = be32_to_cpu(agf->agf_flfirst);
39289f3a080eSDarrick J. Wong
39299f3a080eSDarrick J. Wong /* Nothing to walk in an empty AGFL. */
39309f3a080eSDarrick J. Wong if (agf->agf_flcount == cpu_to_be32(0))
39319f3a080eSDarrick J. Wong return 0;
39329f3a080eSDarrick J. Wong
39339f3a080eSDarrick J. Wong /* Otherwise, walk from first to last, wrapping as needed. */
39349f3a080eSDarrick J. Wong for (;;) {
39359f3a080eSDarrick J. Wong error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
39369f3a080eSDarrick J. Wong if (error)
39379f3a080eSDarrick J. Wong return error;
39389f3a080eSDarrick J. Wong if (i == be32_to_cpu(agf->agf_fllast))
39399f3a080eSDarrick J. Wong break;
39409f3a080eSDarrick J. Wong if (++i == xfs_agfl_size(mp))
39419f3a080eSDarrick J. Wong i = 0;
39429f3a080eSDarrick J. Wong }
39439f3a080eSDarrick J. Wong
39449f3a080eSDarrick J. Wong return 0;
39459f3a080eSDarrick J. Wong }
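/*
 * Illustrative sketch (hypothetical helper): an xfs_agfl_walk_fn that tallies
 * the AGFL blocks it visits via the opaque priv pointer.
 */
static int
xfs_example_agfl_count(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	unsigned int		*count = priv;

	(*count)++;
	return 0;
}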
3946c201d9caSDarrick J. Wong
3947c201d9caSDarrick J. Wong int __init
3948c201d9caSDarrick J. Wong xfs_extfree_intent_init_cache(void)
3949c201d9caSDarrick J. Wong {
3950c201d9caSDarrick J. Wong xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
3951c201d9caSDarrick J. Wong sizeof(struct xfs_extent_free_item),
3952c201d9caSDarrick J. Wong 0, 0, NULL);
3953c201d9caSDarrick J. Wong
3954c201d9caSDarrick J. Wong return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
3955c201d9caSDarrick J. Wong }
3956c201d9caSDarrick J. Wong
3957c201d9caSDarrick J. Wong void
3958c201d9caSDarrick J. Wong xfs_extfree_intent_destroy_cache(void)
3959c201d9caSDarrick J. Wong {
3960c201d9caSDarrick J. Wong kmem_cache_destroy(xfs_extfree_item_cache);
3961c201d9caSDarrick J. Wong xfs_extfree_item_cache = NULL;
3962c201d9caSDarrick J. Wong }
3963