10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0
230f712c9SDave Chinner /*
330f712c9SDave Chinner * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
430f712c9SDave Chinner * All Rights Reserved.
530f712c9SDave Chinner */
630f712c9SDave Chinner #include "xfs.h"
730f712c9SDave Chinner #include "xfs_fs.h"
830f712c9SDave Chinner #include "xfs_format.h"
930f712c9SDave Chinner #include "xfs_log_format.h"
1030f712c9SDave Chinner #include "xfs_shared.h"
1130f712c9SDave Chinner #include "xfs_trans_resv.h"
1230f712c9SDave Chinner #include "xfs_bit.h"
1330f712c9SDave Chinner #include "xfs_mount.h"
143ab78df2SDarrick J. Wong #include "xfs_defer.h"
1530f712c9SDave Chinner #include "xfs_btree.h"
16673930c3SDarrick J. Wong #include "xfs_rmap.h"
1730f712c9SDave Chinner #include "xfs_alloc_btree.h"
1830f712c9SDave Chinner #include "xfs_alloc.h"
1930f712c9SDave Chinner #include "xfs_extent_busy.h"
20e9e899a2SDarrick J. Wong #include "xfs_errortag.h"
2130f712c9SDave Chinner #include "xfs_error.h"
2230f712c9SDave Chinner #include "xfs_trace.h"
2330f712c9SDave Chinner #include "xfs_trans.h"
2430f712c9SDave Chinner #include "xfs_buf_item.h"
2530f712c9SDave Chinner #include "xfs_log.h"
269bbafc71SDave Chinner #include "xfs_ag.h"
273fd129b6SDarrick J. Wong #include "xfs_ag_resv.h"
28f8f2835aSBrian Foster #include "xfs_bmap.h"
29f8f2835aSBrian Foster
/*
 * Cache for extent-free work item structures.  NOTE(review): only the
 * definition lives here; creation/destruction happen elsewhere in xfs —
 * confirm against the mount/teardown code.
 */
struct kmem_cache *xfs_extfree_item_cache;

/* Workqueue used to run allocation work; presumably set up at module init. */
struct workqueue_struct *xfs_alloc_wq;

/* Absolute difference of two values without going through signed arithmetic. */
#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

/*
 * Flags for xfs_alloc_fixup_trees(): set when the by-block (BNO) or by-size
 * (CNT) cursor is already positioned at the record being fixed up.
 */
#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2
3830f712c9SDave Chinner
39a78ee256SDave Chinner /*
40a78ee256SDave Chinner * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
41a78ee256SDave Chinner * the beginning of the block for a proper header with the location information
42a78ee256SDave Chinner * and CRC.
43a78ee256SDave Chinner */
44a78ee256SDave Chinner unsigned int
xfs_agfl_size(struct xfs_mount * mp)45a78ee256SDave Chinner xfs_agfl_size(
46a78ee256SDave Chinner struct xfs_mount *mp)
47a78ee256SDave Chinner {
48a78ee256SDave Chinner unsigned int size = mp->m_sb.sb_sectsize;
49a78ee256SDave Chinner
5038c26bfdSDave Chinner if (xfs_has_crc(mp))
51a78ee256SDave Chinner size -= sizeof(struct xfs_agfl);
52a78ee256SDave Chinner
53a78ee256SDave Chinner return size / sizeof(xfs_agblock_t);
54a78ee256SDave Chinner }
55a78ee256SDave Chinner
56af30dfa1SDarrick J. Wong unsigned int
xfs_refc_block(struct xfs_mount * mp)57af30dfa1SDarrick J. Wong xfs_refc_block(
58af30dfa1SDarrick J. Wong struct xfs_mount *mp)
59af30dfa1SDarrick J. Wong {
6038c26bfdSDave Chinner if (xfs_has_rmapbt(mp))
61af30dfa1SDarrick J. Wong return XFS_RMAP_BLOCK(mp) + 1;
6238c26bfdSDave Chinner if (xfs_has_finobt(mp))
63af30dfa1SDarrick J. Wong return XFS_FIBT_BLOCK(mp) + 1;
64af30dfa1SDarrick J. Wong return XFS_IBT_BLOCK(mp) + 1;
65af30dfa1SDarrick J. Wong }
66af30dfa1SDarrick J. Wong
678018026eSDarrick J. Wong xfs_extlen_t
xfs_prealloc_blocks(struct xfs_mount * mp)688018026eSDarrick J. Wong xfs_prealloc_blocks(
698018026eSDarrick J. Wong struct xfs_mount *mp)
708018026eSDarrick J. Wong {
7138c26bfdSDave Chinner if (xfs_has_reflink(mp))
72af30dfa1SDarrick J. Wong return xfs_refc_block(mp) + 1;
7338c26bfdSDave Chinner if (xfs_has_rmapbt(mp))
748018026eSDarrick J. Wong return XFS_RMAP_BLOCK(mp) + 1;
7538c26bfdSDave Chinner if (xfs_has_finobt(mp))
768018026eSDarrick J. Wong return XFS_FIBT_BLOCK(mp) + 1;
778018026eSDarrick J. Wong return XFS_IBT_BLOCK(mp) + 1;
788018026eSDarrick J. Wong }
798018026eSDarrick J. Wong
8030f712c9SDave Chinner /*
8193defd5aSDarrick J. Wong * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
8293defd5aSDarrick J. Wong * guarantee that we can refill the AGFL prior to allocating space in a nearly
834869b6e8SSlark Xiao * full AG. Although the space described by the free space btrees, the
8493defd5aSDarrick J. Wong * blocks used by the freesp btrees themselves, and the blocks owned by the
8593defd5aSDarrick J. Wong * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
8693defd5aSDarrick J. Wong * free space in the AG drop so low that the free space btrees cannot refill an
8793defd5aSDarrick J. Wong * empty AGFL up to the minimum level. Rather than grind through empty AGs
8893defd5aSDarrick J. Wong * until the fs goes down, we subtract this many AG blocks from the incore
8993defd5aSDarrick J. Wong * fdblocks to ensure user allocation does not overcommit the space the
9093defd5aSDarrick J. Wong * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
9193defd5aSDarrick J. Wong * withhold space from xfs_mod_fdblocks, so we do not account for that here.
9293defd5aSDarrick J. Wong */
9393defd5aSDarrick J. Wong #define XFS_ALLOCBT_AGFL_RESERVE 4
9493defd5aSDarrick J. Wong
9593defd5aSDarrick J. Wong /*
9693defd5aSDarrick J. Wong * Compute the number of blocks that we set aside to guarantee the ability to
9793defd5aSDarrick J. Wong * refill the AGFL and handle a full bmap btree split.
9893defd5aSDarrick J. Wong *
9952548852SDarrick J. Wong * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
10052548852SDarrick J. Wong * AGF buffer (PV 947395), we place constraints on the relationship among
10152548852SDarrick J. Wong * actual allocations for data blocks, freelist blocks, and potential file data
10252548852SDarrick J. Wong * bmap btree blocks. However, these restrictions may result in no actual space
10352548852SDarrick J. Wong * allocated for a delayed extent, for example, a data block in a certain AG is
10452548852SDarrick J. Wong * allocated but there is no additional block for the additional bmap btree
10552548852SDarrick J. Wong * block due to a split of the bmap btree of the file. The result of this may
10652548852SDarrick J. Wong * lead to an infinite loop when the file gets flushed to disk and all delayed
10752548852SDarrick J. Wong * extents need to be actually allocated. To get around this, we explicitly set
10852548852SDarrick J. Wong * aside a few blocks which will not be reserved in delayed allocation.
10952548852SDarrick J. Wong *
11093defd5aSDarrick J. Wong * For each AG, we need to reserve enough blocks to replenish a totally empty
11193defd5aSDarrick J. Wong * AGFL and 4 more to handle a potential split of the file's bmap btree.
11252548852SDarrick J. Wong */
11352548852SDarrick J. Wong unsigned int
xfs_alloc_set_aside(struct xfs_mount * mp)11452548852SDarrick J. Wong xfs_alloc_set_aside(
11552548852SDarrick J. Wong struct xfs_mount *mp)
11652548852SDarrick J. Wong {
11793defd5aSDarrick J. Wong return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
11852548852SDarrick J. Wong }
11952548852SDarrick J. Wong
12052548852SDarrick J. Wong /*
12152548852SDarrick J. Wong * When deciding how much space to allocate out of an AG, we limit the
12252548852SDarrick J. Wong * allocation maximum size to the size the AG. However, we cannot use all the
12352548852SDarrick J. Wong * blocks in the AG - some are permanently used by metadata. These
12452548852SDarrick J. Wong * blocks are generally:
12552548852SDarrick J. Wong * - the AG superblock, AGF, AGI and AGFL
12652548852SDarrick J. Wong * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
12752548852SDarrick J. Wong * the AGI free inode and rmap btree root blocks.
12852548852SDarrick J. Wong * - blocks on the AGFL according to xfs_alloc_set_aside() limits
12952548852SDarrick J. Wong * - the rmapbt root block
13052548852SDarrick J. Wong *
13152548852SDarrick J. Wong * The AG headers are sector sized, so the amount of space they take up is
13252548852SDarrick J. Wong * dependent on filesystem geometry. The others are all single blocks.
13352548852SDarrick J. Wong */
13452548852SDarrick J. Wong unsigned int
xfs_alloc_ag_max_usable(struct xfs_mount * mp)13552548852SDarrick J. Wong xfs_alloc_ag_max_usable(
13652548852SDarrick J. Wong struct xfs_mount *mp)
13752548852SDarrick J. Wong {
13852548852SDarrick J. Wong unsigned int blocks;
13952548852SDarrick J. Wong
14052548852SDarrick J. Wong blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
14193defd5aSDarrick J. Wong blocks += XFS_ALLOCBT_AGFL_RESERVE;
14252548852SDarrick J. Wong blocks += 3; /* AGF, AGI btree root blocks */
14338c26bfdSDave Chinner if (xfs_has_finobt(mp))
14452548852SDarrick J. Wong blocks++; /* finobt root block */
14538c26bfdSDave Chinner if (xfs_has_rmapbt(mp))
14652548852SDarrick J. Wong blocks++; /* rmap root block */
14738c26bfdSDave Chinner if (xfs_has_reflink(mp))
148d0e853f3SDarrick J. Wong blocks++; /* refcount root block */
14952548852SDarrick J. Wong
15052548852SDarrick J. Wong return mp->m_sb.sb_agblocks - blocks;
15152548852SDarrick J. Wong }
15252548852SDarrick J. Wong
15352548852SDarrick J. Wong /*
15430f712c9SDave Chinner * Lookup the record equal to [bno, len] in the btree given by cur.
15530f712c9SDave Chinner */
15630f712c9SDave Chinner STATIC int /* error */
xfs_alloc_lookup_eq(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,int * stat)15730f712c9SDave Chinner xfs_alloc_lookup_eq(
15830f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */
15930f712c9SDave Chinner xfs_agblock_t bno, /* starting block of extent */
16030f712c9SDave Chinner xfs_extlen_t len, /* length of extent */
16130f712c9SDave Chinner int *stat) /* success/failure */
16230f712c9SDave Chinner {
163f6b428a4SBrian Foster int error;
164f6b428a4SBrian Foster
16530f712c9SDave Chinner cur->bc_rec.a.ar_startblock = bno;
16630f712c9SDave Chinner cur->bc_rec.a.ar_blockcount = len;
167f6b428a4SBrian Foster error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
168c4aa10d0SDave Chinner cur->bc_ag.abt.active = (*stat == 1);
169f6b428a4SBrian Foster return error;
17030f712c9SDave Chinner }
17130f712c9SDave Chinner
17230f712c9SDave Chinner /*
17330f712c9SDave Chinner * Lookup the first record greater than or equal to [bno, len]
17430f712c9SDave Chinner * in the btree given by cur.
17530f712c9SDave Chinner */
17630f712c9SDave Chinner int /* error */
xfs_alloc_lookup_ge(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,int * stat)17730f712c9SDave Chinner xfs_alloc_lookup_ge(
17830f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */
17930f712c9SDave Chinner xfs_agblock_t bno, /* starting block of extent */
18030f712c9SDave Chinner xfs_extlen_t len, /* length of extent */
18130f712c9SDave Chinner int *stat) /* success/failure */
18230f712c9SDave Chinner {
183f6b428a4SBrian Foster int error;
184f6b428a4SBrian Foster
18530f712c9SDave Chinner cur->bc_rec.a.ar_startblock = bno;
18630f712c9SDave Chinner cur->bc_rec.a.ar_blockcount = len;
187f6b428a4SBrian Foster error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
188c4aa10d0SDave Chinner cur->bc_ag.abt.active = (*stat == 1);
189f6b428a4SBrian Foster return error;
19030f712c9SDave Chinner }
19130f712c9SDave Chinner
19230f712c9SDave Chinner /*
19330f712c9SDave Chinner * Lookup the first record less than or equal to [bno, len]
19430f712c9SDave Chinner * in the btree given by cur.
19530f712c9SDave Chinner */
196ce1d802eSDarrick J. Wong int /* error */
xfs_alloc_lookup_le(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len,int * stat)19730f712c9SDave Chinner xfs_alloc_lookup_le(
19830f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */
19930f712c9SDave Chinner xfs_agblock_t bno, /* starting block of extent */
20030f712c9SDave Chinner xfs_extlen_t len, /* length of extent */
20130f712c9SDave Chinner int *stat) /* success/failure */
20230f712c9SDave Chinner {
203f6b428a4SBrian Foster int error;
20430f712c9SDave Chinner cur->bc_rec.a.ar_startblock = bno;
20530f712c9SDave Chinner cur->bc_rec.a.ar_blockcount = len;
206f6b428a4SBrian Foster error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
207c4aa10d0SDave Chinner cur->bc_ag.abt.active = (*stat == 1);
208f6b428a4SBrian Foster return error;
209f6b428a4SBrian Foster }
210f6b428a4SBrian Foster
211f6b428a4SBrian Foster static inline bool
xfs_alloc_cur_active(struct xfs_btree_cur * cur)212f6b428a4SBrian Foster xfs_alloc_cur_active(
213f6b428a4SBrian Foster struct xfs_btree_cur *cur)
214f6b428a4SBrian Foster {
215c4aa10d0SDave Chinner return cur && cur->bc_ag.abt.active;
21630f712c9SDave Chinner }
21730f712c9SDave Chinner
21830f712c9SDave Chinner /*
21930f712c9SDave Chinner * Update the record referred to by cur to the value given
22030f712c9SDave Chinner * by [bno, len].
22130f712c9SDave Chinner * This either works (return 0) or gets an EFSCORRUPTED error.
22230f712c9SDave Chinner */
22330f712c9SDave Chinner STATIC int /* error */
xfs_alloc_update(struct xfs_btree_cur * cur,xfs_agblock_t bno,xfs_extlen_t len)22430f712c9SDave Chinner xfs_alloc_update(
22530f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */
22630f712c9SDave Chinner xfs_agblock_t bno, /* starting block of extent */
22730f712c9SDave Chinner xfs_extlen_t len) /* length of extent */
22830f712c9SDave Chinner {
22930f712c9SDave Chinner union xfs_btree_rec rec;
23030f712c9SDave Chinner
23130f712c9SDave Chinner rec.alloc.ar_startblock = cpu_to_be32(bno);
23230f712c9SDave Chinner rec.alloc.ar_blockcount = cpu_to_be32(len);
23330f712c9SDave Chinner return xfs_btree_update(cur, &rec);
23430f712c9SDave Chinner }
23530f712c9SDave Chinner
23635e3b9a1SDarrick J. Wong /* Convert the ondisk btree record to its incore representation. */
23735e3b9a1SDarrick J. Wong void
xfs_alloc_btrec_to_irec(const union xfs_btree_rec * rec,struct xfs_alloc_rec_incore * irec)23835e3b9a1SDarrick J. Wong xfs_alloc_btrec_to_irec(
23935e3b9a1SDarrick J. Wong const union xfs_btree_rec *rec,
24035e3b9a1SDarrick J. Wong struct xfs_alloc_rec_incore *irec)
24135e3b9a1SDarrick J. Wong {
24235e3b9a1SDarrick J. Wong irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
24335e3b9a1SDarrick J. Wong irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
24435e3b9a1SDarrick J. Wong }
24535e3b9a1SDarrick J. Wong
24635e3b9a1SDarrick J. Wong /* Simple checks for free space records. */
24735e3b9a1SDarrick J. Wong xfs_failaddr_t
xfs_alloc_check_irec(struct xfs_btree_cur * cur,const struct xfs_alloc_rec_incore * irec)24835e3b9a1SDarrick J. Wong xfs_alloc_check_irec(
24935e3b9a1SDarrick J. Wong struct xfs_btree_cur *cur,
25035e3b9a1SDarrick J. Wong const struct xfs_alloc_rec_incore *irec)
25135e3b9a1SDarrick J. Wong {
25235e3b9a1SDarrick J. Wong struct xfs_perag *pag = cur->bc_ag.pag;
25335e3b9a1SDarrick J. Wong
25435e3b9a1SDarrick J. Wong if (irec->ar_blockcount == 0)
25535e3b9a1SDarrick J. Wong return __this_address;
25635e3b9a1SDarrick J. Wong
25735e3b9a1SDarrick J. Wong /* check for valid extent range, including overflow */
25835e3b9a1SDarrick J. Wong if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
25935e3b9a1SDarrick J. Wong return __this_address;
26035e3b9a1SDarrick J. Wong
26135e3b9a1SDarrick J. Wong return NULL;
26235e3b9a1SDarrick J. Wong }
26335e3b9a1SDarrick J. Wong
264ee12eaaaSDarrick J. Wong static inline int
xfs_alloc_complain_bad_rec(struct xfs_btree_cur * cur,xfs_failaddr_t fa,const struct xfs_alloc_rec_incore * irec)265ee12eaaaSDarrick J. Wong xfs_alloc_complain_bad_rec(
266ee12eaaaSDarrick J. Wong struct xfs_btree_cur *cur,
267ee12eaaaSDarrick J. Wong xfs_failaddr_t fa,
268ee12eaaaSDarrick J. Wong const struct xfs_alloc_rec_incore *irec)
269ee12eaaaSDarrick J. Wong {
270ee12eaaaSDarrick J. Wong struct xfs_mount *mp = cur->bc_mp;
271ee12eaaaSDarrick J. Wong
272ee12eaaaSDarrick J. Wong xfs_warn(mp,
273ee12eaaaSDarrick J. Wong "%s Freespace BTree record corruption in AG %d detected at %pS!",
274ee12eaaaSDarrick J. Wong cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
275ee12eaaaSDarrick J. Wong cur->bc_ag.pag->pag_agno, fa);
276ee12eaaaSDarrick J. Wong xfs_warn(mp,
277ee12eaaaSDarrick J. Wong "start block 0x%x block count 0x%x", irec->ar_startblock,
278ee12eaaaSDarrick J. Wong irec->ar_blockcount);
279ee12eaaaSDarrick J. Wong return -EFSCORRUPTED;
280ee12eaaaSDarrick J. Wong }
281ee12eaaaSDarrick J. Wong
28230f712c9SDave Chinner /*
28330f712c9SDave Chinner * Get the data from the pointed-to record.
28430f712c9SDave Chinner */
28530f712c9SDave Chinner int /* error */
xfs_alloc_get_rec(struct xfs_btree_cur * cur,xfs_agblock_t * bno,xfs_extlen_t * len,int * stat)28630f712c9SDave Chinner xfs_alloc_get_rec(
28730f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */
28830f712c9SDave Chinner xfs_agblock_t *bno, /* output: starting block of extent */
28930f712c9SDave Chinner xfs_extlen_t *len, /* output: length of extent */
29030f712c9SDave Chinner int *stat) /* output: success/failure */
29130f712c9SDave Chinner {
29235e3b9a1SDarrick J. Wong struct xfs_alloc_rec_incore irec;
29330f712c9SDave Chinner union xfs_btree_rec *rec;
29435e3b9a1SDarrick J. Wong xfs_failaddr_t fa;
29530f712c9SDave Chinner int error;
29630f712c9SDave Chinner
29730f712c9SDave Chinner error = xfs_btree_get_rec(cur, &rec, stat);
298a37f7b12SDarrick J. Wong if (error || !(*stat))
299a37f7b12SDarrick J. Wong return error;
300a37f7b12SDarrick J. Wong
30135e3b9a1SDarrick J. Wong xfs_alloc_btrec_to_irec(rec, &irec);
30235e3b9a1SDarrick J. Wong fa = xfs_alloc_check_irec(cur, &irec);
30335e3b9a1SDarrick J. Wong if (fa)
304ee12eaaaSDarrick J. Wong return xfs_alloc_complain_bad_rec(cur, fa, &irec);
305efe80327SCarlos Maiolino
30635e3b9a1SDarrick J. Wong *bno = irec.ar_startblock;
30735e3b9a1SDarrick J. Wong *len = irec.ar_blockcount;
3089e6c08d4SDave Chinner return 0;
30930f712c9SDave Chinner }
31030f712c9SDave Chinner
31130f712c9SDave Chinner /*
31230f712c9SDave Chinner * Compute aligned version of the found extent.
31330f712c9SDave Chinner * Takes alignment and min length into account.
31430f712c9SDave Chinner */
315ebf55872SChristoph Hellwig STATIC bool
xfs_alloc_compute_aligned(xfs_alloc_arg_t * args,xfs_agblock_t foundbno,xfs_extlen_t foundlen,xfs_agblock_t * resbno,xfs_extlen_t * reslen,unsigned * busy_gen)31630f712c9SDave Chinner xfs_alloc_compute_aligned(
31730f712c9SDave Chinner xfs_alloc_arg_t *args, /* allocation argument structure */
31830f712c9SDave Chinner xfs_agblock_t foundbno, /* starting block in found extent */
31930f712c9SDave Chinner xfs_extlen_t foundlen, /* length in found extent */
32030f712c9SDave Chinner xfs_agblock_t *resbno, /* result block number */
321ebf55872SChristoph Hellwig xfs_extlen_t *reslen, /* result length */
322ebf55872SChristoph Hellwig unsigned *busy_gen)
32330f712c9SDave Chinner {
324ebf55872SChristoph Hellwig xfs_agblock_t bno = foundbno;
325ebf55872SChristoph Hellwig xfs_extlen_t len = foundlen;
326bfe46d4eSBrian Foster xfs_extlen_t diff;
327ebf55872SChristoph Hellwig bool busy;
32830f712c9SDave Chinner
32930f712c9SDave Chinner /* Trim busy sections out of found extent */
330ebf55872SChristoph Hellwig busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
33130f712c9SDave Chinner
332bfe46d4eSBrian Foster /*
333bfe46d4eSBrian Foster * If we have a largish extent that happens to start before min_agbno,
334bfe46d4eSBrian Foster * see if we can shift it into range...
335bfe46d4eSBrian Foster */
336bfe46d4eSBrian Foster if (bno < args->min_agbno && bno + len > args->min_agbno) {
337bfe46d4eSBrian Foster diff = args->min_agbno - bno;
338bfe46d4eSBrian Foster if (len > diff) {
339bfe46d4eSBrian Foster bno += diff;
340bfe46d4eSBrian Foster len -= diff;
341bfe46d4eSBrian Foster }
342bfe46d4eSBrian Foster }
343bfe46d4eSBrian Foster
34430f712c9SDave Chinner if (args->alignment > 1 && len >= args->minlen) {
34530f712c9SDave Chinner xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
346bfe46d4eSBrian Foster
347bfe46d4eSBrian Foster diff = aligned_bno - bno;
34830f712c9SDave Chinner
34930f712c9SDave Chinner *resbno = aligned_bno;
35030f712c9SDave Chinner *reslen = diff >= len ? 0 : len - diff;
35130f712c9SDave Chinner } else {
35230f712c9SDave Chinner *resbno = bno;
35330f712c9SDave Chinner *reslen = len;
35430f712c9SDave Chinner }
355ebf55872SChristoph Hellwig
356ebf55872SChristoph Hellwig return busy;
35730f712c9SDave Chinner }
35830f712c9SDave Chinner
35930f712c9SDave Chinner /*
36030f712c9SDave Chinner * Compute best start block and diff for "near" allocations.
36130f712c9SDave Chinner * freelen >= wantlen already checked by caller.
36230f712c9SDave Chinner */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		/* Take the aligned start of the free extent, if it fits. */
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * The free extent covers the target end; try the aligned
		 * block at or above wantbno (newbno1) and the one just below
		 * it (newbno2), and keep whichever yields the longer usable
		 * length, breaking ties by distance from wantbno.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* Unaligned and the free extent reaches wantend: take wantbno. */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/*
		 * Free extent ends before wantend: take the aligned block
		 * closest to the end that still leaves wantlen blocks, backing
		 * off one alignment unit if rounding overshot.
		 */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		/* Unaligned: allocate flush against the end of the extent. */
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	/* NULLAGBLOCK means "no candidate"; report distance 0 in that case. */
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
42730f712c9SDave Chinner
42830f712c9SDave Chinner /*
42930f712c9SDave Chinner * Fix up the length, based on mod and prod.
43030f712c9SDave Chinner * len should be k * prod + mod for some k.
43130f712c9SDave Chinner * If len is too small it is returned unchanged.
43230f712c9SDave Chinner * If len hits maxlen it is left alone.
43330f712c9SDave Chinner */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_extlen_t	k;	/* current remainder of len modulo prod */
	xfs_extlen_t	rlen;	/* adjusted length */

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/*
	 * Nothing to do if there is no prod/mod constraint, the length is
	 * already too small to trim, or it has hit maxlen.
	 */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	/* Shrink rlen to the largest value <= rlen with rlen % prod == mod. */
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
46430f712c9SDave Chinner
46530f712c9SDave Chinner /*
46630f712c9SDave Chinner * Update the two btrees, logically removing from freespace the extent
46730f712c9SDave Chinner * starting at rbno, rlen blocks. The extent is contained within the
46830f712c9SDave Chinner * actual (current) free extent fbno for flen blocks.
46930f712c9SDave Chinner * Flags are passed in indicating whether the cursors are set to the
47030f712c9SDave Chinner * relevant records.
47130f712c9SDave Chinner */
47230f712c9SDave Chinner STATIC int /* error code */
xfs_alloc_fixup_trees(struct xfs_btree_cur * cnt_cur,struct xfs_btree_cur * bno_cur,xfs_agblock_t fbno,xfs_extlen_t flen,xfs_agblock_t rbno,xfs_extlen_t rlen,int flags)47330f712c9SDave Chinner xfs_alloc_fixup_trees(
474ae127f08SDarrick J. Wong struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */
475ae127f08SDarrick J. Wong struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */
47630f712c9SDave Chinner xfs_agblock_t fbno, /* starting block of free extent */
47730f712c9SDave Chinner xfs_extlen_t flen, /* length of free extent */
47830f712c9SDave Chinner xfs_agblock_t rbno, /* starting block of returned extent */
47930f712c9SDave Chinner xfs_extlen_t rlen, /* length of returned extent */
48030f712c9SDave Chinner int flags) /* flags, XFSA_FIXUP_... */
48130f712c9SDave Chinner {
48230f712c9SDave Chinner int error; /* error code */
48330f712c9SDave Chinner int i; /* operation results */
48430f712c9SDave Chinner xfs_agblock_t nfbno1; /* first new free startblock */
48530f712c9SDave Chinner xfs_agblock_t nfbno2; /* second new free startblock */
48630f712c9SDave Chinner xfs_extlen_t nflen1=0; /* first new free length */
48730f712c9SDave Chinner xfs_extlen_t nflen2=0; /* second new free length */
4885fb5aeeeSEric Sandeen struct xfs_mount *mp;
4895fb5aeeeSEric Sandeen
4905fb5aeeeSEric Sandeen mp = cnt_cur->bc_mp;
49130f712c9SDave Chinner
49230f712c9SDave Chinner /*
49330f712c9SDave Chinner * Look up the record in the by-size tree if necessary.
49430f712c9SDave Chinner */
49530f712c9SDave Chinner if (flags & XFSA_FIXUP_CNT_OK) {
49630f712c9SDave Chinner #ifdef DEBUG
49730f712c9SDave Chinner if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
49830f712c9SDave Chinner return error;
499f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp,
500f9e03706SDarrick J. Wong i != 1 ||
501f9e03706SDarrick J. Wong nfbno1 != fbno ||
502f9e03706SDarrick J. Wong nflen1 != flen))
503f9e03706SDarrick J. Wong return -EFSCORRUPTED;
50430f712c9SDave Chinner #endif
50530f712c9SDave Chinner } else {
50630f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
50730f712c9SDave Chinner return error;
508f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
509f9e03706SDarrick J. Wong return -EFSCORRUPTED;
51030f712c9SDave Chinner }
51130f712c9SDave Chinner /*
51230f712c9SDave Chinner * Look up the record in the by-block tree if necessary.
51330f712c9SDave Chinner */
51430f712c9SDave Chinner if (flags & XFSA_FIXUP_BNO_OK) {
51530f712c9SDave Chinner #ifdef DEBUG
51630f712c9SDave Chinner if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
51730f712c9SDave Chinner return error;
518f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp,
519f9e03706SDarrick J. Wong i != 1 ||
520f9e03706SDarrick J. Wong nfbno1 != fbno ||
521f9e03706SDarrick J. Wong nflen1 != flen))
522f9e03706SDarrick J. Wong return -EFSCORRUPTED;
52330f712c9SDave Chinner #endif
52430f712c9SDave Chinner } else {
52530f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
52630f712c9SDave Chinner return error;
527f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
528f9e03706SDarrick J. Wong return -EFSCORRUPTED;
52930f712c9SDave Chinner }
53030f712c9SDave Chinner
53130f712c9SDave Chinner #ifdef DEBUG
53230f712c9SDave Chinner if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
53330f712c9SDave Chinner struct xfs_btree_block *bnoblock;
53430f712c9SDave Chinner struct xfs_btree_block *cntblock;
53530f712c9SDave Chinner
5366ca444cfSDarrick J. Wong bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
5376ca444cfSDarrick J. Wong cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
53830f712c9SDave Chinner
539f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp,
540f9e03706SDarrick J. Wong bnoblock->bb_numrecs !=
541f9e03706SDarrick J. Wong cntblock->bb_numrecs))
542f9e03706SDarrick J. Wong return -EFSCORRUPTED;
54330f712c9SDave Chinner }
54430f712c9SDave Chinner #endif
54530f712c9SDave Chinner
54630f712c9SDave Chinner /*
54730f712c9SDave Chinner * Deal with all four cases: the allocated record is contained
54830f712c9SDave Chinner * within the freespace record, so we can have new freespace
54930f712c9SDave Chinner * at either (or both) end, or no freespace remaining.
55030f712c9SDave Chinner */
55130f712c9SDave Chinner if (rbno == fbno && rlen == flen)
55230f712c9SDave Chinner nfbno1 = nfbno2 = NULLAGBLOCK;
55330f712c9SDave Chinner else if (rbno == fbno) {
55430f712c9SDave Chinner nfbno1 = rbno + rlen;
55530f712c9SDave Chinner nflen1 = flen - rlen;
55630f712c9SDave Chinner nfbno2 = NULLAGBLOCK;
55730f712c9SDave Chinner } else if (rbno + rlen == fbno + flen) {
55830f712c9SDave Chinner nfbno1 = fbno;
55930f712c9SDave Chinner nflen1 = flen - rlen;
56030f712c9SDave Chinner nfbno2 = NULLAGBLOCK;
56130f712c9SDave Chinner } else {
56230f712c9SDave Chinner nfbno1 = fbno;
56330f712c9SDave Chinner nflen1 = rbno - fbno;
56430f712c9SDave Chinner nfbno2 = rbno + rlen;
56530f712c9SDave Chinner nflen2 = (fbno + flen) - nfbno2;
56630f712c9SDave Chinner }
56730f712c9SDave Chinner /*
56830f712c9SDave Chinner * Delete the entry from the by-size btree.
56930f712c9SDave Chinner */
57030f712c9SDave Chinner if ((error = xfs_btree_delete(cnt_cur, &i)))
57130f712c9SDave Chinner return error;
572f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
573f9e03706SDarrick J. Wong return -EFSCORRUPTED;
57430f712c9SDave Chinner /*
57530f712c9SDave Chinner * Add new by-size btree entry(s).
57630f712c9SDave Chinner */
57730f712c9SDave Chinner if (nfbno1 != NULLAGBLOCK) {
57830f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
57930f712c9SDave Chinner return error;
580f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 0))
581f9e03706SDarrick J. Wong return -EFSCORRUPTED;
58230f712c9SDave Chinner if ((error = xfs_btree_insert(cnt_cur, &i)))
58330f712c9SDave Chinner return error;
584f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
585f9e03706SDarrick J. Wong return -EFSCORRUPTED;
58630f712c9SDave Chinner }
58730f712c9SDave Chinner if (nfbno2 != NULLAGBLOCK) {
58830f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
58930f712c9SDave Chinner return error;
590f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 0))
591f9e03706SDarrick J. Wong return -EFSCORRUPTED;
59230f712c9SDave Chinner if ((error = xfs_btree_insert(cnt_cur, &i)))
59330f712c9SDave Chinner return error;
594f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
595f9e03706SDarrick J. Wong return -EFSCORRUPTED;
59630f712c9SDave Chinner }
59730f712c9SDave Chinner /*
59830f712c9SDave Chinner * Fix up the by-block btree entry(s).
59930f712c9SDave Chinner */
60030f712c9SDave Chinner if (nfbno1 == NULLAGBLOCK) {
60130f712c9SDave Chinner /*
60230f712c9SDave Chinner * No remaining freespace, just delete the by-block tree entry.
60330f712c9SDave Chinner */
60430f712c9SDave Chinner if ((error = xfs_btree_delete(bno_cur, &i)))
60530f712c9SDave Chinner return error;
606f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
607f9e03706SDarrick J. Wong return -EFSCORRUPTED;
60830f712c9SDave Chinner } else {
60930f712c9SDave Chinner /*
61030f712c9SDave Chinner * Update the by-block entry to start later|be shorter.
61130f712c9SDave Chinner */
61230f712c9SDave Chinner if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
61330f712c9SDave Chinner return error;
61430f712c9SDave Chinner }
61530f712c9SDave Chinner if (nfbno2 != NULLAGBLOCK) {
61630f712c9SDave Chinner /*
61730f712c9SDave Chinner * 2 resulting free entries, need to add one.
61830f712c9SDave Chinner */
61930f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
62030f712c9SDave Chinner return error;
621f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 0))
622f9e03706SDarrick J. Wong return -EFSCORRUPTED;
62330f712c9SDave Chinner if ((error = xfs_btree_insert(bno_cur, &i)))
62430f712c9SDave Chinner return error;
625f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(mp, i != 1))
626f9e03706SDarrick J. Wong return -EFSCORRUPTED;
62730f712c9SDave Chinner }
62830f712c9SDave Chinner return 0;
62930f712c9SDave Chinner }
63030f712c9SDave Chinner
631e0a8de7dSDave Chinner /*
632e0a8de7dSDave Chinner * We do not verify the AGFL contents against AGF-based index counters here,
633e0a8de7dSDave Chinner * even though we may have access to the perag that contains shadow copies. We
634e0a8de7dSDave Chinner * don't know if the AGF based counters have been checked, and if they have they
635e0a8de7dSDave Chinner * still may be inconsistent because they haven't yet been reset on the first
636e0a8de7dSDave Chinner * allocation after the AGF has been read in.
637e0a8de7dSDave Chinner *
638e0a8de7dSDave Chinner * This means we can only check that all agfl entries contain valid or null
639e0a8de7dSDave Chinner * values because we can't reliably determine the active range to exclude
640e0a8de7dSDave Chinner * NULLAGBNO as a valid value.
641e0a8de7dSDave Chinner *
642e0a8de7dSDave Chinner * However, we can't even do that for v4 format filesystems because there are
643e0a8de7dSDave Chinner * old versions of mkfs out there that do not initialise the AGFL to known,
644e0a8de7dSDave Chinner * verifiable values. Hence we can't tell the difference between an AGFL block
645e0a8de7dSDave Chinner * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
646e0a8de7dSDave Chinner *
647e0a8de7dSDave Chinner * As a result, we can only fully validate AGFL block numbers when we pull them
648e0a8de7dSDave Chinner * from the freelist in xfs_alloc_get_freelist().
649e0a8de7dSDave Chinner */
650a6a781a5SDarrick J. Wong static xfs_failaddr_t
xfs_agfl_verify(struct xfs_buf * bp)65130f712c9SDave Chinner xfs_agfl_verify(
65230f712c9SDave Chinner struct xfs_buf *bp)
65330f712c9SDave Chinner {
654dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
65530f712c9SDave Chinner struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
656183606d8SChristoph Hellwig __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
65730f712c9SDave Chinner int i;
65830f712c9SDave Chinner
65938c26bfdSDave Chinner if (!xfs_has_crc(mp))
660b5572597SDarrick J. Wong return NULL;
661b5572597SDarrick J. Wong
66239708c20SBrian Foster if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
663a6a781a5SDarrick J. Wong return __this_address;
66439708c20SBrian Foster if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
665a6a781a5SDarrick J. Wong return __this_address;
66630f712c9SDave Chinner /*
66730f712c9SDave Chinner * during growfs operations, the perag is not fully initialised,
66830f712c9SDave Chinner * so we can't use it for any useful checking. growfs ensures we can't
66930f712c9SDave Chinner * use it by using uncached buffers that don't have the perag attached
67030f712c9SDave Chinner * so we can detect and avoid this problem.
67130f712c9SDave Chinner */
67230f712c9SDave Chinner if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
673a6a781a5SDarrick J. Wong return __this_address;
67430f712c9SDave Chinner
675a78ee256SDave Chinner for (i = 0; i < xfs_agfl_size(mp); i++) {
676183606d8SChristoph Hellwig if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
677183606d8SChristoph Hellwig be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
678a6a781a5SDarrick J. Wong return __this_address;
67930f712c9SDave Chinner }
680a45086e2SBrian Foster
681a6a781a5SDarrick J. Wong if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
682a6a781a5SDarrick J. Wong return __this_address;
683a6a781a5SDarrick J. Wong return NULL;
68430f712c9SDave Chinner }
68530f712c9SDave Chinner
68630f712c9SDave Chinner static void
xfs_agfl_read_verify(struct xfs_buf * bp)68730f712c9SDave Chinner xfs_agfl_read_verify(
68830f712c9SDave Chinner struct xfs_buf *bp)
68930f712c9SDave Chinner {
690dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
691bc1a09b8SDarrick J. Wong xfs_failaddr_t fa;
69230f712c9SDave Chinner
69330f712c9SDave Chinner /*
69430f712c9SDave Chinner * There is no verification of non-crc AGFLs because mkfs does not
69530f712c9SDave Chinner * initialise the AGFL to zero or NULL. Hence the only valid part of the
69630f712c9SDave Chinner * AGFL is what the AGF says is active. We can't get to the AGF, so we
69730f712c9SDave Chinner * can't verify just those entries are valid.
69830f712c9SDave Chinner */
69938c26bfdSDave Chinner if (!xfs_has_crc(mp))
70030f712c9SDave Chinner return;
70130f712c9SDave Chinner
70230f712c9SDave Chinner if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
703bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSBADCRC, __this_address);
704bc1a09b8SDarrick J. Wong else {
705bc1a09b8SDarrick J. Wong fa = xfs_agfl_verify(bp);
706bc1a09b8SDarrick J. Wong if (fa)
707bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa);
708bc1a09b8SDarrick J. Wong }
70930f712c9SDave Chinner }
71030f712c9SDave Chinner
71130f712c9SDave Chinner static void
xfs_agfl_write_verify(struct xfs_buf * bp)71230f712c9SDave Chinner xfs_agfl_write_verify(
71330f712c9SDave Chinner struct xfs_buf *bp)
71430f712c9SDave Chinner {
715dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
716fb1755a6SCarlos Maiolino struct xfs_buf_log_item *bip = bp->b_log_item;
717bc1a09b8SDarrick J. Wong xfs_failaddr_t fa;
71830f712c9SDave Chinner
71930f712c9SDave Chinner /* no verification of non-crc AGFLs */
72038c26bfdSDave Chinner if (!xfs_has_crc(mp))
72130f712c9SDave Chinner return;
72230f712c9SDave Chinner
723bc1a09b8SDarrick J. Wong fa = xfs_agfl_verify(bp);
724bc1a09b8SDarrick J. Wong if (fa) {
725bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa);
72630f712c9SDave Chinner return;
72730f712c9SDave Chinner }
72830f712c9SDave Chinner
72930f712c9SDave Chinner if (bip)
73030f712c9SDave Chinner XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
73130f712c9SDave Chinner
73230f712c9SDave Chinner xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
73330f712c9SDave Chinner }
73430f712c9SDave Chinner
73530f712c9SDave Chinner const struct xfs_buf_ops xfs_agfl_buf_ops = {
736233135b7SEric Sandeen .name = "xfs_agfl",
73739708c20SBrian Foster .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
73830f712c9SDave Chinner .verify_read = xfs_agfl_read_verify,
73930f712c9SDave Chinner .verify_write = xfs_agfl_write_verify,
740b5572597SDarrick J. Wong .verify_struct = xfs_agfl_verify,
74130f712c9SDave Chinner };
74230f712c9SDave Chinner
74330f712c9SDave Chinner /*
74430f712c9SDave Chinner * Read in the allocation group free block array.
74530f712c9SDave Chinner */
746cec7bb7dSDave Chinner int
xfs_alloc_read_agfl(struct xfs_perag * pag,struct xfs_trans * tp,struct xfs_buf ** bpp)74730f712c9SDave Chinner xfs_alloc_read_agfl(
748cec7bb7dSDave Chinner struct xfs_perag *pag,
749cec7bb7dSDave Chinner struct xfs_trans *tp,
750cec7bb7dSDave Chinner struct xfs_buf **bpp)
75130f712c9SDave Chinner {
752cec7bb7dSDave Chinner struct xfs_mount *mp = pag->pag_mount;
753cec7bb7dSDave Chinner struct xfs_buf *bp;
75430f712c9SDave Chinner int error;
75530f712c9SDave Chinner
75630f712c9SDave Chinner error = xfs_trans_read_buf(
75730f712c9SDave Chinner mp, tp, mp->m_ddev_targp,
758cec7bb7dSDave Chinner XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
75930f712c9SDave Chinner XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
76030f712c9SDave Chinner if (error)
76130f712c9SDave Chinner return error;
76230f712c9SDave Chinner xfs_buf_set_ref(bp, XFS_AGFL_REF);
76330f712c9SDave Chinner *bpp = bp;
76430f712c9SDave Chinner return 0;
76530f712c9SDave Chinner }
76630f712c9SDave Chinner
76730f712c9SDave Chinner STATIC int
xfs_alloc_update_counters(struct xfs_trans * tp,struct xfs_buf * agbp,long len)76830f712c9SDave Chinner xfs_alloc_update_counters(
76930f712c9SDave Chinner struct xfs_trans *tp,
77030f712c9SDave Chinner struct xfs_buf *agbp,
77130f712c9SDave Chinner long len)
77230f712c9SDave Chinner {
7739798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr;
77430f712c9SDave Chinner
77592a00544SGao Xiang agbp->b_pag->pagf_freeblks += len;
77630f712c9SDave Chinner be32_add_cpu(&agf->agf_freeblks, len);
77730f712c9SDave Chinner
77830f712c9SDave Chinner if (unlikely(be32_to_cpu(agf->agf_freeblks) >
779a5155b87SDarrick J. Wong be32_to_cpu(agf->agf_length))) {
7808d57c216SDarrick J. Wong xfs_buf_mark_corrupt(agbp);
7812451337dSDave Chinner return -EFSCORRUPTED;
782a5155b87SDarrick J. Wong }
78330f712c9SDave Chinner
78430f712c9SDave Chinner xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
78530f712c9SDave Chinner return 0;
78630f712c9SDave Chinner }
78730f712c9SDave Chinner
78830f712c9SDave Chinner /*
789f5e7dbeaSBrian Foster * Block allocation algorithm and data structures.
79030f712c9SDave Chinner */
791f5e7dbeaSBrian Foster struct xfs_alloc_cur {
792f5e7dbeaSBrian Foster struct xfs_btree_cur *cnt; /* btree cursors */
793f5e7dbeaSBrian Foster struct xfs_btree_cur *bnolt;
794f5e7dbeaSBrian Foster struct xfs_btree_cur *bnogt;
795dc8e69bdSBrian Foster xfs_extlen_t cur_len;/* current search length */
796c62321a2SBrian Foster xfs_agblock_t rec_bno;/* extent startblock */
797c62321a2SBrian Foster xfs_extlen_t rec_len;/* extent length */
798c62321a2SBrian Foster xfs_agblock_t bno; /* alloc bno */
799c62321a2SBrian Foster xfs_extlen_t len; /* alloc len */
800c62321a2SBrian Foster xfs_extlen_t diff; /* diff from search bno */
801d6d3aff2SBrian Foster unsigned int busy_gen;/* busy state */
802d6d3aff2SBrian Foster bool busy;
803f5e7dbeaSBrian Foster };
804f5e7dbeaSBrian Foster
805f5e7dbeaSBrian Foster /*
806f5e7dbeaSBrian Foster * Set up cursors, etc. in the extent allocation cursor. This function can be
807f5e7dbeaSBrian Foster * called multiple times to reset an initialized structure without having to
808f5e7dbeaSBrian Foster * reallocate cursors.
809f5e7dbeaSBrian Foster */
810f5e7dbeaSBrian Foster static int
xfs_alloc_cur_setup(struct xfs_alloc_arg * args,struct xfs_alloc_cur * acur)811f5e7dbeaSBrian Foster xfs_alloc_cur_setup(
812f5e7dbeaSBrian Foster struct xfs_alloc_arg *args,
813f5e7dbeaSBrian Foster struct xfs_alloc_cur *acur)
814f5e7dbeaSBrian Foster {
815f5e7dbeaSBrian Foster int error;
816f5e7dbeaSBrian Foster int i;
817f5e7dbeaSBrian Foster
818dc8e69bdSBrian Foster acur->cur_len = args->maxlen;
819c62321a2SBrian Foster acur->rec_bno = 0;
820c62321a2SBrian Foster acur->rec_len = 0;
821c62321a2SBrian Foster acur->bno = 0;
822c62321a2SBrian Foster acur->len = 0;
823396bbf3cSBrian Foster acur->diff = -1;
824d6d3aff2SBrian Foster acur->busy = false;
825d6d3aff2SBrian Foster acur->busy_gen = 0;
826d6d3aff2SBrian Foster
827f5e7dbeaSBrian Foster /*
828f5e7dbeaSBrian Foster * Perform an initial cntbt lookup to check for availability of maxlen
829f5e7dbeaSBrian Foster * extents. If this fails, we'll return -ENOSPC to signal the caller to
830f5e7dbeaSBrian Foster * attempt a small allocation.
831f5e7dbeaSBrian Foster */
832f5e7dbeaSBrian Foster if (!acur->cnt)
833f5e7dbeaSBrian Foster acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
834289d38d2SDave Chinner args->agbp, args->pag, XFS_BTNUM_CNT);
835f5e7dbeaSBrian Foster error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
836f5e7dbeaSBrian Foster if (error)
837f5e7dbeaSBrian Foster return error;
838f5e7dbeaSBrian Foster
839f5e7dbeaSBrian Foster /*
840f5e7dbeaSBrian Foster * Allocate the bnobt left and right search cursors.
841f5e7dbeaSBrian Foster */
842f5e7dbeaSBrian Foster if (!acur->bnolt)
843f5e7dbeaSBrian Foster acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
844289d38d2SDave Chinner args->agbp, args->pag, XFS_BTNUM_BNO);
845f5e7dbeaSBrian Foster if (!acur->bnogt)
846f5e7dbeaSBrian Foster acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
847289d38d2SDave Chinner args->agbp, args->pag, XFS_BTNUM_BNO);
848f5e7dbeaSBrian Foster return i == 1 ? 0 : -ENOSPC;
849f5e7dbeaSBrian Foster }
850f5e7dbeaSBrian Foster
851f5e7dbeaSBrian Foster static void
xfs_alloc_cur_close(struct xfs_alloc_cur * acur,bool error)852f5e7dbeaSBrian Foster xfs_alloc_cur_close(
853f5e7dbeaSBrian Foster struct xfs_alloc_cur *acur,
854f5e7dbeaSBrian Foster bool error)
855f5e7dbeaSBrian Foster {
856f5e7dbeaSBrian Foster int cur_error = XFS_BTREE_NOERROR;
857f5e7dbeaSBrian Foster
858f5e7dbeaSBrian Foster if (error)
859f5e7dbeaSBrian Foster cur_error = XFS_BTREE_ERROR;
860f5e7dbeaSBrian Foster
861f5e7dbeaSBrian Foster if (acur->cnt)
862f5e7dbeaSBrian Foster xfs_btree_del_cursor(acur->cnt, cur_error);
863f5e7dbeaSBrian Foster if (acur->bnolt)
864f5e7dbeaSBrian Foster xfs_btree_del_cursor(acur->bnolt, cur_error);
865f5e7dbeaSBrian Foster if (acur->bnogt)
866f5e7dbeaSBrian Foster xfs_btree_del_cursor(acur->bnogt, cur_error);
867f5e7dbeaSBrian Foster acur->cnt = acur->bnolt = acur->bnogt = NULL;
868f5e7dbeaSBrian Foster }
86930f712c9SDave Chinner
87030f712c9SDave Chinner /*
871396bbf3cSBrian Foster * Check an extent for allocation and track the best available candidate in the
872396bbf3cSBrian Foster * allocation structure. The cursor is deactivated if it has entered an out of
873396bbf3cSBrian Foster * range state based on allocation arguments. Optionally return the extent
874396bbf3cSBrian Foster * extent geometry and allocation status if requested by the caller.
875396bbf3cSBrian Foster */
876396bbf3cSBrian Foster static int
xfs_alloc_cur_check(struct xfs_alloc_arg * args,struct xfs_alloc_cur * acur,struct xfs_btree_cur * cur,int * new)877396bbf3cSBrian Foster xfs_alloc_cur_check(
878396bbf3cSBrian Foster struct xfs_alloc_arg *args,
879396bbf3cSBrian Foster struct xfs_alloc_cur *acur,
880396bbf3cSBrian Foster struct xfs_btree_cur *cur,
881396bbf3cSBrian Foster int *new)
882396bbf3cSBrian Foster {
883396bbf3cSBrian Foster int error, i;
884396bbf3cSBrian Foster xfs_agblock_t bno, bnoa, bnew;
885396bbf3cSBrian Foster xfs_extlen_t len, lena, diff = -1;
886396bbf3cSBrian Foster bool busy;
887396bbf3cSBrian Foster unsigned busy_gen = 0;
888396bbf3cSBrian Foster bool deactivate = false;
889fec0afdaSBrian Foster bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
890396bbf3cSBrian Foster
891396bbf3cSBrian Foster *new = 0;
892396bbf3cSBrian Foster
893396bbf3cSBrian Foster error = xfs_alloc_get_rec(cur, &bno, &len, &i);
894396bbf3cSBrian Foster if (error)
895396bbf3cSBrian Foster return error;
896f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1))
897f9e03706SDarrick J. Wong return -EFSCORRUPTED;
898396bbf3cSBrian Foster
899396bbf3cSBrian Foster /*
900396bbf3cSBrian Foster * Check minlen and deactivate a cntbt cursor if out of acceptable size
901396bbf3cSBrian Foster * range (i.e., walking backwards looking for a minlen extent).
902396bbf3cSBrian Foster */
903396bbf3cSBrian Foster if (len < args->minlen) {
904fec0afdaSBrian Foster deactivate = !isbnobt;
905396bbf3cSBrian Foster goto out;
906396bbf3cSBrian Foster }
907396bbf3cSBrian Foster
908396bbf3cSBrian Foster busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
909396bbf3cSBrian Foster &busy_gen);
910396bbf3cSBrian Foster acur->busy |= busy;
911396bbf3cSBrian Foster if (busy)
912396bbf3cSBrian Foster acur->busy_gen = busy_gen;
913396bbf3cSBrian Foster /* deactivate a bnobt cursor outside of locality range */
914fec0afdaSBrian Foster if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
915fec0afdaSBrian Foster deactivate = isbnobt;
916396bbf3cSBrian Foster goto out;
917fec0afdaSBrian Foster }
918396bbf3cSBrian Foster if (lena < args->minlen)
919396bbf3cSBrian Foster goto out;
920396bbf3cSBrian Foster
921396bbf3cSBrian Foster args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
922396bbf3cSBrian Foster xfs_alloc_fix_len(args);
923396bbf3cSBrian Foster ASSERT(args->len >= args->minlen);
924396bbf3cSBrian Foster if (args->len < acur->len)
925396bbf3cSBrian Foster goto out;
926396bbf3cSBrian Foster
927396bbf3cSBrian Foster /*
928396bbf3cSBrian Foster * We have an aligned record that satisfies minlen and beats or matches
929396bbf3cSBrian Foster * the candidate extent size. Compare locality for near allocation mode.
930396bbf3cSBrian Foster */
931396bbf3cSBrian Foster diff = xfs_alloc_compute_diff(args->agbno, args->len,
932396bbf3cSBrian Foster args->alignment, args->datatype,
933396bbf3cSBrian Foster bnoa, lena, &bnew);
934396bbf3cSBrian Foster if (bnew == NULLAGBLOCK)
935396bbf3cSBrian Foster goto out;
936fec0afdaSBrian Foster
937fec0afdaSBrian Foster /*
938fec0afdaSBrian Foster * Deactivate a bnobt cursor with worse locality than the current best.
939fec0afdaSBrian Foster */
940fec0afdaSBrian Foster if (diff > acur->diff) {
941fec0afdaSBrian Foster deactivate = isbnobt;
942396bbf3cSBrian Foster goto out;
943fec0afdaSBrian Foster }
944396bbf3cSBrian Foster
945396bbf3cSBrian Foster ASSERT(args->len > acur->len ||
946396bbf3cSBrian Foster (args->len == acur->len && diff <= acur->diff));
947396bbf3cSBrian Foster acur->rec_bno = bno;
948396bbf3cSBrian Foster acur->rec_len = len;
949396bbf3cSBrian Foster acur->bno = bnew;
950396bbf3cSBrian Foster acur->len = args->len;
951396bbf3cSBrian Foster acur->diff = diff;
952396bbf3cSBrian Foster *new = 1;
953396bbf3cSBrian Foster
95478d7aabdSBrian Foster /*
95578d7aabdSBrian Foster * We're done if we found a perfect allocation. This only deactivates
95678d7aabdSBrian Foster * the current cursor, but this is just an optimization to terminate a
95778d7aabdSBrian Foster * cntbt search that otherwise runs to the edge of the tree.
95878d7aabdSBrian Foster */
95978d7aabdSBrian Foster if (acur->diff == 0 && acur->len == args->maxlen)
96078d7aabdSBrian Foster deactivate = true;
961396bbf3cSBrian Foster out:
962396bbf3cSBrian Foster if (deactivate)
963c4aa10d0SDave Chinner cur->bc_ag.abt.active = false;
964396bbf3cSBrian Foster trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
965396bbf3cSBrian Foster *new);
966396bbf3cSBrian Foster return 0;
967396bbf3cSBrian Foster }
968396bbf3cSBrian Foster
969396bbf3cSBrian Foster /*
970d2968825SBrian Foster * Complete an allocation of a candidate extent. Remove the extent from both
971d2968825SBrian Foster * trees and update the args structure.
972d2968825SBrian Foster */
973d2968825SBrian Foster STATIC int
xfs_alloc_cur_finish(struct xfs_alloc_arg * args,struct xfs_alloc_cur * acur)974d2968825SBrian Foster xfs_alloc_cur_finish(
975d2968825SBrian Foster struct xfs_alloc_arg *args,
976d2968825SBrian Foster struct xfs_alloc_cur *acur)
977d2968825SBrian Foster {
9789798f615SChristoph Hellwig struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
979d2968825SBrian Foster int error;
980d2968825SBrian Foster
981d2968825SBrian Foster ASSERT(acur->cnt && acur->bnolt);
982d2968825SBrian Foster ASSERT(acur->bno >= acur->rec_bno);
983d2968825SBrian Foster ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
9849798f615SChristoph Hellwig ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
985d2968825SBrian Foster
986d2968825SBrian Foster error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
987d2968825SBrian Foster acur->rec_len, acur->bno, acur->len, 0);
988d2968825SBrian Foster if (error)
989d2968825SBrian Foster return error;
990d2968825SBrian Foster
991d2968825SBrian Foster args->agbno = acur->bno;
992d2968825SBrian Foster args->len = acur->len;
993d2968825SBrian Foster args->wasfromfl = 0;
994d2968825SBrian Foster
995d2968825SBrian Foster trace_xfs_alloc_cur(args);
996d2968825SBrian Foster return 0;
997d2968825SBrian Foster }
998d2968825SBrian Foster
999d2968825SBrian Foster /*
1000dc8e69bdSBrian Foster * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
1001dc8e69bdSBrian Foster * bno optimized lookup to search for extents with ideal size and locality.
1002dc8e69bdSBrian Foster */
1003dc8e69bdSBrian Foster STATIC int
xfs_alloc_cntbt_iter(struct xfs_alloc_arg * args,struct xfs_alloc_cur * acur)1004dc8e69bdSBrian Foster xfs_alloc_cntbt_iter(
1005dc8e69bdSBrian Foster struct xfs_alloc_arg *args,
1006dc8e69bdSBrian Foster struct xfs_alloc_cur *acur)
1007dc8e69bdSBrian Foster {
1008dc8e69bdSBrian Foster struct xfs_btree_cur *cur = acur->cnt;
1009dc8e69bdSBrian Foster xfs_agblock_t bno;
1010dc8e69bdSBrian Foster xfs_extlen_t len, cur_len;
1011dc8e69bdSBrian Foster int error;
1012dc8e69bdSBrian Foster int i;
1013dc8e69bdSBrian Foster
1014dc8e69bdSBrian Foster if (!xfs_alloc_cur_active(cur))
1015dc8e69bdSBrian Foster return 0;
1016dc8e69bdSBrian Foster
1017dc8e69bdSBrian Foster /* locality optimized lookup */
1018dc8e69bdSBrian Foster cur_len = acur->cur_len;
1019dc8e69bdSBrian Foster error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
1020dc8e69bdSBrian Foster if (error)
1021dc8e69bdSBrian Foster return error;
1022dc8e69bdSBrian Foster if (i == 0)
1023dc8e69bdSBrian Foster return 0;
1024dc8e69bdSBrian Foster error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1025dc8e69bdSBrian Foster if (error)
1026dc8e69bdSBrian Foster return error;
1027dc8e69bdSBrian Foster
1028dc8e69bdSBrian Foster /* check the current record and update search length from it */
1029dc8e69bdSBrian Foster error = xfs_alloc_cur_check(args, acur, cur, &i);
1030dc8e69bdSBrian Foster if (error)
1031dc8e69bdSBrian Foster return error;
1032dc8e69bdSBrian Foster ASSERT(len >= acur->cur_len);
1033dc8e69bdSBrian Foster acur->cur_len = len;
1034dc8e69bdSBrian Foster
1035dc8e69bdSBrian Foster /*
1036dc8e69bdSBrian Foster * We looked up the first record >= [agbno, len] above. The agbno is a
1037dc8e69bdSBrian Foster * secondary key and so the current record may lie just before or after
1038dc8e69bdSBrian Foster * agbno. If it is past agbno, check the previous record too so long as
1039dc8e69bdSBrian Foster * the length matches as it may be closer. Don't check a smaller record
1040dc8e69bdSBrian Foster * because that could deactivate our cursor.
1041dc8e69bdSBrian Foster */
1042dc8e69bdSBrian Foster if (bno > args->agbno) {
1043dc8e69bdSBrian Foster error = xfs_btree_decrement(cur, 0, &i);
1044dc8e69bdSBrian Foster if (!error && i) {
1045dc8e69bdSBrian Foster error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1046dc8e69bdSBrian Foster if (!error && i && len == acur->cur_len)
1047dc8e69bdSBrian Foster error = xfs_alloc_cur_check(args, acur, cur,
1048dc8e69bdSBrian Foster &i);
1049dc8e69bdSBrian Foster }
1050dc8e69bdSBrian Foster if (error)
1051dc8e69bdSBrian Foster return error;
1052dc8e69bdSBrian Foster }
1053dc8e69bdSBrian Foster
1054dc8e69bdSBrian Foster /*
1055dc8e69bdSBrian Foster * Increment the search key until we find at least one allocation
1056dc8e69bdSBrian Foster * candidate or if the extent we found was larger. Otherwise, double the
1057dc8e69bdSBrian Foster * search key to optimize the search. Efficiency is more important here
1058dc8e69bdSBrian Foster * than absolute best locality.
1059dc8e69bdSBrian Foster */
1060dc8e69bdSBrian Foster cur_len <<= 1;
1061dc8e69bdSBrian Foster if (!acur->len || acur->cur_len >= cur_len)
1062dc8e69bdSBrian Foster acur->cur_len++;
1063dc8e69bdSBrian Foster else
1064dc8e69bdSBrian Foster acur->cur_len = cur_len;
1065dc8e69bdSBrian Foster
1066dc8e69bdSBrian Foster return error;
1067dc8e69bdSBrian Foster }
1068dc8e69bdSBrian Foster
1069dc8e69bdSBrian Foster /*
1070c63cdd4fSBrian Foster * Deal with the case where only small freespaces remain. Either return the
1071c63cdd4fSBrian Foster * contents of the last freespace record, or allocate space from the freelist if
1072c63cdd4fSBrian Foster * there is nothing in the tree.
1073c63cdd4fSBrian Foster */
1074c63cdd4fSBrian Foster STATIC int /* error */
xfs_alloc_ag_vextent_small(struct xfs_alloc_arg * args,struct xfs_btree_cur * ccur,xfs_agblock_t * fbnop,xfs_extlen_t * flenp,int * stat)1075c63cdd4fSBrian Foster xfs_alloc_ag_vextent_small(
1076c63cdd4fSBrian Foster struct xfs_alloc_arg *args, /* allocation argument structure */
1077c63cdd4fSBrian Foster struct xfs_btree_cur *ccur, /* optional by-size cursor */
1078c63cdd4fSBrian Foster xfs_agblock_t *fbnop, /* result block number */
1079c63cdd4fSBrian Foster xfs_extlen_t *flenp, /* result length */
1080c63cdd4fSBrian Foster int *stat) /* status: 0-freelist, 1-normal/none */
1081c63cdd4fSBrian Foster {
10829798f615SChristoph Hellwig struct xfs_agf *agf = args->agbp->b_addr;
1083c63cdd4fSBrian Foster int error = 0;
1084c63cdd4fSBrian Foster xfs_agblock_t fbno = NULLAGBLOCK;
1085c63cdd4fSBrian Foster xfs_extlen_t flen = 0;
10866691cd92SBrian Foster int i = 0;
1087c63cdd4fSBrian Foster
10886691cd92SBrian Foster /*
10896691cd92SBrian Foster * If a cntbt cursor is provided, try to allocate the largest record in
10906691cd92SBrian Foster * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
10916691cd92SBrian Foster * allocation. Make sure to respect minleft even when pulling from the
10926691cd92SBrian Foster * freelist.
10936691cd92SBrian Foster */
10946691cd92SBrian Foster if (ccur)
1095c63cdd4fSBrian Foster error = xfs_btree_decrement(ccur, 0, &i);
1096c63cdd4fSBrian Foster if (error)
1097c63cdd4fSBrian Foster goto error;
1098c63cdd4fSBrian Foster if (i) {
1099c63cdd4fSBrian Foster error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1100c63cdd4fSBrian Foster if (error)
1101c63cdd4fSBrian Foster goto error;
1102f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1103f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1104f9e03706SDarrick J. Wong goto error;
1105f9e03706SDarrick J. Wong }
1106c63cdd4fSBrian Foster goto out;
1107c63cdd4fSBrian Foster }
1108c63cdd4fSBrian Foster
1109c63cdd4fSBrian Foster if (args->minlen != 1 || args->alignment != 1 ||
1110c63cdd4fSBrian Foster args->resv == XFS_AG_RESV_AGFL ||
11119798f615SChristoph Hellwig be32_to_cpu(agf->agf_flcount) <= args->minleft)
1112c63cdd4fSBrian Foster goto out;
1113c63cdd4fSBrian Foster
111449f0d84eSDave Chinner error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
111549f0d84eSDave Chinner &fbno, 0);
1116c63cdd4fSBrian Foster if (error)
1117c63cdd4fSBrian Foster goto error;
1118c63cdd4fSBrian Foster if (fbno == NULLAGBLOCK)
1119c63cdd4fSBrian Foster goto out;
1120c63cdd4fSBrian Foster
112145d06621SDave Chinner xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1122c34d570dSChristoph Hellwig (args->datatype & XFS_ALLOC_NOBUSY));
1123c63cdd4fSBrian Foster
1124c34d570dSChristoph Hellwig if (args->datatype & XFS_ALLOC_USERDATA) {
1125c63cdd4fSBrian Foster struct xfs_buf *bp;
1126c63cdd4fSBrian Foster
1127ee647f85SDarrick J. Wong error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1128ee647f85SDarrick J. Wong XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1129ee647f85SDarrick J. Wong args->mp->m_bsize, 0, &bp);
1130ee647f85SDarrick J. Wong if (error)
1131c63cdd4fSBrian Foster goto error;
1132c63cdd4fSBrian Foster xfs_trans_binval(args->tp, bp);
1133c63cdd4fSBrian Foster }
11347e36a3a6SBrian Foster *fbnop = args->agbno = fbno;
11357e36a3a6SBrian Foster *flenp = args->len = 1;
11369798f615SChristoph Hellwig if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1137f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1138f9e03706SDarrick J. Wong goto error;
1139f9e03706SDarrick J. Wong }
1140c63cdd4fSBrian Foster args->wasfromfl = 1;
1141c63cdd4fSBrian Foster trace_xfs_alloc_small_freelist(args);
1142c63cdd4fSBrian Foster
1143c63cdd4fSBrian Foster /*
1144c63cdd4fSBrian Foster * If we're feeding an AGFL block to something that doesn't live in the
1145c63cdd4fSBrian Foster * free space, we need to clear out the OWN_AG rmap.
1146c63cdd4fSBrian Foster */
1147fa9c3c19SDave Chinner error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1148c63cdd4fSBrian Foster &XFS_RMAP_OINFO_AG);
1149c63cdd4fSBrian Foster if (error)
1150c63cdd4fSBrian Foster goto error;
1151c63cdd4fSBrian Foster
1152c63cdd4fSBrian Foster *stat = 0;
1153c63cdd4fSBrian Foster return 0;
1154c63cdd4fSBrian Foster
1155c63cdd4fSBrian Foster out:
1156c63cdd4fSBrian Foster /*
1157c63cdd4fSBrian Foster * Can't do the allocation, give up.
1158c63cdd4fSBrian Foster */
1159c63cdd4fSBrian Foster if (flen < args->minlen) {
1160c63cdd4fSBrian Foster args->agbno = NULLAGBLOCK;
1161c63cdd4fSBrian Foster trace_xfs_alloc_small_notenough(args);
1162c63cdd4fSBrian Foster flen = 0;
1163c63cdd4fSBrian Foster }
1164c63cdd4fSBrian Foster *fbnop = fbno;
1165c63cdd4fSBrian Foster *flenp = flen;
1166c63cdd4fSBrian Foster *stat = 1;
1167c63cdd4fSBrian Foster trace_xfs_alloc_small_done(args);
1168c63cdd4fSBrian Foster return 0;
1169c63cdd4fSBrian Foster
1170c63cdd4fSBrian Foster error:
1171c63cdd4fSBrian Foster trace_xfs_alloc_small_error(args);
1172c63cdd4fSBrian Foster return error;
1173c63cdd4fSBrian Foster }
1174c63cdd4fSBrian Foster
/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 *
 * Requires args->alignment == 1 (exact-bno allocations cannot be realigned).
 * On success args->agbno/args->len describe the allocated extent; on a miss
 * (free space doesn't cover agbno, or the start is busy) args->agbno is set
 * to NULLAGBLOCK and 0 is returned.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of busy extent */
	xfs_extlen_t	tlen;	/* length of busy extent */
	xfs_agblock_t	tend;	/* end block of busy extent */
	int		i;	/* success/failure of operation */
	unsigned	busy_gen;

	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.  Trimming may shrink the
	 * usable [tbno, tbno+tlen) range below what the record holds.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					args->pag, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
129130f712c9SDave Chinner
129230f712c9SDave Chinner /*
129378d7aabdSBrian Foster * Search a given number of btree records in a given direction. Check each
129478d7aabdSBrian Foster * record against the good extent we've already found.
129530f712c9SDave Chinner */
129630f712c9SDave Chinner STATIC int
xfs_alloc_walk_iter(struct xfs_alloc_arg * args,struct xfs_alloc_cur * acur,struct xfs_btree_cur * cur,bool increment,bool find_one,int count,int * stat)129778d7aabdSBrian Foster xfs_alloc_walk_iter(
1298fec0afdaSBrian Foster struct xfs_alloc_arg *args,
1299fec0afdaSBrian Foster struct xfs_alloc_cur *acur,
1300fec0afdaSBrian Foster struct xfs_btree_cur *cur,
130178d7aabdSBrian Foster bool increment,
130278d7aabdSBrian Foster bool find_one, /* quit on first candidate */
130378d7aabdSBrian Foster int count, /* rec count (-1 for infinite) */
130478d7aabdSBrian Foster int *stat)
130530f712c9SDave Chinner {
130630f712c9SDave Chinner int error;
130730f712c9SDave Chinner int i;
130830f712c9SDave Chinner
130978d7aabdSBrian Foster *stat = 0;
131078d7aabdSBrian Foster
131130f712c9SDave Chinner /*
1312fec0afdaSBrian Foster * Search so long as the cursor is active or we find a better extent.
1313fec0afdaSBrian Foster * The cursor is deactivated if it extends beyond the range of the
1314fec0afdaSBrian Foster * current allocation candidate.
131530f712c9SDave Chinner */
131678d7aabdSBrian Foster while (xfs_alloc_cur_active(cur) && count) {
1317fec0afdaSBrian Foster error = xfs_alloc_cur_check(args, acur, cur, &i);
131830f712c9SDave Chinner if (error)
131930f712c9SDave Chinner return error;
132078d7aabdSBrian Foster if (i == 1) {
132178d7aabdSBrian Foster *stat = 1;
132278d7aabdSBrian Foster if (find_one)
1323fec0afdaSBrian Foster break;
132478d7aabdSBrian Foster }
1325fec0afdaSBrian Foster if (!xfs_alloc_cur_active(cur))
1326fec0afdaSBrian Foster break;
1327fec0afdaSBrian Foster
1328fec0afdaSBrian Foster if (increment)
1329fec0afdaSBrian Foster error = xfs_btree_increment(cur, 0, &i);
1330fec0afdaSBrian Foster else
1331fec0afdaSBrian Foster error = xfs_btree_decrement(cur, 0, &i);
1332fec0afdaSBrian Foster if (error)
1333fec0afdaSBrian Foster return error;
1334fec0afdaSBrian Foster if (i == 0)
1335c4aa10d0SDave Chinner cur->bc_ag.abt.active = false;
133678d7aabdSBrian Foster
133778d7aabdSBrian Foster if (count > 0)
133878d7aabdSBrian Foster count--;
1339fec0afdaSBrian Foster }
1340fec0afdaSBrian Foster
1341fec0afdaSBrian Foster return 0;
134230f712c9SDave Chinner }
134330f712c9SDave Chinner
/*
 * Search the by-bno and by-size btrees in parallel in search of an extent with
 * ideal locality based on the NEAR mode ->agbno locality hint.
 *
 * Sets *stat to 1 and records the winning extent in @acur if a candidate is
 * found, 0 otherwise.  Returns 0 or a negative errno.
 */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	int			*stat)
{
	struct xfs_btree_cur	*fbcur = NULL;	/* fallback cursor, if any */
	int			error;
	int			i;
	bool			fbinc;		/* fallback walk direction */

	ASSERT(acur->len == 0);

	*stat = 0;

	/* Position all three cursors around the locality hint. */
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			acur->cnt->bc_ag.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}
14710e26d5caSBrian Foster
/*
 * Check the last block of the cnt btree for allocations.
 *
 * Used by the NEAR allocator when the by-size lookup landed in the last leaf
 * block: scan that block's records for candidates at least minlen long and
 * pick the best one via xfs_alloc_walk_iter().  Sets *allocated to true if a
 * candidate was committed to @acur; a false return with error == 0 tells the
 * caller to fall back to the full locality search.
 */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	bool			*allocated)
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (get_random_u32_below(2))
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks.  If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		/* rewind to the first record of the current leaf block */
		acur->cnt->bc_levels[0].ptr = 1;
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1))
				return -EFSCORRUPTED;
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		if (!i)
			return 0;
	}

	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}
15305113f8ecSDarrick J. Wong
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Two-phase strategy: if the by-size lookup lands in the last cntbt leaf
 * block, scan that block directly (xfs_alloc_ag_vextent_lastblock);
 * otherwise run the combined bnobt/cntbt locality search
 * (xfs_alloc_ag_vextent_locality).  If only busy extents were found, flush
 * the busy list and restart the whole search.
 */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args,
	uint32_t		alloc_flags)
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

	/* Retry once quickly if we find busy extents before blocking. */
	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		/* no maxlen-sized extent; fall back to a small allocation */
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			/*
			 * Our only valid extents must have been busy. Flush and
			 * retry the allocation again. If we get an -EAGAIN
			 * error, we're being told that a deadlock was avoided
			 * and the current transaction needs committing before
			 * the allocation can be retried.
			 */
			trace_xfs_alloc_near_busy(args);
			error = xfs_extent_busy_flush(args->tp, args->pag,
					acur.busy_gen, alloc_flags);
			if (error)
				goto out;

			/* second pass may block in the flush */
			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	xfs_alloc_cur_close(&acur, error);
	return error;
}
164530f712c9SDave Chinner
164630f712c9SDave Chinner /*
164730f712c9SDave Chinner * Allocate a variable extent anywhere in the allocation group agno.
164830f712c9SDave Chinner * Extent's length (returned in len) will be between minlen and maxlen,
164930f712c9SDave Chinner * and of the form k * prod + mod unless there's nothing that large.
165030f712c9SDave Chinner * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
165130f712c9SDave Chinner */
16526a2a9d77SDave Chinner static int
xfs_alloc_ag_vextent_size(struct xfs_alloc_arg * args,uint32_t alloc_flags)165330f712c9SDave Chinner xfs_alloc_ag_vextent_size(
16546a2a9d77SDave Chinner struct xfs_alloc_arg *args,
16556a2a9d77SDave Chinner uint32_t alloc_flags)
165630f712c9SDave Chinner {
16579798f615SChristoph Hellwig struct xfs_agf *agf = args->agbp->b_addr;
16586a2a9d77SDave Chinner struct xfs_btree_cur *bno_cur;
16596a2a9d77SDave Chinner struct xfs_btree_cur *cnt_cur;
166030f712c9SDave Chinner xfs_agblock_t fbno; /* start of found freespace */
166130f712c9SDave Chinner xfs_extlen_t flen; /* length of found freespace */
166230f712c9SDave Chinner xfs_agblock_t rbno; /* returned block number */
166330f712c9SDave Chinner xfs_extlen_t rlen; /* length of returned extent */
1664ebf55872SChristoph Hellwig bool busy;
1665ebf55872SChristoph Hellwig unsigned busy_gen;
16666a2a9d77SDave Chinner int error;
16676a2a9d77SDave Chinner int i;
166830f712c9SDave Chinner
16698ebbf262SDave Chinner /* Retry once quickly if we find busy extents before blocking. */
16708ebbf262SDave Chinner alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
167130f712c9SDave Chinner restart:
167230f712c9SDave Chinner /*
167330f712c9SDave Chinner * Allocate and initialize a cursor for the by-size btree.
167430f712c9SDave Chinner */
167530f712c9SDave Chinner cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1676289d38d2SDave Chinner args->pag, XFS_BTNUM_CNT);
167730f712c9SDave Chinner bno_cur = NULL;
167830f712c9SDave Chinner
167930f712c9SDave Chinner /*
168030f712c9SDave Chinner * Look for an entry >= maxlen+alignment-1 blocks.
168130f712c9SDave Chinner */
168230f712c9SDave Chinner if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
168330f712c9SDave Chinner args->maxlen + args->alignment - 1, &i)))
168430f712c9SDave Chinner goto error0;
168530f712c9SDave Chinner
168630f712c9SDave Chinner /*
1687ebf55872SChristoph Hellwig * If none then we have to settle for a smaller extent. In the case that
1688ebf55872SChristoph Hellwig * there are no large extents, this will return the last entry in the
1689ebf55872SChristoph Hellwig * tree unless the tree is empty. In the case that there are only busy
1690ebf55872SChristoph Hellwig * large extents, this will return the largest small extent unless there
169130f712c9SDave Chinner * are no smaller extents available.
169230f712c9SDave Chinner */
1693ebf55872SChristoph Hellwig if (!i) {
169430f712c9SDave Chinner error = xfs_alloc_ag_vextent_small(args, cnt_cur,
169530f712c9SDave Chinner &fbno, &flen, &i);
169630f712c9SDave Chinner if (error)
169730f712c9SDave Chinner goto error0;
169830f712c9SDave Chinner if (i == 0 || flen == 0) {
169930f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
170030f712c9SDave Chinner trace_xfs_alloc_size_noentry(args);
170130f712c9SDave Chinner return 0;
170230f712c9SDave Chinner }
170330f712c9SDave Chinner ASSERT(i == 1);
1704ebf55872SChristoph Hellwig busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1705ebf55872SChristoph Hellwig &rlen, &busy_gen);
170630f712c9SDave Chinner } else {
170730f712c9SDave Chinner /*
170830f712c9SDave Chinner * Search for a non-busy extent that is large enough.
170930f712c9SDave Chinner */
171030f712c9SDave Chinner for (;;) {
171130f712c9SDave Chinner error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
171230f712c9SDave Chinner if (error)
171330f712c9SDave Chinner goto error0;
1714f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1715f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1716f9e03706SDarrick J. Wong goto error0;
1717f9e03706SDarrick J. Wong }
171830f712c9SDave Chinner
1719ebf55872SChristoph Hellwig busy = xfs_alloc_compute_aligned(args, fbno, flen,
1720ebf55872SChristoph Hellwig &rbno, &rlen, &busy_gen);
172130f712c9SDave Chinner
172230f712c9SDave Chinner if (rlen >= args->maxlen)
172330f712c9SDave Chinner break;
172430f712c9SDave Chinner
172530f712c9SDave Chinner error = xfs_btree_increment(cnt_cur, 0, &i);
172630f712c9SDave Chinner if (error)
172730f712c9SDave Chinner goto error0;
17288ebbf262SDave Chinner if (i)
17298ebbf262SDave Chinner continue;
17308ebbf262SDave Chinner
173130f712c9SDave Chinner /*
17328ebbf262SDave Chinner * Our only valid extents must have been busy. Flush and
17338ebbf262SDave Chinner * retry the allocation again. If we get an -EAGAIN
17348ebbf262SDave Chinner * error, we're being told that a deadlock was avoided
17358ebbf262SDave Chinner * and the current transaction needs committing before
17368ebbf262SDave Chinner * the allocation can be retried.
173730f712c9SDave Chinner */
173830f712c9SDave Chinner trace_xfs_alloc_size_busy(args);
17398ebbf262SDave Chinner error = xfs_extent_busy_flush(args->tp, args->pag,
17406a2a9d77SDave Chinner busy_gen, alloc_flags);
17418ebbf262SDave Chinner if (error)
17428ebbf262SDave Chinner goto error0;
17438ebbf262SDave Chinner
17448ebbf262SDave Chinner alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
17458ebbf262SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
174630f712c9SDave Chinner goto restart;
174730f712c9SDave Chinner }
174830f712c9SDave Chinner }
174930f712c9SDave Chinner
175030f712c9SDave Chinner /*
175130f712c9SDave Chinner * In the first case above, we got the last entry in the
175230f712c9SDave Chinner * by-size btree. Now we check to see if the space hits maxlen
175330f712c9SDave Chinner * once aligned; if not, we search left for something better.
175430f712c9SDave Chinner * This can't happen in the second case above.
175530f712c9SDave Chinner */
175630f712c9SDave Chinner rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1757f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp,
1758f9e03706SDarrick J. Wong rlen != 0 &&
1759f9e03706SDarrick J. Wong (rlen > flen ||
1760f9e03706SDarrick J. Wong rbno + rlen > fbno + flen))) {
1761f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1762f9e03706SDarrick J. Wong goto error0;
1763f9e03706SDarrick J. Wong }
176430f712c9SDave Chinner if (rlen < args->maxlen) {
176530f712c9SDave Chinner xfs_agblock_t bestfbno;
176630f712c9SDave Chinner xfs_extlen_t bestflen;
176730f712c9SDave Chinner xfs_agblock_t bestrbno;
176830f712c9SDave Chinner xfs_extlen_t bestrlen;
176930f712c9SDave Chinner
177030f712c9SDave Chinner bestrlen = rlen;
177130f712c9SDave Chinner bestrbno = rbno;
177230f712c9SDave Chinner bestflen = flen;
177330f712c9SDave Chinner bestfbno = fbno;
177430f712c9SDave Chinner for (;;) {
177530f712c9SDave Chinner if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
177630f712c9SDave Chinner goto error0;
177730f712c9SDave Chinner if (i == 0)
177830f712c9SDave Chinner break;
177930f712c9SDave Chinner if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
178030f712c9SDave Chinner &i)))
178130f712c9SDave Chinner goto error0;
1782f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1783f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1784f9e03706SDarrick J. Wong goto error0;
1785f9e03706SDarrick J. Wong }
178630f712c9SDave Chinner if (flen < bestrlen)
178730f712c9SDave Chinner break;
1788ebf55872SChristoph Hellwig busy = xfs_alloc_compute_aligned(args, fbno, flen,
1789ebf55872SChristoph Hellwig &rbno, &rlen, &busy_gen);
179030f712c9SDave Chinner rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1791f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp,
1792f9e03706SDarrick J. Wong rlen != 0 &&
1793f9e03706SDarrick J. Wong (rlen > flen ||
1794f9e03706SDarrick J. Wong rbno + rlen > fbno + flen))) {
1795f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1796f9e03706SDarrick J. Wong goto error0;
1797f9e03706SDarrick J. Wong }
179830f712c9SDave Chinner if (rlen > bestrlen) {
179930f712c9SDave Chinner bestrlen = rlen;
180030f712c9SDave Chinner bestrbno = rbno;
180130f712c9SDave Chinner bestflen = flen;
180230f712c9SDave Chinner bestfbno = fbno;
180330f712c9SDave Chinner if (rlen == args->maxlen)
180430f712c9SDave Chinner break;
180530f712c9SDave Chinner }
180630f712c9SDave Chinner }
180730f712c9SDave Chinner if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
180830f712c9SDave Chinner &i)))
180930f712c9SDave Chinner goto error0;
1810f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1811f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1812f9e03706SDarrick J. Wong goto error0;
1813f9e03706SDarrick J. Wong }
181430f712c9SDave Chinner rlen = bestrlen;
181530f712c9SDave Chinner rbno = bestrbno;
181630f712c9SDave Chinner flen = bestflen;
181730f712c9SDave Chinner fbno = bestfbno;
181830f712c9SDave Chinner }
181930f712c9SDave Chinner args->wasfromfl = 0;
182030f712c9SDave Chinner /*
182130f712c9SDave Chinner * Fix up the length.
182230f712c9SDave Chinner */
182330f712c9SDave Chinner args->len = rlen;
182430f712c9SDave Chinner if (rlen < args->minlen) {
1825ebf55872SChristoph Hellwig if (busy) {
18268ebbf262SDave Chinner /*
18278ebbf262SDave Chinner * Our only valid extents must have been busy. Flush and
18288ebbf262SDave Chinner * retry the allocation again. If we get an -EAGAIN
18298ebbf262SDave Chinner * error, we're being told that a deadlock was avoided
18308ebbf262SDave Chinner * and the current transaction needs committing before
18318ebbf262SDave Chinner * the allocation can be retried.
18328ebbf262SDave Chinner */
183330f712c9SDave Chinner trace_xfs_alloc_size_busy(args);
18348ebbf262SDave Chinner error = xfs_extent_busy_flush(args->tp, args->pag,
18358ebbf262SDave Chinner busy_gen, alloc_flags);
18368ebbf262SDave Chinner if (error)
18378ebbf262SDave Chinner goto error0;
18388ebbf262SDave Chinner
18398ebbf262SDave Chinner alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
18408ebbf262SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
184130f712c9SDave Chinner goto restart;
184230f712c9SDave Chinner }
184330f712c9SDave Chinner goto out_nominleft;
184430f712c9SDave Chinner }
184530f712c9SDave Chinner xfs_alloc_fix_len(args);
184630f712c9SDave Chinner
184730f712c9SDave Chinner rlen = args->len;
1848f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1849f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1850f9e03706SDarrick J. Wong goto error0;
1851f9e03706SDarrick J. Wong }
185230f712c9SDave Chinner /*
185330f712c9SDave Chinner * Allocate and initialize a cursor for the by-block tree.
185430f712c9SDave Chinner */
185530f712c9SDave Chinner bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1856289d38d2SDave Chinner args->pag, XFS_BTNUM_BNO);
185730f712c9SDave Chinner if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
185830f712c9SDave Chinner rbno, rlen, XFSA_FIXUP_CNT_OK)))
185930f712c9SDave Chinner goto error0;
186030f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
186130f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
186230f712c9SDave Chinner cnt_cur = bno_cur = NULL;
186330f712c9SDave Chinner args->len = rlen;
186430f712c9SDave Chinner args->agbno = rbno;
1865f9e03706SDarrick J. Wong if (XFS_IS_CORRUPT(args->mp,
1866f9e03706SDarrick J. Wong args->agbno + args->len >
18679798f615SChristoph Hellwig be32_to_cpu(agf->agf_length))) {
1868f9e03706SDarrick J. Wong error = -EFSCORRUPTED;
1869f9e03706SDarrick J. Wong goto error0;
1870f9e03706SDarrick J. Wong }
187130f712c9SDave Chinner trace_xfs_alloc_size_done(args);
187230f712c9SDave Chinner return 0;
187330f712c9SDave Chinner
187430f712c9SDave Chinner error0:
187530f712c9SDave Chinner trace_xfs_alloc_size_error(args);
187630f712c9SDave Chinner if (cnt_cur)
187730f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
187830f712c9SDave Chinner if (bno_cur)
187930f712c9SDave Chinner xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
188030f712c9SDave Chinner return error;
188130f712c9SDave Chinner
188230f712c9SDave Chinner out_nominleft:
188330f712c9SDave Chinner xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
188430f712c9SDave Chinner trace_xfs_alloc_size_nominleft(args);
188530f712c9SDave Chinner args->agbno = NULLAGBLOCK;
188630f712c9SDave Chinner return 0;
188730f712c9SDave Chinner }
188830f712c9SDave Chinner
/*
 * Free the extent starting at agno/bno for length.
 *
 * Returns the extent to both free space btrees: it is merged with any
 * physically contiguous free extent on the left and/or right in the by-block
 * (BNO) tree, the by-size (CNT) tree is updated to match, the reverse mapping
 * for the extent is removed, and the AGF/superblock free space counters are
 * bumped by @len.
 *
 * @tp:    active transaction
 * @agbp:  locked AGF buffer for the AG being modified
 * @agno:  allocation group number
 * @bno:   first AG block of the extent being freed
 * @len:   length of the extent being freed
 * @oinfo: extent owner, consumed by the rmap btree update
 * @type:  per-AG reservation pool the freed blocks are credited back to
 *
 * Returns 0 on success or a negative errno; -EFSCORRUPTED if the on-disk
 * btrees disagree with the request (e.g. part of the range is already free).
 */
STATIC int
xfs_free_ag_extent(
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xfs_mount		*mp;
	struct xfs_btree_cur		*bno_cur;
	struct xfs_btree_cur		*cnt_cur;
	xfs_agblock_t			gtbno;	/* start of right neighbor */
	xfs_extlen_t			gtlen;	/* length of right neighbor */
	xfs_agblock_t			ltbno;	/* start of left neighbor */
	xfs_extlen_t			ltlen;	/* length of left neighbor */
	xfs_agblock_t			nbno;	/* new starting block of freesp */
	xfs_extlen_t			nlen;	/* new length of freespace */
	int				haveleft; /* have a left neighbor */
	int				haveright; /* have a right neighbor */
	int				i;
	int				error;
	struct xfs_perag		*pag = agbp->b_pag;

	bno_cur = cnt_cur = NULL;
	mp = tp->t_mountp;

	/*
	 * Remove the reverse mapping for this extent first, unless the owner
	 * is one whose mappings are not tracked in the rmap btree.
	 */
	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
		error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
		if (error)
			goto error0;
	}

	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Delete the old by-block entry for the right block.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			if (XFS_IS_CORRUPT(mp,
					   i != 1 ||
					   xxbno != ltbno ||
					   xxlen != ltlen)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 * The lookup must NOT find a matching record (i == 0): the merged
	 * extent cannot already exist in the by-size tree.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	if (XFS_IS_CORRUPT(mp, i != 0)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.
	 */
	error = xfs_alloc_update_counters(tp, agbp, len);
	xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
	if (error)
		goto error0;

	XFS_STATS_INC(mp, xs_freex);
	XFS_STATS_ADD(mp, xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);

	return 0;

 error0:
	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}
220330f712c9SDave Chinner
220430f712c9SDave Chinner /*
220530f712c9SDave Chinner * Visible (exported) allocation/free functions.
220630f712c9SDave Chinner * Some of these are used just by xfs_alloc_btree.c and this file.
220730f712c9SDave Chinner */
220830f712c9SDave Chinner
220930f712c9SDave Chinner /*
22107cb3efb4SDarrick J. Wong * Compute and fill in value of m_alloc_maxlevels.
221130f712c9SDave Chinner */
221230f712c9SDave Chinner void
xfs_alloc_compute_maxlevels(xfs_mount_t * mp)221330f712c9SDave Chinner xfs_alloc_compute_maxlevels(
221430f712c9SDave Chinner xfs_mount_t *mp) /* file system mount structure */
221530f712c9SDave Chinner {
22167cb3efb4SDarrick J. Wong mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
221719b54ee6SDarrick J. Wong (mp->m_sb.sb_agblocks + 1) / 2);
22180ed5f735SDarrick J. Wong ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
221930f712c9SDave Chinner }
222030f712c9SDave Chinner
222130f712c9SDave Chinner /*
22223fd129b6SDarrick J. Wong * Find the length of the longest extent in an AG. The 'need' parameter
22233fd129b6SDarrick J. Wong * specifies how much space we're going to need for the AGFL and the
22243fd129b6SDarrick J. Wong * 'reserved' parameter tells us how many blocks in this AG are reserved for
22253fd129b6SDarrick J. Wong * other callers.
222630f712c9SDave Chinner */
222730f712c9SDave Chinner xfs_extlen_t
xfs_alloc_longest_free_extent(struct xfs_perag * pag,xfs_extlen_t need,xfs_extlen_t reserved)222830f712c9SDave Chinner xfs_alloc_longest_free_extent(
222950adbcb4SDave Chinner struct xfs_perag *pag,
22303fd129b6SDarrick J. Wong xfs_extlen_t need,
22313fd129b6SDarrick J. Wong xfs_extlen_t reserved)
223230f712c9SDave Chinner {
223350adbcb4SDave Chinner xfs_extlen_t delta = 0;
223430f712c9SDave Chinner
22353fd129b6SDarrick J. Wong /*
22363fd129b6SDarrick J. Wong * If the AGFL needs a recharge, we'll have to subtract that from the
22373fd129b6SDarrick J. Wong * longest extent.
22383fd129b6SDarrick J. Wong */
223930f712c9SDave Chinner if (need > pag->pagf_flcount)
224030f712c9SDave Chinner delta = need - pag->pagf_flcount;
224130f712c9SDave Chinner
22423fd129b6SDarrick J. Wong /*
22433fd129b6SDarrick J. Wong * If we cannot maintain others' reservations with space from the
22443fd129b6SDarrick J. Wong * not-longest freesp extents, we'll have to subtract /that/ from
22453fd129b6SDarrick J. Wong * the longest extent too.
22463fd129b6SDarrick J. Wong */
22473fd129b6SDarrick J. Wong if (pag->pagf_freeblks - pag->pagf_longest < reserved)
22483fd129b6SDarrick J. Wong delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
22493fd129b6SDarrick J. Wong
22503fd129b6SDarrick J. Wong /*
22513fd129b6SDarrick J. Wong * If the longest extent is long enough to satisfy all the
22523fd129b6SDarrick J. Wong * reservations and AGFL rules in place, we can return this extent.
22533fd129b6SDarrick J. Wong */
225430f712c9SDave Chinner if (pag->pagf_longest > delta)
22551c743574SDave Chinner return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
22561c743574SDave Chinner pag->pagf_longest - delta);
22573fd129b6SDarrick J. Wong
22583fd129b6SDarrick J. Wong /* Otherwise, let the caller try for 1 block if there's space. */
225930f712c9SDave Chinner return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
226030f712c9SDave Chinner }
226130f712c9SDave Chinner
22621cac233cSDarrick J. Wong /*
22631cac233cSDarrick J. Wong * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
22641cac233cSDarrick J. Wong * return the largest possible minimum length.
22651cac233cSDarrick J. Wong */
2266496817b4SDave Chinner unsigned int
xfs_alloc_min_freelist(struct xfs_mount * mp,struct xfs_perag * pag)2267496817b4SDave Chinner xfs_alloc_min_freelist(
2268496817b4SDave Chinner struct xfs_mount *mp,
2269496817b4SDave Chinner struct xfs_perag *pag)
2270496817b4SDave Chinner {
22711cac233cSDarrick J. Wong /* AG btrees have at least 1 level. */
22721cac233cSDarrick J. Wong static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
22731cac233cSDarrick J. Wong const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
2274496817b4SDave Chinner unsigned int min_free;
2275496817b4SDave Chinner
22767cb3efb4SDarrick J. Wong ASSERT(mp->m_alloc_maxlevels > 0);
22771cac233cSDarrick J. Wong
2278*0838177bSOmar Sandoval /*
2279*0838177bSOmar Sandoval * For a btree shorter than the maximum height, the worst case is that
2280*0838177bSOmar Sandoval * every level gets split and a new level is added, then while inserting
2281*0838177bSOmar Sandoval * another entry to refill the AGFL, every level under the old root gets
2282*0838177bSOmar Sandoval * split again. This is:
2283*0838177bSOmar Sandoval *
2284*0838177bSOmar Sandoval * (full height split reservation) + (AGFL refill split height)
2285*0838177bSOmar Sandoval * = (current height + 1) + (current height - 1)
2286*0838177bSOmar Sandoval * = (new height) + (new height - 2)
2287*0838177bSOmar Sandoval * = 2 * new height - 2
2288*0838177bSOmar Sandoval *
2289*0838177bSOmar Sandoval * For a btree of maximum height, the worst case is that every level
2290*0838177bSOmar Sandoval * under the root gets split, then while inserting another entry to
2291*0838177bSOmar Sandoval * refill the AGFL, every level under the root gets split again. This is
2292*0838177bSOmar Sandoval * also:
2293*0838177bSOmar Sandoval *
2294*0838177bSOmar Sandoval * 2 * (current height - 1)
2295*0838177bSOmar Sandoval * = 2 * (new height - 1)
2296*0838177bSOmar Sandoval * = 2 * new height - 2
2297*0838177bSOmar Sandoval */
2298*0838177bSOmar Sandoval
2299496817b4SDave Chinner /* space needed by-bno freespace btree */
23001cac233cSDarrick J. Wong min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2301*0838177bSOmar Sandoval mp->m_alloc_maxlevels) * 2 - 2;
2302496817b4SDave Chinner /* space needed by-size freespace btree */
23031cac233cSDarrick J. Wong min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2304*0838177bSOmar Sandoval mp->m_alloc_maxlevels) * 2 - 2;
230552548852SDarrick J. Wong /* space needed reverse mapping used space btree */
2306ebd9027dSDave Chinner if (xfs_has_rmapbt(mp))
23071cac233cSDarrick J. Wong min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2308*0838177bSOmar Sandoval mp->m_rmap_maxlevels) * 2 - 2;
2309496817b4SDave Chinner
2310496817b4SDave Chinner return min_free;
2311496817b4SDave Chinner }
2312496817b4SDave Chinner
231330f712c9SDave Chinner /*
231472d55285SDave Chinner * Check if the operation we are fixing up the freelist for should go ahead or
231572d55285SDave Chinner * not. If we are freeing blocks, we always allow it, otherwise the allocation
231672d55285SDave Chinner * is dependent on whether the size and shape of free space available will
231772d55285SDave Chinner * permit the requested allocation to take place.
231872d55285SDave Chinner */
231972d55285SDave Chinner static bool
xfs_alloc_space_available(struct xfs_alloc_arg * args,xfs_extlen_t min_free,int flags)232072d55285SDave Chinner xfs_alloc_space_available(
232172d55285SDave Chinner struct xfs_alloc_arg *args,
232272d55285SDave Chinner xfs_extlen_t min_free,
232372d55285SDave Chinner int flags)
232472d55285SDave Chinner {
232572d55285SDave Chinner struct xfs_perag *pag = args->pag;
232612ef8301SChristoph Hellwig xfs_extlen_t alloc_len, longest;
23273fd129b6SDarrick J. Wong xfs_extlen_t reservation; /* blocks that are still reserved */
232872d55285SDave Chinner int available;
23291ca89fbcSBrian Foster xfs_extlen_t agflcount;
233072d55285SDave Chinner
233172d55285SDave Chinner if (flags & XFS_ALLOC_FLAG_FREEING)
233272d55285SDave Chinner return true;
233372d55285SDave Chinner
23343fd129b6SDarrick J. Wong reservation = xfs_ag_resv_needed(pag, args->resv);
23353fd129b6SDarrick J. Wong
233672d55285SDave Chinner /* do we have enough contiguous free space for the allocation? */
233712ef8301SChristoph Hellwig alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2338a1f69417SEric Sandeen longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
233912ef8301SChristoph Hellwig if (longest < alloc_len)
234072d55285SDave Chinner return false;
234172d55285SDave Chinner
23421ca89fbcSBrian Foster /*
23431ca89fbcSBrian Foster * Do we have enough free space remaining for the allocation? Don't
23441ca89fbcSBrian Foster * account extra agfl blocks because we are about to defer free them,
23451ca89fbcSBrian Foster * making them unavailable until the current transaction commits.
23461ca89fbcSBrian Foster */
23471ca89fbcSBrian Foster agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
23481ca89fbcSBrian Foster available = (int)(pag->pagf_freeblks + agflcount -
234954fee133SChristoph Hellwig reservation - min_free - args->minleft);
235012ef8301SChristoph Hellwig if (available < (int)max(args->total, alloc_len))
235172d55285SDave Chinner return false;
235272d55285SDave Chinner
235354fee133SChristoph Hellwig /*
235454fee133SChristoph Hellwig * Clamp maxlen to the amount of free space available for the actual
235554fee133SChristoph Hellwig * extent allocation.
235654fee133SChristoph Hellwig */
235754fee133SChristoph Hellwig if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
235854fee133SChristoph Hellwig args->maxlen = available;
235954fee133SChristoph Hellwig ASSERT(args->maxlen > 0);
236054fee133SChristoph Hellwig ASSERT(args->maxlen >= args->minlen);
236154fee133SChristoph Hellwig }
236254fee133SChristoph Hellwig
236372d55285SDave Chinner return true;
236472d55285SDave Chinner }
236572d55285SDave Chinner
23664223f659SBrian Foster int
xfs_free_agfl_block(struct xfs_trans * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,struct xfs_buf * agbp,struct xfs_owner_info * oinfo)23674223f659SBrian Foster xfs_free_agfl_block(
23684223f659SBrian Foster struct xfs_trans *tp,
23694223f659SBrian Foster xfs_agnumber_t agno,
23704223f659SBrian Foster xfs_agblock_t agbno,
23714223f659SBrian Foster struct xfs_buf *agbp,
23724223f659SBrian Foster struct xfs_owner_info *oinfo)
23734223f659SBrian Foster {
23744223f659SBrian Foster int error;
23754223f659SBrian Foster struct xfs_buf *bp;
23764223f659SBrian Foster
23774223f659SBrian Foster error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
23784223f659SBrian Foster XFS_AG_RESV_AGFL);
23794223f659SBrian Foster if (error)
23804223f659SBrian Foster return error;
23814223f659SBrian Foster
2382ee647f85SDarrick J. Wong error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2383ee647f85SDarrick J. Wong XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2384ee647f85SDarrick J. Wong tp->t_mountp->m_bsize, 0, &bp);
2385ee647f85SDarrick J. Wong if (error)
2386ee647f85SDarrick J. Wong return error;
23874223f659SBrian Foster xfs_trans_binval(tp, bp);
23884223f659SBrian Foster
23894223f659SBrian Foster return 0;
23904223f659SBrian Foster }
23914223f659SBrian Foster
239230f712c9SDave Chinner /*
2393e0a8de7dSDave Chinner * Check the agfl fields of the agf for inconsistency or corruption.
2394e0a8de7dSDave Chinner *
2395e0a8de7dSDave Chinner * The original purpose was to detect an agfl header padding mismatch between
2396e0a8de7dSDave Chinner * current and early v5 kernels. This problem manifests as a 1-slot size
2397e0a8de7dSDave Chinner * difference between the on-disk flcount and the active [first, last] range of
2398e0a8de7dSDave Chinner * a wrapped agfl.
2399e0a8de7dSDave Chinner *
2400e0a8de7dSDave Chinner * However, we need to use these same checks to catch agfl count corruptions
2401e0a8de7dSDave Chinner * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2402e0a8de7dSDave Chinner * way, we need to reset the agfl and warn the user.
2403a27ba260SBrian Foster *
2404a27ba260SBrian Foster * Return true if a reset is required before the agfl can be used, false
2405a27ba260SBrian Foster * otherwise.
2406a27ba260SBrian Foster */
2407a27ba260SBrian Foster static bool
xfs_agfl_needs_reset(struct xfs_mount * mp,struct xfs_agf * agf)2408a27ba260SBrian Foster xfs_agfl_needs_reset(
2409a27ba260SBrian Foster struct xfs_mount *mp,
2410a27ba260SBrian Foster struct xfs_agf *agf)
2411a27ba260SBrian Foster {
2412a27ba260SBrian Foster uint32_t f = be32_to_cpu(agf->agf_flfirst);
2413a27ba260SBrian Foster uint32_t l = be32_to_cpu(agf->agf_fllast);
2414a27ba260SBrian Foster uint32_t c = be32_to_cpu(agf->agf_flcount);
2415a27ba260SBrian Foster int agfl_size = xfs_agfl_size(mp);
2416a27ba260SBrian Foster int active;
2417a27ba260SBrian Foster
2418a27ba260SBrian Foster /*
2419a27ba260SBrian Foster * The agf read verifier catches severe corruption of these fields.
2420a27ba260SBrian Foster * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2421a27ba260SBrian Foster * the verifier allows it.
2422a27ba260SBrian Foster */
2423a27ba260SBrian Foster if (f >= agfl_size || l >= agfl_size)
2424a27ba260SBrian Foster return true;
2425a27ba260SBrian Foster if (c > agfl_size)
2426a27ba260SBrian Foster return true;
2427a27ba260SBrian Foster
2428a27ba260SBrian Foster /*
2429a27ba260SBrian Foster * Check consistency between the on-disk count and the active range. An
2430a27ba260SBrian Foster * agfl padding mismatch manifests as an inconsistent flcount.
2431a27ba260SBrian Foster */
2432a27ba260SBrian Foster if (c && l >= f)
2433a27ba260SBrian Foster active = l - f + 1;
2434a27ba260SBrian Foster else if (c)
2435a27ba260SBrian Foster active = agfl_size - f + l + 1;
2436a27ba260SBrian Foster else
2437a27ba260SBrian Foster active = 0;
2438a27ba260SBrian Foster
2439a27ba260SBrian Foster return active != c;
2440a27ba260SBrian Foster }
2441a27ba260SBrian Foster
2442a27ba260SBrian Foster /*
2443a27ba260SBrian Foster * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2444a27ba260SBrian Foster * agfl content cannot be trusted. Warn the user that a repair is required to
2445a27ba260SBrian Foster * recover leaked blocks.
2446a27ba260SBrian Foster *
2447a27ba260SBrian Foster * The purpose of this mechanism is to handle filesystems affected by the agfl
2448a27ba260SBrian Foster * header padding mismatch problem. A reset keeps the filesystem online with a
2449a27ba260SBrian Foster * relatively minor free space accounting inconsistency rather than suffer the
2450a27ba260SBrian Foster * inevitable crash from use of an invalid agfl block.
2451a27ba260SBrian Foster */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,	/* locked, logged AGF buffer */
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;

	/* Only callers that saw the reset flag set should get here. */
	ASSERT(xfs_perag_agfl_needs_reset(pag));
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	/*
	 * Any blocks still recorded on the untrusted AGFL are dropped and
	 * therefore leaked; tell the admin so xfs_repair can reclaim them.
	 */
	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	/*
	 * Empty-list state: flfirst at slot 0, fllast at the final slot so
	 * the next put advances it (wrapping) back to slot 0, count zero.
	 * Log all three fields so the reset is journalled atomically.
	 */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Bring the in-core perag in sync and clear the one-shot flag. */
	pag->pagf_flcount = 0;
	clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
}
2478a27ba260SBrian Foster
2479a27ba260SBrian Foster /*
2480f8f2835aSBrian Foster * Defer an AGFL block free. This is effectively equivalent to
2481c201d9caSDarrick J. Wong * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2482f8f2835aSBrian Foster *
2483f8f2835aSBrian Foster * Deferring AGFL frees helps prevent log reservation overruns due to too many
2484f8f2835aSBrian Foster * allocation operations in a transaction. AGFL frees are prone to this problem
2485f8f2835aSBrian Foster * because for one they are always freed one at a time. Further, an immediate
2486f8f2835aSBrian Foster * AGFL block free can cause a btree join and require another block free before
2487f8f2835aSBrian Foster * the real allocation can proceed. Deferring the free disconnects freeing up
2488f8f2835aSBrian Foster * the AGFL slot from freeing the block.
2489f8f2835aSBrian Foster */
static int
xfs_defer_agfl_block(
	struct xfs_trans		*tp,
	xfs_agnumber_t			agno,	/* AG the block belongs to */
	xfs_agblock_t			agbno,	/* AG-relative block popped off the AGFL */
	struct xfs_owner_info		*oinfo)	/* rmap owner; must not be NULL */
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_extent_free_item	*xefi;
	xfs_fsblock_t			fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);

	ASSERT(xfs_extfree_item_cache != NULL);
	ASSERT(oinfo != NULL);

	/* Refuse to queue a free for a block number outside the filesystem. */
	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
		return -EFSCORRUPTED;

	/*
	 * __GFP_NOFAIL: the transaction is already dirty at this point, so
	 * failing the allocation is not a recoverable option here.
	 */
	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
			       GFP_KERNEL | __GFP_NOFAIL);
	xefi->xefi_startblock = fsbno;
	xefi->xefi_blockcount = 1;	/* AGFL slots hold exactly one block */
	xefi->xefi_owner = oinfo->oi_owner;
	xefi->xefi_agresv = XFS_AG_RESV_AGFL;

	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);

	/* Take a perag reference for the defer item, then queue it. */
	xfs_extent_free_get_group(mp, xefi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
	return 0;
}
2520f8f2835aSBrian Foster
2521c201d9caSDarrick J. Wong /*
2522c201d9caSDarrick J. Wong * Add the extent to the list of extents to be free at transaction end.
2523c201d9caSDarrick J. Wong * The list is maintained sorted (by block number).
2524c201d9caSDarrick J. Wong */
int
__xfs_free_extent_later(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,	/* first block of the extent */
	xfs_filblks_t			len,	/* length in filesystem blocks */
	const struct xfs_owner_info	*oinfo,	/* rmap owner, or NULL */
	enum xfs_ag_resv_type		type,	/* AG reservation to credit */
	bool				skip_discard)	/* suppress online discard */
{
	struct xfs_extent_free_item	*xefi;
	struct xfs_mount		*mp = tp->t_mountp;
#ifdef DEBUG
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;

	/* Debug-only sanity: the extent must lie entirely within one AG. */
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_extfree_item_cache != NULL);
	/* AGFL blocks go through xfs_defer_agfl_block(), never this path. */
	ASSERT(type != XFS_AG_RESV_AGFL);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
		return -EFSCORRUPTED;

	/*
	 * __GFP_NOFAIL: callers are deep inside dirty transactions and
	 * cannot unwind an allocation failure here.
	 */
	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
			       GFP_KERNEL | __GFP_NOFAIL);
	xefi->xefi_startblock = bno;
	xefi->xefi_blockcount = (xfs_extlen_t)len;
	xefi->xefi_agresv = type;
	if (skip_discard)
		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
	if (oinfo) {
		/* Owner info carries fork/bmbt hints into the EFI flags. */
		ASSERT(oinfo->oi_offset == 0);

		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
			xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
			xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
		xefi->xefi_owner = oinfo->oi_owner;
	} else {
		xefi->xefi_owner = XFS_RMAP_OWN_NULL;
	}
	trace_xfs_bmap_free_defer(mp,
			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);

	/* Take a perag reference for the defer item, then queue the free. */
	xfs_extent_free_get_group(mp, xefi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
	return 0;
}
2583c201d9caSDarrick J. Wong
258430151967SChandan Babu R #ifdef DEBUG
258530151967SChandan Babu R /*
258630151967SChandan Babu R * Check if an AGF has a free extent record whose length is equal to
258730151967SChandan Babu R * args->minlen.
258830151967SChandan Babu R */
258930151967SChandan Babu R STATIC int
xfs_exact_minlen_extent_available(struct xfs_alloc_arg * args,struct xfs_buf * agbp,int * stat)259030151967SChandan Babu R xfs_exact_minlen_extent_available(
259130151967SChandan Babu R struct xfs_alloc_arg *args,
259230151967SChandan Babu R struct xfs_buf *agbp,
259330151967SChandan Babu R int *stat)
259430151967SChandan Babu R {
259530151967SChandan Babu R struct xfs_btree_cur *cnt_cur;
259630151967SChandan Babu R xfs_agblock_t fbno;
259730151967SChandan Babu R xfs_extlen_t flen;
259830151967SChandan Babu R int error = 0;
259930151967SChandan Babu R
260030151967SChandan Babu R cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2601289d38d2SDave Chinner args->pag, XFS_BTNUM_CNT);
260230151967SChandan Babu R error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
260330151967SChandan Babu R if (error)
260430151967SChandan Babu R goto out;
260530151967SChandan Babu R
260630151967SChandan Babu R if (*stat == 0) {
260730151967SChandan Babu R error = -EFSCORRUPTED;
260830151967SChandan Babu R goto out;
260930151967SChandan Babu R }
261030151967SChandan Babu R
261130151967SChandan Babu R error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
261230151967SChandan Babu R if (error)
261330151967SChandan Babu R goto out;
261430151967SChandan Babu R
261530151967SChandan Babu R if (*stat == 1 && flen != args->minlen)
261630151967SChandan Babu R *stat = 0;
261730151967SChandan Babu R
261830151967SChandan Babu R out:
261930151967SChandan Babu R xfs_btree_del_cursor(cnt_cur, error);
262030151967SChandan Babu R
262130151967SChandan Babu R return error;
262230151967SChandan Babu R }
262330151967SChandan Babu R #endif
262430151967SChandan Babu R
2625f8f2835aSBrian Foster /*
262630f712c9SDave Chinner * Decide whether to use this allocation group for this allocation.
262730f712c9SDave Chinner * If so, fix up the btree freelist's size.
262830f712c9SDave Chinner */
int	/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/*
	 * Read the AGF if the perag hasn't been initialised from it yet;
	 * this populates pagf_flcount and friends used below.
	 */
	if (!xfs_perag_initialised_agf(pag)) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (xfs_perag_prefers_metadata(pag) &&
	    (args->datatype & XFS_ALLOC_USERDATA) &&
	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/*
	 * Cheap first-pass space check against the (possibly stale) perag
	 * counters before we commit to locking the AGF below.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (xfs_perag_agfl_needs_reset(pag))
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags))
		goto out_agbp_relse;

#ifdef DEBUG
	/* Debug-only mode: require an exact minlen-sized free extent. */
	if (args->alloc_minlen_only) {
		int stat;

		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
		if (error || !stat)
			goto out_agbp_relse;
	}
#endif
	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
	       pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
		if (error)
			goto out_agbp_relse;
	}

	/* Set up a local allocation request to refill the freelist. */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out. Won't happen if callers are obeying
		 * the restrictions correctly. Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}

		/* Record the new AG ownership in the rmapbt unless skipped. */
		if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
			error = xfs_rmap_alloc(tp, agbp, pag,
				       targs.agbno, targs.len, &targs.oinfo);
			if (error)
				goto out_agflbp_relse;
		}
		error = xfs_alloc_update_counters(tp, agbp,
						  -((long)(targs.len)));
		if (error)
			goto out_agflbp_relse;

		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(pag, tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
281430f712c9SDave Chinner
281530f712c9SDave Chinner /*
281630f712c9SDave Chinner * Get a block from the freelist.
281730f712c9SDave Chinner * Returns with the buffer for the block gotten.
281830f712c9SDave Chinner */
int
xfs_alloc_get_freelist(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,	/* locked AGF buffer */
	xfs_agblock_t		*bnop,	/* out: block, NULLAGBLOCK if list empty */
	int			btreeblk)	/* nonzero: block destined for AG btrees */
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_buf		*agflbp;
	xfs_agblock_t		bno;
	__be32			*agfl_bno;
	int			error;
	uint32_t		logflags;
	struct xfs_mount	*mp = tp->t_mountp;

	/*
	 * Freelist is empty, give up.
	 */
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		return error;


	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	/*
	 * NOTE(review): on this corruption return, agflbp is not explicitly
	 * released here; presumably the transaction teardown releases it —
	 * confirm against xfs_trans cancel/commit behavior.
	 */
	if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
		return -EFSCORRUPTED;

	/* Pop the head slot; the AGFL is a circular array, so wrap flfirst. */
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	ASSERT(!xfs_perag_agfl_needs_reset(pag));
	be32_add_cpu(&agf->agf_flcount, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Track blocks handed to the AG btrees in agf_btreeblks. */
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
287930f712c9SDave Chinner
288030f712c9SDave Chinner /*
288130f712c9SDave Chinner * Log the given fields from the agf structure.
288230f712c9SDave Chinner */
void
xfs_alloc_log_agf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,	/* AGF buffer */
	uint32_t		fields)	/* mask of XFS_AGF_* fields to log */
{
	int			first;	/* first byte offset */
	int			last;	/* last byte offset */
	/*
	 * Byte offset of each loggable AGF field, in the same order as the
	 * XFS_AGF_* flag bits; xfs_btree_offsets() maps the flag mask to a
	 * [first, last] byte range using this table, so the two must stay
	 * in sync.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);

	/* Tag the buffer type for log recovery before dirtying it. */
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
292130f712c9SDave Chinner
292230f712c9SDave Chinner /*
292330f712c9SDave Chinner * Put the block on the freelist for the allocation group.
292430f712c9SDave Chinner */
292550920116SDave Chinner int
xfs_alloc_put_freelist(struct xfs_perag * pag,struct xfs_trans * tp,struct xfs_buf * agbp,struct xfs_buf * agflbp,xfs_agblock_t bno,int btreeblk)292630f712c9SDave Chinner xfs_alloc_put_freelist(
29278c392eb2SDave Chinner struct xfs_perag *pag,
292850920116SDave Chinner struct xfs_trans *tp,
292950920116SDave Chinner struct xfs_buf *agbp,
293050920116SDave Chinner struct xfs_buf *agflbp,
293150920116SDave Chinner xfs_agblock_t bno,
293250920116SDave Chinner int btreeblk)
293330f712c9SDave Chinner {
29349798f615SChristoph Hellwig struct xfs_mount *mp = tp->t_mountp;
29359798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr;
293650920116SDave Chinner __be32 *blockp;
293730f712c9SDave Chinner int error;
2938f53dde11SDave Chinner uint32_t logflags;
293930f712c9SDave Chinner __be32 *agfl_bno;
294030f712c9SDave Chinner int startoff;
294130f712c9SDave Chinner
2942cec7bb7dSDave Chinner if (!agflbp) {
2943cec7bb7dSDave Chinner error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2944cec7bb7dSDave Chinner if (error)
294530f712c9SDave Chinner return error;
2946cec7bb7dSDave Chinner }
2947cec7bb7dSDave Chinner
294830f712c9SDave Chinner be32_add_cpu(&agf->agf_fllast, 1);
2949a78ee256SDave Chinner if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
295030f712c9SDave Chinner agf->agf_fllast = 0;
295130f712c9SDave Chinner
29527ac2ff8bSDave Chinner ASSERT(!xfs_perag_agfl_needs_reset(pag));
295330f712c9SDave Chinner be32_add_cpu(&agf->agf_flcount, 1);
295430f712c9SDave Chinner pag->pagf_flcount++;
295530f712c9SDave Chinner
295630f712c9SDave Chinner logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
295730f712c9SDave Chinner if (btreeblk) {
295830f712c9SDave Chinner be32_add_cpu(&agf->agf_btreeblks, -1);
295930f712c9SDave Chinner pag->pagf_btreeblks--;
296030f712c9SDave Chinner logflags |= XFS_AGF_BTREEBLKS;
296130f712c9SDave Chinner }
296230f712c9SDave Chinner
296330f712c9SDave Chinner xfs_alloc_log_agf(tp, agbp, logflags);
296430f712c9SDave Chinner
2965a78ee256SDave Chinner ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
296630f712c9SDave Chinner
2967183606d8SChristoph Hellwig agfl_bno = xfs_buf_to_agfl_bno(agflbp);
296830f712c9SDave Chinner blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
296930f712c9SDave Chinner *blockp = cpu_to_be32(bno);
297030f712c9SDave Chinner startoff = (char *)blockp - (char *)agflbp->b_addr;
297130f712c9SDave Chinner
297230f712c9SDave Chinner xfs_alloc_log_agf(tp, agbp, logflags);
297330f712c9SDave Chinner
297430f712c9SDave Chinner xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
297530f712c9SDave Chinner xfs_trans_log_buf(tp, agflbp, startoff,
297630f712c9SDave Chinner startoff + sizeof(xfs_agblock_t) - 1);
297730f712c9SDave Chinner return 0;
297830f712c9SDave Chinner }
297930f712c9SDave Chinner
2980e0a8de7dSDave Chinner /*
29812d7d1e7eSDarrick J. Wong * Check that this AGF/AGI header's sequence number and length matches the AG
29822d7d1e7eSDarrick J. Wong * number and size in fsblocks.
29832d7d1e7eSDarrick J. Wong */
29842d7d1e7eSDarrick J. Wong xfs_failaddr_t
xfs_validate_ag_length(struct xfs_buf * bp,uint32_t seqno,uint32_t length)29852d7d1e7eSDarrick J. Wong xfs_validate_ag_length(
29862d7d1e7eSDarrick J. Wong struct xfs_buf *bp,
29872d7d1e7eSDarrick J. Wong uint32_t seqno,
29882d7d1e7eSDarrick J. Wong uint32_t length)
29892d7d1e7eSDarrick J. Wong {
29902d7d1e7eSDarrick J. Wong struct xfs_mount *mp = bp->b_mount;
29912d7d1e7eSDarrick J. Wong /*
29922d7d1e7eSDarrick J. Wong * During growfs operations, the perag is not fully initialised,
29932d7d1e7eSDarrick J. Wong * so we can't use it for any useful checking. growfs ensures we can't
29942d7d1e7eSDarrick J. Wong * use it by using uncached buffers that don't have the perag attached
29952d7d1e7eSDarrick J. Wong * so we can detect and avoid this problem.
29962d7d1e7eSDarrick J. Wong */
29972d7d1e7eSDarrick J. Wong if (bp->b_pag && seqno != bp->b_pag->pag_agno)
29982d7d1e7eSDarrick J. Wong return __this_address;
29992d7d1e7eSDarrick J. Wong
30002d7d1e7eSDarrick J. Wong /*
30012d7d1e7eSDarrick J. Wong * Only the last AG in the filesystem is allowed to be shorter
30022d7d1e7eSDarrick J. Wong * than the AG size recorded in the superblock.
30032d7d1e7eSDarrick J. Wong */
30042d7d1e7eSDarrick J. Wong if (length != mp->m_sb.sb_agblocks) {
30052d7d1e7eSDarrick J. Wong /*
30062d7d1e7eSDarrick J. Wong * During growfs, the new last AG can get here before we
30072d7d1e7eSDarrick J. Wong * have updated the superblock. Give it a pass on the seqno
30082d7d1e7eSDarrick J. Wong * check.
30092d7d1e7eSDarrick J. Wong */
30102d7d1e7eSDarrick J. Wong if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
30112d7d1e7eSDarrick J. Wong return __this_address;
30122d7d1e7eSDarrick J. Wong if (length < XFS_MIN_AG_BLOCKS)
30132d7d1e7eSDarrick J. Wong return __this_address;
30142d7d1e7eSDarrick J. Wong if (length > mp->m_sb.sb_agblocks)
30152d7d1e7eSDarrick J. Wong return __this_address;
30162d7d1e7eSDarrick J. Wong }
30172d7d1e7eSDarrick J. Wong
30182d7d1e7eSDarrick J. Wong return NULL;
30192d7d1e7eSDarrick J. Wong }
30202d7d1e7eSDarrick J. Wong
/*
 * Verify the AGF is consistent.
 *
 * We do not verify the AGFL indexes in the AGF are fully consistent here
 * because of issues with variable on-disk structure sizes. Instead, we check
 * the agfl indexes for consistency when we initialise the perag from the AGF
 * information after a read completes.
 *
 * If the index is inconsistent, then we mark the perag as needing an AGFL
 * reset. The first AGFL update performed then resets the AGFL indexes and
 * refills the AGFL with known good free blocks, allowing the filesystem to
 * continue operating normally at the cost of a few leaked free space blocks.
 *
 * Returns NULL if the AGF is consistent, or the address of the failing check
 * (via __this_address) for error reporting.
 */
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;
	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno);
	uint32_t		agf_length = be32_to_cpu(agf->agf_length);

	if (xfs_has_crc(mp)) {
		/* v5 format only: check metadata UUID and last-written LSN. */
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
		return __this_address;

	/*
	 * Both agf_seqno and agf_length need to validated before anything else
	 * block number related in the AGF or AGFL can be checked.
	 */
	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
	if (fa)
		return fa;

	/* AGFL head/tail indexes and count must fit within the AGFL. */
	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
		return __this_address;

	/*
	 * agf_freeblks must be at least the longest recorded free extent and
	 * no larger than the AG itself.
	 */
	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
	    be32_to_cpu(agf->agf_freeblks) > agf_length)
		return __this_address;

	/* bno/cnt btree heights must be within [1, m_alloc_maxlevels]. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
						mp->m_alloc_maxlevels ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
						mp->m_alloc_maxlevels)
		return __this_address;

	/* agf_btreeblks is only maintained on lazysbcount filesystems. */
	if (xfs_has_lazysbcount(mp) &&
	    be32_to_cpu(agf->agf_btreeblks) > agf_length)
		return __this_address;

	if (xfs_has_rmapbt(mp)) {
		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
						mp->m_rmap_maxlevels)
			return __this_address;
	}

	if (xfs_has_reflink(mp)) {
		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
			return __this_address;
	}

	return NULL;
}
310930f712c9SDave Chinner
311030f712c9SDave Chinner static void
xfs_agf_read_verify(struct xfs_buf * bp)311130f712c9SDave Chinner xfs_agf_read_verify(
311230f712c9SDave Chinner struct xfs_buf *bp)
311330f712c9SDave Chinner {
3114dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
3115bc1a09b8SDarrick J. Wong xfs_failaddr_t fa;
311630f712c9SDave Chinner
311738c26bfdSDave Chinner if (xfs_has_crc(mp) &&
311830f712c9SDave Chinner !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3119bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3120bc1a09b8SDarrick J. Wong else {
3121b5572597SDarrick J. Wong fa = xfs_agf_verify(bp);
3122bc1a09b8SDarrick J. Wong if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3123bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3124bc1a09b8SDarrick J. Wong }
312530f712c9SDave Chinner }
312630f712c9SDave Chinner
312730f712c9SDave Chinner static void
xfs_agf_write_verify(struct xfs_buf * bp)312830f712c9SDave Chinner xfs_agf_write_verify(
312930f712c9SDave Chinner struct xfs_buf *bp)
313030f712c9SDave Chinner {
3131dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount;
3132fb1755a6SCarlos Maiolino struct xfs_buf_log_item *bip = bp->b_log_item;
31339798f615SChristoph Hellwig struct xfs_agf *agf = bp->b_addr;
3134bc1a09b8SDarrick J. Wong xfs_failaddr_t fa;
313530f712c9SDave Chinner
3136b5572597SDarrick J. Wong fa = xfs_agf_verify(bp);
3137bc1a09b8SDarrick J. Wong if (fa) {
3138bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa);
313930f712c9SDave Chinner return;
314030f712c9SDave Chinner }
314130f712c9SDave Chinner
314238c26bfdSDave Chinner if (!xfs_has_crc(mp))
314330f712c9SDave Chinner return;
314430f712c9SDave Chinner
314530f712c9SDave Chinner if (bip)
31469798f615SChristoph Hellwig agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
314730f712c9SDave Chinner
314830f712c9SDave Chinner xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
314930f712c9SDave Chinner }
315030f712c9SDave Chinner
/* Buffer verifier operations for AGF buffers. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
315830f712c9SDave Chinner
315930f712c9SDave Chinner /*
316030f712c9SDave Chinner * Read in the allocation group header (free/alloc section).
316130f712c9SDave Chinner */
3162fa044ae7SDave Chinner int
xfs_read_agf(struct xfs_perag * pag,struct xfs_trans * tp,int flags,struct xfs_buf ** agfbpp)316330f712c9SDave Chinner xfs_read_agf(
3164fa044ae7SDave Chinner struct xfs_perag *pag,
3165fa044ae7SDave Chinner struct xfs_trans *tp,
3166fa044ae7SDave Chinner int flags,
3167fa044ae7SDave Chinner struct xfs_buf **agfbpp)
316830f712c9SDave Chinner {
3169fa044ae7SDave Chinner struct xfs_mount *mp = pag->pag_mount;
317030f712c9SDave Chinner int error;
317130f712c9SDave Chinner
3172fa044ae7SDave Chinner trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
317330f712c9SDave Chinner
31744ed8e27bSDarrick J. Wong error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3175fa044ae7SDave Chinner XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
3176fa044ae7SDave Chinner XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
317730f712c9SDave Chinner if (error)
317830f712c9SDave Chinner return error;
317930f712c9SDave Chinner
3180fa044ae7SDave Chinner xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
318130f712c9SDave Chinner return 0;
318230f712c9SDave Chinner }
318330f712c9SDave Chinner
/*
 * Read in the allocation group header (free/alloc section) and initialise the
 * perag structure if necessary. If the caller provides @agfbpp, then return the
 * locked buffer to the caller, otherwise free it.
 */
int
xfs_alloc_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,
	struct xfs_buf		**agfbpp)
{
	struct xfs_buf		*agfbp;
	struct xfs_agf		*agf;
	int			error;
	int			allocbt_blks;

	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);

	/* We don't support trylock when freeing. */
	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
	error = xfs_read_agf(pag, tp,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			&agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	if (!xfs_perag_initialised_agf(pag)) {
		/* First read of this AGF: seed the in-core perag counters. */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		if (xfs_agfl_needs_reset(pag->pag_mount, agf))
			set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
		else
			clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);

		/*
		 * Update the in-core allocbt counter. Filter out the rmapbt
		 * subset of the btreeblks counter because the rmapbt is managed
		 * by perag reservation. Subtract one for the rmapbt root block
		 * because the rmap counter includes it while the btreeblks
		 * counter only tracks non-root blocks.
		 */
		allocbt_blks = pag->pagf_btreeblks;
		if (xfs_has_rmapbt(pag->pag_mount))
			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
		if (allocbt_blks > 0)
			atomic64_add(allocbt_blks,
					&pag->pag_mount->m_allocbt_blks);

		/* Only mark the AGF state initialised after the fields are set. */
		set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
	}
#ifdef DEBUG
	else if (!xfs_is_shutdown(pag->pag_mount)) {
		/* Cached perag values must still match the on-disk AGF. */
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	if (agfbpp)
		*agfbpp = agfbp;
	else
		xfs_trans_brelse(tp, agfbp);
	return 0;
}
326430f712c9SDave Chinner
326530f712c9SDave Chinner /*
3266ecd788a9SDave Chinner * Pre-proces allocation arguments to set initial state that we don't require
3267ecd788a9SDave Chinner * callers to set up correctly, as well as bounds check the allocation args
3268ecd788a9SDave Chinner * that are set up.
326930f712c9SDave Chinner */
3270ecd788a9SDave Chinner static int
xfs_alloc_vextent_check_args(struct xfs_alloc_arg * args,xfs_fsblock_t target,xfs_agnumber_t * minimum_agno)3271ecd788a9SDave Chinner xfs_alloc_vextent_check_args(
3272319c9e87SDave Chinner struct xfs_alloc_arg *args,
32738b813568SDave Chinner xfs_fsblock_t target,
32748b813568SDave Chinner xfs_agnumber_t *minimum_agno)
327530f712c9SDave Chinner {
3276ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
3277ecd788a9SDave Chinner xfs_agblock_t agsize;
327830f712c9SDave Chinner
3279230e8fe8SDave Chinner args->fsbno = NULLFSBLOCK;
3280ecd788a9SDave Chinner
32818b813568SDave Chinner *minimum_agno = 0;
32828b813568SDave Chinner if (args->tp->t_highest_agno != NULLAGNUMBER)
32838b813568SDave Chinner *minimum_agno = args->tp->t_highest_agno;
32848b813568SDave Chinner
328530f712c9SDave Chinner /*
328630f712c9SDave Chinner * Just fix this up, for the case where the last a.g. is shorter
328730f712c9SDave Chinner * (or there's only one a.g.) and the caller couldn't easily figure
328830f712c9SDave Chinner * that out (xfs_bmap_alloc).
328930f712c9SDave Chinner */
329030f712c9SDave Chinner agsize = mp->m_sb.sb_agblocks;
329130f712c9SDave Chinner if (args->maxlen > agsize)
329230f712c9SDave Chinner args->maxlen = agsize;
329330f712c9SDave Chinner if (args->alignment == 0)
329430f712c9SDave Chinner args->alignment = 1;
329574b9aa63SDave Chinner
329674b9aa63SDave Chinner ASSERT(args->minlen > 0);
329774b9aa63SDave Chinner ASSERT(args->maxlen > 0);
329874b9aa63SDave Chinner ASSERT(args->alignment > 0);
329974b9aa63SDave Chinner ASSERT(args->resv != XFS_AG_RESV_AGFL);
330074b9aa63SDave Chinner
3301319c9e87SDave Chinner ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
3302319c9e87SDave Chinner ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
330330f712c9SDave Chinner ASSERT(args->minlen <= args->maxlen);
330430f712c9SDave Chinner ASSERT(args->minlen <= agsize);
330530f712c9SDave Chinner ASSERT(args->mod < args->prod);
330674b9aa63SDave Chinner
3307319c9e87SDave Chinner if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
3308319c9e87SDave Chinner XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
330930f712c9SDave Chinner args->minlen > args->maxlen || args->minlen > agsize ||
331030f712c9SDave Chinner args->mod >= args->prod) {
331130f712c9SDave Chinner trace_xfs_alloc_vextent_badargs(args);
3312ecd788a9SDave Chinner return -ENOSPC;
3313ecd788a9SDave Chinner }
33148b813568SDave Chinner
33158b813568SDave Chinner if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
33168b813568SDave Chinner trace_xfs_alloc_vextent_skip_deadlock(args);
33178b813568SDave Chinner return -ENOSPC;
33188b813568SDave Chinner }
331930f712c9SDave Chinner return 0;
33208b813568SDave Chinner
332130f712c9SDave Chinner }
332230f712c9SDave Chinner
332330f712c9SDave Chinner /*
332474b9aa63SDave Chinner * Prepare an AG for allocation. If the AG is not prepared to accept the
332574b9aa63SDave Chinner * allocation, return failure.
332674b9aa63SDave Chinner *
332774b9aa63SDave Chinner * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
332874b9aa63SDave Chinner * modified to hold their own perag references.
332974b9aa63SDave Chinner */
333074b9aa63SDave Chinner static int
xfs_alloc_vextent_prepare_ag(struct xfs_alloc_arg * args,uint32_t alloc_flags)333174b9aa63SDave Chinner xfs_alloc_vextent_prepare_ag(
333200dcd17cSDave Chinner struct xfs_alloc_arg *args,
33336a2a9d77SDave Chinner uint32_t alloc_flags)
333474b9aa63SDave Chinner {
333574b9aa63SDave Chinner bool need_pag = !args->pag;
333674b9aa63SDave Chinner int error;
333774b9aa63SDave Chinner
333874b9aa63SDave Chinner if (need_pag)
333974b9aa63SDave Chinner args->pag = xfs_perag_get(args->mp, args->agno);
334074b9aa63SDave Chinner
33413432ef61SDave Chinner args->agbp = NULL;
33426a2a9d77SDave Chinner error = xfs_alloc_fix_freelist(args, alloc_flags);
334374b9aa63SDave Chinner if (error) {
334474b9aa63SDave Chinner trace_xfs_alloc_vextent_nofix(args);
334574b9aa63SDave Chinner if (need_pag)
334674b9aa63SDave Chinner xfs_perag_put(args->pag);
334774b9aa63SDave Chinner args->agbno = NULLAGBLOCK;
334874b9aa63SDave Chinner return error;
334974b9aa63SDave Chinner }
335074b9aa63SDave Chinner if (!args->agbp) {
335174b9aa63SDave Chinner /* cannot allocate in this AG at all */
335274b9aa63SDave Chinner trace_xfs_alloc_vextent_noagbp(args);
335374b9aa63SDave Chinner args->agbno = NULLAGBLOCK;
335474b9aa63SDave Chinner return 0;
335574b9aa63SDave Chinner }
335674b9aa63SDave Chinner args->wasfromfl = 0;
335774b9aa63SDave Chinner return 0;
335874b9aa63SDave Chinner }
335974b9aa63SDave Chinner
/*
 * Post-process allocation results to account for the allocation if it
 * succeeds and set the allocated block number correctly for the caller.
 *
 * XXX: we should really be returning ENOSPC for ENOSPC, not
 * hiding it behind a "successful" NULLFSBLOCK allocation.
 */
static int
xfs_alloc_vextent_finish(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	int			alloc_error,
	bool			drop_perag)
{
	struct xfs_mount	*mp = args->mp;
	int			error = 0;

	/*
	 * We can end up here with a locked AGF. If we failed, the caller is
	 * likely going to try to allocate again with different parameters, and
	 * that can widen the AGs that are searched for free space. If we have
	 * to do BMBT block allocation, we have to do a new allocation.
	 *
	 * Hence leaving this function with the AGF locked opens up potential
	 * ABBA AGF deadlocks because a future allocation attempt in this
	 * transaction may attempt to lock a lower number AGF.
	 *
	 * We can't release the AGF until the transaction is committed, so at
	 * this point we must update the "first allocation" tracker to point at
	 * this AG if the tracker is empty or points to a lower AG. This allows
	 * the next allocation attempt to be modified appropriately to avoid
	 * deadlocks.
	 */
	if (args->agbp &&
	    (args->tp->t_highest_agno == NULLAGNUMBER ||
	     args->agno > minimum_agno))
		args->tp->t_highest_agno = args->agno;

	/*
	 * If the allocation failed with an error or we had an ENOSPC result,
	 * preserve the returned error whilst also marking the allocation result
	 * as "no extent allocated". This ensures that callers that fail to
	 * capture the error will still treat it as a failed allocation.
	 */
	if (alloc_error || args->agbno == NULLAGBLOCK) {
		args->fsbno = NULLFSBLOCK;
		error = alloc_error;
		goto out_drop_perag;
	}

	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(args->agbno % args->alignment == 0);
	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);

	/* if not file data, insert new block into the reverse map btree */
	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			goto out_drop_perag;
	}

	/* Extents taken from the AGFL (wasfromfl) skip this accounting. */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->agbp,
						  -((long)(args->len)));
		if (error)
			goto out_drop_perag;

		/* The allocated range must not overlap a busy extent. */
		ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
				args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(mp, xs_allocx);
	XFS_STATS_ADD(mp, xs_allocb, args->len);

	trace_xfs_alloc_vextent_finish(args);

out_drop_perag:
	if (drop_perag && args->pag) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
	}
	return error;
}
3449ecd788a9SDave Chinner
3450ecd788a9SDave Chinner /*
3451230e8fe8SDave Chinner * Allocate within a single AG only. This uses a best-fit length algorithm so if
3452230e8fe8SDave Chinner * you need an exact sized allocation without locality constraints, this is the
3453230e8fe8SDave Chinner * fastest way to do it.
3454230e8fe8SDave Chinner *
3455230e8fe8SDave Chinner * Caller is expected to hold a perag reference in args->pag.
3456ecd788a9SDave Chinner */
345774c36a86SDave Chinner int
xfs_alloc_vextent_this_ag(struct xfs_alloc_arg * args,xfs_agnumber_t agno)3458ecd788a9SDave Chinner xfs_alloc_vextent_this_ag(
34595f36b2ceSDave Chinner struct xfs_alloc_arg *args,
34605f36b2ceSDave Chinner xfs_agnumber_t agno)
3461ecd788a9SDave Chinner {
3462ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
34638b813568SDave Chinner xfs_agnumber_t minimum_agno;
34646a2a9d77SDave Chinner uint32_t alloc_flags = 0;
3465ecd788a9SDave Chinner int error;
3466ecd788a9SDave Chinner
34676de4b1abSDarrick J. Wong ASSERT(args->pag != NULL);
34686de4b1abSDarrick J. Wong ASSERT(args->pag->pag_agno == agno);
34696de4b1abSDarrick J. Wong
34708b813568SDave Chinner args->agno = agno;
34718b813568SDave Chinner args->agbno = 0;
3472e6fbb716SDarrick J. Wong
3473e6fbb716SDarrick J. Wong trace_xfs_alloc_vextent_this_ag(args);
3474e6fbb716SDarrick J. Wong
34758b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
34768b813568SDave Chinner &minimum_agno);
3477ecd788a9SDave Chinner if (error) {
3478ecd788a9SDave Chinner if (error == -ENOSPC)
3479ecd788a9SDave Chinner return 0;
3480ecd788a9SDave Chinner return error;
3481ecd788a9SDave Chinner }
3482ecd788a9SDave Chinner
34836a2a9d77SDave Chinner error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
348474b9aa63SDave Chinner if (!error && args->agbp)
34856a2a9d77SDave Chinner error = xfs_alloc_ag_vextent_size(args, alloc_flags);
348674b9aa63SDave Chinner
3487e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
348830f712c9SDave Chinner }
3489ecd788a9SDave Chinner
/*
 * Iterate all AGs trying to allocate an extent starting from @start_agno.
 *
 * If @target_agbno is non-zero, the allocation attempt in @start_agno has
 * locality information and we try a near allocation there first. If we fail to
 * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
 * we attempt to allocate in as there is no locality optimisation possible for
 * those allocations.
 *
 * On return, args->pag may be left referenced if we finish before the "all
 * failed" return point. The allocation finish still needs the perag, and
 * so the caller will release it once they've finished the allocation.
 *
 * When we wrap the AG iteration at the end of the filesystem, we have to be
 * careful not to wrap into AGs below ones we already have locked in the
 * transaction if we are doing a blocking iteration. This will result in an
 * out-of-order locking of AGFs and hence can cause deadlocks.
 */
static int
xfs_alloc_vextent_iterate_ags(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	xfs_agnumber_t		start_agno,
	xfs_agblock_t		target_agbno,
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		restart_agno = minimum_agno;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* The non-blocking (trylock) pass may wrap all the way back to AG 0. */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
		restart_agno = 0;
restart:
	for_each_perag_wrap_range(mp, start_agno, restart_agno,
			mp->m_sb.sb_agcount, agno, args->pag) {
		args->agno = agno;
		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
		if (error)
			break;
		if (!args->agbp) {
			/* This AG can't satisfy the request; try the next. */
			trace_xfs_alloc_vextent_loopfailed(args);
			continue;
		}

		/*
		 * Allocation is supposed to succeed now, so break out of the
		 * loop regardless of whether we succeed or not.
		 */
		if (args->agno == start_agno && target_agbno) {
			args->agbno = target_agbno;
			error = xfs_alloc_ag_vextent_near(args, alloc_flags);
		} else {
			args->agbno = 0;
			error = xfs_alloc_ag_vextent_size(args, alloc_flags);
		}
		break;
	}
	if (error) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
		return error;
	}
	if (args->agbp)
		return 0;

	/*
	 * We didn't find an AG we can allocate from. If we were given
	 * constraining flags by the caller, drop them and retry the allocation
	 * without any constraints being set.
	 */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
		restart_agno = minimum_agno;
		goto restart;
	}

	ASSERT(args->pag == NULL);
	trace_xfs_alloc_vextent_allfailed(args);
	return 0;
}
3571ecd788a9SDave Chinner
/*
 * Iterate the AGs from the start AG to the end of the filesystem, trying to
 * allocate blocks. It starts with a near allocation attempt in the initial
 * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
 * back to zero if allowed by previous allocations in this transaction,
 * otherwise will wrap back to the start AG and run a second blocking pass to
 * the end of the filesystem.
 */
int
xfs_alloc_vextent_start_ag(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	xfs_agnumber_t		start_agno;
	xfs_agnumber_t		rotorstep = xfs_rotorstep;
	bool			bump_rotor = false;
	uint32_t		alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
	int			error;

	/* Caller must not already hold a perag reference for this variant. */
	ASSERT(args->pag == NULL);

	args->agno = NULLAGNUMBER;
	args->agbno = NULLAGBLOCK;

	trace_xfs_alloc_vextent_start_ag(args);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error) {
		/* -ENOSPC from the argument checks maps to "nothing allocated". */
		if (error == -ENOSPC)
			return 0;
		return error;
	}

	/*
	 * For the first user data allocation on inode32 filesystems, override
	 * the target with the AG picked by the m_agfrotor rotor so data
	 * allocations are spread over the AGs; remember to advance the rotor
	 * once the allocation has been attempted.
	 */
	if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
	    xfs_is_inode32(mp)) {
		target = XFS_AGB_TO_FSB(mp,
				((mp->m_agfrotor / rotorstep) %
				mp->m_sb.sb_agcount), 0);
		bump_rotor = 1;
	}

	/* Never start the AG iteration below the minimum allowed AG. */
	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
			XFS_FSB_TO_AGBNO(mp, target), alloc_flags);

	/*
	 * Advance the rotor: one step if we allocated from the rotor AG,
	 * otherwise jump it past the AG the allocation actually landed in.
	 */
	if (bump_rotor) {
		if (args->agno == start_agno)
			mp->m_agfrotor = (mp->m_agfrotor + 1) %
				(mp->m_sb.sb_agcount * rotorstep);
		else
			mp->m_agfrotor = (args->agno * rotorstep + 1) %
				(mp->m_sb.sb_agcount * rotorstep);
	}

	return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
}
3630ecd788a9SDave Chinner
3631ecd788a9SDave Chinner /*
3632230e8fe8SDave Chinner * Iterate from the agno indicated via @target through to the end of the
3633ecd788a9SDave Chinner * filesystem attempting blocking allocation. This does not wrap or try a second
3634230e8fe8SDave Chinner * pass, so will not recurse into AGs lower than indicated by the target.
3635ecd788a9SDave Chinner */
3636319c9e87SDave Chinner int
xfs_alloc_vextent_first_ag(struct xfs_alloc_arg * args,xfs_fsblock_t target)3637ecd788a9SDave Chinner xfs_alloc_vextent_first_ag(
3638ecd788a9SDave Chinner struct xfs_alloc_arg *args,
3639319c9e87SDave Chinner xfs_fsblock_t target)
3640ecd788a9SDave Chinner {
3641ecd788a9SDave Chinner struct xfs_mount *mp = args->mp;
36428b813568SDave Chinner xfs_agnumber_t minimum_agno;
3643ecd788a9SDave Chinner xfs_agnumber_t start_agno;
36446a2a9d77SDave Chinner uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3645ecd788a9SDave Chinner int error;
3646ecd788a9SDave Chinner
36476de4b1abSDarrick J. Wong ASSERT(args->pag == NULL);
36486de4b1abSDarrick J. Wong
36498b813568SDave Chinner args->agno = NULLAGNUMBER;
36508b813568SDave Chinner args->agbno = NULLAGBLOCK;
3651e6fbb716SDarrick J. Wong
36524dfb02d5SDarrick J. Wong trace_xfs_alloc_vextent_first_ag(args);
3653e6fbb716SDarrick J. Wong
36548b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3655ecd788a9SDave Chinner if (error) {
3656ecd788a9SDave Chinner if (error == -ENOSPC)
3657ecd788a9SDave Chinner return 0;
3658ecd788a9SDave Chinner return error;
3659ecd788a9SDave Chinner }
3660ecd788a9SDave Chinner
3661319c9e87SDave Chinner start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3662230e8fe8SDave Chinner error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
36636a2a9d77SDave Chinner XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3664e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3665ecd788a9SDave Chinner }
3666ecd788a9SDave Chinner
3667ecd788a9SDave Chinner /*
366874b9aa63SDave Chinner * Allocate at the exact block target or fail. Caller is expected to hold a
366974b9aa63SDave Chinner * perag reference in args->pag.
36705f36b2ceSDave Chinner */
36715f36b2ceSDave Chinner int
xfs_alloc_vextent_exact_bno(struct xfs_alloc_arg * args,xfs_fsblock_t target)36725f36b2ceSDave Chinner xfs_alloc_vextent_exact_bno(
36735f36b2ceSDave Chinner struct xfs_alloc_arg *args,
36745f36b2ceSDave Chinner xfs_fsblock_t target)
36755f36b2ceSDave Chinner {
36765f36b2ceSDave Chinner struct xfs_mount *mp = args->mp;
36778b813568SDave Chinner xfs_agnumber_t minimum_agno;
36785f36b2ceSDave Chinner int error;
36795f36b2ceSDave Chinner
36806de4b1abSDarrick J. Wong ASSERT(args->pag != NULL);
36816de4b1abSDarrick J. Wong ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
36826de4b1abSDarrick J. Wong
36838b813568SDave Chinner args->agno = XFS_FSB_TO_AGNO(mp, target);
36848b813568SDave Chinner args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3685e6fbb716SDarrick J. Wong
36864dfb02d5SDarrick J. Wong trace_xfs_alloc_vextent_exact_bno(args);
3687e6fbb716SDarrick J. Wong
36888b813568SDave Chinner error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
36895f36b2ceSDave Chinner if (error) {
36905f36b2ceSDave Chinner if (error == -ENOSPC)
36915f36b2ceSDave Chinner return 0;
36925f36b2ceSDave Chinner return error;
36935f36b2ceSDave Chinner }
36945f36b2ceSDave Chinner
369500dcd17cSDave Chinner error = xfs_alloc_vextent_prepare_ag(args, 0);
369674b9aa63SDave Chinner if (!error && args->agbp)
3697230e8fe8SDave Chinner error = xfs_alloc_ag_vextent_exact(args);
36985f36b2ceSDave Chinner
3699e4d17426SDave Chinner return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
37005f36b2ceSDave Chinner }
37015f36b2ceSDave Chinner
/*
 * Allocate an extent as close to the target as possible. If there are not
 * viable candidates in the AG, then fail the allocation.
 *
 * Caller may or may not have a per-ag reference in args->pag.
 */
int
xfs_alloc_vextent_near_bno(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	/* If the caller didn't supply a perag reference, we grab our own. */
	bool			needs_perag = args->pag == NULL;
	uint32_t		alloc_flags = 0;
	int			error;

	/* A caller-supplied perag must match the AG containing the target. */
	if (!needs_perag)
		ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));

	args->agno = XFS_FSB_TO_AGNO(mp, target);
	args->agbno = XFS_FSB_TO_AGBNO(mp, target);

	trace_xfs_alloc_vextent_near_bno(args);

	error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
	if (error) {
		/* -ENOSPC from the argument checks maps to "nothing allocated". */
		if (error == -ENOSPC)
			return 0;
		return error;
	}

	if (needs_perag)
		args->pag = xfs_perag_grab(mp, args->agno);

	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
	if (!error && args->agbp)
		error = xfs_alloc_ag_vextent_near(args, alloc_flags);

	/*
	 * NOTE(review): needs_perag tells finish() whether the reference we
	 * grabbed above must be dropped here rather than by the caller —
	 * confirm against xfs_alloc_vextent_finish().
	 */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
}
3743db4710fdSDave Chinner
37444d89e20bSDave Chinner /* Ensure that the freelist is at full capacity. */
37454d89e20bSDave Chinner int
xfs_free_extent_fix_freelist(struct xfs_trans * tp,struct xfs_perag * pag,struct xfs_buf ** agbp)37464d89e20bSDave Chinner xfs_free_extent_fix_freelist(
37474d89e20bSDave Chinner struct xfs_trans *tp,
374845d06621SDave Chinner struct xfs_perag *pag,
37494d89e20bSDave Chinner struct xfs_buf **agbp)
375030f712c9SDave Chinner {
37514d89e20bSDave Chinner struct xfs_alloc_arg args;
375230f712c9SDave Chinner int error;
375330f712c9SDave Chinner
37544d89e20bSDave Chinner memset(&args, 0, sizeof(struct xfs_alloc_arg));
375530f712c9SDave Chinner args.tp = tp;
375630f712c9SDave Chinner args.mp = tp->t_mountp;
375745d06621SDave Chinner args.agno = pag->pag_agno;
375845d06621SDave Chinner args.pag = pag;
375930f712c9SDave Chinner
376030f712c9SDave Chinner /*
376130f712c9SDave Chinner * validate that the block number is legal - the enables us to detect
376230f712c9SDave Chinner * and handle a silent filesystem corruption rather than crashing.
376330f712c9SDave Chinner */
376430f712c9SDave Chinner if (args.agno >= args.mp->m_sb.sb_agcount)
37652451337dSDave Chinner return -EFSCORRUPTED;
376630f712c9SDave Chinner
376730f712c9SDave Chinner error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
376830f712c9SDave Chinner if (error)
376945d06621SDave Chinner return error;
377030f712c9SDave Chinner
37714d89e20bSDave Chinner *agbp = args.agbp;
377245d06621SDave Chinner return 0;
377330f712c9SDave Chinner }
377430f712c9SDave Chinner
/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 */
int
__xfs_free_extent(
	struct xfs_trans		*tp,
	struct xfs_perag		*pag,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type,
	bool				skip_discard)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_buf			*agbp;
	struct xfs_agf			*agf;
	int				error;
	unsigned int			busy_flags = 0;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error injection point for exercising extent-free failure paths. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
	if (error)
		return error;
	agf = agbp->b_addr;

	/* The start block must lie inside the AG. */
	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		error = -EFSCORRUPTED;
		goto err_release;
	}

	/* validate the extent size is legal now we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto err_release;
	}

	error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
			type);
	if (error)
		goto err_release;

	/*
	 * NOTE(review): inserting the extent into the busy list presumably
	 * keeps it from being reallocated until the free commits — confirm
	 * against xfs_extent_busy_insert(). Discard may be skipped per caller.
	 */
	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
	return 0;

err_release:
	/* Release the AGF buffer on any failure after it was acquired. */
	xfs_trans_brelse(tp, agbp);
	return error;
}
38332d520bfaSDarrick J. Wong
/*
 * Context threaded through the btree range query so each formatted free
 * space record can be handed to the caller's callback.
 */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* caller's per-record callback */
	void				*priv;	/* opaque data passed to fn */
};
38382d520bfaSDarrick J. Wong
38392d520bfaSDarrick J. Wong /* Format btree record and pass to our callback. */
38402d520bfaSDarrick J. Wong STATIC int
xfs_alloc_query_range_helper(struct xfs_btree_cur * cur,const union xfs_btree_rec * rec,void * priv)38412d520bfaSDarrick J. Wong xfs_alloc_query_range_helper(
38422d520bfaSDarrick J. Wong struct xfs_btree_cur *cur,
3843159eb69dSDarrick J. Wong const union xfs_btree_rec *rec,
38442d520bfaSDarrick J. Wong void *priv)
38452d520bfaSDarrick J. Wong {
38462d520bfaSDarrick J. Wong struct xfs_alloc_query_range_info *query = priv;
38472d520bfaSDarrick J. Wong struct xfs_alloc_rec_incore irec;
3848ee12eaaaSDarrick J. Wong xfs_failaddr_t fa;
38492d520bfaSDarrick J. Wong
385035e3b9a1SDarrick J. Wong xfs_alloc_btrec_to_irec(rec, &irec);
3851ee12eaaaSDarrick J. Wong fa = xfs_alloc_check_irec(cur, &irec);
3852ee12eaaaSDarrick J. Wong if (fa)
3853ee12eaaaSDarrick J. Wong return xfs_alloc_complain_bad_rec(cur, fa, &irec);
385435e3b9a1SDarrick J. Wong
38552d520bfaSDarrick J. Wong return query->fn(cur, &irec, query->priv);
38562d520bfaSDarrick J. Wong }
38572d520bfaSDarrick J. Wong
38582d520bfaSDarrick J. Wong /* Find all free space within a given range of blocks. */
38592d520bfaSDarrick J. Wong int
xfs_alloc_query_range(struct xfs_btree_cur * cur,const struct xfs_alloc_rec_incore * low_rec,const struct xfs_alloc_rec_incore * high_rec,xfs_alloc_query_range_fn fn,void * priv)38602d520bfaSDarrick J. Wong xfs_alloc_query_range(
38612d520bfaSDarrick J. Wong struct xfs_btree_cur *cur,
386204dcb474SDarrick J. Wong const struct xfs_alloc_rec_incore *low_rec,
386304dcb474SDarrick J. Wong const struct xfs_alloc_rec_incore *high_rec,
38642d520bfaSDarrick J. Wong xfs_alloc_query_range_fn fn,
38652d520bfaSDarrick J. Wong void *priv)
38662d520bfaSDarrick J. Wong {
386775dc0345SDarrick J. Wong union xfs_btree_irec low_brec = { .a = *low_rec };
386875dc0345SDarrick J. Wong union xfs_btree_irec high_brec = { .a = *high_rec };
386975dc0345SDarrick J. Wong struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
38702d520bfaSDarrick J. Wong
38712d520bfaSDarrick J. Wong ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
38722d520bfaSDarrick J. Wong return xfs_btree_query_range(cur, &low_brec, &high_brec,
38732d520bfaSDarrick J. Wong xfs_alloc_query_range_helper, &query);
38742d520bfaSDarrick J. Wong }
3875e9a2599aSDarrick J. Wong
3876e9a2599aSDarrick J. Wong /* Find all free space records. */
3877e9a2599aSDarrick J. Wong int
xfs_alloc_query_all(struct xfs_btree_cur * cur,xfs_alloc_query_range_fn fn,void * priv)3878e9a2599aSDarrick J. Wong xfs_alloc_query_all(
3879e9a2599aSDarrick J. Wong struct xfs_btree_cur *cur,
3880e9a2599aSDarrick J. Wong xfs_alloc_query_range_fn fn,
3881e9a2599aSDarrick J. Wong void *priv)
3882e9a2599aSDarrick J. Wong {
3883e9a2599aSDarrick J. Wong struct xfs_alloc_query_range_info query;
3884e9a2599aSDarrick J. Wong
3885e9a2599aSDarrick J. Wong ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3886e9a2599aSDarrick J. Wong query.priv = priv;
3887e9a2599aSDarrick J. Wong query.fn = fn;
3888e9a2599aSDarrick J. Wong return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3889e9a2599aSDarrick J. Wong }
389021ec5416SDarrick J. Wong
/*
 * Scan part of the keyspace of the free space and tell us if the area has no
 * records, is fully mapped by records, or is partially filled.
 */
int
xfs_alloc_has_records(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	enum xbtree_recpacking	*outcome)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	/* Low key: zero everything, then pin the starting block. */
	memset(&low, 0, sizeof(low));
	low.a.ar_startblock = bno;
	/* High key: all-ones fields so only ar_startblock bounds the range. */
	memset(&high, 0xFF, sizeof(high));
	high.a.ar_startblock = bno + len - 1;

	return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}
39129f3a080eSDarrick J. Wong
39139f3a080eSDarrick J. Wong /*
39149f3a080eSDarrick J. Wong * Walk all the blocks in the AGFL. The @walk_fn can return any negative
39155bb46e3eSDarrick J. Wong * error code or XFS_ITER_*.
39169f3a080eSDarrick J. Wong */
39179f3a080eSDarrick J. Wong int
xfs_agfl_walk(struct xfs_mount * mp,struct xfs_agf * agf,struct xfs_buf * agflbp,xfs_agfl_walk_fn walk_fn,void * priv)39189f3a080eSDarrick J. Wong xfs_agfl_walk(
39199f3a080eSDarrick J. Wong struct xfs_mount *mp,
39209f3a080eSDarrick J. Wong struct xfs_agf *agf,
39219f3a080eSDarrick J. Wong struct xfs_buf *agflbp,
39229f3a080eSDarrick J. Wong xfs_agfl_walk_fn walk_fn,
39239f3a080eSDarrick J. Wong void *priv)
39249f3a080eSDarrick J. Wong {
39259f3a080eSDarrick J. Wong __be32 *agfl_bno;
39269f3a080eSDarrick J. Wong unsigned int i;
39279f3a080eSDarrick J. Wong int error;
39289f3a080eSDarrick J. Wong
3929183606d8SChristoph Hellwig agfl_bno = xfs_buf_to_agfl_bno(agflbp);
39309f3a080eSDarrick J. Wong i = be32_to_cpu(agf->agf_flfirst);
39319f3a080eSDarrick J. Wong
39329f3a080eSDarrick J. Wong /* Nothing to walk in an empty AGFL. */
39339f3a080eSDarrick J. Wong if (agf->agf_flcount == cpu_to_be32(0))
39349f3a080eSDarrick J. Wong return 0;
39359f3a080eSDarrick J. Wong
39369f3a080eSDarrick J. Wong /* Otherwise, walk from first to last, wrapping as needed. */
39379f3a080eSDarrick J. Wong for (;;) {
39389f3a080eSDarrick J. Wong error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
39399f3a080eSDarrick J. Wong if (error)
39409f3a080eSDarrick J. Wong return error;
39419f3a080eSDarrick J. Wong if (i == be32_to_cpu(agf->agf_fllast))
39429f3a080eSDarrick J. Wong break;
39439f3a080eSDarrick J. Wong if (++i == xfs_agfl_size(mp))
39449f3a080eSDarrick J. Wong i = 0;
39459f3a080eSDarrick J. Wong }
39469f3a080eSDarrick J. Wong
39479f3a080eSDarrick J. Wong return 0;
39489f3a080eSDarrick J. Wong }
3949c201d9caSDarrick J. Wong
3950c201d9caSDarrick J. Wong int __init
xfs_extfree_intent_init_cache(void)3951c201d9caSDarrick J. Wong xfs_extfree_intent_init_cache(void)
3952c201d9caSDarrick J. Wong {
3953c201d9caSDarrick J. Wong xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
3954c201d9caSDarrick J. Wong sizeof(struct xfs_extent_free_item),
3955c201d9caSDarrick J. Wong 0, 0, NULL);
3956c201d9caSDarrick J. Wong
3957c201d9caSDarrick J. Wong return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
3958c201d9caSDarrick J. Wong }
3959c201d9caSDarrick J. Wong
/* Tear down the extent free intent item cache. */
void
xfs_extfree_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_extfree_item_cache);
	/* Clear the pointer so any use after teardown is detectable. */
	xfs_extfree_item_cache = NULL;
}
3966