// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2014 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"

/*
 * Reverse map btree.
 *
 * This is a per-ag tree used to track the owner(s) of a given extent. With
 * reflink it is possible for there to be multiple owners, which is a departure
 * from classic XFS. Owner records for data extents are inserted when the
 * extent is mapped and removed when an extent is unmapped.  Owner records for
 * all other block types (i.e. metadata) are inserted when an extent is
 * allocated and removed when an extent is freed. There can only be one owner
 * of a metadata extent, usually an inode or some other metadata structure like
 * an AG btree.
 *
 * The rmap btree is part of the free space management, so blocks for the tree
 * are sourced from the agfl. Hence we need transaction reservation support for
 * this tree so that the freelist is always large enough. This also impacts the
 * minimum space we need to leave free in the AG.
 *
 * The tree is ordered by [ag block, owner, offset]. This is a large key size,
 * but it is the only way to enforce unique keys when a block can be owned by
 * multiple files at any offset. There's no need to order/search by extent
 * size for online updating/management of the tree. It is intended that most
 * reverse lookups will be to find the owner(s) of a particular block, or to
 * try to recover tree and file data from corrupt primary metadata.
 */
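
/*
 * To illustrate the ordering (with made-up values): on a reflink filesystem
 * AG block 100 may be mapped by two different inodes at once, so both of the
 * records below are valid and the owner/offset fields are what keep the keys
 * unique:
 *
 *	[startblock 100, owner (inode) 131, offset  0, blockcount 8]
 *	[startblock 100, owner (inode) 258, offset 16, blockcount 8]
 *
 * Metadata extents use the special XFS_RMAP_OWN_* owner codes instead of an
 * inode number.
 */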

static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
				cur->bc_ag.agbp, cur->bc_ag.pag);
}

STATIC void
xfs_rmapbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	int			btnum = cur->bc_btnum;

	ASSERT(ptr->s != 0);

	agf->agf_roots[btnum] = ptr->s;
	be32_add_cpu(&agf->agf_levels[btnum], inc);
	cur->bc_ag.pag->pagf_levels[btnum] += inc;

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}

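/*
 * Allocate a new rmapbt block from the AG free list.  On success, bump the
 * AGF's count of rmapbt blocks and notify the per-AG reservation that a
 * freelist block is now in use by the btree.
 */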
STATIC int
xfs_rmapbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	int			error;
	xfs_agblock_t		bno;

	/* Allocate the new block from the freelist. If we can't, give up. */
	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp,
				       &bno, 1);
	if (error)
		return error;

	trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1);
	if (bno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}

	xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);

	new->s = cpu_to_be32(bno);
	be32_add_cpu(&agf->agf_rmap_blocks, 1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);

	xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);

	*stat = 1;
	return 0;
}

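/*
 * Return an rmapbt block to the AG free list.  The block stays on the busy
 * list (with discard skipped) until the transaction commits, and the freed
 * block is credited back to the per-AG rmapbt reservation.
 */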
STATIC int
xfs_rmapbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
			bno, 1);
	be32_add_cpu(&agf->agf_rmap_blocks, -1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
	if (error)
		return error;

	xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
			      XFS_EXTENT_BUSY_SKIP_DISCARD);

	xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
	return 0;
}

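/*
 * Per-level record limits come from m_rmap_mnr/m_rmap_mxr, computed at mount
 * time; index 0 is for leaf blocks and index 1 for node blocks.
 */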
STATIC int
xfs_rmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mnr[level != 0];
}

STATIC int
xfs_rmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mxr[level != 0];
}

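/* Build the low key (startblock, owner, offset) directly from a record. */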
STATIC void
xfs_rmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = rec->rmap.rm_offset;
}

/*
 * The high key for a reverse mapping record can be computed by shifting
 * the startblock and offset to the highest value that would still map
 * to that record.  In practice this means that we add blockcount-1 to
 * the startblock for all records, and if the record is for a data/attr
 * fork mapping, we add blockcount-1 to the offset too.
 */
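/*
 * For example (illustrative values): a data fork record starting at AG block
 * 100 with blockcount 8, owned by inode 131 at file offset 25, gets the high
 * key { startblock 107, owner 131, offset 32 }.
 */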
STATIC void
xfs_rmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = rec->rmap.rm_offset;
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}

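/* Write the incore record cached in the cursor into an on-disk record. */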
STATIC void
xfs_rmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

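/* Fetch the root block pointer from the AGF for this AG. */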
STATIC void
xfs_rmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

	ptr->s = agf->agf_roots[cur->bc_btnum];
}

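/*
 * Compute the sign of (key - cursor record), comparing startblock first,
 * then owner, then the fork offset bits.
 */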
STATIC int64_t
xfs_rmapbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;
	__u64				x, y;
	int64_t				d;

	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
	y = rec->rm_offset;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

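/* Compare two keys using the same (startblock, owner, offset) ordering. */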
STATIC int64_t
xfs_rmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
		       be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	x = be64_to_cpu(kp1->rm_owner);
	y = be64_to_cpu(kp2->rm_owner);
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
	y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

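/* Structural sanity checks for an rmapbt block, shared by both verifiers. */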
static xfs_failaddr_t
xfs_rmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	/*
	 * magic number and level verification
	 *
	 * During growfs operations, we can't verify the exact level or owner as
	 * the perag is not fully initialised and hence not attached to the
	 * buffer.  In this case, check against the maximum tree depth.
	 *
	 * Similarly, during log recovery we will have a perag structure
	 * attached, but the agf information will not yet have been initialised
	 * from the on disk AGF. Again, we can only check against maximum limits
	 * in this case.
	 */
	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_sblock_v5hdr_verify(bp);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (pag && pag->pagf_init) {
		if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
			return __this_address;
	} else if (level >= mp->m_rmap_maxlevels)
		return __this_address;

	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}

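/*
 * Read verifier: check the CRC first, then the block structure, attaching an
 * error to the buffer if either check fails.
 */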
static void
xfs_rmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_sblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

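/*
 * Write verifier: re-check the block structure and recompute the CRC just
 * before the buffer is written to disk.
 */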
static void
xfs_rmapbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rmapbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_sblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
	.name			= "xfs_rmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
	.verify_read		= xfs_rmapbt_read_verify,
	.verify_write		= xfs_rmapbt_write_verify,
	.verify_struct		= xfs_rmapbt_verify,
};

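/*
 * Return 1 if key1 sorts at or before key2 in (startblock, owner, offset)
 * order.
 */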
STATIC int
xfs_rmapbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	uint32_t		x;
	uint32_t		y;
	uint64_t		a;
	uint64_t		b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

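/* Return 1 if record r1 sorts at or before r2, using the same ordering. */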
STATIC int
xfs_rmapbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	uint32_t		x;
	uint32_t		y;
	uint64_t		a;
	uint64_t		b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

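/* rmapbt operations plugged into the generic btree code. */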
static const struct xfs_btree_ops xfs_rmapbt_ops = {
	.rec_len		= sizeof(struct xfs_rmap_rec),
	.key_len		= 2 * sizeof(struct xfs_rmap_key),

	.dup_cursor		= xfs_rmapbt_dup_cursor,
	.set_root		= xfs_rmapbt_set_root,
	.alloc_block		= xfs_rmapbt_alloc_block,
	.free_block		= xfs_rmapbt_free_block,
	.get_minrecs		= xfs_rmapbt_get_minrecs,
	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
};

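/*
 * Common cursor setup shared by the regular and staging constructors:
 * allocate the cursor, mark the tree as overlapping (two keys per pointer),
 * and take a perag reference that lasts for the life of the cursor.
 */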
static struct xfs_btree_cur *
xfs_rmapbt_init_common(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	/* Overlapping btree; 2 keys per pointer. */
	cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
			mp->m_rmap_maxlevels);
	cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
	cur->bc_ops = &xfs_rmapbt_ops;

	/* take a reference for the cursor */
	atomic_inc(&pag->pag_ref);
	cur->bc_ag.pag = pag;

	return cur;
}

/* Create a new reverse mapping btree cursor. */
struct xfs_btree_cur *
xfs_rmapbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, tp, pag);
	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
	cur->bc_ag.agbp = agbp;
	return cur;
}

/* Create a new reverse mapping btree cursor with a fake root for staging. */
struct xfs_btree_cur *
xfs_rmapbt_stage_cursor(
	struct xfs_mount	*mp,
	struct xbtree_afakeroot	*afake,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, NULL, pag);
	xfs_btree_stage_afakeroot(cur, afake);
	return cur;
}

/*
 * Install a new reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
	agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
				    XFS_AGF_RMAP_BLOCKS);
	xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
}

/*
 * Calculate number of records in an rmap btree block.
 */
int
xfs_rmapbt_maxrecs(
	int			blocklen,
	int			leaf)
{
	blocklen -= XFS_RMAP_BLOCK_LEN;

	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
}

/* Compute the maximum height of an rmap btree. */
void
xfs_rmapbt_compute_maxlevels(
	struct xfs_mount		*mp)
{
	if (!xfs_has_rmapbt(mp)) {
		mp->m_rmap_maxlevels = 0;
		return;
	}

	if (xfs_has_reflink(mp)) {
		/*
		 * Compute the asymptotic maxlevels for an rmap btree on a
		 * filesystem that supports reflink.
		 *
		 * On a reflink filesystem, each AG block can have up to 2^32
		 * (per the refcount record format) owners, which means that
		 * theoretically we could face up to 2^64 rmap records.
		 * However, we're likely to run out of blocks in the AG long
		 * before that happens, which means that we must compute the
		 * max height based on what the btree will look like if it
		 * consumes almost all the blocks in the AG due to maximal
		 * sharing factor.
		 */
		mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
				mp->m_sb.sb_agblocks);
	} else {
		/*
		 * If there's no block sharing, compute the maximum rmapbt
		 * height assuming one rmap record per AG block.
		 */
		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
	}
}

/* Calculate the rmap btree size for some records. */
xfs_extlen_t
xfs_rmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rmap_mnr, len);
}

/*
 * Calculate the maximum rmap btree size.
 */
xfs_extlen_t
xfs_rmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_agblock_t		agblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rmap_mxr[0] == 0)
		return 0;

	return xfs_rmapbt_calc_size(mp, agblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
int
xfs_rmapbt_calc_reserves(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_extlen_t		*ask,
	xfs_extlen_t		*used)
{
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	xfs_agblock_t		agblocks;
	xfs_extlen_t		tree_len;
	int			error;

	if (!xfs_has_rmapbt(mp))
		return 0;

	error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
	if (error)
		return error;

	agf = agbp->b_addr;
	agblocks = be32_to_cpu(agf->agf_length);
	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
	xfs_trans_brelse(tp, agbp);

	/*
	 * The log is permanently allocated, so the space it occupies will
	 * never be available for the kinds of things that would require btree
	 * expansion.  We therefore can pretend the space isn't there.
	 */
	if (mp->m_sb.sb_logstart &&
	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno)
		agblocks -= mp->m_sb.sb_logblocks;

	/* Reserve 1% of the AG or enough for 1 block per record. */
	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
	*used += tree_len;

	return error;
}