10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 2035e00acSDarrick J. Wong /* 3035e00acSDarrick J. Wong * Copyright (c) 2014 Red Hat, Inc. 4035e00acSDarrick J. Wong * All Rights Reserved. 5035e00acSDarrick J. Wong */ 6035e00acSDarrick J. Wong #include "xfs.h" 7035e00acSDarrick J. Wong #include "xfs_fs.h" 8035e00acSDarrick J. Wong #include "xfs_shared.h" 9035e00acSDarrick J. Wong #include "xfs_format.h" 10035e00acSDarrick J. Wong #include "xfs_log_format.h" 11035e00acSDarrick J. Wong #include "xfs_trans_resv.h" 12035e00acSDarrick J. Wong #include "xfs_mount.h" 13035e00acSDarrick J. Wong #include "xfs_trans.h" 14035e00acSDarrick J. Wong #include "xfs_alloc.h" 15035e00acSDarrick J. Wong #include "xfs_btree.h" 1659d67712SDarrick J. Wong #include "xfs_btree_staging.h" 174b8ed677SDarrick J. Wong #include "xfs_rmap.h" 18035e00acSDarrick J. Wong #include "xfs_rmap_btree.h" 19035e00acSDarrick J. Wong #include "xfs_trace.h" 20035e00acSDarrick J. Wong #include "xfs_error.h" 21035e00acSDarrick J. Wong #include "xfs_extent_busy.h" 229bbafc71SDave Chinner #include "xfs_ag.h" 2384d69619SDarrick J. Wong #include "xfs_ag_resv.h" 24035e00acSDarrick J. Wong 254b8ed677SDarrick J. Wong /* 264b8ed677SDarrick J. Wong * Reverse map btree. 274b8ed677SDarrick J. Wong * 284b8ed677SDarrick J. Wong * This is a per-ag tree used to track the owner(s) of a given extent. With 294b8ed677SDarrick J. Wong * reflink it is possible for there to be multiple owners, which is a departure 304b8ed677SDarrick J. Wong * from classic XFS. Owner records for data extents are inserted when the 314b8ed677SDarrick J. Wong * extent is mapped and removed when an extent is unmapped. Owner records for 324b8ed677SDarrick J. Wong * all other block types (i.e. metadata) are inserted when an extent is 334b8ed677SDarrick J. Wong * allocated and removed when an extent is freed. There can only be one owner 344b8ed677SDarrick J. Wong * of a metadata extent, usually an inode or some other metadata structure like 354b8ed677SDarrick J. Wong * an AG btree. 364b8ed677SDarrick J. Wong * 374b8ed677SDarrick J. Wong * The rmap btree is part of the free space management, so blocks for the tree 384b8ed677SDarrick J. Wong * are sourced from the agfl. Hence we need transaction reservation support for 394b8ed677SDarrick J. Wong * this tree so that the freelist is always large enough. This also impacts on 404b8ed677SDarrick J. Wong * the minimum space we need to leave free in the AG. 414b8ed677SDarrick J. Wong * 424b8ed677SDarrick J. Wong * The tree is ordered by [ag block, owner, offset]. This is a large key size, 434b8ed677SDarrick J. Wong * but it is the only way to enforce unique keys when a block can be owned by 444b8ed677SDarrick J. Wong * multiple files at any offset. There's no need to order/search by extent 454b8ed677SDarrick J. Wong * size for online updating/management of the tree. It is intended that most 464b8ed677SDarrick J. Wong * reverse lookups will be to find the owner(s) of a particular block, or to 474b8ed677SDarrick J. Wong * try to recover tree and file data from corrupt primary metadata. 484b8ed677SDarrick J. Wong */ 494b8ed677SDarrick J. Wong 50035e00acSDarrick J. Wong static struct xfs_btree_cur * 51035e00acSDarrick J. Wong xfs_rmapbt_dup_cursor( 52035e00acSDarrick J. Wong struct xfs_btree_cur *cur) 53035e00acSDarrick J. Wong { 54035e00acSDarrick J. Wong return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, 55fa9c3c19SDave Chinner cur->bc_ag.agbp, cur->bc_ag.pag); 56035e00acSDarrick J. Wong } 57035e00acSDarrick J. Wong 584b8ed677SDarrick J. Wong STATIC void 594b8ed677SDarrick J. Wong xfs_rmapbt_set_root( 604b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 61b5a6e5feSDarrick J. Wong const union xfs_btree_ptr *ptr, 624b8ed677SDarrick J. Wong int inc) 634b8ed677SDarrick J. Wong { 64576af732SDave Chinner struct xfs_buf *agbp = cur->bc_ag.agbp; 659798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr; 664b8ed677SDarrick J. Wong int btnum = cur->bc_btnum; 674b8ed677SDarrick J. Wong 684b8ed677SDarrick J. Wong ASSERT(ptr->s != 0); 694b8ed677SDarrick J. Wong 704b8ed677SDarrick J. Wong agf->agf_roots[btnum] = ptr->s; 714b8ed677SDarrick J. Wong be32_add_cpu(&agf->agf_levels[btnum], inc); 72fa9c3c19SDave Chinner cur->bc_ag.pag->pagf_levels[btnum] += inc; 734b8ed677SDarrick J. Wong 744b8ed677SDarrick J. Wong xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); 754b8ed677SDarrick J. Wong } 764b8ed677SDarrick J. Wong 774b8ed677SDarrick J. Wong STATIC int 784b8ed677SDarrick J. Wong xfs_rmapbt_alloc_block( 794b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 80deb06b9aSDarrick J. Wong const union xfs_btree_ptr *start, 814b8ed677SDarrick J. Wong union xfs_btree_ptr *new, 824b8ed677SDarrick J. Wong int *stat) 834b8ed677SDarrick J. Wong { 84576af732SDave Chinner struct xfs_buf *agbp = cur->bc_ag.agbp; 859798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr; 86fa9c3c19SDave Chinner struct xfs_perag *pag = cur->bc_ag.pag; 874b8ed677SDarrick J. Wong int error; 884b8ed677SDarrick J. Wong xfs_agblock_t bno; 894b8ed677SDarrick J. Wong 904b8ed677SDarrick J. Wong /* Allocate the new block from the freelist. If we can't, give up. */ 91576af732SDave Chinner error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, 924b8ed677SDarrick J. Wong &bno, 1); 93e157ebdcSCarlos Maiolino if (error) 944b8ed677SDarrick J. Wong return error; 954b8ed677SDarrick J. Wong 96fa9c3c19SDave Chinner trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1); 974b8ed677SDarrick J. Wong if (bno == NULLAGBLOCK) { 984b8ed677SDarrick J. Wong *stat = 0; 994b8ed677SDarrick J. Wong return 0; 1004b8ed677SDarrick J. Wong } 1014b8ed677SDarrick J. Wong 102fa9c3c19SDave Chinner xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false); 1034b8ed677SDarrick J. Wong 1044b8ed677SDarrick J. Wong new->s = cpu_to_be32(bno); 105f32866fdSDarrick J. Wong be32_add_cpu(&agf->agf_rmap_blocks, 1); 106f32866fdSDarrick J. Wong xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 1074b8ed677SDarrick J. Wong 108fa9c3c19SDave Chinner xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno); 1090ab32086SBrian Foster 1104b8ed677SDarrick J. Wong *stat = 1; 1114b8ed677SDarrick J. Wong return 0; 1124b8ed677SDarrick J. Wong } 1134b8ed677SDarrick J. Wong 1144b8ed677SDarrick J. Wong STATIC int 1154b8ed677SDarrick J. Wong xfs_rmapbt_free_block( 1164b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1174b8ed677SDarrick J. Wong struct xfs_buf *bp) 1184b8ed677SDarrick J. Wong { 119576af732SDave Chinner struct xfs_buf *agbp = cur->bc_ag.agbp; 1209798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr; 121fa9c3c19SDave Chinner struct xfs_perag *pag = cur->bc_ag.pag; 1224b8ed677SDarrick J. Wong xfs_agblock_t bno; 1234b8ed677SDarrick J. Wong int error; 1244b8ed677SDarrick J. Wong 12504fcad80SDave Chinner bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); 126fa9c3c19SDave Chinner trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno, 1274b8ed677SDarrick J. Wong bno, 1); 128f32866fdSDarrick J. Wong be32_add_cpu(&agf->agf_rmap_blocks, -1); 129f32866fdSDarrick J. Wong xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 1304b8ed677SDarrick J. Wong error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); 1314b8ed677SDarrick J. Wong if (error) 1324b8ed677SDarrick J. Wong return error; 1334b8ed677SDarrick J. Wong 13445d06621SDave Chinner xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1, 1354b8ed677SDarrick J. Wong XFS_EXTENT_BUSY_SKIP_DISCARD); 1364b8ed677SDarrick J. Wong 13792a00544SGao Xiang xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); 1384b8ed677SDarrick J. Wong return 0; 1394b8ed677SDarrick J. Wong } 1404b8ed677SDarrick J. Wong 1414b8ed677SDarrick J. Wong STATIC int 1424b8ed677SDarrick J. Wong xfs_rmapbt_get_minrecs( 1434b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1444b8ed677SDarrick J. Wong int level) 1454b8ed677SDarrick J. Wong { 1464b8ed677SDarrick J. Wong return cur->bc_mp->m_rmap_mnr[level != 0]; 1474b8ed677SDarrick J. Wong } 1484b8ed677SDarrick J. Wong 1494b8ed677SDarrick J. Wong STATIC int 1504b8ed677SDarrick J. Wong xfs_rmapbt_get_maxrecs( 1514b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1524b8ed677SDarrick J. Wong int level) 1534b8ed677SDarrick J. Wong { 1544b8ed677SDarrick J. Wong return cur->bc_mp->m_rmap_mxr[level != 0]; 1554b8ed677SDarrick J. Wong } 1564b8ed677SDarrick J. Wong 1574b8ed677SDarrick J. Wong STATIC void 1584b8ed677SDarrick J. Wong xfs_rmapbt_init_key_from_rec( 1594b8ed677SDarrick J. Wong union xfs_btree_key *key, 16023825cd1SDarrick J. Wong const union xfs_btree_rec *rec) 1614b8ed677SDarrick J. Wong { 1624b8ed677SDarrick J. Wong key->rmap.rm_startblock = rec->rmap.rm_startblock; 1634b8ed677SDarrick J. Wong key->rmap.rm_owner = rec->rmap.rm_owner; 1644b8ed677SDarrick J. Wong key->rmap.rm_offset = rec->rmap.rm_offset; 1654b8ed677SDarrick J. Wong } 1664b8ed677SDarrick J. Wong 167cfed56aeSDarrick J. Wong /* 168cfed56aeSDarrick J. Wong * The high key for a reverse mapping record can be computed by shifting 169cfed56aeSDarrick J. Wong * the startblock and offset to the highest value that would still map 170cfed56aeSDarrick J. Wong * to that record. In practice this means that we add blockcount-1 to 171cfed56aeSDarrick J. Wong * the startblock for all records, and if the record is for a data/attr 172cfed56aeSDarrick J. Wong * fork mapping, we add blockcount-1 to the offset too. 173cfed56aeSDarrick J. Wong */ 174cfed56aeSDarrick J. Wong STATIC void 175cfed56aeSDarrick J. Wong xfs_rmapbt_init_high_key_from_rec( 176cfed56aeSDarrick J. Wong union xfs_btree_key *key, 17723825cd1SDarrick J. Wong const union xfs_btree_rec *rec) 178cfed56aeSDarrick J. Wong { 179c8ce540dSDarrick J. Wong uint64_t off; 180cfed56aeSDarrick J. Wong int adj; 181cfed56aeSDarrick J. Wong 182cfed56aeSDarrick J. Wong adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; 183cfed56aeSDarrick J. Wong 184cfed56aeSDarrick J. Wong key->rmap.rm_startblock = rec->rmap.rm_startblock; 185cfed56aeSDarrick J. Wong be32_add_cpu(&key->rmap.rm_startblock, adj); 186cfed56aeSDarrick J. Wong key->rmap.rm_owner = rec->rmap.rm_owner; 187cfed56aeSDarrick J. Wong key->rmap.rm_offset = rec->rmap.rm_offset; 188cfed56aeSDarrick J. Wong if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) || 189cfed56aeSDarrick J. Wong XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) 190cfed56aeSDarrick J. Wong return; 191cfed56aeSDarrick J. Wong off = be64_to_cpu(key->rmap.rm_offset); 192cfed56aeSDarrick J. Wong off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK); 193cfed56aeSDarrick J. Wong key->rmap.rm_offset = cpu_to_be64(off); 194cfed56aeSDarrick J. Wong } 195cfed56aeSDarrick J. Wong 1964b8ed677SDarrick J. Wong STATIC void 1974b8ed677SDarrick J. Wong xfs_rmapbt_init_rec_from_cur( 1984b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1994b8ed677SDarrick J. Wong union xfs_btree_rec *rec) 2004b8ed677SDarrick J. Wong { 2014b8ed677SDarrick J. Wong rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock); 2024b8ed677SDarrick J. Wong rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount); 2034b8ed677SDarrick J. Wong rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner); 2044b8ed677SDarrick J. Wong rec->rmap.rm_offset = cpu_to_be64( 2054b8ed677SDarrick J. Wong xfs_rmap_irec_offset_pack(&cur->bc_rec.r)); 2064b8ed677SDarrick J. Wong } 2074b8ed677SDarrick J. Wong 2084b8ed677SDarrick J. Wong STATIC void 2094b8ed677SDarrick J. Wong xfs_rmapbt_init_ptr_from_cur( 2104b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 2114b8ed677SDarrick J. Wong union xfs_btree_ptr *ptr) 2124b8ed677SDarrick J. Wong { 213576af732SDave Chinner struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; 2144b8ed677SDarrick J. Wong 215fa9c3c19SDave Chinner ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); 2164b8ed677SDarrick J. Wong 2174b8ed677SDarrick J. Wong ptr->s = agf->agf_roots[cur->bc_btnum]; 2184b8ed677SDarrick J. Wong } 2194b8ed677SDarrick J. Wong 220c8ce540dSDarrick J. Wong STATIC int64_t 2214b8ed677SDarrick J. Wong xfs_rmapbt_key_diff( 2224b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 223d29d5577SDarrick J. Wong const union xfs_btree_key *key) 2244b8ed677SDarrick J. Wong { 2254b8ed677SDarrick J. Wong struct xfs_rmap_irec *rec = &cur->bc_rec.r; 226d29d5577SDarrick J. Wong const struct xfs_rmap_key *kp = &key->rmap; 2274b8ed677SDarrick J. Wong __u64 x, y; 228c8ce540dSDarrick J. Wong int64_t d; 2294b8ed677SDarrick J. Wong 230c8ce540dSDarrick J. Wong d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; 2314b8ed677SDarrick J. Wong if (d) 2324b8ed677SDarrick J. Wong return d; 2334b8ed677SDarrick J. Wong 2344b8ed677SDarrick J. Wong x = be64_to_cpu(kp->rm_owner); 2354b8ed677SDarrick J. Wong y = rec->rm_owner; 2364b8ed677SDarrick J. Wong if (x > y) 2374b8ed677SDarrick J. Wong return 1; 2384b8ed677SDarrick J. Wong else if (y > x) 2394b8ed677SDarrick J. Wong return -1; 2404b8ed677SDarrick J. Wong 241eb840907SDarrick J. Wong x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); 242eb840907SDarrick J. Wong y = rec->rm_offset; 2434b8ed677SDarrick J. Wong if (x > y) 2444b8ed677SDarrick J. Wong return 1; 2454b8ed677SDarrick J. Wong else if (y > x) 2464b8ed677SDarrick J. Wong return -1; 2474b8ed677SDarrick J. Wong return 0; 2484b8ed677SDarrick J. Wong } 2494b8ed677SDarrick J. Wong 250c8ce540dSDarrick J. Wong STATIC int64_t 251cfed56aeSDarrick J. Wong xfs_rmapbt_diff_two_keys( 252cfed56aeSDarrick J. Wong struct xfs_btree_cur *cur, 253d29d5577SDarrick J. Wong const union xfs_btree_key *k1, 254d29d5577SDarrick J. Wong const union xfs_btree_key *k2) 255cfed56aeSDarrick J. Wong { 256d29d5577SDarrick J. Wong const struct xfs_rmap_key *kp1 = &k1->rmap; 257d29d5577SDarrick J. Wong const struct xfs_rmap_key *kp2 = &k2->rmap; 258c8ce540dSDarrick J. Wong int64_t d; 259cfed56aeSDarrick J. Wong __u64 x, y; 260cfed56aeSDarrick J. Wong 261c8ce540dSDarrick J. Wong d = (int64_t)be32_to_cpu(kp1->rm_startblock) - 262cfed56aeSDarrick J. Wong be32_to_cpu(kp2->rm_startblock); 263cfed56aeSDarrick J. Wong if (d) 264cfed56aeSDarrick J. Wong return d; 265cfed56aeSDarrick J. Wong 266cfed56aeSDarrick J. Wong x = be64_to_cpu(kp1->rm_owner); 267cfed56aeSDarrick J. Wong y = be64_to_cpu(kp2->rm_owner); 268cfed56aeSDarrick J. Wong if (x > y) 269cfed56aeSDarrick J. Wong return 1; 270cfed56aeSDarrick J. Wong else if (y > x) 271cfed56aeSDarrick J. Wong return -1; 272cfed56aeSDarrick J. Wong 273eb840907SDarrick J. Wong x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); 274eb840907SDarrick J. Wong y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); 275cfed56aeSDarrick J. Wong if (x > y) 276cfed56aeSDarrick J. Wong return 1; 277cfed56aeSDarrick J. Wong else if (y > x) 278cfed56aeSDarrick J. Wong return -1; 279cfed56aeSDarrick J. Wong return 0; 280cfed56aeSDarrick J. Wong } 281cfed56aeSDarrick J. Wong 282a6a781a5SDarrick J. Wong static xfs_failaddr_t 283035e00acSDarrick J. Wong xfs_rmapbt_verify( 284035e00acSDarrick J. Wong struct xfs_buf *bp) 285035e00acSDarrick J. Wong { 286dbd329f1SChristoph Hellwig struct xfs_mount *mp = bp->b_mount; 287035e00acSDarrick J. Wong struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 288035e00acSDarrick J. Wong struct xfs_perag *pag = bp->b_pag; 289a6a781a5SDarrick J. Wong xfs_failaddr_t fa; 290035e00acSDarrick J. Wong unsigned int level; 291035e00acSDarrick J. Wong 292035e00acSDarrick J. Wong /* 293035e00acSDarrick J. Wong * magic number and level verification 294035e00acSDarrick J. Wong * 295035e00acSDarrick J. Wong * During growfs operations, we can't verify the exact level or owner as 296035e00acSDarrick J. Wong * the perag is not fully initialised and hence not attached to the 297035e00acSDarrick J. Wong * buffer. In this case, check against the maximum tree depth. 298035e00acSDarrick J. Wong * 299035e00acSDarrick J. Wong * Similarly, during log recovery we will have a perag structure 300035e00acSDarrick J. Wong * attached, but the agf information will not yet have been initialised 301035e00acSDarrick J. Wong * from the on disk AGF. Again, we can only check against maximum limits 302035e00acSDarrick J. Wong * in this case. 303035e00acSDarrick J. Wong */ 30439708c20SBrian Foster if (!xfs_verify_magic(bp, block->bb_magic)) 305a6a781a5SDarrick J. Wong return __this_address; 306035e00acSDarrick J. Wong 30738c26bfdSDave Chinner if (!xfs_has_rmapbt(mp)) 308a6a781a5SDarrick J. Wong return __this_address; 309a6a781a5SDarrick J. Wong fa = xfs_btree_sblock_v5hdr_verify(bp); 310a6a781a5SDarrick J. Wong if (fa) 311a6a781a5SDarrick J. Wong return fa; 312035e00acSDarrick J. Wong 313035e00acSDarrick J. Wong level = be16_to_cpu(block->bb_level); 314035e00acSDarrick J. Wong if (pag && pag->pagf_init) { 315035e00acSDarrick J. Wong if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) 316a6a781a5SDarrick J. Wong return __this_address; 317035e00acSDarrick J. Wong } else if (level >= mp->m_rmap_maxlevels) 318a6a781a5SDarrick J. Wong return __this_address; 319035e00acSDarrick J. Wong 320035e00acSDarrick J. Wong return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); 321035e00acSDarrick J. Wong } 322035e00acSDarrick J. Wong 323035e00acSDarrick J. Wong static void 324035e00acSDarrick J. Wong xfs_rmapbt_read_verify( 325035e00acSDarrick J. Wong struct xfs_buf *bp) 326035e00acSDarrick J. Wong { 327bc1a09b8SDarrick J. Wong xfs_failaddr_t fa; 328bc1a09b8SDarrick J. Wong 329035e00acSDarrick J. Wong if (!xfs_btree_sblock_verify_crc(bp)) 330bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSBADCRC, __this_address); 331bc1a09b8SDarrick J. Wong else { 332bc1a09b8SDarrick J. Wong fa = xfs_rmapbt_verify(bp); 333bc1a09b8SDarrick J. Wong if (fa) 334bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa); 335bc1a09b8SDarrick J. Wong } 336035e00acSDarrick J. Wong 33731ca03c9SDarrick J. Wong if (bp->b_error) 338035e00acSDarrick J. Wong trace_xfs_btree_corrupt(bp, _RET_IP_); 339035e00acSDarrick J. Wong } 340035e00acSDarrick J. Wong 341035e00acSDarrick J. Wong static void 342035e00acSDarrick J. Wong xfs_rmapbt_write_verify( 343035e00acSDarrick J. Wong struct xfs_buf *bp) 344035e00acSDarrick J. Wong { 345bc1a09b8SDarrick J. Wong xfs_failaddr_t fa; 346bc1a09b8SDarrick J. Wong 347bc1a09b8SDarrick J. Wong fa = xfs_rmapbt_verify(bp); 348bc1a09b8SDarrick J. Wong if (fa) { 349035e00acSDarrick J. Wong trace_xfs_btree_corrupt(bp, _RET_IP_); 350bc1a09b8SDarrick J. Wong xfs_verifier_error(bp, -EFSCORRUPTED, fa); 351035e00acSDarrick J. Wong return; 352035e00acSDarrick J. Wong } 353035e00acSDarrick J. Wong xfs_btree_sblock_calc_crc(bp); 354035e00acSDarrick J. Wong 355035e00acSDarrick J. Wong } 356035e00acSDarrick J. Wong 357035e00acSDarrick J. Wong const struct xfs_buf_ops xfs_rmapbt_buf_ops = { 358035e00acSDarrick J. Wong .name = "xfs_rmapbt", 35939708c20SBrian Foster .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) }, 360035e00acSDarrick J. Wong .verify_read = xfs_rmapbt_read_verify, 361035e00acSDarrick J. Wong .verify_write = xfs_rmapbt_write_verify, 362b5572597SDarrick J. Wong .verify_struct = xfs_rmapbt_verify, 363035e00acSDarrick J. Wong }; 364035e00acSDarrick J. Wong 3654b8ed677SDarrick J. Wong STATIC int 3664b8ed677SDarrick J. Wong xfs_rmapbt_keys_inorder( 3674b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 3688e38dc88SDarrick J. Wong const union xfs_btree_key *k1, 3698e38dc88SDarrick J. Wong const union xfs_btree_key *k2) 3704b8ed677SDarrick J. Wong { 371c8ce540dSDarrick J. Wong uint32_t x; 372c8ce540dSDarrick J. Wong uint32_t y; 373c8ce540dSDarrick J. Wong uint64_t a; 374c8ce540dSDarrick J. Wong uint64_t b; 3754b8ed677SDarrick J. Wong 3764b8ed677SDarrick J. Wong x = be32_to_cpu(k1->rmap.rm_startblock); 3774b8ed677SDarrick J. Wong y = be32_to_cpu(k2->rmap.rm_startblock); 3784b8ed677SDarrick J. Wong if (x < y) 3794b8ed677SDarrick J. Wong return 1; 3804b8ed677SDarrick J. Wong else if (x > y) 3814b8ed677SDarrick J. Wong return 0; 3824b8ed677SDarrick J. Wong a = be64_to_cpu(k1->rmap.rm_owner); 3834b8ed677SDarrick J. Wong b = be64_to_cpu(k2->rmap.rm_owner); 3844b8ed677SDarrick J. Wong if (a < b) 3854b8ed677SDarrick J. Wong return 1; 3864b8ed677SDarrick J. Wong else if (a > b) 3874b8ed677SDarrick J. Wong return 0; 388eb840907SDarrick J. Wong a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); 389eb840907SDarrick J. Wong b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); 3904b8ed677SDarrick J. Wong if (a <= b) 3914b8ed677SDarrick J. Wong return 1; 3924b8ed677SDarrick J. Wong return 0; 3934b8ed677SDarrick J. Wong } 3944b8ed677SDarrick J. Wong 3954b8ed677SDarrick J. Wong STATIC int 3964b8ed677SDarrick J. Wong xfs_rmapbt_recs_inorder( 3974b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 3988e38dc88SDarrick J. Wong const union xfs_btree_rec *r1, 3998e38dc88SDarrick J. Wong const union xfs_btree_rec *r2) 4004b8ed677SDarrick J. Wong { 401c8ce540dSDarrick J. Wong uint32_t x; 402c8ce540dSDarrick J. Wong uint32_t y; 403c8ce540dSDarrick J. Wong uint64_t a; 404c8ce540dSDarrick J. Wong uint64_t b; 4054b8ed677SDarrick J. Wong 4064b8ed677SDarrick J. Wong x = be32_to_cpu(r1->rmap.rm_startblock); 4074b8ed677SDarrick J. Wong y = be32_to_cpu(r2->rmap.rm_startblock); 4084b8ed677SDarrick J. Wong if (x < y) 4094b8ed677SDarrick J. Wong return 1; 4104b8ed677SDarrick J. Wong else if (x > y) 4114b8ed677SDarrick J. Wong return 0; 4124b8ed677SDarrick J. Wong a = be64_to_cpu(r1->rmap.rm_owner); 4134b8ed677SDarrick J. Wong b = be64_to_cpu(r2->rmap.rm_owner); 4144b8ed677SDarrick J. Wong if (a < b) 4154b8ed677SDarrick J. Wong return 1; 4164b8ed677SDarrick J. Wong else if (a > b) 4174b8ed677SDarrick J. Wong return 0; 418eb840907SDarrick J. Wong a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); 419eb840907SDarrick J. Wong b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); 4204b8ed677SDarrick J. Wong if (a <= b) 4214b8ed677SDarrick J. Wong return 1; 4224b8ed677SDarrick J. Wong return 0; 4234b8ed677SDarrick J. Wong } 4244b8ed677SDarrick J. Wong 425035e00acSDarrick J. Wong static const struct xfs_btree_ops xfs_rmapbt_ops = { 426035e00acSDarrick J. Wong .rec_len = sizeof(struct xfs_rmap_rec), 427035e00acSDarrick J. Wong .key_len = 2 * sizeof(struct xfs_rmap_key), 428035e00acSDarrick J. Wong 429035e00acSDarrick J. Wong .dup_cursor = xfs_rmapbt_dup_cursor, 4304b8ed677SDarrick J. Wong .set_root = xfs_rmapbt_set_root, 4314b8ed677SDarrick J. Wong .alloc_block = xfs_rmapbt_alloc_block, 4324b8ed677SDarrick J. Wong .free_block = xfs_rmapbt_free_block, 4334b8ed677SDarrick J. Wong .get_minrecs = xfs_rmapbt_get_minrecs, 4344b8ed677SDarrick J. Wong .get_maxrecs = xfs_rmapbt_get_maxrecs, 4354b8ed677SDarrick J. Wong .init_key_from_rec = xfs_rmapbt_init_key_from_rec, 436cfed56aeSDarrick J. Wong .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec, 4374b8ed677SDarrick J. Wong .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur, 4384b8ed677SDarrick J. Wong .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur, 4394b8ed677SDarrick J. Wong .key_diff = xfs_rmapbt_key_diff, 440035e00acSDarrick J. Wong .buf_ops = &xfs_rmapbt_buf_ops, 441cfed56aeSDarrick J. Wong .diff_two_keys = xfs_rmapbt_diff_two_keys, 4424b8ed677SDarrick J. Wong .keys_inorder = xfs_rmapbt_keys_inorder, 4434b8ed677SDarrick J. Wong .recs_inorder = xfs_rmapbt_recs_inorder, 444035e00acSDarrick J. Wong }; 445035e00acSDarrick J. Wong 44659d67712SDarrick J. Wong static struct xfs_btree_cur * 44759d67712SDarrick J. Wong xfs_rmapbt_init_common( 44859d67712SDarrick J. Wong struct xfs_mount *mp, 44959d67712SDarrick J. Wong struct xfs_trans *tp, 450be9fb17dSDave Chinner struct xfs_perag *pag) 45159d67712SDarrick J. Wong { 45259d67712SDarrick J. Wong struct xfs_btree_cur *cur; 45359d67712SDarrick J. Wong 45459d67712SDarrick J. Wong /* Overlapping btree; 2 keys per pointer. */ 455*c940a0c5SDarrick J. Wong cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, 456*c940a0c5SDarrick J. Wong mp->m_rmap_maxlevels); 45759d67712SDarrick J. Wong cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; 45859d67712SDarrick J. Wong cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); 45959d67712SDarrick J. Wong cur->bc_ops = &xfs_rmapbt_ops; 460fa9c3c19SDave Chinner 461be9fb17dSDave Chinner /* take a reference for the cursor */ 462be9fb17dSDave Chinner atomic_inc(&pag->pag_ref); 463be9fb17dSDave Chinner cur->bc_ag.pag = pag; 46459d67712SDarrick J. Wong 46559d67712SDarrick J. Wong return cur; 46659d67712SDarrick J. Wong } 46759d67712SDarrick J. Wong 46859d67712SDarrick J. Wong /* Create a new reverse mapping btree cursor. */ 469035e00acSDarrick J. Wong struct xfs_btree_cur * 470035e00acSDarrick J. Wong xfs_rmapbt_init_cursor( 471035e00acSDarrick J. Wong struct xfs_mount *mp, 472035e00acSDarrick J. Wong struct xfs_trans *tp, 473035e00acSDarrick J. Wong struct xfs_buf *agbp, 474be9fb17dSDave Chinner struct xfs_perag *pag) 475035e00acSDarrick J. Wong { 4769798f615SChristoph Hellwig struct xfs_agf *agf = agbp->b_addr; 477035e00acSDarrick J. Wong struct xfs_btree_cur *cur; 478035e00acSDarrick J. Wong 479fa9c3c19SDave Chinner cur = xfs_rmapbt_init_common(mp, tp, pag); 480035e00acSDarrick J. Wong cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); 481576af732SDave Chinner cur->bc_ag.agbp = agbp; 482035e00acSDarrick J. Wong return cur; 483035e00acSDarrick J. Wong } 484035e00acSDarrick J. Wong 48559d67712SDarrick J. Wong /* Create a new reverse mapping btree cursor with a fake root for staging. */ 48659d67712SDarrick J. Wong struct xfs_btree_cur * 48759d67712SDarrick J. Wong xfs_rmapbt_stage_cursor( 48859d67712SDarrick J. Wong struct xfs_mount *mp, 48959d67712SDarrick J. Wong struct xbtree_afakeroot *afake, 490fa9c3c19SDave Chinner struct xfs_perag *pag) 49159d67712SDarrick J. Wong { 49259d67712SDarrick J. Wong struct xfs_btree_cur *cur; 49359d67712SDarrick J. Wong 494fa9c3c19SDave Chinner cur = xfs_rmapbt_init_common(mp, NULL, pag); 49559d67712SDarrick J. Wong xfs_btree_stage_afakeroot(cur, afake); 49659d67712SDarrick J. Wong return cur; 49759d67712SDarrick J. Wong } 49859d67712SDarrick J. Wong 49959d67712SDarrick J. Wong /* 50059d67712SDarrick J. Wong * Install a new reverse mapping btree root. Caller is responsible for 50159d67712SDarrick J. Wong * invalidating and freeing the old btree blocks. 50259d67712SDarrick J. Wong */ 50359d67712SDarrick J. Wong void 50459d67712SDarrick J. Wong xfs_rmapbt_commit_staged_btree( 50559d67712SDarrick J. Wong struct xfs_btree_cur *cur, 50659d67712SDarrick J. Wong struct xfs_trans *tp, 50759d67712SDarrick J. Wong struct xfs_buf *agbp) 50859d67712SDarrick J. Wong { 50959d67712SDarrick J. Wong struct xfs_agf *agf = agbp->b_addr; 51059d67712SDarrick J. Wong struct xbtree_afakeroot *afake = cur->bc_ag.afake; 51159d67712SDarrick J. Wong 51259d67712SDarrick J. Wong ASSERT(cur->bc_flags & XFS_BTREE_STAGING); 51359d67712SDarrick J. Wong 51459d67712SDarrick J. Wong agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); 51559d67712SDarrick J. Wong agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); 51659d67712SDarrick J. Wong agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks); 51759d67712SDarrick J. Wong xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS | 51859d67712SDarrick J. Wong XFS_AGF_RMAP_BLOCKS); 51959d67712SDarrick J. Wong xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); 52059d67712SDarrick J. Wong } 52159d67712SDarrick J. Wong 522035e00acSDarrick J. Wong /* 523035e00acSDarrick J. Wong * Calculate number of records in an rmap btree block. 524035e00acSDarrick J. Wong */ 525035e00acSDarrick J. Wong int 526035e00acSDarrick J. Wong xfs_rmapbt_maxrecs( 527035e00acSDarrick J. Wong int blocklen, 528035e00acSDarrick J. Wong int leaf) 529035e00acSDarrick J. Wong { 530035e00acSDarrick J. Wong blocklen -= XFS_RMAP_BLOCK_LEN; 531035e00acSDarrick J. Wong 532035e00acSDarrick J. Wong if (leaf) 533035e00acSDarrick J. Wong return blocklen / sizeof(struct xfs_rmap_rec); 534035e00acSDarrick J. Wong return blocklen / 535cfed56aeSDarrick J. Wong (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); 536035e00acSDarrick J. Wong } 537035e00acSDarrick J. Wong 538035e00acSDarrick J. Wong /* Compute the maximum height of an rmap btree. */ 539035e00acSDarrick J. Wong void 540035e00acSDarrick J. Wong xfs_rmapbt_compute_maxlevels( 541035e00acSDarrick J. Wong struct xfs_mount *mp) 542035e00acSDarrick J. Wong { 54346eeb521SDarrick J. Wong /* 54446eeb521SDarrick J. Wong * On a non-reflink filesystem, the maximum number of rmap 54546eeb521SDarrick J. Wong * records is the number of blocks in the AG, hence the max 54646eeb521SDarrick J. Wong * rmapbt height is log_$maxrecs($agblocks). However, with 54746eeb521SDarrick J. Wong * reflink each AG block can have up to 2^32 (per the refcount 54846eeb521SDarrick J. Wong * record format) owners, which means that theoretically we 54946eeb521SDarrick J. Wong * could face up to 2^64 rmap records. 55046eeb521SDarrick J. Wong * 55146eeb521SDarrick J. Wong * That effectively means that the max rmapbt height must be 55246eeb521SDarrick J. Wong * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG 55346eeb521SDarrick J. Wong * blocks to feed the rmapbt long before the rmapbt reaches 55446eeb521SDarrick J. Wong * maximum height. The reflink code uses ag_resv_critical to 55546eeb521SDarrick J. Wong * disallow reflinking when less than 10% of the per-AG metadata 55646eeb521SDarrick J. Wong * block reservation since the fallback is a regular file copy. 55746eeb521SDarrick J. Wong */ 55838c26bfdSDave Chinner if (xfs_has_reflink(mp)) 55946eeb521SDarrick J. Wong mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; 56046eeb521SDarrick J. Wong else 561a1f69417SEric Sandeen mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( 562035e00acSDarrick J. Wong mp->m_rmap_mnr, mp->m_sb.sb_agblocks); 563035e00acSDarrick J. Wong } 56484d69619SDarrick J. Wong 56584d69619SDarrick J. Wong /* Calculate the refcount btree size for some records. */ 56684d69619SDarrick J. Wong xfs_extlen_t 56784d69619SDarrick J. Wong xfs_rmapbt_calc_size( 56884d69619SDarrick J. Wong struct xfs_mount *mp, 56984d69619SDarrick J. Wong unsigned long long len) 57084d69619SDarrick J. Wong { 571a1f69417SEric Sandeen return xfs_btree_calc_size(mp->m_rmap_mnr, len); 57284d69619SDarrick J. Wong } 57384d69619SDarrick J. Wong 57484d69619SDarrick J. Wong /* 57584d69619SDarrick J. Wong * Calculate the maximum refcount btree size. 57684d69619SDarrick J. Wong */ 57784d69619SDarrick J. Wong xfs_extlen_t 57884d69619SDarrick J. Wong xfs_rmapbt_max_size( 57920e73b00SDarrick J. Wong struct xfs_mount *mp, 58020e73b00SDarrick J. Wong xfs_agblock_t agblocks) 58184d69619SDarrick J. Wong { 58284d69619SDarrick J. Wong /* Bail out if we're uninitialized, which can happen in mkfs. */ 58384d69619SDarrick J. Wong if (mp->m_rmap_mxr[0] == 0) 58484d69619SDarrick J. Wong return 0; 58584d69619SDarrick J. Wong 58620e73b00SDarrick J. Wong return xfs_rmapbt_calc_size(mp, agblocks); 58784d69619SDarrick J. Wong } 58884d69619SDarrick J. Wong 58984d69619SDarrick J. Wong /* 59084d69619SDarrick J. Wong * Figure out how many blocks to reserve and how many are used by this btree. 59184d69619SDarrick J. Wong */ 59284d69619SDarrick J. Wong int 59384d69619SDarrick J. Wong xfs_rmapbt_calc_reserves( 59484d69619SDarrick J. Wong struct xfs_mount *mp, 595ebcbef3aSDarrick J. Wong struct xfs_trans *tp, 59630933120SDave Chinner struct xfs_perag *pag, 59784d69619SDarrick J. Wong xfs_extlen_t *ask, 59884d69619SDarrick J. Wong xfs_extlen_t *used) 59984d69619SDarrick J. Wong { 60084d69619SDarrick J. Wong struct xfs_buf *agbp; 60184d69619SDarrick J. Wong struct xfs_agf *agf; 60220e73b00SDarrick J. Wong xfs_agblock_t agblocks; 60384d69619SDarrick J. Wong xfs_extlen_t tree_len; 60484d69619SDarrick J. Wong int error; 60584d69619SDarrick J. Wong 60638c26bfdSDave Chinner if (!xfs_has_rmapbt(mp)) 60784d69619SDarrick J. Wong return 0; 60884d69619SDarrick J. Wong 60930933120SDave Chinner error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); 61084d69619SDarrick J. Wong if (error) 61184d69619SDarrick J. Wong return error; 61284d69619SDarrick J. Wong 6139798f615SChristoph Hellwig agf = agbp->b_addr; 61420e73b00SDarrick J. Wong agblocks = be32_to_cpu(agf->agf_length); 61584d69619SDarrick J. Wong tree_len = be32_to_cpu(agf->agf_rmap_blocks); 616ebcbef3aSDarrick J. Wong xfs_trans_brelse(tp, agbp); 61784d69619SDarrick J. Wong 6185cd213b0SDarrick J. Wong /* 6195cd213b0SDarrick J. Wong * The log is permanently allocated, so the space it occupies will 6205cd213b0SDarrick J. Wong * never be available for the kinds of things that would require btree 6215cd213b0SDarrick J. Wong * expansion. We therefore can pretend the space isn't there. 6225cd213b0SDarrick J. Wong */ 6235cd213b0SDarrick J. Wong if (mp->m_sb.sb_logstart && 62430933120SDave Chinner XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno) 6255cd213b0SDarrick J. Wong agblocks -= mp->m_sb.sb_logblocks; 6265cd213b0SDarrick J. Wong 62720e73b00SDarrick J. Wong /* Reserve 1% of the AG or enough for 1 block per record. */ 62820e73b00SDarrick J. Wong *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); 62984d69619SDarrick J. Wong *used += tree_len; 63084d69619SDarrick J. Wong 63184d69619SDarrick J. Wong return error; 63284d69619SDarrick J. Wong } 633