1035e00acSDarrick J. Wong /* 2035e00acSDarrick J. Wong * Copyright (c) 2014 Red Hat, Inc. 3035e00acSDarrick J. Wong * All Rights Reserved. 4035e00acSDarrick J. Wong * 5035e00acSDarrick J. Wong * This program is free software; you can redistribute it and/or 6035e00acSDarrick J. Wong * modify it under the terms of the GNU General Public License as 7035e00acSDarrick J. Wong * published by the Free Software Foundation. 8035e00acSDarrick J. Wong * 9035e00acSDarrick J. Wong * This program is distributed in the hope that it would be useful, 10035e00acSDarrick J. Wong * but WITHOUT ANY WARRANTY; without even the implied warranty of 11035e00acSDarrick J. Wong * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12035e00acSDarrick J. Wong * GNU General Public License for more details. 13035e00acSDarrick J. Wong * 14035e00acSDarrick J. Wong * You should have received a copy of the GNU General Public License 15035e00acSDarrick J. Wong * along with this program; if not, write the Free Software Foundation, 16035e00acSDarrick J. Wong * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17035e00acSDarrick J. Wong */ 18035e00acSDarrick J. Wong #include "xfs.h" 19035e00acSDarrick J. Wong #include "xfs_fs.h" 20035e00acSDarrick J. Wong #include "xfs_shared.h" 21035e00acSDarrick J. Wong #include "xfs_format.h" 22035e00acSDarrick J. Wong #include "xfs_log_format.h" 23035e00acSDarrick J. Wong #include "xfs_trans_resv.h" 24035e00acSDarrick J. Wong #include "xfs_bit.h" 25035e00acSDarrick J. Wong #include "xfs_sb.h" 26035e00acSDarrick J. Wong #include "xfs_mount.h" 27035e00acSDarrick J. Wong #include "xfs_defer.h" 28035e00acSDarrick J. Wong #include "xfs_inode.h" 29035e00acSDarrick J. Wong #include "xfs_trans.h" 30035e00acSDarrick J. Wong #include "xfs_alloc.h" 31035e00acSDarrick J. Wong #include "xfs_btree.h" 324b8ed677SDarrick J. Wong #include "xfs_rmap.h" 33035e00acSDarrick J. Wong #include "xfs_rmap_btree.h" 34035e00acSDarrick J. Wong #include "xfs_trace.h" 35035e00acSDarrick J. Wong #include "xfs_cksum.h" 36035e00acSDarrick J. Wong #include "xfs_error.h" 37035e00acSDarrick J. Wong #include "xfs_extent_busy.h" 3884d69619SDarrick J. Wong #include "xfs_ag_resv.h" 39035e00acSDarrick J. Wong 404b8ed677SDarrick J. Wong /* 414b8ed677SDarrick J. Wong * Reverse map btree. 424b8ed677SDarrick J. Wong * 434b8ed677SDarrick J. Wong * This is a per-ag tree used to track the owner(s) of a given extent. With 444b8ed677SDarrick J. Wong * reflink it is possible for there to be multiple owners, which is a departure 454b8ed677SDarrick J. Wong * from classic XFS. Owner records for data extents are inserted when the 464b8ed677SDarrick J. Wong * extent is mapped and removed when an extent is unmapped. Owner records for 474b8ed677SDarrick J. Wong * all other block types (i.e. metadata) are inserted when an extent is 484b8ed677SDarrick J. Wong * allocated and removed when an extent is freed. There can only be one owner 494b8ed677SDarrick J. Wong * of a metadata extent, usually an inode or some other metadata structure like 504b8ed677SDarrick J. Wong * an AG btree. 514b8ed677SDarrick J. Wong * 524b8ed677SDarrick J. Wong * The rmap btree is part of the free space management, so blocks for the tree 534b8ed677SDarrick J. Wong * are sourced from the agfl. Hence we need transaction reservation support for 544b8ed677SDarrick J. Wong * this tree so that the freelist is always large enough. This also impacts on 554b8ed677SDarrick J. Wong * the minimum space we need to leave free in the AG. 564b8ed677SDarrick J. Wong * 574b8ed677SDarrick J. Wong * The tree is ordered by [ag block, owner, offset]. This is a large key size, 584b8ed677SDarrick J. Wong * but it is the only way to enforce unique keys when a block can be owned by 594b8ed677SDarrick J. Wong * multiple files at any offset. There's no need to order/search by extent 604b8ed677SDarrick J. Wong * size for online updating/management of the tree. It is intended that most 614b8ed677SDarrick J. Wong * reverse lookups will be to find the owner(s) of a particular block, or to 624b8ed677SDarrick J. Wong * try to recover tree and file data from corrupt primary metadata. 634b8ed677SDarrick J. Wong */ 644b8ed677SDarrick J. Wong 65035e00acSDarrick J. Wong static struct xfs_btree_cur * 66035e00acSDarrick J. Wong xfs_rmapbt_dup_cursor( 67035e00acSDarrick J. Wong struct xfs_btree_cur *cur) 68035e00acSDarrick J. Wong { 69035e00acSDarrick J. Wong return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, 70035e00acSDarrick J. Wong cur->bc_private.a.agbp, cur->bc_private.a.agno); 71035e00acSDarrick J. Wong } 72035e00acSDarrick J. Wong 734b8ed677SDarrick J. Wong STATIC void 744b8ed677SDarrick J. Wong xfs_rmapbt_set_root( 754b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 764b8ed677SDarrick J. Wong union xfs_btree_ptr *ptr, 774b8ed677SDarrick J. Wong int inc) 784b8ed677SDarrick J. Wong { 794b8ed677SDarrick J. Wong struct xfs_buf *agbp = cur->bc_private.a.agbp; 804b8ed677SDarrick J. Wong struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 814b8ed677SDarrick J. Wong xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); 824b8ed677SDarrick J. Wong int btnum = cur->bc_btnum; 834b8ed677SDarrick J. Wong struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); 844b8ed677SDarrick J. Wong 854b8ed677SDarrick J. Wong ASSERT(ptr->s != 0); 864b8ed677SDarrick J. Wong 874b8ed677SDarrick J. Wong agf->agf_roots[btnum] = ptr->s; 884b8ed677SDarrick J. Wong be32_add_cpu(&agf->agf_levels[btnum], inc); 894b8ed677SDarrick J. Wong pag->pagf_levels[btnum] += inc; 904b8ed677SDarrick J. Wong xfs_perag_put(pag); 914b8ed677SDarrick J. Wong 924b8ed677SDarrick J. Wong xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); 934b8ed677SDarrick J. Wong } 944b8ed677SDarrick J. Wong 954b8ed677SDarrick J. Wong STATIC int 964b8ed677SDarrick J. Wong xfs_rmapbt_alloc_block( 974b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 984b8ed677SDarrick J. Wong union xfs_btree_ptr *start, 994b8ed677SDarrick J. Wong union xfs_btree_ptr *new, 1004b8ed677SDarrick J. Wong int *stat) 1014b8ed677SDarrick J. Wong { 102f32866fdSDarrick J. Wong struct xfs_buf *agbp = cur->bc_private.a.agbp; 103f32866fdSDarrick J. Wong struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 1044b8ed677SDarrick J. Wong int error; 1054b8ed677SDarrick J. Wong xfs_agblock_t bno; 1064b8ed677SDarrick J. Wong 1074b8ed677SDarrick J. Wong XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1084b8ed677SDarrick J. Wong 1094b8ed677SDarrick J. Wong /* Allocate the new block from the freelist. If we can't, give up. */ 1104b8ed677SDarrick J. Wong error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, 1114b8ed677SDarrick J. Wong &bno, 1); 1124b8ed677SDarrick J. Wong if (error) { 1134b8ed677SDarrick J. Wong XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1144b8ed677SDarrick J. Wong return error; 1154b8ed677SDarrick J. Wong } 1164b8ed677SDarrick J. Wong 1174b8ed677SDarrick J. Wong trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, 1184b8ed677SDarrick J. Wong bno, 1); 1194b8ed677SDarrick J. Wong if (bno == NULLAGBLOCK) { 1204b8ed677SDarrick J. Wong XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1214b8ed677SDarrick J. Wong *stat = 0; 1224b8ed677SDarrick J. Wong return 0; 1234b8ed677SDarrick J. Wong } 1244b8ed677SDarrick J. Wong 1254b8ed677SDarrick J. Wong xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, 1264b8ed677SDarrick J. Wong false); 1274b8ed677SDarrick J. Wong 1284b8ed677SDarrick J. Wong xfs_trans_agbtree_delta(cur->bc_tp, 1); 1294b8ed677SDarrick J. Wong new->s = cpu_to_be32(bno); 130f32866fdSDarrick J. Wong be32_add_cpu(&agf->agf_rmap_blocks, 1); 131f32866fdSDarrick J. Wong xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 1324b8ed677SDarrick J. Wong 1334b8ed677SDarrick J. Wong XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1344b8ed677SDarrick J. Wong *stat = 1; 1354b8ed677SDarrick J. Wong return 0; 1364b8ed677SDarrick J. Wong } 1374b8ed677SDarrick J. Wong 1384b8ed677SDarrick J. Wong STATIC int 1394b8ed677SDarrick J. Wong xfs_rmapbt_free_block( 1404b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1414b8ed677SDarrick J. Wong struct xfs_buf *bp) 1424b8ed677SDarrick J. Wong { 1434b8ed677SDarrick J. Wong struct xfs_buf *agbp = cur->bc_private.a.agbp; 1444b8ed677SDarrick J. Wong struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 1454b8ed677SDarrick J. Wong xfs_agblock_t bno; 1464b8ed677SDarrick J. Wong int error; 1474b8ed677SDarrick J. Wong 1484b8ed677SDarrick J. Wong bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); 1494b8ed677SDarrick J. Wong trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, 1504b8ed677SDarrick J. Wong bno, 1); 151f32866fdSDarrick J. Wong be32_add_cpu(&agf->agf_rmap_blocks, -1); 152f32866fdSDarrick J. Wong xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 1534b8ed677SDarrick J. Wong error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); 1544b8ed677SDarrick J. Wong if (error) 1554b8ed677SDarrick J. Wong return error; 1564b8ed677SDarrick J. Wong 1574b8ed677SDarrick J. Wong xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, 1584b8ed677SDarrick J. Wong XFS_EXTENT_BUSY_SKIP_DISCARD); 1594b8ed677SDarrick J. Wong xfs_trans_agbtree_delta(cur->bc_tp, -1); 1604b8ed677SDarrick J. Wong 1614b8ed677SDarrick J. Wong return 0; 1624b8ed677SDarrick J. Wong } 1634b8ed677SDarrick J. Wong 1644b8ed677SDarrick J. Wong STATIC int 1654b8ed677SDarrick J. Wong xfs_rmapbt_get_minrecs( 1664b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1674b8ed677SDarrick J. Wong int level) 1684b8ed677SDarrick J. Wong { 1694b8ed677SDarrick J. Wong return cur->bc_mp->m_rmap_mnr[level != 0]; 1704b8ed677SDarrick J. Wong } 1714b8ed677SDarrick J. Wong 1724b8ed677SDarrick J. Wong STATIC int 1734b8ed677SDarrick J. Wong xfs_rmapbt_get_maxrecs( 1744b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 1754b8ed677SDarrick J. Wong int level) 1764b8ed677SDarrick J. Wong { 1774b8ed677SDarrick J. Wong return cur->bc_mp->m_rmap_mxr[level != 0]; 1784b8ed677SDarrick J. Wong } 1794b8ed677SDarrick J. Wong 1804b8ed677SDarrick J. Wong STATIC void 1814b8ed677SDarrick J. Wong xfs_rmapbt_init_key_from_rec( 1824b8ed677SDarrick J. Wong union xfs_btree_key *key, 1834b8ed677SDarrick J. Wong union xfs_btree_rec *rec) 1844b8ed677SDarrick J. Wong { 1854b8ed677SDarrick J. Wong key->rmap.rm_startblock = rec->rmap.rm_startblock; 1864b8ed677SDarrick J. Wong key->rmap.rm_owner = rec->rmap.rm_owner; 1874b8ed677SDarrick J. Wong key->rmap.rm_offset = rec->rmap.rm_offset; 1884b8ed677SDarrick J. Wong } 1894b8ed677SDarrick J. Wong 190cfed56aeSDarrick J. Wong /* 191cfed56aeSDarrick J. Wong * The high key for a reverse mapping record can be computed by shifting 192cfed56aeSDarrick J. Wong * the startblock and offset to the highest value that would still map 193cfed56aeSDarrick J. Wong * to that record. In practice this means that we add blockcount-1 to 194cfed56aeSDarrick J. Wong * the startblock for all records, and if the record is for a data/attr 195cfed56aeSDarrick J. Wong * fork mapping, we add blockcount-1 to the offset too. 196cfed56aeSDarrick J. Wong */ 197cfed56aeSDarrick J. Wong STATIC void 198cfed56aeSDarrick J. Wong xfs_rmapbt_init_high_key_from_rec( 199cfed56aeSDarrick J. Wong union xfs_btree_key *key, 200cfed56aeSDarrick J. Wong union xfs_btree_rec *rec) 201cfed56aeSDarrick J. Wong { 202cfed56aeSDarrick J. Wong __uint64_t off; 203cfed56aeSDarrick J. Wong int adj; 204cfed56aeSDarrick J. Wong 205cfed56aeSDarrick J. Wong adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; 206cfed56aeSDarrick J. Wong 207cfed56aeSDarrick J. Wong key->rmap.rm_startblock = rec->rmap.rm_startblock; 208cfed56aeSDarrick J. Wong be32_add_cpu(&key->rmap.rm_startblock, adj); 209cfed56aeSDarrick J. Wong key->rmap.rm_owner = rec->rmap.rm_owner; 210cfed56aeSDarrick J. Wong key->rmap.rm_offset = rec->rmap.rm_offset; 211cfed56aeSDarrick J. Wong if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) || 212cfed56aeSDarrick J. Wong XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) 213cfed56aeSDarrick J. Wong return; 214cfed56aeSDarrick J. Wong off = be64_to_cpu(key->rmap.rm_offset); 215cfed56aeSDarrick J. Wong off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK); 216cfed56aeSDarrick J. Wong key->rmap.rm_offset = cpu_to_be64(off); 217cfed56aeSDarrick J. Wong } 218cfed56aeSDarrick J. Wong 2194b8ed677SDarrick J. Wong STATIC void 2204b8ed677SDarrick J. Wong xfs_rmapbt_init_rec_from_cur( 2214b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 2224b8ed677SDarrick J. Wong union xfs_btree_rec *rec) 2234b8ed677SDarrick J. Wong { 2244b8ed677SDarrick J. Wong rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock); 2254b8ed677SDarrick J. Wong rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount); 2264b8ed677SDarrick J. Wong rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner); 2274b8ed677SDarrick J. Wong rec->rmap.rm_offset = cpu_to_be64( 2284b8ed677SDarrick J. Wong xfs_rmap_irec_offset_pack(&cur->bc_rec.r)); 2294b8ed677SDarrick J. Wong } 2304b8ed677SDarrick J. Wong 2314b8ed677SDarrick J. Wong STATIC void 2324b8ed677SDarrick J. Wong xfs_rmapbt_init_ptr_from_cur( 2334b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 2344b8ed677SDarrick J. Wong union xfs_btree_ptr *ptr) 2354b8ed677SDarrick J. Wong { 2364b8ed677SDarrick J. Wong struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); 2374b8ed677SDarrick J. Wong 2384b8ed677SDarrick J. Wong ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); 2394b8ed677SDarrick J. Wong ASSERT(agf->agf_roots[cur->bc_btnum] != 0); 2404b8ed677SDarrick J. Wong 2414b8ed677SDarrick J. Wong ptr->s = agf->agf_roots[cur->bc_btnum]; 2424b8ed677SDarrick J. Wong } 2434b8ed677SDarrick J. Wong 2444b8ed677SDarrick J. Wong STATIC __int64_t 2454b8ed677SDarrick J. Wong xfs_rmapbt_key_diff( 2464b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 2474b8ed677SDarrick J. Wong union xfs_btree_key *key) 2484b8ed677SDarrick J. Wong { 2494b8ed677SDarrick J. Wong struct xfs_rmap_irec *rec = &cur->bc_rec.r; 2504b8ed677SDarrick J. Wong struct xfs_rmap_key *kp = &key->rmap; 2514b8ed677SDarrick J. Wong __u64 x, y; 2524b8ed677SDarrick J. Wong __int64_t d; 2534b8ed677SDarrick J. Wong 2544b8ed677SDarrick J. Wong d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; 2554b8ed677SDarrick J. Wong if (d) 2564b8ed677SDarrick J. Wong return d; 2574b8ed677SDarrick J. Wong 2584b8ed677SDarrick J. Wong x = be64_to_cpu(kp->rm_owner); 2594b8ed677SDarrick J. Wong y = rec->rm_owner; 2604b8ed677SDarrick J. Wong if (x > y) 2614b8ed677SDarrick J. Wong return 1; 2624b8ed677SDarrick J. Wong else if (y > x) 2634b8ed677SDarrick J. Wong return -1; 2644b8ed677SDarrick J. Wong 2654b8ed677SDarrick J. Wong x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); 2664b8ed677SDarrick J. Wong y = rec->rm_offset; 2674b8ed677SDarrick J. Wong if (x > y) 2684b8ed677SDarrick J. Wong return 1; 2694b8ed677SDarrick J. Wong else if (y > x) 2704b8ed677SDarrick J. Wong return -1; 2714b8ed677SDarrick J. Wong return 0; 2724b8ed677SDarrick J. Wong } 2734b8ed677SDarrick J. Wong 274cfed56aeSDarrick J. Wong STATIC __int64_t 275cfed56aeSDarrick J. Wong xfs_rmapbt_diff_two_keys( 276cfed56aeSDarrick J. Wong struct xfs_btree_cur *cur, 277cfed56aeSDarrick J. Wong union xfs_btree_key *k1, 278cfed56aeSDarrick J. Wong union xfs_btree_key *k2) 279cfed56aeSDarrick J. Wong { 280cfed56aeSDarrick J. Wong struct xfs_rmap_key *kp1 = &k1->rmap; 281cfed56aeSDarrick J. Wong struct xfs_rmap_key *kp2 = &k2->rmap; 282cfed56aeSDarrick J. Wong __int64_t d; 283cfed56aeSDarrick J. Wong __u64 x, y; 284cfed56aeSDarrick J. Wong 285cfed56aeSDarrick J. Wong d = (__int64_t)be32_to_cpu(kp1->rm_startblock) - 286cfed56aeSDarrick J. Wong be32_to_cpu(kp2->rm_startblock); 287cfed56aeSDarrick J. Wong if (d) 288cfed56aeSDarrick J. Wong return d; 289cfed56aeSDarrick J. Wong 290cfed56aeSDarrick J. Wong x = be64_to_cpu(kp1->rm_owner); 291cfed56aeSDarrick J. Wong y = be64_to_cpu(kp2->rm_owner); 292cfed56aeSDarrick J. Wong if (x > y) 293cfed56aeSDarrick J. Wong return 1; 294cfed56aeSDarrick J. Wong else if (y > x) 295cfed56aeSDarrick J. Wong return -1; 296cfed56aeSDarrick J. Wong 297cfed56aeSDarrick J. Wong x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); 298cfed56aeSDarrick J. Wong y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); 299cfed56aeSDarrick J. Wong if (x > y) 300cfed56aeSDarrick J. Wong return 1; 301cfed56aeSDarrick J. Wong else if (y > x) 302cfed56aeSDarrick J. Wong return -1; 303cfed56aeSDarrick J. Wong return 0; 304cfed56aeSDarrick J. Wong } 305cfed56aeSDarrick J. Wong 306035e00acSDarrick J. Wong static bool 307035e00acSDarrick J. Wong xfs_rmapbt_verify( 308035e00acSDarrick J. Wong struct xfs_buf *bp) 309035e00acSDarrick J. Wong { 310035e00acSDarrick J. Wong struct xfs_mount *mp = bp->b_target->bt_mount; 311035e00acSDarrick J. Wong struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 312035e00acSDarrick J. Wong struct xfs_perag *pag = bp->b_pag; 313035e00acSDarrick J. Wong unsigned int level; 314035e00acSDarrick J. Wong 315035e00acSDarrick J. Wong /* 316035e00acSDarrick J. Wong * magic number and level verification 317035e00acSDarrick J. Wong * 318035e00acSDarrick J. Wong * During growfs operations, we can't verify the exact level or owner as 319035e00acSDarrick J. Wong * the perag is not fully initialised and hence not attached to the 320035e00acSDarrick J. Wong * buffer. In this case, check against the maximum tree depth. 321035e00acSDarrick J. Wong * 322035e00acSDarrick J. Wong * Similarly, during log recovery we will have a perag structure 323035e00acSDarrick J. Wong * attached, but the agf information will not yet have been initialised 324035e00acSDarrick J. Wong * from the on disk AGF. Again, we can only check against maximum limits 325035e00acSDarrick J. Wong * in this case. 326035e00acSDarrick J. Wong */ 327035e00acSDarrick J. Wong if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) 328035e00acSDarrick J. Wong return false; 329035e00acSDarrick J. Wong 330035e00acSDarrick J. Wong if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 331035e00acSDarrick J. Wong return false; 332035e00acSDarrick J. Wong if (!xfs_btree_sblock_v5hdr_verify(bp)) 333035e00acSDarrick J. Wong return false; 334035e00acSDarrick J. Wong 335035e00acSDarrick J. Wong level = be16_to_cpu(block->bb_level); 336035e00acSDarrick J. Wong if (pag && pag->pagf_init) { 337035e00acSDarrick J. Wong if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) 338035e00acSDarrick J. Wong return false; 339035e00acSDarrick J. Wong } else if (level >= mp->m_rmap_maxlevels) 340035e00acSDarrick J. Wong return false; 341035e00acSDarrick J. Wong 342035e00acSDarrick J. Wong return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); 343035e00acSDarrick J. Wong } 344035e00acSDarrick J. Wong 345035e00acSDarrick J. Wong static void 346035e00acSDarrick J. Wong xfs_rmapbt_read_verify( 347035e00acSDarrick J. Wong struct xfs_buf *bp) 348035e00acSDarrick J. Wong { 349035e00acSDarrick J. Wong if (!xfs_btree_sblock_verify_crc(bp)) 350035e00acSDarrick J. Wong xfs_buf_ioerror(bp, -EFSBADCRC); 351035e00acSDarrick J. Wong else if (!xfs_rmapbt_verify(bp)) 352035e00acSDarrick J. Wong xfs_buf_ioerror(bp, -EFSCORRUPTED); 353035e00acSDarrick J. Wong 354035e00acSDarrick J. Wong if (bp->b_error) { 355035e00acSDarrick J. Wong trace_xfs_btree_corrupt(bp, _RET_IP_); 356035e00acSDarrick J. Wong xfs_verifier_error(bp); 357035e00acSDarrick J. Wong } 358035e00acSDarrick J. Wong } 359035e00acSDarrick J. Wong 360035e00acSDarrick J. Wong static void 361035e00acSDarrick J. Wong xfs_rmapbt_write_verify( 362035e00acSDarrick J. Wong struct xfs_buf *bp) 363035e00acSDarrick J. Wong { 364035e00acSDarrick J. Wong if (!xfs_rmapbt_verify(bp)) { 365035e00acSDarrick J. Wong trace_xfs_btree_corrupt(bp, _RET_IP_); 366035e00acSDarrick J. Wong xfs_buf_ioerror(bp, -EFSCORRUPTED); 367035e00acSDarrick J. Wong xfs_verifier_error(bp); 368035e00acSDarrick J. Wong return; 369035e00acSDarrick J. Wong } 370035e00acSDarrick J. Wong xfs_btree_sblock_calc_crc(bp); 371035e00acSDarrick J. Wong 372035e00acSDarrick J. Wong } 373035e00acSDarrick J. Wong 374035e00acSDarrick J. Wong const struct xfs_buf_ops xfs_rmapbt_buf_ops = { 375035e00acSDarrick J. Wong .name = "xfs_rmapbt", 376035e00acSDarrick J. Wong .verify_read = xfs_rmapbt_read_verify, 377035e00acSDarrick J. Wong .verify_write = xfs_rmapbt_write_verify, 378035e00acSDarrick J. Wong }; 379035e00acSDarrick J. Wong 3804b8ed677SDarrick J. Wong #if defined(DEBUG) || defined(XFS_WARN) 3814b8ed677SDarrick J. Wong STATIC int 3824b8ed677SDarrick J. Wong xfs_rmapbt_keys_inorder( 3834b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 3844b8ed677SDarrick J. Wong union xfs_btree_key *k1, 3854b8ed677SDarrick J. Wong union xfs_btree_key *k2) 3864b8ed677SDarrick J. Wong { 3874b8ed677SDarrick J. Wong __uint32_t x; 3884b8ed677SDarrick J. Wong __uint32_t y; 3894b8ed677SDarrick J. Wong __uint64_t a; 3904b8ed677SDarrick J. Wong __uint64_t b; 3914b8ed677SDarrick J. Wong 3924b8ed677SDarrick J. Wong x = be32_to_cpu(k1->rmap.rm_startblock); 3934b8ed677SDarrick J. Wong y = be32_to_cpu(k2->rmap.rm_startblock); 3944b8ed677SDarrick J. Wong if (x < y) 3954b8ed677SDarrick J. Wong return 1; 3964b8ed677SDarrick J. Wong else if (x > y) 3974b8ed677SDarrick J. Wong return 0; 3984b8ed677SDarrick J. Wong a = be64_to_cpu(k1->rmap.rm_owner); 3994b8ed677SDarrick J. Wong b = be64_to_cpu(k2->rmap.rm_owner); 4004b8ed677SDarrick J. Wong if (a < b) 4014b8ed677SDarrick J. Wong return 1; 4024b8ed677SDarrick J. Wong else if (a > b) 4034b8ed677SDarrick J. Wong return 0; 4044b8ed677SDarrick J. Wong a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); 4054b8ed677SDarrick J. Wong b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); 4064b8ed677SDarrick J. Wong if (a <= b) 4074b8ed677SDarrick J. Wong return 1; 4084b8ed677SDarrick J. Wong return 0; 4094b8ed677SDarrick J. Wong } 4104b8ed677SDarrick J. Wong 4114b8ed677SDarrick J. Wong STATIC int 4124b8ed677SDarrick J. Wong xfs_rmapbt_recs_inorder( 4134b8ed677SDarrick J. Wong struct xfs_btree_cur *cur, 4144b8ed677SDarrick J. Wong union xfs_btree_rec *r1, 4154b8ed677SDarrick J. Wong union xfs_btree_rec *r2) 4164b8ed677SDarrick J. Wong { 4174b8ed677SDarrick J. Wong __uint32_t x; 4184b8ed677SDarrick J. Wong __uint32_t y; 4194b8ed677SDarrick J. Wong __uint64_t a; 4204b8ed677SDarrick J. Wong __uint64_t b; 4214b8ed677SDarrick J. Wong 4224b8ed677SDarrick J. Wong x = be32_to_cpu(r1->rmap.rm_startblock); 4234b8ed677SDarrick J. Wong y = be32_to_cpu(r2->rmap.rm_startblock); 4244b8ed677SDarrick J. Wong if (x < y) 4254b8ed677SDarrick J. Wong return 1; 4264b8ed677SDarrick J. Wong else if (x > y) 4274b8ed677SDarrick J. Wong return 0; 4284b8ed677SDarrick J. Wong a = be64_to_cpu(r1->rmap.rm_owner); 4294b8ed677SDarrick J. Wong b = be64_to_cpu(r2->rmap.rm_owner); 4304b8ed677SDarrick J. Wong if (a < b) 4314b8ed677SDarrick J. Wong return 1; 4324b8ed677SDarrick J. Wong else if (a > b) 4334b8ed677SDarrick J. Wong return 0; 4344b8ed677SDarrick J. Wong a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); 4354b8ed677SDarrick J. Wong b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); 4364b8ed677SDarrick J. Wong if (a <= b) 4374b8ed677SDarrick J. Wong return 1; 4384b8ed677SDarrick J. Wong return 0; 4394b8ed677SDarrick J. Wong } 4404b8ed677SDarrick J. Wong #endif /* DEBUG */ 4414b8ed677SDarrick J. Wong 442035e00acSDarrick J. Wong static const struct xfs_btree_ops xfs_rmapbt_ops = { 443035e00acSDarrick J. Wong .rec_len = sizeof(struct xfs_rmap_rec), 444035e00acSDarrick J. Wong .key_len = 2 * sizeof(struct xfs_rmap_key), 445035e00acSDarrick J. Wong 446035e00acSDarrick J. Wong .dup_cursor = xfs_rmapbt_dup_cursor, 4474b8ed677SDarrick J. Wong .set_root = xfs_rmapbt_set_root, 4484b8ed677SDarrick J. Wong .alloc_block = xfs_rmapbt_alloc_block, 4494b8ed677SDarrick J. Wong .free_block = xfs_rmapbt_free_block, 4504b8ed677SDarrick J. Wong .get_minrecs = xfs_rmapbt_get_minrecs, 4514b8ed677SDarrick J. Wong .get_maxrecs = xfs_rmapbt_get_maxrecs, 4524b8ed677SDarrick J. Wong .init_key_from_rec = xfs_rmapbt_init_key_from_rec, 453cfed56aeSDarrick J. Wong .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec, 4544b8ed677SDarrick J. Wong .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur, 4554b8ed677SDarrick J. Wong .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur, 4564b8ed677SDarrick J. Wong .key_diff = xfs_rmapbt_key_diff, 457035e00acSDarrick J. Wong .buf_ops = &xfs_rmapbt_buf_ops, 458cfed56aeSDarrick J. Wong .diff_two_keys = xfs_rmapbt_diff_two_keys, 4594b8ed677SDarrick J. Wong #if defined(DEBUG) || defined(XFS_WARN) 4604b8ed677SDarrick J. Wong .keys_inorder = xfs_rmapbt_keys_inorder, 4614b8ed677SDarrick J. Wong .recs_inorder = xfs_rmapbt_recs_inorder, 4624b8ed677SDarrick J. Wong #endif 463035e00acSDarrick J. Wong }; 464035e00acSDarrick J. Wong 465035e00acSDarrick J. Wong /* 466035e00acSDarrick J. Wong * Allocate a new allocation btree cursor. 467035e00acSDarrick J. Wong */ 468035e00acSDarrick J. Wong struct xfs_btree_cur * 469035e00acSDarrick J. Wong xfs_rmapbt_init_cursor( 470035e00acSDarrick J. Wong struct xfs_mount *mp, 471035e00acSDarrick J. Wong struct xfs_trans *tp, 472035e00acSDarrick J. Wong struct xfs_buf *agbp, 473035e00acSDarrick J. Wong xfs_agnumber_t agno) 474035e00acSDarrick J. Wong { 475035e00acSDarrick J. Wong struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 476035e00acSDarrick J. Wong struct xfs_btree_cur *cur; 477035e00acSDarrick J. Wong 478035e00acSDarrick J. Wong cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); 479035e00acSDarrick J. Wong cur->bc_tp = tp; 480035e00acSDarrick J. Wong cur->bc_mp = mp; 481cfed56aeSDarrick J. Wong /* Overlapping btree; 2 keys per pointer. */ 482035e00acSDarrick J. Wong cur->bc_btnum = XFS_BTNUM_RMAP; 483cfed56aeSDarrick J. Wong cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; 484035e00acSDarrick J. Wong cur->bc_blocklog = mp->m_sb.sb_blocklog; 485035e00acSDarrick J. Wong cur->bc_ops = &xfs_rmapbt_ops; 486035e00acSDarrick J. Wong cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); 487035e00acSDarrick J. Wong 488035e00acSDarrick J. Wong cur->bc_private.a.agbp = agbp; 489035e00acSDarrick J. Wong cur->bc_private.a.agno = agno; 490035e00acSDarrick J. Wong 491035e00acSDarrick J. Wong return cur; 492035e00acSDarrick J. Wong } 493035e00acSDarrick J. Wong 494035e00acSDarrick J. Wong /* 495035e00acSDarrick J. Wong * Calculate number of records in an rmap btree block. 496035e00acSDarrick J. Wong */ 497035e00acSDarrick J. Wong int 498035e00acSDarrick J. Wong xfs_rmapbt_maxrecs( 499035e00acSDarrick J. Wong struct xfs_mount *mp, 500035e00acSDarrick J. Wong int blocklen, 501035e00acSDarrick J. Wong int leaf) 502035e00acSDarrick J. Wong { 503035e00acSDarrick J. Wong blocklen -= XFS_RMAP_BLOCK_LEN; 504035e00acSDarrick J. Wong 505035e00acSDarrick J. Wong if (leaf) 506035e00acSDarrick J. Wong return blocklen / sizeof(struct xfs_rmap_rec); 507035e00acSDarrick J. Wong return blocklen / 508cfed56aeSDarrick J. Wong (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); 509035e00acSDarrick J. Wong } 510035e00acSDarrick J. Wong 511035e00acSDarrick J. Wong /* Compute the maximum height of an rmap btree. */ 512035e00acSDarrick J. Wong void 513035e00acSDarrick J. Wong xfs_rmapbt_compute_maxlevels( 514035e00acSDarrick J. Wong struct xfs_mount *mp) 515035e00acSDarrick J. Wong { 51646eeb521SDarrick J. Wong /* 51746eeb521SDarrick J. Wong * On a non-reflink filesystem, the maximum number of rmap 51846eeb521SDarrick J. Wong * records is the number of blocks in the AG, hence the max 51946eeb521SDarrick J. Wong * rmapbt height is log_$maxrecs($agblocks). However, with 52046eeb521SDarrick J. Wong * reflink each AG block can have up to 2^32 (per the refcount 52146eeb521SDarrick J. Wong * record format) owners, which means that theoretically we 52246eeb521SDarrick J. Wong * could face up to 2^64 rmap records. 52346eeb521SDarrick J. Wong * 52446eeb521SDarrick J. Wong * That effectively means that the max rmapbt height must be 52546eeb521SDarrick J. Wong * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG 52646eeb521SDarrick J. Wong * blocks to feed the rmapbt long before the rmapbt reaches 52746eeb521SDarrick J. Wong * maximum height. The reflink code uses ag_resv_critical to 52846eeb521SDarrick J. Wong * disallow reflinking when less than 10% of the per-AG metadata 52946eeb521SDarrick J. Wong * block reservation since the fallback is a regular file copy. 53046eeb521SDarrick J. Wong */ 53146eeb521SDarrick J. Wong if (xfs_sb_version_hasreflink(&mp->m_sb)) 53246eeb521SDarrick J. Wong mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; 53346eeb521SDarrick J. Wong else 534035e00acSDarrick J. Wong mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp, 535035e00acSDarrick J. Wong mp->m_rmap_mnr, mp->m_sb.sb_agblocks); 536035e00acSDarrick J. Wong } 53784d69619SDarrick J. Wong 53884d69619SDarrick J. Wong /* Calculate the refcount btree size for some records. */ 53984d69619SDarrick J. Wong xfs_extlen_t 54084d69619SDarrick J. Wong xfs_rmapbt_calc_size( 54184d69619SDarrick J. Wong struct xfs_mount *mp, 54284d69619SDarrick J. Wong unsigned long long len) 54384d69619SDarrick J. Wong { 54484d69619SDarrick J. Wong return xfs_btree_calc_size(mp, mp->m_rmap_mnr, len); 54584d69619SDarrick J. Wong } 54684d69619SDarrick J. Wong 54784d69619SDarrick J. Wong /* 54884d69619SDarrick J. Wong * Calculate the maximum refcount btree size. 54984d69619SDarrick J. Wong */ 55084d69619SDarrick J. Wong xfs_extlen_t 55184d69619SDarrick J. Wong xfs_rmapbt_max_size( 55284d69619SDarrick J. Wong struct xfs_mount *mp) 55384d69619SDarrick J. Wong { 55484d69619SDarrick J. Wong /* Bail out if we're uninitialized, which can happen in mkfs. */ 55584d69619SDarrick J. Wong if (mp->m_rmap_mxr[0] == 0) 55684d69619SDarrick J. Wong return 0; 55784d69619SDarrick J. Wong 55884d69619SDarrick J. Wong return xfs_rmapbt_calc_size(mp, mp->m_sb.sb_agblocks); 55984d69619SDarrick J. Wong } 56084d69619SDarrick J. Wong 56184d69619SDarrick J. Wong /* 56284d69619SDarrick J. Wong * Figure out how many blocks to reserve and how many are used by this btree. 56384d69619SDarrick J. Wong */ 56484d69619SDarrick J. Wong int 56584d69619SDarrick J. Wong xfs_rmapbt_calc_reserves( 56684d69619SDarrick J. Wong struct xfs_mount *mp, 56784d69619SDarrick J. Wong xfs_agnumber_t agno, 56884d69619SDarrick J. Wong xfs_extlen_t *ask, 56984d69619SDarrick J. Wong xfs_extlen_t *used) 57084d69619SDarrick J. Wong { 57184d69619SDarrick J. Wong struct xfs_buf *agbp; 57284d69619SDarrick J. Wong struct xfs_agf *agf; 57384d69619SDarrick J. Wong xfs_extlen_t pool_len; 57484d69619SDarrick J. Wong xfs_extlen_t tree_len; 57584d69619SDarrick J. Wong int error; 57684d69619SDarrick J. Wong 57784d69619SDarrick J. Wong if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 57884d69619SDarrick J. Wong return 0; 57984d69619SDarrick J. Wong 58084d69619SDarrick J. Wong /* Reserve 1% of the AG or enough for 1 block per record. */ 58184d69619SDarrick J. Wong pool_len = max(mp->m_sb.sb_agblocks / 100, xfs_rmapbt_max_size(mp)); 58284d69619SDarrick J. Wong *ask += pool_len; 58384d69619SDarrick J. Wong 58484d69619SDarrick J. Wong error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); 58584d69619SDarrick J. Wong if (error) 58684d69619SDarrick J. Wong return error; 58784d69619SDarrick J. Wong 58884d69619SDarrick J. Wong agf = XFS_BUF_TO_AGF(agbp); 58984d69619SDarrick J. Wong tree_len = be32_to_cpu(agf->agf_rmap_blocks); 59084d69619SDarrick J. Wong xfs_buf_relse(agbp); 59184d69619SDarrick J. Wong 59284d69619SDarrick J. Wong *used += tree_len; 59384d69619SDarrick J. Wong 59484d69619SDarrick J. Wong return error; 59584d69619SDarrick J. Wong } 596