1*30f712c9SDave Chinner /* 2*30f712c9SDave Chinner * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3*30f712c9SDave Chinner * All Rights Reserved. 4*30f712c9SDave Chinner * 5*30f712c9SDave Chinner * This program is free software; you can redistribute it and/or 6*30f712c9SDave Chinner * modify it under the terms of the GNU General Public License as 7*30f712c9SDave Chinner * published by the Free Software Foundation. 8*30f712c9SDave Chinner * 9*30f712c9SDave Chinner * This program is distributed in the hope that it would be useful, 10*30f712c9SDave Chinner * but WITHOUT ANY WARRANTY; without even the implied warranty of 11*30f712c9SDave Chinner * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12*30f712c9SDave Chinner * GNU General Public License for more details. 13*30f712c9SDave Chinner * 14*30f712c9SDave Chinner * You should have received a copy of the GNU General Public License 15*30f712c9SDave Chinner * along with this program; if not, write the Free Software Foundation, 16*30f712c9SDave Chinner * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17*30f712c9SDave Chinner */ 18*30f712c9SDave Chinner #include "xfs.h" 19*30f712c9SDave Chinner #include "xfs_fs.h" 20*30f712c9SDave Chinner #include "xfs_shared.h" 21*30f712c9SDave Chinner #include "xfs_format.h" 22*30f712c9SDave Chinner #include "xfs_log_format.h" 23*30f712c9SDave Chinner #include "xfs_trans_resv.h" 24*30f712c9SDave Chinner #include "xfs_bit.h" 25*30f712c9SDave Chinner #include "xfs_inum.h" 26*30f712c9SDave Chinner #include "xfs_sb.h" 27*30f712c9SDave Chinner #include "xfs_ag.h" 28*30f712c9SDave Chinner #include "xfs_mount.h" 29*30f712c9SDave Chinner #include "xfs_inode.h" 30*30f712c9SDave Chinner #include "xfs_btree.h" 31*30f712c9SDave Chinner #include "xfs_ialloc.h" 32*30f712c9SDave Chinner #include "xfs_ialloc_btree.h" 33*30f712c9SDave Chinner #include "xfs_alloc.h" 34*30f712c9SDave Chinner #include "xfs_rtalloc.h" 35*30f712c9SDave Chinner #include "xfs_error.h" 
36*30f712c9SDave Chinner #include "xfs_bmap.h" 37*30f712c9SDave Chinner #include "xfs_cksum.h" 38*30f712c9SDave Chinner #include "xfs_trans.h" 39*30f712c9SDave Chinner #include "xfs_buf_item.h" 40*30f712c9SDave Chinner #include "xfs_icreate_item.h" 41*30f712c9SDave Chinner #include "xfs_icache.h" 42*30f712c9SDave Chinner #include "xfs_dinode.h" 43*30f712c9SDave Chinner #include "xfs_trace.h" 44*30f712c9SDave Chinner 45*30f712c9SDave Chinner 46*30f712c9SDave Chinner /* 47*30f712c9SDave Chinner * Allocation group level functions. 48*30f712c9SDave Chinner */ 49*30f712c9SDave Chinner static inline int 50*30f712c9SDave Chinner xfs_ialloc_cluster_alignment( 51*30f712c9SDave Chinner xfs_alloc_arg_t *args) 52*30f712c9SDave Chinner { 53*30f712c9SDave Chinner if (xfs_sb_version_hasalign(&args->mp->m_sb) && 54*30f712c9SDave Chinner args->mp->m_sb.sb_inoalignmt >= 55*30f712c9SDave Chinner XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) 56*30f712c9SDave Chinner return args->mp->m_sb.sb_inoalignmt; 57*30f712c9SDave Chinner return 1; 58*30f712c9SDave Chinner } 59*30f712c9SDave Chinner 60*30f712c9SDave Chinner /* 61*30f712c9SDave Chinner * Lookup a record by ino in the btree given by cur. 62*30f712c9SDave Chinner */ 63*30f712c9SDave Chinner int /* error */ 64*30f712c9SDave Chinner xfs_inobt_lookup( 65*30f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */ 66*30f712c9SDave Chinner xfs_agino_t ino, /* starting inode of chunk */ 67*30f712c9SDave Chinner xfs_lookup_t dir, /* <=, >=, == */ 68*30f712c9SDave Chinner int *stat) /* success/failure */ 69*30f712c9SDave Chinner { 70*30f712c9SDave Chinner cur->bc_rec.i.ir_startino = ino; 71*30f712c9SDave Chinner cur->bc_rec.i.ir_freecount = 0; 72*30f712c9SDave Chinner cur->bc_rec.i.ir_free = 0; 73*30f712c9SDave Chinner return xfs_btree_lookup(cur, dir, stat); 74*30f712c9SDave Chinner } 75*30f712c9SDave Chinner 76*30f712c9SDave Chinner /* 77*30f712c9SDave Chinner * Update the record referred to by cur to the value given. 
78*30f712c9SDave Chinner * This either works (return 0) or gets an EFSCORRUPTED error. 79*30f712c9SDave Chinner */ 80*30f712c9SDave Chinner STATIC int /* error */ 81*30f712c9SDave Chinner xfs_inobt_update( 82*30f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */ 83*30f712c9SDave Chinner xfs_inobt_rec_incore_t *irec) /* btree record */ 84*30f712c9SDave Chinner { 85*30f712c9SDave Chinner union xfs_btree_rec rec; 86*30f712c9SDave Chinner 87*30f712c9SDave Chinner rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); 88*30f712c9SDave Chinner rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); 89*30f712c9SDave Chinner rec.inobt.ir_free = cpu_to_be64(irec->ir_free); 90*30f712c9SDave Chinner return xfs_btree_update(cur, &rec); 91*30f712c9SDave Chinner } 92*30f712c9SDave Chinner 93*30f712c9SDave Chinner /* 94*30f712c9SDave Chinner * Get the data from the pointed-to record. 95*30f712c9SDave Chinner */ 96*30f712c9SDave Chinner int /* error */ 97*30f712c9SDave Chinner xfs_inobt_get_rec( 98*30f712c9SDave Chinner struct xfs_btree_cur *cur, /* btree cursor */ 99*30f712c9SDave Chinner xfs_inobt_rec_incore_t *irec, /* btree record */ 100*30f712c9SDave Chinner int *stat) /* output: success/failure */ 101*30f712c9SDave Chinner { 102*30f712c9SDave Chinner union xfs_btree_rec *rec; 103*30f712c9SDave Chinner int error; 104*30f712c9SDave Chinner 105*30f712c9SDave Chinner error = xfs_btree_get_rec(cur, &rec, stat); 106*30f712c9SDave Chinner if (!error && *stat == 1) { 107*30f712c9SDave Chinner irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); 108*30f712c9SDave Chinner irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); 109*30f712c9SDave Chinner irec->ir_free = be64_to_cpu(rec->inobt.ir_free); 110*30f712c9SDave Chinner } 111*30f712c9SDave Chinner return error; 112*30f712c9SDave Chinner } 113*30f712c9SDave Chinner 114*30f712c9SDave Chinner /* 115*30f712c9SDave Chinner * Insert a single inobt record. Cursor must already point to desired location. 
 */
STATIC int
xfs_inobt_insert_rec(
	struct xfs_btree_cur	*cur,
	__int32_t		freecount,
	xfs_inofree_t		free,
	int			*stat)
{
	/* ir_startino was already set by the preceding lookup. */
	cur->bc_rec.i.ir_freecount = freecount;
	cur->bc_rec.i.ir_free = free;
	return xfs_btree_insert(cur, stat);
}

/*
 * Insert records describing a newly allocated inode chunk into the btree
 * selected by @btnum.  One all-free record is inserted for every
 * XFS_INODES_PER_CHUNK inodes in [newino, newino + newlen).  The cursor is
 * torn down before returning on both success and error paths.
 */
STATIC int
xfs_inobt_insert(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_agino_t		newino,
	xfs_agino_t		newlen,
	xfs_btnum_t		btnum)
{
	struct xfs_btree_cur	*cur;
	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t		agno = be32_to_cpu(agi->agi_seqno);
	xfs_agino_t		thisino;
	int			i;
	int			error;

	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);

	for (thisino = newino;
	     thisino < newino + newlen;
	     thisino += XFS_INODES_PER_CHUNK) {
		error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
		if (error) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		/* The chunk is new, so no record may exist yet. */
		ASSERT(i == 0);

		error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
					     XFS_INOBT_ALL_FREE, &i);
		if (error) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		ASSERT(i == 1);
	}

	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);

	return 0;
}

/*
 * Verify that the number of free inodes in the AGI is correct.
 *
 * Debug-only sanity check: walks every record of the inode btree, sums the
 * per-record free counts and asserts the total matches agi_freecount.  Only
 * performed when the btree is a single level (root-only), so the walk stays
 * cheap; compiles to a no-op outside DEBUG builds.
 */
#ifdef DEBUG
STATIC int
xfs_check_agi_freecount(
	struct xfs_btree_cur	*cur,
	struct xfs_agi		*agi)
{
	if (cur->bc_nlevels == 1) {
		xfs_inobt_rec_incore_t rec;
		int		freecount = 0;
		int		error;
		int		i;

		error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
		if (error)
			return error;

		do {
			error = xfs_inobt_get_rec(cur, &rec, &i);
			if (error)
				return error;

			if (i) {
				freecount += rec.ir_freecount;
				error = xfs_btree_increment(cur, 0, &i);
				if (error)
					return error;
			}
		} while (i == 1);

		/* Skip the assert if the fs is already shutting down. */
		if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
			ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
	}
	return 0;
}
#else
#define xfs_check_agi_freecount(cur, agi)	0
#endif

/*
 * Initialise a new set of inodes. When called without a transaction context
 * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
 * than logging them (which in a transaction context puts them into the AIL
 * for writeback rather than the xfsbufd queue).
 */
int
xfs_ialloc_inode_init(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct list_head	*buffer_list,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_agblock_t		length,
	unsigned int		gen)
{
	struct xfs_buf		*fbuf;
	struct xfs_dinode	*free;
	int			nbufs, blks_per_cluster, inodes_per_cluster;
	int			version;
	int			i, j;
	xfs_daddr_t		d;
	xfs_ino_t		ino = 0;

	/*
	 * Loop over the new block(s), filling in the inodes.  For small block
	 * sizes, manipulate the inodes in buffers which are multiples of the
	 * block size.
	 */
	blks_per_cluster = xfs_icluster_size_fsb(mp);
	inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
	nbufs = length / blks_per_cluster;

	/*
	 * Figure out what version number to use in the inodes we create.  If
	 * the superblock version has caught up to the one that supports the new
	 * inode format, then use the new inode version.  Otherwise use the old
	 * version so that old kernels will continue to be able to use the file
	 * system.
	 *
	 * For v3 inodes, we also need to write the inode number into the inode,
	 * so calculate the first inode number of the chunk here as
	 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
	 * across multiple filesystem blocks (such as a cluster) and so cannot
	 * be used in the cluster buffer loop below.
	 *
	 * Further, because we are writing the inode directly into the buffer
	 * and calculating a CRC on the entire inode, we have to log the entire
	 * inode so that the entire range the CRC covers is present in the log.
	 * That means for v3 inodes we log the entire buffer rather than just
	 * the inode cores.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		version = 3;
		ino = XFS_AGINO_TO_INO(mp, agno,
				       XFS_OFFBNO_TO_AGINO(mp, agbno, 0));

		/*
		 * Log the initialisation that is about to take place as a
		 * logical operation. This means the transaction does not
		 * need to log the physical changes to the inode buffers as log
		 * recovery will know what initialisation is actually needed.
		 * Hence we only need to log the buffers as "ordered" buffers so
		 * they track in the AIL as if they were physically logged.
		 */
		if (tp)
			xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,
					mp->m_sb.sb_inodesize, length, gen);
	} else
		version = 2;

	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					 mp->m_bsize * blks_per_cluster,
					 XBF_UNMAPPED);
		if (!fbuf)
			return ENOMEM;

		/* Initialize the inode buffers and log them appropriately. */
		fbuf->b_ops = &xfs_inode_buf_ops;
		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
		for (i = 0; i < inodes_per_cluster; i++) {
			int	ioffset = i << mp->m_sb.sb_inodelog;
			uint	isize = xfs_dinode_size(version);

			free = xfs_make_iptr(mp, fbuf, i);
			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_version = version;
			free->di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);

			if (version == 3) {
				free->di_ino = cpu_to_be64(ino);
				ino++;
				uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
				xfs_dinode_calc_crc(mp, free);
			} else if (tp) {
				/* just log the inode core */
				xfs_trans_log_buf(tp, fbuf, ioffset,
						  ioffset + isize - 1);
			}
		}

		if (tp) {
			/*
			 * Mark the buffer as an inode allocation buffer so it
			 * sticks in AIL at the point of this allocation
			 * transaction. This ensures that they are on disk
			 * before the tail of the log can be moved past this
			 * transaction (i.e. by preventing relogging from
			 * moving it forward in the log).
			 */
			xfs_trans_inode_alloc_buf(tp, fbuf);
			if (version == 3) {
				/*
				 * Mark the buffer as ordered so that they are
				 * not physically logged in the transaction but
				 * still tracked in the AIL as part of the
				 * transaction and pin the log appropriately.
				 */
				xfs_trans_ordered_buf(tp, fbuf);
				xfs_trans_log_buf(tp, fbuf, 0,
						  BBTOB(fbuf->b_length) - 1);
			}
		} else {
			/* No transaction (recovery): delwri-queue the buffer. */
			fbuf->b_flags |= XBF_DONE;
			xfs_buf_delwri_queue(fbuf, buffer_list);
			xfs_buf_relse(fbuf);
		}
	}
	return 0;
}

/*
 * Allocate new inodes in the allocation group specified by agbp.
 * Return 0 for success, else error code.
 */
STATIC int				/* error code or 0 */
xfs_ialloc_ag_alloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_buf_t	*agbp,		/* alloc group buffer */
	int		*alloc)		/* output: 1 if inodes allocated */
{
	xfs_agi_t	*agi;		/* allocation group header */
	xfs_alloc_arg_t	args;		/* allocation argument structure */
	xfs_agnumber_t	agno;
	int		error;
	xfs_agino_t	newino;		/* new first inode's number */
	xfs_agino_t	newlen;		/* new number of inodes */
	int		isaligned = 0;	/* inode allocation at stripe unit */
					/* boundary */
	struct xfs_perag *pag;

	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = tp->t_mountp;

	/*
	 * Locking will ensure that we don't have two callers in here
	 * at one time.
	 */
	newlen = args.mp->m_ialloc_inos;
	if (args.mp->m_maxicount &&
	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
		return ENOSPC;
	args.minlen = args.maxlen = args.mp->m_ialloc_blks;
	/*
	 * First try to allocate inodes contiguous with the last-allocated
	 * chunk of inodes.  If the filesystem is striped, this will fill
	 * an entire stripe unit with inodes.
	 */
	agi = XFS_BUF_TO_AGI(agbp);
	newino = be32_to_cpu(agi->agi_newino);
	agno = be32_to_cpu(agi->agi_seqno);
	args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
		     args.mp->m_ialloc_blks;
	if (likely(newino != NULLAGINO &&
		  (args.agbno < be32_to_cpu(agi->agi_length)))) {
		args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
		args.type = XFS_ALLOCTYPE_THIS_BNO;
		args.prod = 1;

		/*
		 * We need to take into account alignment here to ensure that
		 * we don't modify the free list if we fail to have an exact
		 * block.  If we don't have an exact match, and every other
		 * allocation attempt fails, we'll end up cancelling a dirty
		 * transaction and shutting down.
		 *
		 * For an exact allocation, alignment must be 1,
		 * however we need to take cluster alignment into account when
		 * fixing up the freelist.  Use the minalignslop field to
		 * indicate that extra blocks might be required for alignment,
		 * but not to use them in the actual exact allocation.
		 */
		args.alignment = 1;
		args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;

		/* Allow space for the inode btree to split. */
		args.minleft = args.mp->m_in_maxlevels - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;

		/*
		 * This request might have dirtied the transaction if the AG
		 * can satisfy the request, but the exact block was not
		 * available. If the allocation did fail, subsequent requests
		 * will relax the exact agbno requirement and increase the
		 * alignment instead. It is critical that the total size of
		 * the request (len + alignment + slop) does not increase from
		 * this point on, so reset minalignslop to ensure it is not
		 * included in subsequent requests.
		 */
		args.minalignslop = 0;
	} else
		args.fsbno = NULLFSBLOCK;

	if (unlikely(args.fsbno == NULLFSBLOCK)) {
		/*
		 * Set the alignment for the allocation.
		 * If stripe alignment is turned on then align at stripe unit
		 * boundary.
		 * If the cluster size is smaller than a filesystem block
		 * then we're doing I/O for inodes in filesystem block size
		 * pieces, so don't need alignment anyway.
		 */
		isaligned = 0;
		if (args.mp->m_sinoalign) {
			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
			args.alignment = args.mp->m_dalign;
			isaligned = 1;
		} else
			args.alignment = xfs_ialloc_cluster_alignment(&args);
		/*
		 * Need to figure out where to allocate the inode blocks.
		 * Ideally they should be spaced out through the a.g.
		 * For now, just allocate blocks up front.
		 */
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
		/*
		 * Allocate a fixed-size extent of inodes.
		 */
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.prod = 1;
		/*
		 * Allow space for the inode btree to split.
		 */
		args.minleft = args.mp->m_in_maxlevels - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	/*
	 * If stripe alignment is turned on, then try again with cluster
	 * alignment.
	 */
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
		args.alignment = xfs_ialloc_cluster_alignment(&args);
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	/* All attempts failed: not an error, just no allocation. */
	if (args.fsbno == NULLFSBLOCK) {
		*alloc = 0;
		return 0;
	}
	ASSERT(args.len == args.minlen);

	/*
	 * Stamp and write the inode buffers.
	 *
	 * Seed the new inode cluster with a random generation number. This
	 * prevents short-term reuse of generation numbers if a chunk is
	 * freed and then immediately reallocated. We use random numbers
	 * rather than a linear progression to prevent the next generation
	 * number from being easily guessable.
	 */
	error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
			args.len, prandom_u32());

	if (error)
		return error;
	/*
	 * Convert the results.
	 */
	newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
	be32_add_cpu(&agi->agi_count, newlen);
	be32_add_cpu(&agi->agi_freecount, newlen);
	pag = xfs_perag_get(args.mp, agno);
	pag->pagi_freecount += newlen;
	xfs_perag_put(pag);
	agi->agi_newino = cpu_to_be32(newino);

	/*
	 * Insert records describing the new inode chunk into the btrees.
	 */
	error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
				 XFS_BTNUM_INO);
	if (error)
		return error;

	/* The free inode btree mirrors the chunk when the feature is enabled. */
	if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
		error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
					 XFS_BTNUM_FINO);
		if (error)
			return error;
	}
	/*
	 * Log allocation group header fields
	 */
	xfs_ialloc_log_agi(tp, agbp,
		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
	/*
	 * Modify/log superblock values for inode count and inode free count.
	 */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
	*alloc = 1;
	return 0;
}

/*
 * Advance and return the AG rotor used to spread new directories across
 * allocation groups; wraps back to AG 0 at m_maxagi.
 */
STATIC xfs_agnumber_t
xfs_ialloc_next_ag(
	xfs_mount_t	*mp)
{
	xfs_agnumber_t	agno;

	spin_lock(&mp->m_agirotor_lock);
	agno = mp->m_agirotor;
	if (++mp->m_agirotor >= mp->m_maxagi)
		mp->m_agirotor = 0;
	spin_unlock(&mp->m_agirotor_lock);

	return agno;
}

/*
 * Select an allocation group to look for a free inode in, based on the parent
 * inode and the mode.  Return the allocation group number.
 */
STATIC xfs_agnumber_t
xfs_ialloc_ag_select(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent directory inode number */
	umode_t		mode,		/* bits set to indicate file type */
	int		okalloc)	/* ok to allocate more space */
{
	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
	xfs_agnumber_t	agno;		/* current ag number */
	int		flags;		/* alloc buffer locking flags */
	xfs_extlen_t	ineed;		/* blocks needed for inode allocation */
	xfs_extlen_t	longest = 0;	/* longest extent available */
	xfs_mount_t	*mp;		/* mount point structure */
	int		needspace;	/* file mode implies space allocated */
	xfs_perag_t	*pag;		/* per allocation group data */
	xfs_agnumber_t	pagno;		/* parent (starting) ag number */
	int		error;

	/*
	 * Files of these types need at least one block if length > 0
	 * (and they won't fit in the inode, but that's hard to figure out).
	 */
	needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
	mp = tp->t_mountp;
	agcount = mp->m_maxagi;
	/*
	 * New directories start at the AG rotor to spread them out; other
	 * files start in the parent's AG.
	 */
	if (S_ISDIR(mode))
		pagno = xfs_ialloc_next_ag(mp);
	else {
		pagno = XFS_INO_TO_AGNO(mp, parent);
		if (pagno >= agcount)
			pagno = 0;
	}

	ASSERT(pagno < agcount);

	/*
	 * Loop through allocation groups, looking for one with a little
	 * free space in it.  Note we don't look for free inodes, exactly.
	 * Instead, we include whether there is a need to allocate inodes
	 * to mean that blocks must be allocated for them,
	 * if none are currently free.
	 */
	agno = pagno;
	flags = XFS_ALLOC_FLAG_TRYLOCK;
	for (;;) {
		pag = xfs_perag_get(mp, agno);
		if (!pag->pagi_inodeok) {
			/* Skip this AG and move the rotor past it too. */
			xfs_ialloc_next_ag(mp);
			goto nextag;
		}

		if (!pag->pagi_init) {
			error = xfs_ialloc_pagi_init(mp, tp, agno);
			if (error)
				goto nextag;
		}

		/* AG already has free inodes: use it. */
		if (pag->pagi_freecount) {
			xfs_perag_put(pag);
			return agno;
		}

		if (!okalloc)
			goto nextag;

		if (!pag->pagf_init) {
			error = xfs_alloc_pagf_init(mp, tp, agno, flags);
			if (error)
				goto nextag;
		}

		/*
		 * Is there enough free space for the file plus a block of
		 * inodes? (if we need to allocate some)?
		 */
		ineed = mp->m_ialloc_blks;
		longest = pag->pagf_longest;
		if (!longest)
			longest = pag->pagf_flcount > 0;

		if (pag->pagf_freeblks >= needspace + ineed &&
		    longest >= ineed) {
			xfs_perag_put(pag);
			return agno;
		}
nextag:
		xfs_perag_put(pag);
		/*
		 * No point in iterating over the rest, if we're shutting
		 * down.
		 */
		if (XFS_FORCED_SHUTDOWN(mp))
			return NULLAGNUMBER;
		agno++;
		if (agno >= agcount)
			agno = 0;
		/*
		 * Wrapped all the way around: retry once without the
		 * trylock flag, then give up.
		 */
		if (agno == pagno) {
			if (flags == 0)
				return NULLAGNUMBER;
			flags = 0;
		}
	}
}

/*
 * Try to retrieve the next record to the left/right from the current one.
 */
STATIC int
xfs_ialloc_next_rec(
	struct xfs_btree_cur	*cur,
	xfs_inobt_rec_incore_t	*rec,
	int			*done,	/* set when the btree is exhausted */
	int			left)	/* nonzero: step left (decrement) */
{
	int			error;
	int			i;

	if (left)
		error = xfs_btree_decrement(cur, 0, &i);
	else
		error = xfs_btree_increment(cur, 0, &i);

	if (error)
		return error;
	*done = !i;
	if (i) {
		error = xfs_inobt_get_rec(cur, rec, &i);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}

	return 0;
}

/*
 * Look up the record whose chunk starts exactly at agino.  *done is set
 * if no such record exists.
 */
STATIC int
xfs_ialloc_get_rec(
	struct xfs_btree_cur	*cur,
	xfs_agino_t		agino,
	xfs_inobt_rec_incore_t	*rec,
	int			*done)
{
	int			error;
	int			i;

	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
	if (error)
		return error;
	*done = !i;
	if (i) {
		error = xfs_inobt_get_rec(cur, rec, &i);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}

	return 0;
}

/*
 * Allocate an inode using the inobt-only algorithm.
 *
 * On success returns 0 and sets *inop to the allocated inode number;
 * otherwise returns the error from the underlying btree/AGI operations.
 */
STATIC int
xfs_dialloc_ag_inobt(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_ino_t		parent,
	xfs_ino_t		*inop)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t		agno = be32_to_cpu(agi->agi_seqno);
	xfs_agnumber_t		pagno = XFS_INO_TO_AGNO(mp, parent);
	xfs_agino_t		pagino = XFS_INO_TO_AGINO(mp, parent);
	struct xfs_perag	*pag;
	struct xfs_btree_cur	*cur, *tcur;
	struct xfs_inobt_rec_incore rec, trec;
	xfs_ino_t		ino;
	int			error;
	int			offset;
	int			i, j;

	pag = xfs_perag_get(mp, agno);

	/* Caller (AG selection) guarantees an initialized AG with free inodes. */
	ASSERT(pag->pagi_init);
	ASSERT(pag->pagi_inodeok);
	ASSERT(pag->pagi_freecount > 0);

restart_pagno:
	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error0;

	/*
	 * If in the same AG as the parent, try to get near the parent.
	 */
	if (pagno == agno) {
		int		doneleft;	/* done, to the left */
		int		doneright;	/* done, to the right */
		int		searchdistance = 10;

		error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);

		error = xfs_inobt_get_rec(cur, &rec, &j);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(j == 1, error0);

		if (rec.ir_freecount > 0) {
			/*
			 * Found a free inode in the same chunk
			 * as the parent, done.
			 */
			goto alloc_inode;
		}


		/*
		 * In the same AG as parent, but parent's chunk is full.
		 */

		/* duplicate the cursor, search left & right simultaneously */
		error = xfs_btree_dup_cursor(cur, &tcur);
		if (error)
			goto error0;

		/*
		 * Skip to last blocks looked up if same parent inode.
		 * pagl_* cache the per-AG state of a previous search from
		 * the same parent.
		 */
		if (pagino != NULLAGINO &&
		    pag->pagl_pagino == pagino &&
		    pag->pagl_leftrec != NULLAGINO &&
		    pag->pagl_rightrec != NULLAGINO) {
			error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
						   &trec, &doneleft);
			if (error)
				goto error1;

			error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
						   &rec, &doneright);
			if (error)
				goto error1;
		} else {
			/* search left with tcur, back up 1 record */
			error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
			if (error)
				goto error1;

			/* search right with cur, go forward 1 record. */
			error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
			if (error)
				goto error1;
		}

		/*
		 * Loop until we find an inode chunk with a free inode.
		 */
		while (!doneleft || !doneright) {
			int	useleft;  /* using left inode chunk this time */

			/* Bound the search; beyond that, fall back to newino. */
			if (!--searchdistance) {
				/*
				 * Not in range - save last search
				 * location and allocate a new inode
				 */
				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
				pag->pagl_leftrec = trec.ir_startino;
				pag->pagl_rightrec = rec.ir_startino;
				pag->pagl_pagino = pagino;
				goto newino;
			}

			/* figure out the closer block if both are valid. */
			if (!doneleft && !doneright) {
				useleft = pagino -
				 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
				  rec.ir_startino - pagino;
			} else {
				useleft = !doneleft;
			}

			/* free inodes to the left? */
			if (useleft && trec.ir_freecount) {
				/* tcur becomes the active cursor from here on. */
				rec = trec;
				xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
				cur = tcur;

				pag->pagl_leftrec = trec.ir_startino;
				pag->pagl_rightrec = rec.ir_startino;
				pag->pagl_pagino = pagino;
				goto alloc_inode;
			}

			/* free inodes to the right? */
			if (!useleft && rec.ir_freecount) {
				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

				pag->pagl_leftrec = trec.ir_startino;
				pag->pagl_rightrec = rec.ir_startino;
				pag->pagl_pagino = pagino;
				goto alloc_inode;
			}

			/* get next record to check */
			if (useleft) {
				error = xfs_ialloc_next_rec(tcur, &trec,
							    &doneleft, 1);
			} else {
				error = xfs_ialloc_next_rec(cur, &rec,
							    &doneright, 0);
			}
			if (error)
				goto error1;
		}

		/*
		 * We've reached the end of the btree. because
		 * we are only searching a small chunk of the
		 * btree each search, there is obviously free
		 * inodes closer to the parent inode than we
		 * are now. restart the search again.
		 */
		pag->pagl_pagino = NULLAGINO;
		pag->pagl_leftrec = NULLAGINO;
		pag->pagl_rightrec = NULLAGINO;
		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
		goto restart_pagno;
	}

	/*
	 * In a different AG from the parent.
	 * See if the most recently allocated block has any free.
	 */
newino:
	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
		error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
					 XFS_LOOKUP_EQ, &i);
		if (error)
			goto error0;

		if (i == 1) {
			error = xfs_inobt_get_rec(cur, &rec, &j);
			if (error)
				goto error0;

			if (j == 1 && rec.ir_freecount > 0) {
				/*
				 * The last chunk allocated in the group
				 * still has a free inode.
				 */
				goto alloc_inode;
			}
		}
	}

	/*
	 * None left in the last group, search the whole AG
	 */
	error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
	if (error)
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);

	for (;;) {
		error = xfs_inobt_get_rec(cur, &rec, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if (rec.ir_freecount > 0)
			break;
		error = xfs_btree_increment(cur, 0, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	}

alloc_inode:
	/* Claim the lowest free inode in the chosen chunk. */
	offset = xfs_lowbit64(rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
	rec.ir_free &= ~XFS_INOBT_MASK(offset);
	rec.ir_freecount--;
	error = xfs_inobt_update(cur, &rec);
	if (error)
		goto error0;
	/* Keep the AGI, perag, and superblock counters in sync. */
	be32_add_cpu(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	pag->pagi_freecount--;

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error0;

	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
	xfs_perag_put(pag);
	*inop = ino;
	return 0;
error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	xfs_perag_put(pag);
	return error;
}

/*
 * Use the free inode btree to allocate an inode based on distance from the
 * parent. Note that the provided cursor may be deleted and replaced.
 */
STATIC int
xfs_dialloc_ag_finobt_near(
	xfs_agino_t			pagino,
	struct xfs_btree_cur		**ocur,
	struct xfs_inobt_rec_incore	*rec)
{
	struct xfs_btree_cur		*lcur = *ocur;	/* left search cursor */
	struct xfs_btree_cur		*rcur;	/* right search cursor */
	struct xfs_inobt_rec_incore	rrec;
	int				error;
	int				i, j;

	error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
	if (error)
		return error;

	if (i == 1) {
		error = xfs_inobt_get_rec(lcur, rec, &i);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);

		/*
		 * See if we've landed in the parent inode record. The finobt
		 * only tracks chunks with at least one free inode, so record
		 * existence is enough.
		 */
		if (pagino >= rec->ir_startino &&
		    pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
			return 0;
	}

	/* No exact hit; also search to the right with a second cursor. */
	error = xfs_btree_dup_cursor(lcur, &rcur);
	if (error)
		return error;

	error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
	if (error)
		goto error_rcur;
	if (j == 1) {
		error = xfs_inobt_get_rec(rcur, &rrec, &j);
		if (error)
			goto error_rcur;
		XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
	}

	/* At least one side must have found a record. */
	XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
	if (i == 1 && j == 1) {
		/*
		 * Both the left and right records are valid. Choose the closer
		 * inode chunk to the target.  Exactly one cursor survives and
		 * is handed back via *ocur; the other is torn down here.
		 */
		if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
		    (rrec.ir_startino - pagino)) {
			*rec = rrec;
			xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
			*ocur = rcur;
		} else {
			xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
		}
	} else if (j == 1) {
		/* only the right record is valid */
		*rec = rrec;
		xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
		*ocur = rcur;
	} else if (i == 1) {
		/* only the left record is valid */
		xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
	}

	return 0;

error_rcur:
	xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Use the free inode btree to find a free inode based on a newino hint. If
 * the hint is NULL, find the first free inode in the AG.
1068*30f712c9SDave Chinner */ 1069*30f712c9SDave Chinner STATIC int 1070*30f712c9SDave Chinner xfs_dialloc_ag_finobt_newino( 1071*30f712c9SDave Chinner struct xfs_agi *agi, 1072*30f712c9SDave Chinner struct xfs_btree_cur *cur, 1073*30f712c9SDave Chinner struct xfs_inobt_rec_incore *rec) 1074*30f712c9SDave Chinner { 1075*30f712c9SDave Chinner int error; 1076*30f712c9SDave Chinner int i; 1077*30f712c9SDave Chinner 1078*30f712c9SDave Chinner if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { 1079*30f712c9SDave Chinner error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, 1080*30f712c9SDave Chinner &i); 1081*30f712c9SDave Chinner if (error) 1082*30f712c9SDave Chinner return error; 1083*30f712c9SDave Chinner if (i == 1) { 1084*30f712c9SDave Chinner error = xfs_inobt_get_rec(cur, rec, &i); 1085*30f712c9SDave Chinner if (error) 1086*30f712c9SDave Chinner return error; 1087*30f712c9SDave Chinner XFS_WANT_CORRUPTED_RETURN(i == 1); 1088*30f712c9SDave Chinner 1089*30f712c9SDave Chinner return 0; 1090*30f712c9SDave Chinner } 1091*30f712c9SDave Chinner } 1092*30f712c9SDave Chinner 1093*30f712c9SDave Chinner /* 1094*30f712c9SDave Chinner * Find the first inode available in the AG. 1095*30f712c9SDave Chinner */ 1096*30f712c9SDave Chinner error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 1097*30f712c9SDave Chinner if (error) 1098*30f712c9SDave Chinner return error; 1099*30f712c9SDave Chinner XFS_WANT_CORRUPTED_RETURN(i == 1); 1100*30f712c9SDave Chinner 1101*30f712c9SDave Chinner error = xfs_inobt_get_rec(cur, rec, &i); 1102*30f712c9SDave Chinner if (error) 1103*30f712c9SDave Chinner return error; 1104*30f712c9SDave Chinner XFS_WANT_CORRUPTED_RETURN(i == 1); 1105*30f712c9SDave Chinner 1106*30f712c9SDave Chinner return 0; 1107*30f712c9SDave Chinner } 1108*30f712c9SDave Chinner 1109*30f712c9SDave Chinner /* 1110*30f712c9SDave Chinner * Update the inobt based on a modification made to the finobt. 
 * Also ensure that
 * the records from both trees are equivalent post-modification.
 */
STATIC int
xfs_dialloc_ag_update_inobt(
	struct xfs_btree_cur		*cur,	/* inobt cursor */
	struct xfs_inobt_rec_incore	*frec,	/* finobt record */
	int				offset)	/* inode offset */
{
	struct xfs_inobt_rec_incore	rec;
	int				error;
	int				i;

	error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(i == 1);

	error = xfs_inobt_get_rec(cur, &rec, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(i == 1);
	ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);

	/* Apply the same bit-clear the finobt side already performed. */
	rec.ir_free &= ~XFS_INOBT_MASK(offset);
	rec.ir_freecount--;

	/* Both trees must now agree on the chunk's free state. */
	XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
				  (rec.ir_freecount == frec->ir_freecount));

	error = xfs_inobt_update(cur, &rec);
	if (error)
		return error;

	return 0;
}

/*
 * Allocate an inode using the free inode btree, if available.
 * Otherwise, fall
 * back to the inobt search algorithm.
 *
 * The caller selected an AG for us, and made sure that free inodes are
 * available.
 */
STATIC int
xfs_dialloc_ag(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_ino_t		parent,
	xfs_ino_t		*inop)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
	xfs_agnumber_t			pagno = XFS_INO_TO_AGNO(mp, parent);
	xfs_agino_t			pagino = XFS_INO_TO_AGINO(mp, parent);
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*cur;	/* finobt cursor */
	struct xfs_btree_cur		*icur;	/* inobt cursor */
	struct xfs_inobt_rec_incore	rec;
	xfs_ino_t			ino;
	int				error;
	int				offset;
	int				i;

	/* Without a free inode btree, use the plain inobt algorithm. */
	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
		return xfs_dialloc_ag_inobt(tp, agbp, parent, inop);

	pag = xfs_perag_get(mp, agno);

	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);

	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error_cur;

	/*
	 * The search algorithm depends on whether we're in the same AG as the
	 * parent. If so, find the closest available inode to the parent. If
	 * not, consider the agi hint or find the first free inode in the AG.
	 */
	if (agno == pagno)
		error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
	else
		error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
	if (error)
		goto error_cur;

	/* Claim the lowest free inode of the chunk found above. */
	offset = xfs_lowbit64(rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
	       XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);

	/*
	 * Modify or remove the finobt record.
	 */
	rec.ir_free &= ~XFS_INOBT_MASK(offset);
	rec.ir_freecount--;
	if (rec.ir_freecount)
		error = xfs_inobt_update(cur, &rec);
	else
		error = xfs_btree_delete(cur, &i);
	if (error)
		goto error_cur;

	/*
	 * The finobt has now been updated appropriately. We haven't updated the
	 * agi and superblock yet, so we can create an inobt cursor and validate
	 * the original freecount. If all is well, make the equivalent update to
	 * the inobt using the finobt record and offset information.
	 */
	icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);

	error = xfs_check_agi_freecount(icur, agi);
	if (error)
		goto error_icur;

	error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
	if (error)
		goto error_icur;

	/*
	 * Both trees have now been updated. We must update the perag and
	 * superblock before we can check the freecount for each btree.
	 */
	be32_add_cpu(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	pag->pagi_freecount--;

	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);

	error = xfs_check_agi_freecount(icur, agi);
	if (error)
		goto error_icur;
	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error_icur;

	xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_perag_put(pag);
	*inop = ino;
	return 0;

error_icur:
	xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
error_cur:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	xfs_perag_put(pag);
	return error;
}

/*
 * Allocate an inode on disk.
 *
 * Mode is used to tell whether the new inode will need space, and whether it
 * is a directory.
 *
 * This function is designed to be called twice if it has to do an allocation
 * to make more free inodes. On the first call, *IO_agbp should be set to NULL.
 * If an inode is available without having to perform an allocation, an inode
 * number is returned.
 * In this case, *IO_agbp is set to NULL. If an allocation
 * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
 * The caller should then commit the current transaction, allocate a
 * new transaction, and call xfs_dialloc() again, passing in the previous value
 * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
 * buffer is locked across the two calls, the second call is guaranteed to have
 * a free inode available.
 *
 * Once we successfully pick an inode its number is returned and the on-disk
 * data structures are updated. The inode itself is not read in, since doing so
 * would break ordering constraints with xfs_reclaim.
 *
 * Returns 0 on success (*inop set to the new inode number, or NULLFSINO if
 * no inode could be found/allocated), or a positive errno on failure (this
 * file's historical convention).
 */
int
xfs_dialloc(
	struct xfs_trans	*tp,
	xfs_ino_t		parent,
	umode_t			mode,
	int			okalloc,	/* ok to allocate more inode chunks */
	struct xfs_buf		**IO_agbp,	/* in/out: AGI buffer held across calls */
	xfs_ino_t		*inop)		/* out: allocated inode number */
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	xfs_agnumber_t		agno;
	int			error;
	int			ialloced;
	int			noroom = 0;	/* set when inode count ceiling hit */
	xfs_agnumber_t		start_agno;
	struct xfs_perag	*pag;

	if (*IO_agbp) {
		/*
		 * If the caller passes in a pointer to the AGI buffer,
		 * continue where we left off before.  In this case, we
		 * know that the allocation group has free inodes.
		 */
		agbp = *IO_agbp;
		goto out_alloc;
	}

	/*
	 * We do not have an agbp, so select an initial allocation
	 * group for inode allocation.
	 */
	start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
	if (start_agno == NULLAGNUMBER) {
		/* No suitable AG at all: report "no inode", not an error. */
		*inop = NULLFSINO;
		return 0;
	}

	/*
	 * If we have already hit the ceiling of inode blocks then clear
	 * okalloc so we scan all available agi structures for a free
	 * inode.
	 */
	if (mp->m_maxicount &&
	    mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
		noroom = 1;
		okalloc = 0;
	}

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes.  Iterate through the
	 * allocation groups upward, wrapping at the end.
	 */
	agno = start_agno;
	for (;;) {
		pag = xfs_perag_get(mp, agno);
		if (!pag->pagi_inodeok) {
			/* AG excluded from inode allocation; rotate and move on. */
			xfs_ialloc_next_ag(mp);
			goto nextag;
		}

		if (!pag->pagi_init) {
			/* Lazily initialise the per-AG inode counters. */
			error = xfs_ialloc_pagi_init(mp, tp, agno);
			if (error)
				goto out_error;
		}

		/*
		 * Do a first racy fast path check if this AG is usable.
		 */
		if (!pag->pagi_freecount && !okalloc)
			goto nextag;

		/*
		 * Then read in the AGI buffer and recheck with the AGI buffer
		 * lock held.
		 */
		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
		if (error)
			goto out_error;

		if (pag->pagi_freecount) {
			xfs_perag_put(pag);
			goto out_alloc;
		}

		if (!okalloc)
			goto nextag_relse_buffer;

		/* No free inodes here; try to allocate a new inode chunk. */
		error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
		if (error) {
			xfs_trans_brelse(tp, agbp);

			if (error != ENOSPC)
				goto out_error;

			/* This AG is full; treat as "no inode found". */
			xfs_perag_put(pag);
			*inop = NULLFSINO;
			return 0;
		}

		if (ialloced) {
			/*
			 * We successfully allocated some inodes, return
			 * the current context to the caller so that it
			 * can commit the current transaction and call
			 * us again where we left off.
			 */
			ASSERT(pag->pagi_freecount > 0);
			xfs_perag_put(pag);

			/* Hand the locked AGI buffer back for the second call. */
			*IO_agbp = agbp;
			*inop = NULLFSINO;
			return 0;
		}

nextag_relse_buffer:
		xfs_trans_brelse(tp, agbp);
nextag:
		xfs_perag_put(pag);
		if (++agno == mp->m_sb.sb_agcount)
			agno = 0;
		if (agno == start_agno) {
			/*
			 * Wrapped all the way around: ENOSPC only if we were
			 * prevented from allocating by the global ceiling.
			 */
			*inop = NULLFSINO;
			return noroom ? ENOSPC : 0;
		}
	}

out_alloc:
	*IO_agbp = NULL;
	return xfs_dialloc_ag(tp, agbp, parent, inop);
out_error:
	xfs_perag_put(pag);
	return error;
}
/*
 * Free the inode 'agino' in the inode btree rooted at the AGI in 'agbp'.
 *
 * Marks the inode free in the (in-core copy of the) inobt record, and either
 * updates the record in place or — when the whole chunk becomes free and
 * XFS_MOUNT_IKEEP is not set — deletes the record and schedules the chunk's
 * disk space for freeing.  *deleted reports which path was taken, *first_ino
 * is the first inode of a deleted chunk, and *orec returns the record as it
 * looked after the free (used by the caller to update the finobt).
 * Returns 0 or a positive errno (this file's historical convention).
 */
STATIC int
xfs_difree_inobt(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agino_t			agino,
	struct xfs_bmap_free		*flist,
	int				*deleted,
	xfs_ino_t			*first_ino,
	struct xfs_inobt_rec_incore	*orec)
{
	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*cur;
	struct xfs_inobt_rec_incore	rec;
	int				ilen;
	int				error;
	int				i;
	int				off;

	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
	ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));

	/*
	 * Initialize the cursor.
	 */
	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error0;

	/*
	 * Look for the entry describing this inode.
	 */
	if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
		xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
			__func__, error);
		goto error0;
	}
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	error = xfs_inobt_get_rec(cur, &rec, &i);
	if (error) {
		xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
			__func__, error);
		goto error0;
	}
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	/*
	 * Get the offset in the inode chunk.
	 */
	off = agino - rec.ir_startino;
	ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
	/* Freeing an already-free inode would corrupt the counts. */
	ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
	/*
	 * Mark the inode free & increment the count.
	 */
	rec.ir_free |= XFS_INOBT_MASK(off);
	rec.ir_freecount++;

	/*
	 * When an inode cluster is free, it becomes eligible for removal
	 */
	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
	    (rec.ir_freecount == mp->m_ialloc_inos)) {

		*deleted = 1;
		*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);

		/*
		 * Remove the inode cluster from the AGI B+Tree, adjust the
		 * AGI and Superblock inode counts, and mark the disk space
		 * to be freed when the transaction is committed.
		 *
		 * Note: the freecount deltas are (ilen - 1) because the inode
		 * being freed here was never counted free; the other ilen - 1
		 * inodes of the chunk were.
		 */
		ilen = mp->m_ialloc_inos;
		be32_add_cpu(&agi->agi_count, -ilen);
		be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
		pag = xfs_perag_get(mp, agno);
		pag->pagi_freecount -= ilen - 1;
		xfs_perag_put(pag);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));

		if ((error = xfs_btree_delete(cur, &i))) {
			xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
				__func__, error);
			goto error0;
		}

		/* Defer freeing the chunk's blocks to transaction commit. */
		xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
				XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
				mp->m_ialloc_blks, flist, mp);
	} else {
		*deleted = 0;

		error = xfs_inobt_update(cur, &rec);
		if (error) {
			xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
				__func__, error);
			goto error0;
		}

		/*
		 * Change the inode free counts and log the ag/sb changes.
		 */
		be32_add_cpu(&agi->agi_freecount, 1);
		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
		pag = xfs_perag_get(mp, agno);
		pag->pagi_freecount++;
		xfs_perag_put(pag);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
	}

	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error0;

	*orec = rec;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;

error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
/*
 * Free an inode in the free inode btree.
 *
 * 'ibtrec' is the inobt record as updated by xfs_difree_inobt() for the same
 * free; the finobt is modified independently and then cross-checked against
 * it so redundant-metadata corruption is detected rather than propagated.
 * Returns 0 or a positive errno (this file's historical convention).
 */
STATIC int
xfs_difree_finobt(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agino_t			agino,
	struct xfs_inobt_rec_incore	*ibtrec) /* inobt record */
{
	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp);
	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno);
	struct xfs_btree_cur		*cur;
	struct xfs_inobt_rec_incore	rec;
	int				offset = agino - ibtrec->ir_startino;
	int				error;
	int				i;

	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO);

	error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
	if (error)
		goto error;
	if (i == 0) {
		/*
		 * If the record does not exist in the finobt, we must have just
		 * freed an inode in a previously fully allocated chunk. If not,
		 * something is out of sync.
		 */
		XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);

		/* Chunk just gained its first free inode: add it to the finobt. */
		error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
					     ibtrec->ir_free, &i);
		if (error)
			goto error;
		ASSERT(i == 1);

		goto out;
	}

	/*
	 * Read and update the existing record. We could just copy the ibtrec
	 * across here, but that would defeat the purpose of having redundant
	 * metadata. By making the modifications independently, we can catch
	 * corruptions that we wouldn't see if we just copied from one record
	 * to another.
	 */
	error = xfs_inobt_get_rec(cur, &rec, &i);
	if (error)
		goto error;
	XFS_WANT_CORRUPTED_GOTO(i == 1, error);

	rec.ir_free |= XFS_INOBT_MASK(offset);
	rec.ir_freecount++;

	/* Independently-updated finobt record must now match the inobt one. */
	XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
				(rec.ir_freecount == ibtrec->ir_freecount),
				error);

	/*
	 * The content of inobt records should always match between the inobt
	 * and finobt. The lifecycle of records in the finobt is different from
	 * the inobt in that the finobt only tracks records with at least one
	 * free inode. Hence, if all of the inodes are free and we aren't
	 * keeping inode chunks permanently on disk, remove the record.
	 * Otherwise, update the record with the new information.
	 */
	if (rec.ir_freecount == mp->m_ialloc_inos &&
	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		error = xfs_btree_delete(cur, &i);
		if (error)
			goto error;
		ASSERT(i == 1);
	} else {
		error = xfs_inobt_update(cur, &rec);
		if (error)
			goto error;
	}

out:
	error = xfs_check_agi_freecount(cur, agi);
	if (error)
		goto error;

	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;

error:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
/*
 * Free disk inode.  Carefully avoids touching the incore inode, all
 * manipulations incore are the caller's responsibility.
 * The on-disk inode is not changed by this operation, only the
 * btree (free inode mask) is changed.
 *
 * Validates the inode number, reads the AGI, then updates the inobt and
 * (if the filesystem has one) the finobt.  Returns 0 or a positive errno
 * (this file's historical convention).
 */
int
xfs_difree(
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_ino_t		inode,		/* inode to be freed */
	struct xfs_bmap_free	*flist,		/* extents to free */
	int			*deleted,/* set if inode cluster was deleted */
	xfs_ino_t		*first_ino)/* first inode in deleted cluster */
{
	/* REFERENCED */
	xfs_agblock_t		agbno;	/* block number containing inode */
	struct xfs_buf		*agbp;	/* buffer for allocation group header */
	xfs_agino_t		agino;	/* allocation group inode number */
	xfs_agnumber_t		agno;	/* allocation group number */
	int			error;	/* error return value */
	struct xfs_mount	*mp;	/* mount structure for filesystem */
	struct xfs_inobt_rec_incore rec;/* btree record */

	mp = tp->t_mountp;

	/*
	 * Break up inode number into its components.  Each component is
	 * range-checked; a failure here indicates a caller bug or on-disk
	 * corruption, hence the ASSERTs.
	 */
	agno = XFS_INO_TO_AGNO(mp, inode);
	if (agno >= mp->m_sb.sb_agcount) {
		xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
			__func__, agno, mp->m_sb.sb_agcount);
		ASSERT(0);
		return EINVAL;
	}
	agino = XFS_INO_TO_AGINO(mp, inode);
	if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
		xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
			__func__, (unsigned long long)inode,
			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
		ASSERT(0);
		return EINVAL;
	}
	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
	if (agbno >= mp->m_sb.sb_agblocks) {
		xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
			__func__, agbno, mp->m_sb.sb_agblocks);
		ASSERT(0);
		return EINVAL;
	}
	/*
	 * Get the allocation group header.
	 */
	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
	if (error) {
		xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
			__func__, error);
		return error;
	}

	/*
	 * Fix up the inode allocation btree.
	 */
	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
				 &rec);
	if (error)
		goto error0;

	/*
	 * Fix up the free inode btree.
	 */
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		error = xfs_difree_finobt(mp, tp, agbp, agino, &rec);
		if (error)
			goto error0;
	}

	return 0;

error0:
	return error;
}
/*
 * Look up the inode chunk that the given agino lives in via the inode btree,
 * validating that the inode actually exists (and, for untrusted lookups, is
 * allocated).  On success returns the chunk's first AG block in *chunk_agbno
 * and the inode's block offset within the chunk in *offset_agbno.
 * Returns 0 or a positive errno (this file's historical convention).
 */
STATIC int
xfs_imap_lookup(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino,
	xfs_agblock_t		agbno,
	xfs_agblock_t		*chunk_agbno,
	xfs_agblock_t		*offset_agbno,
	int			flags)
{
	struct xfs_inobt_rec_incore rec;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agbp;
	int			error;
	int			i;

	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
	if (error) {
		xfs_alert(mp,
			"%s: xfs_ialloc_read_agi() returned error %d, agno %d",
			__func__, error, agno);
		return error;
	}

	/*
	 * Lookup the inode record for the given agino. If the record cannot be
	 * found, then it's an invalid inode number and we should abort. Once
	 * we have a record, we need to ensure it contains the inode number
	 * we are looking up.
	 */
	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
	if (!error) {
		if (i)
			error = xfs_inobt_get_rec(cur, &rec, &i);
		if (!error && i == 0)
			error = EINVAL;
	}

	/* Release buffer and cursor before inspecting the (copied) record. */
	xfs_trans_brelse(tp, agbp);
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	if (error)
		return error;

	/* check that the returned record contains the required inode */
	if (rec.ir_startino > agino ||
	    rec.ir_startino + mp->m_ialloc_inos <= agino)
		return EINVAL;

	/* for untrusted inodes check it is allocated first */
	if ((flags & XFS_IGET_UNTRUSTED) &&
	    (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
		return EINVAL;

	*chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
	*offset_agbno = agbno - *chunk_agbno;
	return 0;
}
/*
 * Return the location of the inode in imap, for mapping it into a buffer.
 *
 * Fills in imap->im_blkno (daddr of the containing cluster buffer),
 * im_len (buffer length in BBs) and im_boffset (byte offset of the inode
 * within that buffer).  Untrusted inode numbers (XFS_IGET_UNTRUSTED) are
 * verified against the inode btree first.  Returns 0 or a positive errno
 * (this file's historical convention).
 */
int
xfs_imap(
	xfs_mount_t	 *mp,	/* file system mount structure */
	xfs_trans_t	 *tp,	/* transaction pointer */
	xfs_ino_t	ino,	/* inode to locate */
	struct xfs_imap	*imap,	/* location map structure */
	uint		flags)	/* flags for inode btree lookup */
{
	xfs_agblock_t	agbno;	/* block number of inode in the alloc group */
	xfs_agino_t	agino;	/* inode number within alloc group */
	xfs_agnumber_t	agno;	/* allocation group number */
	int		blks_per_cluster; /* num blocks per inode cluster */
	xfs_agblock_t	chunk_agbno;	/* first block in inode chunk */
	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
	int		error;	/* error code */
	int		offset;	/* index of inode in its buffer */
	xfs_agblock_t	offset_agbno;	/* blks from chunk start to inode */

	ASSERT(ino != NULLFSINO);

	/*
	 * Split up the inode number into its parts.
	 */
	agno = XFS_INO_TO_AGNO(mp, ino);
	agino = XFS_INO_TO_AGINO(mp, ino);
	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
		/*
		 * Don't output diagnostic information for untrusted inodes
		 * as they can be invalid without implying corruption.
		 */
		if (flags & XFS_IGET_UNTRUSTED)
			return EINVAL;
		if (agno >= mp->m_sb.sb_agcount) {
			xfs_alert(mp,
				"%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
				__func__, agno, mp->m_sb.sb_agcount);
		}
		if (agbno >= mp->m_sb.sb_agblocks) {
			xfs_alert(mp,
		"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
				__func__, (unsigned long long)agbno,
				(unsigned long)mp->m_sb.sb_agblocks);
		}
		if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
			xfs_alert(mp,
		"%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
				__func__, ino,
				XFS_AGINO_TO_INO(mp, agno, agino));
		}
		xfs_stack_trace();
#endif /* DEBUG */
		return EINVAL;
	}

	blks_per_cluster = xfs_icluster_size_fsb(mp);

	/*
	 * For bulkstat and handle lookups, we have an untrusted inode number
	 * that we have to verify is valid. We cannot do this just by reading
	 * the inode buffer as it may have been unlinked and removed leaving
	 * inodes in stale state on disk. Hence we have to do a btree lookup
	 * in all cases where an untrusted inode number is passed.
	 */
	if (flags & XFS_IGET_UNTRUSTED) {
		error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
					&chunk_agbno, &offset_agbno, flags);
		if (error)
			return error;
		goto out_map;
	}

	/*
	 * If the inode cluster size is the same as the blocksize or
	 * smaller we get to the buffer by simple arithmetics.
	 */
	if (blks_per_cluster == 1) {
		offset = XFS_INO_TO_OFFSET(mp, ino);
		ASSERT(offset < mp->m_sb.sb_inopblock);

		imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
		imap->im_len = XFS_FSB_TO_BB(mp, 1);
		imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
		return 0;
	}

	/*
	 * If the inode chunks are aligned then use simple maths to
	 * find the location. Otherwise we have to do a btree
	 * lookup to find the location.
	 */
	if (mp->m_inoalign_mask) {
		offset_agbno = agbno & mp->m_inoalign_mask;
		chunk_agbno = agbno - offset_agbno;
	} else {
		error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
					&chunk_agbno, &offset_agbno, flags);
		if (error)
			return error;
	}

out_map:
	ASSERT(agbno >= chunk_agbno);
	/* Round down to the cluster containing the inode's block. */
	cluster_agbno = chunk_agbno +
		((offset_agbno / blks_per_cluster) * blks_per_cluster);
	offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
		XFS_INO_TO_OFFSET(mp, ino);

	imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
	imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
	imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);

	/*
	 * If the inode number maps to a block outside the bounds
	 * of the file system then return NULL rather than calling
	 * read_buf and panicing when we get an error from the
	 * driver.
	 */
	if ((imap->im_blkno + imap->im_len) >
	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
		xfs_alert(mp,
	"%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
			__func__, (unsigned long long) imap->im_blkno,
			(unsigned long long) imap->im_len,
			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
		return EINVAL;
	}
	return 0;
}
1917*30f712c9SDave Chinner */ 1918*30f712c9SDave Chinner if ((imap->im_blkno + imap->im_len) > 1919*30f712c9SDave Chinner XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1920*30f712c9SDave Chinner xfs_alert(mp, 1921*30f712c9SDave Chinner "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", 1922*30f712c9SDave Chinner __func__, (unsigned long long) imap->im_blkno, 1923*30f712c9SDave Chinner (unsigned long long) imap->im_len, 1924*30f712c9SDave Chinner XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1925*30f712c9SDave Chinner return EINVAL; 1926*30f712c9SDave Chinner } 1927*30f712c9SDave Chinner return 0; 1928*30f712c9SDave Chinner } 1929*30f712c9SDave Chinner 1930*30f712c9SDave Chinner /* 1931*30f712c9SDave Chinner * Compute and fill in value of m_in_maxlevels. 1932*30f712c9SDave Chinner */ 1933*30f712c9SDave Chinner void 1934*30f712c9SDave Chinner xfs_ialloc_compute_maxlevels( 1935*30f712c9SDave Chinner xfs_mount_t *mp) /* file system mount structure */ 1936*30f712c9SDave Chinner { 1937*30f712c9SDave Chinner int level; 1938*30f712c9SDave Chinner uint maxblocks; 1939*30f712c9SDave Chinner uint maxleafents; 1940*30f712c9SDave Chinner int minleafrecs; 1941*30f712c9SDave Chinner int minnoderecs; 1942*30f712c9SDave Chinner 1943*30f712c9SDave Chinner maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >> 1944*30f712c9SDave Chinner XFS_INODES_PER_CHUNK_LOG; 1945*30f712c9SDave Chinner minleafrecs = mp->m_alloc_mnr[0]; 1946*30f712c9SDave Chinner minnoderecs = mp->m_alloc_mnr[1]; 1947*30f712c9SDave Chinner maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1948*30f712c9SDave Chinner for (level = 1; maxblocks > 1; level++) 1949*30f712c9SDave Chinner maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1950*30f712c9SDave Chinner mp->m_in_maxlevels = level; 1951*30f712c9SDave Chinner } 1952*30f712c9SDave Chinner 1953*30f712c9SDave Chinner /* 1954*30f712c9SDave Chinner * Log specified fields for the ag hdr (inode section). 
The growth of the agi
 * structure over time requires that we interpret the buffer as two logical
 * regions delineated by the end of the unlinked list. This is due to the size
 * of the hash table and its location in the middle of the agi.
 *
 * For example, a request to log a field before agi_unlinked and a field after
 * agi_unlinked could cause us to log the entire hash table and use an excessive
 * amount of log space. To avoid this behavior, log the region up through
 * agi_unlinked in one call and the region after agi_unlinked through the end of
 * the structure in another.
 */
void
xfs_ialloc_log_agi(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_buf_t	*bp,		/* allocation group header buffer */
	int		fields)		/* bitmask of fields to log */
{
	int			first;		/* first byte number */
	int			last;		/* last byte number */
	static const short	offsets[] = {	/* field starting offsets */
					/* keep in sync with bit definitions */
		offsetof(xfs_agi_t, agi_magicnum),
		offsetof(xfs_agi_t, agi_versionnum),
		offsetof(xfs_agi_t, agi_seqno),
		offsetof(xfs_agi_t, agi_length),
		offsetof(xfs_agi_t, agi_count),
		offsetof(xfs_agi_t, agi_root),
		offsetof(xfs_agi_t, agi_level),
		offsetof(xfs_agi_t, agi_freecount),
		offsetof(xfs_agi_t, agi_newino),
		offsetof(xfs_agi_t, agi_dirino),
		offsetof(xfs_agi_t, agi_unlinked),
		offsetof(xfs_agi_t, agi_free_root),
		offsetof(xfs_agi_t, agi_free_level),
		/* sentinel: end of structure, bounds the last field */
		sizeof(xfs_agi_t)
	};
#ifdef DEBUG
	xfs_agi_t		*agi;	/* allocation group header */

	agi = XFS_BUF_TO_AGI(bp);
	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif

	/* Tag the buffer so log recovery knows to use the AGI verifier. */
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);

	/*
	 * Compute byte offsets for the first and last fields in the first
	 * region and log the agi buffer. This only logs up through
	 * agi_unlinked.
	 */
	if (fields & XFS_AGI_ALL_BITS_R1) {
		xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
				  &first, &last);
		xfs_trans_log_buf(tp, bp, first, last);
	}

	/*
	 * Mask off the bits in the first region and calculate the first and
	 * last field offsets for any bits in the second region.
	 */
	fields &= ~XFS_AGI_ALL_BITS_R1;
	if (fields) {
		xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
				  &first, &last);
		xfs_trans_log_buf(tp, bp, first, last);
	}
}

#ifdef DEBUG
/*
 * Debug-only sanity check of the AGI unlinked hash buckets.
 *
 * NOTE(review): this only asserts each bucket is non-zero; presumably
 * empty buckets hold NULLAGINO (all ones) rather than 0, making 0 an
 * invalid on-disk value — confirm against the on-disk format docs.
 */
STATIC void
xfs_check_agi_unlinked(
	struct xfs_agi	*agi)
{
	int		i;

	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
		ASSERT(agi->agi_unlinked[i]);
}
#else
/* Compiles away entirely in non-DEBUG builds. */
#define xfs_check_agi_unlinked(agi)
#endif

/*
 * Structural sanity check of an AGI buffer, shared by the read and
 * write verifiers.  Returns true if the header looks valid.  CRC
 * verification is NOT done here; callers handle it separately.
 */
static bool
xfs_agi_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_agi	*agi = XFS_BUF_TO_AGI(bp);

	/* On v5 (CRC) filesystems the AGI must carry the fs UUID. */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
		return false;
	/*
	 * Validate the magic number of the agi block.
	 */
	if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
		return false;
	if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
		return false;

	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
		return false;

	/* DEBUG-only bucket sanity; asserts rather than returning false. */
	xfs_check_agi_unlinked(agi);
	return true;
}

/*
 * Buffer read verifier for the AGI.
 *
 * Check the CRC first (v5 filesystems only) so a media/torn-write
 * error reports as EFSBADCRC, then do the structural checks, which
 * report EFSCORRUPTED.  XFS_TEST_ERROR allows error injection to
 * exercise the corruption path.  Any error is latched into the buffer
 * and reported via xfs_verifier_error().
 */
static void
xfs_agi_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
		xfs_buf_ioerror(bp, EFSBADCRC);
	else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
				XFS_ERRTAG_IALLOC_READ_AGI,
				XFS_RANDOM_IALLOC_READ_AGI))
		xfs_buf_ioerror(bp, EFSCORRUPTED);

	if (bp->b_error)
		xfs_verifier_error(bp);
}

/*
 * Buffer write verifier for the AGI: re-check structure before the
 * buffer goes to disk, then (on CRC filesystems) stamp the LSN and
 * recompute the checksum.
 */
static void
xfs_agi_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* Never write out a structurally invalid AGI. */
	if (!xfs_agi_verify(bp)) {
		xfs_buf_ioerror(bp, EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	/* Pre-CRC (v4) filesystems carry no LSN or checksum in the AGI. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	/*
	 * Stamp the LSN of the last modification before checksumming so
	 * log recovery can tell whether the on-disk copy is up to date.
	 */
	if (bip)
		XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
}

/* Verifier operations attached to every AGI buffer. */
const struct xfs_buf_ops xfs_agi_buf_ops = {
	.verify_read = xfs_agi_read_verify,
	.verify_write = xfs_agi_write_verify,
};

/*
 * Read in the allocation group header (inode allocation section)
 *
 * On success *bpp holds a referenced, verified AGI buffer; the caller
 * is responsible for releasing it.
 */
int
xfs_read_agi(
	struct xfs_mount	*mp,	/* file system mount structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	struct xfs_buf		**bpp)	/* allocation group hdr buf */
{
	int			error;

	trace_xfs_read_agi(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error =
xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
	if (error)
		return error;

	/* AGI headers are hot metadata; keep them in cache longer. */
	xfs_buf_set_ref(*bpp, XFS_AGI_REF);
	return 0;
}

/*
 * Read the AGI and, on first use, initialise the in-core per-ag inode
 * counters from it.  Returns with the AGI buffer held in *bpp.
 */
int
xfs_ialloc_read_agi(
	struct xfs_mount	*mp,	/* file system mount structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	struct xfs_buf		**bpp)	/* allocation group hdr buf */
{
	struct xfs_agi		*agi;	/* allocation group header */
	struct xfs_perag	*pag;	/* per allocation group data */
	int			error;

	trace_xfs_ialloc_read_agi(mp, agno);

	error = xfs_read_agi(mp, tp, agno, bpp);
	if (error)
		return error;

	agi = XFS_BUF_TO_AGI(*bpp);
	pag = xfs_perag_get(mp, agno);
	/* First reader seeds the per-ag counters from the on-disk AGI. */
	if (!pag->pagi_init) {
		pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
		pag->pagi_count = be32_to_cpu(agi->agi_count);
		pag->pagi_init = 1;
	}

	/*
	 * It's possible for these to be out of sync if
	 * we are in the middle of a forced shutdown.
	 */
	ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
		XFS_FORCED_SHUTDOWN(mp));
	xfs_perag_put(pag);
	return 0;
}

/*
 * Read in the agi to initialise the per-ag data in the mount structure
 *
 * The buffer itself is not needed by the caller, so it is released
 * immediately; only the pag->pagi_* initialisation side effect of
 * xfs_ialloc_read_agi() is wanted.
 */
int
xfs_ialloc_pagi_init(
	xfs_mount_t	*mp,		/* file system mount structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno)		/* allocation group number */
{
	xfs_buf_t	*bp = NULL;
	int		error;

	error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
	if (error)
		return error;
	if (bp)
		xfs_trans_brelse(tp, bp);
	return 0;
}