/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include <linux/log2.h>

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_utils.h"
#include "xfs_quota.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"

kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;

/*
 * Used in xfs_itruncate_extents().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
#define	XFS_ITRUNC_MAX_EXTENTS	2

STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);

/*
 * Helper function to extract the extent size hint from an inode.
 */
xfs_extlen_t
xfs_get_extsz_hint(
	struct xfs_inode	*ip)
{
	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
		return ip->i_d.di_extsize;
	if (XFS_IS_REALTIME_INODE(ip))
		return ip->i_mount->m_sb.sb_rextsize;
	return 0;
}

/*
 * This is a wrapper routine around the xfs_ilock() routine used to centralize
 * some grungy code.  It is used in places that wish to lock the inode solely
 * for reading the extents.  The reason these places can't just call
 * xfs_ilock(SHARED) is that the inode lock also guards the bringing in of the
 * extents from disk for a file in b-tree format.  If the inode is in b-tree
 * format, then we need to lock the inode exclusively until the extents are
 * read in.  Locking it exclusively all the time would limit our parallelism
 * unnecessarily, though.  What we do instead is check to see if the extents
 * have been read in yet, and only lock the inode exclusively if they have not.
 *
 * The function returns a value which should be given to the corresponding
 * xfs_iunlock_map_shared().  This value is the mode in which the lock was
 * actually taken.
 */
uint
xfs_ilock_map_shared(
	xfs_inode_t	*ip)
{
	uint	lock_mode;

	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
		lock_mode = XFS_ILOCK_EXCL;
	} else {
		lock_mode = XFS_ILOCK_SHARED;
	}

	xfs_ilock(ip, lock_mode);

	return lock_mode;
}

/*
 * This is simply the unlock routine to go with xfs_ilock_map_shared().
 * All it does is call xfs_iunlock() with the given lock_mode.
 */
void
xfs_iunlock_map_shared(
	xfs_inode_t	*ip,
	unsigned int	lock_mode)
{
	xfs_iunlock(ip, lock_mode);
}
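
/*
 * Editor's illustrative sketch (not part of the original source): a
 * hypothetical caller showing the intended pairing of
 * xfs_ilock_map_shared() and xfs_iunlock_map_shared().  The returned
 * lock_mode must be handed back to the unlock routine because the lock
 * may have been taken either shared or exclusive.
 */
#if 0	/* example only, never compiled */
STATIC void
xfs_example_read_nextents(
	xfs_inode_t	*ip,
	xfs_extnum_t	*nextents)	/* hypothetical out parameter */
{
	uint		lock_mode;

	lock_mode = xfs_ilock_map_shared(ip);
	*nextents = ip->i_d.di_nextents;
	xfs_iunlock_map_shared(ip, lock_mode);
}
#endif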

/*
 * The xfs inode contains 2 locks: a multi-reader lock called the
 * i_iolock and a multi-reader lock called the i_lock.  This routine
 * allows either or both of the locks to be obtained.
 *
 * The 2 locks should always be ordered so that the IO lock is
 * obtained first in order to prevent deadlock.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks
 *       to be locked.  It can be:
 *		XFS_IOLOCK_SHARED,
 *		XFS_IOLOCK_EXCL,
 *		XFS_ILOCK_SHARED,
 *		XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 */
void
xfs_ilock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	trace_xfs_ilock(ip, lock_flags, _RET_IP_);

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));

	if (lock_flags & XFS_ILOCK_EXCL)
		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
	else if (lock_flags & XFS_ILOCK_SHARED)
		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
}

/*
 * This is just like xfs_ilock(), except that the caller
 * is guaranteed not to sleep.  It returns 1 if it gets
 * the requested locks and 0 otherwise.  If the IO lock is
 * obtained but the inode lock cannot be, then the IO lock
 * is dropped before returning.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       locked.  See the comment for xfs_ilock() for a list
 *	 of valid values.
 */
int
xfs_ilock_nowait(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);

	if (lock_flags & XFS_IOLOCK_EXCL) {
		if (!mrtryupdate(&ip->i_iolock))
			goto out;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		if (!mrtryaccess(&ip->i_iolock))
			goto out;
	}
	if (lock_flags & XFS_ILOCK_EXCL) {
		if (!mrtryupdate(&ip->i_lock))
			goto out_undo_iolock;
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		if (!mrtryaccess(&ip->i_lock))
			goto out_undo_iolock;
	}
	return 1;

 out_undo_iolock:
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);
 out:
	return 0;
}
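
/*
 * Editor's illustrative sketch (not part of the original source): the
 * usual pattern for xfs_ilock_nowait() -- attempt the trylock on a
 * non-blocking path and fall back to the caller's slow path (or to a
 * blocking xfs_ilock()) when it fails.  The helper name is hypothetical.
 */
#if 0	/* example only, never compiled */
STATIC int
xfs_example_trylock_both(
	xfs_inode_t	*ip)
{
	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL)) {
		/* contended: the caller would defer or block here instead */
		return 0;
	}

	/* both locks held; do the non-blocking work ... */

	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	return 1;
}
#endif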

/*
 * xfs_iunlock() is used to drop the inode locks acquired with
 * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 * that we know which locks to drop.
 *
 * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       unlocked.  See the comment for xfs_ilock() for a list
 *	 of valid values for this parameter.
 */
void
xfs_iunlock(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
	ASSERT(lock_flags != 0);

	if (lock_flags & XFS_IOLOCK_EXCL)
		mrunlock_excl(&ip->i_iolock);
	else if (lock_flags & XFS_IOLOCK_SHARED)
		mrunlock_shared(&ip->i_iolock);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrunlock_excl(&ip->i_lock);
	else if (lock_flags & XFS_ILOCK_SHARED)
		mrunlock_shared(&ip->i_lock);

	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
}

/*
 * Give up write locks.  The I/O lock cannot be held nested
 * if it is being demoted.
 */
void
xfs_ilock_demote(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);

	if (lock_flags & XFS_ILOCK_EXCL)
		mrdemote(&ip->i_lock);
	if (lock_flags & XFS_IOLOCK_EXCL)
		mrdemote(&ip->i_iolock);

	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
}
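
/*
 * Editor's illustrative sketch (not part of the original source): a
 * hypothetical sequence showing where xfs_ilock_demote() fits -- take
 * the inode lock exclusive for an update, then demote it to shared so
 * that concurrent readers can proceed while the caller keeps the inode
 * locked for the remainder of its work.
 */
#if 0	/* example only, never compiled */
STATIC void
xfs_example_update_then_read(
	xfs_inode_t	*ip)
{
	xfs_ilock(ip, XFS_ILOCK_EXCL);

	/* ... modify in-core state that needs exclusive access ... */

	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);

	/* ... continue under XFS_ILOCK_SHARED ... */

	xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
#endif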

#ifdef DEBUG
int
xfs_isilocked(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
		if (!(lock_flags & XFS_ILOCK_SHARED))
			return !!ip->i_lock.mr_writer;
		return rwsem_is_locked(&ip->i_lock.mr_lock);
	}

	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
		if (!(lock_flags & XFS_IOLOCK_SHARED))
			return !!ip->i_iolock.mr_writer;
		return rwsem_is_locked(&ip->i_iolock.mr_lock);
	}

	ASSERT(0);
	return 0;
}
#endif

void
__xfs_iflock(
	struct xfs_inode	*ip)
{
	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);

	do {
		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		if (xfs_isiflocked(ip))
			io_schedule();
	} while (!xfs_iflock_nowait(ip));

	finish_wait(wq, &wait.wait);
}

#ifdef DEBUG
/*
 * Make sure that the extents in the given memory buffer
 * are valid.
 */
STATIC void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	xfs_bmbt_irec_t		irec;
	xfs_bmbt_rec_host_t	rec;
	int			i;

	for (i = 0; i < nrecs; i++) {
		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
		rec.l0 = get_unaligned(&ep->l0);
		rec.l1 = get_unaligned(&ep->l1);
		xfs_bmbt_get_all(&rec, &irec);
		if (fmt == XFS_EXTFMT_NOSTATE)
			ASSERT(irec.br_state == XFS_EXT_NORM);
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */

/*
 * Check that none of the inodes in the buffer have a next
 * unlinked field of 0.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	int		i;
	int		j;
	xfs_dinode_t	*dip;

	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;

	for (i = 0; i < j; i++) {
		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					i * mp->m_sb.sb_inodesize);
		if (!dip->di_next_unlinked)  {
			xfs_alert(mp,
	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
				bp);
			ASSERT(dip->di_next_unlinked);
		}
	}
}
#endif

/*
 * This routine is called to map an inode to the buffer containing the on-disk
 * version of the inode.  It returns a pointer to the buffer containing the
 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
 * pointer to the on-disk inode within that buffer.
 *
 * If a non-zero error is returned, then the contents of bpp and dipp are
 * undefined.
 */
int
xfs_imap_to_bp(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_imap		*imap,
	struct xfs_dinode	**dipp,
	struct xfs_buf		**bpp,
	uint			buf_flags,
	uint			iget_flags)
{
	struct xfs_buf		*bp;
	int			error;
	int			i;
	int			ni;

	buf_flags |= XBF_UNMAPPED;
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
				   (int)imap->im_len, buf_flags, &bp);
	if (error) {
		if (error != EAGAIN) {
			xfs_warn(mp,
				"%s: xfs_trans_read_buf() returned error %d.",
				__func__, error);
		} else {
			ASSERT(buf_flags & XBF_TRYLOCK);
		}
		return error;
	}

	/*
	 * Validate the magic number and version of every inode in the buffer
	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
	 */
#ifdef DEBUG
	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
#else	/* usual case */
	ni = 1;
#endif

	for (i = 0; i < ni; i++) {
		int		di_ok;
		xfs_dinode_t	*dip;

		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					(i << mp->m_sb.sb_inodelog));
		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
			    XFS_DINODE_GOOD_VERSION(dip->di_version);
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP,
						XFS_RANDOM_ITOBP_INOTOBP))) {
			if (iget_flags & XFS_IGET_UNTRUSTED) {
				xfs_trans_brelse(tp, bp);
				return XFS_ERROR(EINVAL);
			}
			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
					     mp, dip);
#ifdef DEBUG
			xfs_emerg(mp,
				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
				(unsigned long long)imap->im_blkno, i,
				be16_to_cpu(dip->di_magic));
			ASSERT(0);
#endif
			xfs_trans_brelse(tp, bp);
			return XFS_ERROR(EFSCORRUPTED);
		}
	}

	xfs_inobp_check(mp, bp);

	*bpp = bp;
	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
	return 0;
}
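
/*
 * Editor's illustrative sketch (not part of the original source): a
 * hypothetical caller of xfs_imap_to_bp().  It assumes the inode's
 * location (ip->i_imap) has already been filled in by xfs_imap(), as
 * xfs_iread() below does, and shows that the buffer must be released
 * with xfs_trans_brelse() once the on-disk inode has been examined.
 */
#if 0	/* example only, never compiled */
STATIC int
xfs_example_peek_dinode(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	__uint16_t		*mode)	/* hypothetical out parameter */
{
	struct xfs_dinode	*dip;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, 0);
	if (error)
		return error;

	*mode = be16_to_cpu(dip->di_mode);
	xfs_trans_brelse(tp, bp);
	return 0;
}
#endif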

/*
 * Move inode type and inode format specific information from the
 * on-disk inode to the in-core inode.  For fifos, devs, and sockets
 * this means set if_rdev to the proper value.  For files, directories,
 * and symlinks this means to bring in the in-line data or extent
 * pointers.  For a file in B-tree format, only the root is immediately
 * brought in-core.  The rest will be in-lined in if_extents when it
 * is first referenced (see xfs_iread_extents()).
 */
STATIC int
xfs_iformat(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip)
{
	xfs_attr_shortform_t	*atp;
	int			size;
	int			error = 0;
	xfs_fsize_t		di_size;

	if (unlikely(be32_to_cpu(dip->di_nextents) +
		     be16_to_cpu(dip->di_anextents) >
		     be64_to_cpu(dip->di_nblocks))) {
		xfs_warn(ip->i_mount,
			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
			(unsigned long long)ip->i_ino,
			(int)(be32_to_cpu(dip->di_nextents) +
			      be16_to_cpu(dip->di_anextents)),
			(unsigned long long)
				be64_to_cpu(dip->di_nblocks));
		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
			(unsigned long long)ip->i_ino,
			dip->di_forkoff);
		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
		     !ip->i_mount->m_rtdev_targp)) {
		xfs_warn(ip->i_mount,
			"corrupt dinode %Lu, has realtime flag set.",
			ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	switch (ip->i_d.di_mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
					     ip->i_mount, dip);
			return XFS_ERROR(EFSCORRUPTED);
		}
		ip->i_d.di_size = 0;
		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
		break;

	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		switch (dip->di_format) {
		case XFS_DINODE_FMT_LOCAL:
			/*
			 * no local regular files yet
			 */
			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
				xfs_warn(ip->i_mount,
			"corrupt inode %Lu (local format for regular file).",
					(unsigned long long) ip->i_ino);
				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
						     XFS_ERRLEVEL_LOW,
						     ip->i_mount, dip);
				return XFS_ERROR(EFSCORRUPTED);
			}

			di_size = be64_to_cpu(dip->di_size);
			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
				xfs_warn(ip->i_mount,
			"corrupt inode %Lu (bad size %Ld for local inode).",
					(unsigned long long) ip->i_ino,
					(long long) di_size);
				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
						     XFS_ERRLEVEL_LOW,
						     ip->i_mount, dip);
				return XFS_ERROR(EFSCORRUPTED);
			}

			size = (int)di_size;
			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
			break;
		case XFS_DINODE_FMT_EXTENTS:
			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
			break;
		case XFS_DINODE_FMT_BTREE:
			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
			break;
		default:
			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return XFS_ERROR(EFSCORRUPTED);
		}
		break;

	default:
		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
		return XFS_ERROR(EFSCORRUPTED);
	}
	if (error) {
		return error;
	}
	if (!XFS_DFORK_Q(dip))
		return 0;

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);

	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
		size = be16_to_cpu(atp->hdr.totsize);

		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
			xfs_warn(ip->i_mount,
				"corrupt inode %Lu (bad attr fork size %Ld).",
				(unsigned long long) ip->i_ino,
				(long long) size);
			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
					     XFS_ERRLEVEL_LOW,
					     ip->i_mount, dip);
			return XFS_ERROR(EFSCORRUPTED);
		}

		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
		break;
	default:
		error = XFS_ERROR(EFSCORRUPTED);
		break;
	}
	if (error) {
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
	}
	return error;
}

/*
 * The file is in-lined in the on-disk inode.
 * If it fits into if_inline_data, then copy
 * it there, otherwise allocate a buffer for it
 * and copy the data there.  Either way, set
 * if_data to point at the data.
 * If we allocate a buffer for the data, make
 * sure that its size is a multiple of 4 and
 * record the real size in if_real_bytes.
 */
STATIC int
xfs_iformat_local(
	xfs_inode_t	*ip,
	xfs_dinode_t	*dip,
	int		whichfork,
	int		size)
{
	xfs_ifork_t	*ifp;
	int		real_size;

	/*
	 * If the size is unreasonable, then something
	 * is wrong and we just bail out rather than crash in
	 * kmem_alloc() or memcpy() below.
	 */
	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
		xfs_warn(ip->i_mount,
	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
			(unsigned long long) ip->i_ino, size,
			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	real_size = 0;
	if (size == 0)
		ifp->if_u1.if_data = NULL;
	else if (size <= sizeof(ifp->if_u2.if_inline_data))
		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
	else {
		real_size = roundup(size, 4);
		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
	}
	ifp->if_bytes = size;
	ifp->if_real_bytes = real_size;
	if (size)
		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
	ifp->if_flags &= ~XFS_IFEXTENTS;
	ifp->if_flags |= XFS_IFINLINE;
	return 0;
}

/*
 * The file consists of a set of extents all
 * of which fit into the on-disk inode.
 * If there are few enough extents to fit into
 * the if_inline_ext, then copy them there.
 * Otherwise allocate a buffer for them and copy
 * them into it.  Either way, set if_extents
 * to point at the extents.
 */
STATIC int
xfs_iformat_extents(
	xfs_inode_t	*ip,
	xfs_dinode_t	*dip,
	int		whichfork)
{
	xfs_bmbt_rec_t	*dp;
	xfs_ifork_t	*ifp;
	int		nex;
	int		size;
	int		i;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
	size = nex * (uint)sizeof(xfs_bmbt_rec_t);

	/*
	 * If the number of extents is unreasonable, then something
	 * is wrong and we just bail out rather than crash in
	 * kmem_alloc() or memcpy() below.
	 */
	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
			(unsigned long long) ip->i_ino, nex);
		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	ifp->if_real_bytes = 0;
	if (nex == 0)
		ifp->if_u1.if_extents = NULL;
	else if (nex <= XFS_INLINE_EXTS)
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
	else
		xfs_iext_add(ifp, 0, nex);

	ifp->if_bytes = size;
	if (size) {
		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
		for (i = 0; i < nex; i++, dp++) {
			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			ep->l0 = get_unaligned_be64(&dp->l0);
			ep->l1 = get_unaligned_be64(&dp->l1);
		}
		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
		if (whichfork != XFS_DATA_FORK ||
			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
				if (unlikely(xfs_check_nostate_extents(
				    ifp, 0, nex))) {
					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
							 XFS_ERRLEVEL_LOW,
							 ip->i_mount);
					return XFS_ERROR(EFSCORRUPTED);
				}
	}
	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;
}
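
/*
 * Editor's illustrative sketch (not part of the original source): once
 * xfs_iformat_extents() has converted the on-disk (big-endian) records
 * into host-order in-core records, they can be walked with
 * xfs_iext_get_ext() and decoded with xfs_bmbt_get_all(), much as the
 * DEBUG-only xfs_validate_extents() above does.  The function name and
 * the use made of the decoded records are hypothetical.
 */
#if 0	/* example only, never compiled */
STATIC xfs_filblks_t
xfs_example_sum_extent_lengths(
	xfs_ifork_t	*ifp,
	int		nrecs)
{
	xfs_bmbt_irec_t	irec;
	xfs_filblks_t	total = 0;
	int		i;

	for (i = 0; i < nrecs; i++) {
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, i), &irec);
		total += irec.br_blockcount;
	}
	return total;
}
#endif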

/*
 * The file has too many extents to fit into
 * the inode, so they are in B-tree format.
 * Allocate a buffer for the root of the B-tree
 * and copy the root into it.  The i_extents
 * field will remain NULL until all of the
 * extents are read in (when they are needed).
 */
STATIC int
xfs_iformat_btree(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip,
	int			whichfork)
{
	xfs_bmdr_block_t	*dfp;
	xfs_ifork_t		*ifp;
	/* REFERENCED */
	int			nrecs;
	int			size;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
	size = XFS_BMAP_BROOT_SPACE(dfp);
	nrecs = be16_to_cpu(dfp->bb_numrecs);

	/*
	 * blow out if -- fork has less extents than can fit in
	 * fork (fork shouldn't be a btree format), root btree
	 * block has more records than can fit into the fork,
	 * or the number of extents is greater than the number of
	 * blocks.
	 */
	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
					XFS_IFORK_MAXEXT(ip, whichfork) ||
		     XFS_BMDR_SPACE_CALC(nrecs) >
					XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
			(unsigned long long) ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	ifp->if_broot_bytes = size;
	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
	ASSERT(ifp->if_broot != NULL);
	/*
	 * Copy and convert from the on-disk structure
	 * to the in-memory structure.
	 */
	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			 ifp->if_broot, size);
	ifp->if_flags &= ~XFS_IFEXTENTS;
	ifp->if_flags |= XFS_IFBROOT;

	return 0;
}

STATIC void
xfs_dinode_from_disk(
	xfs_icdinode_t		*to,
	xfs_dinode_t		*from)
{
	to->di_magic = be16_to_cpu(from->di_magic);
	to->di_mode = be16_to_cpu(from->di_mode);
	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_onlink = be16_to_cpu(from->di_onlink);
	to->di_uid = be32_to_cpu(from->di_uid);
	to->di_gid = be32_to_cpu(from->di_gid);
	to->di_nlink = be32_to_cpu(from->di_nlink);
	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
	to->di_flushiter = be16_to_cpu(from->di_flushiter);
	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
	to->di_size = be64_to_cpu(from->di_size);
	to->di_nblocks = be64_to_cpu(from->di_nblocks);
	to->di_extsize = be32_to_cpu(from->di_extsize);
	to->di_nextents = be32_to_cpu(from->di_nextents);
	to->di_anextents = be16_to_cpu(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
	to->di_dmstate = be16_to_cpu(from->di_dmstate);
	to->di_flags = be16_to_cpu(from->di_flags);
	to->di_gen = be32_to_cpu(from->di_gen);
}

void
xfs_dinode_to_disk(
	xfs_dinode_t		*to,
	xfs_icdinode_t		*from)
{
	to->di_magic = cpu_to_be16(from->di_magic);
	to->di_mode = cpu_to_be16(from->di_mode);
	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_onlink = cpu_to_be16(from->di_onlink);
	to->di_uid = cpu_to_be32(from->di_uid);
	to->di_gid = cpu_to_be32(from->di_gid);
	to->di_nlink = cpu_to_be32(from->di_nlink);
	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
	to->di_flushiter = cpu_to_be16(from->di_flushiter);
	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);
	to->di_gen = cpu_to_be32(from->di_gen);
}
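
/*
 * Editor's note (not part of the original source): xfs_dinode_from_disk()
 * and xfs_dinode_to_disk() are intended to be exact inverses -- one
 * converts the on-disk, big-endian inode core into the host-endian
 * in-core copy, the other converts it back for writing.  A hypothetical
 * DEBUG-style spot check of that round trip might look like this.
 */
#if 0	/* example only, never compiled */
STATIC void
xfs_example_check_dinode_roundtrip(
	struct xfs_dinode	*dip)
{
	xfs_icdinode_t		icd;
	struct xfs_dinode	ondisk;

	xfs_dinode_from_disk(&icd, dip);
	xfs_dinode_to_disk(&ondisk, &icd);

	/* fields converted above should survive the round trip unchanged */
	ASSERT(ondisk.di_magic == dip->di_magic);
	ASSERT(ondisk.di_size == dip->di_size);
	ASSERT(ondisk.di_nextents == dip->di_nextents);
}
#endif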

STATIC uint
_xfs_dic2xflags(
	__uint16_t		di_flags)
{
	uint			flags = 0;

	if (di_flags & XFS_DIFLAG_ANY) {
		if (di_flags & XFS_DIFLAG_REALTIME)
			flags |= XFS_XFLAG_REALTIME;
		if (di_flags & XFS_DIFLAG_PREALLOC)
			flags |= XFS_XFLAG_PREALLOC;
		if (di_flags & XFS_DIFLAG_IMMUTABLE)
			flags |= XFS_XFLAG_IMMUTABLE;
		if (di_flags & XFS_DIFLAG_APPEND)
			flags |= XFS_XFLAG_APPEND;
		if (di_flags & XFS_DIFLAG_SYNC)
			flags |= XFS_XFLAG_SYNC;
		if (di_flags & XFS_DIFLAG_NOATIME)
			flags |= XFS_XFLAG_NOATIME;
		if (di_flags & XFS_DIFLAG_NODUMP)
			flags |= XFS_XFLAG_NODUMP;
		if (di_flags & XFS_DIFLAG_RTINHERIT)
			flags |= XFS_XFLAG_RTINHERIT;
		if (di_flags & XFS_DIFLAG_PROJINHERIT)
			flags |= XFS_XFLAG_PROJINHERIT;
		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
			flags |= XFS_XFLAG_NOSYMLINKS;
		if (di_flags & XFS_DIFLAG_EXTSIZE)
			flags |= XFS_XFLAG_EXTSIZE;
		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
			flags |= XFS_XFLAG_EXTSZINHERIT;
		if (di_flags & XFS_DIFLAG_NODEFRAG)
			flags |= XFS_XFLAG_NODEFRAG;
		if (di_flags & XFS_DIFLAG_FILESTREAM)
			flags |= XFS_XFLAG_FILESTREAM;
	}

	return flags;
}

uint
xfs_ip2xflags(
	xfs_inode_t		*ip)
{
	xfs_icdinode_t		*dic = &ip->i_d;

	return _xfs_dic2xflags(dic->di_flags) |
				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
}

uint
xfs_dic2xflags(
	xfs_dinode_t		*dip)
{
	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
}

/*
 * Read the disk inode attributes into the in-core inode structure.
 */
int
xfs_iread(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		iget_flags)
{
	xfs_buf_t	*bp;
	xfs_dinode_t	*dip;
	int		error;

	/*
	 * Fill in the location information in the in-core inode.
	 */
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
	if (error)
		return error;

	/*
	 * Get pointers to the on-disk inode and the buffer containing it.
	 */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
	if (error)
		return error;

	/*
	 * If we got something that isn't an inode it means someone
	 * (nfs or dmi) has a stale handle.
	 */
	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
#ifdef DEBUG
		xfs_alert(mp,
			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
			__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
#endif /* DEBUG */
		error = XFS_ERROR(EINVAL);
		goto out_brelse;
	}

	/*
	 * If the on-disk inode is already linked to a directory
	 * entry, copy all of the inode into the in-core inode.
	 * xfs_iformat() handles copying in the inode format
	 * specific information.
	 * Otherwise, just get the truly permanent information.
	 */
	if (dip->di_mode) {
		xfs_dinode_from_disk(&ip->i_d, dip);
		error = xfs_iformat(ip, dip);
		if (error)  {
#ifdef DEBUG
			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
				__func__, error);
#endif /* DEBUG */
			goto out_brelse;
		}
	} else {
		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
		ip->i_d.di_version = dip->di_version;
		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
		/*
		 * Make sure to pull in the mode here as well in
		 * case the inode is released without being used.
		 * This ensures that xfs_inactive() will see that
		 * the inode is already free and not try to mess
		 * with the uninitialized part of it.
		 */
		ip->i_d.di_mode = 0;
	}

	/*
	 * The inode format changed when we moved the link count and
	 * made it 32 bits long.  If this is an old format inode,
	 * convert it in memory to look like a new one.  If it gets
	 * flushed to disk we will convert back before flushing or
	 * logging it.  We zero out the new projid field and the old link
	 * count field.  We'll handle clearing the pad field (the remains
	 * of the old uuid field) when we actually convert the inode to
	 * the new format.  We don't change the version number so that we
	 * can distinguish this from a real new format inode.
	 */
	if (ip->i_d.di_version == 1) {
		ip->i_d.di_nlink = ip->i_d.di_onlink;
		ip->i_d.di_onlink = 0;
		xfs_set_projid(ip, 0);
	}

	ip->i_delayed_blks = 0;

	/*
	 * Mark the buffer containing the inode as something to keep
	 * around for a while.  This helps to keep recently accessed
	 * meta-data in-core longer.
	 */
	xfs_buf_set_ref(bp, XFS_INO_REF);

	/*
	 * Use xfs_trans_brelse() to release the buffer containing the
	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
	 * in xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
	 * will only release the buffer if it is not dirty within the
	 * transaction.  It will be OK to release the buffer in this case,
	 * because inodes on disk are never destroyed and we will be
	 * locking the new in-core inode before putting it in the hash
	 * table where other processes can find it.  Thus we don't have
	 * to worry about the inode being changed just because we released
	 * the buffer.
	 */
 out_brelse:
	xfs_trans_brelse(tp, bp);
	return error;
}
10641da177e4SLinus Torvalds */ 10651da177e4SLinus Torvalds int 10661da177e4SLinus Torvalds xfs_iread_extents( 10671da177e4SLinus Torvalds xfs_trans_t *tp, 10681da177e4SLinus Torvalds xfs_inode_t *ip, 10691da177e4SLinus Torvalds int whichfork) 10701da177e4SLinus Torvalds { 10711da177e4SLinus Torvalds int error; 10721da177e4SLinus Torvalds xfs_ifork_t *ifp; 10734eea22f0SMandy Kirkconnell xfs_extnum_t nextents; 10741da177e4SLinus Torvalds 10751da177e4SLinus Torvalds if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 10761da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 10771da177e4SLinus Torvalds ip->i_mount); 10781da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 10791da177e4SLinus Torvalds } 10804eea22f0SMandy Kirkconnell nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 10811da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 10824eea22f0SMandy Kirkconnell 10831da177e4SLinus Torvalds /* 10841da177e4SLinus Torvalds * We know that the size is valid (it's checked in iformat_btree) 10851da177e4SLinus Torvalds */ 10864eea22f0SMandy Kirkconnell ifp->if_bytes = ifp->if_real_bytes = 0; 10871da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 10884eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nextents); 10891da177e4SLinus Torvalds error = xfs_bmap_read_extents(tp, ip, whichfork); 10901da177e4SLinus Torvalds if (error) { 10914eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 10921da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 10931da177e4SLinus Torvalds return error; 10941da177e4SLinus Torvalds } 1095a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); 10961da177e4SLinus Torvalds return 0; 10971da177e4SLinus Torvalds } 10981da177e4SLinus Torvalds 10991da177e4SLinus Torvalds /* 11001da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 11011da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 11021da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 11031da177e4SLinus Torvalds * set according to the contents of the given cred structure. 11041da177e4SLinus Torvalds * 11051da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 11061da177e4SLinus Torvalds * has a free inode available, call xfs_iget() 11071da177e4SLinus Torvalds * to obtain the in-core version of the allocated inode. Finally, 11081da177e4SLinus Torvalds * fill in the inode and log its initial contents. In this case, 11091da177e4SLinus Torvalds * ialloc_context would be set to NULL and call_again set to false. 11101da177e4SLinus Torvalds * 11111da177e4SLinus Torvalds * If xfs_dialloc() does not have an available inode, 11121da177e4SLinus Torvalds * it will replenish its supply by doing an allocation. Since we can 11131da177e4SLinus Torvalds * only do one allocation within a transaction without deadlocks, we 11141da177e4SLinus Torvalds * must commit the current transaction before returning the inode itself. 11151da177e4SLinus Torvalds * In this case, therefore, we will set call_again to true and return. 11161da177e4SLinus Torvalds * The caller should then commit the current transaction, start a new 11171da177e4SLinus Torvalds * transaction, and call xfs_ialloc() again to actually get the inode. 
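 *
 * A rough sketch of that caller-side retry protocol (illustrative only;
 * the local variable names are assumed and the real two-pass logic lives
 * in the xfs_dir_ialloc() wrapper):
 *
 *	error = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
 *			   &ialloc_context, &ip);
 *	if (!error && ip == NULL && ialloc_context != NULL) {
 *		... hold ialloc_context, commit tp, make a new permanent
 *		    transaction reservation ...
 *		error = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
 *				   okalloc, &ialloc_context, &ip);
 *	}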
11181da177e4SLinus Torvalds * 11191da177e4SLinus Torvalds * To ensure that some other process does not grab the inode that 11201da177e4SLinus Torvalds * was allocated during the first call to xfs_ialloc(), this routine 11211da177e4SLinus Torvalds * also returns the [locked] bp pointing to the head of the freelist 11221da177e4SLinus Torvalds * as ialloc_context. The caller should hold this buffer across 11231da177e4SLinus Torvalds * the commit and pass it back into this routine on the second call. 1124b11f94d5SDavid Chinner * 1125b11f94d5SDavid Chinner * If we are allocating quota inodes, we do not have a parent inode 1126b11f94d5SDavid Chinner * to attach to or associate with (i.e. pip == NULL) because they 1127b11f94d5SDavid Chinner * are not linked into the directory structure - they are attached 1128b11f94d5SDavid Chinner * directly to the superblock - and so have no parent. 11291da177e4SLinus Torvalds */ 11301da177e4SLinus Torvalds int 11311da177e4SLinus Torvalds xfs_ialloc( 11321da177e4SLinus Torvalds xfs_trans_t *tp, 11331da177e4SLinus Torvalds xfs_inode_t *pip, 1134576b1d67SAl Viro umode_t mode, 113531b084aeSNathan Scott xfs_nlink_t nlink, 11361da177e4SLinus Torvalds xfs_dev_t rdev, 11376743099cSArkadiusz Mi?kiewicz prid_t prid, 11381da177e4SLinus Torvalds int okalloc, 11391da177e4SLinus Torvalds xfs_buf_t **ialloc_context, 11401da177e4SLinus Torvalds xfs_inode_t **ipp) 11411da177e4SLinus Torvalds { 11421da177e4SLinus Torvalds xfs_ino_t ino; 11431da177e4SLinus Torvalds xfs_inode_t *ip; 11441da177e4SLinus Torvalds uint flags; 11451da177e4SLinus Torvalds int error; 1146dff35fd4SChristoph Hellwig timespec_t tv; 1147bf904248SDavid Chinner int filestreams = 0; 11481da177e4SLinus Torvalds 11491da177e4SLinus Torvalds /* 11501da177e4SLinus Torvalds * Call the space management code to pick 11511da177e4SLinus Torvalds * the on-disk inode to be allocated. 11521da177e4SLinus Torvalds */ 1153b11f94d5SDavid Chinner error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 115408358906SChristoph Hellwig ialloc_context, &ino); 1155bf904248SDavid Chinner if (error) 11561da177e4SLinus Torvalds return error; 115708358906SChristoph Hellwig if (*ialloc_context || ino == NULLFSINO) { 11581da177e4SLinus Torvalds *ipp = NULL; 11591da177e4SLinus Torvalds return 0; 11601da177e4SLinus Torvalds } 11611da177e4SLinus Torvalds ASSERT(*ialloc_context == NULL); 11621da177e4SLinus Torvalds 11631da177e4SLinus Torvalds /* 11641da177e4SLinus Torvalds * Get the in-core inode with the lock held exclusively. 11651da177e4SLinus Torvalds * This is because we're setting fields here we need 11661da177e4SLinus Torvalds * to prevent others from looking at until we're done. 
11671da177e4SLinus Torvalds */ 1168ec3ba85fSChristoph Hellwig error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, 1169ec3ba85fSChristoph Hellwig XFS_ILOCK_EXCL, &ip); 1170bf904248SDavid Chinner if (error) 11711da177e4SLinus Torvalds return error; 11721da177e4SLinus Torvalds ASSERT(ip != NULL); 11731da177e4SLinus Torvalds 1174576b1d67SAl Viro ip->i_d.di_mode = mode; 11751da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 11761da177e4SLinus Torvalds ip->i_d.di_nlink = nlink; 11771da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == nlink); 11789e2b2dc4SDavid Howells ip->i_d.di_uid = current_fsuid(); 11799e2b2dc4SDavid Howells ip->i_d.di_gid = current_fsgid(); 11806743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, prid); 11811da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 11821da177e4SLinus Torvalds 11831da177e4SLinus Torvalds /* 11841da177e4SLinus Torvalds * If the superblock version is up to where we support new format 11851da177e4SLinus Torvalds * inodes and this is currently an old format inode, then change 11861da177e4SLinus Torvalds * the inode version number now. This way we only do the conversion 11871da177e4SLinus Torvalds * here rather than here and in the flush/logging code. 11881da177e4SLinus Torvalds */ 118962118709SEric Sandeen if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 119051ce16d5SChristoph Hellwig ip->i_d.di_version == 1) { 119151ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 11921da177e4SLinus Torvalds /* 11931da177e4SLinus Torvalds * We've already zeroed the old link count, the projid field, 11941da177e4SLinus Torvalds * and the pad field. 11951da177e4SLinus Torvalds */ 11961da177e4SLinus Torvalds } 11971da177e4SLinus Torvalds 11981da177e4SLinus Torvalds /* 11991da177e4SLinus Torvalds * Project ids won't be stored on disk if we are using a version 1 inode. 12001da177e4SLinus Torvalds */ 120151ce16d5SChristoph Hellwig if ((prid != 0) && (ip->i_d.di_version == 1)) 12021da177e4SLinus Torvalds xfs_bump_ino_vers2(tp, ip); 12031da177e4SLinus Torvalds 1204bd186aa9SChristoph Hellwig if (pip && XFS_INHERIT_GID(pip)) { 12051da177e4SLinus Torvalds ip->i_d.di_gid = pip->i_d.di_gid; 1206abbede1bSAl Viro if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { 12071da177e4SLinus Torvalds ip->i_d.di_mode |= S_ISGID; 12081da177e4SLinus Torvalds } 12091da177e4SLinus Torvalds } 12101da177e4SLinus Torvalds 12111da177e4SLinus Torvalds /* 12121da177e4SLinus Torvalds * If the group ID of the new file does not match the effective group 12131da177e4SLinus Torvalds * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 12141da177e4SLinus Torvalds * (and only if the irix_sgid_inherit compatibility variable is set). 
12151da177e4SLinus Torvalds */ 12161da177e4SLinus Torvalds if ((irix_sgid_inherit) && 12171da177e4SLinus Torvalds (ip->i_d.di_mode & S_ISGID) && 12181da177e4SLinus Torvalds (!in_group_p((gid_t)ip->i_d.di_gid))) { 12191da177e4SLinus Torvalds ip->i_d.di_mode &= ~S_ISGID; 12201da177e4SLinus Torvalds } 12211da177e4SLinus Torvalds 12221da177e4SLinus Torvalds ip->i_d.di_size = 0; 12231da177e4SLinus Torvalds ip->i_d.di_nextents = 0; 12241da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 1225dff35fd4SChristoph Hellwig 1226dff35fd4SChristoph Hellwig nanotime(&tv); 1227dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 1228dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 1229dff35fd4SChristoph Hellwig ip->i_d.di_atime = ip->i_d.di_mtime; 1230dff35fd4SChristoph Hellwig ip->i_d.di_ctime = ip->i_d.di_mtime; 1231dff35fd4SChristoph Hellwig 12321da177e4SLinus Torvalds /* 12331da177e4SLinus Torvalds * di_gen will have been taken care of in xfs_iread. 12341da177e4SLinus Torvalds */ 12351da177e4SLinus Torvalds ip->i_d.di_extsize = 0; 12361da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 12371da177e4SLinus Torvalds ip->i_d.di_dmstate = 0; 12381da177e4SLinus Torvalds ip->i_d.di_flags = 0; 12391da177e4SLinus Torvalds flags = XFS_ILOG_CORE; 12401da177e4SLinus Torvalds switch (mode & S_IFMT) { 12411da177e4SLinus Torvalds case S_IFIFO: 12421da177e4SLinus Torvalds case S_IFCHR: 12431da177e4SLinus Torvalds case S_IFBLK: 12441da177e4SLinus Torvalds case S_IFSOCK: 12451da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_DEV; 12461da177e4SLinus Torvalds ip->i_df.if_u2.if_rdev = rdev; 12471da177e4SLinus Torvalds ip->i_df.if_flags = 0; 12481da177e4SLinus Torvalds flags |= XFS_ILOG_DEV; 12491da177e4SLinus Torvalds break; 12501da177e4SLinus Torvalds case S_IFREG: 1251bf904248SDavid Chinner /* 1252bf904248SDavid Chinner * we can't set up filestreams until after the VFS inode 1253bf904248SDavid Chinner * is set up properly. 
1254bf904248SDavid Chinner */ 1255bf904248SDavid Chinner if (pip && xfs_inode_is_filestream(pip)) 1256bf904248SDavid Chinner filestreams = 1; 12572a82b8beSDavid Chinner /* fall through */ 12581da177e4SLinus Torvalds case S_IFDIR: 1259b11f94d5SDavid Chinner if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1260365ca83dSNathan Scott uint di_flags = 0; 1261365ca83dSNathan Scott 1262abbede1bSAl Viro if (S_ISDIR(mode)) { 1263365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1264365ca83dSNathan Scott di_flags |= XFS_DIFLAG_RTINHERIT; 1265dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1266dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1267dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1268dd9f438eSNathan Scott } 1269abbede1bSAl Viro } else if (S_ISREG(mode)) { 1270613d7043SChristoph Hellwig if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1271365ca83dSNathan Scott di_flags |= XFS_DIFLAG_REALTIME; 1272dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1273dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSIZE; 1274dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1275dd9f438eSNathan Scott } 12761da177e4SLinus Torvalds } 12771da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 12781da177e4SLinus Torvalds xfs_inherit_noatime) 1279365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOATIME; 12801da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 12811da177e4SLinus Torvalds xfs_inherit_nodump) 1282365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NODUMP; 12831da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 12841da177e4SLinus Torvalds xfs_inherit_sync) 1285365ca83dSNathan Scott di_flags |= XFS_DIFLAG_SYNC; 12861da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 12871da177e4SLinus Torvalds xfs_inherit_nosymlinks) 1288365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOSYMLINKS; 1289365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1290365ca83dSNathan Scott di_flags |= XFS_DIFLAG_PROJINHERIT; 1291d3446eacSBarry Naujok if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1292d3446eacSBarry Naujok xfs_inherit_nodefrag) 1293d3446eacSBarry Naujok di_flags |= XFS_DIFLAG_NODEFRAG; 12942a82b8beSDavid Chinner if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) 12952a82b8beSDavid Chinner di_flags |= XFS_DIFLAG_FILESTREAM; 1296365ca83dSNathan Scott ip->i_d.di_flags |= di_flags; 12971da177e4SLinus Torvalds } 12981da177e4SLinus Torvalds /* FALLTHROUGH */ 12991da177e4SLinus Torvalds case S_IFLNK: 13001da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 13011da177e4SLinus Torvalds ip->i_df.if_flags = XFS_IFEXTENTS; 13021da177e4SLinus Torvalds ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 13031da177e4SLinus Torvalds ip->i_df.if_u1.if_extents = NULL; 13041da177e4SLinus Torvalds break; 13051da177e4SLinus Torvalds default: 13061da177e4SLinus Torvalds ASSERT(0); 13071da177e4SLinus Torvalds } 13081da177e4SLinus Torvalds /* 13091da177e4SLinus Torvalds * Attribute fork settings for new inode. 13101da177e4SLinus Torvalds */ 13111da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 13121da177e4SLinus Torvalds ip->i_d.di_anextents = 0; 13131da177e4SLinus Torvalds 13141da177e4SLinus Torvalds /* 13151da177e4SLinus Torvalds * Log the new values stuffed into the inode. 
13161da177e4SLinus Torvalds */ 1317ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 13181da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, flags); 13191da177e4SLinus Torvalds 1320b83bd138SNathan Scott /* now that we have an i_mode we can setup inode ops and unlock */ 132141be8bedSChristoph Hellwig xfs_setup_inode(ip); 13221da177e4SLinus Torvalds 1323bf904248SDavid Chinner /* now we have set up the vfs inode we can associate the filestream */ 1324bf904248SDavid Chinner if (filestreams) { 1325bf904248SDavid Chinner error = xfs_filestream_associate(pip, ip); 1326bf904248SDavid Chinner if (error < 0) 1327bf904248SDavid Chinner return -error; 1328bf904248SDavid Chinner if (!error) 1329bf904248SDavid Chinner xfs_iflags_set(ip, XFS_IFILESTREAM); 1330bf904248SDavid Chinner } 1331bf904248SDavid Chinner 13321da177e4SLinus Torvalds *ipp = ip; 13331da177e4SLinus Torvalds return 0; 13341da177e4SLinus Torvalds } 13351da177e4SLinus Torvalds 13361da177e4SLinus Torvalds /* 13378f04c47aSChristoph Hellwig * Free up the underlying blocks past new_size. The new size must be smaller 13388f04c47aSChristoph Hellwig * than the current size. This routine can be used both for the attribute and 13398f04c47aSChristoph Hellwig * data fork, and does not modify the inode size, which is left to the caller. 13401da177e4SLinus Torvalds * 1341f6485057SDavid Chinner * The transaction passed to this routine must have made a permanent log 1342f6485057SDavid Chinner * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1343f6485057SDavid Chinner * given transaction and start new ones, so make sure everything involved in 1344f6485057SDavid Chinner * the transaction is tidy before calling here. Some transaction will be 1345f6485057SDavid Chinner * returned to the caller to be committed. The incoming transaction must 1346f6485057SDavid Chinner * already include the inode, and both inode locks must be held exclusively. 1347f6485057SDavid Chinner * The inode must also be "held" within the transaction. On return the inode 1348f6485057SDavid Chinner * will be "held" within the returned transaction. This routine does NOT 1349f6485057SDavid Chinner * require any disk space to be reserved for it within the transaction. 13501da177e4SLinus Torvalds * 1351f6485057SDavid Chinner * If we get an error, we must return with the inode locked and linked into the 1352f6485057SDavid Chinner * current transaction. This keeps things simple for the higher level code, 1353f6485057SDavid Chinner * because it always knows that the inode is locked and held in the transaction 1354f6485057SDavid Chinner * that returns to it whether errors occur or not. We don't mark the inode 1355f6485057SDavid Chinner * dirty on error so that transactions can be easily aborted if possible. 
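 *
 * A minimal sketch of that calling convention (assumed context, not a
 * verbatim call site: tp already carries a permanent XFS_ITRUNCATE_LOG_RES
 * reservation and the inode is locked, joined and held):
 *
 *	xfs_trans_ijoin(tp, ip, 0);
 *	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, new_size);
 *	if (error)
 *		goto out_trans_cancel;
 *	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);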
13561da177e4SLinus Torvalds */ 13571da177e4SLinus Torvalds int 13588f04c47aSChristoph Hellwig xfs_itruncate_extents( 13598f04c47aSChristoph Hellwig struct xfs_trans **tpp, 13608f04c47aSChristoph Hellwig struct xfs_inode *ip, 13618f04c47aSChristoph Hellwig int whichfork, 13628f04c47aSChristoph Hellwig xfs_fsize_t new_size) 13631da177e4SLinus Torvalds { 13648f04c47aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 13658f04c47aSChristoph Hellwig struct xfs_trans *tp = *tpp; 13668f04c47aSChristoph Hellwig struct xfs_trans *ntp; 13678f04c47aSChristoph Hellwig xfs_bmap_free_t free_list; 13681da177e4SLinus Torvalds xfs_fsblock_t first_block; 13691da177e4SLinus Torvalds xfs_fileoff_t first_unmap_block; 13701da177e4SLinus Torvalds xfs_fileoff_t last_block; 13718f04c47aSChristoph Hellwig xfs_filblks_t unmap_len; 13721da177e4SLinus Torvalds int committed; 13738f04c47aSChristoph Hellwig int error = 0; 13748f04c47aSChristoph Hellwig int done = 0; 13751da177e4SLinus Torvalds 13760b56185bSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 13770b56185bSChristoph Hellwig ASSERT(!atomic_read(&VFS_I(ip)->i_count) || 13780b56185bSChristoph Hellwig xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1379ce7ae151SChristoph Hellwig ASSERT(new_size <= XFS_ISIZE(ip)); 13808f04c47aSChristoph Hellwig ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 13811da177e4SLinus Torvalds ASSERT(ip->i_itemp != NULL); 1382898621d5SChristoph Hellwig ASSERT(ip->i_itemp->ili_lock_flags == 0); 13831da177e4SLinus Torvalds ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 13841da177e4SLinus Torvalds 1385673e8e59SChristoph Hellwig trace_xfs_itruncate_extents_start(ip, new_size); 1386673e8e59SChristoph Hellwig 13871da177e4SLinus Torvalds /* 13881da177e4SLinus Torvalds * Since it is possible for space to become allocated beyond 13891da177e4SLinus Torvalds * the end of the file (in a crash where the space is allocated 13901da177e4SLinus Torvalds * but the inode size is not yet updated), simply remove any 13911da177e4SLinus Torvalds * blocks which show up between the new EOF and the maximum 13921da177e4SLinus Torvalds * possible file size. If the first block to be removed is 13931da177e4SLinus Torvalds * beyond the maximum file size (ie it is the same as last_block), 13941da177e4SLinus Torvalds * then there is nothing to do. 13951da177e4SLinus Torvalds */ 13968f04c47aSChristoph Hellwig first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 139732972383SDave Chinner last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 13988f04c47aSChristoph Hellwig if (first_unmap_block == last_block) 13998f04c47aSChristoph Hellwig return 0; 14008f04c47aSChristoph Hellwig 14018f04c47aSChristoph Hellwig ASSERT(first_unmap_block < last_block); 14021da177e4SLinus Torvalds unmap_len = last_block - first_unmap_block + 1; 14031da177e4SLinus Torvalds while (!done) { 14049d87c319SEric Sandeen xfs_bmap_init(&free_list, &first_block); 14058f04c47aSChristoph Hellwig error = xfs_bunmapi(tp, ip, 14063e57ecf6SOlaf Weber first_unmap_block, unmap_len, 14078f04c47aSChristoph Hellwig xfs_bmapi_aflag(whichfork), 14081da177e4SLinus Torvalds XFS_ITRUNC_MAX_EXTENTS, 14093e57ecf6SOlaf Weber &first_block, &free_list, 1410b4e9181eSChristoph Hellwig &done); 14118f04c47aSChristoph Hellwig if (error) 14128f04c47aSChristoph Hellwig goto out_bmap_cancel; 14131da177e4SLinus Torvalds 14141da177e4SLinus Torvalds /* 14151da177e4SLinus Torvalds * Duplicate the transaction that has the permanent 14161da177e4SLinus Torvalds * reservation and commit the old transaction. 
14171da177e4SLinus Torvalds */ 14188f04c47aSChristoph Hellwig error = xfs_bmap_finish(&tp, &free_list, &committed); 1419898621d5SChristoph Hellwig if (committed) 1420ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, 0); 14218f04c47aSChristoph Hellwig if (error) 14228f04c47aSChristoph Hellwig goto out_bmap_cancel; 14231da177e4SLinus Torvalds 14241da177e4SLinus Torvalds if (committed) { 14251da177e4SLinus Torvalds /* 1426f6485057SDavid Chinner * Mark the inode dirty so it will be logged and 1427e5720eecSDavid Chinner * moved forward in the log as part of every commit. 14281da177e4SLinus Torvalds */ 14298f04c47aSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 14301da177e4SLinus Torvalds } 1431f6485057SDavid Chinner 14328f04c47aSChristoph Hellwig ntp = xfs_trans_dup(tp); 14338f04c47aSChristoph Hellwig error = xfs_trans_commit(tp, 0); 14348f04c47aSChristoph Hellwig tp = ntp; 1435f6485057SDavid Chinner 1436ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, 0); 1437f6485057SDavid Chinner 1438cc09c0dcSDave Chinner if (error) 14398f04c47aSChristoph Hellwig goto out; 14408f04c47aSChristoph Hellwig 1441cc09c0dcSDave Chinner /* 14428f04c47aSChristoph Hellwig * Transaction commit worked ok so we can drop the extra ticket 1443cc09c0dcSDave Chinner * reference that we gained in xfs_trans_dup() 1444cc09c0dcSDave Chinner */ 14458f04c47aSChristoph Hellwig xfs_log_ticket_put(tp->t_ticket); 14468f04c47aSChristoph Hellwig error = xfs_trans_reserve(tp, 0, 1447f6485057SDavid Chinner XFS_ITRUNCATE_LOG_RES(mp), 0, 14481da177e4SLinus Torvalds XFS_TRANS_PERM_LOG_RES, 14491da177e4SLinus Torvalds XFS_ITRUNCATE_LOG_COUNT); 14501da177e4SLinus Torvalds if (error) 14518f04c47aSChristoph Hellwig goto out; 14521da177e4SLinus Torvalds } 14538f04c47aSChristoph Hellwig 1454673e8e59SChristoph Hellwig /* 1455673e8e59SChristoph Hellwig * Always re-log the inode so that our permanent transaction can keep 1456673e8e59SChristoph Hellwig * on rolling it forward in the log. 1457673e8e59SChristoph Hellwig */ 1458673e8e59SChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1459673e8e59SChristoph Hellwig 1460673e8e59SChristoph Hellwig trace_xfs_itruncate_extents_end(ip, new_size); 1461673e8e59SChristoph Hellwig 14628f04c47aSChristoph Hellwig out: 14638f04c47aSChristoph Hellwig *tpp = tp; 14648f04c47aSChristoph Hellwig return error; 14658f04c47aSChristoph Hellwig out_bmap_cancel: 14661da177e4SLinus Torvalds /* 14678f04c47aSChristoph Hellwig * If the bunmapi call encounters an error, return to the caller where 14688f04c47aSChristoph Hellwig * the transaction can be properly aborted. We just need to make sure 14698f04c47aSChristoph Hellwig * we're not holding any resources that we were not when we came in. 14701da177e4SLinus Torvalds */ 14718f04c47aSChristoph Hellwig xfs_bmap_cancel(&free_list); 14728f04c47aSChristoph Hellwig goto out; 14738f04c47aSChristoph Hellwig } 14748f04c47aSChristoph Hellwig 14751da177e4SLinus Torvalds /* 14761da177e4SLinus Torvalds * This is called when the inode's link count goes to 0. 14771da177e4SLinus Torvalds * We place the on-disk inode on a list in the AGI. It 14781da177e4SLinus Torvalds * will be pulled from this list when the inode is freed. 
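 *
 * The unlinked list itself is a set of singly linked chains: the AGI holds
 * XFS_AGI_UNLINKED_BUCKETS list heads, the bucket is chosen as the inode's
 * agino modulo the number of buckets, and each on-disk inode points at the
 * next entry through its di_next_unlinked field, with NULLAGINO terminating
 * the chain. New entries are inserted at the head of their bucket, as the
 * code below does.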
14791da177e4SLinus Torvalds */ 14801da177e4SLinus Torvalds int 14811da177e4SLinus Torvalds xfs_iunlink( 14821da177e4SLinus Torvalds xfs_trans_t *tp, 14831da177e4SLinus Torvalds xfs_inode_t *ip) 14841da177e4SLinus Torvalds { 14851da177e4SLinus Torvalds xfs_mount_t *mp; 14861da177e4SLinus Torvalds xfs_agi_t *agi; 14871da177e4SLinus Torvalds xfs_dinode_t *dip; 14881da177e4SLinus Torvalds xfs_buf_t *agibp; 14891da177e4SLinus Torvalds xfs_buf_t *ibp; 14901da177e4SLinus Torvalds xfs_agino_t agino; 14911da177e4SLinus Torvalds short bucket_index; 14921da177e4SLinus Torvalds int offset; 14931da177e4SLinus Torvalds int error; 14941da177e4SLinus Torvalds 14951da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 14961da177e4SLinus Torvalds ASSERT(ip->i_d.di_mode != 0); 14971da177e4SLinus Torvalds 14981da177e4SLinus Torvalds mp = tp->t_mountp; 14991da177e4SLinus Torvalds 15001da177e4SLinus Torvalds /* 15011da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 15021da177e4SLinus Torvalds * on the list. 15031da177e4SLinus Torvalds */ 15045e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); 1505859d7182SVlad Apostolov if (error) 15061da177e4SLinus Torvalds return error; 15071da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 15085e1be0fbSChristoph Hellwig 15091da177e4SLinus Torvalds /* 15101da177e4SLinus Torvalds * Get the index into the agi hash table for the 15111da177e4SLinus Torvalds * list this inode will go on. 15121da177e4SLinus Torvalds */ 15131da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 15141da177e4SLinus Torvalds ASSERT(agino != 0); 15151da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 15161da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 151716259e7dSChristoph Hellwig ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 15181da177e4SLinus Torvalds 151969ef921bSChristoph Hellwig if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { 15201da177e4SLinus Torvalds /* 15211da177e4SLinus Torvalds * There is already another inode in the bucket we need 15221da177e4SLinus Torvalds * to add ourselves to. Add us at the front of the list. 15231da177e4SLinus Torvalds * Here we put the head pointer into our next pointer, 15241da177e4SLinus Torvalds * and then we fall through to point the head at us. 15251da177e4SLinus Torvalds */ 1526475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 1527475ee413SChristoph Hellwig 0, 0); 1528c319b58bSVlad Apostolov if (error) 1529c319b58bSVlad Apostolov return error; 1530c319b58bSVlad Apostolov 153169ef921bSChristoph Hellwig ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); 15321da177e4SLinus Torvalds dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 153392bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 15341da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 15351da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 15361da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 15371da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 15381da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 15391da177e4SLinus Torvalds } 15401da177e4SLinus Torvalds 15411da177e4SLinus Torvalds /* 15421da177e4SLinus Torvalds * Point the bucket head pointer at the inode being inserted. 
15431da177e4SLinus Torvalds */ 15441da177e4SLinus Torvalds ASSERT(agino != 0); 154516259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 15461da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 15471da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 15481da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 15491da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 15501da177e4SLinus Torvalds return 0; 15511da177e4SLinus Torvalds } 15521da177e4SLinus Torvalds 15531da177e4SLinus Torvalds /* 15541da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 15551da177e4SLinus Torvalds */ 15561da177e4SLinus Torvalds STATIC int 15571da177e4SLinus Torvalds xfs_iunlink_remove( 15581da177e4SLinus Torvalds xfs_trans_t *tp, 15591da177e4SLinus Torvalds xfs_inode_t *ip) 15601da177e4SLinus Torvalds { 15611da177e4SLinus Torvalds xfs_ino_t next_ino; 15621da177e4SLinus Torvalds xfs_mount_t *mp; 15631da177e4SLinus Torvalds xfs_agi_t *agi; 15641da177e4SLinus Torvalds xfs_dinode_t *dip; 15651da177e4SLinus Torvalds xfs_buf_t *agibp; 15661da177e4SLinus Torvalds xfs_buf_t *ibp; 15671da177e4SLinus Torvalds xfs_agnumber_t agno; 15681da177e4SLinus Torvalds xfs_agino_t agino; 15691da177e4SLinus Torvalds xfs_agino_t next_agino; 15701da177e4SLinus Torvalds xfs_buf_t *last_ibp; 15716fdf8cccSNathan Scott xfs_dinode_t *last_dip = NULL; 15721da177e4SLinus Torvalds short bucket_index; 15736fdf8cccSNathan Scott int offset, last_offset = 0; 15741da177e4SLinus Torvalds int error; 15751da177e4SLinus Torvalds 15761da177e4SLinus Torvalds mp = tp->t_mountp; 15771da177e4SLinus Torvalds agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 15781da177e4SLinus Torvalds 15791da177e4SLinus Torvalds /* 15801da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 15811da177e4SLinus Torvalds * on the list. 15821da177e4SLinus Torvalds */ 15835e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, agno, &agibp); 15845e1be0fbSChristoph Hellwig if (error) 15851da177e4SLinus Torvalds return error; 15865e1be0fbSChristoph Hellwig 15871da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 15885e1be0fbSChristoph Hellwig 15891da177e4SLinus Torvalds /* 15901da177e4SLinus Torvalds * Get the index into the agi hash table for the 15911da177e4SLinus Torvalds * list this inode will go on. 15921da177e4SLinus Torvalds */ 15931da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 15941da177e4SLinus Torvalds ASSERT(agino != 0); 15951da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 159669ef921bSChristoph Hellwig ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); 15971da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 15981da177e4SLinus Torvalds 159916259e7dSChristoph Hellwig if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 16001da177e4SLinus Torvalds /* 1601475ee413SChristoph Hellwig * We're at the head of the list. Get the inode's on-disk 1602475ee413SChristoph Hellwig * buffer to see if there is anyone after us on the list. 1603475ee413SChristoph Hellwig * Only modify our next pointer if it is not already NULLAGINO. 1604475ee413SChristoph Hellwig * This saves us the overhead of dealing with the buffer when 1605475ee413SChristoph Hellwig * there is no need to change it. 
16061da177e4SLinus Torvalds */ 1607475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 1608475ee413SChristoph Hellwig 0, 0); 16091da177e4SLinus Torvalds if (error) { 1610475ee413SChristoph Hellwig xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", 16110b932cccSDave Chinner __func__, error); 16121da177e4SLinus Torvalds return error; 16131da177e4SLinus Torvalds } 1614347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 16151da177e4SLinus Torvalds ASSERT(next_agino != 0); 16161da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1617347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 161892bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 16191da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 16201da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 16211da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 16221da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 16231da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 16241da177e4SLinus Torvalds } else { 16251da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 16261da177e4SLinus Torvalds } 16271da177e4SLinus Torvalds /* 16281da177e4SLinus Torvalds * Point the bucket head pointer at the next inode. 16291da177e4SLinus Torvalds */ 16301da177e4SLinus Torvalds ASSERT(next_agino != 0); 16311da177e4SLinus Torvalds ASSERT(next_agino != agino); 163216259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 16331da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 16341da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 16351da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 16361da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 16371da177e4SLinus Torvalds } else { 16381da177e4SLinus Torvalds /* 16391da177e4SLinus Torvalds * We need to search the list for the inode being freed. 
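 *
 * This is a linear walk of the bucket chain: each step maps the next
 * inode number to its buffer and reads di_next_unlinked until we find
 * the entry that points at the inode being removed.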
16401da177e4SLinus Torvalds */ 164116259e7dSChristoph Hellwig next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 16421da177e4SLinus Torvalds last_ibp = NULL; 16431da177e4SLinus Torvalds while (next_agino != agino) { 1644129dbc9aSChristoph Hellwig struct xfs_imap imap; 1645129dbc9aSChristoph Hellwig 1646129dbc9aSChristoph Hellwig if (last_ibp) 16471da177e4SLinus Torvalds xfs_trans_brelse(tp, last_ibp); 1648129dbc9aSChristoph Hellwig 1649129dbc9aSChristoph Hellwig imap.im_blkno = 0; 16501da177e4SLinus Torvalds next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 1651129dbc9aSChristoph Hellwig 1652129dbc9aSChristoph Hellwig error = xfs_imap(mp, tp, next_ino, &imap, 0); 16531da177e4SLinus Torvalds if (error) { 16540b932cccSDave Chinner xfs_warn(mp, 1655129dbc9aSChristoph Hellwig "%s: xfs_imap returned error %d.", 16560b932cccSDave Chinner __func__, error); 16571da177e4SLinus Torvalds return error; 16581da177e4SLinus Torvalds } 1659129dbc9aSChristoph Hellwig 1660129dbc9aSChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &imap, &last_dip, 1661129dbc9aSChristoph Hellwig &last_ibp, 0, 0); 1662129dbc9aSChristoph Hellwig if (error) { 1663129dbc9aSChristoph Hellwig xfs_warn(mp, 1664129dbc9aSChristoph Hellwig "%s: xfs_imap_to_bp returned error %d.", 1665129dbc9aSChristoph Hellwig __func__, error); 1666129dbc9aSChristoph Hellwig return error; 1667129dbc9aSChristoph Hellwig } 1668129dbc9aSChristoph Hellwig 1669129dbc9aSChristoph Hellwig last_offset = imap.im_boffset; 1670347d1c01SChristoph Hellwig next_agino = be32_to_cpu(last_dip->di_next_unlinked); 16711da177e4SLinus Torvalds ASSERT(next_agino != NULLAGINO); 16721da177e4SLinus Torvalds ASSERT(next_agino != 0); 16731da177e4SLinus Torvalds } 1674475ee413SChristoph Hellwig 16751da177e4SLinus Torvalds /* 1676475ee413SChristoph Hellwig * Now last_ibp points to the buffer previous to us on the 1677475ee413SChristoph Hellwig * unlinked list. Pull us from the list. 16781da177e4SLinus Torvalds */ 1679475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 1680475ee413SChristoph Hellwig 0, 0); 16811da177e4SLinus Torvalds if (error) { 1682475ee413SChristoph Hellwig xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.", 16830b932cccSDave Chinner __func__, error); 16841da177e4SLinus Torvalds return error; 16851da177e4SLinus Torvalds } 1686347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 16871da177e4SLinus Torvalds ASSERT(next_agino != 0); 16881da177e4SLinus Torvalds ASSERT(next_agino != agino); 16891da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1690347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 169192bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 16921da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 16931da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 16941da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 16951da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 16961da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 16971da177e4SLinus Torvalds } else { 16981da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 16991da177e4SLinus Torvalds } 17001da177e4SLinus Torvalds /* 17011da177e4SLinus Torvalds * Point the previous inode on the list to the next inode. 
17021da177e4SLinus Torvalds */ 1703347d1c01SChristoph Hellwig last_dip->di_next_unlinked = cpu_to_be32(next_agino); 17041da177e4SLinus Torvalds ASSERT(next_agino != 0); 17051da177e4SLinus Torvalds offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 17061da177e4SLinus Torvalds xfs_trans_inode_buf(tp, last_ibp); 17071da177e4SLinus Torvalds xfs_trans_log_buf(tp, last_ibp, offset, 17081da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 17091da177e4SLinus Torvalds xfs_inobp_check(mp, last_ibp); 17101da177e4SLinus Torvalds } 17111da177e4SLinus Torvalds return 0; 17121da177e4SLinus Torvalds } 17131da177e4SLinus Torvalds 17145b3eed75SDave Chinner /* 17155b3eed75SDave Chinner * A big issue when freeing the inode cluster is that we _cannot_ skip any 17165b3eed75SDave Chinner * inodes that are in memory - they all must be marked stale and attached to 17175b3eed75SDave Chinner * the cluster buffer. 17185b3eed75SDave Chinner */ 17192a30f36dSChandra Seetharaman STATIC int 17201da177e4SLinus Torvalds xfs_ifree_cluster( 17211da177e4SLinus Torvalds xfs_inode_t *free_ip, 17221da177e4SLinus Torvalds xfs_trans_t *tp, 17231da177e4SLinus Torvalds xfs_ino_t inum) 17241da177e4SLinus Torvalds { 17251da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 17261da177e4SLinus Torvalds int blks_per_cluster; 17271da177e4SLinus Torvalds int nbufs; 17281da177e4SLinus Torvalds int ninodes; 17295b257b4aSDave Chinner int i, j; 17301da177e4SLinus Torvalds xfs_daddr_t blkno; 17311da177e4SLinus Torvalds xfs_buf_t *bp; 17325b257b4aSDave Chinner xfs_inode_t *ip; 17331da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 17341da177e4SLinus Torvalds xfs_log_item_t *lip; 17355017e97dSDave Chinner struct xfs_perag *pag; 17361da177e4SLinus Torvalds 17375017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 17381da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 17391da177e4SLinus Torvalds blks_per_cluster = 1; 17401da177e4SLinus Torvalds ninodes = mp->m_sb.sb_inopblock; 17411da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 17421da177e4SLinus Torvalds } else { 17431da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 17441da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 17451da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 17461da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 17471da177e4SLinus Torvalds } 17481da177e4SLinus Torvalds 17491da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 17501da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 17511da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 17521da177e4SLinus Torvalds 17531da177e4SLinus Torvalds /* 17545b257b4aSDave Chinner * We obtain and lock the backing buffer first in the process 17555b257b4aSDave Chinner * here, as we have to ensure that any dirty inode that we 17565b257b4aSDave Chinner * can't get the flush lock on is attached to the buffer. 17575b257b4aSDave Chinner * If we scan the in-memory inodes first, then buffer IO can 17585b257b4aSDave Chinner * complete before we get a lock on it, and hence we may fail 17595b257b4aSDave Chinner * to mark all the active inodes on the buffer stale.
17601da177e4SLinus Torvalds */ 17611da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 1762a8acad70SDave Chinner mp->m_bsize * blks_per_cluster, 0); 17631da177e4SLinus Torvalds 17642a30f36dSChandra Seetharaman if (!bp) 17652a30f36dSChandra Seetharaman return ENOMEM; 17665b257b4aSDave Chinner /* 17675b257b4aSDave Chinner * Walk the inodes already attached to the buffer and mark them 17685b257b4aSDave Chinner * stale. These will all have the flush locks held, so an 17695b3eed75SDave Chinner * in-memory inode walk can't lock them. By marking them all 17705b3eed75SDave Chinner * stale first, we will not attempt to lock them in the loop 17715b3eed75SDave Chinner * below as the XFS_ISTALE flag will be set. 17725b257b4aSDave Chinner */ 1773adadbeefSChristoph Hellwig lip = bp->b_fspriv; 17741da177e4SLinus Torvalds while (lip) { 17751da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 17761da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 17771da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 1778ca30b2a7SChristoph Hellwig lip->li_cb = xfs_istale_done; 17797b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 17807b2e2a31SDavid Chinner &iip->ili_flush_lsn, 17817b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 1782e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 17831da177e4SLinus Torvalds } 17841da177e4SLinus Torvalds lip = lip->li_bio_list; 17851da177e4SLinus Torvalds } 17861da177e4SLinus Torvalds 17875b3eed75SDave Chinner 17885b257b4aSDave Chinner /* 17895b257b4aSDave Chinner * For each inode in memory attempt to add it to the inode 17905b257b4aSDave Chinner * buffer and set it up for being staled on buffer IO 17915b257b4aSDave Chinner * completion. This is safe as we've locked out tail pushing 17925b257b4aSDave Chinner * and flushing by locking the buffer. 17935b257b4aSDave Chinner * 17945b257b4aSDave Chinner * We have already marked every inode that was part of a 17955b257b4aSDave Chinner * transaction stale above, which means there is no point in 17965b257b4aSDave Chinner * even trying to lock them. 17975b257b4aSDave Chinner */ 17985b257b4aSDave Chinner for (i = 0; i < ninodes; i++) { 17995b3eed75SDave Chinner retry: 18001a3e8f3dSDave Chinner rcu_read_lock(); 18015b257b4aSDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 18025b257b4aSDave Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 18031da177e4SLinus Torvalds 18041a3e8f3dSDave Chinner /* Inode not in memory, nothing to do */ 18051a3e8f3dSDave Chinner if (!ip) { 18061a3e8f3dSDave Chinner rcu_read_unlock(); 18075b257b4aSDave Chinner continue; 18085b257b4aSDave Chinner } 18095b257b4aSDave Chinner 18105b3eed75SDave Chinner /* 18111a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could 18121a3e8f3dSDave Chinner * find a recently freed or even reallocated inode 18131a3e8f3dSDave Chinner * during the lookup. We need to check under the 18141a3e8f3dSDave Chinner * i_flags_lock for a valid inode here. Skip it if it 18151a3e8f3dSDave Chinner * is not valid, the wrong inode or stale. 
18161a3e8f3dSDave Chinner */ 18171a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 18181a3e8f3dSDave Chinner if (ip->i_ino != inum + i || 18191a3e8f3dSDave Chinner __xfs_iflags_test(ip, XFS_ISTALE)) { 18201a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 18211a3e8f3dSDave Chinner rcu_read_unlock(); 18221a3e8f3dSDave Chinner continue; 18231a3e8f3dSDave Chinner } 18241a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 18251a3e8f3dSDave Chinner 18261a3e8f3dSDave Chinner /* 18275b3eed75SDave Chinner * Don't try to lock/unlock the current inode, but we 18285b3eed75SDave Chinner * _cannot_ skip the other inodes that we did not find 18295b3eed75SDave Chinner * in the list attached to the buffer and are not 18305b3eed75SDave Chinner * already marked stale. If we can't lock it, back off 18315b3eed75SDave Chinner * and retry. 18325b3eed75SDave Chinner */ 18335b257b4aSDave Chinner if (ip != free_ip && 18345b257b4aSDave Chinner !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 18351a3e8f3dSDave Chinner rcu_read_unlock(); 18365b3eed75SDave Chinner delay(1); 18375b3eed75SDave Chinner goto retry; 18385b257b4aSDave Chinner } 18391a3e8f3dSDave Chinner rcu_read_unlock(); 18405b257b4aSDave Chinner 18415b3eed75SDave Chinner xfs_iflock(ip); 18425b257b4aSDave Chinner xfs_iflags_set(ip, XFS_ISTALE); 18435b257b4aSDave Chinner 18445b3eed75SDave Chinner /* 18455b3eed75SDave Chinner * we don't need to attach clean inodes or those only 18465b3eed75SDave Chinner * with unlogged changes (which we throw away, anyway). 18475b3eed75SDave Chinner */ 18485b257b4aSDave Chinner iip = ip->i_itemp; 18495b3eed75SDave Chinner if (!iip || xfs_inode_clean(ip)) { 18505b257b4aSDave Chinner ASSERT(ip != free_ip); 18511da177e4SLinus Torvalds xfs_ifunlock(ip); 18521da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 18531da177e4SLinus Torvalds continue; 18541da177e4SLinus Torvalds } 18551da177e4SLinus Torvalds 1856f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 1857f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 18581da177e4SLinus Torvalds iip->ili_logged = 1; 18597b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 18607b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 18611da177e4SLinus Torvalds 1862ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_istale_done, 1863ca30b2a7SChristoph Hellwig &iip->ili_item); 18645b257b4aSDave Chinner 18655b257b4aSDave Chinner if (ip != free_ip) 18661da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 18671da177e4SLinus Torvalds } 18681da177e4SLinus Torvalds 18691da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 18701da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 18711da177e4SLinus Torvalds } 18721da177e4SLinus Torvalds 18735017e97dSDave Chinner xfs_perag_put(pag); 18742a30f36dSChandra Seetharaman return 0; 18751da177e4SLinus Torvalds } 18761da177e4SLinus Torvalds 18771da177e4SLinus Torvalds /* 18781da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 18791da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 18801da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 18811da177e4SLinus Torvalds * the inode is already a part of the transaction. 18821da177e4SLinus Torvalds * 18831da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 18841da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 18851da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 
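 *
 * Broadly, this does three things within the caller's transaction: remove
 * the inode from the AGI unlinked list (xfs_iunlink_remove()), mark it
 * free in the inode allocation btree (xfs_difree()), and, when xfs_difree()
 * reports that the whole inode chunk is now unused, stale any in-core
 * inodes backed by it and invalidate the cluster buffers
 * (xfs_ifree_cluster()).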
18861da177e4SLinus Torvalds */ 18871da177e4SLinus Torvalds int 18881da177e4SLinus Torvalds xfs_ifree( 18891da177e4SLinus Torvalds xfs_trans_t *tp, 18901da177e4SLinus Torvalds xfs_inode_t *ip, 18911da177e4SLinus Torvalds xfs_bmap_free_t *flist) 18921da177e4SLinus Torvalds { 18931da177e4SLinus Torvalds int error; 18941da177e4SLinus Torvalds int delete; 18951da177e4SLinus Torvalds xfs_ino_t first_ino; 1896c319b58bSVlad Apostolov xfs_dinode_t *dip; 1897c319b58bSVlad Apostolov xfs_buf_t *ibp; 18981da177e4SLinus Torvalds 1899579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 19001da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 19011da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 19021da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 1903ce7ae151SChristoph Hellwig ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode)); 19041da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 19051da177e4SLinus Torvalds 19061da177e4SLinus Torvalds /* 19071da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 19081da177e4SLinus Torvalds */ 19091da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 19101da177e4SLinus Torvalds if (error != 0) { 19111da177e4SLinus Torvalds return error; 19121da177e4SLinus Torvalds } 19131da177e4SLinus Torvalds 19141da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 19151da177e4SLinus Torvalds if (error != 0) { 19161da177e4SLinus Torvalds return error; 19171da177e4SLinus Torvalds } 19181da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 19191da177e4SLinus Torvalds ip->i_d.di_flags = 0; 19201da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 19211da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 19221da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 19231da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 19241da177e4SLinus Torvalds /* 19251da177e4SLinus Torvalds * Bump the generation count so no one will be confused 19261da177e4SLinus Torvalds * by reincarnations of this inode. 19271da177e4SLinus Torvalds */ 19281da177e4SLinus Torvalds ip->i_d.di_gen++; 1929c319b58bSVlad Apostolov 19301da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 19311da177e4SLinus Torvalds 1932475ee413SChristoph Hellwig error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, 1933475ee413SChristoph Hellwig 0, 0); 1934c319b58bSVlad Apostolov if (error) 1935c319b58bSVlad Apostolov return error; 1936c319b58bSVlad Apostolov 1937c319b58bSVlad Apostolov /* 1938c319b58bSVlad Apostolov * Clear the on-disk di_mode. This is to prevent xfs_bulkstat 1939c319b58bSVlad Apostolov * from picking up this inode when it is reclaimed (its incore state 1940c319b58bSVlad Apostolov * initialized but not flushed to disk yet). The in-core di_mode is 1941c319b58bSVlad Apostolov * already cleared and a corresponding transaction logged. 1942c319b58bSVlad Apostolov * The hack here just synchronizes the in-core to on-disk 1943c319b58bSVlad Apostolov * di_mode value in advance before the actual inode sync to disk. 1944c319b58bSVlad Apostolov * This is OK because the inode is already unlinked and would never 1945c319b58bSVlad Apostolov * change its di_mode again for this inode generation. 1946c319b58bSVlad Apostolov * This is a temporary hack that would require a proper fix 1947c319b58bSVlad Apostolov * in the future.
1948c319b58bSVlad Apostolov */ 194981591fe2SChristoph Hellwig dip->di_mode = 0; 1950c319b58bSVlad Apostolov 19511da177e4SLinus Torvalds if (delete) { 19522a30f36dSChandra Seetharaman error = xfs_ifree_cluster(ip, tp, first_ino); 19531da177e4SLinus Torvalds } 19541da177e4SLinus Torvalds 19552a30f36dSChandra Seetharaman return error; 19561da177e4SLinus Torvalds } 19571da177e4SLinus Torvalds 19581da177e4SLinus Torvalds /* 19591da177e4SLinus Torvalds * Reallocate the space for if_broot based on the number of records 19601da177e4SLinus Torvalds * being added or deleted as indicated in rec_diff. Move the records 19611da177e4SLinus Torvalds * and pointers in if_broot to fit the new size. When shrinking this 19621da177e4SLinus Torvalds * will eliminate holes between the records and pointers created by 19631da177e4SLinus Torvalds * the caller. When growing this will create holes to be filled in 19641da177e4SLinus Torvalds * by the caller. 19651da177e4SLinus Torvalds * 19661da177e4SLinus Torvalds * The caller must not request to add more records than would fit in 19671da177e4SLinus Torvalds * the on-disk inode root. If the if_broot is currently NULL, then 19681da177e4SLinus Torvalds * if we are adding records, one will be allocated. The caller must also 19691da177e4SLinus Torvalds * not request that the number of records go below zero, although 19701da177e4SLinus Torvalds * it can go to zero. 19711da177e4SLinus Torvalds * 19721da177e4SLinus Torvalds * ip -- the inode whose if_broot area is changing 19731da177e4SLinus Torvalds * rec_diff -- the change in the number of records, positive or negative, 19741da177e4SLinus Torvalds * requested for the if_broot array. 19751da177e4SLinus Torvalds */ 19761da177e4SLinus Torvalds void 19771da177e4SLinus Torvalds xfs_iroot_realloc( 19781da177e4SLinus Torvalds xfs_inode_t *ip, 19791da177e4SLinus Torvalds int rec_diff, 19801da177e4SLinus Torvalds int whichfork) 19811da177e4SLinus Torvalds { 198260197e8dSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 19831da177e4SLinus Torvalds int cur_max; 19841da177e4SLinus Torvalds xfs_ifork_t *ifp; 19857cc95a82SChristoph Hellwig struct xfs_btree_block *new_broot; 19861da177e4SLinus Torvalds int new_max; 19871da177e4SLinus Torvalds size_t new_size; 19881da177e4SLinus Torvalds char *np; 19891da177e4SLinus Torvalds char *op; 19901da177e4SLinus Torvalds 19911da177e4SLinus Torvalds /* 19921da177e4SLinus Torvalds * Handle the degenerate case quietly. 19931da177e4SLinus Torvalds */ 19941da177e4SLinus Torvalds if (rec_diff == 0) { 19951da177e4SLinus Torvalds return; 19961da177e4SLinus Torvalds } 19971da177e4SLinus Torvalds 19981da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 19991da177e4SLinus Torvalds if (rec_diff > 0) { 20001da177e4SLinus Torvalds /* 20011da177e4SLinus Torvalds * If there wasn't any memory allocated before, just 20021da177e4SLinus Torvalds * allocate it now and get out.
20031da177e4SLinus Torvalds */ 20041da177e4SLinus Torvalds if (ifp->if_broot_bytes == 0) { 20051da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 20064a7edddcSDave Chinner ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 20071da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 20081da177e4SLinus Torvalds return; 20091da177e4SLinus Torvalds } 20101da177e4SLinus Torvalds 20111da177e4SLinus Torvalds /* 20121da177e4SLinus Torvalds * If there is already an existing if_broot, then we need 20131da177e4SLinus Torvalds * to realloc() it and shift the pointers to their new 20141da177e4SLinus Torvalds * location. The records don't change location because 20151da177e4SLinus Torvalds * they are kept butted up against the btree block header. 20161da177e4SLinus Torvalds */ 201760197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 20181da177e4SLinus Torvalds new_max = cur_max + rec_diff; 20191da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 20207cc95a82SChristoph Hellwig ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 20211da177e4SLinus Torvalds (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 20224a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 202360197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 20241da177e4SLinus Torvalds ifp->if_broot_bytes); 202560197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 20261da177e4SLinus Torvalds (int)new_size); 20271da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 20281da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 20291da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 20301da177e4SLinus Torvalds memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 20311da177e4SLinus Torvalds return; 20321da177e4SLinus Torvalds } 20331da177e4SLinus Torvalds 20341da177e4SLinus Torvalds /* 20351da177e4SLinus Torvalds * rec_diff is less than 0. In this case, we are shrinking the 20361da177e4SLinus Torvalds * if_broot buffer. It must already exist. If we go to zero 20371da177e4SLinus Torvalds * records, just get rid of the root and clear the status bit. 20381da177e4SLinus Torvalds */ 20391da177e4SLinus Torvalds ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 204060197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 20411da177e4SLinus Torvalds new_max = cur_max + rec_diff; 20421da177e4SLinus Torvalds ASSERT(new_max >= 0); 20431da177e4SLinus Torvalds if (new_max > 0) 20441da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 20451da177e4SLinus Torvalds else 20461da177e4SLinus Torvalds new_size = 0; 20471da177e4SLinus Torvalds if (new_size > 0) { 20484a7edddcSDave Chinner new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 20491da177e4SLinus Torvalds /* 20501da177e4SLinus Torvalds * First copy over the btree block header. 20511da177e4SLinus Torvalds */ 20527cc95a82SChristoph Hellwig memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); 20531da177e4SLinus Torvalds } else { 20541da177e4SLinus Torvalds new_broot = NULL; 20551da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFBROOT; 20561da177e4SLinus Torvalds } 20571da177e4SLinus Torvalds 20581da177e4SLinus Torvalds /* 20591da177e4SLinus Torvalds * Only copy the records and pointers if there are any. 
20601da177e4SLinus Torvalds */ 20611da177e4SLinus Torvalds if (new_max > 0) { 20621da177e4SLinus Torvalds /* 20631da177e4SLinus Torvalds * First copy the records. 20641da177e4SLinus Torvalds */ 2065136341b4SChristoph Hellwig op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); 2066136341b4SChristoph Hellwig np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); 20671da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 20681da177e4SLinus Torvalds 20691da177e4SLinus Torvalds /* 20701da177e4SLinus Torvalds * Then copy the pointers. 20711da177e4SLinus Torvalds */ 207260197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 20731da177e4SLinus Torvalds ifp->if_broot_bytes); 207460197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, 20751da177e4SLinus Torvalds (int)new_size); 20761da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 20771da177e4SLinus Torvalds } 2078f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 20791da177e4SLinus Torvalds ifp->if_broot = new_broot; 20801da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 20811da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 20821da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 20831da177e4SLinus Torvalds return; 20841da177e4SLinus Torvalds } 20851da177e4SLinus Torvalds 20861da177e4SLinus Torvalds 20871da177e4SLinus Torvalds /* 20881da177e4SLinus Torvalds * This is called when the amount of space needed for if_data 20891da177e4SLinus Torvalds * is increased or decreased. The change in size is indicated by 20901da177e4SLinus Torvalds * the number of bytes that need to be added or deleted in the 20911da177e4SLinus Torvalds * byte_diff parameter. 20921da177e4SLinus Torvalds * 20931da177e4SLinus Torvalds * If the amount of space needed has decreased below the size of the 20941da177e4SLinus Torvalds * inline buffer, then switch to using the inline buffer. Otherwise, 20951da177e4SLinus Torvalds * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 20961da177e4SLinus Torvalds * to what is needed. 20971da177e4SLinus Torvalds * 20981da177e4SLinus Torvalds * ip -- the inode whose if_data area is changing 20991da177e4SLinus Torvalds * byte_diff -- the change in the number of bytes, positive or negative, 21001da177e4SLinus Torvalds * requested for the if_data array. 
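 *
 * Illustrative use (the sizes are hypothetical, not a call site in this
 * file): the shortform directory and attribute code grows a local-format
 * fork before copying in a new entry and shrinks it again with a negative
 * byte_diff when an entry is removed, e.g.
 *
 *	xfs_idata_realloc(dp, (int)new_entry_size, XFS_DATA_FORK);
 *	...
 *	xfs_idata_realloc(dp, -(int)old_entry_size, XFS_DATA_FORK);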
21011da177e4SLinus Torvalds */ 21021da177e4SLinus Torvalds void 21031da177e4SLinus Torvalds xfs_idata_realloc( 21041da177e4SLinus Torvalds xfs_inode_t *ip, 21051da177e4SLinus Torvalds int byte_diff, 21061da177e4SLinus Torvalds int whichfork) 21071da177e4SLinus Torvalds { 21081da177e4SLinus Torvalds xfs_ifork_t *ifp; 21091da177e4SLinus Torvalds int new_size; 21101da177e4SLinus Torvalds int real_size; 21111da177e4SLinus Torvalds 21121da177e4SLinus Torvalds if (byte_diff == 0) { 21131da177e4SLinus Torvalds return; 21141da177e4SLinus Torvalds } 21151da177e4SLinus Torvalds 21161da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 21171da177e4SLinus Torvalds new_size = (int)ifp->if_bytes + byte_diff; 21181da177e4SLinus Torvalds ASSERT(new_size >= 0); 21191da177e4SLinus Torvalds 21201da177e4SLinus Torvalds if (new_size == 0) { 21211da177e4SLinus Torvalds if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2122f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 21231da177e4SLinus Torvalds } 21241da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 21251da177e4SLinus Torvalds real_size = 0; 21261da177e4SLinus Torvalds } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 21271da177e4SLinus Torvalds /* 21281da177e4SLinus Torvalds * If the valid extents/data can fit in if_inline_ext/data, 21291da177e4SLinus Torvalds * copy them from the malloc'd vector and free it. 21301da177e4SLinus Torvalds */ 21311da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 21321da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 21331da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 21341da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 21351da177e4SLinus Torvalds memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 21361da177e4SLinus Torvalds new_size); 2137f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 21381da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 21391da177e4SLinus Torvalds } 21401da177e4SLinus Torvalds real_size = 0; 21411da177e4SLinus Torvalds } else { 21421da177e4SLinus Torvalds /* 21431da177e4SLinus Torvalds * Stuck with malloc/realloc. 21441da177e4SLinus Torvalds * For inline data, the underlying buffer must be 21451da177e4SLinus Torvalds * a multiple of 4 bytes in size so that it can be 21461da177e4SLinus Torvalds * logged and stay on word boundaries. We enforce 21471da177e4SLinus Torvalds * that here. 21481da177e4SLinus Torvalds */ 21491da177e4SLinus Torvalds real_size = roundup(new_size, 4); 21501da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 21511da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 21524a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, 21534a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 21541da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 21551da177e4SLinus Torvalds /* 21561da177e4SLinus Torvalds * Only do the realloc if the underlying size 21571da177e4SLinus Torvalds * is really changing. 
21581da177e4SLinus Torvalds */ 21591da177e4SLinus Torvalds if (ifp->if_real_bytes != real_size) { 21601da177e4SLinus Torvalds ifp->if_u1.if_data = 21611da177e4SLinus Torvalds kmem_realloc(ifp->if_u1.if_data, 21621da177e4SLinus Torvalds real_size, 21631da177e4SLinus Torvalds ifp->if_real_bytes, 21644a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 21651da177e4SLinus Torvalds } 21661da177e4SLinus Torvalds } else { 21671da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 21684a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, 21694a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 21701da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 21711da177e4SLinus Torvalds ifp->if_bytes); 21721da177e4SLinus Torvalds } 21731da177e4SLinus Torvalds } 21741da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 21751da177e4SLinus Torvalds ifp->if_bytes = new_size; 21761da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 21771da177e4SLinus Torvalds } 21781da177e4SLinus Torvalds 21791da177e4SLinus Torvalds void 21801da177e4SLinus Torvalds xfs_idestroy_fork( 21811da177e4SLinus Torvalds xfs_inode_t *ip, 21821da177e4SLinus Torvalds int whichfork) 21831da177e4SLinus Torvalds { 21841da177e4SLinus Torvalds xfs_ifork_t *ifp; 21851da177e4SLinus Torvalds 21861da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 21871da177e4SLinus Torvalds if (ifp->if_broot != NULL) { 2188f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 21891da177e4SLinus Torvalds ifp->if_broot = NULL; 21901da177e4SLinus Torvalds } 21911da177e4SLinus Torvalds 21921da177e4SLinus Torvalds /* 21931da177e4SLinus Torvalds * If the format is local, then we can't have an extents 21941da177e4SLinus Torvalds * array so just look for an inline data array. If we're 21951da177e4SLinus Torvalds * not local then we may or may not have an extents list, 21961da177e4SLinus Torvalds * so check and free it up if we do. 21971da177e4SLinus Torvalds */ 21981da177e4SLinus Torvalds if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 21991da177e4SLinus Torvalds if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 22001da177e4SLinus Torvalds (ifp->if_u1.if_data != NULL)) { 22011da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 2202f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 22031da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 22041da177e4SLinus Torvalds ifp->if_real_bytes = 0; 22051da177e4SLinus Torvalds } 22061da177e4SLinus Torvalds } else if ((ifp->if_flags & XFS_IFEXTENTS) && 22070293ce3aSMandy Kirkconnell ((ifp->if_flags & XFS_IFEXTIREC) || 22080293ce3aSMandy Kirkconnell ((ifp->if_u1.if_extents != NULL) && 22090293ce3aSMandy Kirkconnell (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 22101da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 22114eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 22121da177e4SLinus Torvalds } 22131da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents == NULL || 22141da177e4SLinus Torvalds ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 22151da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 22161da177e4SLinus Torvalds if (whichfork == XFS_ATTR_FORK) { 22171da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 22181da177e4SLinus Torvalds ip->i_afp = NULL; 22191da177e4SLinus Torvalds } 22201da177e4SLinus Torvalds } 22211da177e4SLinus Torvalds 22221da177e4SLinus Torvalds /* 222360ec6783SChristoph Hellwig * This is called to unpin an inode. 
The caller must have the inode locked 222460ec6783SChristoph Hellwig * in at least shared mode so that the buffer cannot be subsequently pinned 222560ec6783SChristoph Hellwig * once someone is waiting for it to be unpinned. 22261da177e4SLinus Torvalds */ 222760ec6783SChristoph Hellwig static void 2228f392e631SChristoph Hellwig xfs_iunpin( 222960ec6783SChristoph Hellwig struct xfs_inode *ip) 2230a3f74ffbSDavid Chinner { 2231579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2232a3f74ffbSDavid Chinner 22334aaf15d1SDave Chinner trace_xfs_inode_unpin_nowait(ip, _RET_IP_); 22344aaf15d1SDave Chinner 2235a3f74ffbSDavid Chinner /* Give the log a push to start the unpinning I/O */ 223660ec6783SChristoph Hellwig xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2237a14a348bSChristoph Hellwig 2238a3f74ffbSDavid Chinner } 2239a3f74ffbSDavid Chinner 2240f392e631SChristoph Hellwig static void 2241f392e631SChristoph Hellwig __xfs_iunpin_wait( 2242f392e631SChristoph Hellwig struct xfs_inode *ip) 2243f392e631SChristoph Hellwig { 2244f392e631SChristoph Hellwig wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT); 2245f392e631SChristoph Hellwig DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT); 2246f392e631SChristoph Hellwig 2247f392e631SChristoph Hellwig xfs_iunpin(ip); 2248f392e631SChristoph Hellwig 2249f392e631SChristoph Hellwig do { 2250f392e631SChristoph Hellwig prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 2251f392e631SChristoph Hellwig if (xfs_ipincount(ip)) 2252f392e631SChristoph Hellwig io_schedule(); 2253f392e631SChristoph Hellwig } while (xfs_ipincount(ip)); 2254f392e631SChristoph Hellwig finish_wait(wq, &wait.wait); 2255f392e631SChristoph Hellwig } 2256f392e631SChristoph Hellwig 2257777df5afSDave Chinner void 22581da177e4SLinus Torvalds xfs_iunpin_wait( 225960ec6783SChristoph Hellwig struct xfs_inode *ip) 22601da177e4SLinus Torvalds { 2261f392e631SChristoph Hellwig if (xfs_ipincount(ip)) 2262f392e631SChristoph Hellwig __xfs_iunpin_wait(ip); 22631da177e4SLinus Torvalds } 22641da177e4SLinus Torvalds 22651da177e4SLinus Torvalds /* 22661da177e4SLinus Torvalds * xfs_iextents_copy() 22671da177e4SLinus Torvalds * 22681da177e4SLinus Torvalds * This is called to copy the REAL extents (as opposed to the delayed 22691da177e4SLinus Torvalds * allocation extents) from the inode into the given buffer. It 22701da177e4SLinus Torvalds * returns the number of bytes copied into the buffer. 22711da177e4SLinus Torvalds * 22721da177e4SLinus Torvalds * If there are no delayed allocation extents, then we can just 22731da177e4SLinus Torvalds * memcpy() the extents into the buffer. Otherwise, we need to 22741da177e4SLinus Torvalds * examine each extent in turn and skip those which are delayed. 
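 *
 * For illustration only (an editor's worked example, not part of the
 * original source): if the incore fork held five records and two of
 * them were delayed-allocation records (marked by isnullstartblock()
 * on their start block, as checked below), only the three real
 * records would be converted to on-disk endianness, and the return
 * value would be 3 * sizeof(xfs_bmbt_rec_t) -- 48 bytes, assuming the
 * usual 16-byte on-disk bmbt record.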
22751da177e4SLinus Torvalds */ 22761da177e4SLinus Torvalds int 22771da177e4SLinus Torvalds xfs_iextents_copy( 22781da177e4SLinus Torvalds xfs_inode_t *ip, 2279a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp, 22801da177e4SLinus Torvalds int whichfork) 22811da177e4SLinus Torvalds { 22821da177e4SLinus Torvalds int copied; 22831da177e4SLinus Torvalds int i; 22841da177e4SLinus Torvalds xfs_ifork_t *ifp; 22851da177e4SLinus Torvalds int nrecs; 22861da177e4SLinus Torvalds xfs_fsblock_t start_block; 22871da177e4SLinus Torvalds 22881da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 2289579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 22901da177e4SLinus Torvalds ASSERT(ifp->if_bytes > 0); 22911da177e4SLinus Torvalds 22921da177e4SLinus Torvalds nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 22933a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 22941da177e4SLinus Torvalds ASSERT(nrecs > 0); 22951da177e4SLinus Torvalds 22961da177e4SLinus Torvalds /* 22971da177e4SLinus Torvalds * There are some delayed allocation extents in the 22981da177e4SLinus Torvalds * inode, so copy the extents one at a time and skip 22991da177e4SLinus Torvalds * the delayed ones. There must be at least one 23001da177e4SLinus Torvalds * non-delayed extent. 23011da177e4SLinus Torvalds */ 23021da177e4SLinus Torvalds copied = 0; 23031da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 2304a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 23051da177e4SLinus Torvalds start_block = xfs_bmbt_get_startblock(ep); 23069d87c319SEric Sandeen if (isnullstartblock(start_block)) { 23071da177e4SLinus Torvalds /* 23081da177e4SLinus Torvalds * It's a delayed allocation extent, so skip it. 23091da177e4SLinus Torvalds */ 23101da177e4SLinus Torvalds continue; 23111da177e4SLinus Torvalds } 23121da177e4SLinus Torvalds 23131da177e4SLinus Torvalds /* Translate to on disk format */ 2314cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l0), &dp->l0); 2315cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l1), &dp->l1); 2316a6f64d4aSChristoph Hellwig dp++; 23171da177e4SLinus Torvalds copied++; 23181da177e4SLinus Torvalds } 23191da177e4SLinus Torvalds ASSERT(copied != 0); 2320a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); 23211da177e4SLinus Torvalds 23221da177e4SLinus Torvalds return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 23231da177e4SLinus Torvalds } 23241da177e4SLinus Torvalds 23251da177e4SLinus Torvalds /* 23261da177e4SLinus Torvalds * Each of the following cases stores data into the same region 23271da177e4SLinus Torvalds * of the on-disk inode, so only one of them can be valid at 23281da177e4SLinus Torvalds * any given time. While it is possible to have conflicting formats 23291da177e4SLinus Torvalds * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 23301da177e4SLinus Torvalds * in EXTENTS format, this can only happen when the fork has 23311da177e4SLinus Torvalds * changed formats after being modified but before being flushed. 23321da177e4SLinus Torvalds * In these cases, the format always takes precedence, because the 23331da177e4SLinus Torvalds * format indicates the current state of the fork. 
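 *
 * As an editor's summary of the switch below (not part of the
 * original source), the fork format / inode log flag pairs handled
 * are roughly:
 *
 *	XFS_DINODE_FMT_LOCAL	XFS_ILOG_DDATA  / XFS_ILOG_ADATA
 *	XFS_DINODE_FMT_EXTENTS	XFS_ILOG_DEXT   / XFS_ILOG_AEXT
 *	XFS_DINODE_FMT_BTREE	XFS_ILOG_DBROOT / XFS_ILOG_ABROOT
 *	XFS_DINODE_FMT_DEV	XFS_ILOG_DEV    (data fork only)
 *	XFS_DINODE_FMT_UUID	XFS_ILOG_UUID   (data fork only)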
23341da177e4SLinus Torvalds */ 23351da177e4SLinus Torvalds /*ARGSUSED*/ 2336e4ac967bSDavid Chinner STATIC void 23371da177e4SLinus Torvalds xfs_iflush_fork( 23381da177e4SLinus Torvalds xfs_inode_t *ip, 23391da177e4SLinus Torvalds xfs_dinode_t *dip, 23401da177e4SLinus Torvalds xfs_inode_log_item_t *iip, 23411da177e4SLinus Torvalds int whichfork, 23421da177e4SLinus Torvalds xfs_buf_t *bp) 23431da177e4SLinus Torvalds { 23441da177e4SLinus Torvalds char *cp; 23451da177e4SLinus Torvalds xfs_ifork_t *ifp; 23461da177e4SLinus Torvalds xfs_mount_t *mp; 23471da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 23481da177e4SLinus Torvalds int first; 23491da177e4SLinus Torvalds #endif 23501da177e4SLinus Torvalds static const short brootflag[2] = 23511da177e4SLinus Torvalds { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 23521da177e4SLinus Torvalds static const short dataflag[2] = 23531da177e4SLinus Torvalds { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 23541da177e4SLinus Torvalds static const short extflag[2] = 23551da177e4SLinus Torvalds { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 23561da177e4SLinus Torvalds 2357e4ac967bSDavid Chinner if (!iip) 2358e4ac967bSDavid Chinner return; 23591da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 23601da177e4SLinus Torvalds /* 23611da177e4SLinus Torvalds * This can happen if we gave up in iformat in an error path, 23621da177e4SLinus Torvalds * for the attribute fork. 23631da177e4SLinus Torvalds */ 2364e4ac967bSDavid Chinner if (!ifp) { 23651da177e4SLinus Torvalds ASSERT(whichfork == XFS_ATTR_FORK); 2366e4ac967bSDavid Chinner return; 23671da177e4SLinus Torvalds } 23681da177e4SLinus Torvalds cp = XFS_DFORK_PTR(dip, whichfork); 23691da177e4SLinus Torvalds mp = ip->i_mount; 23701da177e4SLinus Torvalds switch (XFS_IFORK_FORMAT(ip, whichfork)) { 23711da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 2372f5d8d5c4SChristoph Hellwig if ((iip->ili_fields & dataflag[whichfork]) && 23731da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 23741da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_data != NULL); 23751da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 23761da177e4SLinus Torvalds memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 23771da177e4SLinus Torvalds } 23781da177e4SLinus Torvalds break; 23791da177e4SLinus Torvalds 23801da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 23811da177e4SLinus Torvalds ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2382f5d8d5c4SChristoph Hellwig !(iip->ili_fields & extflag[whichfork])); 2383f5d8d5c4SChristoph Hellwig if ((iip->ili_fields & extflag[whichfork]) && 23841da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 2385ab1908a5SChristoph Hellwig ASSERT(xfs_iext_get_ext(ifp, 0)); 23861da177e4SLinus Torvalds ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 23871da177e4SLinus Torvalds (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 23881da177e4SLinus Torvalds whichfork); 23891da177e4SLinus Torvalds } 23901da177e4SLinus Torvalds break; 23911da177e4SLinus Torvalds 23921da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 2393f5d8d5c4SChristoph Hellwig if ((iip->ili_fields & brootflag[whichfork]) && 23941da177e4SLinus Torvalds (ifp->if_broot_bytes > 0)) { 23951da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 23961da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 23971da177e4SLinus Torvalds (XFS_IFORK_SIZE(ip, whichfork) + 23981da177e4SLinus Torvalds XFS_BROOT_SIZE_ADJ)); 239960197e8dSChristoph Hellwig xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 24001da177e4SLinus Torvalds (xfs_bmdr_block_t *)cp, 24011da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, 
mp, whichfork)); 24021da177e4SLinus Torvalds } 24031da177e4SLinus Torvalds break; 24041da177e4SLinus Torvalds 24051da177e4SLinus Torvalds case XFS_DINODE_FMT_DEV: 2406f5d8d5c4SChristoph Hellwig if (iip->ili_fields & XFS_ILOG_DEV) { 24071da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 240881591fe2SChristoph Hellwig xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 24091da177e4SLinus Torvalds } 24101da177e4SLinus Torvalds break; 24111da177e4SLinus Torvalds 24121da177e4SLinus Torvalds case XFS_DINODE_FMT_UUID: 2413f5d8d5c4SChristoph Hellwig if (iip->ili_fields & XFS_ILOG_UUID) { 24141da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 241581591fe2SChristoph Hellwig memcpy(XFS_DFORK_DPTR(dip), 241681591fe2SChristoph Hellwig &ip->i_df.if_u2.if_uuid, 24171da177e4SLinus Torvalds sizeof(uuid_t)); 24181da177e4SLinus Torvalds } 24191da177e4SLinus Torvalds break; 24201da177e4SLinus Torvalds 24211da177e4SLinus Torvalds default: 24221da177e4SLinus Torvalds ASSERT(0); 24231da177e4SLinus Torvalds break; 24241da177e4SLinus Torvalds } 24251da177e4SLinus Torvalds } 24261da177e4SLinus Torvalds 2427bad55843SDavid Chinner STATIC int 2428bad55843SDavid Chinner xfs_iflush_cluster( 2429bad55843SDavid Chinner xfs_inode_t *ip, 2430bad55843SDavid Chinner xfs_buf_t *bp) 2431bad55843SDavid Chinner { 2432bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 24335017e97dSDave Chinner struct xfs_perag *pag; 2434bad55843SDavid Chinner unsigned long first_index, mask; 2435c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2436bad55843SDavid Chinner int ilist_size; 2437bad55843SDavid Chinner xfs_inode_t **ilist; 2438bad55843SDavid Chinner xfs_inode_t *iq; 2439bad55843SDavid Chinner int nr_found; 2440bad55843SDavid Chinner int clcount = 0; 2441bad55843SDavid Chinner int bufwasdelwri; 2442bad55843SDavid Chinner int i; 2443bad55843SDavid Chinner 24445017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2445bad55843SDavid Chinner 2446c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2447c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 244849383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2449bad55843SDavid Chinner if (!ilist) 245044b56e0aSDave Chinner goto out_put; 2451bad55843SDavid Chinner 2452bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2453bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 24541a3e8f3dSDave Chinner rcu_read_lock(); 2455bad55843SDavid Chinner /* really need a gang lookup range call here */ 2456bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2457c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2458bad55843SDavid Chinner if (nr_found == 0) 2459bad55843SDavid Chinner goto out_free; 2460bad55843SDavid Chinner 2461bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2462bad55843SDavid Chinner iq = ilist[i]; 2463bad55843SDavid Chinner if (iq == ip) 2464bad55843SDavid Chinner continue; 24651a3e8f3dSDave Chinner 24661a3e8f3dSDave Chinner /* 24671a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could find a 24681a3e8f3dSDave Chinner * recently freed or even reallocated inode during the lookup. 24691a3e8f3dSDave Chinner * We need to check under the i_flags_lock for a valid inode 24701a3e8f3dSDave Chinner * here. Skip it if it is not valid or the wrong inode. 
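 *
 * Editorial note (hedged, not part of the original source): the
 * checks just below take ip->i_flags_lock and test ip->i_ino, even
 * though the inode being validated is iq; that appears inconsistent
 * with the intent described above, and later upstream kernels
 * reworked these checks to look at the inode returned by the lookup
 * instead.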
24711a3e8f3dSDave Chinner */ 24721a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 24731a3e8f3dSDave Chinner if (!ip->i_ino || 24741a3e8f3dSDave Chinner (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 24751a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 24761a3e8f3dSDave Chinner continue; 24771a3e8f3dSDave Chinner } 24781a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 24791a3e8f3dSDave Chinner 2480bad55843SDavid Chinner /* 2481bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2482bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2483bad55843SDavid Chinner * later after the appropriate locks are acquired. 2484bad55843SDavid Chinner */ 248533540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2486bad55843SDavid Chinner continue; 2487bad55843SDavid Chinner 2488bad55843SDavid Chinner /* 2489bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2490bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2491bad55843SDavid Chinner */ 2492bad55843SDavid Chinner 2493bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2494bad55843SDavid Chinner continue; 2495bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2496bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2497bad55843SDavid Chinner continue; 2498bad55843SDavid Chinner } 2499bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2500bad55843SDavid Chinner xfs_ifunlock(iq); 2501bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2502bad55843SDavid Chinner continue; 2503bad55843SDavid Chinner } 2504bad55843SDavid Chinner 2505bad55843SDavid Chinner /* 2506bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2507bad55843SDavid Chinner * re-check that it's dirty before flushing. 2508bad55843SDavid Chinner */ 250933540408SDavid Chinner if (!xfs_inode_clean(iq)) { 2510bad55843SDavid Chinner int error; 2511bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 2512bad55843SDavid Chinner if (error) { 2513bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2514bad55843SDavid Chinner goto cluster_corrupt_out; 2515bad55843SDavid Chinner } 2516bad55843SDavid Chinner clcount++; 2517bad55843SDavid Chinner } else { 2518bad55843SDavid Chinner xfs_ifunlock(iq); 2519bad55843SDavid Chinner } 2520bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2521bad55843SDavid Chinner } 2522bad55843SDavid Chinner 2523bad55843SDavid Chinner if (clcount) { 2524bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 2525bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2526bad55843SDavid Chinner } 2527bad55843SDavid Chinner 2528bad55843SDavid Chinner out_free: 25291a3e8f3dSDave Chinner rcu_read_unlock(); 2530f0e2d93cSDenys Vlasenko kmem_free(ilist); 253144b56e0aSDave Chinner out_put: 253244b56e0aSDave Chinner xfs_perag_put(pag); 2533bad55843SDavid Chinner return 0; 2534bad55843SDavid Chinner 2535bad55843SDavid Chinner 2536bad55843SDavid Chinner cluster_corrupt_out: 2537bad55843SDavid Chinner /* 2538bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 2539bad55843SDavid Chinner * inode buffer and shut down the filesystem. 2540bad55843SDavid Chinner */ 25411a3e8f3dSDave Chinner rcu_read_unlock(); 2542bad55843SDavid Chinner /* 254343ff2122SChristoph Hellwig * Clean up the buffer. If it was delwri, just release it -- 2544bad55843SDavid Chinner * brelse can handle it with no problems. 
If not, shut down the 2545bad55843SDavid Chinner * filesystem before releasing the buffer. 2546bad55843SDavid Chinner */ 254743ff2122SChristoph Hellwig bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); 2548bad55843SDavid Chinner if (bufwasdelwri) 2549bad55843SDavid Chinner xfs_buf_relse(bp); 2550bad55843SDavid Chinner 2551bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2552bad55843SDavid Chinner 2553bad55843SDavid Chinner if (!bufwasdelwri) { 2554bad55843SDavid Chinner /* 2555bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 2556bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 2557bad55843SDavid Chinner * mark it as stale and brelse. 2558bad55843SDavid Chinner */ 2559cb669ca5SChristoph Hellwig if (bp->b_iodone) { 2560bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 2561c867cb61SChristoph Hellwig xfs_buf_stale(bp); 25625a52c2a5SChandra Seetharaman xfs_buf_ioerror(bp, EIO); 25631a1a3e97SChristoph Hellwig xfs_buf_ioend(bp, 0); 2564bad55843SDavid Chinner } else { 2565c867cb61SChristoph Hellwig xfs_buf_stale(bp); 2566bad55843SDavid Chinner xfs_buf_relse(bp); 2567bad55843SDavid Chinner } 2568bad55843SDavid Chinner } 2569bad55843SDavid Chinner 2570bad55843SDavid Chinner /* 2571bad55843SDavid Chinner * Unlocks the flush lock 2572bad55843SDavid Chinner */ 257304913fddSDave Chinner xfs_iflush_abort(iq, false); 2574f0e2d93cSDenys Vlasenko kmem_free(ilist); 257544b56e0aSDave Chinner xfs_perag_put(pag); 2576bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 2577bad55843SDavid Chinner } 2578bad55843SDavid Chinner 25791da177e4SLinus Torvalds /* 25804c46819aSChristoph Hellwig * Flush dirty inode metadata into the backing buffer. 25814c46819aSChristoph Hellwig * 25824c46819aSChristoph Hellwig * The caller must have the inode lock and the inode flush lock held. The 25834c46819aSChristoph Hellwig * inode lock will still be held upon return to the caller, and the inode 25844c46819aSChristoph Hellwig * flush lock will be released after the inode has reached the disk. 25854c46819aSChristoph Hellwig * 25864c46819aSChristoph Hellwig * The caller must write out the buffer returned in *bpp and release it. 
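 *
 * For illustration only (an editor's sketch of a caller, not part of
 * the original source; it assumes the list-based delwri buffer API
 * used elsewhere in this era of XFS and a caller-owned buffer_list):
 *
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_iflush(ip, &bp);
 *	if (!error) {
 *		xfs_buf_delwri_queue(bp, &buffer_list);
 *		xfs_buf_relse(bp);
 *	}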
25871da177e4SLinus Torvalds */ 25881da177e4SLinus Torvalds int 25891da177e4SLinus Torvalds xfs_iflush( 25904c46819aSChristoph Hellwig struct xfs_inode *ip, 25914c46819aSChristoph Hellwig struct xfs_buf **bpp) 25921da177e4SLinus Torvalds { 25934c46819aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 25944c46819aSChristoph Hellwig struct xfs_buf *bp; 25954c46819aSChristoph Hellwig struct xfs_dinode *dip; 25961da177e4SLinus Torvalds int error; 25971da177e4SLinus Torvalds 25981da177e4SLinus Torvalds XFS_STATS_INC(xs_iflush_count); 25991da177e4SLinus Torvalds 2600579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2601474fce06SChristoph Hellwig ASSERT(xfs_isiflocked(ip)); 26021da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 26038096b1ebSChristoph Hellwig ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 26041da177e4SLinus Torvalds 26054c46819aSChristoph Hellwig *bpp = NULL; 26061da177e4SLinus Torvalds 26071da177e4SLinus Torvalds xfs_iunpin_wait(ip); 26081da177e4SLinus Torvalds 26091da177e4SLinus Torvalds /* 26104b6a4688SDave Chinner * For stale inodes we cannot rely on the backing buffer remaining 26114b6a4688SDave Chinner * stale in cache for the remaining life of the stale inode and so 2612475ee413SChristoph Hellwig * xfs_imap_to_bp() below may give us a buffer that no longer contains 26134b6a4688SDave Chinner * inodes below. We have to check this after ensuring the inode is 26144b6a4688SDave Chinner * unpinned so that it is safe to reclaim the stale inode after the 26154b6a4688SDave Chinner * flush call. 26164b6a4688SDave Chinner */ 26174b6a4688SDave Chinner if (xfs_iflags_test(ip, XFS_ISTALE)) { 26184b6a4688SDave Chinner xfs_ifunlock(ip); 26194b6a4688SDave Chinner return 0; 26204b6a4688SDave Chinner } 26214b6a4688SDave Chinner 26224b6a4688SDave Chinner /* 26231da177e4SLinus Torvalds * This may have been unpinned because the filesystem is shutting 26241da177e4SLinus Torvalds * down forcibly. If that's the case we must not write this inode 262532ce90a4SChristoph Hellwig * to disk, because the log record didn't make it to disk. 262632ce90a4SChristoph Hellwig * 262732ce90a4SChristoph Hellwig * We also have to remove the log item from the AIL in this case, 262832ce90a4SChristoph Hellwig * as we wait for an empty AIL as part of the unmount process. 26291da177e4SLinus Torvalds */ 26301da177e4SLinus Torvalds if (XFS_FORCED_SHUTDOWN(mp)) { 263132ce90a4SChristoph Hellwig error = XFS_ERROR(EIO); 263232ce90a4SChristoph Hellwig goto abort_out; 26331da177e4SLinus Torvalds } 26341da177e4SLinus Torvalds 26351da177e4SLinus Torvalds /* 2636a3f74ffbSDavid Chinner * Get the buffer containing the on-disk inode. 2637a3f74ffbSDavid Chinner */ 2638475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, 2639475ee413SChristoph Hellwig 0); 2640a3f74ffbSDavid Chinner if (error || !bp) { 2641a3f74ffbSDavid Chinner xfs_ifunlock(ip); 2642a3f74ffbSDavid Chinner return error; 2643a3f74ffbSDavid Chinner } 2644a3f74ffbSDavid Chinner 2645a3f74ffbSDavid Chinner /* 26461da177e4SLinus Torvalds * First flush out the inode that xfs_iflush was called with. 
26471da177e4SLinus Torvalds */ 26481da177e4SLinus Torvalds error = xfs_iflush_int(ip, bp); 2649bad55843SDavid Chinner if (error) 26501da177e4SLinus Torvalds goto corrupt_out; 26511da177e4SLinus Torvalds 26521da177e4SLinus Torvalds /* 2653a3f74ffbSDavid Chinner * If the buffer is pinned then push on the log now so we won't 2654a3f74ffbSDavid Chinner * get stuck waiting in the write for too long. 2655a3f74ffbSDavid Chinner */ 2656811e64c7SChandra Seetharaman if (xfs_buf_ispinned(bp)) 2657a14a348bSChristoph Hellwig xfs_log_force(mp, 0); 2658a3f74ffbSDavid Chinner 2659a3f74ffbSDavid Chinner /* 26601da177e4SLinus Torvalds * inode clustering: 26611da177e4SLinus Torvalds * see if other inodes can be gathered into this write 26621da177e4SLinus Torvalds */ 2663bad55843SDavid Chinner error = xfs_iflush_cluster(ip, bp); 2664bad55843SDavid Chinner if (error) 26651da177e4SLinus Torvalds goto cluster_corrupt_out; 26661da177e4SLinus Torvalds 26674c46819aSChristoph Hellwig *bpp = bp; 26684c46819aSChristoph Hellwig return 0; 26691da177e4SLinus Torvalds 26701da177e4SLinus Torvalds corrupt_out: 26711da177e4SLinus Torvalds xfs_buf_relse(bp); 26727d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 26731da177e4SLinus Torvalds cluster_corrupt_out: 267432ce90a4SChristoph Hellwig error = XFS_ERROR(EFSCORRUPTED); 267532ce90a4SChristoph Hellwig abort_out: 26761da177e4SLinus Torvalds /* 26771da177e4SLinus Torvalds * Unlocks the flush lock 26781da177e4SLinus Torvalds */ 267904913fddSDave Chinner xfs_iflush_abort(ip, false); 268032ce90a4SChristoph Hellwig return error; 26811da177e4SLinus Torvalds } 26821da177e4SLinus Torvalds 26831da177e4SLinus Torvalds 26841da177e4SLinus Torvalds STATIC int 26851da177e4SLinus Torvalds xfs_iflush_int( 26861da177e4SLinus Torvalds xfs_inode_t *ip, 26871da177e4SLinus Torvalds xfs_buf_t *bp) 26881da177e4SLinus Torvalds { 26891da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 26901da177e4SLinus Torvalds xfs_dinode_t *dip; 26911da177e4SLinus Torvalds xfs_mount_t *mp; 26921da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 26931da177e4SLinus Torvalds int first; 26941da177e4SLinus Torvalds #endif 26951da177e4SLinus Torvalds 2696579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2697474fce06SChristoph Hellwig ASSERT(xfs_isiflocked(ip)); 26981da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 26998096b1ebSChristoph Hellwig ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 27001da177e4SLinus Torvalds 27011da177e4SLinus Torvalds iip = ip->i_itemp; 27021da177e4SLinus Torvalds mp = ip->i_mount; 27031da177e4SLinus Torvalds 27041da177e4SLinus Torvalds /* set *dip = inode's place in the buffer */ 270592bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 27061da177e4SLinus Torvalds 270769ef921bSChristoph Hellwig if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 27081da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 27096a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27106a19d939SDave Chinner "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 27116a19d939SDave Chinner __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 27121da177e4SLinus Torvalds goto corrupt_out; 27131da177e4SLinus Torvalds } 27141da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 27151da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 27166a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27176a19d939SDave 
Chinner "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 27186a19d939SDave Chinner __func__, ip->i_ino, ip, ip->i_d.di_magic); 27191da177e4SLinus Torvalds goto corrupt_out; 27201da177e4SLinus Torvalds } 2721abbede1bSAl Viro if (S_ISREG(ip->i_d.di_mode)) { 27221da177e4SLinus Torvalds if (XFS_TEST_ERROR( 27231da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 27241da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 27251da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 27266a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27276a19d939SDave Chinner "%s: Bad regular inode %Lu, ptr 0x%p", 27286a19d939SDave Chinner __func__, ip->i_ino, ip); 27291da177e4SLinus Torvalds goto corrupt_out; 27301da177e4SLinus Torvalds } 2731abbede1bSAl Viro } else if (S_ISDIR(ip->i_d.di_mode)) { 27321da177e4SLinus Torvalds if (XFS_TEST_ERROR( 27331da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 27341da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 27351da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 27361da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 27376a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27386a19d939SDave Chinner "%s: Bad directory inode %Lu, ptr 0x%p", 27396a19d939SDave Chinner __func__, ip->i_ino, ip); 27401da177e4SLinus Torvalds goto corrupt_out; 27411da177e4SLinus Torvalds } 27421da177e4SLinus Torvalds } 27431da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 27441da177e4SLinus Torvalds ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 27451da177e4SLinus Torvalds XFS_RANDOM_IFLUSH_5)) { 27466a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27476a19d939SDave Chinner "%s: detected corrupt incore inode %Lu, " 27486a19d939SDave Chinner "total extents = %d, nblocks = %Ld, ptr 0x%p", 27496a19d939SDave Chinner __func__, ip->i_ino, 27501da177e4SLinus Torvalds ip->i_d.di_nextents + ip->i_d.di_anextents, 27516a19d939SDave Chinner ip->i_d.di_nblocks, ip); 27521da177e4SLinus Torvalds goto corrupt_out; 27531da177e4SLinus Torvalds } 27541da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 27551da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 27566a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27576a19d939SDave Chinner "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 27586a19d939SDave Chinner __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 27591da177e4SLinus Torvalds goto corrupt_out; 27601da177e4SLinus Torvalds } 27611da177e4SLinus Torvalds /* 27621da177e4SLinus Torvalds * bump the flush iteration count, used to detect flushes which 27631da177e4SLinus Torvalds * postdate a log record during recovery. 27641da177e4SLinus Torvalds */ 27651da177e4SLinus Torvalds 27661da177e4SLinus Torvalds ip->i_d.di_flushiter++; 27671da177e4SLinus Torvalds 27681da177e4SLinus Torvalds /* 27691da177e4SLinus Torvalds * Copy the dirty parts of the inode into the on-disk 27701da177e4SLinus Torvalds * inode. We always copy out the core of the inode, 27711da177e4SLinus Torvalds * because if the inode is dirty at all the core must 27721da177e4SLinus Torvalds * be. 
27731da177e4SLinus Torvalds */ 277481591fe2SChristoph Hellwig xfs_dinode_to_disk(dip, &ip->i_d); 27751da177e4SLinus Torvalds 27761da177e4SLinus Torvalds /* Wrap, we never let the log put out DI_MAX_FLUSH */ 27771da177e4SLinus Torvalds if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 27781da177e4SLinus Torvalds ip->i_d.di_flushiter = 0; 27791da177e4SLinus Torvalds 27801da177e4SLinus Torvalds /* 27811da177e4SLinus Torvalds * If this is really an old format inode and the superblock version 27821da177e4SLinus Torvalds * has not been updated to support only new format inodes, then 27831da177e4SLinus Torvalds * convert back to the old inode format. If the superblock version 27841da177e4SLinus Torvalds * has been updated, then make the conversion permanent. 27851da177e4SLinus Torvalds */ 278651ce16d5SChristoph Hellwig ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 278751ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 278862118709SEric Sandeen if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 27891da177e4SLinus Torvalds /* 27901da177e4SLinus Torvalds * Convert it back. 27911da177e4SLinus Torvalds */ 27921da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 279381591fe2SChristoph Hellwig dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); 27941da177e4SLinus Torvalds } else { 27951da177e4SLinus Torvalds /* 27961da177e4SLinus Torvalds * The superblock version has already been bumped, 27971da177e4SLinus Torvalds * so just make the conversion to the new inode 27981da177e4SLinus Torvalds * format permanent. 27991da177e4SLinus Torvalds */ 280051ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 280151ce16d5SChristoph Hellwig dip->di_version = 2; 28021da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 280381591fe2SChristoph Hellwig dip->di_onlink = 0; 28041da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 280581591fe2SChristoph Hellwig memset(&(dip->di_pad[0]), 0, 280681591fe2SChristoph Hellwig sizeof(dip->di_pad)); 28076743099cSArkadiusz Mi?kiewicz ASSERT(xfs_get_projid(ip) == 0); 28081da177e4SLinus Torvalds } 28091da177e4SLinus Torvalds } 28101da177e4SLinus Torvalds 2811e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 2812e4ac967bSDavid Chinner if (XFS_IFORK_Q(ip)) 2813e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 28141da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 28151da177e4SLinus Torvalds 28161da177e4SLinus Torvalds /* 2817f5d8d5c4SChristoph Hellwig * We've recorded everything logged in the inode, so we'd like to clear 2818f5d8d5c4SChristoph Hellwig * the ili_fields bits so we don't log and flush things unnecessarily. 2819f5d8d5c4SChristoph Hellwig * However, we can't stop logging all this information until the data 2820f5d8d5c4SChristoph Hellwig * we've copied into the disk buffer is written to disk. If we did we 2821f5d8d5c4SChristoph Hellwig * might overwrite the copy of the inode in the log with all the data 2822f5d8d5c4SChristoph Hellwig * after re-logging only part of it, and in the face of a crash we 2823f5d8d5c4SChristoph Hellwig * wouldn't have all the data we need to recover. 28241da177e4SLinus Torvalds * 2825f5d8d5c4SChristoph Hellwig * What we do is move the bits to the ili_last_fields field. When 2826f5d8d5c4SChristoph Hellwig * logging the inode, these bits are moved back to the ili_fields field. 
2827f5d8d5c4SChristoph Hellwig * In the xfs_iflush_done() routine we clear ili_last_fields, since we 2828f5d8d5c4SChristoph Hellwig * know that the information those bits represent is permanently on 2829f5d8d5c4SChristoph Hellwig * disk. As long as the flush completes before the inode is logged 2830f5d8d5c4SChristoph Hellwig * again, then both ili_fields and ili_last_fields will be cleared. 28311da177e4SLinus Torvalds * 2832f5d8d5c4SChristoph Hellwig * We can play with the ili_fields bits here, because the inode lock 2833f5d8d5c4SChristoph Hellwig * must be held exclusively in order to set bits there and the flush 2834f5d8d5c4SChristoph Hellwig * lock protects the ili_last_fields bits. Set ili_logged so the flush 2835f5d8d5c4SChristoph Hellwig * done routine can tell whether or not to look in the AIL. Also, store 2836f5d8d5c4SChristoph Hellwig * the current LSN of the inode so that we can tell whether the item has 2837f5d8d5c4SChristoph Hellwig * moved in the AIL from xfs_iflush_done(). In order to read the lsn we 2838f5d8d5c4SChristoph Hellwig * need the AIL lock, because it is a 64 bit value that cannot be read 2839f5d8d5c4SChristoph Hellwig * atomically. 28401da177e4SLinus Torvalds */ 2841f5d8d5c4SChristoph Hellwig if (iip != NULL && iip->ili_fields != 0) { 2842f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 2843f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 28441da177e4SLinus Torvalds iip->ili_logged = 1; 28451da177e4SLinus Torvalds 28467b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 28477b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 28481da177e4SLinus Torvalds 28491da177e4SLinus Torvalds /* 28501da177e4SLinus Torvalds * Attach the function xfs_iflush_done to the inode's 28511da177e4SLinus Torvalds * buffer. This will remove the inode from the AIL 28521da177e4SLinus Torvalds * and unlock the inode's flush lock when the inode is 28531da177e4SLinus Torvalds * completely written to disk. 28541da177e4SLinus Torvalds */ 2855ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 28561da177e4SLinus Torvalds 2857adadbeefSChristoph Hellwig ASSERT(bp->b_fspriv != NULL); 2858cb669ca5SChristoph Hellwig ASSERT(bp->b_iodone != NULL); 28591da177e4SLinus Torvalds } else { 28601da177e4SLinus Torvalds /* 28611da177e4SLinus Torvalds * We're flushing an inode which is not in the AIL and has 28628a9c9980SChristoph Hellwig * not been logged. For this case we can immediately drop 28631da177e4SLinus Torvalds * the inode flush lock because we can avoid the whole 28641da177e4SLinus Torvalds * AIL state thing. It's OK to drop the flush lock now, 28651da177e4SLinus Torvalds * because we've already locked the buffer and to do anything 28661da177e4SLinus Torvalds * you really need both. 
28671da177e4SLinus Torvalds */ 28681da177e4SLinus Torvalds if (iip != NULL) { 28691da177e4SLinus Torvalds ASSERT(iip->ili_logged == 0); 28701da177e4SLinus Torvalds ASSERT(iip->ili_last_fields == 0); 28711da177e4SLinus Torvalds ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 28721da177e4SLinus Torvalds } 28731da177e4SLinus Torvalds xfs_ifunlock(ip); 28741da177e4SLinus Torvalds } 28751da177e4SLinus Torvalds 28761da177e4SLinus Torvalds return 0; 28771da177e4SLinus Torvalds 28781da177e4SLinus Torvalds corrupt_out: 28791da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 28801da177e4SLinus Torvalds } 28811da177e4SLinus Torvalds 28824eea22f0SMandy Kirkconnell /* 28834eea22f0SMandy Kirkconnell * Return a pointer to the extent record at file index idx. 28844eea22f0SMandy Kirkconnell */ 2885a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * 28864eea22f0SMandy Kirkconnell xfs_iext_get_ext( 28874eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 28884eea22f0SMandy Kirkconnell xfs_extnum_t idx) /* index of target extent */ 28894eea22f0SMandy Kirkconnell { 28904eea22f0SMandy Kirkconnell ASSERT(idx >= 0); 289187bef181SChristoph Hellwig ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 289287bef181SChristoph Hellwig 28930293ce3aSMandy Kirkconnell if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 28940293ce3aSMandy Kirkconnell return ifp->if_u1.if_ext_irec->er_extbuf; 28950293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 28960293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* irec pointer */ 28970293ce3aSMandy Kirkconnell int erp_idx = 0; /* irec index */ 28980293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = idx; /* ext index in target list */ 28990293ce3aSMandy Kirkconnell 29000293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 29010293ce3aSMandy Kirkconnell return &erp->er_extbuf[page_idx]; 29020293ce3aSMandy Kirkconnell } else if (ifp->if_bytes) { 29034eea22f0SMandy Kirkconnell return &ifp->if_u1.if_extents[idx]; 29044eea22f0SMandy Kirkconnell } else { 29054eea22f0SMandy Kirkconnell return NULL; 29064eea22f0SMandy Kirkconnell } 29074eea22f0SMandy Kirkconnell } 29084eea22f0SMandy Kirkconnell 29094eea22f0SMandy Kirkconnell /* 29104eea22f0SMandy Kirkconnell * Insert new item(s) into the extent records for incore inode 29114eea22f0SMandy Kirkconnell * fork 'ifp'. 'count' new items are inserted at index 'idx'. 29124eea22f0SMandy Kirkconnell */ 29134eea22f0SMandy Kirkconnell void 29144eea22f0SMandy Kirkconnell xfs_iext_insert( 29156ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 29164eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* starting index of new items */ 29174eea22f0SMandy Kirkconnell xfs_extnum_t count, /* number of inserted items */ 29186ef35544SChristoph Hellwig xfs_bmbt_irec_t *new, /* items to insert */ 29196ef35544SChristoph Hellwig int state) /* type of extent conversion */ 29204eea22f0SMandy Kirkconnell { 29216ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? 
ip->i_afp : &ip->i_df; 29224eea22f0SMandy Kirkconnell xfs_extnum_t i; /* extent record index */ 29234eea22f0SMandy Kirkconnell 29240b1b213fSChristoph Hellwig trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); 29250b1b213fSChristoph Hellwig 29264eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 29274eea22f0SMandy Kirkconnell xfs_iext_add(ifp, idx, count); 2928a6f64d4aSChristoph Hellwig for (i = idx; i < idx + count; i++, new++) 2929a6f64d4aSChristoph Hellwig xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); 29304eea22f0SMandy Kirkconnell } 29314eea22f0SMandy Kirkconnell 29324eea22f0SMandy Kirkconnell /* 29334eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 29344eea22f0SMandy Kirkconnell * extents needs to be increased. The ext_diff parameter stores the 29354eea22f0SMandy Kirkconnell * number of new extents being added and the idx parameter contains 29364eea22f0SMandy Kirkconnell * the extent index where the new extents will be added. If the new 29374eea22f0SMandy Kirkconnell * extents are being appended, then we just need to (re)allocate and 29384eea22f0SMandy Kirkconnell * initialize the space. Otherwise, if the new extents are being 29394eea22f0SMandy Kirkconnell * inserted into the middle of the existing entries, a bit more work 29404eea22f0SMandy Kirkconnell * is required to make room for the new extents to be inserted. The 29414eea22f0SMandy Kirkconnell * caller is responsible for filling in the new extent entries upon 29424eea22f0SMandy Kirkconnell * return. 29434eea22f0SMandy Kirkconnell */ 29444eea22f0SMandy Kirkconnell void 29454eea22f0SMandy Kirkconnell xfs_iext_add( 29464eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 29474eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin adding exts */ 2948c41564b5SNathan Scott int ext_diff) /* number of extents to add */ 29494eea22f0SMandy Kirkconnell { 29504eea22f0SMandy Kirkconnell int byte_diff; /* new bytes being added */ 29514eea22f0SMandy Kirkconnell int new_size; /* size of extents after adding */ 29524eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 29534eea22f0SMandy Kirkconnell 29544eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 29554eea22f0SMandy Kirkconnell ASSERT((idx >= 0) && (idx <= nextents)); 29564eea22f0SMandy Kirkconnell byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 29574eea22f0SMandy Kirkconnell new_size = ifp->if_bytes + byte_diff; 29584eea22f0SMandy Kirkconnell /* 29594eea22f0SMandy Kirkconnell * If the new number of extents (nextents + ext_diff) 29604eea22f0SMandy Kirkconnell * fits inside the inode, then continue to use the inline 29614eea22f0SMandy Kirkconnell * extent buffer. 29624eea22f0SMandy Kirkconnell */ 29634eea22f0SMandy Kirkconnell if (nextents + ext_diff <= XFS_INLINE_EXTS) { 29644eea22f0SMandy Kirkconnell if (idx < nextents) { 29654eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 29664eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx], 29674eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 29684eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 29694eea22f0SMandy Kirkconnell } 29704eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 29714eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 29724eea22f0SMandy Kirkconnell } 29734eea22f0SMandy Kirkconnell /* 29744eea22f0SMandy Kirkconnell * Otherwise use a linear (direct) extent list. 
29754eea22f0SMandy Kirkconnell * If the extents are currently inside the inode, 29764eea22f0SMandy Kirkconnell * xfs_iext_realloc_direct will switch us from 29774eea22f0SMandy Kirkconnell * inline to direct extent allocation mode. 29784eea22f0SMandy Kirkconnell */ 29790293ce3aSMandy Kirkconnell else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 29804eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 29814eea22f0SMandy Kirkconnell if (idx < nextents) { 29824eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx + ext_diff], 29834eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx], 29844eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 29854eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 29864eea22f0SMandy Kirkconnell } 29874eea22f0SMandy Kirkconnell } 29880293ce3aSMandy Kirkconnell /* Indirection array */ 29890293ce3aSMandy Kirkconnell else { 29900293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; 29910293ce3aSMandy Kirkconnell int erp_idx = 0; 29920293ce3aSMandy Kirkconnell int page_idx = idx; 29930293ce3aSMandy Kirkconnell 29940293ce3aSMandy Kirkconnell ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 29950293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 29960293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 29970293ce3aSMandy Kirkconnell } else { 29980293ce3aSMandy Kirkconnell xfs_iext_irec_init(ifp); 29990293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 30000293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 30010293ce3aSMandy Kirkconnell } 30020293ce3aSMandy Kirkconnell /* Extents fit in target extent page */ 30030293ce3aSMandy Kirkconnell if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 30040293ce3aSMandy Kirkconnell if (page_idx < erp->er_extcount) { 30050293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[page_idx + ext_diff], 30060293ce3aSMandy Kirkconnell &erp->er_extbuf[page_idx], 30070293ce3aSMandy Kirkconnell (erp->er_extcount - page_idx) * 30080293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 30090293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[page_idx], 0, byte_diff); 30100293ce3aSMandy Kirkconnell } 30110293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 30120293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 30130293ce3aSMandy Kirkconnell } 30140293ce3aSMandy Kirkconnell /* Insert a new extent page */ 30150293ce3aSMandy Kirkconnell else if (erp) { 30160293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(ifp, 30170293ce3aSMandy Kirkconnell erp_idx, page_idx, ext_diff); 30180293ce3aSMandy Kirkconnell } 30190293ce3aSMandy Kirkconnell /* 30200293ce3aSMandy Kirkconnell * If extent(s) are being appended to the last page in 30210293ce3aSMandy Kirkconnell * the indirection array and the new extent(s) don't fit 30220293ce3aSMandy Kirkconnell * in the page, then erp is NULL and erp_idx is set to 30230293ce3aSMandy Kirkconnell * the next index needed in the indirection array. 
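 *
 * For illustration (an editor's example, not part of the original
 * source; it assumes the usual 4k er_extbuf page holding
 * XFS_LINEAR_EXTS 16-byte records): appending three records when the
 * last page is already full leaves erp NULL here, so the loop below
 * calls xfs_iext_irec_new() for one fresh page and accounts the
 * three new records to it.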
30240293ce3aSMandy Kirkconnell */ 30250293ce3aSMandy Kirkconnell else { 30260293ce3aSMandy Kirkconnell int count = ext_diff; 30270293ce3aSMandy Kirkconnell 30280293ce3aSMandy Kirkconnell while (count) { 30290293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 30300293ce3aSMandy Kirkconnell erp->er_extcount = count; 30310293ce3aSMandy Kirkconnell count -= MIN(count, (int)XFS_LINEAR_EXTS); 30320293ce3aSMandy Kirkconnell if (count) { 30330293ce3aSMandy Kirkconnell erp_idx++; 30340293ce3aSMandy Kirkconnell } 30350293ce3aSMandy Kirkconnell } 30360293ce3aSMandy Kirkconnell } 30370293ce3aSMandy Kirkconnell } 30384eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 30394eea22f0SMandy Kirkconnell } 30404eea22f0SMandy Kirkconnell 30414eea22f0SMandy Kirkconnell /* 30420293ce3aSMandy Kirkconnell * This is called when incore extents are being added to the indirection 30430293ce3aSMandy Kirkconnell * array and the new extents do not fit in the target extent list. The 30440293ce3aSMandy Kirkconnell * erp_idx parameter contains the irec index for the target extent list 30450293ce3aSMandy Kirkconnell * in the indirection array, and the idx parameter contains the extent 30460293ce3aSMandy Kirkconnell * index within the list. The number of extents being added is stored 30470293ce3aSMandy Kirkconnell * in the count parameter. 30480293ce3aSMandy Kirkconnell * 30490293ce3aSMandy Kirkconnell * |-------| |-------| 30500293ce3aSMandy Kirkconnell * | | | | idx - number of extents before idx 30510293ce3aSMandy Kirkconnell * | idx | | count | 30520293ce3aSMandy Kirkconnell * | | | | count - number of extents being inserted at idx 30530293ce3aSMandy Kirkconnell * |-------| |-------| 30540293ce3aSMandy Kirkconnell * | count | | nex2 | nex2 - number of extents after idx + count 30550293ce3aSMandy Kirkconnell * |-------| |-------| 30560293ce3aSMandy Kirkconnell */ 30570293ce3aSMandy Kirkconnell void 30580293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi( 30590293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 30600293ce3aSMandy Kirkconnell int erp_idx, /* target extent irec index */ 30610293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index within target list */ 30620293ce3aSMandy Kirkconnell int count) /* new extents being added */ 30630293ce3aSMandy Kirkconnell { 30640293ce3aSMandy Kirkconnell int byte_diff; /* new bytes being added */ 30650293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* pointer to irec entry */ 30660293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* number of extents to add */ 30670293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* new extents still needed */ 30680293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 30690293ce3aSMandy Kirkconnell xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 30700293ce3aSMandy Kirkconnell int nlists; /* number of irec's (lists) */ 30710293ce3aSMandy Kirkconnell 30720293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 30730293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 30740293ce3aSMandy Kirkconnell nex2 = erp->er_extcount - idx; 30750293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 30760293ce3aSMandy Kirkconnell 30770293ce3aSMandy Kirkconnell /* 30780293ce3aSMandy Kirkconnell * Save second part of target extent list 30790293ce3aSMandy Kirkconnell * (all extents past */ 30800293ce3aSMandy Kirkconnell if (nex2) { 30810293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 30826785073bSDavid Chinner nex2_ep = (xfs_bmbt_rec_t 
*) kmem_alloc(byte_diff, KM_NOFS); 30830293ce3aSMandy Kirkconnell memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 30840293ce3aSMandy Kirkconnell erp->er_extcount -= nex2; 30850293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 30860293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[idx], 0, byte_diff); 30870293ce3aSMandy Kirkconnell } 30880293ce3aSMandy Kirkconnell 30890293ce3aSMandy Kirkconnell /* 30900293ce3aSMandy Kirkconnell * Add the new extents to the end of the target 30910293ce3aSMandy Kirkconnell * list, then allocate new irec record(s) and 30920293ce3aSMandy Kirkconnell * extent buffer(s) as needed to store the rest 30930293ce3aSMandy Kirkconnell * of the new extents. 30940293ce3aSMandy Kirkconnell */ 30950293ce3aSMandy Kirkconnell ext_cnt = count; 30960293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 30970293ce3aSMandy Kirkconnell if (ext_diff) { 30980293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 30990293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 31000293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 31010293ce3aSMandy Kirkconnell } 31020293ce3aSMandy Kirkconnell while (ext_cnt) { 31030293ce3aSMandy Kirkconnell erp_idx++; 31040293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 31050293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 31060293ce3aSMandy Kirkconnell erp->er_extcount = ext_diff; 31070293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 31080293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 31090293ce3aSMandy Kirkconnell } 31100293ce3aSMandy Kirkconnell 31110293ce3aSMandy Kirkconnell /* Add nex2 extents back to indirection array */ 31120293ce3aSMandy Kirkconnell if (nex2) { 31130293ce3aSMandy Kirkconnell xfs_extnum_t ext_avail; 31140293ce3aSMandy Kirkconnell int i; 31150293ce3aSMandy Kirkconnell 31160293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 31170293ce3aSMandy Kirkconnell ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 31180293ce3aSMandy Kirkconnell i = 0; 31190293ce3aSMandy Kirkconnell /* 31200293ce3aSMandy Kirkconnell * If nex2 extents fit in the current page, append 31210293ce3aSMandy Kirkconnell * nex2_ep after the new extents. 31220293ce3aSMandy Kirkconnell */ 31230293ce3aSMandy Kirkconnell if (nex2 <= ext_avail) { 31240293ce3aSMandy Kirkconnell i = erp->er_extcount; 31250293ce3aSMandy Kirkconnell } 31260293ce3aSMandy Kirkconnell /* 31270293ce3aSMandy Kirkconnell * Otherwise, check if space is available in the 31280293ce3aSMandy Kirkconnell * next page. 31290293ce3aSMandy Kirkconnell */ 31300293ce3aSMandy Kirkconnell else if ((erp_idx < nlists - 1) && 31310293ce3aSMandy Kirkconnell (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 31320293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 31330293ce3aSMandy Kirkconnell erp_idx++; 31340293ce3aSMandy Kirkconnell erp++; 31350293ce3aSMandy Kirkconnell /* Create a hole for nex2 extents */ 31360293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 31370293ce3aSMandy Kirkconnell erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 31380293ce3aSMandy Kirkconnell } 31390293ce3aSMandy Kirkconnell /* 31400293ce3aSMandy Kirkconnell * Final choice, create a new extent page for 31410293ce3aSMandy Kirkconnell * nex2 extents. 
31420293ce3aSMandy Kirkconnell */ 31430293ce3aSMandy Kirkconnell else { 31440293ce3aSMandy Kirkconnell erp_idx++; 31450293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 31460293ce3aSMandy Kirkconnell } 31470293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3148f0e2d93cSDenys Vlasenko kmem_free(nex2_ep); 31490293ce3aSMandy Kirkconnell erp->er_extcount += nex2; 31500293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 31510293ce3aSMandy Kirkconnell } 31520293ce3aSMandy Kirkconnell } 31530293ce3aSMandy Kirkconnell 31540293ce3aSMandy Kirkconnell /* 31554eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 31564eea22f0SMandy Kirkconnell * extents needs to be decreased. The ext_diff parameter stores the 31574eea22f0SMandy Kirkconnell * number of extents to be removed and the idx parameter contains 31584eea22f0SMandy Kirkconnell * the extent index where the extents will be removed from. 31590293ce3aSMandy Kirkconnell * 31600293ce3aSMandy Kirkconnell * If the amount of space needed has decreased below the linear 31610293ce3aSMandy Kirkconnell * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 31620293ce3aSMandy Kirkconnell * extent array. Otherwise, use kmem_realloc() to adjust the 31630293ce3aSMandy Kirkconnell * size to what is needed. 31644eea22f0SMandy Kirkconnell */ 31654eea22f0SMandy Kirkconnell void 31664eea22f0SMandy Kirkconnell xfs_iext_remove( 31676ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 31684eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 31696ef35544SChristoph Hellwig int ext_diff, /* number of extents to remove */ 31706ef35544SChristoph Hellwig int state) /* type of extent conversion */ 31714eea22f0SMandy Kirkconnell { 31726ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 31734eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 31744eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 31754eea22f0SMandy Kirkconnell 31760b1b213fSChristoph Hellwig trace_xfs_iext_remove(ip, idx, state, _RET_IP_); 31770b1b213fSChristoph Hellwig 31784eea22f0SMandy Kirkconnell ASSERT(ext_diff > 0); 31794eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 31804eea22f0SMandy Kirkconnell new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 31814eea22f0SMandy Kirkconnell 31824eea22f0SMandy Kirkconnell if (new_size == 0) { 31834eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 31840293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 31850293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(ifp, idx, ext_diff); 31864eea22f0SMandy Kirkconnell } else if (ifp->if_real_bytes) { 31874eea22f0SMandy Kirkconnell xfs_iext_remove_direct(ifp, idx, ext_diff); 31884eea22f0SMandy Kirkconnell } else { 31894eea22f0SMandy Kirkconnell xfs_iext_remove_inline(ifp, idx, ext_diff); 31904eea22f0SMandy Kirkconnell } 31914eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 31924eea22f0SMandy Kirkconnell } 31934eea22f0SMandy Kirkconnell 31944eea22f0SMandy Kirkconnell /* 31954eea22f0SMandy Kirkconnell * This removes ext_diff extents from the inline buffer, beginning 31964eea22f0SMandy Kirkconnell * at extent index idx. 
31974eea22f0SMandy Kirkconnell */ 31984eea22f0SMandy Kirkconnell void 31994eea22f0SMandy Kirkconnell xfs_iext_remove_inline( 32004eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 32014eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 32024eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 32034eea22f0SMandy Kirkconnell { 32044eea22f0SMandy Kirkconnell int nextents; /* number of extents in file */ 32054eea22f0SMandy Kirkconnell 32060293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 32074eea22f0SMandy Kirkconnell ASSERT(idx < XFS_INLINE_EXTS); 32084eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 32094eea22f0SMandy Kirkconnell ASSERT(((nextents - ext_diff) > 0) && 32104eea22f0SMandy Kirkconnell (nextents - ext_diff) < XFS_INLINE_EXTS); 32114eea22f0SMandy Kirkconnell 32124eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 32134eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx], 32144eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx + ext_diff], 32154eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 32164eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 32174eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 32184eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 32194eea22f0SMandy Kirkconnell } else { 32204eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, 32214eea22f0SMandy Kirkconnell ext_diff * sizeof(xfs_bmbt_rec_t)); 32224eea22f0SMandy Kirkconnell } 32234eea22f0SMandy Kirkconnell } 32244eea22f0SMandy Kirkconnell 32254eea22f0SMandy Kirkconnell /* 32264eea22f0SMandy Kirkconnell * This removes ext_diff extents from a linear (direct) extent list, 32274eea22f0SMandy Kirkconnell * beginning at extent index idx. If the extents are being removed 32284eea22f0SMandy Kirkconnell * from the end of the list (ie. truncate) then we just need to re- 32294eea22f0SMandy Kirkconnell * allocate the list to remove the extra space. Otherwise, if the 32304eea22f0SMandy Kirkconnell * extents are being removed from the middle of the existing extent 32314eea22f0SMandy Kirkconnell * entries, then we first need to move the extent records beginning 32324eea22f0SMandy Kirkconnell * at idx + ext_diff up in the list to overwrite the records being 32334eea22f0SMandy Kirkconnell * removed, then remove the extra space via kmem_realloc. 
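 *
 * A hypothetical example for illustration: truncating away the last
 * two extents of a six-extent direct list leaves idx + ext_diff equal
 * to nextents, so no records need to move; the tail is zeroed and
 * xfs_iext_realloc_direct() simply shrinks the buffer to four
 * records' worth of space.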
32344eea22f0SMandy Kirkconnell */ 32354eea22f0SMandy Kirkconnell void 32364eea22f0SMandy Kirkconnell xfs_iext_remove_direct( 32374eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 32384eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 32394eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 32404eea22f0SMandy Kirkconnell { 32414eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 32424eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 32434eea22f0SMandy Kirkconnell 32440293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 32454eea22f0SMandy Kirkconnell new_size = ifp->if_bytes - 32464eea22f0SMandy Kirkconnell (ext_diff * sizeof(xfs_bmbt_rec_t)); 32474eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 32484eea22f0SMandy Kirkconnell 32494eea22f0SMandy Kirkconnell if (new_size == 0) { 32504eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 32514eea22f0SMandy Kirkconnell return; 32524eea22f0SMandy Kirkconnell } 32534eea22f0SMandy Kirkconnell /* Move extents up in the list (if needed) */ 32544eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 32554eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx], 32564eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx + ext_diff], 32574eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 32584eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 32594eea22f0SMandy Kirkconnell } 32604eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[nextents - ext_diff], 32614eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 32624eea22f0SMandy Kirkconnell /* 32634eea22f0SMandy Kirkconnell * Reallocate the direct extent list. If the extents 32644eea22f0SMandy Kirkconnell * will fit inside the inode then xfs_iext_realloc_direct 32654eea22f0SMandy Kirkconnell * will switch from direct to inline extent allocation 32664eea22f0SMandy Kirkconnell * mode for us. 32674eea22f0SMandy Kirkconnell */ 32684eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 32694eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 32704eea22f0SMandy Kirkconnell } 32714eea22f0SMandy Kirkconnell 32724eea22f0SMandy Kirkconnell /* 32730293ce3aSMandy Kirkconnell * This is called when incore extents are being removed from the 32740293ce3aSMandy Kirkconnell * indirection array and the extents being removed span multiple extent 32750293ce3aSMandy Kirkconnell * buffers. The idx parameter contains the file extent index where we 32760293ce3aSMandy Kirkconnell * want to begin removing extents, and the count parameter contains 32770293ce3aSMandy Kirkconnell * how many extents need to be removed. 
32780293ce3aSMandy Kirkconnell * 32790293ce3aSMandy Kirkconnell * |-------| |-------| 32800293ce3aSMandy Kirkconnell * | nex1 | | | nex1 - number of extents before idx 32810293ce3aSMandy Kirkconnell * |-------| | count | 32820293ce3aSMandy Kirkconnell * | | | | count - number of extents being removed at idx 32830293ce3aSMandy Kirkconnell * | count | |-------| 32840293ce3aSMandy Kirkconnell * | | | nex2 | nex2 - number of extents after idx + count 32850293ce3aSMandy Kirkconnell * |-------| |-------| 32860293ce3aSMandy Kirkconnell */ 32870293ce3aSMandy Kirkconnell void 32880293ce3aSMandy Kirkconnell xfs_iext_remove_indirect( 32890293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 32900293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing extents */ 32910293ce3aSMandy Kirkconnell int count) /* number of extents to remove */ 32920293ce3aSMandy Kirkconnell { 32930293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 32940293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 32950293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* extents left to remove */ 32960293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* extents to remove in current list */ 32970293ce3aSMandy Kirkconnell xfs_extnum_t nex1; /* number of extents before idx */ 32980293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 32990293ce3aSMandy Kirkconnell int page_idx = idx; /* index in target extent list */ 33000293ce3aSMandy Kirkconnell 33010293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 33020293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 33030293ce3aSMandy Kirkconnell ASSERT(erp != NULL); 33040293ce3aSMandy Kirkconnell nex1 = page_idx; 33050293ce3aSMandy Kirkconnell ext_cnt = count; 33060293ce3aSMandy Kirkconnell while (ext_cnt) { 33070293ce3aSMandy Kirkconnell nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 33080293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 33090293ce3aSMandy Kirkconnell /* 33100293ce3aSMandy Kirkconnell * Check for deletion of entire list; 33110293ce3aSMandy Kirkconnell * xfs_iext_irec_remove() updates extent offsets. 
33120293ce3aSMandy Kirkconnell */ 33130293ce3aSMandy Kirkconnell if (ext_diff == erp->er_extcount) { 33140293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 33150293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 33160293ce3aSMandy Kirkconnell nex1 = 0; 33170293ce3aSMandy Kirkconnell if (ext_cnt) { 33180293ce3aSMandy Kirkconnell ASSERT(erp_idx < ifp->if_real_bytes / 33190293ce3aSMandy Kirkconnell XFS_IEXT_BUFSZ); 33200293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 33210293ce3aSMandy Kirkconnell nex1 = 0; 33220293ce3aSMandy Kirkconnell continue; 33230293ce3aSMandy Kirkconnell } else { 33240293ce3aSMandy Kirkconnell break; 33250293ce3aSMandy Kirkconnell } 33260293ce3aSMandy Kirkconnell } 33270293ce3aSMandy Kirkconnell /* Move extents up (if needed) */ 33280293ce3aSMandy Kirkconnell if (nex2) { 33290293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex1], 33300293ce3aSMandy Kirkconnell &erp->er_extbuf[nex1 + ext_diff], 33310293ce3aSMandy Kirkconnell nex2 * sizeof(xfs_bmbt_rec_t)); 33320293ce3aSMandy Kirkconnell } 33330293ce3aSMandy Kirkconnell /* Zero out rest of page */ 33340293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 33350293ce3aSMandy Kirkconnell ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 33360293ce3aSMandy Kirkconnell /* Update remaining counters */ 33370293ce3aSMandy Kirkconnell erp->er_extcount -= ext_diff; 33380293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 33390293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 33400293ce3aSMandy Kirkconnell nex1 = 0; 33410293ce3aSMandy Kirkconnell erp_idx++; 33420293ce3aSMandy Kirkconnell erp++; 33430293ce3aSMandy Kirkconnell } 33440293ce3aSMandy Kirkconnell ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); 33450293ce3aSMandy Kirkconnell xfs_iext_irec_compact(ifp); 33460293ce3aSMandy Kirkconnell } 33470293ce3aSMandy Kirkconnell 33480293ce3aSMandy Kirkconnell /* 33494eea22f0SMandy Kirkconnell * Create, destroy, or resize a linear (direct) block of extents. 
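 *
 * Sizing example (editorial illustration, assuming the usual 16-byte
 * xfs_bmbt_rec_t): growing a direct list to hold five extents requests
 * new_size = 5 * 16 = 80 bytes; 80 is not a power of two, so the
 * buffer is reallocated to roundup_pow_of_two(80) = 128 bytes, and
 * if_real_bytes records 128 while if_bytes is set to the requested 80.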
33504eea22f0SMandy Kirkconnell */ 33514eea22f0SMandy Kirkconnell void 33524eea22f0SMandy Kirkconnell xfs_iext_realloc_direct( 33534eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33544eea22f0SMandy Kirkconnell int new_size) /* new size of extents */ 33554eea22f0SMandy Kirkconnell { 33564eea22f0SMandy Kirkconnell int rnew_size; /* real new size of extents */ 33574eea22f0SMandy Kirkconnell 33584eea22f0SMandy Kirkconnell rnew_size = new_size; 33594eea22f0SMandy Kirkconnell 33600293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 33610293ce3aSMandy Kirkconnell ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 33620293ce3aSMandy Kirkconnell (new_size != ifp->if_real_bytes))); 33630293ce3aSMandy Kirkconnell 33644eea22f0SMandy Kirkconnell /* Free extent records */ 33654eea22f0SMandy Kirkconnell if (new_size == 0) { 33664eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 33674eea22f0SMandy Kirkconnell } 33684eea22f0SMandy Kirkconnell /* Resize direct extent list and zero any new bytes */ 33694eea22f0SMandy Kirkconnell else if (ifp->if_real_bytes) { 33704eea22f0SMandy Kirkconnell /* Check if extents will fit inside the inode */ 33714eea22f0SMandy Kirkconnell if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 33724eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(ifp, new_size / 33734eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)); 33744eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 33754eea22f0SMandy Kirkconnell return; 33764eea22f0SMandy Kirkconnell } 337716a087d8SVignesh Babu if (!is_power_of_2(new_size)){ 337840ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 33794eea22f0SMandy Kirkconnell } 33804eea22f0SMandy Kirkconnell if (rnew_size != ifp->if_real_bytes) { 3381a6f64d4aSChristoph Hellwig ifp->if_u1.if_extents = 33824eea22f0SMandy Kirkconnell kmem_realloc(ifp->if_u1.if_extents, 33834eea22f0SMandy Kirkconnell rnew_size, 33846785073bSDavid Chinner ifp->if_real_bytes, KM_NOFS); 33854eea22f0SMandy Kirkconnell } 33864eea22f0SMandy Kirkconnell if (rnew_size > ifp->if_real_bytes) { 33874eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[ifp->if_bytes / 33884eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)], 0, 33894eea22f0SMandy Kirkconnell rnew_size - ifp->if_real_bytes); 33904eea22f0SMandy Kirkconnell } 33914eea22f0SMandy Kirkconnell } 33924eea22f0SMandy Kirkconnell /* 33934eea22f0SMandy Kirkconnell * Switch from the inline extent buffer to a direct 33944eea22f0SMandy Kirkconnell * extent list. Be sure to include the inline extent 33954eea22f0SMandy Kirkconnell * bytes in new_size. 33964eea22f0SMandy Kirkconnell */ 33974eea22f0SMandy Kirkconnell else { 33984eea22f0SMandy Kirkconnell new_size += ifp->if_bytes; 339916a087d8SVignesh Babu if (!is_power_of_2(new_size)) { 340040ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 34014eea22f0SMandy Kirkconnell } 34024eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(ifp, rnew_size); 34034eea22f0SMandy Kirkconnell } 34044eea22f0SMandy Kirkconnell ifp->if_real_bytes = rnew_size; 34054eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 34064eea22f0SMandy Kirkconnell } 34074eea22f0SMandy Kirkconnell 34084eea22f0SMandy Kirkconnell /* 34094eea22f0SMandy Kirkconnell * Switch from linear (direct) extent records to inline buffer. 
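 *
 * This is the reverse of xfs_iext_inline_to_direct() below; it is only
 * called once the fork holds no more than XFS_INLINE_EXTS extents, so
 * the records are copied back into if_u2.if_inline_ext and the direct
 * buffer is freed.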
34104eea22f0SMandy Kirkconnell */ 34114eea22f0SMandy Kirkconnell void 34124eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline( 34134eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34144eea22f0SMandy Kirkconnell xfs_extnum_t nextents) /* number of extents in file */ 34154eea22f0SMandy Kirkconnell { 34164eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 34174eea22f0SMandy Kirkconnell ASSERT(nextents <= XFS_INLINE_EXTS); 34184eea22f0SMandy Kirkconnell /* 34194eea22f0SMandy Kirkconnell * The inline buffer was zeroed when we switched 34204eea22f0SMandy Kirkconnell * from inline to direct extent allocation mode, 34214eea22f0SMandy Kirkconnell * so we don't need to clear it here. 34224eea22f0SMandy Kirkconnell */ 34234eea22f0SMandy Kirkconnell memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 34244eea22f0SMandy Kirkconnell nextents * sizeof(xfs_bmbt_rec_t)); 3425f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 34264eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 34274eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 34284eea22f0SMandy Kirkconnell } 34294eea22f0SMandy Kirkconnell 34304eea22f0SMandy Kirkconnell /* 34314eea22f0SMandy Kirkconnell * Switch from inline buffer to linear (direct) extent records. 34324eea22f0SMandy Kirkconnell * new_size should already be rounded up to the next power of 2 34334eea22f0SMandy Kirkconnell * by the caller (when appropriate), so use new_size as it is. 34344eea22f0SMandy Kirkconnell * However, since new_size may be rounded up, we can't update 34354eea22f0SMandy Kirkconnell * if_bytes here. It is the caller's responsibility to update 34364eea22f0SMandy Kirkconnell * if_bytes upon return. 34374eea22f0SMandy Kirkconnell */ 34384eea22f0SMandy Kirkconnell void 34394eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct( 34404eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34414eea22f0SMandy Kirkconnell int new_size) /* number of extents in file */ 34424eea22f0SMandy Kirkconnell { 34436785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); 34444eea22f0SMandy Kirkconnell memset(ifp->if_u1.if_extents, 0, new_size); 34454eea22f0SMandy Kirkconnell if (ifp->if_bytes) { 34464eea22f0SMandy Kirkconnell memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 34474eea22f0SMandy Kirkconnell ifp->if_bytes); 34484eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 34494eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 34504eea22f0SMandy Kirkconnell } 34514eea22f0SMandy Kirkconnell ifp->if_real_bytes = new_size; 34524eea22f0SMandy Kirkconnell } 34534eea22f0SMandy Kirkconnell 34544eea22f0SMandy Kirkconnell /* 34550293ce3aSMandy Kirkconnell * Resize an extent indirection array to new_size bytes. 
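 *
 * Note that new_size is the size of the array of xfs_ext_irec_t
 * headers, not of the extent buffers they point at; growing from three
 * to four extent lists, for example, asks for 4 * sizeof(xfs_ext_irec_t)
 * bytes here while the new XFS_IEXT_BUFSZ buffer is allocated separately
 * by the caller.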
34560293ce3aSMandy Kirkconnell */ 3457d96f8f89SEric Sandeen STATIC void 34580293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect( 34590293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34600293ce3aSMandy Kirkconnell int new_size) /* new indirection array size */ 34610293ce3aSMandy Kirkconnell { 34620293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 34630293ce3aSMandy Kirkconnell int size; /* current indirection array size */ 34640293ce3aSMandy Kirkconnell 34650293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 34660293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 34670293ce3aSMandy Kirkconnell size = nlists * sizeof(xfs_ext_irec_t); 34680293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes); 34690293ce3aSMandy Kirkconnell ASSERT((new_size >= 0) && (new_size != size)); 34700293ce3aSMandy Kirkconnell if (new_size == 0) { 34710293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 34720293ce3aSMandy Kirkconnell } else { 34730293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 34740293ce3aSMandy Kirkconnell kmem_realloc(ifp->if_u1.if_ext_irec, 34756785073bSDavid Chinner new_size, size, KM_NOFS); 34760293ce3aSMandy Kirkconnell } 34770293ce3aSMandy Kirkconnell } 34780293ce3aSMandy Kirkconnell 34790293ce3aSMandy Kirkconnell /* 34800293ce3aSMandy Kirkconnell * Switch from indirection array to linear (direct) extent allocations. 34810293ce3aSMandy Kirkconnell */ 3482d96f8f89SEric Sandeen STATIC void 34830293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct( 34840293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 34850293ce3aSMandy Kirkconnell { 3486a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 34870293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 34880293ce3aSMandy Kirkconnell int size; /* size of file extents */ 34890293ce3aSMandy Kirkconnell 34900293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 34910293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 34920293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 34930293ce3aSMandy Kirkconnell size = nextents * sizeof(xfs_bmbt_rec_t); 34940293ce3aSMandy Kirkconnell 349571a8c87fSLachlan McIlroy xfs_iext_irec_compact_pages(ifp); 34960293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 34970293ce3aSMandy Kirkconnell 34980293ce3aSMandy Kirkconnell ep = ifp->if_u1.if_ext_irec->er_extbuf; 3499f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 35000293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 35010293ce3aSMandy Kirkconnell ifp->if_u1.if_extents = ep; 35020293ce3aSMandy Kirkconnell ifp->if_bytes = size; 35030293ce3aSMandy Kirkconnell if (nextents < XFS_LINEAR_EXTS) { 35040293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, size); 35050293ce3aSMandy Kirkconnell } 35060293ce3aSMandy Kirkconnell } 35070293ce3aSMandy Kirkconnell 35080293ce3aSMandy Kirkconnell /* 35094eea22f0SMandy Kirkconnell * Free incore file extents. 
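 *
 * This handles all three in-core layouts: an indirection array is torn
 * down one extent list at a time via xfs_iext_irec_remove(), a direct
 * kmem-allocated list is freed outright, and the inline buffer is
 * merely zeroed since it lives inside the fork itself.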
35104eea22f0SMandy Kirkconnell */ 35114eea22f0SMandy Kirkconnell void 35124eea22f0SMandy Kirkconnell xfs_iext_destroy( 35134eea22f0SMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 35144eea22f0SMandy Kirkconnell { 35150293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35160293ce3aSMandy Kirkconnell int erp_idx; 35170293ce3aSMandy Kirkconnell int nlists; 35180293ce3aSMandy Kirkconnell 35190293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 35200293ce3aSMandy Kirkconnell for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 35210293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 35220293ce3aSMandy Kirkconnell } 35230293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 35240293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes) { 3525f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 35264eea22f0SMandy Kirkconnell } else if (ifp->if_bytes) { 35274eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 35284eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 35294eea22f0SMandy Kirkconnell } 35304eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = NULL; 35314eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 35324eea22f0SMandy Kirkconnell ifp->if_bytes = 0; 35334eea22f0SMandy Kirkconnell } 35340293ce3aSMandy Kirkconnell 35350293ce3aSMandy Kirkconnell /* 35368867bc9bSMandy Kirkconnell * Return a pointer to the extent record for file system block bno. 35378867bc9bSMandy Kirkconnell */ 3538a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * /* pointer to found extent record */ 35398867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext( 35408867bc9bSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35418867bc9bSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 35428867bc9bSMandy Kirkconnell xfs_extnum_t *idxp) /* index of target extent */ 35438867bc9bSMandy Kirkconnell { 3544a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *base; /* pointer to first extent */ 35458867bc9bSMandy Kirkconnell xfs_filblks_t blockcount = 0; /* number of blocks in extent */ 3546a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ 35478867bc9bSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 3548c41564b5SNathan Scott int high; /* upper boundary in search */ 35498867bc9bSMandy Kirkconnell xfs_extnum_t idx = 0; /* index of target extent */ 3550c41564b5SNathan Scott int low; /* lower boundary in search */ 35518867bc9bSMandy Kirkconnell xfs_extnum_t nextents; /* number of file extents */ 35528867bc9bSMandy Kirkconnell xfs_fileoff_t startoff = 0; /* start offset of extent */ 35538867bc9bSMandy Kirkconnell 35548867bc9bSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 35558867bc9bSMandy Kirkconnell if (nextents == 0) { 35568867bc9bSMandy Kirkconnell *idxp = 0; 35578867bc9bSMandy Kirkconnell return NULL; 35588867bc9bSMandy Kirkconnell } 35598867bc9bSMandy Kirkconnell low = 0; 35608867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35618867bc9bSMandy Kirkconnell /* Find target extent list */ 35628867bc9bSMandy Kirkconnell int erp_idx = 0; 35638867bc9bSMandy Kirkconnell erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); 35648867bc9bSMandy Kirkconnell base = erp->er_extbuf; 35658867bc9bSMandy Kirkconnell high = erp->er_extcount - 1; 35668867bc9bSMandy Kirkconnell } else { 35678867bc9bSMandy Kirkconnell base = ifp->if_u1.if_extents; 35688867bc9bSMandy Kirkconnell high = nextents - 1; 35698867bc9bSMandy Kirkconnell } 
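	/*
	 * Editorial note on the search below: the records are kept sorted
	 * by startoff and do not overlap, so the binary search looks for
	 * the record with startoff <= bno < startoff + blockcount.  If no
	 * record contains bno, the code after the loop returns the first
	 * record past bno instead, or NULL when bno lies beyond the last
	 * incore extent, with *idxp set to the corresponding index.
	 */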
35708867bc9bSMandy Kirkconnell /* Binary search extent records */ 35718867bc9bSMandy Kirkconnell while (low <= high) { 35728867bc9bSMandy Kirkconnell idx = (low + high) >> 1; 35738867bc9bSMandy Kirkconnell ep = base + idx; 35748867bc9bSMandy Kirkconnell startoff = xfs_bmbt_get_startoff(ep); 35758867bc9bSMandy Kirkconnell blockcount = xfs_bmbt_get_blockcount(ep); 35768867bc9bSMandy Kirkconnell if (bno < startoff) { 35778867bc9bSMandy Kirkconnell high = idx - 1; 35788867bc9bSMandy Kirkconnell } else if (bno >= startoff + blockcount) { 35798867bc9bSMandy Kirkconnell low = idx + 1; 35808867bc9bSMandy Kirkconnell } else { 35818867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 35828867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35838867bc9bSMandy Kirkconnell idx += erp->er_extoff; 35848867bc9bSMandy Kirkconnell } 35858867bc9bSMandy Kirkconnell *idxp = idx; 35868867bc9bSMandy Kirkconnell return ep; 35878867bc9bSMandy Kirkconnell } 35888867bc9bSMandy Kirkconnell } 35898867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 35908867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35918867bc9bSMandy Kirkconnell idx += erp->er_extoff; 35928867bc9bSMandy Kirkconnell } 35938867bc9bSMandy Kirkconnell if (bno >= startoff + blockcount) { 35948867bc9bSMandy Kirkconnell if (++idx == nextents) { 35958867bc9bSMandy Kirkconnell ep = NULL; 35968867bc9bSMandy Kirkconnell } else { 35978867bc9bSMandy Kirkconnell ep = xfs_iext_get_ext(ifp, idx); 35988867bc9bSMandy Kirkconnell } 35998867bc9bSMandy Kirkconnell } 36008867bc9bSMandy Kirkconnell *idxp = idx; 36018867bc9bSMandy Kirkconnell return ep; 36028867bc9bSMandy Kirkconnell } 36038867bc9bSMandy Kirkconnell 36048867bc9bSMandy Kirkconnell /* 36050293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 36060293ce3aSMandy Kirkconnell * extent record for filesystem block bno. Store the index of the 36070293ce3aSMandy Kirkconnell * target irec in *erp_idxp. 36080293ce3aSMandy Kirkconnell */ 36098867bc9bSMandy Kirkconnell xfs_ext_irec_t * /* pointer to found extent record */ 36100293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec( 36110293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36120293ce3aSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 36130293ce3aSMandy Kirkconnell int *erp_idxp) /* irec index of target ext list */ 36140293ce3aSMandy Kirkconnell { 36150293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 36160293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp_next; /* next indirection array entry */ 36178867bc9bSMandy Kirkconnell int erp_idx; /* indirection array index */ 36180293ce3aSMandy Kirkconnell int nlists; /* number of extent irec's (lists) */ 36190293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 36200293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 36210293ce3aSMandy Kirkconnell 36220293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 36230293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36240293ce3aSMandy Kirkconnell erp_idx = 0; 36250293ce3aSMandy Kirkconnell low = 0; 36260293ce3aSMandy Kirkconnell high = nlists - 1; 36270293ce3aSMandy Kirkconnell while (low <= high) { 36280293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 36290293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 36300293ce3aSMandy Kirkconnell erp_next = erp_idx < nlists - 1 ? 
erp + 1 : NULL; 36310293ce3aSMandy Kirkconnell if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { 36320293ce3aSMandy Kirkconnell high = erp_idx - 1; 36330293ce3aSMandy Kirkconnell } else if (erp_next && bno >= 36340293ce3aSMandy Kirkconnell xfs_bmbt_get_startoff(erp_next->er_extbuf)) { 36350293ce3aSMandy Kirkconnell low = erp_idx + 1; 36360293ce3aSMandy Kirkconnell } else { 36370293ce3aSMandy Kirkconnell break; 36380293ce3aSMandy Kirkconnell } 36390293ce3aSMandy Kirkconnell } 36400293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 36410293ce3aSMandy Kirkconnell return erp; 36420293ce3aSMandy Kirkconnell } 36430293ce3aSMandy Kirkconnell 36440293ce3aSMandy Kirkconnell /* 36450293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 36460293ce3aSMandy Kirkconnell * extent record at file extent index *idxp. Store the index of the 36470293ce3aSMandy Kirkconnell * target irec in *erp_idxp and store the page index of the target 36480293ce3aSMandy Kirkconnell * extent record in *idxp. 36490293ce3aSMandy Kirkconnell */ 36500293ce3aSMandy Kirkconnell xfs_ext_irec_t * 36510293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec( 36520293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36530293ce3aSMandy Kirkconnell xfs_extnum_t *idxp, /* extent index (file -> page) */ 36540293ce3aSMandy Kirkconnell int *erp_idxp, /* pointer to target irec */ 36550293ce3aSMandy Kirkconnell int realloc) /* new bytes were just added */ 36560293ce3aSMandy Kirkconnell { 36570293ce3aSMandy Kirkconnell xfs_ext_irec_t *prev; /* pointer to previous irec */ 36580293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ 36590293ce3aSMandy Kirkconnell int erp_idx; /* indirection array index */ 36600293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 36610293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 36620293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 36630293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 36640293ce3aSMandy Kirkconnell 36650293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 366687bef181SChristoph Hellwig ASSERT(page_idx >= 0); 366787bef181SChristoph Hellwig ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 366887bef181SChristoph Hellwig ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); 366987bef181SChristoph Hellwig 36700293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36710293ce3aSMandy Kirkconnell erp_idx = 0; 36720293ce3aSMandy Kirkconnell low = 0; 36730293ce3aSMandy Kirkconnell high = nlists - 1; 36740293ce3aSMandy Kirkconnell 36750293ce3aSMandy Kirkconnell /* Binary search extent irec's */ 36760293ce3aSMandy Kirkconnell while (low <= high) { 36770293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 36780293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 36790293ce3aSMandy Kirkconnell prev = erp_idx > 0 ? 
erp - 1 : NULL; 36800293ce3aSMandy Kirkconnell if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && 36810293ce3aSMandy Kirkconnell realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { 36820293ce3aSMandy Kirkconnell high = erp_idx - 1; 36830293ce3aSMandy Kirkconnell } else if (page_idx > erp->er_extoff + erp->er_extcount || 36840293ce3aSMandy Kirkconnell (page_idx == erp->er_extoff + erp->er_extcount && 36850293ce3aSMandy Kirkconnell !realloc)) { 36860293ce3aSMandy Kirkconnell low = erp_idx + 1; 36870293ce3aSMandy Kirkconnell } else if (page_idx == erp->er_extoff + erp->er_extcount && 36880293ce3aSMandy Kirkconnell erp->er_extcount == XFS_LINEAR_EXTS) { 36890293ce3aSMandy Kirkconnell ASSERT(realloc); 36900293ce3aSMandy Kirkconnell page_idx = 0; 36910293ce3aSMandy Kirkconnell erp_idx++; 36920293ce3aSMandy Kirkconnell erp = erp_idx < nlists ? erp + 1 : NULL; 36930293ce3aSMandy Kirkconnell break; 36940293ce3aSMandy Kirkconnell } else { 36950293ce3aSMandy Kirkconnell page_idx -= erp->er_extoff; 36960293ce3aSMandy Kirkconnell break; 36970293ce3aSMandy Kirkconnell } 36980293ce3aSMandy Kirkconnell } 36990293ce3aSMandy Kirkconnell *idxp = page_idx; 37000293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 37010293ce3aSMandy Kirkconnell return(erp); 37020293ce3aSMandy Kirkconnell } 37030293ce3aSMandy Kirkconnell 37040293ce3aSMandy Kirkconnell /* 37050293ce3aSMandy Kirkconnell * Allocate and initialize an indirection array once the space needed 37060293ce3aSMandy Kirkconnell * for incore extents increases above XFS_IEXT_BUFSZ. 37070293ce3aSMandy Kirkconnell */ 37080293ce3aSMandy Kirkconnell void 37090293ce3aSMandy Kirkconnell xfs_iext_irec_init( 37100293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 37110293ce3aSMandy Kirkconnell { 37120293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 37130293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 37140293ce3aSMandy Kirkconnell 37150293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 37160293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 37170293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 37180293ce3aSMandy Kirkconnell 37196785073bSDavid Chinner erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); 37200293ce3aSMandy Kirkconnell 37210293ce3aSMandy Kirkconnell if (nextents == 0) { 37226785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 37230293ce3aSMandy Kirkconnell } else if (!ifp->if_real_bytes) { 37240293ce3aSMandy Kirkconnell xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 37250293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 37260293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 37270293ce3aSMandy Kirkconnell } 37280293ce3aSMandy Kirkconnell erp->er_extbuf = ifp->if_u1.if_extents; 37290293ce3aSMandy Kirkconnell erp->er_extcount = nextents; 37300293ce3aSMandy Kirkconnell erp->er_extoff = 0; 37310293ce3aSMandy Kirkconnell 37320293ce3aSMandy Kirkconnell ifp->if_flags |= XFS_IFEXTIREC; 37330293ce3aSMandy Kirkconnell ifp->if_real_bytes = XFS_IEXT_BUFSZ; 37340293ce3aSMandy Kirkconnell ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 37350293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = erp; 37360293ce3aSMandy Kirkconnell 37370293ce3aSMandy Kirkconnell return; 37380293ce3aSMandy Kirkconnell } 37390293ce3aSMandy Kirkconnell 37400293ce3aSMandy Kirkconnell /* 37410293ce3aSMandy Kirkconnell * Allocate and initialize a new entry in 
the indirection array. 37420293ce3aSMandy Kirkconnell */ 37430293ce3aSMandy Kirkconnell xfs_ext_irec_t * 37440293ce3aSMandy Kirkconnell xfs_iext_irec_new( 37450293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 37460293ce3aSMandy Kirkconnell int erp_idx) /* index for new irec */ 37470293ce3aSMandy Kirkconnell { 37480293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 37490293ce3aSMandy Kirkconnell int i; /* loop counter */ 37500293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37510293ce3aSMandy Kirkconnell 37520293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37530293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37540293ce3aSMandy Kirkconnell 37550293ce3aSMandy Kirkconnell /* Resize indirection array */ 37560293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, ++nlists * 37570293ce3aSMandy Kirkconnell sizeof(xfs_ext_irec_t)); 37580293ce3aSMandy Kirkconnell /* 37590293ce3aSMandy Kirkconnell * Move records down in the array so the 37600293ce3aSMandy Kirkconnell * new page can use erp_idx. 37610293ce3aSMandy Kirkconnell */ 37620293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 37630293ce3aSMandy Kirkconnell for (i = nlists - 1; i > erp_idx; i--) { 37640293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 37650293ce3aSMandy Kirkconnell } 37660293ce3aSMandy Kirkconnell ASSERT(i == erp_idx); 37670293ce3aSMandy Kirkconnell 37680293ce3aSMandy Kirkconnell /* Initialize new extent record */ 37690293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 37706785073bSDavid Chinner erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 37710293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 37720293ce3aSMandy Kirkconnell memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 37730293ce3aSMandy Kirkconnell erp[erp_idx].er_extcount = 0; 37740293ce3aSMandy Kirkconnell erp[erp_idx].er_extoff = erp_idx > 0 ? 37750293ce3aSMandy Kirkconnell erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 37760293ce3aSMandy Kirkconnell return (&erp[erp_idx]); 37770293ce3aSMandy Kirkconnell } 37780293ce3aSMandy Kirkconnell 37790293ce3aSMandy Kirkconnell /* 37800293ce3aSMandy Kirkconnell * Remove a record from the indirection array. 
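 *
 * The entries that follow the removed record slide down to fill the
 * gap, and their er_extoff values are reduced by the removed record's
 * er_extcount; if er_extbuf has already been freed and cleared by the
 * caller (as xfs_iext_irec_compact_pages() does after merging its
 * extents into the previous list), the offsets are left alone.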
37810293ce3aSMandy Kirkconnell */ 37820293ce3aSMandy Kirkconnell void 37830293ce3aSMandy Kirkconnell xfs_iext_irec_remove( 37840293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 37850293ce3aSMandy Kirkconnell int erp_idx) /* irec index to remove */ 37860293ce3aSMandy Kirkconnell { 37870293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 37880293ce3aSMandy Kirkconnell int i; /* loop counter */ 37890293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37900293ce3aSMandy Kirkconnell 37910293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37920293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37930293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 37940293ce3aSMandy Kirkconnell if (erp->er_extbuf) { 37950293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 37960293ce3aSMandy Kirkconnell -erp->er_extcount); 3797f0e2d93cSDenys Vlasenko kmem_free(erp->er_extbuf); 37980293ce3aSMandy Kirkconnell } 37990293ce3aSMandy Kirkconnell /* Compact extent records */ 38000293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 38010293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists - 1; i++) { 38020293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); 38030293ce3aSMandy Kirkconnell } 38040293ce3aSMandy Kirkconnell /* 38050293ce3aSMandy Kirkconnell * Manually free the last extent record from the indirection 38060293ce3aSMandy Kirkconnell * array. A call to xfs_iext_realloc_indirect() with a size 38070293ce3aSMandy Kirkconnell * of zero would result in a call to xfs_iext_destroy() which 38080293ce3aSMandy Kirkconnell * would in turn call this function again, creating a nasty 38090293ce3aSMandy Kirkconnell * infinite loop. 38100293ce3aSMandy Kirkconnell */ 38110293ce3aSMandy Kirkconnell if (--nlists) { 38120293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, 38130293ce3aSMandy Kirkconnell nlists * sizeof(xfs_ext_irec_t)); 38140293ce3aSMandy Kirkconnell } else { 3815f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 38160293ce3aSMandy Kirkconnell } 38170293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 38180293ce3aSMandy Kirkconnell } 38190293ce3aSMandy Kirkconnell 38200293ce3aSMandy Kirkconnell /* 38210293ce3aSMandy Kirkconnell * This is called to clean up large amounts of unused memory allocated 38220293ce3aSMandy Kirkconnell * by the indirection array. Before compacting anything though, verify 38230293ce3aSMandy Kirkconnell * that the indirection array is still needed and switch back to the 38240293ce3aSMandy Kirkconnell * linear extent list (or even the inline buffer) if possible. 
The 38250293ce3aSMandy Kirkconnell * compaction policy is as follows: 38260293ce3aSMandy Kirkconnell * 38270293ce3aSMandy Kirkconnell * Full Compaction: Extents fit into a single page (or inline buffer) 382871a8c87fSLachlan McIlroy * Partial Compaction: Extents occupy less than 50% of allocated space 38290293ce3aSMandy Kirkconnell * No Compaction: Extents occupy at least 50% of allocated space 38300293ce3aSMandy Kirkconnell */ 38310293ce3aSMandy Kirkconnell void 38320293ce3aSMandy Kirkconnell xfs_iext_irec_compact( 38330293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 38340293ce3aSMandy Kirkconnell { 38350293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 38360293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 38370293ce3aSMandy Kirkconnell 38380293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 38390293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 38400293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 38410293ce3aSMandy Kirkconnell 38420293ce3aSMandy Kirkconnell if (nextents == 0) { 38430293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 38440293ce3aSMandy Kirkconnell } else if (nextents <= XFS_INLINE_EXTS) { 38450293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 38460293ce3aSMandy Kirkconnell xfs_iext_direct_to_inline(ifp, nextents); 38470293ce3aSMandy Kirkconnell } else if (nextents <= XFS_LINEAR_EXTS) { 38480293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 38490293ce3aSMandy Kirkconnell } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 38500293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(ifp); 38510293ce3aSMandy Kirkconnell } 38520293ce3aSMandy Kirkconnell } 38530293ce3aSMandy Kirkconnell 38540293ce3aSMandy Kirkconnell /* 38550293ce3aSMandy Kirkconnell * Combine extents from neighboring extent pages. 38560293ce3aSMandy Kirkconnell */ 38570293ce3aSMandy Kirkconnell void 38580293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages( 38590293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 38600293ce3aSMandy Kirkconnell { 38610293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ 38620293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 38630293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 38640293ce3aSMandy Kirkconnell 38650293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 38660293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 38670293ce3aSMandy Kirkconnell while (erp_idx < nlists - 1) { 38680293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 38690293ce3aSMandy Kirkconnell erp_next = erp + 1; 38700293ce3aSMandy Kirkconnell if (erp_next->er_extcount <= 38710293ce3aSMandy Kirkconnell (XFS_LINEAR_EXTS - erp->er_extcount)) { 387271a8c87fSLachlan McIlroy memcpy(&erp->er_extbuf[erp->er_extcount], 38730293ce3aSMandy Kirkconnell erp_next->er_extbuf, erp_next->er_extcount * 38740293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 38750293ce3aSMandy Kirkconnell erp->er_extcount += erp_next->er_extcount; 38760293ce3aSMandy Kirkconnell /* 38770293ce3aSMandy Kirkconnell * Free page before removing extent record 38780293ce3aSMandy Kirkconnell * so er_extoffs don't get modified in 38790293ce3aSMandy Kirkconnell * xfs_iext_irec_remove. 
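			 * The extents were merged into the previous
			 * page rather than deleted, so their er_extoff
			 * accounting must not change; clearing
			 * er_extbuf below makes xfs_iext_irec_remove()
			 * skip its xfs_iext_irec_update_extoffs()
			 * adjustment.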
38800293ce3aSMandy Kirkconnell */ 3881f0e2d93cSDenys Vlasenko kmem_free(erp_next->er_extbuf); 38820293ce3aSMandy Kirkconnell erp_next->er_extbuf = NULL; 38830293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx + 1); 38840293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 38850293ce3aSMandy Kirkconnell } else { 38860293ce3aSMandy Kirkconnell erp_idx++; 38870293ce3aSMandy Kirkconnell } 38880293ce3aSMandy Kirkconnell } 38890293ce3aSMandy Kirkconnell } 38900293ce3aSMandy Kirkconnell 38910293ce3aSMandy Kirkconnell /* 38920293ce3aSMandy Kirkconnell * This is called to update the er_extoff field in the indirection 38930293ce3aSMandy Kirkconnell * array when extents have been added or removed from one of the 38940293ce3aSMandy Kirkconnell * extent lists. erp_idx contains the irec index to begin updating 38950293ce3aSMandy Kirkconnell * at and ext_diff contains the number of extents that were added 38960293ce3aSMandy Kirkconnell * or removed. 38970293ce3aSMandy Kirkconnell */ 38980293ce3aSMandy Kirkconnell void 38990293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs( 39000293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 39010293ce3aSMandy Kirkconnell int erp_idx, /* irec index to update */ 39020293ce3aSMandy Kirkconnell int ext_diff) /* number of new extents */ 39030293ce3aSMandy Kirkconnell { 39040293ce3aSMandy Kirkconnell int i; /* loop counter */ 39050293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists */ 39060293ce3aSMandy Kirkconnell 39070293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 39080293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 39090293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists; i++) { 39100293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 39110293ce3aSMandy Kirkconnell } 39120293ce3aSMandy Kirkconnell } 3913
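
/*
 * Editorial sketch (not part of the original source): a minimal,
 * hypothetical example of how the lookup helpers above fit together.
 * The caller is assumed to hold the inode lock that protects the
 * in-core extent list, and xfs_iext_example_covered() is an invented
 * name used purely for illustration.
 */
#if 0	/* illustration only, never compiled */
STATIC int				/* 1 if bno is mapped by an incore extent */
xfs_iext_example_covered(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno)		/* file block to look up */
{
	xfs_bmbt_rec_host_t	*ep;	/* extent at or after bno */
	xfs_extnum_t		idx;	/* file-based extent index */
	xfs_fileoff_t		startoff;

	ep = xfs_iext_bno_to_ext(ifp, bno, &idx);
	if (ep == NULL)
		return 0;		/* bno is past the last incore extent */
	startoff = xfs_bmbt_get_startoff(ep);
	/*
	 * xfs_iext_bno_to_ext() returns either the extent containing bno
	 * or the first extent beyond it, so a simple range check tells
	 * the two cases apart.
	 */
	return bno >= startoff &&
	       bno < startoff + xfs_bmbt_get_blockcount(ep);
}
#endif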