11da177e4SLinus Torvalds /* 23e57ecf6SOlaf Weber * Copyright (c) 2000-2006 Silicon Graphics, Inc. 37b718769SNathan Scott * All Rights Reserved. 41da177e4SLinus Torvalds * 57b718769SNathan Scott * This program is free software; you can redistribute it and/or 67b718769SNathan Scott * modify it under the terms of the GNU General Public License as 71da177e4SLinus Torvalds * published by the Free Software Foundation. 81da177e4SLinus Torvalds * 97b718769SNathan Scott * This program is distributed in the hope that it would be useful, 107b718769SNathan Scott * but WITHOUT ANY WARRANTY; without even the implied warranty of 117b718769SNathan Scott * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 127b718769SNathan Scott * GNU General Public License for more details. 131da177e4SLinus Torvalds * 147b718769SNathan Scott * You should have received a copy of the GNU General Public License 157b718769SNathan Scott * along with this program; if not, write the Free Software Foundation, 167b718769SNathan Scott * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 171da177e4SLinus Torvalds */ 1840ebd81dSRobert P. J. Day #include <linux/log2.h> 1940ebd81dSRobert P. J. 
Day 201da177e4SLinus Torvalds #include "xfs.h" 21a844f451SNathan Scott #include "xfs_fs.h" 221da177e4SLinus Torvalds #include "xfs_types.h" 231da177e4SLinus Torvalds #include "xfs_log.h" 24a844f451SNathan Scott #include "xfs_inum.h" 251da177e4SLinus Torvalds #include "xfs_trans.h" 261da177e4SLinus Torvalds #include "xfs_trans_priv.h" 271da177e4SLinus Torvalds #include "xfs_sb.h" 281da177e4SLinus Torvalds #include "xfs_ag.h" 291da177e4SLinus Torvalds #include "xfs_mount.h" 301da177e4SLinus Torvalds #include "xfs_bmap_btree.h" 31a844f451SNathan Scott #include "xfs_alloc_btree.h" 321da177e4SLinus Torvalds #include "xfs_ialloc_btree.h" 33a844f451SNathan Scott #include "xfs_attr_sf.h" 341da177e4SLinus Torvalds #include "xfs_dinode.h" 351da177e4SLinus Torvalds #include "xfs_inode.h" 361da177e4SLinus Torvalds #include "xfs_buf_item.h" 37a844f451SNathan Scott #include "xfs_inode_item.h" 38a844f451SNathan Scott #include "xfs_btree.h" 39a844f451SNathan Scott #include "xfs_alloc.h" 40a844f451SNathan Scott #include "xfs_ialloc.h" 41a844f451SNathan Scott #include "xfs_bmap.h" 421da177e4SLinus Torvalds #include "xfs_error.h" 431da177e4SLinus Torvalds #include "xfs_utils.h" 441da177e4SLinus Torvalds #include "xfs_quota.h" 452a82b8beSDavid Chinner #include "xfs_filestream.h" 46739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h" 47*93848a99SChristoph Hellwig #include "xfs_cksum.h" 480b1b213fSChristoph Hellwig #include "xfs_trace.h" 4933479e05SDave Chinner #include "xfs_icache.h" 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone; 521da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone; 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds /* 558f04c47aSChristoph Hellwig * Used in xfs_itruncate_extents(). This is the maximum number of extents 561da177e4SLinus Torvalds * freed from a file in a single transaction. 
571da177e4SLinus Torvalds */ 581da177e4SLinus Torvalds #define XFS_ITRUNC_MAX_EXTENTS 2 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 611da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 621da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 631da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 641da177e4SLinus Torvalds 652a0ec1d9SDave Chinner /* 662a0ec1d9SDave Chinner * helper function to extract extent size hint from inode 672a0ec1d9SDave Chinner */ 682a0ec1d9SDave Chinner xfs_extlen_t 692a0ec1d9SDave Chinner xfs_get_extsz_hint( 702a0ec1d9SDave Chinner struct xfs_inode *ip) 712a0ec1d9SDave Chinner { 722a0ec1d9SDave Chinner if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) 732a0ec1d9SDave Chinner return ip->i_d.di_extsize; 742a0ec1d9SDave Chinner if (XFS_IS_REALTIME_INODE(ip)) 752a0ec1d9SDave Chinner return ip->i_mount->m_sb.sb_rextsize; 762a0ec1d9SDave Chinner return 0; 772a0ec1d9SDave Chinner } 782a0ec1d9SDave Chinner 79fa96acadSDave Chinner /* 80fa96acadSDave Chinner * This is a wrapper routine around the xfs_ilock() routine used to centralize 81fa96acadSDave Chinner * some grungy code. It is used in places that wish to lock the inode solely 82fa96acadSDave Chinner * for reading the extents. The reason these places can't just call 83fa96acadSDave Chinner * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the 84fa96acadSDave Chinner * extents from disk for a file in b-tree format. If the inode is in b-tree 85fa96acadSDave Chinner * format, then we need to lock the inode exclusively until the extents are read 86fa96acadSDave Chinner * in. Locking it exclusively all the time would limit our parallelism 87fa96acadSDave Chinner * unnecessarily, though. 
What we do instead is check to see if the extents 88fa96acadSDave Chinner * have been read in yet, and only lock the inode exclusively if they have not. 89fa96acadSDave Chinner * 90fa96acadSDave Chinner * The function returns a value which should be given to the corresponding 91fa96acadSDave Chinner * xfs_iunlock_map_shared(). This value is the mode in which the lock was 92fa96acadSDave Chinner * actually taken. 93fa96acadSDave Chinner */ 94fa96acadSDave Chinner uint 95fa96acadSDave Chinner xfs_ilock_map_shared( 96fa96acadSDave Chinner xfs_inode_t *ip) 97fa96acadSDave Chinner { 98fa96acadSDave Chinner uint lock_mode; 99fa96acadSDave Chinner 100fa96acadSDave Chinner if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && 101fa96acadSDave Chinner ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { 102fa96acadSDave Chinner lock_mode = XFS_ILOCK_EXCL; 103fa96acadSDave Chinner } else { 104fa96acadSDave Chinner lock_mode = XFS_ILOCK_SHARED; 105fa96acadSDave Chinner } 106fa96acadSDave Chinner 107fa96acadSDave Chinner xfs_ilock(ip, lock_mode); 108fa96acadSDave Chinner 109fa96acadSDave Chinner return lock_mode; 110fa96acadSDave Chinner } 111fa96acadSDave Chinner 112fa96acadSDave Chinner /* 113fa96acadSDave Chinner * This is simply the unlock routine to go with xfs_ilock_map_shared(). 114fa96acadSDave Chinner * All it does is call xfs_iunlock() with the given lock_mode. 115fa96acadSDave Chinner */ 116fa96acadSDave Chinner void 117fa96acadSDave Chinner xfs_iunlock_map_shared( 118fa96acadSDave Chinner xfs_inode_t *ip, 119fa96acadSDave Chinner unsigned int lock_mode) 120fa96acadSDave Chinner { 121fa96acadSDave Chinner xfs_iunlock(ip, lock_mode); 122fa96acadSDave Chinner } 123fa96acadSDave Chinner 124fa96acadSDave Chinner /* 125fa96acadSDave Chinner * The xfs inode contains 2 locks: a multi-reader lock called the 126fa96acadSDave Chinner * i_iolock and a multi-reader lock called the i_lock. This routine 127fa96acadSDave Chinner * allows either or both of the locks to be obtained. 
128fa96acadSDave Chinner * 129fa96acadSDave Chinner * The 2 locks should always be ordered so that the IO lock is 130fa96acadSDave Chinner * obtained first in order to prevent deadlock. 131fa96acadSDave Chinner * 132fa96acadSDave Chinner * ip -- the inode being locked 133fa96acadSDave Chinner * lock_flags -- this parameter indicates the inode's locks 134fa96acadSDave Chinner * to be locked. It can be: 135fa96acadSDave Chinner * XFS_IOLOCK_SHARED, 136fa96acadSDave Chinner * XFS_IOLOCK_EXCL, 137fa96acadSDave Chinner * XFS_ILOCK_SHARED, 138fa96acadSDave Chinner * XFS_ILOCK_EXCL, 139fa96acadSDave Chinner * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 140fa96acadSDave Chinner * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 141fa96acadSDave Chinner * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 142fa96acadSDave Chinner * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 143fa96acadSDave Chinner */ 144fa96acadSDave Chinner void 145fa96acadSDave Chinner xfs_ilock( 146fa96acadSDave Chinner xfs_inode_t *ip, 147fa96acadSDave Chinner uint lock_flags) 148fa96acadSDave Chinner { 149fa96acadSDave Chinner trace_xfs_ilock(ip, lock_flags, _RET_IP_); 150fa96acadSDave Chinner 151fa96acadSDave Chinner /* 152fa96acadSDave Chinner * You can't set both SHARED and EXCL for the same lock, 153fa96acadSDave Chinner * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 154fa96acadSDave Chinner * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 
155fa96acadSDave Chinner */ 156fa96acadSDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 157fa96acadSDave Chinner (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 158fa96acadSDave Chinner ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 159fa96acadSDave Chinner (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 160fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 161fa96acadSDave Chinner 162fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 163fa96acadSDave Chinner mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 164fa96acadSDave Chinner else if (lock_flags & XFS_IOLOCK_SHARED) 165fa96acadSDave Chinner mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 166fa96acadSDave Chinner 167fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 168fa96acadSDave Chinner mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 169fa96acadSDave Chinner else if (lock_flags & XFS_ILOCK_SHARED) 170fa96acadSDave Chinner mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 171fa96acadSDave Chinner } 172fa96acadSDave Chinner 173fa96acadSDave Chinner /* 174fa96acadSDave Chinner * This is just like xfs_ilock(), except that the caller 175fa96acadSDave Chinner * is guaranteed not to sleep. It returns 1 if it gets 176fa96acadSDave Chinner * the requested locks and 0 otherwise. If the IO lock is 177fa96acadSDave Chinner * obtained but the inode lock cannot be, then the IO lock 178fa96acadSDave Chinner * is dropped before returning. 179fa96acadSDave Chinner * 180fa96acadSDave Chinner * ip -- the inode being locked 181fa96acadSDave Chinner * lock_flags -- this parameter indicates the inode's locks to be 182fa96acadSDave Chinner * to be locked. See the comment for xfs_ilock() for a list 183fa96acadSDave Chinner * of valid values. 
184fa96acadSDave Chinner */ 185fa96acadSDave Chinner int 186fa96acadSDave Chinner xfs_ilock_nowait( 187fa96acadSDave Chinner xfs_inode_t *ip, 188fa96acadSDave Chinner uint lock_flags) 189fa96acadSDave Chinner { 190fa96acadSDave Chinner trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); 191fa96acadSDave Chinner 192fa96acadSDave Chinner /* 193fa96acadSDave Chinner * You can't set both SHARED and EXCL for the same lock, 194fa96acadSDave Chinner * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 195fa96acadSDave Chinner * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 196fa96acadSDave Chinner */ 197fa96acadSDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 198fa96acadSDave Chinner (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 199fa96acadSDave Chinner ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 200fa96acadSDave Chinner (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 201fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 202fa96acadSDave Chinner 203fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) { 204fa96acadSDave Chinner if (!mrtryupdate(&ip->i_iolock)) 205fa96acadSDave Chinner goto out; 206fa96acadSDave Chinner } else if (lock_flags & XFS_IOLOCK_SHARED) { 207fa96acadSDave Chinner if (!mrtryaccess(&ip->i_iolock)) 208fa96acadSDave Chinner goto out; 209fa96acadSDave Chinner } 210fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) { 211fa96acadSDave Chinner if (!mrtryupdate(&ip->i_lock)) 212fa96acadSDave Chinner goto out_undo_iolock; 213fa96acadSDave Chinner } else if (lock_flags & XFS_ILOCK_SHARED) { 214fa96acadSDave Chinner if (!mrtryaccess(&ip->i_lock)) 215fa96acadSDave Chinner goto out_undo_iolock; 216fa96acadSDave Chinner } 217fa96acadSDave Chinner return 1; 218fa96acadSDave Chinner 219fa96acadSDave Chinner out_undo_iolock: 220fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 221fa96acadSDave Chinner mrunlock_excl(&ip->i_iolock); 222fa96acadSDave Chinner else if 
(lock_flags & XFS_IOLOCK_SHARED) 223fa96acadSDave Chinner mrunlock_shared(&ip->i_iolock); 224fa96acadSDave Chinner out: 225fa96acadSDave Chinner return 0; 226fa96acadSDave Chinner } 227fa96acadSDave Chinner 228fa96acadSDave Chinner /* 229fa96acadSDave Chinner * xfs_iunlock() is used to drop the inode locks acquired with 230fa96acadSDave Chinner * xfs_ilock() and xfs_ilock_nowait(). The caller must pass 231fa96acadSDave Chinner * in the flags given to xfs_ilock() or xfs_ilock_nowait() so 232fa96acadSDave Chinner * that we know which locks to drop. 233fa96acadSDave Chinner * 234fa96acadSDave Chinner * ip -- the inode being unlocked 235fa96acadSDave Chinner * lock_flags -- this parameter indicates the inode's locks to be 236fa96acadSDave Chinner * to be unlocked. See the comment for xfs_ilock() for a list 237fa96acadSDave Chinner * of valid values for this parameter. 238fa96acadSDave Chinner * 239fa96acadSDave Chinner */ 240fa96acadSDave Chinner void 241fa96acadSDave Chinner xfs_iunlock( 242fa96acadSDave Chinner xfs_inode_t *ip, 243fa96acadSDave Chinner uint lock_flags) 244fa96acadSDave Chinner { 245fa96acadSDave Chinner /* 246fa96acadSDave Chinner * You can't set both SHARED and EXCL for the same lock, 247fa96acadSDave Chinner * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 248fa96acadSDave Chinner * and XFS_ILOCK_EXCL are valid values to set in lock_flags. 
249fa96acadSDave Chinner */ 250fa96acadSDave Chinner ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 251fa96acadSDave Chinner (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 252fa96acadSDave Chinner ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 253fa96acadSDave Chinner (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 254fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 255fa96acadSDave Chinner ASSERT(lock_flags != 0); 256fa96acadSDave Chinner 257fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 258fa96acadSDave Chinner mrunlock_excl(&ip->i_iolock); 259fa96acadSDave Chinner else if (lock_flags & XFS_IOLOCK_SHARED) 260fa96acadSDave Chinner mrunlock_shared(&ip->i_iolock); 261fa96acadSDave Chinner 262fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 263fa96acadSDave Chinner mrunlock_excl(&ip->i_lock); 264fa96acadSDave Chinner else if (lock_flags & XFS_ILOCK_SHARED) 265fa96acadSDave Chinner mrunlock_shared(&ip->i_lock); 266fa96acadSDave Chinner 267fa96acadSDave Chinner trace_xfs_iunlock(ip, lock_flags, _RET_IP_); 268fa96acadSDave Chinner } 269fa96acadSDave Chinner 270fa96acadSDave Chinner /* 271fa96acadSDave Chinner * give up write locks. the i/o lock cannot be held nested 272fa96acadSDave Chinner * if it is being demoted. 
273fa96acadSDave Chinner */ 274fa96acadSDave Chinner void 275fa96acadSDave Chinner xfs_ilock_demote( 276fa96acadSDave Chinner xfs_inode_t *ip, 277fa96acadSDave Chinner uint lock_flags) 278fa96acadSDave Chinner { 279fa96acadSDave Chinner ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 280fa96acadSDave Chinner ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 281fa96acadSDave Chinner 282fa96acadSDave Chinner if (lock_flags & XFS_ILOCK_EXCL) 283fa96acadSDave Chinner mrdemote(&ip->i_lock); 284fa96acadSDave Chinner if (lock_flags & XFS_IOLOCK_EXCL) 285fa96acadSDave Chinner mrdemote(&ip->i_iolock); 286fa96acadSDave Chinner 287fa96acadSDave Chinner trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); 288fa96acadSDave Chinner } 289fa96acadSDave Chinner 290fa96acadSDave Chinner #ifdef DEBUG 291fa96acadSDave Chinner int 292fa96acadSDave Chinner xfs_isilocked( 293fa96acadSDave Chinner xfs_inode_t *ip, 294fa96acadSDave Chinner uint lock_flags) 295fa96acadSDave Chinner { 296fa96acadSDave Chinner if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { 297fa96acadSDave Chinner if (!(lock_flags & XFS_ILOCK_SHARED)) 298fa96acadSDave Chinner return !!ip->i_lock.mr_writer; 299fa96acadSDave Chinner return rwsem_is_locked(&ip->i_lock.mr_lock); 300fa96acadSDave Chinner } 301fa96acadSDave Chinner 302fa96acadSDave Chinner if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 303fa96acadSDave Chinner if (!(lock_flags & XFS_IOLOCK_SHARED)) 304fa96acadSDave Chinner return !!ip->i_iolock.mr_writer; 305fa96acadSDave Chinner return rwsem_is_locked(&ip->i_iolock.mr_lock); 306fa96acadSDave Chinner } 307fa96acadSDave Chinner 308fa96acadSDave Chinner ASSERT(0); 309fa96acadSDave Chinner return 0; 310fa96acadSDave Chinner } 311fa96acadSDave Chinner #endif 312fa96acadSDave Chinner 313fa96acadSDave Chinner void 314fa96acadSDave Chinner __xfs_iflock( 315fa96acadSDave Chinner struct xfs_inode *ip) 316fa96acadSDave Chinner { 317fa96acadSDave Chinner wait_queue_head_t *wq = 
bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); 318fa96acadSDave Chinner DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); 319fa96acadSDave Chinner 320fa96acadSDave Chinner do { 321fa96acadSDave Chinner prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 322fa96acadSDave Chinner if (xfs_isiflocked(ip)) 323fa96acadSDave Chinner io_schedule(); 324fa96acadSDave Chinner } while (!xfs_iflock_nowait(ip)); 325fa96acadSDave Chinner 326fa96acadSDave Chinner finish_wait(wq, &wait.wait); 327fa96acadSDave Chinner } 328fa96acadSDave Chinner 3291da177e4SLinus Torvalds #ifdef DEBUG 3301da177e4SLinus Torvalds /* 3311da177e4SLinus Torvalds * Make sure that the extents in the given memory buffer 3321da177e4SLinus Torvalds * are valid. 3331da177e4SLinus Torvalds */ 3341da177e4SLinus Torvalds STATIC void 3351da177e4SLinus Torvalds xfs_validate_extents( 3364eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, 3371da177e4SLinus Torvalds int nrecs, 3381da177e4SLinus Torvalds xfs_exntfmt_t fmt) 3391da177e4SLinus Torvalds { 3401da177e4SLinus Torvalds xfs_bmbt_irec_t irec; 341a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t rec; 3421da177e4SLinus Torvalds int i; 3431da177e4SLinus Torvalds 3441da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 345a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 346a6f64d4aSChristoph Hellwig rec.l0 = get_unaligned(&ep->l0); 347a6f64d4aSChristoph Hellwig rec.l1 = get_unaligned(&ep->l1); 3481da177e4SLinus Torvalds xfs_bmbt_get_all(&rec, &irec); 3491da177e4SLinus Torvalds if (fmt == XFS_EXTFMT_NOSTATE) 3501da177e4SLinus Torvalds ASSERT(irec.br_state == XFS_EXT_NORM); 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds } 3531da177e4SLinus Torvalds #else /* DEBUG */ 354a6f64d4aSChristoph Hellwig #define xfs_validate_extents(ifp, nrecs, fmt) 3551da177e4SLinus Torvalds #endif /* DEBUG */ 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds /* 3581da177e4SLinus Torvalds * Check that none of the inode's in the buffer have a 
next 3591da177e4SLinus Torvalds * unlinked field of 0. 3601da177e4SLinus Torvalds */ 3611da177e4SLinus Torvalds #if defined(DEBUG) 3621da177e4SLinus Torvalds void 3631da177e4SLinus Torvalds xfs_inobp_check( 3641da177e4SLinus Torvalds xfs_mount_t *mp, 3651da177e4SLinus Torvalds xfs_buf_t *bp) 3661da177e4SLinus Torvalds { 3671da177e4SLinus Torvalds int i; 3681da177e4SLinus Torvalds int j; 3691da177e4SLinus Torvalds xfs_dinode_t *dip; 3701da177e4SLinus Torvalds 3711da177e4SLinus Torvalds j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 3721da177e4SLinus Torvalds 3731da177e4SLinus Torvalds for (i = 0; i < j; i++) { 3741da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 3751da177e4SLinus Torvalds i * mp->m_sb.sb_inodesize); 3761da177e4SLinus Torvalds if (!dip->di_next_unlinked) { 37753487786SDave Chinner xfs_alert(mp, 37853487786SDave Chinner "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", 3791da177e4SLinus Torvalds bp); 3801da177e4SLinus Torvalds ASSERT(dip->di_next_unlinked); 3811da177e4SLinus Torvalds } 3821da177e4SLinus Torvalds } 3831da177e4SLinus Torvalds } 3841da177e4SLinus Torvalds #endif 3851da177e4SLinus Torvalds 386612cfbfeSDave Chinner static void 387af133e86SDave Chinner xfs_inode_buf_verify( 388af133e86SDave Chinner struct xfs_buf *bp) 389af133e86SDave Chinner { 390af133e86SDave Chinner struct xfs_mount *mp = bp->b_target->bt_mount; 391af133e86SDave Chinner int i; 392af133e86SDave Chinner int ni; 393af133e86SDave Chinner 394af133e86SDave Chinner /* 395af133e86SDave Chinner * Validate the magic number and version of every inode in the buffer 396af133e86SDave Chinner */ 397af133e86SDave Chinner ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 398af133e86SDave Chinner for (i = 0; i < ni; i++) { 399af133e86SDave Chinner int di_ok; 400af133e86SDave Chinner xfs_dinode_t *dip; 401af133e86SDave Chinner 402af133e86SDave Chinner dip = (struct xfs_dinode *)xfs_buf_offset(bp, 403af133e86SDave Chinner (i << 
mp->m_sb.sb_inodelog)); 404af133e86SDave Chinner di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 405af133e86SDave Chinner XFS_DINODE_GOOD_VERSION(dip->di_version); 406af133e86SDave Chinner if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 407af133e86SDave Chinner XFS_ERRTAG_ITOBP_INOTOBP, 408af133e86SDave Chinner XFS_RANDOM_ITOBP_INOTOBP))) { 409af133e86SDave Chinner xfs_buf_ioerror(bp, EFSCORRUPTED); 410af133e86SDave Chinner XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, 411af133e86SDave Chinner mp, dip); 412af133e86SDave Chinner #ifdef DEBUG 413af133e86SDave Chinner xfs_emerg(mp, 414af133e86SDave Chinner "bad inode magic/vsn daddr %lld #%d (magic=%x)", 415af133e86SDave Chinner (unsigned long long)bp->b_bn, i, 416af133e86SDave Chinner be16_to_cpu(dip->di_magic)); 417af133e86SDave Chinner ASSERT(0); 418af133e86SDave Chinner #endif 419af133e86SDave Chinner } 420af133e86SDave Chinner } 421af133e86SDave Chinner xfs_inobp_check(mp, bp); 422612cfbfeSDave Chinner } 423612cfbfeSDave Chinner 4241813dd64SDave Chinner 4251813dd64SDave Chinner static void 4261813dd64SDave Chinner xfs_inode_buf_read_verify( 4271813dd64SDave Chinner struct xfs_buf *bp) 4281813dd64SDave Chinner { 4291813dd64SDave Chinner xfs_inode_buf_verify(bp); 4301813dd64SDave Chinner } 4311813dd64SDave Chinner 4321813dd64SDave Chinner static void 433612cfbfeSDave Chinner xfs_inode_buf_write_verify( 434612cfbfeSDave Chinner struct xfs_buf *bp) 435612cfbfeSDave Chinner { 436612cfbfeSDave Chinner xfs_inode_buf_verify(bp); 437612cfbfeSDave Chinner } 438612cfbfeSDave Chinner 4391813dd64SDave Chinner const struct xfs_buf_ops xfs_inode_buf_ops = { 4401813dd64SDave Chinner .verify_read = xfs_inode_buf_read_verify, 4411813dd64SDave Chinner .verify_write = xfs_inode_buf_write_verify, 4421813dd64SDave Chinner }; 4431813dd64SDave Chinner 444af133e86SDave Chinner 4451da177e4SLinus Torvalds /* 446475ee413SChristoph Hellwig * This routine is called to map an inode to the buffer containing the on-disk 
447475ee413SChristoph Hellwig * version of the inode. It returns a pointer to the buffer containing the 448475ee413SChristoph Hellwig * on-disk inode in the bpp parameter, and in the dipp parameter it returns a 449475ee413SChristoph Hellwig * pointer to the on-disk inode within that buffer. 450475ee413SChristoph Hellwig * 451475ee413SChristoph Hellwig * If a non-zero error is returned, then the contents of bpp and dipp are 452475ee413SChristoph Hellwig * undefined. 4534ae29b43SDavid Chinner */ 454475ee413SChristoph Hellwig int 4554ae29b43SDavid Chinner xfs_imap_to_bp( 456475ee413SChristoph Hellwig struct xfs_mount *mp, 457475ee413SChristoph Hellwig struct xfs_trans *tp, 45892bfc6e7SChristoph Hellwig struct xfs_imap *imap, 459475ee413SChristoph Hellwig struct xfs_dinode **dipp, 460475ee413SChristoph Hellwig struct xfs_buf **bpp, 4614ae29b43SDavid Chinner uint buf_flags, 462b48d8d64SChristoph Hellwig uint iget_flags) 4634ae29b43SDavid Chinner { 464475ee413SChristoph Hellwig struct xfs_buf *bp; 4654ae29b43SDavid Chinner int error; 4664ae29b43SDavid Chinner 467611c9946SDave Chinner buf_flags |= XBF_UNMAPPED; 4684ae29b43SDavid Chinner error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 469af133e86SDave Chinner (int)imap->im_len, buf_flags, &bp, 4701813dd64SDave Chinner &xfs_inode_buf_ops); 4714ae29b43SDavid Chinner if (error) { 472af133e86SDave Chinner if (error == EAGAIN) { 4730cadda1cSChristoph Hellwig ASSERT(buf_flags & XBF_TRYLOCK); 4744ae29b43SDavid Chinner return error; 4754ae29b43SDavid Chinner } 4764ae29b43SDavid Chinner 477af133e86SDave Chinner if (error == EFSCORRUPTED && 478af133e86SDave Chinner (iget_flags & XFS_IGET_UNTRUSTED)) 4794ae29b43SDavid Chinner return XFS_ERROR(EINVAL); 4804ae29b43SDavid Chinner 481af133e86SDave Chinner xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", 482af133e86SDave Chinner __func__, error); 483af133e86SDave Chinner return error; 484af133e86SDave Chinner } 485475ee413SChristoph Hellwig 
4864ae29b43SDavid Chinner *bpp = bp; 487475ee413SChristoph Hellwig *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); 4884ae29b43SDavid Chinner return 0; 4894ae29b43SDavid Chinner } 4904ae29b43SDavid Chinner 4914ae29b43SDavid Chinner /* 4921da177e4SLinus Torvalds * Move inode type and inode format specific information from the 4931da177e4SLinus Torvalds * on-disk inode to the in-core inode. For fifos, devs, and sockets 4941da177e4SLinus Torvalds * this means set if_rdev to the proper value. For files, directories, 4951da177e4SLinus Torvalds * and symlinks this means to bring in the in-line data or extent 4961da177e4SLinus Torvalds * pointers. For a file in B-tree format, only the root is immediately 4971da177e4SLinus Torvalds * brought in-core. The rest will be in-lined in if_extents when it 4981da177e4SLinus Torvalds * is first referenced (see xfs_iread_extents()). 4991da177e4SLinus Torvalds */ 5001da177e4SLinus Torvalds STATIC int 5011da177e4SLinus Torvalds xfs_iformat( 5021da177e4SLinus Torvalds xfs_inode_t *ip, 5031da177e4SLinus Torvalds xfs_dinode_t *dip) 5041da177e4SLinus Torvalds { 5051da177e4SLinus Torvalds xfs_attr_shortform_t *atp; 5061da177e4SLinus Torvalds int size; 5078096b1ebSChristoph Hellwig int error = 0; 5081da177e4SLinus Torvalds xfs_fsize_t di_size; 5091da177e4SLinus Torvalds 51081591fe2SChristoph Hellwig if (unlikely(be32_to_cpu(dip->di_nextents) + 51181591fe2SChristoph Hellwig be16_to_cpu(dip->di_anextents) > 51281591fe2SChristoph Hellwig be64_to_cpu(dip->di_nblocks))) { 51365333b4cSDave Chinner xfs_warn(ip->i_mount, 5143762ec6bSNathan Scott "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 5151da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 51681591fe2SChristoph Hellwig (int)(be32_to_cpu(dip->di_nextents) + 51781591fe2SChristoph Hellwig be16_to_cpu(dip->di_anextents)), 5181da177e4SLinus Torvalds (unsigned long long) 51981591fe2SChristoph Hellwig be64_to_cpu(dip->di_nblocks)); 5201da177e4SLinus Torvalds 
XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 5211da177e4SLinus Torvalds ip->i_mount, dip); 5221da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5231da177e4SLinus Torvalds } 5241da177e4SLinus Torvalds 52581591fe2SChristoph Hellwig if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 52665333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", 5271da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 52881591fe2SChristoph Hellwig dip->di_forkoff); 5291da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 5301da177e4SLinus Torvalds ip->i_mount, dip); 5311da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5321da177e4SLinus Torvalds } 5331da177e4SLinus Torvalds 534b89d4208SChristoph Hellwig if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 535b89d4208SChristoph Hellwig !ip->i_mount->m_rtdev_targp)) { 53665333b4cSDave Chinner xfs_warn(ip->i_mount, 537b89d4208SChristoph Hellwig "corrupt dinode %Lu, has realtime flag set.", 538b89d4208SChristoph Hellwig ip->i_ino); 539b89d4208SChristoph Hellwig XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 540b89d4208SChristoph Hellwig XFS_ERRLEVEL_LOW, ip->i_mount, dip); 541b89d4208SChristoph Hellwig return XFS_ERROR(EFSCORRUPTED); 542b89d4208SChristoph Hellwig } 543b89d4208SChristoph Hellwig 5441da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 5451da177e4SLinus Torvalds case S_IFIFO: 5461da177e4SLinus Torvalds case S_IFCHR: 5471da177e4SLinus Torvalds case S_IFBLK: 5481da177e4SLinus Torvalds case S_IFSOCK: 54981591fe2SChristoph Hellwig if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 5501da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 5511da177e4SLinus Torvalds ip->i_mount, dip); 5521da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5531da177e4SLinus Torvalds } 5541da177e4SLinus Torvalds ip->i_d.di_size = 0; 55581591fe2SChristoph Hellwig ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 
5561da177e4SLinus Torvalds break; 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds case S_IFREG: 5591da177e4SLinus Torvalds case S_IFLNK: 5601da177e4SLinus Torvalds case S_IFDIR: 56181591fe2SChristoph Hellwig switch (dip->di_format) { 5621da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 5631da177e4SLinus Torvalds /* 5641da177e4SLinus Torvalds * no local regular files yet 5651da177e4SLinus Torvalds */ 566abbede1bSAl Viro if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { 56765333b4cSDave Chinner xfs_warn(ip->i_mount, 56865333b4cSDave Chinner "corrupt inode %Lu (local format for regular file).", 5691da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 5701da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(4)", 5711da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5721da177e4SLinus Torvalds ip->i_mount, dip); 5731da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5741da177e4SLinus Torvalds } 5751da177e4SLinus Torvalds 57681591fe2SChristoph Hellwig di_size = be64_to_cpu(dip->di_size); 5771da177e4SLinus Torvalds if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 57865333b4cSDave Chinner xfs_warn(ip->i_mount, 57965333b4cSDave Chinner "corrupt inode %Lu (bad size %Ld for local inode).", 5801da177e4SLinus Torvalds (unsigned long long) ip->i_ino, 5811da177e4SLinus Torvalds (long long) di_size); 5821da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(5)", 5831da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5841da177e4SLinus Torvalds ip->i_mount, dip); 5851da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5861da177e4SLinus Torvalds } 5871da177e4SLinus Torvalds 5881da177e4SLinus Torvalds size = (int)di_size; 5891da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 5901da177e4SLinus Torvalds break; 5911da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 5921da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 5931da177e4SLinus Torvalds break; 5941da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 
5951da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 5961da177e4SLinus Torvalds break; 5971da177e4SLinus Torvalds default: 5981da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 5991da177e4SLinus Torvalds ip->i_mount); 6001da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6011da177e4SLinus Torvalds } 6021da177e4SLinus Torvalds break; 6031da177e4SLinus Torvalds 6041da177e4SLinus Torvalds default: 6051da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 6061da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6071da177e4SLinus Torvalds } 6081da177e4SLinus Torvalds if (error) { 6091da177e4SLinus Torvalds return error; 6101da177e4SLinus Torvalds } 6111da177e4SLinus Torvalds if (!XFS_DFORK_Q(dip)) 6121da177e4SLinus Torvalds return 0; 6138096b1ebSChristoph Hellwig 6141da177e4SLinus Torvalds ASSERT(ip->i_afp == NULL); 6154a7edddcSDave Chinner ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); 6168096b1ebSChristoph Hellwig 61781591fe2SChristoph Hellwig switch (dip->di_aformat) { 6181da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 6191da177e4SLinus Torvalds atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 6203b244aa8SNathan Scott size = be16_to_cpu(atp->hdr.totsize); 6212809f76aSChristoph Hellwig 6222809f76aSChristoph Hellwig if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 62365333b4cSDave Chinner xfs_warn(ip->i_mount, 62465333b4cSDave Chinner "corrupt inode %Lu (bad attr fork size %Ld).", 6252809f76aSChristoph Hellwig (unsigned long long) ip->i_ino, 6262809f76aSChristoph Hellwig (long long) size); 6272809f76aSChristoph Hellwig XFS_CORRUPTION_ERROR("xfs_iformat(8)", 6282809f76aSChristoph Hellwig XFS_ERRLEVEL_LOW, 6292809f76aSChristoph Hellwig ip->i_mount, dip); 6302809f76aSChristoph Hellwig return XFS_ERROR(EFSCORRUPTED); 6312809f76aSChristoph Hellwig } 6322809f76aSChristoph Hellwig 6331da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, 
XFS_ATTR_FORK, size); 6341da177e4SLinus Torvalds break; 6351da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 6361da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 6371da177e4SLinus Torvalds break; 6381da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 6391da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 6401da177e4SLinus Torvalds break; 6411da177e4SLinus Torvalds default: 6421da177e4SLinus Torvalds error = XFS_ERROR(EFSCORRUPTED); 6431da177e4SLinus Torvalds break; 6441da177e4SLinus Torvalds } 6451da177e4SLinus Torvalds if (error) { 6461da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 6471da177e4SLinus Torvalds ip->i_afp = NULL; 6481da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 6491da177e4SLinus Torvalds } 6501da177e4SLinus Torvalds return error; 6511da177e4SLinus Torvalds } 6521da177e4SLinus Torvalds 6531da177e4SLinus Torvalds /* 6541da177e4SLinus Torvalds * The file is in-lined in the on-disk inode. 6551da177e4SLinus Torvalds * If it fits into if_inline_data, then copy 6561da177e4SLinus Torvalds * it there, otherwise allocate a buffer for it 6571da177e4SLinus Torvalds * and copy the data there. Either way, set 6581da177e4SLinus Torvalds * if_data to point at the data. 6591da177e4SLinus Torvalds * If we allocate a buffer for the data, make 6601da177e4SLinus Torvalds * sure that its size is a multiple of 4 and 6611da177e4SLinus Torvalds * record the real size in i_real_bytes. 
6621da177e4SLinus Torvalds */ 6631da177e4SLinus Torvalds STATIC int 6641da177e4SLinus Torvalds xfs_iformat_local( 6651da177e4SLinus Torvalds xfs_inode_t *ip, 6661da177e4SLinus Torvalds xfs_dinode_t *dip, 6671da177e4SLinus Torvalds int whichfork, 6681da177e4SLinus Torvalds int size) 6691da177e4SLinus Torvalds { 6701da177e4SLinus Torvalds xfs_ifork_t *ifp; 6711da177e4SLinus Torvalds int real_size; 6721da177e4SLinus Torvalds 6731da177e4SLinus Torvalds /* 6741da177e4SLinus Torvalds * If the size is unreasonable, then something 6751da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 6761da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 6771da177e4SLinus Torvalds */ 6781da177e4SLinus Torvalds if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 67965333b4cSDave Chinner xfs_warn(ip->i_mount, 68065333b4cSDave Chinner "corrupt inode %Lu (bad size %d for local fork, size = %d).", 6811da177e4SLinus Torvalds (unsigned long long) ip->i_ino, size, 6821da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 6831da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 6841da177e4SLinus Torvalds ip->i_mount, dip); 6851da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6861da177e4SLinus Torvalds } 6871da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 6881da177e4SLinus Torvalds real_size = 0; 6891da177e4SLinus Torvalds if (size == 0) 6901da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 6911da177e4SLinus Torvalds else if (size <= sizeof(ifp->if_u2.if_inline_data)) 6921da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 6931da177e4SLinus Torvalds else { 6941da177e4SLinus Torvalds real_size = roundup(size, 4); 6954a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); 6961da177e4SLinus Torvalds } 6971da177e4SLinus Torvalds ifp->if_bytes = size; 6981da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 6991da177e4SLinus Torvalds if (size) 
/*
 * The file consists of a set of extents all of which fit into the
 * on-disk inode.  If there are few enough extents to fit into
 * the if_inline_ext array, then copy them there.  Otherwise allocate
 * a buffer for them and copy them into it.  Either way, set
 * if_extents to point at the extents.
 */
STATIC int
xfs_iformat_extents(
	xfs_inode_t	*ip,		/* in-core inode */
	xfs_dinode_t	*dip,		/* on-disk inode being read in */
	int		whichfork)	/* XFS_DATA_FORK or XFS_ATTR_FORK */
{
	xfs_bmbt_rec_t	*dp;		/* cursor over on-disk records */
	xfs_ifork_t	*ifp;
	int		nex;		/* number of extents in this fork */
	int		size;		/* total bytes of extent records */
	int		i;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
	size = nex * (uint)sizeof(xfs_bmbt_rec_t);

	/*
	 * If the number of extents is unreasonable, then something
	 * is wrong and we just bail out rather than crash in
	 * kmem_alloc() or memcpy() below.
	 */
	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
			(unsigned long long) ip->i_ino, nex);
		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	/* Pick the in-core destination: none, the inline array, or iext. */
	ifp->if_real_bytes = 0;
	if (nex == 0)
		ifp->if_u1.if_extents = NULL;
	else if (nex <= XFS_INLINE_EXTS)
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
	else
		xfs_iext_add(ifp, 0, nex);

	ifp->if_bytes = size;
	if (size) {
		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
		/*
		 * Byte-swap each record from big-endian disk order; the
		 * records are not guaranteed to be aligned within the
		 * on-disk inode, hence get_unaligned_be64().
		 */
		for (i = 0; i < nex; i++, dp++) {
			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			ep->l0 = get_unaligned_be64(&dp->l0);
			ep->l1 = get_unaligned_be64(&dp->l1);
		}
		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
		/*
		 * Extra validation for attr forks, and for data forks in
		 * NOSTATE extent format.  NOTE(review): presumably
		 * xfs_check_nostate_extents() rejects records that carry
		 * extent-state bits these forks must not have -- confirm
		 * against its definition in xfs_bmap.c.
		 */
		if (whichfork != XFS_DATA_FORK ||
			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
			if (unlikely(xfs_check_nostate_extents(
			    ifp, 0, nex))) {
				XFS_ERROR_REPORT("xfs_iformat_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				return XFS_ERROR(EFSCORRUPTED);
			}
	}
	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;
}
/*
 * The file has too many extents to fit into the inode, so they are
 * in B-tree format.  Allocate a buffer for the root of the B-tree
 * and copy the root into it.  The i_extents field will remain NULL
 * until all of the extents are read in (when they are needed).
 */
STATIC int
xfs_iformat_btree(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_bmdr_block_t	*dfp;	/* on-disk btree root block */
	xfs_ifork_t		*ifp;
	/* REFERENCED */
	int			nrecs;
	int			size;	/* in-core root buffer size */

	ifp = XFS_IFORK_PTR(ip, whichfork);
	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
	nrecs = be16_to_cpu(dfp->bb_numrecs);

	/*
	 * blow out if -- fork has less extents than can fit in
	 * fork (fork shouldn't be a btree format), root btree
	 * block has more records than can fit into the fork,
	 * or the number of extents is greater than the number of
	 * blocks.
	 */
	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
					XFS_IFORK_MAXEXT(ip, whichfork) ||
		     XFS_BMDR_SPACE_CALC(nrecs) >
					XFS_DFORK_SIZE(dip, mp, whichfork) ||
		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
		xfs_warn(mp, "corrupt inode %Lu (btree).",
			(unsigned long long) ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
					 mp, dip);
		return XFS_ERROR(EFSCORRUPTED);
	}

	ifp->if_broot_bytes = size;
	/*
	 * NOTE(review): KM_SLEEP allocations block rather than return NULL,
	 * so the ASSERT documents an invariant instead of handling failure
	 * -- confirm against kmem_alloc() semantics.
	 */
	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
	ASSERT(ifp->if_broot != NULL);
	/*
	 * Copy and convert from the on-disk structure
	 * to the in-memory structure.
	 */
	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			 ifp->if_broot, size);
	ifp->if_flags &= ~XFS_IFEXTENTS;
	ifp->if_flags |= XFS_IFBROOT;

	return 0;
}
/*
 * Copy the on-disk inode core (big-endian) into the native-endian
 * in-core inode image.  The version 3 (CRC-era) fields are only
 * present for v3 inodes, so they are copied conditionally.
 */
STATIC void
xfs_dinode_from_disk(
	xfs_icdinode_t	*to,	/* native-endian in-core destination */
	xfs_dinode_t	*from)	/* big-endian on-disk source */
{
	to->di_magic = be16_to_cpu(from->di_magic);
	to->di_mode = be16_to_cpu(from->di_mode);
	/* single-byte fields need no endian conversion */
	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_onlink = be16_to_cpu(from->di_onlink);
	to->di_uid = be32_to_cpu(from->di_uid);
	to->di_gid = be32_to_cpu(from->di_gid);
	to->di_nlink = be32_to_cpu(from->di_nlink);
	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
	to->di_flushiter = be16_to_cpu(from->di_flushiter);
	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
	to->di_size = be64_to_cpu(from->di_size);
	to->di_nblocks = be64_to_cpu(from->di_nblocks);
	to->di_extsize = be32_to_cpu(from->di_extsize);
	to->di_nextents = be32_to_cpu(from->di_nextents);
	to->di_anextents = be16_to_cpu(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
	to->di_dmstate = be16_to_cpu(from->di_dmstate);
	to->di_flags = be16_to_cpu(from->di_flags);
	to->di_gen = be32_to_cpu(from->di_gen);

	/* v3 additions: change counter, create time, extra flags, self id */
	if (to->di_version == 3) {
		to->di_changecount = be64_to_cpu(from->di_changecount);
		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
		to->di_flags2 = be64_to_cpu(from->di_flags2);
		to->di_ino = be64_to_cpu(from->di_ino);
		to->di_lsn = be64_to_cpu(from->di_lsn);
		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &from->di_uuid);
	}
}
/*
 * Copy the native-endian in-core inode image back into the big-endian
 * on-disk inode core.  Exact inverse of xfs_dinode_from_disk(); the
 * version 3 fields are only written for v3 inodes.
 */
void
xfs_dinode_to_disk(
	xfs_dinode_t	*to,	/* big-endian on-disk destination */
	xfs_icdinode_t	*from)	/* native-endian in-core source */
{
	to->di_magic = cpu_to_be16(from->di_magic);
	to->di_mode = cpu_to_be16(from->di_mode);
	/* single-byte fields need no endian conversion */
	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_onlink = cpu_to_be16(from->di_onlink);
	to->di_uid = cpu_to_be32(from->di_uid);
	to->di_gid = cpu_to_be32(from->di_gid);
	to->di_nlink = cpu_to_be32(from->di_nlink);
	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
	to->di_flushiter = cpu_to_be16(from->di_flushiter);
	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);
	to->di_gen = cpu_to_be32(from->di_gen);

	/* v3 additions: change counter, create time, extra flags, self id */
	if (from->di_version == 3) {
		to->di_changecount = cpu_to_be64(from->di_changecount);
		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
		to->di_flags2 = cpu_to_be64(from->di_flags2);
		to->di_ino = cpu_to_be64(from->di_ino);
		to->di_lsn = cpu_to_be64(from->di_lsn);
		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &from->di_uuid);
	}
}
cpu_to_be16(from->di_dmstate); 915347d1c01SChristoph Hellwig to->di_flags = cpu_to_be16(from->di_flags); 916347d1c01SChristoph Hellwig to->di_gen = cpu_to_be32(from->di_gen); 917*93848a99SChristoph Hellwig 918*93848a99SChristoph Hellwig if (from->di_version == 3) { 919*93848a99SChristoph Hellwig to->di_changecount = cpu_to_be64(from->di_changecount); 920*93848a99SChristoph Hellwig to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); 921*93848a99SChristoph Hellwig to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); 922*93848a99SChristoph Hellwig to->di_flags2 = cpu_to_be64(from->di_flags2); 923*93848a99SChristoph Hellwig to->di_ino = cpu_to_be64(from->di_ino); 924*93848a99SChristoph Hellwig to->di_lsn = cpu_to_be64(from->di_lsn); 925*93848a99SChristoph Hellwig memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); 926*93848a99SChristoph Hellwig uuid_copy(&to->di_uuid, &from->di_uuid); 927*93848a99SChristoph Hellwig } 9281da177e4SLinus Torvalds } 9291da177e4SLinus Torvalds 9301da177e4SLinus Torvalds STATIC uint 9311da177e4SLinus Torvalds _xfs_dic2xflags( 9321da177e4SLinus Torvalds __uint16_t di_flags) 9331da177e4SLinus Torvalds { 9341da177e4SLinus Torvalds uint flags = 0; 9351da177e4SLinus Torvalds 9361da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_ANY) { 9371da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_REALTIME) 9381da177e4SLinus Torvalds flags |= XFS_XFLAG_REALTIME; 9391da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PREALLOC) 9401da177e4SLinus Torvalds flags |= XFS_XFLAG_PREALLOC; 9411da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_IMMUTABLE) 9421da177e4SLinus Torvalds flags |= XFS_XFLAG_IMMUTABLE; 9431da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_APPEND) 9441da177e4SLinus Torvalds flags |= XFS_XFLAG_APPEND; 9451da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_SYNC) 9461da177e4SLinus Torvalds flags |= XFS_XFLAG_SYNC; 9471da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOATIME) 9481da177e4SLinus Torvalds flags |= XFS_XFLAG_NOATIME; 
9491da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NODUMP) 9501da177e4SLinus Torvalds flags |= XFS_XFLAG_NODUMP; 9511da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_RTINHERIT) 9521da177e4SLinus Torvalds flags |= XFS_XFLAG_RTINHERIT; 9531da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PROJINHERIT) 9541da177e4SLinus Torvalds flags |= XFS_XFLAG_PROJINHERIT; 9551da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOSYMLINKS) 9561da177e4SLinus Torvalds flags |= XFS_XFLAG_NOSYMLINKS; 957dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSIZE) 958dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSIZE; 959dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 960dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSZINHERIT; 961d3446eacSBarry Naujok if (di_flags & XFS_DIFLAG_NODEFRAG) 962d3446eacSBarry Naujok flags |= XFS_XFLAG_NODEFRAG; 9632a82b8beSDavid Chinner if (di_flags & XFS_DIFLAG_FILESTREAM) 9642a82b8beSDavid Chinner flags |= XFS_XFLAG_FILESTREAM; 9651da177e4SLinus Torvalds } 9661da177e4SLinus Torvalds 9671da177e4SLinus Torvalds return flags; 9681da177e4SLinus Torvalds } 9691da177e4SLinus Torvalds 9701da177e4SLinus Torvalds uint 9711da177e4SLinus Torvalds xfs_ip2xflags( 9721da177e4SLinus Torvalds xfs_inode_t *ip) 9731da177e4SLinus Torvalds { 974347d1c01SChristoph Hellwig xfs_icdinode_t *dic = &ip->i_d; 9751da177e4SLinus Torvalds 976a916e2bdSNathan Scott return _xfs_dic2xflags(dic->di_flags) | 97745ba598eSChristoph Hellwig (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); 9781da177e4SLinus Torvalds } 9791da177e4SLinus Torvalds 9801da177e4SLinus Torvalds uint 9811da177e4SLinus Torvalds xfs_dic2xflags( 98245ba598eSChristoph Hellwig xfs_dinode_t *dip) 9831da177e4SLinus Torvalds { 98481591fe2SChristoph Hellwig return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | 98545ba598eSChristoph Hellwig (XFS_DFORK_Q(dip) ? 
/*
 * Sanity-check an on-disk inode against the in-core inode and mount.
 * Pre-v3 inodes only get the magic number check here; v3 (CRC-enabled)
 * inodes additionally have their checksum, self-describing inode
 * number and filesystem UUID verified.  Returns true if the dinode
 * looks valid.
 */
static bool
xfs_dinode_verify(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return false;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return true;

	/* a v3 inode on a filesystem without CRC support is corrupt */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return false;
	/* verify the whole inode buffer, with the CRC at di_crc's offset */
	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
			      offsetof(struct xfs_dinode, di_crc)))
		return false;
	/* self-describing metadata: the inode must know its own number */
	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
		return false;
	/* ... and which filesystem it belongs to */
	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
		return false;
	return true;
}

/*
 * Compute and install the CRC of a v3 on-disk inode.  A no-op for
 * older inode versions, which have no di_crc field.
 */
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	__uint32_t		crc;

	if (dip->di_version < 3)
		return;

	/* v3 inodes only exist on CRC-enabled filesystems */
	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
			      offsetof(struct xfs_dinode, di_crc));
	dip->di_crc = xfs_end_cksum(crc);
}
xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, 1025*93848a99SChristoph Hellwig offsetof(struct xfs_dinode, di_crc)); 1026*93848a99SChristoph Hellwig dip->di_crc = xfs_end_cksum(crc); 1027*93848a99SChristoph Hellwig } 1028*93848a99SChristoph Hellwig 10291da177e4SLinus Torvalds /* 103024f211baSChristoph Hellwig * Read the disk inode attributes into the in-core inode structure. 10311da177e4SLinus Torvalds */ 10321da177e4SLinus Torvalds int 10331da177e4SLinus Torvalds xfs_iread( 10341da177e4SLinus Torvalds xfs_mount_t *mp, 10351da177e4SLinus Torvalds xfs_trans_t *tp, 103624f211baSChristoph Hellwig xfs_inode_t *ip, 103724f211baSChristoph Hellwig uint iget_flags) 10381da177e4SLinus Torvalds { 10391da177e4SLinus Torvalds xfs_buf_t *bp; 10401da177e4SLinus Torvalds xfs_dinode_t *dip; 10411da177e4SLinus Torvalds int error; 10421da177e4SLinus Torvalds 10431da177e4SLinus Torvalds /* 104492bfc6e7SChristoph Hellwig * Fill in the location information in the in-core inode. 10451da177e4SLinus Torvalds */ 104624f211baSChristoph Hellwig error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 10479ed0451eSChristoph Hellwig if (error) 104824f211baSChristoph Hellwig return error; 10491da177e4SLinus Torvalds 10501da177e4SLinus Torvalds /* 105192bfc6e7SChristoph Hellwig * Get pointers to the on-disk inode and the buffer containing it. 
105276d8b277SChristoph Hellwig */ 1053475ee413SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); 105476d8b277SChristoph Hellwig if (error) 105524f211baSChristoph Hellwig return error; 105676d8b277SChristoph Hellwig 1057*93848a99SChristoph Hellwig /* even unallocated inodes are verified */ 1058*93848a99SChristoph Hellwig if (!xfs_dinode_verify(mp, ip, dip)) { 1059*93848a99SChristoph Hellwig xfs_alert(mp, "%s: validation failed for inode %lld failed", 1060*93848a99SChristoph Hellwig __func__, ip->i_ino); 1061*93848a99SChristoph Hellwig 1062*93848a99SChristoph Hellwig XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); 1063*93848a99SChristoph Hellwig error = XFS_ERROR(EFSCORRUPTED); 10649ed0451eSChristoph Hellwig goto out_brelse; 10651da177e4SLinus Torvalds } 10661da177e4SLinus Torvalds 10671da177e4SLinus Torvalds /* 10681da177e4SLinus Torvalds * If the on-disk inode is already linked to a directory 10691da177e4SLinus Torvalds * entry, copy all of the inode into the in-core inode. 10701da177e4SLinus Torvalds * xfs_iformat() handles copying in the inode format 10711da177e4SLinus Torvalds * specific information. 10721da177e4SLinus Torvalds * Otherwise, just get the truly permanent information. 10731da177e4SLinus Torvalds */ 107481591fe2SChristoph Hellwig if (dip->di_mode) { 107581591fe2SChristoph Hellwig xfs_dinode_from_disk(&ip->i_d, dip); 10761da177e4SLinus Torvalds error = xfs_iformat(ip, dip); 10771da177e4SLinus Torvalds if (error) { 10781da177e4SLinus Torvalds #ifdef DEBUG 107953487786SDave Chinner xfs_alert(mp, "%s: xfs_iformat() returned error %d", 108053487786SDave Chinner __func__, error); 10811da177e4SLinus Torvalds #endif /* DEBUG */ 10829ed0451eSChristoph Hellwig goto out_brelse; 10831da177e4SLinus Torvalds } 10841da177e4SLinus Torvalds } else { 1085*93848a99SChristoph Hellwig /* 1086*93848a99SChristoph Hellwig * Partial initialisation of the in-core inode. 
Just the bits 1087*93848a99SChristoph Hellwig * that xfs_ialloc won't overwrite or relies on being correct. 1088*93848a99SChristoph Hellwig */ 108981591fe2SChristoph Hellwig ip->i_d.di_magic = be16_to_cpu(dip->di_magic); 109081591fe2SChristoph Hellwig ip->i_d.di_version = dip->di_version; 109181591fe2SChristoph Hellwig ip->i_d.di_gen = be32_to_cpu(dip->di_gen); 109281591fe2SChristoph Hellwig ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 1093*93848a99SChristoph Hellwig 1094*93848a99SChristoph Hellwig if (dip->di_version == 3) { 1095*93848a99SChristoph Hellwig ip->i_d.di_ino = be64_to_cpu(dip->di_ino); 1096*93848a99SChristoph Hellwig uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); 1097*93848a99SChristoph Hellwig } 1098*93848a99SChristoph Hellwig 10991da177e4SLinus Torvalds /* 11001da177e4SLinus Torvalds * Make sure to pull in the mode here as well in 11011da177e4SLinus Torvalds * case the inode is released without being used. 11021da177e4SLinus Torvalds * This ensures that xfs_inactive() will see that 11031da177e4SLinus Torvalds * the inode is already free and not try to mess 11041da177e4SLinus Torvalds * with the uninitialized part of it. 11051da177e4SLinus Torvalds */ 11061da177e4SLinus Torvalds ip->i_d.di_mode = 0; 11071da177e4SLinus Torvalds } 11081da177e4SLinus Torvalds 11091da177e4SLinus Torvalds /* 11101da177e4SLinus Torvalds * The inode format changed when we moved the link count and 11111da177e4SLinus Torvalds * made it 32 bits long. If this is an old format inode, 11121da177e4SLinus Torvalds * convert it in memory to look like a new one. If it gets 11131da177e4SLinus Torvalds * flushed to disk we will convert back before flushing or 11141da177e4SLinus Torvalds * logging it. We zero out the new projid field and the old link 11151da177e4SLinus Torvalds * count field. We'll handle clearing the pad field (the remains 11161da177e4SLinus Torvalds * of the old uuid field) when we actually convert the inode to 11171da177e4SLinus Torvalds * the new format. 
We don't change the version number so that we 11181da177e4SLinus Torvalds * can distinguish this from a real new format inode. 11191da177e4SLinus Torvalds */ 112051ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 11211da177e4SLinus Torvalds ip->i_d.di_nlink = ip->i_d.di_onlink; 11221da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 11236743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, 0); 11241da177e4SLinus Torvalds } 11251da177e4SLinus Torvalds 11261da177e4SLinus Torvalds ip->i_delayed_blks = 0; 11271da177e4SLinus Torvalds 11281da177e4SLinus Torvalds /* 11291da177e4SLinus Torvalds * Mark the buffer containing the inode as something to keep 11301da177e4SLinus Torvalds * around for a while. This helps to keep recently accessed 11311da177e4SLinus Torvalds * meta-data in-core longer. 11321da177e4SLinus Torvalds */ 1133821eb21dSDave Chinner xfs_buf_set_ref(bp, XFS_INO_REF); 11341da177e4SLinus Torvalds 11351da177e4SLinus Torvalds /* 11361da177e4SLinus Torvalds * Use xfs_trans_brelse() to release the buffer containing the 11371da177e4SLinus Torvalds * on-disk inode, because it was acquired with xfs_trans_read_buf() 1138475ee413SChristoph Hellwig * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal 11391da177e4SLinus Torvalds * brelse(). If we're within a transaction, then xfs_trans_brelse() 11401da177e4SLinus Torvalds * will only release the buffer if it is not dirty within the 11411da177e4SLinus Torvalds * transaction. It will be OK to release the buffer in this case, 11421da177e4SLinus Torvalds * because inodes on disk are never destroyed and we will be 11431da177e4SLinus Torvalds * locking the new in-core inode before putting it in the hash 11441da177e4SLinus Torvalds * table where other processes can find it. Thus we don't have 11451da177e4SLinus Torvalds * to worry about the inode being changed just because we released 11461da177e4SLinus Torvalds * the buffer. 
11471da177e4SLinus Torvalds */ 11489ed0451eSChristoph Hellwig out_brelse: 11499ed0451eSChristoph Hellwig xfs_trans_brelse(tp, bp); 11509ed0451eSChristoph Hellwig return error; 11511da177e4SLinus Torvalds } 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds /* 11541da177e4SLinus Torvalds * Read in extents from a btree-format inode. 11551da177e4SLinus Torvalds * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 11561da177e4SLinus Torvalds */ 11571da177e4SLinus Torvalds int 11581da177e4SLinus Torvalds xfs_iread_extents( 11591da177e4SLinus Torvalds xfs_trans_t *tp, 11601da177e4SLinus Torvalds xfs_inode_t *ip, 11611da177e4SLinus Torvalds int whichfork) 11621da177e4SLinus Torvalds { 11631da177e4SLinus Torvalds int error; 11641da177e4SLinus Torvalds xfs_ifork_t *ifp; 11654eea22f0SMandy Kirkconnell xfs_extnum_t nextents; 11661da177e4SLinus Torvalds 11671da177e4SLinus Torvalds if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 11681da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 11691da177e4SLinus Torvalds ip->i_mount); 11701da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 11711da177e4SLinus Torvalds } 11724eea22f0SMandy Kirkconnell nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 11731da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 11744eea22f0SMandy Kirkconnell 11751da177e4SLinus Torvalds /* 11761da177e4SLinus Torvalds * We know that the size is valid (it's checked in iformat_btree) 11771da177e4SLinus Torvalds */ 11784eea22f0SMandy Kirkconnell ifp->if_bytes = ifp->if_real_bytes = 0; 11791da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 11804eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nextents); 11811da177e4SLinus Torvalds error = xfs_bmap_read_extents(tp, ip, whichfork); 11821da177e4SLinus Torvalds if (error) { 11834eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 11841da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 11851da177e4SLinus Torvalds return error; 
11861da177e4SLinus Torvalds } 1187a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); 11881da177e4SLinus Torvalds return 0; 11891da177e4SLinus Torvalds } 11901da177e4SLinus Torvalds 11911da177e4SLinus Torvalds /* 11921da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 11931da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 11941da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 11951da177e4SLinus Torvalds * set according to the contents of the given cred structure. 11961da177e4SLinus Torvalds * 11971da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 1198cd856db6SCarlos Maiolino * has a free inode available, call xfs_iget() to obtain the in-core 1199cd856db6SCarlos Maiolino * version of the allocated inode. Finally, fill in the inode and 1200cd856db6SCarlos Maiolino * log its initial contents. In this case, ialloc_context would be 1201cd856db6SCarlos Maiolino * set to NULL. 12021da177e4SLinus Torvalds * 1203cd856db6SCarlos Maiolino * If xfs_dialloc() does not have an available inode, it will replenish 1204cd856db6SCarlos Maiolino * its supply by doing an allocation. Since we can only do one 1205cd856db6SCarlos Maiolino * allocation within a transaction without deadlocks, we must commit 1206cd856db6SCarlos Maiolino * the current transaction before returning the inode itself. 1207cd856db6SCarlos Maiolino * In this case, therefore, we will set ialloc_context and return. 12081da177e4SLinus Torvalds * The caller should then commit the current transaction, start a new 12091da177e4SLinus Torvalds * transaction, and call xfs_ialloc() again to actually get the inode. 
 *
 * To ensure that some other process does not grab the inode that
 * was allocated during the first call to xfs_ialloc(), this routine
 * also returns the [locked] bp pointing to the head of the freelist
 * as ialloc_context.  The caller should hold this buffer across
 * the commit and pass it back into this routine on the second call.
 *
 * If we are allocating quota inodes, we do not have a parent inode
 * to attach to or associate with (i.e. pip == NULL) because they
 * are not linked into the directory structure - they are attached
 * directly to the superblock - and so have no parent.
 */
int
xfs_ialloc(
	xfs_trans_t	*tp,
	xfs_inode_t	*pip,
	umode_t		mode,
	xfs_nlink_t	nlink,
	xfs_dev_t	rdev,
	prid_t		prid,
	int		okalloc,
	xfs_buf_t	**ialloc_context,
	xfs_inode_t	**ipp)
{
	struct xfs_mount *mp = tp->t_mountp;
	xfs_ino_t	ino;
	xfs_inode_t	*ip;
	uint		flags;
	int		error;
	timespec_t	tv;
	int		filestreams = 0;

	/*
	 * Call the space management code to pick
	 * the on-disk inode to be allocated.
	 */
	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
			    ialloc_context, &ino);
	if (error)
		return error;
	/*
	 * Two-phase allocation: if dialloc handed back a context (or no
	 * inode), the caller must roll the transaction and call us again.
	 */
	if (*ialloc_context || ino == NULLFSINO) {
		*ipp = NULL;
		return 0;
	}
	ASSERT(*ialloc_context == NULL);

	/*
	 * Get the in-core inode with the lock held exclusively.
	 * This is because we're setting fields here we need
	 * to prevent others from looking at until we're done.
	 */
	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
			 XFS_ILOCK_EXCL, &ip);
	if (error)
		return error;
	ASSERT(ip != NULL);

	ip->i_d.di_mode = mode;
	ip->i_d.di_onlink = 0;
	ip->i_d.di_nlink = nlink;
	ASSERT(ip->i_d.di_nlink == nlink);
	ip->i_d.di_uid = current_fsuid();
	ip->i_d.di_gid = current_fsgid();
	xfs_set_projid(ip, prid);
	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));

	/*
	 * If the superblock version is up to where we support new format
	 * inodes and this is currently an old format inode, then change
	 * the inode version number now.  This way we only do the conversion
	 * here rather than here and in the flush/logging code.
	 */
	if (xfs_sb_version_hasnlink(&mp->m_sb) &&
	    ip->i_d.di_version == 1) {
		ip->i_d.di_version = 2;
		/*
		 * We've already zeroed the old link count, the projid field,
		 * and the pad field.
		 */
	}

	/*
	 * Project ids won't be stored on disk if we are using a version 1 inode.
	 */
	if ((prid != 0) && (ip->i_d.di_version == 1))
		xfs_bump_ino_vers2(tp, ip);

	if (pip && XFS_INHERIT_GID(pip)) {
		ip->i_d.di_gid = pip->i_d.di_gid;
		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
			ip->i_d.di_mode |= S_ISGID;
		}
	}

	/*
	 * If the group ID of the new file does not match the effective group
	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
	 * (and only if the irix_sgid_inherit compatibility variable is set).
	 */
	if ((irix_sgid_inherit) &&
	    (ip->i_d.di_mode & S_ISGID) &&
	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
		ip->i_d.di_mode &= ~S_ISGID;
	}

	ip->i_d.di_size = 0;
	ip->i_d.di_nextents = 0;
	ASSERT(ip->i_d.di_nblocks == 0);

	/* New inode: all three timestamps start at "now". */
	nanotime(&tv);
	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
	ip->i_d.di_atime = ip->i_d.di_mtime;
	ip->i_d.di_ctime = ip->i_d.di_mtime;

	/*
	 * di_gen will have been taken care of in xfs_iread.
	 */
	ip->i_d.di_extsize = 0;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_dmstate = 0;
	ip->i_d.di_flags = 0;

	/*
	 * Version 3 inodes carry extra metadata (CRC, change count, LSN,
	 * flags2, creation time); initialise those fields here.
	 */
	if (ip->i_d.di_version == 3) {
		ASSERT(ip->i_d.di_ino == ino);
		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
		ip->i_d.di_crc = 0;
		ip->i_d.di_changecount = 1;
		ip->i_d.di_lsn = 0;
		ip->i_d.di_flags2 = 0;
		memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
		ip->i_d.di_crtime = ip->i_d.di_mtime;
	}


	flags = XFS_ILOG_CORE;
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
		ip->i_df.if_u2.if_rdev = rdev;
		ip->i_df.if_flags = 0;
		flags |= XFS_ILOG_DEV;
		break;
	case S_IFREG:
		/*
		 * we can't set up filestreams until after the VFS inode
		 * is set up properly.
		 */
		if (pip && xfs_inode_is_filestream(pip))
			filestreams = 1;
		/* fall through */
	case S_IFDIR:
		/* Inherit per-inode flags from the parent, where set. */
		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
			uint	di_flags = 0;

			if (S_ISDIR(mode)) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_RTINHERIT;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			} else if (S_ISREG(mode)) {
				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
					di_flags |= XFS_DIFLAG_REALTIME;
				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
					di_flags |= XFS_DIFLAG_EXTSIZE;
					ip->i_d.di_extsize = pip->i_d.di_extsize;
				}
			}
			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
			    xfs_inherit_noatime)
				di_flags |= XFS_DIFLAG_NOATIME;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
			    xfs_inherit_nodump)
				di_flags |= XFS_DIFLAG_NODUMP;
			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
			    xfs_inherit_sync)
				di_flags |= XFS_DIFLAG_SYNC;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
			    xfs_inherit_nosymlinks)
				di_flags |= XFS_DIFLAG_NOSYMLINKS;
			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				di_flags |= XFS_DIFLAG_PROJINHERIT;
			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
			    xfs_inherit_nodefrag)
				di_flags |= XFS_DIFLAG_NODEFRAG;
			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
				di_flags |= XFS_DIFLAG_FILESTREAM;
			ip->i_d.di_flags |= di_flags;
		}
		/* FALLTHROUGH */
	case S_IFLNK:
		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
		ip->i_df.if_flags = XFS_IFEXTENTS;
		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
		ip->i_df.if_u1.if_extents = NULL;
		break;
	default:
		ASSERT(0);
	}
	/*
	 * Attribute fork settings for new inode.
	 */
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_anextents = 0;

	/*
	 * Log the new values stuffed into the inode.
	 */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, flags);

	/* now that we have an i_mode we can setup inode ops and unlock */
	xfs_setup_inode(ip);

	/* now we have set up the vfs inode we can associate the filestream */
	if (filestreams) {
		error = xfs_filestream_associate(pip, ip);
		if (error < 0)
			return -error;
		if (!error)
			xfs_iflags_set(ip, XFS_IFILESTREAM);
	}

	*ipp = ip;
	return 0;
}

/*
 * Free up the underlying blocks past new_size. The new size must be smaller
 * than the current size. This routine can be used both for the attribute and
 * data fork, and does not modify the inode size, which is left to the caller.
 *
 * The transaction passed to this routine must have made a permanent log
 * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
 * given transaction and start new ones, so make sure everything involved in
 * the transaction is tidy before calling here.  Some transaction will be
 * returned to the caller to be committed.  The incoming transaction must
 * already include the inode, and both inode locks must be held exclusively.
 * The inode must also be "held" within the transaction.  On return the inode
 * will be "held" within the returned transaction.  This routine does NOT
 * require any disk space to be reserved for it within the transaction.
 *
 * If we get an error, we must return with the inode locked and linked into the
 * current transaction. This keeps things simple for the higher level code,
 * because it always knows that the inode is locked and held in the transaction
 * that returns to it whether errors occur or not.  We don't mark the inode
 * dirty on error so that transactions can be easily aborted if possible.
 */
int
xfs_itruncate_extents(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fsize_t		new_size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp = *tpp;
	struct xfs_trans	*ntp;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	xfs_fileoff_t		first_unmap_block;
	xfs_fileoff_t		last_block;
	xfs_filblks_t		unmap_len;
	int			committed;
	int			error = 0;
	int			done = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(new_size <= XFS_ISIZE(ip));
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(ip->i_itemp != NULL);
	ASSERT(ip->i_itemp->ili_lock_flags == 0);
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	trace_xfs_itruncate_extents_start(ip, new_size);

	/*
	 * Since it is possible for space to become allocated beyond
	 * the end of the file (in a crash where the space is allocated
	 * but the inode size is not yet updated), simply remove any
	 * blocks which show up between the new EOF and the maximum
	 * possible file size.  If the first block to be removed is
	 * beyond the maximum file size (ie it is the same as last_block),
	 * then there is nothing to do.
	 */
	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (first_unmap_block == last_block)
		return 0;

	ASSERT(first_unmap_block < last_block);
	unmap_len = last_block - first_unmap_block + 1;
	/*
	 * Unmap at most XFS_ITRUNC_MAX_EXTENTS extents per pass, rolling
	 * the transaction between passes, until xfs_bunmapi reports done.
	 */
	while (!done) {
		xfs_bmap_init(&free_list, &first_block);
		error = xfs_bunmapi(tp, ip,
				    first_unmap_block, unmap_len,
				    xfs_bmapi_aflag(whichfork),
				    XFS_ITRUNC_MAX_EXTENTS,
				    &first_block, &free_list,
				    &done);
		if (error)
			goto out_bmap_cancel;

		/*
		 * Duplicate the transaction that has the permanent
		 * reservation and commit the old transaction.
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (committed)
			xfs_trans_ijoin(tp, ip, 0);
		if (error)
			goto out_bmap_cancel;

		if (committed) {
			/*
			 * Mark the inode dirty so it will be logged and
			 * moved forward in the log as part of every commit.
			 */
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		/* Roll the transaction: dup, commit the old, continue in new. */
		ntp = xfs_trans_dup(tp);
		error = xfs_trans_commit(tp, 0);
		tp = ntp;

		xfs_trans_ijoin(tp, ip, 0);

		if (error)
			goto out;

		/*
		 * Transaction commit worked ok so we can drop the extra ticket
		 * reference that we gained in xfs_trans_dup()
		 */
		xfs_log_ticket_put(tp->t_ticket);
		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp), 0,
					  XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error)
			goto out;
	}

	/*
	 * Always re-log the inode so that our permanent transaction can keep
	 * on rolling it forward in the log.
	 */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	trace_xfs_itruncate_extents_end(ip, new_size);

out:
	/* Hand the (possibly new) transaction back to the caller. */
	*tpp = tp;
	return error;
out_bmap_cancel:
	/*
	 * If the bunmapi call encounters an error, return to the caller where
	 * the transaction can be properly aborted.  We just need to make sure
	 * we're not holding any resources that we were not when we came in.
	 */
	xfs_bmap_cancel(&free_list);
	goto out;
}

/*
 * This is called when the inode's link count goes to 0.
 * We place the on-disk inode on a list in the AGI.  It
 * will be pulled from this list when the inode is freed.
 */
int
xfs_iunlink(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agino_t	agino;
	short		bucket_index;
	int		offset;
	int		error;

	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_mode != 0);

	mp = tp->t_mountp;

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
	if (error)
		return error;
	agi = XFS_BUF_TO_AGI(agibp);

	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	ASSERT(agi->agi_unlinked[bucket_index]);
	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);

	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
		/*
		 * There is already another inode in the bucket we need
		 * to add ourselves to.  Add us at the front of the list.
		 * Here we put the head pointer into our next pointer,
		 * and then we fall through to point the head at us.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		if (error)
			return error;

		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
		/* Log only the di_next_unlinked word of the on-disk inode. */
		offset = ip->i_imap.im_boffset +
			offsetof(xfs_dinode_t, di_next_unlinked);
		xfs_trans_inode_buf(tp, ibp);
		xfs_trans_log_buf(tp, ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, ibp);
	}

	/*
	 * Point the bucket head pointer at the inode being inserted.
	 */
	ASSERT(agino != 0);
	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
	offset = offsetof(xfs_agi_t, agi_unlinked) +
		(sizeof(xfs_agino_t) * bucket_index);
	xfs_trans_log_buf(tp, agibp, offset,
			  (offset + sizeof(xfs_agino_t) - 1));
	return 0;
}

/*
 * Pull the on-disk inode from the AGI unlinked list.
 */
STATIC int
xfs_iunlink_remove(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_ino_t	next_ino;
	xfs_mount_t	*mp;
	xfs_agi_t	*agi;
	xfs_dinode_t	*dip;
	xfs_buf_t	*agibp;
	xfs_buf_t	*ibp;
	xfs_agnumber_t	agno;
	xfs_agino_t	agino;
	xfs_agino_t	next_agino;
	xfs_buf_t	*last_ibp;
	xfs_dinode_t	*last_dip = NULL;
	short		bucket_index;
	int		offset, last_offset = 0;
	int		error;

	mp = tp->t_mountp;
	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);

	/*
	 * Get the agi buffer first.  It ensures lock ordering
	 * on the list.
	 */
	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		return error;

	agi = XFS_BUF_TO_AGI(agibp);

	/*
	 * Get the index into the agi hash table for the
	 * list this inode will go on.
	 */
	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
	ASSERT(agino != 0);
	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
	ASSERT(agi->agi_unlinked[bucket_index]);

	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
		/*
		 * We're at the head of the list.  Get the inode's on-disk
		 * buffer to see if there is anyone after us on the list.
		 * Only modify our next pointer if it is not already NULLAGINO.
		 * This saves us the overhead of dealing with the buffer when
		 * there is no need to change it.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		if (error) {
			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
				__func__, error);
			return error;
		}
		next_agino = be32_to_cpu(dip->di_next_unlinked);
		ASSERT(next_agino != 0);
		if (next_agino != NULLAGINO) {
			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
			/* Log only the di_next_unlinked word we changed. */
			offset = ip->i_imap.im_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);
			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the bucket head pointer at the next inode.
		 */
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
		offset = offsetof(xfs_agi_t, agi_unlinked) +
			(sizeof(xfs_agino_t) * bucket_index);
		xfs_trans_log_buf(tp, agibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
	} else {
		/*
		 * We need to search the list for the inode being freed.
		 * Walk the singly-linked chain, keeping the previous
		 * element's buffer (last_ibp/last_dip) so we can unlink.
		 */
		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
		last_ibp = NULL;
		while (next_agino != agino) {
			struct xfs_imap	imap;

			if (last_ibp)
				xfs_trans_brelse(tp, last_ibp);

			imap.im_blkno = 0;
			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);

			error = xfs_imap(mp, tp, next_ino, &imap, 0);
			if (error) {
				xfs_warn(mp,
	"%s: xfs_imap returned error %d.",
					 __func__, error);
				return error;
			}

			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
					       &last_ibp, 0, 0);
			if (error) {
				xfs_warn(mp,
	"%s: xfs_imap_to_bp returned error %d.",
					__func__, error);
				return error;
			}

			last_offset = imap.im_boffset;
			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
			ASSERT(next_agino != NULLAGINO);
			ASSERT(next_agino != 0);
		}

		/*
		 * Now last_ibp points to the buffer previous to us on the
		 * unlinked list.  Pull us from the list.
		 */
		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				       0, 0);
		if (error) {
			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
				__func__, error);
			return error;
		}
		next_agino = be32_to_cpu(dip->di_next_unlinked);
		ASSERT(next_agino != 0);
		ASSERT(next_agino != agino);
		if (next_agino != NULLAGINO) {
			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
			offset = ip->i_imap.im_boffset +
				offsetof(xfs_dinode_t, di_next_unlinked);
			xfs_trans_inode_buf(tp, ibp);
			xfs_trans_log_buf(tp, ibp, offset,
					  (offset + sizeof(xfs_agino_t) - 1));
			xfs_inobp_check(mp, ibp);
		} else {
			xfs_trans_brelse(tp, ibp);
		}
		/*
		 * Point the previous inode on the list to the next inode.
		 */
		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
		ASSERT(next_agino != 0);
		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
		xfs_trans_inode_buf(tp, last_ibp);
		xfs_trans_log_buf(tp, last_ibp, offset,
				  (offset + sizeof(xfs_agino_t) - 1));
		xfs_inobp_check(mp, last_ibp);
	}
	return 0;
}

/*
 * A big issue when freeing the inode cluster is is that we _cannot_ skip any
 * inodes that are in memory - they all must be marked stale and attached to
 * the cluster buffer.
 */
STATIC int
xfs_ifree_cluster(
	xfs_inode_t	*free_ip,
	xfs_trans_t	*tp,
	xfs_ino_t	inum)
{
	xfs_mount_t	*mp = free_ip->i_mount;
	int		blks_per_cluster;
	int		nbufs;
	int		ninodes;
	int		i, j;
	xfs_daddr_t	blkno;
	xfs_buf_t	*bp;
	xfs_inode_t	*ip;
	xfs_inode_log_item_t	*iip;
	xfs_log_item_t	*lip;
	struct xfs_perag	*pag;

	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
Torvalds ninodes = mp->m_sb.sb_inopblock; 18471da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 18481da177e4SLinus Torvalds } else { 18491da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 18501da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 18511da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 18521da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 18531da177e4SLinus Torvalds } 18541da177e4SLinus Torvalds 18551da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 18561da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 18571da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 18581da177e4SLinus Torvalds 18591da177e4SLinus Torvalds /* 18605b257b4aSDave Chinner * We obtain and lock the backing buffer first in the process 18615b257b4aSDave Chinner * here, as we have to ensure that any dirty inode that we 18625b257b4aSDave Chinner * can't get the flush lock on is attached to the buffer. 18635b257b4aSDave Chinner * If we scan the in-memory inodes first, then buffer IO can 18645b257b4aSDave Chinner * complete before we get a lock on it, and hence we may fail 18655b257b4aSDave Chinner * to mark all the active inodes on the buffer stale. 18661da177e4SLinus Torvalds */ 18671da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 1868b6aff29fSDave Chinner mp->m_bsize * blks_per_cluster, 1869b6aff29fSDave Chinner XBF_UNMAPPED); 18701da177e4SLinus Torvalds 18712a30f36dSChandra Seetharaman if (!bp) 18722a30f36dSChandra Seetharaman return ENOMEM; 1873b0f539deSDave Chinner 1874b0f539deSDave Chinner /* 1875b0f539deSDave Chinner * This buffer may not have been correctly initialised as we 1876b0f539deSDave Chinner * didn't read it from disk. That's not important because we are 1877b0f539deSDave Chinner * only using to mark the buffer as stale in the log, and to 1878b0f539deSDave Chinner * attach stale cached inodes on it. 
That means it will never be 1879b0f539deSDave Chinner * dispatched for IO. If it is, we want to know about it, and we 1880b0f539deSDave Chinner * want it to fail. We can acheive this by adding a write 1881b0f539deSDave Chinner * verifier to the buffer. 1882b0f539deSDave Chinner */ 18831813dd64SDave Chinner bp->b_ops = &xfs_inode_buf_ops; 1884b0f539deSDave Chinner 18855b257b4aSDave Chinner /* 18865b257b4aSDave Chinner * Walk the inodes already attached to the buffer and mark them 18875b257b4aSDave Chinner * stale. These will all have the flush locks held, so an 18885b3eed75SDave Chinner * in-memory inode walk can't lock them. By marking them all 18895b3eed75SDave Chinner * stale first, we will not attempt to lock them in the loop 18905b3eed75SDave Chinner * below as the XFS_ISTALE flag will be set. 18915b257b4aSDave Chinner */ 1892adadbeefSChristoph Hellwig lip = bp->b_fspriv; 18931da177e4SLinus Torvalds while (lip) { 18941da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 18951da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 18961da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 1897ca30b2a7SChristoph Hellwig lip->li_cb = xfs_istale_done; 18987b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 18997b2e2a31SDavid Chinner &iip->ili_flush_lsn, 19007b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 1901e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 19021da177e4SLinus Torvalds } 19031da177e4SLinus Torvalds lip = lip->li_bio_list; 19041da177e4SLinus Torvalds } 19051da177e4SLinus Torvalds 19065b3eed75SDave Chinner 19075b257b4aSDave Chinner /* 19085b257b4aSDave Chinner * For each inode in memory attempt to add it to the inode 19095b257b4aSDave Chinner * buffer and set it up for being staled on buffer IO 19105b257b4aSDave Chinner * completion. This is safe as we've locked out tail pushing 19115b257b4aSDave Chinner * and flushing by locking the buffer. 
19125b257b4aSDave Chinner * 19135b257b4aSDave Chinner * We have already marked every inode that was part of a 19145b257b4aSDave Chinner * transaction stale above, which means there is no point in 19155b257b4aSDave Chinner * even trying to lock them. 19165b257b4aSDave Chinner */ 19175b257b4aSDave Chinner for (i = 0; i < ninodes; i++) { 19185b3eed75SDave Chinner retry: 19191a3e8f3dSDave Chinner rcu_read_lock(); 19205b257b4aSDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 19215b257b4aSDave Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 19221da177e4SLinus Torvalds 19231a3e8f3dSDave Chinner /* Inode not in memory, nothing to do */ 19241a3e8f3dSDave Chinner if (!ip) { 19251a3e8f3dSDave Chinner rcu_read_unlock(); 19265b257b4aSDave Chinner continue; 19275b257b4aSDave Chinner } 19285b257b4aSDave Chinner 19295b3eed75SDave Chinner /* 19301a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could 19311a3e8f3dSDave Chinner * find a recently freed or even reallocated inode 19321a3e8f3dSDave Chinner * during the lookup. We need to check under the 19331a3e8f3dSDave Chinner * i_flags_lock for a valid inode here. Skip it if it 19341a3e8f3dSDave Chinner * is not valid, the wrong inode or stale. 19351a3e8f3dSDave Chinner */ 19361a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 19371a3e8f3dSDave Chinner if (ip->i_ino != inum + i || 19381a3e8f3dSDave Chinner __xfs_iflags_test(ip, XFS_ISTALE)) { 19391a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 19401a3e8f3dSDave Chinner rcu_read_unlock(); 19411a3e8f3dSDave Chinner continue; 19421a3e8f3dSDave Chinner } 19431a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 19441a3e8f3dSDave Chinner 19451a3e8f3dSDave Chinner /* 19465b3eed75SDave Chinner * Don't try to lock/unlock the current inode, but we 19475b3eed75SDave Chinner * _cannot_ skip the other inodes that we did not find 19485b3eed75SDave Chinner * in the list attached to the buffer and are not 19495b3eed75SDave Chinner * already marked stale. 
If we can't lock it, back off 19505b3eed75SDave Chinner * and retry. 19515b3eed75SDave Chinner */ 19525b257b4aSDave Chinner if (ip != free_ip && 19535b257b4aSDave Chinner !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 19541a3e8f3dSDave Chinner rcu_read_unlock(); 19555b3eed75SDave Chinner delay(1); 19565b3eed75SDave Chinner goto retry; 19575b257b4aSDave Chinner } 19581a3e8f3dSDave Chinner rcu_read_unlock(); 19595b257b4aSDave Chinner 19605b3eed75SDave Chinner xfs_iflock(ip); 19615b257b4aSDave Chinner xfs_iflags_set(ip, XFS_ISTALE); 19625b257b4aSDave Chinner 19635b3eed75SDave Chinner /* 19645b3eed75SDave Chinner * we don't need to attach clean inodes or those only 19655b3eed75SDave Chinner * with unlogged changes (which we throw away, anyway). 19665b3eed75SDave Chinner */ 19675b257b4aSDave Chinner iip = ip->i_itemp; 19685b3eed75SDave Chinner if (!iip || xfs_inode_clean(ip)) { 19695b257b4aSDave Chinner ASSERT(ip != free_ip); 19701da177e4SLinus Torvalds xfs_ifunlock(ip); 19711da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 19721da177e4SLinus Torvalds continue; 19731da177e4SLinus Torvalds } 19741da177e4SLinus Torvalds 1975f5d8d5c4SChristoph Hellwig iip->ili_last_fields = iip->ili_fields; 1976f5d8d5c4SChristoph Hellwig iip->ili_fields = 0; 19771da177e4SLinus Torvalds iip->ili_logged = 1; 19787b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 19797b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 19801da177e4SLinus Torvalds 1981ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_istale_done, 1982ca30b2a7SChristoph Hellwig &iip->ili_item); 19835b257b4aSDave Chinner 19845b257b4aSDave Chinner if (ip != free_ip) 19851da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 19861da177e4SLinus Torvalds } 19871da177e4SLinus Torvalds 19881da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 19891da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 19901da177e4SLinus Torvalds } 19911da177e4SLinus Torvalds 19925017e97dSDave Chinner xfs_perag_put(pag); 
	return 0;
}

/*
 * This is called to return an inode to the inode free list.
 * The inode should already be truncated to 0 length and have
 * no pages associated with it.  This routine also assumes that
 * the inode is already a part of the transaction.
 *
 * The on-disk copy of the inode will have been added to the list
 * of unlinked inodes in the AGI.  We need to remove the inode from
 * that list atomically with respect to freeing it here.
 *
 * Returns 0 on success or a positive errno on failure.  On success the
 * incore inode core is zeroed out (di_mode == 0 marks it free) and the
 * change is logged in the transaction.
 */
int
xfs_ifree(
	xfs_trans_t	*tp,		/* transaction covering the free */
	xfs_inode_t	*ip,		/* inode to free; ILOCK_EXCL held */
	xfs_bmap_free_t	*flist)		/* list to collect freed extents on */
{
	int			error;
	int			delete;		/* set by xfs_difree: last inode in chunk? */
	xfs_ino_t		first_ino;	/* first inode of the chunk being freed */
	xfs_dinode_t		*dip;
	xfs_buf_t		*ibp;

	/* Caller must hand us a fully truncated, unlinked inode. */
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(ip->i_d.di_nlink == 0);
	ASSERT(ip->i_d.di_nextents == 0);
	ASSERT(ip->i_d.di_anextents == 0);
	ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
	ASSERT(ip->i_d.di_nblocks == 0);

	/*
	 * Pull the on-disk inode from the AGI unlinked list.
	 */
	error = xfs_iunlink_remove(tp, ip);
	if (error != 0) {
		return error;
	}

	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
	if (error != 0) {
		return error;
	}
	ip->i_d.di_mode = 0;		/* mark incore inode as free */
	ip->i_d.di_flags = 0;
	ip->i_d.di_dmevmask = 0;
	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	/*
	 * Bump the generation count so no one will be confused
	 * by reincarnations of this inode.
	 */
	ip->i_d.di_gen++;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* Map and read the backing inode buffer so we can poke the disk copy. */
	error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
			       0, 0);
	if (error)
		return error;

	/*
	 * Clear the on-disk di_mode. This is to prevent xfs_bulkstat
	 * from picking up this inode when it is reclaimed (its incore state
	 * initialized but not flushed to disk yet). The in-core di_mode is
	 * already cleared  and a corresponding transaction logged.
	 * The hack here just synchronizes the in-core to on-disk
	 * di_mode value in advance before the actual inode sync to disk.
	 * This is OK because the inode is already unlinked and would never
	 * change its di_mode again for this inode generation.
	 * This is a temporary hack that would require a proper fix
	 * in the future.
	 */
	dip->di_mode = 0;

	if (delete) {
		/* last inode of the chunk: free the whole cluster */
		error = xfs_ifree_cluster(ip, tp, first_ino);
	}

	return error;
}

/*
 * Reallocate the space for if_broot based on the number of records
 * being added or deleted as indicated in rec_diff.  Move the records
 * and pointers in if_broot to fit the new size.  When shrinking this
 * will eliminate holes between the records and pointers created by
 * the caller.  When growing this will create holes to be filled in
 * by the caller.
 *
 * The caller must not request to add more records than would fit in
 * the on-disk inode root.  If the if_broot is currently NULL, then
 * if we are adding records one will be allocated.  The caller must also
 * not request that the number of records go below zero, although
 * it can go to zero.
 *
 * ip -- the inode whose if_broot area is changing
 * rec_diff -- the change in the number of records, positive or negative,
 *	 requested for the if_broot array.
 * whichfork -- XFS_DATA_FORK or XFS_ATTR_FORK; selects which fork's root.
 */
void
xfs_iroot_realloc(
	xfs_inode_t		*ip,
	int			rec_diff,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			cur_max;	/* current record capacity */
	xfs_ifork_t		*ifp;
	struct xfs_btree_block	*new_broot;
	int			new_max;	/* record capacity after resize */
	size_t			new_size;	/* byte size after resize */
	char			*np;		/* new pointer-array location */
	char			*op;		/* old pointer-array location */

	/*
	 * Handle the degenerate case quietly.
	 */
	if (rec_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (rec_diff > 0) {
		/*
		 * If there wasn't any memory allocated before, just
		 * allocate it now and get out.
		 */
		if (ifp->if_broot_bytes == 0) {
			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
			ifp->if_broot_bytes = (int)new_size;
			return;
		}

		/*
		 * If there is already an existing if_broot, then we need
		 * to realloc() it and shift the pointers to their new
		 * location.  The records don't change location because
		 * they are kept butted up against the btree block header.
		 */
		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
		new_max = cur_max + rec_diff;
		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
				KM_SLEEP | KM_NOFS);
		/*
		 * op/np are computed from the old and new buffer sizes;
		 * the pointer array sits at a size-dependent offset, so it
		 * must be slid to its new position with memmove (the source
		 * and destination ranges can overlap within one buffer).
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     (int)new_size);
		ifp->if_broot_bytes = (int)new_size;
		ASSERT(ifp->if_broot_bytes <=
			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
		return;
	}

	/*
	 * rec_diff is less than 0.  In this case, we are shrinking the
	 * if_broot buffer.  It must already exist.  If we go to zero
	 * records, just get rid of the root and clear the status bit.
	 */
	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
	new_max = cur_max + rec_diff;
	ASSERT(new_max >= 0);
	if (new_max > 0)
		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
	else
		new_size = 0;
	if (new_size > 0) {
		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
		/*
		 * First copy over the btree block header.
		 */
		memcpy(new_broot, ifp->if_broot,
			XFS_BMBT_BLOCK_LEN(ip->i_mount));
	} else {
		new_broot = NULL;
		ifp->if_flags &= ~XFS_IFBROOT;
	}

	/*
	 * Only copy the records and pointers if there are any.
	 */
	if (new_max > 0) {
		/*
		 * First copy the records.
		 */
		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));

		/*
		 * Then copy the pointers.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
						     (int)new_size);
		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
	}
	/* Swap in the (possibly NULL) replacement root. */
	kmem_free(ifp->if_broot);
	ifp->if_broot = new_broot;
	ifp->if_broot_bytes = (int)new_size;
	ASSERT(ifp->if_broot_bytes <=
		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
	return;
}


/*
 * This is called when the amount of space needed for if_data
 * is increased or decreased.  The change in size is indicated by
 * the number of bytes that need to be added or deleted in the
 * byte_diff parameter.
 *
 * If the amount of space needed has decreased below the size of the
 * inline buffer, then switch to using the inline buffer.  Otherwise,
 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
 * to what is needed.
 *
 * ip -- the inode whose if_data area is changing
 * byte_diff -- the change in the number of bytes, positive or negative,
 *	 requested for the if_data array.
 */
void
xfs_idata_realloc(
	xfs_inode_t	*ip,
	int		byte_diff,
	int		whichfork)
{
	xfs_ifork_t	*ifp;
	int		new_size;	/* bytes actually needed */
	int		real_size;	/* bytes of the backing allocation (0 == inline) */

	/* No change requested: nothing to do. */
	if (byte_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	new_size = (int)ifp->if_bytes + byte_diff;
	ASSERT(new_size >= 0);

	if (new_size == 0) {
		/* Shrinking to empty: release any out-of-line buffer. */
		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			kmem_free(ifp->if_u1.if_data);
		}
		ifp->if_u1.if_data = NULL;
		real_size = 0;
	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
		/*
		 * If the valid extents/data can fit in if_inline_ext/data,
		 * copy them from the malloc'd vector and free it.
		 */
		if (ifp->if_u1.if_data == NULL) {
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			/* Currently out-of-line: move back into the inode. */
			ASSERT(ifp->if_real_bytes != 0);
			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
			      new_size);
			kmem_free(ifp->if_u1.if_data);
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		}
		real_size = 0;
	} else {
		/*
		 * Stuck with malloc/realloc.
		 * For inline data, the underlying buffer must be
		 * a multiple of 4 bytes in size so that it can be
		 * logged and stay on word boundaries.  We enforce
		 * that here.
		 */
		real_size = roundup(new_size, 4);
		if (ifp->if_u1.if_data == NULL) {
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size,
							KM_SLEEP | KM_NOFS);
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			/*
			 * Only do the realloc if the underlying size
			 * is really changing.
			 */
			if (ifp->if_real_bytes != real_size) {
				ifp->if_u1.if_data =
					kmem_realloc(ifp->if_u1.if_data,
							real_size,
							ifp->if_real_bytes,
							KM_SLEEP | KM_NOFS);
			}
		} else {
			/* Growing out of the inline buffer: allocate and copy. */
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size,
							KM_SLEEP | KM_NOFS);
			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
				ifp->if_bytes);
		}
	}
	ifp->if_real_bytes = real_size;
	ifp->if_bytes = new_size;
	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
}

/*
 * Tear down the incore state of one fork (data or attr) of an inode:
 * free the btree root, any out-of-line local data or extent list, and,
 * for the attr fork, the fork structure itself.
 */
void
xfs_idestroy_fork(
	xfs_inode_t	*ip,
	int		whichfork)
{
	xfs_ifork_t	*ifp;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (ifp->if_broot != NULL) {
		kmem_free(ifp->if_broot);
		ifp->if_broot = NULL;
	}

	/*
	 * If the format is local, then we can't have an extents
	 * array so just look for an inline data array.  If we're
	 * not local then we may or may not have an extents list,
	 * so check and free it up if we do.
	 */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		/* only free if_data when it really points at a heap buffer */
		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
		    (ifp->if_u1.if_data != NULL)) {
			ASSERT(ifp->if_real_bytes != 0);
			kmem_free(ifp->if_u1.if_data);
			ifp->if_u1.if_data = NULL;
			ifp->if_real_bytes = 0;
		}
	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
		   ((ifp->if_flags & XFS_IFEXTIREC) ||
		    ((ifp->if_u1.if_extents != NULL) &&
		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
		ASSERT(ifp->if_real_bytes != 0);
		xfs_iext_destroy(ifp);
	}
	/* By now nothing may point at heap memory any more. */
	ASSERT(ifp->if_u1.if_extents == NULL ||
	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
	ASSERT(ifp->if_real_bytes == 0);
	if (whichfork == XFS_ATTR_FORK) {
		/* attr fork is separately allocated; release it too */
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
	}
}

/*
 * This is called to unpin an inode.  The caller must have the inode locked
 * in at least shared mode so that the buffer cannot be subsequently pinned
 * once someone is waiting for it to be unpinned.
23461da177e4SLinus Torvalds */ 234760ec6783SChristoph Hellwig static void 2348f392e631SChristoph Hellwig xfs_iunpin( 234960ec6783SChristoph Hellwig struct xfs_inode *ip) 2350a3f74ffbSDavid Chinner { 2351579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2352a3f74ffbSDavid Chinner 23534aaf15d1SDave Chinner trace_xfs_inode_unpin_nowait(ip, _RET_IP_); 23544aaf15d1SDave Chinner 2355a3f74ffbSDavid Chinner /* Give the log a push to start the unpinning I/O */ 235660ec6783SChristoph Hellwig xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2357a14a348bSChristoph Hellwig 2358a3f74ffbSDavid Chinner } 2359a3f74ffbSDavid Chinner 2360f392e631SChristoph Hellwig static void 2361f392e631SChristoph Hellwig __xfs_iunpin_wait( 2362f392e631SChristoph Hellwig struct xfs_inode *ip) 2363f392e631SChristoph Hellwig { 2364f392e631SChristoph Hellwig wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT); 2365f392e631SChristoph Hellwig DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT); 2366f392e631SChristoph Hellwig 2367f392e631SChristoph Hellwig xfs_iunpin(ip); 2368f392e631SChristoph Hellwig 2369f392e631SChristoph Hellwig do { 2370f392e631SChristoph Hellwig prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 2371f392e631SChristoph Hellwig if (xfs_ipincount(ip)) 2372f392e631SChristoph Hellwig io_schedule(); 2373f392e631SChristoph Hellwig } while (xfs_ipincount(ip)); 2374f392e631SChristoph Hellwig finish_wait(wq, &wait.wait); 2375f392e631SChristoph Hellwig } 2376f392e631SChristoph Hellwig 2377777df5afSDave Chinner void 23781da177e4SLinus Torvalds xfs_iunpin_wait( 237960ec6783SChristoph Hellwig struct xfs_inode *ip) 23801da177e4SLinus Torvalds { 2381f392e631SChristoph Hellwig if (xfs_ipincount(ip)) 2382f392e631SChristoph Hellwig __xfs_iunpin_wait(ip); 23831da177e4SLinus Torvalds } 23841da177e4SLinus Torvalds 23851da177e4SLinus Torvalds /* 23861da177e4SLinus Torvalds * xfs_iextents_copy() 23871da177e4SLinus Torvalds * 
 * This is called to copy the REAL extents (as opposed to the delayed
 * allocation extents) from the inode into the given buffer.  It
 * returns the number of bytes copied into the buffer.
 *
 * If there are no delayed allocation extents, then we can just
 * memcpy() the extents into the buffer.  Otherwise, we need to
 * examine each extent in turn and skip those which are delayed.
 */
int
xfs_iextents_copy(
	xfs_inode_t		*ip,
	xfs_bmbt_rec_t		*dp,	/* destination: on-disk format records */
	int			whichfork)
{
	int			copied;		/* records actually emitted */
	int			i;
	xfs_ifork_t		*ifp;
	int			nrecs;		/* incore records, incl. delayed */
	xfs_fsblock_t		start_block;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(ifp->if_bytes > 0);

	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
	ASSERT(nrecs > 0);

	/*
	 * There are some delayed allocation extents in the
	 * inode, so copy the extents one at a time and skip
	 * the delayed ones.  There must be at least one
	 * non-delayed extent.
	 */
	copied = 0;
	for (i = 0; i < nrecs; i++) {
		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
		start_block = xfs_bmbt_get_startblock(ep);
		if (isnullstartblock(start_block)) {
			/*
			 * It's a delayed allocation extent, so skip it.
			 */
			continue;
		}

		/* Translate to on disk format */
		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
		dp++;
		copied++;
	}
	ASSERT(copied != 0);
	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));

	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
}

/*
 * Each of the following cases stores data into the same region
 * of the on-disk inode, so only one of them can be valid at
 * any given time.  While it is possible to have conflicting formats
 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
 * in EXTENTS format, this can only happen when the fork has
 * changed formats after being modified but before being flushed.
 * In these cases, the format always takes precedence, because the
 * format indicates the current state of the fork.
24541da177e4SLinus Torvalds */ 24551da177e4SLinus Torvalds /*ARGSUSED*/ 2456e4ac967bSDavid Chinner STATIC void 24571da177e4SLinus Torvalds xfs_iflush_fork( 24581da177e4SLinus Torvalds xfs_inode_t *ip, 24591da177e4SLinus Torvalds xfs_dinode_t *dip, 24601da177e4SLinus Torvalds xfs_inode_log_item_t *iip, 24611da177e4SLinus Torvalds int whichfork, 24621da177e4SLinus Torvalds xfs_buf_t *bp) 24631da177e4SLinus Torvalds { 24641da177e4SLinus Torvalds char *cp; 24651da177e4SLinus Torvalds xfs_ifork_t *ifp; 24661da177e4SLinus Torvalds xfs_mount_t *mp; 24671da177e4SLinus Torvalds static const short brootflag[2] = 24681da177e4SLinus Torvalds { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 24691da177e4SLinus Torvalds static const short dataflag[2] = 24701da177e4SLinus Torvalds { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 24711da177e4SLinus Torvalds static const short extflag[2] = 24721da177e4SLinus Torvalds { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 24731da177e4SLinus Torvalds 2474e4ac967bSDavid Chinner if (!iip) 2475e4ac967bSDavid Chinner return; 24761da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 24771da177e4SLinus Torvalds /* 24781da177e4SLinus Torvalds * This can happen if we gave up in iformat in an error path, 24791da177e4SLinus Torvalds * for the attribute fork. 
24801da177e4SLinus Torvalds */ 2481e4ac967bSDavid Chinner if (!ifp) { 24821da177e4SLinus Torvalds ASSERT(whichfork == XFS_ATTR_FORK); 2483e4ac967bSDavid Chinner return; 24841da177e4SLinus Torvalds } 24851da177e4SLinus Torvalds cp = XFS_DFORK_PTR(dip, whichfork); 24861da177e4SLinus Torvalds mp = ip->i_mount; 24871da177e4SLinus Torvalds switch (XFS_IFORK_FORMAT(ip, whichfork)) { 24881da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 2489f5d8d5c4SChristoph Hellwig if ((iip->ili_fields & dataflag[whichfork]) && 24901da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 24911da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_data != NULL); 24921da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 24931da177e4SLinus Torvalds memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 24941da177e4SLinus Torvalds } 24951da177e4SLinus Torvalds break; 24961da177e4SLinus Torvalds 24971da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 24981da177e4SLinus Torvalds ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 2499f5d8d5c4SChristoph Hellwig !(iip->ili_fields & extflag[whichfork])); 2500f5d8d5c4SChristoph Hellwig if ((iip->ili_fields & extflag[whichfork]) && 25011da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 2502ab1908a5SChristoph Hellwig ASSERT(xfs_iext_get_ext(ifp, 0)); 25031da177e4SLinus Torvalds ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 25041da177e4SLinus Torvalds (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 25051da177e4SLinus Torvalds whichfork); 25061da177e4SLinus Torvalds } 25071da177e4SLinus Torvalds break; 25081da177e4SLinus Torvalds 25091da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 2510f5d8d5c4SChristoph Hellwig if ((iip->ili_fields & brootflag[whichfork]) && 25111da177e4SLinus Torvalds (ifp->if_broot_bytes > 0)) { 25121da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 25131da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 25141da177e4SLinus Torvalds (XFS_IFORK_SIZE(ip, whichfork) + 2515ee1a47abSChristoph Hellwig XFS_BROOT_SIZE_ADJ(ip))); 
251660197e8dSChristoph Hellwig xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 25171da177e4SLinus Torvalds (xfs_bmdr_block_t *)cp, 25181da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, mp, whichfork)); 25191da177e4SLinus Torvalds } 25201da177e4SLinus Torvalds break; 25211da177e4SLinus Torvalds 25221da177e4SLinus Torvalds case XFS_DINODE_FMT_DEV: 2523f5d8d5c4SChristoph Hellwig if (iip->ili_fields & XFS_ILOG_DEV) { 25241da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 252581591fe2SChristoph Hellwig xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 25261da177e4SLinus Torvalds } 25271da177e4SLinus Torvalds break; 25281da177e4SLinus Torvalds 25291da177e4SLinus Torvalds case XFS_DINODE_FMT_UUID: 2530f5d8d5c4SChristoph Hellwig if (iip->ili_fields & XFS_ILOG_UUID) { 25311da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 253281591fe2SChristoph Hellwig memcpy(XFS_DFORK_DPTR(dip), 253381591fe2SChristoph Hellwig &ip->i_df.if_u2.if_uuid, 25341da177e4SLinus Torvalds sizeof(uuid_t)); 25351da177e4SLinus Torvalds } 25361da177e4SLinus Torvalds break; 25371da177e4SLinus Torvalds 25381da177e4SLinus Torvalds default: 25391da177e4SLinus Torvalds ASSERT(0); 25401da177e4SLinus Torvalds break; 25411da177e4SLinus Torvalds } 25421da177e4SLinus Torvalds } 25431da177e4SLinus Torvalds 2544bad55843SDavid Chinner STATIC int 2545bad55843SDavid Chinner xfs_iflush_cluster( 2546bad55843SDavid Chinner xfs_inode_t *ip, 2547bad55843SDavid Chinner xfs_buf_t *bp) 2548bad55843SDavid Chinner { 2549bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 25505017e97dSDave Chinner struct xfs_perag *pag; 2551bad55843SDavid Chinner unsigned long first_index, mask; 2552c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2553bad55843SDavid Chinner int ilist_size; 2554bad55843SDavid Chinner xfs_inode_t **ilist; 2555bad55843SDavid Chinner xfs_inode_t *iq; 2556bad55843SDavid Chinner int nr_found; 2557bad55843SDavid Chinner int clcount = 0; 2558bad55843SDavid Chinner int bufwasdelwri; 
2559bad55843SDavid Chinner int i; 2560bad55843SDavid Chinner 25615017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2562bad55843SDavid Chinner 2563c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2564c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 256549383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2566bad55843SDavid Chinner if (!ilist) 256744b56e0aSDave Chinner goto out_put; 2568bad55843SDavid Chinner 2569bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2570bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 25711a3e8f3dSDave Chinner rcu_read_lock(); 2572bad55843SDavid Chinner /* really need a gang lookup range call here */ 2573bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2574c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2575bad55843SDavid Chinner if (nr_found == 0) 2576bad55843SDavid Chinner goto out_free; 2577bad55843SDavid Chinner 2578bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2579bad55843SDavid Chinner iq = ilist[i]; 2580bad55843SDavid Chinner if (iq == ip) 2581bad55843SDavid Chinner continue; 25821a3e8f3dSDave Chinner 25831a3e8f3dSDave Chinner /* 25841a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could find a 25851a3e8f3dSDave Chinner * recently freed or even reallocated inode during the lookup. 25861a3e8f3dSDave Chinner * We need to check under the i_flags_lock for a valid inode 25871a3e8f3dSDave Chinner * here. Skip it if it is not valid or the wrong inode. 
25881a3e8f3dSDave Chinner */ 25891a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 25901a3e8f3dSDave Chinner if (!ip->i_ino || 25911a3e8f3dSDave Chinner (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 25921a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 25931a3e8f3dSDave Chinner continue; 25941a3e8f3dSDave Chinner } 25951a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 25961a3e8f3dSDave Chinner 2597bad55843SDavid Chinner /* 2598bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2599bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2600bad55843SDavid Chinner * later after the appropriate locks are acquired. 2601bad55843SDavid Chinner */ 260233540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2603bad55843SDavid Chinner continue; 2604bad55843SDavid Chinner 2605bad55843SDavid Chinner /* 2606bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2607bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2608bad55843SDavid Chinner */ 2609bad55843SDavid Chinner 2610bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2611bad55843SDavid Chinner continue; 2612bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2613bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2614bad55843SDavid Chinner continue; 2615bad55843SDavid Chinner } 2616bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2617bad55843SDavid Chinner xfs_ifunlock(iq); 2618bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2619bad55843SDavid Chinner continue; 2620bad55843SDavid Chinner } 2621bad55843SDavid Chinner 2622bad55843SDavid Chinner /* 2623bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2624bad55843SDavid Chinner * re-check that it's dirty before flushing. 
2625bad55843SDavid Chinner */ 262633540408SDavid Chinner if (!xfs_inode_clean(iq)) { 2627bad55843SDavid Chinner int error; 2628bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 2629bad55843SDavid Chinner if (error) { 2630bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2631bad55843SDavid Chinner goto cluster_corrupt_out; 2632bad55843SDavid Chinner } 2633bad55843SDavid Chinner clcount++; 2634bad55843SDavid Chinner } else { 2635bad55843SDavid Chinner xfs_ifunlock(iq); 2636bad55843SDavid Chinner } 2637bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2638bad55843SDavid Chinner } 2639bad55843SDavid Chinner 2640bad55843SDavid Chinner if (clcount) { 2641bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 2642bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2643bad55843SDavid Chinner } 2644bad55843SDavid Chinner 2645bad55843SDavid Chinner out_free: 26461a3e8f3dSDave Chinner rcu_read_unlock(); 2647f0e2d93cSDenys Vlasenko kmem_free(ilist); 264844b56e0aSDave Chinner out_put: 264944b56e0aSDave Chinner xfs_perag_put(pag); 2650bad55843SDavid Chinner return 0; 2651bad55843SDavid Chinner 2652bad55843SDavid Chinner 2653bad55843SDavid Chinner cluster_corrupt_out: 2654bad55843SDavid Chinner /* 2655bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 2656bad55843SDavid Chinner * inode buffer and shut down the filesystem. 2657bad55843SDavid Chinner */ 26581a3e8f3dSDave Chinner rcu_read_unlock(); 2659bad55843SDavid Chinner /* 266043ff2122SChristoph Hellwig * Clean up the buffer. If it was delwri, just release it -- 2661bad55843SDavid Chinner * brelse can handle it with no problems. If not, shut down the 2662bad55843SDavid Chinner * filesystem before releasing the buffer. 
2663bad55843SDavid Chinner */ 266443ff2122SChristoph Hellwig bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); 2665bad55843SDavid Chinner if (bufwasdelwri) 2666bad55843SDavid Chinner xfs_buf_relse(bp); 2667bad55843SDavid Chinner 2668bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2669bad55843SDavid Chinner 2670bad55843SDavid Chinner if (!bufwasdelwri) { 2671bad55843SDavid Chinner /* 2672bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 2673bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 2674bad55843SDavid Chinner * mark it as stale and brelse. 2675bad55843SDavid Chinner */ 2676cb669ca5SChristoph Hellwig if (bp->b_iodone) { 2677bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 2678c867cb61SChristoph Hellwig xfs_buf_stale(bp); 26795a52c2a5SChandra Seetharaman xfs_buf_ioerror(bp, EIO); 26801a1a3e97SChristoph Hellwig xfs_buf_ioend(bp, 0); 2681bad55843SDavid Chinner } else { 2682c867cb61SChristoph Hellwig xfs_buf_stale(bp); 2683bad55843SDavid Chinner xfs_buf_relse(bp); 2684bad55843SDavid Chinner } 2685bad55843SDavid Chinner } 2686bad55843SDavid Chinner 2687bad55843SDavid Chinner /* 2688bad55843SDavid Chinner * Unlocks the flush lock 2689bad55843SDavid Chinner */ 269004913fddSDave Chinner xfs_iflush_abort(iq, false); 2691f0e2d93cSDenys Vlasenko kmem_free(ilist); 269244b56e0aSDave Chinner xfs_perag_put(pag); 2693bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 2694bad55843SDavid Chinner } 2695bad55843SDavid Chinner 26961da177e4SLinus Torvalds /* 26974c46819aSChristoph Hellwig * Flush dirty inode metadata into the backing buffer. 26984c46819aSChristoph Hellwig * 26994c46819aSChristoph Hellwig * The caller must have the inode lock and the inode flush lock held. The 27004c46819aSChristoph Hellwig * inode lock will still be held upon return to the caller, and the inode 27014c46819aSChristoph Hellwig * flush lock will be released after the inode has reached the disk. 
 *
 * The caller must write out the buffer returned in *bpp and release it.
 */
int
xfs_iflush(
	struct xfs_inode	*ip,
	struct xfs_buf		**bpp)	/* out: inode buffer to be written */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	int			error;

	XFS_STATS_INC(xs_iflush_count);

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(xfs_isiflocked(ip));
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));

	*bpp = NULL;

	/* An inode cannot be written while it is pinned in the log. */
	xfs_iunpin_wait(ip);

	/*
	 * For stale inodes we cannot rely on the backing buffer remaining
	 * stale in cache for the remaining life of the stale inode and so
	 * xfs_imap_to_bp() below may give us a buffer that no longer contains
	 * inodes below. We have to check this after ensuring the inode is
	 * unpinned so that it is safe to reclaim the stale inode after the
	 * flush call.
	 */
	if (xfs_iflags_test(ip, XFS_ISTALE)) {
		xfs_ifunlock(ip);
		return 0;
	}

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly. If that's the case we must not write this inode
	 * to disk, because the log record didn't make it to disk.
	 *
	 * We also have to remove the log item from the AIL in this case,
	 * as we wait for an empty AIL as part of the unmount process.
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		error = XFS_ERROR(EIO);
		goto abort_out;
	}

	/*
	 * Get the buffer containing the on-disk inode.  XBF_TRYLOCK: do not
	 * block on a contended buffer; the caller can retry the flush later.
	 */
	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
			       0);
	if (error || !bp) {
		xfs_ifunlock(ip);
		return error;
	}

	/*
	 * First flush out the inode that xfs_iflush was called with.
	 */
	error = xfs_iflush_int(ip, bp);
	if (error)
		goto corrupt_out;

	/*
	 * If the buffer is pinned then push on the log now so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (xfs_buf_ispinned(bp))
		xfs_log_force(mp, 0);

	/*
	 * inode clustering:
	 * see if other inodes can be gathered into this write
	 */
	error = xfs_iflush_cluster(ip, bp);
	if (error)
		goto cluster_corrupt_out;

	*bpp = bp;
	return 0;

corrupt_out:
	/* xfs_iflush_int failed: buffer is still ours to release here. */
	xfs_buf_relse(bp);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
	/* xfs_iflush_cluster already disposed of the buffer and shut down. */
	error = XFS_ERROR(EFSCORRUPTED);
abort_out:
	/*
	 * Unlocks the flush lock
	 */
	xfs_iflush_abort(ip, false);
	return error;
}


/*
 * Body of the inode flush: validate the in-core and on-disk inodes, copy the
 * dirty in-core state into the on-disk inode in 'bp', and attach the I/O
 * completion handler that will clear the flush lock and remove the inode
 * from the AIL once the buffer reaches disk.  Caller holds the inode lock
 * and the flush lock.  Returns 0 or EFSCORRUPTED.
 */
STATIC int
xfs_iflush_int(
	struct xfs_inode	*ip,
	struct xfs_buf		*bp)
{
	struct xfs_inode_log_item *iip = ip->i_itemp;
	struct xfs_dinode	*dip;
	struct xfs_mount	*mp = ip->i_mount;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(xfs_isiflocked(ip));
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
	ASSERT(iip != NULL && iip->ili_fields != 0);

	/* set *dip = inode's place in the buffer */
	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);

	/*
	 * Sanity checks (with error-injection hooks): any failure means
	 * in-core or on-disk corruption and aborts the flush.
	 */
	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
		goto corrupt_out;
	}
	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
			"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
			__func__, ip->i_ino, ip, ip->i_d.di_magic);
		goto corrupt_out;
	}
	if (S_ISREG(ip->i_d.di_mode)) {
		if (XFS_TEST_ERROR(
		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				"%s: Bad regular inode %Lu, ptr 0x%p",
				__func__, ip->i_ino, ip);
			goto corrupt_out;
		}
	} else if (S_ISDIR(ip->i_d.di_mode)) {
		if (XFS_TEST_ERROR(
		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				"%s: Bad directory inode %Lu, ptr 0x%p",
				__func__, ip->i_ino, ip);
			goto corrupt_out;
		}
	}
	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
				XFS_RANDOM_IFLUSH_5)) {
		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
			"%s: detected corrupt incore inode %Lu, "
			"total extents = %d, nblocks = %Ld, ptr 0x%p",
			__func__, ip->i_ino,
			ip->i_d.di_nextents + ip->i_d.di_anextents,
			ip->i_d.di_nblocks, ip);
		goto corrupt_out;
	}
	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
		goto corrupt_out;
	}
	/*
	 * bump the flush iteration count, used to detect flushes which
	 * postdate a log record during recovery. This is redundant as we now
	 * log every change and hence this can't happen. Still, it doesn't hurt.
	 */
	ip->i_d.di_flushiter++;

	/*
	 * Copy the dirty parts of the inode into the on-disk
	 * inode.  We always copy out the core of the inode,
	 * because if the inode is dirty at all the core must
	 * be.
	 */
	xfs_dinode_to_disk(dip, &ip->i_d);

	/* Wrap, we never let the log put out DI_MAX_FLUSH */
	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
		ip->i_d.di_flushiter = 0;

	/*
	 * If this is really an old format inode and the superblock version
	 * has not been updated to support only new format inodes, then
	 * convert back to the old inode format.  If the superblock version
	 * has been updated, then make the conversion permanent.
	 */
	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
	if (ip->i_d.di_version == 1) {
		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			/*
			 * Convert it back.
			 */
			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
		} else {
			/*
			 * The superblock version has already been bumped,
			 * so just make the conversion to the new inode
			 * format permanent.
			 */
			ip->i_d.di_version = 2;
			dip->di_version = 2;
			ip->i_d.di_onlink = 0;
			dip->di_onlink = 0;
			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
			memset(&(dip->di_pad[0]), 0,
			       sizeof(dip->di_pad));
			ASSERT(xfs_get_projid(ip) == 0);
		}
	}

	/* Write out both forks (data always; attr only if it exists). */
	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
	if (XFS_IFORK_Q(ip))
		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
	xfs_inobp_check(mp, bp);

	/*
	 * We've recorded everything logged in the inode, so we'd like to clear
	 * the ili_fields bits so we don't log and flush things unnecessarily.
	 * However, we can't stop logging all this information until the data
	 * we've copied into the disk buffer is written to disk.  If we did we
	 * might overwrite the copy of the inode in the log with all the data
	 * after re-logging only part of it, and in the face of a crash we
	 * wouldn't have all the data we need to recover.
	 *
	 * What we do is move the bits to the ili_last_fields field.  When
	 * logging the inode, these bits are moved back to the ili_fields field.
	 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
	 * know that the information those bits represent is permanently on
	 * disk.  As long as the flush completes before the inode is logged
	 * again, then both ili_fields and ili_last_fields will be cleared.
	 *
	 * We can play with the ili_fields bits here, because the inode lock
	 * must be held exclusively in order to set bits there and the flush
	 * lock protects the ili_last_fields bits.  Set ili_logged so the flush
	 * done routine can tell whether or not to look in the AIL.  Also, store
	 * the current LSN of the inode so that we can tell whether the item has
	 * moved in the AIL from xfs_iflush_done().  In order to read the lsn we
	 * need the AIL lock, because it is a 64 bit value that cannot be read
	 * atomically.
	 */
	iip->ili_last_fields = iip->ili_fields;
	iip->ili_fields = 0;
	iip->ili_logged = 1;

	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
				&iip->ili_item.li_lsn);

	/*
	 * Attach the function xfs_iflush_done to the inode's
	 * buffer.  This will remove the inode from the AIL
	 * and unlock the inode's flush lock when the inode is
	 * completely written to disk.
	 */
	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);

	/* update the lsn in the on disk inode if required */
	if (ip->i_d.di_version == 3)
		dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);

	/* generate the checksum. */
	xfs_dinode_calc_crc(mp, dip);

	ASSERT(bp->b_fspriv != NULL);
	ASSERT(bp->b_iodone != NULL);
	return 0;

corrupt_out:
	return XFS_ERROR(EFSCORRUPTED);
}

/*
 * Return a pointer to the extent record at file index idx.
29854eea22f0SMandy Kirkconnell */ 2986a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * 29874eea22f0SMandy Kirkconnell xfs_iext_get_ext( 29884eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 29894eea22f0SMandy Kirkconnell xfs_extnum_t idx) /* index of target extent */ 29904eea22f0SMandy Kirkconnell { 29914eea22f0SMandy Kirkconnell ASSERT(idx >= 0); 299287bef181SChristoph Hellwig ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 299387bef181SChristoph Hellwig 29940293ce3aSMandy Kirkconnell if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 29950293ce3aSMandy Kirkconnell return ifp->if_u1.if_ext_irec->er_extbuf; 29960293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 29970293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* irec pointer */ 29980293ce3aSMandy Kirkconnell int erp_idx = 0; /* irec index */ 29990293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = idx; /* ext index in target list */ 30000293ce3aSMandy Kirkconnell 30010293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 30020293ce3aSMandy Kirkconnell return &erp->er_extbuf[page_idx]; 30030293ce3aSMandy Kirkconnell } else if (ifp->if_bytes) { 30044eea22f0SMandy Kirkconnell return &ifp->if_u1.if_extents[idx]; 30054eea22f0SMandy Kirkconnell } else { 30064eea22f0SMandy Kirkconnell return NULL; 30074eea22f0SMandy Kirkconnell } 30084eea22f0SMandy Kirkconnell } 30094eea22f0SMandy Kirkconnell 30104eea22f0SMandy Kirkconnell /* 30114eea22f0SMandy Kirkconnell * Insert new item(s) into the extent records for incore inode 30124eea22f0SMandy Kirkconnell * fork 'ifp'. 'count' new items are inserted at index 'idx'. 
30134eea22f0SMandy Kirkconnell */ 30144eea22f0SMandy Kirkconnell void 30154eea22f0SMandy Kirkconnell xfs_iext_insert( 30166ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 30174eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* starting index of new items */ 30184eea22f0SMandy Kirkconnell xfs_extnum_t count, /* number of inserted items */ 30196ef35544SChristoph Hellwig xfs_bmbt_irec_t *new, /* items to insert */ 30206ef35544SChristoph Hellwig int state) /* type of extent conversion */ 30214eea22f0SMandy Kirkconnell { 30226ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 30234eea22f0SMandy Kirkconnell xfs_extnum_t i; /* extent record index */ 30244eea22f0SMandy Kirkconnell 30250b1b213fSChristoph Hellwig trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); 30260b1b213fSChristoph Hellwig 30274eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 30284eea22f0SMandy Kirkconnell xfs_iext_add(ifp, idx, count); 3029a6f64d4aSChristoph Hellwig for (i = idx; i < idx + count; i++, new++) 3030a6f64d4aSChristoph Hellwig xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); 30314eea22f0SMandy Kirkconnell } 30324eea22f0SMandy Kirkconnell 30334eea22f0SMandy Kirkconnell /* 30344eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 30354eea22f0SMandy Kirkconnell * extents needs to be increased. The ext_diff parameter stores the 30364eea22f0SMandy Kirkconnell * number of new extents being added and the idx parameter contains 30374eea22f0SMandy Kirkconnell * the extent index where the new extents will be added. If the new 30384eea22f0SMandy Kirkconnell * extents are being appended, then we just need to (re)allocate and 30394eea22f0SMandy Kirkconnell * initialize the space. 
Otherwise, if the new extents are being 30404eea22f0SMandy Kirkconnell * inserted into the middle of the existing entries, a bit more work 30414eea22f0SMandy Kirkconnell * is required to make room for the new extents to be inserted. The 30424eea22f0SMandy Kirkconnell * caller is responsible for filling in the new extent entries upon 30434eea22f0SMandy Kirkconnell * return. 30444eea22f0SMandy Kirkconnell */ 30454eea22f0SMandy Kirkconnell void 30464eea22f0SMandy Kirkconnell xfs_iext_add( 30474eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 30484eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin adding exts */ 3049c41564b5SNathan Scott int ext_diff) /* number of extents to add */ 30504eea22f0SMandy Kirkconnell { 30514eea22f0SMandy Kirkconnell int byte_diff; /* new bytes being added */ 30524eea22f0SMandy Kirkconnell int new_size; /* size of extents after adding */ 30534eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 30544eea22f0SMandy Kirkconnell 30554eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 30564eea22f0SMandy Kirkconnell ASSERT((idx >= 0) && (idx <= nextents)); 30574eea22f0SMandy Kirkconnell byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 30584eea22f0SMandy Kirkconnell new_size = ifp->if_bytes + byte_diff; 30594eea22f0SMandy Kirkconnell /* 30604eea22f0SMandy Kirkconnell * If the new number of extents (nextents + ext_diff) 30614eea22f0SMandy Kirkconnell * fits inside the inode, then continue to use the inline 30624eea22f0SMandy Kirkconnell * extent buffer. 
30634eea22f0SMandy Kirkconnell */ 30644eea22f0SMandy Kirkconnell if (nextents + ext_diff <= XFS_INLINE_EXTS) { 30654eea22f0SMandy Kirkconnell if (idx < nextents) { 30664eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 30674eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx], 30684eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 30694eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 30704eea22f0SMandy Kirkconnell } 30714eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 30724eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 30734eea22f0SMandy Kirkconnell } 30744eea22f0SMandy Kirkconnell /* 30754eea22f0SMandy Kirkconnell * Otherwise use a linear (direct) extent list. 30764eea22f0SMandy Kirkconnell * If the extents are currently inside the inode, 30774eea22f0SMandy Kirkconnell * xfs_iext_realloc_direct will switch us from 30784eea22f0SMandy Kirkconnell * inline to direct extent allocation mode. 
30794eea22f0SMandy Kirkconnell */ 30800293ce3aSMandy Kirkconnell else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 30814eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 30824eea22f0SMandy Kirkconnell if (idx < nextents) { 30834eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx + ext_diff], 30844eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx], 30854eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 30864eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 30874eea22f0SMandy Kirkconnell } 30884eea22f0SMandy Kirkconnell } 30890293ce3aSMandy Kirkconnell /* Indirection array */ 30900293ce3aSMandy Kirkconnell else { 30910293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; 30920293ce3aSMandy Kirkconnell int erp_idx = 0; 30930293ce3aSMandy Kirkconnell int page_idx = idx; 30940293ce3aSMandy Kirkconnell 30950293ce3aSMandy Kirkconnell ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 30960293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 30970293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 30980293ce3aSMandy Kirkconnell } else { 30990293ce3aSMandy Kirkconnell xfs_iext_irec_init(ifp); 31000293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 31010293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 31020293ce3aSMandy Kirkconnell } 31030293ce3aSMandy Kirkconnell /* Extents fit in target extent page */ 31040293ce3aSMandy Kirkconnell if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 31050293ce3aSMandy Kirkconnell if (page_idx < erp->er_extcount) { 31060293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[page_idx + ext_diff], 31070293ce3aSMandy Kirkconnell &erp->er_extbuf[page_idx], 31080293ce3aSMandy Kirkconnell (erp->er_extcount - page_idx) * 31090293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 31100293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[page_idx], 0, byte_diff); 31110293ce3aSMandy Kirkconnell } 31120293ce3aSMandy Kirkconnell 
erp->er_extcount += ext_diff; 31130293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 31140293ce3aSMandy Kirkconnell } 31150293ce3aSMandy Kirkconnell /* Insert a new extent page */ 31160293ce3aSMandy Kirkconnell else if (erp) { 31170293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(ifp, 31180293ce3aSMandy Kirkconnell erp_idx, page_idx, ext_diff); 31190293ce3aSMandy Kirkconnell } 31200293ce3aSMandy Kirkconnell /* 31210293ce3aSMandy Kirkconnell * If extent(s) are being appended to the last page in 31220293ce3aSMandy Kirkconnell * the indirection array and the new extent(s) don't fit 31230293ce3aSMandy Kirkconnell * in the page, then erp is NULL and erp_idx is set to 31240293ce3aSMandy Kirkconnell * the next index needed in the indirection array. 31250293ce3aSMandy Kirkconnell */ 31260293ce3aSMandy Kirkconnell else { 31270293ce3aSMandy Kirkconnell int count = ext_diff; 31280293ce3aSMandy Kirkconnell 31290293ce3aSMandy Kirkconnell while (count) { 31300293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 31310293ce3aSMandy Kirkconnell erp->er_extcount = count; 31320293ce3aSMandy Kirkconnell count -= MIN(count, (int)XFS_LINEAR_EXTS); 31330293ce3aSMandy Kirkconnell if (count) { 31340293ce3aSMandy Kirkconnell erp_idx++; 31350293ce3aSMandy Kirkconnell } 31360293ce3aSMandy Kirkconnell } 31370293ce3aSMandy Kirkconnell } 31380293ce3aSMandy Kirkconnell } 31394eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 31404eea22f0SMandy Kirkconnell } 31414eea22f0SMandy Kirkconnell 31424eea22f0SMandy Kirkconnell /* 31430293ce3aSMandy Kirkconnell * This is called when incore extents are being added to the indirection 31440293ce3aSMandy Kirkconnell * array and the new extents do not fit in the target extent list. 
The 31450293ce3aSMandy Kirkconnell * erp_idx parameter contains the irec index for the target extent list 31460293ce3aSMandy Kirkconnell * in the indirection array, and the idx parameter contains the extent 31470293ce3aSMandy Kirkconnell * index within the list. The number of extents being added is stored 31480293ce3aSMandy Kirkconnell * in the count parameter. 31490293ce3aSMandy Kirkconnell * 31500293ce3aSMandy Kirkconnell * |-------| |-------| 31510293ce3aSMandy Kirkconnell * | | | | idx - number of extents before idx 31520293ce3aSMandy Kirkconnell * | idx | | count | 31530293ce3aSMandy Kirkconnell * | | | | count - number of extents being inserted at idx 31540293ce3aSMandy Kirkconnell * |-------| |-------| 31550293ce3aSMandy Kirkconnell * | count | | nex2 | nex2 - number of extents after idx + count 31560293ce3aSMandy Kirkconnell * |-------| |-------| 31570293ce3aSMandy Kirkconnell */ 31580293ce3aSMandy Kirkconnell void 31590293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi( 31600293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 31610293ce3aSMandy Kirkconnell int erp_idx, /* target extent irec index */ 31620293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index within target list */ 31630293ce3aSMandy Kirkconnell int count) /* new extents being added */ 31640293ce3aSMandy Kirkconnell { 31650293ce3aSMandy Kirkconnell int byte_diff; /* new bytes being added */ 31660293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* pointer to irec entry */ 31670293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* number of extents to add */ 31680293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* new extents still needed */ 31690293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 31700293ce3aSMandy Kirkconnell xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 31710293ce3aSMandy Kirkconnell int nlists; /* number of irec's (lists) */ 31720293ce3aSMandy Kirkconnell 31730293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & 
XFS_IFEXTIREC); 31740293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 31750293ce3aSMandy Kirkconnell nex2 = erp->er_extcount - idx; 31760293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 31770293ce3aSMandy Kirkconnell 31780293ce3aSMandy Kirkconnell /* 31790293ce3aSMandy Kirkconnell * Save second part of target extent list 31800293ce3aSMandy Kirkconnell * (all extents past */ 31810293ce3aSMandy Kirkconnell if (nex2) { 31820293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 31836785073bSDavid Chinner nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); 31840293ce3aSMandy Kirkconnell memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 31850293ce3aSMandy Kirkconnell erp->er_extcount -= nex2; 31860293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 31870293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[idx], 0, byte_diff); 31880293ce3aSMandy Kirkconnell } 31890293ce3aSMandy Kirkconnell 31900293ce3aSMandy Kirkconnell /* 31910293ce3aSMandy Kirkconnell * Add the new extents to the end of the target 31920293ce3aSMandy Kirkconnell * list, then allocate new irec record(s) and 31930293ce3aSMandy Kirkconnell * extent buffer(s) as needed to store the rest 31940293ce3aSMandy Kirkconnell * of the new extents. 
31950293ce3aSMandy Kirkconnell */ 31960293ce3aSMandy Kirkconnell ext_cnt = count; 31970293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 31980293ce3aSMandy Kirkconnell if (ext_diff) { 31990293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 32000293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 32010293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 32020293ce3aSMandy Kirkconnell } 32030293ce3aSMandy Kirkconnell while (ext_cnt) { 32040293ce3aSMandy Kirkconnell erp_idx++; 32050293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 32060293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 32070293ce3aSMandy Kirkconnell erp->er_extcount = ext_diff; 32080293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 32090293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 32100293ce3aSMandy Kirkconnell } 32110293ce3aSMandy Kirkconnell 32120293ce3aSMandy Kirkconnell /* Add nex2 extents back to indirection array */ 32130293ce3aSMandy Kirkconnell if (nex2) { 32140293ce3aSMandy Kirkconnell xfs_extnum_t ext_avail; 32150293ce3aSMandy Kirkconnell int i; 32160293ce3aSMandy Kirkconnell 32170293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 32180293ce3aSMandy Kirkconnell ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 32190293ce3aSMandy Kirkconnell i = 0; 32200293ce3aSMandy Kirkconnell /* 32210293ce3aSMandy Kirkconnell * If nex2 extents fit in the current page, append 32220293ce3aSMandy Kirkconnell * nex2_ep after the new extents. 32230293ce3aSMandy Kirkconnell */ 32240293ce3aSMandy Kirkconnell if (nex2 <= ext_avail) { 32250293ce3aSMandy Kirkconnell i = erp->er_extcount; 32260293ce3aSMandy Kirkconnell } 32270293ce3aSMandy Kirkconnell /* 32280293ce3aSMandy Kirkconnell * Otherwise, check if space is available in the 32290293ce3aSMandy Kirkconnell * next page. 
32300293ce3aSMandy Kirkconnell */ 32310293ce3aSMandy Kirkconnell else if ((erp_idx < nlists - 1) && 32320293ce3aSMandy Kirkconnell (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 32330293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 32340293ce3aSMandy Kirkconnell erp_idx++; 32350293ce3aSMandy Kirkconnell erp++; 32360293ce3aSMandy Kirkconnell /* Create a hole for nex2 extents */ 32370293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 32380293ce3aSMandy Kirkconnell erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 32390293ce3aSMandy Kirkconnell } 32400293ce3aSMandy Kirkconnell /* 32410293ce3aSMandy Kirkconnell * Final choice, create a new extent page for 32420293ce3aSMandy Kirkconnell * nex2 extents. 32430293ce3aSMandy Kirkconnell */ 32440293ce3aSMandy Kirkconnell else { 32450293ce3aSMandy Kirkconnell erp_idx++; 32460293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 32470293ce3aSMandy Kirkconnell } 32480293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3249f0e2d93cSDenys Vlasenko kmem_free(nex2_ep); 32500293ce3aSMandy Kirkconnell erp->er_extcount += nex2; 32510293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 32520293ce3aSMandy Kirkconnell } 32530293ce3aSMandy Kirkconnell } 32540293ce3aSMandy Kirkconnell 32550293ce3aSMandy Kirkconnell /* 32564eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 32574eea22f0SMandy Kirkconnell * extents needs to be decreased. The ext_diff parameter stores the 32584eea22f0SMandy Kirkconnell * number of extents to be removed and the idx parameter contains 32594eea22f0SMandy Kirkconnell * the extent index where the extents will be removed from. 
32600293ce3aSMandy Kirkconnell * 32610293ce3aSMandy Kirkconnell * If the amount of space needed has decreased below the linear 32620293ce3aSMandy Kirkconnell * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 32630293ce3aSMandy Kirkconnell * extent array. Otherwise, use kmem_realloc() to adjust the 32640293ce3aSMandy Kirkconnell * size to what is needed. 32654eea22f0SMandy Kirkconnell */ 32664eea22f0SMandy Kirkconnell void 32674eea22f0SMandy Kirkconnell xfs_iext_remove( 32686ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 32694eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 32706ef35544SChristoph Hellwig int ext_diff, /* number of extents to remove */ 32716ef35544SChristoph Hellwig int state) /* type of extent conversion */ 32724eea22f0SMandy Kirkconnell { 32736ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 32744eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 32754eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 32764eea22f0SMandy Kirkconnell 32770b1b213fSChristoph Hellwig trace_xfs_iext_remove(ip, idx, state, _RET_IP_); 32780b1b213fSChristoph Hellwig 32794eea22f0SMandy Kirkconnell ASSERT(ext_diff > 0); 32804eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 32814eea22f0SMandy Kirkconnell new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 32824eea22f0SMandy Kirkconnell 32834eea22f0SMandy Kirkconnell if (new_size == 0) { 32844eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 32850293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 32860293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(ifp, idx, ext_diff); 32874eea22f0SMandy Kirkconnell } else if (ifp->if_real_bytes) { 32884eea22f0SMandy Kirkconnell xfs_iext_remove_direct(ifp, idx, ext_diff); 32894eea22f0SMandy Kirkconnell } else { 32904eea22f0SMandy Kirkconnell xfs_iext_remove_inline(ifp, 
idx, ext_diff); 32914eea22f0SMandy Kirkconnell } 32924eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 32934eea22f0SMandy Kirkconnell } 32944eea22f0SMandy Kirkconnell 32954eea22f0SMandy Kirkconnell /* 32964eea22f0SMandy Kirkconnell * This removes ext_diff extents from the inline buffer, beginning 32974eea22f0SMandy Kirkconnell * at extent index idx. 32984eea22f0SMandy Kirkconnell */ 32994eea22f0SMandy Kirkconnell void 33004eea22f0SMandy Kirkconnell xfs_iext_remove_inline( 33014eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33024eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 33034eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 33044eea22f0SMandy Kirkconnell { 33054eea22f0SMandy Kirkconnell int nextents; /* number of extents in file */ 33064eea22f0SMandy Kirkconnell 33070293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 33084eea22f0SMandy Kirkconnell ASSERT(idx < XFS_INLINE_EXTS); 33094eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 33104eea22f0SMandy Kirkconnell ASSERT(((nextents - ext_diff) > 0) && 33114eea22f0SMandy Kirkconnell (nextents - ext_diff) < XFS_INLINE_EXTS); 33124eea22f0SMandy Kirkconnell 33134eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 33144eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx], 33154eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx + ext_diff], 33164eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 33174eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 33184eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 33194eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 33204eea22f0SMandy Kirkconnell } else { 33214eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, 33224eea22f0SMandy Kirkconnell ext_diff * sizeof(xfs_bmbt_rec_t)); 33234eea22f0SMandy Kirkconnell } 33244eea22f0SMandy Kirkconnell } 
33254eea22f0SMandy Kirkconnell 33264eea22f0SMandy Kirkconnell /* 33274eea22f0SMandy Kirkconnell * This removes ext_diff extents from a linear (direct) extent list, 33284eea22f0SMandy Kirkconnell * beginning at extent index idx. If the extents are being removed 33294eea22f0SMandy Kirkconnell * from the end of the list (ie. truncate) then we just need to re- 33304eea22f0SMandy Kirkconnell * allocate the list to remove the extra space. Otherwise, if the 33314eea22f0SMandy Kirkconnell * extents are being removed from the middle of the existing extent 33324eea22f0SMandy Kirkconnell * entries, then we first need to move the extent records beginning 33334eea22f0SMandy Kirkconnell * at idx + ext_diff up in the list to overwrite the records being 33344eea22f0SMandy Kirkconnell * removed, then remove the extra space via kmem_realloc. 33354eea22f0SMandy Kirkconnell */ 33364eea22f0SMandy Kirkconnell void 33374eea22f0SMandy Kirkconnell xfs_iext_remove_direct( 33384eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33394eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 33404eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 33414eea22f0SMandy Kirkconnell { 33424eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 33434eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 33444eea22f0SMandy Kirkconnell 33450293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 33464eea22f0SMandy Kirkconnell new_size = ifp->if_bytes - 33474eea22f0SMandy Kirkconnell (ext_diff * sizeof(xfs_bmbt_rec_t)); 33484eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 33494eea22f0SMandy Kirkconnell 33504eea22f0SMandy Kirkconnell if (new_size == 0) { 33514eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 33524eea22f0SMandy Kirkconnell return; 33534eea22f0SMandy Kirkconnell } 33544eea22f0SMandy Kirkconnell /* Move extents up in the list (if needed) 
*/ 33554eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 33564eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx], 33574eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx + ext_diff], 33584eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 33594eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 33604eea22f0SMandy Kirkconnell } 33614eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[nextents - ext_diff], 33624eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 33634eea22f0SMandy Kirkconnell /* 33644eea22f0SMandy Kirkconnell * Reallocate the direct extent list. If the extents 33654eea22f0SMandy Kirkconnell * will fit inside the inode then xfs_iext_realloc_direct 33664eea22f0SMandy Kirkconnell * will switch from direct to inline extent allocation 33674eea22f0SMandy Kirkconnell * mode for us. 33684eea22f0SMandy Kirkconnell */ 33694eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 33704eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 33714eea22f0SMandy Kirkconnell } 33724eea22f0SMandy Kirkconnell 33734eea22f0SMandy Kirkconnell /* 33740293ce3aSMandy Kirkconnell * This is called when incore extents are being removed from the 33750293ce3aSMandy Kirkconnell * indirection array and the extents being removed span multiple extent 33760293ce3aSMandy Kirkconnell * buffers. The idx parameter contains the file extent index where we 33770293ce3aSMandy Kirkconnell * want to begin removing extents, and the count parameter contains 33780293ce3aSMandy Kirkconnell * how many extents need to be removed. 
33790293ce3aSMandy Kirkconnell * 33800293ce3aSMandy Kirkconnell * |-------| |-------| 33810293ce3aSMandy Kirkconnell * | nex1 | | | nex1 - number of extents before idx 33820293ce3aSMandy Kirkconnell * |-------| | count | 33830293ce3aSMandy Kirkconnell * | | | | count - number of extents being removed at idx 33840293ce3aSMandy Kirkconnell * | count | |-------| 33850293ce3aSMandy Kirkconnell * | | | nex2 | nex2 - number of extents after idx + count 33860293ce3aSMandy Kirkconnell * |-------| |-------| 33870293ce3aSMandy Kirkconnell */ 33880293ce3aSMandy Kirkconnell void 33890293ce3aSMandy Kirkconnell xfs_iext_remove_indirect( 33900293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33910293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing extents */ 33920293ce3aSMandy Kirkconnell int count) /* number of extents to remove */ 33930293ce3aSMandy Kirkconnell { 33940293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 33950293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 33960293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* extents left to remove */ 33970293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* extents to remove in current list */ 33980293ce3aSMandy Kirkconnell xfs_extnum_t nex1; /* number of extents before idx */ 33990293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 34000293ce3aSMandy Kirkconnell int page_idx = idx; /* index in target extent list */ 34010293ce3aSMandy Kirkconnell 34020293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 34030293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 34040293ce3aSMandy Kirkconnell ASSERT(erp != NULL); 34050293ce3aSMandy Kirkconnell nex1 = page_idx; 34060293ce3aSMandy Kirkconnell ext_cnt = count; 34070293ce3aSMandy Kirkconnell while (ext_cnt) { 34080293ce3aSMandy Kirkconnell nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 34090293ce3aSMandy Kirkconnell 
ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 34100293ce3aSMandy Kirkconnell /* 34110293ce3aSMandy Kirkconnell * Check for deletion of entire list; 34120293ce3aSMandy Kirkconnell * xfs_iext_irec_remove() updates extent offsets. 34130293ce3aSMandy Kirkconnell */ 34140293ce3aSMandy Kirkconnell if (ext_diff == erp->er_extcount) { 34150293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 34160293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 34170293ce3aSMandy Kirkconnell nex1 = 0; 34180293ce3aSMandy Kirkconnell if (ext_cnt) { 34190293ce3aSMandy Kirkconnell ASSERT(erp_idx < ifp->if_real_bytes / 34200293ce3aSMandy Kirkconnell XFS_IEXT_BUFSZ); 34210293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 34220293ce3aSMandy Kirkconnell nex1 = 0; 34230293ce3aSMandy Kirkconnell continue; 34240293ce3aSMandy Kirkconnell } else { 34250293ce3aSMandy Kirkconnell break; 34260293ce3aSMandy Kirkconnell } 34270293ce3aSMandy Kirkconnell } 34280293ce3aSMandy Kirkconnell /* Move extents up (if needed) */ 34290293ce3aSMandy Kirkconnell if (nex2) { 34300293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex1], 34310293ce3aSMandy Kirkconnell &erp->er_extbuf[nex1 + ext_diff], 34320293ce3aSMandy Kirkconnell nex2 * sizeof(xfs_bmbt_rec_t)); 34330293ce3aSMandy Kirkconnell } 34340293ce3aSMandy Kirkconnell /* Zero out rest of page */ 34350293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 34360293ce3aSMandy Kirkconnell ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 34370293ce3aSMandy Kirkconnell /* Update remaining counters */ 34380293ce3aSMandy Kirkconnell erp->er_extcount -= ext_diff; 34390293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 34400293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 34410293ce3aSMandy Kirkconnell nex1 = 0; 34420293ce3aSMandy Kirkconnell erp_idx++; 34430293ce3aSMandy Kirkconnell erp++; 34440293ce3aSMandy Kirkconnell } 34450293ce3aSMandy Kirkconnell ifp->if_bytes -= count * 
sizeof(xfs_bmbt_rec_t); 34460293ce3aSMandy Kirkconnell xfs_iext_irec_compact(ifp); 34470293ce3aSMandy Kirkconnell } 34480293ce3aSMandy Kirkconnell 34490293ce3aSMandy Kirkconnell /* 34504eea22f0SMandy Kirkconnell * Create, destroy, or resize a linear (direct) block of extents. 34514eea22f0SMandy Kirkconnell */ 34524eea22f0SMandy Kirkconnell void 34534eea22f0SMandy Kirkconnell xfs_iext_realloc_direct( 34544eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34554eea22f0SMandy Kirkconnell int new_size) /* new size of extents */ 34564eea22f0SMandy Kirkconnell { 34574eea22f0SMandy Kirkconnell int rnew_size; /* real new size of extents */ 34584eea22f0SMandy Kirkconnell 34594eea22f0SMandy Kirkconnell rnew_size = new_size; 34604eea22f0SMandy Kirkconnell 34610293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 34620293ce3aSMandy Kirkconnell ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 34630293ce3aSMandy Kirkconnell (new_size != ifp->if_real_bytes))); 34640293ce3aSMandy Kirkconnell 34654eea22f0SMandy Kirkconnell /* Free extent records */ 34664eea22f0SMandy Kirkconnell if (new_size == 0) { 34674eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 34684eea22f0SMandy Kirkconnell } 34694eea22f0SMandy Kirkconnell /* Resize direct extent list and zero any new bytes */ 34704eea22f0SMandy Kirkconnell else if (ifp->if_real_bytes) { 34714eea22f0SMandy Kirkconnell /* Check if extents will fit inside the inode */ 34724eea22f0SMandy Kirkconnell if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 34734eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(ifp, new_size / 34744eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)); 34754eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 34764eea22f0SMandy Kirkconnell return; 34774eea22f0SMandy Kirkconnell } 347816a087d8SVignesh Babu if (!is_power_of_2(new_size)){ 347940ebd81dSRobert P. J. 
Day rnew_size = roundup_pow_of_two(new_size); 34804eea22f0SMandy Kirkconnell } 34814eea22f0SMandy Kirkconnell if (rnew_size != ifp->if_real_bytes) { 3482a6f64d4aSChristoph Hellwig ifp->if_u1.if_extents = 34834eea22f0SMandy Kirkconnell kmem_realloc(ifp->if_u1.if_extents, 34844eea22f0SMandy Kirkconnell rnew_size, 34856785073bSDavid Chinner ifp->if_real_bytes, KM_NOFS); 34864eea22f0SMandy Kirkconnell } 34874eea22f0SMandy Kirkconnell if (rnew_size > ifp->if_real_bytes) { 34884eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[ifp->if_bytes / 34894eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)], 0, 34904eea22f0SMandy Kirkconnell rnew_size - ifp->if_real_bytes); 34914eea22f0SMandy Kirkconnell } 34924eea22f0SMandy Kirkconnell } 34934eea22f0SMandy Kirkconnell /* 34944eea22f0SMandy Kirkconnell * Switch from the inline extent buffer to a direct 34954eea22f0SMandy Kirkconnell * extent list. Be sure to include the inline extent 34964eea22f0SMandy Kirkconnell * bytes in new_size. 34974eea22f0SMandy Kirkconnell */ 34984eea22f0SMandy Kirkconnell else { 34994eea22f0SMandy Kirkconnell new_size += ifp->if_bytes; 350016a087d8SVignesh Babu if (!is_power_of_2(new_size)) { 350140ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 35024eea22f0SMandy Kirkconnell } 35034eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(ifp, rnew_size); 35044eea22f0SMandy Kirkconnell } 35054eea22f0SMandy Kirkconnell ifp->if_real_bytes = rnew_size; 35064eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 35074eea22f0SMandy Kirkconnell } 35084eea22f0SMandy Kirkconnell 35094eea22f0SMandy Kirkconnell /* 35104eea22f0SMandy Kirkconnell * Switch from linear (direct) extent records to inline buffer. 
35114eea22f0SMandy Kirkconnell */ 35124eea22f0SMandy Kirkconnell void 35134eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline( 35144eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35154eea22f0SMandy Kirkconnell xfs_extnum_t nextents) /* number of extents in file */ 35164eea22f0SMandy Kirkconnell { 35174eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 35184eea22f0SMandy Kirkconnell ASSERT(nextents <= XFS_INLINE_EXTS); 35194eea22f0SMandy Kirkconnell /* 35204eea22f0SMandy Kirkconnell * The inline buffer was zeroed when we switched 35214eea22f0SMandy Kirkconnell * from inline to direct extent allocation mode, 35224eea22f0SMandy Kirkconnell * so we don't need to clear it here. 35234eea22f0SMandy Kirkconnell */ 35244eea22f0SMandy Kirkconnell memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 35254eea22f0SMandy Kirkconnell nextents * sizeof(xfs_bmbt_rec_t)); 3526f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 35274eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 35284eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 35294eea22f0SMandy Kirkconnell } 35304eea22f0SMandy Kirkconnell 35314eea22f0SMandy Kirkconnell /* 35324eea22f0SMandy Kirkconnell * Switch from inline buffer to linear (direct) extent records. 35334eea22f0SMandy Kirkconnell * new_size should already be rounded up to the next power of 2 35344eea22f0SMandy Kirkconnell * by the caller (when appropriate), so use new_size as it is. 35354eea22f0SMandy Kirkconnell * However, since new_size may be rounded up, we can't update 35364eea22f0SMandy Kirkconnell * if_bytes here. It is the caller's responsibility to update 35374eea22f0SMandy Kirkconnell * if_bytes upon return. 
35384eea22f0SMandy Kirkconnell */ 35394eea22f0SMandy Kirkconnell void 35404eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct( 35414eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35424eea22f0SMandy Kirkconnell int new_size) /* number of extents in file */ 35434eea22f0SMandy Kirkconnell { 35446785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); 35454eea22f0SMandy Kirkconnell memset(ifp->if_u1.if_extents, 0, new_size); 35464eea22f0SMandy Kirkconnell if (ifp->if_bytes) { 35474eea22f0SMandy Kirkconnell memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 35484eea22f0SMandy Kirkconnell ifp->if_bytes); 35494eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 35504eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 35514eea22f0SMandy Kirkconnell } 35524eea22f0SMandy Kirkconnell ifp->if_real_bytes = new_size; 35534eea22f0SMandy Kirkconnell } 35544eea22f0SMandy Kirkconnell 35554eea22f0SMandy Kirkconnell /* 35560293ce3aSMandy Kirkconnell * Resize an extent indirection array to new_size bytes. 
35570293ce3aSMandy Kirkconnell */ 3558d96f8f89SEric Sandeen STATIC void 35590293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect( 35600293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35610293ce3aSMandy Kirkconnell int new_size) /* new indirection array size */ 35620293ce3aSMandy Kirkconnell { 35630293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 35640293ce3aSMandy Kirkconnell int size; /* current indirection array size */ 35650293ce3aSMandy Kirkconnell 35660293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 35670293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 35680293ce3aSMandy Kirkconnell size = nlists * sizeof(xfs_ext_irec_t); 35690293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes); 35700293ce3aSMandy Kirkconnell ASSERT((new_size >= 0) && (new_size != size)); 35710293ce3aSMandy Kirkconnell if (new_size == 0) { 35720293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 35730293ce3aSMandy Kirkconnell } else { 35740293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 35750293ce3aSMandy Kirkconnell kmem_realloc(ifp->if_u1.if_ext_irec, 35766785073bSDavid Chinner new_size, size, KM_NOFS); 35770293ce3aSMandy Kirkconnell } 35780293ce3aSMandy Kirkconnell } 35790293ce3aSMandy Kirkconnell 35800293ce3aSMandy Kirkconnell /* 35810293ce3aSMandy Kirkconnell * Switch from indirection array to linear (direct) extent allocations. 
 */
STATIC void
xfs_iext_indirect_to_direct(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		size;		/* size of file extents */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	ASSERT(nextents <= XFS_LINEAR_EXTS);
	size = nextents * sizeof(xfs_bmbt_rec_t);

	/*
	 * Merge all remaining extents into the first irec page; the
	 * ASSERT below confirms that exactly one page is left.
	 */
	xfs_iext_irec_compact_pages(ifp);
	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);

	/*
	 * Save the lone page's extent buffer before freeing the irec
	 * array that points to it, then reuse that buffer as the direct
	 * extent list.  if_u1 is a union, so if_extents must only be
	 * stored once the irec array pointer is no longer needed.
	 */
	ep = ifp->if_u1.if_ext_irec->er_extbuf;
	kmem_free(ifp->if_u1.if_ext_irec);
	ifp->if_flags &= ~XFS_IFEXTIREC;
	ifp->if_u1.if_extents = ep;
	ifp->if_bytes = size;
	/* Trim the full page down to just the space the extents need. */
	if (nextents < XFS_LINEAR_EXTS) {
		xfs_iext_realloc_direct(ifp, size);
	}
}

/*
 * Free incore file extents.
36114eea22f0SMandy Kirkconnell */ 36124eea22f0SMandy Kirkconnell void 36134eea22f0SMandy Kirkconnell xfs_iext_destroy( 36144eea22f0SMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 36154eea22f0SMandy Kirkconnell { 36160293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 36170293ce3aSMandy Kirkconnell int erp_idx; 36180293ce3aSMandy Kirkconnell int nlists; 36190293ce3aSMandy Kirkconnell 36200293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36210293ce3aSMandy Kirkconnell for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 36220293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 36230293ce3aSMandy Kirkconnell } 36240293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 36250293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes) { 3626f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 36274eea22f0SMandy Kirkconnell } else if (ifp->if_bytes) { 36284eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 36294eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 36304eea22f0SMandy Kirkconnell } 36314eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = NULL; 36324eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 36334eea22f0SMandy Kirkconnell ifp->if_bytes = 0; 36344eea22f0SMandy Kirkconnell } 36350293ce3aSMandy Kirkconnell 36360293ce3aSMandy Kirkconnell /* 36378867bc9bSMandy Kirkconnell * Return a pointer to the extent record for file system block bno. 
36388867bc9bSMandy Kirkconnell */ 3639a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * /* pointer to found extent record */ 36408867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext( 36418867bc9bSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36428867bc9bSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 36438867bc9bSMandy Kirkconnell xfs_extnum_t *idxp) /* index of target extent */ 36448867bc9bSMandy Kirkconnell { 3645a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *base; /* pointer to first extent */ 36468867bc9bSMandy Kirkconnell xfs_filblks_t blockcount = 0; /* number of blocks in extent */ 3647a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ 36488867bc9bSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 3649c41564b5SNathan Scott int high; /* upper boundary in search */ 36508867bc9bSMandy Kirkconnell xfs_extnum_t idx = 0; /* index of target extent */ 3651c41564b5SNathan Scott int low; /* lower boundary in search */ 36528867bc9bSMandy Kirkconnell xfs_extnum_t nextents; /* number of file extents */ 36538867bc9bSMandy Kirkconnell xfs_fileoff_t startoff = 0; /* start offset of extent */ 36548867bc9bSMandy Kirkconnell 36558867bc9bSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 36568867bc9bSMandy Kirkconnell if (nextents == 0) { 36578867bc9bSMandy Kirkconnell *idxp = 0; 36588867bc9bSMandy Kirkconnell return NULL; 36598867bc9bSMandy Kirkconnell } 36608867bc9bSMandy Kirkconnell low = 0; 36618867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 36628867bc9bSMandy Kirkconnell /* Find target extent list */ 36638867bc9bSMandy Kirkconnell int erp_idx = 0; 36648867bc9bSMandy Kirkconnell erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); 36658867bc9bSMandy Kirkconnell base = erp->er_extbuf; 36668867bc9bSMandy Kirkconnell high = erp->er_extcount - 1; 36678867bc9bSMandy Kirkconnell } else { 36688867bc9bSMandy Kirkconnell base = ifp->if_u1.if_extents; 
36698867bc9bSMandy Kirkconnell high = nextents - 1; 36708867bc9bSMandy Kirkconnell } 36718867bc9bSMandy Kirkconnell /* Binary search extent records */ 36728867bc9bSMandy Kirkconnell while (low <= high) { 36738867bc9bSMandy Kirkconnell idx = (low + high) >> 1; 36748867bc9bSMandy Kirkconnell ep = base + idx; 36758867bc9bSMandy Kirkconnell startoff = xfs_bmbt_get_startoff(ep); 36768867bc9bSMandy Kirkconnell blockcount = xfs_bmbt_get_blockcount(ep); 36778867bc9bSMandy Kirkconnell if (bno < startoff) { 36788867bc9bSMandy Kirkconnell high = idx - 1; 36798867bc9bSMandy Kirkconnell } else if (bno >= startoff + blockcount) { 36808867bc9bSMandy Kirkconnell low = idx + 1; 36818867bc9bSMandy Kirkconnell } else { 36828867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 36838867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 36848867bc9bSMandy Kirkconnell idx += erp->er_extoff; 36858867bc9bSMandy Kirkconnell } 36868867bc9bSMandy Kirkconnell *idxp = idx; 36878867bc9bSMandy Kirkconnell return ep; 36888867bc9bSMandy Kirkconnell } 36898867bc9bSMandy Kirkconnell } 36908867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 36918867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 36928867bc9bSMandy Kirkconnell idx += erp->er_extoff; 36938867bc9bSMandy Kirkconnell } 36948867bc9bSMandy Kirkconnell if (bno >= startoff + blockcount) { 36958867bc9bSMandy Kirkconnell if (++idx == nextents) { 36968867bc9bSMandy Kirkconnell ep = NULL; 36978867bc9bSMandy Kirkconnell } else { 36988867bc9bSMandy Kirkconnell ep = xfs_iext_get_ext(ifp, idx); 36998867bc9bSMandy Kirkconnell } 37008867bc9bSMandy Kirkconnell } 37018867bc9bSMandy Kirkconnell *idxp = idx; 37028867bc9bSMandy Kirkconnell return ep; 37038867bc9bSMandy Kirkconnell } 37048867bc9bSMandy Kirkconnell 37058867bc9bSMandy Kirkconnell /* 37060293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 37070293ce3aSMandy Kirkconnell * extent record for 
 * filesystem block bno.  Store the index of the
 * target irec in *erp_idxp.
 */
xfs_ext_irec_t *			/* pointer to found extent record */
xfs_iext_bno_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	int		*erp_idxp)	/* irec index of target ext list */
{
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of extent irec's (lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;
	/*
	 * Binary search the irec pages, keyed on the start offset of
	 * the first extent record in each page.
	 */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			high = erp_idx - 1;
		} else if (erp_next && bno >=
			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			low = erp_idx + 1;
		} else {
			/*
			 * bno is at or past this page's first extent
			 * and before the next page's first extent (or
			 * there is no next page): this is the page.
			 */
			break;
		}
	}
	*erp_idxp = erp_idx;
	return erp;
}

/*
 * Return a pointer to the indirection array entry containing the
 * extent record at file extent index *idxp. Store the index of the
 * target irec in *erp_idxp and store the page index of the target
 * extent record in *idxp.
 */
xfs_ext_irec_t *
xfs_iext_idx_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
	int		*erp_idxp,	/* pointer to target irec */
	int		realloc)	/* new bytes were just added */
{
	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */
	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	ASSERT(page_idx >= 0);
	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
	/* An index equal to the extent count is only valid for inserts. */
	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);

	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;

	/* Binary search extent irec's */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		prev = erp_idx > 0 ? erp - 1 : NULL;
		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
		    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			/*
			 * Target is before this page, or it sits on the
			 * page boundary and an insert can still go at
			 * the tail of the previous, not-yet-full page.
			 */
			high = erp_idx - 1;
		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			   (page_idx == erp->er_extoff + erp->er_extcount &&
			    !realloc)) {
			/* Target lies beyond this page's extents. */
			low = erp_idx + 1;
		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			   erp->er_extcount == XFS_LINEAR_EXTS) {
			/*
			 * Inserting just past a completely full page:
			 * the record belongs at index 0 of the next
			 * page, which may not exist yet (erp == NULL).
			 */
			ASSERT(realloc);
			page_idx = 0;
			erp_idx++;
			erp = erp_idx < nlists ? erp + 1 : NULL;
			break;
		} else {
			/* Found it: make page_idx page-relative. */
			page_idx -= erp->er_extoff;
			break;
		}
	}
	*idxp = page_idx;
	*erp_idxp = erp_idx;
	return(erp);
}

/*
 * Allocate and initialize an indirection array once the space needed
 * for incore extents increases above XFS_IEXT_BUFSZ.
38080293ce3aSMandy Kirkconnell */ 38090293ce3aSMandy Kirkconnell void 38100293ce3aSMandy Kirkconnell xfs_iext_irec_init( 38110293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 38120293ce3aSMandy Kirkconnell { 38130293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 38140293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 38150293ce3aSMandy Kirkconnell 38160293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 38170293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 38180293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 38190293ce3aSMandy Kirkconnell 38206785073bSDavid Chinner erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); 38210293ce3aSMandy Kirkconnell 38220293ce3aSMandy Kirkconnell if (nextents == 0) { 38236785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 38240293ce3aSMandy Kirkconnell } else if (!ifp->if_real_bytes) { 38250293ce3aSMandy Kirkconnell xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 38260293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 38270293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 38280293ce3aSMandy Kirkconnell } 38290293ce3aSMandy Kirkconnell erp->er_extbuf = ifp->if_u1.if_extents; 38300293ce3aSMandy Kirkconnell erp->er_extcount = nextents; 38310293ce3aSMandy Kirkconnell erp->er_extoff = 0; 38320293ce3aSMandy Kirkconnell 38330293ce3aSMandy Kirkconnell ifp->if_flags |= XFS_IFEXTIREC; 38340293ce3aSMandy Kirkconnell ifp->if_real_bytes = XFS_IEXT_BUFSZ; 38350293ce3aSMandy Kirkconnell ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 38360293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = erp; 38370293ce3aSMandy Kirkconnell 38380293ce3aSMandy Kirkconnell return; 38390293ce3aSMandy Kirkconnell } 38400293ce3aSMandy Kirkconnell 38410293ce3aSMandy Kirkconnell /* 38420293ce3aSMandy Kirkconnell * Allocate and initialize a new entry in 
 * the indirection array.
 */
xfs_ext_irec_t *
xfs_iext_irec_new(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx)	/* index for new irec */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;

	/* Resize indirection array (++nlists accounts for the new entry) */
	xfs_iext_realloc_indirect(ifp, ++nlists *
				  sizeof(xfs_ext_irec_t));
	/*
	 * Move records down in the array so the
	 * new page can use erp_idx.
	 */
	erp = ifp->if_u1.if_ext_irec;
	for (i = nlists - 1; i > erp_idx; i--) {
		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
	}
	ASSERT(i == erp_idx);

	/* Initialize new extent record */
	erp = ifp->if_u1.if_ext_irec;
	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
	erp[erp_idx].er_extcount = 0;
	/* The new page starts where the previous page's extents end. */
	erp[erp_idx].er_extoff = erp_idx > 0 ?
		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
	return (&erp[erp_idx]);
}

/*
 * Remove a record from the indirection array.
 */
void
xfs_iext_irec_remove(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx)	/* irec index to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	/*
	 * er_extbuf may already be NULL when the caller freed the page
	 * itself (see xfs_iext_irec_compact_pages); only shift the
	 * following pages' er_extoffs and free the page if it is live.
	 */
	if (erp->er_extbuf) {
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
			-erp->er_extcount);
		kmem_free(erp->er_extbuf);
	}
	/* Compact extent records */
	erp = ifp->if_u1.if_ext_irec;
	for (i = erp_idx; i < nlists - 1; i++) {
		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
	}
	/*
	 * Manually free the last extent record from the indirection
	 * array.  A call to xfs_iext_realloc_indirect() with a size
	 * of zero would result in a call to xfs_iext_destroy() which
	 * would in turn call this function again, creating a nasty
	 * infinite loop.
	 */
	if (--nlists) {
		xfs_iext_realloc_indirect(ifp,
			nlists * sizeof(xfs_ext_irec_t));
	} else {
		kmem_free(ifp->if_u1.if_ext_irec);
	}
	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
}

/*
 * This is called to clean up large amounts of unused memory allocated
 * by the indirection array.  Before compacting anything though, verify
 * that the indirection array is still needed and switch back to the
 * linear extent list (or even the inline buffer) if possible.
The 39260293ce3aSMandy Kirkconnell * compaction policy is as follows: 39270293ce3aSMandy Kirkconnell * 39280293ce3aSMandy Kirkconnell * Full Compaction: Extents fit into a single page (or inline buffer) 392971a8c87fSLachlan McIlroy * Partial Compaction: Extents occupy less than 50% of allocated space 39300293ce3aSMandy Kirkconnell * No Compaction: Extents occupy at least 50% of allocated space 39310293ce3aSMandy Kirkconnell */ 39320293ce3aSMandy Kirkconnell void 39330293ce3aSMandy Kirkconnell xfs_iext_irec_compact( 39340293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 39350293ce3aSMandy Kirkconnell { 39360293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 39370293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 39380293ce3aSMandy Kirkconnell 39390293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 39400293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 39410293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 39420293ce3aSMandy Kirkconnell 39430293ce3aSMandy Kirkconnell if (nextents == 0) { 39440293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 39450293ce3aSMandy Kirkconnell } else if (nextents <= XFS_INLINE_EXTS) { 39460293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 39470293ce3aSMandy Kirkconnell xfs_iext_direct_to_inline(ifp, nextents); 39480293ce3aSMandy Kirkconnell } else if (nextents <= XFS_LINEAR_EXTS) { 39490293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 39500293ce3aSMandy Kirkconnell } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 39510293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(ifp); 39520293ce3aSMandy Kirkconnell } 39530293ce3aSMandy Kirkconnell } 39540293ce3aSMandy Kirkconnell 39550293ce3aSMandy Kirkconnell /* 39560293ce3aSMandy Kirkconnell * Combine extents from neighboring extent pages. 
 */
void
xfs_iext_irec_compact_pages(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
	int		erp_idx = 0;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	/*
	 * Walk adjacent page pairs; whenever the next page's extents
	 * fit in the free tail of the current page, merge them and
	 * drop the next page.  erp_idx only advances when no merge
	 * happened, since a merge may allow further merging here.
	 */
	while (erp_idx < nlists - 1) {
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp + 1;
		if (erp_next->er_extcount <=
		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
			memcpy(&erp->er_extbuf[erp->er_extcount],
				erp_next->er_extbuf, erp_next->er_extcount *
				sizeof(xfs_bmbt_rec_t));
			erp->er_extcount += erp_next->er_extcount;
			/*
			 * Free page before removing extent record
			 * so er_extoffs don't get modified in
			 * xfs_iext_irec_remove.
			 */
			kmem_free(erp_next->er_extbuf);
			erp_next->er_extbuf = NULL;
			xfs_iext_irec_remove(ifp, erp_idx + 1);
			/* the remove shrank the array; recompute */
			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
		} else {
			erp_idx++;
		}
	}
}

/*
 * This is called to update the er_extoff field in the indirection
 * array when extents have been added or removed from one of the
 * extent lists.  erp_idx contains the irec index to begin updating
 * at and ext_diff contains the number of extents that were added
 * or removed.
 */
void
xfs_iext_irec_update_extoffs(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		erp_idx,	/* irec index to update */
	int		ext_diff)	/* signed change in extent count */
{
	int		i;		/* loop counter */
	int		nlists;		/* number of irec's (ex lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	for (i = erp_idx; i < nlists; i++) {
		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
	}
}

/*
 * Test whether it is appropriate
to check an inode for and free post EOF 401772b53efaSBrian Foster * blocks. The 'force' parameter determines whether we should also consider 401872b53efaSBrian Foster * regular files that are marked preallocated or append-only. 401972b53efaSBrian Foster */ 402072b53efaSBrian Foster bool 402172b53efaSBrian Foster xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) 402272b53efaSBrian Foster { 402372b53efaSBrian Foster /* prealloc/delalloc exists only on regular files */ 402472b53efaSBrian Foster if (!S_ISREG(ip->i_d.di_mode)) 402572b53efaSBrian Foster return false; 402672b53efaSBrian Foster 402772b53efaSBrian Foster /* 402872b53efaSBrian Foster * Zero sized files with no cached pages and delalloc blocks will not 402972b53efaSBrian Foster * have speculative prealloc/delalloc blocks to remove. 403072b53efaSBrian Foster */ 403172b53efaSBrian Foster if (VFS_I(ip)->i_size == 0 && 403272b53efaSBrian Foster VN_CACHED(VFS_I(ip)) == 0 && 403372b53efaSBrian Foster ip->i_delayed_blks == 0) 403472b53efaSBrian Foster return false; 403572b53efaSBrian Foster 403672b53efaSBrian Foster /* If we haven't read in the extent list, then don't do it now. */ 403772b53efaSBrian Foster if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) 403872b53efaSBrian Foster return false; 403972b53efaSBrian Foster 404072b53efaSBrian Foster /* 404172b53efaSBrian Foster * Do not free real preallocated or append-only files unless the file 404272b53efaSBrian Foster * has delalloc blocks and we are forced to remove them. 404372b53efaSBrian Foster */ 404472b53efaSBrian Foster if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) 404572b53efaSBrian Foster if (!force || ip->i_delayed_blks == 0) 404672b53efaSBrian Foster return false; 404772b53efaSBrian Foster 404872b53efaSBrian Foster return true; 404972b53efaSBrian Foster } 405072b53efaSBrian Foster 4051