11da177e4SLinus Torvalds /* 23e57ecf6SOlaf Weber * Copyright (c) 2000-2006 Silicon Graphics, Inc. 37b718769SNathan Scott * All Rights Reserved. 41da177e4SLinus Torvalds * 57b718769SNathan Scott * This program is free software; you can redistribute it and/or 67b718769SNathan Scott * modify it under the terms of the GNU General Public License as 71da177e4SLinus Torvalds * published by the Free Software Foundation. 81da177e4SLinus Torvalds * 97b718769SNathan Scott * This program is distributed in the hope that it would be useful, 107b718769SNathan Scott * but WITHOUT ANY WARRANTY; without even the implied warranty of 117b718769SNathan Scott * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 127b718769SNathan Scott * GNU General Public License for more details. 131da177e4SLinus Torvalds * 147b718769SNathan Scott * You should have received a copy of the GNU General Public License 157b718769SNathan Scott * along with this program; if not, write the Free Software Foundation, 167b718769SNathan Scott * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 171da177e4SLinus Torvalds */ 1840ebd81dSRobert P. J. Day #include <linux/log2.h> 1940ebd81dSRobert P. J. Day 201da177e4SLinus Torvalds #include "xfs.h" 21a844f451SNathan Scott #include "xfs_fs.h" 221da177e4SLinus Torvalds #include "xfs_types.h" 23a844f451SNathan Scott #include "xfs_bit.h" 241da177e4SLinus Torvalds #include "xfs_log.h" 25a844f451SNathan Scott #include "xfs_inum.h" 26a844f451SNathan Scott #include "xfs_imap.h" 271da177e4SLinus Torvalds #include "xfs_trans.h" 281da177e4SLinus Torvalds #include "xfs_trans_priv.h" 291da177e4SLinus Torvalds #include "xfs_sb.h" 301da177e4SLinus Torvalds #include "xfs_ag.h" 311da177e4SLinus Torvalds #include "xfs_dir2.h" 321da177e4SLinus Torvalds #include "xfs_dmapi.h" 331da177e4SLinus Torvalds #include "xfs_mount.h" 341da177e4SLinus Torvalds #include "xfs_bmap_btree.h" 35a844f451SNathan Scott #include "xfs_alloc_btree.h" 361da177e4SLinus Torvalds #include "xfs_ialloc_btree.h" 371da177e4SLinus Torvalds #include "xfs_dir2_sf.h" 38a844f451SNathan Scott #include "xfs_attr_sf.h" 391da177e4SLinus Torvalds #include "xfs_dinode.h" 401da177e4SLinus Torvalds #include "xfs_inode.h" 411da177e4SLinus Torvalds #include "xfs_buf_item.h" 42a844f451SNathan Scott #include "xfs_inode_item.h" 43a844f451SNathan Scott #include "xfs_btree.h" 448c4ed633SChristoph Hellwig #include "xfs_btree_trace.h" 45a844f451SNathan Scott #include "xfs_alloc.h" 46a844f451SNathan Scott #include "xfs_ialloc.h" 47a844f451SNathan Scott #include "xfs_bmap.h" 481da177e4SLinus Torvalds #include "xfs_rw.h" 491da177e4SLinus Torvalds #include "xfs_error.h" 501da177e4SLinus Torvalds #include "xfs_utils.h" 511da177e4SLinus Torvalds #include "xfs_dir2_trace.h" 521da177e4SLinus Torvalds #include "xfs_quota.h" 531da177e4SLinus Torvalds #include "xfs_acl.h" 542a82b8beSDavid Chinner #include "xfs_filestream.h" 55739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h" 561da177e4SLinus Torvalds 571da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone; 581da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone; 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds /* 611da177e4SLinus Torvalds * Used in xfs_itruncate(). This is the maximum number of extents 621da177e4SLinus Torvalds * freed from a file in a single transaction. 631da177e4SLinus Torvalds */ 641da177e4SLinus Torvalds #define XFS_ITRUNC_MAX_EXTENTS 2 651da177e4SLinus Torvalds 661da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 671da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 681da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 691da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds #ifdef DEBUG 721da177e4SLinus Torvalds /* 731da177e4SLinus Torvalds * Make sure that the extents in the given memory buffer 741da177e4SLinus Torvalds * are valid. 751da177e4SLinus Torvalds */ 761da177e4SLinus Torvalds STATIC void 771da177e4SLinus Torvalds xfs_validate_extents( 784eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, 791da177e4SLinus Torvalds int nrecs, 801da177e4SLinus Torvalds xfs_exntfmt_t fmt) 811da177e4SLinus Torvalds { 821da177e4SLinus Torvalds xfs_bmbt_irec_t irec; 83a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t rec; 841da177e4SLinus Torvalds int i; 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 87a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 88a6f64d4aSChristoph Hellwig rec.l0 = get_unaligned(&ep->l0); 89a6f64d4aSChristoph Hellwig rec.l1 = get_unaligned(&ep->l1); 901da177e4SLinus Torvalds xfs_bmbt_get_all(&rec, &irec); 911da177e4SLinus Torvalds if (fmt == XFS_EXTFMT_NOSTATE) 921da177e4SLinus Torvalds ASSERT(irec.br_state == XFS_EXT_NORM); 931da177e4SLinus Torvalds } 941da177e4SLinus Torvalds } 951da177e4SLinus Torvalds #else /* DEBUG */ 96a6f64d4aSChristoph Hellwig #define xfs_validate_extents(ifp, nrecs, fmt) 971da177e4SLinus Torvalds #endif /* DEBUG */ 981da177e4SLinus Torvalds 991da177e4SLinus Torvalds /* 1001da177e4SLinus Torvalds * Check that none of the inode's in the buffer have a next 1011da177e4SLinus Torvalds * unlinked field of 0. 1021da177e4SLinus Torvalds */ 1031da177e4SLinus Torvalds #if defined(DEBUG) 1041da177e4SLinus Torvalds void 1051da177e4SLinus Torvalds xfs_inobp_check( 1061da177e4SLinus Torvalds xfs_mount_t *mp, 1071da177e4SLinus Torvalds xfs_buf_t *bp) 1081da177e4SLinus Torvalds { 1091da177e4SLinus Torvalds int i; 1101da177e4SLinus Torvalds int j; 1111da177e4SLinus Torvalds xfs_dinode_t *dip; 1121da177e4SLinus Torvalds 1131da177e4SLinus Torvalds j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds for (i = 0; i < j; i++) { 1161da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1171da177e4SLinus Torvalds i * mp->m_sb.sb_inodesize); 1181da177e4SLinus Torvalds if (!dip->di_next_unlinked) { 1191da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, 1201da177e4SLinus Torvalds "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", 1211da177e4SLinus Torvalds bp); 1221da177e4SLinus Torvalds ASSERT(dip->di_next_unlinked); 1231da177e4SLinus Torvalds } 1241da177e4SLinus Torvalds } 1251da177e4SLinus Torvalds } 1261da177e4SLinus Torvalds #endif 1271da177e4SLinus Torvalds 1281da177e4SLinus Torvalds /* 1294ae29b43SDavid Chinner * Find the buffer associated with the given inode map 1304ae29b43SDavid Chinner * We do basic validation checks on the buffer once it has been 1314ae29b43SDavid Chinner * retrieved from disk. 1324ae29b43SDavid Chinner */ 1334ae29b43SDavid Chinner STATIC int 1344ae29b43SDavid Chinner xfs_imap_to_bp( 1354ae29b43SDavid Chinner xfs_mount_t *mp, 1364ae29b43SDavid Chinner xfs_trans_t *tp, 1374ae29b43SDavid Chinner xfs_imap_t *imap, 1384ae29b43SDavid Chinner xfs_buf_t **bpp, 1394ae29b43SDavid Chinner uint buf_flags, 1404ae29b43SDavid Chinner uint imap_flags) 1414ae29b43SDavid Chinner { 1424ae29b43SDavid Chinner int error; 1434ae29b43SDavid Chinner int i; 1444ae29b43SDavid Chinner int ni; 1454ae29b43SDavid Chinner xfs_buf_t *bp; 1464ae29b43SDavid Chinner 1474ae29b43SDavid Chinner error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 148a3f74ffbSDavid Chinner (int)imap->im_len, buf_flags, &bp); 1494ae29b43SDavid Chinner if (error) { 150a3f74ffbSDavid Chinner if (error != EAGAIN) { 151a3f74ffbSDavid Chinner cmn_err(CE_WARN, 152a3f74ffbSDavid Chinner "xfs_imap_to_bp: xfs_trans_read_buf()returned " 1534ae29b43SDavid Chinner "an error %d on %s. Returning error.", 1544ae29b43SDavid Chinner error, mp->m_fsname); 155a3f74ffbSDavid Chinner } else { 156a3f74ffbSDavid Chinner ASSERT(buf_flags & XFS_BUF_TRYLOCK); 157a3f74ffbSDavid Chinner } 1584ae29b43SDavid Chinner return error; 1594ae29b43SDavid Chinner } 1604ae29b43SDavid Chinner 1614ae29b43SDavid Chinner /* 1624ae29b43SDavid Chinner * Validate the magic number and version of every inode in the buffer 1634ae29b43SDavid Chinner * (if DEBUG kernel) or the first inode in the buffer, otherwise. 1644ae29b43SDavid Chinner */ 1654ae29b43SDavid Chinner #ifdef DEBUG 1664ae29b43SDavid Chinner ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; 1674ae29b43SDavid Chinner #else /* usual case */ 1684ae29b43SDavid Chinner ni = 1; 1694ae29b43SDavid Chinner #endif 1704ae29b43SDavid Chinner 1714ae29b43SDavid Chinner for (i = 0; i < ni; i++) { 1724ae29b43SDavid Chinner int di_ok; 1734ae29b43SDavid Chinner xfs_dinode_t *dip; 1744ae29b43SDavid Chinner 1754ae29b43SDavid Chinner dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1764ae29b43SDavid Chinner (i << mp->m_sb.sb_inodelog)); 1774ae29b43SDavid Chinner di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && 1784ae29b43SDavid Chinner XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); 1794ae29b43SDavid Chinner if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 1804ae29b43SDavid Chinner XFS_ERRTAG_ITOBP_INOTOBP, 1814ae29b43SDavid Chinner XFS_RANDOM_ITOBP_INOTOBP))) { 1824ae29b43SDavid Chinner if (imap_flags & XFS_IMAP_BULKSTAT) { 1834ae29b43SDavid Chinner xfs_trans_brelse(tp, bp); 1844ae29b43SDavid Chinner return XFS_ERROR(EINVAL); 1854ae29b43SDavid Chinner } 1864ae29b43SDavid Chinner XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 1874ae29b43SDavid Chinner XFS_ERRLEVEL_HIGH, mp, dip); 1884ae29b43SDavid Chinner #ifdef DEBUG 1894ae29b43SDavid Chinner cmn_err(CE_PANIC, 1904ae29b43SDavid Chinner "Device %s - bad inode magic/vsn " 1914ae29b43SDavid Chinner "daddr %lld #%d (magic=%x)", 1924ae29b43SDavid Chinner XFS_BUFTARG_NAME(mp->m_ddev_targp), 1934ae29b43SDavid Chinner (unsigned long long)imap->im_blkno, i, 1944ae29b43SDavid Chinner be16_to_cpu(dip->di_core.di_magic)); 1954ae29b43SDavid Chinner #endif 1964ae29b43SDavid Chinner xfs_trans_brelse(tp, bp); 1974ae29b43SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 1984ae29b43SDavid Chinner } 1994ae29b43SDavid Chinner } 2004ae29b43SDavid Chinner 2014ae29b43SDavid Chinner xfs_inobp_check(mp, bp); 2024ae29b43SDavid Chinner 2034ae29b43SDavid Chinner /* 2044ae29b43SDavid Chinner * Mark the buffer as an inode buffer now that it looks good 2054ae29b43SDavid Chinner */ 2064ae29b43SDavid Chinner XFS_BUF_SET_VTYPE(bp, B_FS_INO); 2074ae29b43SDavid Chinner 2084ae29b43SDavid Chinner *bpp = bp; 2094ae29b43SDavid Chinner return 0; 2104ae29b43SDavid Chinner } 2114ae29b43SDavid Chinner 2124ae29b43SDavid Chinner /* 2131da177e4SLinus Torvalds * This routine is called to map an inode number within a file 2141da177e4SLinus Torvalds * system to the buffer containing the on-disk version of the 2151da177e4SLinus Torvalds * inode. It returns a pointer to the buffer containing the 2161da177e4SLinus Torvalds * on-disk inode in the bpp parameter, and in the dip parameter 2171da177e4SLinus Torvalds * it returns a pointer to the on-disk inode within that buffer. 2181da177e4SLinus Torvalds * 2191da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2201da177e4SLinus Torvalds * dipp are undefined. 2211da177e4SLinus Torvalds * 2221da177e4SLinus Torvalds * Use xfs_imap() to determine the size and location of the 2231da177e4SLinus Torvalds * buffer to read from disk. 2241da177e4SLinus Torvalds */ 225c679eef0SChristoph Hellwig int 2261da177e4SLinus Torvalds xfs_inotobp( 2271da177e4SLinus Torvalds xfs_mount_t *mp, 2281da177e4SLinus Torvalds xfs_trans_t *tp, 2291da177e4SLinus Torvalds xfs_ino_t ino, 2301da177e4SLinus Torvalds xfs_dinode_t **dipp, 2311da177e4SLinus Torvalds xfs_buf_t **bpp, 232c679eef0SChristoph Hellwig int *offset, 233c679eef0SChristoph Hellwig uint imap_flags) 2341da177e4SLinus Torvalds { 2351da177e4SLinus Torvalds xfs_imap_t imap; 2361da177e4SLinus Torvalds xfs_buf_t *bp; 2371da177e4SLinus Torvalds int error; 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds imap.im_blkno = 0; 240c679eef0SChristoph Hellwig error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP); 2414ae29b43SDavid Chinner if (error) 2421da177e4SLinus Torvalds return error; 2431da177e4SLinus Torvalds 244c679eef0SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); 2454ae29b43SDavid Chinner if (error) 2461da177e4SLinus Torvalds return error; 2471da177e4SLinus Torvalds 2481da177e4SLinus Torvalds *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2491da177e4SLinus Torvalds *bpp = bp; 2501da177e4SLinus Torvalds *offset = imap.im_boffset; 2511da177e4SLinus Torvalds return 0; 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds 2551da177e4SLinus Torvalds /* 2561da177e4SLinus Torvalds * This routine is called to map an inode to the buffer containing 2571da177e4SLinus Torvalds * the on-disk version of the inode. It returns a pointer to the 2581da177e4SLinus Torvalds * buffer containing the on-disk inode in the bpp parameter, and in 2591da177e4SLinus Torvalds * the dip parameter it returns a pointer to the on-disk inode within 2601da177e4SLinus Torvalds * that buffer. 2611da177e4SLinus Torvalds * 2621da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2631da177e4SLinus Torvalds * dipp are undefined. 2641da177e4SLinus Torvalds * 2651da177e4SLinus Torvalds * If the inode is new and has not yet been initialized, use xfs_imap() 2661da177e4SLinus Torvalds * to determine the size and location of the buffer to read from disk. 2671da177e4SLinus Torvalds * If the inode has already been mapped to its buffer and read in once, 2681da177e4SLinus Torvalds * then use the mapping information stored in the inode rather than 2691da177e4SLinus Torvalds * calling xfs_imap(). This allows us to avoid the overhead of looking 2701da177e4SLinus Torvalds * at the inode btree for small block file systems (see xfs_dilocate()). 2711da177e4SLinus Torvalds * We can tell whether the inode has been mapped in before by comparing 2721da177e4SLinus Torvalds * its disk block address to 0. Only uninitialized inodes will have 2731da177e4SLinus Torvalds * 0 for the disk block address. 2741da177e4SLinus Torvalds */ 2751da177e4SLinus Torvalds int 2761da177e4SLinus Torvalds xfs_itobp( 2771da177e4SLinus Torvalds xfs_mount_t *mp, 2781da177e4SLinus Torvalds xfs_trans_t *tp, 2791da177e4SLinus Torvalds xfs_inode_t *ip, 2801da177e4SLinus Torvalds xfs_dinode_t **dipp, 2811da177e4SLinus Torvalds xfs_buf_t **bpp, 282b12dd342SNathan Scott xfs_daddr_t bno, 283a3f74ffbSDavid Chinner uint imap_flags, 284a3f74ffbSDavid Chinner uint buf_flags) 2851da177e4SLinus Torvalds { 2864d1a2ed3SNathan Scott xfs_imap_t imap; 2871da177e4SLinus Torvalds xfs_buf_t *bp; 2881da177e4SLinus Torvalds int error; 2891da177e4SLinus Torvalds 2901da177e4SLinus Torvalds if (ip->i_blkno == (xfs_daddr_t)0) { 2911da177e4SLinus Torvalds imap.im_blkno = bno; 2924ae29b43SDavid Chinner error = xfs_imap(mp, tp, ip->i_ino, &imap, 2934ae29b43SDavid Chinner XFS_IMAP_LOOKUP | imap_flags); 2944ae29b43SDavid Chinner if (error) 2951da177e4SLinus Torvalds return error; 2961da177e4SLinus Torvalds 2971da177e4SLinus Torvalds /* 2981da177e4SLinus Torvalds * Fill in the fields in the inode that will be used to 2991da177e4SLinus Torvalds * map the inode to its buffer from now on. 3001da177e4SLinus Torvalds */ 3011da177e4SLinus Torvalds ip->i_blkno = imap.im_blkno; 3021da177e4SLinus Torvalds ip->i_len = imap.im_len; 3031da177e4SLinus Torvalds ip->i_boffset = imap.im_boffset; 3041da177e4SLinus Torvalds } else { 3051da177e4SLinus Torvalds /* 3061da177e4SLinus Torvalds * We've already mapped the inode once, so just use the 3071da177e4SLinus Torvalds * mapping that we saved the first time. 3081da177e4SLinus Torvalds */ 3091da177e4SLinus Torvalds imap.im_blkno = ip->i_blkno; 3101da177e4SLinus Torvalds imap.im_len = ip->i_len; 3111da177e4SLinus Torvalds imap.im_boffset = ip->i_boffset; 3121da177e4SLinus Torvalds } 3131da177e4SLinus Torvalds ASSERT(bno == 0 || bno == imap.im_blkno); 3141da177e4SLinus Torvalds 315a3f74ffbSDavid Chinner error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); 3164ae29b43SDavid Chinner if (error) 3171da177e4SLinus Torvalds return error; 3184d1a2ed3SNathan Scott 319a3f74ffbSDavid Chinner if (!bp) { 320a3f74ffbSDavid Chinner ASSERT(buf_flags & XFS_BUF_TRYLOCK); 321a3f74ffbSDavid Chinner ASSERT(tp == NULL); 322a3f74ffbSDavid Chinner *bpp = NULL; 323a3f74ffbSDavid Chinner return EAGAIN; 324a3f74ffbSDavid Chinner } 325a3f74ffbSDavid Chinner 3261da177e4SLinus Torvalds *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 3271da177e4SLinus Torvalds *bpp = bp; 3281da177e4SLinus Torvalds return 0; 3291da177e4SLinus Torvalds } 3301da177e4SLinus Torvalds 3311da177e4SLinus Torvalds /* 3321da177e4SLinus Torvalds * Move inode type and inode format specific information from the 3331da177e4SLinus Torvalds * on-disk inode to the in-core inode. For fifos, devs, and sockets 3341da177e4SLinus Torvalds * this means set if_rdev to the proper value. For files, directories, 3351da177e4SLinus Torvalds * and symlinks this means to bring in the in-line data or extent 3361da177e4SLinus Torvalds * pointers. For a file in B-tree format, only the root is immediately 3371da177e4SLinus Torvalds * brought in-core. The rest will be in-lined in if_extents when it 3381da177e4SLinus Torvalds * is first referenced (see xfs_iread_extents()). 3391da177e4SLinus Torvalds */ 3401da177e4SLinus Torvalds STATIC int 3411da177e4SLinus Torvalds xfs_iformat( 3421da177e4SLinus Torvalds xfs_inode_t *ip, 3431da177e4SLinus Torvalds xfs_dinode_t *dip) 3441da177e4SLinus Torvalds { 3451da177e4SLinus Torvalds xfs_attr_shortform_t *atp; 3461da177e4SLinus Torvalds int size; 3471da177e4SLinus Torvalds int error; 3481da177e4SLinus Torvalds xfs_fsize_t di_size; 3491da177e4SLinus Torvalds ip->i_df.if_ext_max = 3501da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 3511da177e4SLinus Torvalds error = 0; 3521da177e4SLinus Torvalds 353347d1c01SChristoph Hellwig if (unlikely(be32_to_cpu(dip->di_core.di_nextents) + 354347d1c01SChristoph Hellwig be16_to_cpu(dip->di_core.di_anextents) > 355347d1c01SChristoph Hellwig be64_to_cpu(dip->di_core.di_nblocks))) { 3563762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 3573762ec6bSNathan Scott "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 3581da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 359347d1c01SChristoph Hellwig (int)(be32_to_cpu(dip->di_core.di_nextents) + 360347d1c01SChristoph Hellwig be16_to_cpu(dip->di_core.di_anextents)), 3611da177e4SLinus Torvalds (unsigned long long) 362347d1c01SChristoph Hellwig be64_to_cpu(dip->di_core.di_nblocks)); 3631da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 3641da177e4SLinus Torvalds ip->i_mount, dip); 3651da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds 368347d1c01SChristoph Hellwig if (unlikely(dip->di_core.di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 3693762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 3703762ec6bSNathan Scott "corrupt dinode %Lu, forkoff = 0x%x.", 3711da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 372347d1c01SChristoph Hellwig dip->di_core.di_forkoff); 3731da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 3741da177e4SLinus Torvalds ip->i_mount, dip); 3751da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3761da177e4SLinus Torvalds } 3771da177e4SLinus Torvalds 3781da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 3791da177e4SLinus Torvalds case S_IFIFO: 3801da177e4SLinus Torvalds case S_IFCHR: 3811da177e4SLinus Torvalds case S_IFBLK: 3821da177e4SLinus Torvalds case S_IFSOCK: 383347d1c01SChristoph Hellwig if (unlikely(dip->di_core.di_format != XFS_DINODE_FMT_DEV)) { 3841da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 3851da177e4SLinus Torvalds ip->i_mount, dip); 3861da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3871da177e4SLinus Torvalds } 3881da177e4SLinus Torvalds ip->i_d.di_size = 0; 389ba87ea69SLachlan McIlroy ip->i_size = 0; 390347d1c01SChristoph Hellwig ip->i_df.if_u2.if_rdev = be32_to_cpu(dip->di_u.di_dev); 3911da177e4SLinus Torvalds break; 3921da177e4SLinus Torvalds 3931da177e4SLinus Torvalds case S_IFREG: 3941da177e4SLinus Torvalds case S_IFLNK: 3951da177e4SLinus Torvalds case S_IFDIR: 396347d1c01SChristoph Hellwig switch (dip->di_core.di_format) { 3971da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 3981da177e4SLinus Torvalds /* 3991da177e4SLinus Torvalds * no local regular files yet 4001da177e4SLinus Torvalds */ 401347d1c01SChristoph Hellwig if (unlikely((be16_to_cpu(dip->di_core.di_mode) & S_IFMT) == S_IFREG)) { 4023762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4033762ec6bSNathan Scott "corrupt inode %Lu " 4043762ec6bSNathan Scott "(local format for regular file).", 4051da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 4061da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(4)", 4071da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 4081da177e4SLinus Torvalds ip->i_mount, dip); 4091da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4101da177e4SLinus Torvalds } 4111da177e4SLinus Torvalds 412347d1c01SChristoph Hellwig di_size = be64_to_cpu(dip->di_core.di_size); 4131da177e4SLinus Torvalds if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 4143762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4153762ec6bSNathan Scott "corrupt inode %Lu " 4163762ec6bSNathan Scott "(bad size %Ld for local inode).", 4171da177e4SLinus Torvalds (unsigned long long) ip->i_ino, 4181da177e4SLinus Torvalds (long long) di_size); 4191da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(5)", 4201da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 4211da177e4SLinus Torvalds ip->i_mount, dip); 4221da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4231da177e4SLinus Torvalds } 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds size = (int)di_size; 4261da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 4271da177e4SLinus Torvalds break; 4281da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 4291da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 4301da177e4SLinus Torvalds break; 4311da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 4321da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 4331da177e4SLinus Torvalds break; 4341da177e4SLinus Torvalds default: 4351da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 4361da177e4SLinus Torvalds ip->i_mount); 4371da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4381da177e4SLinus Torvalds } 4391da177e4SLinus Torvalds break; 4401da177e4SLinus Torvalds 4411da177e4SLinus Torvalds default: 4421da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 4431da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4441da177e4SLinus Torvalds } 4451da177e4SLinus Torvalds if (error) { 4461da177e4SLinus Torvalds return error; 4471da177e4SLinus Torvalds } 4481da177e4SLinus Torvalds if (!XFS_DFORK_Q(dip)) 4491da177e4SLinus Torvalds return 0; 4501da177e4SLinus Torvalds ASSERT(ip->i_afp == NULL); 4511da177e4SLinus Torvalds ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 4521da177e4SLinus Torvalds ip->i_afp->if_ext_max = 4531da177e4SLinus Torvalds XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 454347d1c01SChristoph Hellwig switch (dip->di_core.di_aformat) { 4551da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 4561da177e4SLinus Torvalds atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 4573b244aa8SNathan Scott size = be16_to_cpu(atp->hdr.totsize); 4581da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 4591da177e4SLinus Torvalds break; 4601da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 4611da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 4621da177e4SLinus Torvalds break; 4631da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 4641da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 4651da177e4SLinus Torvalds break; 4661da177e4SLinus Torvalds default: 4671da177e4SLinus Torvalds error = XFS_ERROR(EFSCORRUPTED); 4681da177e4SLinus Torvalds break; 4691da177e4SLinus Torvalds } 4701da177e4SLinus Torvalds if (error) { 4711da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 4721da177e4SLinus Torvalds ip->i_afp = NULL; 4731da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 4741da177e4SLinus Torvalds } 4751da177e4SLinus Torvalds return error; 4761da177e4SLinus Torvalds } 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds /* 4791da177e4SLinus Torvalds * The file is in-lined in the on-disk inode. 4801da177e4SLinus Torvalds * If it fits into if_inline_data, then copy 4811da177e4SLinus Torvalds * it there, otherwise allocate a buffer for it 4821da177e4SLinus Torvalds * and copy the data there. Either way, set 4831da177e4SLinus Torvalds * if_data to point at the data. 4841da177e4SLinus Torvalds * If we allocate a buffer for the data, make 4851da177e4SLinus Torvalds * sure that its size is a multiple of 4 and 4861da177e4SLinus Torvalds * record the real size in i_real_bytes. 4871da177e4SLinus Torvalds */ 4881da177e4SLinus Torvalds STATIC int 4891da177e4SLinus Torvalds xfs_iformat_local( 4901da177e4SLinus Torvalds xfs_inode_t *ip, 4911da177e4SLinus Torvalds xfs_dinode_t *dip, 4921da177e4SLinus Torvalds int whichfork, 4931da177e4SLinus Torvalds int size) 4941da177e4SLinus Torvalds { 4951da177e4SLinus Torvalds xfs_ifork_t *ifp; 4961da177e4SLinus Torvalds int real_size; 4971da177e4SLinus Torvalds 4981da177e4SLinus Torvalds /* 4991da177e4SLinus Torvalds * If the size is unreasonable, then something 5001da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 5011da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 5021da177e4SLinus Torvalds */ 5031da177e4SLinus Torvalds if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 5043762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 5053762ec6bSNathan Scott "corrupt inode %Lu " 5063762ec6bSNathan Scott "(bad size %d for local fork, size = %d).", 5071da177e4SLinus Torvalds (unsigned long long) ip->i_ino, size, 5081da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 5091da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 5101da177e4SLinus Torvalds ip->i_mount, dip); 5111da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5121da177e4SLinus Torvalds } 5131da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 5141da177e4SLinus Torvalds real_size = 0; 5151da177e4SLinus Torvalds if (size == 0) 5161da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 5171da177e4SLinus Torvalds else if (size <= sizeof(ifp->if_u2.if_inline_data)) 5181da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 5191da177e4SLinus Torvalds else { 5201da177e4SLinus Torvalds real_size = roundup(size, 4); 5211da177e4SLinus Torvalds ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 5221da177e4SLinus Torvalds } 5231da177e4SLinus Torvalds ifp->if_bytes = size; 5241da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 5251da177e4SLinus Torvalds if (size) 5261da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 5271da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 5281da177e4SLinus Torvalds ifp->if_flags |= XFS_IFINLINE; 5291da177e4SLinus Torvalds return 0; 5301da177e4SLinus Torvalds } 5311da177e4SLinus Torvalds 5321da177e4SLinus Torvalds /* 5331da177e4SLinus Torvalds * The file consists of a set of extents all 5341da177e4SLinus Torvalds * of which fit into the on-disk inode. 5351da177e4SLinus Torvalds * If there are few enough extents to fit into 5361da177e4SLinus Torvalds * the if_inline_ext, then copy them there. 5371da177e4SLinus Torvalds * Otherwise allocate a buffer for them and copy 5381da177e4SLinus Torvalds * them into it. Either way, set if_extents 5391da177e4SLinus Torvalds * to point at the extents. 5401da177e4SLinus Torvalds */ 5411da177e4SLinus Torvalds STATIC int 5421da177e4SLinus Torvalds xfs_iformat_extents( 5431da177e4SLinus Torvalds xfs_inode_t *ip, 5441da177e4SLinus Torvalds xfs_dinode_t *dip, 5451da177e4SLinus Torvalds int whichfork) 5461da177e4SLinus Torvalds { 547a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp; 5481da177e4SLinus Torvalds xfs_ifork_t *ifp; 5491da177e4SLinus Torvalds int nex; 5501da177e4SLinus Torvalds int size; 5511da177e4SLinus Torvalds int i; 5521da177e4SLinus Torvalds 5531da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 5541da177e4SLinus Torvalds nex = XFS_DFORK_NEXTENTS(dip, whichfork); 5551da177e4SLinus Torvalds size = nex * (uint)sizeof(xfs_bmbt_rec_t); 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds /* 5581da177e4SLinus Torvalds * If the number of extents is unreasonable, then something 5591da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 5601da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 5611da177e4SLinus Torvalds */ 5621da177e4SLinus Torvalds if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 5633762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 5643762ec6bSNathan Scott "corrupt inode %Lu ((a)extents = %d).", 5651da177e4SLinus Torvalds (unsigned long long) ip->i_ino, nex); 5661da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 5671da177e4SLinus Torvalds ip->i_mount, dip); 5681da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5714eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 5721da177e4SLinus Torvalds if (nex == 0) 5731da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 5741da177e4SLinus Torvalds else if (nex <= XFS_INLINE_EXTS) 5751da177e4SLinus Torvalds ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 5764eea22f0SMandy Kirkconnell else 5774eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nex); 5784eea22f0SMandy Kirkconnell 5791da177e4SLinus Torvalds ifp->if_bytes = size; 5801da177e4SLinus Torvalds if (size) { 5811da177e4SLinus Torvalds dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 582a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); 5834eea22f0SMandy Kirkconnell for (i = 0; i < nex; i++, dp++) { 584a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 585597bca63SHarvey Harrison ep->l0 = get_unaligned_be64(&dp->l0); 586597bca63SHarvey Harrison ep->l1 = get_unaligned_be64(&dp->l1); 5871da177e4SLinus Torvalds } 5883a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 5891da177e4SLinus Torvalds if (whichfork != XFS_DATA_FORK || 5901da177e4SLinus Torvalds XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 5911da177e4SLinus Torvalds if (unlikely(xfs_check_nostate_extents( 5924eea22f0SMandy Kirkconnell ifp, 0, nex))) { 5931da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat_extents(2)", 5941da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5951da177e4SLinus Torvalds ip->i_mount); 5961da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5971da177e4SLinus Torvalds } 5981da177e4SLinus Torvalds } 5991da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 6001da177e4SLinus Torvalds return 0; 6011da177e4SLinus Torvalds } 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds /* 6041da177e4SLinus Torvalds * The file has too many extents to fit into 6051da177e4SLinus Torvalds * the inode, so they are in B-tree format. 6061da177e4SLinus Torvalds * Allocate a buffer for the root of the B-tree 6071da177e4SLinus Torvalds * and copy the root into it. The i_extents 6081da177e4SLinus Torvalds * field will remain NULL until all of the 6091da177e4SLinus Torvalds * extents are read in (when they are needed). 6101da177e4SLinus Torvalds */ 6111da177e4SLinus Torvalds STATIC int 6121da177e4SLinus Torvalds xfs_iformat_btree( 6131da177e4SLinus Torvalds xfs_inode_t *ip, 6141da177e4SLinus Torvalds xfs_dinode_t *dip, 6151da177e4SLinus Torvalds int whichfork) 6161da177e4SLinus Torvalds { 6171da177e4SLinus Torvalds xfs_bmdr_block_t *dfp; 6181da177e4SLinus Torvalds xfs_ifork_t *ifp; 6191da177e4SLinus Torvalds /* REFERENCED */ 6201da177e4SLinus Torvalds int nrecs; 6211da177e4SLinus Torvalds int size; 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 6241da177e4SLinus Torvalds dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 6251da177e4SLinus Torvalds size = XFS_BMAP_BROOT_SPACE(dfp); 62660197e8dSChristoph Hellwig nrecs = be16_to_cpu(dfp->bb_numrecs); 6271da177e4SLinus Torvalds 6281da177e4SLinus Torvalds /* 6291da177e4SLinus Torvalds * blow out if -- fork has less extents than can fit in 6301da177e4SLinus Torvalds * fork (fork shouldn't be a btree format), root btree 6311da177e4SLinus Torvalds * block has more records than can fit into the fork, 6321da177e4SLinus Torvalds * or the number of extents is greater than the number of 6331da177e4SLinus Torvalds * blocks. 6341da177e4SLinus Torvalds */ 6351da177e4SLinus Torvalds if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 6361da177e4SLinus Torvalds || XFS_BMDR_SPACE_CALC(nrecs) > 6371da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 6381da177e4SLinus Torvalds || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 6393762ec6bSNathan Scott xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 6403762ec6bSNathan Scott "corrupt inode %Lu (btree).", 6411da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 6421da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 6431da177e4SLinus Torvalds ip->i_mount); 6441da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6451da177e4SLinus Torvalds } 6461da177e4SLinus Torvalds 6471da177e4SLinus Torvalds ifp->if_broot_bytes = size; 6481da177e4SLinus Torvalds ifp->if_broot = kmem_alloc(size, KM_SLEEP); 6491da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 6501da177e4SLinus Torvalds /* 6511da177e4SLinus Torvalds * Copy and convert from the on-disk structure 6521da177e4SLinus Torvalds * to the in-memory structure. 6531da177e4SLinus Torvalds */ 65460197e8dSChristoph Hellwig xfs_bmdr_to_bmbt(ip->i_mount, dfp, 65560197e8dSChristoph Hellwig XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 6561da177e4SLinus Torvalds ifp->if_broot, size); 6571da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 6581da177e4SLinus Torvalds ifp->if_flags |= XFS_IFBROOT; 6591da177e4SLinus Torvalds 6601da177e4SLinus Torvalds return 0; 6611da177e4SLinus Torvalds } 6621da177e4SLinus Torvalds 6631da177e4SLinus Torvalds void 664347d1c01SChristoph Hellwig xfs_dinode_from_disk( 665347d1c01SChristoph Hellwig xfs_icdinode_t *to, 666347d1c01SChristoph Hellwig xfs_dinode_core_t *from) 6671da177e4SLinus Torvalds { 668347d1c01SChristoph Hellwig to->di_magic = be16_to_cpu(from->di_magic); 669347d1c01SChristoph Hellwig to->di_mode = be16_to_cpu(from->di_mode); 670347d1c01SChristoph Hellwig to->di_version = from ->di_version; 671347d1c01SChristoph Hellwig to->di_format = from->di_format; 672347d1c01SChristoph Hellwig to->di_onlink = be16_to_cpu(from->di_onlink); 673347d1c01SChristoph Hellwig to->di_uid = be32_to_cpu(from->di_uid); 674347d1c01SChristoph Hellwig to->di_gid = be32_to_cpu(from->di_gid); 675347d1c01SChristoph Hellwig to->di_nlink = be32_to_cpu(from->di_nlink); 676347d1c01SChristoph Hellwig to->di_projid = be16_to_cpu(from->di_projid); 677347d1c01SChristoph Hellwig memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 678347d1c01SChristoph Hellwig to->di_flushiter = be16_to_cpu(from->di_flushiter); 679347d1c01SChristoph Hellwig to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); 680347d1c01SChristoph Hellwig to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); 681347d1c01SChristoph Hellwig to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); 682347d1c01SChristoph Hellwig to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); 683347d1c01SChristoph Hellwig to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); 684347d1c01SChristoph Hellwig to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); 685347d1c01SChristoph Hellwig to->di_size = be64_to_cpu(from->di_size); 686347d1c01SChristoph Hellwig to->di_nblocks = be64_to_cpu(from->di_nblocks); 687347d1c01SChristoph Hellwig to->di_extsize = be32_to_cpu(from->di_extsize); 688347d1c01SChristoph Hellwig to->di_nextents = be32_to_cpu(from->di_nextents); 689347d1c01SChristoph Hellwig to->di_anextents = be16_to_cpu(from->di_anextents); 690347d1c01SChristoph Hellwig to->di_forkoff = from->di_forkoff; 691347d1c01SChristoph Hellwig to->di_aformat = from->di_aformat; 692347d1c01SChristoph Hellwig to->di_dmevmask = be32_to_cpu(from->di_dmevmask); 693347d1c01SChristoph Hellwig to->di_dmstate = be16_to_cpu(from->di_dmstate); 694347d1c01SChristoph Hellwig to->di_flags = be16_to_cpu(from->di_flags); 695347d1c01SChristoph Hellwig to->di_gen = be32_to_cpu(from->di_gen); 6961da177e4SLinus Torvalds } 6971da177e4SLinus Torvalds 698347d1c01SChristoph Hellwig void 699347d1c01SChristoph Hellwig xfs_dinode_to_disk( 700347d1c01SChristoph Hellwig xfs_dinode_core_t *to, 701347d1c01SChristoph Hellwig xfs_icdinode_t *from) 702347d1c01SChristoph Hellwig { 703347d1c01SChristoph Hellwig to->di_magic = cpu_to_be16(from->di_magic); 704347d1c01SChristoph Hellwig to->di_mode = cpu_to_be16(from->di_mode); 705347d1c01SChristoph Hellwig to->di_version = from ->di_version; 706347d1c01SChristoph Hellwig to->di_format = from->di_format; 707347d1c01SChristoph Hellwig to->di_onlink = cpu_to_be16(from->di_onlink); 708347d1c01SChristoph Hellwig to->di_uid = cpu_to_be32(from->di_uid); 709347d1c01SChristoph Hellwig to->di_gid = cpu_to_be32(from->di_gid); 710347d1c01SChristoph Hellwig to->di_nlink = cpu_to_be32(from->di_nlink); 711347d1c01SChristoph Hellwig to->di_projid = cpu_to_be16(from->di_projid); 712347d1c01SChristoph Hellwig memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 713347d1c01SChristoph Hellwig to->di_flushiter = cpu_to_be16(from->di_flushiter); 714347d1c01SChristoph Hellwig to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 715347d1c01SChristoph Hellwig to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 716347d1c01SChristoph Hellwig to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); 717347d1c01SChristoph Hellwig to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); 718347d1c01SChristoph Hellwig to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); 719347d1c01SChristoph Hellwig to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); 720347d1c01SChristoph Hellwig to->di_size = cpu_to_be64(from->di_size); 721347d1c01SChristoph Hellwig to->di_nblocks = cpu_to_be64(from->di_nblocks); 722347d1c01SChristoph Hellwig to->di_extsize = cpu_to_be32(from->di_extsize); 723347d1c01SChristoph Hellwig to->di_nextents = cpu_to_be32(from->di_nextents); 724347d1c01SChristoph Hellwig to->di_anextents = cpu_to_be16(from->di_anextents); 725347d1c01SChristoph Hellwig to->di_forkoff = from->di_forkoff; 726347d1c01SChristoph Hellwig to->di_aformat = from->di_aformat; 727347d1c01SChristoph Hellwig to->di_dmevmask = cpu_to_be32(from->di_dmevmask); 728347d1c01SChristoph Hellwig to->di_dmstate = cpu_to_be16(from->di_dmstate); 729347d1c01SChristoph Hellwig to->di_flags = cpu_to_be16(from->di_flags); 730347d1c01SChristoph Hellwig to->di_gen = cpu_to_be32(from->di_gen); 7311da177e4SLinus Torvalds } 7321da177e4SLinus Torvalds 7331da177e4SLinus Torvalds STATIC uint 7341da177e4SLinus Torvalds _xfs_dic2xflags( 7351da177e4SLinus Torvalds __uint16_t di_flags) 7361da177e4SLinus Torvalds { 7371da177e4SLinus Torvalds uint flags = 0; 7381da177e4SLinus Torvalds 7391da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_ANY) { 7401da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_REALTIME) 7411da177e4SLinus Torvalds flags |= XFS_XFLAG_REALTIME; 7421da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PREALLOC) 7431da177e4SLinus Torvalds flags |= XFS_XFLAG_PREALLOC; 7441da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_IMMUTABLE) 7451da177e4SLinus Torvalds flags |= XFS_XFLAG_IMMUTABLE; 7461da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_APPEND) 7471da177e4SLinus Torvalds flags |= XFS_XFLAG_APPEND; 7481da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_SYNC) 7491da177e4SLinus Torvalds flags |= XFS_XFLAG_SYNC; 7501da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOATIME) 7511da177e4SLinus Torvalds flags |= XFS_XFLAG_NOATIME; 7521da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NODUMP) 7531da177e4SLinus Torvalds flags |= XFS_XFLAG_NODUMP; 7541da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_RTINHERIT) 7551da177e4SLinus Torvalds flags |= XFS_XFLAG_RTINHERIT; 7561da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PROJINHERIT) 7571da177e4SLinus Torvalds flags |= XFS_XFLAG_PROJINHERIT; 7581da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOSYMLINKS) 7591da177e4SLinus Torvalds flags |= XFS_XFLAG_NOSYMLINKS; 760dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSIZE) 761dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSIZE; 762dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 763dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSZINHERIT; 764d3446eacSBarry Naujok if (di_flags & XFS_DIFLAG_NODEFRAG) 765d3446eacSBarry Naujok flags |= XFS_XFLAG_NODEFRAG; 7662a82b8beSDavid Chinner if (di_flags & XFS_DIFLAG_FILESTREAM) 7672a82b8beSDavid Chinner flags |= XFS_XFLAG_FILESTREAM; 7681da177e4SLinus Torvalds } 7691da177e4SLinus Torvalds 7701da177e4SLinus Torvalds return flags; 7711da177e4SLinus Torvalds } 7721da177e4SLinus Torvalds 7731da177e4SLinus Torvalds uint 7741da177e4SLinus Torvalds xfs_ip2xflags( 7751da177e4SLinus Torvalds xfs_inode_t *ip) 7761da177e4SLinus Torvalds { 777347d1c01SChristoph Hellwig xfs_icdinode_t *dic = &ip->i_d; 7781da177e4SLinus Torvalds 779a916e2bdSNathan Scott return _xfs_dic2xflags(dic->di_flags) | 78045ba598eSChristoph Hellwig (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); 7811da177e4SLinus Torvalds } 7821da177e4SLinus Torvalds 7831da177e4SLinus Torvalds uint 7841da177e4SLinus Torvalds xfs_dic2xflags( 78545ba598eSChristoph Hellwig xfs_dinode_t *dip) 7861da177e4SLinus Torvalds { 78745ba598eSChristoph Hellwig xfs_dinode_core_t *dic = &dip->di_core; 78845ba598eSChristoph Hellwig 789347d1c01SChristoph Hellwig return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) | 79045ba598eSChristoph Hellwig (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 7911da177e4SLinus Torvalds } 7921da177e4SLinus Torvalds 7931da177e4SLinus Torvalds /* 79407c8f675SDavid Chinner * Allocate and initialise an xfs_inode. 79507c8f675SDavid Chinner */ 796c679eef0SChristoph Hellwig STATIC struct xfs_inode * 79707c8f675SDavid Chinner xfs_inode_alloc( 79807c8f675SDavid Chinner struct xfs_mount *mp, 79907c8f675SDavid Chinner xfs_ino_t ino) 80007c8f675SDavid Chinner { 80107c8f675SDavid Chinner struct xfs_inode *ip; 80207c8f675SDavid Chinner 80307c8f675SDavid Chinner /* 80407c8f675SDavid Chinner * if this didn't occur in transactions, we could use 80507c8f675SDavid Chinner * KM_MAYFAIL and return NULL here on ENOMEM. Set the 80607c8f675SDavid Chinner * code up to do this anyway. 80707c8f675SDavid Chinner */ 80807c8f675SDavid Chinner ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); 80907c8f675SDavid Chinner if (!ip) 81007c8f675SDavid Chinner return NULL; 81107c8f675SDavid Chinner 81207c8f675SDavid Chinner ASSERT(atomic_read(&ip->i_iocount) == 0); 81307c8f675SDavid Chinner ASSERT(atomic_read(&ip->i_pincount) == 0); 81407c8f675SDavid Chinner ASSERT(!spin_is_locked(&ip->i_flags_lock)); 81511654513SDavid Chinner ASSERT(completion_done(&ip->i_flush)); 81607c8f675SDavid Chinner 817bf904248SDavid Chinner /* 818bf904248SDavid Chinner * initialise the VFS inode here to get failures 819bf904248SDavid Chinner * out of the way early. 820bf904248SDavid Chinner */ 821bf904248SDavid Chinner if (!inode_init_always(mp->m_super, VFS_I(ip))) { 822bf904248SDavid Chinner kmem_zone_free(xfs_inode_zone, ip); 823bf904248SDavid Chinner return NULL; 824bf904248SDavid Chinner } 825bf904248SDavid Chinner 826bf904248SDavid Chinner /* initialise the xfs inode */ 82707c8f675SDavid Chinner ip->i_ino = ino; 82807c8f675SDavid Chinner ip->i_mount = mp; 82907c8f675SDavid Chinner ip->i_blkno = 0; 83007c8f675SDavid Chinner ip->i_len = 0; 83107c8f675SDavid Chinner ip->i_boffset =0; 83207c8f675SDavid Chinner ip->i_afp = NULL; 83307c8f675SDavid Chinner memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); 83407c8f675SDavid Chinner ip->i_flags = 0; 83507c8f675SDavid Chinner ip->i_update_core = 0; 83607c8f675SDavid Chinner ip->i_update_size = 0; 83707c8f675SDavid Chinner ip->i_delayed_blks = 0; 83807c8f675SDavid Chinner memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 83907c8f675SDavid Chinner ip->i_size = 0; 84007c8f675SDavid Chinner ip->i_new_size = 0; 84107c8f675SDavid Chinner 84207c8f675SDavid Chinner /* 84307c8f675SDavid Chinner * Initialize inode's trace buffers. 84407c8f675SDavid Chinner */ 84507c8f675SDavid Chinner #ifdef XFS_INODE_TRACE 84607c8f675SDavid Chinner ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); 84707c8f675SDavid Chinner #endif 84807c8f675SDavid Chinner #ifdef XFS_BMAP_TRACE 84907c8f675SDavid Chinner ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); 85007c8f675SDavid Chinner #endif 8518c4ed633SChristoph Hellwig #ifdef XFS_BTREE_TRACE 85207c8f675SDavid Chinner ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); 85307c8f675SDavid Chinner #endif 85407c8f675SDavid Chinner #ifdef XFS_RW_TRACE 85507c8f675SDavid Chinner ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); 85607c8f675SDavid Chinner #endif 85707c8f675SDavid Chinner #ifdef XFS_ILOCK_TRACE 85807c8f675SDavid Chinner ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); 85907c8f675SDavid Chinner #endif 86007c8f675SDavid Chinner #ifdef XFS_DIR2_TRACE 86107c8f675SDavid Chinner ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 86207c8f675SDavid Chinner #endif 86307c8f675SDavid Chinner 86407c8f675SDavid Chinner return ip; 86507c8f675SDavid Chinner } 86607c8f675SDavid Chinner 86707c8f675SDavid Chinner /* 8681da177e4SLinus Torvalds * Given a mount structure and an inode number, return a pointer 869c41564b5SNathan Scott * to a newly allocated in-core inode corresponding to the given 8701da177e4SLinus Torvalds * inode number. 8711da177e4SLinus Torvalds * 8721da177e4SLinus Torvalds * Initialize the inode's attributes and extent pointers if it 8731da177e4SLinus Torvalds * already has them (it will not if the inode has no links). 8741da177e4SLinus Torvalds */ 8751da177e4SLinus Torvalds int 8761da177e4SLinus Torvalds xfs_iread( 8771da177e4SLinus Torvalds xfs_mount_t *mp, 8781da177e4SLinus Torvalds xfs_trans_t *tp, 8791da177e4SLinus Torvalds xfs_ino_t ino, 8801da177e4SLinus Torvalds xfs_inode_t **ipp, 881745b1f47SNathan Scott xfs_daddr_t bno, 882745b1f47SNathan Scott uint imap_flags) 8831da177e4SLinus Torvalds { 8841da177e4SLinus Torvalds xfs_buf_t *bp; 8851da177e4SLinus Torvalds xfs_dinode_t *dip; 8861da177e4SLinus Torvalds xfs_inode_t *ip; 8871da177e4SLinus Torvalds int error; 8881da177e4SLinus Torvalds 88907c8f675SDavid Chinner ip = xfs_inode_alloc(mp, ino); 89007c8f675SDavid Chinner if (!ip) 89107c8f675SDavid Chinner return ENOMEM; 8921da177e4SLinus Torvalds 8931da177e4SLinus Torvalds /* 8941da177e4SLinus Torvalds * Get pointer's to the on-disk inode and the buffer containing it. 8951da177e4SLinus Torvalds * If the inode number refers to a block outside the file system 8961da177e4SLinus Torvalds * then xfs_itobp() will return NULL. In this case we should 8971da177e4SLinus Torvalds * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 8981da177e4SLinus Torvalds * know that this is a new incore inode. 8991da177e4SLinus Torvalds */ 900a3f74ffbSDavid Chinner error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); 9019ed0451eSChristoph Hellwig if (error) 9029ed0451eSChristoph Hellwig goto out_destroy_inode; 9031da177e4SLinus Torvalds 9041da177e4SLinus Torvalds /* 9051da177e4SLinus Torvalds * If we got something that isn't an inode it means someone 9061da177e4SLinus Torvalds * (nfs or dmi) has a stale handle. 9071da177e4SLinus Torvalds */ 908347d1c01SChristoph Hellwig if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { 9091da177e4SLinus Torvalds #ifdef DEBUG 9101da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 9111da177e4SLinus Torvalds "dip->di_core.di_magic (0x%x) != " 9121da177e4SLinus Torvalds "XFS_DINODE_MAGIC (0x%x)", 913347d1c01SChristoph Hellwig be16_to_cpu(dip->di_core.di_magic), 9141da177e4SLinus Torvalds XFS_DINODE_MAGIC); 9151da177e4SLinus Torvalds #endif /* DEBUG */ 9169ed0451eSChristoph Hellwig error = XFS_ERROR(EINVAL); 9179ed0451eSChristoph Hellwig goto out_brelse; 9181da177e4SLinus Torvalds } 9191da177e4SLinus Torvalds 9201da177e4SLinus Torvalds /* 9211da177e4SLinus Torvalds * If the on-disk inode is already linked to a directory 9221da177e4SLinus Torvalds * entry, copy all of the inode into the in-core inode. 9231da177e4SLinus Torvalds * xfs_iformat() handles copying in the inode format 9241da177e4SLinus Torvalds * specific information. 9251da177e4SLinus Torvalds * Otherwise, just get the truly permanent information. 9261da177e4SLinus Torvalds */ 9271da177e4SLinus Torvalds if (dip->di_core.di_mode) { 928347d1c01SChristoph Hellwig xfs_dinode_from_disk(&ip->i_d, &dip->di_core); 9291da177e4SLinus Torvalds error = xfs_iformat(ip, dip); 9301da177e4SLinus Torvalds if (error) { 9311da177e4SLinus Torvalds #ifdef DEBUG 9321da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 9331da177e4SLinus Torvalds "xfs_iformat() returned error %d", 9341da177e4SLinus Torvalds error); 9351da177e4SLinus Torvalds #endif /* DEBUG */ 9369ed0451eSChristoph Hellwig goto out_brelse; 9371da177e4SLinus Torvalds } 9381da177e4SLinus Torvalds } else { 939347d1c01SChristoph Hellwig ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); 940347d1c01SChristoph Hellwig ip->i_d.di_version = dip->di_core.di_version; 941347d1c01SChristoph Hellwig ip->i_d.di_gen = be32_to_cpu(dip->di_core.di_gen); 942347d1c01SChristoph Hellwig ip->i_d.di_flushiter = be16_to_cpu(dip->di_core.di_flushiter); 9431da177e4SLinus Torvalds /* 9441da177e4SLinus Torvalds * Make sure to pull in the mode here as well in 9451da177e4SLinus Torvalds * case the inode is released without being used. 9461da177e4SLinus Torvalds * This ensures that xfs_inactive() will see that 9471da177e4SLinus Torvalds * the inode is already free and not try to mess 9481da177e4SLinus Torvalds * with the uninitialized part of it. 9491da177e4SLinus Torvalds */ 9501da177e4SLinus Torvalds ip->i_d.di_mode = 0; 9511da177e4SLinus Torvalds /* 9521da177e4SLinus Torvalds * Initialize the per-fork minima and maxima for a new 9531da177e4SLinus Torvalds * inode here. xfs_iformat will do it for old inodes. 9541da177e4SLinus Torvalds */ 9551da177e4SLinus Torvalds ip->i_df.if_ext_max = 9561da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 9571da177e4SLinus Torvalds } 9581da177e4SLinus Torvalds 9591da177e4SLinus Torvalds /* 9601da177e4SLinus Torvalds * The inode format changed when we moved the link count and 9611da177e4SLinus Torvalds * made it 32 bits long. If this is an old format inode, 9621da177e4SLinus Torvalds * convert it in memory to look like a new one. If it gets 9631da177e4SLinus Torvalds * flushed to disk we will convert back before flushing or 9641da177e4SLinus Torvalds * logging it. We zero out the new projid field and the old link 9651da177e4SLinus Torvalds * count field. We'll handle clearing the pad field (the remains 9661da177e4SLinus Torvalds * of the old uuid field) when we actually convert the inode to 9671da177e4SLinus Torvalds * the new format. We don't change the version number so that we 9681da177e4SLinus Torvalds * can distinguish this from a real new format inode. 9691da177e4SLinus Torvalds */ 9701da177e4SLinus Torvalds if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 9711da177e4SLinus Torvalds ip->i_d.di_nlink = ip->i_d.di_onlink; 9721da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 9731da177e4SLinus Torvalds ip->i_d.di_projid = 0; 9741da177e4SLinus Torvalds } 9751da177e4SLinus Torvalds 9761da177e4SLinus Torvalds ip->i_delayed_blks = 0; 977ba87ea69SLachlan McIlroy ip->i_size = ip->i_d.di_size; 9781da177e4SLinus Torvalds 9791da177e4SLinus Torvalds /* 9801da177e4SLinus Torvalds * Mark the buffer containing the inode as something to keep 9811da177e4SLinus Torvalds * around for a while. This helps to keep recently accessed 9821da177e4SLinus Torvalds * meta-data in-core longer. 9831da177e4SLinus Torvalds */ 9841da177e4SLinus Torvalds XFS_BUF_SET_REF(bp, XFS_INO_REF); 9851da177e4SLinus Torvalds 9861da177e4SLinus Torvalds /* 9871da177e4SLinus Torvalds * Use xfs_trans_brelse() to release the buffer containing the 9881da177e4SLinus Torvalds * on-disk inode, because it was acquired with xfs_trans_read_buf() 9891da177e4SLinus Torvalds * in xfs_itobp() above. If tp is NULL, this is just a normal 9901da177e4SLinus Torvalds * brelse(). If we're within a transaction, then xfs_trans_brelse() 9911da177e4SLinus Torvalds * will only release the buffer if it is not dirty within the 9921da177e4SLinus Torvalds * transaction. It will be OK to release the buffer in this case, 9931da177e4SLinus Torvalds * because inodes on disk are never destroyed and we will be 9941da177e4SLinus Torvalds * locking the new in-core inode before putting it in the hash 9951da177e4SLinus Torvalds * table where other processes can find it. Thus we don't have 9961da177e4SLinus Torvalds * to worry about the inode being changed just because we released 9971da177e4SLinus Torvalds * the buffer. 9981da177e4SLinus Torvalds */ 9991da177e4SLinus Torvalds xfs_trans_brelse(tp, bp); 10001da177e4SLinus Torvalds *ipp = ip; 10011da177e4SLinus Torvalds return 0; 10029ed0451eSChristoph Hellwig 10039ed0451eSChristoph Hellwig out_brelse: 10049ed0451eSChristoph Hellwig xfs_trans_brelse(tp, bp); 10059ed0451eSChristoph Hellwig out_destroy_inode: 10069ed0451eSChristoph Hellwig xfs_destroy_inode(ip); 10079ed0451eSChristoph Hellwig return error; 10081da177e4SLinus Torvalds } 10091da177e4SLinus Torvalds 10101da177e4SLinus Torvalds /* 10111da177e4SLinus Torvalds * Read in extents from a btree-format inode. 10121da177e4SLinus Torvalds * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 10131da177e4SLinus Torvalds */ 10141da177e4SLinus Torvalds int 10151da177e4SLinus Torvalds xfs_iread_extents( 10161da177e4SLinus Torvalds xfs_trans_t *tp, 10171da177e4SLinus Torvalds xfs_inode_t *ip, 10181da177e4SLinus Torvalds int whichfork) 10191da177e4SLinus Torvalds { 10201da177e4SLinus Torvalds int error; 10211da177e4SLinus Torvalds xfs_ifork_t *ifp; 10224eea22f0SMandy Kirkconnell xfs_extnum_t nextents; 10231da177e4SLinus Torvalds size_t size; 10241da177e4SLinus Torvalds 10251da177e4SLinus Torvalds if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 10261da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 10271da177e4SLinus Torvalds ip->i_mount); 10281da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 10291da177e4SLinus Torvalds } 10304eea22f0SMandy Kirkconnell nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 10314eea22f0SMandy Kirkconnell size = nextents * sizeof(xfs_bmbt_rec_t); 10321da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 10334eea22f0SMandy Kirkconnell 10341da177e4SLinus Torvalds /* 10351da177e4SLinus Torvalds * We know that the size is valid (it's checked in iformat_btree) 10361da177e4SLinus Torvalds */ 10371da177e4SLinus Torvalds ifp->if_lastex = NULLEXTNUM; 10384eea22f0SMandy Kirkconnell ifp->if_bytes = ifp->if_real_bytes = 0; 10391da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 10404eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nextents); 10411da177e4SLinus Torvalds error = xfs_bmap_read_extents(tp, ip, whichfork); 10421da177e4SLinus Torvalds if (error) { 10434eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 10441da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 10451da177e4SLinus Torvalds return error; 10461da177e4SLinus Torvalds } 1047a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); 10481da177e4SLinus Torvalds return 0; 10491da177e4SLinus Torvalds } 10501da177e4SLinus Torvalds 10511da177e4SLinus Torvalds /* 10521da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 10531da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 10541da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 10551da177e4SLinus Torvalds * set according to the contents of the given cred structure. 10561da177e4SLinus Torvalds * 10571da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 10581da177e4SLinus Torvalds * has a free inode available, call xfs_iget() 10591da177e4SLinus Torvalds * to obtain the in-core version of the allocated inode. Finally, 10601da177e4SLinus Torvalds * fill in the inode and log its initial contents. In this case, 10611da177e4SLinus Torvalds * ialloc_context would be set to NULL and call_again set to false. 10621da177e4SLinus Torvalds * 10631da177e4SLinus Torvalds * If xfs_dialloc() does not have an available inode, 10641da177e4SLinus Torvalds * it will replenish its supply by doing an allocation. Since we can 10651da177e4SLinus Torvalds * only do one allocation within a transaction without deadlocks, we 10661da177e4SLinus Torvalds * must commit the current transaction before returning the inode itself. 10671da177e4SLinus Torvalds * In this case, therefore, we will set call_again to true and return. 10681da177e4SLinus Torvalds * The caller should then commit the current transaction, start a new 10691da177e4SLinus Torvalds * transaction, and call xfs_ialloc() again to actually get the inode. 10701da177e4SLinus Torvalds * 10711da177e4SLinus Torvalds * To ensure that some other process does not grab the inode that 10721da177e4SLinus Torvalds * was allocated during the first call to xfs_ialloc(), this routine 10731da177e4SLinus Torvalds * also returns the [locked] bp pointing to the head of the freelist 10741da177e4SLinus Torvalds * as ialloc_context. The caller should hold this buffer across 10751da177e4SLinus Torvalds * the commit and pass it back into this routine on the second call. 1076b11f94d5SDavid Chinner * 1077b11f94d5SDavid Chinner * If we are allocating quota inodes, we do not have a parent inode 1078b11f94d5SDavid Chinner * to attach to or associate with (i.e. pip == NULL) because they 1079b11f94d5SDavid Chinner * are not linked into the directory structure - they are attached 1080b11f94d5SDavid Chinner * directly to the superblock - and so have no parent. 10811da177e4SLinus Torvalds */ 10821da177e4SLinus Torvalds int 10831da177e4SLinus Torvalds xfs_ialloc( 10841da177e4SLinus Torvalds xfs_trans_t *tp, 10851da177e4SLinus Torvalds xfs_inode_t *pip, 10861da177e4SLinus Torvalds mode_t mode, 108731b084aeSNathan Scott xfs_nlink_t nlink, 10881da177e4SLinus Torvalds xfs_dev_t rdev, 10891da177e4SLinus Torvalds cred_t *cr, 10901da177e4SLinus Torvalds xfs_prid_t prid, 10911da177e4SLinus Torvalds int okalloc, 10921da177e4SLinus Torvalds xfs_buf_t **ialloc_context, 10931da177e4SLinus Torvalds boolean_t *call_again, 10941da177e4SLinus Torvalds xfs_inode_t **ipp) 10951da177e4SLinus Torvalds { 10961da177e4SLinus Torvalds xfs_ino_t ino; 10971da177e4SLinus Torvalds xfs_inode_t *ip; 10981da177e4SLinus Torvalds uint flags; 10991da177e4SLinus Torvalds int error; 1100dff35fd4SChristoph Hellwig timespec_t tv; 1101bf904248SDavid Chinner int filestreams = 0; 11021da177e4SLinus Torvalds 11031da177e4SLinus Torvalds /* 11041da177e4SLinus Torvalds * Call the space management code to pick 11051da177e4SLinus Torvalds * the on-disk inode to be allocated. 11061da177e4SLinus Torvalds */ 1107b11f94d5SDavid Chinner error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 11081da177e4SLinus Torvalds ialloc_context, call_again, &ino); 1109bf904248SDavid Chinner if (error) 11101da177e4SLinus Torvalds return error; 11111da177e4SLinus Torvalds if (*call_again || ino == NULLFSINO) { 11121da177e4SLinus Torvalds *ipp = NULL; 11131da177e4SLinus Torvalds return 0; 11141da177e4SLinus Torvalds } 11151da177e4SLinus Torvalds ASSERT(*ialloc_context == NULL); 11161da177e4SLinus Torvalds 11171da177e4SLinus Torvalds /* 11181da177e4SLinus Torvalds * Get the in-core inode with the lock held exclusively. 11191da177e4SLinus Torvalds * This is because we're setting fields here we need 11201da177e4SLinus Torvalds * to prevent others from looking at until we're done. 11211da177e4SLinus Torvalds */ 11221da177e4SLinus Torvalds error = xfs_trans_iget(tp->t_mountp, tp, ino, 1123745b1f47SNathan Scott XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1124bf904248SDavid Chinner if (error) 11251da177e4SLinus Torvalds return error; 11261da177e4SLinus Torvalds ASSERT(ip != NULL); 11271da177e4SLinus Torvalds 11281da177e4SLinus Torvalds ip->i_d.di_mode = (__uint16_t)mode; 11291da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 11301da177e4SLinus Torvalds ip->i_d.di_nlink = nlink; 11311da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == nlink); 11329e2b2dc4SDavid Howells ip->i_d.di_uid = current_fsuid(); 11339e2b2dc4SDavid Howells ip->i_d.di_gid = current_fsgid(); 11341da177e4SLinus Torvalds ip->i_d.di_projid = prid; 11351da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 11361da177e4SLinus Torvalds 11371da177e4SLinus Torvalds /* 11381da177e4SLinus Torvalds * If the superblock version is up to where we support new format 11391da177e4SLinus Torvalds * inodes and this is currently an old format inode, then change 11401da177e4SLinus Torvalds * the inode version number now. This way we only do the conversion 11411da177e4SLinus Torvalds * here rather than here and in the flush/logging code. 11421da177e4SLinus Torvalds */ 114362118709SEric Sandeen if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 11441da177e4SLinus Torvalds ip->i_d.di_version == XFS_DINODE_VERSION_1) { 11451da177e4SLinus Torvalds ip->i_d.di_version = XFS_DINODE_VERSION_2; 11461da177e4SLinus Torvalds /* 11471da177e4SLinus Torvalds * We've already zeroed the old link count, the projid field, 11481da177e4SLinus Torvalds * and the pad field. 11491da177e4SLinus Torvalds */ 11501da177e4SLinus Torvalds } 11511da177e4SLinus Torvalds 11521da177e4SLinus Torvalds /* 11531da177e4SLinus Torvalds * Project ids won't be stored on disk if we are using a version 1 inode. 11541da177e4SLinus Torvalds */ 11551da177e4SLinus Torvalds if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 11561da177e4SLinus Torvalds xfs_bump_ino_vers2(tp, ip); 11571da177e4SLinus Torvalds 1158bd186aa9SChristoph Hellwig if (pip && XFS_INHERIT_GID(pip)) { 11591da177e4SLinus Torvalds ip->i_d.di_gid = pip->i_d.di_gid; 11601da177e4SLinus Torvalds if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 11611da177e4SLinus Torvalds ip->i_d.di_mode |= S_ISGID; 11621da177e4SLinus Torvalds } 11631da177e4SLinus Torvalds } 11641da177e4SLinus Torvalds 11651da177e4SLinus Torvalds /* 11661da177e4SLinus Torvalds * If the group ID of the new file does not match the effective group 11671da177e4SLinus Torvalds * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 11681da177e4SLinus Torvalds * (and only if the irix_sgid_inherit compatibility variable is set). 11691da177e4SLinus Torvalds */ 11701da177e4SLinus Torvalds if ((irix_sgid_inherit) && 11711da177e4SLinus Torvalds (ip->i_d.di_mode & S_ISGID) && 11721da177e4SLinus Torvalds (!in_group_p((gid_t)ip->i_d.di_gid))) { 11731da177e4SLinus Torvalds ip->i_d.di_mode &= ~S_ISGID; 11741da177e4SLinus Torvalds } 11751da177e4SLinus Torvalds 11761da177e4SLinus Torvalds ip->i_d.di_size = 0; 1177ba87ea69SLachlan McIlroy ip->i_size = 0; 11781da177e4SLinus Torvalds ip->i_d.di_nextents = 0; 11791da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 1180dff35fd4SChristoph Hellwig 1181dff35fd4SChristoph Hellwig nanotime(&tv); 1182dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 1183dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 1184dff35fd4SChristoph Hellwig ip->i_d.di_atime = ip->i_d.di_mtime; 1185dff35fd4SChristoph Hellwig ip->i_d.di_ctime = ip->i_d.di_mtime; 1186dff35fd4SChristoph Hellwig 11871da177e4SLinus Torvalds /* 11881da177e4SLinus Torvalds * di_gen will have been taken care of in xfs_iread. 11891da177e4SLinus Torvalds */ 11901da177e4SLinus Torvalds ip->i_d.di_extsize = 0; 11911da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 11921da177e4SLinus Torvalds ip->i_d.di_dmstate = 0; 11931da177e4SLinus Torvalds ip->i_d.di_flags = 0; 11941da177e4SLinus Torvalds flags = XFS_ILOG_CORE; 11951da177e4SLinus Torvalds switch (mode & S_IFMT) { 11961da177e4SLinus Torvalds case S_IFIFO: 11971da177e4SLinus Torvalds case S_IFCHR: 11981da177e4SLinus Torvalds case S_IFBLK: 11991da177e4SLinus Torvalds case S_IFSOCK: 12001da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_DEV; 12011da177e4SLinus Torvalds ip->i_df.if_u2.if_rdev = rdev; 12021da177e4SLinus Torvalds ip->i_df.if_flags = 0; 12031da177e4SLinus Torvalds flags |= XFS_ILOG_DEV; 12041da177e4SLinus Torvalds break; 12051da177e4SLinus Torvalds case S_IFREG: 1206bf904248SDavid Chinner /* 1207bf904248SDavid Chinner * we can't set up filestreams until after the VFS inode 1208bf904248SDavid Chinner * is set up properly. 1209bf904248SDavid Chinner */ 1210bf904248SDavid Chinner if (pip && xfs_inode_is_filestream(pip)) 1211bf904248SDavid Chinner filestreams = 1; 12122a82b8beSDavid Chinner /* fall through */ 12131da177e4SLinus Torvalds case S_IFDIR: 1214b11f94d5SDavid Chinner if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1215365ca83dSNathan Scott uint di_flags = 0; 1216365ca83dSNathan Scott 12171da177e4SLinus Torvalds if ((mode & S_IFMT) == S_IFDIR) { 1218365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1219365ca83dSNathan Scott di_flags |= XFS_DIFLAG_RTINHERIT; 1220dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1221dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1222dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1223dd9f438eSNathan Scott } 1224dd9f438eSNathan Scott } else if ((mode & S_IFMT) == S_IFREG) { 1225613d7043SChristoph Hellwig if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1226365ca83dSNathan Scott di_flags |= XFS_DIFLAG_REALTIME; 1227dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1228dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSIZE; 1229dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1230dd9f438eSNathan Scott } 12311da177e4SLinus Torvalds } 12321da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 12331da177e4SLinus Torvalds xfs_inherit_noatime) 1234365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOATIME; 12351da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 12361da177e4SLinus Torvalds xfs_inherit_nodump) 1237365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NODUMP; 12381da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 12391da177e4SLinus Torvalds xfs_inherit_sync) 1240365ca83dSNathan Scott di_flags |= XFS_DIFLAG_SYNC; 12411da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 12421da177e4SLinus Torvalds xfs_inherit_nosymlinks) 1243365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOSYMLINKS; 1244365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1245365ca83dSNathan Scott di_flags |= XFS_DIFLAG_PROJINHERIT; 1246d3446eacSBarry Naujok if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1247d3446eacSBarry Naujok xfs_inherit_nodefrag) 1248d3446eacSBarry Naujok di_flags |= XFS_DIFLAG_NODEFRAG; 12492a82b8beSDavid Chinner if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) 12502a82b8beSDavid Chinner di_flags |= XFS_DIFLAG_FILESTREAM; 1251365ca83dSNathan Scott ip->i_d.di_flags |= di_flags; 12521da177e4SLinus Torvalds } 12531da177e4SLinus Torvalds /* FALLTHROUGH */ 12541da177e4SLinus Torvalds case S_IFLNK: 12551da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 12561da177e4SLinus Torvalds ip->i_df.if_flags = XFS_IFEXTENTS; 12571da177e4SLinus Torvalds ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 12581da177e4SLinus Torvalds ip->i_df.if_u1.if_extents = NULL; 12591da177e4SLinus Torvalds break; 12601da177e4SLinus Torvalds default: 12611da177e4SLinus Torvalds ASSERT(0); 12621da177e4SLinus Torvalds } 12631da177e4SLinus Torvalds /* 12641da177e4SLinus Torvalds * Attribute fork settings for new inode. 12651da177e4SLinus Torvalds */ 12661da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 12671da177e4SLinus Torvalds ip->i_d.di_anextents = 0; 12681da177e4SLinus Torvalds 12691da177e4SLinus Torvalds /* 12701da177e4SLinus Torvalds * Log the new values stuffed into the inode. 12711da177e4SLinus Torvalds */ 12721da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, flags); 12731da177e4SLinus Torvalds 1274b83bd138SNathan Scott /* now that we have an i_mode we can setup inode ops and unlock */ 127541be8bedSChristoph Hellwig xfs_setup_inode(ip); 12761da177e4SLinus Torvalds 1277bf904248SDavid Chinner /* now we have set up the vfs inode we can associate the filestream */ 1278bf904248SDavid Chinner if (filestreams) { 1279bf904248SDavid Chinner error = xfs_filestream_associate(pip, ip); 1280bf904248SDavid Chinner if (error < 0) 1281bf904248SDavid Chinner return -error; 1282bf904248SDavid Chinner if (!error) 1283bf904248SDavid Chinner xfs_iflags_set(ip, XFS_IFILESTREAM); 1284bf904248SDavid Chinner } 1285bf904248SDavid Chinner 12861da177e4SLinus Torvalds *ipp = ip; 12871da177e4SLinus Torvalds return 0; 12881da177e4SLinus Torvalds } 12891da177e4SLinus Torvalds 12901da177e4SLinus Torvalds /* 12911da177e4SLinus Torvalds * Check to make sure that there are no blocks allocated to the 12921da177e4SLinus Torvalds * file beyond the size of the file. We don't check this for 12931da177e4SLinus Torvalds * files with fixed size extents or real time extents, but we 12941da177e4SLinus Torvalds * at least do it for regular files. 12951da177e4SLinus Torvalds */ 12961da177e4SLinus Torvalds #ifdef DEBUG 12971da177e4SLinus Torvalds void 12981da177e4SLinus Torvalds xfs_isize_check( 12991da177e4SLinus Torvalds xfs_mount_t *mp, 13001da177e4SLinus Torvalds xfs_inode_t *ip, 13011da177e4SLinus Torvalds xfs_fsize_t isize) 13021da177e4SLinus Torvalds { 13031da177e4SLinus Torvalds xfs_fileoff_t map_first; 13041da177e4SLinus Torvalds int nimaps; 13051da177e4SLinus Torvalds xfs_bmbt_irec_t imaps[2]; 13061da177e4SLinus Torvalds 13071da177e4SLinus Torvalds if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 13081da177e4SLinus Torvalds return; 13091da177e4SLinus Torvalds 131071ddabb9SEric Sandeen if (XFS_IS_REALTIME_INODE(ip)) 131171ddabb9SEric Sandeen return; 131271ddabb9SEric Sandeen 131371ddabb9SEric Sandeen if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) 13141da177e4SLinus Torvalds return; 13151da177e4SLinus Torvalds 13161da177e4SLinus Torvalds nimaps = 2; 13171da177e4SLinus Torvalds map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); 13181da177e4SLinus Torvalds /* 13191da177e4SLinus Torvalds * The filesystem could be shutting down, so bmapi may return 13201da177e4SLinus Torvalds * an error. 13211da177e4SLinus Torvalds */ 13221da177e4SLinus Torvalds if (xfs_bmapi(NULL, ip, map_first, 13231da177e4SLinus Torvalds (XFS_B_TO_FSB(mp, 13241da177e4SLinus Torvalds (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 13251da177e4SLinus Torvalds map_first), 13261da177e4SLinus Torvalds XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 13273e57ecf6SOlaf Weber NULL, NULL)) 13281da177e4SLinus Torvalds return; 13291da177e4SLinus Torvalds ASSERT(nimaps == 1); 13301da177e4SLinus Torvalds ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 13311da177e4SLinus Torvalds } 13321da177e4SLinus Torvalds #endif /* DEBUG */ 13331da177e4SLinus Torvalds 13341da177e4SLinus Torvalds /* 13351da177e4SLinus Torvalds * Calculate the last possible buffered byte in a file. This must 13361da177e4SLinus Torvalds * include data that was buffered beyond the EOF by the write code. 13371da177e4SLinus Torvalds * This also needs to deal with overflowing the xfs_fsize_t type 13381da177e4SLinus Torvalds * which can happen for sizes near the limit. 13391da177e4SLinus Torvalds * 13401da177e4SLinus Torvalds * We also need to take into account any blocks beyond the EOF. It 13411da177e4SLinus Torvalds * may be the case that they were buffered by a write which failed. 13421da177e4SLinus Torvalds * In that case the pages will still be in memory, but the inode size 13431da177e4SLinus Torvalds * will never have been updated. 13441da177e4SLinus Torvalds */ 13451da177e4SLinus Torvalds xfs_fsize_t 13461da177e4SLinus Torvalds xfs_file_last_byte( 13471da177e4SLinus Torvalds xfs_inode_t *ip) 13481da177e4SLinus Torvalds { 13491da177e4SLinus Torvalds xfs_mount_t *mp; 13501da177e4SLinus Torvalds xfs_fsize_t last_byte; 13511da177e4SLinus Torvalds xfs_fileoff_t last_block; 13521da177e4SLinus Torvalds xfs_fileoff_t size_last_block; 13531da177e4SLinus Torvalds int error; 13541da177e4SLinus Torvalds 1355579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)); 13561da177e4SLinus Torvalds 13571da177e4SLinus Torvalds mp = ip->i_mount; 13581da177e4SLinus Torvalds /* 13591da177e4SLinus Torvalds * Only check for blocks beyond the EOF if the extents have 13601da177e4SLinus Torvalds * been read in. This eliminates the need for the inode lock, 13611da177e4SLinus Torvalds * and it also saves us from looking when it really isn't 13621da177e4SLinus Torvalds * necessary. 13631da177e4SLinus Torvalds */ 13641da177e4SLinus Torvalds if (ip->i_df.if_flags & XFS_IFEXTENTS) { 13651da177e4SLinus Torvalds error = xfs_bmap_last_offset(NULL, ip, &last_block, 13661da177e4SLinus Torvalds XFS_DATA_FORK); 13671da177e4SLinus Torvalds if (error) { 13681da177e4SLinus Torvalds last_block = 0; 13691da177e4SLinus Torvalds } 13701da177e4SLinus Torvalds } else { 13711da177e4SLinus Torvalds last_block = 0; 13721da177e4SLinus Torvalds } 1373ba87ea69SLachlan McIlroy size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size); 13741da177e4SLinus Torvalds last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 13751da177e4SLinus Torvalds 13761da177e4SLinus Torvalds last_byte = XFS_FSB_TO_B(mp, last_block); 13771da177e4SLinus Torvalds if (last_byte < 0) { 13781da177e4SLinus Torvalds return XFS_MAXIOFFSET(mp); 13791da177e4SLinus Torvalds } 13801da177e4SLinus Torvalds last_byte += (1 << mp->m_writeio_log); 13811da177e4SLinus Torvalds if (last_byte < 0) { 13821da177e4SLinus Torvalds return XFS_MAXIOFFSET(mp); 13831da177e4SLinus Torvalds } 13841da177e4SLinus Torvalds return last_byte; 13851da177e4SLinus Torvalds } 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds #if defined(XFS_RW_TRACE) 13881da177e4SLinus Torvalds STATIC void 13891da177e4SLinus Torvalds xfs_itrunc_trace( 13901da177e4SLinus Torvalds int tag, 13911da177e4SLinus Torvalds xfs_inode_t *ip, 13921da177e4SLinus Torvalds int flag, 13931da177e4SLinus Torvalds xfs_fsize_t new_size, 13941da177e4SLinus Torvalds xfs_off_t toss_start, 13951da177e4SLinus Torvalds xfs_off_t toss_finish) 13961da177e4SLinus Torvalds { 13971da177e4SLinus Torvalds if (ip->i_rwtrace == NULL) { 13981da177e4SLinus Torvalds return; 13991da177e4SLinus Torvalds } 14001da177e4SLinus Torvalds 14011da177e4SLinus Torvalds ktrace_enter(ip->i_rwtrace, 14021da177e4SLinus Torvalds (void*)((long)tag), 14031da177e4SLinus Torvalds (void*)ip, 14041da177e4SLinus Torvalds (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff), 14051da177e4SLinus Torvalds (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff), 14061da177e4SLinus Torvalds (void*)((long)flag), 14071da177e4SLinus Torvalds (void*)(unsigned long)((new_size >> 32) & 0xffffffff), 14081da177e4SLinus Torvalds (void*)(unsigned long)(new_size & 0xffffffff), 14091da177e4SLinus Torvalds (void*)(unsigned long)((toss_start >> 32) & 0xffffffff), 14101da177e4SLinus Torvalds (void*)(unsigned long)(toss_start & 0xffffffff), 14111da177e4SLinus Torvalds (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), 14121da177e4SLinus Torvalds (void*)(unsigned long)(toss_finish & 0xffffffff), 14131da177e4SLinus Torvalds (void*)(unsigned long)current_cpu(), 1414f1fdc848SYingping Lu (void*)(unsigned long)current_pid(), 1415f1fdc848SYingping Lu (void*)NULL, 1416f1fdc848SYingping Lu (void*)NULL, 1417f1fdc848SYingping Lu (void*)NULL); 14181da177e4SLinus Torvalds } 14191da177e4SLinus Torvalds #else 14201da177e4SLinus Torvalds #define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) 14211da177e4SLinus Torvalds #endif 14221da177e4SLinus Torvalds 14231da177e4SLinus Torvalds /* 14241da177e4SLinus Torvalds * Start the truncation of the file to new_size. The new size 14251da177e4SLinus Torvalds * must be smaller than the current size. This routine will 14261da177e4SLinus Torvalds * clear the buffer and page caches of file data in the removed 14271da177e4SLinus Torvalds * range, and xfs_itruncate_finish() will remove the underlying 14281da177e4SLinus Torvalds * disk blocks. 14291da177e4SLinus Torvalds * 14301da177e4SLinus Torvalds * The inode must have its I/O lock locked EXCLUSIVELY, and it 14311da177e4SLinus Torvalds * must NOT have the inode lock held at all. This is because we're 14321da177e4SLinus Torvalds * calling into the buffer/page cache code and we can't hold the 14331da177e4SLinus Torvalds * inode lock when we do so. 14341da177e4SLinus Torvalds * 143538e2299aSDavid Chinner * We need to wait for any direct I/Os in flight to complete before we 143638e2299aSDavid Chinner * proceed with the truncate. This is needed to prevent the extents 143738e2299aSDavid Chinner * being read or written by the direct I/Os from being removed while the 143838e2299aSDavid Chinner * I/O is in flight as there is no other method of synchronising 143938e2299aSDavid Chinner * direct I/O with the truncate operation. Also, because we hold 144038e2299aSDavid Chinner * the IOLOCK in exclusive mode, we prevent new direct I/Os from being 144138e2299aSDavid Chinner * started until the truncate completes and drops the lock. Essentially, 144238e2299aSDavid Chinner * the vn_iowait() call forms an I/O barrier that provides strict ordering 144338e2299aSDavid Chinner * between direct I/Os and the truncate operation. 144438e2299aSDavid Chinner * 14451da177e4SLinus Torvalds * The flags parameter can have either the value XFS_ITRUNC_DEFINITE 14461da177e4SLinus Torvalds * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used 14471da177e4SLinus Torvalds * in the case that the caller is locking things out of order and 14481da177e4SLinus Torvalds * may not be able to call xfs_itruncate_finish() with the inode lock 14491da177e4SLinus Torvalds * held without dropping the I/O lock. If the caller must drop the 14501da177e4SLinus Torvalds * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() 14511da177e4SLinus Torvalds * must be called again with all the same restrictions as the initial 14521da177e4SLinus Torvalds * call. 14531da177e4SLinus Torvalds */ 1454d3cf2094SLachlan McIlroy int 14551da177e4SLinus Torvalds xfs_itruncate_start( 14561da177e4SLinus Torvalds xfs_inode_t *ip, 14571da177e4SLinus Torvalds uint flags, 14581da177e4SLinus Torvalds xfs_fsize_t new_size) 14591da177e4SLinus Torvalds { 14601da177e4SLinus Torvalds xfs_fsize_t last_byte; 14611da177e4SLinus Torvalds xfs_off_t toss_start; 14621da177e4SLinus Torvalds xfs_mount_t *mp; 1463d3cf2094SLachlan McIlroy int error = 0; 14641da177e4SLinus Torvalds 1465579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1466ba87ea69SLachlan McIlroy ASSERT((new_size == 0) || (new_size <= ip->i_size)); 14671da177e4SLinus Torvalds ASSERT((flags == XFS_ITRUNC_DEFINITE) || 14681da177e4SLinus Torvalds (flags == XFS_ITRUNC_MAYBE)); 14691da177e4SLinus Torvalds 14701da177e4SLinus Torvalds mp = ip->i_mount; 14719fa8046fSYingping Lu 1472c734c79bSLachlan McIlroy /* wait for the completion of any pending DIOs */ 1473d112f298SLachlan McIlroy if (new_size == 0 || new_size < ip->i_size) 1474c734c79bSLachlan McIlroy vn_iowait(ip); 14759fa8046fSYingping Lu 14761da177e4SLinus Torvalds /* 147767fcaa73SNathan Scott * Call toss_pages or flushinval_pages to get rid of pages 14781da177e4SLinus Torvalds * overlapping the region being removed. We have to use 147967fcaa73SNathan Scott * the less efficient flushinval_pages in the case that the 14801da177e4SLinus Torvalds * caller may not be able to finish the truncate without 14811da177e4SLinus Torvalds * dropping the inode's I/O lock. Make sure 14821da177e4SLinus Torvalds * to catch any pages brought in by buffers overlapping 14831da177e4SLinus Torvalds * the EOF by searching out beyond the isize by our 14841da177e4SLinus Torvalds * block size. We round new_size up to a block boundary 14851da177e4SLinus Torvalds * so that we don't toss things on the same block as 14861da177e4SLinus Torvalds * new_size but before it. 14871da177e4SLinus Torvalds * 148867fcaa73SNathan Scott * Before calling toss_page or flushinval_pages, make sure to 14891da177e4SLinus Torvalds * call remapf() over the same region if the file is mapped. 14901da177e4SLinus Torvalds * This frees up mapped file references to the pages in the 149167fcaa73SNathan Scott * given range and for the flushinval_pages case it ensures 14921da177e4SLinus Torvalds * that we get the latest mapped changes flushed out. 14931da177e4SLinus Torvalds */ 14941da177e4SLinus Torvalds toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 14951da177e4SLinus Torvalds toss_start = XFS_FSB_TO_B(mp, toss_start); 14961da177e4SLinus Torvalds if (toss_start < 0) { 14971da177e4SLinus Torvalds /* 14981da177e4SLinus Torvalds * The place to start tossing is beyond our maximum 14991da177e4SLinus Torvalds * file size, so there is no way that the data extended 15001da177e4SLinus Torvalds * out there. 15011da177e4SLinus Torvalds */ 1502d3cf2094SLachlan McIlroy return 0; 15031da177e4SLinus Torvalds } 15041da177e4SLinus Torvalds last_byte = xfs_file_last_byte(ip); 15051da177e4SLinus Torvalds xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 15061da177e4SLinus Torvalds last_byte); 15071da177e4SLinus Torvalds if (last_byte > toss_start) { 15081da177e4SLinus Torvalds if (flags & XFS_ITRUNC_DEFINITE) { 1509739bfb2aSChristoph Hellwig xfs_tosspages(ip, toss_start, 1510739bfb2aSChristoph Hellwig -1, FI_REMAPF_LOCKED); 15111da177e4SLinus Torvalds } else { 1512739bfb2aSChristoph Hellwig error = xfs_flushinval_pages(ip, toss_start, 1513739bfb2aSChristoph Hellwig -1, FI_REMAPF_LOCKED); 15141da177e4SLinus Torvalds } 15151da177e4SLinus Torvalds } 15161da177e4SLinus Torvalds 15171da177e4SLinus Torvalds #ifdef DEBUG 15181da177e4SLinus Torvalds if (new_size == 0) { 1519df80c933SChristoph Hellwig ASSERT(VN_CACHED(VFS_I(ip)) == 0); 15201da177e4SLinus Torvalds } 15211da177e4SLinus Torvalds #endif 1522d3cf2094SLachlan McIlroy return error; 15231da177e4SLinus Torvalds } 15241da177e4SLinus Torvalds 15251da177e4SLinus Torvalds /* 1526f6485057SDavid Chinner * Shrink the file to the given new_size. The new size must be smaller than 1527f6485057SDavid Chinner * the current size. This will free up the underlying blocks in the removed 1528f6485057SDavid Chinner * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). 15291da177e4SLinus Torvalds * 1530f6485057SDavid Chinner * The transaction passed to this routine must have made a permanent log 1531f6485057SDavid Chinner * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1532f6485057SDavid Chinner * given transaction and start new ones, so make sure everything involved in 1533f6485057SDavid Chinner * the transaction is tidy before calling here. Some transaction will be 1534f6485057SDavid Chinner * returned to the caller to be committed. The incoming transaction must 1535f6485057SDavid Chinner * already include the inode, and both inode locks must be held exclusively. 1536f6485057SDavid Chinner * The inode must also be "held" within the transaction. On return the inode 1537f6485057SDavid Chinner * will be "held" within the returned transaction. This routine does NOT 1538f6485057SDavid Chinner * require any disk space to be reserved for it within the transaction. 15391da177e4SLinus Torvalds * 1540f6485057SDavid Chinner * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it 1541f6485057SDavid Chinner * indicates the fork which is to be truncated. For the attribute fork we only 1542f6485057SDavid Chinner * support truncation to size 0. 15431da177e4SLinus Torvalds * 1544f6485057SDavid Chinner * We use the sync parameter to indicate whether or not the first transaction 1545f6485057SDavid Chinner * we perform might have to be synchronous. For the attr fork, it needs to be 1546f6485057SDavid Chinner * so if the unlink of the inode is not yet known to be permanent in the log. 1547f6485057SDavid Chinner * This keeps us from freeing and reusing the blocks of the attribute fork 1548f6485057SDavid Chinner * before the unlink of the inode becomes permanent. 15491da177e4SLinus Torvalds * 1550f6485057SDavid Chinner * For the data fork, we normally have to run synchronously if we're being 1551f6485057SDavid Chinner * called out of the inactive path or we're being called out of the create path 1552f6485057SDavid Chinner * where we're truncating an existing file. Either way, the truncate needs to 1553f6485057SDavid Chinner * be sync so blocks don't reappear in the file with altered data in case of a 1554f6485057SDavid Chinner * crash. wsync filesystems can run the first case async because anything that 1555f6485057SDavid Chinner * shrinks the inode has to run sync so by the time we're called here from 1556f6485057SDavid Chinner * inactive, the inode size is permanently set to 0. 15571da177e4SLinus Torvalds * 1558f6485057SDavid Chinner * Calls from the truncate path always need to be sync unless we're in a wsync 1559f6485057SDavid Chinner * filesystem and the file has already been unlinked. 15601da177e4SLinus Torvalds * 1561f6485057SDavid Chinner * The caller is responsible for correctly setting the sync parameter. It gets 1562f6485057SDavid Chinner * too hard for us to guess here which path we're being called out of just 1563f6485057SDavid Chinner * based on inode state. 1564f6485057SDavid Chinner * 1565f6485057SDavid Chinner * If we get an error, we must return with the inode locked and linked into the 1566f6485057SDavid Chinner * current transaction. This keeps things simple for the higher level code, 1567f6485057SDavid Chinner * because it always knows that the inode is locked and held in the transaction 1568f6485057SDavid Chinner * that returns to it whether errors occur or not. We don't mark the inode 1569f6485057SDavid Chinner * dirty on error so that transactions can be easily aborted if possible. 15701da177e4SLinus Torvalds */ 15711da177e4SLinus Torvalds int 15721da177e4SLinus Torvalds xfs_itruncate_finish( 15731da177e4SLinus Torvalds xfs_trans_t **tp, 15741da177e4SLinus Torvalds xfs_inode_t *ip, 15751da177e4SLinus Torvalds xfs_fsize_t new_size, 15761da177e4SLinus Torvalds int fork, 15771da177e4SLinus Torvalds int sync) 15781da177e4SLinus Torvalds { 15791da177e4SLinus Torvalds xfs_fsblock_t first_block; 15801da177e4SLinus Torvalds xfs_fileoff_t first_unmap_block; 15811da177e4SLinus Torvalds xfs_fileoff_t last_block; 15821da177e4SLinus Torvalds xfs_filblks_t unmap_len=0; 15831da177e4SLinus Torvalds xfs_mount_t *mp; 15841da177e4SLinus Torvalds xfs_trans_t *ntp; 15851da177e4SLinus Torvalds int done; 15861da177e4SLinus Torvalds int committed; 15871da177e4SLinus Torvalds xfs_bmap_free_t free_list; 15881da177e4SLinus Torvalds int error; 15891da177e4SLinus Torvalds 1590579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 1591ba87ea69SLachlan McIlroy ASSERT((new_size == 0) || (new_size <= ip->i_size)); 15921da177e4SLinus Torvalds ASSERT(*tp != NULL); 15931da177e4SLinus Torvalds ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 15941da177e4SLinus Torvalds ASSERT(ip->i_transp == *tp); 15951da177e4SLinus Torvalds ASSERT(ip->i_itemp != NULL); 15961da177e4SLinus Torvalds ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 15971da177e4SLinus Torvalds 15981da177e4SLinus Torvalds 15991da177e4SLinus Torvalds ntp = *tp; 16001da177e4SLinus Torvalds mp = (ntp)->t_mountp; 16011da177e4SLinus Torvalds ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); 16021da177e4SLinus Torvalds 16031da177e4SLinus Torvalds /* 16041da177e4SLinus Torvalds * We only support truncating the entire attribute fork. 16051da177e4SLinus Torvalds */ 16061da177e4SLinus Torvalds if (fork == XFS_ATTR_FORK) { 16071da177e4SLinus Torvalds new_size = 0LL; 16081da177e4SLinus Torvalds } 16091da177e4SLinus Torvalds first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 16101da177e4SLinus Torvalds xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 16111da177e4SLinus Torvalds /* 16121da177e4SLinus Torvalds * The first thing we do is set the size to new_size permanently 16131da177e4SLinus Torvalds * on disk. This way we don't have to worry about anyone ever 16141da177e4SLinus Torvalds * being able to look at the data being freed even in the face 16151da177e4SLinus Torvalds * of a crash. What we're getting around here is the case where 16161da177e4SLinus Torvalds * we free a block, it is allocated to another file, it is written 16171da177e4SLinus Torvalds * to, and then we crash. If the new data gets written to the 16181da177e4SLinus Torvalds * file but the log buffers containing the free and reallocation 16191da177e4SLinus Torvalds * don't, then we'd end up with garbage in the blocks being freed. 16201da177e4SLinus Torvalds * As long as we make the new_size permanent before actually 16211da177e4SLinus Torvalds * freeing any blocks it doesn't matter if they get writtten to. 16221da177e4SLinus Torvalds * 16231da177e4SLinus Torvalds * The callers must signal into us whether or not the size 16241da177e4SLinus Torvalds * setting here must be synchronous. There are a few cases 16251da177e4SLinus Torvalds * where it doesn't have to be synchronous. Those cases 16261da177e4SLinus Torvalds * occur if the file is unlinked and we know the unlink is 16271da177e4SLinus Torvalds * permanent or if the blocks being truncated are guaranteed 16281da177e4SLinus Torvalds * to be beyond the inode eof (regardless of the link count) 16291da177e4SLinus Torvalds * and the eof value is permanent. Both of these cases occur 16301da177e4SLinus Torvalds * only on wsync-mounted filesystems. In those cases, we're 16311da177e4SLinus Torvalds * guaranteed that no user will ever see the data in the blocks 16321da177e4SLinus Torvalds * that are being truncated so the truncate can run async. 16331da177e4SLinus Torvalds * In the free beyond eof case, the file may wind up with 16341da177e4SLinus Torvalds * more blocks allocated to it than it needs if we crash 16351da177e4SLinus Torvalds * and that won't get fixed until the next time the file 16361da177e4SLinus Torvalds * is re-opened and closed but that's ok as that shouldn't 16371da177e4SLinus Torvalds * be too many blocks. 16381da177e4SLinus Torvalds * 16391da177e4SLinus Torvalds * However, we can't just make all wsync xactions run async 16401da177e4SLinus Torvalds * because there's one call out of the create path that needs 16411da177e4SLinus Torvalds * to run sync where it's truncating an existing file to size 16421da177e4SLinus Torvalds * 0 whose size is > 0. 16431da177e4SLinus Torvalds * 16441da177e4SLinus Torvalds * It's probably possible to come up with a test in this 16451da177e4SLinus Torvalds * routine that would correctly distinguish all the above 16461da177e4SLinus Torvalds * cases from the values of the function parameters and the 16471da177e4SLinus Torvalds * inode state but for sanity's sake, I've decided to let the 16481da177e4SLinus Torvalds * layers above just tell us. It's simpler to correctly figure 16491da177e4SLinus Torvalds * out in the layer above exactly under what conditions we 16501da177e4SLinus Torvalds * can run async and I think it's easier for others read and 16511da177e4SLinus Torvalds * follow the logic in case something has to be changed. 16521da177e4SLinus Torvalds * cscope is your friend -- rcc. 16531da177e4SLinus Torvalds * 16541da177e4SLinus Torvalds * The attribute fork is much simpler. 16551da177e4SLinus Torvalds * 16561da177e4SLinus Torvalds * For the attribute fork we allow the caller to tell us whether 16571da177e4SLinus Torvalds * the unlink of the inode that led to this call is yet permanent 16581da177e4SLinus Torvalds * in the on disk log. If it is not and we will be freeing extents 16591da177e4SLinus Torvalds * in this inode then we make the first transaction synchronous 16601da177e4SLinus Torvalds * to make sure that the unlink is permanent by the time we free 16611da177e4SLinus Torvalds * the blocks. 16621da177e4SLinus Torvalds */ 16631da177e4SLinus Torvalds if (fork == XFS_DATA_FORK) { 16641da177e4SLinus Torvalds if (ip->i_d.di_nextents > 0) { 1665ba87ea69SLachlan McIlroy /* 1666ba87ea69SLachlan McIlroy * If we are not changing the file size then do 1667ba87ea69SLachlan McIlroy * not update the on-disk file size - we may be 1668ba87ea69SLachlan McIlroy * called from xfs_inactive_free_eofblocks(). If we 1669ba87ea69SLachlan McIlroy * update the on-disk file size and then the system 1670ba87ea69SLachlan McIlroy * crashes before the contents of the file are 1671ba87ea69SLachlan McIlroy * flushed to disk then the files may be full of 1672ba87ea69SLachlan McIlroy * holes (ie NULL files bug). 1673ba87ea69SLachlan McIlroy */ 1674ba87ea69SLachlan McIlroy if (ip->i_size != new_size) { 16751da177e4SLinus Torvalds ip->i_d.di_size = new_size; 1676ba87ea69SLachlan McIlroy ip->i_size = new_size; 16771da177e4SLinus Torvalds xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 16781da177e4SLinus Torvalds } 1679ba87ea69SLachlan McIlroy } 16801da177e4SLinus Torvalds } else if (sync) { 16811da177e4SLinus Torvalds ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 16821da177e4SLinus Torvalds if (ip->i_d.di_anextents > 0) 16831da177e4SLinus Torvalds xfs_trans_set_sync(ntp); 16841da177e4SLinus Torvalds } 16851da177e4SLinus Torvalds ASSERT(fork == XFS_DATA_FORK || 16861da177e4SLinus Torvalds (fork == XFS_ATTR_FORK && 16871da177e4SLinus Torvalds ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || 16881da177e4SLinus Torvalds (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); 16891da177e4SLinus Torvalds 16901da177e4SLinus Torvalds /* 16911da177e4SLinus Torvalds * Since it is possible for space to become allocated beyond 16921da177e4SLinus Torvalds * the end of the file (in a crash where the space is allocated 16931da177e4SLinus Torvalds * but the inode size is not yet updated), simply remove any 16941da177e4SLinus Torvalds * blocks which show up between the new EOF and the maximum 16951da177e4SLinus Torvalds * possible file size. If the first block to be removed is 16961da177e4SLinus Torvalds * beyond the maximum file size (ie it is the same as last_block), 16971da177e4SLinus Torvalds * then there is nothing to do. 16981da177e4SLinus Torvalds */ 16991da177e4SLinus Torvalds last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 17001da177e4SLinus Torvalds ASSERT(first_unmap_block <= last_block); 17011da177e4SLinus Torvalds done = 0; 17021da177e4SLinus Torvalds if (last_block == first_unmap_block) { 17031da177e4SLinus Torvalds done = 1; 17041da177e4SLinus Torvalds } else { 17051da177e4SLinus Torvalds unmap_len = last_block - first_unmap_block + 1; 17061da177e4SLinus Torvalds } 17071da177e4SLinus Torvalds while (!done) { 17081da177e4SLinus Torvalds /* 17091da177e4SLinus Torvalds * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() 17101da177e4SLinus Torvalds * will tell us whether it freed the entire range or 17111da177e4SLinus Torvalds * not. If this is a synchronous mount (wsync), 17121da177e4SLinus Torvalds * then we can tell bunmapi to keep all the 17131da177e4SLinus Torvalds * transactions asynchronous since the unlink 17141da177e4SLinus Torvalds * transaction that made this inode inactive has 17151da177e4SLinus Torvalds * already hit the disk. There's no danger of 17161da177e4SLinus Torvalds * the freed blocks being reused, there being a 17171da177e4SLinus Torvalds * crash, and the reused blocks suddenly reappearing 17181da177e4SLinus Torvalds * in this file with garbage in them once recovery 17191da177e4SLinus Torvalds * runs. 17201da177e4SLinus Torvalds */ 17211da177e4SLinus Torvalds XFS_BMAP_INIT(&free_list, &first_block); 1722541d7d3cSLachlan McIlroy error = xfs_bunmapi(ntp, ip, 17233e57ecf6SOlaf Weber first_unmap_block, unmap_len, 17241da177e4SLinus Torvalds XFS_BMAPI_AFLAG(fork) | 17251da177e4SLinus Torvalds (sync ? 0 : XFS_BMAPI_ASYNC), 17261da177e4SLinus Torvalds XFS_ITRUNC_MAX_EXTENTS, 17273e57ecf6SOlaf Weber &first_block, &free_list, 17283e57ecf6SOlaf Weber NULL, &done); 17291da177e4SLinus Torvalds if (error) { 17301da177e4SLinus Torvalds /* 17311da177e4SLinus Torvalds * If the bunmapi call encounters an error, 17321da177e4SLinus Torvalds * return to the caller where the transaction 17331da177e4SLinus Torvalds * can be properly aborted. We just need to 17341da177e4SLinus Torvalds * make sure we're not holding any resources 17351da177e4SLinus Torvalds * that we were not when we came in. 17361da177e4SLinus Torvalds */ 17371da177e4SLinus Torvalds xfs_bmap_cancel(&free_list); 17381da177e4SLinus Torvalds return error; 17391da177e4SLinus Torvalds } 17401da177e4SLinus Torvalds 17411da177e4SLinus Torvalds /* 17421da177e4SLinus Torvalds * Duplicate the transaction that has the permanent 17431da177e4SLinus Torvalds * reservation and commit the old transaction. 17441da177e4SLinus Torvalds */ 1745f7c99b6fSEric Sandeen error = xfs_bmap_finish(tp, &free_list, &committed); 17461da177e4SLinus Torvalds ntp = *tp; 1747f6485057SDavid Chinner if (committed) { 1748f6485057SDavid Chinner /* link the inode into the next xact in the chain */ 1749f6485057SDavid Chinner xfs_trans_ijoin(ntp, ip, 1750f6485057SDavid Chinner XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1751f6485057SDavid Chinner xfs_trans_ihold(ntp, ip); 1752f6485057SDavid Chinner } 1753f6485057SDavid Chinner 17541da177e4SLinus Torvalds if (error) { 17551da177e4SLinus Torvalds /* 1756f6485057SDavid Chinner * If the bmap finish call encounters an error, return 1757f6485057SDavid Chinner * to the caller where the transaction can be properly 1758f6485057SDavid Chinner * aborted. We just need to make sure we're not 1759f6485057SDavid Chinner * holding any resources that we were not when we came 1760f6485057SDavid Chinner * in. 17611da177e4SLinus Torvalds * 1762f6485057SDavid Chinner * Aborting from this point might lose some blocks in 1763f6485057SDavid Chinner * the file system, but oh well. 17641da177e4SLinus Torvalds */ 17651da177e4SLinus Torvalds xfs_bmap_cancel(&free_list); 17661da177e4SLinus Torvalds return error; 17671da177e4SLinus Torvalds } 17681da177e4SLinus Torvalds 17691da177e4SLinus Torvalds if (committed) { 17701da177e4SLinus Torvalds /* 1771f6485057SDavid Chinner * Mark the inode dirty so it will be logged and 1772e5720eecSDavid Chinner * moved forward in the log as part of every commit. 17731da177e4SLinus Torvalds */ 17741da177e4SLinus Torvalds xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 17751da177e4SLinus Torvalds } 1776f6485057SDavid Chinner 17771da177e4SLinus Torvalds ntp = xfs_trans_dup(ntp); 1778e5720eecSDavid Chinner error = xfs_trans_commit(*tp, 0); 17791da177e4SLinus Torvalds *tp = ntp; 1780f6485057SDavid Chinner 1781f6485057SDavid Chinner /* link the inode into the next transaction in the chain */ 1782f6485057SDavid Chinner xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1783f6485057SDavid Chinner xfs_trans_ihold(ntp, ip); 1784f6485057SDavid Chinner 1785cc09c0dcSDave Chinner if (error) 1786cc09c0dcSDave Chinner return error; 1787cc09c0dcSDave Chinner /* 1788cc09c0dcSDave Chinner * transaction commit worked ok so we can drop the extra ticket 1789cc09c0dcSDave Chinner * reference that we gained in xfs_trans_dup() 1790cc09c0dcSDave Chinner */ 1791cc09c0dcSDave Chinner xfs_log_ticket_put(ntp->t_ticket); 1792f6485057SDavid Chinner error = xfs_trans_reserve(ntp, 0, 1793f6485057SDavid Chinner XFS_ITRUNCATE_LOG_RES(mp), 0, 17941da177e4SLinus Torvalds XFS_TRANS_PERM_LOG_RES, 17951da177e4SLinus Torvalds XFS_ITRUNCATE_LOG_COUNT); 17961da177e4SLinus Torvalds if (error) 1797f6485057SDavid Chinner return error; 17981da177e4SLinus Torvalds } 17991da177e4SLinus Torvalds /* 18001da177e4SLinus Torvalds * Only update the size in the case of the data fork, but 18011da177e4SLinus Torvalds * always re-log the inode so that our permanent transaction 18021da177e4SLinus Torvalds * can keep on rolling it forward in the log. 18031da177e4SLinus Torvalds */ 18041da177e4SLinus Torvalds if (fork == XFS_DATA_FORK) { 18051da177e4SLinus Torvalds xfs_isize_check(mp, ip, new_size); 1806ba87ea69SLachlan McIlroy /* 1807ba87ea69SLachlan McIlroy * If we are not changing the file size then do 1808ba87ea69SLachlan McIlroy * not update the on-disk file size - we may be 1809ba87ea69SLachlan McIlroy * called from xfs_inactive_free_eofblocks(). If we 1810ba87ea69SLachlan McIlroy * update the on-disk file size and then the system 1811ba87ea69SLachlan McIlroy * crashes before the contents of the file are 1812ba87ea69SLachlan McIlroy * flushed to disk then the files may be full of 1813ba87ea69SLachlan McIlroy * holes (ie NULL files bug). 1814ba87ea69SLachlan McIlroy */ 1815ba87ea69SLachlan McIlroy if (ip->i_size != new_size) { 18161da177e4SLinus Torvalds ip->i_d.di_size = new_size; 1817ba87ea69SLachlan McIlroy ip->i_size = new_size; 1818ba87ea69SLachlan McIlroy } 18191da177e4SLinus Torvalds } 18201da177e4SLinus Torvalds xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 18211da177e4SLinus Torvalds ASSERT((new_size != 0) || 18221da177e4SLinus Torvalds (fork == XFS_ATTR_FORK) || 18231da177e4SLinus Torvalds (ip->i_delayed_blks == 0)); 18241da177e4SLinus Torvalds ASSERT((new_size != 0) || 18251da177e4SLinus Torvalds (fork == XFS_ATTR_FORK) || 18261da177e4SLinus Torvalds (ip->i_d.di_nextents == 0)); 18271da177e4SLinus Torvalds xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 18281da177e4SLinus Torvalds return 0; 18291da177e4SLinus Torvalds } 18301da177e4SLinus Torvalds 18311da177e4SLinus Torvalds /* 18321da177e4SLinus Torvalds * This is called when the inode's link count goes to 0. 18331da177e4SLinus Torvalds * We place the on-disk inode on a list in the AGI. It 18341da177e4SLinus Torvalds * will be pulled from this list when the inode is freed. 18351da177e4SLinus Torvalds */ 18361da177e4SLinus Torvalds int 18371da177e4SLinus Torvalds xfs_iunlink( 18381da177e4SLinus Torvalds xfs_trans_t *tp, 18391da177e4SLinus Torvalds xfs_inode_t *ip) 18401da177e4SLinus Torvalds { 18411da177e4SLinus Torvalds xfs_mount_t *mp; 18421da177e4SLinus Torvalds xfs_agi_t *agi; 18431da177e4SLinus Torvalds xfs_dinode_t *dip; 18441da177e4SLinus Torvalds xfs_buf_t *agibp; 18451da177e4SLinus Torvalds xfs_buf_t *ibp; 18461da177e4SLinus Torvalds xfs_agino_t agino; 18471da177e4SLinus Torvalds short bucket_index; 18481da177e4SLinus Torvalds int offset; 18491da177e4SLinus Torvalds int error; 18501da177e4SLinus Torvalds 18511da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 18521da177e4SLinus Torvalds ASSERT(ip->i_d.di_mode != 0); 18531da177e4SLinus Torvalds ASSERT(ip->i_transp == tp); 18541da177e4SLinus Torvalds 18551da177e4SLinus Torvalds mp = tp->t_mountp; 18561da177e4SLinus Torvalds 18571da177e4SLinus Torvalds /* 18581da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 18591da177e4SLinus Torvalds * on the list. 18601da177e4SLinus Torvalds */ 1861*5e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); 1862859d7182SVlad Apostolov if (error) 18631da177e4SLinus Torvalds return error; 18641da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 1865*5e1be0fbSChristoph Hellwig 18661da177e4SLinus Torvalds /* 18671da177e4SLinus Torvalds * Get the index into the agi hash table for the 18681da177e4SLinus Torvalds * list this inode will go on. 18691da177e4SLinus Torvalds */ 18701da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 18711da177e4SLinus Torvalds ASSERT(agino != 0); 18721da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 18731da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 187416259e7dSChristoph Hellwig ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 18751da177e4SLinus Torvalds 187616259e7dSChristoph Hellwig if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { 18771da177e4SLinus Torvalds /* 18781da177e4SLinus Torvalds * There is already another inode in the bucket we need 18791da177e4SLinus Torvalds * to add ourselves to. Add us at the front of the list. 18801da177e4SLinus Torvalds * Here we put the head pointer into our next pointer, 18811da177e4SLinus Torvalds * and then we fall through to point the head at us. 18821da177e4SLinus Torvalds */ 1883a3f74ffbSDavid Chinner error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1884c319b58bSVlad Apostolov if (error) 1885c319b58bSVlad Apostolov return error; 1886c319b58bSVlad Apostolov 1887347d1c01SChristoph Hellwig ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); 18881da177e4SLinus Torvalds /* both on-disk, don't endian flip twice */ 18891da177e4SLinus Torvalds dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 18901da177e4SLinus Torvalds offset = ip->i_boffset + 18911da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 18921da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 18931da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 18941da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 18951da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 18961da177e4SLinus Torvalds } 18971da177e4SLinus Torvalds 18981da177e4SLinus Torvalds /* 18991da177e4SLinus Torvalds * Point the bucket head pointer at the inode being inserted. 19001da177e4SLinus Torvalds */ 19011da177e4SLinus Torvalds ASSERT(agino != 0); 190216259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 19031da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 19041da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 19051da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 19061da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 19071da177e4SLinus Torvalds return 0; 19081da177e4SLinus Torvalds } 19091da177e4SLinus Torvalds 19101da177e4SLinus Torvalds /* 19111da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 19121da177e4SLinus Torvalds */ 19131da177e4SLinus Torvalds STATIC int 19141da177e4SLinus Torvalds xfs_iunlink_remove( 19151da177e4SLinus Torvalds xfs_trans_t *tp, 19161da177e4SLinus Torvalds xfs_inode_t *ip) 19171da177e4SLinus Torvalds { 19181da177e4SLinus Torvalds xfs_ino_t next_ino; 19191da177e4SLinus Torvalds xfs_mount_t *mp; 19201da177e4SLinus Torvalds xfs_agi_t *agi; 19211da177e4SLinus Torvalds xfs_dinode_t *dip; 19221da177e4SLinus Torvalds xfs_buf_t *agibp; 19231da177e4SLinus Torvalds xfs_buf_t *ibp; 19241da177e4SLinus Torvalds xfs_agnumber_t agno; 19251da177e4SLinus Torvalds xfs_agino_t agino; 19261da177e4SLinus Torvalds xfs_agino_t next_agino; 19271da177e4SLinus Torvalds xfs_buf_t *last_ibp; 19286fdf8cccSNathan Scott xfs_dinode_t *last_dip = NULL; 19291da177e4SLinus Torvalds short bucket_index; 19306fdf8cccSNathan Scott int offset, last_offset = 0; 19311da177e4SLinus Torvalds int error; 19321da177e4SLinus Torvalds 19331da177e4SLinus Torvalds mp = tp->t_mountp; 19341da177e4SLinus Torvalds agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 19351da177e4SLinus Torvalds 19361da177e4SLinus Torvalds /* 19371da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 19381da177e4SLinus Torvalds * on the list. 19391da177e4SLinus Torvalds */ 1940*5e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, agno, &agibp); 1941*5e1be0fbSChristoph Hellwig if (error) 19421da177e4SLinus Torvalds return error; 1943*5e1be0fbSChristoph Hellwig 19441da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 1945*5e1be0fbSChristoph Hellwig 19461da177e4SLinus Torvalds /* 19471da177e4SLinus Torvalds * Get the index into the agi hash table for the 19481da177e4SLinus Torvalds * list this inode will go on. 19491da177e4SLinus Torvalds */ 19501da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 19511da177e4SLinus Torvalds ASSERT(agino != 0); 19521da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 195316259e7dSChristoph Hellwig ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); 19541da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 19551da177e4SLinus Torvalds 195616259e7dSChristoph Hellwig if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 19571da177e4SLinus Torvalds /* 19581da177e4SLinus Torvalds * We're at the head of the list. Get the inode's 19591da177e4SLinus Torvalds * on-disk buffer to see if there is anyone after us 19601da177e4SLinus Torvalds * on the list. Only modify our next pointer if it 19611da177e4SLinus Torvalds * is not already NULLAGINO. This saves us the overhead 19621da177e4SLinus Torvalds * of dealing with the buffer when there is no need to 19631da177e4SLinus Torvalds * change it. 19641da177e4SLinus Torvalds */ 1965a3f74ffbSDavid Chinner error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 19661da177e4SLinus Torvalds if (error) { 19671da177e4SLinus Torvalds cmn_err(CE_WARN, 19681da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 19691da177e4SLinus Torvalds error, mp->m_fsname); 19701da177e4SLinus Torvalds return error; 19711da177e4SLinus Torvalds } 1972347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 19731da177e4SLinus Torvalds ASSERT(next_agino != 0); 19741da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1975347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 19761da177e4SLinus Torvalds offset = ip->i_boffset + 19771da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 19781da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 19791da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 19801da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 19811da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 19821da177e4SLinus Torvalds } else { 19831da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 19841da177e4SLinus Torvalds } 19851da177e4SLinus Torvalds /* 19861da177e4SLinus Torvalds * Point the bucket head pointer at the next inode. 19871da177e4SLinus Torvalds */ 19881da177e4SLinus Torvalds ASSERT(next_agino != 0); 19891da177e4SLinus Torvalds ASSERT(next_agino != agino); 199016259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 19911da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 19921da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 19931da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 19941da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 19951da177e4SLinus Torvalds } else { 19961da177e4SLinus Torvalds /* 19971da177e4SLinus Torvalds * We need to search the list for the inode being freed. 19981da177e4SLinus Torvalds */ 199916259e7dSChristoph Hellwig next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 20001da177e4SLinus Torvalds last_ibp = NULL; 20011da177e4SLinus Torvalds while (next_agino != agino) { 20021da177e4SLinus Torvalds /* 20031da177e4SLinus Torvalds * If the last inode wasn't the one pointing to 20041da177e4SLinus Torvalds * us, then release its buffer since we're not 20051da177e4SLinus Torvalds * going to do anything with it. 20061da177e4SLinus Torvalds */ 20071da177e4SLinus Torvalds if (last_ibp != NULL) { 20081da177e4SLinus Torvalds xfs_trans_brelse(tp, last_ibp); 20091da177e4SLinus Torvalds } 20101da177e4SLinus Torvalds next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 20111da177e4SLinus Torvalds error = xfs_inotobp(mp, tp, next_ino, &last_dip, 2012c679eef0SChristoph Hellwig &last_ibp, &last_offset, 0); 20131da177e4SLinus Torvalds if (error) { 20141da177e4SLinus Torvalds cmn_err(CE_WARN, 20151da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 20161da177e4SLinus Torvalds error, mp->m_fsname); 20171da177e4SLinus Torvalds return error; 20181da177e4SLinus Torvalds } 2019347d1c01SChristoph Hellwig next_agino = be32_to_cpu(last_dip->di_next_unlinked); 20201da177e4SLinus Torvalds ASSERT(next_agino != NULLAGINO); 20211da177e4SLinus Torvalds ASSERT(next_agino != 0); 20221da177e4SLinus Torvalds } 20231da177e4SLinus Torvalds /* 20241da177e4SLinus Torvalds * Now last_ibp points to the buffer previous to us on 20251da177e4SLinus Torvalds * the unlinked list. Pull us from the list. 20261da177e4SLinus Torvalds */ 2027a3f74ffbSDavid Chinner error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 20281da177e4SLinus Torvalds if (error) { 20291da177e4SLinus Torvalds cmn_err(CE_WARN, 20301da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 20311da177e4SLinus Torvalds error, mp->m_fsname); 20321da177e4SLinus Torvalds return error; 20331da177e4SLinus Torvalds } 2034347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 20351da177e4SLinus Torvalds ASSERT(next_agino != 0); 20361da177e4SLinus Torvalds ASSERT(next_agino != agino); 20371da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 2038347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 20391da177e4SLinus Torvalds offset = ip->i_boffset + 20401da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 20411da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 20421da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 20431da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 20441da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 20451da177e4SLinus Torvalds } else { 20461da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 20471da177e4SLinus Torvalds } 20481da177e4SLinus Torvalds /* 20491da177e4SLinus Torvalds * Point the previous inode on the list to the next inode. 20501da177e4SLinus Torvalds */ 2051347d1c01SChristoph Hellwig last_dip->di_next_unlinked = cpu_to_be32(next_agino); 20521da177e4SLinus Torvalds ASSERT(next_agino != 0); 20531da177e4SLinus Torvalds offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 20541da177e4SLinus Torvalds xfs_trans_inode_buf(tp, last_ibp); 20551da177e4SLinus Torvalds xfs_trans_log_buf(tp, last_ibp, offset, 20561da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 20571da177e4SLinus Torvalds xfs_inobp_check(mp, last_ibp); 20581da177e4SLinus Torvalds } 20591da177e4SLinus Torvalds return 0; 20601da177e4SLinus Torvalds } 20611da177e4SLinus Torvalds 2062ba0f32d4SChristoph Hellwig STATIC void 20631da177e4SLinus Torvalds xfs_ifree_cluster( 20641da177e4SLinus Torvalds xfs_inode_t *free_ip, 20651da177e4SLinus Torvalds xfs_trans_t *tp, 20661da177e4SLinus Torvalds xfs_ino_t inum) 20671da177e4SLinus Torvalds { 20681da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 20691da177e4SLinus Torvalds int blks_per_cluster; 20701da177e4SLinus Torvalds int nbufs; 20711da177e4SLinus Torvalds int ninodes; 20721da177e4SLinus Torvalds int i, j, found, pre_flushed; 20731da177e4SLinus Torvalds xfs_daddr_t blkno; 20741da177e4SLinus Torvalds xfs_buf_t *bp; 20751da177e4SLinus Torvalds xfs_inode_t *ip, **ip_found; 20761da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 20771da177e4SLinus Torvalds xfs_log_item_t *lip; 2078da353b0dSDavid Chinner xfs_perag_t *pag = xfs_get_perag(mp, inum); 20791da177e4SLinus Torvalds 20801da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 20811da177e4SLinus Torvalds blks_per_cluster = 1; 20821da177e4SLinus Torvalds ninodes = mp->m_sb.sb_inopblock; 20831da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 20841da177e4SLinus Torvalds } else { 20851da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 20861da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 20871da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 20881da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 20891da177e4SLinus Torvalds } 20901da177e4SLinus Torvalds 20911da177e4SLinus Torvalds ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); 20921da177e4SLinus Torvalds 20931da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 20941da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 20951da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 20961da177e4SLinus Torvalds 20971da177e4SLinus Torvalds 20981da177e4SLinus Torvalds /* 20991da177e4SLinus Torvalds * Look for each inode in memory and attempt to lock it, 21001da177e4SLinus Torvalds * we can be racing with flush and tail pushing here. 21011da177e4SLinus Torvalds * any inode we get the locks on, add to an array of 21021da177e4SLinus Torvalds * inode items to process later. 21031da177e4SLinus Torvalds * 21041da177e4SLinus Torvalds * The get the buffer lock, we could beat a flush 21051da177e4SLinus Torvalds * or tail pushing thread to the lock here, in which 21061da177e4SLinus Torvalds * case they will go looking for the inode buffer 21071da177e4SLinus Torvalds * and fail, we need some other form of interlock 21081da177e4SLinus Torvalds * here. 21091da177e4SLinus Torvalds */ 21101da177e4SLinus Torvalds found = 0; 21111da177e4SLinus Torvalds for (i = 0; i < ninodes; i++) { 2112da353b0dSDavid Chinner read_lock(&pag->pag_ici_lock); 2113da353b0dSDavid Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 2114da353b0dSDavid Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 21151da177e4SLinus Torvalds 21161da177e4SLinus Torvalds /* Inode not in memory or we found it already, 21171da177e4SLinus Torvalds * nothing to do 21181da177e4SLinus Torvalds */ 21197a18c386SDavid Chinner if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { 2120da353b0dSDavid Chinner read_unlock(&pag->pag_ici_lock); 21211da177e4SLinus Torvalds continue; 21221da177e4SLinus Torvalds } 21231da177e4SLinus Torvalds 21241da177e4SLinus Torvalds if (xfs_inode_clean(ip)) { 2125da353b0dSDavid Chinner read_unlock(&pag->pag_ici_lock); 21261da177e4SLinus Torvalds continue; 21271da177e4SLinus Torvalds } 21281da177e4SLinus Torvalds 21291da177e4SLinus Torvalds /* If we can get the locks then add it to the 21301da177e4SLinus Torvalds * list, otherwise by the time we get the bp lock 21311da177e4SLinus Torvalds * below it will already be attached to the 21321da177e4SLinus Torvalds * inode buffer. 21331da177e4SLinus Torvalds */ 21341da177e4SLinus Torvalds 21351da177e4SLinus Torvalds /* This inode will already be locked - by us, lets 21361da177e4SLinus Torvalds * keep it that way. 21371da177e4SLinus Torvalds */ 21381da177e4SLinus Torvalds 21391da177e4SLinus Torvalds if (ip == free_ip) { 21401da177e4SLinus Torvalds if (xfs_iflock_nowait(ip)) { 21417a18c386SDavid Chinner xfs_iflags_set(ip, XFS_ISTALE); 21421da177e4SLinus Torvalds if (xfs_inode_clean(ip)) { 21431da177e4SLinus Torvalds xfs_ifunlock(ip); 21441da177e4SLinus Torvalds } else { 21451da177e4SLinus Torvalds ip_found[found++] = ip; 21461da177e4SLinus Torvalds } 21471da177e4SLinus Torvalds } 2148da353b0dSDavid Chinner read_unlock(&pag->pag_ici_lock); 21491da177e4SLinus Torvalds continue; 21501da177e4SLinus Torvalds } 21511da177e4SLinus Torvalds 21521da177e4SLinus Torvalds if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 21531da177e4SLinus Torvalds if (xfs_iflock_nowait(ip)) { 21547a18c386SDavid Chinner xfs_iflags_set(ip, XFS_ISTALE); 21551da177e4SLinus Torvalds 21561da177e4SLinus Torvalds if (xfs_inode_clean(ip)) { 21571da177e4SLinus Torvalds xfs_ifunlock(ip); 21581da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 21591da177e4SLinus Torvalds } else { 21601da177e4SLinus Torvalds ip_found[found++] = ip; 21611da177e4SLinus Torvalds } 21621da177e4SLinus Torvalds } else { 21631da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 21641da177e4SLinus Torvalds } 21651da177e4SLinus Torvalds } 2166da353b0dSDavid Chinner read_unlock(&pag->pag_ici_lock); 21671da177e4SLinus Torvalds } 21681da177e4SLinus Torvalds 21691da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 21701da177e4SLinus Torvalds mp->m_bsize * blks_per_cluster, 21711da177e4SLinus Torvalds XFS_BUF_LOCK); 21721da177e4SLinus Torvalds 21731da177e4SLinus Torvalds pre_flushed = 0; 21741da177e4SLinus Torvalds lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 21751da177e4SLinus Torvalds while (lip) { 21761da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 21771da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 21781da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 21791da177e4SLinus Torvalds lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 21807b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 21817b2e2a31SDavid Chinner &iip->ili_flush_lsn, 21827b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 2183e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 21841da177e4SLinus Torvalds pre_flushed++; 21851da177e4SLinus Torvalds } 21861da177e4SLinus Torvalds lip = lip->li_bio_list; 21871da177e4SLinus Torvalds } 21881da177e4SLinus Torvalds 21891da177e4SLinus Torvalds for (i = 0; i < found; i++) { 21901da177e4SLinus Torvalds ip = ip_found[i]; 21911da177e4SLinus Torvalds iip = ip->i_itemp; 21921da177e4SLinus Torvalds 21931da177e4SLinus Torvalds if (!iip) { 21941da177e4SLinus Torvalds ip->i_update_core = 0; 21951da177e4SLinus Torvalds xfs_ifunlock(ip); 21961da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 21971da177e4SLinus Torvalds continue; 21981da177e4SLinus Torvalds } 21991da177e4SLinus Torvalds 22001da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 22011da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 22021da177e4SLinus Torvalds iip->ili_logged = 1; 22037b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 22047b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 22051da177e4SLinus Torvalds 22061da177e4SLinus Torvalds xfs_buf_attach_iodone(bp, 22071da177e4SLinus Torvalds (void(*)(xfs_buf_t*,xfs_log_item_t*)) 22081da177e4SLinus Torvalds xfs_istale_done, (xfs_log_item_t *)iip); 22091da177e4SLinus Torvalds if (ip != free_ip) { 22101da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22111da177e4SLinus Torvalds } 22121da177e4SLinus Torvalds } 22131da177e4SLinus Torvalds 22141da177e4SLinus Torvalds if (found || pre_flushed) 22151da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 22161da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 22171da177e4SLinus Torvalds } 22181da177e4SLinus Torvalds 2219f0e2d93cSDenys Vlasenko kmem_free(ip_found); 2220da353b0dSDavid Chinner xfs_put_perag(mp, pag); 22211da177e4SLinus Torvalds } 22221da177e4SLinus Torvalds 22231da177e4SLinus Torvalds /* 22241da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 22251da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 22261da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 22271da177e4SLinus Torvalds * the inode is already a part of the transaction. 22281da177e4SLinus Torvalds * 22291da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 22301da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 22311da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 22321da177e4SLinus Torvalds */ 22331da177e4SLinus Torvalds int 22341da177e4SLinus Torvalds xfs_ifree( 22351da177e4SLinus Torvalds xfs_trans_t *tp, 22361da177e4SLinus Torvalds xfs_inode_t *ip, 22371da177e4SLinus Torvalds xfs_bmap_free_t *flist) 22381da177e4SLinus Torvalds { 22391da177e4SLinus Torvalds int error; 22401da177e4SLinus Torvalds int delete; 22411da177e4SLinus Torvalds xfs_ino_t first_ino; 2242c319b58bSVlad Apostolov xfs_dinode_t *dip; 2243c319b58bSVlad Apostolov xfs_buf_t *ibp; 22441da177e4SLinus Torvalds 2245579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 22461da177e4SLinus Torvalds ASSERT(ip->i_transp == tp); 22471da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 22481da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 22491da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 2250ba87ea69SLachlan McIlroy ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || 22511da177e4SLinus Torvalds ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 22521da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 22531da177e4SLinus Torvalds 22541da177e4SLinus Torvalds /* 22551da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 22561da177e4SLinus Torvalds */ 22571da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 22581da177e4SLinus Torvalds if (error != 0) { 22591da177e4SLinus Torvalds return error; 22601da177e4SLinus Torvalds } 22611da177e4SLinus Torvalds 22621da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 22631da177e4SLinus Torvalds if (error != 0) { 22641da177e4SLinus Torvalds return error; 22651da177e4SLinus Torvalds } 22661da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 22671da177e4SLinus Torvalds ip->i_d.di_flags = 0; 22681da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 22691da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 22701da177e4SLinus Torvalds ip->i_df.if_ext_max = 22711da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 22721da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 22731da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 22741da177e4SLinus Torvalds /* 22751da177e4SLinus Torvalds * Bump the generation count so no one will be confused 22761da177e4SLinus Torvalds * by reincarnations of this inode. 22771da177e4SLinus Torvalds */ 22781da177e4SLinus Torvalds ip->i_d.di_gen++; 2279c319b58bSVlad Apostolov 22801da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 22811da177e4SLinus Torvalds 2282a3f74ffbSDavid Chinner error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 2283c319b58bSVlad Apostolov if (error) 2284c319b58bSVlad Apostolov return error; 2285c319b58bSVlad Apostolov 2286c319b58bSVlad Apostolov /* 2287c319b58bSVlad Apostolov * Clear the on-disk di_mode. This is to prevent xfs_bulkstat 2288c319b58bSVlad Apostolov * from picking up this inode when it is reclaimed (its incore state 2289c319b58bSVlad Apostolov * initialzed but not flushed to disk yet). The in-core di_mode is 2290c319b58bSVlad Apostolov * already cleared and a corresponding transaction logged. 2291c319b58bSVlad Apostolov * The hack here just synchronizes the in-core to on-disk 2292c319b58bSVlad Apostolov * di_mode value in advance before the actual inode sync to disk. 2293c319b58bSVlad Apostolov * This is OK because the inode is already unlinked and would never 2294c319b58bSVlad Apostolov * change its di_mode again for this inode generation. 2295c319b58bSVlad Apostolov * This is a temporary hack that would require a proper fix 2296c319b58bSVlad Apostolov * in the future. 2297c319b58bSVlad Apostolov */ 2298c319b58bSVlad Apostolov dip->di_core.di_mode = 0; 2299c319b58bSVlad Apostolov 23001da177e4SLinus Torvalds if (delete) { 23011da177e4SLinus Torvalds xfs_ifree_cluster(ip, tp, first_ino); 23021da177e4SLinus Torvalds } 23031da177e4SLinus Torvalds 23041da177e4SLinus Torvalds return 0; 23051da177e4SLinus Torvalds } 23061da177e4SLinus Torvalds 23071da177e4SLinus Torvalds /* 23081da177e4SLinus Torvalds * Reallocate the space for if_broot based on the number of records 23091da177e4SLinus Torvalds * being added or deleted as indicated in rec_diff. Move the records 23101da177e4SLinus Torvalds * and pointers in if_broot to fit the new size. When shrinking this 23111da177e4SLinus Torvalds * will eliminate holes between the records and pointers created by 23121da177e4SLinus Torvalds * the caller. When growing this will create holes to be filled in 23131da177e4SLinus Torvalds * by the caller. 23141da177e4SLinus Torvalds * 23151da177e4SLinus Torvalds * The caller must not request to add more records than would fit in 23161da177e4SLinus Torvalds * the on-disk inode root. If the if_broot is currently NULL, then 23171da177e4SLinus Torvalds * if we adding records one will be allocated. The caller must also 23181da177e4SLinus Torvalds * not request that the number of records go below zero, although 23191da177e4SLinus Torvalds * it can go to zero. 23201da177e4SLinus Torvalds * 23211da177e4SLinus Torvalds * ip -- the inode whose if_broot area is changing 23221da177e4SLinus Torvalds * ext_diff -- the change in the number of records, positive or negative, 23231da177e4SLinus Torvalds * requested for the if_broot array. 23241da177e4SLinus Torvalds */ 23251da177e4SLinus Torvalds void 23261da177e4SLinus Torvalds xfs_iroot_realloc( 23271da177e4SLinus Torvalds xfs_inode_t *ip, 23281da177e4SLinus Torvalds int rec_diff, 23291da177e4SLinus Torvalds int whichfork) 23301da177e4SLinus Torvalds { 233160197e8dSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 23321da177e4SLinus Torvalds int cur_max; 23331da177e4SLinus Torvalds xfs_ifork_t *ifp; 23347cc95a82SChristoph Hellwig struct xfs_btree_block *new_broot; 23351da177e4SLinus Torvalds int new_max; 23361da177e4SLinus Torvalds size_t new_size; 23371da177e4SLinus Torvalds char *np; 23381da177e4SLinus Torvalds char *op; 23391da177e4SLinus Torvalds 23401da177e4SLinus Torvalds /* 23411da177e4SLinus Torvalds * Handle the degenerate case quietly. 23421da177e4SLinus Torvalds */ 23431da177e4SLinus Torvalds if (rec_diff == 0) { 23441da177e4SLinus Torvalds return; 23451da177e4SLinus Torvalds } 23461da177e4SLinus Torvalds 23471da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 23481da177e4SLinus Torvalds if (rec_diff > 0) { 23491da177e4SLinus Torvalds /* 23501da177e4SLinus Torvalds * If there wasn't any memory allocated before, just 23511da177e4SLinus Torvalds * allocate it now and get out. 23521da177e4SLinus Torvalds */ 23531da177e4SLinus Torvalds if (ifp->if_broot_bytes == 0) { 23541da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 23557cc95a82SChristoph Hellwig ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); 23561da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 23571da177e4SLinus Torvalds return; 23581da177e4SLinus Torvalds } 23591da177e4SLinus Torvalds 23601da177e4SLinus Torvalds /* 23611da177e4SLinus Torvalds * If there is already an existing if_broot, then we need 23621da177e4SLinus Torvalds * to realloc() it and shift the pointers to their new 23631da177e4SLinus Torvalds * location. The records don't change location because 23641da177e4SLinus Torvalds * they are kept butted up against the btree block header. 23651da177e4SLinus Torvalds */ 236660197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 23671da177e4SLinus Torvalds new_max = cur_max + rec_diff; 23681da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 23697cc95a82SChristoph Hellwig ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 23701da177e4SLinus Torvalds (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 23711da177e4SLinus Torvalds KM_SLEEP); 237260197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 23731da177e4SLinus Torvalds ifp->if_broot_bytes); 237460197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 23751da177e4SLinus Torvalds (int)new_size); 23761da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 23771da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 23781da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 23791da177e4SLinus Torvalds memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 23801da177e4SLinus Torvalds return; 23811da177e4SLinus Torvalds } 23821da177e4SLinus Torvalds 23831da177e4SLinus Torvalds /* 23841da177e4SLinus Torvalds * rec_diff is less than 0. In this case, we are shrinking the 23851da177e4SLinus Torvalds * if_broot buffer. It must already exist. If we go to zero 23861da177e4SLinus Torvalds * records, just get rid of the root and clear the status bit. 23871da177e4SLinus Torvalds */ 23881da177e4SLinus Torvalds ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 238960197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 23901da177e4SLinus Torvalds new_max = cur_max + rec_diff; 23911da177e4SLinus Torvalds ASSERT(new_max >= 0); 23921da177e4SLinus Torvalds if (new_max > 0) 23931da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 23941da177e4SLinus Torvalds else 23951da177e4SLinus Torvalds new_size = 0; 23961da177e4SLinus Torvalds if (new_size > 0) { 23977cc95a82SChristoph Hellwig new_broot = kmem_alloc(new_size, KM_SLEEP); 23981da177e4SLinus Torvalds /* 23991da177e4SLinus Torvalds * First copy over the btree block header. 24001da177e4SLinus Torvalds */ 24017cc95a82SChristoph Hellwig memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); 24021da177e4SLinus Torvalds } else { 24031da177e4SLinus Torvalds new_broot = NULL; 24041da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFBROOT; 24051da177e4SLinus Torvalds } 24061da177e4SLinus Torvalds 24071da177e4SLinus Torvalds /* 24081da177e4SLinus Torvalds * Only copy the records and pointers if there are any. 24091da177e4SLinus Torvalds */ 24101da177e4SLinus Torvalds if (new_max > 0) { 24111da177e4SLinus Torvalds /* 24121da177e4SLinus Torvalds * First copy the records. 24131da177e4SLinus Torvalds */ 2414136341b4SChristoph Hellwig op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); 2415136341b4SChristoph Hellwig np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); 24161da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 24171da177e4SLinus Torvalds 24181da177e4SLinus Torvalds /* 24191da177e4SLinus Torvalds * Then copy the pointers. 24201da177e4SLinus Torvalds */ 242160197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 24221da177e4SLinus Torvalds ifp->if_broot_bytes); 242360197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, 24241da177e4SLinus Torvalds (int)new_size); 24251da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 24261da177e4SLinus Torvalds } 2427f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 24281da177e4SLinus Torvalds ifp->if_broot = new_broot; 24291da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 24301da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 24311da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 24321da177e4SLinus Torvalds return; 24331da177e4SLinus Torvalds } 24341da177e4SLinus Torvalds 24351da177e4SLinus Torvalds 24361da177e4SLinus Torvalds /* 24371da177e4SLinus Torvalds * This is called when the amount of space needed for if_data 24381da177e4SLinus Torvalds * is increased or decreased. The change in size is indicated by 24391da177e4SLinus Torvalds * the number of bytes that need to be added or deleted in the 24401da177e4SLinus Torvalds * byte_diff parameter. 24411da177e4SLinus Torvalds * 24421da177e4SLinus Torvalds * If the amount of space needed has decreased below the size of the 24431da177e4SLinus Torvalds * inline buffer, then switch to using the inline buffer. Otherwise, 24441da177e4SLinus Torvalds * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 24451da177e4SLinus Torvalds * to what is needed. 24461da177e4SLinus Torvalds * 24471da177e4SLinus Torvalds * ip -- the inode whose if_data area is changing 24481da177e4SLinus Torvalds * byte_diff -- the change in the number of bytes, positive or negative, 24491da177e4SLinus Torvalds * requested for the if_data array. 24501da177e4SLinus Torvalds */ 24511da177e4SLinus Torvalds void 24521da177e4SLinus Torvalds xfs_idata_realloc( 24531da177e4SLinus Torvalds xfs_inode_t *ip, 24541da177e4SLinus Torvalds int byte_diff, 24551da177e4SLinus Torvalds int whichfork) 24561da177e4SLinus Torvalds { 24571da177e4SLinus Torvalds xfs_ifork_t *ifp; 24581da177e4SLinus Torvalds int new_size; 24591da177e4SLinus Torvalds int real_size; 24601da177e4SLinus Torvalds 24611da177e4SLinus Torvalds if (byte_diff == 0) { 24621da177e4SLinus Torvalds return; 24631da177e4SLinus Torvalds } 24641da177e4SLinus Torvalds 24651da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 24661da177e4SLinus Torvalds new_size = (int)ifp->if_bytes + byte_diff; 24671da177e4SLinus Torvalds ASSERT(new_size >= 0); 24681da177e4SLinus Torvalds 24691da177e4SLinus Torvalds if (new_size == 0) { 24701da177e4SLinus Torvalds if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2471f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 24721da177e4SLinus Torvalds } 24731da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 24741da177e4SLinus Torvalds real_size = 0; 24751da177e4SLinus Torvalds } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 24761da177e4SLinus Torvalds /* 24771da177e4SLinus Torvalds * If the valid extents/data can fit in if_inline_ext/data, 24781da177e4SLinus Torvalds * copy them from the malloc'd vector and free it. 24791da177e4SLinus Torvalds */ 24801da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 24811da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 24821da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 24831da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 24841da177e4SLinus Torvalds memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 24851da177e4SLinus Torvalds new_size); 2486f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 24871da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 24881da177e4SLinus Torvalds } 24891da177e4SLinus Torvalds real_size = 0; 24901da177e4SLinus Torvalds } else { 24911da177e4SLinus Torvalds /* 24921da177e4SLinus Torvalds * Stuck with malloc/realloc. 24931da177e4SLinus Torvalds * For inline data, the underlying buffer must be 24941da177e4SLinus Torvalds * a multiple of 4 bytes in size so that it can be 24951da177e4SLinus Torvalds * logged and stay on word boundaries. We enforce 24961da177e4SLinus Torvalds * that here. 24971da177e4SLinus Torvalds */ 24981da177e4SLinus Torvalds real_size = roundup(new_size, 4); 24991da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 25001da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 25011da177e4SLinus Torvalds ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 25021da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 25031da177e4SLinus Torvalds /* 25041da177e4SLinus Torvalds * Only do the realloc if the underlying size 25051da177e4SLinus Torvalds * is really changing. 25061da177e4SLinus Torvalds */ 25071da177e4SLinus Torvalds if (ifp->if_real_bytes != real_size) { 25081da177e4SLinus Torvalds ifp->if_u1.if_data = 25091da177e4SLinus Torvalds kmem_realloc(ifp->if_u1.if_data, 25101da177e4SLinus Torvalds real_size, 25111da177e4SLinus Torvalds ifp->if_real_bytes, 25121da177e4SLinus Torvalds KM_SLEEP); 25131da177e4SLinus Torvalds } 25141da177e4SLinus Torvalds } else { 25151da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 25161da177e4SLinus Torvalds ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 25171da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 25181da177e4SLinus Torvalds ifp->if_bytes); 25191da177e4SLinus Torvalds } 25201da177e4SLinus Torvalds } 25211da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 25221da177e4SLinus Torvalds ifp->if_bytes = new_size; 25231da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 25241da177e4SLinus Torvalds } 25251da177e4SLinus Torvalds 25261da177e4SLinus Torvalds 25271da177e4SLinus Torvalds 25281da177e4SLinus Torvalds 25291da177e4SLinus Torvalds /* 25301da177e4SLinus Torvalds * Map inode to disk block and offset. 25311da177e4SLinus Torvalds * 25321da177e4SLinus Torvalds * mp -- the mount point structure for the current file system 25331da177e4SLinus Torvalds * tp -- the current transaction 25341da177e4SLinus Torvalds * ino -- the inode number of the inode to be located 25351da177e4SLinus Torvalds * imap -- this structure is filled in with the information necessary 25361da177e4SLinus Torvalds * to retrieve the given inode from disk 25371da177e4SLinus Torvalds * flags -- flags to pass to xfs_dilocate indicating whether or not 25381da177e4SLinus Torvalds * lookups in the inode btree were OK or not 25391da177e4SLinus Torvalds */ 25401da177e4SLinus Torvalds int 25411da177e4SLinus Torvalds xfs_imap( 25421da177e4SLinus Torvalds xfs_mount_t *mp, 25431da177e4SLinus Torvalds xfs_trans_t *tp, 25441da177e4SLinus Torvalds xfs_ino_t ino, 25451da177e4SLinus Torvalds xfs_imap_t *imap, 25461da177e4SLinus Torvalds uint flags) 25471da177e4SLinus Torvalds { 25481da177e4SLinus Torvalds xfs_fsblock_t fsbno; 25491da177e4SLinus Torvalds int len; 25501da177e4SLinus Torvalds int off; 25511da177e4SLinus Torvalds int error; 25521da177e4SLinus Torvalds 25531da177e4SLinus Torvalds fsbno = imap->im_blkno ? 25541da177e4SLinus Torvalds XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; 25551da177e4SLinus Torvalds error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); 25564ae29b43SDavid Chinner if (error) 25571da177e4SLinus Torvalds return error; 25584ae29b43SDavid Chinner 25591da177e4SLinus Torvalds imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); 25601da177e4SLinus Torvalds imap->im_len = XFS_FSB_TO_BB(mp, len); 25611da177e4SLinus Torvalds imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); 25621da177e4SLinus Torvalds imap->im_ioffset = (ushort)off; 25631da177e4SLinus Torvalds imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); 25644ae29b43SDavid Chinner 25654ae29b43SDavid Chinner /* 25664ae29b43SDavid Chinner * If the inode number maps to a block outside the bounds 25674ae29b43SDavid Chinner * of the file system then return NULL rather than calling 25684ae29b43SDavid Chinner * read_buf and panicing when we get an error from the 25694ae29b43SDavid Chinner * driver. 25704ae29b43SDavid Chinner */ 25714ae29b43SDavid Chinner if ((imap->im_blkno + imap->im_len) > 25724ae29b43SDavid Chinner XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 25734ae29b43SDavid Chinner xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 25744ae29b43SDavid Chinner "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " 25754ae29b43SDavid Chinner " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", 25764ae29b43SDavid Chinner (unsigned long long) imap->im_blkno, 25774ae29b43SDavid Chinner (unsigned long long) imap->im_len, 25784ae29b43SDavid Chinner XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 25794ae29b43SDavid Chinner return EINVAL; 25804ae29b43SDavid Chinner } 25811da177e4SLinus Torvalds return 0; 25821da177e4SLinus Torvalds } 25831da177e4SLinus Torvalds 25841da177e4SLinus Torvalds void 25851da177e4SLinus Torvalds xfs_idestroy_fork( 25861da177e4SLinus Torvalds xfs_inode_t *ip, 25871da177e4SLinus Torvalds int whichfork) 25881da177e4SLinus Torvalds { 25891da177e4SLinus Torvalds xfs_ifork_t *ifp; 25901da177e4SLinus Torvalds 25911da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 25921da177e4SLinus Torvalds if (ifp->if_broot != NULL) { 2593f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 25941da177e4SLinus Torvalds ifp->if_broot = NULL; 25951da177e4SLinus Torvalds } 25961da177e4SLinus Torvalds 25971da177e4SLinus Torvalds /* 25981da177e4SLinus Torvalds * If the format is local, then we can't have an extents 25991da177e4SLinus Torvalds * array so just look for an inline data array. If we're 26001da177e4SLinus Torvalds * not local then we may or may not have an extents list, 26011da177e4SLinus Torvalds * so check and free it up if we do. 26021da177e4SLinus Torvalds */ 26031da177e4SLinus Torvalds if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 26041da177e4SLinus Torvalds if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 26051da177e4SLinus Torvalds (ifp->if_u1.if_data != NULL)) { 26061da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 2607f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 26081da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 26091da177e4SLinus Torvalds ifp->if_real_bytes = 0; 26101da177e4SLinus Torvalds } 26111da177e4SLinus Torvalds } else if ((ifp->if_flags & XFS_IFEXTENTS) && 26120293ce3aSMandy Kirkconnell ((ifp->if_flags & XFS_IFEXTIREC) || 26130293ce3aSMandy Kirkconnell ((ifp->if_u1.if_extents != NULL) && 26140293ce3aSMandy Kirkconnell (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 26151da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 26164eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 26171da177e4SLinus Torvalds } 26181da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents == NULL || 26191da177e4SLinus Torvalds ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 26201da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 26211da177e4SLinus Torvalds if (whichfork == XFS_ATTR_FORK) { 26221da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 26231da177e4SLinus Torvalds ip->i_afp = NULL; 26241da177e4SLinus Torvalds } 26251da177e4SLinus Torvalds } 26261da177e4SLinus Torvalds 26271da177e4SLinus Torvalds /* 26281da177e4SLinus Torvalds * This is called free all the memory associated with an inode. 26291da177e4SLinus Torvalds * It must free the inode itself and any buffers allocated for 26301da177e4SLinus Torvalds * if_extents/if_data and if_broot. It must also free the lock 26311da177e4SLinus Torvalds * associated with the inode. 2632bf904248SDavid Chinner * 2633bf904248SDavid Chinner * Note: because we don't initialise everything on reallocation out 2634bf904248SDavid Chinner * of the zone, we must ensure we nullify everything correctly before 2635bf904248SDavid Chinner * freeing the structure. 26361da177e4SLinus Torvalds */ 26371da177e4SLinus Torvalds void 26381da177e4SLinus Torvalds xfs_idestroy( 26391da177e4SLinus Torvalds xfs_inode_t *ip) 26401da177e4SLinus Torvalds { 26411da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 26421da177e4SLinus Torvalds case S_IFREG: 26431da177e4SLinus Torvalds case S_IFDIR: 26441da177e4SLinus Torvalds case S_IFLNK: 26451da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 26461da177e4SLinus Torvalds break; 26471da177e4SLinus Torvalds } 26481da177e4SLinus Torvalds if (ip->i_afp) 26491da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_ATTR_FORK); 26501543d79cSChristoph Hellwig 2651cf441eebSLachlan McIlroy #ifdef XFS_INODE_TRACE 26521543d79cSChristoph Hellwig ktrace_free(ip->i_trace); 26531543d79cSChristoph Hellwig #endif 26541da177e4SLinus Torvalds #ifdef XFS_BMAP_TRACE 26551da177e4SLinus Torvalds ktrace_free(ip->i_xtrace); 26561da177e4SLinus Torvalds #endif 26578c4ed633SChristoph Hellwig #ifdef XFS_BTREE_TRACE 26581da177e4SLinus Torvalds ktrace_free(ip->i_btrace); 26591da177e4SLinus Torvalds #endif 26601da177e4SLinus Torvalds #ifdef XFS_RW_TRACE 26611da177e4SLinus Torvalds ktrace_free(ip->i_rwtrace); 26621da177e4SLinus Torvalds #endif 26631da177e4SLinus Torvalds #ifdef XFS_ILOCK_TRACE 26641da177e4SLinus Torvalds ktrace_free(ip->i_lock_trace); 26651da177e4SLinus Torvalds #endif 26661da177e4SLinus Torvalds #ifdef XFS_DIR2_TRACE 26671da177e4SLinus Torvalds ktrace_free(ip->i_dir_trace); 26681da177e4SLinus Torvalds #endif 26691da177e4SLinus Torvalds if (ip->i_itemp) { 2670f74eaf59SDavid Chinner /* 2671f74eaf59SDavid Chinner * Only if we are shutting down the fs will we see an 2672f74eaf59SDavid Chinner * inode still in the AIL. If it is there, we should remove 2673f74eaf59SDavid Chinner * it to prevent a use-after-free from occurring. 2674f74eaf59SDavid Chinner */ 2675f74eaf59SDavid Chinner xfs_log_item_t *lip = &ip->i_itemp->ili_item; 2676783a2f65SDavid Chinner struct xfs_ail *ailp = lip->li_ailp; 2677f74eaf59SDavid Chinner 2678f74eaf59SDavid Chinner ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || 26791da177e4SLinus Torvalds XFS_FORCED_SHUTDOWN(ip->i_mount)); 2680f74eaf59SDavid Chinner if (lip->li_flags & XFS_LI_IN_AIL) { 2681783a2f65SDavid Chinner spin_lock(&ailp->xa_lock); 2682f74eaf59SDavid Chinner if (lip->li_flags & XFS_LI_IN_AIL) 2683783a2f65SDavid Chinner xfs_trans_ail_delete(ailp, lip); 2684f74eaf59SDavid Chinner else 2685783a2f65SDavid Chinner spin_unlock(&ailp->xa_lock); 2686f74eaf59SDavid Chinner } 26871da177e4SLinus Torvalds xfs_inode_item_destroy(ip); 268807c8f675SDavid Chinner ip->i_itemp = NULL; 26891da177e4SLinus Torvalds } 269007c8f675SDavid Chinner /* asserts to verify all state is correct here */ 269107c8f675SDavid Chinner ASSERT(atomic_read(&ip->i_iocount) == 0); 269207c8f675SDavid Chinner ASSERT(atomic_read(&ip->i_pincount) == 0); 269307c8f675SDavid Chinner ASSERT(!spin_is_locked(&ip->i_flags_lock)); 269411654513SDavid Chinner ASSERT(completion_done(&ip->i_flush)); 26951da177e4SLinus Torvalds kmem_zone_free(xfs_inode_zone, ip); 26961da177e4SLinus Torvalds } 26971da177e4SLinus Torvalds 26981da177e4SLinus Torvalds 26991da177e4SLinus Torvalds /* 27001da177e4SLinus Torvalds * Increment the pin count of the given buffer. 27011da177e4SLinus Torvalds * This value is protected by ipinlock spinlock in the mount structure. 27021da177e4SLinus Torvalds */ 27031da177e4SLinus Torvalds void 27041da177e4SLinus Torvalds xfs_ipin( 27051da177e4SLinus Torvalds xfs_inode_t *ip) 27061da177e4SLinus Torvalds { 2707579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 27081da177e4SLinus Torvalds 27091da177e4SLinus Torvalds atomic_inc(&ip->i_pincount); 27101da177e4SLinus Torvalds } 27111da177e4SLinus Torvalds 27121da177e4SLinus Torvalds /* 27131da177e4SLinus Torvalds * Decrement the pin count of the given inode, and wake up 27141da177e4SLinus Torvalds * anyone in xfs_iwait_unpin() if the count goes to 0. The 2715c41564b5SNathan Scott * inode must have been previously pinned with a call to xfs_ipin(). 27161da177e4SLinus Torvalds */ 27171da177e4SLinus Torvalds void 27181da177e4SLinus Torvalds xfs_iunpin( 27191da177e4SLinus Torvalds xfs_inode_t *ip) 27201da177e4SLinus Torvalds { 27211da177e4SLinus Torvalds ASSERT(atomic_read(&ip->i_pincount) > 0); 27221da177e4SLinus Torvalds 27235d51eff4SDavid Chinner if (atomic_dec_and_test(&ip->i_pincount)) 27241da177e4SLinus Torvalds wake_up(&ip->i_ipin_wait); 27251da177e4SLinus Torvalds } 27261da177e4SLinus Torvalds 27271da177e4SLinus Torvalds /* 2728a3f74ffbSDavid Chinner * This is called to unpin an inode. It can be directed to wait or to return 2729a3f74ffbSDavid Chinner * immediately without waiting for the inode to be unpinned. The caller must 2730a3f74ffbSDavid Chinner * have the inode locked in at least shared mode so that the buffer cannot be 2731a3f74ffbSDavid Chinner * subsequently pinned once someone is waiting for it to be unpinned. 27321da177e4SLinus Torvalds */ 2733ba0f32d4SChristoph Hellwig STATIC void 2734a3f74ffbSDavid Chinner __xfs_iunpin_wait( 2735a3f74ffbSDavid Chinner xfs_inode_t *ip, 2736a3f74ffbSDavid Chinner int wait) 2737a3f74ffbSDavid Chinner { 2738a3f74ffbSDavid Chinner xfs_inode_log_item_t *iip = ip->i_itemp; 2739a3f74ffbSDavid Chinner 2740579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2741a3f74ffbSDavid Chinner if (atomic_read(&ip->i_pincount) == 0) 2742a3f74ffbSDavid Chinner return; 2743a3f74ffbSDavid Chinner 2744a3f74ffbSDavid Chinner /* Give the log a push to start the unpinning I/O */ 2745a3f74ffbSDavid Chinner xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 2746a3f74ffbSDavid Chinner iip->ili_last_lsn : 0, XFS_LOG_FORCE); 2747a3f74ffbSDavid Chinner if (wait) 2748a3f74ffbSDavid Chinner wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 2749a3f74ffbSDavid Chinner } 2750a3f74ffbSDavid Chinner 2751a3f74ffbSDavid Chinner static inline void 27521da177e4SLinus Torvalds xfs_iunpin_wait( 27531da177e4SLinus Torvalds xfs_inode_t *ip) 27541da177e4SLinus Torvalds { 2755a3f74ffbSDavid Chinner __xfs_iunpin_wait(ip, 1); 27561da177e4SLinus Torvalds } 27571da177e4SLinus Torvalds 2758a3f74ffbSDavid Chinner static inline void 2759a3f74ffbSDavid Chinner xfs_iunpin_nowait( 2760a3f74ffbSDavid Chinner xfs_inode_t *ip) 2761a3f74ffbSDavid Chinner { 2762a3f74ffbSDavid Chinner __xfs_iunpin_wait(ip, 0); 27631da177e4SLinus Torvalds } 27641da177e4SLinus Torvalds 27651da177e4SLinus Torvalds 27661da177e4SLinus Torvalds /* 27671da177e4SLinus Torvalds * xfs_iextents_copy() 27681da177e4SLinus Torvalds * 27691da177e4SLinus Torvalds * This is called to copy the REAL extents (as opposed to the delayed 27701da177e4SLinus Torvalds * allocation extents) from the inode into the given buffer. It 27711da177e4SLinus Torvalds * returns the number of bytes copied into the buffer. 27721da177e4SLinus Torvalds * 27731da177e4SLinus Torvalds * If there are no delayed allocation extents, then we can just 27741da177e4SLinus Torvalds * memcpy() the extents into the buffer. Otherwise, we need to 27751da177e4SLinus Torvalds * examine each extent in turn and skip those which are delayed. 27761da177e4SLinus Torvalds */ 27771da177e4SLinus Torvalds int 27781da177e4SLinus Torvalds xfs_iextents_copy( 27791da177e4SLinus Torvalds xfs_inode_t *ip, 2780a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp, 27811da177e4SLinus Torvalds int whichfork) 27821da177e4SLinus Torvalds { 27831da177e4SLinus Torvalds int copied; 27841da177e4SLinus Torvalds int i; 27851da177e4SLinus Torvalds xfs_ifork_t *ifp; 27861da177e4SLinus Torvalds int nrecs; 27871da177e4SLinus Torvalds xfs_fsblock_t start_block; 27881da177e4SLinus Torvalds 27891da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 2790579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 27911da177e4SLinus Torvalds ASSERT(ifp->if_bytes > 0); 27921da177e4SLinus Torvalds 27931da177e4SLinus Torvalds nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 27943a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 27951da177e4SLinus Torvalds ASSERT(nrecs > 0); 27961da177e4SLinus Torvalds 27971da177e4SLinus Torvalds /* 27981da177e4SLinus Torvalds * There are some delayed allocation extents in the 27991da177e4SLinus Torvalds * inode, so copy the extents one at a time and skip 28001da177e4SLinus Torvalds * the delayed ones. There must be at least one 28011da177e4SLinus Torvalds * non-delayed extent. 28021da177e4SLinus Torvalds */ 28031da177e4SLinus Torvalds copied = 0; 28041da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 2805a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 28061da177e4SLinus Torvalds start_block = xfs_bmbt_get_startblock(ep); 28071da177e4SLinus Torvalds if (ISNULLSTARTBLOCK(start_block)) { 28081da177e4SLinus Torvalds /* 28091da177e4SLinus Torvalds * It's a delayed allocation extent, so skip it. 28101da177e4SLinus Torvalds */ 28111da177e4SLinus Torvalds continue; 28121da177e4SLinus Torvalds } 28131da177e4SLinus Torvalds 28141da177e4SLinus Torvalds /* Translate to on disk format */ 2815cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l0), &dp->l0); 2816cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l1), &dp->l1); 2817a6f64d4aSChristoph Hellwig dp++; 28181da177e4SLinus Torvalds copied++; 28191da177e4SLinus Torvalds } 28201da177e4SLinus Torvalds ASSERT(copied != 0); 2821a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); 28221da177e4SLinus Torvalds 28231da177e4SLinus Torvalds return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 28241da177e4SLinus Torvalds } 28251da177e4SLinus Torvalds 28261da177e4SLinus Torvalds /* 28271da177e4SLinus Torvalds * Each of the following cases stores data into the same region 28281da177e4SLinus Torvalds * of the on-disk inode, so only one of them can be valid at 28291da177e4SLinus Torvalds * any given time. While it is possible to have conflicting formats 28301da177e4SLinus Torvalds * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 28311da177e4SLinus Torvalds * in EXTENTS format, this can only happen when the fork has 28321da177e4SLinus Torvalds * changed formats after being modified but before being flushed. 28331da177e4SLinus Torvalds * In these cases, the format always takes precedence, because the 28341da177e4SLinus Torvalds * format indicates the current state of the fork. 28351da177e4SLinus Torvalds */ 28361da177e4SLinus Torvalds /*ARGSUSED*/ 2837e4ac967bSDavid Chinner STATIC void 28381da177e4SLinus Torvalds xfs_iflush_fork( 28391da177e4SLinus Torvalds xfs_inode_t *ip, 28401da177e4SLinus Torvalds xfs_dinode_t *dip, 28411da177e4SLinus Torvalds xfs_inode_log_item_t *iip, 28421da177e4SLinus Torvalds int whichfork, 28431da177e4SLinus Torvalds xfs_buf_t *bp) 28441da177e4SLinus Torvalds { 28451da177e4SLinus Torvalds char *cp; 28461da177e4SLinus Torvalds xfs_ifork_t *ifp; 28471da177e4SLinus Torvalds xfs_mount_t *mp; 28481da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 28491da177e4SLinus Torvalds int first; 28501da177e4SLinus Torvalds #endif 28511da177e4SLinus Torvalds static const short brootflag[2] = 28521da177e4SLinus Torvalds { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 28531da177e4SLinus Torvalds static const short dataflag[2] = 28541da177e4SLinus Torvalds { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 28551da177e4SLinus Torvalds static const short extflag[2] = 28561da177e4SLinus Torvalds { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 28571da177e4SLinus Torvalds 2858e4ac967bSDavid Chinner if (!iip) 2859e4ac967bSDavid Chinner return; 28601da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 28611da177e4SLinus Torvalds /* 28621da177e4SLinus Torvalds * This can happen if we gave up in iformat in an error path, 28631da177e4SLinus Torvalds * for the attribute fork. 28641da177e4SLinus Torvalds */ 2865e4ac967bSDavid Chinner if (!ifp) { 28661da177e4SLinus Torvalds ASSERT(whichfork == XFS_ATTR_FORK); 2867e4ac967bSDavid Chinner return; 28681da177e4SLinus Torvalds } 28691da177e4SLinus Torvalds cp = XFS_DFORK_PTR(dip, whichfork); 28701da177e4SLinus Torvalds mp = ip->i_mount; 28711da177e4SLinus Torvalds switch (XFS_IFORK_FORMAT(ip, whichfork)) { 28721da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 28731da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 28741da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 28751da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_data != NULL); 28761da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 28771da177e4SLinus Torvalds memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 28781da177e4SLinus Torvalds } 28791da177e4SLinus Torvalds break; 28801da177e4SLinus Torvalds 28811da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 28821da177e4SLinus Torvalds ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 28831da177e4SLinus Torvalds !(iip->ili_format.ilf_fields & extflag[whichfork])); 28844eea22f0SMandy Kirkconnell ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || 28854eea22f0SMandy Kirkconnell (ifp->if_bytes == 0)); 28864eea22f0SMandy Kirkconnell ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || 28874eea22f0SMandy Kirkconnell (ifp->if_bytes > 0)); 28881da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 28891da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 28901da177e4SLinus Torvalds ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 28911da177e4SLinus Torvalds (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 28921da177e4SLinus Torvalds whichfork); 28931da177e4SLinus Torvalds } 28941da177e4SLinus Torvalds break; 28951da177e4SLinus Torvalds 28961da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 28971da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 28981da177e4SLinus Torvalds (ifp->if_broot_bytes > 0)) { 28991da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 29001da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 29011da177e4SLinus Torvalds (XFS_IFORK_SIZE(ip, whichfork) + 29021da177e4SLinus Torvalds XFS_BROOT_SIZE_ADJ)); 290360197e8dSChristoph Hellwig xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 29041da177e4SLinus Torvalds (xfs_bmdr_block_t *)cp, 29051da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, mp, whichfork)); 29061da177e4SLinus Torvalds } 29071da177e4SLinus Torvalds break; 29081da177e4SLinus Torvalds 29091da177e4SLinus Torvalds case XFS_DINODE_FMT_DEV: 29101da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 29111da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 2912347d1c01SChristoph Hellwig dip->di_u.di_dev = cpu_to_be32(ip->i_df.if_u2.if_rdev); 29131da177e4SLinus Torvalds } 29141da177e4SLinus Torvalds break; 29151da177e4SLinus Torvalds 29161da177e4SLinus Torvalds case XFS_DINODE_FMT_UUID: 29171da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 29181da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 29191da177e4SLinus Torvalds memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid, 29201da177e4SLinus Torvalds sizeof(uuid_t)); 29211da177e4SLinus Torvalds } 29221da177e4SLinus Torvalds break; 29231da177e4SLinus Torvalds 29241da177e4SLinus Torvalds default: 29251da177e4SLinus Torvalds ASSERT(0); 29261da177e4SLinus Torvalds break; 29271da177e4SLinus Torvalds } 29281da177e4SLinus Torvalds } 29291da177e4SLinus Torvalds 2930bad55843SDavid Chinner STATIC int 2931bad55843SDavid Chinner xfs_iflush_cluster( 2932bad55843SDavid Chinner xfs_inode_t *ip, 2933bad55843SDavid Chinner xfs_buf_t *bp) 2934bad55843SDavid Chinner { 2935bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 2936bad55843SDavid Chinner xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 2937bad55843SDavid Chinner unsigned long first_index, mask; 2938c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2939bad55843SDavid Chinner int ilist_size; 2940bad55843SDavid Chinner xfs_inode_t **ilist; 2941bad55843SDavid Chinner xfs_inode_t *iq; 2942bad55843SDavid Chinner int nr_found; 2943bad55843SDavid Chinner int clcount = 0; 2944bad55843SDavid Chinner int bufwasdelwri; 2945bad55843SDavid Chinner int i; 2946bad55843SDavid Chinner 2947bad55843SDavid Chinner ASSERT(pag->pagi_inodeok); 2948bad55843SDavid Chinner ASSERT(pag->pag_ici_init); 2949bad55843SDavid Chinner 2950c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2951c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 295249383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2953bad55843SDavid Chinner if (!ilist) 2954bad55843SDavid Chinner return 0; 2955bad55843SDavid Chinner 2956bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2957bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 2958bad55843SDavid Chinner read_lock(&pag->pag_ici_lock); 2959bad55843SDavid Chinner /* really need a gang lookup range call here */ 2960bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2961c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2962bad55843SDavid Chinner if (nr_found == 0) 2963bad55843SDavid Chinner goto out_free; 2964bad55843SDavid Chinner 2965bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2966bad55843SDavid Chinner iq = ilist[i]; 2967bad55843SDavid Chinner if (iq == ip) 2968bad55843SDavid Chinner continue; 2969bad55843SDavid Chinner /* if the inode lies outside this cluster, we're done. */ 2970bad55843SDavid Chinner if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) 2971bad55843SDavid Chinner break; 2972bad55843SDavid Chinner /* 2973bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2974bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2975bad55843SDavid Chinner * later after the appropriate locks are acquired. 2976bad55843SDavid Chinner */ 297733540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2978bad55843SDavid Chinner continue; 2979bad55843SDavid Chinner 2980bad55843SDavid Chinner /* 2981bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2982bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2983bad55843SDavid Chinner */ 2984bad55843SDavid Chinner 2985bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2986bad55843SDavid Chinner continue; 2987bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2988bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2989bad55843SDavid Chinner continue; 2990bad55843SDavid Chinner } 2991bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2992bad55843SDavid Chinner xfs_ifunlock(iq); 2993bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2994bad55843SDavid Chinner continue; 2995bad55843SDavid Chinner } 2996bad55843SDavid Chinner 2997bad55843SDavid Chinner /* 2998bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2999bad55843SDavid Chinner * re-check that it's dirty before flushing. 3000bad55843SDavid Chinner */ 300133540408SDavid Chinner if (!xfs_inode_clean(iq)) { 3002bad55843SDavid Chinner int error; 3003bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 3004bad55843SDavid Chinner if (error) { 3005bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 3006bad55843SDavid Chinner goto cluster_corrupt_out; 3007bad55843SDavid Chinner } 3008bad55843SDavid Chinner clcount++; 3009bad55843SDavid Chinner } else { 3010bad55843SDavid Chinner xfs_ifunlock(iq); 3011bad55843SDavid Chinner } 3012bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 3013bad55843SDavid Chinner } 3014bad55843SDavid Chinner 3015bad55843SDavid Chinner if (clcount) { 3016bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 3017bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3018bad55843SDavid Chinner } 3019bad55843SDavid Chinner 3020bad55843SDavid Chinner out_free: 3021bad55843SDavid Chinner read_unlock(&pag->pag_ici_lock); 3022f0e2d93cSDenys Vlasenko kmem_free(ilist); 3023bad55843SDavid Chinner return 0; 3024bad55843SDavid Chinner 3025bad55843SDavid Chinner 3026bad55843SDavid Chinner cluster_corrupt_out: 3027bad55843SDavid Chinner /* 3028bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 3029bad55843SDavid Chinner * inode buffer and shut down the filesystem. 3030bad55843SDavid Chinner */ 3031bad55843SDavid Chinner read_unlock(&pag->pag_ici_lock); 3032bad55843SDavid Chinner /* 3033bad55843SDavid Chinner * Clean up the buffer. If it was B_DELWRI, just release it -- 3034bad55843SDavid Chinner * brelse can handle it with no problems. If not, shut down the 3035bad55843SDavid Chinner * filesystem before releasing the buffer. 3036bad55843SDavid Chinner */ 3037bad55843SDavid Chinner bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); 3038bad55843SDavid Chinner if (bufwasdelwri) 3039bad55843SDavid Chinner xfs_buf_relse(bp); 3040bad55843SDavid Chinner 3041bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3042bad55843SDavid Chinner 3043bad55843SDavid Chinner if (!bufwasdelwri) { 3044bad55843SDavid Chinner /* 3045bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 3046bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 3047bad55843SDavid Chinner * mark it as stale and brelse. 3048bad55843SDavid Chinner */ 3049bad55843SDavid Chinner if (XFS_BUF_IODONE_FUNC(bp)) { 3050bad55843SDavid Chinner XFS_BUF_CLR_BDSTRAT_FUNC(bp); 3051bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 3052bad55843SDavid Chinner XFS_BUF_STALE(bp); 3053bad55843SDavid Chinner XFS_BUF_SHUT(bp); 3054bad55843SDavid Chinner XFS_BUF_ERROR(bp,EIO); 3055bad55843SDavid Chinner xfs_biodone(bp); 3056bad55843SDavid Chinner } else { 3057bad55843SDavid Chinner XFS_BUF_STALE(bp); 3058bad55843SDavid Chinner xfs_buf_relse(bp); 3059bad55843SDavid Chinner } 3060bad55843SDavid Chinner } 3061bad55843SDavid Chinner 3062bad55843SDavid Chinner /* 3063bad55843SDavid Chinner * Unlocks the flush lock 3064bad55843SDavid Chinner */ 3065bad55843SDavid Chinner xfs_iflush_abort(iq); 3066f0e2d93cSDenys Vlasenko kmem_free(ilist); 3067bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 3068bad55843SDavid Chinner } 3069bad55843SDavid Chinner 30701da177e4SLinus Torvalds /* 30711da177e4SLinus Torvalds * xfs_iflush() will write a modified inode's changes out to the 30721da177e4SLinus Torvalds * inode's on disk home. The caller must have the inode lock held 3073c63942d3SDavid Chinner * in at least shared mode and the inode flush completion must be 3074c63942d3SDavid Chinner * active as well. The inode lock will still be held upon return from 30751da177e4SLinus Torvalds * the call and the caller is free to unlock it. 3076c63942d3SDavid Chinner * The inode flush will be completed when the inode reaches the disk. 30771da177e4SLinus Torvalds * The flags indicate how the inode's buffer should be written out. 30781da177e4SLinus Torvalds */ 30791da177e4SLinus Torvalds int 30801da177e4SLinus Torvalds xfs_iflush( 30811da177e4SLinus Torvalds xfs_inode_t *ip, 30821da177e4SLinus Torvalds uint flags) 30831da177e4SLinus Torvalds { 30841da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 30851da177e4SLinus Torvalds xfs_buf_t *bp; 30861da177e4SLinus Torvalds xfs_dinode_t *dip; 30871da177e4SLinus Torvalds xfs_mount_t *mp; 30881da177e4SLinus Torvalds int error; 3089a3f74ffbSDavid Chinner int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); 3090bad55843SDavid Chinner enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; 30911da177e4SLinus Torvalds 30921da177e4SLinus Torvalds XFS_STATS_INC(xs_iflush_count); 30931da177e4SLinus Torvalds 3094579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3095c63942d3SDavid Chinner ASSERT(!completion_done(&ip->i_flush)); 30961da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 30971da177e4SLinus Torvalds ip->i_d.di_nextents > ip->i_df.if_ext_max); 30981da177e4SLinus Torvalds 30991da177e4SLinus Torvalds iip = ip->i_itemp; 31001da177e4SLinus Torvalds mp = ip->i_mount; 31011da177e4SLinus Torvalds 31021da177e4SLinus Torvalds /* 31031da177e4SLinus Torvalds * If the inode isn't dirty, then just release the inode 31041da177e4SLinus Torvalds * flush lock and do nothing. 31051da177e4SLinus Torvalds */ 310633540408SDavid Chinner if (xfs_inode_clean(ip)) { 31071da177e4SLinus Torvalds xfs_ifunlock(ip); 31081da177e4SLinus Torvalds return 0; 31091da177e4SLinus Torvalds } 31101da177e4SLinus Torvalds 31111da177e4SLinus Torvalds /* 3112a3f74ffbSDavid Chinner * We can't flush the inode until it is unpinned, so wait for it if we 3113a3f74ffbSDavid Chinner * are allowed to block. We know noone new can pin it, because we are 3114a3f74ffbSDavid Chinner * holding the inode lock shared and you need to hold it exclusively to 3115a3f74ffbSDavid Chinner * pin the inode. 3116a3f74ffbSDavid Chinner * 3117a3f74ffbSDavid Chinner * If we are not allowed to block, force the log out asynchronously so 3118a3f74ffbSDavid Chinner * that when we come back the inode will be unpinned. If other inodes 3119a3f74ffbSDavid Chinner * in the same cluster are dirty, they will probably write the inode 3120a3f74ffbSDavid Chinner * out for us if they occur after the log force completes. 31211da177e4SLinus Torvalds */ 3122a3f74ffbSDavid Chinner if (noblock && xfs_ipincount(ip)) { 3123a3f74ffbSDavid Chinner xfs_iunpin_nowait(ip); 3124a3f74ffbSDavid Chinner xfs_ifunlock(ip); 3125a3f74ffbSDavid Chinner return EAGAIN; 3126a3f74ffbSDavid Chinner } 31271da177e4SLinus Torvalds xfs_iunpin_wait(ip); 31281da177e4SLinus Torvalds 31291da177e4SLinus Torvalds /* 31301da177e4SLinus Torvalds * This may have been unpinned because the filesystem is shutting 31311da177e4SLinus Torvalds * down forcibly. If that's the case we must not write this inode 31321da177e4SLinus Torvalds * to disk, because the log record didn't make it to disk! 31331da177e4SLinus Torvalds */ 31341da177e4SLinus Torvalds if (XFS_FORCED_SHUTDOWN(mp)) { 31351da177e4SLinus Torvalds ip->i_update_core = 0; 31361da177e4SLinus Torvalds if (iip) 31371da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 31381da177e4SLinus Torvalds xfs_ifunlock(ip); 31391da177e4SLinus Torvalds return XFS_ERROR(EIO); 31401da177e4SLinus Torvalds } 31411da177e4SLinus Torvalds 31421da177e4SLinus Torvalds /* 31431da177e4SLinus Torvalds * Decide how buffer will be flushed out. This is done before 31441da177e4SLinus Torvalds * the call to xfs_iflush_int because this field is zeroed by it. 31451da177e4SLinus Torvalds */ 31461da177e4SLinus Torvalds if (iip != NULL && iip->ili_format.ilf_fields != 0) { 31471da177e4SLinus Torvalds /* 31481da177e4SLinus Torvalds * Flush out the inode buffer according to the directions 31491da177e4SLinus Torvalds * of the caller. In the cases where the caller has given 31501da177e4SLinus Torvalds * us a choice choose the non-delwri case. This is because 31511da177e4SLinus Torvalds * the inode is in the AIL and we need to get it out soon. 31521da177e4SLinus Torvalds */ 31531da177e4SLinus Torvalds switch (flags) { 31541da177e4SLinus Torvalds case XFS_IFLUSH_SYNC: 31551da177e4SLinus Torvalds case XFS_IFLUSH_DELWRI_ELSE_SYNC: 31561da177e4SLinus Torvalds flags = 0; 31571da177e4SLinus Torvalds break; 3158a3f74ffbSDavid Chinner case XFS_IFLUSH_ASYNC_NOBLOCK: 31591da177e4SLinus Torvalds case XFS_IFLUSH_ASYNC: 31601da177e4SLinus Torvalds case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 31611da177e4SLinus Torvalds flags = INT_ASYNC; 31621da177e4SLinus Torvalds break; 31631da177e4SLinus Torvalds case XFS_IFLUSH_DELWRI: 31641da177e4SLinus Torvalds flags = INT_DELWRI; 31651da177e4SLinus Torvalds break; 31661da177e4SLinus Torvalds default: 31671da177e4SLinus Torvalds ASSERT(0); 31681da177e4SLinus Torvalds flags = 0; 31691da177e4SLinus Torvalds break; 31701da177e4SLinus Torvalds } 31711da177e4SLinus Torvalds } else { 31721da177e4SLinus Torvalds switch (flags) { 31731da177e4SLinus Torvalds case XFS_IFLUSH_DELWRI_ELSE_SYNC: 31741da177e4SLinus Torvalds case XFS_IFLUSH_DELWRI_ELSE_ASYNC: 31751da177e4SLinus Torvalds case XFS_IFLUSH_DELWRI: 31761da177e4SLinus Torvalds flags = INT_DELWRI; 31771da177e4SLinus Torvalds break; 3178a3f74ffbSDavid Chinner case XFS_IFLUSH_ASYNC_NOBLOCK: 31791da177e4SLinus Torvalds case XFS_IFLUSH_ASYNC: 31801da177e4SLinus Torvalds flags = INT_ASYNC; 31811da177e4SLinus Torvalds break; 31821da177e4SLinus Torvalds case XFS_IFLUSH_SYNC: 31831da177e4SLinus Torvalds flags = 0; 31841da177e4SLinus Torvalds break; 31851da177e4SLinus Torvalds default: 31861da177e4SLinus Torvalds ASSERT(0); 31871da177e4SLinus Torvalds flags = 0; 31881da177e4SLinus Torvalds break; 31891da177e4SLinus Torvalds } 31901da177e4SLinus Torvalds } 31911da177e4SLinus Torvalds 31921da177e4SLinus Torvalds /* 3193a3f74ffbSDavid Chinner * Get the buffer containing the on-disk inode. 3194a3f74ffbSDavid Chinner */ 3195a3f74ffbSDavid Chinner error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, 3196a3f74ffbSDavid Chinner noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); 3197a3f74ffbSDavid Chinner if (error || !bp) { 3198a3f74ffbSDavid Chinner xfs_ifunlock(ip); 3199a3f74ffbSDavid Chinner return error; 3200a3f74ffbSDavid Chinner } 3201a3f74ffbSDavid Chinner 3202a3f74ffbSDavid Chinner /* 32031da177e4SLinus Torvalds * First flush out the inode that xfs_iflush was called with. 32041da177e4SLinus Torvalds */ 32051da177e4SLinus Torvalds error = xfs_iflush_int(ip, bp); 3206bad55843SDavid Chinner if (error) 32071da177e4SLinus Torvalds goto corrupt_out; 32081da177e4SLinus Torvalds 32091da177e4SLinus Torvalds /* 3210a3f74ffbSDavid Chinner * If the buffer is pinned then push on the log now so we won't 3211a3f74ffbSDavid Chinner * get stuck waiting in the write for too long. 3212a3f74ffbSDavid Chinner */ 3213a3f74ffbSDavid Chinner if (XFS_BUF_ISPINNED(bp)) 3214a3f74ffbSDavid Chinner xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); 3215a3f74ffbSDavid Chinner 3216a3f74ffbSDavid Chinner /* 32171da177e4SLinus Torvalds * inode clustering: 32181da177e4SLinus Torvalds * see if other inodes can be gathered into this write 32191da177e4SLinus Torvalds */ 3220bad55843SDavid Chinner error = xfs_iflush_cluster(ip, bp); 3221bad55843SDavid Chinner if (error) 32221da177e4SLinus Torvalds goto cluster_corrupt_out; 32231da177e4SLinus Torvalds 32241da177e4SLinus Torvalds if (flags & INT_DELWRI) { 32251da177e4SLinus Torvalds xfs_bdwrite(mp, bp); 32261da177e4SLinus Torvalds } else if (flags & INT_ASYNC) { 3227db7a19f2SDavid Chinner error = xfs_bawrite(mp, bp); 32281da177e4SLinus Torvalds } else { 32291da177e4SLinus Torvalds error = xfs_bwrite(mp, bp); 32301da177e4SLinus Torvalds } 32311da177e4SLinus Torvalds return error; 32321da177e4SLinus Torvalds 32331da177e4SLinus Torvalds corrupt_out: 32341da177e4SLinus Torvalds xfs_buf_relse(bp); 32357d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 32361da177e4SLinus Torvalds cluster_corrupt_out: 32371da177e4SLinus Torvalds /* 32381da177e4SLinus Torvalds * Unlocks the flush lock 32391da177e4SLinus Torvalds */ 3240bad55843SDavid Chinner xfs_iflush_abort(ip); 32411da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 32421da177e4SLinus Torvalds } 32431da177e4SLinus Torvalds 32441da177e4SLinus Torvalds 32451da177e4SLinus Torvalds STATIC int 32461da177e4SLinus Torvalds xfs_iflush_int( 32471da177e4SLinus Torvalds xfs_inode_t *ip, 32481da177e4SLinus Torvalds xfs_buf_t *bp) 32491da177e4SLinus Torvalds { 32501da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 32511da177e4SLinus Torvalds xfs_dinode_t *dip; 32521da177e4SLinus Torvalds xfs_mount_t *mp; 32531da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 32541da177e4SLinus Torvalds int first; 32551da177e4SLinus Torvalds #endif 32561da177e4SLinus Torvalds 3257579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3258c63942d3SDavid Chinner ASSERT(!completion_done(&ip->i_flush)); 32591da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 32601da177e4SLinus Torvalds ip->i_d.di_nextents > ip->i_df.if_ext_max); 32611da177e4SLinus Torvalds 32621da177e4SLinus Torvalds iip = ip->i_itemp; 32631da177e4SLinus Torvalds mp = ip->i_mount; 32641da177e4SLinus Torvalds 32651da177e4SLinus Torvalds 32661da177e4SLinus Torvalds /* 32671da177e4SLinus Torvalds * If the inode isn't dirty, then just release the inode 32681da177e4SLinus Torvalds * flush lock and do nothing. 32691da177e4SLinus Torvalds */ 327033540408SDavid Chinner if (xfs_inode_clean(ip)) { 32711da177e4SLinus Torvalds xfs_ifunlock(ip); 32721da177e4SLinus Torvalds return 0; 32731da177e4SLinus Torvalds } 32741da177e4SLinus Torvalds 32751da177e4SLinus Torvalds /* set *dip = inode's place in the buffer */ 32761da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); 32771da177e4SLinus Torvalds 32781da177e4SLinus Torvalds /* 32791da177e4SLinus Torvalds * Clear i_update_core before copying out the data. 32801da177e4SLinus Torvalds * This is for coordination with our timestamp updates 32811da177e4SLinus Torvalds * that don't hold the inode lock. They will always 32821da177e4SLinus Torvalds * update the timestamps BEFORE setting i_update_core, 32831da177e4SLinus Torvalds * so if we clear i_update_core after they set it we 32841da177e4SLinus Torvalds * are guaranteed to see their updates to the timestamps. 32851da177e4SLinus Torvalds * I believe that this depends on strongly ordered memory 32861da177e4SLinus Torvalds * semantics, but we have that. We use the SYNCHRONIZE 32871da177e4SLinus Torvalds * macro to make sure that the compiler does not reorder 32881da177e4SLinus Torvalds * the i_update_core access below the data copy below. 32891da177e4SLinus Torvalds */ 32901da177e4SLinus Torvalds ip->i_update_core = 0; 32911da177e4SLinus Torvalds SYNCHRONIZE(); 32921da177e4SLinus Torvalds 329342fe2b1fSChristoph Hellwig /* 329442fe2b1fSChristoph Hellwig * Make sure to get the latest atime from the Linux inode. 329542fe2b1fSChristoph Hellwig */ 329642fe2b1fSChristoph Hellwig xfs_synchronize_atime(ip); 329742fe2b1fSChristoph Hellwig 3298347d1c01SChristoph Hellwig if (XFS_TEST_ERROR(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC, 32991da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 33001da177e4SLinus Torvalds xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 33011da177e4SLinus Torvalds "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 3302347d1c01SChristoph Hellwig ip->i_ino, be16_to_cpu(dip->di_core.di_magic), dip); 33031da177e4SLinus Torvalds goto corrupt_out; 33041da177e4SLinus Torvalds } 33051da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 33061da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 33071da177e4SLinus Torvalds xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 33081da177e4SLinus Torvalds "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 33091da177e4SLinus Torvalds ip->i_ino, ip, ip->i_d.di_magic); 33101da177e4SLinus Torvalds goto corrupt_out; 33111da177e4SLinus Torvalds } 33121da177e4SLinus Torvalds if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 33131da177e4SLinus Torvalds if (XFS_TEST_ERROR( 33141da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 33151da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 33161da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 33171da177e4SLinus Torvalds xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 33181da177e4SLinus Torvalds "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", 33191da177e4SLinus Torvalds ip->i_ino, ip); 33201da177e4SLinus Torvalds goto corrupt_out; 33211da177e4SLinus Torvalds } 33221da177e4SLinus Torvalds } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 33231da177e4SLinus Torvalds if (XFS_TEST_ERROR( 33241da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 33251da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 33261da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 33271da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 33281da177e4SLinus Torvalds xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 33291da177e4SLinus Torvalds "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", 33301da177e4SLinus Torvalds ip->i_ino, ip); 33311da177e4SLinus Torvalds goto corrupt_out; 33321da177e4SLinus Torvalds } 33331da177e4SLinus Torvalds } 33341da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 33351da177e4SLinus Torvalds ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 33361da177e4SLinus Torvalds XFS_RANDOM_IFLUSH_5)) { 33371da177e4SLinus Torvalds xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 33381da177e4SLinus Torvalds "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", 33391da177e4SLinus Torvalds ip->i_ino, 33401da177e4SLinus Torvalds ip->i_d.di_nextents + ip->i_d.di_anextents, 33411da177e4SLinus Torvalds ip->i_d.di_nblocks, 33421da177e4SLinus Torvalds ip); 33431da177e4SLinus Torvalds goto corrupt_out; 33441da177e4SLinus Torvalds } 33451da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 33461da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 33471da177e4SLinus Torvalds xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 33481da177e4SLinus Torvalds "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 33491da177e4SLinus Torvalds ip->i_ino, ip->i_d.di_forkoff, ip); 33501da177e4SLinus Torvalds goto corrupt_out; 33511da177e4SLinus Torvalds } 33521da177e4SLinus Torvalds /* 33531da177e4SLinus Torvalds * bump the flush iteration count, used to detect flushes which 33541da177e4SLinus Torvalds * postdate a log record during recovery. 33551da177e4SLinus Torvalds */ 33561da177e4SLinus Torvalds 33571da177e4SLinus Torvalds ip->i_d.di_flushiter++; 33581da177e4SLinus Torvalds 33591da177e4SLinus Torvalds /* 33601da177e4SLinus Torvalds * Copy the dirty parts of the inode into the on-disk 33611da177e4SLinus Torvalds * inode. We always copy out the core of the inode, 33621da177e4SLinus Torvalds * because if the inode is dirty at all the core must 33631da177e4SLinus Torvalds * be. 33641da177e4SLinus Torvalds */ 3365347d1c01SChristoph Hellwig xfs_dinode_to_disk(&dip->di_core, &ip->i_d); 33661da177e4SLinus Torvalds 33671da177e4SLinus Torvalds /* Wrap, we never let the log put out DI_MAX_FLUSH */ 33681da177e4SLinus Torvalds if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 33691da177e4SLinus Torvalds ip->i_d.di_flushiter = 0; 33701da177e4SLinus Torvalds 33711da177e4SLinus Torvalds /* 33721da177e4SLinus Torvalds * If this is really an old format inode and the superblock version 33731da177e4SLinus Torvalds * has not been updated to support only new format inodes, then 33741da177e4SLinus Torvalds * convert back to the old inode format. If the superblock version 33751da177e4SLinus Torvalds * has been updated, then make the conversion permanent. 33761da177e4SLinus Torvalds */ 33771da177e4SLinus Torvalds ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || 337862118709SEric Sandeen xfs_sb_version_hasnlink(&mp->m_sb)); 33791da177e4SLinus Torvalds if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 338062118709SEric Sandeen if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 33811da177e4SLinus Torvalds /* 33821da177e4SLinus Torvalds * Convert it back. 33831da177e4SLinus Torvalds */ 33841da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 3385347d1c01SChristoph Hellwig dip->di_core.di_onlink = cpu_to_be16(ip->i_d.di_nlink); 33861da177e4SLinus Torvalds } else { 33871da177e4SLinus Torvalds /* 33881da177e4SLinus Torvalds * The superblock version has already been bumped, 33891da177e4SLinus Torvalds * so just make the conversion to the new inode 33901da177e4SLinus Torvalds * format permanent. 33911da177e4SLinus Torvalds */ 33921da177e4SLinus Torvalds ip->i_d.di_version = XFS_DINODE_VERSION_2; 3393347d1c01SChristoph Hellwig dip->di_core.di_version = XFS_DINODE_VERSION_2; 33941da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 33951da177e4SLinus Torvalds dip->di_core.di_onlink = 0; 33961da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 33971da177e4SLinus Torvalds memset(&(dip->di_core.di_pad[0]), 0, 33981da177e4SLinus Torvalds sizeof(dip->di_core.di_pad)); 33991da177e4SLinus Torvalds ASSERT(ip->i_d.di_projid == 0); 34001da177e4SLinus Torvalds } 34011da177e4SLinus Torvalds } 34021da177e4SLinus Torvalds 3403e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 3404e4ac967bSDavid Chinner if (XFS_IFORK_Q(ip)) 3405e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 34061da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 34071da177e4SLinus Torvalds 34081da177e4SLinus Torvalds /* 34091da177e4SLinus Torvalds * We've recorded everything logged in the inode, so we'd 34101da177e4SLinus Torvalds * like to clear the ilf_fields bits so we don't log and 34111da177e4SLinus Torvalds * flush things unnecessarily. However, we can't stop 34121da177e4SLinus Torvalds * logging all this information until the data we've copied 34131da177e4SLinus Torvalds * into the disk buffer is written to disk. If we did we might 34141da177e4SLinus Torvalds * overwrite the copy of the inode in the log with all the 34151da177e4SLinus Torvalds * data after re-logging only part of it, and in the face of 34161da177e4SLinus Torvalds * a crash we wouldn't have all the data we need to recover. 34171da177e4SLinus Torvalds * 34181da177e4SLinus Torvalds * What we do is move the bits to the ili_last_fields field. 34191da177e4SLinus Torvalds * When logging the inode, these bits are moved back to the 34201da177e4SLinus Torvalds * ilf_fields field. In the xfs_iflush_done() routine we 34211da177e4SLinus Torvalds * clear ili_last_fields, since we know that the information 34221da177e4SLinus Torvalds * those bits represent is permanently on disk. As long as 34231da177e4SLinus Torvalds * the flush completes before the inode is logged again, then 34241da177e4SLinus Torvalds * both ilf_fields and ili_last_fields will be cleared. 34251da177e4SLinus Torvalds * 34261da177e4SLinus Torvalds * We can play with the ilf_fields bits here, because the inode 34271da177e4SLinus Torvalds * lock must be held exclusively in order to set bits there 34281da177e4SLinus Torvalds * and the flush lock protects the ili_last_fields bits. 34291da177e4SLinus Torvalds * Set ili_logged so the flush done 34301da177e4SLinus Torvalds * routine can tell whether or not to look in the AIL. 34311da177e4SLinus Torvalds * Also, store the current LSN of the inode so that we can tell 34321da177e4SLinus Torvalds * whether the item has moved in the AIL from xfs_iflush_done(). 34331da177e4SLinus Torvalds * In order to read the lsn we need the AIL lock, because 34341da177e4SLinus Torvalds * it is a 64 bit value that cannot be read atomically. 34351da177e4SLinus Torvalds */ 34361da177e4SLinus Torvalds if (iip != NULL && iip->ili_format.ilf_fields != 0) { 34371da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 34381da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 34391da177e4SLinus Torvalds iip->ili_logged = 1; 34401da177e4SLinus Torvalds 34417b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 34427b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 34431da177e4SLinus Torvalds 34441da177e4SLinus Torvalds /* 34451da177e4SLinus Torvalds * Attach the function xfs_iflush_done to the inode's 34461da177e4SLinus Torvalds * buffer. This will remove the inode from the AIL 34471da177e4SLinus Torvalds * and unlock the inode's flush lock when the inode is 34481da177e4SLinus Torvalds * completely written to disk. 34491da177e4SLinus Torvalds */ 34501da177e4SLinus Torvalds xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) 34511da177e4SLinus Torvalds xfs_iflush_done, (xfs_log_item_t *)iip); 34521da177e4SLinus Torvalds 34531da177e4SLinus Torvalds ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 34541da177e4SLinus Torvalds ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 34551da177e4SLinus Torvalds } else { 34561da177e4SLinus Torvalds /* 34571da177e4SLinus Torvalds * We're flushing an inode which is not in the AIL and has 34581da177e4SLinus Torvalds * not been logged but has i_update_core set. For this 34591da177e4SLinus Torvalds * case we can use a B_DELWRI flush and immediately drop 34601da177e4SLinus Torvalds * the inode flush lock because we can avoid the whole 34611da177e4SLinus Torvalds * AIL state thing. It's OK to drop the flush lock now, 34621da177e4SLinus Torvalds * because we've already locked the buffer and to do anything 34631da177e4SLinus Torvalds * you really need both. 34641da177e4SLinus Torvalds */ 34651da177e4SLinus Torvalds if (iip != NULL) { 34661da177e4SLinus Torvalds ASSERT(iip->ili_logged == 0); 34671da177e4SLinus Torvalds ASSERT(iip->ili_last_fields == 0); 34681da177e4SLinus Torvalds ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 34691da177e4SLinus Torvalds } 34701da177e4SLinus Torvalds xfs_ifunlock(ip); 34711da177e4SLinus Torvalds } 34721da177e4SLinus Torvalds 34731da177e4SLinus Torvalds return 0; 34741da177e4SLinus Torvalds 34751da177e4SLinus Torvalds corrupt_out: 34761da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 34771da177e4SLinus Torvalds } 34781da177e4SLinus Torvalds 34791da177e4SLinus Torvalds 34801da177e4SLinus Torvalds 34811da177e4SLinus Torvalds #ifdef XFS_ILOCK_TRACE 34821da177e4SLinus Torvalds ktrace_t *xfs_ilock_trace_buf; 34831da177e4SLinus Torvalds 34841da177e4SLinus Torvalds void 34851da177e4SLinus Torvalds xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) 34861da177e4SLinus Torvalds { 34871da177e4SLinus Torvalds ktrace_enter(ip->i_lock_trace, 34881da177e4SLinus Torvalds (void *)ip, 34891da177e4SLinus Torvalds (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */ 34901da177e4SLinus Torvalds (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */ 34911da177e4SLinus Torvalds (void *)ra, /* caller of ilock */ 34921da177e4SLinus Torvalds (void *)(unsigned long)current_cpu(), 34931da177e4SLinus Torvalds (void *)(unsigned long)current_pid(), 34941da177e4SLinus Torvalds NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); 34951da177e4SLinus Torvalds } 34961da177e4SLinus Torvalds #endif 34974eea22f0SMandy Kirkconnell 34984eea22f0SMandy Kirkconnell /* 34994eea22f0SMandy Kirkconnell * Return a pointer to the extent record at file index idx. 35004eea22f0SMandy Kirkconnell */ 3501a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * 35024eea22f0SMandy Kirkconnell xfs_iext_get_ext( 35034eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35044eea22f0SMandy Kirkconnell xfs_extnum_t idx) /* index of target extent */ 35054eea22f0SMandy Kirkconnell { 35064eea22f0SMandy Kirkconnell ASSERT(idx >= 0); 35070293ce3aSMandy Kirkconnell if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 35080293ce3aSMandy Kirkconnell return ifp->if_u1.if_ext_irec->er_extbuf; 35090293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 35100293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* irec pointer */ 35110293ce3aSMandy Kirkconnell int erp_idx = 0; /* irec index */ 35120293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = idx; /* ext index in target list */ 35130293ce3aSMandy Kirkconnell 35140293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 35150293ce3aSMandy Kirkconnell return &erp->er_extbuf[page_idx]; 35160293ce3aSMandy Kirkconnell } else if (ifp->if_bytes) { 35174eea22f0SMandy Kirkconnell return &ifp->if_u1.if_extents[idx]; 35184eea22f0SMandy Kirkconnell } else { 35194eea22f0SMandy Kirkconnell return NULL; 35204eea22f0SMandy Kirkconnell } 35214eea22f0SMandy Kirkconnell } 35224eea22f0SMandy Kirkconnell 35234eea22f0SMandy Kirkconnell /* 35244eea22f0SMandy Kirkconnell * Insert new item(s) into the extent records for incore inode 35254eea22f0SMandy Kirkconnell * fork 'ifp'. 'count' new items are inserted at index 'idx'. 35264eea22f0SMandy Kirkconnell */ 35274eea22f0SMandy Kirkconnell void 35284eea22f0SMandy Kirkconnell xfs_iext_insert( 35294eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35304eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* starting index of new items */ 35314eea22f0SMandy Kirkconnell xfs_extnum_t count, /* number of inserted items */ 35324eea22f0SMandy Kirkconnell xfs_bmbt_irec_t *new) /* items to insert */ 35334eea22f0SMandy Kirkconnell { 35344eea22f0SMandy Kirkconnell xfs_extnum_t i; /* extent record index */ 35354eea22f0SMandy Kirkconnell 35364eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 35374eea22f0SMandy Kirkconnell xfs_iext_add(ifp, idx, count); 3538a6f64d4aSChristoph Hellwig for (i = idx; i < idx + count; i++, new++) 3539a6f64d4aSChristoph Hellwig xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); 35404eea22f0SMandy Kirkconnell } 35414eea22f0SMandy Kirkconnell 35424eea22f0SMandy Kirkconnell /* 35434eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 35444eea22f0SMandy Kirkconnell * extents needs to be increased. The ext_diff parameter stores the 35454eea22f0SMandy Kirkconnell * number of new extents being added and the idx parameter contains 35464eea22f0SMandy Kirkconnell * the extent index where the new extents will be added. If the new 35474eea22f0SMandy Kirkconnell * extents are being appended, then we just need to (re)allocate and 35484eea22f0SMandy Kirkconnell * initialize the space. Otherwise, if the new extents are being 35494eea22f0SMandy Kirkconnell * inserted into the middle of the existing entries, a bit more work 35504eea22f0SMandy Kirkconnell * is required to make room for the new extents to be inserted. The 35514eea22f0SMandy Kirkconnell * caller is responsible for filling in the new extent entries upon 35524eea22f0SMandy Kirkconnell * return. 35534eea22f0SMandy Kirkconnell */ 35544eea22f0SMandy Kirkconnell void 35554eea22f0SMandy Kirkconnell xfs_iext_add( 35564eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35574eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin adding exts */ 3558c41564b5SNathan Scott int ext_diff) /* number of extents to add */ 35594eea22f0SMandy Kirkconnell { 35604eea22f0SMandy Kirkconnell int byte_diff; /* new bytes being added */ 35614eea22f0SMandy Kirkconnell int new_size; /* size of extents after adding */ 35624eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 35634eea22f0SMandy Kirkconnell 35644eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 35654eea22f0SMandy Kirkconnell ASSERT((idx >= 0) && (idx <= nextents)); 35664eea22f0SMandy Kirkconnell byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 35674eea22f0SMandy Kirkconnell new_size = ifp->if_bytes + byte_diff; 35684eea22f0SMandy Kirkconnell /* 35694eea22f0SMandy Kirkconnell * If the new number of extents (nextents + ext_diff) 35704eea22f0SMandy Kirkconnell * fits inside the inode, then continue to use the inline 35714eea22f0SMandy Kirkconnell * extent buffer. 35724eea22f0SMandy Kirkconnell */ 35734eea22f0SMandy Kirkconnell if (nextents + ext_diff <= XFS_INLINE_EXTS) { 35744eea22f0SMandy Kirkconnell if (idx < nextents) { 35754eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 35764eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx], 35774eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 35784eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 35794eea22f0SMandy Kirkconnell } 35804eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 35814eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 35820293ce3aSMandy Kirkconnell ifp->if_lastex = nextents + ext_diff; 35834eea22f0SMandy Kirkconnell } 35844eea22f0SMandy Kirkconnell /* 35854eea22f0SMandy Kirkconnell * Otherwise use a linear (direct) extent list. 35864eea22f0SMandy Kirkconnell * If the extents are currently inside the inode, 35874eea22f0SMandy Kirkconnell * xfs_iext_realloc_direct will switch us from 35884eea22f0SMandy Kirkconnell * inline to direct extent allocation mode. 35894eea22f0SMandy Kirkconnell */ 35900293ce3aSMandy Kirkconnell else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 35914eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 35924eea22f0SMandy Kirkconnell if (idx < nextents) { 35934eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx + ext_diff], 35944eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx], 35954eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 35964eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 35974eea22f0SMandy Kirkconnell } 35984eea22f0SMandy Kirkconnell } 35990293ce3aSMandy Kirkconnell /* Indirection array */ 36000293ce3aSMandy Kirkconnell else { 36010293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; 36020293ce3aSMandy Kirkconnell int erp_idx = 0; 36030293ce3aSMandy Kirkconnell int page_idx = idx; 36040293ce3aSMandy Kirkconnell 36050293ce3aSMandy Kirkconnell ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 36060293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 36070293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 36080293ce3aSMandy Kirkconnell } else { 36090293ce3aSMandy Kirkconnell xfs_iext_irec_init(ifp); 36100293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 36110293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 36120293ce3aSMandy Kirkconnell } 36130293ce3aSMandy Kirkconnell /* Extents fit in target extent page */ 36140293ce3aSMandy Kirkconnell if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 36150293ce3aSMandy Kirkconnell if (page_idx < erp->er_extcount) { 36160293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[page_idx + ext_diff], 36170293ce3aSMandy Kirkconnell &erp->er_extbuf[page_idx], 36180293ce3aSMandy Kirkconnell (erp->er_extcount - page_idx) * 36190293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 36200293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[page_idx], 0, byte_diff); 36210293ce3aSMandy Kirkconnell } 36220293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 36230293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 36240293ce3aSMandy Kirkconnell } 36250293ce3aSMandy Kirkconnell /* Insert a new extent page */ 36260293ce3aSMandy Kirkconnell else if (erp) { 36270293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(ifp, 36280293ce3aSMandy Kirkconnell erp_idx, page_idx, ext_diff); 36290293ce3aSMandy Kirkconnell } 36300293ce3aSMandy Kirkconnell /* 36310293ce3aSMandy Kirkconnell * If extent(s) are being appended to the last page in 36320293ce3aSMandy Kirkconnell * the indirection array and the new extent(s) don't fit 36330293ce3aSMandy Kirkconnell * in the page, then erp is NULL and erp_idx is set to 36340293ce3aSMandy Kirkconnell * the next index needed in the indirection array. 36350293ce3aSMandy Kirkconnell */ 36360293ce3aSMandy Kirkconnell else { 36370293ce3aSMandy Kirkconnell int count = ext_diff; 36380293ce3aSMandy Kirkconnell 36390293ce3aSMandy Kirkconnell while (count) { 36400293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 36410293ce3aSMandy Kirkconnell erp->er_extcount = count; 36420293ce3aSMandy Kirkconnell count -= MIN(count, (int)XFS_LINEAR_EXTS); 36430293ce3aSMandy Kirkconnell if (count) { 36440293ce3aSMandy Kirkconnell erp_idx++; 36450293ce3aSMandy Kirkconnell } 36460293ce3aSMandy Kirkconnell } 36470293ce3aSMandy Kirkconnell } 36480293ce3aSMandy Kirkconnell } 36494eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 36504eea22f0SMandy Kirkconnell } 36514eea22f0SMandy Kirkconnell 36524eea22f0SMandy Kirkconnell /* 36530293ce3aSMandy Kirkconnell * This is called when incore extents are being added to the indirection 36540293ce3aSMandy Kirkconnell * array and the new extents do not fit in the target extent list. The 36550293ce3aSMandy Kirkconnell * erp_idx parameter contains the irec index for the target extent list 36560293ce3aSMandy Kirkconnell * in the indirection array, and the idx parameter contains the extent 36570293ce3aSMandy Kirkconnell * index within the list. The number of extents being added is stored 36580293ce3aSMandy Kirkconnell * in the count parameter. 36590293ce3aSMandy Kirkconnell * 36600293ce3aSMandy Kirkconnell * |-------| |-------| 36610293ce3aSMandy Kirkconnell * | | | | idx - number of extents before idx 36620293ce3aSMandy Kirkconnell * | idx | | count | 36630293ce3aSMandy Kirkconnell * | | | | count - number of extents being inserted at idx 36640293ce3aSMandy Kirkconnell * |-------| |-------| 36650293ce3aSMandy Kirkconnell * | count | | nex2 | nex2 - number of extents after idx + count 36660293ce3aSMandy Kirkconnell * |-------| |-------| 36670293ce3aSMandy Kirkconnell */ 36680293ce3aSMandy Kirkconnell void 36690293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi( 36700293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36710293ce3aSMandy Kirkconnell int erp_idx, /* target extent irec index */ 36720293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index within target list */ 36730293ce3aSMandy Kirkconnell int count) /* new extents being added */ 36740293ce3aSMandy Kirkconnell { 36750293ce3aSMandy Kirkconnell int byte_diff; /* new bytes being added */ 36760293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* pointer to irec entry */ 36770293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* number of extents to add */ 36780293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* new extents still needed */ 36790293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 36800293ce3aSMandy Kirkconnell xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 36810293ce3aSMandy Kirkconnell int nlists; /* number of irec's (lists) */ 36820293ce3aSMandy Kirkconnell 36830293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 36840293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 36850293ce3aSMandy Kirkconnell nex2 = erp->er_extcount - idx; 36860293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36870293ce3aSMandy Kirkconnell 36880293ce3aSMandy Kirkconnell /* 36890293ce3aSMandy Kirkconnell * Save second part of target extent list 36900293ce3aSMandy Kirkconnell * (all extents past */ 36910293ce3aSMandy Kirkconnell if (nex2) { 36920293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 36936785073bSDavid Chinner nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); 36940293ce3aSMandy Kirkconnell memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 36950293ce3aSMandy Kirkconnell erp->er_extcount -= nex2; 36960293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 36970293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[idx], 0, byte_diff); 36980293ce3aSMandy Kirkconnell } 36990293ce3aSMandy Kirkconnell 37000293ce3aSMandy Kirkconnell /* 37010293ce3aSMandy Kirkconnell * Add the new extents to the end of the target 37020293ce3aSMandy Kirkconnell * list, then allocate new irec record(s) and 37030293ce3aSMandy Kirkconnell * extent buffer(s) as needed to store the rest 37040293ce3aSMandy Kirkconnell * of the new extents. 37050293ce3aSMandy Kirkconnell */ 37060293ce3aSMandy Kirkconnell ext_cnt = count; 37070293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 37080293ce3aSMandy Kirkconnell if (ext_diff) { 37090293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 37100293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 37110293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 37120293ce3aSMandy Kirkconnell } 37130293ce3aSMandy Kirkconnell while (ext_cnt) { 37140293ce3aSMandy Kirkconnell erp_idx++; 37150293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 37160293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 37170293ce3aSMandy Kirkconnell erp->er_extcount = ext_diff; 37180293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 37190293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 37200293ce3aSMandy Kirkconnell } 37210293ce3aSMandy Kirkconnell 37220293ce3aSMandy Kirkconnell /* Add nex2 extents back to indirection array */ 37230293ce3aSMandy Kirkconnell if (nex2) { 37240293ce3aSMandy Kirkconnell xfs_extnum_t ext_avail; 37250293ce3aSMandy Kirkconnell int i; 37260293ce3aSMandy Kirkconnell 37270293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 37280293ce3aSMandy Kirkconnell ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 37290293ce3aSMandy Kirkconnell i = 0; 37300293ce3aSMandy Kirkconnell /* 37310293ce3aSMandy Kirkconnell * If nex2 extents fit in the current page, append 37320293ce3aSMandy Kirkconnell * nex2_ep after the new extents. 37330293ce3aSMandy Kirkconnell */ 37340293ce3aSMandy Kirkconnell if (nex2 <= ext_avail) { 37350293ce3aSMandy Kirkconnell i = erp->er_extcount; 37360293ce3aSMandy Kirkconnell } 37370293ce3aSMandy Kirkconnell /* 37380293ce3aSMandy Kirkconnell * Otherwise, check if space is available in the 37390293ce3aSMandy Kirkconnell * next page. 37400293ce3aSMandy Kirkconnell */ 37410293ce3aSMandy Kirkconnell else if ((erp_idx < nlists - 1) && 37420293ce3aSMandy Kirkconnell (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 37430293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 37440293ce3aSMandy Kirkconnell erp_idx++; 37450293ce3aSMandy Kirkconnell erp++; 37460293ce3aSMandy Kirkconnell /* Create a hole for nex2 extents */ 37470293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 37480293ce3aSMandy Kirkconnell erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 37490293ce3aSMandy Kirkconnell } 37500293ce3aSMandy Kirkconnell /* 37510293ce3aSMandy Kirkconnell * Final choice, create a new extent page for 37520293ce3aSMandy Kirkconnell * nex2 extents. 37530293ce3aSMandy Kirkconnell */ 37540293ce3aSMandy Kirkconnell else { 37550293ce3aSMandy Kirkconnell erp_idx++; 37560293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 37570293ce3aSMandy Kirkconnell } 37580293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3759f0e2d93cSDenys Vlasenko kmem_free(nex2_ep); 37600293ce3aSMandy Kirkconnell erp->er_extcount += nex2; 37610293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 37620293ce3aSMandy Kirkconnell } 37630293ce3aSMandy Kirkconnell } 37640293ce3aSMandy Kirkconnell 37650293ce3aSMandy Kirkconnell /* 37664eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 37674eea22f0SMandy Kirkconnell * extents needs to be decreased. The ext_diff parameter stores the 37684eea22f0SMandy Kirkconnell * number of extents to be removed and the idx parameter contains 37694eea22f0SMandy Kirkconnell * the extent index where the extents will be removed from. 37700293ce3aSMandy Kirkconnell * 37710293ce3aSMandy Kirkconnell * If the amount of space needed has decreased below the linear 37720293ce3aSMandy Kirkconnell * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 37730293ce3aSMandy Kirkconnell * extent array. Otherwise, use kmem_realloc() to adjust the 37740293ce3aSMandy Kirkconnell * size to what is needed. 37754eea22f0SMandy Kirkconnell */ 37764eea22f0SMandy Kirkconnell void 37774eea22f0SMandy Kirkconnell xfs_iext_remove( 37784eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 37794eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 37804eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 37814eea22f0SMandy Kirkconnell { 37824eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 37834eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 37844eea22f0SMandy Kirkconnell 37854eea22f0SMandy Kirkconnell ASSERT(ext_diff > 0); 37864eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 37874eea22f0SMandy Kirkconnell new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 37884eea22f0SMandy Kirkconnell 37894eea22f0SMandy Kirkconnell if (new_size == 0) { 37904eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 37910293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 37920293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(ifp, idx, ext_diff); 37934eea22f0SMandy Kirkconnell } else if (ifp->if_real_bytes) { 37944eea22f0SMandy Kirkconnell xfs_iext_remove_direct(ifp, idx, ext_diff); 37954eea22f0SMandy Kirkconnell } else { 37964eea22f0SMandy Kirkconnell xfs_iext_remove_inline(ifp, idx, ext_diff); 37974eea22f0SMandy Kirkconnell } 37984eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 37994eea22f0SMandy Kirkconnell } 38004eea22f0SMandy Kirkconnell 38014eea22f0SMandy Kirkconnell /* 38024eea22f0SMandy Kirkconnell * This removes ext_diff extents from the inline buffer, beginning 38034eea22f0SMandy Kirkconnell * at extent index idx. 38044eea22f0SMandy Kirkconnell */ 38054eea22f0SMandy Kirkconnell void 38064eea22f0SMandy Kirkconnell xfs_iext_remove_inline( 38074eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 38084eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 38094eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 38104eea22f0SMandy Kirkconnell { 38114eea22f0SMandy Kirkconnell int nextents; /* number of extents in file */ 38124eea22f0SMandy Kirkconnell 38130293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 38144eea22f0SMandy Kirkconnell ASSERT(idx < XFS_INLINE_EXTS); 38154eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 38164eea22f0SMandy Kirkconnell ASSERT(((nextents - ext_diff) > 0) && 38174eea22f0SMandy Kirkconnell (nextents - ext_diff) < XFS_INLINE_EXTS); 38184eea22f0SMandy Kirkconnell 38194eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 38204eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx], 38214eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx + ext_diff], 38224eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 38234eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 38244eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 38254eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 38264eea22f0SMandy Kirkconnell } else { 38274eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, 38284eea22f0SMandy Kirkconnell ext_diff * sizeof(xfs_bmbt_rec_t)); 38294eea22f0SMandy Kirkconnell } 38304eea22f0SMandy Kirkconnell } 38314eea22f0SMandy Kirkconnell 38324eea22f0SMandy Kirkconnell /* 38334eea22f0SMandy Kirkconnell * This removes ext_diff extents from a linear (direct) extent list, 38344eea22f0SMandy Kirkconnell * beginning at extent index idx. If the extents are being removed 38354eea22f0SMandy Kirkconnell * from the end of the list (ie. truncate) then we just need to re- 38364eea22f0SMandy Kirkconnell * allocate the list to remove the extra space. Otherwise, if the 38374eea22f0SMandy Kirkconnell * extents are being removed from the middle of the existing extent 38384eea22f0SMandy Kirkconnell * entries, then we first need to move the extent records beginning 38394eea22f0SMandy Kirkconnell * at idx + ext_diff up in the list to overwrite the records being 38404eea22f0SMandy Kirkconnell * removed, then remove the extra space via kmem_realloc. 38414eea22f0SMandy Kirkconnell */ 38424eea22f0SMandy Kirkconnell void 38434eea22f0SMandy Kirkconnell xfs_iext_remove_direct( 38444eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 38454eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 38464eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 38474eea22f0SMandy Kirkconnell { 38484eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 38494eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 38504eea22f0SMandy Kirkconnell 38510293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 38524eea22f0SMandy Kirkconnell new_size = ifp->if_bytes - 38534eea22f0SMandy Kirkconnell (ext_diff * sizeof(xfs_bmbt_rec_t)); 38544eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 38554eea22f0SMandy Kirkconnell 38564eea22f0SMandy Kirkconnell if (new_size == 0) { 38574eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 38584eea22f0SMandy Kirkconnell return; 38594eea22f0SMandy Kirkconnell } 38604eea22f0SMandy Kirkconnell /* Move extents up in the list (if needed) */ 38614eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 38624eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx], 38634eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx + ext_diff], 38644eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 38654eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 38664eea22f0SMandy Kirkconnell } 38674eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[nextents - ext_diff], 38684eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 38694eea22f0SMandy Kirkconnell /* 38704eea22f0SMandy Kirkconnell * Reallocate the direct extent list. If the extents 38714eea22f0SMandy Kirkconnell * will fit inside the inode then xfs_iext_realloc_direct 38724eea22f0SMandy Kirkconnell * will switch from direct to inline extent allocation 38734eea22f0SMandy Kirkconnell * mode for us. 38744eea22f0SMandy Kirkconnell */ 38754eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 38764eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 38774eea22f0SMandy Kirkconnell } 38784eea22f0SMandy Kirkconnell 38794eea22f0SMandy Kirkconnell /* 38800293ce3aSMandy Kirkconnell * This is called when incore extents are being removed from the 38810293ce3aSMandy Kirkconnell * indirection array and the extents being removed span multiple extent 38820293ce3aSMandy Kirkconnell * buffers. The idx parameter contains the file extent index where we 38830293ce3aSMandy Kirkconnell * want to begin removing extents, and the count parameter contains 38840293ce3aSMandy Kirkconnell * how many extents need to be removed. 38850293ce3aSMandy Kirkconnell * 38860293ce3aSMandy Kirkconnell * |-------| |-------| 38870293ce3aSMandy Kirkconnell * | nex1 | | | nex1 - number of extents before idx 38880293ce3aSMandy Kirkconnell * |-------| | count | 38890293ce3aSMandy Kirkconnell * | | | | count - number of extents being removed at idx 38900293ce3aSMandy Kirkconnell * | count | |-------| 38910293ce3aSMandy Kirkconnell * | | | nex2 | nex2 - number of extents after idx + count 38920293ce3aSMandy Kirkconnell * |-------| |-------| 38930293ce3aSMandy Kirkconnell */ 38940293ce3aSMandy Kirkconnell void 38950293ce3aSMandy Kirkconnell xfs_iext_remove_indirect( 38960293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 38970293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing extents */ 38980293ce3aSMandy Kirkconnell int count) /* number of extents to remove */ 38990293ce3aSMandy Kirkconnell { 39000293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 39010293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 39020293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* extents left to remove */ 39030293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* extents to remove in current list */ 39040293ce3aSMandy Kirkconnell xfs_extnum_t nex1; /* number of extents before idx */ 39050293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 3906c41564b5SNathan Scott int nlists; /* entries in indirection array */ 39070293ce3aSMandy Kirkconnell int page_idx = idx; /* index in target extent list */ 39080293ce3aSMandy Kirkconnell 39090293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 39100293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 39110293ce3aSMandy Kirkconnell ASSERT(erp != NULL); 39120293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 39130293ce3aSMandy Kirkconnell nex1 = page_idx; 39140293ce3aSMandy Kirkconnell ext_cnt = count; 39150293ce3aSMandy Kirkconnell while (ext_cnt) { 39160293ce3aSMandy Kirkconnell nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 39170293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 39180293ce3aSMandy Kirkconnell /* 39190293ce3aSMandy Kirkconnell * Check for deletion of entire list; 39200293ce3aSMandy Kirkconnell * xfs_iext_irec_remove() updates extent offsets. 39210293ce3aSMandy Kirkconnell */ 39220293ce3aSMandy Kirkconnell if (ext_diff == erp->er_extcount) { 39230293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 39240293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 39250293ce3aSMandy Kirkconnell nex1 = 0; 39260293ce3aSMandy Kirkconnell if (ext_cnt) { 39270293ce3aSMandy Kirkconnell ASSERT(erp_idx < ifp->if_real_bytes / 39280293ce3aSMandy Kirkconnell XFS_IEXT_BUFSZ); 39290293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 39300293ce3aSMandy Kirkconnell nex1 = 0; 39310293ce3aSMandy Kirkconnell continue; 39320293ce3aSMandy Kirkconnell } else { 39330293ce3aSMandy Kirkconnell break; 39340293ce3aSMandy Kirkconnell } 39350293ce3aSMandy Kirkconnell } 39360293ce3aSMandy Kirkconnell /* Move extents up (if needed) */ 39370293ce3aSMandy Kirkconnell if (nex2) { 39380293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex1], 39390293ce3aSMandy Kirkconnell &erp->er_extbuf[nex1 + ext_diff], 39400293ce3aSMandy Kirkconnell nex2 * sizeof(xfs_bmbt_rec_t)); 39410293ce3aSMandy Kirkconnell } 39420293ce3aSMandy Kirkconnell /* Zero out rest of page */ 39430293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 39440293ce3aSMandy Kirkconnell ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 39450293ce3aSMandy Kirkconnell /* Update remaining counters */ 39460293ce3aSMandy Kirkconnell erp->er_extcount -= ext_diff; 39470293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 39480293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 39490293ce3aSMandy Kirkconnell nex1 = 0; 39500293ce3aSMandy Kirkconnell erp_idx++; 39510293ce3aSMandy Kirkconnell erp++; 39520293ce3aSMandy Kirkconnell } 39530293ce3aSMandy Kirkconnell ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); 39540293ce3aSMandy Kirkconnell xfs_iext_irec_compact(ifp); 39550293ce3aSMandy Kirkconnell } 39560293ce3aSMandy Kirkconnell 39570293ce3aSMandy Kirkconnell /* 39584eea22f0SMandy Kirkconnell * Create, destroy, or resize a linear (direct) block of extents. 39594eea22f0SMandy Kirkconnell */ 39604eea22f0SMandy Kirkconnell void 39614eea22f0SMandy Kirkconnell xfs_iext_realloc_direct( 39624eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 39634eea22f0SMandy Kirkconnell int new_size) /* new size of extents */ 39644eea22f0SMandy Kirkconnell { 39654eea22f0SMandy Kirkconnell int rnew_size; /* real new size of extents */ 39664eea22f0SMandy Kirkconnell 39674eea22f0SMandy Kirkconnell rnew_size = new_size; 39684eea22f0SMandy Kirkconnell 39690293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 39700293ce3aSMandy Kirkconnell ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 39710293ce3aSMandy Kirkconnell (new_size != ifp->if_real_bytes))); 39720293ce3aSMandy Kirkconnell 39734eea22f0SMandy Kirkconnell /* Free extent records */ 39744eea22f0SMandy Kirkconnell if (new_size == 0) { 39754eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 39764eea22f0SMandy Kirkconnell } 39774eea22f0SMandy Kirkconnell /* Resize direct extent list and zero any new bytes */ 39784eea22f0SMandy Kirkconnell else if (ifp->if_real_bytes) { 39794eea22f0SMandy Kirkconnell /* Check if extents will fit inside the inode */ 39804eea22f0SMandy Kirkconnell if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 39814eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(ifp, new_size / 39824eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)); 39834eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 39844eea22f0SMandy Kirkconnell return; 39854eea22f0SMandy Kirkconnell } 398616a087d8SVignesh Babu if (!is_power_of_2(new_size)){ 398740ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 39884eea22f0SMandy Kirkconnell } 39894eea22f0SMandy Kirkconnell if (rnew_size != ifp->if_real_bytes) { 3990a6f64d4aSChristoph Hellwig ifp->if_u1.if_extents = 39914eea22f0SMandy Kirkconnell kmem_realloc(ifp->if_u1.if_extents, 39924eea22f0SMandy Kirkconnell rnew_size, 39936785073bSDavid Chinner ifp->if_real_bytes, KM_NOFS); 39944eea22f0SMandy Kirkconnell } 39954eea22f0SMandy Kirkconnell if (rnew_size > ifp->if_real_bytes) { 39964eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[ifp->if_bytes / 39974eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)], 0, 39984eea22f0SMandy Kirkconnell rnew_size - ifp->if_real_bytes); 39994eea22f0SMandy Kirkconnell } 40004eea22f0SMandy Kirkconnell } 40014eea22f0SMandy Kirkconnell /* 40024eea22f0SMandy Kirkconnell * Switch from the inline extent buffer to a direct 40034eea22f0SMandy Kirkconnell * extent list. Be sure to include the inline extent 40044eea22f0SMandy Kirkconnell * bytes in new_size. 40054eea22f0SMandy Kirkconnell */ 40064eea22f0SMandy Kirkconnell else { 40074eea22f0SMandy Kirkconnell new_size += ifp->if_bytes; 400816a087d8SVignesh Babu if (!is_power_of_2(new_size)) { 400940ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 40104eea22f0SMandy Kirkconnell } 40114eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(ifp, rnew_size); 40124eea22f0SMandy Kirkconnell } 40134eea22f0SMandy Kirkconnell ifp->if_real_bytes = rnew_size; 40144eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 40154eea22f0SMandy Kirkconnell } 40164eea22f0SMandy Kirkconnell 40174eea22f0SMandy Kirkconnell /* 40184eea22f0SMandy Kirkconnell * Switch from linear (direct) extent records to inline buffer. 40194eea22f0SMandy Kirkconnell */ 40204eea22f0SMandy Kirkconnell void 40214eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline( 40224eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 40234eea22f0SMandy Kirkconnell xfs_extnum_t nextents) /* number of extents in file */ 40244eea22f0SMandy Kirkconnell { 40254eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 40264eea22f0SMandy Kirkconnell ASSERT(nextents <= XFS_INLINE_EXTS); 40274eea22f0SMandy Kirkconnell /* 40284eea22f0SMandy Kirkconnell * The inline buffer was zeroed when we switched 40294eea22f0SMandy Kirkconnell * from inline to direct extent allocation mode, 40304eea22f0SMandy Kirkconnell * so we don't need to clear it here. 40314eea22f0SMandy Kirkconnell */ 40324eea22f0SMandy Kirkconnell memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 40334eea22f0SMandy Kirkconnell nextents * sizeof(xfs_bmbt_rec_t)); 4034f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 40354eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 40364eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 40374eea22f0SMandy Kirkconnell } 40384eea22f0SMandy Kirkconnell 40394eea22f0SMandy Kirkconnell /* 40404eea22f0SMandy Kirkconnell * Switch from inline buffer to linear (direct) extent records. 40414eea22f0SMandy Kirkconnell * new_size should already be rounded up to the next power of 2 40424eea22f0SMandy Kirkconnell * by the caller (when appropriate), so use new_size as it is. 40434eea22f0SMandy Kirkconnell * However, since new_size may be rounded up, we can't update 40444eea22f0SMandy Kirkconnell * if_bytes here. It is the caller's responsibility to update 40454eea22f0SMandy Kirkconnell * if_bytes upon return. 40464eea22f0SMandy Kirkconnell */ 40474eea22f0SMandy Kirkconnell void 40484eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct( 40494eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 40504eea22f0SMandy Kirkconnell int new_size) /* number of extents in file */ 40514eea22f0SMandy Kirkconnell { 40526785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); 40534eea22f0SMandy Kirkconnell memset(ifp->if_u1.if_extents, 0, new_size); 40544eea22f0SMandy Kirkconnell if (ifp->if_bytes) { 40554eea22f0SMandy Kirkconnell memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 40564eea22f0SMandy Kirkconnell ifp->if_bytes); 40574eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 40584eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 40594eea22f0SMandy Kirkconnell } 40604eea22f0SMandy Kirkconnell ifp->if_real_bytes = new_size; 40614eea22f0SMandy Kirkconnell } 40624eea22f0SMandy Kirkconnell 40634eea22f0SMandy Kirkconnell /* 40640293ce3aSMandy Kirkconnell * Resize an extent indirection array to new_size bytes. 40650293ce3aSMandy Kirkconnell */ 40660293ce3aSMandy Kirkconnell void 40670293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect( 40680293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 40690293ce3aSMandy Kirkconnell int new_size) /* new indirection array size */ 40700293ce3aSMandy Kirkconnell { 40710293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 40720293ce3aSMandy Kirkconnell int size; /* current indirection array size */ 40730293ce3aSMandy Kirkconnell 40740293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 40750293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 40760293ce3aSMandy Kirkconnell size = nlists * sizeof(xfs_ext_irec_t); 40770293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes); 40780293ce3aSMandy Kirkconnell ASSERT((new_size >= 0) && (new_size != size)); 40790293ce3aSMandy Kirkconnell if (new_size == 0) { 40800293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 40810293ce3aSMandy Kirkconnell } else { 40820293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 40830293ce3aSMandy Kirkconnell kmem_realloc(ifp->if_u1.if_ext_irec, 40846785073bSDavid Chinner new_size, size, KM_NOFS); 40850293ce3aSMandy Kirkconnell } 40860293ce3aSMandy Kirkconnell } 40870293ce3aSMandy Kirkconnell 40880293ce3aSMandy Kirkconnell /* 40890293ce3aSMandy Kirkconnell * Switch from indirection array to linear (direct) extent allocations. 40900293ce3aSMandy Kirkconnell */ 40910293ce3aSMandy Kirkconnell void 40920293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct( 40930293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 40940293ce3aSMandy Kirkconnell { 4095a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 40960293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 40970293ce3aSMandy Kirkconnell int size; /* size of file extents */ 40980293ce3aSMandy Kirkconnell 40990293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 41000293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 41010293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 41020293ce3aSMandy Kirkconnell size = nextents * sizeof(xfs_bmbt_rec_t); 41030293ce3aSMandy Kirkconnell 410471a8c87fSLachlan McIlroy xfs_iext_irec_compact_pages(ifp); 41050293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 41060293ce3aSMandy Kirkconnell 41070293ce3aSMandy Kirkconnell ep = ifp->if_u1.if_ext_irec->er_extbuf; 4108f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 41090293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 41100293ce3aSMandy Kirkconnell ifp->if_u1.if_extents = ep; 41110293ce3aSMandy Kirkconnell ifp->if_bytes = size; 41120293ce3aSMandy Kirkconnell if (nextents < XFS_LINEAR_EXTS) { 41130293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, size); 41140293ce3aSMandy Kirkconnell } 41150293ce3aSMandy Kirkconnell } 41160293ce3aSMandy Kirkconnell 41170293ce3aSMandy Kirkconnell /* 41184eea22f0SMandy Kirkconnell * Free incore file extents. 41194eea22f0SMandy Kirkconnell */ 41204eea22f0SMandy Kirkconnell void 41214eea22f0SMandy Kirkconnell xfs_iext_destroy( 41224eea22f0SMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 41234eea22f0SMandy Kirkconnell { 41240293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 41250293ce3aSMandy Kirkconnell int erp_idx; 41260293ce3aSMandy Kirkconnell int nlists; 41270293ce3aSMandy Kirkconnell 41280293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 41290293ce3aSMandy Kirkconnell for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 41300293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 41310293ce3aSMandy Kirkconnell } 41320293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 41330293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes) { 4134f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 41354eea22f0SMandy Kirkconnell } else if (ifp->if_bytes) { 41364eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 41374eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 41384eea22f0SMandy Kirkconnell } 41394eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = NULL; 41404eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 41414eea22f0SMandy Kirkconnell ifp->if_bytes = 0; 41424eea22f0SMandy Kirkconnell } 41430293ce3aSMandy Kirkconnell 41440293ce3aSMandy Kirkconnell /* 41458867bc9bSMandy Kirkconnell * Return a pointer to the extent record for file system block bno. 41468867bc9bSMandy Kirkconnell */ 4147a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * /* pointer to found extent record */ 41488867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext( 41498867bc9bSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 41508867bc9bSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 41518867bc9bSMandy Kirkconnell xfs_extnum_t *idxp) /* index of target extent */ 41528867bc9bSMandy Kirkconnell { 4153a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *base; /* pointer to first extent */ 41548867bc9bSMandy Kirkconnell xfs_filblks_t blockcount = 0; /* number of blocks in extent */ 4155a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ 41568867bc9bSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 4157c41564b5SNathan Scott int high; /* upper boundary in search */ 41588867bc9bSMandy Kirkconnell xfs_extnum_t idx = 0; /* index of target extent */ 4159c41564b5SNathan Scott int low; /* lower boundary in search */ 41608867bc9bSMandy Kirkconnell xfs_extnum_t nextents; /* number of file extents */ 41618867bc9bSMandy Kirkconnell xfs_fileoff_t startoff = 0; /* start offset of extent */ 41628867bc9bSMandy Kirkconnell 41638867bc9bSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 41648867bc9bSMandy Kirkconnell if (nextents == 0) { 41658867bc9bSMandy Kirkconnell *idxp = 0; 41668867bc9bSMandy Kirkconnell return NULL; 41678867bc9bSMandy Kirkconnell } 41688867bc9bSMandy Kirkconnell low = 0; 41698867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 41708867bc9bSMandy Kirkconnell /* Find target extent list */ 41718867bc9bSMandy Kirkconnell int erp_idx = 0; 41728867bc9bSMandy Kirkconnell erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); 41738867bc9bSMandy Kirkconnell base = erp->er_extbuf; 41748867bc9bSMandy Kirkconnell high = erp->er_extcount - 1; 41758867bc9bSMandy Kirkconnell } else { 41768867bc9bSMandy Kirkconnell base = ifp->if_u1.if_extents; 41778867bc9bSMandy Kirkconnell high = nextents - 1; 41788867bc9bSMandy Kirkconnell } 41798867bc9bSMandy Kirkconnell /* Binary search extent records */ 41808867bc9bSMandy Kirkconnell while (low <= high) { 41818867bc9bSMandy Kirkconnell idx = (low + high) >> 1; 41828867bc9bSMandy Kirkconnell ep = base + idx; 41838867bc9bSMandy Kirkconnell startoff = xfs_bmbt_get_startoff(ep); 41848867bc9bSMandy Kirkconnell blockcount = xfs_bmbt_get_blockcount(ep); 41858867bc9bSMandy Kirkconnell if (bno < startoff) { 41868867bc9bSMandy Kirkconnell high = idx - 1; 41878867bc9bSMandy Kirkconnell } else if (bno >= startoff + blockcount) { 41888867bc9bSMandy Kirkconnell low = idx + 1; 41898867bc9bSMandy Kirkconnell } else { 41908867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 41918867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 41928867bc9bSMandy Kirkconnell idx += erp->er_extoff; 41938867bc9bSMandy Kirkconnell } 41948867bc9bSMandy Kirkconnell *idxp = idx; 41958867bc9bSMandy Kirkconnell return ep; 41968867bc9bSMandy Kirkconnell } 41978867bc9bSMandy Kirkconnell } 41988867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 41998867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 42008867bc9bSMandy Kirkconnell idx += erp->er_extoff; 42018867bc9bSMandy Kirkconnell } 42028867bc9bSMandy Kirkconnell if (bno >= startoff + blockcount) { 42038867bc9bSMandy Kirkconnell if (++idx == nextents) { 42048867bc9bSMandy Kirkconnell ep = NULL; 42058867bc9bSMandy Kirkconnell } else { 42068867bc9bSMandy Kirkconnell ep = xfs_iext_get_ext(ifp, idx); 42078867bc9bSMandy Kirkconnell } 42088867bc9bSMandy Kirkconnell } 42098867bc9bSMandy Kirkconnell *idxp = idx; 42108867bc9bSMandy Kirkconnell return ep; 42118867bc9bSMandy Kirkconnell } 42128867bc9bSMandy Kirkconnell 42138867bc9bSMandy Kirkconnell /* 42140293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 42150293ce3aSMandy Kirkconnell * extent record for filesystem block bno. Store the index of the 42160293ce3aSMandy Kirkconnell * target irec in *erp_idxp. 42170293ce3aSMandy Kirkconnell */ 42188867bc9bSMandy Kirkconnell xfs_ext_irec_t * /* pointer to found extent record */ 42190293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec( 42200293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 42210293ce3aSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 42220293ce3aSMandy Kirkconnell int *erp_idxp) /* irec index of target ext list */ 42230293ce3aSMandy Kirkconnell { 42240293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 42250293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp_next; /* next indirection array entry */ 42268867bc9bSMandy Kirkconnell int erp_idx; /* indirection array index */ 42270293ce3aSMandy Kirkconnell int nlists; /* number of extent irec's (lists) */ 42280293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 42290293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 42300293ce3aSMandy Kirkconnell 42310293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 42320293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 42330293ce3aSMandy Kirkconnell erp_idx = 0; 42340293ce3aSMandy Kirkconnell low = 0; 42350293ce3aSMandy Kirkconnell high = nlists - 1; 42360293ce3aSMandy Kirkconnell while (low <= high) { 42370293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 42380293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 42390293ce3aSMandy Kirkconnell erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; 42400293ce3aSMandy Kirkconnell if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { 42410293ce3aSMandy Kirkconnell high = erp_idx - 1; 42420293ce3aSMandy Kirkconnell } else if (erp_next && bno >= 42430293ce3aSMandy Kirkconnell xfs_bmbt_get_startoff(erp_next->er_extbuf)) { 42440293ce3aSMandy Kirkconnell low = erp_idx + 1; 42450293ce3aSMandy Kirkconnell } else { 42460293ce3aSMandy Kirkconnell break; 42470293ce3aSMandy Kirkconnell } 42480293ce3aSMandy Kirkconnell } 42490293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 42500293ce3aSMandy Kirkconnell return erp; 42510293ce3aSMandy Kirkconnell } 42520293ce3aSMandy Kirkconnell 42530293ce3aSMandy Kirkconnell /* 42540293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 42550293ce3aSMandy Kirkconnell * extent record at file extent index *idxp. Store the index of the 42560293ce3aSMandy Kirkconnell * target irec in *erp_idxp and store the page index of the target 42570293ce3aSMandy Kirkconnell * extent record in *idxp. 42580293ce3aSMandy Kirkconnell */ 42590293ce3aSMandy Kirkconnell xfs_ext_irec_t * 42600293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec( 42610293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 42620293ce3aSMandy Kirkconnell xfs_extnum_t *idxp, /* extent index (file -> page) */ 42630293ce3aSMandy Kirkconnell int *erp_idxp, /* pointer to target irec */ 42640293ce3aSMandy Kirkconnell int realloc) /* new bytes were just added */ 42650293ce3aSMandy Kirkconnell { 42660293ce3aSMandy Kirkconnell xfs_ext_irec_t *prev; /* pointer to previous irec */ 42670293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ 42680293ce3aSMandy Kirkconnell int erp_idx; /* indirection array index */ 42690293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 42700293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 42710293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 42720293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 42730293ce3aSMandy Kirkconnell 42740293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 42750293ce3aSMandy Kirkconnell ASSERT(page_idx >= 0 && page_idx <= 42760293ce3aSMandy Kirkconnell ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); 42770293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 42780293ce3aSMandy Kirkconnell erp_idx = 0; 42790293ce3aSMandy Kirkconnell low = 0; 42800293ce3aSMandy Kirkconnell high = nlists - 1; 42810293ce3aSMandy Kirkconnell 42820293ce3aSMandy Kirkconnell /* Binary search extent irec's */ 42830293ce3aSMandy Kirkconnell while (low <= high) { 42840293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 42850293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 42860293ce3aSMandy Kirkconnell prev = erp_idx > 0 ? erp - 1 : NULL; 42870293ce3aSMandy Kirkconnell if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && 42880293ce3aSMandy Kirkconnell realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { 42890293ce3aSMandy Kirkconnell high = erp_idx - 1; 42900293ce3aSMandy Kirkconnell } else if (page_idx > erp->er_extoff + erp->er_extcount || 42910293ce3aSMandy Kirkconnell (page_idx == erp->er_extoff + erp->er_extcount && 42920293ce3aSMandy Kirkconnell !realloc)) { 42930293ce3aSMandy Kirkconnell low = erp_idx + 1; 42940293ce3aSMandy Kirkconnell } else if (page_idx == erp->er_extoff + erp->er_extcount && 42950293ce3aSMandy Kirkconnell erp->er_extcount == XFS_LINEAR_EXTS) { 42960293ce3aSMandy Kirkconnell ASSERT(realloc); 42970293ce3aSMandy Kirkconnell page_idx = 0; 42980293ce3aSMandy Kirkconnell erp_idx++; 42990293ce3aSMandy Kirkconnell erp = erp_idx < nlists ? erp + 1 : NULL; 43000293ce3aSMandy Kirkconnell break; 43010293ce3aSMandy Kirkconnell } else { 43020293ce3aSMandy Kirkconnell page_idx -= erp->er_extoff; 43030293ce3aSMandy Kirkconnell break; 43040293ce3aSMandy Kirkconnell } 43050293ce3aSMandy Kirkconnell } 43060293ce3aSMandy Kirkconnell *idxp = page_idx; 43070293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 43080293ce3aSMandy Kirkconnell return(erp); 43090293ce3aSMandy Kirkconnell } 43100293ce3aSMandy Kirkconnell 43110293ce3aSMandy Kirkconnell /* 43120293ce3aSMandy Kirkconnell * Allocate and initialize an indirection array once the space needed 43130293ce3aSMandy Kirkconnell * for incore extents increases above XFS_IEXT_BUFSZ. 43140293ce3aSMandy Kirkconnell */ 43150293ce3aSMandy Kirkconnell void 43160293ce3aSMandy Kirkconnell xfs_iext_irec_init( 43170293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 43180293ce3aSMandy Kirkconnell { 43190293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 43200293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 43210293ce3aSMandy Kirkconnell 43220293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 43230293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 43240293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 43250293ce3aSMandy Kirkconnell 43266785073bSDavid Chinner erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); 43270293ce3aSMandy Kirkconnell 43280293ce3aSMandy Kirkconnell if (nextents == 0) { 43296785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 43300293ce3aSMandy Kirkconnell } else if (!ifp->if_real_bytes) { 43310293ce3aSMandy Kirkconnell xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 43320293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 43330293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 43340293ce3aSMandy Kirkconnell } 43350293ce3aSMandy Kirkconnell erp->er_extbuf = ifp->if_u1.if_extents; 43360293ce3aSMandy Kirkconnell erp->er_extcount = nextents; 43370293ce3aSMandy Kirkconnell erp->er_extoff = 0; 43380293ce3aSMandy Kirkconnell 43390293ce3aSMandy Kirkconnell ifp->if_flags |= XFS_IFEXTIREC; 43400293ce3aSMandy Kirkconnell ifp->if_real_bytes = XFS_IEXT_BUFSZ; 43410293ce3aSMandy Kirkconnell ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 43420293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = erp; 43430293ce3aSMandy Kirkconnell 43440293ce3aSMandy Kirkconnell return; 43450293ce3aSMandy Kirkconnell } 43460293ce3aSMandy Kirkconnell 43470293ce3aSMandy Kirkconnell /* 43480293ce3aSMandy Kirkconnell * Allocate and initialize a new entry in the indirection array. 43490293ce3aSMandy Kirkconnell */ 43500293ce3aSMandy Kirkconnell xfs_ext_irec_t * 43510293ce3aSMandy Kirkconnell xfs_iext_irec_new( 43520293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 43530293ce3aSMandy Kirkconnell int erp_idx) /* index for new irec */ 43540293ce3aSMandy Kirkconnell { 43550293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 43560293ce3aSMandy Kirkconnell int i; /* loop counter */ 43570293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 43580293ce3aSMandy Kirkconnell 43590293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 43600293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 43610293ce3aSMandy Kirkconnell 43620293ce3aSMandy Kirkconnell /* Resize indirection array */ 43630293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, ++nlists * 43640293ce3aSMandy Kirkconnell sizeof(xfs_ext_irec_t)); 43650293ce3aSMandy Kirkconnell /* 43660293ce3aSMandy Kirkconnell * Move records down in the array so the 43670293ce3aSMandy Kirkconnell * new page can use erp_idx. 43680293ce3aSMandy Kirkconnell */ 43690293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 43700293ce3aSMandy Kirkconnell for (i = nlists - 1; i > erp_idx; i--) { 43710293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 43720293ce3aSMandy Kirkconnell } 43730293ce3aSMandy Kirkconnell ASSERT(i == erp_idx); 43740293ce3aSMandy Kirkconnell 43750293ce3aSMandy Kirkconnell /* Initialize new extent record */ 43760293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 43776785073bSDavid Chinner erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 43780293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 43790293ce3aSMandy Kirkconnell memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 43800293ce3aSMandy Kirkconnell erp[erp_idx].er_extcount = 0; 43810293ce3aSMandy Kirkconnell erp[erp_idx].er_extoff = erp_idx > 0 ? 43820293ce3aSMandy Kirkconnell erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 43830293ce3aSMandy Kirkconnell return (&erp[erp_idx]); 43840293ce3aSMandy Kirkconnell } 43850293ce3aSMandy Kirkconnell 43860293ce3aSMandy Kirkconnell /* 43870293ce3aSMandy Kirkconnell * Remove a record from the indirection array. 43880293ce3aSMandy Kirkconnell */ 43890293ce3aSMandy Kirkconnell void 43900293ce3aSMandy Kirkconnell xfs_iext_irec_remove( 43910293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 43920293ce3aSMandy Kirkconnell int erp_idx) /* irec index to remove */ 43930293ce3aSMandy Kirkconnell { 43940293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 43950293ce3aSMandy Kirkconnell int i; /* loop counter */ 43960293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 43970293ce3aSMandy Kirkconnell 43980293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 43990293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 44000293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 44010293ce3aSMandy Kirkconnell if (erp->er_extbuf) { 44020293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 44030293ce3aSMandy Kirkconnell -erp->er_extcount); 4404f0e2d93cSDenys Vlasenko kmem_free(erp->er_extbuf); 44050293ce3aSMandy Kirkconnell } 44060293ce3aSMandy Kirkconnell /* Compact extent records */ 44070293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 44080293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists - 1; i++) { 44090293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); 44100293ce3aSMandy Kirkconnell } 44110293ce3aSMandy Kirkconnell /* 44120293ce3aSMandy Kirkconnell * Manually free the last extent record from the indirection 44130293ce3aSMandy Kirkconnell * array. A call to xfs_iext_realloc_indirect() with a size 44140293ce3aSMandy Kirkconnell * of zero would result in a call to xfs_iext_destroy() which 44150293ce3aSMandy Kirkconnell * would in turn call this function again, creating a nasty 44160293ce3aSMandy Kirkconnell * infinite loop. 44170293ce3aSMandy Kirkconnell */ 44180293ce3aSMandy Kirkconnell if (--nlists) { 44190293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, 44200293ce3aSMandy Kirkconnell nlists * sizeof(xfs_ext_irec_t)); 44210293ce3aSMandy Kirkconnell } else { 4422f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 44230293ce3aSMandy Kirkconnell } 44240293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 44250293ce3aSMandy Kirkconnell } 44260293ce3aSMandy Kirkconnell 44270293ce3aSMandy Kirkconnell /* 44280293ce3aSMandy Kirkconnell * This is called to clean up large amounts of unused memory allocated 44290293ce3aSMandy Kirkconnell * by the indirection array. Before compacting anything though, verify 44300293ce3aSMandy Kirkconnell * that the indirection array is still needed and switch back to the 44310293ce3aSMandy Kirkconnell * linear extent list (or even the inline buffer) if possible. The 44320293ce3aSMandy Kirkconnell * compaction policy is as follows: 44330293ce3aSMandy Kirkconnell * 44340293ce3aSMandy Kirkconnell * Full Compaction: Extents fit into a single page (or inline buffer) 443571a8c87fSLachlan McIlroy * Partial Compaction: Extents occupy less than 50% of allocated space 44360293ce3aSMandy Kirkconnell * No Compaction: Extents occupy at least 50% of allocated space 44370293ce3aSMandy Kirkconnell */ 44380293ce3aSMandy Kirkconnell void 44390293ce3aSMandy Kirkconnell xfs_iext_irec_compact( 44400293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 44410293ce3aSMandy Kirkconnell { 44420293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 44430293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 44440293ce3aSMandy Kirkconnell 44450293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 44460293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 44470293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 44480293ce3aSMandy Kirkconnell 44490293ce3aSMandy Kirkconnell if (nextents == 0) { 44500293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 44510293ce3aSMandy Kirkconnell } else if (nextents <= XFS_INLINE_EXTS) { 44520293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 44530293ce3aSMandy Kirkconnell xfs_iext_direct_to_inline(ifp, nextents); 44540293ce3aSMandy Kirkconnell } else if (nextents <= XFS_LINEAR_EXTS) { 44550293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 44560293ce3aSMandy Kirkconnell } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 44570293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(ifp); 44580293ce3aSMandy Kirkconnell } 44590293ce3aSMandy Kirkconnell } 44600293ce3aSMandy Kirkconnell 44610293ce3aSMandy Kirkconnell /* 44620293ce3aSMandy Kirkconnell * Combine extents from neighboring extent pages. 44630293ce3aSMandy Kirkconnell */ 44640293ce3aSMandy Kirkconnell void 44650293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages( 44660293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 44670293ce3aSMandy Kirkconnell { 44680293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ 44690293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 44700293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 44710293ce3aSMandy Kirkconnell 44720293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 44730293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 44740293ce3aSMandy Kirkconnell while (erp_idx < nlists - 1) { 44750293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 44760293ce3aSMandy Kirkconnell erp_next = erp + 1; 44770293ce3aSMandy Kirkconnell if (erp_next->er_extcount <= 44780293ce3aSMandy Kirkconnell (XFS_LINEAR_EXTS - erp->er_extcount)) { 447971a8c87fSLachlan McIlroy memcpy(&erp->er_extbuf[erp->er_extcount], 44800293ce3aSMandy Kirkconnell erp_next->er_extbuf, erp_next->er_extcount * 44810293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 44820293ce3aSMandy Kirkconnell erp->er_extcount += erp_next->er_extcount; 44830293ce3aSMandy Kirkconnell /* 44840293ce3aSMandy Kirkconnell * Free page before removing extent record 44850293ce3aSMandy Kirkconnell * so er_extoffs don't get modified in 44860293ce3aSMandy Kirkconnell * xfs_iext_irec_remove. 44870293ce3aSMandy Kirkconnell */ 4488f0e2d93cSDenys Vlasenko kmem_free(erp_next->er_extbuf); 44890293ce3aSMandy Kirkconnell erp_next->er_extbuf = NULL; 44900293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx + 1); 44910293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 44920293ce3aSMandy Kirkconnell } else { 44930293ce3aSMandy Kirkconnell erp_idx++; 44940293ce3aSMandy Kirkconnell } 44950293ce3aSMandy Kirkconnell } 44960293ce3aSMandy Kirkconnell } 44970293ce3aSMandy Kirkconnell 44980293ce3aSMandy Kirkconnell /* 44990293ce3aSMandy Kirkconnell * This is called to update the er_extoff field in the indirection 45000293ce3aSMandy Kirkconnell * array when extents have been added or removed from one of the 45010293ce3aSMandy Kirkconnell * extent lists. erp_idx contains the irec index to begin updating 45020293ce3aSMandy Kirkconnell * at and ext_diff contains the number of extents that were added 45030293ce3aSMandy Kirkconnell * or removed. 45040293ce3aSMandy Kirkconnell */ 45050293ce3aSMandy Kirkconnell void 45060293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs( 45070293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 45080293ce3aSMandy Kirkconnell int erp_idx, /* irec index to update */ 45090293ce3aSMandy Kirkconnell int ext_diff) /* number of new extents */ 45100293ce3aSMandy Kirkconnell { 45110293ce3aSMandy Kirkconnell int i; /* loop counter */ 45120293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists */ 45130293ce3aSMandy Kirkconnell 45140293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 45150293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 45160293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists; i++) { 45170293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 45180293ce3aSMandy Kirkconnell } 45190293ce3aSMandy Kirkconnell } 4520