11da177e4SLinus Torvalds /* 23e57ecf6SOlaf Weber * Copyright (c) 2000-2006 Silicon Graphics, Inc. 37b718769SNathan Scott * All Rights Reserved. 41da177e4SLinus Torvalds * 57b718769SNathan Scott * This program is free software; you can redistribute it and/or 67b718769SNathan Scott * modify it under the terms of the GNU General Public License as 71da177e4SLinus Torvalds * published by the Free Software Foundation. 81da177e4SLinus Torvalds * 97b718769SNathan Scott * This program is distributed in the hope that it would be useful, 107b718769SNathan Scott * but WITHOUT ANY WARRANTY; without even the implied warranty of 117b718769SNathan Scott * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 127b718769SNathan Scott * GNU General Public License for more details. 131da177e4SLinus Torvalds * 147b718769SNathan Scott * You should have received a copy of the GNU General Public License 157b718769SNathan Scott * along with this program; if not, write the Free Software Foundation, 167b718769SNathan Scott * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 171da177e4SLinus Torvalds */ 1840ebd81dSRobert P. J. Day #include <linux/log2.h> 1940ebd81dSRobert P. J. Day 201da177e4SLinus Torvalds #include "xfs.h" 21a844f451SNathan Scott #include "xfs_fs.h" 221da177e4SLinus Torvalds #include "xfs_types.h" 23a844f451SNathan Scott #include "xfs_bit.h" 241da177e4SLinus Torvalds #include "xfs_log.h" 25a844f451SNathan Scott #include "xfs_inum.h" 261da177e4SLinus Torvalds #include "xfs_trans.h" 271da177e4SLinus Torvalds #include "xfs_trans_priv.h" 281da177e4SLinus Torvalds #include "xfs_sb.h" 291da177e4SLinus Torvalds #include "xfs_ag.h" 301da177e4SLinus Torvalds #include "xfs_mount.h" 311da177e4SLinus Torvalds #include "xfs_bmap_btree.h" 32a844f451SNathan Scott #include "xfs_alloc_btree.h" 331da177e4SLinus Torvalds #include "xfs_ialloc_btree.h" 34a844f451SNathan Scott #include "xfs_attr_sf.h" 351da177e4SLinus Torvalds #include "xfs_dinode.h" 361da177e4SLinus Torvalds #include "xfs_inode.h" 371da177e4SLinus Torvalds #include "xfs_buf_item.h" 38a844f451SNathan Scott #include "xfs_inode_item.h" 39a844f451SNathan Scott #include "xfs_btree.h" 40a844f451SNathan Scott #include "xfs_alloc.h" 41a844f451SNathan Scott #include "xfs_ialloc.h" 42a844f451SNathan Scott #include "xfs_bmap.h" 431da177e4SLinus Torvalds #include "xfs_error.h" 441da177e4SLinus Torvalds #include "xfs_utils.h" 451da177e4SLinus Torvalds #include "xfs_quota.h" 462a82b8beSDavid Chinner #include "xfs_filestream.h" 47739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h" 480b1b213fSChristoph Hellwig #include "xfs_trace.h" 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone; 511da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone; 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds /* 548f04c47aSChristoph Hellwig * Used in xfs_itruncate_extents(). This is the maximum number of extents 551da177e4SLinus Torvalds * freed from a file in a single transaction. 561da177e4SLinus Torvalds */ 571da177e4SLinus Torvalds #define XFS_ITRUNC_MAX_EXTENTS 2 581da177e4SLinus Torvalds 591da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 601da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 611da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 621da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 631da177e4SLinus Torvalds 641da177e4SLinus Torvalds #ifdef DEBUG 651da177e4SLinus Torvalds /* 661da177e4SLinus Torvalds * Make sure that the extents in the given memory buffer 671da177e4SLinus Torvalds * are valid. 681da177e4SLinus Torvalds */ 691da177e4SLinus Torvalds STATIC void 701da177e4SLinus Torvalds xfs_validate_extents( 714eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, 721da177e4SLinus Torvalds int nrecs, 731da177e4SLinus Torvalds xfs_exntfmt_t fmt) 741da177e4SLinus Torvalds { 751da177e4SLinus Torvalds xfs_bmbt_irec_t irec; 76a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t rec; 771da177e4SLinus Torvalds int i; 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 80a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 81a6f64d4aSChristoph Hellwig rec.l0 = get_unaligned(&ep->l0); 82a6f64d4aSChristoph Hellwig rec.l1 = get_unaligned(&ep->l1); 831da177e4SLinus Torvalds xfs_bmbt_get_all(&rec, &irec); 841da177e4SLinus Torvalds if (fmt == XFS_EXTFMT_NOSTATE) 851da177e4SLinus Torvalds ASSERT(irec.br_state == XFS_EXT_NORM); 861da177e4SLinus Torvalds } 871da177e4SLinus Torvalds } 881da177e4SLinus Torvalds #else /* DEBUG */ 89a6f64d4aSChristoph Hellwig #define xfs_validate_extents(ifp, nrecs, fmt) 901da177e4SLinus Torvalds #endif /* DEBUG */ 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds /* 931da177e4SLinus Torvalds * Check that none of the inode's in the buffer have a next 941da177e4SLinus Torvalds * unlinked field of 0. 951da177e4SLinus Torvalds */ 961da177e4SLinus Torvalds #if defined(DEBUG) 971da177e4SLinus Torvalds void 981da177e4SLinus Torvalds xfs_inobp_check( 991da177e4SLinus Torvalds xfs_mount_t *mp, 1001da177e4SLinus Torvalds xfs_buf_t *bp) 1011da177e4SLinus Torvalds { 1021da177e4SLinus Torvalds int i; 1031da177e4SLinus Torvalds int j; 1041da177e4SLinus Torvalds xfs_dinode_t *dip; 1051da177e4SLinus Torvalds 1061da177e4SLinus Torvalds j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds for (i = 0; i < j; i++) { 1091da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1101da177e4SLinus Torvalds i * mp->m_sb.sb_inodesize); 1111da177e4SLinus Torvalds if (!dip->di_next_unlinked) { 11253487786SDave Chinner xfs_alert(mp, 11353487786SDave Chinner "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", 1141da177e4SLinus Torvalds bp); 1151da177e4SLinus Torvalds ASSERT(dip->di_next_unlinked); 1161da177e4SLinus Torvalds } 1171da177e4SLinus Torvalds } 1181da177e4SLinus Torvalds } 1191da177e4SLinus Torvalds #endif 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds /* 1224ae29b43SDavid Chinner * Find the buffer associated with the given inode map 1234ae29b43SDavid Chinner * We do basic validation checks on the buffer once it has been 1244ae29b43SDavid Chinner * retrieved from disk. 1254ae29b43SDavid Chinner */ 1264ae29b43SDavid Chinner STATIC int 1274ae29b43SDavid Chinner xfs_imap_to_bp( 1284ae29b43SDavid Chinner xfs_mount_t *mp, 1294ae29b43SDavid Chinner xfs_trans_t *tp, 13092bfc6e7SChristoph Hellwig struct xfs_imap *imap, 1314ae29b43SDavid Chinner xfs_buf_t **bpp, 1324ae29b43SDavid Chinner uint buf_flags, 133b48d8d64SChristoph Hellwig uint iget_flags) 1344ae29b43SDavid Chinner { 1354ae29b43SDavid Chinner int error; 1364ae29b43SDavid Chinner int i; 1374ae29b43SDavid Chinner int ni; 1384ae29b43SDavid Chinner xfs_buf_t *bp; 1394ae29b43SDavid Chinner 1404ae29b43SDavid Chinner error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 141a3f74ffbSDavid Chinner (int)imap->im_len, buf_flags, &bp); 1424ae29b43SDavid Chinner if (error) { 143a3f74ffbSDavid Chinner if (error != EAGAIN) { 1440b932cccSDave Chinner xfs_warn(mp, 1450b932cccSDave Chinner "%s: xfs_trans_read_buf() returned error %d.", 1460b932cccSDave Chinner __func__, error); 147a3f74ffbSDavid Chinner } else { 1480cadda1cSChristoph Hellwig ASSERT(buf_flags & XBF_TRYLOCK); 149a3f74ffbSDavid Chinner } 1504ae29b43SDavid Chinner return error; 1514ae29b43SDavid Chinner } 1524ae29b43SDavid Chinner 1534ae29b43SDavid Chinner /* 1544ae29b43SDavid Chinner * Validate the magic number and version of every inode in the buffer 1554ae29b43SDavid Chinner * (if DEBUG kernel) or the first inode in the buffer, otherwise. 1564ae29b43SDavid Chinner */ 1574ae29b43SDavid Chinner #ifdef DEBUG 1584ae29b43SDavid Chinner ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; 1594ae29b43SDavid Chinner #else /* usual case */ 1604ae29b43SDavid Chinner ni = 1; 1614ae29b43SDavid Chinner #endif 1624ae29b43SDavid Chinner 1634ae29b43SDavid Chinner for (i = 0; i < ni; i++) { 1644ae29b43SDavid Chinner int di_ok; 1654ae29b43SDavid Chinner xfs_dinode_t *dip; 1664ae29b43SDavid Chinner 1674ae29b43SDavid Chinner dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1684ae29b43SDavid Chinner (i << mp->m_sb.sb_inodelog)); 16969ef921bSChristoph Hellwig di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 17081591fe2SChristoph Hellwig XFS_DINODE_GOOD_VERSION(dip->di_version); 1714ae29b43SDavid Chinner if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 1724ae29b43SDavid Chinner XFS_ERRTAG_ITOBP_INOTOBP, 1734ae29b43SDavid Chinner XFS_RANDOM_ITOBP_INOTOBP))) { 1741920779eSDave Chinner if (iget_flags & XFS_IGET_UNTRUSTED) { 1754ae29b43SDavid Chinner xfs_trans_brelse(tp, bp); 1764ae29b43SDavid Chinner return XFS_ERROR(EINVAL); 1774ae29b43SDavid Chinner } 1784ae29b43SDavid Chinner XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 1794ae29b43SDavid Chinner XFS_ERRLEVEL_HIGH, mp, dip); 1804ae29b43SDavid Chinner #ifdef DEBUG 1810b932cccSDave Chinner xfs_emerg(mp, 1820b932cccSDave Chinner "bad inode magic/vsn daddr %lld #%d (magic=%x)", 1834ae29b43SDavid Chinner (unsigned long long)imap->im_blkno, i, 18481591fe2SChristoph Hellwig be16_to_cpu(dip->di_magic)); 1850b932cccSDave Chinner ASSERT(0); 1864ae29b43SDavid Chinner #endif 1874ae29b43SDavid Chinner xfs_trans_brelse(tp, bp); 1884ae29b43SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 1894ae29b43SDavid Chinner } 1904ae29b43SDavid Chinner } 1914ae29b43SDavid Chinner 1924ae29b43SDavid Chinner xfs_inobp_check(mp, bp); 1934ae29b43SDavid Chinner *bpp = bp; 1944ae29b43SDavid Chinner return 0; 1954ae29b43SDavid Chinner } 1964ae29b43SDavid Chinner 1974ae29b43SDavid Chinner /* 1981da177e4SLinus Torvalds * This routine is called to map an inode number within a file 1991da177e4SLinus Torvalds * system to the buffer containing the on-disk version of the 2001da177e4SLinus Torvalds * inode. It returns a pointer to the buffer containing the 2011da177e4SLinus Torvalds * on-disk inode in the bpp parameter, and in the dip parameter 2021da177e4SLinus Torvalds * it returns a pointer to the on-disk inode within that buffer. 2031da177e4SLinus Torvalds * 2041da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2051da177e4SLinus Torvalds * dipp are undefined. 2061da177e4SLinus Torvalds * 2071da177e4SLinus Torvalds * Use xfs_imap() to determine the size and location of the 2081da177e4SLinus Torvalds * buffer to read from disk. 2091da177e4SLinus Torvalds */ 210c679eef0SChristoph Hellwig int 2111da177e4SLinus Torvalds xfs_inotobp( 2121da177e4SLinus Torvalds xfs_mount_t *mp, 2131da177e4SLinus Torvalds xfs_trans_t *tp, 2141da177e4SLinus Torvalds xfs_ino_t ino, 2151da177e4SLinus Torvalds xfs_dinode_t **dipp, 2161da177e4SLinus Torvalds xfs_buf_t **bpp, 217c679eef0SChristoph Hellwig int *offset, 218c679eef0SChristoph Hellwig uint imap_flags) 2191da177e4SLinus Torvalds { 22092bfc6e7SChristoph Hellwig struct xfs_imap imap; 2211da177e4SLinus Torvalds xfs_buf_t *bp; 2221da177e4SLinus Torvalds int error; 2231da177e4SLinus Torvalds 2241da177e4SLinus Torvalds imap.im_blkno = 0; 225a1941895SChristoph Hellwig error = xfs_imap(mp, tp, ino, &imap, imap_flags); 2264ae29b43SDavid Chinner if (error) 2271da177e4SLinus Torvalds return error; 2281da177e4SLinus Torvalds 2290cadda1cSChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); 2304ae29b43SDavid Chinner if (error) 2311da177e4SLinus Torvalds return error; 2321da177e4SLinus Torvalds 2331da177e4SLinus Torvalds *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2341da177e4SLinus Torvalds *bpp = bp; 2351da177e4SLinus Torvalds *offset = imap.im_boffset; 2361da177e4SLinus Torvalds return 0; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds 2401da177e4SLinus Torvalds /* 2411da177e4SLinus Torvalds * This routine is called to map an inode to the buffer containing 2421da177e4SLinus Torvalds * the on-disk version of the inode. It returns a pointer to the 2431da177e4SLinus Torvalds * buffer containing the on-disk inode in the bpp parameter, and in 2441da177e4SLinus Torvalds * the dip parameter it returns a pointer to the on-disk inode within 2451da177e4SLinus Torvalds * that buffer. 2461da177e4SLinus Torvalds * 2471da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2481da177e4SLinus Torvalds * dipp are undefined. 2491da177e4SLinus Torvalds * 25076d8b277SChristoph Hellwig * The inode is expected to already been mapped to its buffer and read 25176d8b277SChristoph Hellwig * in once, thus we can use the mapping information stored in the inode 25276d8b277SChristoph Hellwig * rather than calling xfs_imap(). This allows us to avoid the overhead 25376d8b277SChristoph Hellwig * of looking at the inode btree for small block file systems 25494e1b69dSChristoph Hellwig * (see xfs_imap()). 2551da177e4SLinus Torvalds */ 2561da177e4SLinus Torvalds int 2571da177e4SLinus Torvalds xfs_itobp( 2581da177e4SLinus Torvalds xfs_mount_t *mp, 2591da177e4SLinus Torvalds xfs_trans_t *tp, 2601da177e4SLinus Torvalds xfs_inode_t *ip, 2611da177e4SLinus Torvalds xfs_dinode_t **dipp, 2621da177e4SLinus Torvalds xfs_buf_t **bpp, 263a3f74ffbSDavid Chinner uint buf_flags) 2641da177e4SLinus Torvalds { 2651da177e4SLinus Torvalds xfs_buf_t *bp; 2661da177e4SLinus Torvalds int error; 2671da177e4SLinus Torvalds 26892bfc6e7SChristoph Hellwig ASSERT(ip->i_imap.im_blkno != 0); 2691da177e4SLinus Torvalds 27092bfc6e7SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); 2714ae29b43SDavid Chinner if (error) 2721da177e4SLinus Torvalds return error; 2734d1a2ed3SNathan Scott 274a3f74ffbSDavid Chinner if (!bp) { 2750cadda1cSChristoph Hellwig ASSERT(buf_flags & XBF_TRYLOCK); 276a3f74ffbSDavid Chinner ASSERT(tp == NULL); 277a3f74ffbSDavid Chinner *bpp = NULL; 278a3f74ffbSDavid Chinner return EAGAIN; 279a3f74ffbSDavid Chinner } 280a3f74ffbSDavid Chinner 28192bfc6e7SChristoph Hellwig *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2821da177e4SLinus Torvalds *bpp = bp; 2831da177e4SLinus Torvalds return 0; 2841da177e4SLinus Torvalds } 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds /* 2871da177e4SLinus Torvalds * Move inode type and inode format specific information from the 2881da177e4SLinus Torvalds * on-disk inode to the in-core inode. For fifos, devs, and sockets 2891da177e4SLinus Torvalds * this means set if_rdev to the proper value. For files, directories, 2901da177e4SLinus Torvalds * and symlinks this means to bring in the in-line data or extent 2911da177e4SLinus Torvalds * pointers. For a file in B-tree format, only the root is immediately 2921da177e4SLinus Torvalds * brought in-core. The rest will be in-lined in if_extents when it 2931da177e4SLinus Torvalds * is first referenced (see xfs_iread_extents()). 2941da177e4SLinus Torvalds */ 2951da177e4SLinus Torvalds STATIC int 2961da177e4SLinus Torvalds xfs_iformat( 2971da177e4SLinus Torvalds xfs_inode_t *ip, 2981da177e4SLinus Torvalds xfs_dinode_t *dip) 2991da177e4SLinus Torvalds { 3001da177e4SLinus Torvalds xfs_attr_shortform_t *atp; 3011da177e4SLinus Torvalds int size; 3021da177e4SLinus Torvalds int error; 3031da177e4SLinus Torvalds xfs_fsize_t di_size; 3041da177e4SLinus Torvalds ip->i_df.if_ext_max = 3051da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 3061da177e4SLinus Torvalds error = 0; 3071da177e4SLinus Torvalds 30881591fe2SChristoph Hellwig if (unlikely(be32_to_cpu(dip->di_nextents) + 30981591fe2SChristoph Hellwig be16_to_cpu(dip->di_anextents) > 31081591fe2SChristoph Hellwig be64_to_cpu(dip->di_nblocks))) { 31165333b4cSDave Chinner xfs_warn(ip->i_mount, 3123762ec6bSNathan Scott "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 3131da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 31481591fe2SChristoph Hellwig (int)(be32_to_cpu(dip->di_nextents) + 31581591fe2SChristoph Hellwig be16_to_cpu(dip->di_anextents)), 3161da177e4SLinus Torvalds (unsigned long long) 31781591fe2SChristoph Hellwig be64_to_cpu(dip->di_nblocks)); 3181da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 3191da177e4SLinus Torvalds ip->i_mount, dip); 3201da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3211da177e4SLinus Torvalds } 3221da177e4SLinus Torvalds 32381591fe2SChristoph Hellwig if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 32465333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", 3251da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 32681591fe2SChristoph Hellwig dip->di_forkoff); 3271da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 3281da177e4SLinus Torvalds ip->i_mount, dip); 3291da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3301da177e4SLinus Torvalds } 3311da177e4SLinus Torvalds 332b89d4208SChristoph Hellwig if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 333b89d4208SChristoph Hellwig !ip->i_mount->m_rtdev_targp)) { 33465333b4cSDave Chinner xfs_warn(ip->i_mount, 335b89d4208SChristoph Hellwig "corrupt dinode %Lu, has realtime flag set.", 336b89d4208SChristoph Hellwig ip->i_ino); 337b89d4208SChristoph Hellwig XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 338b89d4208SChristoph Hellwig XFS_ERRLEVEL_LOW, ip->i_mount, dip); 339b89d4208SChristoph Hellwig return XFS_ERROR(EFSCORRUPTED); 340b89d4208SChristoph Hellwig } 341b89d4208SChristoph Hellwig 3421da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 3431da177e4SLinus Torvalds case S_IFIFO: 3441da177e4SLinus Torvalds case S_IFCHR: 3451da177e4SLinus Torvalds case S_IFBLK: 3461da177e4SLinus Torvalds case S_IFSOCK: 34781591fe2SChristoph Hellwig if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 3481da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 3491da177e4SLinus Torvalds ip->i_mount, dip); 3501da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds ip->i_d.di_size = 0; 353ba87ea69SLachlan McIlroy ip->i_size = 0; 35481591fe2SChristoph Hellwig ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 3551da177e4SLinus Torvalds break; 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds case S_IFREG: 3581da177e4SLinus Torvalds case S_IFLNK: 3591da177e4SLinus Torvalds case S_IFDIR: 36081591fe2SChristoph Hellwig switch (dip->di_format) { 3611da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 3621da177e4SLinus Torvalds /* 3631da177e4SLinus Torvalds * no local regular files yet 3641da177e4SLinus Torvalds */ 365abbede1bSAl Viro if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { 36665333b4cSDave Chinner xfs_warn(ip->i_mount, 36765333b4cSDave Chinner "corrupt inode %Lu (local format for regular file).", 3681da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 3691da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(4)", 3701da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 3711da177e4SLinus Torvalds ip->i_mount, dip); 3721da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3731da177e4SLinus Torvalds } 3741da177e4SLinus Torvalds 37581591fe2SChristoph Hellwig di_size = be64_to_cpu(dip->di_size); 3761da177e4SLinus Torvalds if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 37765333b4cSDave Chinner xfs_warn(ip->i_mount, 37865333b4cSDave Chinner "corrupt inode %Lu (bad size %Ld for local inode).", 3791da177e4SLinus Torvalds (unsigned long long) ip->i_ino, 3801da177e4SLinus Torvalds (long long) di_size); 3811da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(5)", 3821da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 3831da177e4SLinus Torvalds ip->i_mount, dip); 3841da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3851da177e4SLinus Torvalds } 3861da177e4SLinus Torvalds 3871da177e4SLinus Torvalds size = (int)di_size; 3881da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 3891da177e4SLinus Torvalds break; 3901da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 3911da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 3921da177e4SLinus Torvalds break; 3931da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 3941da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 3951da177e4SLinus Torvalds break; 3961da177e4SLinus Torvalds default: 3971da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 3981da177e4SLinus Torvalds ip->i_mount); 3991da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4001da177e4SLinus Torvalds } 4011da177e4SLinus Torvalds break; 4021da177e4SLinus Torvalds 4031da177e4SLinus Torvalds default: 4041da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 4051da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4061da177e4SLinus Torvalds } 4071da177e4SLinus Torvalds if (error) { 4081da177e4SLinus Torvalds return error; 4091da177e4SLinus Torvalds } 4101da177e4SLinus Torvalds if (!XFS_DFORK_Q(dip)) 4111da177e4SLinus Torvalds return 0; 4121da177e4SLinus Torvalds ASSERT(ip->i_afp == NULL); 4134a7edddcSDave Chinner ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); 4141da177e4SLinus Torvalds ip->i_afp->if_ext_max = 4151da177e4SLinus Torvalds XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 41681591fe2SChristoph Hellwig switch (dip->di_aformat) { 4171da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 4181da177e4SLinus Torvalds atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 4193b244aa8SNathan Scott size = be16_to_cpu(atp->hdr.totsize); 4202809f76aSChristoph Hellwig 4212809f76aSChristoph Hellwig if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 42265333b4cSDave Chinner xfs_warn(ip->i_mount, 42365333b4cSDave Chinner "corrupt inode %Lu (bad attr fork size %Ld).", 4242809f76aSChristoph Hellwig (unsigned long long) ip->i_ino, 4252809f76aSChristoph Hellwig (long long) size); 4262809f76aSChristoph Hellwig XFS_CORRUPTION_ERROR("xfs_iformat(8)", 4272809f76aSChristoph Hellwig XFS_ERRLEVEL_LOW, 4282809f76aSChristoph Hellwig ip->i_mount, dip); 4292809f76aSChristoph Hellwig return XFS_ERROR(EFSCORRUPTED); 4302809f76aSChristoph Hellwig } 4312809f76aSChristoph Hellwig 4321da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 4331da177e4SLinus Torvalds break; 4341da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 4351da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 4361da177e4SLinus Torvalds break; 4371da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 4381da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 4391da177e4SLinus Torvalds break; 4401da177e4SLinus Torvalds default: 4411da177e4SLinus Torvalds error = XFS_ERROR(EFSCORRUPTED); 4421da177e4SLinus Torvalds break; 4431da177e4SLinus Torvalds } 4441da177e4SLinus Torvalds if (error) { 4451da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 4461da177e4SLinus Torvalds ip->i_afp = NULL; 4471da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 4481da177e4SLinus Torvalds } 4491da177e4SLinus Torvalds return error; 4501da177e4SLinus Torvalds } 4511da177e4SLinus Torvalds 4521da177e4SLinus Torvalds /* 4531da177e4SLinus Torvalds * The file is in-lined in the on-disk inode. 4541da177e4SLinus Torvalds * If it fits into if_inline_data, then copy 4551da177e4SLinus Torvalds * it there, otherwise allocate a buffer for it 4561da177e4SLinus Torvalds * and copy the data there. Either way, set 4571da177e4SLinus Torvalds * if_data to point at the data. 4581da177e4SLinus Torvalds * If we allocate a buffer for the data, make 4591da177e4SLinus Torvalds * sure that its size is a multiple of 4 and 4601da177e4SLinus Torvalds * record the real size in i_real_bytes. 4611da177e4SLinus Torvalds */ 4621da177e4SLinus Torvalds STATIC int 4631da177e4SLinus Torvalds xfs_iformat_local( 4641da177e4SLinus Torvalds xfs_inode_t *ip, 4651da177e4SLinus Torvalds xfs_dinode_t *dip, 4661da177e4SLinus Torvalds int whichfork, 4671da177e4SLinus Torvalds int size) 4681da177e4SLinus Torvalds { 4691da177e4SLinus Torvalds xfs_ifork_t *ifp; 4701da177e4SLinus Torvalds int real_size; 4711da177e4SLinus Torvalds 4721da177e4SLinus Torvalds /* 4731da177e4SLinus Torvalds * If the size is unreasonable, then something 4741da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 4751da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 4761da177e4SLinus Torvalds */ 4771da177e4SLinus Torvalds if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 47865333b4cSDave Chinner xfs_warn(ip->i_mount, 47965333b4cSDave Chinner "corrupt inode %Lu (bad size %d for local fork, size = %d).", 4801da177e4SLinus Torvalds (unsigned long long) ip->i_ino, size, 4811da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 4821da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 4831da177e4SLinus Torvalds ip->i_mount, dip); 4841da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4851da177e4SLinus Torvalds } 4861da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 4871da177e4SLinus Torvalds real_size = 0; 4881da177e4SLinus Torvalds if (size == 0) 4891da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 4901da177e4SLinus Torvalds else if (size <= sizeof(ifp->if_u2.if_inline_data)) 4911da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 4921da177e4SLinus Torvalds else { 4931da177e4SLinus Torvalds real_size = roundup(size, 4); 4944a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); 4951da177e4SLinus Torvalds } 4961da177e4SLinus Torvalds ifp->if_bytes = size; 4971da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 4981da177e4SLinus Torvalds if (size) 4991da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 5001da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 5011da177e4SLinus Torvalds ifp->if_flags |= XFS_IFINLINE; 5021da177e4SLinus Torvalds return 0; 5031da177e4SLinus Torvalds } 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds /* 5061da177e4SLinus Torvalds * The file consists of a set of extents all 5071da177e4SLinus Torvalds * of which fit into the on-disk inode. 5081da177e4SLinus Torvalds * If there are few enough extents to fit into 5091da177e4SLinus Torvalds * the if_inline_ext, then copy them there. 5101da177e4SLinus Torvalds * Otherwise allocate a buffer for them and copy 5111da177e4SLinus Torvalds * them into it. Either way, set if_extents 5121da177e4SLinus Torvalds * to point at the extents. 5131da177e4SLinus Torvalds */ 5141da177e4SLinus Torvalds STATIC int 5151da177e4SLinus Torvalds xfs_iformat_extents( 5161da177e4SLinus Torvalds xfs_inode_t *ip, 5171da177e4SLinus Torvalds xfs_dinode_t *dip, 5181da177e4SLinus Torvalds int whichfork) 5191da177e4SLinus Torvalds { 520a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp; 5211da177e4SLinus Torvalds xfs_ifork_t *ifp; 5221da177e4SLinus Torvalds int nex; 5231da177e4SLinus Torvalds int size; 5241da177e4SLinus Torvalds int i; 5251da177e4SLinus Torvalds 5261da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 5271da177e4SLinus Torvalds nex = XFS_DFORK_NEXTENTS(dip, whichfork); 5281da177e4SLinus Torvalds size = nex * (uint)sizeof(xfs_bmbt_rec_t); 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds /* 5311da177e4SLinus Torvalds * If the number of extents is unreasonable, then something 5321da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 5331da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 5341da177e4SLinus Torvalds */ 5351da177e4SLinus Torvalds if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 53665333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", 5371da177e4SLinus Torvalds (unsigned long long) ip->i_ino, nex); 5381da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 5391da177e4SLinus Torvalds ip->i_mount, dip); 5401da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5411da177e4SLinus Torvalds } 5421da177e4SLinus Torvalds 5434eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 5441da177e4SLinus Torvalds if (nex == 0) 5451da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 5461da177e4SLinus Torvalds else if (nex <= XFS_INLINE_EXTS) 5471da177e4SLinus Torvalds ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 5484eea22f0SMandy Kirkconnell else 5494eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nex); 5504eea22f0SMandy Kirkconnell 5511da177e4SLinus Torvalds ifp->if_bytes = size; 5521da177e4SLinus Torvalds if (size) { 5531da177e4SLinus Torvalds dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 554a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); 5554eea22f0SMandy Kirkconnell for (i = 0; i < nex; i++, dp++) { 556a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 557597bca63SHarvey Harrison ep->l0 = get_unaligned_be64(&dp->l0); 558597bca63SHarvey Harrison ep->l1 = get_unaligned_be64(&dp->l1); 5591da177e4SLinus Torvalds } 5603a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 5611da177e4SLinus Torvalds if (whichfork != XFS_DATA_FORK || 5621da177e4SLinus Torvalds XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 5631da177e4SLinus Torvalds if (unlikely(xfs_check_nostate_extents( 5644eea22f0SMandy Kirkconnell ifp, 0, nex))) { 5651da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat_extents(2)", 5661da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5671da177e4SLinus Torvalds ip->i_mount); 5681da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds } 5711da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 5721da177e4SLinus Torvalds return 0; 5731da177e4SLinus Torvalds } 5741da177e4SLinus Torvalds 5751da177e4SLinus Torvalds /* 5761da177e4SLinus Torvalds * The file has too many extents to fit into 5771da177e4SLinus Torvalds * the inode, so they are in B-tree format. 5781da177e4SLinus Torvalds * Allocate a buffer for the root of the B-tree 5791da177e4SLinus Torvalds * and copy the root into it. The i_extents 5801da177e4SLinus Torvalds * field will remain NULL until all of the 5811da177e4SLinus Torvalds * extents are read in (when they are needed). 5821da177e4SLinus Torvalds */ 5831da177e4SLinus Torvalds STATIC int 5841da177e4SLinus Torvalds xfs_iformat_btree( 5851da177e4SLinus Torvalds xfs_inode_t *ip, 5861da177e4SLinus Torvalds xfs_dinode_t *dip, 5871da177e4SLinus Torvalds int whichfork) 5881da177e4SLinus Torvalds { 5891da177e4SLinus Torvalds xfs_bmdr_block_t *dfp; 5901da177e4SLinus Torvalds xfs_ifork_t *ifp; 5911da177e4SLinus Torvalds /* REFERENCED */ 5921da177e4SLinus Torvalds int nrecs; 5931da177e4SLinus Torvalds int size; 5941da177e4SLinus Torvalds 5951da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 5961da177e4SLinus Torvalds dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 5971da177e4SLinus Torvalds size = XFS_BMAP_BROOT_SPACE(dfp); 59860197e8dSChristoph Hellwig nrecs = be16_to_cpu(dfp->bb_numrecs); 5991da177e4SLinus Torvalds 6001da177e4SLinus Torvalds /* 6011da177e4SLinus Torvalds * blow out if -- fork has less extents than can fit in 6021da177e4SLinus Torvalds * fork (fork shouldn't be a btree format), root btree 6031da177e4SLinus Torvalds * block has more records than can fit into the fork, 6041da177e4SLinus Torvalds * or the number of extents is greater than the number of 6051da177e4SLinus Torvalds * blocks. 6061da177e4SLinus Torvalds */ 6071da177e4SLinus Torvalds if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 6081da177e4SLinus Torvalds || XFS_BMDR_SPACE_CALC(nrecs) > 6091da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 6101da177e4SLinus Torvalds || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 61165333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", 6121da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 61365333b4cSDave Chinner XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 61465333b4cSDave Chinner ip->i_mount, dip); 6151da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6161da177e4SLinus Torvalds } 6171da177e4SLinus Torvalds 6181da177e4SLinus Torvalds ifp->if_broot_bytes = size; 6194a7edddcSDave Chinner ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); 6201da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 6211da177e4SLinus Torvalds /* 6221da177e4SLinus Torvalds * Copy and convert from the on-disk structure 6231da177e4SLinus Torvalds * to the in-memory structure. 6241da177e4SLinus Torvalds */ 62560197e8dSChristoph Hellwig xfs_bmdr_to_bmbt(ip->i_mount, dfp, 62660197e8dSChristoph Hellwig XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 6271da177e4SLinus Torvalds ifp->if_broot, size); 6281da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 6291da177e4SLinus Torvalds ifp->if_flags |= XFS_IFBROOT; 6301da177e4SLinus Torvalds 6311da177e4SLinus Torvalds return 0; 6321da177e4SLinus Torvalds } 6331da177e4SLinus Torvalds 634d96f8f89SEric Sandeen STATIC void 635347d1c01SChristoph Hellwig xfs_dinode_from_disk( 636347d1c01SChristoph Hellwig xfs_icdinode_t *to, 63781591fe2SChristoph Hellwig xfs_dinode_t *from) 6381da177e4SLinus Torvalds { 639347d1c01SChristoph Hellwig to->di_magic = be16_to_cpu(from->di_magic); 640347d1c01SChristoph Hellwig to->di_mode = be16_to_cpu(from->di_mode); 641347d1c01SChristoph Hellwig to->di_version = from ->di_version; 642347d1c01SChristoph Hellwig to->di_format = from->di_format; 643347d1c01SChristoph Hellwig to->di_onlink = be16_to_cpu(from->di_onlink); 644347d1c01SChristoph Hellwig to->di_uid = be32_to_cpu(from->di_uid); 645347d1c01SChristoph Hellwig to->di_gid = be32_to_cpu(from->di_gid); 646347d1c01SChristoph Hellwig to->di_nlink = be32_to_cpu(from->di_nlink); 6476743099cSArkadiusz Mi?kiewicz to->di_projid_lo = be16_to_cpu(from->di_projid_lo); 6486743099cSArkadiusz Mi?kiewicz to->di_projid_hi = be16_to_cpu(from->di_projid_hi); 649347d1c01SChristoph Hellwig memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 650347d1c01SChristoph Hellwig to->di_flushiter = be16_to_cpu(from->di_flushiter); 651347d1c01SChristoph Hellwig to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); 652347d1c01SChristoph Hellwig to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); 653347d1c01SChristoph Hellwig to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); 654347d1c01SChristoph Hellwig to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); 655347d1c01SChristoph Hellwig to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec); 656347d1c01SChristoph Hellwig to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); 657347d1c01SChristoph Hellwig to->di_size = be64_to_cpu(from->di_size); 658347d1c01SChristoph Hellwig to->di_nblocks = be64_to_cpu(from->di_nblocks); 659347d1c01SChristoph Hellwig to->di_extsize = be32_to_cpu(from->di_extsize); 660347d1c01SChristoph Hellwig to->di_nextents = be32_to_cpu(from->di_nextents); 661347d1c01SChristoph Hellwig to->di_anextents = be16_to_cpu(from->di_anextents); 662347d1c01SChristoph Hellwig to->di_forkoff = from->di_forkoff; 663347d1c01SChristoph Hellwig to->di_aformat = from->di_aformat; 664347d1c01SChristoph Hellwig to->di_dmevmask = be32_to_cpu(from->di_dmevmask); 665347d1c01SChristoph Hellwig to->di_dmstate = be16_to_cpu(from->di_dmstate); 666347d1c01SChristoph Hellwig to->di_flags = be16_to_cpu(from->di_flags); 667347d1c01SChristoph Hellwig to->di_gen = be32_to_cpu(from->di_gen); 6681da177e4SLinus Torvalds } 6691da177e4SLinus Torvalds 670347d1c01SChristoph Hellwig void 671347d1c01SChristoph Hellwig xfs_dinode_to_disk( 67281591fe2SChristoph Hellwig xfs_dinode_t *to, 673347d1c01SChristoph Hellwig xfs_icdinode_t *from) 674347d1c01SChristoph Hellwig { 675347d1c01SChristoph Hellwig to->di_magic = cpu_to_be16(from->di_magic); 676347d1c01SChristoph Hellwig to->di_mode = cpu_to_be16(from->di_mode); 677347d1c01SChristoph Hellwig to->di_version = from ->di_version; 678347d1c01SChristoph Hellwig to->di_format = from->di_format; 679347d1c01SChristoph Hellwig to->di_onlink = cpu_to_be16(from->di_onlink); 680347d1c01SChristoph Hellwig to->di_uid = cpu_to_be32(from->di_uid); 681347d1c01SChristoph Hellwig to->di_gid = cpu_to_be32(from->di_gid); 682347d1c01SChristoph Hellwig to->di_nlink = cpu_to_be32(from->di_nlink); 6836743099cSArkadiusz Mi?kiewicz to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 6846743099cSArkadiusz Mi?kiewicz to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 685347d1c01SChristoph Hellwig memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 686347d1c01SChristoph Hellwig to->di_flushiter = cpu_to_be16(from->di_flushiter); 687347d1c01SChristoph Hellwig to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 688347d1c01SChristoph Hellwig to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 689347d1c01SChristoph Hellwig to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); 690347d1c01SChristoph Hellwig to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); 691347d1c01SChristoph Hellwig to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); 692347d1c01SChristoph Hellwig to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); 693347d1c01SChristoph Hellwig to->di_size = cpu_to_be64(from->di_size); 694347d1c01SChristoph Hellwig to->di_nblocks = cpu_to_be64(from->di_nblocks); 695347d1c01SChristoph Hellwig to->di_extsize = cpu_to_be32(from->di_extsize); 696347d1c01SChristoph Hellwig to->di_nextents = cpu_to_be32(from->di_nextents); 697347d1c01SChristoph Hellwig to->di_anextents = cpu_to_be16(from->di_anextents); 698347d1c01SChristoph Hellwig to->di_forkoff = from->di_forkoff; 699347d1c01SChristoph Hellwig to->di_aformat = from->di_aformat; 700347d1c01SChristoph Hellwig to->di_dmevmask = cpu_to_be32(from->di_dmevmask); 701347d1c01SChristoph Hellwig to->di_dmstate = cpu_to_be16(from->di_dmstate); 702347d1c01SChristoph Hellwig to->di_flags = cpu_to_be16(from->di_flags); 703347d1c01SChristoph Hellwig to->di_gen = cpu_to_be32(from->di_gen); 7041da177e4SLinus Torvalds } 7051da177e4SLinus Torvalds 7061da177e4SLinus Torvalds STATIC uint 7071da177e4SLinus Torvalds _xfs_dic2xflags( 7081da177e4SLinus Torvalds __uint16_t di_flags) 7091da177e4SLinus Torvalds { 7101da177e4SLinus Torvalds uint flags = 0; 7111da177e4SLinus Torvalds 7121da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_ANY) { 7131da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_REALTIME) 7141da177e4SLinus Torvalds flags |= XFS_XFLAG_REALTIME; 7151da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PREALLOC) 7161da177e4SLinus Torvalds flags |= XFS_XFLAG_PREALLOC; 7171da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_IMMUTABLE) 7181da177e4SLinus Torvalds flags |= XFS_XFLAG_IMMUTABLE; 7191da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_APPEND) 7201da177e4SLinus Torvalds flags |= XFS_XFLAG_APPEND; 7211da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_SYNC) 7221da177e4SLinus Torvalds flags |= XFS_XFLAG_SYNC; 7231da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOATIME) 7241da177e4SLinus Torvalds flags |= XFS_XFLAG_NOATIME; 7251da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NODUMP) 7261da177e4SLinus Torvalds flags |= XFS_XFLAG_NODUMP; 7271da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_RTINHERIT) 7281da177e4SLinus Torvalds flags |= XFS_XFLAG_RTINHERIT; 7291da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PROJINHERIT) 7301da177e4SLinus Torvalds flags |= XFS_XFLAG_PROJINHERIT; 7311da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOSYMLINKS) 7321da177e4SLinus Torvalds flags |= XFS_XFLAG_NOSYMLINKS; 733dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSIZE) 734dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSIZE; 735dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 736dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSZINHERIT; 737d3446eacSBarry Naujok if (di_flags & XFS_DIFLAG_NODEFRAG) 738d3446eacSBarry Naujok flags |= XFS_XFLAG_NODEFRAG; 7392a82b8beSDavid Chinner if (di_flags & XFS_DIFLAG_FILESTREAM) 7402a82b8beSDavid Chinner flags |= XFS_XFLAG_FILESTREAM; 7411da177e4SLinus Torvalds } 7421da177e4SLinus Torvalds 7431da177e4SLinus Torvalds return flags; 7441da177e4SLinus Torvalds } 7451da177e4SLinus Torvalds 7461da177e4SLinus Torvalds uint 7471da177e4SLinus Torvalds xfs_ip2xflags( 7481da177e4SLinus Torvalds xfs_inode_t *ip) 7491da177e4SLinus Torvalds { 750347d1c01SChristoph Hellwig xfs_icdinode_t *dic = &ip->i_d; 7511da177e4SLinus Torvalds 752a916e2bdSNathan Scott return _xfs_dic2xflags(dic->di_flags) | 75345ba598eSChristoph Hellwig (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0); 7541da177e4SLinus Torvalds } 7551da177e4SLinus Torvalds 7561da177e4SLinus Torvalds uint 7571da177e4SLinus Torvalds xfs_dic2xflags( 75845ba598eSChristoph Hellwig xfs_dinode_t *dip) 7591da177e4SLinus Torvalds { 76081591fe2SChristoph Hellwig return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | 76145ba598eSChristoph Hellwig (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 7621da177e4SLinus Torvalds } 7631da177e4SLinus Torvalds 7641da177e4SLinus Torvalds /* 76524f211baSChristoph Hellwig * Read the disk inode attributes into the in-core inode structure. 7661da177e4SLinus Torvalds */ 7671da177e4SLinus Torvalds int 7681da177e4SLinus Torvalds xfs_iread( 7691da177e4SLinus Torvalds xfs_mount_t *mp, 7701da177e4SLinus Torvalds xfs_trans_t *tp, 77124f211baSChristoph Hellwig xfs_inode_t *ip, 77224f211baSChristoph Hellwig uint iget_flags) 7731da177e4SLinus Torvalds { 7741da177e4SLinus Torvalds xfs_buf_t *bp; 7751da177e4SLinus Torvalds xfs_dinode_t *dip; 7761da177e4SLinus Torvalds int error; 7771da177e4SLinus Torvalds 7781da177e4SLinus Torvalds /* 77992bfc6e7SChristoph Hellwig * Fill in the location information in the in-core inode. 7801da177e4SLinus Torvalds */ 78124f211baSChristoph Hellwig error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 7829ed0451eSChristoph Hellwig if (error) 78324f211baSChristoph Hellwig return error; 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds /* 78692bfc6e7SChristoph Hellwig * Get pointers to the on-disk inode and the buffer containing it. 78776d8b277SChristoph Hellwig */ 78892bfc6e7SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 7890cadda1cSChristoph Hellwig XBF_LOCK, iget_flags); 79076d8b277SChristoph Hellwig if (error) 79124f211baSChristoph Hellwig return error; 79292bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 79376d8b277SChristoph Hellwig 79476d8b277SChristoph Hellwig /* 7951da177e4SLinus Torvalds * If we got something that isn't an inode it means someone 7961da177e4SLinus Torvalds * (nfs or dmi) has a stale handle. 7971da177e4SLinus Torvalds */ 79869ef921bSChristoph Hellwig if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { 7991da177e4SLinus Torvalds #ifdef DEBUG 80053487786SDave Chinner xfs_alert(mp, 80153487786SDave Chinner "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", 80253487786SDave Chinner __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); 8031da177e4SLinus Torvalds #endif /* DEBUG */ 8049ed0451eSChristoph Hellwig error = XFS_ERROR(EINVAL); 8059ed0451eSChristoph Hellwig goto out_brelse; 8061da177e4SLinus Torvalds } 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds /* 8091da177e4SLinus Torvalds * If the on-disk inode is already linked to a directory 8101da177e4SLinus Torvalds * entry, copy all of the inode into the in-core inode. 8111da177e4SLinus Torvalds * xfs_iformat() handles copying in the inode format 8121da177e4SLinus Torvalds * specific information. 8131da177e4SLinus Torvalds * Otherwise, just get the truly permanent information. 8141da177e4SLinus Torvalds */ 81581591fe2SChristoph Hellwig if (dip->di_mode) { 81681591fe2SChristoph Hellwig xfs_dinode_from_disk(&ip->i_d, dip); 8171da177e4SLinus Torvalds error = xfs_iformat(ip, dip); 8181da177e4SLinus Torvalds if (error) { 8191da177e4SLinus Torvalds #ifdef DEBUG 82053487786SDave Chinner xfs_alert(mp, "%s: xfs_iformat() returned error %d", 82153487786SDave Chinner __func__, error); 8221da177e4SLinus Torvalds #endif /* DEBUG */ 8239ed0451eSChristoph Hellwig goto out_brelse; 8241da177e4SLinus Torvalds } 8251da177e4SLinus Torvalds } else { 82681591fe2SChristoph Hellwig ip->i_d.di_magic = be16_to_cpu(dip->di_magic); 82781591fe2SChristoph Hellwig ip->i_d.di_version = dip->di_version; 82881591fe2SChristoph Hellwig ip->i_d.di_gen = be32_to_cpu(dip->di_gen); 82981591fe2SChristoph Hellwig ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 8301da177e4SLinus Torvalds /* 8311da177e4SLinus Torvalds * Make sure to pull in the mode here as well in 8321da177e4SLinus Torvalds * case the inode is released without being used. 8331da177e4SLinus Torvalds * This ensures that xfs_inactive() will see that 8341da177e4SLinus Torvalds * the inode is already free and not try to mess 8351da177e4SLinus Torvalds * with the uninitialized part of it. 8361da177e4SLinus Torvalds */ 8371da177e4SLinus Torvalds ip->i_d.di_mode = 0; 8381da177e4SLinus Torvalds /* 8391da177e4SLinus Torvalds * Initialize the per-fork minima and maxima for a new 8401da177e4SLinus Torvalds * inode here. xfs_iformat will do it for old inodes. 8411da177e4SLinus Torvalds */ 8421da177e4SLinus Torvalds ip->i_df.if_ext_max = 8431da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 8441da177e4SLinus Torvalds } 8451da177e4SLinus Torvalds 8461da177e4SLinus Torvalds /* 8471da177e4SLinus Torvalds * The inode format changed when we moved the link count and 8481da177e4SLinus Torvalds * made it 32 bits long. If this is an old format inode, 8491da177e4SLinus Torvalds * convert it in memory to look like a new one. If it gets 8501da177e4SLinus Torvalds * flushed to disk we will convert back before flushing or 8511da177e4SLinus Torvalds * logging it. We zero out the new projid field and the old link 8521da177e4SLinus Torvalds * count field. We'll handle clearing the pad field (the remains 8531da177e4SLinus Torvalds * of the old uuid field) when we actually convert the inode to 8541da177e4SLinus Torvalds * the new format. We don't change the version number so that we 8551da177e4SLinus Torvalds * can distinguish this from a real new format inode. 8561da177e4SLinus Torvalds */ 85751ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 8581da177e4SLinus Torvalds ip->i_d.di_nlink = ip->i_d.di_onlink; 8591da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 8606743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, 0); 8611da177e4SLinus Torvalds } 8621da177e4SLinus Torvalds 8631da177e4SLinus Torvalds ip->i_delayed_blks = 0; 864ba87ea69SLachlan McIlroy ip->i_size = ip->i_d.di_size; 8651da177e4SLinus Torvalds 8661da177e4SLinus Torvalds /* 8671da177e4SLinus Torvalds * Mark the buffer containing the inode as something to keep 8681da177e4SLinus Torvalds * around for a while. This helps to keep recently accessed 8691da177e4SLinus Torvalds * meta-data in-core longer. 8701da177e4SLinus Torvalds */ 871821eb21dSDave Chinner xfs_buf_set_ref(bp, XFS_INO_REF); 8721da177e4SLinus Torvalds 8731da177e4SLinus Torvalds /* 8741da177e4SLinus Torvalds * Use xfs_trans_brelse() to release the buffer containing the 8751da177e4SLinus Torvalds * on-disk inode, because it was acquired with xfs_trans_read_buf() 8761da177e4SLinus Torvalds * in xfs_itobp() above. If tp is NULL, this is just a normal 8771da177e4SLinus Torvalds * brelse(). If we're within a transaction, then xfs_trans_brelse() 8781da177e4SLinus Torvalds * will only release the buffer if it is not dirty within the 8791da177e4SLinus Torvalds * transaction. It will be OK to release the buffer in this case, 8801da177e4SLinus Torvalds * because inodes on disk are never destroyed and we will be 8811da177e4SLinus Torvalds * locking the new in-core inode before putting it in the hash 8821da177e4SLinus Torvalds * table where other processes can find it. Thus we don't have 8831da177e4SLinus Torvalds * to worry about the inode being changed just because we released 8841da177e4SLinus Torvalds * the buffer. 8851da177e4SLinus Torvalds */ 8869ed0451eSChristoph Hellwig out_brelse: 8879ed0451eSChristoph Hellwig xfs_trans_brelse(tp, bp); 8889ed0451eSChristoph Hellwig return error; 8891da177e4SLinus Torvalds } 8901da177e4SLinus Torvalds 8911da177e4SLinus Torvalds /* 8921da177e4SLinus Torvalds * Read in extents from a btree-format inode. 8931da177e4SLinus Torvalds * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 8941da177e4SLinus Torvalds */ 8951da177e4SLinus Torvalds int 8961da177e4SLinus Torvalds xfs_iread_extents( 8971da177e4SLinus Torvalds xfs_trans_t *tp, 8981da177e4SLinus Torvalds xfs_inode_t *ip, 8991da177e4SLinus Torvalds int whichfork) 9001da177e4SLinus Torvalds { 9011da177e4SLinus Torvalds int error; 9021da177e4SLinus Torvalds xfs_ifork_t *ifp; 9034eea22f0SMandy Kirkconnell xfs_extnum_t nextents; 9041da177e4SLinus Torvalds 9051da177e4SLinus Torvalds if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 9061da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 9071da177e4SLinus Torvalds ip->i_mount); 9081da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 9091da177e4SLinus Torvalds } 9104eea22f0SMandy Kirkconnell nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 9111da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 9124eea22f0SMandy Kirkconnell 9131da177e4SLinus Torvalds /* 9141da177e4SLinus Torvalds * We know that the size is valid (it's checked in iformat_btree) 9151da177e4SLinus Torvalds */ 9164eea22f0SMandy Kirkconnell ifp->if_bytes = ifp->if_real_bytes = 0; 9171da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 9184eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nextents); 9191da177e4SLinus Torvalds error = xfs_bmap_read_extents(tp, ip, whichfork); 9201da177e4SLinus Torvalds if (error) { 9214eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 9221da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 9231da177e4SLinus Torvalds return error; 9241da177e4SLinus Torvalds } 925a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); 9261da177e4SLinus Torvalds return 0; 9271da177e4SLinus Torvalds } 9281da177e4SLinus Torvalds 9291da177e4SLinus Torvalds /* 9301da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 9311da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 9321da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 9331da177e4SLinus Torvalds * set according to the contents of the given cred structure. 9341da177e4SLinus Torvalds * 9351da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 9361da177e4SLinus Torvalds * has a free inode available, call xfs_iget() 9371da177e4SLinus Torvalds * to obtain the in-core version of the allocated inode. Finally, 9381da177e4SLinus Torvalds * fill in the inode and log its initial contents. In this case, 9391da177e4SLinus Torvalds * ialloc_context would be set to NULL and call_again set to false. 9401da177e4SLinus Torvalds * 9411da177e4SLinus Torvalds * If xfs_dialloc() does not have an available inode, 9421da177e4SLinus Torvalds * it will replenish its supply by doing an allocation. Since we can 9431da177e4SLinus Torvalds * only do one allocation within a transaction without deadlocks, we 9441da177e4SLinus Torvalds * must commit the current transaction before returning the inode itself. 9451da177e4SLinus Torvalds * In this case, therefore, we will set call_again to true and return. 9461da177e4SLinus Torvalds * The caller should then commit the current transaction, start a new 9471da177e4SLinus Torvalds * transaction, and call xfs_ialloc() again to actually get the inode. 9481da177e4SLinus Torvalds * 9491da177e4SLinus Torvalds * To ensure that some other process does not grab the inode that 9501da177e4SLinus Torvalds * was allocated during the first call to xfs_ialloc(), this routine 9511da177e4SLinus Torvalds * also returns the [locked] bp pointing to the head of the freelist 9521da177e4SLinus Torvalds * as ialloc_context. The caller should hold this buffer across 9531da177e4SLinus Torvalds * the commit and pass it back into this routine on the second call. 954b11f94d5SDavid Chinner * 955b11f94d5SDavid Chinner * If we are allocating quota inodes, we do not have a parent inode 956b11f94d5SDavid Chinner * to attach to or associate with (i.e. pip == NULL) because they 957b11f94d5SDavid Chinner * are not linked into the directory structure - they are attached 958b11f94d5SDavid Chinner * directly to the superblock - and so have no parent. 9591da177e4SLinus Torvalds */ 9601da177e4SLinus Torvalds int 9611da177e4SLinus Torvalds xfs_ialloc( 9621da177e4SLinus Torvalds xfs_trans_t *tp, 9631da177e4SLinus Torvalds xfs_inode_t *pip, 964576b1d67SAl Viro umode_t mode, 96531b084aeSNathan Scott xfs_nlink_t nlink, 9661da177e4SLinus Torvalds xfs_dev_t rdev, 9676743099cSArkadiusz Mi?kiewicz prid_t prid, 9681da177e4SLinus Torvalds int okalloc, 9691da177e4SLinus Torvalds xfs_buf_t **ialloc_context, 9701da177e4SLinus Torvalds boolean_t *call_again, 9711da177e4SLinus Torvalds xfs_inode_t **ipp) 9721da177e4SLinus Torvalds { 9731da177e4SLinus Torvalds xfs_ino_t ino; 9741da177e4SLinus Torvalds xfs_inode_t *ip; 9751da177e4SLinus Torvalds uint flags; 9761da177e4SLinus Torvalds int error; 977dff35fd4SChristoph Hellwig timespec_t tv; 978bf904248SDavid Chinner int filestreams = 0; 9791da177e4SLinus Torvalds 9801da177e4SLinus Torvalds /* 9811da177e4SLinus Torvalds * Call the space management code to pick 9821da177e4SLinus Torvalds * the on-disk inode to be allocated. 9831da177e4SLinus Torvalds */ 984b11f94d5SDavid Chinner error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 9851da177e4SLinus Torvalds ialloc_context, call_again, &ino); 986bf904248SDavid Chinner if (error) 9871da177e4SLinus Torvalds return error; 9881da177e4SLinus Torvalds if (*call_again || ino == NULLFSINO) { 9891da177e4SLinus Torvalds *ipp = NULL; 9901da177e4SLinus Torvalds return 0; 9911da177e4SLinus Torvalds } 9921da177e4SLinus Torvalds ASSERT(*ialloc_context == NULL); 9931da177e4SLinus Torvalds 9941da177e4SLinus Torvalds /* 9951da177e4SLinus Torvalds * Get the in-core inode with the lock held exclusively. 9961da177e4SLinus Torvalds * This is because we're setting fields here we need 9971da177e4SLinus Torvalds * to prevent others from looking at until we're done. 9981da177e4SLinus Torvalds */ 999ec3ba85fSChristoph Hellwig error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, 1000ec3ba85fSChristoph Hellwig XFS_ILOCK_EXCL, &ip); 1001bf904248SDavid Chinner if (error) 10021da177e4SLinus Torvalds return error; 10031da177e4SLinus Torvalds ASSERT(ip != NULL); 10041da177e4SLinus Torvalds 1005576b1d67SAl Viro ip->i_d.di_mode = mode; 10061da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 10071da177e4SLinus Torvalds ip->i_d.di_nlink = nlink; 10081da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == nlink); 10099e2b2dc4SDavid Howells ip->i_d.di_uid = current_fsuid(); 10109e2b2dc4SDavid Howells ip->i_d.di_gid = current_fsgid(); 10116743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, prid); 10121da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds /* 10151da177e4SLinus Torvalds * If the superblock version is up to where we support new format 10161da177e4SLinus Torvalds * inodes and this is currently an old format inode, then change 10171da177e4SLinus Torvalds * the inode version number now. This way we only do the conversion 10181da177e4SLinus Torvalds * here rather than here and in the flush/logging code. 10191da177e4SLinus Torvalds */ 102062118709SEric Sandeen if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 102151ce16d5SChristoph Hellwig ip->i_d.di_version == 1) { 102251ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 10231da177e4SLinus Torvalds /* 10241da177e4SLinus Torvalds * We've already zeroed the old link count, the projid field, 10251da177e4SLinus Torvalds * and the pad field. 10261da177e4SLinus Torvalds */ 10271da177e4SLinus Torvalds } 10281da177e4SLinus Torvalds 10291da177e4SLinus Torvalds /* 10301da177e4SLinus Torvalds * Project ids won't be stored on disk if we are using a version 1 inode. 10311da177e4SLinus Torvalds */ 103251ce16d5SChristoph Hellwig if ((prid != 0) && (ip->i_d.di_version == 1)) 10331da177e4SLinus Torvalds xfs_bump_ino_vers2(tp, ip); 10341da177e4SLinus Torvalds 1035bd186aa9SChristoph Hellwig if (pip && XFS_INHERIT_GID(pip)) { 10361da177e4SLinus Torvalds ip->i_d.di_gid = pip->i_d.di_gid; 1037abbede1bSAl Viro if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { 10381da177e4SLinus Torvalds ip->i_d.di_mode |= S_ISGID; 10391da177e4SLinus Torvalds } 10401da177e4SLinus Torvalds } 10411da177e4SLinus Torvalds 10421da177e4SLinus Torvalds /* 10431da177e4SLinus Torvalds * If the group ID of the new file does not match the effective group 10441da177e4SLinus Torvalds * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 10451da177e4SLinus Torvalds * (and only if the irix_sgid_inherit compatibility variable is set). 10461da177e4SLinus Torvalds */ 10471da177e4SLinus Torvalds if ((irix_sgid_inherit) && 10481da177e4SLinus Torvalds (ip->i_d.di_mode & S_ISGID) && 10491da177e4SLinus Torvalds (!in_group_p((gid_t)ip->i_d.di_gid))) { 10501da177e4SLinus Torvalds ip->i_d.di_mode &= ~S_ISGID; 10511da177e4SLinus Torvalds } 10521da177e4SLinus Torvalds 10531da177e4SLinus Torvalds ip->i_d.di_size = 0; 1054ba87ea69SLachlan McIlroy ip->i_size = 0; 10551da177e4SLinus Torvalds ip->i_d.di_nextents = 0; 10561da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 1057dff35fd4SChristoph Hellwig 1058dff35fd4SChristoph Hellwig nanotime(&tv); 1059dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 1060dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 1061dff35fd4SChristoph Hellwig ip->i_d.di_atime = ip->i_d.di_mtime; 1062dff35fd4SChristoph Hellwig ip->i_d.di_ctime = ip->i_d.di_mtime; 1063dff35fd4SChristoph Hellwig 10641da177e4SLinus Torvalds /* 10651da177e4SLinus Torvalds * di_gen will have been taken care of in xfs_iread. 10661da177e4SLinus Torvalds */ 10671da177e4SLinus Torvalds ip->i_d.di_extsize = 0; 10681da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 10691da177e4SLinus Torvalds ip->i_d.di_dmstate = 0; 10701da177e4SLinus Torvalds ip->i_d.di_flags = 0; 10711da177e4SLinus Torvalds flags = XFS_ILOG_CORE; 10721da177e4SLinus Torvalds switch (mode & S_IFMT) { 10731da177e4SLinus Torvalds case S_IFIFO: 10741da177e4SLinus Torvalds case S_IFCHR: 10751da177e4SLinus Torvalds case S_IFBLK: 10761da177e4SLinus Torvalds case S_IFSOCK: 10771da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_DEV; 10781da177e4SLinus Torvalds ip->i_df.if_u2.if_rdev = rdev; 10791da177e4SLinus Torvalds ip->i_df.if_flags = 0; 10801da177e4SLinus Torvalds flags |= XFS_ILOG_DEV; 10811da177e4SLinus Torvalds break; 10821da177e4SLinus Torvalds case S_IFREG: 1083bf904248SDavid Chinner /* 1084bf904248SDavid Chinner * we can't set up filestreams until after the VFS inode 1085bf904248SDavid Chinner * is set up properly. 1086bf904248SDavid Chinner */ 1087bf904248SDavid Chinner if (pip && xfs_inode_is_filestream(pip)) 1088bf904248SDavid Chinner filestreams = 1; 10892a82b8beSDavid Chinner /* fall through */ 10901da177e4SLinus Torvalds case S_IFDIR: 1091b11f94d5SDavid Chinner if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1092365ca83dSNathan Scott uint di_flags = 0; 1093365ca83dSNathan Scott 1094abbede1bSAl Viro if (S_ISDIR(mode)) { 1095365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1096365ca83dSNathan Scott di_flags |= XFS_DIFLAG_RTINHERIT; 1097dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1098dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1099dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1100dd9f438eSNathan Scott } 1101abbede1bSAl Viro } else if (S_ISREG(mode)) { 1102613d7043SChristoph Hellwig if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1103365ca83dSNathan Scott di_flags |= XFS_DIFLAG_REALTIME; 1104dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1105dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSIZE; 1106dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1107dd9f438eSNathan Scott } 11081da177e4SLinus Torvalds } 11091da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 11101da177e4SLinus Torvalds xfs_inherit_noatime) 1111365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOATIME; 11121da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 11131da177e4SLinus Torvalds xfs_inherit_nodump) 1114365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NODUMP; 11151da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 11161da177e4SLinus Torvalds xfs_inherit_sync) 1117365ca83dSNathan Scott di_flags |= XFS_DIFLAG_SYNC; 11181da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 11191da177e4SLinus Torvalds xfs_inherit_nosymlinks) 1120365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOSYMLINKS; 1121365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1122365ca83dSNathan Scott di_flags |= XFS_DIFLAG_PROJINHERIT; 1123d3446eacSBarry Naujok if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1124d3446eacSBarry Naujok xfs_inherit_nodefrag) 1125d3446eacSBarry Naujok di_flags |= XFS_DIFLAG_NODEFRAG; 11262a82b8beSDavid Chinner if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) 11272a82b8beSDavid Chinner di_flags |= XFS_DIFLAG_FILESTREAM; 1128365ca83dSNathan Scott ip->i_d.di_flags |= di_flags; 11291da177e4SLinus Torvalds } 11301da177e4SLinus Torvalds /* FALLTHROUGH */ 11311da177e4SLinus Torvalds case S_IFLNK: 11321da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 11331da177e4SLinus Torvalds ip->i_df.if_flags = XFS_IFEXTENTS; 11341da177e4SLinus Torvalds ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 11351da177e4SLinus Torvalds ip->i_df.if_u1.if_extents = NULL; 11361da177e4SLinus Torvalds break; 11371da177e4SLinus Torvalds default: 11381da177e4SLinus Torvalds ASSERT(0); 11391da177e4SLinus Torvalds } 11401da177e4SLinus Torvalds /* 11411da177e4SLinus Torvalds * Attribute fork settings for new inode. 11421da177e4SLinus Torvalds */ 11431da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 11441da177e4SLinus Torvalds ip->i_d.di_anextents = 0; 11451da177e4SLinus Torvalds 11461da177e4SLinus Torvalds /* 11471da177e4SLinus Torvalds * Log the new values stuffed into the inode. 11481da177e4SLinus Torvalds */ 1149ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 11501da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, flags); 11511da177e4SLinus Torvalds 1152b83bd138SNathan Scott /* now that we have an i_mode we can setup inode ops and unlock */ 115341be8bedSChristoph Hellwig xfs_setup_inode(ip); 11541da177e4SLinus Torvalds 1155bf904248SDavid Chinner /* now we have set up the vfs inode we can associate the filestream */ 1156bf904248SDavid Chinner if (filestreams) { 1157bf904248SDavid Chinner error = xfs_filestream_associate(pip, ip); 1158bf904248SDavid Chinner if (error < 0) 1159bf904248SDavid Chinner return -error; 1160bf904248SDavid Chinner if (!error) 1161bf904248SDavid Chinner xfs_iflags_set(ip, XFS_IFILESTREAM); 1162bf904248SDavid Chinner } 1163bf904248SDavid Chinner 11641da177e4SLinus Torvalds *ipp = ip; 11651da177e4SLinus Torvalds return 0; 11661da177e4SLinus Torvalds } 11671da177e4SLinus Torvalds 11681da177e4SLinus Torvalds /* 11698f04c47aSChristoph Hellwig * Free up the underlying blocks past new_size. The new size must be smaller 11708f04c47aSChristoph Hellwig * than the current size. This routine can be used both for the attribute and 11718f04c47aSChristoph Hellwig * data fork, and does not modify the inode size, which is left to the caller. 11721da177e4SLinus Torvalds * 1173f6485057SDavid Chinner * The transaction passed to this routine must have made a permanent log 1174f6485057SDavid Chinner * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1175f6485057SDavid Chinner * given transaction and start new ones, so make sure everything involved in 1176f6485057SDavid Chinner * the transaction is tidy before calling here. Some transaction will be 1177f6485057SDavid Chinner * returned to the caller to be committed. The incoming transaction must 1178f6485057SDavid Chinner * already include the inode, and both inode locks must be held exclusively. 1179f6485057SDavid Chinner * The inode must also be "held" within the transaction. On return the inode 1180f6485057SDavid Chinner * will be "held" within the returned transaction. This routine does NOT 1181f6485057SDavid Chinner * require any disk space to be reserved for it within the transaction. 11821da177e4SLinus Torvalds * 1183f6485057SDavid Chinner * If we get an error, we must return with the inode locked and linked into the 1184f6485057SDavid Chinner * current transaction. This keeps things simple for the higher level code, 1185f6485057SDavid Chinner * because it always knows that the inode is locked and held in the transaction 1186f6485057SDavid Chinner * that returns to it whether errors occur or not. We don't mark the inode 1187f6485057SDavid Chinner * dirty on error so that transactions can be easily aborted if possible. 11881da177e4SLinus Torvalds */ 11891da177e4SLinus Torvalds int 11908f04c47aSChristoph Hellwig xfs_itruncate_extents( 11918f04c47aSChristoph Hellwig struct xfs_trans **tpp, 11928f04c47aSChristoph Hellwig struct xfs_inode *ip, 11938f04c47aSChristoph Hellwig int whichfork, 11948f04c47aSChristoph Hellwig xfs_fsize_t new_size) 11951da177e4SLinus Torvalds { 11968f04c47aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 11978f04c47aSChristoph Hellwig struct xfs_trans *tp = *tpp; 11988f04c47aSChristoph Hellwig struct xfs_trans *ntp; 11998f04c47aSChristoph Hellwig xfs_bmap_free_t free_list; 12001da177e4SLinus Torvalds xfs_fsblock_t first_block; 12011da177e4SLinus Torvalds xfs_fileoff_t first_unmap_block; 12021da177e4SLinus Torvalds xfs_fileoff_t last_block; 12038f04c47aSChristoph Hellwig xfs_filblks_t unmap_len; 12041da177e4SLinus Torvalds int committed; 12058f04c47aSChristoph Hellwig int error = 0; 12068f04c47aSChristoph Hellwig int done = 0; 12071da177e4SLinus Torvalds 1208579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 12098f04c47aSChristoph Hellwig ASSERT(new_size <= ip->i_size); 12108f04c47aSChristoph Hellwig ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 12111da177e4SLinus Torvalds ASSERT(ip->i_itemp != NULL); 1212898621d5SChristoph Hellwig ASSERT(ip->i_itemp->ili_lock_flags == 0); 12131da177e4SLinus Torvalds ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 12141da177e4SLinus Torvalds 1215*673e8e59SChristoph Hellwig trace_xfs_itruncate_extents_start(ip, new_size); 1216*673e8e59SChristoph Hellwig 12171da177e4SLinus Torvalds /* 12181da177e4SLinus Torvalds * Since it is possible for space to become allocated beyond 12191da177e4SLinus Torvalds * the end of the file (in a crash where the space is allocated 12201da177e4SLinus Torvalds * but the inode size is not yet updated), simply remove any 12211da177e4SLinus Torvalds * blocks which show up between the new EOF and the maximum 12221da177e4SLinus Torvalds * possible file size. If the first block to be removed is 12231da177e4SLinus Torvalds * beyond the maximum file size (ie it is the same as last_block), 12241da177e4SLinus Torvalds * then there is nothing to do. 12251da177e4SLinus Torvalds */ 12268f04c47aSChristoph Hellwig first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 12271da177e4SLinus Torvalds last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 12288f04c47aSChristoph Hellwig if (first_unmap_block == last_block) 12298f04c47aSChristoph Hellwig return 0; 12308f04c47aSChristoph Hellwig 12318f04c47aSChristoph Hellwig ASSERT(first_unmap_block < last_block); 12321da177e4SLinus Torvalds unmap_len = last_block - first_unmap_block + 1; 12331da177e4SLinus Torvalds while (!done) { 12349d87c319SEric Sandeen xfs_bmap_init(&free_list, &first_block); 12358f04c47aSChristoph Hellwig error = xfs_bunmapi(tp, ip, 12363e57ecf6SOlaf Weber first_unmap_block, unmap_len, 12378f04c47aSChristoph Hellwig xfs_bmapi_aflag(whichfork), 12381da177e4SLinus Torvalds XFS_ITRUNC_MAX_EXTENTS, 12393e57ecf6SOlaf Weber &first_block, &free_list, 1240b4e9181eSChristoph Hellwig &done); 12418f04c47aSChristoph Hellwig if (error) 12428f04c47aSChristoph Hellwig goto out_bmap_cancel; 12431da177e4SLinus Torvalds 12441da177e4SLinus Torvalds /* 12451da177e4SLinus Torvalds * Duplicate the transaction that has the permanent 12461da177e4SLinus Torvalds * reservation and commit the old transaction. 12471da177e4SLinus Torvalds */ 12488f04c47aSChristoph Hellwig error = xfs_bmap_finish(&tp, &free_list, &committed); 1249898621d5SChristoph Hellwig if (committed) 1250ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, 0); 12518f04c47aSChristoph Hellwig if (error) 12528f04c47aSChristoph Hellwig goto out_bmap_cancel; 12531da177e4SLinus Torvalds 12541da177e4SLinus Torvalds if (committed) { 12551da177e4SLinus Torvalds /* 1256f6485057SDavid Chinner * Mark the inode dirty so it will be logged and 1257e5720eecSDavid Chinner * moved forward in the log as part of every commit. 12581da177e4SLinus Torvalds */ 12598f04c47aSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 12601da177e4SLinus Torvalds } 1261f6485057SDavid Chinner 12628f04c47aSChristoph Hellwig ntp = xfs_trans_dup(tp); 12638f04c47aSChristoph Hellwig error = xfs_trans_commit(tp, 0); 12648f04c47aSChristoph Hellwig tp = ntp; 1265f6485057SDavid Chinner 1266ddc3415aSChristoph Hellwig xfs_trans_ijoin(tp, ip, 0); 1267f6485057SDavid Chinner 1268cc09c0dcSDave Chinner if (error) 12698f04c47aSChristoph Hellwig goto out; 12708f04c47aSChristoph Hellwig 1271cc09c0dcSDave Chinner /* 12728f04c47aSChristoph Hellwig * Transaction commit worked ok so we can drop the extra ticket 1273cc09c0dcSDave Chinner * reference that we gained in xfs_trans_dup() 1274cc09c0dcSDave Chinner */ 12758f04c47aSChristoph Hellwig xfs_log_ticket_put(tp->t_ticket); 12768f04c47aSChristoph Hellwig error = xfs_trans_reserve(tp, 0, 1277f6485057SDavid Chinner XFS_ITRUNCATE_LOG_RES(mp), 0, 12781da177e4SLinus Torvalds XFS_TRANS_PERM_LOG_RES, 12791da177e4SLinus Torvalds XFS_ITRUNCATE_LOG_COUNT); 12801da177e4SLinus Torvalds if (error) 12818f04c47aSChristoph Hellwig goto out; 12821da177e4SLinus Torvalds } 12838f04c47aSChristoph Hellwig 1284*673e8e59SChristoph Hellwig /* 1285*673e8e59SChristoph Hellwig * Always re-log the inode so that our permanent transaction can keep 1286*673e8e59SChristoph Hellwig * on rolling it forward in the log. 1287*673e8e59SChristoph Hellwig */ 1288*673e8e59SChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1289*673e8e59SChristoph Hellwig 1290*673e8e59SChristoph Hellwig trace_xfs_itruncate_extents_end(ip, new_size); 1291*673e8e59SChristoph Hellwig 12928f04c47aSChristoph Hellwig out: 12938f04c47aSChristoph Hellwig *tpp = tp; 12948f04c47aSChristoph Hellwig return error; 12958f04c47aSChristoph Hellwig out_bmap_cancel: 12961da177e4SLinus Torvalds /* 12978f04c47aSChristoph Hellwig * If the bunmapi call encounters an error, return to the caller where 12988f04c47aSChristoph Hellwig * the transaction can be properly aborted. We just need to make sure 12998f04c47aSChristoph Hellwig * we're not holding any resources that we were not when we came in. 13001da177e4SLinus Torvalds */ 13018f04c47aSChristoph Hellwig xfs_bmap_cancel(&free_list); 13028f04c47aSChristoph Hellwig goto out; 13038f04c47aSChristoph Hellwig } 13048f04c47aSChristoph Hellwig 13051da177e4SLinus Torvalds /* 13061da177e4SLinus Torvalds * This is called when the inode's link count goes to 0. 13071da177e4SLinus Torvalds * We place the on-disk inode on a list in the AGI. It 13081da177e4SLinus Torvalds * will be pulled from this list when the inode is freed. 13091da177e4SLinus Torvalds */ 13101da177e4SLinus Torvalds int 13111da177e4SLinus Torvalds xfs_iunlink( 13121da177e4SLinus Torvalds xfs_trans_t *tp, 13131da177e4SLinus Torvalds xfs_inode_t *ip) 13141da177e4SLinus Torvalds { 13151da177e4SLinus Torvalds xfs_mount_t *mp; 13161da177e4SLinus Torvalds xfs_agi_t *agi; 13171da177e4SLinus Torvalds xfs_dinode_t *dip; 13181da177e4SLinus Torvalds xfs_buf_t *agibp; 13191da177e4SLinus Torvalds xfs_buf_t *ibp; 13201da177e4SLinus Torvalds xfs_agino_t agino; 13211da177e4SLinus Torvalds short bucket_index; 13221da177e4SLinus Torvalds int offset; 13231da177e4SLinus Torvalds int error; 13241da177e4SLinus Torvalds 13251da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 13261da177e4SLinus Torvalds ASSERT(ip->i_d.di_mode != 0); 13271da177e4SLinus Torvalds 13281da177e4SLinus Torvalds mp = tp->t_mountp; 13291da177e4SLinus Torvalds 13301da177e4SLinus Torvalds /* 13311da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 13321da177e4SLinus Torvalds * on the list. 13331da177e4SLinus Torvalds */ 13345e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); 1335859d7182SVlad Apostolov if (error) 13361da177e4SLinus Torvalds return error; 13371da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 13385e1be0fbSChristoph Hellwig 13391da177e4SLinus Torvalds /* 13401da177e4SLinus Torvalds * Get the index into the agi hash table for the 13411da177e4SLinus Torvalds * list this inode will go on. 13421da177e4SLinus Torvalds */ 13431da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 13441da177e4SLinus Torvalds ASSERT(agino != 0); 13451da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 13461da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 134716259e7dSChristoph Hellwig ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 13481da177e4SLinus Torvalds 134969ef921bSChristoph Hellwig if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { 13501da177e4SLinus Torvalds /* 13511da177e4SLinus Torvalds * There is already another inode in the bucket we need 13521da177e4SLinus Torvalds * to add ourselves to. Add us at the front of the list. 13531da177e4SLinus Torvalds * Here we put the head pointer into our next pointer, 13541da177e4SLinus Torvalds * and then we fall through to point the head at us. 13551da177e4SLinus Torvalds */ 13560cadda1cSChristoph Hellwig error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 1357c319b58bSVlad Apostolov if (error) 1358c319b58bSVlad Apostolov return error; 1359c319b58bSVlad Apostolov 136069ef921bSChristoph Hellwig ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); 13611da177e4SLinus Torvalds dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 136292bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 13631da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 13641da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 13651da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 13661da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 13671da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 13681da177e4SLinus Torvalds } 13691da177e4SLinus Torvalds 13701da177e4SLinus Torvalds /* 13711da177e4SLinus Torvalds * Point the bucket head pointer at the inode being inserted. 13721da177e4SLinus Torvalds */ 13731da177e4SLinus Torvalds ASSERT(agino != 0); 137416259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 13751da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 13761da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 13771da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 13781da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 13791da177e4SLinus Torvalds return 0; 13801da177e4SLinus Torvalds } 13811da177e4SLinus Torvalds 13821da177e4SLinus Torvalds /* 13831da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 13841da177e4SLinus Torvalds */ 13851da177e4SLinus Torvalds STATIC int 13861da177e4SLinus Torvalds xfs_iunlink_remove( 13871da177e4SLinus Torvalds xfs_trans_t *tp, 13881da177e4SLinus Torvalds xfs_inode_t *ip) 13891da177e4SLinus Torvalds { 13901da177e4SLinus Torvalds xfs_ino_t next_ino; 13911da177e4SLinus Torvalds xfs_mount_t *mp; 13921da177e4SLinus Torvalds xfs_agi_t *agi; 13931da177e4SLinus Torvalds xfs_dinode_t *dip; 13941da177e4SLinus Torvalds xfs_buf_t *agibp; 13951da177e4SLinus Torvalds xfs_buf_t *ibp; 13961da177e4SLinus Torvalds xfs_agnumber_t agno; 13971da177e4SLinus Torvalds xfs_agino_t agino; 13981da177e4SLinus Torvalds xfs_agino_t next_agino; 13991da177e4SLinus Torvalds xfs_buf_t *last_ibp; 14006fdf8cccSNathan Scott xfs_dinode_t *last_dip = NULL; 14011da177e4SLinus Torvalds short bucket_index; 14026fdf8cccSNathan Scott int offset, last_offset = 0; 14031da177e4SLinus Torvalds int error; 14041da177e4SLinus Torvalds 14051da177e4SLinus Torvalds mp = tp->t_mountp; 14061da177e4SLinus Torvalds agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 14071da177e4SLinus Torvalds 14081da177e4SLinus Torvalds /* 14091da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 14101da177e4SLinus Torvalds * on the list. 14111da177e4SLinus Torvalds */ 14125e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, agno, &agibp); 14135e1be0fbSChristoph Hellwig if (error) 14141da177e4SLinus Torvalds return error; 14155e1be0fbSChristoph Hellwig 14161da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 14175e1be0fbSChristoph Hellwig 14181da177e4SLinus Torvalds /* 14191da177e4SLinus Torvalds * Get the index into the agi hash table for the 14201da177e4SLinus Torvalds * list this inode will go on. 14211da177e4SLinus Torvalds */ 14221da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 14231da177e4SLinus Torvalds ASSERT(agino != 0); 14241da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 142569ef921bSChristoph Hellwig ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); 14261da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 14271da177e4SLinus Torvalds 142816259e7dSChristoph Hellwig if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 14291da177e4SLinus Torvalds /* 14301da177e4SLinus Torvalds * We're at the head of the list. Get the inode's 14311da177e4SLinus Torvalds * on-disk buffer to see if there is anyone after us 14321da177e4SLinus Torvalds * on the list. Only modify our next pointer if it 14331da177e4SLinus Torvalds * is not already NULLAGINO. This saves us the overhead 14341da177e4SLinus Torvalds * of dealing with the buffer when there is no need to 14351da177e4SLinus Torvalds * change it. 14361da177e4SLinus Torvalds */ 14370cadda1cSChristoph Hellwig error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 14381da177e4SLinus Torvalds if (error) { 14390b932cccSDave Chinner xfs_warn(mp, "%s: xfs_itobp() returned error %d.", 14400b932cccSDave Chinner __func__, error); 14411da177e4SLinus Torvalds return error; 14421da177e4SLinus Torvalds } 1443347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 14441da177e4SLinus Torvalds ASSERT(next_agino != 0); 14451da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1446347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 144792bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 14481da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 14491da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 14501da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 14511da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 14521da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 14531da177e4SLinus Torvalds } else { 14541da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 14551da177e4SLinus Torvalds } 14561da177e4SLinus Torvalds /* 14571da177e4SLinus Torvalds * Point the bucket head pointer at the next inode. 14581da177e4SLinus Torvalds */ 14591da177e4SLinus Torvalds ASSERT(next_agino != 0); 14601da177e4SLinus Torvalds ASSERT(next_agino != agino); 146116259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 14621da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 14631da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 14641da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 14651da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 14661da177e4SLinus Torvalds } else { 14671da177e4SLinus Torvalds /* 14681da177e4SLinus Torvalds * We need to search the list for the inode being freed. 14691da177e4SLinus Torvalds */ 147016259e7dSChristoph Hellwig next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 14711da177e4SLinus Torvalds last_ibp = NULL; 14721da177e4SLinus Torvalds while (next_agino != agino) { 14731da177e4SLinus Torvalds /* 14741da177e4SLinus Torvalds * If the last inode wasn't the one pointing to 14751da177e4SLinus Torvalds * us, then release its buffer since we're not 14761da177e4SLinus Torvalds * going to do anything with it. 14771da177e4SLinus Torvalds */ 14781da177e4SLinus Torvalds if (last_ibp != NULL) { 14791da177e4SLinus Torvalds xfs_trans_brelse(tp, last_ibp); 14801da177e4SLinus Torvalds } 14811da177e4SLinus Torvalds next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 14821da177e4SLinus Torvalds error = xfs_inotobp(mp, tp, next_ino, &last_dip, 1483c679eef0SChristoph Hellwig &last_ibp, &last_offset, 0); 14841da177e4SLinus Torvalds if (error) { 14850b932cccSDave Chinner xfs_warn(mp, 14860b932cccSDave Chinner "%s: xfs_inotobp() returned error %d.", 14870b932cccSDave Chinner __func__, error); 14881da177e4SLinus Torvalds return error; 14891da177e4SLinus Torvalds } 1490347d1c01SChristoph Hellwig next_agino = be32_to_cpu(last_dip->di_next_unlinked); 14911da177e4SLinus Torvalds ASSERT(next_agino != NULLAGINO); 14921da177e4SLinus Torvalds ASSERT(next_agino != 0); 14931da177e4SLinus Torvalds } 14941da177e4SLinus Torvalds /* 14951da177e4SLinus Torvalds * Now last_ibp points to the buffer previous to us on 14961da177e4SLinus Torvalds * the unlinked list. Pull us from the list. 14971da177e4SLinus Torvalds */ 14980cadda1cSChristoph Hellwig error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 14991da177e4SLinus Torvalds if (error) { 15000b932cccSDave Chinner xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", 15010b932cccSDave Chinner __func__, error); 15021da177e4SLinus Torvalds return error; 15031da177e4SLinus Torvalds } 1504347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 15051da177e4SLinus Torvalds ASSERT(next_agino != 0); 15061da177e4SLinus Torvalds ASSERT(next_agino != agino); 15071da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1508347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 150992bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 15101da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 15111da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 15121da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 15131da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 15141da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 15151da177e4SLinus Torvalds } else { 15161da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 15171da177e4SLinus Torvalds } 15181da177e4SLinus Torvalds /* 15191da177e4SLinus Torvalds * Point the previous inode on the list to the next inode. 15201da177e4SLinus Torvalds */ 1521347d1c01SChristoph Hellwig last_dip->di_next_unlinked = cpu_to_be32(next_agino); 15221da177e4SLinus Torvalds ASSERT(next_agino != 0); 15231da177e4SLinus Torvalds offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 15241da177e4SLinus Torvalds xfs_trans_inode_buf(tp, last_ibp); 15251da177e4SLinus Torvalds xfs_trans_log_buf(tp, last_ibp, offset, 15261da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 15271da177e4SLinus Torvalds xfs_inobp_check(mp, last_ibp); 15281da177e4SLinus Torvalds } 15291da177e4SLinus Torvalds return 0; 15301da177e4SLinus Torvalds } 15311da177e4SLinus Torvalds 15325b3eed75SDave Chinner /* 15335b3eed75SDave Chinner * A big issue when freeing the inode cluster is is that we _cannot_ skip any 15345b3eed75SDave Chinner * inodes that are in memory - they all must be marked stale and attached to 15355b3eed75SDave Chinner * the cluster buffer. 15365b3eed75SDave Chinner */ 15372a30f36dSChandra Seetharaman STATIC int 15381da177e4SLinus Torvalds xfs_ifree_cluster( 15391da177e4SLinus Torvalds xfs_inode_t *free_ip, 15401da177e4SLinus Torvalds xfs_trans_t *tp, 15411da177e4SLinus Torvalds xfs_ino_t inum) 15421da177e4SLinus Torvalds { 15431da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 15441da177e4SLinus Torvalds int blks_per_cluster; 15451da177e4SLinus Torvalds int nbufs; 15461da177e4SLinus Torvalds int ninodes; 15475b257b4aSDave Chinner int i, j; 15481da177e4SLinus Torvalds xfs_daddr_t blkno; 15491da177e4SLinus Torvalds xfs_buf_t *bp; 15505b257b4aSDave Chinner xfs_inode_t *ip; 15511da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 15521da177e4SLinus Torvalds xfs_log_item_t *lip; 15535017e97dSDave Chinner struct xfs_perag *pag; 15541da177e4SLinus Torvalds 15555017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 15561da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 15571da177e4SLinus Torvalds blks_per_cluster = 1; 15581da177e4SLinus Torvalds ninodes = mp->m_sb.sb_inopblock; 15591da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 15601da177e4SLinus Torvalds } else { 15611da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 15621da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 15631da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 15641da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 15651da177e4SLinus Torvalds } 15661da177e4SLinus Torvalds 15671da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 15681da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 15691da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 15701da177e4SLinus Torvalds 15711da177e4SLinus Torvalds /* 15725b257b4aSDave Chinner * We obtain and lock the backing buffer first in the process 15735b257b4aSDave Chinner * here, as we have to ensure that any dirty inode that we 15745b257b4aSDave Chinner * can't get the flush lock on is attached to the buffer. 15755b257b4aSDave Chinner * If we scan the in-memory inodes first, then buffer IO can 15765b257b4aSDave Chinner * complete before we get a lock on it, and hence we may fail 15775b257b4aSDave Chinner * to mark all the active inodes on the buffer stale. 15781da177e4SLinus Torvalds */ 15791da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 15801da177e4SLinus Torvalds mp->m_bsize * blks_per_cluster, 15810cadda1cSChristoph Hellwig XBF_LOCK); 15821da177e4SLinus Torvalds 15832a30f36dSChandra Seetharaman if (!bp) 15842a30f36dSChandra Seetharaman return ENOMEM; 15855b257b4aSDave Chinner /* 15865b257b4aSDave Chinner * Walk the inodes already attached to the buffer and mark them 15875b257b4aSDave Chinner * stale. These will all have the flush locks held, so an 15885b3eed75SDave Chinner * in-memory inode walk can't lock them. By marking them all 15895b3eed75SDave Chinner * stale first, we will not attempt to lock them in the loop 15905b3eed75SDave Chinner * below as the XFS_ISTALE flag will be set. 15915b257b4aSDave Chinner */ 1592adadbeefSChristoph Hellwig lip = bp->b_fspriv; 15931da177e4SLinus Torvalds while (lip) { 15941da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 15951da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 15961da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 1597ca30b2a7SChristoph Hellwig lip->li_cb = xfs_istale_done; 15987b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 15997b2e2a31SDavid Chinner &iip->ili_flush_lsn, 16007b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 1601e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 16021da177e4SLinus Torvalds } 16031da177e4SLinus Torvalds lip = lip->li_bio_list; 16041da177e4SLinus Torvalds } 16051da177e4SLinus Torvalds 16065b3eed75SDave Chinner 16075b257b4aSDave Chinner /* 16085b257b4aSDave Chinner * For each inode in memory attempt to add it to the inode 16095b257b4aSDave Chinner * buffer and set it up for being staled on buffer IO 16105b257b4aSDave Chinner * completion. This is safe as we've locked out tail pushing 16115b257b4aSDave Chinner * and flushing by locking the buffer. 16125b257b4aSDave Chinner * 16135b257b4aSDave Chinner * We have already marked every inode that was part of a 16145b257b4aSDave Chinner * transaction stale above, which means there is no point in 16155b257b4aSDave Chinner * even trying to lock them. 16165b257b4aSDave Chinner */ 16175b257b4aSDave Chinner for (i = 0; i < ninodes; i++) { 16185b3eed75SDave Chinner retry: 16191a3e8f3dSDave Chinner rcu_read_lock(); 16205b257b4aSDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 16215b257b4aSDave Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 16221da177e4SLinus Torvalds 16231a3e8f3dSDave Chinner /* Inode not in memory, nothing to do */ 16241a3e8f3dSDave Chinner if (!ip) { 16251a3e8f3dSDave Chinner rcu_read_unlock(); 16265b257b4aSDave Chinner continue; 16275b257b4aSDave Chinner } 16285b257b4aSDave Chinner 16295b3eed75SDave Chinner /* 16301a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could 16311a3e8f3dSDave Chinner * find a recently freed or even reallocated inode 16321a3e8f3dSDave Chinner * during the lookup. We need to check under the 16331a3e8f3dSDave Chinner * i_flags_lock for a valid inode here. Skip it if it 16341a3e8f3dSDave Chinner * is not valid, the wrong inode or stale. 16351a3e8f3dSDave Chinner */ 16361a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 16371a3e8f3dSDave Chinner if (ip->i_ino != inum + i || 16381a3e8f3dSDave Chinner __xfs_iflags_test(ip, XFS_ISTALE)) { 16391a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 16401a3e8f3dSDave Chinner rcu_read_unlock(); 16411a3e8f3dSDave Chinner continue; 16421a3e8f3dSDave Chinner } 16431a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 16441a3e8f3dSDave Chinner 16451a3e8f3dSDave Chinner /* 16465b3eed75SDave Chinner * Don't try to lock/unlock the current inode, but we 16475b3eed75SDave Chinner * _cannot_ skip the other inodes that we did not find 16485b3eed75SDave Chinner * in the list attached to the buffer and are not 16495b3eed75SDave Chinner * already marked stale. If we can't lock it, back off 16505b3eed75SDave Chinner * and retry. 16515b3eed75SDave Chinner */ 16525b257b4aSDave Chinner if (ip != free_ip && 16535b257b4aSDave Chinner !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 16541a3e8f3dSDave Chinner rcu_read_unlock(); 16555b3eed75SDave Chinner delay(1); 16565b3eed75SDave Chinner goto retry; 16575b257b4aSDave Chinner } 16581a3e8f3dSDave Chinner rcu_read_unlock(); 16595b257b4aSDave Chinner 16605b3eed75SDave Chinner xfs_iflock(ip); 16615b257b4aSDave Chinner xfs_iflags_set(ip, XFS_ISTALE); 16625b257b4aSDave Chinner 16635b3eed75SDave Chinner /* 16645b3eed75SDave Chinner * we don't need to attach clean inodes or those only 16655b3eed75SDave Chinner * with unlogged changes (which we throw away, anyway). 16665b3eed75SDave Chinner */ 16675b257b4aSDave Chinner iip = ip->i_itemp; 16685b3eed75SDave Chinner if (!iip || xfs_inode_clean(ip)) { 16695b257b4aSDave Chinner ASSERT(ip != free_ip); 16701da177e4SLinus Torvalds ip->i_update_core = 0; 16711da177e4SLinus Torvalds xfs_ifunlock(ip); 16721da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 16731da177e4SLinus Torvalds continue; 16741da177e4SLinus Torvalds } 16751da177e4SLinus Torvalds 16761da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 16771da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 16781da177e4SLinus Torvalds iip->ili_logged = 1; 16797b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 16807b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 16811da177e4SLinus Torvalds 1682ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_istale_done, 1683ca30b2a7SChristoph Hellwig &iip->ili_item); 16845b257b4aSDave Chinner 16855b257b4aSDave Chinner if (ip != free_ip) 16861da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 16871da177e4SLinus Torvalds } 16881da177e4SLinus Torvalds 16891da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 16901da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 16911da177e4SLinus Torvalds } 16921da177e4SLinus Torvalds 16935017e97dSDave Chinner xfs_perag_put(pag); 16942a30f36dSChandra Seetharaman return 0; 16951da177e4SLinus Torvalds } 16961da177e4SLinus Torvalds 16971da177e4SLinus Torvalds /* 16981da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 16991da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 17001da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 17011da177e4SLinus Torvalds * the inode is already a part of the transaction. 17021da177e4SLinus Torvalds * 17031da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 17041da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 17051da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 17061da177e4SLinus Torvalds */ 17071da177e4SLinus Torvalds int 17081da177e4SLinus Torvalds xfs_ifree( 17091da177e4SLinus Torvalds xfs_trans_t *tp, 17101da177e4SLinus Torvalds xfs_inode_t *ip, 17111da177e4SLinus Torvalds xfs_bmap_free_t *flist) 17121da177e4SLinus Torvalds { 17131da177e4SLinus Torvalds int error; 17141da177e4SLinus Torvalds int delete; 17151da177e4SLinus Torvalds xfs_ino_t first_ino; 1716c319b58bSVlad Apostolov xfs_dinode_t *dip; 1717c319b58bSVlad Apostolov xfs_buf_t *ibp; 17181da177e4SLinus Torvalds 1719579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 17201da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 17211da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 17221da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 1723ba87ea69SLachlan McIlroy ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || 1724abbede1bSAl Viro (!S_ISREG(ip->i_d.di_mode))); 17251da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 17261da177e4SLinus Torvalds 17271da177e4SLinus Torvalds /* 17281da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 17291da177e4SLinus Torvalds */ 17301da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 17311da177e4SLinus Torvalds if (error != 0) { 17321da177e4SLinus Torvalds return error; 17331da177e4SLinus Torvalds } 17341da177e4SLinus Torvalds 17351da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 17361da177e4SLinus Torvalds if (error != 0) { 17371da177e4SLinus Torvalds return error; 17381da177e4SLinus Torvalds } 17391da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 17401da177e4SLinus Torvalds ip->i_d.di_flags = 0; 17411da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 17421da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 17431da177e4SLinus Torvalds ip->i_df.if_ext_max = 17441da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 17451da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 17461da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 17471da177e4SLinus Torvalds /* 17481da177e4SLinus Torvalds * Bump the generation count so no one will be confused 17491da177e4SLinus Torvalds * by reincarnations of this inode. 17501da177e4SLinus Torvalds */ 17511da177e4SLinus Torvalds ip->i_d.di_gen++; 1752c319b58bSVlad Apostolov 17531da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 17541da177e4SLinus Torvalds 17550cadda1cSChristoph Hellwig error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); 1756c319b58bSVlad Apostolov if (error) 1757c319b58bSVlad Apostolov return error; 1758c319b58bSVlad Apostolov 1759c319b58bSVlad Apostolov /* 1760c319b58bSVlad Apostolov * Clear the on-disk di_mode. This is to prevent xfs_bulkstat 1761c319b58bSVlad Apostolov * from picking up this inode when it is reclaimed (its incore state 1762c319b58bSVlad Apostolov * initialzed but not flushed to disk yet). The in-core di_mode is 1763c319b58bSVlad Apostolov * already cleared and a corresponding transaction logged. 1764c319b58bSVlad Apostolov * The hack here just synchronizes the in-core to on-disk 1765c319b58bSVlad Apostolov * di_mode value in advance before the actual inode sync to disk. 1766c319b58bSVlad Apostolov * This is OK because the inode is already unlinked and would never 1767c319b58bSVlad Apostolov * change its di_mode again for this inode generation. 1768c319b58bSVlad Apostolov * This is a temporary hack that would require a proper fix 1769c319b58bSVlad Apostolov * in the future. 1770c319b58bSVlad Apostolov */ 177181591fe2SChristoph Hellwig dip->di_mode = 0; 1772c319b58bSVlad Apostolov 17731da177e4SLinus Torvalds if (delete) { 17742a30f36dSChandra Seetharaman error = xfs_ifree_cluster(ip, tp, first_ino); 17751da177e4SLinus Torvalds } 17761da177e4SLinus Torvalds 17772a30f36dSChandra Seetharaman return error; 17781da177e4SLinus Torvalds } 17791da177e4SLinus Torvalds 17801da177e4SLinus Torvalds /* 17811da177e4SLinus Torvalds * Reallocate the space for if_broot based on the number of records 17821da177e4SLinus Torvalds * being added or deleted as indicated in rec_diff. Move the records 17831da177e4SLinus Torvalds * and pointers in if_broot to fit the new size. When shrinking this 17841da177e4SLinus Torvalds * will eliminate holes between the records and pointers created by 17851da177e4SLinus Torvalds * the caller. When growing this will create holes to be filled in 17861da177e4SLinus Torvalds * by the caller. 17871da177e4SLinus Torvalds * 17881da177e4SLinus Torvalds * The caller must not request to add more records than would fit in 17891da177e4SLinus Torvalds * the on-disk inode root. If the if_broot is currently NULL, then 17901da177e4SLinus Torvalds * if we adding records one will be allocated. The caller must also 17911da177e4SLinus Torvalds * not request that the number of records go below zero, although 17921da177e4SLinus Torvalds * it can go to zero. 17931da177e4SLinus Torvalds * 17941da177e4SLinus Torvalds * ip -- the inode whose if_broot area is changing 17951da177e4SLinus Torvalds * ext_diff -- the change in the number of records, positive or negative, 17961da177e4SLinus Torvalds * requested for the if_broot array. 17971da177e4SLinus Torvalds */ 17981da177e4SLinus Torvalds void 17991da177e4SLinus Torvalds xfs_iroot_realloc( 18001da177e4SLinus Torvalds xfs_inode_t *ip, 18011da177e4SLinus Torvalds int rec_diff, 18021da177e4SLinus Torvalds int whichfork) 18031da177e4SLinus Torvalds { 180460197e8dSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 18051da177e4SLinus Torvalds int cur_max; 18061da177e4SLinus Torvalds xfs_ifork_t *ifp; 18077cc95a82SChristoph Hellwig struct xfs_btree_block *new_broot; 18081da177e4SLinus Torvalds int new_max; 18091da177e4SLinus Torvalds size_t new_size; 18101da177e4SLinus Torvalds char *np; 18111da177e4SLinus Torvalds char *op; 18121da177e4SLinus Torvalds 18131da177e4SLinus Torvalds /* 18141da177e4SLinus Torvalds * Handle the degenerate case quietly. 18151da177e4SLinus Torvalds */ 18161da177e4SLinus Torvalds if (rec_diff == 0) { 18171da177e4SLinus Torvalds return; 18181da177e4SLinus Torvalds } 18191da177e4SLinus Torvalds 18201da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 18211da177e4SLinus Torvalds if (rec_diff > 0) { 18221da177e4SLinus Torvalds /* 18231da177e4SLinus Torvalds * If there wasn't any memory allocated before, just 18241da177e4SLinus Torvalds * allocate it now and get out. 18251da177e4SLinus Torvalds */ 18261da177e4SLinus Torvalds if (ifp->if_broot_bytes == 0) { 18271da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 18284a7edddcSDave Chinner ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 18291da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 18301da177e4SLinus Torvalds return; 18311da177e4SLinus Torvalds } 18321da177e4SLinus Torvalds 18331da177e4SLinus Torvalds /* 18341da177e4SLinus Torvalds * If there is already an existing if_broot, then we need 18351da177e4SLinus Torvalds * to realloc() it and shift the pointers to their new 18361da177e4SLinus Torvalds * location. The records don't change location because 18371da177e4SLinus Torvalds * they are kept butted up against the btree block header. 18381da177e4SLinus Torvalds */ 183960197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 18401da177e4SLinus Torvalds new_max = cur_max + rec_diff; 18411da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 18427cc95a82SChristoph Hellwig ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 18431da177e4SLinus Torvalds (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 18444a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 184560197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 18461da177e4SLinus Torvalds ifp->if_broot_bytes); 184760197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 18481da177e4SLinus Torvalds (int)new_size); 18491da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 18501da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 18511da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 18521da177e4SLinus Torvalds memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 18531da177e4SLinus Torvalds return; 18541da177e4SLinus Torvalds } 18551da177e4SLinus Torvalds 18561da177e4SLinus Torvalds /* 18571da177e4SLinus Torvalds * rec_diff is less than 0. In this case, we are shrinking the 18581da177e4SLinus Torvalds * if_broot buffer. It must already exist. If we go to zero 18591da177e4SLinus Torvalds * records, just get rid of the root and clear the status bit. 18601da177e4SLinus Torvalds */ 18611da177e4SLinus Torvalds ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 186260197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 18631da177e4SLinus Torvalds new_max = cur_max + rec_diff; 18641da177e4SLinus Torvalds ASSERT(new_max >= 0); 18651da177e4SLinus Torvalds if (new_max > 0) 18661da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 18671da177e4SLinus Torvalds else 18681da177e4SLinus Torvalds new_size = 0; 18691da177e4SLinus Torvalds if (new_size > 0) { 18704a7edddcSDave Chinner new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 18711da177e4SLinus Torvalds /* 18721da177e4SLinus Torvalds * First copy over the btree block header. 18731da177e4SLinus Torvalds */ 18747cc95a82SChristoph Hellwig memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); 18751da177e4SLinus Torvalds } else { 18761da177e4SLinus Torvalds new_broot = NULL; 18771da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFBROOT; 18781da177e4SLinus Torvalds } 18791da177e4SLinus Torvalds 18801da177e4SLinus Torvalds /* 18811da177e4SLinus Torvalds * Only copy the records and pointers if there are any. 18821da177e4SLinus Torvalds */ 18831da177e4SLinus Torvalds if (new_max > 0) { 18841da177e4SLinus Torvalds /* 18851da177e4SLinus Torvalds * First copy the records. 18861da177e4SLinus Torvalds */ 1887136341b4SChristoph Hellwig op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); 1888136341b4SChristoph Hellwig np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); 18891da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 18901da177e4SLinus Torvalds 18911da177e4SLinus Torvalds /* 18921da177e4SLinus Torvalds * Then copy the pointers. 18931da177e4SLinus Torvalds */ 189460197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 18951da177e4SLinus Torvalds ifp->if_broot_bytes); 189660197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, 18971da177e4SLinus Torvalds (int)new_size); 18981da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 18991da177e4SLinus Torvalds } 1900f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 19011da177e4SLinus Torvalds ifp->if_broot = new_broot; 19021da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 19031da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 19041da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 19051da177e4SLinus Torvalds return; 19061da177e4SLinus Torvalds } 19071da177e4SLinus Torvalds 19081da177e4SLinus Torvalds 19091da177e4SLinus Torvalds /* 19101da177e4SLinus Torvalds * This is called when the amount of space needed for if_data 19111da177e4SLinus Torvalds * is increased or decreased. The change in size is indicated by 19121da177e4SLinus Torvalds * the number of bytes that need to be added or deleted in the 19131da177e4SLinus Torvalds * byte_diff parameter. 19141da177e4SLinus Torvalds * 19151da177e4SLinus Torvalds * If the amount of space needed has decreased below the size of the 19161da177e4SLinus Torvalds * inline buffer, then switch to using the inline buffer. Otherwise, 19171da177e4SLinus Torvalds * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 19181da177e4SLinus Torvalds * to what is needed. 19191da177e4SLinus Torvalds * 19201da177e4SLinus Torvalds * ip -- the inode whose if_data area is changing 19211da177e4SLinus Torvalds * byte_diff -- the change in the number of bytes, positive or negative, 19221da177e4SLinus Torvalds * requested for the if_data array. 19231da177e4SLinus Torvalds */ 19241da177e4SLinus Torvalds void 19251da177e4SLinus Torvalds xfs_idata_realloc( 19261da177e4SLinus Torvalds xfs_inode_t *ip, 19271da177e4SLinus Torvalds int byte_diff, 19281da177e4SLinus Torvalds int whichfork) 19291da177e4SLinus Torvalds { 19301da177e4SLinus Torvalds xfs_ifork_t *ifp; 19311da177e4SLinus Torvalds int new_size; 19321da177e4SLinus Torvalds int real_size; 19331da177e4SLinus Torvalds 19341da177e4SLinus Torvalds if (byte_diff == 0) { 19351da177e4SLinus Torvalds return; 19361da177e4SLinus Torvalds } 19371da177e4SLinus Torvalds 19381da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 19391da177e4SLinus Torvalds new_size = (int)ifp->if_bytes + byte_diff; 19401da177e4SLinus Torvalds ASSERT(new_size >= 0); 19411da177e4SLinus Torvalds 19421da177e4SLinus Torvalds if (new_size == 0) { 19431da177e4SLinus Torvalds if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 1944f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 19451da177e4SLinus Torvalds } 19461da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 19471da177e4SLinus Torvalds real_size = 0; 19481da177e4SLinus Torvalds } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 19491da177e4SLinus Torvalds /* 19501da177e4SLinus Torvalds * If the valid extents/data can fit in if_inline_ext/data, 19511da177e4SLinus Torvalds * copy them from the malloc'd vector and free it. 19521da177e4SLinus Torvalds */ 19531da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 19541da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 19551da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 19561da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 19571da177e4SLinus Torvalds memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 19581da177e4SLinus Torvalds new_size); 1959f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 19601da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 19611da177e4SLinus Torvalds } 19621da177e4SLinus Torvalds real_size = 0; 19631da177e4SLinus Torvalds } else { 19641da177e4SLinus Torvalds /* 19651da177e4SLinus Torvalds * Stuck with malloc/realloc. 19661da177e4SLinus Torvalds * For inline data, the underlying buffer must be 19671da177e4SLinus Torvalds * a multiple of 4 bytes in size so that it can be 19681da177e4SLinus Torvalds * logged and stay on word boundaries. We enforce 19691da177e4SLinus Torvalds * that here. 19701da177e4SLinus Torvalds */ 19711da177e4SLinus Torvalds real_size = roundup(new_size, 4); 19721da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 19731da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 19744a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, 19754a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 19761da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 19771da177e4SLinus Torvalds /* 19781da177e4SLinus Torvalds * Only do the realloc if the underlying size 19791da177e4SLinus Torvalds * is really changing. 19801da177e4SLinus Torvalds */ 19811da177e4SLinus Torvalds if (ifp->if_real_bytes != real_size) { 19821da177e4SLinus Torvalds ifp->if_u1.if_data = 19831da177e4SLinus Torvalds kmem_realloc(ifp->if_u1.if_data, 19841da177e4SLinus Torvalds real_size, 19851da177e4SLinus Torvalds ifp->if_real_bytes, 19864a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 19871da177e4SLinus Torvalds } 19881da177e4SLinus Torvalds } else { 19891da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 19904a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, 19914a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 19921da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 19931da177e4SLinus Torvalds ifp->if_bytes); 19941da177e4SLinus Torvalds } 19951da177e4SLinus Torvalds } 19961da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 19971da177e4SLinus Torvalds ifp->if_bytes = new_size; 19981da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 19991da177e4SLinus Torvalds } 20001da177e4SLinus Torvalds 20011da177e4SLinus Torvalds void 20021da177e4SLinus Torvalds xfs_idestroy_fork( 20031da177e4SLinus Torvalds xfs_inode_t *ip, 20041da177e4SLinus Torvalds int whichfork) 20051da177e4SLinus Torvalds { 20061da177e4SLinus Torvalds xfs_ifork_t *ifp; 20071da177e4SLinus Torvalds 20081da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 20091da177e4SLinus Torvalds if (ifp->if_broot != NULL) { 2010f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 20111da177e4SLinus Torvalds ifp->if_broot = NULL; 20121da177e4SLinus Torvalds } 20131da177e4SLinus Torvalds 20141da177e4SLinus Torvalds /* 20151da177e4SLinus Torvalds * If the format is local, then we can't have an extents 20161da177e4SLinus Torvalds * array so just look for an inline data array. If we're 20171da177e4SLinus Torvalds * not local then we may or may not have an extents list, 20181da177e4SLinus Torvalds * so check and free it up if we do. 20191da177e4SLinus Torvalds */ 20201da177e4SLinus Torvalds if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 20211da177e4SLinus Torvalds if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 20221da177e4SLinus Torvalds (ifp->if_u1.if_data != NULL)) { 20231da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 2024f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 20251da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 20261da177e4SLinus Torvalds ifp->if_real_bytes = 0; 20271da177e4SLinus Torvalds } 20281da177e4SLinus Torvalds } else if ((ifp->if_flags & XFS_IFEXTENTS) && 20290293ce3aSMandy Kirkconnell ((ifp->if_flags & XFS_IFEXTIREC) || 20300293ce3aSMandy Kirkconnell ((ifp->if_u1.if_extents != NULL) && 20310293ce3aSMandy Kirkconnell (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 20321da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 20334eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 20341da177e4SLinus Torvalds } 20351da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents == NULL || 20361da177e4SLinus Torvalds ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 20371da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 20381da177e4SLinus Torvalds if (whichfork == XFS_ATTR_FORK) { 20391da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 20401da177e4SLinus Torvalds ip->i_afp = NULL; 20411da177e4SLinus Torvalds } 20421da177e4SLinus Torvalds } 20431da177e4SLinus Torvalds 20441da177e4SLinus Torvalds /* 204560ec6783SChristoph Hellwig * This is called to unpin an inode. The caller must have the inode locked 204660ec6783SChristoph Hellwig * in at least shared mode so that the buffer cannot be subsequently pinned 204760ec6783SChristoph Hellwig * once someone is waiting for it to be unpinned. 20481da177e4SLinus Torvalds */ 204960ec6783SChristoph Hellwig static void 205060ec6783SChristoph Hellwig xfs_iunpin_nowait( 205160ec6783SChristoph Hellwig struct xfs_inode *ip) 2052a3f74ffbSDavid Chinner { 2053579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2054a3f74ffbSDavid Chinner 20554aaf15d1SDave Chinner trace_xfs_inode_unpin_nowait(ip, _RET_IP_); 20564aaf15d1SDave Chinner 2057a3f74ffbSDavid Chinner /* Give the log a push to start the unpinning I/O */ 205860ec6783SChristoph Hellwig xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2059a14a348bSChristoph Hellwig 2060a3f74ffbSDavid Chinner } 2061a3f74ffbSDavid Chinner 2062777df5afSDave Chinner void 20631da177e4SLinus Torvalds xfs_iunpin_wait( 206460ec6783SChristoph Hellwig struct xfs_inode *ip) 20651da177e4SLinus Torvalds { 206660ec6783SChristoph Hellwig if (xfs_ipincount(ip)) { 206760ec6783SChristoph Hellwig xfs_iunpin_nowait(ip); 206860ec6783SChristoph Hellwig wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0)); 20691da177e4SLinus Torvalds } 20701da177e4SLinus Torvalds } 20711da177e4SLinus Torvalds 20721da177e4SLinus Torvalds /* 20731da177e4SLinus Torvalds * xfs_iextents_copy() 20741da177e4SLinus Torvalds * 20751da177e4SLinus Torvalds * This is called to copy the REAL extents (as opposed to the delayed 20761da177e4SLinus Torvalds * allocation extents) from the inode into the given buffer. It 20771da177e4SLinus Torvalds * returns the number of bytes copied into the buffer. 20781da177e4SLinus Torvalds * 20791da177e4SLinus Torvalds * If there are no delayed allocation extents, then we can just 20801da177e4SLinus Torvalds * memcpy() the extents into the buffer. Otherwise, we need to 20811da177e4SLinus Torvalds * examine each extent in turn and skip those which are delayed. 20821da177e4SLinus Torvalds */ 20831da177e4SLinus Torvalds int 20841da177e4SLinus Torvalds xfs_iextents_copy( 20851da177e4SLinus Torvalds xfs_inode_t *ip, 2086a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp, 20871da177e4SLinus Torvalds int whichfork) 20881da177e4SLinus Torvalds { 20891da177e4SLinus Torvalds int copied; 20901da177e4SLinus Torvalds int i; 20911da177e4SLinus Torvalds xfs_ifork_t *ifp; 20921da177e4SLinus Torvalds int nrecs; 20931da177e4SLinus Torvalds xfs_fsblock_t start_block; 20941da177e4SLinus Torvalds 20951da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 2096579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 20971da177e4SLinus Torvalds ASSERT(ifp->if_bytes > 0); 20981da177e4SLinus Torvalds 20991da177e4SLinus Torvalds nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 21003a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 21011da177e4SLinus Torvalds ASSERT(nrecs > 0); 21021da177e4SLinus Torvalds 21031da177e4SLinus Torvalds /* 21041da177e4SLinus Torvalds * There are some delayed allocation extents in the 21051da177e4SLinus Torvalds * inode, so copy the extents one at a time and skip 21061da177e4SLinus Torvalds * the delayed ones. There must be at least one 21071da177e4SLinus Torvalds * non-delayed extent. 21081da177e4SLinus Torvalds */ 21091da177e4SLinus Torvalds copied = 0; 21101da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 2111a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 21121da177e4SLinus Torvalds start_block = xfs_bmbt_get_startblock(ep); 21139d87c319SEric Sandeen if (isnullstartblock(start_block)) { 21141da177e4SLinus Torvalds /* 21151da177e4SLinus Torvalds * It's a delayed allocation extent, so skip it. 21161da177e4SLinus Torvalds */ 21171da177e4SLinus Torvalds continue; 21181da177e4SLinus Torvalds } 21191da177e4SLinus Torvalds 21201da177e4SLinus Torvalds /* Translate to on disk format */ 2121cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l0), &dp->l0); 2122cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l1), &dp->l1); 2123a6f64d4aSChristoph Hellwig dp++; 21241da177e4SLinus Torvalds copied++; 21251da177e4SLinus Torvalds } 21261da177e4SLinus Torvalds ASSERT(copied != 0); 2127a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); 21281da177e4SLinus Torvalds 21291da177e4SLinus Torvalds return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 21301da177e4SLinus Torvalds } 21311da177e4SLinus Torvalds 21321da177e4SLinus Torvalds /* 21331da177e4SLinus Torvalds * Each of the following cases stores data into the same region 21341da177e4SLinus Torvalds * of the on-disk inode, so only one of them can be valid at 21351da177e4SLinus Torvalds * any given time. While it is possible to have conflicting formats 21361da177e4SLinus Torvalds * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 21371da177e4SLinus Torvalds * in EXTENTS format, this can only happen when the fork has 21381da177e4SLinus Torvalds * changed formats after being modified but before being flushed. 21391da177e4SLinus Torvalds * In these cases, the format always takes precedence, because the 21401da177e4SLinus Torvalds * format indicates the current state of the fork. 21411da177e4SLinus Torvalds */ 21421da177e4SLinus Torvalds /*ARGSUSED*/ 2143e4ac967bSDavid Chinner STATIC void 21441da177e4SLinus Torvalds xfs_iflush_fork( 21451da177e4SLinus Torvalds xfs_inode_t *ip, 21461da177e4SLinus Torvalds xfs_dinode_t *dip, 21471da177e4SLinus Torvalds xfs_inode_log_item_t *iip, 21481da177e4SLinus Torvalds int whichfork, 21491da177e4SLinus Torvalds xfs_buf_t *bp) 21501da177e4SLinus Torvalds { 21511da177e4SLinus Torvalds char *cp; 21521da177e4SLinus Torvalds xfs_ifork_t *ifp; 21531da177e4SLinus Torvalds xfs_mount_t *mp; 21541da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 21551da177e4SLinus Torvalds int first; 21561da177e4SLinus Torvalds #endif 21571da177e4SLinus Torvalds static const short brootflag[2] = 21581da177e4SLinus Torvalds { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 21591da177e4SLinus Torvalds static const short dataflag[2] = 21601da177e4SLinus Torvalds { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 21611da177e4SLinus Torvalds static const short extflag[2] = 21621da177e4SLinus Torvalds { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 21631da177e4SLinus Torvalds 2164e4ac967bSDavid Chinner if (!iip) 2165e4ac967bSDavid Chinner return; 21661da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 21671da177e4SLinus Torvalds /* 21681da177e4SLinus Torvalds * This can happen if we gave up in iformat in an error path, 21691da177e4SLinus Torvalds * for the attribute fork. 21701da177e4SLinus Torvalds */ 2171e4ac967bSDavid Chinner if (!ifp) { 21721da177e4SLinus Torvalds ASSERT(whichfork == XFS_ATTR_FORK); 2173e4ac967bSDavid Chinner return; 21741da177e4SLinus Torvalds } 21751da177e4SLinus Torvalds cp = XFS_DFORK_PTR(dip, whichfork); 21761da177e4SLinus Torvalds mp = ip->i_mount; 21771da177e4SLinus Torvalds switch (XFS_IFORK_FORMAT(ip, whichfork)) { 21781da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 21791da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 21801da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 21811da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_data != NULL); 21821da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 21831da177e4SLinus Torvalds memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 21841da177e4SLinus Torvalds } 21851da177e4SLinus Torvalds break; 21861da177e4SLinus Torvalds 21871da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 21881da177e4SLinus Torvalds ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 21891da177e4SLinus Torvalds !(iip->ili_format.ilf_fields & extflag[whichfork])); 21901da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 21911da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 2192ab1908a5SChristoph Hellwig ASSERT(xfs_iext_get_ext(ifp, 0)); 21931da177e4SLinus Torvalds ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 21941da177e4SLinus Torvalds (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 21951da177e4SLinus Torvalds whichfork); 21961da177e4SLinus Torvalds } 21971da177e4SLinus Torvalds break; 21981da177e4SLinus Torvalds 21991da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 22001da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 22011da177e4SLinus Torvalds (ifp->if_broot_bytes > 0)) { 22021da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 22031da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 22041da177e4SLinus Torvalds (XFS_IFORK_SIZE(ip, whichfork) + 22051da177e4SLinus Torvalds XFS_BROOT_SIZE_ADJ)); 220660197e8dSChristoph Hellwig xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 22071da177e4SLinus Torvalds (xfs_bmdr_block_t *)cp, 22081da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, mp, whichfork)); 22091da177e4SLinus Torvalds } 22101da177e4SLinus Torvalds break; 22111da177e4SLinus Torvalds 22121da177e4SLinus Torvalds case XFS_DINODE_FMT_DEV: 22131da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 22141da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 221581591fe2SChristoph Hellwig xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 22161da177e4SLinus Torvalds } 22171da177e4SLinus Torvalds break; 22181da177e4SLinus Torvalds 22191da177e4SLinus Torvalds case XFS_DINODE_FMT_UUID: 22201da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 22211da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 222281591fe2SChristoph Hellwig memcpy(XFS_DFORK_DPTR(dip), 222381591fe2SChristoph Hellwig &ip->i_df.if_u2.if_uuid, 22241da177e4SLinus Torvalds sizeof(uuid_t)); 22251da177e4SLinus Torvalds } 22261da177e4SLinus Torvalds break; 22271da177e4SLinus Torvalds 22281da177e4SLinus Torvalds default: 22291da177e4SLinus Torvalds ASSERT(0); 22301da177e4SLinus Torvalds break; 22311da177e4SLinus Torvalds } 22321da177e4SLinus Torvalds } 22331da177e4SLinus Torvalds 2234bad55843SDavid Chinner STATIC int 2235bad55843SDavid Chinner xfs_iflush_cluster( 2236bad55843SDavid Chinner xfs_inode_t *ip, 2237bad55843SDavid Chinner xfs_buf_t *bp) 2238bad55843SDavid Chinner { 2239bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 22405017e97dSDave Chinner struct xfs_perag *pag; 2241bad55843SDavid Chinner unsigned long first_index, mask; 2242c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2243bad55843SDavid Chinner int ilist_size; 2244bad55843SDavid Chinner xfs_inode_t **ilist; 2245bad55843SDavid Chinner xfs_inode_t *iq; 2246bad55843SDavid Chinner int nr_found; 2247bad55843SDavid Chinner int clcount = 0; 2248bad55843SDavid Chinner int bufwasdelwri; 2249bad55843SDavid Chinner int i; 2250bad55843SDavid Chinner 22515017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2252bad55843SDavid Chinner 2253c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2254c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 225549383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2256bad55843SDavid Chinner if (!ilist) 225744b56e0aSDave Chinner goto out_put; 2258bad55843SDavid Chinner 2259bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2260bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 22611a3e8f3dSDave Chinner rcu_read_lock(); 2262bad55843SDavid Chinner /* really need a gang lookup range call here */ 2263bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2264c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2265bad55843SDavid Chinner if (nr_found == 0) 2266bad55843SDavid Chinner goto out_free; 2267bad55843SDavid Chinner 2268bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2269bad55843SDavid Chinner iq = ilist[i]; 2270bad55843SDavid Chinner if (iq == ip) 2271bad55843SDavid Chinner continue; 22721a3e8f3dSDave Chinner 22731a3e8f3dSDave Chinner /* 22741a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could find a 22751a3e8f3dSDave Chinner * recently freed or even reallocated inode during the lookup. 22761a3e8f3dSDave Chinner * We need to check under the i_flags_lock for a valid inode 22771a3e8f3dSDave Chinner * here. Skip it if it is not valid or the wrong inode. 22781a3e8f3dSDave Chinner */ 22791a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 22801a3e8f3dSDave Chinner if (!ip->i_ino || 22811a3e8f3dSDave Chinner (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 22821a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 22831a3e8f3dSDave Chinner continue; 22841a3e8f3dSDave Chinner } 22851a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 22861a3e8f3dSDave Chinner 2287bad55843SDavid Chinner /* 2288bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2289bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2290bad55843SDavid Chinner * later after the appropriate locks are acquired. 2291bad55843SDavid Chinner */ 229233540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2293bad55843SDavid Chinner continue; 2294bad55843SDavid Chinner 2295bad55843SDavid Chinner /* 2296bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2297bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2298bad55843SDavid Chinner */ 2299bad55843SDavid Chinner 2300bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2301bad55843SDavid Chinner continue; 2302bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2303bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2304bad55843SDavid Chinner continue; 2305bad55843SDavid Chinner } 2306bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2307bad55843SDavid Chinner xfs_ifunlock(iq); 2308bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2309bad55843SDavid Chinner continue; 2310bad55843SDavid Chinner } 2311bad55843SDavid Chinner 2312bad55843SDavid Chinner /* 2313bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2314bad55843SDavid Chinner * re-check that it's dirty before flushing. 2315bad55843SDavid Chinner */ 231633540408SDavid Chinner if (!xfs_inode_clean(iq)) { 2317bad55843SDavid Chinner int error; 2318bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 2319bad55843SDavid Chinner if (error) { 2320bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2321bad55843SDavid Chinner goto cluster_corrupt_out; 2322bad55843SDavid Chinner } 2323bad55843SDavid Chinner clcount++; 2324bad55843SDavid Chinner } else { 2325bad55843SDavid Chinner xfs_ifunlock(iq); 2326bad55843SDavid Chinner } 2327bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2328bad55843SDavid Chinner } 2329bad55843SDavid Chinner 2330bad55843SDavid Chinner if (clcount) { 2331bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 2332bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2333bad55843SDavid Chinner } 2334bad55843SDavid Chinner 2335bad55843SDavid Chinner out_free: 23361a3e8f3dSDave Chinner rcu_read_unlock(); 2337f0e2d93cSDenys Vlasenko kmem_free(ilist); 233844b56e0aSDave Chinner out_put: 233944b56e0aSDave Chinner xfs_perag_put(pag); 2340bad55843SDavid Chinner return 0; 2341bad55843SDavid Chinner 2342bad55843SDavid Chinner 2343bad55843SDavid Chinner cluster_corrupt_out: 2344bad55843SDavid Chinner /* 2345bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 2346bad55843SDavid Chinner * inode buffer and shut down the filesystem. 2347bad55843SDavid Chinner */ 23481a3e8f3dSDave Chinner rcu_read_unlock(); 2349bad55843SDavid Chinner /* 2350bad55843SDavid Chinner * Clean up the buffer. If it was B_DELWRI, just release it -- 2351bad55843SDavid Chinner * brelse can handle it with no problems. If not, shut down the 2352bad55843SDavid Chinner * filesystem before releasing the buffer. 2353bad55843SDavid Chinner */ 2354bad55843SDavid Chinner bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); 2355bad55843SDavid Chinner if (bufwasdelwri) 2356bad55843SDavid Chinner xfs_buf_relse(bp); 2357bad55843SDavid Chinner 2358bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2359bad55843SDavid Chinner 2360bad55843SDavid Chinner if (!bufwasdelwri) { 2361bad55843SDavid Chinner /* 2362bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 2363bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 2364bad55843SDavid Chinner * mark it as stale and brelse. 2365bad55843SDavid Chinner */ 2366cb669ca5SChristoph Hellwig if (bp->b_iodone) { 2367bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 2368c867cb61SChristoph Hellwig xfs_buf_stale(bp); 23695a52c2a5SChandra Seetharaman xfs_buf_ioerror(bp, EIO); 23701a1a3e97SChristoph Hellwig xfs_buf_ioend(bp, 0); 2371bad55843SDavid Chinner } else { 2372c867cb61SChristoph Hellwig xfs_buf_stale(bp); 2373bad55843SDavid Chinner xfs_buf_relse(bp); 2374bad55843SDavid Chinner } 2375bad55843SDavid Chinner } 2376bad55843SDavid Chinner 2377bad55843SDavid Chinner /* 2378bad55843SDavid Chinner * Unlocks the flush lock 2379bad55843SDavid Chinner */ 2380bad55843SDavid Chinner xfs_iflush_abort(iq); 2381f0e2d93cSDenys Vlasenko kmem_free(ilist); 238244b56e0aSDave Chinner xfs_perag_put(pag); 2383bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 2384bad55843SDavid Chinner } 2385bad55843SDavid Chinner 23861da177e4SLinus Torvalds /* 23871da177e4SLinus Torvalds * xfs_iflush() will write a modified inode's changes out to the 23881da177e4SLinus Torvalds * inode's on disk home. The caller must have the inode lock held 2389c63942d3SDavid Chinner * in at least shared mode and the inode flush completion must be 2390c63942d3SDavid Chinner * active as well. The inode lock will still be held upon return from 23911da177e4SLinus Torvalds * the call and the caller is free to unlock it. 2392c63942d3SDavid Chinner * The inode flush will be completed when the inode reaches the disk. 23931da177e4SLinus Torvalds * The flags indicate how the inode's buffer should be written out. 23941da177e4SLinus Torvalds */ 23951da177e4SLinus Torvalds int 23961da177e4SLinus Torvalds xfs_iflush( 23971da177e4SLinus Torvalds xfs_inode_t *ip, 23981da177e4SLinus Torvalds uint flags) 23991da177e4SLinus Torvalds { 24001da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 24011da177e4SLinus Torvalds xfs_buf_t *bp; 24021da177e4SLinus Torvalds xfs_dinode_t *dip; 24031da177e4SLinus Torvalds xfs_mount_t *mp; 24041da177e4SLinus Torvalds int error; 24051da177e4SLinus Torvalds 24061da177e4SLinus Torvalds XFS_STATS_INC(xs_iflush_count); 24071da177e4SLinus Torvalds 2408579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2409c63942d3SDavid Chinner ASSERT(!completion_done(&ip->i_flush)); 24101da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 24111da177e4SLinus Torvalds ip->i_d.di_nextents > ip->i_df.if_ext_max); 24121da177e4SLinus Torvalds 24131da177e4SLinus Torvalds iip = ip->i_itemp; 24141da177e4SLinus Torvalds mp = ip->i_mount; 24151da177e4SLinus Torvalds 24161da177e4SLinus Torvalds /* 2417a3f74ffbSDavid Chinner * We can't flush the inode until it is unpinned, so wait for it if we 2418a3f74ffbSDavid Chinner * are allowed to block. We know no one new can pin it, because we are 2419a3f74ffbSDavid Chinner * holding the inode lock shared and you need to hold it exclusively to 2420a3f74ffbSDavid Chinner * pin the inode. 2421a3f74ffbSDavid Chinner * 2422a3f74ffbSDavid Chinner * If we are not allowed to block, force the log out asynchronously so 2423a3f74ffbSDavid Chinner * that when we come back the inode will be unpinned. If other inodes 2424a3f74ffbSDavid Chinner * in the same cluster are dirty, they will probably write the inode 2425a3f74ffbSDavid Chinner * out for us if they occur after the log force completes. 24261da177e4SLinus Torvalds */ 2427c854363eSDave Chinner if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { 2428a3f74ffbSDavid Chinner xfs_iunpin_nowait(ip); 2429a3f74ffbSDavid Chinner xfs_ifunlock(ip); 2430a3f74ffbSDavid Chinner return EAGAIN; 2431a3f74ffbSDavid Chinner } 24321da177e4SLinus Torvalds xfs_iunpin_wait(ip); 24331da177e4SLinus Torvalds 24341da177e4SLinus Torvalds /* 24354b6a4688SDave Chinner * For stale inodes we cannot rely on the backing buffer remaining 24364b6a4688SDave Chinner * stale in cache for the remaining life of the stale inode and so 24374b6a4688SDave Chinner * xfs_itobp() below may give us a buffer that no longer contains 24384b6a4688SDave Chinner * inodes below. We have to check this after ensuring the inode is 24394b6a4688SDave Chinner * unpinned so that it is safe to reclaim the stale inode after the 24404b6a4688SDave Chinner * flush call. 24414b6a4688SDave Chinner */ 24424b6a4688SDave Chinner if (xfs_iflags_test(ip, XFS_ISTALE)) { 24434b6a4688SDave Chinner xfs_ifunlock(ip); 24444b6a4688SDave Chinner return 0; 24454b6a4688SDave Chinner } 24464b6a4688SDave Chinner 24474b6a4688SDave Chinner /* 24481da177e4SLinus Torvalds * This may have been unpinned because the filesystem is shutting 24491da177e4SLinus Torvalds * down forcibly. If that's the case we must not write this inode 24501da177e4SLinus Torvalds * to disk, because the log record didn't make it to disk! 24511da177e4SLinus Torvalds */ 24521da177e4SLinus Torvalds if (XFS_FORCED_SHUTDOWN(mp)) { 24531da177e4SLinus Torvalds ip->i_update_core = 0; 24541da177e4SLinus Torvalds if (iip) 24551da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 24561da177e4SLinus Torvalds xfs_ifunlock(ip); 24571da177e4SLinus Torvalds return XFS_ERROR(EIO); 24581da177e4SLinus Torvalds } 24591da177e4SLinus Torvalds 24601da177e4SLinus Torvalds /* 2461a3f74ffbSDavid Chinner * Get the buffer containing the on-disk inode. 2462a3f74ffbSDavid Chinner */ 246376d8b277SChristoph Hellwig error = xfs_itobp(mp, NULL, ip, &dip, &bp, 24641bfd8d04SDave Chinner (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); 2465a3f74ffbSDavid Chinner if (error || !bp) { 2466a3f74ffbSDavid Chinner xfs_ifunlock(ip); 2467a3f74ffbSDavid Chinner return error; 2468a3f74ffbSDavid Chinner } 2469a3f74ffbSDavid Chinner 2470a3f74ffbSDavid Chinner /* 24711da177e4SLinus Torvalds * First flush out the inode that xfs_iflush was called with. 24721da177e4SLinus Torvalds */ 24731da177e4SLinus Torvalds error = xfs_iflush_int(ip, bp); 2474bad55843SDavid Chinner if (error) 24751da177e4SLinus Torvalds goto corrupt_out; 24761da177e4SLinus Torvalds 24771da177e4SLinus Torvalds /* 2478a3f74ffbSDavid Chinner * If the buffer is pinned then push on the log now so we won't 2479a3f74ffbSDavid Chinner * get stuck waiting in the write for too long. 2480a3f74ffbSDavid Chinner */ 2481811e64c7SChandra Seetharaman if (xfs_buf_ispinned(bp)) 2482a14a348bSChristoph Hellwig xfs_log_force(mp, 0); 2483a3f74ffbSDavid Chinner 2484a3f74ffbSDavid Chinner /* 24851da177e4SLinus Torvalds * inode clustering: 24861da177e4SLinus Torvalds * see if other inodes can be gathered into this write 24871da177e4SLinus Torvalds */ 2488bad55843SDavid Chinner error = xfs_iflush_cluster(ip, bp); 2489bad55843SDavid Chinner if (error) 24901da177e4SLinus Torvalds goto cluster_corrupt_out; 24911da177e4SLinus Torvalds 2492c854363eSDave Chinner if (flags & SYNC_WAIT) 2493c2b006c1SChristoph Hellwig error = xfs_bwrite(bp); 2494c2b006c1SChristoph Hellwig else 249561551f1eSChristoph Hellwig xfs_buf_delwri_queue(bp); 2496c2b006c1SChristoph Hellwig 249761551f1eSChristoph Hellwig xfs_buf_relse(bp); 24981da177e4SLinus Torvalds return error; 24991da177e4SLinus Torvalds 25001da177e4SLinus Torvalds corrupt_out: 25011da177e4SLinus Torvalds xfs_buf_relse(bp); 25027d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 25031da177e4SLinus Torvalds cluster_corrupt_out: 25041da177e4SLinus Torvalds /* 25051da177e4SLinus Torvalds * Unlocks the flush lock 25061da177e4SLinus Torvalds */ 2507bad55843SDavid Chinner xfs_iflush_abort(ip); 25081da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 25091da177e4SLinus Torvalds } 25101da177e4SLinus Torvalds 25111da177e4SLinus Torvalds 25121da177e4SLinus Torvalds STATIC int 25131da177e4SLinus Torvalds xfs_iflush_int( 25141da177e4SLinus Torvalds xfs_inode_t *ip, 25151da177e4SLinus Torvalds xfs_buf_t *bp) 25161da177e4SLinus Torvalds { 25171da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 25181da177e4SLinus Torvalds xfs_dinode_t *dip; 25191da177e4SLinus Torvalds xfs_mount_t *mp; 25201da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 25211da177e4SLinus Torvalds int first; 25221da177e4SLinus Torvalds #endif 25231da177e4SLinus Torvalds 2524579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2525c63942d3SDavid Chinner ASSERT(!completion_done(&ip->i_flush)); 25261da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 25271da177e4SLinus Torvalds ip->i_d.di_nextents > ip->i_df.if_ext_max); 25281da177e4SLinus Torvalds 25291da177e4SLinus Torvalds iip = ip->i_itemp; 25301da177e4SLinus Torvalds mp = ip->i_mount; 25311da177e4SLinus Torvalds 25321da177e4SLinus Torvalds /* set *dip = inode's place in the buffer */ 253392bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 25341da177e4SLinus Torvalds 25351da177e4SLinus Torvalds /* 25361da177e4SLinus Torvalds * Clear i_update_core before copying out the data. 25371da177e4SLinus Torvalds * This is for coordination with our timestamp updates 25381da177e4SLinus Torvalds * that don't hold the inode lock. They will always 25391da177e4SLinus Torvalds * update the timestamps BEFORE setting i_update_core, 25401da177e4SLinus Torvalds * so if we clear i_update_core after they set it we 25411da177e4SLinus Torvalds * are guaranteed to see their updates to the timestamps. 25421da177e4SLinus Torvalds * I believe that this depends on strongly ordered memory 25431da177e4SLinus Torvalds * semantics, but we have that. We use the SYNCHRONIZE 25441da177e4SLinus Torvalds * macro to make sure that the compiler does not reorder 25451da177e4SLinus Torvalds * the i_update_core access below the data copy below. 25461da177e4SLinus Torvalds */ 25471da177e4SLinus Torvalds ip->i_update_core = 0; 25481da177e4SLinus Torvalds SYNCHRONIZE(); 25491da177e4SLinus Torvalds 255042fe2b1fSChristoph Hellwig /* 2551f9581b14SChristoph Hellwig * Make sure to get the latest timestamps from the Linux inode. 255242fe2b1fSChristoph Hellwig */ 2553f9581b14SChristoph Hellwig xfs_synchronize_times(ip); 255442fe2b1fSChristoph Hellwig 255569ef921bSChristoph Hellwig if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 25561da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 25576a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 25586a19d939SDave Chinner "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 25596a19d939SDave Chinner __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 25601da177e4SLinus Torvalds goto corrupt_out; 25611da177e4SLinus Torvalds } 25621da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 25631da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 25646a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 25656a19d939SDave Chinner "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 25666a19d939SDave Chinner __func__, ip->i_ino, ip, ip->i_d.di_magic); 25671da177e4SLinus Torvalds goto corrupt_out; 25681da177e4SLinus Torvalds } 2569abbede1bSAl Viro if (S_ISREG(ip->i_d.di_mode)) { 25701da177e4SLinus Torvalds if (XFS_TEST_ERROR( 25711da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 25721da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 25731da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 25746a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 25756a19d939SDave Chinner "%s: Bad regular inode %Lu, ptr 0x%p", 25766a19d939SDave Chinner __func__, ip->i_ino, ip); 25771da177e4SLinus Torvalds goto corrupt_out; 25781da177e4SLinus Torvalds } 2579abbede1bSAl Viro } else if (S_ISDIR(ip->i_d.di_mode)) { 25801da177e4SLinus Torvalds if (XFS_TEST_ERROR( 25811da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 25821da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 25831da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 25841da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 25856a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 25866a19d939SDave Chinner "%s: Bad directory inode %Lu, ptr 0x%p", 25876a19d939SDave Chinner __func__, ip->i_ino, ip); 25881da177e4SLinus Torvalds goto corrupt_out; 25891da177e4SLinus Torvalds } 25901da177e4SLinus Torvalds } 25911da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 25921da177e4SLinus Torvalds ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 25931da177e4SLinus Torvalds XFS_RANDOM_IFLUSH_5)) { 25946a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 25956a19d939SDave Chinner "%s: detected corrupt incore inode %Lu, " 25966a19d939SDave Chinner "total extents = %d, nblocks = %Ld, ptr 0x%p", 25976a19d939SDave Chinner __func__, ip->i_ino, 25981da177e4SLinus Torvalds ip->i_d.di_nextents + ip->i_d.di_anextents, 25996a19d939SDave Chinner ip->i_d.di_nblocks, ip); 26001da177e4SLinus Torvalds goto corrupt_out; 26011da177e4SLinus Torvalds } 26021da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 26031da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 26046a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 26056a19d939SDave Chinner "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 26066a19d939SDave Chinner __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 26071da177e4SLinus Torvalds goto corrupt_out; 26081da177e4SLinus Torvalds } 26091da177e4SLinus Torvalds /* 26101da177e4SLinus Torvalds * bump the flush iteration count, used to detect flushes which 26111da177e4SLinus Torvalds * postdate a log record during recovery. 26121da177e4SLinus Torvalds */ 26131da177e4SLinus Torvalds 26141da177e4SLinus Torvalds ip->i_d.di_flushiter++; 26151da177e4SLinus Torvalds 26161da177e4SLinus Torvalds /* 26171da177e4SLinus Torvalds * Copy the dirty parts of the inode into the on-disk 26181da177e4SLinus Torvalds * inode. We always copy out the core of the inode, 26191da177e4SLinus Torvalds * because if the inode is dirty at all the core must 26201da177e4SLinus Torvalds * be. 26211da177e4SLinus Torvalds */ 262281591fe2SChristoph Hellwig xfs_dinode_to_disk(dip, &ip->i_d); 26231da177e4SLinus Torvalds 26241da177e4SLinus Torvalds /* Wrap, we never let the log put out DI_MAX_FLUSH */ 26251da177e4SLinus Torvalds if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 26261da177e4SLinus Torvalds ip->i_d.di_flushiter = 0; 26271da177e4SLinus Torvalds 26281da177e4SLinus Torvalds /* 26291da177e4SLinus Torvalds * If this is really an old format inode and the superblock version 26301da177e4SLinus Torvalds * has not been updated to support only new format inodes, then 26311da177e4SLinus Torvalds * convert back to the old inode format. If the superblock version 26321da177e4SLinus Torvalds * has been updated, then make the conversion permanent. 26331da177e4SLinus Torvalds */ 263451ce16d5SChristoph Hellwig ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 263551ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 263662118709SEric Sandeen if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 26371da177e4SLinus Torvalds /* 26381da177e4SLinus Torvalds * Convert it back. 26391da177e4SLinus Torvalds */ 26401da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 264181591fe2SChristoph Hellwig dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); 26421da177e4SLinus Torvalds } else { 26431da177e4SLinus Torvalds /* 26441da177e4SLinus Torvalds * The superblock version has already been bumped, 26451da177e4SLinus Torvalds * so just make the conversion to the new inode 26461da177e4SLinus Torvalds * format permanent. 26471da177e4SLinus Torvalds */ 264851ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 264951ce16d5SChristoph Hellwig dip->di_version = 2; 26501da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 265181591fe2SChristoph Hellwig dip->di_onlink = 0; 26521da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 265381591fe2SChristoph Hellwig memset(&(dip->di_pad[0]), 0, 265481591fe2SChristoph Hellwig sizeof(dip->di_pad)); 26556743099cSArkadiusz Mi?kiewicz ASSERT(xfs_get_projid(ip) == 0); 26561da177e4SLinus Torvalds } 26571da177e4SLinus Torvalds } 26581da177e4SLinus Torvalds 2659e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 2660e4ac967bSDavid Chinner if (XFS_IFORK_Q(ip)) 2661e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 26621da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 26631da177e4SLinus Torvalds 26641da177e4SLinus Torvalds /* 26651da177e4SLinus Torvalds * We've recorded everything logged in the inode, so we'd 26661da177e4SLinus Torvalds * like to clear the ilf_fields bits so we don't log and 26671da177e4SLinus Torvalds * flush things unnecessarily. However, we can't stop 26681da177e4SLinus Torvalds * logging all this information until the data we've copied 26691da177e4SLinus Torvalds * into the disk buffer is written to disk. If we did we might 26701da177e4SLinus Torvalds * overwrite the copy of the inode in the log with all the 26711da177e4SLinus Torvalds * data after re-logging only part of it, and in the face of 26721da177e4SLinus Torvalds * a crash we wouldn't have all the data we need to recover. 26731da177e4SLinus Torvalds * 26741da177e4SLinus Torvalds * What we do is move the bits to the ili_last_fields field. 26751da177e4SLinus Torvalds * When logging the inode, these bits are moved back to the 26761da177e4SLinus Torvalds * ilf_fields field. In the xfs_iflush_done() routine we 26771da177e4SLinus Torvalds * clear ili_last_fields, since we know that the information 26781da177e4SLinus Torvalds * those bits represent is permanently on disk. As long as 26791da177e4SLinus Torvalds * the flush completes before the inode is logged again, then 26801da177e4SLinus Torvalds * both ilf_fields and ili_last_fields will be cleared. 26811da177e4SLinus Torvalds * 26821da177e4SLinus Torvalds * We can play with the ilf_fields bits here, because the inode 26831da177e4SLinus Torvalds * lock must be held exclusively in order to set bits there 26841da177e4SLinus Torvalds * and the flush lock protects the ili_last_fields bits. 26851da177e4SLinus Torvalds * Set ili_logged so the flush done 26861da177e4SLinus Torvalds * routine can tell whether or not to look in the AIL. 26871da177e4SLinus Torvalds * Also, store the current LSN of the inode so that we can tell 26881da177e4SLinus Torvalds * whether the item has moved in the AIL from xfs_iflush_done(). 26891da177e4SLinus Torvalds * In order to read the lsn we need the AIL lock, because 26901da177e4SLinus Torvalds * it is a 64 bit value that cannot be read atomically. 26911da177e4SLinus Torvalds */ 26921da177e4SLinus Torvalds if (iip != NULL && iip->ili_format.ilf_fields != 0) { 26931da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 26941da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 26951da177e4SLinus Torvalds iip->ili_logged = 1; 26961da177e4SLinus Torvalds 26977b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 26987b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 26991da177e4SLinus Torvalds 27001da177e4SLinus Torvalds /* 27011da177e4SLinus Torvalds * Attach the function xfs_iflush_done to the inode's 27021da177e4SLinus Torvalds * buffer. This will remove the inode from the AIL 27031da177e4SLinus Torvalds * and unlock the inode's flush lock when the inode is 27041da177e4SLinus Torvalds * completely written to disk. 27051da177e4SLinus Torvalds */ 2706ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 27071da177e4SLinus Torvalds 2708adadbeefSChristoph Hellwig ASSERT(bp->b_fspriv != NULL); 2709cb669ca5SChristoph Hellwig ASSERT(bp->b_iodone != NULL); 27101da177e4SLinus Torvalds } else { 27111da177e4SLinus Torvalds /* 27121da177e4SLinus Torvalds * We're flushing an inode which is not in the AIL and has 27131da177e4SLinus Torvalds * not been logged but has i_update_core set. For this 27141da177e4SLinus Torvalds * case we can use a B_DELWRI flush and immediately drop 27151da177e4SLinus Torvalds * the inode flush lock because we can avoid the whole 27161da177e4SLinus Torvalds * AIL state thing. It's OK to drop the flush lock now, 27171da177e4SLinus Torvalds * because we've already locked the buffer and to do anything 27181da177e4SLinus Torvalds * you really need both. 27191da177e4SLinus Torvalds */ 27201da177e4SLinus Torvalds if (iip != NULL) { 27211da177e4SLinus Torvalds ASSERT(iip->ili_logged == 0); 27221da177e4SLinus Torvalds ASSERT(iip->ili_last_fields == 0); 27231da177e4SLinus Torvalds ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 27241da177e4SLinus Torvalds } 27251da177e4SLinus Torvalds xfs_ifunlock(ip); 27261da177e4SLinus Torvalds } 27271da177e4SLinus Torvalds 27281da177e4SLinus Torvalds return 0; 27291da177e4SLinus Torvalds 27301da177e4SLinus Torvalds corrupt_out: 27311da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 27321da177e4SLinus Torvalds } 27331da177e4SLinus Torvalds 27344dd2cb4aSChristoph Hellwig void 27354dd2cb4aSChristoph Hellwig xfs_promote_inode( 27364dd2cb4aSChristoph Hellwig struct xfs_inode *ip) 27374dd2cb4aSChristoph Hellwig { 27384dd2cb4aSChristoph Hellwig struct xfs_buf *bp; 27394dd2cb4aSChristoph Hellwig 27404dd2cb4aSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 27414dd2cb4aSChristoph Hellwig 27424dd2cb4aSChristoph Hellwig bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno, 27434dd2cb4aSChristoph Hellwig ip->i_imap.im_len, XBF_TRYLOCK); 27444dd2cb4aSChristoph Hellwig if (!bp) 27454dd2cb4aSChristoph Hellwig return; 27464dd2cb4aSChristoph Hellwig 27474dd2cb4aSChristoph Hellwig if (XFS_BUF_ISDELAYWRITE(bp)) { 27484dd2cb4aSChristoph Hellwig xfs_buf_delwri_promote(bp); 27494dd2cb4aSChristoph Hellwig wake_up_process(ip->i_mount->m_ddev_targp->bt_task); 27504dd2cb4aSChristoph Hellwig } 27514dd2cb4aSChristoph Hellwig 27524dd2cb4aSChristoph Hellwig xfs_buf_relse(bp); 27534dd2cb4aSChristoph Hellwig } 27544dd2cb4aSChristoph Hellwig 27554eea22f0SMandy Kirkconnell /* 27564eea22f0SMandy Kirkconnell * Return a pointer to the extent record at file index idx. 27574eea22f0SMandy Kirkconnell */ 2758a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * 27594eea22f0SMandy Kirkconnell xfs_iext_get_ext( 27604eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 27614eea22f0SMandy Kirkconnell xfs_extnum_t idx) /* index of target extent */ 27624eea22f0SMandy Kirkconnell { 27634eea22f0SMandy Kirkconnell ASSERT(idx >= 0); 276487bef181SChristoph Hellwig ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 276587bef181SChristoph Hellwig 27660293ce3aSMandy Kirkconnell if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 27670293ce3aSMandy Kirkconnell return ifp->if_u1.if_ext_irec->er_extbuf; 27680293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 27690293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* irec pointer */ 27700293ce3aSMandy Kirkconnell int erp_idx = 0; /* irec index */ 27710293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = idx; /* ext index in target list */ 27720293ce3aSMandy Kirkconnell 27730293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 27740293ce3aSMandy Kirkconnell return &erp->er_extbuf[page_idx]; 27750293ce3aSMandy Kirkconnell } else if (ifp->if_bytes) { 27764eea22f0SMandy Kirkconnell return &ifp->if_u1.if_extents[idx]; 27774eea22f0SMandy Kirkconnell } else { 27784eea22f0SMandy Kirkconnell return NULL; 27794eea22f0SMandy Kirkconnell } 27804eea22f0SMandy Kirkconnell } 27814eea22f0SMandy Kirkconnell 27824eea22f0SMandy Kirkconnell /* 27834eea22f0SMandy Kirkconnell * Insert new item(s) into the extent records for incore inode 27844eea22f0SMandy Kirkconnell * fork 'ifp'. 'count' new items are inserted at index 'idx'. 27854eea22f0SMandy Kirkconnell */ 27864eea22f0SMandy Kirkconnell void 27874eea22f0SMandy Kirkconnell xfs_iext_insert( 27886ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 27894eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* starting index of new items */ 27904eea22f0SMandy Kirkconnell xfs_extnum_t count, /* number of inserted items */ 27916ef35544SChristoph Hellwig xfs_bmbt_irec_t *new, /* items to insert */ 27926ef35544SChristoph Hellwig int state) /* type of extent conversion */ 27934eea22f0SMandy Kirkconnell { 27946ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 27954eea22f0SMandy Kirkconnell xfs_extnum_t i; /* extent record index */ 27964eea22f0SMandy Kirkconnell 27970b1b213fSChristoph Hellwig trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); 27980b1b213fSChristoph Hellwig 27994eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 28004eea22f0SMandy Kirkconnell xfs_iext_add(ifp, idx, count); 2801a6f64d4aSChristoph Hellwig for (i = idx; i < idx + count; i++, new++) 2802a6f64d4aSChristoph Hellwig xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); 28034eea22f0SMandy Kirkconnell } 28044eea22f0SMandy Kirkconnell 28054eea22f0SMandy Kirkconnell /* 28064eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 28074eea22f0SMandy Kirkconnell * extents needs to be increased. The ext_diff parameter stores the 28084eea22f0SMandy Kirkconnell * number of new extents being added and the idx parameter contains 28094eea22f0SMandy Kirkconnell * the extent index where the new extents will be added. If the new 28104eea22f0SMandy Kirkconnell * extents are being appended, then we just need to (re)allocate and 28114eea22f0SMandy Kirkconnell * initialize the space. Otherwise, if the new extents are being 28124eea22f0SMandy Kirkconnell * inserted into the middle of the existing entries, a bit more work 28134eea22f0SMandy Kirkconnell * is required to make room for the new extents to be inserted. The 28144eea22f0SMandy Kirkconnell * caller is responsible for filling in the new extent entries upon 28154eea22f0SMandy Kirkconnell * return. 28164eea22f0SMandy Kirkconnell */ 28174eea22f0SMandy Kirkconnell void 28184eea22f0SMandy Kirkconnell xfs_iext_add( 28194eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 28204eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin adding exts */ 2821c41564b5SNathan Scott int ext_diff) /* number of extents to add */ 28224eea22f0SMandy Kirkconnell { 28234eea22f0SMandy Kirkconnell int byte_diff; /* new bytes being added */ 28244eea22f0SMandy Kirkconnell int new_size; /* size of extents after adding */ 28254eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 28264eea22f0SMandy Kirkconnell 28274eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 28284eea22f0SMandy Kirkconnell ASSERT((idx >= 0) && (idx <= nextents)); 28294eea22f0SMandy Kirkconnell byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 28304eea22f0SMandy Kirkconnell new_size = ifp->if_bytes + byte_diff; 28314eea22f0SMandy Kirkconnell /* 28324eea22f0SMandy Kirkconnell * If the new number of extents (nextents + ext_diff) 28334eea22f0SMandy Kirkconnell * fits inside the inode, then continue to use the inline 28344eea22f0SMandy Kirkconnell * extent buffer. 28354eea22f0SMandy Kirkconnell */ 28364eea22f0SMandy Kirkconnell if (nextents + ext_diff <= XFS_INLINE_EXTS) { 28374eea22f0SMandy Kirkconnell if (idx < nextents) { 28384eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 28394eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx], 28404eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 28414eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 28424eea22f0SMandy Kirkconnell } 28434eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 28444eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 28454eea22f0SMandy Kirkconnell } 28464eea22f0SMandy Kirkconnell /* 28474eea22f0SMandy Kirkconnell * Otherwise use a linear (direct) extent list. 28484eea22f0SMandy Kirkconnell * If the extents are currently inside the inode, 28494eea22f0SMandy Kirkconnell * xfs_iext_realloc_direct will switch us from 28504eea22f0SMandy Kirkconnell * inline to direct extent allocation mode. 28514eea22f0SMandy Kirkconnell */ 28520293ce3aSMandy Kirkconnell else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 28534eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 28544eea22f0SMandy Kirkconnell if (idx < nextents) { 28554eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx + ext_diff], 28564eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx], 28574eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 28584eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 28594eea22f0SMandy Kirkconnell } 28604eea22f0SMandy Kirkconnell } 28610293ce3aSMandy Kirkconnell /* Indirection array */ 28620293ce3aSMandy Kirkconnell else { 28630293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; 28640293ce3aSMandy Kirkconnell int erp_idx = 0; 28650293ce3aSMandy Kirkconnell int page_idx = idx; 28660293ce3aSMandy Kirkconnell 28670293ce3aSMandy Kirkconnell ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 28680293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 28690293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 28700293ce3aSMandy Kirkconnell } else { 28710293ce3aSMandy Kirkconnell xfs_iext_irec_init(ifp); 28720293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 28730293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 28740293ce3aSMandy Kirkconnell } 28750293ce3aSMandy Kirkconnell /* Extents fit in target extent page */ 28760293ce3aSMandy Kirkconnell if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 28770293ce3aSMandy Kirkconnell if (page_idx < erp->er_extcount) { 28780293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[page_idx + ext_diff], 28790293ce3aSMandy Kirkconnell &erp->er_extbuf[page_idx], 28800293ce3aSMandy Kirkconnell (erp->er_extcount - page_idx) * 28810293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 28820293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[page_idx], 0, byte_diff); 28830293ce3aSMandy Kirkconnell } 28840293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 28850293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 28860293ce3aSMandy Kirkconnell } 28870293ce3aSMandy Kirkconnell /* Insert a new extent page */ 28880293ce3aSMandy Kirkconnell else if (erp) { 28890293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(ifp, 28900293ce3aSMandy Kirkconnell erp_idx, page_idx, ext_diff); 28910293ce3aSMandy Kirkconnell } 28920293ce3aSMandy Kirkconnell /* 28930293ce3aSMandy Kirkconnell * If extent(s) are being appended to the last page in 28940293ce3aSMandy Kirkconnell * the indirection array and the new extent(s) don't fit 28950293ce3aSMandy Kirkconnell * in the page, then erp is NULL and erp_idx is set to 28960293ce3aSMandy Kirkconnell * the next index needed in the indirection array. 28970293ce3aSMandy Kirkconnell */ 28980293ce3aSMandy Kirkconnell else { 28990293ce3aSMandy Kirkconnell int count = ext_diff; 29000293ce3aSMandy Kirkconnell 29010293ce3aSMandy Kirkconnell while (count) { 29020293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 29030293ce3aSMandy Kirkconnell erp->er_extcount = count; 29040293ce3aSMandy Kirkconnell count -= MIN(count, (int)XFS_LINEAR_EXTS); 29050293ce3aSMandy Kirkconnell if (count) { 29060293ce3aSMandy Kirkconnell erp_idx++; 29070293ce3aSMandy Kirkconnell } 29080293ce3aSMandy Kirkconnell } 29090293ce3aSMandy Kirkconnell } 29100293ce3aSMandy Kirkconnell } 29114eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 29124eea22f0SMandy Kirkconnell } 29134eea22f0SMandy Kirkconnell 29144eea22f0SMandy Kirkconnell /* 29150293ce3aSMandy Kirkconnell * This is called when incore extents are being added to the indirection 29160293ce3aSMandy Kirkconnell * array and the new extents do not fit in the target extent list. The 29170293ce3aSMandy Kirkconnell * erp_idx parameter contains the irec index for the target extent list 29180293ce3aSMandy Kirkconnell * in the indirection array, and the idx parameter contains the extent 29190293ce3aSMandy Kirkconnell * index within the list. The number of extents being added is stored 29200293ce3aSMandy Kirkconnell * in the count parameter. 29210293ce3aSMandy Kirkconnell * 29220293ce3aSMandy Kirkconnell * |-------| |-------| 29230293ce3aSMandy Kirkconnell * | | | | idx - number of extents before idx 29240293ce3aSMandy Kirkconnell * | idx | | count | 29250293ce3aSMandy Kirkconnell * | | | | count - number of extents being inserted at idx 29260293ce3aSMandy Kirkconnell * |-------| |-------| 29270293ce3aSMandy Kirkconnell * | count | | nex2 | nex2 - number of extents after idx + count 29280293ce3aSMandy Kirkconnell * |-------| |-------| 29290293ce3aSMandy Kirkconnell */ 29300293ce3aSMandy Kirkconnell void 29310293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi( 29320293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 29330293ce3aSMandy Kirkconnell int erp_idx, /* target extent irec index */ 29340293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index within target list */ 29350293ce3aSMandy Kirkconnell int count) /* new extents being added */ 29360293ce3aSMandy Kirkconnell { 29370293ce3aSMandy Kirkconnell int byte_diff; /* new bytes being added */ 29380293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* pointer to irec entry */ 29390293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* number of extents to add */ 29400293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* new extents still needed */ 29410293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 29420293ce3aSMandy Kirkconnell xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 29430293ce3aSMandy Kirkconnell int nlists; /* number of irec's (lists) */ 29440293ce3aSMandy Kirkconnell 29450293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 29460293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 29470293ce3aSMandy Kirkconnell nex2 = erp->er_extcount - idx; 29480293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 29490293ce3aSMandy Kirkconnell 29500293ce3aSMandy Kirkconnell /* 29510293ce3aSMandy Kirkconnell * Save second part of target extent list 29520293ce3aSMandy Kirkconnell * (all extents past */ 29530293ce3aSMandy Kirkconnell if (nex2) { 29540293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 29556785073bSDavid Chinner nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); 29560293ce3aSMandy Kirkconnell memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 29570293ce3aSMandy Kirkconnell erp->er_extcount -= nex2; 29580293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 29590293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[idx], 0, byte_diff); 29600293ce3aSMandy Kirkconnell } 29610293ce3aSMandy Kirkconnell 29620293ce3aSMandy Kirkconnell /* 29630293ce3aSMandy Kirkconnell * Add the new extents to the end of the target 29640293ce3aSMandy Kirkconnell * list, then allocate new irec record(s) and 29650293ce3aSMandy Kirkconnell * extent buffer(s) as needed to store the rest 29660293ce3aSMandy Kirkconnell * of the new extents. 29670293ce3aSMandy Kirkconnell */ 29680293ce3aSMandy Kirkconnell ext_cnt = count; 29690293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 29700293ce3aSMandy Kirkconnell if (ext_diff) { 29710293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 29720293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 29730293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 29740293ce3aSMandy Kirkconnell } 29750293ce3aSMandy Kirkconnell while (ext_cnt) { 29760293ce3aSMandy Kirkconnell erp_idx++; 29770293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 29780293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 29790293ce3aSMandy Kirkconnell erp->er_extcount = ext_diff; 29800293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 29810293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 29820293ce3aSMandy Kirkconnell } 29830293ce3aSMandy Kirkconnell 29840293ce3aSMandy Kirkconnell /* Add nex2 extents back to indirection array */ 29850293ce3aSMandy Kirkconnell if (nex2) { 29860293ce3aSMandy Kirkconnell xfs_extnum_t ext_avail; 29870293ce3aSMandy Kirkconnell int i; 29880293ce3aSMandy Kirkconnell 29890293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 29900293ce3aSMandy Kirkconnell ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 29910293ce3aSMandy Kirkconnell i = 0; 29920293ce3aSMandy Kirkconnell /* 29930293ce3aSMandy Kirkconnell * If nex2 extents fit in the current page, append 29940293ce3aSMandy Kirkconnell * nex2_ep after the new extents. 29950293ce3aSMandy Kirkconnell */ 29960293ce3aSMandy Kirkconnell if (nex2 <= ext_avail) { 29970293ce3aSMandy Kirkconnell i = erp->er_extcount; 29980293ce3aSMandy Kirkconnell } 29990293ce3aSMandy Kirkconnell /* 30000293ce3aSMandy Kirkconnell * Otherwise, check if space is available in the 30010293ce3aSMandy Kirkconnell * next page. 30020293ce3aSMandy Kirkconnell */ 30030293ce3aSMandy Kirkconnell else if ((erp_idx < nlists - 1) && 30040293ce3aSMandy Kirkconnell (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 30050293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 30060293ce3aSMandy Kirkconnell erp_idx++; 30070293ce3aSMandy Kirkconnell erp++; 30080293ce3aSMandy Kirkconnell /* Create a hole for nex2 extents */ 30090293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 30100293ce3aSMandy Kirkconnell erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 30110293ce3aSMandy Kirkconnell } 30120293ce3aSMandy Kirkconnell /* 30130293ce3aSMandy Kirkconnell * Final choice, create a new extent page for 30140293ce3aSMandy Kirkconnell * nex2 extents. 30150293ce3aSMandy Kirkconnell */ 30160293ce3aSMandy Kirkconnell else { 30170293ce3aSMandy Kirkconnell erp_idx++; 30180293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 30190293ce3aSMandy Kirkconnell } 30200293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3021f0e2d93cSDenys Vlasenko kmem_free(nex2_ep); 30220293ce3aSMandy Kirkconnell erp->er_extcount += nex2; 30230293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 30240293ce3aSMandy Kirkconnell } 30250293ce3aSMandy Kirkconnell } 30260293ce3aSMandy Kirkconnell 30270293ce3aSMandy Kirkconnell /* 30284eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 30294eea22f0SMandy Kirkconnell * extents needs to be decreased. The ext_diff parameter stores the 30304eea22f0SMandy Kirkconnell * number of extents to be removed and the idx parameter contains 30314eea22f0SMandy Kirkconnell * the extent index where the extents will be removed from. 30320293ce3aSMandy Kirkconnell * 30330293ce3aSMandy Kirkconnell * If the amount of space needed has decreased below the linear 30340293ce3aSMandy Kirkconnell * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 30350293ce3aSMandy Kirkconnell * extent array. Otherwise, use kmem_realloc() to adjust the 30360293ce3aSMandy Kirkconnell * size to what is needed. 30374eea22f0SMandy Kirkconnell */ 30384eea22f0SMandy Kirkconnell void 30394eea22f0SMandy Kirkconnell xfs_iext_remove( 30406ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 30414eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 30426ef35544SChristoph Hellwig int ext_diff, /* number of extents to remove */ 30436ef35544SChristoph Hellwig int state) /* type of extent conversion */ 30444eea22f0SMandy Kirkconnell { 30456ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 30464eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 30474eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 30484eea22f0SMandy Kirkconnell 30490b1b213fSChristoph Hellwig trace_xfs_iext_remove(ip, idx, state, _RET_IP_); 30500b1b213fSChristoph Hellwig 30514eea22f0SMandy Kirkconnell ASSERT(ext_diff > 0); 30524eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 30534eea22f0SMandy Kirkconnell new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 30544eea22f0SMandy Kirkconnell 30554eea22f0SMandy Kirkconnell if (new_size == 0) { 30564eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 30570293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 30580293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(ifp, idx, ext_diff); 30594eea22f0SMandy Kirkconnell } else if (ifp->if_real_bytes) { 30604eea22f0SMandy Kirkconnell xfs_iext_remove_direct(ifp, idx, ext_diff); 30614eea22f0SMandy Kirkconnell } else { 30624eea22f0SMandy Kirkconnell xfs_iext_remove_inline(ifp, idx, ext_diff); 30634eea22f0SMandy Kirkconnell } 30644eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 30654eea22f0SMandy Kirkconnell } 30664eea22f0SMandy Kirkconnell 30674eea22f0SMandy Kirkconnell /* 30684eea22f0SMandy Kirkconnell * This removes ext_diff extents from the inline buffer, beginning 30694eea22f0SMandy Kirkconnell * at extent index idx. 30704eea22f0SMandy Kirkconnell */ 30714eea22f0SMandy Kirkconnell void 30724eea22f0SMandy Kirkconnell xfs_iext_remove_inline( 30734eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 30744eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 30754eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 30764eea22f0SMandy Kirkconnell { 30774eea22f0SMandy Kirkconnell int nextents; /* number of extents in file */ 30784eea22f0SMandy Kirkconnell 30790293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 30804eea22f0SMandy Kirkconnell ASSERT(idx < XFS_INLINE_EXTS); 30814eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 30824eea22f0SMandy Kirkconnell ASSERT(((nextents - ext_diff) > 0) && 30834eea22f0SMandy Kirkconnell (nextents - ext_diff) < XFS_INLINE_EXTS); 30844eea22f0SMandy Kirkconnell 30854eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 30864eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx], 30874eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx + ext_diff], 30884eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 30894eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 30904eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 30914eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 30924eea22f0SMandy Kirkconnell } else { 30934eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, 30944eea22f0SMandy Kirkconnell ext_diff * sizeof(xfs_bmbt_rec_t)); 30954eea22f0SMandy Kirkconnell } 30964eea22f0SMandy Kirkconnell } 30974eea22f0SMandy Kirkconnell 30984eea22f0SMandy Kirkconnell /* 30994eea22f0SMandy Kirkconnell * This removes ext_diff extents from a linear (direct) extent list, 31004eea22f0SMandy Kirkconnell * beginning at extent index idx. If the extents are being removed 31014eea22f0SMandy Kirkconnell * from the end of the list (ie. truncate) then we just need to re- 31024eea22f0SMandy Kirkconnell * allocate the list to remove the extra space. Otherwise, if the 31034eea22f0SMandy Kirkconnell * extents are being removed from the middle of the existing extent 31044eea22f0SMandy Kirkconnell * entries, then we first need to move the extent records beginning 31054eea22f0SMandy Kirkconnell * at idx + ext_diff up in the list to overwrite the records being 31064eea22f0SMandy Kirkconnell * removed, then remove the extra space via kmem_realloc. 31074eea22f0SMandy Kirkconnell */ 31084eea22f0SMandy Kirkconnell void 31094eea22f0SMandy Kirkconnell xfs_iext_remove_direct( 31104eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 31114eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 31124eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 31134eea22f0SMandy Kirkconnell { 31144eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 31154eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 31164eea22f0SMandy Kirkconnell 31170293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 31184eea22f0SMandy Kirkconnell new_size = ifp->if_bytes - 31194eea22f0SMandy Kirkconnell (ext_diff * sizeof(xfs_bmbt_rec_t)); 31204eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 31214eea22f0SMandy Kirkconnell 31224eea22f0SMandy Kirkconnell if (new_size == 0) { 31234eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 31244eea22f0SMandy Kirkconnell return; 31254eea22f0SMandy Kirkconnell } 31264eea22f0SMandy Kirkconnell /* Move extents up in the list (if needed) */ 31274eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 31284eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx], 31294eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx + ext_diff], 31304eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 31314eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 31324eea22f0SMandy Kirkconnell } 31334eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[nextents - ext_diff], 31344eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 31354eea22f0SMandy Kirkconnell /* 31364eea22f0SMandy Kirkconnell * Reallocate the direct extent list. If the extents 31374eea22f0SMandy Kirkconnell * will fit inside the inode then xfs_iext_realloc_direct 31384eea22f0SMandy Kirkconnell * will switch from direct to inline extent allocation 31394eea22f0SMandy Kirkconnell * mode for us. 31404eea22f0SMandy Kirkconnell */ 31414eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 31424eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 31434eea22f0SMandy Kirkconnell } 31444eea22f0SMandy Kirkconnell 31454eea22f0SMandy Kirkconnell /* 31460293ce3aSMandy Kirkconnell * This is called when incore extents are being removed from the 31470293ce3aSMandy Kirkconnell * indirection array and the extents being removed span multiple extent 31480293ce3aSMandy Kirkconnell * buffers. The idx parameter contains the file extent index where we 31490293ce3aSMandy Kirkconnell * want to begin removing extents, and the count parameter contains 31500293ce3aSMandy Kirkconnell * how many extents need to be removed. 31510293ce3aSMandy Kirkconnell * 31520293ce3aSMandy Kirkconnell * |-------| |-------| 31530293ce3aSMandy Kirkconnell * | nex1 | | | nex1 - number of extents before idx 31540293ce3aSMandy Kirkconnell * |-------| | count | 31550293ce3aSMandy Kirkconnell * | | | | count - number of extents being removed at idx 31560293ce3aSMandy Kirkconnell * | count | |-------| 31570293ce3aSMandy Kirkconnell * | | | nex2 | nex2 - number of extents after idx + count 31580293ce3aSMandy Kirkconnell * |-------| |-------| 31590293ce3aSMandy Kirkconnell */ 31600293ce3aSMandy Kirkconnell void 31610293ce3aSMandy Kirkconnell xfs_iext_remove_indirect( 31620293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 31630293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing extents */ 31640293ce3aSMandy Kirkconnell int count) /* number of extents to remove */ 31650293ce3aSMandy Kirkconnell { 31660293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 31670293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 31680293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* extents left to remove */ 31690293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* extents to remove in current list */ 31700293ce3aSMandy Kirkconnell xfs_extnum_t nex1; /* number of extents before idx */ 31710293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 31720293ce3aSMandy Kirkconnell int page_idx = idx; /* index in target extent list */ 31730293ce3aSMandy Kirkconnell 31740293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 31750293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 31760293ce3aSMandy Kirkconnell ASSERT(erp != NULL); 31770293ce3aSMandy Kirkconnell nex1 = page_idx; 31780293ce3aSMandy Kirkconnell ext_cnt = count; 31790293ce3aSMandy Kirkconnell while (ext_cnt) { 31800293ce3aSMandy Kirkconnell nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 31810293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 31820293ce3aSMandy Kirkconnell /* 31830293ce3aSMandy Kirkconnell * Check for deletion of entire list; 31840293ce3aSMandy Kirkconnell * xfs_iext_irec_remove() updates extent offsets. 31850293ce3aSMandy Kirkconnell */ 31860293ce3aSMandy Kirkconnell if (ext_diff == erp->er_extcount) { 31870293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 31880293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 31890293ce3aSMandy Kirkconnell nex1 = 0; 31900293ce3aSMandy Kirkconnell if (ext_cnt) { 31910293ce3aSMandy Kirkconnell ASSERT(erp_idx < ifp->if_real_bytes / 31920293ce3aSMandy Kirkconnell XFS_IEXT_BUFSZ); 31930293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 31940293ce3aSMandy Kirkconnell nex1 = 0; 31950293ce3aSMandy Kirkconnell continue; 31960293ce3aSMandy Kirkconnell } else { 31970293ce3aSMandy Kirkconnell break; 31980293ce3aSMandy Kirkconnell } 31990293ce3aSMandy Kirkconnell } 32000293ce3aSMandy Kirkconnell /* Move extents up (if needed) */ 32010293ce3aSMandy Kirkconnell if (nex2) { 32020293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex1], 32030293ce3aSMandy Kirkconnell &erp->er_extbuf[nex1 + ext_diff], 32040293ce3aSMandy Kirkconnell nex2 * sizeof(xfs_bmbt_rec_t)); 32050293ce3aSMandy Kirkconnell } 32060293ce3aSMandy Kirkconnell /* Zero out rest of page */ 32070293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 32080293ce3aSMandy Kirkconnell ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 32090293ce3aSMandy Kirkconnell /* Update remaining counters */ 32100293ce3aSMandy Kirkconnell erp->er_extcount -= ext_diff; 32110293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 32120293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 32130293ce3aSMandy Kirkconnell nex1 = 0; 32140293ce3aSMandy Kirkconnell erp_idx++; 32150293ce3aSMandy Kirkconnell erp++; 32160293ce3aSMandy Kirkconnell } 32170293ce3aSMandy Kirkconnell ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); 32180293ce3aSMandy Kirkconnell xfs_iext_irec_compact(ifp); 32190293ce3aSMandy Kirkconnell } 32200293ce3aSMandy Kirkconnell 32210293ce3aSMandy Kirkconnell /* 32224eea22f0SMandy Kirkconnell * Create, destroy, or resize a linear (direct) block of extents. 32234eea22f0SMandy Kirkconnell */ 32244eea22f0SMandy Kirkconnell void 32254eea22f0SMandy Kirkconnell xfs_iext_realloc_direct( 32264eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 32274eea22f0SMandy Kirkconnell int new_size) /* new size of extents */ 32284eea22f0SMandy Kirkconnell { 32294eea22f0SMandy Kirkconnell int rnew_size; /* real new size of extents */ 32304eea22f0SMandy Kirkconnell 32314eea22f0SMandy Kirkconnell rnew_size = new_size; 32324eea22f0SMandy Kirkconnell 32330293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 32340293ce3aSMandy Kirkconnell ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 32350293ce3aSMandy Kirkconnell (new_size != ifp->if_real_bytes))); 32360293ce3aSMandy Kirkconnell 32374eea22f0SMandy Kirkconnell /* Free extent records */ 32384eea22f0SMandy Kirkconnell if (new_size == 0) { 32394eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 32404eea22f0SMandy Kirkconnell } 32414eea22f0SMandy Kirkconnell /* Resize direct extent list and zero any new bytes */ 32424eea22f0SMandy Kirkconnell else if (ifp->if_real_bytes) { 32434eea22f0SMandy Kirkconnell /* Check if extents will fit inside the inode */ 32444eea22f0SMandy Kirkconnell if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 32454eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(ifp, new_size / 32464eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)); 32474eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 32484eea22f0SMandy Kirkconnell return; 32494eea22f0SMandy Kirkconnell } 325016a087d8SVignesh Babu if (!is_power_of_2(new_size)){ 325140ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 32524eea22f0SMandy Kirkconnell } 32534eea22f0SMandy Kirkconnell if (rnew_size != ifp->if_real_bytes) { 3254a6f64d4aSChristoph Hellwig ifp->if_u1.if_extents = 32554eea22f0SMandy Kirkconnell kmem_realloc(ifp->if_u1.if_extents, 32564eea22f0SMandy Kirkconnell rnew_size, 32576785073bSDavid Chinner ifp->if_real_bytes, KM_NOFS); 32584eea22f0SMandy Kirkconnell } 32594eea22f0SMandy Kirkconnell if (rnew_size > ifp->if_real_bytes) { 32604eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[ifp->if_bytes / 32614eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)], 0, 32624eea22f0SMandy Kirkconnell rnew_size - ifp->if_real_bytes); 32634eea22f0SMandy Kirkconnell } 32644eea22f0SMandy Kirkconnell } 32654eea22f0SMandy Kirkconnell /* 32664eea22f0SMandy Kirkconnell * Switch from the inline extent buffer to a direct 32674eea22f0SMandy Kirkconnell * extent list. Be sure to include the inline extent 32684eea22f0SMandy Kirkconnell * bytes in new_size. 32694eea22f0SMandy Kirkconnell */ 32704eea22f0SMandy Kirkconnell else { 32714eea22f0SMandy Kirkconnell new_size += ifp->if_bytes; 327216a087d8SVignesh Babu if (!is_power_of_2(new_size)) { 327340ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 32744eea22f0SMandy Kirkconnell } 32754eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(ifp, rnew_size); 32764eea22f0SMandy Kirkconnell } 32774eea22f0SMandy Kirkconnell ifp->if_real_bytes = rnew_size; 32784eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 32794eea22f0SMandy Kirkconnell } 32804eea22f0SMandy Kirkconnell 32814eea22f0SMandy Kirkconnell /* 32824eea22f0SMandy Kirkconnell * Switch from linear (direct) extent records to inline buffer. 32834eea22f0SMandy Kirkconnell */ 32844eea22f0SMandy Kirkconnell void 32854eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline( 32864eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 32874eea22f0SMandy Kirkconnell xfs_extnum_t nextents) /* number of extents in file */ 32884eea22f0SMandy Kirkconnell { 32894eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 32904eea22f0SMandy Kirkconnell ASSERT(nextents <= XFS_INLINE_EXTS); 32914eea22f0SMandy Kirkconnell /* 32924eea22f0SMandy Kirkconnell * The inline buffer was zeroed when we switched 32934eea22f0SMandy Kirkconnell * from inline to direct extent allocation mode, 32944eea22f0SMandy Kirkconnell * so we don't need to clear it here. 32954eea22f0SMandy Kirkconnell */ 32964eea22f0SMandy Kirkconnell memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 32974eea22f0SMandy Kirkconnell nextents * sizeof(xfs_bmbt_rec_t)); 3298f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 32994eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 33004eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 33014eea22f0SMandy Kirkconnell } 33024eea22f0SMandy Kirkconnell 33034eea22f0SMandy Kirkconnell /* 33044eea22f0SMandy Kirkconnell * Switch from inline buffer to linear (direct) extent records. 33054eea22f0SMandy Kirkconnell * new_size should already be rounded up to the next power of 2 33064eea22f0SMandy Kirkconnell * by the caller (when appropriate), so use new_size as it is. 33074eea22f0SMandy Kirkconnell * However, since new_size may be rounded up, we can't update 33084eea22f0SMandy Kirkconnell * if_bytes here. It is the caller's responsibility to update 33094eea22f0SMandy Kirkconnell * if_bytes upon return. 33104eea22f0SMandy Kirkconnell */ 33114eea22f0SMandy Kirkconnell void 33124eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct( 33134eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33144eea22f0SMandy Kirkconnell int new_size) /* number of extents in file */ 33154eea22f0SMandy Kirkconnell { 33166785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); 33174eea22f0SMandy Kirkconnell memset(ifp->if_u1.if_extents, 0, new_size); 33184eea22f0SMandy Kirkconnell if (ifp->if_bytes) { 33194eea22f0SMandy Kirkconnell memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 33204eea22f0SMandy Kirkconnell ifp->if_bytes); 33214eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 33224eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 33234eea22f0SMandy Kirkconnell } 33244eea22f0SMandy Kirkconnell ifp->if_real_bytes = new_size; 33254eea22f0SMandy Kirkconnell } 33264eea22f0SMandy Kirkconnell 33274eea22f0SMandy Kirkconnell /* 33280293ce3aSMandy Kirkconnell * Resize an extent indirection array to new_size bytes. 33290293ce3aSMandy Kirkconnell */ 3330d96f8f89SEric Sandeen STATIC void 33310293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect( 33320293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33330293ce3aSMandy Kirkconnell int new_size) /* new indirection array size */ 33340293ce3aSMandy Kirkconnell { 33350293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 33360293ce3aSMandy Kirkconnell int size; /* current indirection array size */ 33370293ce3aSMandy Kirkconnell 33380293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 33390293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 33400293ce3aSMandy Kirkconnell size = nlists * sizeof(xfs_ext_irec_t); 33410293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes); 33420293ce3aSMandy Kirkconnell ASSERT((new_size >= 0) && (new_size != size)); 33430293ce3aSMandy Kirkconnell if (new_size == 0) { 33440293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 33450293ce3aSMandy Kirkconnell } else { 33460293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 33470293ce3aSMandy Kirkconnell kmem_realloc(ifp->if_u1.if_ext_irec, 33486785073bSDavid Chinner new_size, size, KM_NOFS); 33490293ce3aSMandy Kirkconnell } 33500293ce3aSMandy Kirkconnell } 33510293ce3aSMandy Kirkconnell 33520293ce3aSMandy Kirkconnell /* 33530293ce3aSMandy Kirkconnell * Switch from indirection array to linear (direct) extent allocations. 33540293ce3aSMandy Kirkconnell */ 3355d96f8f89SEric Sandeen STATIC void 33560293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct( 33570293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 33580293ce3aSMandy Kirkconnell { 3359a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 33600293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 33610293ce3aSMandy Kirkconnell int size; /* size of file extents */ 33620293ce3aSMandy Kirkconnell 33630293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 33640293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 33650293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 33660293ce3aSMandy Kirkconnell size = nextents * sizeof(xfs_bmbt_rec_t); 33670293ce3aSMandy Kirkconnell 336871a8c87fSLachlan McIlroy xfs_iext_irec_compact_pages(ifp); 33690293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 33700293ce3aSMandy Kirkconnell 33710293ce3aSMandy Kirkconnell ep = ifp->if_u1.if_ext_irec->er_extbuf; 3372f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 33730293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 33740293ce3aSMandy Kirkconnell ifp->if_u1.if_extents = ep; 33750293ce3aSMandy Kirkconnell ifp->if_bytes = size; 33760293ce3aSMandy Kirkconnell if (nextents < XFS_LINEAR_EXTS) { 33770293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, size); 33780293ce3aSMandy Kirkconnell } 33790293ce3aSMandy Kirkconnell } 33800293ce3aSMandy Kirkconnell 33810293ce3aSMandy Kirkconnell /* 33824eea22f0SMandy Kirkconnell * Free incore file extents. 33834eea22f0SMandy Kirkconnell */ 33844eea22f0SMandy Kirkconnell void 33854eea22f0SMandy Kirkconnell xfs_iext_destroy( 33864eea22f0SMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 33874eea22f0SMandy Kirkconnell { 33880293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 33890293ce3aSMandy Kirkconnell int erp_idx; 33900293ce3aSMandy Kirkconnell int nlists; 33910293ce3aSMandy Kirkconnell 33920293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 33930293ce3aSMandy Kirkconnell for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 33940293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 33950293ce3aSMandy Kirkconnell } 33960293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 33970293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes) { 3398f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 33994eea22f0SMandy Kirkconnell } else if (ifp->if_bytes) { 34004eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 34014eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 34024eea22f0SMandy Kirkconnell } 34034eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = NULL; 34044eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 34054eea22f0SMandy Kirkconnell ifp->if_bytes = 0; 34064eea22f0SMandy Kirkconnell } 34070293ce3aSMandy Kirkconnell 34080293ce3aSMandy Kirkconnell /* 34098867bc9bSMandy Kirkconnell * Return a pointer to the extent record for file system block bno. 34108867bc9bSMandy Kirkconnell */ 3411a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * /* pointer to found extent record */ 34128867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext( 34138867bc9bSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34148867bc9bSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 34158867bc9bSMandy Kirkconnell xfs_extnum_t *idxp) /* index of target extent */ 34168867bc9bSMandy Kirkconnell { 3417a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *base; /* pointer to first extent */ 34188867bc9bSMandy Kirkconnell xfs_filblks_t blockcount = 0; /* number of blocks in extent */ 3419a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ 34208867bc9bSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 3421c41564b5SNathan Scott int high; /* upper boundary in search */ 34228867bc9bSMandy Kirkconnell xfs_extnum_t idx = 0; /* index of target extent */ 3423c41564b5SNathan Scott int low; /* lower boundary in search */ 34248867bc9bSMandy Kirkconnell xfs_extnum_t nextents; /* number of file extents */ 34258867bc9bSMandy Kirkconnell xfs_fileoff_t startoff = 0; /* start offset of extent */ 34268867bc9bSMandy Kirkconnell 34278867bc9bSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 34288867bc9bSMandy Kirkconnell if (nextents == 0) { 34298867bc9bSMandy Kirkconnell *idxp = 0; 34308867bc9bSMandy Kirkconnell return NULL; 34318867bc9bSMandy Kirkconnell } 34328867bc9bSMandy Kirkconnell low = 0; 34338867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 34348867bc9bSMandy Kirkconnell /* Find target extent list */ 34358867bc9bSMandy Kirkconnell int erp_idx = 0; 34368867bc9bSMandy Kirkconnell erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); 34378867bc9bSMandy Kirkconnell base = erp->er_extbuf; 34388867bc9bSMandy Kirkconnell high = erp->er_extcount - 1; 34398867bc9bSMandy Kirkconnell } else { 34408867bc9bSMandy Kirkconnell base = ifp->if_u1.if_extents; 34418867bc9bSMandy Kirkconnell high = nextents - 1; 34428867bc9bSMandy Kirkconnell } 34438867bc9bSMandy Kirkconnell /* Binary search extent records */ 34448867bc9bSMandy Kirkconnell while (low <= high) { 34458867bc9bSMandy Kirkconnell idx = (low + high) >> 1; 34468867bc9bSMandy Kirkconnell ep = base + idx; 34478867bc9bSMandy Kirkconnell startoff = xfs_bmbt_get_startoff(ep); 34488867bc9bSMandy Kirkconnell blockcount = xfs_bmbt_get_blockcount(ep); 34498867bc9bSMandy Kirkconnell if (bno < startoff) { 34508867bc9bSMandy Kirkconnell high = idx - 1; 34518867bc9bSMandy Kirkconnell } else if (bno >= startoff + blockcount) { 34528867bc9bSMandy Kirkconnell low = idx + 1; 34538867bc9bSMandy Kirkconnell } else { 34548867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 34558867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 34568867bc9bSMandy Kirkconnell idx += erp->er_extoff; 34578867bc9bSMandy Kirkconnell } 34588867bc9bSMandy Kirkconnell *idxp = idx; 34598867bc9bSMandy Kirkconnell return ep; 34608867bc9bSMandy Kirkconnell } 34618867bc9bSMandy Kirkconnell } 34628867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 34638867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 34648867bc9bSMandy Kirkconnell idx += erp->er_extoff; 34658867bc9bSMandy Kirkconnell } 34668867bc9bSMandy Kirkconnell if (bno >= startoff + blockcount) { 34678867bc9bSMandy Kirkconnell if (++idx == nextents) { 34688867bc9bSMandy Kirkconnell ep = NULL; 34698867bc9bSMandy Kirkconnell } else { 34708867bc9bSMandy Kirkconnell ep = xfs_iext_get_ext(ifp, idx); 34718867bc9bSMandy Kirkconnell } 34728867bc9bSMandy Kirkconnell } 34738867bc9bSMandy Kirkconnell *idxp = idx; 34748867bc9bSMandy Kirkconnell return ep; 34758867bc9bSMandy Kirkconnell } 34768867bc9bSMandy Kirkconnell 34778867bc9bSMandy Kirkconnell /* 34780293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 34790293ce3aSMandy Kirkconnell * extent record for filesystem block bno. Store the index of the 34800293ce3aSMandy Kirkconnell * target irec in *erp_idxp. 34810293ce3aSMandy Kirkconnell */ 34828867bc9bSMandy Kirkconnell xfs_ext_irec_t * /* pointer to found extent record */ 34830293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec( 34840293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34850293ce3aSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 34860293ce3aSMandy Kirkconnell int *erp_idxp) /* irec index of target ext list */ 34870293ce3aSMandy Kirkconnell { 34880293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 34890293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp_next; /* next indirection array entry */ 34908867bc9bSMandy Kirkconnell int erp_idx; /* indirection array index */ 34910293ce3aSMandy Kirkconnell int nlists; /* number of extent irec's (lists) */ 34920293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 34930293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 34940293ce3aSMandy Kirkconnell 34950293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 34960293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 34970293ce3aSMandy Kirkconnell erp_idx = 0; 34980293ce3aSMandy Kirkconnell low = 0; 34990293ce3aSMandy Kirkconnell high = nlists - 1; 35000293ce3aSMandy Kirkconnell while (low <= high) { 35010293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 35020293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 35030293ce3aSMandy Kirkconnell erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; 35040293ce3aSMandy Kirkconnell if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { 35050293ce3aSMandy Kirkconnell high = erp_idx - 1; 35060293ce3aSMandy Kirkconnell } else if (erp_next && bno >= 35070293ce3aSMandy Kirkconnell xfs_bmbt_get_startoff(erp_next->er_extbuf)) { 35080293ce3aSMandy Kirkconnell low = erp_idx + 1; 35090293ce3aSMandy Kirkconnell } else { 35100293ce3aSMandy Kirkconnell break; 35110293ce3aSMandy Kirkconnell } 35120293ce3aSMandy Kirkconnell } 35130293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 35140293ce3aSMandy Kirkconnell return erp; 35150293ce3aSMandy Kirkconnell } 35160293ce3aSMandy Kirkconnell 35170293ce3aSMandy Kirkconnell /* 35180293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 35190293ce3aSMandy Kirkconnell * extent record at file extent index *idxp. Store the index of the 35200293ce3aSMandy Kirkconnell * target irec in *erp_idxp and store the page index of the target 35210293ce3aSMandy Kirkconnell * extent record in *idxp. 35220293ce3aSMandy Kirkconnell */ 35230293ce3aSMandy Kirkconnell xfs_ext_irec_t * 35240293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec( 35250293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35260293ce3aSMandy Kirkconnell xfs_extnum_t *idxp, /* extent index (file -> page) */ 35270293ce3aSMandy Kirkconnell int *erp_idxp, /* pointer to target irec */ 35280293ce3aSMandy Kirkconnell int realloc) /* new bytes were just added */ 35290293ce3aSMandy Kirkconnell { 35300293ce3aSMandy Kirkconnell xfs_ext_irec_t *prev; /* pointer to previous irec */ 35310293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ 35320293ce3aSMandy Kirkconnell int erp_idx; /* indirection array index */ 35330293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 35340293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 35350293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 35360293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 35370293ce3aSMandy Kirkconnell 35380293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 353987bef181SChristoph Hellwig ASSERT(page_idx >= 0); 354087bef181SChristoph Hellwig ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 354187bef181SChristoph Hellwig ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); 354287bef181SChristoph Hellwig 35430293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 35440293ce3aSMandy Kirkconnell erp_idx = 0; 35450293ce3aSMandy Kirkconnell low = 0; 35460293ce3aSMandy Kirkconnell high = nlists - 1; 35470293ce3aSMandy Kirkconnell 35480293ce3aSMandy Kirkconnell /* Binary search extent irec's */ 35490293ce3aSMandy Kirkconnell while (low <= high) { 35500293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 35510293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 35520293ce3aSMandy Kirkconnell prev = erp_idx > 0 ? erp - 1 : NULL; 35530293ce3aSMandy Kirkconnell if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && 35540293ce3aSMandy Kirkconnell realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { 35550293ce3aSMandy Kirkconnell high = erp_idx - 1; 35560293ce3aSMandy Kirkconnell } else if (page_idx > erp->er_extoff + erp->er_extcount || 35570293ce3aSMandy Kirkconnell (page_idx == erp->er_extoff + erp->er_extcount && 35580293ce3aSMandy Kirkconnell !realloc)) { 35590293ce3aSMandy Kirkconnell low = erp_idx + 1; 35600293ce3aSMandy Kirkconnell } else if (page_idx == erp->er_extoff + erp->er_extcount && 35610293ce3aSMandy Kirkconnell erp->er_extcount == XFS_LINEAR_EXTS) { 35620293ce3aSMandy Kirkconnell ASSERT(realloc); 35630293ce3aSMandy Kirkconnell page_idx = 0; 35640293ce3aSMandy Kirkconnell erp_idx++; 35650293ce3aSMandy Kirkconnell erp = erp_idx < nlists ? erp + 1 : NULL; 35660293ce3aSMandy Kirkconnell break; 35670293ce3aSMandy Kirkconnell } else { 35680293ce3aSMandy Kirkconnell page_idx -= erp->er_extoff; 35690293ce3aSMandy Kirkconnell break; 35700293ce3aSMandy Kirkconnell } 35710293ce3aSMandy Kirkconnell } 35720293ce3aSMandy Kirkconnell *idxp = page_idx; 35730293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 35740293ce3aSMandy Kirkconnell return(erp); 35750293ce3aSMandy Kirkconnell } 35760293ce3aSMandy Kirkconnell 35770293ce3aSMandy Kirkconnell /* 35780293ce3aSMandy Kirkconnell * Allocate and initialize an indirection array once the space needed 35790293ce3aSMandy Kirkconnell * for incore extents increases above XFS_IEXT_BUFSZ. 35800293ce3aSMandy Kirkconnell */ 35810293ce3aSMandy Kirkconnell void 35820293ce3aSMandy Kirkconnell xfs_iext_irec_init( 35830293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 35840293ce3aSMandy Kirkconnell { 35850293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 35860293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 35870293ce3aSMandy Kirkconnell 35880293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 35890293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 35900293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 35910293ce3aSMandy Kirkconnell 35926785073bSDavid Chinner erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); 35930293ce3aSMandy Kirkconnell 35940293ce3aSMandy Kirkconnell if (nextents == 0) { 35956785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 35960293ce3aSMandy Kirkconnell } else if (!ifp->if_real_bytes) { 35970293ce3aSMandy Kirkconnell xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 35980293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 35990293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 36000293ce3aSMandy Kirkconnell } 36010293ce3aSMandy Kirkconnell erp->er_extbuf = ifp->if_u1.if_extents; 36020293ce3aSMandy Kirkconnell erp->er_extcount = nextents; 36030293ce3aSMandy Kirkconnell erp->er_extoff = 0; 36040293ce3aSMandy Kirkconnell 36050293ce3aSMandy Kirkconnell ifp->if_flags |= XFS_IFEXTIREC; 36060293ce3aSMandy Kirkconnell ifp->if_real_bytes = XFS_IEXT_BUFSZ; 36070293ce3aSMandy Kirkconnell ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 36080293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = erp; 36090293ce3aSMandy Kirkconnell 36100293ce3aSMandy Kirkconnell return; 36110293ce3aSMandy Kirkconnell } 36120293ce3aSMandy Kirkconnell 36130293ce3aSMandy Kirkconnell /* 36140293ce3aSMandy Kirkconnell * Allocate and initialize a new entry in the indirection array. 36150293ce3aSMandy Kirkconnell */ 36160293ce3aSMandy Kirkconnell xfs_ext_irec_t * 36170293ce3aSMandy Kirkconnell xfs_iext_irec_new( 36180293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36190293ce3aSMandy Kirkconnell int erp_idx) /* index for new irec */ 36200293ce3aSMandy Kirkconnell { 36210293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 36220293ce3aSMandy Kirkconnell int i; /* loop counter */ 36230293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 36240293ce3aSMandy Kirkconnell 36250293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 36260293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36270293ce3aSMandy Kirkconnell 36280293ce3aSMandy Kirkconnell /* Resize indirection array */ 36290293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, ++nlists * 36300293ce3aSMandy Kirkconnell sizeof(xfs_ext_irec_t)); 36310293ce3aSMandy Kirkconnell /* 36320293ce3aSMandy Kirkconnell * Move records down in the array so the 36330293ce3aSMandy Kirkconnell * new page can use erp_idx. 36340293ce3aSMandy Kirkconnell */ 36350293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 36360293ce3aSMandy Kirkconnell for (i = nlists - 1; i > erp_idx; i--) { 36370293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 36380293ce3aSMandy Kirkconnell } 36390293ce3aSMandy Kirkconnell ASSERT(i == erp_idx); 36400293ce3aSMandy Kirkconnell 36410293ce3aSMandy Kirkconnell /* Initialize new extent record */ 36420293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 36436785073bSDavid Chinner erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 36440293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 36450293ce3aSMandy Kirkconnell memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 36460293ce3aSMandy Kirkconnell erp[erp_idx].er_extcount = 0; 36470293ce3aSMandy Kirkconnell erp[erp_idx].er_extoff = erp_idx > 0 ? 36480293ce3aSMandy Kirkconnell erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 36490293ce3aSMandy Kirkconnell return (&erp[erp_idx]); 36500293ce3aSMandy Kirkconnell } 36510293ce3aSMandy Kirkconnell 36520293ce3aSMandy Kirkconnell /* 36530293ce3aSMandy Kirkconnell * Remove a record from the indirection array. 36540293ce3aSMandy Kirkconnell */ 36550293ce3aSMandy Kirkconnell void 36560293ce3aSMandy Kirkconnell xfs_iext_irec_remove( 36570293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36580293ce3aSMandy Kirkconnell int erp_idx) /* irec index to remove */ 36590293ce3aSMandy Kirkconnell { 36600293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 36610293ce3aSMandy Kirkconnell int i; /* loop counter */ 36620293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 36630293ce3aSMandy Kirkconnell 36640293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 36650293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36660293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 36670293ce3aSMandy Kirkconnell if (erp->er_extbuf) { 36680293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 36690293ce3aSMandy Kirkconnell -erp->er_extcount); 3670f0e2d93cSDenys Vlasenko kmem_free(erp->er_extbuf); 36710293ce3aSMandy Kirkconnell } 36720293ce3aSMandy Kirkconnell /* Compact extent records */ 36730293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 36740293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists - 1; i++) { 36750293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); 36760293ce3aSMandy Kirkconnell } 36770293ce3aSMandy Kirkconnell /* 36780293ce3aSMandy Kirkconnell * Manually free the last extent record from the indirection 36790293ce3aSMandy Kirkconnell * array. A call to xfs_iext_realloc_indirect() with a size 36800293ce3aSMandy Kirkconnell * of zero would result in a call to xfs_iext_destroy() which 36810293ce3aSMandy Kirkconnell * would in turn call this function again, creating a nasty 36820293ce3aSMandy Kirkconnell * infinite loop. 36830293ce3aSMandy Kirkconnell */ 36840293ce3aSMandy Kirkconnell if (--nlists) { 36850293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, 36860293ce3aSMandy Kirkconnell nlists * sizeof(xfs_ext_irec_t)); 36870293ce3aSMandy Kirkconnell } else { 3688f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 36890293ce3aSMandy Kirkconnell } 36900293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 36910293ce3aSMandy Kirkconnell } 36920293ce3aSMandy Kirkconnell 36930293ce3aSMandy Kirkconnell /* 36940293ce3aSMandy Kirkconnell * This is called to clean up large amounts of unused memory allocated 36950293ce3aSMandy Kirkconnell * by the indirection array. Before compacting anything though, verify 36960293ce3aSMandy Kirkconnell * that the indirection array is still needed and switch back to the 36970293ce3aSMandy Kirkconnell * linear extent list (or even the inline buffer) if possible. The 36980293ce3aSMandy Kirkconnell * compaction policy is as follows: 36990293ce3aSMandy Kirkconnell * 37000293ce3aSMandy Kirkconnell * Full Compaction: Extents fit into a single page (or inline buffer) 370171a8c87fSLachlan McIlroy * Partial Compaction: Extents occupy less than 50% of allocated space 37020293ce3aSMandy Kirkconnell * No Compaction: Extents occupy at least 50% of allocated space 37030293ce3aSMandy Kirkconnell */ 37040293ce3aSMandy Kirkconnell void 37050293ce3aSMandy Kirkconnell xfs_iext_irec_compact( 37060293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 37070293ce3aSMandy Kirkconnell { 37080293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 37090293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37100293ce3aSMandy Kirkconnell 37110293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37120293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37130293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 37140293ce3aSMandy Kirkconnell 37150293ce3aSMandy Kirkconnell if (nextents == 0) { 37160293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 37170293ce3aSMandy Kirkconnell } else if (nextents <= XFS_INLINE_EXTS) { 37180293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 37190293ce3aSMandy Kirkconnell xfs_iext_direct_to_inline(ifp, nextents); 37200293ce3aSMandy Kirkconnell } else if (nextents <= XFS_LINEAR_EXTS) { 37210293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 37220293ce3aSMandy Kirkconnell } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 37230293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(ifp); 37240293ce3aSMandy Kirkconnell } 37250293ce3aSMandy Kirkconnell } 37260293ce3aSMandy Kirkconnell 37270293ce3aSMandy Kirkconnell /* 37280293ce3aSMandy Kirkconnell * Combine extents from neighboring extent pages. 37290293ce3aSMandy Kirkconnell */ 37300293ce3aSMandy Kirkconnell void 37310293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages( 37320293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 37330293ce3aSMandy Kirkconnell { 37340293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ 37350293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 37360293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37370293ce3aSMandy Kirkconnell 37380293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37390293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37400293ce3aSMandy Kirkconnell while (erp_idx < nlists - 1) { 37410293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 37420293ce3aSMandy Kirkconnell erp_next = erp + 1; 37430293ce3aSMandy Kirkconnell if (erp_next->er_extcount <= 37440293ce3aSMandy Kirkconnell (XFS_LINEAR_EXTS - erp->er_extcount)) { 374571a8c87fSLachlan McIlroy memcpy(&erp->er_extbuf[erp->er_extcount], 37460293ce3aSMandy Kirkconnell erp_next->er_extbuf, erp_next->er_extcount * 37470293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 37480293ce3aSMandy Kirkconnell erp->er_extcount += erp_next->er_extcount; 37490293ce3aSMandy Kirkconnell /* 37500293ce3aSMandy Kirkconnell * Free page before removing extent record 37510293ce3aSMandy Kirkconnell * so er_extoffs don't get modified in 37520293ce3aSMandy Kirkconnell * xfs_iext_irec_remove. 37530293ce3aSMandy Kirkconnell */ 3754f0e2d93cSDenys Vlasenko kmem_free(erp_next->er_extbuf); 37550293ce3aSMandy Kirkconnell erp_next->er_extbuf = NULL; 37560293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx + 1); 37570293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37580293ce3aSMandy Kirkconnell } else { 37590293ce3aSMandy Kirkconnell erp_idx++; 37600293ce3aSMandy Kirkconnell } 37610293ce3aSMandy Kirkconnell } 37620293ce3aSMandy Kirkconnell } 37630293ce3aSMandy Kirkconnell 37640293ce3aSMandy Kirkconnell /* 37650293ce3aSMandy Kirkconnell * This is called to update the er_extoff field in the indirection 37660293ce3aSMandy Kirkconnell * array when extents have been added or removed from one of the 37670293ce3aSMandy Kirkconnell * extent lists. erp_idx contains the irec index to begin updating 37680293ce3aSMandy Kirkconnell * at and ext_diff contains the number of extents that were added 37690293ce3aSMandy Kirkconnell * or removed. 37700293ce3aSMandy Kirkconnell */ 37710293ce3aSMandy Kirkconnell void 37720293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs( 37730293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 37740293ce3aSMandy Kirkconnell int erp_idx, /* irec index to update */ 37750293ce3aSMandy Kirkconnell int ext_diff) /* number of new extents */ 37760293ce3aSMandy Kirkconnell { 37770293ce3aSMandy Kirkconnell int i; /* loop counter */ 37780293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists */ 37790293ce3aSMandy Kirkconnell 37800293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37810293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37820293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists; i++) { 37830293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 37840293ce3aSMandy Kirkconnell } 37850293ce3aSMandy Kirkconnell } 3786