11da177e4SLinus Torvalds /* 23e57ecf6SOlaf Weber * Copyright (c) 2000-2006 Silicon Graphics, Inc. 37b718769SNathan Scott * All Rights Reserved. 41da177e4SLinus Torvalds * 57b718769SNathan Scott * This program is free software; you can redistribute it and/or 67b718769SNathan Scott * modify it under the terms of the GNU General Public License as 71da177e4SLinus Torvalds * published by the Free Software Foundation. 81da177e4SLinus Torvalds * 97b718769SNathan Scott * This program is distributed in the hope that it would be useful, 107b718769SNathan Scott * but WITHOUT ANY WARRANTY; without even the implied warranty of 117b718769SNathan Scott * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 127b718769SNathan Scott * GNU General Public License for more details. 131da177e4SLinus Torvalds * 147b718769SNathan Scott * You should have received a copy of the GNU General Public License 157b718769SNathan Scott * along with this program; if not, write the Free Software Foundation, 167b718769SNathan Scott * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 171da177e4SLinus Torvalds */ 1840ebd81dSRobert P. J. Day #include <linux/log2.h> 1940ebd81dSRobert P. J. 
Day 201da177e4SLinus Torvalds #include "xfs.h" 21a844f451SNathan Scott #include "xfs_fs.h" 221da177e4SLinus Torvalds #include "xfs_types.h" 23a844f451SNathan Scott #include "xfs_bit.h" 241da177e4SLinus Torvalds #include "xfs_log.h" 25a844f451SNathan Scott #include "xfs_inum.h" 261da177e4SLinus Torvalds #include "xfs_trans.h" 271da177e4SLinus Torvalds #include "xfs_trans_priv.h" 281da177e4SLinus Torvalds #include "xfs_sb.h" 291da177e4SLinus Torvalds #include "xfs_ag.h" 301da177e4SLinus Torvalds #include "xfs_mount.h" 311da177e4SLinus Torvalds #include "xfs_bmap_btree.h" 32a844f451SNathan Scott #include "xfs_alloc_btree.h" 331da177e4SLinus Torvalds #include "xfs_ialloc_btree.h" 34a844f451SNathan Scott #include "xfs_attr_sf.h" 351da177e4SLinus Torvalds #include "xfs_dinode.h" 361da177e4SLinus Torvalds #include "xfs_inode.h" 371da177e4SLinus Torvalds #include "xfs_buf_item.h" 38a844f451SNathan Scott #include "xfs_inode_item.h" 39a844f451SNathan Scott #include "xfs_btree.h" 408c4ed633SChristoph Hellwig #include "xfs_btree_trace.h" 41a844f451SNathan Scott #include "xfs_alloc.h" 42a844f451SNathan Scott #include "xfs_ialloc.h" 43a844f451SNathan Scott #include "xfs_bmap.h" 441da177e4SLinus Torvalds #include "xfs_error.h" 451da177e4SLinus Torvalds #include "xfs_utils.h" 461da177e4SLinus Torvalds #include "xfs_quota.h" 472a82b8beSDavid Chinner #include "xfs_filestream.h" 48739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h" 490b1b213fSChristoph Hellwig #include "xfs_trace.h" 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone; 521da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone; 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds /* 558f04c47aSChristoph Hellwig * Used in xfs_itruncate_extents(). This is the maximum number of extents 561da177e4SLinus Torvalds * freed from a file in a single transaction. 
571da177e4SLinus Torvalds */ 581da177e4SLinus Torvalds #define XFS_ITRUNC_MAX_EXTENTS 2 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 611da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); 621da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 631da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 641da177e4SLinus Torvalds 651da177e4SLinus Torvalds #ifdef DEBUG 661da177e4SLinus Torvalds /* 671da177e4SLinus Torvalds * Make sure that the extents in the given memory buffer 681da177e4SLinus Torvalds * are valid. 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds STATIC void 711da177e4SLinus Torvalds xfs_validate_extents( 724eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, 731da177e4SLinus Torvalds int nrecs, 741da177e4SLinus Torvalds xfs_exntfmt_t fmt) 751da177e4SLinus Torvalds { 761da177e4SLinus Torvalds xfs_bmbt_irec_t irec; 77a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t rec; 781da177e4SLinus Torvalds int i; 791da177e4SLinus Torvalds 801da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 81a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 82a6f64d4aSChristoph Hellwig rec.l0 = get_unaligned(&ep->l0); 83a6f64d4aSChristoph Hellwig rec.l1 = get_unaligned(&ep->l1); 841da177e4SLinus Torvalds xfs_bmbt_get_all(&rec, &irec); 851da177e4SLinus Torvalds if (fmt == XFS_EXTFMT_NOSTATE) 861da177e4SLinus Torvalds ASSERT(irec.br_state == XFS_EXT_NORM); 871da177e4SLinus Torvalds } 881da177e4SLinus Torvalds } 891da177e4SLinus Torvalds #else /* DEBUG */ 90a6f64d4aSChristoph Hellwig #define xfs_validate_extents(ifp, nrecs, fmt) 911da177e4SLinus Torvalds #endif /* DEBUG */ 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds /* 941da177e4SLinus Torvalds * Check that none of the inode's in the buffer have a next 951da177e4SLinus Torvalds * unlinked field of 0. 
961da177e4SLinus Torvalds */ 971da177e4SLinus Torvalds #if defined(DEBUG) 981da177e4SLinus Torvalds void 991da177e4SLinus Torvalds xfs_inobp_check( 1001da177e4SLinus Torvalds xfs_mount_t *mp, 1011da177e4SLinus Torvalds xfs_buf_t *bp) 1021da177e4SLinus Torvalds { 1031da177e4SLinus Torvalds int i; 1041da177e4SLinus Torvalds int j; 1051da177e4SLinus Torvalds xfs_dinode_t *dip; 1061da177e4SLinus Torvalds 1071da177e4SLinus Torvalds j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 1081da177e4SLinus Torvalds 1091da177e4SLinus Torvalds for (i = 0; i < j; i++) { 1101da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1111da177e4SLinus Torvalds i * mp->m_sb.sb_inodesize); 1121da177e4SLinus Torvalds if (!dip->di_next_unlinked) { 11353487786SDave Chinner xfs_alert(mp, 11453487786SDave Chinner "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", 1151da177e4SLinus Torvalds bp); 1161da177e4SLinus Torvalds ASSERT(dip->di_next_unlinked); 1171da177e4SLinus Torvalds } 1181da177e4SLinus Torvalds } 1191da177e4SLinus Torvalds } 1201da177e4SLinus Torvalds #endif 1211da177e4SLinus Torvalds 1221da177e4SLinus Torvalds /* 1234ae29b43SDavid Chinner * Find the buffer associated with the given inode map 1244ae29b43SDavid Chinner * We do basic validation checks on the buffer once it has been 1254ae29b43SDavid Chinner * retrieved from disk. 
1264ae29b43SDavid Chinner */ 1274ae29b43SDavid Chinner STATIC int 1284ae29b43SDavid Chinner xfs_imap_to_bp( 1294ae29b43SDavid Chinner xfs_mount_t *mp, 1304ae29b43SDavid Chinner xfs_trans_t *tp, 13192bfc6e7SChristoph Hellwig struct xfs_imap *imap, 1324ae29b43SDavid Chinner xfs_buf_t **bpp, 1334ae29b43SDavid Chinner uint buf_flags, 134b48d8d64SChristoph Hellwig uint iget_flags) 1354ae29b43SDavid Chinner { 1364ae29b43SDavid Chinner int error; 1374ae29b43SDavid Chinner int i; 1384ae29b43SDavid Chinner int ni; 1394ae29b43SDavid Chinner xfs_buf_t *bp; 1404ae29b43SDavid Chinner 1414ae29b43SDavid Chinner error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 142a3f74ffbSDavid Chinner (int)imap->im_len, buf_flags, &bp); 1434ae29b43SDavid Chinner if (error) { 144a3f74ffbSDavid Chinner if (error != EAGAIN) { 1450b932cccSDave Chinner xfs_warn(mp, 1460b932cccSDave Chinner "%s: xfs_trans_read_buf() returned error %d.", 1470b932cccSDave Chinner __func__, error); 148a3f74ffbSDavid Chinner } else { 1490cadda1cSChristoph Hellwig ASSERT(buf_flags & XBF_TRYLOCK); 150a3f74ffbSDavid Chinner } 1514ae29b43SDavid Chinner return error; 1524ae29b43SDavid Chinner } 1534ae29b43SDavid Chinner 1544ae29b43SDavid Chinner /* 1554ae29b43SDavid Chinner * Validate the magic number and version of every inode in the buffer 1564ae29b43SDavid Chinner * (if DEBUG kernel) or the first inode in the buffer, otherwise. 
1574ae29b43SDavid Chinner */ 1584ae29b43SDavid Chinner #ifdef DEBUG 1594ae29b43SDavid Chinner ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; 1604ae29b43SDavid Chinner #else /* usual case */ 1614ae29b43SDavid Chinner ni = 1; 1624ae29b43SDavid Chinner #endif 1634ae29b43SDavid Chinner 1644ae29b43SDavid Chinner for (i = 0; i < ni; i++) { 1654ae29b43SDavid Chinner int di_ok; 1664ae29b43SDavid Chinner xfs_dinode_t *dip; 1674ae29b43SDavid Chinner 1684ae29b43SDavid Chinner dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1694ae29b43SDavid Chinner (i << mp->m_sb.sb_inodelog)); 170*69ef921bSChristoph Hellwig di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 17181591fe2SChristoph Hellwig XFS_DINODE_GOOD_VERSION(dip->di_version); 1724ae29b43SDavid Chinner if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 1734ae29b43SDavid Chinner XFS_ERRTAG_ITOBP_INOTOBP, 1744ae29b43SDavid Chinner XFS_RANDOM_ITOBP_INOTOBP))) { 1751920779eSDave Chinner if (iget_flags & XFS_IGET_UNTRUSTED) { 1764ae29b43SDavid Chinner xfs_trans_brelse(tp, bp); 1774ae29b43SDavid Chinner return XFS_ERROR(EINVAL); 1784ae29b43SDavid Chinner } 1794ae29b43SDavid Chinner XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 1804ae29b43SDavid Chinner XFS_ERRLEVEL_HIGH, mp, dip); 1814ae29b43SDavid Chinner #ifdef DEBUG 1820b932cccSDave Chinner xfs_emerg(mp, 1830b932cccSDave Chinner "bad inode magic/vsn daddr %lld #%d (magic=%x)", 1844ae29b43SDavid Chinner (unsigned long long)imap->im_blkno, i, 18581591fe2SChristoph Hellwig be16_to_cpu(dip->di_magic)); 1860b932cccSDave Chinner ASSERT(0); 1874ae29b43SDavid Chinner #endif 1884ae29b43SDavid Chinner xfs_trans_brelse(tp, bp); 1894ae29b43SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 1904ae29b43SDavid Chinner } 1914ae29b43SDavid Chinner } 1924ae29b43SDavid Chinner 1934ae29b43SDavid Chinner xfs_inobp_check(mp, bp); 1944ae29b43SDavid Chinner 1954ae29b43SDavid Chinner /* 1964ae29b43SDavid Chinner * Mark the buffer as an inode buffer now that it looks good 1974ae29b43SDavid Chinner */ 1984ae29b43SDavid 
Chinner XFS_BUF_SET_VTYPE(bp, B_FS_INO); 1994ae29b43SDavid Chinner 2004ae29b43SDavid Chinner *bpp = bp; 2014ae29b43SDavid Chinner return 0; 2024ae29b43SDavid Chinner } 2034ae29b43SDavid Chinner 2044ae29b43SDavid Chinner /* 2051da177e4SLinus Torvalds * This routine is called to map an inode number within a file 2061da177e4SLinus Torvalds * system to the buffer containing the on-disk version of the 2071da177e4SLinus Torvalds * inode. It returns a pointer to the buffer containing the 2081da177e4SLinus Torvalds * on-disk inode in the bpp parameter, and in the dip parameter 2091da177e4SLinus Torvalds * it returns a pointer to the on-disk inode within that buffer. 2101da177e4SLinus Torvalds * 2111da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2121da177e4SLinus Torvalds * dipp are undefined. 2131da177e4SLinus Torvalds * 2141da177e4SLinus Torvalds * Use xfs_imap() to determine the size and location of the 2151da177e4SLinus Torvalds * buffer to read from disk. 
2161da177e4SLinus Torvalds */ 217c679eef0SChristoph Hellwig int 2181da177e4SLinus Torvalds xfs_inotobp( 2191da177e4SLinus Torvalds xfs_mount_t *mp, 2201da177e4SLinus Torvalds xfs_trans_t *tp, 2211da177e4SLinus Torvalds xfs_ino_t ino, 2221da177e4SLinus Torvalds xfs_dinode_t **dipp, 2231da177e4SLinus Torvalds xfs_buf_t **bpp, 224c679eef0SChristoph Hellwig int *offset, 225c679eef0SChristoph Hellwig uint imap_flags) 2261da177e4SLinus Torvalds { 22792bfc6e7SChristoph Hellwig struct xfs_imap imap; 2281da177e4SLinus Torvalds xfs_buf_t *bp; 2291da177e4SLinus Torvalds int error; 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds imap.im_blkno = 0; 232a1941895SChristoph Hellwig error = xfs_imap(mp, tp, ino, &imap, imap_flags); 2334ae29b43SDavid Chinner if (error) 2341da177e4SLinus Torvalds return error; 2351da177e4SLinus Torvalds 2360cadda1cSChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); 2374ae29b43SDavid Chinner if (error) 2381da177e4SLinus Torvalds return error; 2391da177e4SLinus Torvalds 2401da177e4SLinus Torvalds *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2411da177e4SLinus Torvalds *bpp = bp; 2421da177e4SLinus Torvalds *offset = imap.im_boffset; 2431da177e4SLinus Torvalds return 0; 2441da177e4SLinus Torvalds } 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds 2471da177e4SLinus Torvalds /* 2481da177e4SLinus Torvalds * This routine is called to map an inode to the buffer containing 2491da177e4SLinus Torvalds * the on-disk version of the inode. It returns a pointer to the 2501da177e4SLinus Torvalds * buffer containing the on-disk inode in the bpp parameter, and in 2511da177e4SLinus Torvalds * the dip parameter it returns a pointer to the on-disk inode within 2521da177e4SLinus Torvalds * that buffer. 2531da177e4SLinus Torvalds * 2541da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2551da177e4SLinus Torvalds * dipp are undefined. 
2561da177e4SLinus Torvalds * 25776d8b277SChristoph Hellwig * The inode is expected to already been mapped to its buffer and read 25876d8b277SChristoph Hellwig * in once, thus we can use the mapping information stored in the inode 25976d8b277SChristoph Hellwig * rather than calling xfs_imap(). This allows us to avoid the overhead 26076d8b277SChristoph Hellwig * of looking at the inode btree for small block file systems 26194e1b69dSChristoph Hellwig * (see xfs_imap()). 2621da177e4SLinus Torvalds */ 2631da177e4SLinus Torvalds int 2641da177e4SLinus Torvalds xfs_itobp( 2651da177e4SLinus Torvalds xfs_mount_t *mp, 2661da177e4SLinus Torvalds xfs_trans_t *tp, 2671da177e4SLinus Torvalds xfs_inode_t *ip, 2681da177e4SLinus Torvalds xfs_dinode_t **dipp, 2691da177e4SLinus Torvalds xfs_buf_t **bpp, 270a3f74ffbSDavid Chinner uint buf_flags) 2711da177e4SLinus Torvalds { 2721da177e4SLinus Torvalds xfs_buf_t *bp; 2731da177e4SLinus Torvalds int error; 2741da177e4SLinus Torvalds 27592bfc6e7SChristoph Hellwig ASSERT(ip->i_imap.im_blkno != 0); 2761da177e4SLinus Torvalds 27792bfc6e7SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); 2784ae29b43SDavid Chinner if (error) 2791da177e4SLinus Torvalds return error; 2804d1a2ed3SNathan Scott 281a3f74ffbSDavid Chinner if (!bp) { 2820cadda1cSChristoph Hellwig ASSERT(buf_flags & XBF_TRYLOCK); 283a3f74ffbSDavid Chinner ASSERT(tp == NULL); 284a3f74ffbSDavid Chinner *bpp = NULL; 285a3f74ffbSDavid Chinner return EAGAIN; 286a3f74ffbSDavid Chinner } 287a3f74ffbSDavid Chinner 28892bfc6e7SChristoph Hellwig *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 2891da177e4SLinus Torvalds *bpp = bp; 2901da177e4SLinus Torvalds return 0; 2911da177e4SLinus Torvalds } 2921da177e4SLinus Torvalds 2931da177e4SLinus Torvalds /* 2941da177e4SLinus Torvalds * Move inode type and inode format specific information from the 2951da177e4SLinus Torvalds * on-disk inode to the in-core inode. 
For fifos, devs, and sockets 2961da177e4SLinus Torvalds * this means set if_rdev to the proper value. For files, directories, 2971da177e4SLinus Torvalds * and symlinks this means to bring in the in-line data or extent 2981da177e4SLinus Torvalds * pointers. For a file in B-tree format, only the root is immediately 2991da177e4SLinus Torvalds * brought in-core. The rest will be in-lined in if_extents when it 3001da177e4SLinus Torvalds * is first referenced (see xfs_iread_extents()). 3011da177e4SLinus Torvalds */ 3021da177e4SLinus Torvalds STATIC int 3031da177e4SLinus Torvalds xfs_iformat( 3041da177e4SLinus Torvalds xfs_inode_t *ip, 3051da177e4SLinus Torvalds xfs_dinode_t *dip) 3061da177e4SLinus Torvalds { 3071da177e4SLinus Torvalds xfs_attr_shortform_t *atp; 3081da177e4SLinus Torvalds int size; 3091da177e4SLinus Torvalds int error; 3101da177e4SLinus Torvalds xfs_fsize_t di_size; 3111da177e4SLinus Torvalds ip->i_df.if_ext_max = 3121da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 3131da177e4SLinus Torvalds error = 0; 3141da177e4SLinus Torvalds 31581591fe2SChristoph Hellwig if (unlikely(be32_to_cpu(dip->di_nextents) + 31681591fe2SChristoph Hellwig be16_to_cpu(dip->di_anextents) > 31781591fe2SChristoph Hellwig be64_to_cpu(dip->di_nblocks))) { 31865333b4cSDave Chinner xfs_warn(ip->i_mount, 3193762ec6bSNathan Scott "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 3201da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 32181591fe2SChristoph Hellwig (int)(be32_to_cpu(dip->di_nextents) + 32281591fe2SChristoph Hellwig be16_to_cpu(dip->di_anextents)), 3231da177e4SLinus Torvalds (unsigned long long) 32481591fe2SChristoph Hellwig be64_to_cpu(dip->di_nblocks)); 3251da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 3261da177e4SLinus Torvalds ip->i_mount, dip); 3271da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3281da177e4SLinus Torvalds } 3291da177e4SLinus Torvalds 33081591fe2SChristoph Hellwig if 
(unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 33165333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", 3321da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 33381591fe2SChristoph Hellwig dip->di_forkoff); 3341da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 3351da177e4SLinus Torvalds ip->i_mount, dip); 3361da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3371da177e4SLinus Torvalds } 3381da177e4SLinus Torvalds 339b89d4208SChristoph Hellwig if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 340b89d4208SChristoph Hellwig !ip->i_mount->m_rtdev_targp)) { 34165333b4cSDave Chinner xfs_warn(ip->i_mount, 342b89d4208SChristoph Hellwig "corrupt dinode %Lu, has realtime flag set.", 343b89d4208SChristoph Hellwig ip->i_ino); 344b89d4208SChristoph Hellwig XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 345b89d4208SChristoph Hellwig XFS_ERRLEVEL_LOW, ip->i_mount, dip); 346b89d4208SChristoph Hellwig return XFS_ERROR(EFSCORRUPTED); 347b89d4208SChristoph Hellwig } 348b89d4208SChristoph Hellwig 3491da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 3501da177e4SLinus Torvalds case S_IFIFO: 3511da177e4SLinus Torvalds case S_IFCHR: 3521da177e4SLinus Torvalds case S_IFBLK: 3531da177e4SLinus Torvalds case S_IFSOCK: 35481591fe2SChristoph Hellwig if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 3551da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 3561da177e4SLinus Torvalds ip->i_mount, dip); 3571da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3581da177e4SLinus Torvalds } 3591da177e4SLinus Torvalds ip->i_d.di_size = 0; 360ba87ea69SLachlan McIlroy ip->i_size = 0; 36181591fe2SChristoph Hellwig ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 3621da177e4SLinus Torvalds break; 3631da177e4SLinus Torvalds 3641da177e4SLinus Torvalds case S_IFREG: 3651da177e4SLinus Torvalds case S_IFLNK: 3661da177e4SLinus Torvalds case S_IFDIR: 36781591fe2SChristoph Hellwig 
switch (dip->di_format) { 3681da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 3691da177e4SLinus Torvalds /* 3701da177e4SLinus Torvalds * no local regular files yet 3711da177e4SLinus Torvalds */ 37281591fe2SChristoph Hellwig if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { 37365333b4cSDave Chinner xfs_warn(ip->i_mount, 37465333b4cSDave Chinner "corrupt inode %Lu (local format for regular file).", 3751da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 3761da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(4)", 3771da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 3781da177e4SLinus Torvalds ip->i_mount, dip); 3791da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3801da177e4SLinus Torvalds } 3811da177e4SLinus Torvalds 38281591fe2SChristoph Hellwig di_size = be64_to_cpu(dip->di_size); 3831da177e4SLinus Torvalds if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 38465333b4cSDave Chinner xfs_warn(ip->i_mount, 38565333b4cSDave Chinner "corrupt inode %Lu (bad size %Ld for local inode).", 3861da177e4SLinus Torvalds (unsigned long long) ip->i_ino, 3871da177e4SLinus Torvalds (long long) di_size); 3881da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(5)", 3891da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 3901da177e4SLinus Torvalds ip->i_mount, dip); 3911da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 3921da177e4SLinus Torvalds } 3931da177e4SLinus Torvalds 3941da177e4SLinus Torvalds size = (int)di_size; 3951da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 3961da177e4SLinus Torvalds break; 3971da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 3981da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 3991da177e4SLinus Torvalds break; 4001da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 4011da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 4021da177e4SLinus Torvalds break; 4031da177e4SLinus Torvalds default: 4041da177e4SLinus Torvalds 
XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 4051da177e4SLinus Torvalds ip->i_mount); 4061da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4071da177e4SLinus Torvalds } 4081da177e4SLinus Torvalds break; 4091da177e4SLinus Torvalds 4101da177e4SLinus Torvalds default: 4111da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 4121da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4131da177e4SLinus Torvalds } 4141da177e4SLinus Torvalds if (error) { 4151da177e4SLinus Torvalds return error; 4161da177e4SLinus Torvalds } 4171da177e4SLinus Torvalds if (!XFS_DFORK_Q(dip)) 4181da177e4SLinus Torvalds return 0; 4191da177e4SLinus Torvalds ASSERT(ip->i_afp == NULL); 4204a7edddcSDave Chinner ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); 4211da177e4SLinus Torvalds ip->i_afp->if_ext_max = 4221da177e4SLinus Torvalds XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 42381591fe2SChristoph Hellwig switch (dip->di_aformat) { 4241da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 4251da177e4SLinus Torvalds atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 4263b244aa8SNathan Scott size = be16_to_cpu(atp->hdr.totsize); 4272809f76aSChristoph Hellwig 4282809f76aSChristoph Hellwig if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 42965333b4cSDave Chinner xfs_warn(ip->i_mount, 43065333b4cSDave Chinner "corrupt inode %Lu (bad attr fork size %Ld).", 4312809f76aSChristoph Hellwig (unsigned long long) ip->i_ino, 4322809f76aSChristoph Hellwig (long long) size); 4332809f76aSChristoph Hellwig XFS_CORRUPTION_ERROR("xfs_iformat(8)", 4342809f76aSChristoph Hellwig XFS_ERRLEVEL_LOW, 4352809f76aSChristoph Hellwig ip->i_mount, dip); 4362809f76aSChristoph Hellwig return XFS_ERROR(EFSCORRUPTED); 4372809f76aSChristoph Hellwig } 4382809f76aSChristoph Hellwig 4391da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 4401da177e4SLinus Torvalds break; 4411da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 
4421da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 4431da177e4SLinus Torvalds break; 4441da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 4451da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 4461da177e4SLinus Torvalds break; 4471da177e4SLinus Torvalds default: 4481da177e4SLinus Torvalds error = XFS_ERROR(EFSCORRUPTED); 4491da177e4SLinus Torvalds break; 4501da177e4SLinus Torvalds } 4511da177e4SLinus Torvalds if (error) { 4521da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 4531da177e4SLinus Torvalds ip->i_afp = NULL; 4541da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 4551da177e4SLinus Torvalds } 4561da177e4SLinus Torvalds return error; 4571da177e4SLinus Torvalds } 4581da177e4SLinus Torvalds 4591da177e4SLinus Torvalds /* 4601da177e4SLinus Torvalds * The file is in-lined in the on-disk inode. 4611da177e4SLinus Torvalds * If it fits into if_inline_data, then copy 4621da177e4SLinus Torvalds * it there, otherwise allocate a buffer for it 4631da177e4SLinus Torvalds * and copy the data there. Either way, set 4641da177e4SLinus Torvalds * if_data to point at the data. 4651da177e4SLinus Torvalds * If we allocate a buffer for the data, make 4661da177e4SLinus Torvalds * sure that its size is a multiple of 4 and 4671da177e4SLinus Torvalds * record the real size in i_real_bytes. 
4681da177e4SLinus Torvalds */ 4691da177e4SLinus Torvalds STATIC int 4701da177e4SLinus Torvalds xfs_iformat_local( 4711da177e4SLinus Torvalds xfs_inode_t *ip, 4721da177e4SLinus Torvalds xfs_dinode_t *dip, 4731da177e4SLinus Torvalds int whichfork, 4741da177e4SLinus Torvalds int size) 4751da177e4SLinus Torvalds { 4761da177e4SLinus Torvalds xfs_ifork_t *ifp; 4771da177e4SLinus Torvalds int real_size; 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds /* 4801da177e4SLinus Torvalds * If the size is unreasonable, then something 4811da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 4821da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 4831da177e4SLinus Torvalds */ 4841da177e4SLinus Torvalds if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 48565333b4cSDave Chinner xfs_warn(ip->i_mount, 48665333b4cSDave Chinner "corrupt inode %Lu (bad size %d for local fork, size = %d).", 4871da177e4SLinus Torvalds (unsigned long long) ip->i_ino, size, 4881da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 4891da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 4901da177e4SLinus Torvalds ip->i_mount, dip); 4911da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4921da177e4SLinus Torvalds } 4931da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 4941da177e4SLinus Torvalds real_size = 0; 4951da177e4SLinus Torvalds if (size == 0) 4961da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 4971da177e4SLinus Torvalds else if (size <= sizeof(ifp->if_u2.if_inline_data)) 4981da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 4991da177e4SLinus Torvalds else { 5001da177e4SLinus Torvalds real_size = roundup(size, 4); 5014a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); 5021da177e4SLinus Torvalds } 5031da177e4SLinus Torvalds ifp->if_bytes = size; 5041da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 5051da177e4SLinus Torvalds if (size) 
5061da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 5071da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 5081da177e4SLinus Torvalds ifp->if_flags |= XFS_IFINLINE; 5091da177e4SLinus Torvalds return 0; 5101da177e4SLinus Torvalds } 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds /* 5131da177e4SLinus Torvalds * The file consists of a set of extents all 5141da177e4SLinus Torvalds * of which fit into the on-disk inode. 5151da177e4SLinus Torvalds * If there are few enough extents to fit into 5161da177e4SLinus Torvalds * the if_inline_ext, then copy them there. 5171da177e4SLinus Torvalds * Otherwise allocate a buffer for them and copy 5181da177e4SLinus Torvalds * them into it. Either way, set if_extents 5191da177e4SLinus Torvalds * to point at the extents. 5201da177e4SLinus Torvalds */ 5211da177e4SLinus Torvalds STATIC int 5221da177e4SLinus Torvalds xfs_iformat_extents( 5231da177e4SLinus Torvalds xfs_inode_t *ip, 5241da177e4SLinus Torvalds xfs_dinode_t *dip, 5251da177e4SLinus Torvalds int whichfork) 5261da177e4SLinus Torvalds { 527a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp; 5281da177e4SLinus Torvalds xfs_ifork_t *ifp; 5291da177e4SLinus Torvalds int nex; 5301da177e4SLinus Torvalds int size; 5311da177e4SLinus Torvalds int i; 5321da177e4SLinus Torvalds 5331da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 5341da177e4SLinus Torvalds nex = XFS_DFORK_NEXTENTS(dip, whichfork); 5351da177e4SLinus Torvalds size = nex * (uint)sizeof(xfs_bmbt_rec_t); 5361da177e4SLinus Torvalds 5371da177e4SLinus Torvalds /* 5381da177e4SLinus Torvalds * If the number of extents is unreasonable, then something 5391da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 5401da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 
5411da177e4SLinus Torvalds */ 5421da177e4SLinus Torvalds if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 54365333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", 5441da177e4SLinus Torvalds (unsigned long long) ip->i_ino, nex); 5451da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 5461da177e4SLinus Torvalds ip->i_mount, dip); 5471da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5481da177e4SLinus Torvalds } 5491da177e4SLinus Torvalds 5504eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 5511da177e4SLinus Torvalds if (nex == 0) 5521da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 5531da177e4SLinus Torvalds else if (nex <= XFS_INLINE_EXTS) 5541da177e4SLinus Torvalds ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 5554eea22f0SMandy Kirkconnell else 5564eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nex); 5574eea22f0SMandy Kirkconnell 5581da177e4SLinus Torvalds ifp->if_bytes = size; 5591da177e4SLinus Torvalds if (size) { 5601da177e4SLinus Torvalds dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 561a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); 5624eea22f0SMandy Kirkconnell for (i = 0; i < nex; i++, dp++) { 563a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 564597bca63SHarvey Harrison ep->l0 = get_unaligned_be64(&dp->l0); 565597bca63SHarvey Harrison ep->l1 = get_unaligned_be64(&dp->l1); 5661da177e4SLinus Torvalds } 5673a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 5681da177e4SLinus Torvalds if (whichfork != XFS_DATA_FORK || 5691da177e4SLinus Torvalds XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 5701da177e4SLinus Torvalds if (unlikely(xfs_check_nostate_extents( 5714eea22f0SMandy Kirkconnell ifp, 0, nex))) { 5721da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat_extents(2)", 5731da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5741da177e4SLinus Torvalds ip->i_mount); 
5751da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5761da177e4SLinus Torvalds } 5771da177e4SLinus Torvalds } 5781da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 5791da177e4SLinus Torvalds return 0; 5801da177e4SLinus Torvalds } 5811da177e4SLinus Torvalds 5821da177e4SLinus Torvalds /* 5831da177e4SLinus Torvalds * The file has too many extents to fit into 5841da177e4SLinus Torvalds * the inode, so they are in B-tree format. 5851da177e4SLinus Torvalds * Allocate a buffer for the root of the B-tree 5861da177e4SLinus Torvalds * and copy the root into it. The i_extents 5871da177e4SLinus Torvalds * field will remain NULL until all of the 5881da177e4SLinus Torvalds * extents are read in (when they are needed). 5891da177e4SLinus Torvalds */ 5901da177e4SLinus Torvalds STATIC int 5911da177e4SLinus Torvalds xfs_iformat_btree( 5921da177e4SLinus Torvalds xfs_inode_t *ip, 5931da177e4SLinus Torvalds xfs_dinode_t *dip, 5941da177e4SLinus Torvalds int whichfork) 5951da177e4SLinus Torvalds { 5961da177e4SLinus Torvalds xfs_bmdr_block_t *dfp; 5971da177e4SLinus Torvalds xfs_ifork_t *ifp; 5981da177e4SLinus Torvalds /* REFERENCED */ 5991da177e4SLinus Torvalds int nrecs; 6001da177e4SLinus Torvalds int size; 6011da177e4SLinus Torvalds 6021da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 6031da177e4SLinus Torvalds dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 6041da177e4SLinus Torvalds size = XFS_BMAP_BROOT_SPACE(dfp); 60560197e8dSChristoph Hellwig nrecs = be16_to_cpu(dfp->bb_numrecs); 6061da177e4SLinus Torvalds 6071da177e4SLinus Torvalds /* 6081da177e4SLinus Torvalds * blow out if -- fork has less extents than can fit in 6091da177e4SLinus Torvalds * fork (fork shouldn't be a btree format), root btree 6101da177e4SLinus Torvalds * block has more records than can fit into the fork, 6111da177e4SLinus Torvalds * or the number of extents is greater than the number of 6121da177e4SLinus Torvalds * blocks. 
6131da177e4SLinus Torvalds */ 6141da177e4SLinus Torvalds if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 6151da177e4SLinus Torvalds || XFS_BMDR_SPACE_CALC(nrecs) > 6161da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 6171da177e4SLinus Torvalds || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 61865333b4cSDave Chinner xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", 6191da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 62065333b4cSDave Chinner XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 62165333b4cSDave Chinner ip->i_mount, dip); 6221da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6231da177e4SLinus Torvalds } 6241da177e4SLinus Torvalds 6251da177e4SLinus Torvalds ifp->if_broot_bytes = size; 6264a7edddcSDave Chinner ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS); 6271da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 6281da177e4SLinus Torvalds /* 6291da177e4SLinus Torvalds * Copy and convert from the on-disk structure 6301da177e4SLinus Torvalds * to the in-memory structure. 
6311da177e4SLinus Torvalds */ 63260197e8dSChristoph Hellwig xfs_bmdr_to_bmbt(ip->i_mount, dfp, 63360197e8dSChristoph Hellwig XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 6341da177e4SLinus Torvalds ifp->if_broot, size); 6351da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 6361da177e4SLinus Torvalds ifp->if_flags |= XFS_IFBROOT; 6371da177e4SLinus Torvalds 6381da177e4SLinus Torvalds return 0; 6391da177e4SLinus Torvalds } 6401da177e4SLinus Torvalds 641d96f8f89SEric Sandeen STATIC void 642347d1c01SChristoph Hellwig xfs_dinode_from_disk( 643347d1c01SChristoph Hellwig xfs_icdinode_t *to, 64481591fe2SChristoph Hellwig xfs_dinode_t *from) 6451da177e4SLinus Torvalds { 646347d1c01SChristoph Hellwig to->di_magic = be16_to_cpu(from->di_magic); 647347d1c01SChristoph Hellwig to->di_mode = be16_to_cpu(from->di_mode); 648347d1c01SChristoph Hellwig to->di_version = from ->di_version; 649347d1c01SChristoph Hellwig to->di_format = from->di_format; 650347d1c01SChristoph Hellwig to->di_onlink = be16_to_cpu(from->di_onlink); 651347d1c01SChristoph Hellwig to->di_uid = be32_to_cpu(from->di_uid); 652347d1c01SChristoph Hellwig to->di_gid = be32_to_cpu(from->di_gid); 653347d1c01SChristoph Hellwig to->di_nlink = be32_to_cpu(from->di_nlink); 6546743099cSArkadiusz Mi?kiewicz to->di_projid_lo = be16_to_cpu(from->di_projid_lo); 6556743099cSArkadiusz Mi?kiewicz to->di_projid_hi = be16_to_cpu(from->di_projid_hi); 656347d1c01SChristoph Hellwig memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 657347d1c01SChristoph Hellwig to->di_flushiter = be16_to_cpu(from->di_flushiter); 658347d1c01SChristoph Hellwig to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); 659347d1c01SChristoph Hellwig to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec); 660347d1c01SChristoph Hellwig to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec); 661347d1c01SChristoph Hellwig to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec); 662347d1c01SChristoph Hellwig to->di_ctime.t_sec = 
be32_to_cpu(from->di_ctime.t_sec); 663347d1c01SChristoph Hellwig to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec); 664347d1c01SChristoph Hellwig to->di_size = be64_to_cpu(from->di_size); 665347d1c01SChristoph Hellwig to->di_nblocks = be64_to_cpu(from->di_nblocks); 666347d1c01SChristoph Hellwig to->di_extsize = be32_to_cpu(from->di_extsize); 667347d1c01SChristoph Hellwig to->di_nextents = be32_to_cpu(from->di_nextents); 668347d1c01SChristoph Hellwig to->di_anextents = be16_to_cpu(from->di_anextents); 669347d1c01SChristoph Hellwig to->di_forkoff = from->di_forkoff; 670347d1c01SChristoph Hellwig to->di_aformat = from->di_aformat; 671347d1c01SChristoph Hellwig to->di_dmevmask = be32_to_cpu(from->di_dmevmask); 672347d1c01SChristoph Hellwig to->di_dmstate = be16_to_cpu(from->di_dmstate); 673347d1c01SChristoph Hellwig to->di_flags = be16_to_cpu(from->di_flags); 674347d1c01SChristoph Hellwig to->di_gen = be32_to_cpu(from->di_gen); 6751da177e4SLinus Torvalds } 6761da177e4SLinus Torvalds 677347d1c01SChristoph Hellwig void 678347d1c01SChristoph Hellwig xfs_dinode_to_disk( 67981591fe2SChristoph Hellwig xfs_dinode_t *to, 680347d1c01SChristoph Hellwig xfs_icdinode_t *from) 681347d1c01SChristoph Hellwig { 682347d1c01SChristoph Hellwig to->di_magic = cpu_to_be16(from->di_magic); 683347d1c01SChristoph Hellwig to->di_mode = cpu_to_be16(from->di_mode); 684347d1c01SChristoph Hellwig to->di_version = from ->di_version; 685347d1c01SChristoph Hellwig to->di_format = from->di_format; 686347d1c01SChristoph Hellwig to->di_onlink = cpu_to_be16(from->di_onlink); 687347d1c01SChristoph Hellwig to->di_uid = cpu_to_be32(from->di_uid); 688347d1c01SChristoph Hellwig to->di_gid = cpu_to_be32(from->di_gid); 689347d1c01SChristoph Hellwig to->di_nlink = cpu_to_be32(from->di_nlink); 6906743099cSArkadiusz Mi?kiewicz to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 6916743099cSArkadiusz Mi?kiewicz to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 692347d1c01SChristoph Hellwig 
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 693347d1c01SChristoph Hellwig to->di_flushiter = cpu_to_be16(from->di_flushiter); 694347d1c01SChristoph Hellwig to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 695347d1c01SChristoph Hellwig to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 696347d1c01SChristoph Hellwig to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); 697347d1c01SChristoph Hellwig to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); 698347d1c01SChristoph Hellwig to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); 699347d1c01SChristoph Hellwig to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); 700347d1c01SChristoph Hellwig to->di_size = cpu_to_be64(from->di_size); 701347d1c01SChristoph Hellwig to->di_nblocks = cpu_to_be64(from->di_nblocks); 702347d1c01SChristoph Hellwig to->di_extsize = cpu_to_be32(from->di_extsize); 703347d1c01SChristoph Hellwig to->di_nextents = cpu_to_be32(from->di_nextents); 704347d1c01SChristoph Hellwig to->di_anextents = cpu_to_be16(from->di_anextents); 705347d1c01SChristoph Hellwig to->di_forkoff = from->di_forkoff; 706347d1c01SChristoph Hellwig to->di_aformat = from->di_aformat; 707347d1c01SChristoph Hellwig to->di_dmevmask = cpu_to_be32(from->di_dmevmask); 708347d1c01SChristoph Hellwig to->di_dmstate = cpu_to_be16(from->di_dmstate); 709347d1c01SChristoph Hellwig to->di_flags = cpu_to_be16(from->di_flags); 710347d1c01SChristoph Hellwig to->di_gen = cpu_to_be32(from->di_gen); 7111da177e4SLinus Torvalds } 7121da177e4SLinus Torvalds 7131da177e4SLinus Torvalds STATIC uint 7141da177e4SLinus Torvalds _xfs_dic2xflags( 7151da177e4SLinus Torvalds __uint16_t di_flags) 7161da177e4SLinus Torvalds { 7171da177e4SLinus Torvalds uint flags = 0; 7181da177e4SLinus Torvalds 7191da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_ANY) { 7201da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_REALTIME) 7211da177e4SLinus Torvalds flags |= XFS_XFLAG_REALTIME; 7221da177e4SLinus Torvalds if (di_flags & 
XFS_DIFLAG_PREALLOC) 7231da177e4SLinus Torvalds flags |= XFS_XFLAG_PREALLOC; 7241da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_IMMUTABLE) 7251da177e4SLinus Torvalds flags |= XFS_XFLAG_IMMUTABLE; 7261da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_APPEND) 7271da177e4SLinus Torvalds flags |= XFS_XFLAG_APPEND; 7281da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_SYNC) 7291da177e4SLinus Torvalds flags |= XFS_XFLAG_SYNC; 7301da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOATIME) 7311da177e4SLinus Torvalds flags |= XFS_XFLAG_NOATIME; 7321da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NODUMP) 7331da177e4SLinus Torvalds flags |= XFS_XFLAG_NODUMP; 7341da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_RTINHERIT) 7351da177e4SLinus Torvalds flags |= XFS_XFLAG_RTINHERIT; 7361da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PROJINHERIT) 7371da177e4SLinus Torvalds flags |= XFS_XFLAG_PROJINHERIT; 7381da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOSYMLINKS) 7391da177e4SLinus Torvalds flags |= XFS_XFLAG_NOSYMLINKS; 740dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSIZE) 741dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSIZE; 742dd9f438eSNathan Scott if (di_flags & XFS_DIFLAG_EXTSZINHERIT) 743dd9f438eSNathan Scott flags |= XFS_XFLAG_EXTSZINHERIT; 744d3446eacSBarry Naujok if (di_flags & XFS_DIFLAG_NODEFRAG) 745d3446eacSBarry Naujok flags |= XFS_XFLAG_NODEFRAG; 7462a82b8beSDavid Chinner if (di_flags & XFS_DIFLAG_FILESTREAM) 7472a82b8beSDavid Chinner flags |= XFS_XFLAG_FILESTREAM; 7481da177e4SLinus Torvalds } 7491da177e4SLinus Torvalds 7501da177e4SLinus Torvalds return flags; 7511da177e4SLinus Torvalds } 7521da177e4SLinus Torvalds 7531da177e4SLinus Torvalds uint 7541da177e4SLinus Torvalds xfs_ip2xflags( 7551da177e4SLinus Torvalds xfs_inode_t *ip) 7561da177e4SLinus Torvalds { 757347d1c01SChristoph Hellwig xfs_icdinode_t *dic = &ip->i_d; 7581da177e4SLinus Torvalds 759a916e2bdSNathan Scott return _xfs_dic2xflags(dic->di_flags) | 76045ba598eSChristoph Hellwig (XFS_IFORK_Q(ip) ? 
XFS_XFLAG_HASATTR : 0); 7611da177e4SLinus Torvalds } 7621da177e4SLinus Torvalds 7631da177e4SLinus Torvalds uint 7641da177e4SLinus Torvalds xfs_dic2xflags( 76545ba598eSChristoph Hellwig xfs_dinode_t *dip) 7661da177e4SLinus Torvalds { 76781591fe2SChristoph Hellwig return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | 76845ba598eSChristoph Hellwig (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 7691da177e4SLinus Torvalds } 7701da177e4SLinus Torvalds 7711da177e4SLinus Torvalds /* 77224f211baSChristoph Hellwig * Read the disk inode attributes into the in-core inode structure. 7731da177e4SLinus Torvalds */ 7741da177e4SLinus Torvalds int 7751da177e4SLinus Torvalds xfs_iread( 7761da177e4SLinus Torvalds xfs_mount_t *mp, 7771da177e4SLinus Torvalds xfs_trans_t *tp, 77824f211baSChristoph Hellwig xfs_inode_t *ip, 77924f211baSChristoph Hellwig uint iget_flags) 7801da177e4SLinus Torvalds { 7811da177e4SLinus Torvalds xfs_buf_t *bp; 7821da177e4SLinus Torvalds xfs_dinode_t *dip; 7831da177e4SLinus Torvalds int error; 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds /* 78692bfc6e7SChristoph Hellwig * Fill in the location information in the in-core inode. 7871da177e4SLinus Torvalds */ 78824f211baSChristoph Hellwig error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 7899ed0451eSChristoph Hellwig if (error) 79024f211baSChristoph Hellwig return error; 7911da177e4SLinus Torvalds 7921da177e4SLinus Torvalds /* 79392bfc6e7SChristoph Hellwig * Get pointers to the on-disk inode and the buffer containing it. 
79476d8b277SChristoph Hellwig */ 79592bfc6e7SChristoph Hellwig error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 7960cadda1cSChristoph Hellwig XBF_LOCK, iget_flags); 79776d8b277SChristoph Hellwig if (error) 79824f211baSChristoph Hellwig return error; 79992bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 80076d8b277SChristoph Hellwig 80176d8b277SChristoph Hellwig /* 8021da177e4SLinus Torvalds * If we got something that isn't an inode it means someone 8031da177e4SLinus Torvalds * (nfs or dmi) has a stale handle. 8041da177e4SLinus Torvalds */ 805*69ef921bSChristoph Hellwig if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) { 8061da177e4SLinus Torvalds #ifdef DEBUG 80753487786SDave Chinner xfs_alert(mp, 80853487786SDave Chinner "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", 80953487786SDave Chinner __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); 8101da177e4SLinus Torvalds #endif /* DEBUG */ 8119ed0451eSChristoph Hellwig error = XFS_ERROR(EINVAL); 8129ed0451eSChristoph Hellwig goto out_brelse; 8131da177e4SLinus Torvalds } 8141da177e4SLinus Torvalds 8151da177e4SLinus Torvalds /* 8161da177e4SLinus Torvalds * If the on-disk inode is already linked to a directory 8171da177e4SLinus Torvalds * entry, copy all of the inode into the in-core inode. 8181da177e4SLinus Torvalds * xfs_iformat() handles copying in the inode format 8191da177e4SLinus Torvalds * specific information. 8201da177e4SLinus Torvalds * Otherwise, just get the truly permanent information. 
8211da177e4SLinus Torvalds */ 82281591fe2SChristoph Hellwig if (dip->di_mode) { 82381591fe2SChristoph Hellwig xfs_dinode_from_disk(&ip->i_d, dip); 8241da177e4SLinus Torvalds error = xfs_iformat(ip, dip); 8251da177e4SLinus Torvalds if (error) { 8261da177e4SLinus Torvalds #ifdef DEBUG 82753487786SDave Chinner xfs_alert(mp, "%s: xfs_iformat() returned error %d", 82853487786SDave Chinner __func__, error); 8291da177e4SLinus Torvalds #endif /* DEBUG */ 8309ed0451eSChristoph Hellwig goto out_brelse; 8311da177e4SLinus Torvalds } 8321da177e4SLinus Torvalds } else { 83381591fe2SChristoph Hellwig ip->i_d.di_magic = be16_to_cpu(dip->di_magic); 83481591fe2SChristoph Hellwig ip->i_d.di_version = dip->di_version; 83581591fe2SChristoph Hellwig ip->i_d.di_gen = be32_to_cpu(dip->di_gen); 83681591fe2SChristoph Hellwig ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 8371da177e4SLinus Torvalds /* 8381da177e4SLinus Torvalds * Make sure to pull in the mode here as well in 8391da177e4SLinus Torvalds * case the inode is released without being used. 8401da177e4SLinus Torvalds * This ensures that xfs_inactive() will see that 8411da177e4SLinus Torvalds * the inode is already free and not try to mess 8421da177e4SLinus Torvalds * with the uninitialized part of it. 8431da177e4SLinus Torvalds */ 8441da177e4SLinus Torvalds ip->i_d.di_mode = 0; 8451da177e4SLinus Torvalds /* 8461da177e4SLinus Torvalds * Initialize the per-fork minima and maxima for a new 8471da177e4SLinus Torvalds * inode here. xfs_iformat will do it for old inodes. 8481da177e4SLinus Torvalds */ 8491da177e4SLinus Torvalds ip->i_df.if_ext_max = 8501da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 8511da177e4SLinus Torvalds } 8521da177e4SLinus Torvalds 8531da177e4SLinus Torvalds /* 8541da177e4SLinus Torvalds * The inode format changed when we moved the link count and 8551da177e4SLinus Torvalds * made it 32 bits long. 
If this is an old format inode, 8561da177e4SLinus Torvalds * convert it in memory to look like a new one. If it gets 8571da177e4SLinus Torvalds * flushed to disk we will convert back before flushing or 8581da177e4SLinus Torvalds * logging it. We zero out the new projid field and the old link 8591da177e4SLinus Torvalds * count field. We'll handle clearing the pad field (the remains 8601da177e4SLinus Torvalds * of the old uuid field) when we actually convert the inode to 8611da177e4SLinus Torvalds * the new format. We don't change the version number so that we 8621da177e4SLinus Torvalds * can distinguish this from a real new format inode. 8631da177e4SLinus Torvalds */ 86451ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 8651da177e4SLinus Torvalds ip->i_d.di_nlink = ip->i_d.di_onlink; 8661da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 8676743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, 0); 8681da177e4SLinus Torvalds } 8691da177e4SLinus Torvalds 8701da177e4SLinus Torvalds ip->i_delayed_blks = 0; 871ba87ea69SLachlan McIlroy ip->i_size = ip->i_d.di_size; 8721da177e4SLinus Torvalds 8731da177e4SLinus Torvalds /* 8741da177e4SLinus Torvalds * Mark the buffer containing the inode as something to keep 8751da177e4SLinus Torvalds * around for a while. This helps to keep recently accessed 8761da177e4SLinus Torvalds * meta-data in-core longer. 8771da177e4SLinus Torvalds */ 878821eb21dSDave Chinner xfs_buf_set_ref(bp, XFS_INO_REF); 8791da177e4SLinus Torvalds 8801da177e4SLinus Torvalds /* 8811da177e4SLinus Torvalds * Use xfs_trans_brelse() to release the buffer containing the 8821da177e4SLinus Torvalds * on-disk inode, because it was acquired with xfs_trans_read_buf() 8831da177e4SLinus Torvalds * in xfs_itobp() above. If tp is NULL, this is just a normal 8841da177e4SLinus Torvalds * brelse(). If we're within a transaction, then xfs_trans_brelse() 8851da177e4SLinus Torvalds * will only release the buffer if it is not dirty within the 8861da177e4SLinus Torvalds * transaction. 
It will be OK to release the buffer in this case, 8871da177e4SLinus Torvalds * because inodes on disk are never destroyed and we will be 8881da177e4SLinus Torvalds * locking the new in-core inode before putting it in the hash 8891da177e4SLinus Torvalds * table where other processes can find it. Thus we don't have 8901da177e4SLinus Torvalds * to worry about the inode being changed just because we released 8911da177e4SLinus Torvalds * the buffer. 8921da177e4SLinus Torvalds */ 8939ed0451eSChristoph Hellwig out_brelse: 8949ed0451eSChristoph Hellwig xfs_trans_brelse(tp, bp); 8959ed0451eSChristoph Hellwig return error; 8961da177e4SLinus Torvalds } 8971da177e4SLinus Torvalds 8981da177e4SLinus Torvalds /* 8991da177e4SLinus Torvalds * Read in extents from a btree-format inode. 9001da177e4SLinus Torvalds * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 9011da177e4SLinus Torvalds */ 9021da177e4SLinus Torvalds int 9031da177e4SLinus Torvalds xfs_iread_extents( 9041da177e4SLinus Torvalds xfs_trans_t *tp, 9051da177e4SLinus Torvalds xfs_inode_t *ip, 9061da177e4SLinus Torvalds int whichfork) 9071da177e4SLinus Torvalds { 9081da177e4SLinus Torvalds int error; 9091da177e4SLinus Torvalds xfs_ifork_t *ifp; 9104eea22f0SMandy Kirkconnell xfs_extnum_t nextents; 9111da177e4SLinus Torvalds 9121da177e4SLinus Torvalds if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 9131da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 9141da177e4SLinus Torvalds ip->i_mount); 9151da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 9161da177e4SLinus Torvalds } 9174eea22f0SMandy Kirkconnell nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 9181da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 9194eea22f0SMandy Kirkconnell 9201da177e4SLinus Torvalds /* 9211da177e4SLinus Torvalds * We know that the size is valid (it's checked in iformat_btree) 9221da177e4SLinus Torvalds */ 9234eea22f0SMandy Kirkconnell ifp->if_bytes = 
ifp->if_real_bytes = 0; 9241da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 9254eea22f0SMandy Kirkconnell xfs_iext_add(ifp, 0, nextents); 9261da177e4SLinus Torvalds error = xfs_bmap_read_extents(tp, ip, whichfork); 9271da177e4SLinus Torvalds if (error) { 9284eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 9291da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 9301da177e4SLinus Torvalds return error; 9311da177e4SLinus Torvalds } 932a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); 9331da177e4SLinus Torvalds return 0; 9341da177e4SLinus Torvalds } 9351da177e4SLinus Torvalds 9361da177e4SLinus Torvalds /* 9371da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 9381da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 9391da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 9401da177e4SLinus Torvalds * set according to the contents of the given cred structure. 9411da177e4SLinus Torvalds * 9421da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 9431da177e4SLinus Torvalds * has a free inode available, call xfs_iget() 9441da177e4SLinus Torvalds * to obtain the in-core version of the allocated inode. Finally, 9451da177e4SLinus Torvalds * fill in the inode and log its initial contents. In this case, 9461da177e4SLinus Torvalds * ialloc_context would be set to NULL and call_again set to false. 9471da177e4SLinus Torvalds * 9481da177e4SLinus Torvalds * If xfs_dialloc() does not have an available inode, 9491da177e4SLinus Torvalds * it will replenish its supply by doing an allocation. Since we can 9501da177e4SLinus Torvalds * only do one allocation within a transaction without deadlocks, we 9511da177e4SLinus Torvalds * must commit the current transaction before returning the inode itself. 
 * In this case, therefore, we will set call_again to true and return.
 * The caller should then commit the current transaction, start a new
 * transaction, and call xfs_ialloc() again to actually get the inode.
 *
 * To ensure that some other process does not grab the inode that
 * was allocated during the first call to xfs_ialloc(), this routine
 * also returns the [locked] bp pointing to the head of the freelist
 * as ialloc_context.  The caller should hold this buffer across
 * the commit and pass it back into this routine on the second call.
 *
 * If we are allocating quota inodes, we do not have a parent inode
 * to attach to or associate with (i.e. pip == NULL) because they
 * are not linked into the directory structure - they are attached
 * directly to the superblock - and so have no parent.
9661da177e4SLinus Torvalds */ 9671da177e4SLinus Torvalds int 9681da177e4SLinus Torvalds xfs_ialloc( 9691da177e4SLinus Torvalds xfs_trans_t *tp, 9701da177e4SLinus Torvalds xfs_inode_t *pip, 9711da177e4SLinus Torvalds mode_t mode, 97231b084aeSNathan Scott xfs_nlink_t nlink, 9731da177e4SLinus Torvalds xfs_dev_t rdev, 9746743099cSArkadiusz Mi?kiewicz prid_t prid, 9751da177e4SLinus Torvalds int okalloc, 9761da177e4SLinus Torvalds xfs_buf_t **ialloc_context, 9771da177e4SLinus Torvalds boolean_t *call_again, 9781da177e4SLinus Torvalds xfs_inode_t **ipp) 9791da177e4SLinus Torvalds { 9801da177e4SLinus Torvalds xfs_ino_t ino; 9811da177e4SLinus Torvalds xfs_inode_t *ip; 9821da177e4SLinus Torvalds uint flags; 9831da177e4SLinus Torvalds int error; 984dff35fd4SChristoph Hellwig timespec_t tv; 985bf904248SDavid Chinner int filestreams = 0; 9861da177e4SLinus Torvalds 9871da177e4SLinus Torvalds /* 9881da177e4SLinus Torvalds * Call the space management code to pick 9891da177e4SLinus Torvalds * the on-disk inode to be allocated. 9901da177e4SLinus Torvalds */ 991b11f94d5SDavid Chinner error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 9921da177e4SLinus Torvalds ialloc_context, call_again, &ino); 993bf904248SDavid Chinner if (error) 9941da177e4SLinus Torvalds return error; 9951da177e4SLinus Torvalds if (*call_again || ino == NULLFSINO) { 9961da177e4SLinus Torvalds *ipp = NULL; 9971da177e4SLinus Torvalds return 0; 9981da177e4SLinus Torvalds } 9991da177e4SLinus Torvalds ASSERT(*ialloc_context == NULL); 10001da177e4SLinus Torvalds 10011da177e4SLinus Torvalds /* 10021da177e4SLinus Torvalds * Get the in-core inode with the lock held exclusively. 10031da177e4SLinus Torvalds * This is because we're setting fields here we need 10041da177e4SLinus Torvalds * to prevent others from looking at until we're done. 
10051da177e4SLinus Torvalds */ 1006ec3ba85fSChristoph Hellwig error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, 1007ec3ba85fSChristoph Hellwig XFS_ILOCK_EXCL, &ip); 1008bf904248SDavid Chinner if (error) 10091da177e4SLinus Torvalds return error; 10101da177e4SLinus Torvalds ASSERT(ip != NULL); 10111da177e4SLinus Torvalds 10121da177e4SLinus Torvalds ip->i_d.di_mode = (__uint16_t)mode; 10131da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 10141da177e4SLinus Torvalds ip->i_d.di_nlink = nlink; 10151da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == nlink); 10169e2b2dc4SDavid Howells ip->i_d.di_uid = current_fsuid(); 10179e2b2dc4SDavid Howells ip->i_d.di_gid = current_fsgid(); 10186743099cSArkadiusz Mi?kiewicz xfs_set_projid(ip, prid); 10191da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 10201da177e4SLinus Torvalds 10211da177e4SLinus Torvalds /* 10221da177e4SLinus Torvalds * If the superblock version is up to where we support new format 10231da177e4SLinus Torvalds * inodes and this is currently an old format inode, then change 10241da177e4SLinus Torvalds * the inode version number now. This way we only do the conversion 10251da177e4SLinus Torvalds * here rather than here and in the flush/logging code. 10261da177e4SLinus Torvalds */ 102762118709SEric Sandeen if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 102851ce16d5SChristoph Hellwig ip->i_d.di_version == 1) { 102951ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 10301da177e4SLinus Torvalds /* 10311da177e4SLinus Torvalds * We've already zeroed the old link count, the projid field, 10321da177e4SLinus Torvalds * and the pad field. 10331da177e4SLinus Torvalds */ 10341da177e4SLinus Torvalds } 10351da177e4SLinus Torvalds 10361da177e4SLinus Torvalds /* 10371da177e4SLinus Torvalds * Project ids won't be stored on disk if we are using a version 1 inode. 
10381da177e4SLinus Torvalds */ 103951ce16d5SChristoph Hellwig if ((prid != 0) && (ip->i_d.di_version == 1)) 10401da177e4SLinus Torvalds xfs_bump_ino_vers2(tp, ip); 10411da177e4SLinus Torvalds 1042bd186aa9SChristoph Hellwig if (pip && XFS_INHERIT_GID(pip)) { 10431da177e4SLinus Torvalds ip->i_d.di_gid = pip->i_d.di_gid; 10441da177e4SLinus Torvalds if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 10451da177e4SLinus Torvalds ip->i_d.di_mode |= S_ISGID; 10461da177e4SLinus Torvalds } 10471da177e4SLinus Torvalds } 10481da177e4SLinus Torvalds 10491da177e4SLinus Torvalds /* 10501da177e4SLinus Torvalds * If the group ID of the new file does not match the effective group 10511da177e4SLinus Torvalds * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 10521da177e4SLinus Torvalds * (and only if the irix_sgid_inherit compatibility variable is set). 10531da177e4SLinus Torvalds */ 10541da177e4SLinus Torvalds if ((irix_sgid_inherit) && 10551da177e4SLinus Torvalds (ip->i_d.di_mode & S_ISGID) && 10561da177e4SLinus Torvalds (!in_group_p((gid_t)ip->i_d.di_gid))) { 10571da177e4SLinus Torvalds ip->i_d.di_mode &= ~S_ISGID; 10581da177e4SLinus Torvalds } 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds ip->i_d.di_size = 0; 1061ba87ea69SLachlan McIlroy ip->i_size = 0; 10621da177e4SLinus Torvalds ip->i_d.di_nextents = 0; 10631da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 1064dff35fd4SChristoph Hellwig 1065dff35fd4SChristoph Hellwig nanotime(&tv); 1066dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 1067dff35fd4SChristoph Hellwig ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 1068dff35fd4SChristoph Hellwig ip->i_d.di_atime = ip->i_d.di_mtime; 1069dff35fd4SChristoph Hellwig ip->i_d.di_ctime = ip->i_d.di_mtime; 1070dff35fd4SChristoph Hellwig 10711da177e4SLinus Torvalds /* 10721da177e4SLinus Torvalds * di_gen will have been taken care of in xfs_iread. 
10731da177e4SLinus Torvalds */ 10741da177e4SLinus Torvalds ip->i_d.di_extsize = 0; 10751da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 10761da177e4SLinus Torvalds ip->i_d.di_dmstate = 0; 10771da177e4SLinus Torvalds ip->i_d.di_flags = 0; 10781da177e4SLinus Torvalds flags = XFS_ILOG_CORE; 10791da177e4SLinus Torvalds switch (mode & S_IFMT) { 10801da177e4SLinus Torvalds case S_IFIFO: 10811da177e4SLinus Torvalds case S_IFCHR: 10821da177e4SLinus Torvalds case S_IFBLK: 10831da177e4SLinus Torvalds case S_IFSOCK: 10841da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_DEV; 10851da177e4SLinus Torvalds ip->i_df.if_u2.if_rdev = rdev; 10861da177e4SLinus Torvalds ip->i_df.if_flags = 0; 10871da177e4SLinus Torvalds flags |= XFS_ILOG_DEV; 10881da177e4SLinus Torvalds break; 10891da177e4SLinus Torvalds case S_IFREG: 1090bf904248SDavid Chinner /* 1091bf904248SDavid Chinner * we can't set up filestreams until after the VFS inode 1092bf904248SDavid Chinner * is set up properly. 1093bf904248SDavid Chinner */ 1094bf904248SDavid Chinner if (pip && xfs_inode_is_filestream(pip)) 1095bf904248SDavid Chinner filestreams = 1; 10962a82b8beSDavid Chinner /* fall through */ 10971da177e4SLinus Torvalds case S_IFDIR: 1098b11f94d5SDavid Chinner if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1099365ca83dSNathan Scott uint di_flags = 0; 1100365ca83dSNathan Scott 11011da177e4SLinus Torvalds if ((mode & S_IFMT) == S_IFDIR) { 1102365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1103365ca83dSNathan Scott di_flags |= XFS_DIFLAG_RTINHERIT; 1104dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1105dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSZINHERIT; 1106dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1107dd9f438eSNathan Scott } 1108dd9f438eSNathan Scott } else if ((mode & S_IFMT) == S_IFREG) { 1109613d7043SChristoph Hellwig if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) 1110365ca83dSNathan Scott di_flags |= XFS_DIFLAG_REALTIME; 
1111dd9f438eSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) { 1112dd9f438eSNathan Scott di_flags |= XFS_DIFLAG_EXTSIZE; 1113dd9f438eSNathan Scott ip->i_d.di_extsize = pip->i_d.di_extsize; 1114dd9f438eSNathan Scott } 11151da177e4SLinus Torvalds } 11161da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 11171da177e4SLinus Torvalds xfs_inherit_noatime) 1118365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOATIME; 11191da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 11201da177e4SLinus Torvalds xfs_inherit_nodump) 1121365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NODUMP; 11221da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 11231da177e4SLinus Torvalds xfs_inherit_sync) 1124365ca83dSNathan Scott di_flags |= XFS_DIFLAG_SYNC; 11251da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 11261da177e4SLinus Torvalds xfs_inherit_nosymlinks) 1127365ca83dSNathan Scott di_flags |= XFS_DIFLAG_NOSYMLINKS; 1128365ca83dSNathan Scott if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1129365ca83dSNathan Scott di_flags |= XFS_DIFLAG_PROJINHERIT; 1130d3446eacSBarry Naujok if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && 1131d3446eacSBarry Naujok xfs_inherit_nodefrag) 1132d3446eacSBarry Naujok di_flags |= XFS_DIFLAG_NODEFRAG; 11332a82b8beSDavid Chinner if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) 11342a82b8beSDavid Chinner di_flags |= XFS_DIFLAG_FILESTREAM; 1135365ca83dSNathan Scott ip->i_d.di_flags |= di_flags; 11361da177e4SLinus Torvalds } 11371da177e4SLinus Torvalds /* FALLTHROUGH */ 11381da177e4SLinus Torvalds case S_IFLNK: 11391da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 11401da177e4SLinus Torvalds ip->i_df.if_flags = XFS_IFEXTENTS; 11411da177e4SLinus Torvalds ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 11421da177e4SLinus Torvalds ip->i_df.if_u1.if_extents = NULL; 11431da177e4SLinus Torvalds break; 11441da177e4SLinus Torvalds default: 11451da177e4SLinus Torvalds 
ASSERT(0); 11461da177e4SLinus Torvalds } 11471da177e4SLinus Torvalds /* 11481da177e4SLinus Torvalds * Attribute fork settings for new inode. 11491da177e4SLinus Torvalds */ 11501da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 11511da177e4SLinus Torvalds ip->i_d.di_anextents = 0; 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds /* 11541da177e4SLinus Torvalds * Log the new values stuffed into the inode. 11551da177e4SLinus Torvalds */ 1156ec3ba85fSChristoph Hellwig xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); 11571da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, flags); 11581da177e4SLinus Torvalds 1159b83bd138SNathan Scott /* now that we have an i_mode we can setup inode ops and unlock */ 116041be8bedSChristoph Hellwig xfs_setup_inode(ip); 11611da177e4SLinus Torvalds 1162bf904248SDavid Chinner /* now we have set up the vfs inode we can associate the filestream */ 1163bf904248SDavid Chinner if (filestreams) { 1164bf904248SDavid Chinner error = xfs_filestream_associate(pip, ip); 1165bf904248SDavid Chinner if (error < 0) 1166bf904248SDavid Chinner return -error; 1167bf904248SDavid Chinner if (!error) 1168bf904248SDavid Chinner xfs_iflags_set(ip, XFS_IFILESTREAM); 1169bf904248SDavid Chinner } 1170bf904248SDavid Chinner 11711da177e4SLinus Torvalds *ipp = ip; 11721da177e4SLinus Torvalds return 0; 11731da177e4SLinus Torvalds } 11741da177e4SLinus Torvalds 11751da177e4SLinus Torvalds /* 11761da177e4SLinus Torvalds * Check to make sure that there are no blocks allocated to the 11771da177e4SLinus Torvalds * file beyond the size of the file. We don't check this for 11781da177e4SLinus Torvalds * files with fixed size extents or real time extents, but we 11791da177e4SLinus Torvalds * at least do it for regular files. 
11801da177e4SLinus Torvalds */ 11811da177e4SLinus Torvalds #ifdef DEBUG 11828f04c47aSChristoph Hellwig STATIC void 11831da177e4SLinus Torvalds xfs_isize_check( 11848f04c47aSChristoph Hellwig struct xfs_inode *ip, 11851da177e4SLinus Torvalds xfs_fsize_t isize) 11861da177e4SLinus Torvalds { 11878f04c47aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 11881da177e4SLinus Torvalds xfs_fileoff_t map_first; 11891da177e4SLinus Torvalds int nimaps; 11901da177e4SLinus Torvalds xfs_bmbt_irec_t imaps[2]; 11911da177e4SLinus Torvalds 11921da177e4SLinus Torvalds if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 11931da177e4SLinus Torvalds return; 11941da177e4SLinus Torvalds 119571ddabb9SEric Sandeen if (XFS_IS_REALTIME_INODE(ip)) 119671ddabb9SEric Sandeen return; 119771ddabb9SEric Sandeen 119871ddabb9SEric Sandeen if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) 11991da177e4SLinus Torvalds return; 12001da177e4SLinus Torvalds 12011da177e4SLinus Torvalds nimaps = 2; 12021da177e4SLinus Torvalds map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); 12031da177e4SLinus Torvalds /* 12041da177e4SLinus Torvalds * The filesystem could be shutting down, so bmapi may return 12051da177e4SLinus Torvalds * an error. 
12061da177e4SLinus Torvalds */ 12071da177e4SLinus Torvalds if (xfs_bmapi(NULL, ip, map_first, 12081da177e4SLinus Torvalds (XFS_B_TO_FSB(mp, 12091da177e4SLinus Torvalds (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 12101da177e4SLinus Torvalds map_first), 12111da177e4SLinus Torvalds XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 1212b4e9181eSChristoph Hellwig NULL)) 12131da177e4SLinus Torvalds return; 12141da177e4SLinus Torvalds ASSERT(nimaps == 1); 12151da177e4SLinus Torvalds ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 12161da177e4SLinus Torvalds } 12178f04c47aSChristoph Hellwig #else /* DEBUG */ 12188f04c47aSChristoph Hellwig #define xfs_isize_check(ip, isize) 12191da177e4SLinus Torvalds #endif /* DEBUG */ 12201da177e4SLinus Torvalds 12211da177e4SLinus Torvalds /* 12228f04c47aSChristoph Hellwig * Free up the underlying blocks past new_size. The new size must be smaller 12238f04c47aSChristoph Hellwig * than the current size. This routine can be used both for the attribute and 12248f04c47aSChristoph Hellwig * data fork, and does not modify the inode size, which is left to the caller. 12251da177e4SLinus Torvalds * 1226f6485057SDavid Chinner * The transaction passed to this routine must have made a permanent log 1227f6485057SDavid Chinner * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1228f6485057SDavid Chinner * given transaction and start new ones, so make sure everything involved in 1229f6485057SDavid Chinner * the transaction is tidy before calling here. Some transaction will be 1230f6485057SDavid Chinner * returned to the caller to be committed. The incoming transaction must 1231f6485057SDavid Chinner * already include the inode, and both inode locks must be held exclusively. 1232f6485057SDavid Chinner * The inode must also be "held" within the transaction. On return the inode 1233f6485057SDavid Chinner * will be "held" within the returned transaction. 
This routine does NOT 1234f6485057SDavid Chinner * require any disk space to be reserved for it within the transaction. 12351da177e4SLinus Torvalds * 1236f6485057SDavid Chinner * If we get an error, we must return with the inode locked and linked into the 1237f6485057SDavid Chinner * current transaction. This keeps things simple for the higher level code, 1238f6485057SDavid Chinner * because it always knows that the inode is locked and held in the transaction 1239f6485057SDavid Chinner * that returns to it whether errors occur or not. We don't mark the inode 1240f6485057SDavid Chinner * dirty on error so that transactions can be easily aborted if possible. 12411da177e4SLinus Torvalds */ 12421da177e4SLinus Torvalds int 12438f04c47aSChristoph Hellwig xfs_itruncate_extents( 12448f04c47aSChristoph Hellwig struct xfs_trans **tpp, 12458f04c47aSChristoph Hellwig struct xfs_inode *ip, 12468f04c47aSChristoph Hellwig int whichfork, 12478f04c47aSChristoph Hellwig xfs_fsize_t new_size) 12481da177e4SLinus Torvalds { 12498f04c47aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 12508f04c47aSChristoph Hellwig struct xfs_trans *tp = *tpp; 12518f04c47aSChristoph Hellwig struct xfs_trans *ntp; 12528f04c47aSChristoph Hellwig xfs_bmap_free_t free_list; 12531da177e4SLinus Torvalds xfs_fsblock_t first_block; 12541da177e4SLinus Torvalds xfs_fileoff_t first_unmap_block; 12551da177e4SLinus Torvalds xfs_fileoff_t last_block; 12568f04c47aSChristoph Hellwig xfs_filblks_t unmap_len; 12571da177e4SLinus Torvalds int committed; 12588f04c47aSChristoph Hellwig int error = 0; 12598f04c47aSChristoph Hellwig int done = 0; 12601da177e4SLinus Torvalds 1261579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 12628f04c47aSChristoph Hellwig ASSERT(new_size <= ip->i_size); 12638f04c47aSChristoph Hellwig ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 12641da177e4SLinus Torvalds ASSERT(ip->i_itemp != NULL); 1265898621d5SChristoph Hellwig ASSERT(ip->i_itemp->ili_lock_flags 
== 0); 12661da177e4SLinus Torvalds ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 12671da177e4SLinus Torvalds 12681da177e4SLinus Torvalds /* 12691da177e4SLinus Torvalds * Since it is possible for space to become allocated beyond 12701da177e4SLinus Torvalds * the end of the file (in a crash where the space is allocated 12711da177e4SLinus Torvalds * but the inode size is not yet updated), simply remove any 12721da177e4SLinus Torvalds * blocks which show up between the new EOF and the maximum 12731da177e4SLinus Torvalds * possible file size. If the first block to be removed is 12741da177e4SLinus Torvalds * beyond the maximum file size (ie it is the same as last_block), 12751da177e4SLinus Torvalds * then there is nothing to do. 12761da177e4SLinus Torvalds */ 12778f04c47aSChristoph Hellwig first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 12781da177e4SLinus Torvalds last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 12798f04c47aSChristoph Hellwig if (first_unmap_block == last_block) 12808f04c47aSChristoph Hellwig return 0; 12818f04c47aSChristoph Hellwig 12828f04c47aSChristoph Hellwig ASSERT(first_unmap_block < last_block); 12831da177e4SLinus Torvalds unmap_len = last_block - first_unmap_block + 1; 12841da177e4SLinus Torvalds while (!done) { 12859d87c319SEric Sandeen xfs_bmap_init(&free_list, &first_block); 12868f04c47aSChristoph Hellwig error = xfs_bunmapi(tp, ip, 12873e57ecf6SOlaf Weber first_unmap_block, unmap_len, 12888f04c47aSChristoph Hellwig xfs_bmapi_aflag(whichfork), 12891da177e4SLinus Torvalds XFS_ITRUNC_MAX_EXTENTS, 12903e57ecf6SOlaf Weber &first_block, &free_list, 1291b4e9181eSChristoph Hellwig &done); 12928f04c47aSChristoph Hellwig if (error) 12938f04c47aSChristoph Hellwig goto out_bmap_cancel; 12941da177e4SLinus Torvalds 12951da177e4SLinus Torvalds /* 12961da177e4SLinus Torvalds * Duplicate the transaction that has the permanent 12971da177e4SLinus Torvalds * reservation and commit the old transaction. 
12981da177e4SLinus Torvalds */ 12998f04c47aSChristoph Hellwig error = xfs_bmap_finish(&tp, &free_list, &committed); 1300898621d5SChristoph Hellwig if (committed) 13018f04c47aSChristoph Hellwig xfs_trans_ijoin(tp, ip); 13028f04c47aSChristoph Hellwig if (error) 13038f04c47aSChristoph Hellwig goto out_bmap_cancel; 13041da177e4SLinus Torvalds 13051da177e4SLinus Torvalds if (committed) { 13061da177e4SLinus Torvalds /* 1307f6485057SDavid Chinner * Mark the inode dirty so it will be logged and 1308e5720eecSDavid Chinner * moved forward in the log as part of every commit. 13091da177e4SLinus Torvalds */ 13108f04c47aSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 13111da177e4SLinus Torvalds } 1312f6485057SDavid Chinner 13138f04c47aSChristoph Hellwig ntp = xfs_trans_dup(tp); 13148f04c47aSChristoph Hellwig error = xfs_trans_commit(tp, 0); 13158f04c47aSChristoph Hellwig tp = ntp; 1316f6485057SDavid Chinner 13178f04c47aSChristoph Hellwig xfs_trans_ijoin(tp, ip); 1318f6485057SDavid Chinner 1319cc09c0dcSDave Chinner if (error) 13208f04c47aSChristoph Hellwig goto out; 13218f04c47aSChristoph Hellwig 1322cc09c0dcSDave Chinner /* 13238f04c47aSChristoph Hellwig * Transaction commit worked ok so we can drop the extra ticket 1324cc09c0dcSDave Chinner * reference that we gained in xfs_trans_dup() 1325cc09c0dcSDave Chinner */ 13268f04c47aSChristoph Hellwig xfs_log_ticket_put(tp->t_ticket); 13278f04c47aSChristoph Hellwig error = xfs_trans_reserve(tp, 0, 1328f6485057SDavid Chinner XFS_ITRUNCATE_LOG_RES(mp), 0, 13291da177e4SLinus Torvalds XFS_TRANS_PERM_LOG_RES, 13301da177e4SLinus Torvalds XFS_ITRUNCATE_LOG_COUNT); 13311da177e4SLinus Torvalds if (error) 13328f04c47aSChristoph Hellwig goto out; 13331da177e4SLinus Torvalds } 13348f04c47aSChristoph Hellwig 13358f04c47aSChristoph Hellwig out: 13368f04c47aSChristoph Hellwig *tpp = tp; 13378f04c47aSChristoph Hellwig return error; 13388f04c47aSChristoph Hellwig out_bmap_cancel: 13391da177e4SLinus Torvalds /* 13408f04c47aSChristoph 
Hellwig * If the bunmapi call encounters an error, return to the caller where 13418f04c47aSChristoph Hellwig * the transaction can be properly aborted. We just need to make sure 13428f04c47aSChristoph Hellwig * we're not holding any resources that we were not when we came in. 13431da177e4SLinus Torvalds */ 13448f04c47aSChristoph Hellwig xfs_bmap_cancel(&free_list); 13458f04c47aSChristoph Hellwig goto out; 13468f04c47aSChristoph Hellwig } 13478f04c47aSChristoph Hellwig 13488f04c47aSChristoph Hellwig int 13498f04c47aSChristoph Hellwig xfs_itruncate_data( 13508f04c47aSChristoph Hellwig struct xfs_trans **tpp, 13518f04c47aSChristoph Hellwig struct xfs_inode *ip, 13528f04c47aSChristoph Hellwig xfs_fsize_t new_size) 13538f04c47aSChristoph Hellwig { 13548f04c47aSChristoph Hellwig int error; 13558f04c47aSChristoph Hellwig 13568f04c47aSChristoph Hellwig trace_xfs_itruncate_data_start(ip, new_size); 13578f04c47aSChristoph Hellwig 1358ba87ea69SLachlan McIlroy /* 13598f04c47aSChristoph Hellwig * The first thing we do is set the size to new_size permanently on 13608f04c47aSChristoph Hellwig * disk. This way we don't have to worry about anyone ever being able 13618f04c47aSChristoph Hellwig * to look at the data being freed even in the face of a crash. 13628f04c47aSChristoph Hellwig * What we're getting around here is the case where we free a block, it 13638f04c47aSChristoph Hellwig * is allocated to another file, it is written to, and then we crash. 13648f04c47aSChristoph Hellwig * If the new data gets written to the file but the log buffers 13658f04c47aSChristoph Hellwig * containing the free and reallocation don't, then we'd end up with 13668f04c47aSChristoph Hellwig * garbage in the blocks being freed. As long as we make the new_size 13678f04c47aSChristoph Hellwig * permanent before actually freeing any blocks it doesn't matter if 13688f04c47aSChristoph Hellwig * they get written to. 
13698f04c47aSChristoph Hellwig */ 13708f04c47aSChristoph Hellwig if (ip->i_d.di_nextents > 0) { 13718f04c47aSChristoph Hellwig /* 13728f04c47aSChristoph Hellwig * If we are not changing the file size then do not update 13738f04c47aSChristoph Hellwig * the on-disk file size - we may be called from 13748f04c47aSChristoph Hellwig * xfs_inactive_free_eofblocks(). If we update the on-disk 13758f04c47aSChristoph Hellwig * file size and then the system crashes before the contents 13768f04c47aSChristoph Hellwig * of the file are flushed to disk then the files may be 13778f04c47aSChristoph Hellwig * full of holes (ie NULL files bug). 1378ba87ea69SLachlan McIlroy */ 1379ba87ea69SLachlan McIlroy if (ip->i_size != new_size) { 13801da177e4SLinus Torvalds ip->i_d.di_size = new_size; 1381ba87ea69SLachlan McIlroy ip->i_size = new_size; 13828f04c47aSChristoph Hellwig xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); 1383ba87ea69SLachlan McIlroy } 13841da177e4SLinus Torvalds } 13858f04c47aSChristoph Hellwig 13868f04c47aSChristoph Hellwig error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); 13878f04c47aSChristoph Hellwig if (error) 13888f04c47aSChristoph Hellwig return error; 13898f04c47aSChristoph Hellwig 13908f04c47aSChristoph Hellwig /* 13918f04c47aSChristoph Hellwig * If we are not changing the file size then do not update the on-disk 13928f04c47aSChristoph Hellwig * file size - we may be called from xfs_inactive_free_eofblocks(). 13938f04c47aSChristoph Hellwig * If we update the on-disk file size and then the system crashes 13948f04c47aSChristoph Hellwig * before the contents of the file are flushed to disk then the files 13958f04c47aSChristoph Hellwig * may be full of holes (ie NULL files bug). 
13968f04c47aSChristoph Hellwig */ 13978f04c47aSChristoph Hellwig xfs_isize_check(ip, new_size); 13988f04c47aSChristoph Hellwig if (ip->i_size != new_size) { 13998f04c47aSChristoph Hellwig ip->i_d.di_size = new_size; 14008f04c47aSChristoph Hellwig ip->i_size = new_size; 14018f04c47aSChristoph Hellwig } 14028f04c47aSChristoph Hellwig 14038f04c47aSChristoph Hellwig ASSERT(new_size != 0 || ip->i_delayed_blks == 0); 14048f04c47aSChristoph Hellwig ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); 14058f04c47aSChristoph Hellwig 14068f04c47aSChristoph Hellwig /* 14078f04c47aSChristoph Hellwig * Always re-log the inode so that our permanent transaction can keep 14088f04c47aSChristoph Hellwig * on rolling it forward in the log. 14098f04c47aSChristoph Hellwig */ 14108f04c47aSChristoph Hellwig xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); 14118f04c47aSChristoph Hellwig 14128f04c47aSChristoph Hellwig trace_xfs_itruncate_data_end(ip, new_size); 14131da177e4SLinus Torvalds return 0; 14141da177e4SLinus Torvalds } 14151da177e4SLinus Torvalds 14161da177e4SLinus Torvalds /* 14171da177e4SLinus Torvalds * This is called when the inode's link count goes to 0. 14181da177e4SLinus Torvalds * We place the on-disk inode on a list in the AGI. It 14191da177e4SLinus Torvalds * will be pulled from this list when the inode is freed. 
14201da177e4SLinus Torvalds */ 14211da177e4SLinus Torvalds int 14221da177e4SLinus Torvalds xfs_iunlink( 14231da177e4SLinus Torvalds xfs_trans_t *tp, 14241da177e4SLinus Torvalds xfs_inode_t *ip) 14251da177e4SLinus Torvalds { 14261da177e4SLinus Torvalds xfs_mount_t *mp; 14271da177e4SLinus Torvalds xfs_agi_t *agi; 14281da177e4SLinus Torvalds xfs_dinode_t *dip; 14291da177e4SLinus Torvalds xfs_buf_t *agibp; 14301da177e4SLinus Torvalds xfs_buf_t *ibp; 14311da177e4SLinus Torvalds xfs_agino_t agino; 14321da177e4SLinus Torvalds short bucket_index; 14331da177e4SLinus Torvalds int offset; 14341da177e4SLinus Torvalds int error; 14351da177e4SLinus Torvalds 14361da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 14371da177e4SLinus Torvalds ASSERT(ip->i_d.di_mode != 0); 14381da177e4SLinus Torvalds 14391da177e4SLinus Torvalds mp = tp->t_mountp; 14401da177e4SLinus Torvalds 14411da177e4SLinus Torvalds /* 14421da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 14431da177e4SLinus Torvalds * on the list. 14441da177e4SLinus Torvalds */ 14455e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); 1446859d7182SVlad Apostolov if (error) 14471da177e4SLinus Torvalds return error; 14481da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 14495e1be0fbSChristoph Hellwig 14501da177e4SLinus Torvalds /* 14511da177e4SLinus Torvalds * Get the index into the agi hash table for the 14521da177e4SLinus Torvalds * list this inode will go on. 
14531da177e4SLinus Torvalds */ 14541da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 14551da177e4SLinus Torvalds ASSERT(agino != 0); 14561da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 14571da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 145816259e7dSChristoph Hellwig ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 14591da177e4SLinus Torvalds 1460*69ef921bSChristoph Hellwig if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) { 14611da177e4SLinus Torvalds /* 14621da177e4SLinus Torvalds * There is already another inode in the bucket we need 14631da177e4SLinus Torvalds * to add ourselves to. Add us at the front of the list. 14641da177e4SLinus Torvalds * Here we put the head pointer into our next pointer, 14651da177e4SLinus Torvalds * and then we fall through to point the head at us. 14661da177e4SLinus Torvalds */ 14670cadda1cSChristoph Hellwig error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 1468c319b58bSVlad Apostolov if (error) 1469c319b58bSVlad Apostolov return error; 1470c319b58bSVlad Apostolov 1471*69ef921bSChristoph Hellwig ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO)); 14721da177e4SLinus Torvalds dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 147392bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 14741da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 14751da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 14761da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 14771da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 14781da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 14791da177e4SLinus Torvalds } 14801da177e4SLinus Torvalds 14811da177e4SLinus Torvalds /* 14821da177e4SLinus Torvalds * Point the bucket head pointer at the inode being inserted. 
14831da177e4SLinus Torvalds */ 14841da177e4SLinus Torvalds ASSERT(agino != 0); 148516259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(agino); 14861da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 14871da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 14881da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 14891da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 14901da177e4SLinus Torvalds return 0; 14911da177e4SLinus Torvalds } 14921da177e4SLinus Torvalds 14931da177e4SLinus Torvalds /* 14941da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 14951da177e4SLinus Torvalds */ 14961da177e4SLinus Torvalds STATIC int 14971da177e4SLinus Torvalds xfs_iunlink_remove( 14981da177e4SLinus Torvalds xfs_trans_t *tp, 14991da177e4SLinus Torvalds xfs_inode_t *ip) 15001da177e4SLinus Torvalds { 15011da177e4SLinus Torvalds xfs_ino_t next_ino; 15021da177e4SLinus Torvalds xfs_mount_t *mp; 15031da177e4SLinus Torvalds xfs_agi_t *agi; 15041da177e4SLinus Torvalds xfs_dinode_t *dip; 15051da177e4SLinus Torvalds xfs_buf_t *agibp; 15061da177e4SLinus Torvalds xfs_buf_t *ibp; 15071da177e4SLinus Torvalds xfs_agnumber_t agno; 15081da177e4SLinus Torvalds xfs_agino_t agino; 15091da177e4SLinus Torvalds xfs_agino_t next_agino; 15101da177e4SLinus Torvalds xfs_buf_t *last_ibp; 15116fdf8cccSNathan Scott xfs_dinode_t *last_dip = NULL; 15121da177e4SLinus Torvalds short bucket_index; 15136fdf8cccSNathan Scott int offset, last_offset = 0; 15141da177e4SLinus Torvalds int error; 15151da177e4SLinus Torvalds 15161da177e4SLinus Torvalds mp = tp->t_mountp; 15171da177e4SLinus Torvalds agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 15181da177e4SLinus Torvalds 15191da177e4SLinus Torvalds /* 15201da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 15211da177e4SLinus Torvalds * on the list. 
15221da177e4SLinus Torvalds */ 15235e1be0fbSChristoph Hellwig error = xfs_read_agi(mp, tp, agno, &agibp); 15245e1be0fbSChristoph Hellwig if (error) 15251da177e4SLinus Torvalds return error; 15265e1be0fbSChristoph Hellwig 15271da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 15285e1be0fbSChristoph Hellwig 15291da177e4SLinus Torvalds /* 15301da177e4SLinus Torvalds * Get the index into the agi hash table for the 15311da177e4SLinus Torvalds * list this inode will go on. 15321da177e4SLinus Torvalds */ 15331da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 15341da177e4SLinus Torvalds ASSERT(agino != 0); 15351da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1536*69ef921bSChristoph Hellwig ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)); 15371da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 15381da177e4SLinus Torvalds 153916259e7dSChristoph Hellwig if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 15401da177e4SLinus Torvalds /* 15411da177e4SLinus Torvalds * We're at the head of the list. Get the inode's 15421da177e4SLinus Torvalds * on-disk buffer to see if there is anyone after us 15431da177e4SLinus Torvalds * on the list. Only modify our next pointer if it 15441da177e4SLinus Torvalds * is not already NULLAGINO. This saves us the overhead 15451da177e4SLinus Torvalds * of dealing with the buffer when there is no need to 15461da177e4SLinus Torvalds * change it. 
15471da177e4SLinus Torvalds */ 15480cadda1cSChristoph Hellwig error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 15491da177e4SLinus Torvalds if (error) { 15500b932cccSDave Chinner xfs_warn(mp, "%s: xfs_itobp() returned error %d.", 15510b932cccSDave Chinner __func__, error); 15521da177e4SLinus Torvalds return error; 15531da177e4SLinus Torvalds } 1554347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 15551da177e4SLinus Torvalds ASSERT(next_agino != 0); 15561da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1557347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 155892bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 15591da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 15601da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 15611da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 15621da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 15631da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 15641da177e4SLinus Torvalds } else { 15651da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 15661da177e4SLinus Torvalds } 15671da177e4SLinus Torvalds /* 15681da177e4SLinus Torvalds * Point the bucket head pointer at the next inode. 15691da177e4SLinus Torvalds */ 15701da177e4SLinus Torvalds ASSERT(next_agino != 0); 15711da177e4SLinus Torvalds ASSERT(next_agino != agino); 157216259e7dSChristoph Hellwig agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino); 15731da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 15741da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 15751da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 15761da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 15771da177e4SLinus Torvalds } else { 15781da177e4SLinus Torvalds /* 15791da177e4SLinus Torvalds * We need to search the list for the inode being freed. 
15801da177e4SLinus Torvalds */ 158116259e7dSChristoph Hellwig next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 15821da177e4SLinus Torvalds last_ibp = NULL; 15831da177e4SLinus Torvalds while (next_agino != agino) { 15841da177e4SLinus Torvalds /* 15851da177e4SLinus Torvalds * If the last inode wasn't the one pointing to 15861da177e4SLinus Torvalds * us, then release its buffer since we're not 15871da177e4SLinus Torvalds * going to do anything with it. 15881da177e4SLinus Torvalds */ 15891da177e4SLinus Torvalds if (last_ibp != NULL) { 15901da177e4SLinus Torvalds xfs_trans_brelse(tp, last_ibp); 15911da177e4SLinus Torvalds } 15921da177e4SLinus Torvalds next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 15931da177e4SLinus Torvalds error = xfs_inotobp(mp, tp, next_ino, &last_dip, 1594c679eef0SChristoph Hellwig &last_ibp, &last_offset, 0); 15951da177e4SLinus Torvalds if (error) { 15960b932cccSDave Chinner xfs_warn(mp, 15970b932cccSDave Chinner "%s: xfs_inotobp() returned error %d.", 15980b932cccSDave Chinner __func__, error); 15991da177e4SLinus Torvalds return error; 16001da177e4SLinus Torvalds } 1601347d1c01SChristoph Hellwig next_agino = be32_to_cpu(last_dip->di_next_unlinked); 16021da177e4SLinus Torvalds ASSERT(next_agino != NULLAGINO); 16031da177e4SLinus Torvalds ASSERT(next_agino != 0); 16041da177e4SLinus Torvalds } 16051da177e4SLinus Torvalds /* 16061da177e4SLinus Torvalds * Now last_ibp points to the buffer previous to us on 16071da177e4SLinus Torvalds * the unlinked list. Pull us from the list. 
16081da177e4SLinus Torvalds */ 16090cadda1cSChristoph Hellwig error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 16101da177e4SLinus Torvalds if (error) { 16110b932cccSDave Chinner xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", 16120b932cccSDave Chinner __func__, error); 16131da177e4SLinus Torvalds return error; 16141da177e4SLinus Torvalds } 1615347d1c01SChristoph Hellwig next_agino = be32_to_cpu(dip->di_next_unlinked); 16161da177e4SLinus Torvalds ASSERT(next_agino != 0); 16171da177e4SLinus Torvalds ASSERT(next_agino != agino); 16181da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 1619347d1c01SChristoph Hellwig dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 162092bfc6e7SChristoph Hellwig offset = ip->i_imap.im_boffset + 16211da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 16221da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 16231da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 16241da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 16251da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 16261da177e4SLinus Torvalds } else { 16271da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 16281da177e4SLinus Torvalds } 16291da177e4SLinus Torvalds /* 16301da177e4SLinus Torvalds * Point the previous inode on the list to the next inode. 
16311da177e4SLinus Torvalds */ 1632347d1c01SChristoph Hellwig last_dip->di_next_unlinked = cpu_to_be32(next_agino); 16331da177e4SLinus Torvalds ASSERT(next_agino != 0); 16341da177e4SLinus Torvalds offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 16351da177e4SLinus Torvalds xfs_trans_inode_buf(tp, last_ibp); 16361da177e4SLinus Torvalds xfs_trans_log_buf(tp, last_ibp, offset, 16371da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 16381da177e4SLinus Torvalds xfs_inobp_check(mp, last_ibp); 16391da177e4SLinus Torvalds } 16401da177e4SLinus Torvalds return 0; 16411da177e4SLinus Torvalds } 16421da177e4SLinus Torvalds 16435b3eed75SDave Chinner /* 16445b3eed75SDave Chinner * A big issue when freeing the inode cluster is is that we _cannot_ skip any 16455b3eed75SDave Chinner * inodes that are in memory - they all must be marked stale and attached to 16465b3eed75SDave Chinner * the cluster buffer. 16475b3eed75SDave Chinner */ 1648ba0f32d4SChristoph Hellwig STATIC void 16491da177e4SLinus Torvalds xfs_ifree_cluster( 16501da177e4SLinus Torvalds xfs_inode_t *free_ip, 16511da177e4SLinus Torvalds xfs_trans_t *tp, 16521da177e4SLinus Torvalds xfs_ino_t inum) 16531da177e4SLinus Torvalds { 16541da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 16551da177e4SLinus Torvalds int blks_per_cluster; 16561da177e4SLinus Torvalds int nbufs; 16571da177e4SLinus Torvalds int ninodes; 16585b257b4aSDave Chinner int i, j; 16591da177e4SLinus Torvalds xfs_daddr_t blkno; 16601da177e4SLinus Torvalds xfs_buf_t *bp; 16615b257b4aSDave Chinner xfs_inode_t *ip; 16621da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 16631da177e4SLinus Torvalds xfs_log_item_t *lip; 16645017e97dSDave Chinner struct xfs_perag *pag; 16651da177e4SLinus Torvalds 16665017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 16671da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 16681da177e4SLinus Torvalds blks_per_cluster = 1; 16691da177e4SLinus Torvalds 
ninodes = mp->m_sb.sb_inopblock; 16701da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 16711da177e4SLinus Torvalds } else { 16721da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 16731da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 16741da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 16751da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 16761da177e4SLinus Torvalds } 16771da177e4SLinus Torvalds 16781da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 16791da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 16801da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 16811da177e4SLinus Torvalds 16821da177e4SLinus Torvalds /* 16835b257b4aSDave Chinner * We obtain and lock the backing buffer first in the process 16845b257b4aSDave Chinner * here, as we have to ensure that any dirty inode that we 16855b257b4aSDave Chinner * can't get the flush lock on is attached to the buffer. 16865b257b4aSDave Chinner * If we scan the in-memory inodes first, then buffer IO can 16875b257b4aSDave Chinner * complete before we get a lock on it, and hence we may fail 16885b257b4aSDave Chinner * to mark all the active inodes on the buffer stale. 16891da177e4SLinus Torvalds */ 16901da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 16911da177e4SLinus Torvalds mp->m_bsize * blks_per_cluster, 16920cadda1cSChristoph Hellwig XBF_LOCK); 16931da177e4SLinus Torvalds 16945b257b4aSDave Chinner /* 16955b257b4aSDave Chinner * Walk the inodes already attached to the buffer and mark them 16965b257b4aSDave Chinner * stale. These will all have the flush locks held, so an 16975b3eed75SDave Chinner * in-memory inode walk can't lock them. By marking them all 16985b3eed75SDave Chinner * stale first, we will not attempt to lock them in the loop 16995b3eed75SDave Chinner * below as the XFS_ISTALE flag will be set. 
17005b257b4aSDave Chinner */ 17011da177e4SLinus Torvalds lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 17021da177e4SLinus Torvalds while (lip) { 17031da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 17041da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 17051da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 1706ca30b2a7SChristoph Hellwig lip->li_cb = xfs_istale_done; 17077b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, 17087b2e2a31SDavid Chinner &iip->ili_flush_lsn, 17097b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 1710e5ffd2bbSDavid Chinner xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 17111da177e4SLinus Torvalds } 17121da177e4SLinus Torvalds lip = lip->li_bio_list; 17131da177e4SLinus Torvalds } 17141da177e4SLinus Torvalds 17155b3eed75SDave Chinner 17165b257b4aSDave Chinner /* 17175b257b4aSDave Chinner * For each inode in memory attempt to add it to the inode 17185b257b4aSDave Chinner * buffer and set it up for being staled on buffer IO 17195b257b4aSDave Chinner * completion. This is safe as we've locked out tail pushing 17205b257b4aSDave Chinner * and flushing by locking the buffer. 17215b257b4aSDave Chinner * 17225b257b4aSDave Chinner * We have already marked every inode that was part of a 17235b257b4aSDave Chinner * transaction stale above, which means there is no point in 17245b257b4aSDave Chinner * even trying to lock them. 
17255b257b4aSDave Chinner */ 17265b257b4aSDave Chinner for (i = 0; i < ninodes; i++) { 17275b3eed75SDave Chinner retry: 17281a3e8f3dSDave Chinner rcu_read_lock(); 17295b257b4aSDave Chinner ip = radix_tree_lookup(&pag->pag_ici_root, 17305b257b4aSDave Chinner XFS_INO_TO_AGINO(mp, (inum + i))); 17311da177e4SLinus Torvalds 17321a3e8f3dSDave Chinner /* Inode not in memory, nothing to do */ 17331a3e8f3dSDave Chinner if (!ip) { 17341a3e8f3dSDave Chinner rcu_read_unlock(); 17355b257b4aSDave Chinner continue; 17365b257b4aSDave Chinner } 17375b257b4aSDave Chinner 17385b3eed75SDave Chinner /* 17391a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could 17401a3e8f3dSDave Chinner * find a recently freed or even reallocated inode 17411a3e8f3dSDave Chinner * during the lookup. We need to check under the 17421a3e8f3dSDave Chinner * i_flags_lock for a valid inode here. Skip it if it 17431a3e8f3dSDave Chinner * is not valid, the wrong inode or stale. 17441a3e8f3dSDave Chinner */ 17451a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 17461a3e8f3dSDave Chinner if (ip->i_ino != inum + i || 17471a3e8f3dSDave Chinner __xfs_iflags_test(ip, XFS_ISTALE)) { 17481a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 17491a3e8f3dSDave Chinner rcu_read_unlock(); 17501a3e8f3dSDave Chinner continue; 17511a3e8f3dSDave Chinner } 17521a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 17531a3e8f3dSDave Chinner 17541a3e8f3dSDave Chinner /* 17555b3eed75SDave Chinner * Don't try to lock/unlock the current inode, but we 17565b3eed75SDave Chinner * _cannot_ skip the other inodes that we did not find 17575b3eed75SDave Chinner * in the list attached to the buffer and are not 17585b3eed75SDave Chinner * already marked stale. If we can't lock it, back off 17595b3eed75SDave Chinner * and retry. 
17605b3eed75SDave Chinner */ 17615b257b4aSDave Chinner if (ip != free_ip && 17625b257b4aSDave Chinner !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 17631a3e8f3dSDave Chinner rcu_read_unlock(); 17645b3eed75SDave Chinner delay(1); 17655b3eed75SDave Chinner goto retry; 17665b257b4aSDave Chinner } 17671a3e8f3dSDave Chinner rcu_read_unlock(); 17685b257b4aSDave Chinner 17695b3eed75SDave Chinner xfs_iflock(ip); 17705b257b4aSDave Chinner xfs_iflags_set(ip, XFS_ISTALE); 17715b257b4aSDave Chinner 17725b3eed75SDave Chinner /* 17735b3eed75SDave Chinner * we don't need to attach clean inodes or those only 17745b3eed75SDave Chinner * with unlogged changes (which we throw away, anyway). 17755b3eed75SDave Chinner */ 17765b257b4aSDave Chinner iip = ip->i_itemp; 17775b3eed75SDave Chinner if (!iip || xfs_inode_clean(ip)) { 17785b257b4aSDave Chinner ASSERT(ip != free_ip); 17791da177e4SLinus Torvalds ip->i_update_core = 0; 17801da177e4SLinus Torvalds xfs_ifunlock(ip); 17811da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 17821da177e4SLinus Torvalds continue; 17831da177e4SLinus Torvalds } 17841da177e4SLinus Torvalds 17851da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 17861da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 17871da177e4SLinus Torvalds iip->ili_logged = 1; 17887b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 17897b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 17901da177e4SLinus Torvalds 1791ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_istale_done, 1792ca30b2a7SChristoph Hellwig &iip->ili_item); 17935b257b4aSDave Chinner 17945b257b4aSDave Chinner if (ip != free_ip) 17951da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 17961da177e4SLinus Torvalds } 17971da177e4SLinus Torvalds 17981da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 17991da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 18001da177e4SLinus Torvalds } 18011da177e4SLinus Torvalds 18025017e97dSDave Chinner xfs_perag_put(pag); 
18031da177e4SLinus Torvalds } 18041da177e4SLinus Torvalds 18051da177e4SLinus Torvalds /* 18061da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 18071da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 18081da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 18091da177e4SLinus Torvalds * the inode is already a part of the transaction. 18101da177e4SLinus Torvalds * 18111da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 18121da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 18131da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 18141da177e4SLinus Torvalds */ 18151da177e4SLinus Torvalds int 18161da177e4SLinus Torvalds xfs_ifree( 18171da177e4SLinus Torvalds xfs_trans_t *tp, 18181da177e4SLinus Torvalds xfs_inode_t *ip, 18191da177e4SLinus Torvalds xfs_bmap_free_t *flist) 18201da177e4SLinus Torvalds { 18211da177e4SLinus Torvalds int error; 18221da177e4SLinus Torvalds int delete; 18231da177e4SLinus Torvalds xfs_ino_t first_ino; 1824c319b58bSVlad Apostolov xfs_dinode_t *dip; 1825c319b58bSVlad Apostolov xfs_buf_t *ibp; 18261da177e4SLinus Torvalds 1827579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 18281da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 18291da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 18301da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 1831ba87ea69SLachlan McIlroy ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || 18321da177e4SLinus Torvalds ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 18331da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 18341da177e4SLinus Torvalds 18351da177e4SLinus Torvalds /* 18361da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 
18371da177e4SLinus Torvalds */ 18381da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 18391da177e4SLinus Torvalds if (error != 0) { 18401da177e4SLinus Torvalds return error; 18411da177e4SLinus Torvalds } 18421da177e4SLinus Torvalds 18431da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 18441da177e4SLinus Torvalds if (error != 0) { 18451da177e4SLinus Torvalds return error; 18461da177e4SLinus Torvalds } 18471da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 18481da177e4SLinus Torvalds ip->i_d.di_flags = 0; 18491da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 18501da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 18511da177e4SLinus Torvalds ip->i_df.if_ext_max = 18521da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 18531da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 18541da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 18551da177e4SLinus Torvalds /* 18561da177e4SLinus Torvalds * Bump the generation count so no one will be confused 18571da177e4SLinus Torvalds * by reincarnations of this inode. 18581da177e4SLinus Torvalds */ 18591da177e4SLinus Torvalds ip->i_d.di_gen++; 1860c319b58bSVlad Apostolov 18611da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 18621da177e4SLinus Torvalds 18630cadda1cSChristoph Hellwig error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); 1864c319b58bSVlad Apostolov if (error) 1865c319b58bSVlad Apostolov return error; 1866c319b58bSVlad Apostolov 1867c319b58bSVlad Apostolov /* 1868c319b58bSVlad Apostolov * Clear the on-disk di_mode. This is to prevent xfs_bulkstat 1869c319b58bSVlad Apostolov * from picking up this inode when it is reclaimed (its incore state 1870c319b58bSVlad Apostolov * initialzed but not flushed to disk yet). The in-core di_mode is 1871c319b58bSVlad Apostolov * already cleared and a corresponding transaction logged. 
1872c319b58bSVlad Apostolov * The hack here just synchronizes the in-core to on-disk 1873c319b58bSVlad Apostolov * di_mode value in advance before the actual inode sync to disk. 1874c319b58bSVlad Apostolov * This is OK because the inode is already unlinked and would never 1875c319b58bSVlad Apostolov * change its di_mode again for this inode generation. 1876c319b58bSVlad Apostolov * This is a temporary hack that would require a proper fix 1877c319b58bSVlad Apostolov * in the future. 1878c319b58bSVlad Apostolov */ 187981591fe2SChristoph Hellwig dip->di_mode = 0; 1880c319b58bSVlad Apostolov 18811da177e4SLinus Torvalds if (delete) { 18821da177e4SLinus Torvalds xfs_ifree_cluster(ip, tp, first_ino); 18831da177e4SLinus Torvalds } 18841da177e4SLinus Torvalds 18851da177e4SLinus Torvalds return 0; 18861da177e4SLinus Torvalds } 18871da177e4SLinus Torvalds 18881da177e4SLinus Torvalds /* 18891da177e4SLinus Torvalds * Reallocate the space for if_broot based on the number of records 18901da177e4SLinus Torvalds * being added or deleted as indicated in rec_diff. Move the records 18911da177e4SLinus Torvalds * and pointers in if_broot to fit the new size. When shrinking this 18921da177e4SLinus Torvalds * will eliminate holes between the records and pointers created by 18931da177e4SLinus Torvalds * the caller. When growing this will create holes to be filled in 18941da177e4SLinus Torvalds * by the caller. 18951da177e4SLinus Torvalds * 18961da177e4SLinus Torvalds * The caller must not request to add more records than would fit in 18971da177e4SLinus Torvalds * the on-disk inode root. If the if_broot is currently NULL, then 18981da177e4SLinus Torvalds * if we adding records one will be allocated. The caller must also 18991da177e4SLinus Torvalds * not request that the number of records go below zero, although 19001da177e4SLinus Torvalds * it can go to zero. 
19011da177e4SLinus Torvalds * 19021da177e4SLinus Torvalds * ip -- the inode whose if_broot area is changing 19031da177e4SLinus Torvalds * ext_diff -- the change in the number of records, positive or negative, 19041da177e4SLinus Torvalds * requested for the if_broot array. 19051da177e4SLinus Torvalds */ 19061da177e4SLinus Torvalds void 19071da177e4SLinus Torvalds xfs_iroot_realloc( 19081da177e4SLinus Torvalds xfs_inode_t *ip, 19091da177e4SLinus Torvalds int rec_diff, 19101da177e4SLinus Torvalds int whichfork) 19111da177e4SLinus Torvalds { 191260197e8dSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 19131da177e4SLinus Torvalds int cur_max; 19141da177e4SLinus Torvalds xfs_ifork_t *ifp; 19157cc95a82SChristoph Hellwig struct xfs_btree_block *new_broot; 19161da177e4SLinus Torvalds int new_max; 19171da177e4SLinus Torvalds size_t new_size; 19181da177e4SLinus Torvalds char *np; 19191da177e4SLinus Torvalds char *op; 19201da177e4SLinus Torvalds 19211da177e4SLinus Torvalds /* 19221da177e4SLinus Torvalds * Handle the degenerate case quietly. 19231da177e4SLinus Torvalds */ 19241da177e4SLinus Torvalds if (rec_diff == 0) { 19251da177e4SLinus Torvalds return; 19261da177e4SLinus Torvalds } 19271da177e4SLinus Torvalds 19281da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 19291da177e4SLinus Torvalds if (rec_diff > 0) { 19301da177e4SLinus Torvalds /* 19311da177e4SLinus Torvalds * If there wasn't any memory allocated before, just 19321da177e4SLinus Torvalds * allocate it now and get out. 
19331da177e4SLinus Torvalds */ 19341da177e4SLinus Torvalds if (ifp->if_broot_bytes == 0) { 19351da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 19364a7edddcSDave Chinner ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 19371da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 19381da177e4SLinus Torvalds return; 19391da177e4SLinus Torvalds } 19401da177e4SLinus Torvalds 19411da177e4SLinus Torvalds /* 19421da177e4SLinus Torvalds * If there is already an existing if_broot, then we need 19431da177e4SLinus Torvalds * to realloc() it and shift the pointers to their new 19441da177e4SLinus Torvalds * location. The records don't change location because 19451da177e4SLinus Torvalds * they are kept butted up against the btree block header. 19461da177e4SLinus Torvalds */ 194760197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 19481da177e4SLinus Torvalds new_max = cur_max + rec_diff; 19491da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 19507cc95a82SChristoph Hellwig ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 19511da177e4SLinus Torvalds (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 19524a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 195360197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 19541da177e4SLinus Torvalds ifp->if_broot_bytes); 195560197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 19561da177e4SLinus Torvalds (int)new_size); 19571da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 19581da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 19591da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 19601da177e4SLinus Torvalds memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 19611da177e4SLinus Torvalds return; 19621da177e4SLinus Torvalds } 19631da177e4SLinus Torvalds 19641da177e4SLinus Torvalds /* 19651da177e4SLinus Torvalds * rec_diff is less than 
0. In this case, we are shrinking the 19661da177e4SLinus Torvalds * if_broot buffer. It must already exist. If we go to zero 19671da177e4SLinus Torvalds * records, just get rid of the root and clear the status bit. 19681da177e4SLinus Torvalds */ 19691da177e4SLinus Torvalds ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 197060197e8dSChristoph Hellwig cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); 19711da177e4SLinus Torvalds new_max = cur_max + rec_diff; 19721da177e4SLinus Torvalds ASSERT(new_max >= 0); 19731da177e4SLinus Torvalds if (new_max > 0) 19741da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 19751da177e4SLinus Torvalds else 19761da177e4SLinus Torvalds new_size = 0; 19771da177e4SLinus Torvalds if (new_size > 0) { 19784a7edddcSDave Chinner new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS); 19791da177e4SLinus Torvalds /* 19801da177e4SLinus Torvalds * First copy over the btree block header. 19811da177e4SLinus Torvalds */ 19827cc95a82SChristoph Hellwig memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); 19831da177e4SLinus Torvalds } else { 19841da177e4SLinus Torvalds new_broot = NULL; 19851da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFBROOT; 19861da177e4SLinus Torvalds } 19871da177e4SLinus Torvalds 19881da177e4SLinus Torvalds /* 19891da177e4SLinus Torvalds * Only copy the records and pointers if there are any. 19901da177e4SLinus Torvalds */ 19911da177e4SLinus Torvalds if (new_max > 0) { 19921da177e4SLinus Torvalds /* 19931da177e4SLinus Torvalds * First copy the records. 19941da177e4SLinus Torvalds */ 1995136341b4SChristoph Hellwig op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); 1996136341b4SChristoph Hellwig np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); 19971da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 19981da177e4SLinus Torvalds 19991da177e4SLinus Torvalds /* 20001da177e4SLinus Torvalds * Then copy the pointers. 
20011da177e4SLinus Torvalds */ 200260197e8dSChristoph Hellwig op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 20031da177e4SLinus Torvalds ifp->if_broot_bytes); 200460197e8dSChristoph Hellwig np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, 20051da177e4SLinus Torvalds (int)new_size); 20061da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 20071da177e4SLinus Torvalds } 2008f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 20091da177e4SLinus Torvalds ifp->if_broot = new_broot; 20101da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 20111da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 20121da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 20131da177e4SLinus Torvalds return; 20141da177e4SLinus Torvalds } 20151da177e4SLinus Torvalds 20161da177e4SLinus Torvalds 20171da177e4SLinus Torvalds /* 20181da177e4SLinus Torvalds * This is called when the amount of space needed for if_data 20191da177e4SLinus Torvalds * is increased or decreased. The change in size is indicated by 20201da177e4SLinus Torvalds * the number of bytes that need to be added or deleted in the 20211da177e4SLinus Torvalds * byte_diff parameter. 20221da177e4SLinus Torvalds * 20231da177e4SLinus Torvalds * If the amount of space needed has decreased below the size of the 20241da177e4SLinus Torvalds * inline buffer, then switch to using the inline buffer. Otherwise, 20251da177e4SLinus Torvalds * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 20261da177e4SLinus Torvalds * to what is needed. 20271da177e4SLinus Torvalds * 20281da177e4SLinus Torvalds * ip -- the inode whose if_data area is changing 20291da177e4SLinus Torvalds * byte_diff -- the change in the number of bytes, positive or negative, 20301da177e4SLinus Torvalds * requested for the if_data array. 
20311da177e4SLinus Torvalds */ 20321da177e4SLinus Torvalds void 20331da177e4SLinus Torvalds xfs_idata_realloc( 20341da177e4SLinus Torvalds xfs_inode_t *ip, 20351da177e4SLinus Torvalds int byte_diff, 20361da177e4SLinus Torvalds int whichfork) 20371da177e4SLinus Torvalds { 20381da177e4SLinus Torvalds xfs_ifork_t *ifp; 20391da177e4SLinus Torvalds int new_size; 20401da177e4SLinus Torvalds int real_size; 20411da177e4SLinus Torvalds 20421da177e4SLinus Torvalds if (byte_diff == 0) { 20431da177e4SLinus Torvalds return; 20441da177e4SLinus Torvalds } 20451da177e4SLinus Torvalds 20461da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 20471da177e4SLinus Torvalds new_size = (int)ifp->if_bytes + byte_diff; 20481da177e4SLinus Torvalds ASSERT(new_size >= 0); 20491da177e4SLinus Torvalds 20501da177e4SLinus Torvalds if (new_size == 0) { 20511da177e4SLinus Torvalds if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2052f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 20531da177e4SLinus Torvalds } 20541da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 20551da177e4SLinus Torvalds real_size = 0; 20561da177e4SLinus Torvalds } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { 20571da177e4SLinus Torvalds /* 20581da177e4SLinus Torvalds * If the valid extents/data can fit in if_inline_ext/data, 20591da177e4SLinus Torvalds * copy them from the malloc'd vector and free it. 
20601da177e4SLinus Torvalds */ 20611da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 20621da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 20631da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 20641da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 20651da177e4SLinus Torvalds memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 20661da177e4SLinus Torvalds new_size); 2067f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 20681da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 20691da177e4SLinus Torvalds } 20701da177e4SLinus Torvalds real_size = 0; 20711da177e4SLinus Torvalds } else { 20721da177e4SLinus Torvalds /* 20731da177e4SLinus Torvalds * Stuck with malloc/realloc. 20741da177e4SLinus Torvalds * For inline data, the underlying buffer must be 20751da177e4SLinus Torvalds * a multiple of 4 bytes in size so that it can be 20761da177e4SLinus Torvalds * logged and stay on word boundaries. We enforce 20771da177e4SLinus Torvalds * that here. 20781da177e4SLinus Torvalds */ 20791da177e4SLinus Torvalds real_size = roundup(new_size, 4); 20801da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 20811da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 20824a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, 20834a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 20841da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 20851da177e4SLinus Torvalds /* 20861da177e4SLinus Torvalds * Only do the realloc if the underlying size 20871da177e4SLinus Torvalds * is really changing. 
20881da177e4SLinus Torvalds */ 20891da177e4SLinus Torvalds if (ifp->if_real_bytes != real_size) { 20901da177e4SLinus Torvalds ifp->if_u1.if_data = 20911da177e4SLinus Torvalds kmem_realloc(ifp->if_u1.if_data, 20921da177e4SLinus Torvalds real_size, 20931da177e4SLinus Torvalds ifp->if_real_bytes, 20944a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 20951da177e4SLinus Torvalds } 20961da177e4SLinus Torvalds } else { 20971da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 20984a7edddcSDave Chinner ifp->if_u1.if_data = kmem_alloc(real_size, 20994a7edddcSDave Chinner KM_SLEEP | KM_NOFS); 21001da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 21011da177e4SLinus Torvalds ifp->if_bytes); 21021da177e4SLinus Torvalds } 21031da177e4SLinus Torvalds } 21041da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 21051da177e4SLinus Torvalds ifp->if_bytes = new_size; 21061da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 21071da177e4SLinus Torvalds } 21081da177e4SLinus Torvalds 21091da177e4SLinus Torvalds void 21101da177e4SLinus Torvalds xfs_idestroy_fork( 21111da177e4SLinus Torvalds xfs_inode_t *ip, 21121da177e4SLinus Torvalds int whichfork) 21131da177e4SLinus Torvalds { 21141da177e4SLinus Torvalds xfs_ifork_t *ifp; 21151da177e4SLinus Torvalds 21161da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 21171da177e4SLinus Torvalds if (ifp->if_broot != NULL) { 2118f0e2d93cSDenys Vlasenko kmem_free(ifp->if_broot); 21191da177e4SLinus Torvalds ifp->if_broot = NULL; 21201da177e4SLinus Torvalds } 21211da177e4SLinus Torvalds 21221da177e4SLinus Torvalds /* 21231da177e4SLinus Torvalds * If the format is local, then we can't have an extents 21241da177e4SLinus Torvalds * array so just look for an inline data array. If we're 21251da177e4SLinus Torvalds * not local then we may or may not have an extents list, 21261da177e4SLinus Torvalds * so check and free it up if we do. 
21271da177e4SLinus Torvalds */ 21281da177e4SLinus Torvalds if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 21291da177e4SLinus Torvalds if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 21301da177e4SLinus Torvalds (ifp->if_u1.if_data != NULL)) { 21311da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 2132f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_data); 21331da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 21341da177e4SLinus Torvalds ifp->if_real_bytes = 0; 21351da177e4SLinus Torvalds } 21361da177e4SLinus Torvalds } else if ((ifp->if_flags & XFS_IFEXTENTS) && 21370293ce3aSMandy Kirkconnell ((ifp->if_flags & XFS_IFEXTIREC) || 21380293ce3aSMandy Kirkconnell ((ifp->if_u1.if_extents != NULL) && 21390293ce3aSMandy Kirkconnell (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) { 21401da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 21414eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 21421da177e4SLinus Torvalds } 21431da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents == NULL || 21441da177e4SLinus Torvalds ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 21451da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 21461da177e4SLinus Torvalds if (whichfork == XFS_ATTR_FORK) { 21471da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 21481da177e4SLinus Torvalds ip->i_afp = NULL; 21491da177e4SLinus Torvalds } 21501da177e4SLinus Torvalds } 21511da177e4SLinus Torvalds 21521da177e4SLinus Torvalds /* 215360ec6783SChristoph Hellwig * This is called to unpin an inode. The caller must have the inode locked 215460ec6783SChristoph Hellwig * in at least shared mode so that the buffer cannot be subsequently pinned 215560ec6783SChristoph Hellwig * once someone is waiting for it to be unpinned. 
21561da177e4SLinus Torvalds */ 215760ec6783SChristoph Hellwig static void 215860ec6783SChristoph Hellwig xfs_iunpin_nowait( 215960ec6783SChristoph Hellwig struct xfs_inode *ip) 2160a3f74ffbSDavid Chinner { 2161579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2162a3f74ffbSDavid Chinner 21634aaf15d1SDave Chinner trace_xfs_inode_unpin_nowait(ip, _RET_IP_); 21644aaf15d1SDave Chinner 2165a3f74ffbSDavid Chinner /* Give the log a push to start the unpinning I/O */ 216660ec6783SChristoph Hellwig xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2167a14a348bSChristoph Hellwig 2168a3f74ffbSDavid Chinner } 2169a3f74ffbSDavid Chinner 2170777df5afSDave Chinner void 21711da177e4SLinus Torvalds xfs_iunpin_wait( 217260ec6783SChristoph Hellwig struct xfs_inode *ip) 21731da177e4SLinus Torvalds { 217460ec6783SChristoph Hellwig if (xfs_ipincount(ip)) { 217560ec6783SChristoph Hellwig xfs_iunpin_nowait(ip); 217660ec6783SChristoph Hellwig wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0)); 21771da177e4SLinus Torvalds } 21781da177e4SLinus Torvalds } 21791da177e4SLinus Torvalds 21801da177e4SLinus Torvalds /* 21811da177e4SLinus Torvalds * xfs_iextents_copy() 21821da177e4SLinus Torvalds * 21831da177e4SLinus Torvalds * This is called to copy the REAL extents (as opposed to the delayed 21841da177e4SLinus Torvalds * allocation extents) from the inode into the given buffer. It 21851da177e4SLinus Torvalds * returns the number of bytes copied into the buffer. 21861da177e4SLinus Torvalds * 21871da177e4SLinus Torvalds * If there are no delayed allocation extents, then we can just 21881da177e4SLinus Torvalds * memcpy() the extents into the buffer. Otherwise, we need to 21891da177e4SLinus Torvalds * examine each extent in turn and skip those which are delayed. 
21901da177e4SLinus Torvalds */ 21911da177e4SLinus Torvalds int 21921da177e4SLinus Torvalds xfs_iextents_copy( 21931da177e4SLinus Torvalds xfs_inode_t *ip, 2194a6f64d4aSChristoph Hellwig xfs_bmbt_rec_t *dp, 21951da177e4SLinus Torvalds int whichfork) 21961da177e4SLinus Torvalds { 21971da177e4SLinus Torvalds int copied; 21981da177e4SLinus Torvalds int i; 21991da177e4SLinus Torvalds xfs_ifork_t *ifp; 22001da177e4SLinus Torvalds int nrecs; 22011da177e4SLinus Torvalds xfs_fsblock_t start_block; 22021da177e4SLinus Torvalds 22031da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 2204579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 22051da177e4SLinus Torvalds ASSERT(ifp->if_bytes > 0); 22061da177e4SLinus Torvalds 22071da177e4SLinus Torvalds nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 22083a59c94cSEric Sandeen XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork); 22091da177e4SLinus Torvalds ASSERT(nrecs > 0); 22101da177e4SLinus Torvalds 22111da177e4SLinus Torvalds /* 22121da177e4SLinus Torvalds * There are some delayed allocation extents in the 22131da177e4SLinus Torvalds * inode, so copy the extents one at a time and skip 22141da177e4SLinus Torvalds * the delayed ones. There must be at least one 22151da177e4SLinus Torvalds * non-delayed extent. 22161da177e4SLinus Torvalds */ 22171da177e4SLinus Torvalds copied = 0; 22181da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 2219a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 22201da177e4SLinus Torvalds start_block = xfs_bmbt_get_startblock(ep); 22219d87c319SEric Sandeen if (isnullstartblock(start_block)) { 22221da177e4SLinus Torvalds /* 22231da177e4SLinus Torvalds * It's a delayed allocation extent, so skip it. 
22241da177e4SLinus Torvalds */ 22251da177e4SLinus Torvalds continue; 22261da177e4SLinus Torvalds } 22271da177e4SLinus Torvalds 22281da177e4SLinus Torvalds /* Translate to on disk format */ 2229cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l0), &dp->l0); 2230cd8b0a97SChristoph Hellwig put_unaligned(cpu_to_be64(ep->l1), &dp->l1); 2231a6f64d4aSChristoph Hellwig dp++; 22321da177e4SLinus Torvalds copied++; 22331da177e4SLinus Torvalds } 22341da177e4SLinus Torvalds ASSERT(copied != 0); 2235a6f64d4aSChristoph Hellwig xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); 22361da177e4SLinus Torvalds 22371da177e4SLinus Torvalds return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 22381da177e4SLinus Torvalds } 22391da177e4SLinus Torvalds 22401da177e4SLinus Torvalds /* 22411da177e4SLinus Torvalds * Each of the following cases stores data into the same region 22421da177e4SLinus Torvalds * of the on-disk inode, so only one of them can be valid at 22431da177e4SLinus Torvalds * any given time. While it is possible to have conflicting formats 22441da177e4SLinus Torvalds * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 22451da177e4SLinus Torvalds * in EXTENTS format, this can only happen when the fork has 22461da177e4SLinus Torvalds * changed formats after being modified but before being flushed. 22471da177e4SLinus Torvalds * In these cases, the format always takes precedence, because the 22481da177e4SLinus Torvalds * format indicates the current state of the fork. 
22491da177e4SLinus Torvalds */ 22501da177e4SLinus Torvalds /*ARGSUSED*/ 2251e4ac967bSDavid Chinner STATIC void 22521da177e4SLinus Torvalds xfs_iflush_fork( 22531da177e4SLinus Torvalds xfs_inode_t *ip, 22541da177e4SLinus Torvalds xfs_dinode_t *dip, 22551da177e4SLinus Torvalds xfs_inode_log_item_t *iip, 22561da177e4SLinus Torvalds int whichfork, 22571da177e4SLinus Torvalds xfs_buf_t *bp) 22581da177e4SLinus Torvalds { 22591da177e4SLinus Torvalds char *cp; 22601da177e4SLinus Torvalds xfs_ifork_t *ifp; 22611da177e4SLinus Torvalds xfs_mount_t *mp; 22621da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 22631da177e4SLinus Torvalds int first; 22641da177e4SLinus Torvalds #endif 22651da177e4SLinus Torvalds static const short brootflag[2] = 22661da177e4SLinus Torvalds { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 22671da177e4SLinus Torvalds static const short dataflag[2] = 22681da177e4SLinus Torvalds { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 22691da177e4SLinus Torvalds static const short extflag[2] = 22701da177e4SLinus Torvalds { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 22711da177e4SLinus Torvalds 2272e4ac967bSDavid Chinner if (!iip) 2273e4ac967bSDavid Chinner return; 22741da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 22751da177e4SLinus Torvalds /* 22761da177e4SLinus Torvalds * This can happen if we gave up in iformat in an error path, 22771da177e4SLinus Torvalds * for the attribute fork. 
22781da177e4SLinus Torvalds */ 2279e4ac967bSDavid Chinner if (!ifp) { 22801da177e4SLinus Torvalds ASSERT(whichfork == XFS_ATTR_FORK); 2281e4ac967bSDavid Chinner return; 22821da177e4SLinus Torvalds } 22831da177e4SLinus Torvalds cp = XFS_DFORK_PTR(dip, whichfork); 22841da177e4SLinus Torvalds mp = ip->i_mount; 22851da177e4SLinus Torvalds switch (XFS_IFORK_FORMAT(ip, whichfork)) { 22861da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 22871da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 22881da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 22891da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_data != NULL); 22901da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 22911da177e4SLinus Torvalds memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 22921da177e4SLinus Torvalds } 22931da177e4SLinus Torvalds break; 22941da177e4SLinus Torvalds 22951da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 22961da177e4SLinus Torvalds ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 22971da177e4SLinus Torvalds !(iip->ili_format.ilf_fields & extflag[whichfork])); 22981da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 22991da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 2300ab1908a5SChristoph Hellwig ASSERT(xfs_iext_get_ext(ifp, 0)); 23011da177e4SLinus Torvalds ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 23021da177e4SLinus Torvalds (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 23031da177e4SLinus Torvalds whichfork); 23041da177e4SLinus Torvalds } 23051da177e4SLinus Torvalds break; 23061da177e4SLinus Torvalds 23071da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 23081da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 23091da177e4SLinus Torvalds (ifp->if_broot_bytes > 0)) { 23101da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 23111da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 23121da177e4SLinus Torvalds (XFS_IFORK_SIZE(ip, whichfork) + 23131da177e4SLinus Torvalds 
XFS_BROOT_SIZE_ADJ)); 231460197e8dSChristoph Hellwig xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, 23151da177e4SLinus Torvalds (xfs_bmdr_block_t *)cp, 23161da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, mp, whichfork)); 23171da177e4SLinus Torvalds } 23181da177e4SLinus Torvalds break; 23191da177e4SLinus Torvalds 23201da177e4SLinus Torvalds case XFS_DINODE_FMT_DEV: 23211da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 23221da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 232381591fe2SChristoph Hellwig xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); 23241da177e4SLinus Torvalds } 23251da177e4SLinus Torvalds break; 23261da177e4SLinus Torvalds 23271da177e4SLinus Torvalds case XFS_DINODE_FMT_UUID: 23281da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 23291da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 233081591fe2SChristoph Hellwig memcpy(XFS_DFORK_DPTR(dip), 233181591fe2SChristoph Hellwig &ip->i_df.if_u2.if_uuid, 23321da177e4SLinus Torvalds sizeof(uuid_t)); 23331da177e4SLinus Torvalds } 23341da177e4SLinus Torvalds break; 23351da177e4SLinus Torvalds 23361da177e4SLinus Torvalds default: 23371da177e4SLinus Torvalds ASSERT(0); 23381da177e4SLinus Torvalds break; 23391da177e4SLinus Torvalds } 23401da177e4SLinus Torvalds } 23411da177e4SLinus Torvalds 2342bad55843SDavid Chinner STATIC int 2343bad55843SDavid Chinner xfs_iflush_cluster( 2344bad55843SDavid Chinner xfs_inode_t *ip, 2345bad55843SDavid Chinner xfs_buf_t *bp) 2346bad55843SDavid Chinner { 2347bad55843SDavid Chinner xfs_mount_t *mp = ip->i_mount; 23485017e97dSDave Chinner struct xfs_perag *pag; 2349bad55843SDavid Chinner unsigned long first_index, mask; 2350c8f5f12eSDavid Chinner unsigned long inodes_per_cluster; 2351bad55843SDavid Chinner int ilist_size; 2352bad55843SDavid Chinner xfs_inode_t **ilist; 2353bad55843SDavid Chinner xfs_inode_t *iq; 2354bad55843SDavid Chinner int nr_found; 2355bad55843SDavid Chinner int clcount = 0; 
2356bad55843SDavid Chinner int bufwasdelwri; 2357bad55843SDavid Chinner int i; 2358bad55843SDavid Chinner 23595017e97dSDave Chinner pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 2360bad55843SDavid Chinner 2361c8f5f12eSDavid Chinner inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; 2362c8f5f12eSDavid Chinner ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 236349383b0eSDavid Chinner ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 2364bad55843SDavid Chinner if (!ilist) 236544b56e0aSDave Chinner goto out_put; 2366bad55843SDavid Chinner 2367bad55843SDavid Chinner mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 2368bad55843SDavid Chinner first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 23691a3e8f3dSDave Chinner rcu_read_lock(); 2370bad55843SDavid Chinner /* really need a gang lookup range call here */ 2371bad55843SDavid Chinner nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2372c8f5f12eSDavid Chinner first_index, inodes_per_cluster); 2373bad55843SDavid Chinner if (nr_found == 0) 2374bad55843SDavid Chinner goto out_free; 2375bad55843SDavid Chinner 2376bad55843SDavid Chinner for (i = 0; i < nr_found; i++) { 2377bad55843SDavid Chinner iq = ilist[i]; 2378bad55843SDavid Chinner if (iq == ip) 2379bad55843SDavid Chinner continue; 23801a3e8f3dSDave Chinner 23811a3e8f3dSDave Chinner /* 23821a3e8f3dSDave Chinner * because this is an RCU protected lookup, we could find a 23831a3e8f3dSDave Chinner * recently freed or even reallocated inode during the lookup. 23841a3e8f3dSDave Chinner * We need to check under the i_flags_lock for a valid inode 23851a3e8f3dSDave Chinner * here. Skip it if it is not valid or the wrong inode. 
23861a3e8f3dSDave Chinner */ 23871a3e8f3dSDave Chinner spin_lock(&ip->i_flags_lock); 23881a3e8f3dSDave Chinner if (!ip->i_ino || 23891a3e8f3dSDave Chinner (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 23901a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 23911a3e8f3dSDave Chinner continue; 23921a3e8f3dSDave Chinner } 23931a3e8f3dSDave Chinner spin_unlock(&ip->i_flags_lock); 23941a3e8f3dSDave Chinner 2395bad55843SDavid Chinner /* 2396bad55843SDavid Chinner * Do an un-protected check to see if the inode is dirty and 2397bad55843SDavid Chinner * is a candidate for flushing. These checks will be repeated 2398bad55843SDavid Chinner * later after the appropriate locks are acquired. 2399bad55843SDavid Chinner */ 240033540408SDavid Chinner if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 2401bad55843SDavid Chinner continue; 2402bad55843SDavid Chinner 2403bad55843SDavid Chinner /* 2404bad55843SDavid Chinner * Try to get locks. If any are unavailable or it is pinned, 2405bad55843SDavid Chinner * then this inode cannot be flushed and is skipped. 2406bad55843SDavid Chinner */ 2407bad55843SDavid Chinner 2408bad55843SDavid Chinner if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 2409bad55843SDavid Chinner continue; 2410bad55843SDavid Chinner if (!xfs_iflock_nowait(iq)) { 2411bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2412bad55843SDavid Chinner continue; 2413bad55843SDavid Chinner } 2414bad55843SDavid Chinner if (xfs_ipincount(iq)) { 2415bad55843SDavid Chinner xfs_ifunlock(iq); 2416bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2417bad55843SDavid Chinner continue; 2418bad55843SDavid Chinner } 2419bad55843SDavid Chinner 2420bad55843SDavid Chinner /* 2421bad55843SDavid Chinner * arriving here means that this inode can be flushed. First 2422bad55843SDavid Chinner * re-check that it's dirty before flushing. 
2423bad55843SDavid Chinner */ 242433540408SDavid Chinner if (!xfs_inode_clean(iq)) { 2425bad55843SDavid Chinner int error; 2426bad55843SDavid Chinner error = xfs_iflush_int(iq, bp); 2427bad55843SDavid Chinner if (error) { 2428bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2429bad55843SDavid Chinner goto cluster_corrupt_out; 2430bad55843SDavid Chinner } 2431bad55843SDavid Chinner clcount++; 2432bad55843SDavid Chinner } else { 2433bad55843SDavid Chinner xfs_ifunlock(iq); 2434bad55843SDavid Chinner } 2435bad55843SDavid Chinner xfs_iunlock(iq, XFS_ILOCK_SHARED); 2436bad55843SDavid Chinner } 2437bad55843SDavid Chinner 2438bad55843SDavid Chinner if (clcount) { 2439bad55843SDavid Chinner XFS_STATS_INC(xs_icluster_flushcnt); 2440bad55843SDavid Chinner XFS_STATS_ADD(xs_icluster_flushinode, clcount); 2441bad55843SDavid Chinner } 2442bad55843SDavid Chinner 2443bad55843SDavid Chinner out_free: 24441a3e8f3dSDave Chinner rcu_read_unlock(); 2445f0e2d93cSDenys Vlasenko kmem_free(ilist); 244644b56e0aSDave Chinner out_put: 244744b56e0aSDave Chinner xfs_perag_put(pag); 2448bad55843SDavid Chinner return 0; 2449bad55843SDavid Chinner 2450bad55843SDavid Chinner 2451bad55843SDavid Chinner cluster_corrupt_out: 2452bad55843SDavid Chinner /* 2453bad55843SDavid Chinner * Corruption detected in the clustering loop. Invalidate the 2454bad55843SDavid Chinner * inode buffer and shut down the filesystem. 2455bad55843SDavid Chinner */ 24561a3e8f3dSDave Chinner rcu_read_unlock(); 2457bad55843SDavid Chinner /* 2458bad55843SDavid Chinner * Clean up the buffer. If it was B_DELWRI, just release it -- 2459bad55843SDavid Chinner * brelse can handle it with no problems. If not, shut down the 2460bad55843SDavid Chinner * filesystem before releasing the buffer. 
2461bad55843SDavid Chinner */ 2462bad55843SDavid Chinner bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); 2463bad55843SDavid Chinner if (bufwasdelwri) 2464bad55843SDavid Chinner xfs_buf_relse(bp); 2465bad55843SDavid Chinner 2466bad55843SDavid Chinner xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2467bad55843SDavid Chinner 2468bad55843SDavid Chinner if (!bufwasdelwri) { 2469bad55843SDavid Chinner /* 2470bad55843SDavid Chinner * Just like incore_relse: if we have b_iodone functions, 2471bad55843SDavid Chinner * mark the buffer as an error and call them. Otherwise 2472bad55843SDavid Chinner * mark it as stale and brelse. 2473bad55843SDavid Chinner */ 2474bad55843SDavid Chinner if (XFS_BUF_IODONE_FUNC(bp)) { 2475bad55843SDavid Chinner XFS_BUF_UNDONE(bp); 2476bad55843SDavid Chinner XFS_BUF_STALE(bp); 2477bad55843SDavid Chinner XFS_BUF_ERROR(bp,EIO); 24781a1a3e97SChristoph Hellwig xfs_buf_ioend(bp, 0); 2479bad55843SDavid Chinner } else { 2480bad55843SDavid Chinner XFS_BUF_STALE(bp); 2481bad55843SDavid Chinner xfs_buf_relse(bp); 2482bad55843SDavid Chinner } 2483bad55843SDavid Chinner } 2484bad55843SDavid Chinner 2485bad55843SDavid Chinner /* 2486bad55843SDavid Chinner * Unlocks the flush lock 2487bad55843SDavid Chinner */ 2488bad55843SDavid Chinner xfs_iflush_abort(iq); 2489f0e2d93cSDenys Vlasenko kmem_free(ilist); 249044b56e0aSDave Chinner xfs_perag_put(pag); 2491bad55843SDavid Chinner return XFS_ERROR(EFSCORRUPTED); 2492bad55843SDavid Chinner } 2493bad55843SDavid Chinner 24941da177e4SLinus Torvalds /* 24951da177e4SLinus Torvalds * xfs_iflush() will write a modified inode's changes out to the 24961da177e4SLinus Torvalds * inode's on disk home. The caller must have the inode lock held 2497c63942d3SDavid Chinner * in at least shared mode and the inode flush completion must be 2498c63942d3SDavid Chinner * active as well. The inode lock will still be held upon return from 24991da177e4SLinus Torvalds * the call and the caller is free to unlock it. 
2500c63942d3SDavid Chinner * The inode flush will be completed when the inode reaches the disk. 25011da177e4SLinus Torvalds * The flags indicate how the inode's buffer should be written out. 25021da177e4SLinus Torvalds */ 25031da177e4SLinus Torvalds int 25041da177e4SLinus Torvalds xfs_iflush( 25051da177e4SLinus Torvalds xfs_inode_t *ip, 25061da177e4SLinus Torvalds uint flags) 25071da177e4SLinus Torvalds { 25081da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 25091da177e4SLinus Torvalds xfs_buf_t *bp; 25101da177e4SLinus Torvalds xfs_dinode_t *dip; 25111da177e4SLinus Torvalds xfs_mount_t *mp; 25121da177e4SLinus Torvalds int error; 25131da177e4SLinus Torvalds 25141da177e4SLinus Torvalds XFS_STATS_INC(xs_iflush_count); 25151da177e4SLinus Torvalds 2516579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2517c63942d3SDavid Chinner ASSERT(!completion_done(&ip->i_flush)); 25181da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 25191da177e4SLinus Torvalds ip->i_d.di_nextents > ip->i_df.if_ext_max); 25201da177e4SLinus Torvalds 25211da177e4SLinus Torvalds iip = ip->i_itemp; 25221da177e4SLinus Torvalds mp = ip->i_mount; 25231da177e4SLinus Torvalds 25241da177e4SLinus Torvalds /* 2525a3f74ffbSDavid Chinner * We can't flush the inode until it is unpinned, so wait for it if we 2526a3f74ffbSDavid Chinner * are allowed to block. We know no one new can pin it, because we are 2527a3f74ffbSDavid Chinner * holding the inode lock shared and you need to hold it exclusively to 2528a3f74ffbSDavid Chinner * pin the inode. 2529a3f74ffbSDavid Chinner * 2530a3f74ffbSDavid Chinner * If we are not allowed to block, force the log out asynchronously so 2531a3f74ffbSDavid Chinner * that when we come back the inode will be unpinned. If other inodes 2532a3f74ffbSDavid Chinner * in the same cluster are dirty, they will probably write the inode 2533a3f74ffbSDavid Chinner * out for us if they occur after the log force completes. 
25341da177e4SLinus Torvalds */ 2535c854363eSDave Chinner if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { 2536a3f74ffbSDavid Chinner xfs_iunpin_nowait(ip); 2537a3f74ffbSDavid Chinner xfs_ifunlock(ip); 2538a3f74ffbSDavid Chinner return EAGAIN; 2539a3f74ffbSDavid Chinner } 25401da177e4SLinus Torvalds xfs_iunpin_wait(ip); 25411da177e4SLinus Torvalds 25421da177e4SLinus Torvalds /* 25434b6a4688SDave Chinner * For stale inodes we cannot rely on the backing buffer remaining 25444b6a4688SDave Chinner * stale in cache for the remaining life of the stale inode and so 25454b6a4688SDave Chinner * xfs_itobp() below may give us a buffer that no longer contains 25464b6a4688SDave Chinner * inodes below. We have to check this after ensuring the inode is 25474b6a4688SDave Chinner * unpinned so that it is safe to reclaim the stale inode after the 25484b6a4688SDave Chinner * flush call. 25494b6a4688SDave Chinner */ 25504b6a4688SDave Chinner if (xfs_iflags_test(ip, XFS_ISTALE)) { 25514b6a4688SDave Chinner xfs_ifunlock(ip); 25524b6a4688SDave Chinner return 0; 25534b6a4688SDave Chinner } 25544b6a4688SDave Chinner 25554b6a4688SDave Chinner /* 25561da177e4SLinus Torvalds * This may have been unpinned because the filesystem is shutting 25571da177e4SLinus Torvalds * down forcibly. If that's the case we must not write this inode 25581da177e4SLinus Torvalds * to disk, because the log record didn't make it to disk! 25591da177e4SLinus Torvalds */ 25601da177e4SLinus Torvalds if (XFS_FORCED_SHUTDOWN(mp)) { 25611da177e4SLinus Torvalds ip->i_update_core = 0; 25621da177e4SLinus Torvalds if (iip) 25631da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 25641da177e4SLinus Torvalds xfs_ifunlock(ip); 25651da177e4SLinus Torvalds return XFS_ERROR(EIO); 25661da177e4SLinus Torvalds } 25671da177e4SLinus Torvalds 25681da177e4SLinus Torvalds /* 2569a3f74ffbSDavid Chinner * Get the buffer containing the on-disk inode. 
2570a3f74ffbSDavid Chinner */ 257176d8b277SChristoph Hellwig error = xfs_itobp(mp, NULL, ip, &dip, &bp, 25721bfd8d04SDave Chinner (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); 2573a3f74ffbSDavid Chinner if (error || !bp) { 2574a3f74ffbSDavid Chinner xfs_ifunlock(ip); 2575a3f74ffbSDavid Chinner return error; 2576a3f74ffbSDavid Chinner } 2577a3f74ffbSDavid Chinner 2578a3f74ffbSDavid Chinner /* 25791da177e4SLinus Torvalds * First flush out the inode that xfs_iflush was called with. 25801da177e4SLinus Torvalds */ 25811da177e4SLinus Torvalds error = xfs_iflush_int(ip, bp); 2582bad55843SDavid Chinner if (error) 25831da177e4SLinus Torvalds goto corrupt_out; 25841da177e4SLinus Torvalds 25851da177e4SLinus Torvalds /* 2586a3f74ffbSDavid Chinner * If the buffer is pinned then push on the log now so we won't 2587a3f74ffbSDavid Chinner * get stuck waiting in the write for too long. 2588a3f74ffbSDavid Chinner */ 2589a3f74ffbSDavid Chinner if (XFS_BUF_ISPINNED(bp)) 2590a14a348bSChristoph Hellwig xfs_log_force(mp, 0); 2591a3f74ffbSDavid Chinner 2592a3f74ffbSDavid Chinner /* 25931da177e4SLinus Torvalds * inode clustering: 25941da177e4SLinus Torvalds * see if other inodes can be gathered into this write 25951da177e4SLinus Torvalds */ 2596bad55843SDavid Chinner error = xfs_iflush_cluster(ip, bp); 2597bad55843SDavid Chinner if (error) 25981da177e4SLinus Torvalds goto cluster_corrupt_out; 25991da177e4SLinus Torvalds 2600c854363eSDave Chinner if (flags & SYNC_WAIT) 26011da177e4SLinus Torvalds error = xfs_bwrite(mp, bp); 2602c854363eSDave Chinner else 2603c854363eSDave Chinner xfs_bdwrite(mp, bp); 26041da177e4SLinus Torvalds return error; 26051da177e4SLinus Torvalds 26061da177e4SLinus Torvalds corrupt_out: 26071da177e4SLinus Torvalds xfs_buf_relse(bp); 26087d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 26091da177e4SLinus Torvalds cluster_corrupt_out: 26101da177e4SLinus Torvalds /* 26111da177e4SLinus Torvalds * Unlocks the flush lock 26121da177e4SLinus Torvalds 
*/ 2613bad55843SDavid Chinner xfs_iflush_abort(ip); 26141da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 26151da177e4SLinus Torvalds } 26161da177e4SLinus Torvalds 26171da177e4SLinus Torvalds 26181da177e4SLinus Torvalds STATIC int 26191da177e4SLinus Torvalds xfs_iflush_int( 26201da177e4SLinus Torvalds xfs_inode_t *ip, 26211da177e4SLinus Torvalds xfs_buf_t *bp) 26221da177e4SLinus Torvalds { 26231da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 26241da177e4SLinus Torvalds xfs_dinode_t *dip; 26251da177e4SLinus Torvalds xfs_mount_t *mp; 26261da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 26271da177e4SLinus Torvalds int first; 26281da177e4SLinus Torvalds #endif 26291da177e4SLinus Torvalds 2630579aa9caSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2631c63942d3SDavid Chinner ASSERT(!completion_done(&ip->i_flush)); 26321da177e4SLinus Torvalds ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 26331da177e4SLinus Torvalds ip->i_d.di_nextents > ip->i_df.if_ext_max); 26341da177e4SLinus Torvalds 26351da177e4SLinus Torvalds iip = ip->i_itemp; 26361da177e4SLinus Torvalds mp = ip->i_mount; 26371da177e4SLinus Torvalds 26381da177e4SLinus Torvalds /* set *dip = inode's place in the buffer */ 263992bfc6e7SChristoph Hellwig dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 26401da177e4SLinus Torvalds 26411da177e4SLinus Torvalds /* 26421da177e4SLinus Torvalds * Clear i_update_core before copying out the data. 26431da177e4SLinus Torvalds * This is for coordination with our timestamp updates 26441da177e4SLinus Torvalds * that don't hold the inode lock. They will always 26451da177e4SLinus Torvalds * update the timestamps BEFORE setting i_update_core, 26461da177e4SLinus Torvalds * so if we clear i_update_core after they set it we 26471da177e4SLinus Torvalds * are guaranteed to see their updates to the timestamps. 
26481da177e4SLinus Torvalds * I believe that this depends on strongly ordered memory 26491da177e4SLinus Torvalds * semantics, but we have that. We use the SYNCHRONIZE 26501da177e4SLinus Torvalds * macro to make sure that the compiler does not reorder 26511da177e4SLinus Torvalds * the i_update_core access below the data copy below. 26521da177e4SLinus Torvalds */ 26531da177e4SLinus Torvalds ip->i_update_core = 0; 26541da177e4SLinus Torvalds SYNCHRONIZE(); 26551da177e4SLinus Torvalds 265642fe2b1fSChristoph Hellwig /* 2657f9581b14SChristoph Hellwig * Make sure to get the latest timestamps from the Linux inode. 265842fe2b1fSChristoph Hellwig */ 2659f9581b14SChristoph Hellwig xfs_synchronize_times(ip); 266042fe2b1fSChristoph Hellwig 2661*69ef921bSChristoph Hellwig if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 26621da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 26636a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 26646a19d939SDave Chinner "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 26656a19d939SDave Chinner __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); 26661da177e4SLinus Torvalds goto corrupt_out; 26671da177e4SLinus Torvalds } 26681da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 26691da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 26706a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 26716a19d939SDave Chinner "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 26726a19d939SDave Chinner __func__, ip->i_ino, ip, ip->i_d.di_magic); 26731da177e4SLinus Torvalds goto corrupt_out; 26741da177e4SLinus Torvalds } 26751da177e4SLinus Torvalds if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 26761da177e4SLinus Torvalds if (XFS_TEST_ERROR( 26771da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 26781da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 26791da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 
26806a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 26816a19d939SDave Chinner "%s: Bad regular inode %Lu, ptr 0x%p", 26826a19d939SDave Chinner __func__, ip->i_ino, ip); 26831da177e4SLinus Torvalds goto corrupt_out; 26841da177e4SLinus Torvalds } 26851da177e4SLinus Torvalds } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 26861da177e4SLinus Torvalds if (XFS_TEST_ERROR( 26871da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 26881da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 26891da177e4SLinus Torvalds (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 26901da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 26916a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 26926a19d939SDave Chinner "%s: Bad directory inode %Lu, ptr 0x%p", 26936a19d939SDave Chinner __func__, ip->i_ino, ip); 26941da177e4SLinus Torvalds goto corrupt_out; 26951da177e4SLinus Torvalds } 26961da177e4SLinus Torvalds } 26971da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 26981da177e4SLinus Torvalds ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 26991da177e4SLinus Torvalds XFS_RANDOM_IFLUSH_5)) { 27006a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27016a19d939SDave Chinner "%s: detected corrupt incore inode %Lu, " 27026a19d939SDave Chinner "total extents = %d, nblocks = %Ld, ptr 0x%p", 27036a19d939SDave Chinner __func__, ip->i_ino, 27041da177e4SLinus Torvalds ip->i_d.di_nextents + ip->i_d.di_anextents, 27056a19d939SDave Chinner ip->i_d.di_nblocks, ip); 27061da177e4SLinus Torvalds goto corrupt_out; 27071da177e4SLinus Torvalds } 27081da177e4SLinus Torvalds if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 27091da177e4SLinus Torvalds mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 27106a19d939SDave Chinner xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 27116a19d939SDave Chinner "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 27126a19d939SDave Chinner __func__, ip->i_ino, 
ip->i_d.di_forkoff, ip); 27131da177e4SLinus Torvalds goto corrupt_out; 27141da177e4SLinus Torvalds } 27151da177e4SLinus Torvalds /* 27161da177e4SLinus Torvalds * bump the flush iteration count, used to detect flushes which 27171da177e4SLinus Torvalds * postdate a log record during recovery. 27181da177e4SLinus Torvalds */ 27191da177e4SLinus Torvalds 27201da177e4SLinus Torvalds ip->i_d.di_flushiter++; 27211da177e4SLinus Torvalds 27221da177e4SLinus Torvalds /* 27231da177e4SLinus Torvalds * Copy the dirty parts of the inode into the on-disk 27241da177e4SLinus Torvalds * inode. We always copy out the core of the inode, 27251da177e4SLinus Torvalds * because if the inode is dirty at all the core must 27261da177e4SLinus Torvalds * be. 27271da177e4SLinus Torvalds */ 272881591fe2SChristoph Hellwig xfs_dinode_to_disk(dip, &ip->i_d); 27291da177e4SLinus Torvalds 27301da177e4SLinus Torvalds /* Wrap, we never let the log put out DI_MAX_FLUSH */ 27311da177e4SLinus Torvalds if (ip->i_d.di_flushiter == DI_MAX_FLUSH) 27321da177e4SLinus Torvalds ip->i_d.di_flushiter = 0; 27331da177e4SLinus Torvalds 27341da177e4SLinus Torvalds /* 27351da177e4SLinus Torvalds * If this is really an old format inode and the superblock version 27361da177e4SLinus Torvalds * has not been updated to support only new format inodes, then 27371da177e4SLinus Torvalds * convert back to the old inode format. If the superblock version 27381da177e4SLinus Torvalds * has been updated, then make the conversion permanent. 27391da177e4SLinus Torvalds */ 274051ce16d5SChristoph Hellwig ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); 274151ce16d5SChristoph Hellwig if (ip->i_d.di_version == 1) { 274262118709SEric Sandeen if (!xfs_sb_version_hasnlink(&mp->m_sb)) { 27431da177e4SLinus Torvalds /* 27441da177e4SLinus Torvalds * Convert it back. 
27451da177e4SLinus Torvalds */ 27461da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); 274781591fe2SChristoph Hellwig dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); 27481da177e4SLinus Torvalds } else { 27491da177e4SLinus Torvalds /* 27501da177e4SLinus Torvalds * The superblock version has already been bumped, 27511da177e4SLinus Torvalds * so just make the conversion to the new inode 27521da177e4SLinus Torvalds * format permanent. 27531da177e4SLinus Torvalds */ 275451ce16d5SChristoph Hellwig ip->i_d.di_version = 2; 275551ce16d5SChristoph Hellwig dip->di_version = 2; 27561da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 275781591fe2SChristoph Hellwig dip->di_onlink = 0; 27581da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 275981591fe2SChristoph Hellwig memset(&(dip->di_pad[0]), 0, 276081591fe2SChristoph Hellwig sizeof(dip->di_pad)); 27616743099cSArkadiusz Mi?kiewicz ASSERT(xfs_get_projid(ip) == 0); 27621da177e4SLinus Torvalds } 27631da177e4SLinus Torvalds } 27641da177e4SLinus Torvalds 2765e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); 2766e4ac967bSDavid Chinner if (XFS_IFORK_Q(ip)) 2767e4ac967bSDavid Chinner xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); 27681da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 27691da177e4SLinus Torvalds 27701da177e4SLinus Torvalds /* 27711da177e4SLinus Torvalds * We've recorded everything logged in the inode, so we'd 27721da177e4SLinus Torvalds * like to clear the ilf_fields bits so we don't log and 27731da177e4SLinus Torvalds * flush things unnecessarily. However, we can't stop 27741da177e4SLinus Torvalds * logging all this information until the data we've copied 27751da177e4SLinus Torvalds * into the disk buffer is written to disk. 
If we did we might 27761da177e4SLinus Torvalds * overwrite the copy of the inode in the log with all the 27771da177e4SLinus Torvalds * data after re-logging only part of it, and in the face of 27781da177e4SLinus Torvalds * a crash we wouldn't have all the data we need to recover. 27791da177e4SLinus Torvalds * 27801da177e4SLinus Torvalds * What we do is move the bits to the ili_last_fields field. 27811da177e4SLinus Torvalds * When logging the inode, these bits are moved back to the 27821da177e4SLinus Torvalds * ilf_fields field. In the xfs_iflush_done() routine we 27831da177e4SLinus Torvalds * clear ili_last_fields, since we know that the information 27841da177e4SLinus Torvalds * those bits represent is permanently on disk. As long as 27851da177e4SLinus Torvalds * the flush completes before the inode is logged again, then 27861da177e4SLinus Torvalds * both ilf_fields and ili_last_fields will be cleared. 27871da177e4SLinus Torvalds * 27881da177e4SLinus Torvalds * We can play with the ilf_fields bits here, because the inode 27891da177e4SLinus Torvalds * lock must be held exclusively in order to set bits there 27901da177e4SLinus Torvalds * and the flush lock protects the ili_last_fields bits. 27911da177e4SLinus Torvalds * Set ili_logged so the flush done 27921da177e4SLinus Torvalds * routine can tell whether or not to look in the AIL. 27931da177e4SLinus Torvalds * Also, store the current LSN of the inode so that we can tell 27941da177e4SLinus Torvalds * whether the item has moved in the AIL from xfs_iflush_done(). 27951da177e4SLinus Torvalds * In order to read the lsn we need the AIL lock, because 27961da177e4SLinus Torvalds * it is a 64 bit value that cannot be read atomically. 
27971da177e4SLinus Torvalds */ 27981da177e4SLinus Torvalds if (iip != NULL && iip->ili_format.ilf_fields != 0) { 27991da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 28001da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 28011da177e4SLinus Torvalds iip->ili_logged = 1; 28021da177e4SLinus Torvalds 28037b2e2a31SDavid Chinner xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 28047b2e2a31SDavid Chinner &iip->ili_item.li_lsn); 28051da177e4SLinus Torvalds 28061da177e4SLinus Torvalds /* 28071da177e4SLinus Torvalds * Attach the function xfs_iflush_done to the inode's 28081da177e4SLinus Torvalds * buffer. This will remove the inode from the AIL 28091da177e4SLinus Torvalds * and unlock the inode's flush lock when the inode is 28101da177e4SLinus Torvalds * completely written to disk. 28111da177e4SLinus Torvalds */ 2812ca30b2a7SChristoph Hellwig xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 28131da177e4SLinus Torvalds 28141da177e4SLinus Torvalds ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 28151da177e4SLinus Torvalds ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 28161da177e4SLinus Torvalds } else { 28171da177e4SLinus Torvalds /* 28181da177e4SLinus Torvalds * We're flushing an inode which is not in the AIL and has 28191da177e4SLinus Torvalds * not been logged but has i_update_core set. For this 28201da177e4SLinus Torvalds * case we can use a B_DELWRI flush and immediately drop 28211da177e4SLinus Torvalds * the inode flush lock because we can avoid the whole 28221da177e4SLinus Torvalds * AIL state thing. It's OK to drop the flush lock now, 28231da177e4SLinus Torvalds * because we've already locked the buffer and to do anything 28241da177e4SLinus Torvalds * you really need both. 
28251da177e4SLinus Torvalds */ 28261da177e4SLinus Torvalds if (iip != NULL) { 28271da177e4SLinus Torvalds ASSERT(iip->ili_logged == 0); 28281da177e4SLinus Torvalds ASSERT(iip->ili_last_fields == 0); 28291da177e4SLinus Torvalds ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0); 28301da177e4SLinus Torvalds } 28311da177e4SLinus Torvalds xfs_ifunlock(ip); 28321da177e4SLinus Torvalds } 28331da177e4SLinus Torvalds 28341da177e4SLinus Torvalds return 0; 28351da177e4SLinus Torvalds 28361da177e4SLinus Torvalds corrupt_out: 28371da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 28381da177e4SLinus Torvalds } 28391da177e4SLinus Torvalds 28404eea22f0SMandy Kirkconnell /* 28414eea22f0SMandy Kirkconnell * Return a pointer to the extent record at file index idx. 28424eea22f0SMandy Kirkconnell */ 2843a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * 28444eea22f0SMandy Kirkconnell xfs_iext_get_ext( 28454eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 28464eea22f0SMandy Kirkconnell xfs_extnum_t idx) /* index of target extent */ 28474eea22f0SMandy Kirkconnell { 28484eea22f0SMandy Kirkconnell ASSERT(idx >= 0); 284987bef181SChristoph Hellwig ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 285087bef181SChristoph Hellwig 28510293ce3aSMandy Kirkconnell if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { 28520293ce3aSMandy Kirkconnell return ifp->if_u1.if_ext_irec->er_extbuf; 28530293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 28540293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* irec pointer */ 28550293ce3aSMandy Kirkconnell int erp_idx = 0; /* irec index */ 28560293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = idx; /* ext index in target list */ 28570293ce3aSMandy Kirkconnell 28580293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 28590293ce3aSMandy Kirkconnell return &erp->er_extbuf[page_idx]; 28600293ce3aSMandy Kirkconnell } else if (ifp->if_bytes) { 28614eea22f0SMandy Kirkconnell return 
&ifp->if_u1.if_extents[idx]; 28624eea22f0SMandy Kirkconnell } else { 28634eea22f0SMandy Kirkconnell return NULL; 28644eea22f0SMandy Kirkconnell } 28654eea22f0SMandy Kirkconnell } 28664eea22f0SMandy Kirkconnell 28674eea22f0SMandy Kirkconnell /* 28684eea22f0SMandy Kirkconnell * Insert new item(s) into the extent records for incore inode 28694eea22f0SMandy Kirkconnell * fork 'ifp'. 'count' new items are inserted at index 'idx'. 28704eea22f0SMandy Kirkconnell */ 28714eea22f0SMandy Kirkconnell void 28724eea22f0SMandy Kirkconnell xfs_iext_insert( 28736ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 28744eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* starting index of new items */ 28754eea22f0SMandy Kirkconnell xfs_extnum_t count, /* number of inserted items */ 28766ef35544SChristoph Hellwig xfs_bmbt_irec_t *new, /* items to insert */ 28776ef35544SChristoph Hellwig int state) /* type of extent conversion */ 28784eea22f0SMandy Kirkconnell { 28796ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 28804eea22f0SMandy Kirkconnell xfs_extnum_t i; /* extent record index */ 28814eea22f0SMandy Kirkconnell 28820b1b213fSChristoph Hellwig trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_); 28830b1b213fSChristoph Hellwig 28844eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 28854eea22f0SMandy Kirkconnell xfs_iext_add(ifp, idx, count); 2886a6f64d4aSChristoph Hellwig for (i = idx; i < idx + count; i++, new++) 2887a6f64d4aSChristoph Hellwig xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new); 28884eea22f0SMandy Kirkconnell } 28894eea22f0SMandy Kirkconnell 28904eea22f0SMandy Kirkconnell /* 28914eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 28924eea22f0SMandy Kirkconnell * extents needs to be increased. 
The ext_diff parameter stores the 28934eea22f0SMandy Kirkconnell * number of new extents being added and the idx parameter contains 28944eea22f0SMandy Kirkconnell * the extent index where the new extents will be added. If the new 28954eea22f0SMandy Kirkconnell * extents are being appended, then we just need to (re)allocate and 28964eea22f0SMandy Kirkconnell * initialize the space. Otherwise, if the new extents are being 28974eea22f0SMandy Kirkconnell * inserted into the middle of the existing entries, a bit more work 28984eea22f0SMandy Kirkconnell * is required to make room for the new extents to be inserted. The 28994eea22f0SMandy Kirkconnell * caller is responsible for filling in the new extent entries upon 29004eea22f0SMandy Kirkconnell * return. 29014eea22f0SMandy Kirkconnell */ 29024eea22f0SMandy Kirkconnell void 29034eea22f0SMandy Kirkconnell xfs_iext_add( 29044eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 29054eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin adding exts */ 2906c41564b5SNathan Scott int ext_diff) /* number of extents to add */ 29074eea22f0SMandy Kirkconnell { 29084eea22f0SMandy Kirkconnell int byte_diff; /* new bytes being added */ 29094eea22f0SMandy Kirkconnell int new_size; /* size of extents after adding */ 29104eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 29114eea22f0SMandy Kirkconnell 29124eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 29134eea22f0SMandy Kirkconnell ASSERT((idx >= 0) && (idx <= nextents)); 29144eea22f0SMandy Kirkconnell byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); 29154eea22f0SMandy Kirkconnell new_size = ifp->if_bytes + byte_diff; 29164eea22f0SMandy Kirkconnell /* 29174eea22f0SMandy Kirkconnell * If the new number of extents (nextents + ext_diff) 29184eea22f0SMandy Kirkconnell * fits inside the inode, then continue to use the inline 29194eea22f0SMandy Kirkconnell * extent buffer. 
29204eea22f0SMandy Kirkconnell */ 29214eea22f0SMandy Kirkconnell if (nextents + ext_diff <= XFS_INLINE_EXTS) { 29224eea22f0SMandy Kirkconnell if (idx < nextents) { 29234eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff], 29244eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx], 29254eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 29264eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff); 29274eea22f0SMandy Kirkconnell } 29284eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 29294eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 29304eea22f0SMandy Kirkconnell } 29314eea22f0SMandy Kirkconnell /* 29324eea22f0SMandy Kirkconnell * Otherwise use a linear (direct) extent list. 29334eea22f0SMandy Kirkconnell * If the extents are currently inside the inode, 29344eea22f0SMandy Kirkconnell * xfs_iext_realloc_direct will switch us from 29354eea22f0SMandy Kirkconnell * inline to direct extent allocation mode. 
29364eea22f0SMandy Kirkconnell */ 29370293ce3aSMandy Kirkconnell else if (nextents + ext_diff <= XFS_LINEAR_EXTS) { 29384eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 29394eea22f0SMandy Kirkconnell if (idx < nextents) { 29404eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx + ext_diff], 29414eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx], 29424eea22f0SMandy Kirkconnell (nextents - idx) * sizeof(xfs_bmbt_rec_t)); 29434eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); 29444eea22f0SMandy Kirkconnell } 29454eea22f0SMandy Kirkconnell } 29460293ce3aSMandy Kirkconnell /* Indirection array */ 29470293ce3aSMandy Kirkconnell else { 29480293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; 29490293ce3aSMandy Kirkconnell int erp_idx = 0; 29500293ce3aSMandy Kirkconnell int page_idx = idx; 29510293ce3aSMandy Kirkconnell 29520293ce3aSMandy Kirkconnell ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); 29530293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 29540293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); 29550293ce3aSMandy Kirkconnell } else { 29560293ce3aSMandy Kirkconnell xfs_iext_irec_init(ifp); 29570293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 29580293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 29590293ce3aSMandy Kirkconnell } 29600293ce3aSMandy Kirkconnell /* Extents fit in target extent page */ 29610293ce3aSMandy Kirkconnell if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { 29620293ce3aSMandy Kirkconnell if (page_idx < erp->er_extcount) { 29630293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[page_idx + ext_diff], 29640293ce3aSMandy Kirkconnell &erp->er_extbuf[page_idx], 29650293ce3aSMandy Kirkconnell (erp->er_extcount - page_idx) * 29660293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 29670293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[page_idx], 0, byte_diff); 29680293ce3aSMandy Kirkconnell } 29690293ce3aSMandy Kirkconnell 
erp->er_extcount += ext_diff; 29700293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 29710293ce3aSMandy Kirkconnell } 29720293ce3aSMandy Kirkconnell /* Insert a new extent page */ 29730293ce3aSMandy Kirkconnell else if (erp) { 29740293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(ifp, 29750293ce3aSMandy Kirkconnell erp_idx, page_idx, ext_diff); 29760293ce3aSMandy Kirkconnell } 29770293ce3aSMandy Kirkconnell /* 29780293ce3aSMandy Kirkconnell * If extent(s) are being appended to the last page in 29790293ce3aSMandy Kirkconnell * the indirection array and the new extent(s) don't fit 29800293ce3aSMandy Kirkconnell * in the page, then erp is NULL and erp_idx is set to 29810293ce3aSMandy Kirkconnell * the next index needed in the indirection array. 29820293ce3aSMandy Kirkconnell */ 29830293ce3aSMandy Kirkconnell else { 29840293ce3aSMandy Kirkconnell int count = ext_diff; 29850293ce3aSMandy Kirkconnell 29860293ce3aSMandy Kirkconnell while (count) { 29870293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 29880293ce3aSMandy Kirkconnell erp->er_extcount = count; 29890293ce3aSMandy Kirkconnell count -= MIN(count, (int)XFS_LINEAR_EXTS); 29900293ce3aSMandy Kirkconnell if (count) { 29910293ce3aSMandy Kirkconnell erp_idx++; 29920293ce3aSMandy Kirkconnell } 29930293ce3aSMandy Kirkconnell } 29940293ce3aSMandy Kirkconnell } 29950293ce3aSMandy Kirkconnell } 29964eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 29974eea22f0SMandy Kirkconnell } 29984eea22f0SMandy Kirkconnell 29994eea22f0SMandy Kirkconnell /* 30000293ce3aSMandy Kirkconnell * This is called when incore extents are being added to the indirection 30010293ce3aSMandy Kirkconnell * array and the new extents do not fit in the target extent list. 
The 30020293ce3aSMandy Kirkconnell * erp_idx parameter contains the irec index for the target extent list 30030293ce3aSMandy Kirkconnell * in the indirection array, and the idx parameter contains the extent 30040293ce3aSMandy Kirkconnell * index within the list. The number of extents being added is stored 30050293ce3aSMandy Kirkconnell * in the count parameter. 30060293ce3aSMandy Kirkconnell * 30070293ce3aSMandy Kirkconnell * |-------| |-------| 30080293ce3aSMandy Kirkconnell * | | | | idx - number of extents before idx 30090293ce3aSMandy Kirkconnell * | idx | | count | 30100293ce3aSMandy Kirkconnell * | | | | count - number of extents being inserted at idx 30110293ce3aSMandy Kirkconnell * |-------| |-------| 30120293ce3aSMandy Kirkconnell * | count | | nex2 | nex2 - number of extents after idx + count 30130293ce3aSMandy Kirkconnell * |-------| |-------| 30140293ce3aSMandy Kirkconnell */ 30150293ce3aSMandy Kirkconnell void 30160293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi( 30170293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 30180293ce3aSMandy Kirkconnell int erp_idx, /* target extent irec index */ 30190293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index within target list */ 30200293ce3aSMandy Kirkconnell int count) /* new extents being added */ 30210293ce3aSMandy Kirkconnell { 30220293ce3aSMandy Kirkconnell int byte_diff; /* new bytes being added */ 30230293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* pointer to irec entry */ 30240293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* number of extents to add */ 30250293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* new extents still needed */ 30260293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 30270293ce3aSMandy Kirkconnell xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ 30280293ce3aSMandy Kirkconnell int nlists; /* number of irec's (lists) */ 30290293ce3aSMandy Kirkconnell 30300293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & 
XFS_IFEXTIREC); 30310293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 30320293ce3aSMandy Kirkconnell nex2 = erp->er_extcount - idx; 30330293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 30340293ce3aSMandy Kirkconnell 30350293ce3aSMandy Kirkconnell /* 30360293ce3aSMandy Kirkconnell * Save second part of target extent list 30370293ce3aSMandy Kirkconnell * (all extents past */ 30380293ce3aSMandy Kirkconnell if (nex2) { 30390293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 30406785073bSDavid Chinner nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); 30410293ce3aSMandy Kirkconnell memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); 30420293ce3aSMandy Kirkconnell erp->er_extcount -= nex2; 30430293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); 30440293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[idx], 0, byte_diff); 30450293ce3aSMandy Kirkconnell } 30460293ce3aSMandy Kirkconnell 30470293ce3aSMandy Kirkconnell /* 30480293ce3aSMandy Kirkconnell * Add the new extents to the end of the target 30490293ce3aSMandy Kirkconnell * list, then allocate new irec record(s) and 30500293ce3aSMandy Kirkconnell * extent buffer(s) as needed to store the rest 30510293ce3aSMandy Kirkconnell * of the new extents. 
30520293ce3aSMandy Kirkconnell */ 30530293ce3aSMandy Kirkconnell ext_cnt = count; 30540293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); 30550293ce3aSMandy Kirkconnell if (ext_diff) { 30560293ce3aSMandy Kirkconnell erp->er_extcount += ext_diff; 30570293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 30580293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 30590293ce3aSMandy Kirkconnell } 30600293ce3aSMandy Kirkconnell while (ext_cnt) { 30610293ce3aSMandy Kirkconnell erp_idx++; 30620293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 30630293ce3aSMandy Kirkconnell ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); 30640293ce3aSMandy Kirkconnell erp->er_extcount = ext_diff; 30650293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); 30660293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 30670293ce3aSMandy Kirkconnell } 30680293ce3aSMandy Kirkconnell 30690293ce3aSMandy Kirkconnell /* Add nex2 extents back to indirection array */ 30700293ce3aSMandy Kirkconnell if (nex2) { 30710293ce3aSMandy Kirkconnell xfs_extnum_t ext_avail; 30720293ce3aSMandy Kirkconnell int i; 30730293ce3aSMandy Kirkconnell 30740293ce3aSMandy Kirkconnell byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); 30750293ce3aSMandy Kirkconnell ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 30760293ce3aSMandy Kirkconnell i = 0; 30770293ce3aSMandy Kirkconnell /* 30780293ce3aSMandy Kirkconnell * If nex2 extents fit in the current page, append 30790293ce3aSMandy Kirkconnell * nex2_ep after the new extents. 30800293ce3aSMandy Kirkconnell */ 30810293ce3aSMandy Kirkconnell if (nex2 <= ext_avail) { 30820293ce3aSMandy Kirkconnell i = erp->er_extcount; 30830293ce3aSMandy Kirkconnell } 30840293ce3aSMandy Kirkconnell /* 30850293ce3aSMandy Kirkconnell * Otherwise, check if space is available in the 30860293ce3aSMandy Kirkconnell * next page. 
30870293ce3aSMandy Kirkconnell */ 30880293ce3aSMandy Kirkconnell else if ((erp_idx < nlists - 1) && 30890293ce3aSMandy Kirkconnell (nex2 <= (ext_avail = XFS_LINEAR_EXTS - 30900293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { 30910293ce3aSMandy Kirkconnell erp_idx++; 30920293ce3aSMandy Kirkconnell erp++; 30930293ce3aSMandy Kirkconnell /* Create a hole for nex2 extents */ 30940293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex2], erp->er_extbuf, 30950293ce3aSMandy Kirkconnell erp->er_extcount * sizeof(xfs_bmbt_rec_t)); 30960293ce3aSMandy Kirkconnell } 30970293ce3aSMandy Kirkconnell /* 30980293ce3aSMandy Kirkconnell * Final choice, create a new extent page for 30990293ce3aSMandy Kirkconnell * nex2 extents. 31000293ce3aSMandy Kirkconnell */ 31010293ce3aSMandy Kirkconnell else { 31020293ce3aSMandy Kirkconnell erp_idx++; 31030293ce3aSMandy Kirkconnell erp = xfs_iext_irec_new(ifp, erp_idx); 31040293ce3aSMandy Kirkconnell } 31050293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3106f0e2d93cSDenys Vlasenko kmem_free(nex2_ep); 31070293ce3aSMandy Kirkconnell erp->er_extcount += nex2; 31080293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 31090293ce3aSMandy Kirkconnell } 31100293ce3aSMandy Kirkconnell } 31110293ce3aSMandy Kirkconnell 31120293ce3aSMandy Kirkconnell /* 31134eea22f0SMandy Kirkconnell * This is called when the amount of space required for incore file 31144eea22f0SMandy Kirkconnell * extents needs to be decreased. The ext_diff parameter stores the 31154eea22f0SMandy Kirkconnell * number of extents to be removed and the idx parameter contains 31164eea22f0SMandy Kirkconnell * the extent index where the extents will be removed from. 
31170293ce3aSMandy Kirkconnell * 31180293ce3aSMandy Kirkconnell * If the amount of space needed has decreased below the linear 31190293ce3aSMandy Kirkconnell * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous 31200293ce3aSMandy Kirkconnell * extent array. Otherwise, use kmem_realloc() to adjust the 31210293ce3aSMandy Kirkconnell * size to what is needed. 31224eea22f0SMandy Kirkconnell */ 31234eea22f0SMandy Kirkconnell void 31244eea22f0SMandy Kirkconnell xfs_iext_remove( 31256ef35544SChristoph Hellwig xfs_inode_t *ip, /* incore inode pointer */ 31264eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 31276ef35544SChristoph Hellwig int ext_diff, /* number of extents to remove */ 31286ef35544SChristoph Hellwig int state) /* type of extent conversion */ 31294eea22f0SMandy Kirkconnell { 31306ef35544SChristoph Hellwig xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df; 31314eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 31324eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 31334eea22f0SMandy Kirkconnell 31340b1b213fSChristoph Hellwig trace_xfs_iext_remove(ip, idx, state, _RET_IP_); 31350b1b213fSChristoph Hellwig 31364eea22f0SMandy Kirkconnell ASSERT(ext_diff > 0); 31374eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 31384eea22f0SMandy Kirkconnell new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); 31394eea22f0SMandy Kirkconnell 31404eea22f0SMandy Kirkconnell if (new_size == 0) { 31414eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 31420293ce3aSMandy Kirkconnell } else if (ifp->if_flags & XFS_IFEXTIREC) { 31430293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(ifp, idx, ext_diff); 31444eea22f0SMandy Kirkconnell } else if (ifp->if_real_bytes) { 31454eea22f0SMandy Kirkconnell xfs_iext_remove_direct(ifp, idx, ext_diff); 31464eea22f0SMandy Kirkconnell } else { 31474eea22f0SMandy Kirkconnell xfs_iext_remove_inline(ifp, 
idx, ext_diff); 31484eea22f0SMandy Kirkconnell } 31494eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 31504eea22f0SMandy Kirkconnell } 31514eea22f0SMandy Kirkconnell 31524eea22f0SMandy Kirkconnell /* 31534eea22f0SMandy Kirkconnell * This removes ext_diff extents from the inline buffer, beginning 31544eea22f0SMandy Kirkconnell * at extent index idx. 31554eea22f0SMandy Kirkconnell */ 31564eea22f0SMandy Kirkconnell void 31574eea22f0SMandy Kirkconnell xfs_iext_remove_inline( 31584eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 31594eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 31604eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 31614eea22f0SMandy Kirkconnell { 31624eea22f0SMandy Kirkconnell int nextents; /* number of extents in file */ 31634eea22f0SMandy Kirkconnell 31640293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 31654eea22f0SMandy Kirkconnell ASSERT(idx < XFS_INLINE_EXTS); 31664eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 31674eea22f0SMandy Kirkconnell ASSERT(((nextents - ext_diff) > 0) && 31684eea22f0SMandy Kirkconnell (nextents - ext_diff) < XFS_INLINE_EXTS); 31694eea22f0SMandy Kirkconnell 31704eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 31714eea22f0SMandy Kirkconnell memmove(&ifp->if_u2.if_inline_ext[idx], 31724eea22f0SMandy Kirkconnell &ifp->if_u2.if_inline_ext[idx + ext_diff], 31734eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 31744eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 31754eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff], 31764eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 31774eea22f0SMandy Kirkconnell } else { 31784eea22f0SMandy Kirkconnell memset(&ifp->if_u2.if_inline_ext[idx], 0, 31794eea22f0SMandy Kirkconnell ext_diff * sizeof(xfs_bmbt_rec_t)); 31804eea22f0SMandy Kirkconnell } 31814eea22f0SMandy Kirkconnell } 
31824eea22f0SMandy Kirkconnell 31834eea22f0SMandy Kirkconnell /* 31844eea22f0SMandy Kirkconnell * This removes ext_diff extents from a linear (direct) extent list, 31854eea22f0SMandy Kirkconnell * beginning at extent index idx. If the extents are being removed 31864eea22f0SMandy Kirkconnell * from the end of the list (ie. truncate) then we just need to re- 31874eea22f0SMandy Kirkconnell * allocate the list to remove the extra space. Otherwise, if the 31884eea22f0SMandy Kirkconnell * extents are being removed from the middle of the existing extent 31894eea22f0SMandy Kirkconnell * entries, then we first need to move the extent records beginning 31904eea22f0SMandy Kirkconnell * at idx + ext_diff up in the list to overwrite the records being 31914eea22f0SMandy Kirkconnell * removed, then remove the extra space via kmem_realloc. 31924eea22f0SMandy Kirkconnell */ 31934eea22f0SMandy Kirkconnell void 31944eea22f0SMandy Kirkconnell xfs_iext_remove_direct( 31954eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 31964eea22f0SMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing exts */ 31974eea22f0SMandy Kirkconnell int ext_diff) /* number of extents to remove */ 31984eea22f0SMandy Kirkconnell { 31994eea22f0SMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 32004eea22f0SMandy Kirkconnell int new_size; /* size of extents after removal */ 32014eea22f0SMandy Kirkconnell 32020293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 32034eea22f0SMandy Kirkconnell new_size = ifp->if_bytes - 32044eea22f0SMandy Kirkconnell (ext_diff * sizeof(xfs_bmbt_rec_t)); 32054eea22f0SMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 32064eea22f0SMandy Kirkconnell 32074eea22f0SMandy Kirkconnell if (new_size == 0) { 32084eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 32094eea22f0SMandy Kirkconnell return; 32104eea22f0SMandy Kirkconnell } 32114eea22f0SMandy Kirkconnell /* Move extents up in the list (if needed) 
*/ 32124eea22f0SMandy Kirkconnell if (idx + ext_diff < nextents) { 32134eea22f0SMandy Kirkconnell memmove(&ifp->if_u1.if_extents[idx], 32144eea22f0SMandy Kirkconnell &ifp->if_u1.if_extents[idx + ext_diff], 32154eea22f0SMandy Kirkconnell (nextents - (idx + ext_diff)) * 32164eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 32174eea22f0SMandy Kirkconnell } 32184eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[nextents - ext_diff], 32194eea22f0SMandy Kirkconnell 0, ext_diff * sizeof(xfs_bmbt_rec_t)); 32204eea22f0SMandy Kirkconnell /* 32214eea22f0SMandy Kirkconnell * Reallocate the direct extent list. If the extents 32224eea22f0SMandy Kirkconnell * will fit inside the inode then xfs_iext_realloc_direct 32234eea22f0SMandy Kirkconnell * will switch from direct to inline extent allocation 32244eea22f0SMandy Kirkconnell * mode for us. 32254eea22f0SMandy Kirkconnell */ 32264eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(ifp, new_size); 32274eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 32284eea22f0SMandy Kirkconnell } 32294eea22f0SMandy Kirkconnell 32304eea22f0SMandy Kirkconnell /* 32310293ce3aSMandy Kirkconnell * This is called when incore extents are being removed from the 32320293ce3aSMandy Kirkconnell * indirection array and the extents being removed span multiple extent 32330293ce3aSMandy Kirkconnell * buffers. The idx parameter contains the file extent index where we 32340293ce3aSMandy Kirkconnell * want to begin removing extents, and the count parameter contains 32350293ce3aSMandy Kirkconnell * how many extents need to be removed. 
32360293ce3aSMandy Kirkconnell * 32370293ce3aSMandy Kirkconnell * |-------| |-------| 32380293ce3aSMandy Kirkconnell * | nex1 | | | nex1 - number of extents before idx 32390293ce3aSMandy Kirkconnell * |-------| | count | 32400293ce3aSMandy Kirkconnell * | | | | count - number of extents being removed at idx 32410293ce3aSMandy Kirkconnell * | count | |-------| 32420293ce3aSMandy Kirkconnell * | | | nex2 | nex2 - number of extents after idx + count 32430293ce3aSMandy Kirkconnell * |-------| |-------| 32440293ce3aSMandy Kirkconnell */ 32450293ce3aSMandy Kirkconnell void 32460293ce3aSMandy Kirkconnell xfs_iext_remove_indirect( 32470293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 32480293ce3aSMandy Kirkconnell xfs_extnum_t idx, /* index to begin removing extents */ 32490293ce3aSMandy Kirkconnell int count) /* number of extents to remove */ 32500293ce3aSMandy Kirkconnell { 32510293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 32520293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 32530293ce3aSMandy Kirkconnell xfs_extnum_t ext_cnt; /* extents left to remove */ 32540293ce3aSMandy Kirkconnell xfs_extnum_t ext_diff; /* extents to remove in current list */ 32550293ce3aSMandy Kirkconnell xfs_extnum_t nex1; /* number of extents before idx */ 32560293ce3aSMandy Kirkconnell xfs_extnum_t nex2; /* extents after idx + count */ 32570293ce3aSMandy Kirkconnell int page_idx = idx; /* index in target extent list */ 32580293ce3aSMandy Kirkconnell 32590293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 32600293ce3aSMandy Kirkconnell erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); 32610293ce3aSMandy Kirkconnell ASSERT(erp != NULL); 32620293ce3aSMandy Kirkconnell nex1 = page_idx; 32630293ce3aSMandy Kirkconnell ext_cnt = count; 32640293ce3aSMandy Kirkconnell while (ext_cnt) { 32650293ce3aSMandy Kirkconnell nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); 32660293ce3aSMandy Kirkconnell 
ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); 32670293ce3aSMandy Kirkconnell /* 32680293ce3aSMandy Kirkconnell * Check for deletion of entire list; 32690293ce3aSMandy Kirkconnell * xfs_iext_irec_remove() updates extent offsets. 32700293ce3aSMandy Kirkconnell */ 32710293ce3aSMandy Kirkconnell if (ext_diff == erp->er_extcount) { 32720293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 32730293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 32740293ce3aSMandy Kirkconnell nex1 = 0; 32750293ce3aSMandy Kirkconnell if (ext_cnt) { 32760293ce3aSMandy Kirkconnell ASSERT(erp_idx < ifp->if_real_bytes / 32770293ce3aSMandy Kirkconnell XFS_IEXT_BUFSZ); 32780293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 32790293ce3aSMandy Kirkconnell nex1 = 0; 32800293ce3aSMandy Kirkconnell continue; 32810293ce3aSMandy Kirkconnell } else { 32820293ce3aSMandy Kirkconnell break; 32830293ce3aSMandy Kirkconnell } 32840293ce3aSMandy Kirkconnell } 32850293ce3aSMandy Kirkconnell /* Move extents up (if needed) */ 32860293ce3aSMandy Kirkconnell if (nex2) { 32870293ce3aSMandy Kirkconnell memmove(&erp->er_extbuf[nex1], 32880293ce3aSMandy Kirkconnell &erp->er_extbuf[nex1 + ext_diff], 32890293ce3aSMandy Kirkconnell nex2 * sizeof(xfs_bmbt_rec_t)); 32900293ce3aSMandy Kirkconnell } 32910293ce3aSMandy Kirkconnell /* Zero out rest of page */ 32920293ce3aSMandy Kirkconnell memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - 32930293ce3aSMandy Kirkconnell ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); 32940293ce3aSMandy Kirkconnell /* Update remaining counters */ 32950293ce3aSMandy Kirkconnell erp->er_extcount -= ext_diff; 32960293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); 32970293ce3aSMandy Kirkconnell ext_cnt -= ext_diff; 32980293ce3aSMandy Kirkconnell nex1 = 0; 32990293ce3aSMandy Kirkconnell erp_idx++; 33000293ce3aSMandy Kirkconnell erp++; 33010293ce3aSMandy Kirkconnell } 33020293ce3aSMandy Kirkconnell ifp->if_bytes -= count * 
sizeof(xfs_bmbt_rec_t); 33030293ce3aSMandy Kirkconnell xfs_iext_irec_compact(ifp); 33040293ce3aSMandy Kirkconnell } 33050293ce3aSMandy Kirkconnell 33060293ce3aSMandy Kirkconnell /* 33074eea22f0SMandy Kirkconnell * Create, destroy, or resize a linear (direct) block of extents. 33084eea22f0SMandy Kirkconnell */ 33094eea22f0SMandy Kirkconnell void 33104eea22f0SMandy Kirkconnell xfs_iext_realloc_direct( 33114eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33124eea22f0SMandy Kirkconnell int new_size) /* new size of extents */ 33134eea22f0SMandy Kirkconnell { 33144eea22f0SMandy Kirkconnell int rnew_size; /* real new size of extents */ 33154eea22f0SMandy Kirkconnell 33164eea22f0SMandy Kirkconnell rnew_size = new_size; 33174eea22f0SMandy Kirkconnell 33180293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || 33190293ce3aSMandy Kirkconnell ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && 33200293ce3aSMandy Kirkconnell (new_size != ifp->if_real_bytes))); 33210293ce3aSMandy Kirkconnell 33224eea22f0SMandy Kirkconnell /* Free extent records */ 33234eea22f0SMandy Kirkconnell if (new_size == 0) { 33244eea22f0SMandy Kirkconnell xfs_iext_destroy(ifp); 33254eea22f0SMandy Kirkconnell } 33264eea22f0SMandy Kirkconnell /* Resize direct extent list and zero any new bytes */ 33274eea22f0SMandy Kirkconnell else if (ifp->if_real_bytes) { 33284eea22f0SMandy Kirkconnell /* Check if extents will fit inside the inode */ 33294eea22f0SMandy Kirkconnell if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) { 33304eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(ifp, new_size / 33314eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)); 33324eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 33334eea22f0SMandy Kirkconnell return; 33344eea22f0SMandy Kirkconnell } 333516a087d8SVignesh Babu if (!is_power_of_2(new_size)){ 333640ebd81dSRobert P. J. 
Day rnew_size = roundup_pow_of_two(new_size); 33374eea22f0SMandy Kirkconnell } 33384eea22f0SMandy Kirkconnell if (rnew_size != ifp->if_real_bytes) { 3339a6f64d4aSChristoph Hellwig ifp->if_u1.if_extents = 33404eea22f0SMandy Kirkconnell kmem_realloc(ifp->if_u1.if_extents, 33414eea22f0SMandy Kirkconnell rnew_size, 33426785073bSDavid Chinner ifp->if_real_bytes, KM_NOFS); 33434eea22f0SMandy Kirkconnell } 33444eea22f0SMandy Kirkconnell if (rnew_size > ifp->if_real_bytes) { 33454eea22f0SMandy Kirkconnell memset(&ifp->if_u1.if_extents[ifp->if_bytes / 33464eea22f0SMandy Kirkconnell (uint)sizeof(xfs_bmbt_rec_t)], 0, 33474eea22f0SMandy Kirkconnell rnew_size - ifp->if_real_bytes); 33484eea22f0SMandy Kirkconnell } 33494eea22f0SMandy Kirkconnell } 33504eea22f0SMandy Kirkconnell /* 33514eea22f0SMandy Kirkconnell * Switch from the inline extent buffer to a direct 33524eea22f0SMandy Kirkconnell * extent list. Be sure to include the inline extent 33534eea22f0SMandy Kirkconnell * bytes in new_size. 33544eea22f0SMandy Kirkconnell */ 33554eea22f0SMandy Kirkconnell else { 33564eea22f0SMandy Kirkconnell new_size += ifp->if_bytes; 335716a087d8SVignesh Babu if (!is_power_of_2(new_size)) { 335840ebd81dSRobert P. J. Day rnew_size = roundup_pow_of_two(new_size); 33594eea22f0SMandy Kirkconnell } 33604eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(ifp, rnew_size); 33614eea22f0SMandy Kirkconnell } 33624eea22f0SMandy Kirkconnell ifp->if_real_bytes = rnew_size; 33634eea22f0SMandy Kirkconnell ifp->if_bytes = new_size; 33644eea22f0SMandy Kirkconnell } 33654eea22f0SMandy Kirkconnell 33664eea22f0SMandy Kirkconnell /* 33674eea22f0SMandy Kirkconnell * Switch from linear (direct) extent records to inline buffer. 
33684eea22f0SMandy Kirkconnell */ 33694eea22f0SMandy Kirkconnell void 33704eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline( 33714eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33724eea22f0SMandy Kirkconnell xfs_extnum_t nextents) /* number of extents in file */ 33734eea22f0SMandy Kirkconnell { 33744eea22f0SMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTENTS); 33754eea22f0SMandy Kirkconnell ASSERT(nextents <= XFS_INLINE_EXTS); 33764eea22f0SMandy Kirkconnell /* 33774eea22f0SMandy Kirkconnell * The inline buffer was zeroed when we switched 33784eea22f0SMandy Kirkconnell * from inline to direct extent allocation mode, 33794eea22f0SMandy Kirkconnell * so we don't need to clear it here. 33804eea22f0SMandy Kirkconnell */ 33814eea22f0SMandy Kirkconnell memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 33824eea22f0SMandy Kirkconnell nextents * sizeof(xfs_bmbt_rec_t)); 3383f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 33844eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 33854eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 33864eea22f0SMandy Kirkconnell } 33874eea22f0SMandy Kirkconnell 33884eea22f0SMandy Kirkconnell /* 33894eea22f0SMandy Kirkconnell * Switch from inline buffer to linear (direct) extent records. 33904eea22f0SMandy Kirkconnell * new_size should already be rounded up to the next power of 2 33914eea22f0SMandy Kirkconnell * by the caller (when appropriate), so use new_size as it is. 33924eea22f0SMandy Kirkconnell * However, since new_size may be rounded up, we can't update 33934eea22f0SMandy Kirkconnell * if_bytes here. It is the caller's responsibility to update 33944eea22f0SMandy Kirkconnell * if_bytes upon return. 
33954eea22f0SMandy Kirkconnell */ 33964eea22f0SMandy Kirkconnell void 33974eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct( 33984eea22f0SMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 33994eea22f0SMandy Kirkconnell int new_size) /* number of extents in file */ 34004eea22f0SMandy Kirkconnell { 34016785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); 34024eea22f0SMandy Kirkconnell memset(ifp->if_u1.if_extents, 0, new_size); 34034eea22f0SMandy Kirkconnell if (ifp->if_bytes) { 34044eea22f0SMandy Kirkconnell memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 34054eea22f0SMandy Kirkconnell ifp->if_bytes); 34064eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 34074eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 34084eea22f0SMandy Kirkconnell } 34094eea22f0SMandy Kirkconnell ifp->if_real_bytes = new_size; 34104eea22f0SMandy Kirkconnell } 34114eea22f0SMandy Kirkconnell 34124eea22f0SMandy Kirkconnell /* 34130293ce3aSMandy Kirkconnell * Resize an extent indirection array to new_size bytes. 
34140293ce3aSMandy Kirkconnell */ 3415d96f8f89SEric Sandeen STATIC void 34160293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect( 34170293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34180293ce3aSMandy Kirkconnell int new_size) /* new indirection array size */ 34190293ce3aSMandy Kirkconnell { 34200293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 34210293ce3aSMandy Kirkconnell int size; /* current indirection array size */ 34220293ce3aSMandy Kirkconnell 34230293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 34240293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 34250293ce3aSMandy Kirkconnell size = nlists * sizeof(xfs_ext_irec_t); 34260293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes); 34270293ce3aSMandy Kirkconnell ASSERT((new_size >= 0) && (new_size != size)); 34280293ce3aSMandy Kirkconnell if (new_size == 0) { 34290293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 34300293ce3aSMandy Kirkconnell } else { 34310293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 34320293ce3aSMandy Kirkconnell kmem_realloc(ifp->if_u1.if_ext_irec, 34336785073bSDavid Chinner new_size, size, KM_NOFS); 34340293ce3aSMandy Kirkconnell } 34350293ce3aSMandy Kirkconnell } 34360293ce3aSMandy Kirkconnell 34370293ce3aSMandy Kirkconnell /* 34380293ce3aSMandy Kirkconnell * Switch from indirection array to linear (direct) extent allocations. 
34390293ce3aSMandy Kirkconnell */ 3440d96f8f89SEric Sandeen STATIC void 34410293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct( 34420293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 34430293ce3aSMandy Kirkconnell { 3444a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 34450293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 34460293ce3aSMandy Kirkconnell int size; /* size of file extents */ 34470293ce3aSMandy Kirkconnell 34480293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 34490293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 34500293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 34510293ce3aSMandy Kirkconnell size = nextents * sizeof(xfs_bmbt_rec_t); 34520293ce3aSMandy Kirkconnell 345371a8c87fSLachlan McIlroy xfs_iext_irec_compact_pages(ifp); 34540293ce3aSMandy Kirkconnell ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 34550293ce3aSMandy Kirkconnell 34560293ce3aSMandy Kirkconnell ep = ifp->if_u1.if_ext_irec->er_extbuf; 3457f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 34580293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 34590293ce3aSMandy Kirkconnell ifp->if_u1.if_extents = ep; 34600293ce3aSMandy Kirkconnell ifp->if_bytes = size; 34610293ce3aSMandy Kirkconnell if (nextents < XFS_LINEAR_EXTS) { 34620293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, size); 34630293ce3aSMandy Kirkconnell } 34640293ce3aSMandy Kirkconnell } 34650293ce3aSMandy Kirkconnell 34660293ce3aSMandy Kirkconnell /* 34674eea22f0SMandy Kirkconnell * Free incore file extents. 
34684eea22f0SMandy Kirkconnell */ 34694eea22f0SMandy Kirkconnell void 34704eea22f0SMandy Kirkconnell xfs_iext_destroy( 34714eea22f0SMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 34724eea22f0SMandy Kirkconnell { 34730293ce3aSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 34740293ce3aSMandy Kirkconnell int erp_idx; 34750293ce3aSMandy Kirkconnell int nlists; 34760293ce3aSMandy Kirkconnell 34770293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 34780293ce3aSMandy Kirkconnell for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { 34790293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx); 34800293ce3aSMandy Kirkconnell } 34810293ce3aSMandy Kirkconnell ifp->if_flags &= ~XFS_IFEXTIREC; 34820293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes) { 3483f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_extents); 34844eea22f0SMandy Kirkconnell } else if (ifp->if_bytes) { 34854eea22f0SMandy Kirkconnell memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 34864eea22f0SMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 34874eea22f0SMandy Kirkconnell } 34884eea22f0SMandy Kirkconnell ifp->if_u1.if_extents = NULL; 34894eea22f0SMandy Kirkconnell ifp->if_real_bytes = 0; 34904eea22f0SMandy Kirkconnell ifp->if_bytes = 0; 34914eea22f0SMandy Kirkconnell } 34920293ce3aSMandy Kirkconnell 34930293ce3aSMandy Kirkconnell /* 34948867bc9bSMandy Kirkconnell * Return a pointer to the extent record for file system block bno. 
34958867bc9bSMandy Kirkconnell */ 3496a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t * /* pointer to found extent record */ 34978867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext( 34988867bc9bSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 34998867bc9bSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 35008867bc9bSMandy Kirkconnell xfs_extnum_t *idxp) /* index of target extent */ 35018867bc9bSMandy Kirkconnell { 3502a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *base; /* pointer to first extent */ 35038867bc9bSMandy Kirkconnell xfs_filblks_t blockcount = 0; /* number of blocks in extent */ 3504a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ 35058867bc9bSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 3506c41564b5SNathan Scott int high; /* upper boundary in search */ 35078867bc9bSMandy Kirkconnell xfs_extnum_t idx = 0; /* index of target extent */ 3508c41564b5SNathan Scott int low; /* lower boundary in search */ 35098867bc9bSMandy Kirkconnell xfs_extnum_t nextents; /* number of file extents */ 35108867bc9bSMandy Kirkconnell xfs_fileoff_t startoff = 0; /* start offset of extent */ 35118867bc9bSMandy Kirkconnell 35128867bc9bSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 35138867bc9bSMandy Kirkconnell if (nextents == 0) { 35148867bc9bSMandy Kirkconnell *idxp = 0; 35158867bc9bSMandy Kirkconnell return NULL; 35168867bc9bSMandy Kirkconnell } 35178867bc9bSMandy Kirkconnell low = 0; 35188867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35198867bc9bSMandy Kirkconnell /* Find target extent list */ 35208867bc9bSMandy Kirkconnell int erp_idx = 0; 35218867bc9bSMandy Kirkconnell erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); 35228867bc9bSMandy Kirkconnell base = erp->er_extbuf; 35238867bc9bSMandy Kirkconnell high = erp->er_extcount - 1; 35248867bc9bSMandy Kirkconnell } else { 35258867bc9bSMandy Kirkconnell base = ifp->if_u1.if_extents; 
35268867bc9bSMandy Kirkconnell high = nextents - 1; 35278867bc9bSMandy Kirkconnell } 35288867bc9bSMandy Kirkconnell /* Binary search extent records */ 35298867bc9bSMandy Kirkconnell while (low <= high) { 35308867bc9bSMandy Kirkconnell idx = (low + high) >> 1; 35318867bc9bSMandy Kirkconnell ep = base + idx; 35328867bc9bSMandy Kirkconnell startoff = xfs_bmbt_get_startoff(ep); 35338867bc9bSMandy Kirkconnell blockcount = xfs_bmbt_get_blockcount(ep); 35348867bc9bSMandy Kirkconnell if (bno < startoff) { 35358867bc9bSMandy Kirkconnell high = idx - 1; 35368867bc9bSMandy Kirkconnell } else if (bno >= startoff + blockcount) { 35378867bc9bSMandy Kirkconnell low = idx + 1; 35388867bc9bSMandy Kirkconnell } else { 35398867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 35408867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35418867bc9bSMandy Kirkconnell idx += erp->er_extoff; 35428867bc9bSMandy Kirkconnell } 35438867bc9bSMandy Kirkconnell *idxp = idx; 35448867bc9bSMandy Kirkconnell return ep; 35458867bc9bSMandy Kirkconnell } 35468867bc9bSMandy Kirkconnell } 35478867bc9bSMandy Kirkconnell /* Convert back to file-based extent index */ 35488867bc9bSMandy Kirkconnell if (ifp->if_flags & XFS_IFEXTIREC) { 35498867bc9bSMandy Kirkconnell idx += erp->er_extoff; 35508867bc9bSMandy Kirkconnell } 35518867bc9bSMandy Kirkconnell if (bno >= startoff + blockcount) { 35528867bc9bSMandy Kirkconnell if (++idx == nextents) { 35538867bc9bSMandy Kirkconnell ep = NULL; 35548867bc9bSMandy Kirkconnell } else { 35558867bc9bSMandy Kirkconnell ep = xfs_iext_get_ext(ifp, idx); 35568867bc9bSMandy Kirkconnell } 35578867bc9bSMandy Kirkconnell } 35588867bc9bSMandy Kirkconnell *idxp = idx; 35598867bc9bSMandy Kirkconnell return ep; 35608867bc9bSMandy Kirkconnell } 35618867bc9bSMandy Kirkconnell 35628867bc9bSMandy Kirkconnell /* 35630293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 35640293ce3aSMandy Kirkconnell * extent record for 
filesystem block bno. Store the index of the 35650293ce3aSMandy Kirkconnell * target irec in *erp_idxp. 35660293ce3aSMandy Kirkconnell */ 35678867bc9bSMandy Kirkconnell xfs_ext_irec_t * /* pointer to found extent record */ 35680293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec( 35690293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 35700293ce3aSMandy Kirkconnell xfs_fileoff_t bno, /* block number to search for */ 35710293ce3aSMandy Kirkconnell int *erp_idxp) /* irec index of target ext list */ 35720293ce3aSMandy Kirkconnell { 35730293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ 35740293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp_next; /* next indirection array entry */ 35758867bc9bSMandy Kirkconnell int erp_idx; /* indirection array index */ 35760293ce3aSMandy Kirkconnell int nlists; /* number of extent irec's (lists) */ 35770293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 35780293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 35790293ce3aSMandy Kirkconnell 35800293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 35810293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 35820293ce3aSMandy Kirkconnell erp_idx = 0; 35830293ce3aSMandy Kirkconnell low = 0; 35840293ce3aSMandy Kirkconnell high = nlists - 1; 35850293ce3aSMandy Kirkconnell while (low <= high) { 35860293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 35870293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 35880293ce3aSMandy Kirkconnell erp_next = erp_idx < nlists - 1 ? 
erp + 1 : NULL; 35890293ce3aSMandy Kirkconnell if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { 35900293ce3aSMandy Kirkconnell high = erp_idx - 1; 35910293ce3aSMandy Kirkconnell } else if (erp_next && bno >= 35920293ce3aSMandy Kirkconnell xfs_bmbt_get_startoff(erp_next->er_extbuf)) { 35930293ce3aSMandy Kirkconnell low = erp_idx + 1; 35940293ce3aSMandy Kirkconnell } else { 35950293ce3aSMandy Kirkconnell break; 35960293ce3aSMandy Kirkconnell } 35970293ce3aSMandy Kirkconnell } 35980293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 35990293ce3aSMandy Kirkconnell return erp; 36000293ce3aSMandy Kirkconnell } 36010293ce3aSMandy Kirkconnell 36020293ce3aSMandy Kirkconnell /* 36030293ce3aSMandy Kirkconnell * Return a pointer to the indirection array entry containing the 36040293ce3aSMandy Kirkconnell * extent record at file extent index *idxp. Store the index of the 36050293ce3aSMandy Kirkconnell * target irec in *erp_idxp and store the page index of the target 36060293ce3aSMandy Kirkconnell * extent record in *idxp. 
36070293ce3aSMandy Kirkconnell */ 36080293ce3aSMandy Kirkconnell xfs_ext_irec_t * 36090293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec( 36100293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 36110293ce3aSMandy Kirkconnell xfs_extnum_t *idxp, /* extent index (file -> page) */ 36120293ce3aSMandy Kirkconnell int *erp_idxp, /* pointer to target irec */ 36130293ce3aSMandy Kirkconnell int realloc) /* new bytes were just added */ 36140293ce3aSMandy Kirkconnell { 36150293ce3aSMandy Kirkconnell xfs_ext_irec_t *prev; /* pointer to previous irec */ 36160293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ 36170293ce3aSMandy Kirkconnell int erp_idx; /* indirection array index */ 36180293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 36190293ce3aSMandy Kirkconnell int high; /* binary search upper limit */ 36200293ce3aSMandy Kirkconnell int low; /* binary search lower limit */ 36210293ce3aSMandy Kirkconnell xfs_extnum_t page_idx = *idxp; /* extent index in target list */ 36220293ce3aSMandy Kirkconnell 36230293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 362487bef181SChristoph Hellwig ASSERT(page_idx >= 0); 362587bef181SChristoph Hellwig ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); 362687bef181SChristoph Hellwig ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); 362787bef181SChristoph Hellwig 36280293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 36290293ce3aSMandy Kirkconnell erp_idx = 0; 36300293ce3aSMandy Kirkconnell low = 0; 36310293ce3aSMandy Kirkconnell high = nlists - 1; 36320293ce3aSMandy Kirkconnell 36330293ce3aSMandy Kirkconnell /* Binary search extent irec's */ 36340293ce3aSMandy Kirkconnell while (low <= high) { 36350293ce3aSMandy Kirkconnell erp_idx = (low + high) >> 1; 36360293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 36370293ce3aSMandy Kirkconnell prev = erp_idx > 0 ? 
erp - 1 : NULL; 36380293ce3aSMandy Kirkconnell if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && 36390293ce3aSMandy Kirkconnell realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { 36400293ce3aSMandy Kirkconnell high = erp_idx - 1; 36410293ce3aSMandy Kirkconnell } else if (page_idx > erp->er_extoff + erp->er_extcount || 36420293ce3aSMandy Kirkconnell (page_idx == erp->er_extoff + erp->er_extcount && 36430293ce3aSMandy Kirkconnell !realloc)) { 36440293ce3aSMandy Kirkconnell low = erp_idx + 1; 36450293ce3aSMandy Kirkconnell } else if (page_idx == erp->er_extoff + erp->er_extcount && 36460293ce3aSMandy Kirkconnell erp->er_extcount == XFS_LINEAR_EXTS) { 36470293ce3aSMandy Kirkconnell ASSERT(realloc); 36480293ce3aSMandy Kirkconnell page_idx = 0; 36490293ce3aSMandy Kirkconnell erp_idx++; 36500293ce3aSMandy Kirkconnell erp = erp_idx < nlists ? erp + 1 : NULL; 36510293ce3aSMandy Kirkconnell break; 36520293ce3aSMandy Kirkconnell } else { 36530293ce3aSMandy Kirkconnell page_idx -= erp->er_extoff; 36540293ce3aSMandy Kirkconnell break; 36550293ce3aSMandy Kirkconnell } 36560293ce3aSMandy Kirkconnell } 36570293ce3aSMandy Kirkconnell *idxp = page_idx; 36580293ce3aSMandy Kirkconnell *erp_idxp = erp_idx; 36590293ce3aSMandy Kirkconnell return(erp); 36600293ce3aSMandy Kirkconnell } 36610293ce3aSMandy Kirkconnell 36620293ce3aSMandy Kirkconnell /* 36630293ce3aSMandy Kirkconnell * Allocate and initialize an indirection array once the space needed 36640293ce3aSMandy Kirkconnell * for incore extents increases above XFS_IEXT_BUFSZ. 
36650293ce3aSMandy Kirkconnell */ 36660293ce3aSMandy Kirkconnell void 36670293ce3aSMandy Kirkconnell xfs_iext_irec_init( 36680293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 36690293ce3aSMandy Kirkconnell { 36700293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 36710293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 36720293ce3aSMandy Kirkconnell 36730293ce3aSMandy Kirkconnell ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); 36740293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 36750293ce3aSMandy Kirkconnell ASSERT(nextents <= XFS_LINEAR_EXTS); 36760293ce3aSMandy Kirkconnell 36776785073bSDavid Chinner erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); 36780293ce3aSMandy Kirkconnell 36790293ce3aSMandy Kirkconnell if (nextents == 0) { 36806785073bSDavid Chinner ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 36810293ce3aSMandy Kirkconnell } else if (!ifp->if_real_bytes) { 36820293ce3aSMandy Kirkconnell xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); 36830293ce3aSMandy Kirkconnell } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { 36840293ce3aSMandy Kirkconnell xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); 36850293ce3aSMandy Kirkconnell } 36860293ce3aSMandy Kirkconnell erp->er_extbuf = ifp->if_u1.if_extents; 36870293ce3aSMandy Kirkconnell erp->er_extcount = nextents; 36880293ce3aSMandy Kirkconnell erp->er_extoff = 0; 36890293ce3aSMandy Kirkconnell 36900293ce3aSMandy Kirkconnell ifp->if_flags |= XFS_IFEXTIREC; 36910293ce3aSMandy Kirkconnell ifp->if_real_bytes = XFS_IEXT_BUFSZ; 36920293ce3aSMandy Kirkconnell ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); 36930293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec = erp; 36940293ce3aSMandy Kirkconnell 36950293ce3aSMandy Kirkconnell return; 36960293ce3aSMandy Kirkconnell } 36970293ce3aSMandy Kirkconnell 36980293ce3aSMandy Kirkconnell /* 36990293ce3aSMandy Kirkconnell * Allocate and initialize a new entry in 
the indirection array. 37000293ce3aSMandy Kirkconnell */ 37010293ce3aSMandy Kirkconnell xfs_ext_irec_t * 37020293ce3aSMandy Kirkconnell xfs_iext_irec_new( 37030293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 37040293ce3aSMandy Kirkconnell int erp_idx) /* index for new irec */ 37050293ce3aSMandy Kirkconnell { 37060293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 37070293ce3aSMandy Kirkconnell int i; /* loop counter */ 37080293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37090293ce3aSMandy Kirkconnell 37100293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37110293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37120293ce3aSMandy Kirkconnell 37130293ce3aSMandy Kirkconnell /* Resize indirection array */ 37140293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, ++nlists * 37150293ce3aSMandy Kirkconnell sizeof(xfs_ext_irec_t)); 37160293ce3aSMandy Kirkconnell /* 37170293ce3aSMandy Kirkconnell * Move records down in the array so the 37180293ce3aSMandy Kirkconnell * new page can use erp_idx. 37190293ce3aSMandy Kirkconnell */ 37200293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 37210293ce3aSMandy Kirkconnell for (i = nlists - 1; i > erp_idx; i--) { 37220293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); 37230293ce3aSMandy Kirkconnell } 37240293ce3aSMandy Kirkconnell ASSERT(i == erp_idx); 37250293ce3aSMandy Kirkconnell 37260293ce3aSMandy Kirkconnell /* Initialize new extent record */ 37270293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 37286785073bSDavid Chinner erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); 37290293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 37300293ce3aSMandy Kirkconnell memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); 37310293ce3aSMandy Kirkconnell erp[erp_idx].er_extcount = 0; 37320293ce3aSMandy Kirkconnell erp[erp_idx].er_extoff = erp_idx > 0 ? 
37330293ce3aSMandy Kirkconnell erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; 37340293ce3aSMandy Kirkconnell return (&erp[erp_idx]); 37350293ce3aSMandy Kirkconnell } 37360293ce3aSMandy Kirkconnell 37370293ce3aSMandy Kirkconnell /* 37380293ce3aSMandy Kirkconnell * Remove a record from the indirection array. 37390293ce3aSMandy Kirkconnell */ 37400293ce3aSMandy Kirkconnell void 37410293ce3aSMandy Kirkconnell xfs_iext_irec_remove( 37420293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 37430293ce3aSMandy Kirkconnell int erp_idx) /* irec index to remove */ 37440293ce3aSMandy Kirkconnell { 37450293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp; /* indirection array pointer */ 37460293ce3aSMandy Kirkconnell int i; /* loop counter */ 37470293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37480293ce3aSMandy Kirkconnell 37490293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37500293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37510293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 37520293ce3aSMandy Kirkconnell if (erp->er_extbuf) { 37530293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 37540293ce3aSMandy Kirkconnell -erp->er_extcount); 3755f0e2d93cSDenys Vlasenko kmem_free(erp->er_extbuf); 37560293ce3aSMandy Kirkconnell } 37570293ce3aSMandy Kirkconnell /* Compact extent records */ 37580293ce3aSMandy Kirkconnell erp = ifp->if_u1.if_ext_irec; 37590293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists - 1; i++) { 37600293ce3aSMandy Kirkconnell memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); 37610293ce3aSMandy Kirkconnell } 37620293ce3aSMandy Kirkconnell /* 37630293ce3aSMandy Kirkconnell * Manually free the last extent record from the indirection 37640293ce3aSMandy Kirkconnell * array. 
A call to xfs_iext_realloc_indirect() with a size 37650293ce3aSMandy Kirkconnell * of zero would result in a call to xfs_iext_destroy() which 37660293ce3aSMandy Kirkconnell * would in turn call this function again, creating a nasty 37670293ce3aSMandy Kirkconnell * infinite loop. 37680293ce3aSMandy Kirkconnell */ 37690293ce3aSMandy Kirkconnell if (--nlists) { 37700293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(ifp, 37710293ce3aSMandy Kirkconnell nlists * sizeof(xfs_ext_irec_t)); 37720293ce3aSMandy Kirkconnell } else { 3773f0e2d93cSDenys Vlasenko kmem_free(ifp->if_u1.if_ext_irec); 37740293ce3aSMandy Kirkconnell } 37750293ce3aSMandy Kirkconnell ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 37760293ce3aSMandy Kirkconnell } 37770293ce3aSMandy Kirkconnell 37780293ce3aSMandy Kirkconnell /* 37790293ce3aSMandy Kirkconnell * This is called to clean up large amounts of unused memory allocated 37800293ce3aSMandy Kirkconnell * by the indirection array. Before compacting anything though, verify 37810293ce3aSMandy Kirkconnell * that the indirection array is still needed and switch back to the 37820293ce3aSMandy Kirkconnell * linear extent list (or even the inline buffer) if possible. 
The 37830293ce3aSMandy Kirkconnell * compaction policy is as follows: 37840293ce3aSMandy Kirkconnell * 37850293ce3aSMandy Kirkconnell * Full Compaction: Extents fit into a single page (or inline buffer) 378671a8c87fSLachlan McIlroy * Partial Compaction: Extents occupy less than 50% of allocated space 37870293ce3aSMandy Kirkconnell * No Compaction: Extents occupy at least 50% of allocated space 37880293ce3aSMandy Kirkconnell */ 37890293ce3aSMandy Kirkconnell void 37900293ce3aSMandy Kirkconnell xfs_iext_irec_compact( 37910293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 37920293ce3aSMandy Kirkconnell { 37930293ce3aSMandy Kirkconnell xfs_extnum_t nextents; /* number of extents in file */ 37940293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 37950293ce3aSMandy Kirkconnell 37960293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 37970293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 37980293ce3aSMandy Kirkconnell nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 37990293ce3aSMandy Kirkconnell 38000293ce3aSMandy Kirkconnell if (nextents == 0) { 38010293ce3aSMandy Kirkconnell xfs_iext_destroy(ifp); 38020293ce3aSMandy Kirkconnell } else if (nextents <= XFS_INLINE_EXTS) { 38030293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 38040293ce3aSMandy Kirkconnell xfs_iext_direct_to_inline(ifp, nextents); 38050293ce3aSMandy Kirkconnell } else if (nextents <= XFS_LINEAR_EXTS) { 38060293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(ifp); 38070293ce3aSMandy Kirkconnell } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 38080293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(ifp); 38090293ce3aSMandy Kirkconnell } 38100293ce3aSMandy Kirkconnell } 38110293ce3aSMandy Kirkconnell 38120293ce3aSMandy Kirkconnell /* 38130293ce3aSMandy Kirkconnell * Combine extents from neighboring extent pages. 
38140293ce3aSMandy Kirkconnell */ 38150293ce3aSMandy Kirkconnell void 38160293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages( 38170293ce3aSMandy Kirkconnell xfs_ifork_t *ifp) /* inode fork pointer */ 38180293ce3aSMandy Kirkconnell { 38190293ce3aSMandy Kirkconnell xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ 38200293ce3aSMandy Kirkconnell int erp_idx = 0; /* indirection array index */ 38210293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists) */ 38220293ce3aSMandy Kirkconnell 38230293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 38240293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 38250293ce3aSMandy Kirkconnell while (erp_idx < nlists - 1) { 38260293ce3aSMandy Kirkconnell erp = &ifp->if_u1.if_ext_irec[erp_idx]; 38270293ce3aSMandy Kirkconnell erp_next = erp + 1; 38280293ce3aSMandy Kirkconnell if (erp_next->er_extcount <= 38290293ce3aSMandy Kirkconnell (XFS_LINEAR_EXTS - erp->er_extcount)) { 383071a8c87fSLachlan McIlroy memcpy(&erp->er_extbuf[erp->er_extcount], 38310293ce3aSMandy Kirkconnell erp_next->er_extbuf, erp_next->er_extcount * 38320293ce3aSMandy Kirkconnell sizeof(xfs_bmbt_rec_t)); 38330293ce3aSMandy Kirkconnell erp->er_extcount += erp_next->er_extcount; 38340293ce3aSMandy Kirkconnell /* 38350293ce3aSMandy Kirkconnell * Free page before removing extent record 38360293ce3aSMandy Kirkconnell * so er_extoffs don't get modified in 38370293ce3aSMandy Kirkconnell * xfs_iext_irec_remove. 
38380293ce3aSMandy Kirkconnell */ 3839f0e2d93cSDenys Vlasenko kmem_free(erp_next->er_extbuf); 38400293ce3aSMandy Kirkconnell erp_next->er_extbuf = NULL; 38410293ce3aSMandy Kirkconnell xfs_iext_irec_remove(ifp, erp_idx + 1); 38420293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 38430293ce3aSMandy Kirkconnell } else { 38440293ce3aSMandy Kirkconnell erp_idx++; 38450293ce3aSMandy Kirkconnell } 38460293ce3aSMandy Kirkconnell } 38470293ce3aSMandy Kirkconnell } 38480293ce3aSMandy Kirkconnell 38490293ce3aSMandy Kirkconnell /* 38500293ce3aSMandy Kirkconnell * This is called to update the er_extoff field in the indirection 38510293ce3aSMandy Kirkconnell * array when extents have been added or removed from one of the 38520293ce3aSMandy Kirkconnell * extent lists. erp_idx contains the irec index to begin updating 38530293ce3aSMandy Kirkconnell * at and ext_diff contains the number of extents that were added 38540293ce3aSMandy Kirkconnell * or removed. 38550293ce3aSMandy Kirkconnell */ 38560293ce3aSMandy Kirkconnell void 38570293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs( 38580293ce3aSMandy Kirkconnell xfs_ifork_t *ifp, /* inode fork pointer */ 38590293ce3aSMandy Kirkconnell int erp_idx, /* irec index to update */ 38600293ce3aSMandy Kirkconnell int ext_diff) /* number of new extents */ 38610293ce3aSMandy Kirkconnell { 38620293ce3aSMandy Kirkconnell int i; /* loop counter */ 38630293ce3aSMandy Kirkconnell int nlists; /* number of irec's (ex lists */ 38640293ce3aSMandy Kirkconnell 38650293ce3aSMandy Kirkconnell ASSERT(ifp->if_flags & XFS_IFEXTIREC); 38660293ce3aSMandy Kirkconnell nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 38670293ce3aSMandy Kirkconnell for (i = erp_idx; i < nlists; i++) { 38680293ce3aSMandy Kirkconnell ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; 38690293ce3aSMandy Kirkconnell } 38700293ce3aSMandy Kirkconnell } 3871