11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or modify it 51da177e4SLinus Torvalds * under the terms of version 2 of the GNU General Public License as 61da177e4SLinus Torvalds * published by the Free Software Foundation. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * This program is distributed in the hope that it would be useful, but 91da177e4SLinus Torvalds * WITHOUT ANY WARRANTY; without even the implied warranty of 101da177e4SLinus Torvalds * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * Further, this software is distributed without any warranty that it is 131da177e4SLinus Torvalds * free of the rightful claim of any third person regarding infringement 141da177e4SLinus Torvalds * or the like. Any license provided herein, whether implied or 151da177e4SLinus Torvalds * otherwise, applies only to this software file. Patent licenses, if 161da177e4SLinus Torvalds * any, provided herein do not apply to combinations of this program with 171da177e4SLinus Torvalds * other software, or any other product whatsoever. 181da177e4SLinus Torvalds * 191da177e4SLinus Torvalds * You should have received a copy of the GNU General Public License along 201da177e4SLinus Torvalds * with this program; if not, write the Free Software Foundation, Inc., 59 211da177e4SLinus Torvalds * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
221da177e4SLinus Torvalds * 231da177e4SLinus Torvalds * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, 241da177e4SLinus Torvalds * Mountain View, CA 94043, or: 251da177e4SLinus Torvalds * 261da177e4SLinus Torvalds * http://www.sgi.com 271da177e4SLinus Torvalds * 281da177e4SLinus Torvalds * For further information regarding this notice, see: 291da177e4SLinus Torvalds * 301da177e4SLinus Torvalds * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ 311da177e4SLinus Torvalds */ 321da177e4SLinus Torvalds 331da177e4SLinus Torvalds #include "xfs.h" 341da177e4SLinus Torvalds #include "xfs_macros.h" 351da177e4SLinus Torvalds #include "xfs_types.h" 361da177e4SLinus Torvalds #include "xfs_inum.h" 371da177e4SLinus Torvalds #include "xfs_log.h" 381da177e4SLinus Torvalds #include "xfs_trans.h" 391da177e4SLinus Torvalds #include "xfs_trans_priv.h" 401da177e4SLinus Torvalds #include "xfs_sb.h" 411da177e4SLinus Torvalds #include "xfs_ag.h" 421da177e4SLinus Torvalds #include "xfs_dir.h" 431da177e4SLinus Torvalds #include "xfs_dir2.h" 441da177e4SLinus Torvalds #include "xfs_dmapi.h" 451da177e4SLinus Torvalds #include "xfs_mount.h" 461da177e4SLinus Torvalds #include "xfs_alloc_btree.h" 471da177e4SLinus Torvalds #include "xfs_bmap_btree.h" 481da177e4SLinus Torvalds #include "xfs_ialloc_btree.h" 491da177e4SLinus Torvalds #include "xfs_btree.h" 501da177e4SLinus Torvalds #include "xfs_imap.h" 511da177e4SLinus Torvalds #include "xfs_alloc.h" 521da177e4SLinus Torvalds #include "xfs_ialloc.h" 531da177e4SLinus Torvalds #include "xfs_attr_sf.h" 541da177e4SLinus Torvalds #include "xfs_dir_sf.h" 551da177e4SLinus Torvalds #include "xfs_dir2_sf.h" 561da177e4SLinus Torvalds #include "xfs_dinode.h" 571da177e4SLinus Torvalds #include "xfs_inode_item.h" 581da177e4SLinus Torvalds #include "xfs_inode.h" 591da177e4SLinus Torvalds #include "xfs_bmap.h" 601da177e4SLinus Torvalds #include "xfs_buf_item.h" 611da177e4SLinus Torvalds #include "xfs_rw.h" 621da177e4SLinus Torvalds 
#include "xfs_error.h"
#include "xfs_bit.h"
#include "xfs_utils.h"
#include "xfs_dir2_trace.h"
#include "xfs_quota.h"
#include "xfs_mac.h"
#include "xfs_acl.h"


kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;
kmem_zone_t *xfs_chashlist_zone;

/*
 * Used in xfs_itruncate().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
#define	XFS_ITRUNC_MAX_EXTENTS	2

STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);


#ifdef DEBUG
/*
 * Make sure that the extents in the given memory buffer
 * are valid.
 *
 *	ep	- first extent record to examine
 *	nrecs	- number of consecutive records starting at ep
 *	disk	- non-zero: decode each record with xfs_bmbt_disk_get_all();
 *		  zero: decode it with xfs_bmbt_get_all()
 *	fmt	- extent format; for XFS_EXTFMT_NOSTATE every record is
 *		  asserted to be in state XFS_EXT_NORM
 *
 * Only compiled for DEBUG builds; otherwise the name expands to nothing.
 */
STATIC void
xfs_validate_extents(
	xfs_bmbt_rec_t		*ep,
	int			nrecs,
	int			disk,
	xfs_exntfmt_t		fmt)
{
	xfs_bmbt_irec_t		irec;
	xfs_bmbt_rec_t		rec;
	int			i;

	for (i = 0; i < nrecs; i++) {
		/*
		 * Copy the record out with get_unaligned() before decoding
		 * it, rather than handing ep to the decoders directly.
		 */
		rec.l0 = get_unaligned((__uint64_t*)&ep->l0);
		rec.l1 = get_unaligned((__uint64_t*)&ep->l1);
		if (disk)
			xfs_bmbt_disk_get_all(&rec, &irec);
		else
			xfs_bmbt_get_all(&rec, &irec);
		if (fmt == XFS_EXTFMT_NOSTATE)
			ASSERT(irec.br_state == XFS_EXT_NORM);
		ep++;
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ep, nrecs, disk, fmt)
#endif /* DEBUG */

/*
 * Check that none of the inode's in the buffer have a next
 * unlinked field of 0.
1221da177e4SLinus Torvalds */ 1231da177e4SLinus Torvalds #if defined(DEBUG) 1241da177e4SLinus Torvalds void 1251da177e4SLinus Torvalds xfs_inobp_check( 1261da177e4SLinus Torvalds xfs_mount_t *mp, 1271da177e4SLinus Torvalds xfs_buf_t *bp) 1281da177e4SLinus Torvalds { 1291da177e4SLinus Torvalds int i; 1301da177e4SLinus Torvalds int j; 1311da177e4SLinus Torvalds xfs_dinode_t *dip; 1321da177e4SLinus Torvalds 1331da177e4SLinus Torvalds j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 1341da177e4SLinus Torvalds 1351da177e4SLinus Torvalds for (i = 0; i < j; i++) { 1361da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1371da177e4SLinus Torvalds i * mp->m_sb.sb_inodesize); 1381da177e4SLinus Torvalds if (!dip->di_next_unlinked) { 1391da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, 1401da177e4SLinus Torvalds "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", 1411da177e4SLinus Torvalds bp); 1421da177e4SLinus Torvalds ASSERT(dip->di_next_unlinked); 1431da177e4SLinus Torvalds } 1441da177e4SLinus Torvalds } 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds #endif 1471da177e4SLinus Torvalds 1481da177e4SLinus Torvalds /* 1491da177e4SLinus Torvalds * called from bwrite on xfs inode buffers 1501da177e4SLinus Torvalds */ 1511da177e4SLinus Torvalds void 1521da177e4SLinus Torvalds xfs_inobp_bwcheck(xfs_buf_t *bp) 1531da177e4SLinus Torvalds { 1541da177e4SLinus Torvalds xfs_mount_t *mp; 1551da177e4SLinus Torvalds int i; 1561da177e4SLinus Torvalds int j; 1571da177e4SLinus Torvalds xfs_dinode_t *dip; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds ASSERT(XFS_BUF_FSPRIVATE3(bp, void *) != NULL); 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); 1621da177e4SLinus Torvalds 1631da177e4SLinus Torvalds 1641da177e4SLinus Torvalds j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 1651da177e4SLinus Torvalds 1661da177e4SLinus Torvalds for (i = 0; i < j; i++) { 
1671da177e4SLinus Torvalds dip = (xfs_dinode_t *) xfs_buf_offset(bp, 1681da177e4SLinus Torvalds i * mp->m_sb.sb_inodesize); 1691da177e4SLinus Torvalds if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { 1701da177e4SLinus Torvalds cmn_err(CE_WARN, 1711da177e4SLinus Torvalds "Bad magic # 0x%x in XFS inode buffer 0x%Lx, starting blockno %Ld, offset 0x%x", 1721da177e4SLinus Torvalds INT_GET(dip->di_core.di_magic, ARCH_CONVERT), 1731da177e4SLinus Torvalds (__uint64_t)(__psunsigned_t) bp, 1741da177e4SLinus Torvalds (__int64_t) XFS_BUF_ADDR(bp), 1751da177e4SLinus Torvalds xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); 1761da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, mp, 1771da177e4SLinus Torvalds "corrupt, unmount and run xfs_repair"); 1781da177e4SLinus Torvalds } 1791da177e4SLinus Torvalds if (!dip->di_next_unlinked) { 1801da177e4SLinus Torvalds cmn_err(CE_WARN, 1811da177e4SLinus Torvalds "Bad next_unlinked field (0) in XFS inode buffer 0x%p, starting blockno %Ld, offset 0x%x", 1821da177e4SLinus Torvalds (__uint64_t)(__psunsigned_t) bp, 1831da177e4SLinus Torvalds (__int64_t) XFS_BUF_ADDR(bp), 1841da177e4SLinus Torvalds xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); 1851da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, mp, 1861da177e4SLinus Torvalds "corrupt, unmount and run xfs_repair"); 1871da177e4SLinus Torvalds } 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds 1901da177e4SLinus Torvalds return; 1911da177e4SLinus Torvalds } 1921da177e4SLinus Torvalds 1931da177e4SLinus Torvalds /* 1941da177e4SLinus Torvalds * This routine is called to map an inode number within a file 1951da177e4SLinus Torvalds * system to the buffer containing the on-disk version of the 1961da177e4SLinus Torvalds * inode. It returns a pointer to the buffer containing the 1971da177e4SLinus Torvalds * on-disk inode in the bpp parameter, and in the dip parameter 1981da177e4SLinus Torvalds * it returns a pointer to the on-disk inode within that buffer. 
1991da177e4SLinus Torvalds * 2001da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2011da177e4SLinus Torvalds * dipp are undefined. 2021da177e4SLinus Torvalds * 2031da177e4SLinus Torvalds * Use xfs_imap() to determine the size and location of the 2041da177e4SLinus Torvalds * buffer to read from disk. 2051da177e4SLinus Torvalds */ 2061da177e4SLinus Torvalds int 2071da177e4SLinus Torvalds xfs_inotobp( 2081da177e4SLinus Torvalds xfs_mount_t *mp, 2091da177e4SLinus Torvalds xfs_trans_t *tp, 2101da177e4SLinus Torvalds xfs_ino_t ino, 2111da177e4SLinus Torvalds xfs_dinode_t **dipp, 2121da177e4SLinus Torvalds xfs_buf_t **bpp, 2131da177e4SLinus Torvalds int *offset) 2141da177e4SLinus Torvalds { 2151da177e4SLinus Torvalds int di_ok; 2161da177e4SLinus Torvalds xfs_imap_t imap; 2171da177e4SLinus Torvalds xfs_buf_t *bp; 2181da177e4SLinus Torvalds int error; 2191da177e4SLinus Torvalds xfs_dinode_t *dip; 2201da177e4SLinus Torvalds 2211da177e4SLinus Torvalds /* 2221da177e4SLinus Torvalds * Call the space managment code to find the location of the 2231da177e4SLinus Torvalds * inode on disk. 2241da177e4SLinus Torvalds */ 2251da177e4SLinus Torvalds imap.im_blkno = 0; 2261da177e4SLinus Torvalds error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 2271da177e4SLinus Torvalds if (error != 0) { 2281da177e4SLinus Torvalds cmn_err(CE_WARN, 2291da177e4SLinus Torvalds "xfs_inotobp: xfs_imap() returned an " 2301da177e4SLinus Torvalds "error %d on %s. Returning error.", error, mp->m_fsname); 2311da177e4SLinus Torvalds return error; 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds /* 2351da177e4SLinus Torvalds * If the inode number maps to a block outside the bounds of the 2361da177e4SLinus Torvalds * file system then return NULL rather than calling read_buf 2371da177e4SLinus Torvalds * and panicing when we get an error from the driver. 
2381da177e4SLinus Torvalds */ 2391da177e4SLinus Torvalds if ((imap.im_blkno + imap.im_len) > 2401da177e4SLinus Torvalds XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 2411da177e4SLinus Torvalds cmn_err(CE_WARN, 2421da177e4SLinus Torvalds "xfs_inotobp: inode number (%d + %d) maps to a block outside the bounds " 2431da177e4SLinus Torvalds "of the file system %s. Returning EINVAL.", 2441da177e4SLinus Torvalds imap.im_blkno, imap.im_len,mp->m_fsname); 2451da177e4SLinus Torvalds return XFS_ERROR(EINVAL); 2461da177e4SLinus Torvalds } 2471da177e4SLinus Torvalds 2481da177e4SLinus Torvalds /* 2491da177e4SLinus Torvalds * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 2501da177e4SLinus Torvalds * default to just a read_buf() call. 2511da177e4SLinus Torvalds */ 2521da177e4SLinus Torvalds error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 2531da177e4SLinus Torvalds (int)imap.im_len, XFS_BUF_LOCK, &bp); 2541da177e4SLinus Torvalds 2551da177e4SLinus Torvalds if (error) { 2561da177e4SLinus Torvalds cmn_err(CE_WARN, 2571da177e4SLinus Torvalds "xfs_inotobp: xfs_trans_read_buf() returned an " 2581da177e4SLinus Torvalds "error %d on %s. 
Returning error.", error, mp->m_fsname); 2591da177e4SLinus Torvalds return error; 2601da177e4SLinus Torvalds } 2611da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); 2621da177e4SLinus Torvalds di_ok = 2631da177e4SLinus Torvalds INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && 2641da177e4SLinus Torvalds XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); 2651da177e4SLinus Torvalds if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 2661da177e4SLinus Torvalds XFS_RANDOM_ITOBP_INOTOBP))) { 2671da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); 2681da177e4SLinus Torvalds xfs_trans_brelse(tp, bp); 2691da177e4SLinus Torvalds cmn_err(CE_WARN, 2701da177e4SLinus Torvalds "xfs_inotobp: XFS_TEST_ERROR() returned an " 2711da177e4SLinus Torvalds "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); 2721da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 2731da177e4SLinus Torvalds } 2741da177e4SLinus Torvalds 2751da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds /* 2781da177e4SLinus Torvalds * Set *dipp to point to the on-disk inode in the buffer. 2791da177e4SLinus Torvalds */ 2801da177e4SLinus Torvalds *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2811da177e4SLinus Torvalds *bpp = bp; 2821da177e4SLinus Torvalds *offset = imap.im_boffset; 2831da177e4SLinus Torvalds return 0; 2841da177e4SLinus Torvalds } 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds 2871da177e4SLinus Torvalds /* 2881da177e4SLinus Torvalds * This routine is called to map an inode to the buffer containing 2891da177e4SLinus Torvalds * the on-disk version of the inode. It returns a pointer to the 2901da177e4SLinus Torvalds * buffer containing the on-disk inode in the bpp parameter, and in 2911da177e4SLinus Torvalds * the dip parameter it returns a pointer to the on-disk inode within 2921da177e4SLinus Torvalds * that buffer. 
2931da177e4SLinus Torvalds * 2941da177e4SLinus Torvalds * If a non-zero error is returned, then the contents of bpp and 2951da177e4SLinus Torvalds * dipp are undefined. 2961da177e4SLinus Torvalds * 2971da177e4SLinus Torvalds * If the inode is new and has not yet been initialized, use xfs_imap() 2981da177e4SLinus Torvalds * to determine the size and location of the buffer to read from disk. 2991da177e4SLinus Torvalds * If the inode has already been mapped to its buffer and read in once, 3001da177e4SLinus Torvalds * then use the mapping information stored in the inode rather than 3011da177e4SLinus Torvalds * calling xfs_imap(). This allows us to avoid the overhead of looking 3021da177e4SLinus Torvalds * at the inode btree for small block file systems (see xfs_dilocate()). 3031da177e4SLinus Torvalds * We can tell whether the inode has been mapped in before by comparing 3041da177e4SLinus Torvalds * its disk block address to 0. Only uninitialized inodes will have 3051da177e4SLinus Torvalds * 0 for the disk block address. 3061da177e4SLinus Torvalds */ 3071da177e4SLinus Torvalds int 3081da177e4SLinus Torvalds xfs_itobp( 3091da177e4SLinus Torvalds xfs_mount_t *mp, 3101da177e4SLinus Torvalds xfs_trans_t *tp, 3111da177e4SLinus Torvalds xfs_inode_t *ip, 3121da177e4SLinus Torvalds xfs_dinode_t **dipp, 3131da177e4SLinus Torvalds xfs_buf_t **bpp, 3141da177e4SLinus Torvalds xfs_daddr_t bno) 3151da177e4SLinus Torvalds { 3161da177e4SLinus Torvalds xfs_buf_t *bp; 3171da177e4SLinus Torvalds int error; 3181da177e4SLinus Torvalds xfs_imap_t imap; 3191da177e4SLinus Torvalds #ifdef __KERNEL__ 3201da177e4SLinus Torvalds int i; 3211da177e4SLinus Torvalds int ni; 3221da177e4SLinus Torvalds #endif 3231da177e4SLinus Torvalds 3241da177e4SLinus Torvalds if (ip->i_blkno == (xfs_daddr_t)0) { 3251da177e4SLinus Torvalds /* 3261da177e4SLinus Torvalds * Call the space management code to find the location of the 3271da177e4SLinus Torvalds * inode on disk. 
3281da177e4SLinus Torvalds */ 3291da177e4SLinus Torvalds imap.im_blkno = bno; 3301da177e4SLinus Torvalds error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP); 3311da177e4SLinus Torvalds if (error != 0) { 3321da177e4SLinus Torvalds return error; 3331da177e4SLinus Torvalds } 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds /* 3361da177e4SLinus Torvalds * If the inode number maps to a block outside the bounds 3371da177e4SLinus Torvalds * of the file system then return NULL rather than calling 3381da177e4SLinus Torvalds * read_buf and panicing when we get an error from the 3391da177e4SLinus Torvalds * driver. 3401da177e4SLinus Torvalds */ 3411da177e4SLinus Torvalds if ((imap.im_blkno + imap.im_len) > 3421da177e4SLinus Torvalds XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 3431da177e4SLinus Torvalds #ifdef DEBUG 3441da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 3451da177e4SLinus Torvalds "(imap.im_blkno (0x%llx) " 3461da177e4SLinus Torvalds "+ imap.im_len (0x%llx)) > " 3471da177e4SLinus Torvalds " XFS_FSB_TO_BB(mp, " 3481da177e4SLinus Torvalds "mp->m_sb.sb_dblocks) (0x%llx)", 3491da177e4SLinus Torvalds (unsigned long long) imap.im_blkno, 3501da177e4SLinus Torvalds (unsigned long long) imap.im_len, 3511da177e4SLinus Torvalds XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 3521da177e4SLinus Torvalds #endif /* DEBUG */ 3531da177e4SLinus Torvalds return XFS_ERROR(EINVAL); 3541da177e4SLinus Torvalds } 3551da177e4SLinus Torvalds 3561da177e4SLinus Torvalds /* 3571da177e4SLinus Torvalds * Fill in the fields in the inode that will be used to 3581da177e4SLinus Torvalds * map the inode to its buffer from now on. 
3591da177e4SLinus Torvalds */ 3601da177e4SLinus Torvalds ip->i_blkno = imap.im_blkno; 3611da177e4SLinus Torvalds ip->i_len = imap.im_len; 3621da177e4SLinus Torvalds ip->i_boffset = imap.im_boffset; 3631da177e4SLinus Torvalds } else { 3641da177e4SLinus Torvalds /* 3651da177e4SLinus Torvalds * We've already mapped the inode once, so just use the 3661da177e4SLinus Torvalds * mapping that we saved the first time. 3671da177e4SLinus Torvalds */ 3681da177e4SLinus Torvalds imap.im_blkno = ip->i_blkno; 3691da177e4SLinus Torvalds imap.im_len = ip->i_len; 3701da177e4SLinus Torvalds imap.im_boffset = ip->i_boffset; 3711da177e4SLinus Torvalds } 3721da177e4SLinus Torvalds ASSERT(bno == 0 || bno == imap.im_blkno); 3731da177e4SLinus Torvalds 3741da177e4SLinus Torvalds /* 3751da177e4SLinus Torvalds * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will 3761da177e4SLinus Torvalds * default to just a read_buf() call. 3771da177e4SLinus Torvalds */ 3781da177e4SLinus Torvalds error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, 3791da177e4SLinus Torvalds (int)imap.im_len, XFS_BUF_LOCK, &bp); 3801da177e4SLinus Torvalds 3811da177e4SLinus Torvalds if (error) { 3821da177e4SLinus Torvalds #ifdef DEBUG 3831da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " 3841da177e4SLinus Torvalds "xfs_trans_read_buf() returned error %d, " 3851da177e4SLinus Torvalds "imap.im_blkno 0x%llx, imap.im_len 0x%llx", 3861da177e4SLinus Torvalds error, (unsigned long long) imap.im_blkno, 3871da177e4SLinus Torvalds (unsigned long long) imap.im_len); 3881da177e4SLinus Torvalds #endif /* DEBUG */ 3891da177e4SLinus Torvalds return error; 3901da177e4SLinus Torvalds } 3911da177e4SLinus Torvalds #ifdef __KERNEL__ 3921da177e4SLinus Torvalds /* 3931da177e4SLinus Torvalds * Validate the magic number and version of every inode in the buffer 3941da177e4SLinus Torvalds * (if DEBUG kernel) or the first inode in the buffer, otherwise. 
3951da177e4SLinus Torvalds */ 3961da177e4SLinus Torvalds #ifdef DEBUG 3971da177e4SLinus Torvalds ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; 3981da177e4SLinus Torvalds #else 3991da177e4SLinus Torvalds ni = 1; 4001da177e4SLinus Torvalds #endif 4011da177e4SLinus Torvalds for (i = 0; i < ni; i++) { 4021da177e4SLinus Torvalds int di_ok; 4031da177e4SLinus Torvalds xfs_dinode_t *dip; 4041da177e4SLinus Torvalds 4051da177e4SLinus Torvalds dip = (xfs_dinode_t *)xfs_buf_offset(bp, 4061da177e4SLinus Torvalds (i << mp->m_sb.sb_inodelog)); 4071da177e4SLinus Torvalds di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && 4081da177e4SLinus Torvalds XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); 4091da177e4SLinus Torvalds if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 4101da177e4SLinus Torvalds XFS_RANDOM_ITOBP_INOTOBP))) { 4111da177e4SLinus Torvalds #ifdef DEBUG 4121da177e4SLinus Torvalds prdev("bad inode magic/vsn daddr %lld #%d (magic=%x)", 4131da177e4SLinus Torvalds mp->m_ddev_targp, 4141da177e4SLinus Torvalds (unsigned long long)imap.im_blkno, i, 4151da177e4SLinus Torvalds INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); 4161da177e4SLinus Torvalds #endif 4171da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, 4181da177e4SLinus Torvalds mp, dip); 4191da177e4SLinus Torvalds xfs_trans_brelse(tp, bp); 4201da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4211da177e4SLinus Torvalds } 4221da177e4SLinus Torvalds } 4231da177e4SLinus Torvalds #endif /* __KERNEL__ */ 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds xfs_inobp_check(mp, bp); 4261da177e4SLinus Torvalds 4271da177e4SLinus Torvalds /* 4281da177e4SLinus Torvalds * Mark the buffer as an inode buffer now that it looks good 4291da177e4SLinus Torvalds */ 4301da177e4SLinus Torvalds XFS_BUF_SET_VTYPE(bp, B_FS_INO); 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds /* 4331da177e4SLinus Torvalds * Set *dipp to point to the 
on-disk inode in the buffer. 4341da177e4SLinus Torvalds */ 4351da177e4SLinus Torvalds *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 4361da177e4SLinus Torvalds *bpp = bp; 4371da177e4SLinus Torvalds return 0; 4381da177e4SLinus Torvalds } 4391da177e4SLinus Torvalds 4401da177e4SLinus Torvalds /* 4411da177e4SLinus Torvalds * Move inode type and inode format specific information from the 4421da177e4SLinus Torvalds * on-disk inode to the in-core inode. For fifos, devs, and sockets 4431da177e4SLinus Torvalds * this means set if_rdev to the proper value. For files, directories, 4441da177e4SLinus Torvalds * and symlinks this means to bring in the in-line data or extent 4451da177e4SLinus Torvalds * pointers. For a file in B-tree format, only the root is immediately 4461da177e4SLinus Torvalds * brought in-core. The rest will be in-lined in if_extents when it 4471da177e4SLinus Torvalds * is first referenced (see xfs_iread_extents()). 4481da177e4SLinus Torvalds */ 4491da177e4SLinus Torvalds STATIC int 4501da177e4SLinus Torvalds xfs_iformat( 4511da177e4SLinus Torvalds xfs_inode_t *ip, 4521da177e4SLinus Torvalds xfs_dinode_t *dip) 4531da177e4SLinus Torvalds { 4541da177e4SLinus Torvalds xfs_attr_shortform_t *atp; 4551da177e4SLinus Torvalds int size; 4561da177e4SLinus Torvalds int error; 4571da177e4SLinus Torvalds xfs_fsize_t di_size; 4581da177e4SLinus Torvalds ip->i_df.if_ext_max = 4591da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 4601da177e4SLinus Torvalds error = 0; 4611da177e4SLinus Torvalds 4621da177e4SLinus Torvalds if (unlikely( 4631da177e4SLinus Torvalds INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + 4641da177e4SLinus Torvalds INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) > 4651da177e4SLinus Torvalds INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) { 4661da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 4671da177e4SLinus Torvalds "corrupt dinode %Lu, extent total = %d, nblocks = %Lu." 
4681da177e4SLinus Torvalds " Unmount and run xfs_repair.", 4691da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 4701da177e4SLinus Torvalds (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) 4711da177e4SLinus Torvalds + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)), 4721da177e4SLinus Torvalds (unsigned long long) 4731da177e4SLinus Torvalds INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)); 4741da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 4751da177e4SLinus Torvalds ip->i_mount, dip); 4761da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4771da177e4SLinus Torvalds } 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) { 4801da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 4811da177e4SLinus Torvalds "corrupt dinode %Lu, forkoff = 0x%x." 4821da177e4SLinus Torvalds " Unmount and run xfs_repair.", 4831da177e4SLinus Torvalds (unsigned long long)ip->i_ino, 4841da177e4SLinus Torvalds (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT))); 4851da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 4861da177e4SLinus Torvalds ip->i_mount, dip); 4871da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4881da177e4SLinus Torvalds } 4891da177e4SLinus Torvalds 4901da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 4911da177e4SLinus Torvalds case S_IFIFO: 4921da177e4SLinus Torvalds case S_IFCHR: 4931da177e4SLinus Torvalds case S_IFBLK: 4941da177e4SLinus Torvalds case S_IFSOCK: 4951da177e4SLinus Torvalds if (unlikely(INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV)) { 4961da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 4971da177e4SLinus Torvalds ip->i_mount, dip); 4981da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 4991da177e4SLinus Torvalds } 5001da177e4SLinus Torvalds ip->i_d.di_size = 0; 5011da177e4SLinus Torvalds 
ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); 5021da177e4SLinus Torvalds break; 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds case S_IFREG: 5051da177e4SLinus Torvalds case S_IFLNK: 5061da177e4SLinus Torvalds case S_IFDIR: 5071da177e4SLinus Torvalds switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) { 5081da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 5091da177e4SLinus Torvalds /* 5101da177e4SLinus Torvalds * no local regular files yet 5111da177e4SLinus Torvalds */ 5121da177e4SLinus Torvalds if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) { 5131da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 5141da177e4SLinus Torvalds "corrupt inode (local format for regular file) %Lu. Unmount and run xfs_repair.", 5151da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 5161da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(4)", 5171da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5181da177e4SLinus Torvalds ip->i_mount, dip); 5191da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5201da177e4SLinus Torvalds } 5211da177e4SLinus Torvalds 5221da177e4SLinus Torvalds di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT); 5231da177e4SLinus Torvalds if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 5241da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 5251da177e4SLinus Torvalds "corrupt inode %Lu (bad size %Ld for local inode). 
Unmount and run xfs_repair.", 5261da177e4SLinus Torvalds (unsigned long long) ip->i_ino, 5271da177e4SLinus Torvalds (long long) di_size); 5281da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat(5)", 5291da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 5301da177e4SLinus Torvalds ip->i_mount, dip); 5311da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5321da177e4SLinus Torvalds } 5331da177e4SLinus Torvalds 5341da177e4SLinus Torvalds size = (int)di_size; 5351da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); 5361da177e4SLinus Torvalds break; 5371da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 5381da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); 5391da177e4SLinus Torvalds break; 5401da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 5411da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); 5421da177e4SLinus Torvalds break; 5431da177e4SLinus Torvalds default: 5441da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 5451da177e4SLinus Torvalds ip->i_mount); 5461da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5471da177e4SLinus Torvalds } 5481da177e4SLinus Torvalds break; 5491da177e4SLinus Torvalds 5501da177e4SLinus Torvalds default: 5511da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 5521da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 5531da177e4SLinus Torvalds } 5541da177e4SLinus Torvalds if (error) { 5551da177e4SLinus Torvalds return error; 5561da177e4SLinus Torvalds } 5571da177e4SLinus Torvalds if (!XFS_DFORK_Q(dip)) 5581da177e4SLinus Torvalds return 0; 5591da177e4SLinus Torvalds ASSERT(ip->i_afp == NULL); 5601da177e4SLinus Torvalds ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 5611da177e4SLinus Torvalds ip->i_afp->if_ext_max = 5621da177e4SLinus Torvalds XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 5631da177e4SLinus Torvalds switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { 
5641da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 5651da177e4SLinus Torvalds atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 5661da177e4SLinus Torvalds size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT); 5671da177e4SLinus Torvalds error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 5681da177e4SLinus Torvalds break; 5691da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 5701da177e4SLinus Torvalds error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); 5711da177e4SLinus Torvalds break; 5721da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 5731da177e4SLinus Torvalds error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 5741da177e4SLinus Torvalds break; 5751da177e4SLinus Torvalds default: 5761da177e4SLinus Torvalds error = XFS_ERROR(EFSCORRUPTED); 5771da177e4SLinus Torvalds break; 5781da177e4SLinus Torvalds } 5791da177e4SLinus Torvalds if (error) { 5801da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 5811da177e4SLinus Torvalds ip->i_afp = NULL; 5821da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 5831da177e4SLinus Torvalds } 5841da177e4SLinus Torvalds return error; 5851da177e4SLinus Torvalds } 5861da177e4SLinus Torvalds 5871da177e4SLinus Torvalds /* 5881da177e4SLinus Torvalds * The file is in-lined in the on-disk inode. 5891da177e4SLinus Torvalds * If it fits into if_inline_data, then copy 5901da177e4SLinus Torvalds * it there, otherwise allocate a buffer for it 5911da177e4SLinus Torvalds * and copy the data there. Either way, set 5921da177e4SLinus Torvalds * if_data to point at the data. 5931da177e4SLinus Torvalds * If we allocate a buffer for the data, make 5941da177e4SLinus Torvalds * sure that its size is a multiple of 4 and 5951da177e4SLinus Torvalds * record the real size in i_real_bytes. 
5961da177e4SLinus Torvalds */ 5971da177e4SLinus Torvalds STATIC int 5981da177e4SLinus Torvalds xfs_iformat_local( 5991da177e4SLinus Torvalds xfs_inode_t *ip, 6001da177e4SLinus Torvalds xfs_dinode_t *dip, 6011da177e4SLinus Torvalds int whichfork, 6021da177e4SLinus Torvalds int size) 6031da177e4SLinus Torvalds { 6041da177e4SLinus Torvalds xfs_ifork_t *ifp; 6051da177e4SLinus Torvalds int real_size; 6061da177e4SLinus Torvalds 6071da177e4SLinus Torvalds /* 6081da177e4SLinus Torvalds * If the size is unreasonable, then something 6091da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 6101da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 6111da177e4SLinus Torvalds */ 6121da177e4SLinus Torvalds if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 6131da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 6141da177e4SLinus Torvalds "corrupt inode %Lu (bad size %d for local fork, size = %d). Unmount and run xfs_repair.", 6151da177e4SLinus Torvalds (unsigned long long) ip->i_ino, size, 6161da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 6171da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 6181da177e4SLinus Torvalds ip->i_mount, dip); 6191da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6201da177e4SLinus Torvalds } 6211da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 6221da177e4SLinus Torvalds real_size = 0; 6231da177e4SLinus Torvalds if (size == 0) 6241da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 6251da177e4SLinus Torvalds else if (size <= sizeof(ifp->if_u2.if_inline_data)) 6261da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 6271da177e4SLinus Torvalds else { 6281da177e4SLinus Torvalds real_size = roundup(size, 4); 6291da177e4SLinus Torvalds ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 6301da177e4SLinus Torvalds } 6311da177e4SLinus Torvalds ifp->if_bytes = size; 6321da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 
6331da177e4SLinus Torvalds if (size) 6341da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); 6351da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 6361da177e4SLinus Torvalds ifp->if_flags |= XFS_IFINLINE; 6371da177e4SLinus Torvalds return 0; 6381da177e4SLinus Torvalds } 6391da177e4SLinus Torvalds 6401da177e4SLinus Torvalds /* 6411da177e4SLinus Torvalds * The file consists of a set of extents all 6421da177e4SLinus Torvalds * of which fit into the on-disk inode. 6431da177e4SLinus Torvalds * If there are few enough extents to fit into 6441da177e4SLinus Torvalds * the if_inline_ext, then copy them there. 6451da177e4SLinus Torvalds * Otherwise allocate a buffer for them and copy 6461da177e4SLinus Torvalds * them into it. Either way, set if_extents 6471da177e4SLinus Torvalds * to point at the extents. 6481da177e4SLinus Torvalds */ 6491da177e4SLinus Torvalds STATIC int 6501da177e4SLinus Torvalds xfs_iformat_extents( 6511da177e4SLinus Torvalds xfs_inode_t *ip, 6521da177e4SLinus Torvalds xfs_dinode_t *dip, 6531da177e4SLinus Torvalds int whichfork) 6541da177e4SLinus Torvalds { 6551da177e4SLinus Torvalds xfs_bmbt_rec_t *ep, *dp; 6561da177e4SLinus Torvalds xfs_ifork_t *ifp; 6571da177e4SLinus Torvalds int nex; 6581da177e4SLinus Torvalds int real_size; 6591da177e4SLinus Torvalds int size; 6601da177e4SLinus Torvalds int i; 6611da177e4SLinus Torvalds 6621da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 6631da177e4SLinus Torvalds nex = XFS_DFORK_NEXTENTS(dip, whichfork); 6641da177e4SLinus Torvalds size = nex * (uint)sizeof(xfs_bmbt_rec_t); 6651da177e4SLinus Torvalds 6661da177e4SLinus Torvalds /* 6671da177e4SLinus Torvalds * If the number of extents is unreasonable, then something 6681da177e4SLinus Torvalds * is wrong and we just bail out rather than crash in 6691da177e4SLinus Torvalds * kmem_alloc() or memcpy() below. 
6701da177e4SLinus Torvalds */ 6711da177e4SLinus Torvalds if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 6721da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 6731da177e4SLinus Torvalds "corrupt inode %Lu ((a)extents = %d). Unmount and run xfs_repair.", 6741da177e4SLinus Torvalds (unsigned long long) ip->i_ino, nex); 6751da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 6761da177e4SLinus Torvalds ip->i_mount, dip); 6771da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 6781da177e4SLinus Torvalds } 6791da177e4SLinus Torvalds 6801da177e4SLinus Torvalds real_size = 0; 6811da177e4SLinus Torvalds if (nex == 0) 6821da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 6831da177e4SLinus Torvalds else if (nex <= XFS_INLINE_EXTS) 6841da177e4SLinus Torvalds ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 6851da177e4SLinus Torvalds else { 6861da177e4SLinus Torvalds ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); 6871da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents != NULL); 6881da177e4SLinus Torvalds real_size = size; 6891da177e4SLinus Torvalds } 6901da177e4SLinus Torvalds ifp->if_bytes = size; 6911da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 6921da177e4SLinus Torvalds if (size) { 6931da177e4SLinus Torvalds dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 6941da177e4SLinus Torvalds xfs_validate_extents(dp, nex, 1, XFS_EXTFMT_INODE(ip)); 6951da177e4SLinus Torvalds ep = ifp->if_u1.if_extents; 6961da177e4SLinus Torvalds for (i = 0; i < nex; i++, ep++, dp++) { 6971da177e4SLinus Torvalds ep->l0 = INT_GET(get_unaligned((__uint64_t*)&dp->l0), 6981da177e4SLinus Torvalds ARCH_CONVERT); 6991da177e4SLinus Torvalds ep->l1 = INT_GET(get_unaligned((__uint64_t*)&dp->l1), 7001da177e4SLinus Torvalds ARCH_CONVERT); 7011da177e4SLinus Torvalds } 7021da177e4SLinus Torvalds xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, 7031da177e4SLinus Torvalds whichfork); 7041da177e4SLinus Torvalds 
if (whichfork != XFS_DATA_FORK || 7051da177e4SLinus Torvalds XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) 7061da177e4SLinus Torvalds if (unlikely(xfs_check_nostate_extents( 7071da177e4SLinus Torvalds ifp->if_u1.if_extents, nex))) { 7081da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat_extents(2)", 7091da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, 7101da177e4SLinus Torvalds ip->i_mount); 7111da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 7121da177e4SLinus Torvalds } 7131da177e4SLinus Torvalds } 7141da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 7151da177e4SLinus Torvalds return 0; 7161da177e4SLinus Torvalds } 7171da177e4SLinus Torvalds 7181da177e4SLinus Torvalds /* 7191da177e4SLinus Torvalds * The file has too many extents to fit into 7201da177e4SLinus Torvalds * the inode, so they are in B-tree format. 7211da177e4SLinus Torvalds * Allocate a buffer for the root of the B-tree 7221da177e4SLinus Torvalds * and copy the root into it. The i_extents 7231da177e4SLinus Torvalds * field will remain NULL until all of the 7241da177e4SLinus Torvalds * extents are read in (when they are needed). 
7251da177e4SLinus Torvalds */ 7261da177e4SLinus Torvalds STATIC int 7271da177e4SLinus Torvalds xfs_iformat_btree( 7281da177e4SLinus Torvalds xfs_inode_t *ip, 7291da177e4SLinus Torvalds xfs_dinode_t *dip, 7301da177e4SLinus Torvalds int whichfork) 7311da177e4SLinus Torvalds { 7321da177e4SLinus Torvalds xfs_bmdr_block_t *dfp; 7331da177e4SLinus Torvalds xfs_ifork_t *ifp; 7341da177e4SLinus Torvalds /* REFERENCED */ 7351da177e4SLinus Torvalds int nrecs; 7361da177e4SLinus Torvalds int size; 7371da177e4SLinus Torvalds 7381da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 7391da177e4SLinus Torvalds dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 7401da177e4SLinus Torvalds size = XFS_BMAP_BROOT_SPACE(dfp); 7411da177e4SLinus Torvalds nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); 7421da177e4SLinus Torvalds 7431da177e4SLinus Torvalds /* 7441da177e4SLinus Torvalds * blow out if -- fork has less extents than can fit in 7451da177e4SLinus Torvalds * fork (fork shouldn't be a btree format), root btree 7461da177e4SLinus Torvalds * block has more records than can fit into the fork, 7471da177e4SLinus Torvalds * or the number of extents is greater than the number of 7481da177e4SLinus Torvalds * blocks. 7491da177e4SLinus Torvalds */ 7501da177e4SLinus Torvalds if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max 7511da177e4SLinus Torvalds || XFS_BMDR_SPACE_CALC(nrecs) > 7521da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 7531da177e4SLinus Torvalds || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 7541da177e4SLinus Torvalds xfs_fs_cmn_err(CE_WARN, ip->i_mount, 7551da177e4SLinus Torvalds "corrupt inode %Lu (btree). 
Unmount and run xfs_repair.", 7561da177e4SLinus Torvalds (unsigned long long) ip->i_ino); 7571da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 7581da177e4SLinus Torvalds ip->i_mount); 7591da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 7601da177e4SLinus Torvalds } 7611da177e4SLinus Torvalds 7621da177e4SLinus Torvalds ifp->if_broot_bytes = size; 7631da177e4SLinus Torvalds ifp->if_broot = kmem_alloc(size, KM_SLEEP); 7641da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 7651da177e4SLinus Torvalds /* 7661da177e4SLinus Torvalds * Copy and convert from the on-disk structure 7671da177e4SLinus Torvalds * to the in-memory structure. 7681da177e4SLinus Torvalds */ 7691da177e4SLinus Torvalds xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 7701da177e4SLinus Torvalds ifp->if_broot, size); 7711da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 7721da177e4SLinus Torvalds ifp->if_flags |= XFS_IFBROOT; 7731da177e4SLinus Torvalds 7741da177e4SLinus Torvalds return 0; 7751da177e4SLinus Torvalds } 7761da177e4SLinus Torvalds 7771da177e4SLinus Torvalds /* 7781da177e4SLinus Torvalds * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk 7791da177e4SLinus Torvalds * and native format 7801da177e4SLinus Torvalds * 7811da177e4SLinus Torvalds * buf = on-disk representation 7821da177e4SLinus Torvalds * dip = native representation 7831da177e4SLinus Torvalds * dir = direction - +ve -> disk to native 7841da177e4SLinus Torvalds * -ve -> native to disk 7851da177e4SLinus Torvalds */ 7861da177e4SLinus Torvalds void 7871da177e4SLinus Torvalds xfs_xlate_dinode_core( 7881da177e4SLinus Torvalds xfs_caddr_t buf, 7891da177e4SLinus Torvalds xfs_dinode_core_t *dip, 7901da177e4SLinus Torvalds int dir) 7911da177e4SLinus Torvalds { 7921da177e4SLinus Torvalds xfs_dinode_core_t *buf_core = (xfs_dinode_core_t *)buf; 7931da177e4SLinus Torvalds xfs_dinode_core_t *mem_core = (xfs_dinode_core_t *)dip; 7941da177e4SLinus Torvalds xfs_arch_t 
arch = ARCH_CONVERT; 7951da177e4SLinus Torvalds 7961da177e4SLinus Torvalds ASSERT(dir); 7971da177e4SLinus Torvalds 7981da177e4SLinus Torvalds INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); 7991da177e4SLinus Torvalds INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); 8001da177e4SLinus Torvalds INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); 8011da177e4SLinus Torvalds INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); 8021da177e4SLinus Torvalds INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); 8031da177e4SLinus Torvalds INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); 8041da177e4SLinus Torvalds INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); 8051da177e4SLinus Torvalds INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); 8061da177e4SLinus Torvalds INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds if (dir > 0) { 8091da177e4SLinus Torvalds memcpy(mem_core->di_pad, buf_core->di_pad, 8101da177e4SLinus Torvalds sizeof(buf_core->di_pad)); 8111da177e4SLinus Torvalds } else { 8121da177e4SLinus Torvalds memcpy(buf_core->di_pad, mem_core->di_pad, 8131da177e4SLinus Torvalds sizeof(buf_core->di_pad)); 8141da177e4SLinus Torvalds } 8151da177e4SLinus Torvalds 8161da177e4SLinus Torvalds INT_XLATE(buf_core->di_flushiter, mem_core->di_flushiter, dir, arch); 8171da177e4SLinus Torvalds 8181da177e4SLinus Torvalds INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec, 8191da177e4SLinus Torvalds dir, arch); 8201da177e4SLinus Torvalds INT_XLATE(buf_core->di_atime.t_nsec, mem_core->di_atime.t_nsec, 8211da177e4SLinus Torvalds dir, arch); 8221da177e4SLinus Torvalds INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec, 8231da177e4SLinus Torvalds dir, arch); 8241da177e4SLinus Torvalds INT_XLATE(buf_core->di_mtime.t_nsec, mem_core->di_mtime.t_nsec, 8251da177e4SLinus Torvalds dir, arch); 8261da177e4SLinus Torvalds 
INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec, 8271da177e4SLinus Torvalds dir, arch); 8281da177e4SLinus Torvalds INT_XLATE(buf_core->di_ctime.t_nsec, mem_core->di_ctime.t_nsec, 8291da177e4SLinus Torvalds dir, arch); 8301da177e4SLinus Torvalds INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch); 8311da177e4SLinus Torvalds INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch); 8321da177e4SLinus Torvalds INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch); 8331da177e4SLinus Torvalds INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch); 8341da177e4SLinus Torvalds INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch); 8351da177e4SLinus Torvalds INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch); 8361da177e4SLinus Torvalds INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch); 8371da177e4SLinus Torvalds INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch); 8381da177e4SLinus Torvalds INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch); 8391da177e4SLinus Torvalds INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch); 8401da177e4SLinus Torvalds INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch); 8411da177e4SLinus Torvalds } 8421da177e4SLinus Torvalds 8431da177e4SLinus Torvalds STATIC uint 8441da177e4SLinus Torvalds _xfs_dic2xflags( 8451da177e4SLinus Torvalds xfs_dinode_core_t *dic, 8461da177e4SLinus Torvalds __uint16_t di_flags) 8471da177e4SLinus Torvalds { 8481da177e4SLinus Torvalds uint flags = 0; 8491da177e4SLinus Torvalds 8501da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_ANY) { 8511da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_REALTIME) 8521da177e4SLinus Torvalds flags |= XFS_XFLAG_REALTIME; 8531da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PREALLOC) 8541da177e4SLinus Torvalds flags |= XFS_XFLAG_PREALLOC; 8551da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_IMMUTABLE) 8561da177e4SLinus Torvalds flags |= XFS_XFLAG_IMMUTABLE; 
8571da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_APPEND) 8581da177e4SLinus Torvalds flags |= XFS_XFLAG_APPEND; 8591da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_SYNC) 8601da177e4SLinus Torvalds flags |= XFS_XFLAG_SYNC; 8611da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOATIME) 8621da177e4SLinus Torvalds flags |= XFS_XFLAG_NOATIME; 8631da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NODUMP) 8641da177e4SLinus Torvalds flags |= XFS_XFLAG_NODUMP; 8651da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_RTINHERIT) 8661da177e4SLinus Torvalds flags |= XFS_XFLAG_RTINHERIT; 8671da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_PROJINHERIT) 8681da177e4SLinus Torvalds flags |= XFS_XFLAG_PROJINHERIT; 8691da177e4SLinus Torvalds if (di_flags & XFS_DIFLAG_NOSYMLINKS) 8701da177e4SLinus Torvalds flags |= XFS_XFLAG_NOSYMLINKS; 8711da177e4SLinus Torvalds } 8721da177e4SLinus Torvalds 8731da177e4SLinus Torvalds return flags; 8741da177e4SLinus Torvalds } 8751da177e4SLinus Torvalds 8761da177e4SLinus Torvalds uint 8771da177e4SLinus Torvalds xfs_ip2xflags( 8781da177e4SLinus Torvalds xfs_inode_t *ip) 8791da177e4SLinus Torvalds { 8801da177e4SLinus Torvalds xfs_dinode_core_t *dic = &ip->i_d; 8811da177e4SLinus Torvalds 8821da177e4SLinus Torvalds return _xfs_dic2xflags(dic, dic->di_flags) | 8831da177e4SLinus Torvalds (XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0); 8841da177e4SLinus Torvalds } 8851da177e4SLinus Torvalds 8861da177e4SLinus Torvalds uint 8871da177e4SLinus Torvalds xfs_dic2xflags( 8881da177e4SLinus Torvalds xfs_dinode_core_t *dic) 8891da177e4SLinus Torvalds { 8901da177e4SLinus Torvalds return _xfs_dic2xflags(dic, INT_GET(dic->di_flags, ARCH_CONVERT)) | 8911da177e4SLinus Torvalds (XFS_CFORK_Q_DISK(dic) ? 
XFS_XFLAG_HASATTR : 0); 8921da177e4SLinus Torvalds } 8931da177e4SLinus Torvalds 8941da177e4SLinus Torvalds /* 8951da177e4SLinus Torvalds * Given a mount structure and an inode number, return a pointer 8961da177e4SLinus Torvalds * to a newly allocated in-core inode coresponding to the given 8971da177e4SLinus Torvalds * inode number. 8981da177e4SLinus Torvalds * 8991da177e4SLinus Torvalds * Initialize the inode's attributes and extent pointers if it 9001da177e4SLinus Torvalds * already has them (it will not if the inode has no links). 9011da177e4SLinus Torvalds */ 9021da177e4SLinus Torvalds int 9031da177e4SLinus Torvalds xfs_iread( 9041da177e4SLinus Torvalds xfs_mount_t *mp, 9051da177e4SLinus Torvalds xfs_trans_t *tp, 9061da177e4SLinus Torvalds xfs_ino_t ino, 9071da177e4SLinus Torvalds xfs_inode_t **ipp, 9081da177e4SLinus Torvalds xfs_daddr_t bno) 9091da177e4SLinus Torvalds { 9101da177e4SLinus Torvalds xfs_buf_t *bp; 9111da177e4SLinus Torvalds xfs_dinode_t *dip; 9121da177e4SLinus Torvalds xfs_inode_t *ip; 9131da177e4SLinus Torvalds int error; 9141da177e4SLinus Torvalds 9151da177e4SLinus Torvalds ASSERT(xfs_inode_zone != NULL); 9161da177e4SLinus Torvalds 9171da177e4SLinus Torvalds ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); 9181da177e4SLinus Torvalds ip->i_ino = ino; 9191da177e4SLinus Torvalds ip->i_mount = mp; 9201da177e4SLinus Torvalds 9211da177e4SLinus Torvalds /* 9221da177e4SLinus Torvalds * Get pointer's to the on-disk inode and the buffer containing it. 9231da177e4SLinus Torvalds * If the inode number refers to a block outside the file system 9241da177e4SLinus Torvalds * then xfs_itobp() will return NULL. In this case we should 9251da177e4SLinus Torvalds * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will 9261da177e4SLinus Torvalds * know that this is a new incore inode. 
9271da177e4SLinus Torvalds */ 9281da177e4SLinus Torvalds error = xfs_itobp(mp, tp, ip, &dip, &bp, bno); 9291da177e4SLinus Torvalds 9301da177e4SLinus Torvalds if (error != 0) { 9311da177e4SLinus Torvalds kmem_zone_free(xfs_inode_zone, ip); 9321da177e4SLinus Torvalds return error; 9331da177e4SLinus Torvalds } 9341da177e4SLinus Torvalds 9351da177e4SLinus Torvalds /* 9361da177e4SLinus Torvalds * Initialize inode's trace buffers. 9371da177e4SLinus Torvalds * Do this before xfs_iformat in case it adds entries. 9381da177e4SLinus Torvalds */ 9391da177e4SLinus Torvalds #ifdef XFS_BMAP_TRACE 9401da177e4SLinus Torvalds ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); 9411da177e4SLinus Torvalds #endif 9421da177e4SLinus Torvalds #ifdef XFS_BMBT_TRACE 9431da177e4SLinus Torvalds ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); 9441da177e4SLinus Torvalds #endif 9451da177e4SLinus Torvalds #ifdef XFS_RW_TRACE 9461da177e4SLinus Torvalds ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); 9471da177e4SLinus Torvalds #endif 9481da177e4SLinus Torvalds #ifdef XFS_ILOCK_TRACE 9491da177e4SLinus Torvalds ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); 9501da177e4SLinus Torvalds #endif 9511da177e4SLinus Torvalds #ifdef XFS_DIR2_TRACE 9521da177e4SLinus Torvalds ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); 9531da177e4SLinus Torvalds #endif 9541da177e4SLinus Torvalds 9551da177e4SLinus Torvalds /* 9561da177e4SLinus Torvalds * If we got something that isn't an inode it means someone 9571da177e4SLinus Torvalds * (nfs or dmi) has a stale handle. 
9581da177e4SLinus Torvalds */ 9591da177e4SLinus Torvalds if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { 9601da177e4SLinus Torvalds kmem_zone_free(xfs_inode_zone, ip); 9611da177e4SLinus Torvalds xfs_trans_brelse(tp, bp); 9621da177e4SLinus Torvalds #ifdef DEBUG 9631da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 9641da177e4SLinus Torvalds "dip->di_core.di_magic (0x%x) != " 9651da177e4SLinus Torvalds "XFS_DINODE_MAGIC (0x%x)", 9661da177e4SLinus Torvalds INT_GET(dip->di_core.di_magic, ARCH_CONVERT), 9671da177e4SLinus Torvalds XFS_DINODE_MAGIC); 9681da177e4SLinus Torvalds #endif /* DEBUG */ 9691da177e4SLinus Torvalds return XFS_ERROR(EINVAL); 9701da177e4SLinus Torvalds } 9711da177e4SLinus Torvalds 9721da177e4SLinus Torvalds /* 9731da177e4SLinus Torvalds * If the on-disk inode is already linked to a directory 9741da177e4SLinus Torvalds * entry, copy all of the inode into the in-core inode. 9751da177e4SLinus Torvalds * xfs_iformat() handles copying in the inode format 9761da177e4SLinus Torvalds * specific information. 9771da177e4SLinus Torvalds * Otherwise, just get the truly permanent information. 
9781da177e4SLinus Torvalds */ 9791da177e4SLinus Torvalds if (dip->di_core.di_mode) { 9801da177e4SLinus Torvalds xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, 9811da177e4SLinus Torvalds &(ip->i_d), 1); 9821da177e4SLinus Torvalds error = xfs_iformat(ip, dip); 9831da177e4SLinus Torvalds if (error) { 9841da177e4SLinus Torvalds kmem_zone_free(xfs_inode_zone, ip); 9851da177e4SLinus Torvalds xfs_trans_brelse(tp, bp); 9861da177e4SLinus Torvalds #ifdef DEBUG 9871da177e4SLinus Torvalds xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 9881da177e4SLinus Torvalds "xfs_iformat() returned error %d", 9891da177e4SLinus Torvalds error); 9901da177e4SLinus Torvalds #endif /* DEBUG */ 9911da177e4SLinus Torvalds return error; 9921da177e4SLinus Torvalds } 9931da177e4SLinus Torvalds } else { 9941da177e4SLinus Torvalds ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT); 9951da177e4SLinus Torvalds ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT); 9961da177e4SLinus Torvalds ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT); 9971da177e4SLinus Torvalds ip->i_d.di_flushiter = INT_GET(dip->di_core.di_flushiter, ARCH_CONVERT); 9981da177e4SLinus Torvalds /* 9991da177e4SLinus Torvalds * Make sure to pull in the mode here as well in 10001da177e4SLinus Torvalds * case the inode is released without being used. 10011da177e4SLinus Torvalds * This ensures that xfs_inactive() will see that 10021da177e4SLinus Torvalds * the inode is already free and not try to mess 10031da177e4SLinus Torvalds * with the uninitialized part of it. 10041da177e4SLinus Torvalds */ 10051da177e4SLinus Torvalds ip->i_d.di_mode = 0; 10061da177e4SLinus Torvalds /* 10071da177e4SLinus Torvalds * Initialize the per-fork minima and maxima for a new 10081da177e4SLinus Torvalds * inode here. xfs_iformat will do it for old inodes. 
10091da177e4SLinus Torvalds */ 10101da177e4SLinus Torvalds ip->i_df.if_ext_max = 10111da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 10121da177e4SLinus Torvalds } 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds INIT_LIST_HEAD(&ip->i_reclaim); 10151da177e4SLinus Torvalds 10161da177e4SLinus Torvalds /* 10171da177e4SLinus Torvalds * The inode format changed when we moved the link count and 10181da177e4SLinus Torvalds * made it 32 bits long. If this is an old format inode, 10191da177e4SLinus Torvalds * convert it in memory to look like a new one. If it gets 10201da177e4SLinus Torvalds * flushed to disk we will convert back before flushing or 10211da177e4SLinus Torvalds * logging it. We zero out the new projid field and the old link 10221da177e4SLinus Torvalds * count field. We'll handle clearing the pad field (the remains 10231da177e4SLinus Torvalds * of the old uuid field) when we actually convert the inode to 10241da177e4SLinus Torvalds * the new format. We don't change the version number so that we 10251da177e4SLinus Torvalds * can distinguish this from a real new format inode. 10261da177e4SLinus Torvalds */ 10271da177e4SLinus Torvalds if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 10281da177e4SLinus Torvalds ip->i_d.di_nlink = ip->i_d.di_onlink; 10291da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 10301da177e4SLinus Torvalds ip->i_d.di_projid = 0; 10311da177e4SLinus Torvalds } 10321da177e4SLinus Torvalds 10331da177e4SLinus Torvalds ip->i_delayed_blks = 0; 10341da177e4SLinus Torvalds 10351da177e4SLinus Torvalds /* 10361da177e4SLinus Torvalds * Mark the buffer containing the inode as something to keep 10371da177e4SLinus Torvalds * around for a while. This helps to keep recently accessed 10381da177e4SLinus Torvalds * meta-data in-core longer. 
10391da177e4SLinus Torvalds */ 10401da177e4SLinus Torvalds XFS_BUF_SET_REF(bp, XFS_INO_REF); 10411da177e4SLinus Torvalds 10421da177e4SLinus Torvalds /* 10431da177e4SLinus Torvalds * Use xfs_trans_brelse() to release the buffer containing the 10441da177e4SLinus Torvalds * on-disk inode, because it was acquired with xfs_trans_read_buf() 10451da177e4SLinus Torvalds * in xfs_itobp() above. If tp is NULL, this is just a normal 10461da177e4SLinus Torvalds * brelse(). If we're within a transaction, then xfs_trans_brelse() 10471da177e4SLinus Torvalds * will only release the buffer if it is not dirty within the 10481da177e4SLinus Torvalds * transaction. It will be OK to release the buffer in this case, 10491da177e4SLinus Torvalds * because inodes on disk are never destroyed and we will be 10501da177e4SLinus Torvalds * locking the new in-core inode before putting it in the hash 10511da177e4SLinus Torvalds * table where other processes can find it. Thus we don't have 10521da177e4SLinus Torvalds * to worry about the inode being changed just because we released 10531da177e4SLinus Torvalds * the buffer. 10541da177e4SLinus Torvalds */ 10551da177e4SLinus Torvalds xfs_trans_brelse(tp, bp); 10561da177e4SLinus Torvalds *ipp = ip; 10571da177e4SLinus Torvalds return 0; 10581da177e4SLinus Torvalds } 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds /* 10611da177e4SLinus Torvalds * Read in extents from a btree-format inode. 10621da177e4SLinus Torvalds * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. 
10631da177e4SLinus Torvalds */ 10641da177e4SLinus Torvalds int 10651da177e4SLinus Torvalds xfs_iread_extents( 10661da177e4SLinus Torvalds xfs_trans_t *tp, 10671da177e4SLinus Torvalds xfs_inode_t *ip, 10681da177e4SLinus Torvalds int whichfork) 10691da177e4SLinus Torvalds { 10701da177e4SLinus Torvalds int error; 10711da177e4SLinus Torvalds xfs_ifork_t *ifp; 10721da177e4SLinus Torvalds size_t size; 10731da177e4SLinus Torvalds 10741da177e4SLinus Torvalds if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 10751da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 10761da177e4SLinus Torvalds ip->i_mount); 10771da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 10781da177e4SLinus Torvalds } 10791da177e4SLinus Torvalds size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t); 10801da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 10811da177e4SLinus Torvalds /* 10821da177e4SLinus Torvalds * We know that the size is valid (it's checked in iformat_btree) 10831da177e4SLinus Torvalds */ 10841da177e4SLinus Torvalds ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); 10851da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents != NULL); 10861da177e4SLinus Torvalds ifp->if_lastex = NULLEXTNUM; 10871da177e4SLinus Torvalds ifp->if_bytes = ifp->if_real_bytes = (int)size; 10881da177e4SLinus Torvalds ifp->if_flags |= XFS_IFEXTENTS; 10891da177e4SLinus Torvalds error = xfs_bmap_read_extents(tp, ip, whichfork); 10901da177e4SLinus Torvalds if (error) { 10911da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_extents, size); 10921da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 10931da177e4SLinus Torvalds ifp->if_bytes = ifp->if_real_bytes = 0; 10941da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFEXTENTS; 10951da177e4SLinus Torvalds return error; 10961da177e4SLinus Torvalds } 10971da177e4SLinus Torvalds xfs_validate_extents((xfs_bmbt_rec_t *)ifp->if_u1.if_extents, 10981da177e4SLinus Torvalds XFS_IFORK_NEXTENTS(ip, 
whichfork), 0, XFS_EXTFMT_INODE(ip)); 10991da177e4SLinus Torvalds return 0; 11001da177e4SLinus Torvalds } 11011da177e4SLinus Torvalds 11021da177e4SLinus Torvalds /* 11031da177e4SLinus Torvalds * Allocate an inode on disk and return a copy of its in-core version. 11041da177e4SLinus Torvalds * The in-core inode is locked exclusively. Set mode, nlink, and rdev 11051da177e4SLinus Torvalds * appropriately within the inode. The uid and gid for the inode are 11061da177e4SLinus Torvalds * set according to the contents of the given cred structure. 11071da177e4SLinus Torvalds * 11081da177e4SLinus Torvalds * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() 11091da177e4SLinus Torvalds * has a free inode available, call xfs_iget() 11101da177e4SLinus Torvalds * to obtain the in-core version of the allocated inode. Finally, 11111da177e4SLinus Torvalds * fill in the inode and log its initial contents. In this case, 11121da177e4SLinus Torvalds * ialloc_context would be set to NULL and call_again set to false. 11131da177e4SLinus Torvalds * 11141da177e4SLinus Torvalds * If xfs_dialloc() does not have an available inode, 11151da177e4SLinus Torvalds * it will replenish its supply by doing an allocation. Since we can 11161da177e4SLinus Torvalds * only do one allocation within a transaction without deadlocks, we 11171da177e4SLinus Torvalds * must commit the current transaction before returning the inode itself. 11181da177e4SLinus Torvalds * In this case, therefore, we will set call_again to true and return. 11191da177e4SLinus Torvalds * The caller should then commit the current transaction, start a new 11201da177e4SLinus Torvalds * transaction, and call xfs_ialloc() again to actually get the inode. 
11211da177e4SLinus Torvalds * 11221da177e4SLinus Torvalds * To ensure that some other process does not grab the inode that 11231da177e4SLinus Torvalds * was allocated during the first call to xfs_ialloc(), this routine 11241da177e4SLinus Torvalds * also returns the [locked] bp pointing to the head of the freelist 11251da177e4SLinus Torvalds * as ialloc_context. The caller should hold this buffer across 11261da177e4SLinus Torvalds * the commit and pass it back into this routine on the second call. 11271da177e4SLinus Torvalds */ 11281da177e4SLinus Torvalds int 11291da177e4SLinus Torvalds xfs_ialloc( 11301da177e4SLinus Torvalds xfs_trans_t *tp, 11311da177e4SLinus Torvalds xfs_inode_t *pip, 11321da177e4SLinus Torvalds mode_t mode, 1133*31b084aeSNathan Scott xfs_nlink_t nlink, 11341da177e4SLinus Torvalds xfs_dev_t rdev, 11351da177e4SLinus Torvalds cred_t *cr, 11361da177e4SLinus Torvalds xfs_prid_t prid, 11371da177e4SLinus Torvalds int okalloc, 11381da177e4SLinus Torvalds xfs_buf_t **ialloc_context, 11391da177e4SLinus Torvalds boolean_t *call_again, 11401da177e4SLinus Torvalds xfs_inode_t **ipp) 11411da177e4SLinus Torvalds { 11421da177e4SLinus Torvalds xfs_ino_t ino; 11431da177e4SLinus Torvalds xfs_inode_t *ip; 11441da177e4SLinus Torvalds vnode_t *vp; 11451da177e4SLinus Torvalds uint flags; 11461da177e4SLinus Torvalds int error; 11471da177e4SLinus Torvalds 11481da177e4SLinus Torvalds /* 11491da177e4SLinus Torvalds * Call the space management code to pick 11501da177e4SLinus Torvalds * the on-disk inode to be allocated. 
11511da177e4SLinus Torvalds */ 11521da177e4SLinus Torvalds error = xfs_dialloc(tp, pip->i_ino, mode, okalloc, 11531da177e4SLinus Torvalds ialloc_context, call_again, &ino); 11541da177e4SLinus Torvalds if (error != 0) { 11551da177e4SLinus Torvalds return error; 11561da177e4SLinus Torvalds } 11571da177e4SLinus Torvalds if (*call_again || ino == NULLFSINO) { 11581da177e4SLinus Torvalds *ipp = NULL; 11591da177e4SLinus Torvalds return 0; 11601da177e4SLinus Torvalds } 11611da177e4SLinus Torvalds ASSERT(*ialloc_context == NULL); 11621da177e4SLinus Torvalds 11631da177e4SLinus Torvalds /* 11641da177e4SLinus Torvalds * Get the in-core inode with the lock held exclusively. 11651da177e4SLinus Torvalds * This is because we're setting fields here we need 11661da177e4SLinus Torvalds * to prevent others from looking at until we're done. 11671da177e4SLinus Torvalds */ 11681da177e4SLinus Torvalds error = xfs_trans_iget(tp->t_mountp, tp, ino, 11691da177e4SLinus Torvalds IGET_CREATE, XFS_ILOCK_EXCL, &ip); 11701da177e4SLinus Torvalds if (error != 0) { 11711da177e4SLinus Torvalds return error; 11721da177e4SLinus Torvalds } 11731da177e4SLinus Torvalds ASSERT(ip != NULL); 11741da177e4SLinus Torvalds 11751da177e4SLinus Torvalds vp = XFS_ITOV(ip); 11761da177e4SLinus Torvalds vp->v_type = IFTOVT(mode); 11771da177e4SLinus Torvalds ip->i_d.di_mode = (__uint16_t)mode; 11781da177e4SLinus Torvalds ip->i_d.di_onlink = 0; 11791da177e4SLinus Torvalds ip->i_d.di_nlink = nlink; 11801da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == nlink); 11811da177e4SLinus Torvalds ip->i_d.di_uid = current_fsuid(cr); 11821da177e4SLinus Torvalds ip->i_d.di_gid = current_fsgid(cr); 11831da177e4SLinus Torvalds ip->i_d.di_projid = prid; 11841da177e4SLinus Torvalds memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 11851da177e4SLinus Torvalds 11861da177e4SLinus Torvalds /* 11871da177e4SLinus Torvalds * If the superblock version is up to where we support new format 11881da177e4SLinus Torvalds * inodes and this is 
currently an old format inode, then change 11891da177e4SLinus Torvalds * the inode version number now. This way we only do the conversion 11901da177e4SLinus Torvalds * here rather than here and in the flush/logging code. 11911da177e4SLinus Torvalds */ 11921da177e4SLinus Torvalds if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) && 11931da177e4SLinus Torvalds ip->i_d.di_version == XFS_DINODE_VERSION_1) { 11941da177e4SLinus Torvalds ip->i_d.di_version = XFS_DINODE_VERSION_2; 11951da177e4SLinus Torvalds /* 11961da177e4SLinus Torvalds * We've already zeroed the old link count, the projid field, 11971da177e4SLinus Torvalds * and the pad field. 11981da177e4SLinus Torvalds */ 11991da177e4SLinus Torvalds } 12001da177e4SLinus Torvalds 12011da177e4SLinus Torvalds /* 12021da177e4SLinus Torvalds * Project ids won't be stored on disk if we are using a version 1 inode. 12031da177e4SLinus Torvalds */ 12041da177e4SLinus Torvalds if ( (prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 12051da177e4SLinus Torvalds xfs_bump_ino_vers2(tp, ip); 12061da177e4SLinus Torvalds 12071da177e4SLinus Torvalds if (XFS_INHERIT_GID(pip, vp->v_vfsp)) { 12081da177e4SLinus Torvalds ip->i_d.di_gid = pip->i_d.di_gid; 12091da177e4SLinus Torvalds if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) { 12101da177e4SLinus Torvalds ip->i_d.di_mode |= S_ISGID; 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds } 12131da177e4SLinus Torvalds 12141da177e4SLinus Torvalds /* 12151da177e4SLinus Torvalds * If the group ID of the new file does not match the effective group 12161da177e4SLinus Torvalds * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 12171da177e4SLinus Torvalds * (and only if the irix_sgid_inherit compatibility variable is set). 
12181da177e4SLinus Torvalds */ 12191da177e4SLinus Torvalds if ((irix_sgid_inherit) && 12201da177e4SLinus Torvalds (ip->i_d.di_mode & S_ISGID) && 12211da177e4SLinus Torvalds (!in_group_p((gid_t)ip->i_d.di_gid))) { 12221da177e4SLinus Torvalds ip->i_d.di_mode &= ~S_ISGID; 12231da177e4SLinus Torvalds } 12241da177e4SLinus Torvalds 12251da177e4SLinus Torvalds ip->i_d.di_size = 0; 12261da177e4SLinus Torvalds ip->i_d.di_nextents = 0; 12271da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 12281da177e4SLinus Torvalds xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 12291da177e4SLinus Torvalds /* 12301da177e4SLinus Torvalds * di_gen will have been taken care of in xfs_iread. 12311da177e4SLinus Torvalds */ 12321da177e4SLinus Torvalds ip->i_d.di_extsize = 0; 12331da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 12341da177e4SLinus Torvalds ip->i_d.di_dmstate = 0; 12351da177e4SLinus Torvalds ip->i_d.di_flags = 0; 12361da177e4SLinus Torvalds flags = XFS_ILOG_CORE; 12371da177e4SLinus Torvalds switch (mode & S_IFMT) { 12381da177e4SLinus Torvalds case S_IFIFO: 12391da177e4SLinus Torvalds case S_IFCHR: 12401da177e4SLinus Torvalds case S_IFBLK: 12411da177e4SLinus Torvalds case S_IFSOCK: 12421da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_DEV; 12431da177e4SLinus Torvalds ip->i_df.if_u2.if_rdev = rdev; 12441da177e4SLinus Torvalds ip->i_df.if_flags = 0; 12451da177e4SLinus Torvalds flags |= XFS_ILOG_DEV; 12461da177e4SLinus Torvalds break; 12471da177e4SLinus Torvalds case S_IFREG: 12481da177e4SLinus Torvalds case S_IFDIR: 12491da177e4SLinus Torvalds if (unlikely(pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 12501da177e4SLinus Torvalds if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) { 12511da177e4SLinus Torvalds if ((mode & S_IFMT) == S_IFDIR) { 12521da177e4SLinus Torvalds ip->i_d.di_flags |= XFS_DIFLAG_RTINHERIT; 12531da177e4SLinus Torvalds } else { 12541da177e4SLinus Torvalds ip->i_d.di_flags |= XFS_DIFLAG_REALTIME; 12551da177e4SLinus Torvalds 
ip->i_iocore.io_flags |= XFS_IOCORE_RT; 12561da177e4SLinus Torvalds } 12571da177e4SLinus Torvalds } 12581da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) && 12591da177e4SLinus Torvalds xfs_inherit_noatime) 12601da177e4SLinus Torvalds ip->i_d.di_flags |= XFS_DIFLAG_NOATIME; 12611da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) && 12621da177e4SLinus Torvalds xfs_inherit_nodump) 12631da177e4SLinus Torvalds ip->i_d.di_flags |= XFS_DIFLAG_NODUMP; 12641da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) && 12651da177e4SLinus Torvalds xfs_inherit_sync) 12661da177e4SLinus Torvalds ip->i_d.di_flags |= XFS_DIFLAG_SYNC; 12671da177e4SLinus Torvalds if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && 12681da177e4SLinus Torvalds xfs_inherit_nosymlinks) 12691da177e4SLinus Torvalds ip->i_d.di_flags |= XFS_DIFLAG_NOSYMLINKS; 12701da177e4SLinus Torvalds } 12711da177e4SLinus Torvalds /* FALLTHROUGH */ 12721da177e4SLinus Torvalds case S_IFLNK: 12731da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 12741da177e4SLinus Torvalds ip->i_df.if_flags = XFS_IFEXTENTS; 12751da177e4SLinus Torvalds ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; 12761da177e4SLinus Torvalds ip->i_df.if_u1.if_extents = NULL; 12771da177e4SLinus Torvalds break; 12781da177e4SLinus Torvalds default: 12791da177e4SLinus Torvalds ASSERT(0); 12801da177e4SLinus Torvalds } 12811da177e4SLinus Torvalds /* 12821da177e4SLinus Torvalds * Attribute fork settings for new inode. 12831da177e4SLinus Torvalds */ 12841da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 12851da177e4SLinus Torvalds ip->i_d.di_anextents = 0; 12861da177e4SLinus Torvalds 12871da177e4SLinus Torvalds /* 12881da177e4SLinus Torvalds * Log the new values stuffed into the inode. 
12891da177e4SLinus Torvalds */ 12901da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, flags); 12911da177e4SLinus Torvalds 12921da177e4SLinus Torvalds /* now that we have a v_type we can set Linux inode ops (& unlock) */ 12931da177e4SLinus Torvalds VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1); 12941da177e4SLinus Torvalds 12951da177e4SLinus Torvalds *ipp = ip; 12961da177e4SLinus Torvalds return 0; 12971da177e4SLinus Torvalds } 12981da177e4SLinus Torvalds 12991da177e4SLinus Torvalds /* 13001da177e4SLinus Torvalds * Check to make sure that there are no blocks allocated to the 13011da177e4SLinus Torvalds * file beyond the size of the file. We don't check this for 13021da177e4SLinus Torvalds * files with fixed size extents or real time extents, but we 13031da177e4SLinus Torvalds * at least do it for regular files. 13041da177e4SLinus Torvalds */ 13051da177e4SLinus Torvalds #ifdef DEBUG 13061da177e4SLinus Torvalds void 13071da177e4SLinus Torvalds xfs_isize_check( 13081da177e4SLinus Torvalds xfs_mount_t *mp, 13091da177e4SLinus Torvalds xfs_inode_t *ip, 13101da177e4SLinus Torvalds xfs_fsize_t isize) 13111da177e4SLinus Torvalds { 13121da177e4SLinus Torvalds xfs_fileoff_t map_first; 13131da177e4SLinus Torvalds int nimaps; 13141da177e4SLinus Torvalds xfs_bmbt_irec_t imaps[2]; 13151da177e4SLinus Torvalds 13161da177e4SLinus Torvalds if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 13171da177e4SLinus Torvalds return; 13181da177e4SLinus Torvalds 13191da177e4SLinus Torvalds if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME ) 13201da177e4SLinus Torvalds return; 13211da177e4SLinus Torvalds 13221da177e4SLinus Torvalds nimaps = 2; 13231da177e4SLinus Torvalds map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); 13241da177e4SLinus Torvalds /* 13251da177e4SLinus Torvalds * The filesystem could be shutting down, so bmapi may return 13261da177e4SLinus Torvalds * an error. 
13271da177e4SLinus Torvalds */ 13281da177e4SLinus Torvalds if (xfs_bmapi(NULL, ip, map_first, 13291da177e4SLinus Torvalds (XFS_B_TO_FSB(mp, 13301da177e4SLinus Torvalds (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - 13311da177e4SLinus Torvalds map_first), 13321da177e4SLinus Torvalds XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps, 13331da177e4SLinus Torvalds NULL)) 13341da177e4SLinus Torvalds return; 13351da177e4SLinus Torvalds ASSERT(nimaps == 1); 13361da177e4SLinus Torvalds ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 13371da177e4SLinus Torvalds } 13381da177e4SLinus Torvalds #endif /* DEBUG */ 13391da177e4SLinus Torvalds 13401da177e4SLinus Torvalds /* 13411da177e4SLinus Torvalds * Calculate the last possible buffered byte in a file. This must 13421da177e4SLinus Torvalds * include data that was buffered beyond the EOF by the write code. 13431da177e4SLinus Torvalds * This also needs to deal with overflowing the xfs_fsize_t type 13441da177e4SLinus Torvalds * which can happen for sizes near the limit. 13451da177e4SLinus Torvalds * 13461da177e4SLinus Torvalds * We also need to take into account any blocks beyond the EOF. It 13471da177e4SLinus Torvalds * may be the case that they were buffered by a write which failed. 13481da177e4SLinus Torvalds * In that case the pages will still be in memory, but the inode size 13491da177e4SLinus Torvalds * will never have been updated. 
13501da177e4SLinus Torvalds */ 13511da177e4SLinus Torvalds xfs_fsize_t 13521da177e4SLinus Torvalds xfs_file_last_byte( 13531da177e4SLinus Torvalds xfs_inode_t *ip) 13541da177e4SLinus Torvalds { 13551da177e4SLinus Torvalds xfs_mount_t *mp; 13561da177e4SLinus Torvalds xfs_fsize_t last_byte; 13571da177e4SLinus Torvalds xfs_fileoff_t last_block; 13581da177e4SLinus Torvalds xfs_fileoff_t size_last_block; 13591da177e4SLinus Torvalds int error; 13601da177e4SLinus Torvalds 13611da177e4SLinus Torvalds ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 13621da177e4SLinus Torvalds 13631da177e4SLinus Torvalds mp = ip->i_mount; 13641da177e4SLinus Torvalds /* 13651da177e4SLinus Torvalds * Only check for blocks beyond the EOF if the extents have 13661da177e4SLinus Torvalds * been read in. This eliminates the need for the inode lock, 13671da177e4SLinus Torvalds * and it also saves us from looking when it really isn't 13681da177e4SLinus Torvalds * necessary. 13691da177e4SLinus Torvalds */ 13701da177e4SLinus Torvalds if (ip->i_df.if_flags & XFS_IFEXTENTS) { 13711da177e4SLinus Torvalds error = xfs_bmap_last_offset(NULL, ip, &last_block, 13721da177e4SLinus Torvalds XFS_DATA_FORK); 13731da177e4SLinus Torvalds if (error) { 13741da177e4SLinus Torvalds last_block = 0; 13751da177e4SLinus Torvalds } 13761da177e4SLinus Torvalds } else { 13771da177e4SLinus Torvalds last_block = 0; 13781da177e4SLinus Torvalds } 13791da177e4SLinus Torvalds size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size); 13801da177e4SLinus Torvalds last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 13811da177e4SLinus Torvalds 13821da177e4SLinus Torvalds last_byte = XFS_FSB_TO_B(mp, last_block); 13831da177e4SLinus Torvalds if (last_byte < 0) { 13841da177e4SLinus Torvalds return XFS_MAXIOFFSET(mp); 13851da177e4SLinus Torvalds } 13861da177e4SLinus Torvalds last_byte += (1 << mp->m_writeio_log); 13871da177e4SLinus Torvalds if (last_byte < 0) { 13881da177e4SLinus Torvalds return XFS_MAXIOFFSET(mp); 
13891da177e4SLinus Torvalds } 13901da177e4SLinus Torvalds return last_byte; 13911da177e4SLinus Torvalds } 13921da177e4SLinus Torvalds 13931da177e4SLinus Torvalds #if defined(XFS_RW_TRACE) 13941da177e4SLinus Torvalds STATIC void 13951da177e4SLinus Torvalds xfs_itrunc_trace( 13961da177e4SLinus Torvalds int tag, 13971da177e4SLinus Torvalds xfs_inode_t *ip, 13981da177e4SLinus Torvalds int flag, 13991da177e4SLinus Torvalds xfs_fsize_t new_size, 14001da177e4SLinus Torvalds xfs_off_t toss_start, 14011da177e4SLinus Torvalds xfs_off_t toss_finish) 14021da177e4SLinus Torvalds { 14031da177e4SLinus Torvalds if (ip->i_rwtrace == NULL) { 14041da177e4SLinus Torvalds return; 14051da177e4SLinus Torvalds } 14061da177e4SLinus Torvalds 14071da177e4SLinus Torvalds ktrace_enter(ip->i_rwtrace, 14081da177e4SLinus Torvalds (void*)((long)tag), 14091da177e4SLinus Torvalds (void*)ip, 14101da177e4SLinus Torvalds (void*)(unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff), 14111da177e4SLinus Torvalds (void*)(unsigned long)(ip->i_d.di_size & 0xffffffff), 14121da177e4SLinus Torvalds (void*)((long)flag), 14131da177e4SLinus Torvalds (void*)(unsigned long)((new_size >> 32) & 0xffffffff), 14141da177e4SLinus Torvalds (void*)(unsigned long)(new_size & 0xffffffff), 14151da177e4SLinus Torvalds (void*)(unsigned long)((toss_start >> 32) & 0xffffffff), 14161da177e4SLinus Torvalds (void*)(unsigned long)(toss_start & 0xffffffff), 14171da177e4SLinus Torvalds (void*)(unsigned long)((toss_finish >> 32) & 0xffffffff), 14181da177e4SLinus Torvalds (void*)(unsigned long)(toss_finish & 0xffffffff), 14191da177e4SLinus Torvalds (void*)(unsigned long)current_cpu(), 14201da177e4SLinus Torvalds (void*)0, 14211da177e4SLinus Torvalds (void*)0, 14221da177e4SLinus Torvalds (void*)0, 14231da177e4SLinus Torvalds (void*)0); 14241da177e4SLinus Torvalds } 14251da177e4SLinus Torvalds #else 14261da177e4SLinus Torvalds #define xfs_itrunc_trace(tag, ip, flag, new_size, toss_start, toss_finish) 14271da177e4SLinus Torvalds #endif 
14281da177e4SLinus Torvalds 14291da177e4SLinus Torvalds /* 14301da177e4SLinus Torvalds * Start the truncation of the file to new_size. The new size 14311da177e4SLinus Torvalds * must be smaller than the current size. This routine will 14321da177e4SLinus Torvalds * clear the buffer and page caches of file data in the removed 14331da177e4SLinus Torvalds * range, and xfs_itruncate_finish() will remove the underlying 14341da177e4SLinus Torvalds * disk blocks. 14351da177e4SLinus Torvalds * 14361da177e4SLinus Torvalds * The inode must have its I/O lock locked EXCLUSIVELY, and it 14371da177e4SLinus Torvalds * must NOT have the inode lock held at all. This is because we're 14381da177e4SLinus Torvalds * calling into the buffer/page cache code and we can't hold the 14391da177e4SLinus Torvalds * inode lock when we do so. 14401da177e4SLinus Torvalds * 14411da177e4SLinus Torvalds * The flags parameter can have either the value XFS_ITRUNC_DEFINITE 14421da177e4SLinus Torvalds * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used 14431da177e4SLinus Torvalds * in the case that the caller is locking things out of order and 14441da177e4SLinus Torvalds * may not be able to call xfs_itruncate_finish() with the inode lock 14451da177e4SLinus Torvalds * held without dropping the I/O lock. If the caller must drop the 14461da177e4SLinus Torvalds * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start() 14471da177e4SLinus Torvalds * must be called again with all the same restrictions as the initial 14481da177e4SLinus Torvalds * call. 
14491da177e4SLinus Torvalds */ 14501da177e4SLinus Torvalds void 14511da177e4SLinus Torvalds xfs_itruncate_start( 14521da177e4SLinus Torvalds xfs_inode_t *ip, 14531da177e4SLinus Torvalds uint flags, 14541da177e4SLinus Torvalds xfs_fsize_t new_size) 14551da177e4SLinus Torvalds { 14561da177e4SLinus Torvalds xfs_fsize_t last_byte; 14571da177e4SLinus Torvalds xfs_off_t toss_start; 14581da177e4SLinus Torvalds xfs_mount_t *mp; 14591da177e4SLinus Torvalds vnode_t *vp; 14601da177e4SLinus Torvalds 14611da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 14621da177e4SLinus Torvalds ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 14631da177e4SLinus Torvalds ASSERT((flags == XFS_ITRUNC_DEFINITE) || 14641da177e4SLinus Torvalds (flags == XFS_ITRUNC_MAYBE)); 14651da177e4SLinus Torvalds 14661da177e4SLinus Torvalds mp = ip->i_mount; 14671da177e4SLinus Torvalds vp = XFS_ITOV(ip); 14681da177e4SLinus Torvalds /* 14691da177e4SLinus Torvalds * Call VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES() to get rid of pages and buffers 14701da177e4SLinus Torvalds * overlapping the region being removed. We have to use 14711da177e4SLinus Torvalds * the less efficient VOP_FLUSHINVAL_PAGES() in the case that the 14721da177e4SLinus Torvalds * caller may not be able to finish the truncate without 14731da177e4SLinus Torvalds * dropping the inode's I/O lock. Make sure 14741da177e4SLinus Torvalds * to catch any pages brought in by buffers overlapping 14751da177e4SLinus Torvalds * the EOF by searching out beyond the isize by our 14761da177e4SLinus Torvalds * block size. We round new_size up to a block boundary 14771da177e4SLinus Torvalds * so that we don't toss things on the same block as 14781da177e4SLinus Torvalds * new_size but before it. 14791da177e4SLinus Torvalds * 14801da177e4SLinus Torvalds * Before calling VOP_TOSS_PAGES() or VOP_FLUSHINVAL_PAGES(), make sure to 14811da177e4SLinus Torvalds * call remapf() over the same region if the file is mapped. 
14821da177e4SLinus Torvalds * This frees up mapped file references to the pages in the 14831da177e4SLinus Torvalds * given range and for the VOP_FLUSHINVAL_PAGES() case it ensures 14841da177e4SLinus Torvalds * that we get the latest mapped changes flushed out. 14851da177e4SLinus Torvalds */ 14861da177e4SLinus Torvalds toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 14871da177e4SLinus Torvalds toss_start = XFS_FSB_TO_B(mp, toss_start); 14881da177e4SLinus Torvalds if (toss_start < 0) { 14891da177e4SLinus Torvalds /* 14901da177e4SLinus Torvalds * The place to start tossing is beyond our maximum 14911da177e4SLinus Torvalds * file size, so there is no way that the data extended 14921da177e4SLinus Torvalds * out there. 14931da177e4SLinus Torvalds */ 14941da177e4SLinus Torvalds return; 14951da177e4SLinus Torvalds } 14961da177e4SLinus Torvalds last_byte = xfs_file_last_byte(ip); 14971da177e4SLinus Torvalds xfs_itrunc_trace(XFS_ITRUNC_START, ip, flags, new_size, toss_start, 14981da177e4SLinus Torvalds last_byte); 14991da177e4SLinus Torvalds if (last_byte > toss_start) { 15001da177e4SLinus Torvalds if (flags & XFS_ITRUNC_DEFINITE) { 15011da177e4SLinus Torvalds VOP_TOSS_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); 15021da177e4SLinus Torvalds } else { 15031da177e4SLinus Torvalds VOP_FLUSHINVAL_PAGES(vp, toss_start, -1, FI_REMAPF_LOCKED); 15041da177e4SLinus Torvalds } 15051da177e4SLinus Torvalds } 15061da177e4SLinus Torvalds 15071da177e4SLinus Torvalds #ifdef DEBUG 15081da177e4SLinus Torvalds if (new_size == 0) { 15091da177e4SLinus Torvalds ASSERT(VN_CACHED(vp) == 0); 15101da177e4SLinus Torvalds } 15111da177e4SLinus Torvalds #endif 15121da177e4SLinus Torvalds } 15131da177e4SLinus Torvalds 15141da177e4SLinus Torvalds /* 15151da177e4SLinus Torvalds * Shrink the file to the given new_size. The new 15161da177e4SLinus Torvalds * size must be smaller than the current size. 
15171da177e4SLinus Torvalds * This will free up the underlying blocks 15181da177e4SLinus Torvalds * in the removed range after a call to xfs_itruncate_start() 15191da177e4SLinus Torvalds * or xfs_atruncate_start(). 15201da177e4SLinus Torvalds * 15211da177e4SLinus Torvalds * The transaction passed to this routine must have made 15221da177e4SLinus Torvalds * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. 15231da177e4SLinus Torvalds * This routine may commit the given transaction and 15241da177e4SLinus Torvalds * start new ones, so make sure everything involved in 15251da177e4SLinus Torvalds * the transaction is tidy before calling here. 15261da177e4SLinus Torvalds * Some transaction will be returned to the caller to be 15271da177e4SLinus Torvalds * committed. The incoming transaction must already include 15281da177e4SLinus Torvalds * the inode, and both inode locks must be held exclusively. 15291da177e4SLinus Torvalds * The inode must also be "held" within the transaction. On 15301da177e4SLinus Torvalds * return the inode will be "held" within the returned transaction. 15311da177e4SLinus Torvalds * This routine does NOT require any disk space to be reserved 15321da177e4SLinus Torvalds * for it within the transaction. 15331da177e4SLinus Torvalds * 15341da177e4SLinus Torvalds * The fork parameter must be either xfs_attr_fork or xfs_data_fork, 15351da177e4SLinus Torvalds * and it indicates the fork which is to be truncated. For the 15361da177e4SLinus Torvalds * attribute fork we only support truncation to size 0. 15371da177e4SLinus Torvalds * 15381da177e4SLinus Torvalds * We use the sync parameter to indicate whether or not the first 15391da177e4SLinus Torvalds * transaction we perform might have to be synchronous. For the attr fork, 15401da177e4SLinus Torvalds * it needs to be so if the unlink of the inode is not yet known to be 15411da177e4SLinus Torvalds * permanent in the log. 
This keeps us from freeing and reusing the 15421da177e4SLinus Torvalds * blocks of the attribute fork before the unlink of the inode becomes 15431da177e4SLinus Torvalds * permanent. 15441da177e4SLinus Torvalds * 15451da177e4SLinus Torvalds * For the data fork, we normally have to run synchronously if we're 15461da177e4SLinus Torvalds * being called out of the inactive path or we're being called 15471da177e4SLinus Torvalds * out of the create path where we're truncating an existing file. 15481da177e4SLinus Torvalds * Either way, the truncate needs to be sync so blocks don't reappear 15491da177e4SLinus Torvalds * in the file with altered data in case of a crash. wsync filesystems 15501da177e4SLinus Torvalds * can run the first case async because anything that shrinks the inode 15511da177e4SLinus Torvalds * has to run sync so by the time we're called here from inactive, the 15521da177e4SLinus Torvalds * inode size is permanently set to 0. 15531da177e4SLinus Torvalds * 15541da177e4SLinus Torvalds * Calls from the truncate path always need to be sync unless we're 15551da177e4SLinus Torvalds * in a wsync filesystem and the file has already been unlinked. 15561da177e4SLinus Torvalds * 15571da177e4SLinus Torvalds * The caller is responsible for correctly setting the sync parameter. 15581da177e4SLinus Torvalds * It gets too hard for us to guess here which path we're being called 15591da177e4SLinus Torvalds * out of just based on inode state. 
15601da177e4SLinus Torvalds */ 15611da177e4SLinus Torvalds int 15621da177e4SLinus Torvalds xfs_itruncate_finish( 15631da177e4SLinus Torvalds xfs_trans_t **tp, 15641da177e4SLinus Torvalds xfs_inode_t *ip, 15651da177e4SLinus Torvalds xfs_fsize_t new_size, 15661da177e4SLinus Torvalds int fork, 15671da177e4SLinus Torvalds int sync) 15681da177e4SLinus Torvalds { 15691da177e4SLinus Torvalds xfs_fsblock_t first_block; 15701da177e4SLinus Torvalds xfs_fileoff_t first_unmap_block; 15711da177e4SLinus Torvalds xfs_fileoff_t last_block; 15721da177e4SLinus Torvalds xfs_filblks_t unmap_len=0; 15731da177e4SLinus Torvalds xfs_mount_t *mp; 15741da177e4SLinus Torvalds xfs_trans_t *ntp; 15751da177e4SLinus Torvalds int done; 15761da177e4SLinus Torvalds int committed; 15771da177e4SLinus Torvalds xfs_bmap_free_t free_list; 15781da177e4SLinus Torvalds int error; 15791da177e4SLinus Torvalds 15801da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 15811da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 15821da177e4SLinus Torvalds ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 15831da177e4SLinus Torvalds ASSERT(*tp != NULL); 15841da177e4SLinus Torvalds ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 15851da177e4SLinus Torvalds ASSERT(ip->i_transp == *tp); 15861da177e4SLinus Torvalds ASSERT(ip->i_itemp != NULL); 15871da177e4SLinus Torvalds ASSERT(ip->i_itemp->ili_flags & XFS_ILI_HOLD); 15881da177e4SLinus Torvalds 15891da177e4SLinus Torvalds 15901da177e4SLinus Torvalds ntp = *tp; 15911da177e4SLinus Torvalds mp = (ntp)->t_mountp; 15921da177e4SLinus Torvalds ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); 15931da177e4SLinus Torvalds 15941da177e4SLinus Torvalds /* 15951da177e4SLinus Torvalds * We only support truncating the entire attribute fork. 
15961da177e4SLinus Torvalds */ 15971da177e4SLinus Torvalds if (fork == XFS_ATTR_FORK) { 15981da177e4SLinus Torvalds new_size = 0LL; 15991da177e4SLinus Torvalds } 16001da177e4SLinus Torvalds first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); 16011da177e4SLinus Torvalds xfs_itrunc_trace(XFS_ITRUNC_FINISH1, ip, 0, new_size, 0, 0); 16021da177e4SLinus Torvalds /* 16031da177e4SLinus Torvalds * The first thing we do is set the size to new_size permanently 16041da177e4SLinus Torvalds * on disk. This way we don't have to worry about anyone ever 16051da177e4SLinus Torvalds * being able to look at the data being freed even in the face 16061da177e4SLinus Torvalds * of a crash. What we're getting around here is the case where 16071da177e4SLinus Torvalds * we free a block, it is allocated to another file, it is written 16081da177e4SLinus Torvalds * to, and then we crash. If the new data gets written to the 16091da177e4SLinus Torvalds * file but the log buffers containing the free and reallocation 16101da177e4SLinus Torvalds * don't, then we'd end up with garbage in the blocks being freed. 16111da177e4SLinus Torvalds * As long as we make the new_size permanent before actually 16121da177e4SLinus Torvalds * freeing any blocks it doesn't matter if they get written to. 16131da177e4SLinus Torvalds * 16141da177e4SLinus Torvalds * The callers must signal into us whether or not the size 16151da177e4SLinus Torvalds * setting here must be synchronous. There are a few cases 16161da177e4SLinus Torvalds * where it doesn't have to be synchronous. Those cases 16171da177e4SLinus Torvalds * occur if the file is unlinked and we know the unlink is 16181da177e4SLinus Torvalds * permanent or if the blocks being truncated are guaranteed 16191da177e4SLinus Torvalds * to be beyond the inode eof (regardless of the link count) 16201da177e4SLinus Torvalds * and the eof value is permanent. Both of these cases occur 16211da177e4SLinus Torvalds * only on wsync-mounted filesystems.
In those cases, we're 16221da177e4SLinus Torvalds * guaranteed that no user will ever see the data in the blocks 16231da177e4SLinus Torvalds * that are being truncated so the truncate can run async. 16241da177e4SLinus Torvalds * In the free beyond eof case, the file may wind up with 16251da177e4SLinus Torvalds * more blocks allocated to it than it needs if we crash 16261da177e4SLinus Torvalds * and that won't get fixed until the next time the file 16271da177e4SLinus Torvalds * is re-opened and closed but that's ok as that shouldn't 16281da177e4SLinus Torvalds * be too many blocks. 16291da177e4SLinus Torvalds * 16301da177e4SLinus Torvalds * However, we can't just make all wsync xactions run async 16311da177e4SLinus Torvalds * because there's one call out of the create path that needs 16321da177e4SLinus Torvalds * to run sync where it's truncating an existing file to size 16331da177e4SLinus Torvalds * 0 whose size is > 0. 16341da177e4SLinus Torvalds * 16351da177e4SLinus Torvalds * It's probably possible to come up with a test in this 16361da177e4SLinus Torvalds * routine that would correctly distinguish all the above 16371da177e4SLinus Torvalds * cases from the values of the function parameters and the 16381da177e4SLinus Torvalds * inode state but for sanity's sake, I've decided to let the 16391da177e4SLinus Torvalds * layers above just tell us. It's simpler to correctly figure 16401da177e4SLinus Torvalds * out in the layer above exactly under what conditions we 16411da177e4SLinus Torvalds * can run async and I think it's easier for others to read and 16421da177e4SLinus Torvalds * follow the logic in case something has to be changed. 16431da177e4SLinus Torvalds * cscope is your friend -- rcc. 16441da177e4SLinus Torvalds * 16451da177e4SLinus Torvalds * The attribute fork is much simpler.
16461da177e4SLinus Torvalds * 16471da177e4SLinus Torvalds * For the attribute fork we allow the caller to tell us whether 16481da177e4SLinus Torvalds * the unlink of the inode that led to this call is yet permanent 16491da177e4SLinus Torvalds * in the on disk log. If it is not and we will be freeing extents 16501da177e4SLinus Torvalds * in this inode then we make the first transaction synchronous 16511da177e4SLinus Torvalds * to make sure that the unlink is permanent by the time we free 16521da177e4SLinus Torvalds * the blocks. 16531da177e4SLinus Torvalds */ 16541da177e4SLinus Torvalds if (fork == XFS_DATA_FORK) { 16551da177e4SLinus Torvalds if (ip->i_d.di_nextents > 0) { 16561da177e4SLinus Torvalds ip->i_d.di_size = new_size; 16571da177e4SLinus Torvalds xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 16581da177e4SLinus Torvalds } 16591da177e4SLinus Torvalds } else if (sync) { 16601da177e4SLinus Torvalds ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 16611da177e4SLinus Torvalds if (ip->i_d.di_anextents > 0) 16621da177e4SLinus Torvalds xfs_trans_set_sync(ntp); 16631da177e4SLinus Torvalds } 16641da177e4SLinus Torvalds ASSERT(fork == XFS_DATA_FORK || 16651da177e4SLinus Torvalds (fork == XFS_ATTR_FORK && 16661da177e4SLinus Torvalds ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || 16671da177e4SLinus Torvalds (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); 16681da177e4SLinus Torvalds 16691da177e4SLinus Torvalds /* 16701da177e4SLinus Torvalds * Since it is possible for space to become allocated beyond 16711da177e4SLinus Torvalds * the end of the file (in a crash where the space is allocated 16721da177e4SLinus Torvalds * but the inode size is not yet updated), simply remove any 16731da177e4SLinus Torvalds * blocks which show up between the new EOF and the maximum 16741da177e4SLinus Torvalds * possible file size. 
If the first block to be removed is 16751da177e4SLinus Torvalds * beyond the maximum file size (ie it is the same as last_block), 16761da177e4SLinus Torvalds * then there is nothing to do. 16771da177e4SLinus Torvalds */ 16781da177e4SLinus Torvalds last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 16791da177e4SLinus Torvalds ASSERT(first_unmap_block <= last_block); 16801da177e4SLinus Torvalds done = 0; 16811da177e4SLinus Torvalds if (last_block == first_unmap_block) { 16821da177e4SLinus Torvalds done = 1; 16831da177e4SLinus Torvalds } else { 16841da177e4SLinus Torvalds unmap_len = last_block - first_unmap_block + 1; 16851da177e4SLinus Torvalds } 16861da177e4SLinus Torvalds while (!done) { 16871da177e4SLinus Torvalds /* 16881da177e4SLinus Torvalds * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() 16891da177e4SLinus Torvalds * will tell us whether it freed the entire range or 16901da177e4SLinus Torvalds * not. If this is a synchronous mount (wsync), 16911da177e4SLinus Torvalds * then we can tell bunmapi to keep all the 16921da177e4SLinus Torvalds * transactions asynchronous since the unlink 16931da177e4SLinus Torvalds * transaction that made this inode inactive has 16941da177e4SLinus Torvalds * already hit the disk. There's no danger of 16951da177e4SLinus Torvalds * the freed blocks being reused, there being a 16961da177e4SLinus Torvalds * crash, and the reused blocks suddenly reappearing 16971da177e4SLinus Torvalds * in this file with garbage in them once recovery 16981da177e4SLinus Torvalds * runs. 16991da177e4SLinus Torvalds */ 17001da177e4SLinus Torvalds XFS_BMAP_INIT(&free_list, &first_block); 17011da177e4SLinus Torvalds error = xfs_bunmapi(ntp, ip, first_unmap_block, 17021da177e4SLinus Torvalds unmap_len, 17031da177e4SLinus Torvalds XFS_BMAPI_AFLAG(fork) | 17041da177e4SLinus Torvalds (sync ? 
0 : XFS_BMAPI_ASYNC), 17051da177e4SLinus Torvalds XFS_ITRUNC_MAX_EXTENTS, 17061da177e4SLinus Torvalds &first_block, &free_list, &done); 17071da177e4SLinus Torvalds if (error) { 17081da177e4SLinus Torvalds /* 17091da177e4SLinus Torvalds * If the bunmapi call encounters an error, 17101da177e4SLinus Torvalds * return to the caller where the transaction 17111da177e4SLinus Torvalds * can be properly aborted. We just need to 17121da177e4SLinus Torvalds * make sure we're not holding any resources 17131da177e4SLinus Torvalds * that we were not when we came in. 17141da177e4SLinus Torvalds */ 17151da177e4SLinus Torvalds xfs_bmap_cancel(&free_list); 17161da177e4SLinus Torvalds return error; 17171da177e4SLinus Torvalds } 17181da177e4SLinus Torvalds 17191da177e4SLinus Torvalds /* 17201da177e4SLinus Torvalds * Duplicate the transaction that has the permanent 17211da177e4SLinus Torvalds * reservation and commit the old transaction. 17221da177e4SLinus Torvalds */ 17231da177e4SLinus Torvalds error = xfs_bmap_finish(tp, &free_list, first_block, 17241da177e4SLinus Torvalds &committed); 17251da177e4SLinus Torvalds ntp = *tp; 17261da177e4SLinus Torvalds if (error) { 17271da177e4SLinus Torvalds /* 17281da177e4SLinus Torvalds * If the bmap finish call encounters an error, 17291da177e4SLinus Torvalds * return to the caller where the transaction 17301da177e4SLinus Torvalds * can be properly aborted. We just need to 17311da177e4SLinus Torvalds * make sure we're not holding any resources 17321da177e4SLinus Torvalds * that we were not when we came in. 17331da177e4SLinus Torvalds * 17341da177e4SLinus Torvalds * Aborting from this point might lose some 17351da177e4SLinus Torvalds * blocks in the file system, but oh well. 
17361da177e4SLinus Torvalds */ 17371da177e4SLinus Torvalds xfs_bmap_cancel(&free_list); 17381da177e4SLinus Torvalds if (committed) { 17391da177e4SLinus Torvalds /* 17401da177e4SLinus Torvalds * If the passed in transaction committed 17411da177e4SLinus Torvalds * in xfs_bmap_finish(), then we want to 17421da177e4SLinus Torvalds * add the inode to this one before returning. 17431da177e4SLinus Torvalds * This keeps things simple for the higher 17441da177e4SLinus Torvalds * level code, because it always knows that 17451da177e4SLinus Torvalds * the inode is locked and held in the 17461da177e4SLinus Torvalds * transaction that returns to it whether 17471da177e4SLinus Torvalds * errors occur or not. We don't mark the 17481da177e4SLinus Torvalds * inode dirty so that this transaction can 17491da177e4SLinus Torvalds * be easily aborted if possible. 17501da177e4SLinus Torvalds */ 17511da177e4SLinus Torvalds xfs_trans_ijoin(ntp, ip, 17521da177e4SLinus Torvalds XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 17531da177e4SLinus Torvalds xfs_trans_ihold(ntp, ip); 17541da177e4SLinus Torvalds } 17551da177e4SLinus Torvalds return error; 17561da177e4SLinus Torvalds } 17571da177e4SLinus Torvalds 17581da177e4SLinus Torvalds if (committed) { 17591da177e4SLinus Torvalds /* 17601da177e4SLinus Torvalds * The first xact was committed, 17611da177e4SLinus Torvalds * so add the inode to the new one. 17621da177e4SLinus Torvalds * Mark it dirty so it will be logged 17631da177e4SLinus Torvalds * and moved forward in the log as 17641da177e4SLinus Torvalds * part of every commit. 
17651da177e4SLinus Torvalds */ 17661da177e4SLinus Torvalds xfs_trans_ijoin(ntp, ip, 17671da177e4SLinus Torvalds XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 17681da177e4SLinus Torvalds xfs_trans_ihold(ntp, ip); 17691da177e4SLinus Torvalds xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 17701da177e4SLinus Torvalds } 17711da177e4SLinus Torvalds ntp = xfs_trans_dup(ntp); 17721da177e4SLinus Torvalds (void) xfs_trans_commit(*tp, 0, NULL); 17731da177e4SLinus Torvalds *tp = ntp; 17741da177e4SLinus Torvalds error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 17751da177e4SLinus Torvalds XFS_TRANS_PERM_LOG_RES, 17761da177e4SLinus Torvalds XFS_ITRUNCATE_LOG_COUNT); 17771da177e4SLinus Torvalds /* 17781da177e4SLinus Torvalds * Add the inode being truncated to the next chained 17791da177e4SLinus Torvalds * transaction. 17801da177e4SLinus Torvalds */ 17811da177e4SLinus Torvalds xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 17821da177e4SLinus Torvalds xfs_trans_ihold(ntp, ip); 17831da177e4SLinus Torvalds if (error) 17841da177e4SLinus Torvalds return (error); 17851da177e4SLinus Torvalds } 17861da177e4SLinus Torvalds /* 17871da177e4SLinus Torvalds * Only update the size in the case of the data fork, but 17881da177e4SLinus Torvalds * always re-log the inode so that our permanent transaction 17891da177e4SLinus Torvalds * can keep on rolling it forward in the log. 
17901da177e4SLinus Torvalds */ 17911da177e4SLinus Torvalds if (fork == XFS_DATA_FORK) { 17921da177e4SLinus Torvalds xfs_isize_check(mp, ip, new_size); 17931da177e4SLinus Torvalds ip->i_d.di_size = new_size; 17941da177e4SLinus Torvalds } 17951da177e4SLinus Torvalds xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 17961da177e4SLinus Torvalds ASSERT((new_size != 0) || 17971da177e4SLinus Torvalds (fork == XFS_ATTR_FORK) || 17981da177e4SLinus Torvalds (ip->i_delayed_blks == 0)); 17991da177e4SLinus Torvalds ASSERT((new_size != 0) || 18001da177e4SLinus Torvalds (fork == XFS_ATTR_FORK) || 18011da177e4SLinus Torvalds (ip->i_d.di_nextents == 0)); 18021da177e4SLinus Torvalds xfs_itrunc_trace(XFS_ITRUNC_FINISH2, ip, 0, new_size, 0, 0); 18031da177e4SLinus Torvalds return 0; 18041da177e4SLinus Torvalds } 18051da177e4SLinus Torvalds 18061da177e4SLinus Torvalds 18071da177e4SLinus Torvalds /* 18081da177e4SLinus Torvalds * xfs_igrow_start 18091da177e4SLinus Torvalds * 18101da177e4SLinus Torvalds * Do the first part of growing a file: zero any data in the last 18111da177e4SLinus Torvalds * block that is beyond the old EOF. We need to do this before 18121da177e4SLinus Torvalds * the inode is joined to the transaction to modify the i_size. 18131da177e4SLinus Torvalds * That way we can drop the inode lock and call into the buffer 18141da177e4SLinus Torvalds * cache to get the buffer mapping the EOF. 
18151da177e4SLinus Torvalds */ 18161da177e4SLinus Torvalds int 18171da177e4SLinus Torvalds xfs_igrow_start( 18181da177e4SLinus Torvalds xfs_inode_t *ip, 18191da177e4SLinus Torvalds xfs_fsize_t new_size, 18201da177e4SLinus Torvalds cred_t *credp) 18211da177e4SLinus Torvalds { 18221da177e4SLinus Torvalds xfs_fsize_t isize; 18231da177e4SLinus Torvalds int error; 18241da177e4SLinus Torvalds 18251da177e4SLinus Torvalds ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 18261da177e4SLinus Torvalds ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 18271da177e4SLinus Torvalds ASSERT(new_size > ip->i_d.di_size); 18281da177e4SLinus Torvalds 18291da177e4SLinus Torvalds error = 0; 18301da177e4SLinus Torvalds isize = ip->i_d.di_size; 18311da177e4SLinus Torvalds /* 18321da177e4SLinus Torvalds * Zero any pages that may have been created by 18331da177e4SLinus Torvalds * xfs_write_file() beyond the end of the file 18341da177e4SLinus Torvalds * and any blocks between the old and new file sizes. 18351da177e4SLinus Torvalds */ 18361da177e4SLinus Torvalds error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize, 18371da177e4SLinus Torvalds new_size); 18381da177e4SLinus Torvalds return error; 18391da177e4SLinus Torvalds } 18401da177e4SLinus Torvalds 18411da177e4SLinus Torvalds /* 18421da177e4SLinus Torvalds * xfs_igrow_finish 18431da177e4SLinus Torvalds * 18441da177e4SLinus Torvalds * This routine is called to extend the size of a file. 18451da177e4SLinus Torvalds * The inode must have both the iolock and the ilock locked 18461da177e4SLinus Torvalds * for update and it must be a part of the current transaction. 18471da177e4SLinus Torvalds * The xfs_igrow_start() function must have been called previously. 18481da177e4SLinus Torvalds * If the change_flag is not zero, the inode change timestamp will 18491da177e4SLinus Torvalds * be updated. 
18501da177e4SLinus Torvalds */ 18511da177e4SLinus Torvalds void 18521da177e4SLinus Torvalds xfs_igrow_finish( 18531da177e4SLinus Torvalds xfs_trans_t *tp, 18541da177e4SLinus Torvalds xfs_inode_t *ip, 18551da177e4SLinus Torvalds xfs_fsize_t new_size, 18561da177e4SLinus Torvalds int change_flag) 18571da177e4SLinus Torvalds { 18581da177e4SLinus Torvalds ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 18591da177e4SLinus Torvalds ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 18601da177e4SLinus Torvalds ASSERT(ip->i_transp == tp); 18611da177e4SLinus Torvalds ASSERT(new_size > ip->i_d.di_size); 18621da177e4SLinus Torvalds 18631da177e4SLinus Torvalds /* 18641da177e4SLinus Torvalds * Update the file size. Update the inode change timestamp 18651da177e4SLinus Torvalds * if change_flag set. 18661da177e4SLinus Torvalds */ 18671da177e4SLinus Torvalds ip->i_d.di_size = new_size; 18681da177e4SLinus Torvalds if (change_flag) 18691da177e4SLinus Torvalds xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 18701da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 18711da177e4SLinus Torvalds 18721da177e4SLinus Torvalds } 18731da177e4SLinus Torvalds 18741da177e4SLinus Torvalds 18751da177e4SLinus Torvalds /* 18761da177e4SLinus Torvalds * This is called when the inode's link count goes to 0. 18771da177e4SLinus Torvalds * We place the on-disk inode on a list in the AGI. It 18781da177e4SLinus Torvalds * will be pulled from this list when the inode is freed. 
18791da177e4SLinus Torvalds */ 18801da177e4SLinus Torvalds int 18811da177e4SLinus Torvalds xfs_iunlink( 18821da177e4SLinus Torvalds xfs_trans_t *tp, 18831da177e4SLinus Torvalds xfs_inode_t *ip) 18841da177e4SLinus Torvalds { 18851da177e4SLinus Torvalds xfs_mount_t *mp; 18861da177e4SLinus Torvalds xfs_agi_t *agi; 18871da177e4SLinus Torvalds xfs_dinode_t *dip; 18881da177e4SLinus Torvalds xfs_buf_t *agibp; 18891da177e4SLinus Torvalds xfs_buf_t *ibp; 18901da177e4SLinus Torvalds xfs_agnumber_t agno; 18911da177e4SLinus Torvalds xfs_daddr_t agdaddr; 18921da177e4SLinus Torvalds xfs_agino_t agino; 18931da177e4SLinus Torvalds short bucket_index; 18941da177e4SLinus Torvalds int offset; 18951da177e4SLinus Torvalds int error; 18961da177e4SLinus Torvalds int agi_ok; 18971da177e4SLinus Torvalds 18981da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 18991da177e4SLinus Torvalds ASSERT(ip->i_d.di_mode != 0); 19001da177e4SLinus Torvalds ASSERT(ip->i_transp == tp); 19011da177e4SLinus Torvalds 19021da177e4SLinus Torvalds mp = tp->t_mountp; 19031da177e4SLinus Torvalds 19041da177e4SLinus Torvalds agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 19051da177e4SLinus Torvalds agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); 19061da177e4SLinus Torvalds 19071da177e4SLinus Torvalds /* 19081da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 19091da177e4SLinus Torvalds * on the list. 19101da177e4SLinus Torvalds */ 19111da177e4SLinus Torvalds error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 19121da177e4SLinus Torvalds XFS_FSS_TO_BB(mp, 1), 0, &agibp); 19131da177e4SLinus Torvalds if (error) { 19141da177e4SLinus Torvalds return error; 19151da177e4SLinus Torvalds } 19161da177e4SLinus Torvalds /* 19171da177e4SLinus Torvalds * Validate the magic number of the agi block. 
19181da177e4SLinus Torvalds */ 19191da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 19201da177e4SLinus Torvalds agi_ok = 19211da177e4SLinus Torvalds INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC && 19221da177e4SLinus Torvalds XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT)); 19231da177e4SLinus Torvalds if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK, 19241da177e4SLinus Torvalds XFS_RANDOM_IUNLINK))) { 19251da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi); 19261da177e4SLinus Torvalds xfs_trans_brelse(tp, agibp); 19271da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 19281da177e4SLinus Torvalds } 19291da177e4SLinus Torvalds /* 19301da177e4SLinus Torvalds * Get the index into the agi hash table for the 19311da177e4SLinus Torvalds * list this inode will go on. 19321da177e4SLinus Torvalds */ 19331da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 19341da177e4SLinus Torvalds ASSERT(agino != 0); 19351da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 19361da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 19371da177e4SLinus Torvalds ASSERT(INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) != agino); 19381da177e4SLinus Torvalds 19391da177e4SLinus Torvalds if (INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) != NULLAGINO) { 19401da177e4SLinus Torvalds /* 19411da177e4SLinus Torvalds * There is already another inode in the bucket we need 19421da177e4SLinus Torvalds * to add ourselves to. Add us at the front of the list. 19431da177e4SLinus Torvalds * Here we put the head pointer into our next pointer, 19441da177e4SLinus Torvalds * and then we fall through to point the head at us. 
19451da177e4SLinus Torvalds */ 19461da177e4SLinus Torvalds error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); 19471da177e4SLinus Torvalds if (error) { 19481da177e4SLinus Torvalds return error; 19491da177e4SLinus Torvalds } 19501da177e4SLinus Torvalds ASSERT(INT_GET(dip->di_next_unlinked, ARCH_CONVERT) == NULLAGINO); 19511da177e4SLinus Torvalds ASSERT(dip->di_next_unlinked); 19521da177e4SLinus Torvalds /* both on-disk, don't endian flip twice */ 19531da177e4SLinus Torvalds dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 19541da177e4SLinus Torvalds offset = ip->i_boffset + 19551da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 19561da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 19571da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 19581da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 19591da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 19601da177e4SLinus Torvalds } 19611da177e4SLinus Torvalds 19621da177e4SLinus Torvalds /* 19631da177e4SLinus Torvalds * Point the bucket head pointer at the inode being inserted. 19641da177e4SLinus Torvalds */ 19651da177e4SLinus Torvalds ASSERT(agino != 0); 19661da177e4SLinus Torvalds INT_SET(agi->agi_unlinked[bucket_index], ARCH_CONVERT, agino); 19671da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 19681da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 19691da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 19701da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 19711da177e4SLinus Torvalds return 0; 19721da177e4SLinus Torvalds } 19731da177e4SLinus Torvalds 19741da177e4SLinus Torvalds /* 19751da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 
19761da177e4SLinus Torvalds */ 19771da177e4SLinus Torvalds STATIC int 19781da177e4SLinus Torvalds xfs_iunlink_remove( 19791da177e4SLinus Torvalds xfs_trans_t *tp, 19801da177e4SLinus Torvalds xfs_inode_t *ip) 19811da177e4SLinus Torvalds { 19821da177e4SLinus Torvalds xfs_ino_t next_ino; 19831da177e4SLinus Torvalds xfs_mount_t *mp; 19841da177e4SLinus Torvalds xfs_agi_t *agi; 19851da177e4SLinus Torvalds xfs_dinode_t *dip; 19861da177e4SLinus Torvalds xfs_buf_t *agibp; 19871da177e4SLinus Torvalds xfs_buf_t *ibp; 19881da177e4SLinus Torvalds xfs_agnumber_t agno; 19891da177e4SLinus Torvalds xfs_daddr_t agdaddr; 19901da177e4SLinus Torvalds xfs_agino_t agino; 19911da177e4SLinus Torvalds xfs_agino_t next_agino; 19921da177e4SLinus Torvalds xfs_buf_t *last_ibp; 19931da177e4SLinus Torvalds xfs_dinode_t *last_dip; 19941da177e4SLinus Torvalds short bucket_index; 19951da177e4SLinus Torvalds int offset, last_offset; 19961da177e4SLinus Torvalds int error; 19971da177e4SLinus Torvalds int agi_ok; 19981da177e4SLinus Torvalds 19991da177e4SLinus Torvalds /* 20001da177e4SLinus Torvalds * First pull the on-disk inode from the AGI unlinked list. 20011da177e4SLinus Torvalds */ 20021da177e4SLinus Torvalds mp = tp->t_mountp; 20031da177e4SLinus Torvalds 20041da177e4SLinus Torvalds agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 20051da177e4SLinus Torvalds agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); 20061da177e4SLinus Torvalds 20071da177e4SLinus Torvalds /* 20081da177e4SLinus Torvalds * Get the agi buffer first. It ensures lock ordering 20091da177e4SLinus Torvalds * on the list. 20101da177e4SLinus Torvalds */ 20111da177e4SLinus Torvalds error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 20121da177e4SLinus Torvalds XFS_FSS_TO_BB(mp, 1), 0, &agibp); 20131da177e4SLinus Torvalds if (error) { 20141da177e4SLinus Torvalds cmn_err(CE_WARN, 20151da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. 
Returning error.", 20161da177e4SLinus Torvalds error, mp->m_fsname); 20171da177e4SLinus Torvalds return error; 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds /* 20201da177e4SLinus Torvalds * Validate the magic number of the agi block. 20211da177e4SLinus Torvalds */ 20221da177e4SLinus Torvalds agi = XFS_BUF_TO_AGI(agibp); 20231da177e4SLinus Torvalds agi_ok = 20241da177e4SLinus Torvalds INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC && 20251da177e4SLinus Torvalds XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT)); 20261da177e4SLinus Torvalds if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE, 20271da177e4SLinus Torvalds XFS_RANDOM_IUNLINK_REMOVE))) { 20281da177e4SLinus Torvalds XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW, 20291da177e4SLinus Torvalds mp, agi); 20301da177e4SLinus Torvalds xfs_trans_brelse(tp, agibp); 20311da177e4SLinus Torvalds cmn_err(CE_WARN, 20321da177e4SLinus Torvalds "xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.", 20331da177e4SLinus Torvalds mp->m_fsname); 20341da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 20351da177e4SLinus Torvalds } 20361da177e4SLinus Torvalds /* 20371da177e4SLinus Torvalds * Get the index into the agi hash table for the 20381da177e4SLinus Torvalds * list this inode will go on. 20391da177e4SLinus Torvalds */ 20401da177e4SLinus Torvalds agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 20411da177e4SLinus Torvalds ASSERT(agino != 0); 20421da177e4SLinus Torvalds bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 20431da177e4SLinus Torvalds ASSERT(INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) != NULLAGINO); 20441da177e4SLinus Torvalds ASSERT(agi->agi_unlinked[bucket_index]); 20451da177e4SLinus Torvalds 20461da177e4SLinus Torvalds if (INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT) == agino) { 20471da177e4SLinus Torvalds /* 20481da177e4SLinus Torvalds * We're at the head of the list. 
Get the inode's 20491da177e4SLinus Torvalds * on-disk buffer to see if there is anyone after us 20501da177e4SLinus Torvalds * on the list. Only modify our next pointer if it 20511da177e4SLinus Torvalds * is not already NULLAGINO. This saves us the overhead 20521da177e4SLinus Torvalds * of dealing with the buffer when there is no need to 20531da177e4SLinus Torvalds * change it. 20541da177e4SLinus Torvalds */ 20551da177e4SLinus Torvalds error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); 20561da177e4SLinus Torvalds if (error) { 20571da177e4SLinus Torvalds cmn_err(CE_WARN, 20581da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 20591da177e4SLinus Torvalds error, mp->m_fsname); 20601da177e4SLinus Torvalds return error; 20611da177e4SLinus Torvalds } 20621da177e4SLinus Torvalds next_agino = INT_GET(dip->di_next_unlinked, ARCH_CONVERT); 20631da177e4SLinus Torvalds ASSERT(next_agino != 0); 20641da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 20651da177e4SLinus Torvalds INT_SET(dip->di_next_unlinked, ARCH_CONVERT, NULLAGINO); 20661da177e4SLinus Torvalds offset = ip->i_boffset + 20671da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 20681da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 20691da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 20701da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 20711da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 20721da177e4SLinus Torvalds } else { 20731da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 20741da177e4SLinus Torvalds } 20751da177e4SLinus Torvalds /* 20761da177e4SLinus Torvalds * Point the bucket head pointer at the next inode. 
20771da177e4SLinus Torvalds */ 20781da177e4SLinus Torvalds ASSERT(next_agino != 0); 20791da177e4SLinus Torvalds ASSERT(next_agino != agino); 20801da177e4SLinus Torvalds INT_SET(agi->agi_unlinked[bucket_index], ARCH_CONVERT, next_agino); 20811da177e4SLinus Torvalds offset = offsetof(xfs_agi_t, agi_unlinked) + 20821da177e4SLinus Torvalds (sizeof(xfs_agino_t) * bucket_index); 20831da177e4SLinus Torvalds xfs_trans_log_buf(tp, agibp, offset, 20841da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 20851da177e4SLinus Torvalds } else { 20861da177e4SLinus Torvalds /* 20871da177e4SLinus Torvalds * We need to search the list for the inode being freed. 20881da177e4SLinus Torvalds */ 20891da177e4SLinus Torvalds next_agino = INT_GET(agi->agi_unlinked[bucket_index], ARCH_CONVERT); 20901da177e4SLinus Torvalds last_ibp = NULL; 20911da177e4SLinus Torvalds while (next_agino != agino) { 20921da177e4SLinus Torvalds /* 20931da177e4SLinus Torvalds * If the last inode wasn't the one pointing to 20941da177e4SLinus Torvalds * us, then release its buffer since we're not 20951da177e4SLinus Torvalds * going to do anything with it. 20961da177e4SLinus Torvalds */ 20971da177e4SLinus Torvalds if (last_ibp != NULL) { 20981da177e4SLinus Torvalds xfs_trans_brelse(tp, last_ibp); 20991da177e4SLinus Torvalds } 21001da177e4SLinus Torvalds next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 21011da177e4SLinus Torvalds error = xfs_inotobp(mp, tp, next_ino, &last_dip, 21021da177e4SLinus Torvalds &last_ibp, &last_offset); 21031da177e4SLinus Torvalds if (error) { 21041da177e4SLinus Torvalds cmn_err(CE_WARN, 21051da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. 
Returning error.", 21061da177e4SLinus Torvalds error, mp->m_fsname); 21071da177e4SLinus Torvalds return error; 21081da177e4SLinus Torvalds } 21091da177e4SLinus Torvalds next_agino = INT_GET(last_dip->di_next_unlinked, ARCH_CONVERT); 21101da177e4SLinus Torvalds ASSERT(next_agino != NULLAGINO); 21111da177e4SLinus Torvalds ASSERT(next_agino != 0); 21121da177e4SLinus Torvalds } 21131da177e4SLinus Torvalds /* 21141da177e4SLinus Torvalds * Now last_ibp points to the buffer previous to us on 21151da177e4SLinus Torvalds * the unlinked list. Pull us from the list. 21161da177e4SLinus Torvalds */ 21171da177e4SLinus Torvalds error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0); 21181da177e4SLinus Torvalds if (error) { 21191da177e4SLinus Torvalds cmn_err(CE_WARN, 21201da177e4SLinus Torvalds "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 21211da177e4SLinus Torvalds error, mp->m_fsname); 21221da177e4SLinus Torvalds return error; 21231da177e4SLinus Torvalds } 21241da177e4SLinus Torvalds next_agino = INT_GET(dip->di_next_unlinked, ARCH_CONVERT); 21251da177e4SLinus Torvalds ASSERT(next_agino != 0); 21261da177e4SLinus Torvalds ASSERT(next_agino != agino); 21271da177e4SLinus Torvalds if (next_agino != NULLAGINO) { 21281da177e4SLinus Torvalds INT_SET(dip->di_next_unlinked, ARCH_CONVERT, NULLAGINO); 21291da177e4SLinus Torvalds offset = ip->i_boffset + 21301da177e4SLinus Torvalds offsetof(xfs_dinode_t, di_next_unlinked); 21311da177e4SLinus Torvalds xfs_trans_inode_buf(tp, ibp); 21321da177e4SLinus Torvalds xfs_trans_log_buf(tp, ibp, offset, 21331da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 21341da177e4SLinus Torvalds xfs_inobp_check(mp, ibp); 21351da177e4SLinus Torvalds } else { 21361da177e4SLinus Torvalds xfs_trans_brelse(tp, ibp); 21371da177e4SLinus Torvalds } 21381da177e4SLinus Torvalds /* 21391da177e4SLinus Torvalds * Point the previous inode on the list to the next inode. 
21401da177e4SLinus Torvalds */ 21411da177e4SLinus Torvalds INT_SET(last_dip->di_next_unlinked, ARCH_CONVERT, next_agino); 21421da177e4SLinus Torvalds ASSERT(next_agino != 0); 21431da177e4SLinus Torvalds offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 21441da177e4SLinus Torvalds xfs_trans_inode_buf(tp, last_ibp); 21451da177e4SLinus Torvalds xfs_trans_log_buf(tp, last_ibp, offset, 21461da177e4SLinus Torvalds (offset + sizeof(xfs_agino_t) - 1)); 21471da177e4SLinus Torvalds xfs_inobp_check(mp, last_ibp); 21481da177e4SLinus Torvalds } 21491da177e4SLinus Torvalds return 0; 21501da177e4SLinus Torvalds } 21511da177e4SLinus Torvalds 21521da177e4SLinus Torvalds static __inline__ int xfs_inode_clean(xfs_inode_t *ip) 21531da177e4SLinus Torvalds { 21541da177e4SLinus Torvalds return (((ip->i_itemp == NULL) || 21551da177e4SLinus Torvalds !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && 21561da177e4SLinus Torvalds (ip->i_update_core == 0)); 21571da177e4SLinus Torvalds } 21581da177e4SLinus Torvalds 21591da177e4SLinus Torvalds void 21601da177e4SLinus Torvalds xfs_ifree_cluster( 21611da177e4SLinus Torvalds xfs_inode_t *free_ip, 21621da177e4SLinus Torvalds xfs_trans_t *tp, 21631da177e4SLinus Torvalds xfs_ino_t inum) 21641da177e4SLinus Torvalds { 21651da177e4SLinus Torvalds xfs_mount_t *mp = free_ip->i_mount; 21661da177e4SLinus Torvalds int blks_per_cluster; 21671da177e4SLinus Torvalds int nbufs; 21681da177e4SLinus Torvalds int ninodes; 21691da177e4SLinus Torvalds int i, j, found, pre_flushed; 21701da177e4SLinus Torvalds xfs_daddr_t blkno; 21711da177e4SLinus Torvalds xfs_buf_t *bp; 21721da177e4SLinus Torvalds xfs_ihash_t *ih; 21731da177e4SLinus Torvalds xfs_inode_t *ip, **ip_found; 21741da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 21751da177e4SLinus Torvalds xfs_log_item_t *lip; 21761da177e4SLinus Torvalds SPLDECL(s); 21771da177e4SLinus Torvalds 21781da177e4SLinus Torvalds if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { 21791da177e4SLinus 
Torvalds blks_per_cluster = 1; 21801da177e4SLinus Torvalds ninodes = mp->m_sb.sb_inopblock; 21811da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp); 21821da177e4SLinus Torvalds } else { 21831da177e4SLinus Torvalds blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / 21841da177e4SLinus Torvalds mp->m_sb.sb_blocksize; 21851da177e4SLinus Torvalds ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; 21861da177e4SLinus Torvalds nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; 21871da177e4SLinus Torvalds } 21881da177e4SLinus Torvalds 21891da177e4SLinus Torvalds ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS); 21901da177e4SLinus Torvalds 21911da177e4SLinus Torvalds for (j = 0; j < nbufs; j++, inum += ninodes) { 21921da177e4SLinus Torvalds blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 21931da177e4SLinus Torvalds XFS_INO_TO_AGBNO(mp, inum)); 21941da177e4SLinus Torvalds 21951da177e4SLinus Torvalds 21961da177e4SLinus Torvalds /* 21971da177e4SLinus Torvalds * Look for each inode in memory and attempt to lock it, 21981da177e4SLinus Torvalds * we can be racing with flush and tail pushing here. 21991da177e4SLinus Torvalds * any inode we get the locks on, add to an array of 22001da177e4SLinus Torvalds * inode items to process later. 22011da177e4SLinus Torvalds * 22021da177e4SLinus Torvalds * The get the buffer lock, we could beat a flush 22031da177e4SLinus Torvalds * or tail pushing thread to the lock here, in which 22041da177e4SLinus Torvalds * case they will go looking for the inode buffer 22051da177e4SLinus Torvalds * and fail, we need some other form of interlock 22061da177e4SLinus Torvalds * here. 
22071da177e4SLinus Torvalds */ 22081da177e4SLinus Torvalds found = 0; 22091da177e4SLinus Torvalds for (i = 0; i < ninodes; i++) { 22101da177e4SLinus Torvalds ih = XFS_IHASH(mp, inum + i); 22111da177e4SLinus Torvalds read_lock(&ih->ih_lock); 22121da177e4SLinus Torvalds for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { 22131da177e4SLinus Torvalds if (ip->i_ino == inum + i) 22141da177e4SLinus Torvalds break; 22151da177e4SLinus Torvalds } 22161da177e4SLinus Torvalds 22171da177e4SLinus Torvalds /* Inode not in memory or we found it already, 22181da177e4SLinus Torvalds * nothing to do 22191da177e4SLinus Torvalds */ 22201da177e4SLinus Torvalds if (!ip || (ip->i_flags & XFS_ISTALE)) { 22211da177e4SLinus Torvalds read_unlock(&ih->ih_lock); 22221da177e4SLinus Torvalds continue; 22231da177e4SLinus Torvalds } 22241da177e4SLinus Torvalds 22251da177e4SLinus Torvalds if (xfs_inode_clean(ip)) { 22261da177e4SLinus Torvalds read_unlock(&ih->ih_lock); 22271da177e4SLinus Torvalds continue; 22281da177e4SLinus Torvalds } 22291da177e4SLinus Torvalds 22301da177e4SLinus Torvalds /* If we can get the locks then add it to the 22311da177e4SLinus Torvalds * list, otherwise by the time we get the bp lock 22321da177e4SLinus Torvalds * below it will already be attached to the 22331da177e4SLinus Torvalds * inode buffer. 22341da177e4SLinus Torvalds */ 22351da177e4SLinus Torvalds 22361da177e4SLinus Torvalds /* This inode will already be locked - by us, lets 22371da177e4SLinus Torvalds * keep it that way. 
22381da177e4SLinus Torvalds */ 22391da177e4SLinus Torvalds 22401da177e4SLinus Torvalds if (ip == free_ip) { 22411da177e4SLinus Torvalds if (xfs_iflock_nowait(ip)) { 22421da177e4SLinus Torvalds ip->i_flags |= XFS_ISTALE; 22431da177e4SLinus Torvalds 22441da177e4SLinus Torvalds if (xfs_inode_clean(ip)) { 22451da177e4SLinus Torvalds xfs_ifunlock(ip); 22461da177e4SLinus Torvalds } else { 22471da177e4SLinus Torvalds ip_found[found++] = ip; 22481da177e4SLinus Torvalds } 22491da177e4SLinus Torvalds } 22501da177e4SLinus Torvalds read_unlock(&ih->ih_lock); 22511da177e4SLinus Torvalds continue; 22521da177e4SLinus Torvalds } 22531da177e4SLinus Torvalds 22541da177e4SLinus Torvalds if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 22551da177e4SLinus Torvalds if (xfs_iflock_nowait(ip)) { 22561da177e4SLinus Torvalds ip->i_flags |= XFS_ISTALE; 22571da177e4SLinus Torvalds 22581da177e4SLinus Torvalds if (xfs_inode_clean(ip)) { 22591da177e4SLinus Torvalds xfs_ifunlock(ip); 22601da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22611da177e4SLinus Torvalds } else { 22621da177e4SLinus Torvalds ip_found[found++] = ip; 22631da177e4SLinus Torvalds } 22641da177e4SLinus Torvalds } else { 22651da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 22661da177e4SLinus Torvalds } 22671da177e4SLinus Torvalds } 22681da177e4SLinus Torvalds 22691da177e4SLinus Torvalds read_unlock(&ih->ih_lock); 22701da177e4SLinus Torvalds } 22711da177e4SLinus Torvalds 22721da177e4SLinus Torvalds bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 22731da177e4SLinus Torvalds mp->m_bsize * blks_per_cluster, 22741da177e4SLinus Torvalds XFS_BUF_LOCK); 22751da177e4SLinus Torvalds 22761da177e4SLinus Torvalds pre_flushed = 0; 22771da177e4SLinus Torvalds lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 22781da177e4SLinus Torvalds while (lip) { 22791da177e4SLinus Torvalds if (lip->li_type == XFS_LI_INODE) { 22801da177e4SLinus Torvalds iip = (xfs_inode_log_item_t *)lip; 22811da177e4SLinus Torvalds ASSERT(iip->ili_logged == 1); 
22821da177e4SLinus Torvalds lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; 22831da177e4SLinus Torvalds AIL_LOCK(mp,s); 22841da177e4SLinus Torvalds iip->ili_flush_lsn = iip->ili_item.li_lsn; 22851da177e4SLinus Torvalds AIL_UNLOCK(mp, s); 22861da177e4SLinus Torvalds iip->ili_inode->i_flags |= XFS_ISTALE; 22871da177e4SLinus Torvalds pre_flushed++; 22881da177e4SLinus Torvalds } 22891da177e4SLinus Torvalds lip = lip->li_bio_list; 22901da177e4SLinus Torvalds } 22911da177e4SLinus Torvalds 22921da177e4SLinus Torvalds for (i = 0; i < found; i++) { 22931da177e4SLinus Torvalds ip = ip_found[i]; 22941da177e4SLinus Torvalds iip = ip->i_itemp; 22951da177e4SLinus Torvalds 22961da177e4SLinus Torvalds if (!iip) { 22971da177e4SLinus Torvalds ip->i_update_core = 0; 22981da177e4SLinus Torvalds xfs_ifunlock(ip); 22991da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 23001da177e4SLinus Torvalds continue; 23011da177e4SLinus Torvalds } 23021da177e4SLinus Torvalds 23031da177e4SLinus Torvalds iip->ili_last_fields = iip->ili_format.ilf_fields; 23041da177e4SLinus Torvalds iip->ili_format.ilf_fields = 0; 23051da177e4SLinus Torvalds iip->ili_logged = 1; 23061da177e4SLinus Torvalds AIL_LOCK(mp,s); 23071da177e4SLinus Torvalds iip->ili_flush_lsn = iip->ili_item.li_lsn; 23081da177e4SLinus Torvalds AIL_UNLOCK(mp, s); 23091da177e4SLinus Torvalds 23101da177e4SLinus Torvalds xfs_buf_attach_iodone(bp, 23111da177e4SLinus Torvalds (void(*)(xfs_buf_t*,xfs_log_item_t*)) 23121da177e4SLinus Torvalds xfs_istale_done, (xfs_log_item_t *)iip); 23131da177e4SLinus Torvalds if (ip != free_ip) { 23141da177e4SLinus Torvalds xfs_iunlock(ip, XFS_ILOCK_EXCL); 23151da177e4SLinus Torvalds } 23161da177e4SLinus Torvalds } 23171da177e4SLinus Torvalds 23181da177e4SLinus Torvalds if (found || pre_flushed) 23191da177e4SLinus Torvalds xfs_trans_stale_inode_buf(tp, bp); 23201da177e4SLinus Torvalds xfs_trans_binval(tp, bp); 23211da177e4SLinus Torvalds } 23221da177e4SLinus Torvalds 23231da177e4SLinus 
Torvalds kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); 23241da177e4SLinus Torvalds } 23251da177e4SLinus Torvalds 23261da177e4SLinus Torvalds /* 23271da177e4SLinus Torvalds * This is called to return an inode to the inode free list. 23281da177e4SLinus Torvalds * The inode should already be truncated to 0 length and have 23291da177e4SLinus Torvalds * no pages associated with it. This routine also assumes that 23301da177e4SLinus Torvalds * the inode is already a part of the transaction. 23311da177e4SLinus Torvalds * 23321da177e4SLinus Torvalds * The on-disk copy of the inode will have been added to the list 23331da177e4SLinus Torvalds * of unlinked inodes in the AGI. We need to remove the inode from 23341da177e4SLinus Torvalds * that list atomically with respect to freeing it here. 23351da177e4SLinus Torvalds */ 23361da177e4SLinus Torvalds int 23371da177e4SLinus Torvalds xfs_ifree( 23381da177e4SLinus Torvalds xfs_trans_t *tp, 23391da177e4SLinus Torvalds xfs_inode_t *ip, 23401da177e4SLinus Torvalds xfs_bmap_free_t *flist) 23411da177e4SLinus Torvalds { 23421da177e4SLinus Torvalds int error; 23431da177e4SLinus Torvalds int delete; 23441da177e4SLinus Torvalds xfs_ino_t first_ino; 23451da177e4SLinus Torvalds 23461da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 23471da177e4SLinus Torvalds ASSERT(ip->i_transp == tp); 23481da177e4SLinus Torvalds ASSERT(ip->i_d.di_nlink == 0); 23491da177e4SLinus Torvalds ASSERT(ip->i_d.di_nextents == 0); 23501da177e4SLinus Torvalds ASSERT(ip->i_d.di_anextents == 0); 23511da177e4SLinus Torvalds ASSERT((ip->i_d.di_size == 0) || 23521da177e4SLinus Torvalds ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 23531da177e4SLinus Torvalds ASSERT(ip->i_d.di_nblocks == 0); 23541da177e4SLinus Torvalds 23551da177e4SLinus Torvalds /* 23561da177e4SLinus Torvalds * Pull the on-disk inode from the AGI unlinked list. 
23571da177e4SLinus Torvalds */ 23581da177e4SLinus Torvalds error = xfs_iunlink_remove(tp, ip); 23591da177e4SLinus Torvalds if (error != 0) { 23601da177e4SLinus Torvalds return error; 23611da177e4SLinus Torvalds } 23621da177e4SLinus Torvalds 23631da177e4SLinus Torvalds error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 23641da177e4SLinus Torvalds if (error != 0) { 23651da177e4SLinus Torvalds return error; 23661da177e4SLinus Torvalds } 23671da177e4SLinus Torvalds ip->i_d.di_mode = 0; /* mark incore inode as free */ 23681da177e4SLinus Torvalds ip->i_d.di_flags = 0; 23691da177e4SLinus Torvalds ip->i_d.di_dmevmask = 0; 23701da177e4SLinus Torvalds ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ 23711da177e4SLinus Torvalds ip->i_df.if_ext_max = 23721da177e4SLinus Torvalds XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 23731da177e4SLinus Torvalds ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; 23741da177e4SLinus Torvalds ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; 23751da177e4SLinus Torvalds /* 23761da177e4SLinus Torvalds * Bump the generation count so no one will be confused 23771da177e4SLinus Torvalds * by reincarnations of this inode. 23781da177e4SLinus Torvalds */ 23791da177e4SLinus Torvalds ip->i_d.di_gen++; 23801da177e4SLinus Torvalds xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 23811da177e4SLinus Torvalds 23821da177e4SLinus Torvalds if (delete) { 23831da177e4SLinus Torvalds xfs_ifree_cluster(ip, tp, first_ino); 23841da177e4SLinus Torvalds } 23851da177e4SLinus Torvalds 23861da177e4SLinus Torvalds return 0; 23871da177e4SLinus Torvalds } 23881da177e4SLinus Torvalds 23891da177e4SLinus Torvalds /* 23901da177e4SLinus Torvalds * Reallocate the space for if_broot based on the number of records 23911da177e4SLinus Torvalds * being added or deleted as indicated in rec_diff. Move the records 23921da177e4SLinus Torvalds * and pointers in if_broot to fit the new size. 
When shrinking this 23931da177e4SLinus Torvalds * will eliminate holes between the records and pointers created by 23941da177e4SLinus Torvalds * the caller. When growing this will create holes to be filled in 23951da177e4SLinus Torvalds * by the caller. 23961da177e4SLinus Torvalds * 23971da177e4SLinus Torvalds * The caller must not request to add more records than would fit in 23981da177e4SLinus Torvalds * the on-disk inode root. If the if_broot is currently NULL, then 23991da177e4SLinus Torvalds * if we adding records one will be allocated. The caller must also 24001da177e4SLinus Torvalds * not request that the number of records go below zero, although 24011da177e4SLinus Torvalds * it can go to zero. 24021da177e4SLinus Torvalds * 24031da177e4SLinus Torvalds * ip -- the inode whose if_broot area is changing 24041da177e4SLinus Torvalds * ext_diff -- the change in the number of records, positive or negative, 24051da177e4SLinus Torvalds * requested for the if_broot array. 24061da177e4SLinus Torvalds */ 24071da177e4SLinus Torvalds void 24081da177e4SLinus Torvalds xfs_iroot_realloc( 24091da177e4SLinus Torvalds xfs_inode_t *ip, 24101da177e4SLinus Torvalds int rec_diff, 24111da177e4SLinus Torvalds int whichfork) 24121da177e4SLinus Torvalds { 24131da177e4SLinus Torvalds int cur_max; 24141da177e4SLinus Torvalds xfs_ifork_t *ifp; 24151da177e4SLinus Torvalds xfs_bmbt_block_t *new_broot; 24161da177e4SLinus Torvalds int new_max; 24171da177e4SLinus Torvalds size_t new_size; 24181da177e4SLinus Torvalds char *np; 24191da177e4SLinus Torvalds char *op; 24201da177e4SLinus Torvalds 24211da177e4SLinus Torvalds /* 24221da177e4SLinus Torvalds * Handle the degenerate case quietly. 
24231da177e4SLinus Torvalds */ 24241da177e4SLinus Torvalds if (rec_diff == 0) { 24251da177e4SLinus Torvalds return; 24261da177e4SLinus Torvalds } 24271da177e4SLinus Torvalds 24281da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 24291da177e4SLinus Torvalds if (rec_diff > 0) { 24301da177e4SLinus Torvalds /* 24311da177e4SLinus Torvalds * If there wasn't any memory allocated before, just 24321da177e4SLinus Torvalds * allocate it now and get out. 24331da177e4SLinus Torvalds */ 24341da177e4SLinus Torvalds if (ifp->if_broot_bytes == 0) { 24351da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); 24361da177e4SLinus Torvalds ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, 24371da177e4SLinus Torvalds KM_SLEEP); 24381da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 24391da177e4SLinus Torvalds return; 24401da177e4SLinus Torvalds } 24411da177e4SLinus Torvalds 24421da177e4SLinus Torvalds /* 24431da177e4SLinus Torvalds * If there is already an existing if_broot, then we need 24441da177e4SLinus Torvalds * to realloc() it and shift the pointers to their new 24451da177e4SLinus Torvalds * location. The records don't change location because 24461da177e4SLinus Torvalds * they are kept butted up against the btree block header. 
24471da177e4SLinus Torvalds */ 24481da177e4SLinus Torvalds cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 24491da177e4SLinus Torvalds new_max = cur_max + rec_diff; 24501da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 24511da177e4SLinus Torvalds ifp->if_broot = (xfs_bmbt_block_t *) 24521da177e4SLinus Torvalds kmem_realloc(ifp->if_broot, 24531da177e4SLinus Torvalds new_size, 24541da177e4SLinus Torvalds (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ 24551da177e4SLinus Torvalds KM_SLEEP); 24561da177e4SLinus Torvalds op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 24571da177e4SLinus Torvalds ifp->if_broot_bytes); 24581da177e4SLinus Torvalds np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 24591da177e4SLinus Torvalds (int)new_size); 24601da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 24611da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 24621da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 24631da177e4SLinus Torvalds memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 24641da177e4SLinus Torvalds return; 24651da177e4SLinus Torvalds } 24661da177e4SLinus Torvalds 24671da177e4SLinus Torvalds /* 24681da177e4SLinus Torvalds * rec_diff is less than 0. In this case, we are shrinking the 24691da177e4SLinus Torvalds * if_broot buffer. It must already exist. If we go to zero 24701da177e4SLinus Torvalds * records, just get rid of the root and clear the status bit. 
24711da177e4SLinus Torvalds */ 24721da177e4SLinus Torvalds ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); 24731da177e4SLinus Torvalds cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); 24741da177e4SLinus Torvalds new_max = cur_max + rec_diff; 24751da177e4SLinus Torvalds ASSERT(new_max >= 0); 24761da177e4SLinus Torvalds if (new_max > 0) 24771da177e4SLinus Torvalds new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); 24781da177e4SLinus Torvalds else 24791da177e4SLinus Torvalds new_size = 0; 24801da177e4SLinus Torvalds if (new_size > 0) { 24811da177e4SLinus Torvalds new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); 24821da177e4SLinus Torvalds /* 24831da177e4SLinus Torvalds * First copy over the btree block header. 24841da177e4SLinus Torvalds */ 24851da177e4SLinus Torvalds memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t)); 24861da177e4SLinus Torvalds } else { 24871da177e4SLinus Torvalds new_broot = NULL; 24881da177e4SLinus Torvalds ifp->if_flags &= ~XFS_IFBROOT; 24891da177e4SLinus Torvalds } 24901da177e4SLinus Torvalds 24911da177e4SLinus Torvalds /* 24921da177e4SLinus Torvalds * Only copy the records and pointers if there are any. 24931da177e4SLinus Torvalds */ 24941da177e4SLinus Torvalds if (new_max > 0) { 24951da177e4SLinus Torvalds /* 24961da177e4SLinus Torvalds * First copy the records. 24971da177e4SLinus Torvalds */ 24981da177e4SLinus Torvalds op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, 24991da177e4SLinus Torvalds ifp->if_broot_bytes); 25001da177e4SLinus Torvalds np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1, 25011da177e4SLinus Torvalds (int)new_size); 25021da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); 25031da177e4SLinus Torvalds 25041da177e4SLinus Torvalds /* 25051da177e4SLinus Torvalds * Then copy the pointers. 
25061da177e4SLinus Torvalds */ 25071da177e4SLinus Torvalds op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, 25081da177e4SLinus Torvalds ifp->if_broot_bytes); 25091da177e4SLinus Torvalds np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, 25101da177e4SLinus Torvalds (int)new_size); 25111da177e4SLinus Torvalds memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 25121da177e4SLinus Torvalds } 25131da177e4SLinus Torvalds kmem_free(ifp->if_broot, ifp->if_broot_bytes); 25141da177e4SLinus Torvalds ifp->if_broot = new_broot; 25151da177e4SLinus Torvalds ifp->if_broot_bytes = (int)new_size; 25161da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 25171da177e4SLinus Torvalds XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); 25181da177e4SLinus Torvalds return; 25191da177e4SLinus Torvalds } 25201da177e4SLinus Torvalds 25211da177e4SLinus Torvalds 25221da177e4SLinus Torvalds /* 25231da177e4SLinus Torvalds * This is called when the amount of space needed for if_extents 25241da177e4SLinus Torvalds * is increased or decreased. The change in size is indicated by 25251da177e4SLinus Torvalds * the number of extents that need to be added or deleted in the 25261da177e4SLinus Torvalds * ext_diff parameter. 25271da177e4SLinus Torvalds * 25281da177e4SLinus Torvalds * If the amount of space needed has decreased below the size of the 25291da177e4SLinus Torvalds * inline buffer, then switch to using the inline buffer. Otherwise, 25301da177e4SLinus Torvalds * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 25311da177e4SLinus Torvalds * to what is needed. 25321da177e4SLinus Torvalds * 25331da177e4SLinus Torvalds * ip -- the inode whose if_extents area is changing 25341da177e4SLinus Torvalds * ext_diff -- the change in the number of extents, positive or negative, 25351da177e4SLinus Torvalds * requested for the if_extents array. 
25361da177e4SLinus Torvalds */ 25371da177e4SLinus Torvalds void 25381da177e4SLinus Torvalds xfs_iext_realloc( 25391da177e4SLinus Torvalds xfs_inode_t *ip, 25401da177e4SLinus Torvalds int ext_diff, 25411da177e4SLinus Torvalds int whichfork) 25421da177e4SLinus Torvalds { 25431da177e4SLinus Torvalds int byte_diff; 25441da177e4SLinus Torvalds xfs_ifork_t *ifp; 25451da177e4SLinus Torvalds int new_size; 25461da177e4SLinus Torvalds uint rnew_size; 25471da177e4SLinus Torvalds 25481da177e4SLinus Torvalds if (ext_diff == 0) { 25491da177e4SLinus Torvalds return; 25501da177e4SLinus Torvalds } 25511da177e4SLinus Torvalds 25521da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 25531da177e4SLinus Torvalds byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t); 25541da177e4SLinus Torvalds new_size = (int)ifp->if_bytes + byte_diff; 25551da177e4SLinus Torvalds ASSERT(new_size >= 0); 25561da177e4SLinus Torvalds 25571da177e4SLinus Torvalds if (new_size == 0) { 25581da177e4SLinus Torvalds if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { 25591da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 25601da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 25611da177e4SLinus Torvalds } 25621da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 25631da177e4SLinus Torvalds rnew_size = 0; 25641da177e4SLinus Torvalds } else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) { 25651da177e4SLinus Torvalds /* 25661da177e4SLinus Torvalds * If the valid extents can fit in if_inline_ext, 25671da177e4SLinus Torvalds * copy them from the malloc'd vector and free it. 25681da177e4SLinus Torvalds */ 25691da177e4SLinus Torvalds if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { 25701da177e4SLinus Torvalds /* 25711da177e4SLinus Torvalds * For now, empty files are format EXTENTS, 25721da177e4SLinus Torvalds * so the if_extents pointer is null. 
25731da177e4SLinus Torvalds */ 25741da177e4SLinus Torvalds if (ifp->if_u1.if_extents) { 25751da177e4SLinus Torvalds memcpy(ifp->if_u2.if_inline_ext, 25761da177e4SLinus Torvalds ifp->if_u1.if_extents, new_size); 25771da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_extents, 25781da177e4SLinus Torvalds ifp->if_real_bytes); 25791da177e4SLinus Torvalds } 25801da177e4SLinus Torvalds ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 25811da177e4SLinus Torvalds } 25821da177e4SLinus Torvalds rnew_size = 0; 25831da177e4SLinus Torvalds } else { 25841da177e4SLinus Torvalds rnew_size = new_size; 25851da177e4SLinus Torvalds if ((rnew_size & (rnew_size - 1)) != 0) 25861da177e4SLinus Torvalds rnew_size = xfs_iroundup(rnew_size); 25871da177e4SLinus Torvalds /* 25881da177e4SLinus Torvalds * Stuck with malloc/realloc. 25891da177e4SLinus Torvalds */ 25901da177e4SLinus Torvalds if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) { 25911da177e4SLinus Torvalds ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 25921da177e4SLinus Torvalds kmem_alloc(rnew_size, KM_SLEEP); 25931da177e4SLinus Torvalds memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, 25941da177e4SLinus Torvalds sizeof(ifp->if_u2.if_inline_ext)); 25951da177e4SLinus Torvalds } else if (rnew_size != ifp->if_real_bytes) { 25961da177e4SLinus Torvalds ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) 25971da177e4SLinus Torvalds kmem_realloc(ifp->if_u1.if_extents, 25981da177e4SLinus Torvalds rnew_size, 25991da177e4SLinus Torvalds ifp->if_real_bytes, 26001da177e4SLinus Torvalds KM_NOFS); 26011da177e4SLinus Torvalds } 26021da177e4SLinus Torvalds } 26031da177e4SLinus Torvalds ifp->if_real_bytes = rnew_size; 26041da177e4SLinus Torvalds ifp->if_bytes = new_size; 26051da177e4SLinus Torvalds } 26061da177e4SLinus Torvalds 26071da177e4SLinus Torvalds 26081da177e4SLinus Torvalds /* 26091da177e4SLinus Torvalds * This is called when the amount of space needed for if_data 26101da177e4SLinus Torvalds * is increased or decreased. 
The change in size is indicated by 26111da177e4SLinus Torvalds * the number of bytes that need to be added or deleted in the 26121da177e4SLinus Torvalds * byte_diff parameter. 26131da177e4SLinus Torvalds * 26141da177e4SLinus Torvalds * If the amount of space needed has decreased below the size of the 26151da177e4SLinus Torvalds * inline buffer, then switch to using the inline buffer. Otherwise, 26161da177e4SLinus Torvalds * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer 26171da177e4SLinus Torvalds * to what is needed. 26181da177e4SLinus Torvalds * 26191da177e4SLinus Torvalds * ip -- the inode whose if_data area is changing 26201da177e4SLinus Torvalds * byte_diff -- the change in the number of bytes, positive or negative, 26211da177e4SLinus Torvalds * requested for the if_data array. 26221da177e4SLinus Torvalds */ 26231da177e4SLinus Torvalds void 26241da177e4SLinus Torvalds xfs_idata_realloc( 26251da177e4SLinus Torvalds xfs_inode_t *ip, 26261da177e4SLinus Torvalds int byte_diff, 26271da177e4SLinus Torvalds int whichfork) 26281da177e4SLinus Torvalds { 26291da177e4SLinus Torvalds xfs_ifork_t *ifp; 26301da177e4SLinus Torvalds int new_size; 26311da177e4SLinus Torvalds int real_size; 26321da177e4SLinus Torvalds 26331da177e4SLinus Torvalds if (byte_diff == 0) { 26341da177e4SLinus Torvalds return; 26351da177e4SLinus Torvalds } 26361da177e4SLinus Torvalds 26371da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 26381da177e4SLinus Torvalds new_size = (int)ifp->if_bytes + byte_diff; 26391da177e4SLinus Torvalds ASSERT(new_size >= 0); 26401da177e4SLinus Torvalds 26411da177e4SLinus Torvalds if (new_size == 0) { 26421da177e4SLinus Torvalds if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 26431da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 26441da177e4SLinus Torvalds } 26451da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 26461da177e4SLinus Torvalds real_size = 0; 26471da177e4SLinus Torvalds } else if (new_size <= 
sizeof(ifp->if_u2.if_inline_data)) { 26481da177e4SLinus Torvalds /* 26491da177e4SLinus Torvalds * If the valid extents/data can fit in if_inline_ext/data, 26501da177e4SLinus Torvalds * copy them from the malloc'd vector and free it. 26511da177e4SLinus Torvalds */ 26521da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 26531da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 26541da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 26551da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 26561da177e4SLinus Torvalds memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 26571da177e4SLinus Torvalds new_size); 26581da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 26591da177e4SLinus Torvalds ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 26601da177e4SLinus Torvalds } 26611da177e4SLinus Torvalds real_size = 0; 26621da177e4SLinus Torvalds } else { 26631da177e4SLinus Torvalds /* 26641da177e4SLinus Torvalds * Stuck with malloc/realloc. 26651da177e4SLinus Torvalds * For inline data, the underlying buffer must be 26661da177e4SLinus Torvalds * a multiple of 4 bytes in size so that it can be 26671da177e4SLinus Torvalds * logged and stay on word boundaries. We enforce 26681da177e4SLinus Torvalds * that here. 26691da177e4SLinus Torvalds */ 26701da177e4SLinus Torvalds real_size = roundup(new_size, 4); 26711da177e4SLinus Torvalds if (ifp->if_u1.if_data == NULL) { 26721da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 26731da177e4SLinus Torvalds ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 26741da177e4SLinus Torvalds } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 26751da177e4SLinus Torvalds /* 26761da177e4SLinus Torvalds * Only do the realloc if the underlying size 26771da177e4SLinus Torvalds * is really changing. 
26781da177e4SLinus Torvalds */ 26791da177e4SLinus Torvalds if (ifp->if_real_bytes != real_size) { 26801da177e4SLinus Torvalds ifp->if_u1.if_data = 26811da177e4SLinus Torvalds kmem_realloc(ifp->if_u1.if_data, 26821da177e4SLinus Torvalds real_size, 26831da177e4SLinus Torvalds ifp->if_real_bytes, 26841da177e4SLinus Torvalds KM_SLEEP); 26851da177e4SLinus Torvalds } 26861da177e4SLinus Torvalds } else { 26871da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 26881da177e4SLinus Torvalds ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); 26891da177e4SLinus Torvalds memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, 26901da177e4SLinus Torvalds ifp->if_bytes); 26911da177e4SLinus Torvalds } 26921da177e4SLinus Torvalds } 26931da177e4SLinus Torvalds ifp->if_real_bytes = real_size; 26941da177e4SLinus Torvalds ifp->if_bytes = new_size; 26951da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 26961da177e4SLinus Torvalds } 26971da177e4SLinus Torvalds 26981da177e4SLinus Torvalds 26991da177e4SLinus Torvalds 27001da177e4SLinus Torvalds 27011da177e4SLinus Torvalds /* 27021da177e4SLinus Torvalds * Map inode to disk block and offset. 
27031da177e4SLinus Torvalds * 27041da177e4SLinus Torvalds * mp -- the mount point structure for the current file system 27051da177e4SLinus Torvalds * tp -- the current transaction 27061da177e4SLinus Torvalds * ino -- the inode number of the inode to be located 27071da177e4SLinus Torvalds * imap -- this structure is filled in with the information necessary 27081da177e4SLinus Torvalds * to retrieve the given inode from disk 27091da177e4SLinus Torvalds * flags -- flags to pass to xfs_dilocate indicating whether or not 27101da177e4SLinus Torvalds * lookups in the inode btree were OK or not 27111da177e4SLinus Torvalds */ 27121da177e4SLinus Torvalds int 27131da177e4SLinus Torvalds xfs_imap( 27141da177e4SLinus Torvalds xfs_mount_t *mp, 27151da177e4SLinus Torvalds xfs_trans_t *tp, 27161da177e4SLinus Torvalds xfs_ino_t ino, 27171da177e4SLinus Torvalds xfs_imap_t *imap, 27181da177e4SLinus Torvalds uint flags) 27191da177e4SLinus Torvalds { 27201da177e4SLinus Torvalds xfs_fsblock_t fsbno; 27211da177e4SLinus Torvalds int len; 27221da177e4SLinus Torvalds int off; 27231da177e4SLinus Torvalds int error; 27241da177e4SLinus Torvalds 27251da177e4SLinus Torvalds fsbno = imap->im_blkno ? 
27261da177e4SLinus Torvalds XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; 27271da177e4SLinus Torvalds error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); 27281da177e4SLinus Torvalds if (error != 0) { 27291da177e4SLinus Torvalds return error; 27301da177e4SLinus Torvalds } 27311da177e4SLinus Torvalds imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); 27321da177e4SLinus Torvalds imap->im_len = XFS_FSB_TO_BB(mp, len); 27331da177e4SLinus Torvalds imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); 27341da177e4SLinus Torvalds imap->im_ioffset = (ushort)off; 27351da177e4SLinus Torvalds imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); 27361da177e4SLinus Torvalds return 0; 27371da177e4SLinus Torvalds } 27381da177e4SLinus Torvalds 27391da177e4SLinus Torvalds void 27401da177e4SLinus Torvalds xfs_idestroy_fork( 27411da177e4SLinus Torvalds xfs_inode_t *ip, 27421da177e4SLinus Torvalds int whichfork) 27431da177e4SLinus Torvalds { 27441da177e4SLinus Torvalds xfs_ifork_t *ifp; 27451da177e4SLinus Torvalds 27461da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 27471da177e4SLinus Torvalds if (ifp->if_broot != NULL) { 27481da177e4SLinus Torvalds kmem_free(ifp->if_broot, ifp->if_broot_bytes); 27491da177e4SLinus Torvalds ifp->if_broot = NULL; 27501da177e4SLinus Torvalds } 27511da177e4SLinus Torvalds 27521da177e4SLinus Torvalds /* 27531da177e4SLinus Torvalds * If the format is local, then we can't have an extents 27541da177e4SLinus Torvalds * array so just look for an inline data array. If we're 27551da177e4SLinus Torvalds * not local then we may or may not have an extents list, 27561da177e4SLinus Torvalds * so check and free it up if we do. 
27571da177e4SLinus Torvalds */ 27581da177e4SLinus Torvalds if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 27591da177e4SLinus Torvalds if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 27601da177e4SLinus Torvalds (ifp->if_u1.if_data != NULL)) { 27611da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 27621da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 27631da177e4SLinus Torvalds ifp->if_u1.if_data = NULL; 27641da177e4SLinus Torvalds ifp->if_real_bytes = 0; 27651da177e4SLinus Torvalds } 27661da177e4SLinus Torvalds } else if ((ifp->if_flags & XFS_IFEXTENTS) && 27671da177e4SLinus Torvalds (ifp->if_u1.if_extents != NULL) && 27681da177e4SLinus Torvalds (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { 27691da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes != 0); 27701da177e4SLinus Torvalds kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 27711da177e4SLinus Torvalds ifp->if_u1.if_extents = NULL; 27721da177e4SLinus Torvalds ifp->if_real_bytes = 0; 27731da177e4SLinus Torvalds } 27741da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_extents == NULL || 27751da177e4SLinus Torvalds ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); 27761da177e4SLinus Torvalds ASSERT(ifp->if_real_bytes == 0); 27771da177e4SLinus Torvalds if (whichfork == XFS_ATTR_FORK) { 27781da177e4SLinus Torvalds kmem_zone_free(xfs_ifork_zone, ip->i_afp); 27791da177e4SLinus Torvalds ip->i_afp = NULL; 27801da177e4SLinus Torvalds } 27811da177e4SLinus Torvalds } 27821da177e4SLinus Torvalds 27831da177e4SLinus Torvalds /* 27841da177e4SLinus Torvalds * This is called free all the memory associated with an inode. 27851da177e4SLinus Torvalds * It must free the inode itself and any buffers allocated for 27861da177e4SLinus Torvalds * if_extents/if_data and if_broot. It must also free the lock 27871da177e4SLinus Torvalds * associated with the inode. 
27881da177e4SLinus Torvalds */ 27891da177e4SLinus Torvalds void 27901da177e4SLinus Torvalds xfs_idestroy( 27911da177e4SLinus Torvalds xfs_inode_t *ip) 27921da177e4SLinus Torvalds { 27931da177e4SLinus Torvalds 27941da177e4SLinus Torvalds switch (ip->i_d.di_mode & S_IFMT) { 27951da177e4SLinus Torvalds case S_IFREG: 27961da177e4SLinus Torvalds case S_IFDIR: 27971da177e4SLinus Torvalds case S_IFLNK: 27981da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_DATA_FORK); 27991da177e4SLinus Torvalds break; 28001da177e4SLinus Torvalds } 28011da177e4SLinus Torvalds if (ip->i_afp) 28021da177e4SLinus Torvalds xfs_idestroy_fork(ip, XFS_ATTR_FORK); 28031da177e4SLinus Torvalds mrfree(&ip->i_lock); 28041da177e4SLinus Torvalds mrfree(&ip->i_iolock); 28051da177e4SLinus Torvalds freesema(&ip->i_flock); 28061da177e4SLinus Torvalds #ifdef XFS_BMAP_TRACE 28071da177e4SLinus Torvalds ktrace_free(ip->i_xtrace); 28081da177e4SLinus Torvalds #endif 28091da177e4SLinus Torvalds #ifdef XFS_BMBT_TRACE 28101da177e4SLinus Torvalds ktrace_free(ip->i_btrace); 28111da177e4SLinus Torvalds #endif 28121da177e4SLinus Torvalds #ifdef XFS_RW_TRACE 28131da177e4SLinus Torvalds ktrace_free(ip->i_rwtrace); 28141da177e4SLinus Torvalds #endif 28151da177e4SLinus Torvalds #ifdef XFS_ILOCK_TRACE 28161da177e4SLinus Torvalds ktrace_free(ip->i_lock_trace); 28171da177e4SLinus Torvalds #endif 28181da177e4SLinus Torvalds #ifdef XFS_DIR2_TRACE 28191da177e4SLinus Torvalds ktrace_free(ip->i_dir_trace); 28201da177e4SLinus Torvalds #endif 28211da177e4SLinus Torvalds if (ip->i_itemp) { 28221da177e4SLinus Torvalds /* XXXdpd should be able to assert this but shutdown 28231da177e4SLinus Torvalds * is leaving the AIL behind. 
*/ 28241da177e4SLinus Torvalds ASSERT(((ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL) == 0) || 28251da177e4SLinus Torvalds XFS_FORCED_SHUTDOWN(ip->i_mount)); 28261da177e4SLinus Torvalds xfs_inode_item_destroy(ip); 28271da177e4SLinus Torvalds } 28281da177e4SLinus Torvalds kmem_zone_free(xfs_inode_zone, ip); 28291da177e4SLinus Torvalds } 28301da177e4SLinus Torvalds 28311da177e4SLinus Torvalds 28321da177e4SLinus Torvalds /* 28331da177e4SLinus Torvalds * Increment the pin count of the given buffer. 28341da177e4SLinus Torvalds * This value is protected by ipinlock spinlock in the mount structure. 28351da177e4SLinus Torvalds */ 28361da177e4SLinus Torvalds void 28371da177e4SLinus Torvalds xfs_ipin( 28381da177e4SLinus Torvalds xfs_inode_t *ip) 28391da177e4SLinus Torvalds { 28401da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 28411da177e4SLinus Torvalds 28421da177e4SLinus Torvalds atomic_inc(&ip->i_pincount); 28431da177e4SLinus Torvalds } 28441da177e4SLinus Torvalds 28451da177e4SLinus Torvalds /* 28461da177e4SLinus Torvalds * Decrement the pin count of the given inode, and wake up 28471da177e4SLinus Torvalds * anyone in xfs_iwait_unpin() if the count goes to 0. The 28481da177e4SLinus Torvalds * inode must have been previoulsy pinned with a call to xfs_ipin(). 
28491da177e4SLinus Torvalds */ 28501da177e4SLinus Torvalds void 28511da177e4SLinus Torvalds xfs_iunpin( 28521da177e4SLinus Torvalds xfs_inode_t *ip) 28531da177e4SLinus Torvalds { 28541da177e4SLinus Torvalds ASSERT(atomic_read(&ip->i_pincount) > 0); 28551da177e4SLinus Torvalds 28561da177e4SLinus Torvalds if (atomic_dec_and_test(&ip->i_pincount)) { 28571da177e4SLinus Torvalds vnode_t *vp = XFS_ITOV_NULL(ip); 28581da177e4SLinus Torvalds 28591da177e4SLinus Torvalds /* make sync come back and flush this inode */ 28601da177e4SLinus Torvalds if (vp) { 28611da177e4SLinus Torvalds struct inode *inode = LINVFS_GET_IP(vp); 28621da177e4SLinus Torvalds 28631da177e4SLinus Torvalds if (!(inode->i_state & I_NEW)) 28641da177e4SLinus Torvalds mark_inode_dirty_sync(inode); 28651da177e4SLinus Torvalds } 28661da177e4SLinus Torvalds 28671da177e4SLinus Torvalds wake_up(&ip->i_ipin_wait); 28681da177e4SLinus Torvalds } 28691da177e4SLinus Torvalds } 28701da177e4SLinus Torvalds 28711da177e4SLinus Torvalds /* 28721da177e4SLinus Torvalds * This is called to wait for the given inode to be unpinned. 28731da177e4SLinus Torvalds * It will sleep until this happens. The caller must have the 28741da177e4SLinus Torvalds * inode locked in at least shared mode so that the buffer cannot 28751da177e4SLinus Torvalds * be subsequently pinned once someone is waiting for it to be 28761da177e4SLinus Torvalds * unpinned. 
28771da177e4SLinus Torvalds */ 28781da177e4SLinus Torvalds void 28791da177e4SLinus Torvalds xfs_iunpin_wait( 28801da177e4SLinus Torvalds xfs_inode_t *ip) 28811da177e4SLinus Torvalds { 28821da177e4SLinus Torvalds xfs_inode_log_item_t *iip; 28831da177e4SLinus Torvalds xfs_lsn_t lsn; 28841da177e4SLinus Torvalds 28851da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 28861da177e4SLinus Torvalds 28871da177e4SLinus Torvalds if (atomic_read(&ip->i_pincount) == 0) { 28881da177e4SLinus Torvalds return; 28891da177e4SLinus Torvalds } 28901da177e4SLinus Torvalds 28911da177e4SLinus Torvalds iip = ip->i_itemp; 28921da177e4SLinus Torvalds if (iip && iip->ili_last_lsn) { 28931da177e4SLinus Torvalds lsn = iip->ili_last_lsn; 28941da177e4SLinus Torvalds } else { 28951da177e4SLinus Torvalds lsn = (xfs_lsn_t)0; 28961da177e4SLinus Torvalds } 28971da177e4SLinus Torvalds 28981da177e4SLinus Torvalds /* 28991da177e4SLinus Torvalds * Give the log a push so we don't wait here too long. 29001da177e4SLinus Torvalds */ 29011da177e4SLinus Torvalds xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); 29021da177e4SLinus Torvalds 29031da177e4SLinus Torvalds wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); 29041da177e4SLinus Torvalds } 29051da177e4SLinus Torvalds 29061da177e4SLinus Torvalds 29071da177e4SLinus Torvalds /* 29081da177e4SLinus Torvalds * xfs_iextents_copy() 29091da177e4SLinus Torvalds * 29101da177e4SLinus Torvalds * This is called to copy the REAL extents (as opposed to the delayed 29111da177e4SLinus Torvalds * allocation extents) from the inode into the given buffer. It 29121da177e4SLinus Torvalds * returns the number of bytes copied into the buffer. 29131da177e4SLinus Torvalds * 29141da177e4SLinus Torvalds * If there are no delayed allocation extents, then we can just 29151da177e4SLinus Torvalds * memcpy() the extents into the buffer. Otherwise, we need to 29161da177e4SLinus Torvalds * examine each extent in turn and skip those which are delayed. 
29171da177e4SLinus Torvalds */ 29181da177e4SLinus Torvalds int 29191da177e4SLinus Torvalds xfs_iextents_copy( 29201da177e4SLinus Torvalds xfs_inode_t *ip, 29211da177e4SLinus Torvalds xfs_bmbt_rec_t *buffer, 29221da177e4SLinus Torvalds int whichfork) 29231da177e4SLinus Torvalds { 29241da177e4SLinus Torvalds int copied; 29251da177e4SLinus Torvalds xfs_bmbt_rec_t *dest_ep; 29261da177e4SLinus Torvalds xfs_bmbt_rec_t *ep; 29271da177e4SLinus Torvalds #ifdef XFS_BMAP_TRACE 29281da177e4SLinus Torvalds static char fname[] = "xfs_iextents_copy"; 29291da177e4SLinus Torvalds #endif 29301da177e4SLinus Torvalds int i; 29311da177e4SLinus Torvalds xfs_ifork_t *ifp; 29321da177e4SLinus Torvalds int nrecs; 29331da177e4SLinus Torvalds xfs_fsblock_t start_block; 29341da177e4SLinus Torvalds 29351da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 29361da177e4SLinus Torvalds ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 29371da177e4SLinus Torvalds ASSERT(ifp->if_bytes > 0); 29381da177e4SLinus Torvalds 29391da177e4SLinus Torvalds nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 29401da177e4SLinus Torvalds xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork); 29411da177e4SLinus Torvalds ASSERT(nrecs > 0); 29421da177e4SLinus Torvalds 29431da177e4SLinus Torvalds /* 29441da177e4SLinus Torvalds * There are some delayed allocation extents in the 29451da177e4SLinus Torvalds * inode, so copy the extents one at a time and skip 29461da177e4SLinus Torvalds * the delayed ones. There must be at least one 29471da177e4SLinus Torvalds * non-delayed extent. 
29481da177e4SLinus Torvalds */ 29491da177e4SLinus Torvalds ep = ifp->if_u1.if_extents; 29501da177e4SLinus Torvalds dest_ep = buffer; 29511da177e4SLinus Torvalds copied = 0; 29521da177e4SLinus Torvalds for (i = 0; i < nrecs; i++) { 29531da177e4SLinus Torvalds start_block = xfs_bmbt_get_startblock(ep); 29541da177e4SLinus Torvalds if (ISNULLSTARTBLOCK(start_block)) { 29551da177e4SLinus Torvalds /* 29561da177e4SLinus Torvalds * It's a delayed allocation extent, so skip it. 29571da177e4SLinus Torvalds */ 29581da177e4SLinus Torvalds ep++; 29591da177e4SLinus Torvalds continue; 29601da177e4SLinus Torvalds } 29611da177e4SLinus Torvalds 29621da177e4SLinus Torvalds /* Translate to on disk format */ 29631da177e4SLinus Torvalds put_unaligned(INT_GET(ep->l0, ARCH_CONVERT), 29641da177e4SLinus Torvalds (__uint64_t*)&dest_ep->l0); 29651da177e4SLinus Torvalds put_unaligned(INT_GET(ep->l1, ARCH_CONVERT), 29661da177e4SLinus Torvalds (__uint64_t*)&dest_ep->l1); 29671da177e4SLinus Torvalds dest_ep++; 29681da177e4SLinus Torvalds ep++; 29691da177e4SLinus Torvalds copied++; 29701da177e4SLinus Torvalds } 29711da177e4SLinus Torvalds ASSERT(copied != 0); 29721da177e4SLinus Torvalds xfs_validate_extents(buffer, copied, 1, XFS_EXTFMT_INODE(ip)); 29731da177e4SLinus Torvalds 29741da177e4SLinus Torvalds return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 29751da177e4SLinus Torvalds } 29761da177e4SLinus Torvalds 29771da177e4SLinus Torvalds /* 29781da177e4SLinus Torvalds * Each of the following cases stores data into the same region 29791da177e4SLinus Torvalds * of the on-disk inode, so only one of them can be valid at 29801da177e4SLinus Torvalds * any given time. While it is possible to have conflicting formats 29811da177e4SLinus Torvalds * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is 29821da177e4SLinus Torvalds * in EXTENTS format, this can only happen when the fork has 29831da177e4SLinus Torvalds * changed formats after being modified but before being flushed. 
29841da177e4SLinus Torvalds * In these cases, the format always takes precedence, because the 29851da177e4SLinus Torvalds * format indicates the current state of the fork. 29861da177e4SLinus Torvalds */ 29871da177e4SLinus Torvalds /*ARGSUSED*/ 29881da177e4SLinus Torvalds STATIC int 29891da177e4SLinus Torvalds xfs_iflush_fork( 29901da177e4SLinus Torvalds xfs_inode_t *ip, 29911da177e4SLinus Torvalds xfs_dinode_t *dip, 29921da177e4SLinus Torvalds xfs_inode_log_item_t *iip, 29931da177e4SLinus Torvalds int whichfork, 29941da177e4SLinus Torvalds xfs_buf_t *bp) 29951da177e4SLinus Torvalds { 29961da177e4SLinus Torvalds char *cp; 29971da177e4SLinus Torvalds xfs_ifork_t *ifp; 29981da177e4SLinus Torvalds xfs_mount_t *mp; 29991da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG 30001da177e4SLinus Torvalds int first; 30011da177e4SLinus Torvalds #endif 30021da177e4SLinus Torvalds static const short brootflag[2] = 30031da177e4SLinus Torvalds { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 30041da177e4SLinus Torvalds static const short dataflag[2] = 30051da177e4SLinus Torvalds { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; 30061da177e4SLinus Torvalds static const short extflag[2] = 30071da177e4SLinus Torvalds { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; 30081da177e4SLinus Torvalds 30091da177e4SLinus Torvalds if (iip == NULL) 30101da177e4SLinus Torvalds return 0; 30111da177e4SLinus Torvalds ifp = XFS_IFORK_PTR(ip, whichfork); 30121da177e4SLinus Torvalds /* 30131da177e4SLinus Torvalds * This can happen if we gave up in iformat in an error path, 30141da177e4SLinus Torvalds * for the attribute fork. 
30151da177e4SLinus Torvalds */ 30161da177e4SLinus Torvalds if (ifp == NULL) { 30171da177e4SLinus Torvalds ASSERT(whichfork == XFS_ATTR_FORK); 30181da177e4SLinus Torvalds return 0; 30191da177e4SLinus Torvalds } 30201da177e4SLinus Torvalds cp = XFS_DFORK_PTR(dip, whichfork); 30211da177e4SLinus Torvalds mp = ip->i_mount; 30221da177e4SLinus Torvalds switch (XFS_IFORK_FORMAT(ip, whichfork)) { 30231da177e4SLinus Torvalds case XFS_DINODE_FMT_LOCAL: 30241da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && 30251da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 30261da177e4SLinus Torvalds ASSERT(ifp->if_u1.if_data != NULL); 30271da177e4SLinus Torvalds ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); 30281da177e4SLinus Torvalds memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); 30291da177e4SLinus Torvalds } 30301da177e4SLinus Torvalds if (whichfork == XFS_DATA_FORK) { 30311da177e4SLinus Torvalds if (unlikely(XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip))) { 30321da177e4SLinus Torvalds XFS_ERROR_REPORT("xfs_iflush_fork", 30331da177e4SLinus Torvalds XFS_ERRLEVEL_LOW, mp); 30341da177e4SLinus Torvalds return XFS_ERROR(EFSCORRUPTED); 30351da177e4SLinus Torvalds } 30361da177e4SLinus Torvalds } 30371da177e4SLinus Torvalds break; 30381da177e4SLinus Torvalds 30391da177e4SLinus Torvalds case XFS_DINODE_FMT_EXTENTS: 30401da177e4SLinus Torvalds ASSERT((ifp->if_flags & XFS_IFEXTENTS) || 30411da177e4SLinus Torvalds !(iip->ili_format.ilf_fields & extflag[whichfork])); 30421da177e4SLinus Torvalds ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0)); 30431da177e4SLinus Torvalds ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0)); 30441da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & extflag[whichfork]) && 30451da177e4SLinus Torvalds (ifp->if_bytes > 0)) { 30461da177e4SLinus Torvalds ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); 30471da177e4SLinus Torvalds (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, 30481da177e4SLinus Torvalds 
whichfork); 30491da177e4SLinus Torvalds } 30501da177e4SLinus Torvalds break; 30511da177e4SLinus Torvalds 30521da177e4SLinus Torvalds case XFS_DINODE_FMT_BTREE: 30531da177e4SLinus Torvalds if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && 30541da177e4SLinus Torvalds (ifp->if_broot_bytes > 0)) { 30551da177e4SLinus Torvalds ASSERT(ifp->if_broot != NULL); 30561da177e4SLinus Torvalds ASSERT(ifp->if_broot_bytes <= 30571da177e4SLinus Torvalds (XFS_IFORK_SIZE(ip, whichfork) + 30581da177e4SLinus Torvalds XFS_BROOT_SIZE_ADJ)); 30591da177e4SLinus Torvalds xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, 30601da177e4SLinus Torvalds (xfs_bmdr_block_t *)cp, 30611da177e4SLinus Torvalds XFS_DFORK_SIZE(dip, mp, whichfork)); 30621da177e4SLinus Torvalds } 30631da177e4SLinus Torvalds break; 30641da177e4SLinus Torvalds 30651da177e4SLinus Torvalds case XFS_DINODE_FMT_DEV: 30661da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { 30671da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 30681da177e4SLinus Torvalds INT_SET(dip->di_u.di_dev, ARCH_CONVERT, ip->i_df.if_u2.if_rdev); 30691da177e4SLinus Torvalds } 30701da177e4SLinus Torvalds break; 30711da177e4SLinus Torvalds 30721da177e4SLinus Torvalds case XFS_DINODE_FMT_UUID: 30731da177e4SLinus Torvalds if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { 30741da177e4SLinus Torvalds ASSERT(whichfork == XFS_DATA_FORK); 30751da177e4SLinus Torvalds memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid, 30761da177e4SLinus Torvalds sizeof(uuid_t)); 30771da177e4SLinus Torvalds } 30781da177e4SLinus Torvalds break; 30791da177e4SLinus Torvalds 30801da177e4SLinus Torvalds default: 30811da177e4SLinus Torvalds ASSERT(0); 30821da177e4SLinus Torvalds break; 30831da177e4SLinus Torvalds } 30841da177e4SLinus Torvalds 30851da177e4SLinus Torvalds return 0; 30861da177e4SLinus Torvalds } 30871da177e4SLinus Torvalds 30881da177e4SLinus Torvalds /* 30891da177e4SLinus Torvalds * xfs_iflush() will write a modified inode's changes out 
to the
 * inode's on disk home. The caller must have the inode lock held
 * in at least shared mode and the inode flush semaphore must be
 * held as well. The inode lock will still be held upon return from
 * the call and the caller is free to unlock it.
 * The inode flush lock will be unlocked when the inode reaches the disk.
 * The flags indicate how the inode's buffer should be written out.
 *
 * ip:    the inode to flush.
 * flags: one of the XFS_IFLUSH_* values handled by the switch
 *        statements below; any other value trips ASSERT(0) and is
 *        treated as a synchronous write.
 *
 * Returns 0 if the inode was not dirty, EIO if the filesystem has been
 * forcibly shut down, the error from xfs_itobp() if the inode buffer
 * could not be obtained, and EFSCORRUPTED if xfs_iflush_int() rejects
 * this inode or one of its cluster neighbours.  Otherwise the result is
 * that of xfs_bwrite() for a synchronous write, or 0 for the delayed
 * write and async cases.  On the first three early-return paths the
 * flush lock is released here before returning.
 */
int
xfs_iflush(
	xfs_inode_t		*ip,
	uint			flags)
{
	xfs_inode_log_item_t	*iip;
	xfs_buf_t		*bp;
	xfs_dinode_t		*dip;
	xfs_mount_t		*mp;
	int			error;
	/* REFERENCED */
	xfs_chash_t		*ch;
	xfs_inode_t		*iq;
	int			clcount;	/* count of inodes clustered */
	int			bufwasdelwri;
	/* Internal write-mode bits that replace the caller's flags below. */
	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
	SPLDECL(s);

	XFS_STATS_INC(xs_iflush_count);

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
	ASSERT(valusema(&ip->i_flock) <= 0);
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > ip->i_df.if_ext_max);

	iip = ip->i_itemp;
	mp = ip->i_mount;

	/*
	 * If the inode isn't dirty, then just release the inode
	 * flush lock and do nothing.
	 */
	if ((ip->i_update_core == 0) &&
	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
		ASSERT((iip != NULL) ?
			 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
		xfs_ifunlock(ip);
		return 0;
	}

	/*
	 * We can't flush the inode until it is unpinned, so
	 * wait for it. We know no one new can pin it, because
	 * we are holding the inode lock shared and you need
	 * to hold it exclusively to pin the inode.
	 */
	xfs_iunpin_wait(ip);

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly. If that's the case we must not write this inode
	 * to disk, because the log record didn't make it to disk!
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		/* Drop the dirty state so the inode is not flushed again. */
		ip->i_update_core = 0;
		if (iip)
			iip->ili_format.ilf_fields = 0;
		xfs_ifunlock(ip);
		return XFS_ERROR(EIO);
	}

	/*
	 * Get the buffer containing the on-disk inode.
	 */
	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0);
	if (error != 0) {
		xfs_ifunlock(ip);
		return error;
	}

	/*
	 * Decide how buffer will be flushed out. This is done before
	 * the call to xfs_iflush_int because this field is zeroed by it.
	 *
	 * From here on "flags" no longer holds an XFS_IFLUSH_* value:
	 * it is overwritten with the local INT_DELWRI / INT_ASYNC bits
	 * (0 meaning a synchronous write) that are tested at the bottom
	 * of the function.
	 */
	if (iip != NULL && iip->ili_format.ilf_fields != 0) {
		/*
		 * Flush out the inode buffer according to the directions
		 * of the caller. In the cases where the caller has given
		 * us a choice choose the non-delwri case. This is because
		 * the inode is in the AIL and we need to get it out soon.
		 */
		switch (flags) {
		case XFS_IFLUSH_SYNC:
		case XFS_IFLUSH_DELWRI_ELSE_SYNC:
			flags = 0;
			break;
		case XFS_IFLUSH_ASYNC:
		case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
			flags = INT_ASYNC;
			break;
		case XFS_IFLUSH_DELWRI:
			flags = INT_DELWRI;
			break;
		default:
			ASSERT(0);
			flags = 0;
			break;
		}
	} else {
		/*
		 * No logged fields are pending, so whenever the caller
		 * offered a choice the delayed write is taken.
		 */
		switch (flags) {
		case XFS_IFLUSH_DELWRI_ELSE_SYNC:
		case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
		case XFS_IFLUSH_DELWRI:
			flags = INT_DELWRI;
			break;
		case XFS_IFLUSH_ASYNC:
			flags = INT_ASYNC;
			break;
		case XFS_IFLUSH_SYNC:
			flags = 0;
			break;
		default:
			ASSERT(0);
			flags = 0;
			break;
		}
	}

	/*
	 * First flush out the inode that xfs_iflush was called with.
	 */
	error = xfs_iflush_int(ip, bp);
	if (error) {
		goto corrupt_out;
	}

	/*
	 * inode clustering:
	 * see if other inodes can be gathered into this write
	 */

	ip->i_chash->chl_buf = bp;

	ch = XFS_CHASH(mp, ip->i_blkno);
	s = mutex_spinlock(&ch->ch_lock);

	/*
	 * Walk the other inodes on ip's i_cnext ring with ch_lock held.
	 * "iip" is reused for each neighbour's log item; the caller's own
	 * log item is not needed again in this function.
	 */
	clcount = 0;
	for (iq = ip->i_cnext; iq != ip; iq = iq->i_cnext) {
		/*
		 * Do an un-protected check to see if the inode is dirty and
		 * is a candidate for flushing. These checks will be repeated
		 * later after the appropriate locks are acquired.
		 */
		iip = iq->i_itemp;
		if ((iq->i_update_core == 0) &&
		    ((iip == NULL) ||
		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
		      xfs_ipincount(iq) == 0) {
			continue;
		}

		/*
		 * Try to get locks. If any are unavailable,
		 * then this inode cannot be flushed and is skipped.
		 */

		/* get inode locks (just i_lock) */
		if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
			/* get inode flush lock */
			if (xfs_iflock_nowait(iq)) {
				/* check if pinned */
				if (xfs_ipincount(iq) == 0) {
					/* arriving here means that
					 * this inode can be flushed.
					 * first re-check that it's
					 * dirty
					 */
					iip = iq->i_itemp;
					if ((iq->i_update_core != 0)||
					    ((iip != NULL) &&
					     (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
						clcount++;
						error = xfs_iflush_int(iq, bp);
						if (error) {
							/*
							 * Only the inode lock
							 * is dropped here; iq's
							 * flush lock is released
							 * by xfs_iflush_abort()
							 * at cluster_corrupt_out.
							 */
							xfs_iunlock(iq,
								    XFS_ILOCK_SHARED);
							goto cluster_corrupt_out;
						}
					} else {
						xfs_ifunlock(iq);
					}
				} else {
					xfs_ifunlock(iq);
				}
			}
			xfs_iunlock(iq, XFS_ILOCK_SHARED);
		}
	}
	mutex_spinunlock(&ch->ch_lock, s);

	if (clcount) {
		XFS_STATS_INC(xs_icluster_flushcnt);
		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
	}

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (XFS_BUF_ISPINNED(bp)){
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
	}

	/*
	 * Issue the write in the mode chosen above.  Only the synchronous
	 * case updates "error"; in the other two it still holds the 0
	 * returned by the last successful xfs_iflush_int() call.
	 */
	if (flags & INT_DELWRI) {
		xfs_bdwrite(mp, bp);
	} else if (flags & INT_ASYNC) {
		xfs_bawrite(mp, bp);
	} else {
		error = xfs_bwrite(mp, bp);
	}
	return error;

corrupt_out:
	/*
	 * xfs_iflush_int() rejected the inode we were called with:
	 * release the buffer and shut the filesystem down.
	 */
	xfs_buf_relse(bp);
	xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
	xfs_iflush_abort(ip);
	/*
	 * Unlocks the flush lock
	 */
	return XFS_ERROR(EFSCORRUPTED);

cluster_corrupt_out:
	/* Corruption detected in the clustering loop. Invalidate the
	 * inode buffer and shut down the filesystem.
	 */
	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Clean up the buffer. If it was B_DELWRI, just release it --
	 * brelse can handle it with no problems. If not, shut down the
	 * filesystem before releasing the buffer.
	 */
	if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
		xfs_buf_relse(bp);
	}

	xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);

	if(!bufwasdelwri) {
		/*
		 * Just like incore_relse: if we have b_iodone functions,
		 * mark the buffer as an error and call them. Otherwise
		 * mark it as stale and brelse.
		 */
		if (XFS_BUF_IODONE_FUNC(bp)) {
			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
			XFS_BUF_UNDONE(bp);
			XFS_BUF_STALE(bp);
			XFS_BUF_SHUT(bp);
			XFS_BUF_ERROR(bp,EIO);
			xfs_biodone(bp);
		} else {
			XFS_BUF_STALE(bp);
			xfs_buf_relse(bp);
		}
	}

	/* iq is the cluster neighbour whose flush failed in the loop above. */
	xfs_iflush_abort(iq);
	/*
	 * Unlocks the flush lock
	 */
	return XFS_ERROR(EFSCORRUPTED);
}


/*
 * Copy the in-core state of one inode into its slot in the inode
 * buffer bp, which the caller has already obtained.  As asserted
 * below, the caller holds the inode lock and the inode flush lock.
 *
 * Returns 0 on success and EFSCORRUPTED if the on-disk or in-core
 * inode fails one of the sanity checks or the data fork fails
 * xfs_iflush_fork().  A clean inode also returns 0, after its flush
 * lock has been released.
 *
 * On success the flush lock is either left held with xfs_iflush_done
 * attached to the buffer (logged fields were pending) or released
 * here (nothing logged).  On the EFSCORRUPTED path it is left held
 * for the caller to deal with.
 */
STATIC int
xfs_iflush_int(
	xfs_inode_t		*ip,
	xfs_buf_t		*bp)
{
	xfs_inode_log_item_t	*iip;
	xfs_dinode_t		*dip;
	xfs_mount_t		*mp;
#ifdef XFS_TRANS_DEBUG
	int			first;
#endif
	SPLDECL(s);

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
	ASSERT(valusema(&ip->i_flock) <= 0);
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > ip->i_df.if_ext_max);

	iip = ip->i_itemp;
	mp = ip->i_mount;


	/*
	 * If the inode isn't dirty, then just release the inode
	 * flush lock and do nothing.
	 */
	if ((ip->i_update_core == 0) &&
	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
		xfs_ifunlock(ip);
		return 0;
	}

	/* set *dip = inode's place in the buffer */
	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset);

	/*
	 * Clear i_update_core before copying out the data.
	 * This is for coordination with our timestamp updates
	 * that don't hold the inode lock. They will always
	 * update the timestamps BEFORE setting i_update_core,
	 * so if we clear i_update_core after they set it we
	 * are guaranteed to see their updates to the timestamps.
	 * I believe that this depends on strongly ordered memory
	 * semantics, but we have that. We use the SYNCHRONIZE
	 * macro to make sure that the compiler does not reorder
	 * the i_update_core access below the data copy below.
	 */
	ip->i_update_core = 0;
	SYNCHRONIZE();

	/*
	 * Sanity checks on the on-disk and in-core inode before anything
	 * is copied.  Each check is wrapped in XFS_TEST_ERROR with its own
	 * XFS_ERRTAG_IFLUSH_n tag; a failure logs an alert and returns
	 * EFSCORRUPTED.
	 */
	if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
		    "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p",
			ip->i_ino, (int) INT_GET(dip->di_core.di_magic, ARCH_CONVERT), dip);
		goto corrupt_out;
	}
	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
			"xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
			ip->i_ino, ip, ip->i_d.di_magic);
		goto corrupt_out;
	}
	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
		/* Regular files may only be in extents or btree format. */
		if (XFS_TEST_ERROR(
		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
			xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
				"xfs_iflush: Bad regular inode %Lu, ptr 0x%p",
				ip->i_ino, ip);
			goto corrupt_out;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
		/* Directories may additionally be in local format. */
		if (XFS_TEST_ERROR(
		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
			xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
				"xfs_iflush: Bad directory inode %Lu, ptr 0x%p",
				ip->i_ino, ip);
			goto corrupt_out;
		}
	}
	/* The total extent count must not exceed the block count. */
	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
				XFS_RANDOM_IFLUSH_5)) {
		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
			"xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p",
			ip->i_ino,
			ip->i_d.di_nextents + ip->i_d.di_anextents,
			ip->i_d.di_nblocks,
			ip);
		goto corrupt_out;
	}
	/* The fork offset must not exceed the superblock's inode size. */
	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
			"xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
			ip->i_ino, ip->i_d.di_forkoff, ip);
		goto corrupt_out;
	}
	/*
	 * bump the flush iteration count, used to detect flushes which
	 * postdate a log record during recovery.
	 */

	ip->i_d.di_flushiter++;

	/*
	 * Copy the dirty parts of the inode into the on-disk
	 * inode. We always copy out the core of the inode,
	 * because if the inode is dirty at all the core must
	 * be.
	 */
	xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1);

	/* Wrap, we never let the log put out DI_MAX_FLUSH */
	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
		ip->i_d.di_flushiter = 0;

	/*
	 * If this is really an old format inode and the superblock version
	 * has not been updated to support only new format inodes, then
	 * convert back to the old inode format. If the superblock version
	 * has been updated, then make the conversion permanent.
	 */
	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
		XFS_SB_VERSION_HASNLINK(&mp->m_sb));
	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
			/*
			 * Convert it back.
			 */
			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
			INT_SET(dip->di_core.di_onlink, ARCH_CONVERT, ip->i_d.di_nlink);
		} else {
			/*
			 * The superblock version has already been bumped,
			 * so just make the conversion to the new inode
			 * format permanent.
			 */
			ip->i_d.di_version = XFS_DINODE_VERSION_2;
			INT_SET(dip->di_core.di_version, ARCH_CONVERT, XFS_DINODE_VERSION_2);
			ip->i_d.di_onlink = 0;
			dip->di_core.di_onlink = 0;
			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
			memset(&(dip->di_core.di_pad[0]), 0,
			      sizeof(dip->di_core.di_pad));
			ASSERT(ip->i_d.di_projid == 0);
		}
	}

	/* Flush the data fork, then the attribute fork if one exists. */
	if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) {
		goto corrupt_out;
	}

	if (XFS_IFORK_Q(ip)) {
		/*
		 * The only error from xfs_iflush_fork is on the data fork.
		 */
		(void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
	}
	xfs_inobp_check(mp, bp);

	/*
	 * We've recorded everything logged in the inode, so we'd
	 * like to clear the ilf_fields bits so we don't log and
	 * flush things unnecessarily. However, we can't stop
	 * logging all this information until the data we've copied
	 * into the disk buffer is written to disk. If we did we might
	 * overwrite the copy of the inode in the log with all the
	 * data after re-logging only part of it, and in the face of
	 * a crash we wouldn't have all the data we need to recover.
	 *
	 * What we do is move the bits to the ili_last_fields field.
	 * When logging the inode, these bits are moved back to the
	 * ilf_fields field. In the xfs_iflush_done() routine we
	 * clear ili_last_fields, since we know that the information
	 * those bits represent is permanently on disk. As long as
	 * the flush completes before the inode is logged again, then
	 * both ilf_fields and ili_last_fields will be cleared.
	 *
	 * We can play with the ilf_fields bits here, because the inode
	 * lock must be held exclusively in order to set bits there
	 * and the flush lock protects the ili_last_fields bits.
	 * Set ili_logged so the flush done
	 * routine can tell whether or not to look in the AIL.
	 * Also, store the current LSN of the inode so that we can tell
	 * whether the item has moved in the AIL from xfs_iflush_done().
	 * In order to read the lsn we need the AIL lock, because
	 * it is a 64 bit value that cannot be read atomically.
	 */
	if (iip != NULL && iip->ili_format.ilf_fields != 0) {
		iip->ili_last_fields = iip->ili_format.ilf_fields;
		iip->ili_format.ilf_fields = 0;
		iip->ili_logged = 1;

		ASSERT(sizeof(xfs_lsn_t) == 8);	/* don't lock if it shrinks */
		AIL_LOCK(mp,s);
		iip->ili_flush_lsn = iip->ili_item.li_lsn;
		AIL_UNLOCK(mp, s);

		/*
		 * Attach the function xfs_iflush_done to the inode's
		 * buffer. This will remove the inode from the AIL
		 * and unlock the inode's flush lock when the inode is
		 * completely written to disk.
		 */
		xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*))
				      xfs_iflush_done, (xfs_log_item_t *)iip);

		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
		ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
	} else {
		/*
		 * We're flushing an inode which is not in the AIL and has
		 * not been logged but has i_update_core set. For this
		 * case we can use a B_DELWRI flush and immediately drop
		 * the inode flush lock because we can avoid the whole
		 * AIL state thing. It's OK to drop the flush lock now,
		 * because we've already locked the buffer and to do anything
		 * you really need both.
		 */
		if (iip != NULL) {
			ASSERT(iip->ili_logged == 0);
			ASSERT(iip->ili_last_fields == 0);
			ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
		}
		xfs_ifunlock(ip);
	}

	return 0;

corrupt_out:
	/* The flush lock is still held here; the caller must release it. */
	return XFS_ERROR(EFSCORRUPTED);
}


/*
 * Flush all inactive inodes in mp. Return true if no user references
 * were found, false otherwise.
 */
int
xfs_iflush_all(
	xfs_mount_t	*mp,
	int		flag)
{
	int		busy;
	int		done;
	int		purged;
	xfs_inode_t	*ip;
	vmap_t		vmap;
	vnode_t		*vp;

	busy = done = 0;
	while (!done) {
		purged = 0;
		XFS_MOUNT_ILOCK(mp);
		ip = mp->m_inodes;
		if (ip == NULL) {
			break;
		}
		do {
			/* Make sure we skip markers inserted by sync */
			if (ip->i_mount == NULL) {
				ip = ip->i_mnext;
				continue;
			}

36341da177e4SLinus Torvalds /* 36351da177e4SLinus Torvalds * It's up to our caller to purge the root 36361da177e4SLinus Torvalds * and quota vnodes later. 36371da177e4SLinus Torvalds */ 36381da177e4SLinus Torvalds vp = XFS_ITOV_NULL(ip); 36391da177e4SLinus Torvalds 36401da177e4SLinus Torvalds if (!vp) { 36411da177e4SLinus Torvalds XFS_MOUNT_IUNLOCK(mp); 36421da177e4SLinus Torvalds xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); 36431da177e4SLinus Torvalds purged = 1; 36441da177e4SLinus Torvalds break; 36451da177e4SLinus Torvalds } 36461da177e4SLinus Torvalds 36471da177e4SLinus Torvalds if (vn_count(vp) != 0) { 36481da177e4SLinus Torvalds if (vn_count(vp) == 1 && 36491da177e4SLinus Torvalds (ip == mp->m_rootip || 36501da177e4SLinus Torvalds (mp->m_quotainfo && 36511da177e4SLinus Torvalds (ip->i_ino == mp->m_sb.sb_uquotino || 36521da177e4SLinus Torvalds ip->i_ino == mp->m_sb.sb_gquotino)))) { 36531da177e4SLinus Torvalds 36541da177e4SLinus Torvalds ip = ip->i_mnext; 36551da177e4SLinus Torvalds continue; 36561da177e4SLinus Torvalds } 36571da177e4SLinus Torvalds if (!(flag & XFS_FLUSH_ALL)) { 36581da177e4SLinus Torvalds busy = 1; 36591da177e4SLinus Torvalds done = 1; 36601da177e4SLinus Torvalds break; 36611da177e4SLinus Torvalds } 36621da177e4SLinus Torvalds /* 36631da177e4SLinus Torvalds * Ignore busy inodes but continue flushing 36641da177e4SLinus Torvalds * others. 36651da177e4SLinus Torvalds */ 36661da177e4SLinus Torvalds ip = ip->i_mnext; 36671da177e4SLinus Torvalds continue; 36681da177e4SLinus Torvalds } 36691da177e4SLinus Torvalds /* 36701da177e4SLinus Torvalds * Sample vp mapping while holding mp locked on MP 36711da177e4SLinus Torvalds * systems, so we don't purge a reclaimed or 36721da177e4SLinus Torvalds * nonexistent vnode. We break from the loop 36731da177e4SLinus Torvalds * since we know that we modify 36741da177e4SLinus Torvalds * it by pulling ourselves from it in xfs_reclaim() 36751da177e4SLinus Torvalds * called via vn_purge() below. 
Set ip to the next 36761da177e4SLinus Torvalds * entry in the list anyway so we'll know below 36771da177e4SLinus Torvalds * whether we reached the end or not. 36781da177e4SLinus Torvalds */ 36791da177e4SLinus Torvalds VMAP(vp, vmap); 36801da177e4SLinus Torvalds XFS_MOUNT_IUNLOCK(mp); 36811da177e4SLinus Torvalds 36821da177e4SLinus Torvalds vn_purge(vp, &vmap); 36831da177e4SLinus Torvalds 36841da177e4SLinus Torvalds purged = 1; 36851da177e4SLinus Torvalds break; 36861da177e4SLinus Torvalds } while (ip != mp->m_inodes); 36871da177e4SLinus Torvalds /* 36881da177e4SLinus Torvalds * We need to distinguish between when we exit the loop 36891da177e4SLinus Torvalds * after a purge and when we simply hit the end of the 36901da177e4SLinus Torvalds * list. We can't use the (ip == mp->m_inodes) test, 36911da177e4SLinus Torvalds * because when we purge an inode at the start of the list 36921da177e4SLinus Torvalds * the next inode on the list becomes mp->m_inodes. That 36931da177e4SLinus Torvalds * would cause such a test to bail out early. The purged 36941da177e4SLinus Torvalds * variable tells us how we got out of the loop. 36951da177e4SLinus Torvalds */ 36961da177e4SLinus Torvalds if (!purged) { 36971da177e4SLinus Torvalds done = 1; 36981da177e4SLinus Torvalds } 36991da177e4SLinus Torvalds } 37001da177e4SLinus Torvalds XFS_MOUNT_IUNLOCK(mp); 37011da177e4SLinus Torvalds return !busy; 37021da177e4SLinus Torvalds } 37031da177e4SLinus Torvalds 37041da177e4SLinus Torvalds 37051da177e4SLinus Torvalds /* 37061da177e4SLinus Torvalds * xfs_iaccess: check accessibility of inode for mode. 
37071da177e4SLinus Torvalds */ 37081da177e4SLinus Torvalds int 37091da177e4SLinus Torvalds xfs_iaccess( 37101da177e4SLinus Torvalds xfs_inode_t *ip, 37111da177e4SLinus Torvalds mode_t mode, 37121da177e4SLinus Torvalds cred_t *cr) 37131da177e4SLinus Torvalds { 37141da177e4SLinus Torvalds int error; 37151da177e4SLinus Torvalds mode_t orgmode = mode; 37161da177e4SLinus Torvalds struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip)); 37171da177e4SLinus Torvalds 37181da177e4SLinus Torvalds if (mode & S_IWUSR) { 37191da177e4SLinus Torvalds umode_t imode = inode->i_mode; 37201da177e4SLinus Torvalds 37211da177e4SLinus Torvalds if (IS_RDONLY(inode) && 37221da177e4SLinus Torvalds (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode))) 37231da177e4SLinus Torvalds return XFS_ERROR(EROFS); 37241da177e4SLinus Torvalds 37251da177e4SLinus Torvalds if (IS_IMMUTABLE(inode)) 37261da177e4SLinus Torvalds return XFS_ERROR(EACCES); 37271da177e4SLinus Torvalds } 37281da177e4SLinus Torvalds 37291da177e4SLinus Torvalds /* 37301da177e4SLinus Torvalds * If there's an Access Control List it's used instead of 37311da177e4SLinus Torvalds * the mode bits. 37321da177e4SLinus Torvalds */ 37331da177e4SLinus Torvalds if ((error = _ACL_XFS_IACCESS(ip, mode, cr)) != -1) 37341da177e4SLinus Torvalds return error ? XFS_ERROR(error) : 0; 37351da177e4SLinus Torvalds 37361da177e4SLinus Torvalds if (current_fsuid(cr) != ip->i_d.di_uid) { 37371da177e4SLinus Torvalds mode >>= 3; 37381da177e4SLinus Torvalds if (!in_group_p((gid_t)ip->i_d.di_gid)) 37391da177e4SLinus Torvalds mode >>= 3; 37401da177e4SLinus Torvalds } 37411da177e4SLinus Torvalds 37421da177e4SLinus Torvalds /* 37431da177e4SLinus Torvalds * If the DACs are ok we don't need any capability check. 37441da177e4SLinus Torvalds */ 37451da177e4SLinus Torvalds if ((ip->i_d.di_mode & mode) == mode) 37461da177e4SLinus Torvalds return 0; 37471da177e4SLinus Torvalds /* 37481da177e4SLinus Torvalds * Read/write DACs are always overridable. 
37491da177e4SLinus Torvalds * Executable DACs are overridable if at least one exec bit is set. 37501da177e4SLinus Torvalds */ 37511da177e4SLinus Torvalds if (!(orgmode & S_IXUSR) || 37521da177e4SLinus Torvalds (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 37531da177e4SLinus Torvalds if (capable_cred(cr, CAP_DAC_OVERRIDE)) 37541da177e4SLinus Torvalds return 0; 37551da177e4SLinus Torvalds 37561da177e4SLinus Torvalds if ((orgmode == S_IRUSR) || 37571da177e4SLinus Torvalds (S_ISDIR(inode->i_mode) && (!(orgmode & S_IWUSR)))) { 37581da177e4SLinus Torvalds if (capable_cred(cr, CAP_DAC_READ_SEARCH)) 37591da177e4SLinus Torvalds return 0; 37601da177e4SLinus Torvalds #ifdef NOISE 37611da177e4SLinus Torvalds cmn_err(CE_NOTE, "Ick: mode=%o, orgmode=%o", mode, orgmode); 37621da177e4SLinus Torvalds #endif /* NOISE */ 37631da177e4SLinus Torvalds return XFS_ERROR(EACCES); 37641da177e4SLinus Torvalds } 37651da177e4SLinus Torvalds return XFS_ERROR(EACCES); 37661da177e4SLinus Torvalds } 37671da177e4SLinus Torvalds 37681da177e4SLinus Torvalds /* 37691da177e4SLinus Torvalds * xfs_iroundup: round up argument to next power of two 37701da177e4SLinus Torvalds */ 37711da177e4SLinus Torvalds uint 37721da177e4SLinus Torvalds xfs_iroundup( 37731da177e4SLinus Torvalds uint v) 37741da177e4SLinus Torvalds { 37751da177e4SLinus Torvalds int i; 37761da177e4SLinus Torvalds uint m; 37771da177e4SLinus Torvalds 37781da177e4SLinus Torvalds if ((v & (v - 1)) == 0) 37791da177e4SLinus Torvalds return v; 37801da177e4SLinus Torvalds ASSERT((v & 0x80000000) == 0); 37811da177e4SLinus Torvalds if ((v & (v + 1)) == 0) 37821da177e4SLinus Torvalds return v + 1; 37831da177e4SLinus Torvalds for (i = 0, m = 1; i < 31; i++, m <<= 1) { 37841da177e4SLinus Torvalds if (v & m) 37851da177e4SLinus Torvalds continue; 37861da177e4SLinus Torvalds v |= m; 37871da177e4SLinus Torvalds if ((v & (v + 1)) == 0) 37881da177e4SLinus Torvalds return v + 1; 37891da177e4SLinus Torvalds } 37901da177e4SLinus Torvalds ASSERT(0); 
37911da177e4SLinus Torvalds return( 0 ); 37921da177e4SLinus Torvalds } 37931da177e4SLinus Torvalds 37941da177e4SLinus Torvalds /* 37951da177e4SLinus Torvalds * Change the requested timestamp in the given inode. 37961da177e4SLinus Torvalds * We don't lock across timestamp updates, and we don't log them but 37971da177e4SLinus Torvalds * we do record the fact that there is dirty information in core. 37981da177e4SLinus Torvalds * 37991da177e4SLinus Torvalds * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG 38001da177e4SLinus Torvalds * with XFS_ICHGTIME_ACC to be sure that access time 38011da177e4SLinus Torvalds * update will take. Calling first with XFS_ICHGTIME_ACC 38021da177e4SLinus Torvalds * and then XFS_ICHGTIME_MOD may fail to modify the access 38031da177e4SLinus Torvalds * timestamp if the filesystem is mounted noacctm. 38041da177e4SLinus Torvalds */ 38051da177e4SLinus Torvalds void 38061da177e4SLinus Torvalds xfs_ichgtime(xfs_inode_t *ip, 38071da177e4SLinus Torvalds int flags) 38081da177e4SLinus Torvalds { 38091da177e4SLinus Torvalds timespec_t tv; 38101da177e4SLinus Torvalds vnode_t *vp = XFS_ITOV(ip); 38111da177e4SLinus Torvalds struct inode *inode = LINVFS_GET_IP(vp); 38121da177e4SLinus Torvalds 38131da177e4SLinus Torvalds /* 38141da177e4SLinus Torvalds * We're not supposed to change timestamps in readonly-mounted 38151da177e4SLinus Torvalds * filesystems. Throw it away if anyone asks us. 38161da177e4SLinus Torvalds */ 38171da177e4SLinus Torvalds if (unlikely(vp->v_vfsp->vfs_flag & VFS_RDONLY)) 38181da177e4SLinus Torvalds return; 38191da177e4SLinus Torvalds 38201da177e4SLinus Torvalds /* 38211da177e4SLinus Torvalds * Don't update access timestamps on reads if mounted "noatime" 38221da177e4SLinus Torvalds * Throw it away if anyone asks us. 
38231da177e4SLinus Torvalds */ 38241da177e4SLinus Torvalds if ((ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) && 38251da177e4SLinus Torvalds ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) 38261da177e4SLinus Torvalds == XFS_ICHGTIME_ACC)) 38271da177e4SLinus Torvalds return; 38281da177e4SLinus Torvalds 38291da177e4SLinus Torvalds nanotime(&tv); 38301da177e4SLinus Torvalds if (flags & XFS_ICHGTIME_MOD) { 38311da177e4SLinus Torvalds VN_MTIMESET(vp, &tv); 38321da177e4SLinus Torvalds ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; 38331da177e4SLinus Torvalds ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; 38341da177e4SLinus Torvalds } 38351da177e4SLinus Torvalds if (flags & XFS_ICHGTIME_ACC) { 38361da177e4SLinus Torvalds VN_ATIMESET(vp, &tv); 38371da177e4SLinus Torvalds ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec; 38381da177e4SLinus Torvalds ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec; 38391da177e4SLinus Torvalds } 38401da177e4SLinus Torvalds if (flags & XFS_ICHGTIME_CHG) { 38411da177e4SLinus Torvalds VN_CTIMESET(vp, &tv); 38421da177e4SLinus Torvalds ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; 38431da177e4SLinus Torvalds ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; 38441da177e4SLinus Torvalds } 38451da177e4SLinus Torvalds 38461da177e4SLinus Torvalds /* 38471da177e4SLinus Torvalds * We update the i_update_core field _after_ changing 38481da177e4SLinus Torvalds * the timestamps in order to coordinate properly with 38491da177e4SLinus Torvalds * xfs_iflush() so that we don't lose timestamp updates. 38501da177e4SLinus Torvalds * This keeps us from having to hold the inode lock 38511da177e4SLinus Torvalds * while doing this. We use the SYNCHRONIZE macro to 38521da177e4SLinus Torvalds * ensure that the compiler does not reorder the update 38531da177e4SLinus Torvalds * of i_update_core above the timestamp updates above. 
38541da177e4SLinus Torvalds */ 38551da177e4SLinus Torvalds SYNCHRONIZE(); 38561da177e4SLinus Torvalds ip->i_update_core = 1; 38571da177e4SLinus Torvalds if (!(inode->i_state & I_LOCK)) 38581da177e4SLinus Torvalds mark_inode_dirty_sync(inode); 38591da177e4SLinus Torvalds } 38601da177e4SLinus Torvalds 38611da177e4SLinus Torvalds #ifdef XFS_ILOCK_TRACE 38621da177e4SLinus Torvalds ktrace_t *xfs_ilock_trace_buf; 38631da177e4SLinus Torvalds 38641da177e4SLinus Torvalds void 38651da177e4SLinus Torvalds xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) 38661da177e4SLinus Torvalds { 38671da177e4SLinus Torvalds ktrace_enter(ip->i_lock_trace, 38681da177e4SLinus Torvalds (void *)ip, 38691da177e4SLinus Torvalds (void *)(unsigned long)lock, /* 1 = LOCK, 3=UNLOCK, etc */ 38701da177e4SLinus Torvalds (void *)(unsigned long)lockflags, /* XFS_ILOCK_EXCL etc */ 38711da177e4SLinus Torvalds (void *)ra, /* caller of ilock */ 38721da177e4SLinus Torvalds (void *)(unsigned long)current_cpu(), 38731da177e4SLinus Torvalds (void *)(unsigned long)current_pid(), 38741da177e4SLinus Torvalds NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL); 38751da177e4SLinus Torvalds } 38761da177e4SLinus Torvalds #endif 3877