xref: /openbmc/linux/fs/xfs/xfs_inode.c (revision 673e8e597c06eb81954bf21a10f5cce74a1de8f1)
11da177e4SLinus Torvalds /*
23e57ecf6SOlaf Weber  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
37b718769SNathan Scott  * All Rights Reserved.
41da177e4SLinus Torvalds  *
57b718769SNathan Scott  * This program is free software; you can redistribute it and/or
67b718769SNathan Scott  * modify it under the terms of the GNU General Public License as
71da177e4SLinus Torvalds  * published by the Free Software Foundation.
81da177e4SLinus Torvalds  *
97b718769SNathan Scott  * This program is distributed in the hope that it would be useful,
107b718769SNathan Scott  * but WITHOUT ANY WARRANTY; without even the implied warranty of
117b718769SNathan Scott  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
127b718769SNathan Scott  * GNU General Public License for more details.
131da177e4SLinus Torvalds  *
147b718769SNathan Scott  * You should have received a copy of the GNU General Public License
157b718769SNathan Scott  * along with this program; if not, write the Free Software Foundation,
167b718769SNathan Scott  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
171da177e4SLinus Torvalds  */
1840ebd81dSRobert P. J. Day #include <linux/log2.h>
1940ebd81dSRobert P. J. Day 
201da177e4SLinus Torvalds #include "xfs.h"
21a844f451SNathan Scott #include "xfs_fs.h"
221da177e4SLinus Torvalds #include "xfs_types.h"
23a844f451SNathan Scott #include "xfs_bit.h"
241da177e4SLinus Torvalds #include "xfs_log.h"
25a844f451SNathan Scott #include "xfs_inum.h"
261da177e4SLinus Torvalds #include "xfs_trans.h"
271da177e4SLinus Torvalds #include "xfs_trans_priv.h"
281da177e4SLinus Torvalds #include "xfs_sb.h"
291da177e4SLinus Torvalds #include "xfs_ag.h"
301da177e4SLinus Torvalds #include "xfs_mount.h"
311da177e4SLinus Torvalds #include "xfs_bmap_btree.h"
32a844f451SNathan Scott #include "xfs_alloc_btree.h"
331da177e4SLinus Torvalds #include "xfs_ialloc_btree.h"
34a844f451SNathan Scott #include "xfs_attr_sf.h"
351da177e4SLinus Torvalds #include "xfs_dinode.h"
361da177e4SLinus Torvalds #include "xfs_inode.h"
371da177e4SLinus Torvalds #include "xfs_buf_item.h"
38a844f451SNathan Scott #include "xfs_inode_item.h"
39a844f451SNathan Scott #include "xfs_btree.h"
40a844f451SNathan Scott #include "xfs_alloc.h"
41a844f451SNathan Scott #include "xfs_ialloc.h"
42a844f451SNathan Scott #include "xfs_bmap.h"
431da177e4SLinus Torvalds #include "xfs_error.h"
441da177e4SLinus Torvalds #include "xfs_utils.h"
451da177e4SLinus Torvalds #include "xfs_quota.h"
462a82b8beSDavid Chinner #include "xfs_filestream.h"
47739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h"
480b1b213fSChristoph Hellwig #include "xfs_trace.h"
491da177e4SLinus Torvalds 
501da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone;
511da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone;
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds /*
548f04c47aSChristoph Hellwig  * Used in xfs_itruncate_extents().  This is the maximum number of extents
551da177e4SLinus Torvalds  * freed from a file in a single transaction.
561da177e4SLinus Torvalds  */
571da177e4SLinus Torvalds #define	XFS_ITRUNC_MAX_EXTENTS	2
581da177e4SLinus Torvalds 
591da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
601da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
611da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
621da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
631da177e4SLinus Torvalds 
#ifdef DEBUG
/*
 * Debug-only sanity pass over the first @nrecs in-core extent records of
 * the fork @ifp.  Each record is decoded with xfs_bmbt_get_all(); when the
 * extent format @fmt is XFS_EXTFMT_NOSTATE every decoded record must be in
 * the XFS_EXT_NORM state.
 */
STATIC void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	xfs_bmbt_rec_host_t	copy;
	xfs_bmbt_irec_t		decoded;
	int			idx = 0;

	while (idx < nrecs) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);

		/* Take a local copy of both words before decoding. */
		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &decoded);

		ASSERT(fmt != XFS_EXTFMT_NOSTATE ||
		       decoded.br_state == XFS_EXT_NORM);
		idx++;
	}
}
#else /* DEBUG */
/* Non-debug builds compile the validation away entirely. */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
911da177e4SLinus Torvalds 
/*
 * Debug-only check that none of the inodes in the buffer has a
 * next-unlinked field of 0.  The number of inodes examined is the inode
 * cluster size divided by the inode size; any offender is reported with
 * xfs_alert() and then trips an assertion.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	int		ninodes = mp->m_inode_cluster_size >>
					mp->m_sb.sb_inodelog;
	int		idx;

	for (idx = 0; idx < ninodes; idx++) {
		xfs_dinode_t	*dip;

		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					idx * mp->m_sb.sb_inodesize);
		if (dip->di_next_unlinked)
			continue;

		xfs_alert(mp,
	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
			bp);
		ASSERT(dip->di_next_unlinked);
	}
}
#endif
1201da177e4SLinus Torvalds 
1211da177e4SLinus Torvalds /*
1224ae29b43SDavid Chinner  * Find the buffer associated with the given inode map
1234ae29b43SDavid Chinner  * We do basic validation checks on the buffer once it has been
1244ae29b43SDavid Chinner  * retrieved from disk.
1254ae29b43SDavid Chinner  */
1264ae29b43SDavid Chinner STATIC int
1274ae29b43SDavid Chinner xfs_imap_to_bp(
1284ae29b43SDavid Chinner 	xfs_mount_t	*mp,
1294ae29b43SDavid Chinner 	xfs_trans_t	*tp,
13092bfc6e7SChristoph Hellwig 	struct xfs_imap	*imap,
1314ae29b43SDavid Chinner 	xfs_buf_t	**bpp,
1324ae29b43SDavid Chinner 	uint		buf_flags,
133b48d8d64SChristoph Hellwig 	uint		iget_flags)
1344ae29b43SDavid Chinner {
1354ae29b43SDavid Chinner 	int		error;
1364ae29b43SDavid Chinner 	int		i;
1374ae29b43SDavid Chinner 	int		ni;
1384ae29b43SDavid Chinner 	xfs_buf_t	*bp;
1394ae29b43SDavid Chinner 
1404ae29b43SDavid Chinner 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
141a3f74ffbSDavid Chinner 				   (int)imap->im_len, buf_flags, &bp);
1424ae29b43SDavid Chinner 	if (error) {
143a3f74ffbSDavid Chinner 		if (error != EAGAIN) {
1440b932cccSDave Chinner 			xfs_warn(mp,
1450b932cccSDave Chinner 				"%s: xfs_trans_read_buf() returned error %d.",
1460b932cccSDave Chinner 				__func__, error);
147a3f74ffbSDavid Chinner 		} else {
1480cadda1cSChristoph Hellwig 			ASSERT(buf_flags & XBF_TRYLOCK);
149a3f74ffbSDavid Chinner 		}
1504ae29b43SDavid Chinner 		return error;
1514ae29b43SDavid Chinner 	}
1524ae29b43SDavid Chinner 
1534ae29b43SDavid Chinner 	/*
1544ae29b43SDavid Chinner 	 * Validate the magic number and version of every inode in the buffer
1554ae29b43SDavid Chinner 	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
1564ae29b43SDavid Chinner 	 */
1574ae29b43SDavid Chinner #ifdef DEBUG
1584ae29b43SDavid Chinner 	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
1594ae29b43SDavid Chinner #else	/* usual case */
1604ae29b43SDavid Chinner 	ni = 1;
1614ae29b43SDavid Chinner #endif
1624ae29b43SDavid Chinner 
1634ae29b43SDavid Chinner 	for (i = 0; i < ni; i++) {
1644ae29b43SDavid Chinner 		int		di_ok;
1654ae29b43SDavid Chinner 		xfs_dinode_t	*dip;
1664ae29b43SDavid Chinner 
1674ae29b43SDavid Chinner 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
1684ae29b43SDavid Chinner 					(i << mp->m_sb.sb_inodelog));
16969ef921bSChristoph Hellwig 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
17081591fe2SChristoph Hellwig 			    XFS_DINODE_GOOD_VERSION(dip->di_version);
1714ae29b43SDavid Chinner 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
1724ae29b43SDavid Chinner 						XFS_ERRTAG_ITOBP_INOTOBP,
1734ae29b43SDavid Chinner 						XFS_RANDOM_ITOBP_INOTOBP))) {
1741920779eSDave Chinner 			if (iget_flags & XFS_IGET_UNTRUSTED) {
1754ae29b43SDavid Chinner 				xfs_trans_brelse(tp, bp);
1764ae29b43SDavid Chinner 				return XFS_ERROR(EINVAL);
1774ae29b43SDavid Chinner 			}
1784ae29b43SDavid Chinner 			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
1794ae29b43SDavid Chinner 						XFS_ERRLEVEL_HIGH, mp, dip);
1804ae29b43SDavid Chinner #ifdef DEBUG
1810b932cccSDave Chinner 			xfs_emerg(mp,
1820b932cccSDave Chinner 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
1834ae29b43SDavid Chinner 				(unsigned long long)imap->im_blkno, i,
18481591fe2SChristoph Hellwig 				be16_to_cpu(dip->di_magic));
1850b932cccSDave Chinner 			ASSERT(0);
1864ae29b43SDavid Chinner #endif
1874ae29b43SDavid Chinner 			xfs_trans_brelse(tp, bp);
1884ae29b43SDavid Chinner 			return XFS_ERROR(EFSCORRUPTED);
1894ae29b43SDavid Chinner 		}
1904ae29b43SDavid Chinner 	}
1914ae29b43SDavid Chinner 
1924ae29b43SDavid Chinner 	xfs_inobp_check(mp, bp);
1934ae29b43SDavid Chinner 	*bpp = bp;
1944ae29b43SDavid Chinner 	return 0;
1954ae29b43SDavid Chinner }
1964ae29b43SDavid Chinner 
1974ae29b43SDavid Chinner /*
1981da177e4SLinus Torvalds  * This routine is called to map an inode number within a file
1991da177e4SLinus Torvalds  * system to the buffer containing the on-disk version of the
2001da177e4SLinus Torvalds  * inode.  It returns a pointer to the buffer containing the
2011da177e4SLinus Torvalds  * on-disk inode in the bpp parameter, and in the dip parameter
2021da177e4SLinus Torvalds  * it returns a pointer to the on-disk inode within that buffer.
2031da177e4SLinus Torvalds  *
2041da177e4SLinus Torvalds  * If a non-zero error is returned, then the contents of bpp and
2051da177e4SLinus Torvalds  * dipp are undefined.
2061da177e4SLinus Torvalds  *
2071da177e4SLinus Torvalds  * Use xfs_imap() to determine the size and location of the
2081da177e4SLinus Torvalds  * buffer to read from disk.
2091da177e4SLinus Torvalds  */
210c679eef0SChristoph Hellwig int
2111da177e4SLinus Torvalds xfs_inotobp(
2121da177e4SLinus Torvalds 	xfs_mount_t	*mp,
2131da177e4SLinus Torvalds 	xfs_trans_t	*tp,
2141da177e4SLinus Torvalds 	xfs_ino_t	ino,
2151da177e4SLinus Torvalds 	xfs_dinode_t	**dipp,
2161da177e4SLinus Torvalds 	xfs_buf_t	**bpp,
217c679eef0SChristoph Hellwig 	int		*offset,
218c679eef0SChristoph Hellwig 	uint		imap_flags)
2191da177e4SLinus Torvalds {
22092bfc6e7SChristoph Hellwig 	struct xfs_imap	imap;
2211da177e4SLinus Torvalds 	xfs_buf_t	*bp;
2221da177e4SLinus Torvalds 	int		error;
2231da177e4SLinus Torvalds 
2241da177e4SLinus Torvalds 	imap.im_blkno = 0;
225a1941895SChristoph Hellwig 	error = xfs_imap(mp, tp, ino, &imap, imap_flags);
2264ae29b43SDavid Chinner 	if (error)
2271da177e4SLinus Torvalds 		return error;
2281da177e4SLinus Torvalds 
2290cadda1cSChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
2304ae29b43SDavid Chinner 	if (error)
2311da177e4SLinus Torvalds 		return error;
2321da177e4SLinus Torvalds 
2331da177e4SLinus Torvalds 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
2341da177e4SLinus Torvalds 	*bpp = bp;
2351da177e4SLinus Torvalds 	*offset = imap.im_boffset;
2361da177e4SLinus Torvalds 	return 0;
2371da177e4SLinus Torvalds }
2381da177e4SLinus Torvalds 
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds /*
2411da177e4SLinus Torvalds  * This routine is called to map an inode to the buffer containing
2421da177e4SLinus Torvalds  * the on-disk version of the inode.  It returns a pointer to the
2431da177e4SLinus Torvalds  * buffer containing the on-disk inode in the bpp parameter, and in
2441da177e4SLinus Torvalds  * the dip parameter it returns a pointer to the on-disk inode within
2451da177e4SLinus Torvalds  * that buffer.
2461da177e4SLinus Torvalds  *
2471da177e4SLinus Torvalds  * If a non-zero error is returned, then the contents of bpp and
2481da177e4SLinus Torvalds  * dipp are undefined.
2491da177e4SLinus Torvalds  *
25076d8b277SChristoph Hellwig  * The inode is expected to already been mapped to its buffer and read
25176d8b277SChristoph Hellwig  * in once, thus we can use the mapping information stored in the inode
25276d8b277SChristoph Hellwig  * rather than calling xfs_imap().  This allows us to avoid the overhead
25376d8b277SChristoph Hellwig  * of looking at the inode btree for small block file systems
25494e1b69dSChristoph Hellwig  * (see xfs_imap()).
2551da177e4SLinus Torvalds  */
2561da177e4SLinus Torvalds int
2571da177e4SLinus Torvalds xfs_itobp(
2581da177e4SLinus Torvalds 	xfs_mount_t	*mp,
2591da177e4SLinus Torvalds 	xfs_trans_t	*tp,
2601da177e4SLinus Torvalds 	xfs_inode_t	*ip,
2611da177e4SLinus Torvalds 	xfs_dinode_t	**dipp,
2621da177e4SLinus Torvalds 	xfs_buf_t	**bpp,
263a3f74ffbSDavid Chinner 	uint		buf_flags)
2641da177e4SLinus Torvalds {
2651da177e4SLinus Torvalds 	xfs_buf_t	*bp;
2661da177e4SLinus Torvalds 	int		error;
2671da177e4SLinus Torvalds 
26892bfc6e7SChristoph Hellwig 	ASSERT(ip->i_imap.im_blkno != 0);
2691da177e4SLinus Torvalds 
27092bfc6e7SChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
2714ae29b43SDavid Chinner 	if (error)
2721da177e4SLinus Torvalds 		return error;
2734d1a2ed3SNathan Scott 
274a3f74ffbSDavid Chinner 	if (!bp) {
2750cadda1cSChristoph Hellwig 		ASSERT(buf_flags & XBF_TRYLOCK);
276a3f74ffbSDavid Chinner 		ASSERT(tp == NULL);
277a3f74ffbSDavid Chinner 		*bpp = NULL;
278a3f74ffbSDavid Chinner 		return EAGAIN;
279a3f74ffbSDavid Chinner 	}
280a3f74ffbSDavid Chinner 
28192bfc6e7SChristoph Hellwig 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
2821da177e4SLinus Torvalds 	*bpp = bp;
2831da177e4SLinus Torvalds 	return 0;
2841da177e4SLinus Torvalds }
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds /*
2871da177e4SLinus Torvalds  * Move inode type and inode format specific information from the
2881da177e4SLinus Torvalds  * on-disk inode to the in-core inode.  For fifos, devs, and sockets
2891da177e4SLinus Torvalds  * this means set if_rdev to the proper value.  For files, directories,
2901da177e4SLinus Torvalds  * and symlinks this means to bring in the in-line data or extent
2911da177e4SLinus Torvalds  * pointers.  For a file in B-tree format, only the root is immediately
2921da177e4SLinus Torvalds  * brought in-core.  The rest will be in-lined in if_extents when it
2931da177e4SLinus Torvalds  * is first referenced (see xfs_iread_extents()).
2941da177e4SLinus Torvalds  */
2951da177e4SLinus Torvalds STATIC int
2961da177e4SLinus Torvalds xfs_iformat(
2971da177e4SLinus Torvalds 	xfs_inode_t		*ip,
2981da177e4SLinus Torvalds 	xfs_dinode_t		*dip)
2991da177e4SLinus Torvalds {
3001da177e4SLinus Torvalds 	xfs_attr_shortform_t	*atp;
3011da177e4SLinus Torvalds 	int			size;
3021da177e4SLinus Torvalds 	int			error;
3031da177e4SLinus Torvalds 	xfs_fsize_t             di_size;
3041da177e4SLinus Torvalds 	ip->i_df.if_ext_max =
3051da177e4SLinus Torvalds 		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3061da177e4SLinus Torvalds 	error = 0;
3071da177e4SLinus Torvalds 
30881591fe2SChristoph Hellwig 	if (unlikely(be32_to_cpu(dip->di_nextents) +
30981591fe2SChristoph Hellwig 		     be16_to_cpu(dip->di_anextents) >
31081591fe2SChristoph Hellwig 		     be64_to_cpu(dip->di_nblocks))) {
31165333b4cSDave Chinner 		xfs_warn(ip->i_mount,
3123762ec6bSNathan Scott 			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
3131da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
31481591fe2SChristoph Hellwig 			(int)(be32_to_cpu(dip->di_nextents) +
31581591fe2SChristoph Hellwig 			      be16_to_cpu(dip->di_anextents)),
3161da177e4SLinus Torvalds 			(unsigned long long)
31781591fe2SChristoph Hellwig 				be64_to_cpu(dip->di_nblocks));
3181da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
3191da177e4SLinus Torvalds 				     ip->i_mount, dip);
3201da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
3211da177e4SLinus Torvalds 	}
3221da177e4SLinus Torvalds 
32381591fe2SChristoph Hellwig 	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
32465333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
3251da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
32681591fe2SChristoph Hellwig 			dip->di_forkoff);
3271da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
3281da177e4SLinus Torvalds 				     ip->i_mount, dip);
3291da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
3301da177e4SLinus Torvalds 	}
3311da177e4SLinus Torvalds 
332b89d4208SChristoph Hellwig 	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
333b89d4208SChristoph Hellwig 		     !ip->i_mount->m_rtdev_targp)) {
33465333b4cSDave Chinner 		xfs_warn(ip->i_mount,
335b89d4208SChristoph Hellwig 			"corrupt dinode %Lu, has realtime flag set.",
336b89d4208SChristoph Hellwig 			ip->i_ino);
337b89d4208SChristoph Hellwig 		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
338b89d4208SChristoph Hellwig 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
339b89d4208SChristoph Hellwig 		return XFS_ERROR(EFSCORRUPTED);
340b89d4208SChristoph Hellwig 	}
341b89d4208SChristoph Hellwig 
3421da177e4SLinus Torvalds 	switch (ip->i_d.di_mode & S_IFMT) {
3431da177e4SLinus Torvalds 	case S_IFIFO:
3441da177e4SLinus Torvalds 	case S_IFCHR:
3451da177e4SLinus Torvalds 	case S_IFBLK:
3461da177e4SLinus Torvalds 	case S_IFSOCK:
34781591fe2SChristoph Hellwig 		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
3481da177e4SLinus Torvalds 			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
3491da177e4SLinus Torvalds 					      ip->i_mount, dip);
3501da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
3511da177e4SLinus Torvalds 		}
3521da177e4SLinus Torvalds 		ip->i_d.di_size = 0;
353ba87ea69SLachlan McIlroy 		ip->i_size = 0;
35481591fe2SChristoph Hellwig 		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
3551da177e4SLinus Torvalds 		break;
3561da177e4SLinus Torvalds 
3571da177e4SLinus Torvalds 	case S_IFREG:
3581da177e4SLinus Torvalds 	case S_IFLNK:
3591da177e4SLinus Torvalds 	case S_IFDIR:
36081591fe2SChristoph Hellwig 		switch (dip->di_format) {
3611da177e4SLinus Torvalds 		case XFS_DINODE_FMT_LOCAL:
3621da177e4SLinus Torvalds 			/*
3631da177e4SLinus Torvalds 			 * no local regular files yet
3641da177e4SLinus Torvalds 			 */
365abbede1bSAl Viro 			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
36665333b4cSDave Chinner 				xfs_warn(ip->i_mount,
36765333b4cSDave Chinner 			"corrupt inode %Lu (local format for regular file).",
3681da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino);
3691da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
3701da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
3711da177e4SLinus Torvalds 						     ip->i_mount, dip);
3721da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
3731da177e4SLinus Torvalds 			}
3741da177e4SLinus Torvalds 
37581591fe2SChristoph Hellwig 			di_size = be64_to_cpu(dip->di_size);
3761da177e4SLinus Torvalds 			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
37765333b4cSDave Chinner 				xfs_warn(ip->i_mount,
37865333b4cSDave Chinner 			"corrupt inode %Lu (bad size %Ld for local inode).",
3791da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino,
3801da177e4SLinus Torvalds 					(long long) di_size);
3811da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
3821da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
3831da177e4SLinus Torvalds 						     ip->i_mount, dip);
3841da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
3851da177e4SLinus Torvalds 			}
3861da177e4SLinus Torvalds 
3871da177e4SLinus Torvalds 			size = (int)di_size;
3881da177e4SLinus Torvalds 			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
3891da177e4SLinus Torvalds 			break;
3901da177e4SLinus Torvalds 		case XFS_DINODE_FMT_EXTENTS:
3911da177e4SLinus Torvalds 			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
3921da177e4SLinus Torvalds 			break;
3931da177e4SLinus Torvalds 		case XFS_DINODE_FMT_BTREE:
3941da177e4SLinus Torvalds 			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
3951da177e4SLinus Torvalds 			break;
3961da177e4SLinus Torvalds 		default:
3971da177e4SLinus Torvalds 			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
3981da177e4SLinus Torvalds 					 ip->i_mount);
3991da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
4001da177e4SLinus Torvalds 		}
4011da177e4SLinus Torvalds 		break;
4021da177e4SLinus Torvalds 
4031da177e4SLinus Torvalds 	default:
4041da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
4051da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
4061da177e4SLinus Torvalds 	}
4071da177e4SLinus Torvalds 	if (error) {
4081da177e4SLinus Torvalds 		return error;
4091da177e4SLinus Torvalds 	}
4101da177e4SLinus Torvalds 	if (!XFS_DFORK_Q(dip))
4111da177e4SLinus Torvalds 		return 0;
4121da177e4SLinus Torvalds 	ASSERT(ip->i_afp == NULL);
4134a7edddcSDave Chinner 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
4141da177e4SLinus Torvalds 	ip->i_afp->if_ext_max =
4151da177e4SLinus Torvalds 		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
41681591fe2SChristoph Hellwig 	switch (dip->di_aformat) {
4171da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
4181da177e4SLinus Torvalds 		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
4193b244aa8SNathan Scott 		size = be16_to_cpu(atp->hdr.totsize);
4202809f76aSChristoph Hellwig 
4212809f76aSChristoph Hellwig 		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
42265333b4cSDave Chinner 			xfs_warn(ip->i_mount,
42365333b4cSDave Chinner 				"corrupt inode %Lu (bad attr fork size %Ld).",
4242809f76aSChristoph Hellwig 				(unsigned long long) ip->i_ino,
4252809f76aSChristoph Hellwig 				(long long) size);
4262809f76aSChristoph Hellwig 			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
4272809f76aSChristoph Hellwig 					     XFS_ERRLEVEL_LOW,
4282809f76aSChristoph Hellwig 					     ip->i_mount, dip);
4292809f76aSChristoph Hellwig 			return XFS_ERROR(EFSCORRUPTED);
4302809f76aSChristoph Hellwig 		}
4312809f76aSChristoph Hellwig 
4321da177e4SLinus Torvalds 		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
4331da177e4SLinus Torvalds 		break;
4341da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
4351da177e4SLinus Torvalds 		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
4361da177e4SLinus Torvalds 		break;
4371da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
4381da177e4SLinus Torvalds 		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
4391da177e4SLinus Torvalds 		break;
4401da177e4SLinus Torvalds 	default:
4411da177e4SLinus Torvalds 		error = XFS_ERROR(EFSCORRUPTED);
4421da177e4SLinus Torvalds 		break;
4431da177e4SLinus Torvalds 	}
4441da177e4SLinus Torvalds 	if (error) {
4451da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
4461da177e4SLinus Torvalds 		ip->i_afp = NULL;
4471da177e4SLinus Torvalds 		xfs_idestroy_fork(ip, XFS_DATA_FORK);
4481da177e4SLinus Torvalds 	}
4491da177e4SLinus Torvalds 	return error;
4501da177e4SLinus Torvalds }
4511da177e4SLinus Torvalds 
4521da177e4SLinus Torvalds /*
4531da177e4SLinus Torvalds  * The file is in-lined in the on-disk inode.
4541da177e4SLinus Torvalds  * If it fits into if_inline_data, then copy
4551da177e4SLinus Torvalds  * it there, otherwise allocate a buffer for it
4561da177e4SLinus Torvalds  * and copy the data there.  Either way, set
4571da177e4SLinus Torvalds  * if_data to point at the data.
4581da177e4SLinus Torvalds  * If we allocate a buffer for the data, make
4591da177e4SLinus Torvalds  * sure that its size is a multiple of 4 and
4601da177e4SLinus Torvalds  * record the real size in i_real_bytes.
4611da177e4SLinus Torvalds  */
4621da177e4SLinus Torvalds STATIC int
4631da177e4SLinus Torvalds xfs_iformat_local(
4641da177e4SLinus Torvalds 	xfs_inode_t	*ip,
4651da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
4661da177e4SLinus Torvalds 	int		whichfork,
4671da177e4SLinus Torvalds 	int		size)
4681da177e4SLinus Torvalds {
4691da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
4701da177e4SLinus Torvalds 	int		real_size;
4711da177e4SLinus Torvalds 
4721da177e4SLinus Torvalds 	/*
4731da177e4SLinus Torvalds 	 * If the size is unreasonable, then something
4741da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
4751da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
4761da177e4SLinus Torvalds 	 */
4771da177e4SLinus Torvalds 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
47865333b4cSDave Chinner 		xfs_warn(ip->i_mount,
47965333b4cSDave Chinner 	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
4801da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, size,
4811da177e4SLinus Torvalds 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
4821da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
4831da177e4SLinus Torvalds 				     ip->i_mount, dip);
4841da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
4851da177e4SLinus Torvalds 	}
4861da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
4871da177e4SLinus Torvalds 	real_size = 0;
4881da177e4SLinus Torvalds 	if (size == 0)
4891da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
4901da177e4SLinus Torvalds 	else if (size <= sizeof(ifp->if_u2.if_inline_data))
4911da177e4SLinus Torvalds 		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
4921da177e4SLinus Torvalds 	else {
4931da177e4SLinus Torvalds 		real_size = roundup(size, 4);
4944a7edddcSDave Chinner 		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
4951da177e4SLinus Torvalds 	}
4961da177e4SLinus Torvalds 	ifp->if_bytes = size;
4971da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
4981da177e4SLinus Torvalds 	if (size)
4991da177e4SLinus Torvalds 		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
5001da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
5011da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFINLINE;
5021da177e4SLinus Torvalds 	return 0;
5031da177e4SLinus Torvalds }
5041da177e4SLinus Torvalds 
5051da177e4SLinus Torvalds /*
5061da177e4SLinus Torvalds  * The file consists of a set of extents all
5071da177e4SLinus Torvalds  * of which fit into the on-disk inode.
5081da177e4SLinus Torvalds  * If there are few enough extents to fit into
5091da177e4SLinus Torvalds  * the if_inline_ext, then copy them there.
5101da177e4SLinus Torvalds  * Otherwise allocate a buffer for them and copy
5111da177e4SLinus Torvalds  * them into it.  Either way, set if_extents
5121da177e4SLinus Torvalds  * to point at the extents.
5131da177e4SLinus Torvalds  */
5141da177e4SLinus Torvalds STATIC int
5151da177e4SLinus Torvalds xfs_iformat_extents(
5161da177e4SLinus Torvalds 	xfs_inode_t	*ip,
5171da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
5181da177e4SLinus Torvalds 	int		whichfork)
5191da177e4SLinus Torvalds {
520a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t	*dp;
5211da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
5221da177e4SLinus Torvalds 	int		nex;
5231da177e4SLinus Torvalds 	int		size;
5241da177e4SLinus Torvalds 	int		i;
5251da177e4SLinus Torvalds 
5261da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
5271da177e4SLinus Torvalds 	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
5281da177e4SLinus Torvalds 	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds 	/*
5311da177e4SLinus Torvalds 	 * If the number of extents is unreasonable, then something
5321da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
5331da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
5341da177e4SLinus Torvalds 	 */
5351da177e4SLinus Torvalds 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
53665333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
5371da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, nex);
5381da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
5391da177e4SLinus Torvalds 				     ip->i_mount, dip);
5401da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5411da177e4SLinus Torvalds 	}
5421da177e4SLinus Torvalds 
5434eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
5441da177e4SLinus Torvalds 	if (nex == 0)
5451da177e4SLinus Torvalds 		ifp->if_u1.if_extents = NULL;
5461da177e4SLinus Torvalds 	else if (nex <= XFS_INLINE_EXTS)
5471da177e4SLinus Torvalds 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
5484eea22f0SMandy Kirkconnell 	else
5494eea22f0SMandy Kirkconnell 		xfs_iext_add(ifp, 0, nex);
5504eea22f0SMandy Kirkconnell 
5511da177e4SLinus Torvalds 	ifp->if_bytes = size;
5521da177e4SLinus Torvalds 	if (size) {
5531da177e4SLinus Torvalds 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
554a6f64d4aSChristoph Hellwig 		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
5554eea22f0SMandy Kirkconnell 		for (i = 0; i < nex; i++, dp++) {
556a6f64d4aSChristoph Hellwig 			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
557597bca63SHarvey Harrison 			ep->l0 = get_unaligned_be64(&dp->l0);
558597bca63SHarvey Harrison 			ep->l1 = get_unaligned_be64(&dp->l1);
5591da177e4SLinus Torvalds 		}
5603a59c94cSEric Sandeen 		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
5611da177e4SLinus Torvalds 		if (whichfork != XFS_DATA_FORK ||
5621da177e4SLinus Torvalds 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
5631da177e4SLinus Torvalds 				if (unlikely(xfs_check_nostate_extents(
5644eea22f0SMandy Kirkconnell 				    ifp, 0, nex))) {
5651da177e4SLinus Torvalds 					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
5661da177e4SLinus Torvalds 							 XFS_ERRLEVEL_LOW,
5671da177e4SLinus Torvalds 							 ip->i_mount);
5681da177e4SLinus Torvalds 					return XFS_ERROR(EFSCORRUPTED);
5691da177e4SLinus Torvalds 				}
5701da177e4SLinus Torvalds 	}
5711da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
5721da177e4SLinus Torvalds 	return 0;
5731da177e4SLinus Torvalds }
5741da177e4SLinus Torvalds 
5751da177e4SLinus Torvalds /*
5761da177e4SLinus Torvalds  * The file has too many extents to fit into
5771da177e4SLinus Torvalds  * the inode, so they are in B-tree format.
5781da177e4SLinus Torvalds  * Allocate a buffer for the root of the B-tree
5791da177e4SLinus Torvalds  * and copy the root into it.  The i_extents
5801da177e4SLinus Torvalds  * field will remain NULL until all of the
5811da177e4SLinus Torvalds  * extents are read in (when they are needed).
5821da177e4SLinus Torvalds  */
5831da177e4SLinus Torvalds STATIC int
5841da177e4SLinus Torvalds xfs_iformat_btree(
5851da177e4SLinus Torvalds 	xfs_inode_t		*ip,
5861da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
5871da177e4SLinus Torvalds 	int			whichfork)
5881da177e4SLinus Torvalds {
5891da177e4SLinus Torvalds 	xfs_bmdr_block_t	*dfp;
5901da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
5911da177e4SLinus Torvalds 	/* REFERENCED */
5921da177e4SLinus Torvalds 	int			nrecs;
5931da177e4SLinus Torvalds 	int			size;
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
5961da177e4SLinus Torvalds 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
5971da177e4SLinus Torvalds 	size = XFS_BMAP_BROOT_SPACE(dfp);
59860197e8dSChristoph Hellwig 	nrecs = be16_to_cpu(dfp->bb_numrecs);
5991da177e4SLinus Torvalds 
6001da177e4SLinus Torvalds 	/*
6011da177e4SLinus Torvalds 	 * blow out if -- fork has less extents than can fit in
6021da177e4SLinus Torvalds 	 * fork (fork shouldn't be a btree format), root btree
6031da177e4SLinus Torvalds 	 * block has more records than can fit into the fork,
6041da177e4SLinus Torvalds 	 * or the number of extents is greater than the number of
6051da177e4SLinus Torvalds 	 * blocks.
6061da177e4SLinus Torvalds 	 */
6071da177e4SLinus Torvalds 	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
6081da177e4SLinus Torvalds 	    || XFS_BMDR_SPACE_CALC(nrecs) >
6091da177e4SLinus Torvalds 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
6101da177e4SLinus Torvalds 	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
61165333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
6121da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino);
61365333b4cSDave Chinner 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
61465333b4cSDave Chinner 				 ip->i_mount, dip);
6151da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
6161da177e4SLinus Torvalds 	}
6171da177e4SLinus Torvalds 
6181da177e4SLinus Torvalds 	ifp->if_broot_bytes = size;
6194a7edddcSDave Chinner 	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
6201da177e4SLinus Torvalds 	ASSERT(ifp->if_broot != NULL);
6211da177e4SLinus Torvalds 	/*
6221da177e4SLinus Torvalds 	 * Copy and convert from the on-disk structure
6231da177e4SLinus Torvalds 	 * to the in-memory structure.
6241da177e4SLinus Torvalds 	 */
62560197e8dSChristoph Hellwig 	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
62660197e8dSChristoph Hellwig 			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
6271da177e4SLinus Torvalds 			 ifp->if_broot, size);
6281da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
6291da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFBROOT;
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	return 0;
6321da177e4SLinus Torvalds }
6331da177e4SLinus Torvalds 
634d96f8f89SEric Sandeen STATIC void
635347d1c01SChristoph Hellwig xfs_dinode_from_disk(
636347d1c01SChristoph Hellwig 	xfs_icdinode_t		*to,
63781591fe2SChristoph Hellwig 	xfs_dinode_t		*from)
6381da177e4SLinus Torvalds {
639347d1c01SChristoph Hellwig 	to->di_magic = be16_to_cpu(from->di_magic);
640347d1c01SChristoph Hellwig 	to->di_mode = be16_to_cpu(from->di_mode);
641347d1c01SChristoph Hellwig 	to->di_version = from ->di_version;
642347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
643347d1c01SChristoph Hellwig 	to->di_onlink = be16_to_cpu(from->di_onlink);
644347d1c01SChristoph Hellwig 	to->di_uid = be32_to_cpu(from->di_uid);
645347d1c01SChristoph Hellwig 	to->di_gid = be32_to_cpu(from->di_gid);
646347d1c01SChristoph Hellwig 	to->di_nlink = be32_to_cpu(from->di_nlink);
6476743099cSArkadiusz Mi?kiewicz 	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
6486743099cSArkadiusz Mi?kiewicz 	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
649347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
650347d1c01SChristoph Hellwig 	to->di_flushiter = be16_to_cpu(from->di_flushiter);
651347d1c01SChristoph Hellwig 	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
652347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
653347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
654347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
655347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
656347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
657347d1c01SChristoph Hellwig 	to->di_size = be64_to_cpu(from->di_size);
658347d1c01SChristoph Hellwig 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
659347d1c01SChristoph Hellwig 	to->di_extsize = be32_to_cpu(from->di_extsize);
660347d1c01SChristoph Hellwig 	to->di_nextents = be32_to_cpu(from->di_nextents);
661347d1c01SChristoph Hellwig 	to->di_anextents = be16_to_cpu(from->di_anextents);
662347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
663347d1c01SChristoph Hellwig 	to->di_aformat	= from->di_aformat;
664347d1c01SChristoph Hellwig 	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
665347d1c01SChristoph Hellwig 	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
666347d1c01SChristoph Hellwig 	to->di_flags	= be16_to_cpu(from->di_flags);
667347d1c01SChristoph Hellwig 	to->di_gen	= be32_to_cpu(from->di_gen);
6681da177e4SLinus Torvalds }
6691da177e4SLinus Torvalds 
670347d1c01SChristoph Hellwig void
671347d1c01SChristoph Hellwig xfs_dinode_to_disk(
67281591fe2SChristoph Hellwig 	xfs_dinode_t		*to,
673347d1c01SChristoph Hellwig 	xfs_icdinode_t		*from)
674347d1c01SChristoph Hellwig {
675347d1c01SChristoph Hellwig 	to->di_magic = cpu_to_be16(from->di_magic);
676347d1c01SChristoph Hellwig 	to->di_mode = cpu_to_be16(from->di_mode);
677347d1c01SChristoph Hellwig 	to->di_version = from ->di_version;
678347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
679347d1c01SChristoph Hellwig 	to->di_onlink = cpu_to_be16(from->di_onlink);
680347d1c01SChristoph Hellwig 	to->di_uid = cpu_to_be32(from->di_uid);
681347d1c01SChristoph Hellwig 	to->di_gid = cpu_to_be32(from->di_gid);
682347d1c01SChristoph Hellwig 	to->di_nlink = cpu_to_be32(from->di_nlink);
6836743099cSArkadiusz Mi?kiewicz 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
6846743099cSArkadiusz Mi?kiewicz 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
685347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
686347d1c01SChristoph Hellwig 	to->di_flushiter = cpu_to_be16(from->di_flushiter);
687347d1c01SChristoph Hellwig 	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
688347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
689347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
690347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
691347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
692347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
693347d1c01SChristoph Hellwig 	to->di_size = cpu_to_be64(from->di_size);
694347d1c01SChristoph Hellwig 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
695347d1c01SChristoph Hellwig 	to->di_extsize = cpu_to_be32(from->di_extsize);
696347d1c01SChristoph Hellwig 	to->di_nextents = cpu_to_be32(from->di_nextents);
697347d1c01SChristoph Hellwig 	to->di_anextents = cpu_to_be16(from->di_anextents);
698347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
699347d1c01SChristoph Hellwig 	to->di_aformat = from->di_aformat;
700347d1c01SChristoph Hellwig 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
701347d1c01SChristoph Hellwig 	to->di_dmstate = cpu_to_be16(from->di_dmstate);
702347d1c01SChristoph Hellwig 	to->di_flags = cpu_to_be16(from->di_flags);
703347d1c01SChristoph Hellwig 	to->di_gen = cpu_to_be32(from->di_gen);
7041da177e4SLinus Torvalds }
7051da177e4SLinus Torvalds 
7061da177e4SLinus Torvalds STATIC uint
7071da177e4SLinus Torvalds _xfs_dic2xflags(
7081da177e4SLinus Torvalds 	__uint16_t		di_flags)
7091da177e4SLinus Torvalds {
7101da177e4SLinus Torvalds 	uint			flags = 0;
7111da177e4SLinus Torvalds 
7121da177e4SLinus Torvalds 	if (di_flags & XFS_DIFLAG_ANY) {
7131da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_REALTIME)
7141da177e4SLinus Torvalds 			flags |= XFS_XFLAG_REALTIME;
7151da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PREALLOC)
7161da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PREALLOC;
7171da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_IMMUTABLE)
7181da177e4SLinus Torvalds 			flags |= XFS_XFLAG_IMMUTABLE;
7191da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_APPEND)
7201da177e4SLinus Torvalds 			flags |= XFS_XFLAG_APPEND;
7211da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_SYNC)
7221da177e4SLinus Torvalds 			flags |= XFS_XFLAG_SYNC;
7231da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOATIME)
7241da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOATIME;
7251da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NODUMP)
7261da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NODUMP;
7271da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_RTINHERIT)
7281da177e4SLinus Torvalds 			flags |= XFS_XFLAG_RTINHERIT;
7291da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PROJINHERIT)
7301da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PROJINHERIT;
7311da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
7321da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOSYMLINKS;
733dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSIZE)
734dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSIZE;
735dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
736dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSZINHERIT;
737d3446eacSBarry Naujok 		if (di_flags & XFS_DIFLAG_NODEFRAG)
738d3446eacSBarry Naujok 			flags |= XFS_XFLAG_NODEFRAG;
7392a82b8beSDavid Chinner 		if (di_flags & XFS_DIFLAG_FILESTREAM)
7402a82b8beSDavid Chinner 			flags |= XFS_XFLAG_FILESTREAM;
7411da177e4SLinus Torvalds 	}
7421da177e4SLinus Torvalds 
7431da177e4SLinus Torvalds 	return flags;
7441da177e4SLinus Torvalds }
7451da177e4SLinus Torvalds 
7461da177e4SLinus Torvalds uint
7471da177e4SLinus Torvalds xfs_ip2xflags(
7481da177e4SLinus Torvalds 	xfs_inode_t		*ip)
7491da177e4SLinus Torvalds {
750347d1c01SChristoph Hellwig 	xfs_icdinode_t		*dic = &ip->i_d;
7511da177e4SLinus Torvalds 
752a916e2bdSNathan Scott 	return _xfs_dic2xflags(dic->di_flags) |
75345ba598eSChristoph Hellwig 				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
7541da177e4SLinus Torvalds }
7551da177e4SLinus Torvalds 
7561da177e4SLinus Torvalds uint
7571da177e4SLinus Torvalds xfs_dic2xflags(
75845ba598eSChristoph Hellwig 	xfs_dinode_t		*dip)
7591da177e4SLinus Torvalds {
76081591fe2SChristoph Hellwig 	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
76145ba598eSChristoph Hellwig 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
7621da177e4SLinus Torvalds }
7631da177e4SLinus Torvalds 
7641da177e4SLinus Torvalds /*
76524f211baSChristoph Hellwig  * Read the disk inode attributes into the in-core inode structure.
7661da177e4SLinus Torvalds  */
7671da177e4SLinus Torvalds int
7681da177e4SLinus Torvalds xfs_iread(
7691da177e4SLinus Torvalds 	xfs_mount_t	*mp,
7701da177e4SLinus Torvalds 	xfs_trans_t	*tp,
77124f211baSChristoph Hellwig 	xfs_inode_t	*ip,
77224f211baSChristoph Hellwig 	uint		iget_flags)
7731da177e4SLinus Torvalds {
7741da177e4SLinus Torvalds 	xfs_buf_t	*bp;
7751da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
7761da177e4SLinus Torvalds 	int		error;
7771da177e4SLinus Torvalds 
7781da177e4SLinus Torvalds 	/*
77992bfc6e7SChristoph Hellwig 	 * Fill in the location information in the in-core inode.
7801da177e4SLinus Torvalds 	 */
78124f211baSChristoph Hellwig 	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
7829ed0451eSChristoph Hellwig 	if (error)
78324f211baSChristoph Hellwig 		return error;
7841da177e4SLinus Torvalds 
7851da177e4SLinus Torvalds 	/*
78692bfc6e7SChristoph Hellwig 	 * Get pointers to the on-disk inode and the buffer containing it.
78776d8b277SChristoph Hellwig 	 */
78892bfc6e7SChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
7890cadda1cSChristoph Hellwig 			       XBF_LOCK, iget_flags);
79076d8b277SChristoph Hellwig 	if (error)
79124f211baSChristoph Hellwig 		return error;
79292bfc6e7SChristoph Hellwig 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
79376d8b277SChristoph Hellwig 
79476d8b277SChristoph Hellwig 	/*
7951da177e4SLinus Torvalds 	 * If we got something that isn't an inode it means someone
7961da177e4SLinus Torvalds 	 * (nfs or dmi) has a stale handle.
7971da177e4SLinus Torvalds 	 */
79869ef921bSChristoph Hellwig 	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
7991da177e4SLinus Torvalds #ifdef DEBUG
80053487786SDave Chinner 		xfs_alert(mp,
80153487786SDave Chinner 			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
80253487786SDave Chinner 			__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
8031da177e4SLinus Torvalds #endif /* DEBUG */
8049ed0451eSChristoph Hellwig 		error = XFS_ERROR(EINVAL);
8059ed0451eSChristoph Hellwig 		goto out_brelse;
8061da177e4SLinus Torvalds 	}
8071da177e4SLinus Torvalds 
8081da177e4SLinus Torvalds 	/*
8091da177e4SLinus Torvalds 	 * If the on-disk inode is already linked to a directory
8101da177e4SLinus Torvalds 	 * entry, copy all of the inode into the in-core inode.
8111da177e4SLinus Torvalds 	 * xfs_iformat() handles copying in the inode format
8121da177e4SLinus Torvalds 	 * specific information.
8131da177e4SLinus Torvalds 	 * Otherwise, just get the truly permanent information.
8141da177e4SLinus Torvalds 	 */
81581591fe2SChristoph Hellwig 	if (dip->di_mode) {
81681591fe2SChristoph Hellwig 		xfs_dinode_from_disk(&ip->i_d, dip);
8171da177e4SLinus Torvalds 		error = xfs_iformat(ip, dip);
8181da177e4SLinus Torvalds 		if (error)  {
8191da177e4SLinus Torvalds #ifdef DEBUG
82053487786SDave Chinner 			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
82153487786SDave Chinner 				__func__, error);
8221da177e4SLinus Torvalds #endif /* DEBUG */
8239ed0451eSChristoph Hellwig 			goto out_brelse;
8241da177e4SLinus Torvalds 		}
8251da177e4SLinus Torvalds 	} else {
82681591fe2SChristoph Hellwig 		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
82781591fe2SChristoph Hellwig 		ip->i_d.di_version = dip->di_version;
82881591fe2SChristoph Hellwig 		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
82981591fe2SChristoph Hellwig 		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
8301da177e4SLinus Torvalds 		/*
8311da177e4SLinus Torvalds 		 * Make sure to pull in the mode here as well in
8321da177e4SLinus Torvalds 		 * case the inode is released without being used.
8331da177e4SLinus Torvalds 		 * This ensures that xfs_inactive() will see that
8341da177e4SLinus Torvalds 		 * the inode is already free and not try to mess
8351da177e4SLinus Torvalds 		 * with the uninitialized part of it.
8361da177e4SLinus Torvalds 		 */
8371da177e4SLinus Torvalds 		ip->i_d.di_mode = 0;
8381da177e4SLinus Torvalds 		/*
8391da177e4SLinus Torvalds 		 * Initialize the per-fork minima and maxima for a new
8401da177e4SLinus Torvalds 		 * inode here.  xfs_iformat will do it for old inodes.
8411da177e4SLinus Torvalds 		 */
8421da177e4SLinus Torvalds 		ip->i_df.if_ext_max =
8431da177e4SLinus Torvalds 			XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
8441da177e4SLinus Torvalds 	}
8451da177e4SLinus Torvalds 
8461da177e4SLinus Torvalds 	/*
8471da177e4SLinus Torvalds 	 * The inode format changed when we moved the link count and
8481da177e4SLinus Torvalds 	 * made it 32 bits long.  If this is an old format inode,
8491da177e4SLinus Torvalds 	 * convert it in memory to look like a new one.  If it gets
8501da177e4SLinus Torvalds 	 * flushed to disk we will convert back before flushing or
8511da177e4SLinus Torvalds 	 * logging it.  We zero out the new projid field and the old link
8521da177e4SLinus Torvalds 	 * count field.  We'll handle clearing the pad field (the remains
8531da177e4SLinus Torvalds 	 * of the old uuid field) when we actually convert the inode to
8541da177e4SLinus Torvalds 	 * the new format. We don't change the version number so that we
8551da177e4SLinus Torvalds 	 * can distinguish this from a real new format inode.
8561da177e4SLinus Torvalds 	 */
85751ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
8581da177e4SLinus Torvalds 		ip->i_d.di_nlink = ip->i_d.di_onlink;
8591da177e4SLinus Torvalds 		ip->i_d.di_onlink = 0;
8606743099cSArkadiusz Mi?kiewicz 		xfs_set_projid(ip, 0);
8611da177e4SLinus Torvalds 	}
8621da177e4SLinus Torvalds 
8631da177e4SLinus Torvalds 	ip->i_delayed_blks = 0;
864ba87ea69SLachlan McIlroy 	ip->i_size = ip->i_d.di_size;
8651da177e4SLinus Torvalds 
8661da177e4SLinus Torvalds 	/*
8671da177e4SLinus Torvalds 	 * Mark the buffer containing the inode as something to keep
8681da177e4SLinus Torvalds 	 * around for a while.  This helps to keep recently accessed
8691da177e4SLinus Torvalds 	 * meta-data in-core longer.
8701da177e4SLinus Torvalds 	 */
871821eb21dSDave Chinner 	xfs_buf_set_ref(bp, XFS_INO_REF);
8721da177e4SLinus Torvalds 
8731da177e4SLinus Torvalds 	/*
8741da177e4SLinus Torvalds 	 * Use xfs_trans_brelse() to release the buffer containing the
8751da177e4SLinus Torvalds 	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
8761da177e4SLinus Torvalds 	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
8771da177e4SLinus Torvalds 	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
8781da177e4SLinus Torvalds 	 * will only release the buffer if it is not dirty within the
8791da177e4SLinus Torvalds 	 * transaction.  It will be OK to release the buffer in this case,
8801da177e4SLinus Torvalds 	 * because inodes on disk are never destroyed and we will be
8811da177e4SLinus Torvalds 	 * locking the new in-core inode before putting it in the hash
8821da177e4SLinus Torvalds 	 * table where other processes can find it.  Thus we don't have
8831da177e4SLinus Torvalds 	 * to worry about the inode being changed just because we released
8841da177e4SLinus Torvalds 	 * the buffer.
8851da177e4SLinus Torvalds 	 */
8869ed0451eSChristoph Hellwig  out_brelse:
8879ed0451eSChristoph Hellwig 	xfs_trans_brelse(tp, bp);
8889ed0451eSChristoph Hellwig 	return error;
8891da177e4SLinus Torvalds }
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds /*
8921da177e4SLinus Torvalds  * Read in extents from a btree-format inode.
8931da177e4SLinus Torvalds  * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
8941da177e4SLinus Torvalds  */
8951da177e4SLinus Torvalds int
8961da177e4SLinus Torvalds xfs_iread_extents(
8971da177e4SLinus Torvalds 	xfs_trans_t	*tp,
8981da177e4SLinus Torvalds 	xfs_inode_t	*ip,
8991da177e4SLinus Torvalds 	int		whichfork)
9001da177e4SLinus Torvalds {
9011da177e4SLinus Torvalds 	int		error;
9021da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
9034eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;
9041da177e4SLinus Torvalds 
9051da177e4SLinus Torvalds 	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
9061da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
9071da177e4SLinus Torvalds 				 ip->i_mount);
9081da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
9091da177e4SLinus Torvalds 	}
9104eea22f0SMandy Kirkconnell 	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
9111da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
9124eea22f0SMandy Kirkconnell 
9131da177e4SLinus Torvalds 	/*
9141da177e4SLinus Torvalds 	 * We know that the size is valid (it's checked in iformat_btree)
9151da177e4SLinus Torvalds 	 */
9164eea22f0SMandy Kirkconnell 	ifp->if_bytes = ifp->if_real_bytes = 0;
9171da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
9184eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, 0, nextents);
9191da177e4SLinus Torvalds 	error = xfs_bmap_read_extents(tp, ip, whichfork);
9201da177e4SLinus Torvalds 	if (error) {
9214eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
9221da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFEXTENTS;
9231da177e4SLinus Torvalds 		return error;
9241da177e4SLinus Torvalds 	}
925a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
9261da177e4SLinus Torvalds 	return 0;
9271da177e4SLinus Torvalds }
9281da177e4SLinus Torvalds 
9291da177e4SLinus Torvalds /*
9301da177e4SLinus Torvalds  * Allocate an inode on disk and return a copy of its in-core version.
9311da177e4SLinus Torvalds  * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
9321da177e4SLinus Torvalds  * appropriately within the inode.  The uid and gid for the inode are
9331da177e4SLinus Torvalds  * set according to the contents of the given cred structure.
9341da177e4SLinus Torvalds  *
9351da177e4SLinus Torvalds  * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
9361da177e4SLinus Torvalds  * has a free inode available, call xfs_iget()
9371da177e4SLinus Torvalds  * to obtain the in-core version of the allocated inode.  Finally,
9381da177e4SLinus Torvalds  * fill in the inode and log its initial contents.  In this case,
9391da177e4SLinus Torvalds  * ialloc_context would be set to NULL and call_again set to false.
9401da177e4SLinus Torvalds  *
9411da177e4SLinus Torvalds  * If xfs_dialloc() does not have an available inode,
9421da177e4SLinus Torvalds  * it will replenish its supply by doing an allocation. Since we can
9431da177e4SLinus Torvalds  * only do one allocation within a transaction without deadlocks, we
9441da177e4SLinus Torvalds  * must commit the current transaction before returning the inode itself.
9451da177e4SLinus Torvalds  * In this case, therefore, we will set call_again to true and return.
9461da177e4SLinus Torvalds  * The caller should then commit the current transaction, start a new
9471da177e4SLinus Torvalds  * transaction, and call xfs_ialloc() again to actually get the inode.
9481da177e4SLinus Torvalds  *
9491da177e4SLinus Torvalds  * To ensure that some other process does not grab the inode that
9501da177e4SLinus Torvalds  * was allocated during the first call to xfs_ialloc(), this routine
9511da177e4SLinus Torvalds  * also returns the [locked] bp pointing to the head of the freelist
9521da177e4SLinus Torvalds  * as ialloc_context.  The caller should hold this buffer across
9531da177e4SLinus Torvalds  * the commit and pass it back into this routine on the second call.
954b11f94d5SDavid Chinner  *
955b11f94d5SDavid Chinner  * If we are allocating quota inodes, we do not have a parent inode
956b11f94d5SDavid Chinner  * to attach to or associate with (i.e. pip == NULL) because they
957b11f94d5SDavid Chinner  * are not linked into the directory structure - they are attached
958b11f94d5SDavid Chinner  * directly to the superblock - and so have no parent.
9591da177e4SLinus Torvalds  */
9601da177e4SLinus Torvalds int
9611da177e4SLinus Torvalds xfs_ialloc(
9621da177e4SLinus Torvalds 	xfs_trans_t	*tp,
9631da177e4SLinus Torvalds 	xfs_inode_t	*pip,
964576b1d67SAl Viro 	umode_t		mode,
96531b084aeSNathan Scott 	xfs_nlink_t	nlink,
9661da177e4SLinus Torvalds 	xfs_dev_t	rdev,
9676743099cSArkadiusz Mi?kiewicz 	prid_t		prid,
9681da177e4SLinus Torvalds 	int		okalloc,
9691da177e4SLinus Torvalds 	xfs_buf_t	**ialloc_context,
9701da177e4SLinus Torvalds 	boolean_t	*call_again,
9711da177e4SLinus Torvalds 	xfs_inode_t	**ipp)
9721da177e4SLinus Torvalds {
9731da177e4SLinus Torvalds 	xfs_ino_t	ino;
9741da177e4SLinus Torvalds 	xfs_inode_t	*ip;
9751da177e4SLinus Torvalds 	uint		flags;
9761da177e4SLinus Torvalds 	int		error;
977dff35fd4SChristoph Hellwig 	timespec_t	tv;
978bf904248SDavid Chinner 	int		filestreams = 0;
9791da177e4SLinus Torvalds 
9801da177e4SLinus Torvalds 	/*
9811da177e4SLinus Torvalds 	 * Call the space management code to pick
9821da177e4SLinus Torvalds 	 * the on-disk inode to be allocated.
9831da177e4SLinus Torvalds 	 */
984b11f94d5SDavid Chinner 	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
9851da177e4SLinus Torvalds 			    ialloc_context, call_again, &ino);
986bf904248SDavid Chinner 	if (error)
9871da177e4SLinus Torvalds 		return error;
9881da177e4SLinus Torvalds 	if (*call_again || ino == NULLFSINO) {
9891da177e4SLinus Torvalds 		*ipp = NULL;
9901da177e4SLinus Torvalds 		return 0;
9911da177e4SLinus Torvalds 	}
9921da177e4SLinus Torvalds 	ASSERT(*ialloc_context == NULL);
9931da177e4SLinus Torvalds 
9941da177e4SLinus Torvalds 	/*
9951da177e4SLinus Torvalds 	 * Get the in-core inode with the lock held exclusively.
9961da177e4SLinus Torvalds 	 * This is because we're setting fields here we need
9971da177e4SLinus Torvalds 	 * to prevent others from looking at until we're done.
9981da177e4SLinus Torvalds 	 */
999ec3ba85fSChristoph Hellwig 	error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
1000ec3ba85fSChristoph Hellwig 			 XFS_ILOCK_EXCL, &ip);
1001bf904248SDavid Chinner 	if (error)
10021da177e4SLinus Torvalds 		return error;
10031da177e4SLinus Torvalds 	ASSERT(ip != NULL);
10041da177e4SLinus Torvalds 
1005576b1d67SAl Viro 	ip->i_d.di_mode = mode;
10061da177e4SLinus Torvalds 	ip->i_d.di_onlink = 0;
10071da177e4SLinus Torvalds 	ip->i_d.di_nlink = nlink;
10081da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == nlink);
10099e2b2dc4SDavid Howells 	ip->i_d.di_uid = current_fsuid();
10109e2b2dc4SDavid Howells 	ip->i_d.di_gid = current_fsgid();
10116743099cSArkadiusz Mi?kiewicz 	xfs_set_projid(ip, prid);
10121da177e4SLinus Torvalds 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
10131da177e4SLinus Torvalds 
10141da177e4SLinus Torvalds 	/*
10151da177e4SLinus Torvalds 	 * If the superblock version is up to where we support new format
10161da177e4SLinus Torvalds 	 * inodes and this is currently an old format inode, then change
10171da177e4SLinus Torvalds 	 * the inode version number now.  This way we only do the conversion
10181da177e4SLinus Torvalds 	 * here rather than here and in the flush/logging code.
10191da177e4SLinus Torvalds 	 */
102062118709SEric Sandeen 	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
102151ce16d5SChristoph Hellwig 	    ip->i_d.di_version == 1) {
102251ce16d5SChristoph Hellwig 		ip->i_d.di_version = 2;
10231da177e4SLinus Torvalds 		/*
10241da177e4SLinus Torvalds 		 * We've already zeroed the old link count, the projid field,
10251da177e4SLinus Torvalds 		 * and the pad field.
10261da177e4SLinus Torvalds 		 */
10271da177e4SLinus Torvalds 	}
10281da177e4SLinus Torvalds 
10291da177e4SLinus Torvalds 	/*
10301da177e4SLinus Torvalds 	 * Project ids won't be stored on disk if we are using a version 1 inode.
10311da177e4SLinus Torvalds 	 */
103251ce16d5SChristoph Hellwig 	if ((prid != 0) && (ip->i_d.di_version == 1))
10331da177e4SLinus Torvalds 		xfs_bump_ino_vers2(tp, ip);
10341da177e4SLinus Torvalds 
1035bd186aa9SChristoph Hellwig 	if (pip && XFS_INHERIT_GID(pip)) {
10361da177e4SLinus Torvalds 		ip->i_d.di_gid = pip->i_d.di_gid;
1037abbede1bSAl Viro 		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
10381da177e4SLinus Torvalds 			ip->i_d.di_mode |= S_ISGID;
10391da177e4SLinus Torvalds 		}
10401da177e4SLinus Torvalds 	}
10411da177e4SLinus Torvalds 
10421da177e4SLinus Torvalds 	/*
10431da177e4SLinus Torvalds 	 * If the group ID of the new file does not match the effective group
10441da177e4SLinus Torvalds 	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
10451da177e4SLinus Torvalds 	 * (and only if the irix_sgid_inherit compatibility variable is set).
10461da177e4SLinus Torvalds 	 */
10471da177e4SLinus Torvalds 	if ((irix_sgid_inherit) &&
10481da177e4SLinus Torvalds 	    (ip->i_d.di_mode & S_ISGID) &&
10491da177e4SLinus Torvalds 	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
10501da177e4SLinus Torvalds 		ip->i_d.di_mode &= ~S_ISGID;
10511da177e4SLinus Torvalds 	}
10521da177e4SLinus Torvalds 
10531da177e4SLinus Torvalds 	ip->i_d.di_size = 0;
1054ba87ea69SLachlan McIlroy 	ip->i_size = 0;
10551da177e4SLinus Torvalds 	ip->i_d.di_nextents = 0;
10561da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
1057dff35fd4SChristoph Hellwig 
1058dff35fd4SChristoph Hellwig 	nanotime(&tv);
1059dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
1060dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
1061dff35fd4SChristoph Hellwig 	ip->i_d.di_atime = ip->i_d.di_mtime;
1062dff35fd4SChristoph Hellwig 	ip->i_d.di_ctime = ip->i_d.di_mtime;
1063dff35fd4SChristoph Hellwig 
10641da177e4SLinus Torvalds 	/*
10651da177e4SLinus Torvalds 	 * di_gen will have been taken care of in xfs_iread.
10661da177e4SLinus Torvalds 	 */
10671da177e4SLinus Torvalds 	ip->i_d.di_extsize = 0;
10681da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
10691da177e4SLinus Torvalds 	ip->i_d.di_dmstate = 0;
10701da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
10711da177e4SLinus Torvalds 	flags = XFS_ILOG_CORE;
10721da177e4SLinus Torvalds 	switch (mode & S_IFMT) {
10731da177e4SLinus Torvalds 	case S_IFIFO:
10741da177e4SLinus Torvalds 	case S_IFCHR:
10751da177e4SLinus Torvalds 	case S_IFBLK:
10761da177e4SLinus Torvalds 	case S_IFSOCK:
10771da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
10781da177e4SLinus Torvalds 		ip->i_df.if_u2.if_rdev = rdev;
10791da177e4SLinus Torvalds 		ip->i_df.if_flags = 0;
10801da177e4SLinus Torvalds 		flags |= XFS_ILOG_DEV;
10811da177e4SLinus Torvalds 		break;
10821da177e4SLinus Torvalds 	case S_IFREG:
1083bf904248SDavid Chinner 		/*
1084bf904248SDavid Chinner 		 * we can't set up filestreams until after the VFS inode
1085bf904248SDavid Chinner 		 * is set up properly.
1086bf904248SDavid Chinner 		 */
1087bf904248SDavid Chinner 		if (pip && xfs_inode_is_filestream(pip))
1088bf904248SDavid Chinner 			filestreams = 1;
10892a82b8beSDavid Chinner 		/* fall through */
10901da177e4SLinus Torvalds 	case S_IFDIR:
1091b11f94d5SDavid Chinner 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1092365ca83dSNathan Scott 			uint	di_flags = 0;
1093365ca83dSNathan Scott 
1094abbede1bSAl Viro 			if (S_ISDIR(mode)) {
1095365ca83dSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1096365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_RTINHERIT;
1097dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1098dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1099dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1100dd9f438eSNathan Scott 				}
1101abbede1bSAl Viro 			} else if (S_ISREG(mode)) {
1102613d7043SChristoph Hellwig 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1103365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_REALTIME;
1104dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1105dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSIZE;
1106dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1107dd9f438eSNathan Scott 				}
11081da177e4SLinus Torvalds 			}
11091da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
11101da177e4SLinus Torvalds 			    xfs_inherit_noatime)
1111365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOATIME;
11121da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
11131da177e4SLinus Torvalds 			    xfs_inherit_nodump)
1114365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NODUMP;
11151da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
11161da177e4SLinus Torvalds 			    xfs_inherit_sync)
1117365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_SYNC;
11181da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
11191da177e4SLinus Torvalds 			    xfs_inherit_nosymlinks)
1120365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
1121365ca83dSNathan Scott 			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1122365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_PROJINHERIT;
1123d3446eacSBarry Naujok 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1124d3446eacSBarry Naujok 			    xfs_inherit_nodefrag)
1125d3446eacSBarry Naujok 				di_flags |= XFS_DIFLAG_NODEFRAG;
11262a82b8beSDavid Chinner 			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
11272a82b8beSDavid Chinner 				di_flags |= XFS_DIFLAG_FILESTREAM;
1128365ca83dSNathan Scott 			ip->i_d.di_flags |= di_flags;
11291da177e4SLinus Torvalds 		}
11301da177e4SLinus Torvalds 		/* FALLTHROUGH */
11311da177e4SLinus Torvalds 	case S_IFLNK:
11321da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
11331da177e4SLinus Torvalds 		ip->i_df.if_flags = XFS_IFEXTENTS;
11341da177e4SLinus Torvalds 		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
11351da177e4SLinus Torvalds 		ip->i_df.if_u1.if_extents = NULL;
11361da177e4SLinus Torvalds 		break;
11371da177e4SLinus Torvalds 	default:
11381da177e4SLinus Torvalds 		ASSERT(0);
11391da177e4SLinus Torvalds 	}
11401da177e4SLinus Torvalds 	/*
11411da177e4SLinus Torvalds 	 * Attribute fork settings for new inode.
11421da177e4SLinus Torvalds 	 */
11431da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
11441da177e4SLinus Torvalds 	ip->i_d.di_anextents = 0;
11451da177e4SLinus Torvalds 
11461da177e4SLinus Torvalds 	/*
11471da177e4SLinus Torvalds 	 * Log the new values stuffed into the inode.
11481da177e4SLinus Torvalds 	 */
1149ddc3415aSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
11501da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, flags);
11511da177e4SLinus Torvalds 
1152b83bd138SNathan Scott 	/* now that we have an i_mode we can setup inode ops and unlock */
115341be8bedSChristoph Hellwig 	xfs_setup_inode(ip);
11541da177e4SLinus Torvalds 
1155bf904248SDavid Chinner 	/* now we have set up the vfs inode we can associate the filestream */
1156bf904248SDavid Chinner 	if (filestreams) {
1157bf904248SDavid Chinner 		error = xfs_filestream_associate(pip, ip);
1158bf904248SDavid Chinner 		if (error < 0)
1159bf904248SDavid Chinner 			return -error;
1160bf904248SDavid Chinner 		if (!error)
1161bf904248SDavid Chinner 			xfs_iflags_set(ip, XFS_IFILESTREAM);
1162bf904248SDavid Chinner 	}
1163bf904248SDavid Chinner 
11641da177e4SLinus Torvalds 	*ipp = ip;
11651da177e4SLinus Torvalds 	return 0;
11661da177e4SLinus Torvalds }
11671da177e4SLinus Torvalds 
11681da177e4SLinus Torvalds /*
11698f04c47aSChristoph Hellwig  * Free up the underlying blocks past new_size.  The new size must be smaller
11708f04c47aSChristoph Hellwig  * than the current size.  This routine can be used both for the attribute and
11718f04c47aSChristoph Hellwig  * data fork, and does not modify the inode size, which is left to the caller.
11721da177e4SLinus Torvalds  *
1173f6485057SDavid Chinner  * The transaction passed to this routine must have made a permanent log
1174f6485057SDavid Chinner  * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
1175f6485057SDavid Chinner  * given transaction and start new ones, so make sure everything involved in
1176f6485057SDavid Chinner  * the transaction is tidy before calling here.  Some transaction will be
1177f6485057SDavid Chinner  * returned to the caller to be committed.  The incoming transaction must
1178f6485057SDavid Chinner  * already include the inode, and both inode locks must be held exclusively.
1179f6485057SDavid Chinner  * The inode must also be "held" within the transaction.  On return the inode
1180f6485057SDavid Chinner  * will be "held" within the returned transaction.  This routine does NOT
1181f6485057SDavid Chinner  * require any disk space to be reserved for it within the transaction.
11821da177e4SLinus Torvalds  *
1183f6485057SDavid Chinner  * If we get an error, we must return with the inode locked and linked into the
1184f6485057SDavid Chinner  * current transaction. This keeps things simple for the higher level code,
1185f6485057SDavid Chinner  * because it always knows that the inode is locked and held in the transaction
1186f6485057SDavid Chinner  * that returns to it whether errors occur or not.  We don't mark the inode
1187f6485057SDavid Chinner  * dirty on error so that transactions can be easily aborted if possible.
11881da177e4SLinus Torvalds  */
11891da177e4SLinus Torvalds int
11908f04c47aSChristoph Hellwig xfs_itruncate_extents(
11918f04c47aSChristoph Hellwig 	struct xfs_trans	**tpp,
11928f04c47aSChristoph Hellwig 	struct xfs_inode	*ip,
11938f04c47aSChristoph Hellwig 	int			whichfork,
11948f04c47aSChristoph Hellwig 	xfs_fsize_t		new_size)
11951da177e4SLinus Torvalds {
11968f04c47aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
11978f04c47aSChristoph Hellwig 	struct xfs_trans	*tp = *tpp;
11988f04c47aSChristoph Hellwig 	struct xfs_trans	*ntp;
11998f04c47aSChristoph Hellwig 	xfs_bmap_free_t		free_list;
12001da177e4SLinus Torvalds 	xfs_fsblock_t		first_block;
12011da177e4SLinus Torvalds 	xfs_fileoff_t		first_unmap_block;
12021da177e4SLinus Torvalds 	xfs_fileoff_t		last_block;
12038f04c47aSChristoph Hellwig 	xfs_filblks_t		unmap_len;
12041da177e4SLinus Torvalds 	int			committed;
12058f04c47aSChristoph Hellwig 	int			error = 0;
12068f04c47aSChristoph Hellwig 	int			done = 0;
12071da177e4SLinus Torvalds 
1208579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
12098f04c47aSChristoph Hellwig 	ASSERT(new_size <= ip->i_size);
12108f04c47aSChristoph Hellwig 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
12111da177e4SLinus Torvalds 	ASSERT(ip->i_itemp != NULL);
1212898621d5SChristoph Hellwig 	ASSERT(ip->i_itemp->ili_lock_flags == 0);
12131da177e4SLinus Torvalds 	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
12141da177e4SLinus Torvalds 
1215*673e8e59SChristoph Hellwig 	trace_xfs_itruncate_extents_start(ip, new_size);
1216*673e8e59SChristoph Hellwig 
12171da177e4SLinus Torvalds 	/*
12181da177e4SLinus Torvalds 	 * Since it is possible for space to become allocated beyond
12191da177e4SLinus Torvalds 	 * the end of the file (in a crash where the space is allocated
12201da177e4SLinus Torvalds 	 * but the inode size is not yet updated), simply remove any
12211da177e4SLinus Torvalds 	 * blocks which show up between the new EOF and the maximum
12221da177e4SLinus Torvalds 	 * possible file size.  If the first block to be removed is
12231da177e4SLinus Torvalds 	 * beyond the maximum file size (ie it is the same as last_block),
12241da177e4SLinus Torvalds 	 * then there is nothing to do.
12251da177e4SLinus Torvalds 	 */
12268f04c47aSChristoph Hellwig 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
12271da177e4SLinus Torvalds 	last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
12288f04c47aSChristoph Hellwig 	if (first_unmap_block == last_block)
12298f04c47aSChristoph Hellwig 		return 0;
12308f04c47aSChristoph Hellwig 
12318f04c47aSChristoph Hellwig 	ASSERT(first_unmap_block < last_block);
12321da177e4SLinus Torvalds 	unmap_len = last_block - first_unmap_block + 1;
12331da177e4SLinus Torvalds 	while (!done) {
12349d87c319SEric Sandeen 		xfs_bmap_init(&free_list, &first_block);
12358f04c47aSChristoph Hellwig 		error = xfs_bunmapi(tp, ip,
12363e57ecf6SOlaf Weber 				    first_unmap_block, unmap_len,
12378f04c47aSChristoph Hellwig 				    xfs_bmapi_aflag(whichfork),
12381da177e4SLinus Torvalds 				    XFS_ITRUNC_MAX_EXTENTS,
12393e57ecf6SOlaf Weber 				    &first_block, &free_list,
1240b4e9181eSChristoph Hellwig 				    &done);
12418f04c47aSChristoph Hellwig 		if (error)
12428f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
12431da177e4SLinus Torvalds 
12441da177e4SLinus Torvalds 		/*
12451da177e4SLinus Torvalds 		 * Duplicate the transaction that has the permanent
12461da177e4SLinus Torvalds 		 * reservation and commit the old transaction.
12471da177e4SLinus Torvalds 		 */
12488f04c47aSChristoph Hellwig 		error = xfs_bmap_finish(&tp, &free_list, &committed);
1249898621d5SChristoph Hellwig 		if (committed)
1250ddc3415aSChristoph Hellwig 			xfs_trans_ijoin(tp, ip, 0);
12518f04c47aSChristoph Hellwig 		if (error)
12528f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
12531da177e4SLinus Torvalds 
12541da177e4SLinus Torvalds 		if (committed) {
12551da177e4SLinus Torvalds 			/*
1256f6485057SDavid Chinner 			 * Mark the inode dirty so it will be logged and
1257e5720eecSDavid Chinner 			 * moved forward in the log as part of every commit.
12581da177e4SLinus Torvalds 			 */
12598f04c47aSChristoph Hellwig 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
12601da177e4SLinus Torvalds 		}
1261f6485057SDavid Chinner 
12628f04c47aSChristoph Hellwig 		ntp = xfs_trans_dup(tp);
12638f04c47aSChristoph Hellwig 		error = xfs_trans_commit(tp, 0);
12648f04c47aSChristoph Hellwig 		tp = ntp;
1265f6485057SDavid Chinner 
1266ddc3415aSChristoph Hellwig 		xfs_trans_ijoin(tp, ip, 0);
1267f6485057SDavid Chinner 
1268cc09c0dcSDave Chinner 		if (error)
12698f04c47aSChristoph Hellwig 			goto out;
12708f04c47aSChristoph Hellwig 
1271cc09c0dcSDave Chinner 		/*
12728f04c47aSChristoph Hellwig 		 * Transaction commit worked ok so we can drop the extra ticket
1273cc09c0dcSDave Chinner 		 * reference that we gained in xfs_trans_dup()
1274cc09c0dcSDave Chinner 		 */
12758f04c47aSChristoph Hellwig 		xfs_log_ticket_put(tp->t_ticket);
12768f04c47aSChristoph Hellwig 		error = xfs_trans_reserve(tp, 0,
1277f6485057SDavid Chinner 					XFS_ITRUNCATE_LOG_RES(mp), 0,
12781da177e4SLinus Torvalds 					XFS_TRANS_PERM_LOG_RES,
12791da177e4SLinus Torvalds 					XFS_ITRUNCATE_LOG_COUNT);
12801da177e4SLinus Torvalds 		if (error)
12818f04c47aSChristoph Hellwig 			goto out;
12821da177e4SLinus Torvalds 	}
12838f04c47aSChristoph Hellwig 
1284*673e8e59SChristoph Hellwig 	/*
1285*673e8e59SChristoph Hellwig 	 * Always re-log the inode so that our permanent transaction can keep
1286*673e8e59SChristoph Hellwig 	 * on rolling it forward in the log.
1287*673e8e59SChristoph Hellwig 	 */
1288*673e8e59SChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1289*673e8e59SChristoph Hellwig 
1290*673e8e59SChristoph Hellwig 	trace_xfs_itruncate_extents_end(ip, new_size);
1291*673e8e59SChristoph Hellwig 
12928f04c47aSChristoph Hellwig out:
12938f04c47aSChristoph Hellwig 	*tpp = tp;
12948f04c47aSChristoph Hellwig 	return error;
12958f04c47aSChristoph Hellwig out_bmap_cancel:
12961da177e4SLinus Torvalds 	/*
12978f04c47aSChristoph Hellwig 	 * If the bunmapi call encounters an error, return to the caller where
12988f04c47aSChristoph Hellwig 	 * the transaction can be properly aborted.  We just need to make sure
12998f04c47aSChristoph Hellwig 	 * we're not holding any resources that we were not when we came in.
13001da177e4SLinus Torvalds 	 */
13018f04c47aSChristoph Hellwig 	xfs_bmap_cancel(&free_list);
13028f04c47aSChristoph Hellwig 	goto out;
13038f04c47aSChristoph Hellwig }
13048f04c47aSChristoph Hellwig 
13051da177e4SLinus Torvalds /*
13061da177e4SLinus Torvalds  * This is called when the inode's link count goes to 0.
13071da177e4SLinus Torvalds  * We place the on-disk inode on a list in the AGI.  It
13081da177e4SLinus Torvalds  * will be pulled from this list when the inode is freed.
13091da177e4SLinus Torvalds  */
13101da177e4SLinus Torvalds int
13111da177e4SLinus Torvalds xfs_iunlink(
13121da177e4SLinus Torvalds 	xfs_trans_t	*tp,
13131da177e4SLinus Torvalds 	xfs_inode_t	*ip)
13141da177e4SLinus Torvalds {
13151da177e4SLinus Torvalds 	xfs_mount_t	*mp;
13161da177e4SLinus Torvalds 	xfs_agi_t	*agi;
13171da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
13181da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
13191da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
13201da177e4SLinus Torvalds 	xfs_agino_t	agino;
13211da177e4SLinus Torvalds 	short		bucket_index;
13221da177e4SLinus Torvalds 	int		offset;
13231da177e4SLinus Torvalds 	int		error;
13241da177e4SLinus Torvalds 
13251da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
13261da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_mode != 0);
13271da177e4SLinus Torvalds 
13281da177e4SLinus Torvalds 	mp = tp->t_mountp;
13291da177e4SLinus Torvalds 
13301da177e4SLinus Torvalds 	/*
13311da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
13321da177e4SLinus Torvalds 	 * on the list.
13331da177e4SLinus Torvalds 	 */
13345e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1335859d7182SVlad Apostolov 	if (error)
13361da177e4SLinus Torvalds 		return error;
13371da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
13385e1be0fbSChristoph Hellwig 
13391da177e4SLinus Torvalds 	/*
13401da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
13411da177e4SLinus Torvalds 	 * list this inode will go on.
13421da177e4SLinus Torvalds 	 */
13431da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
13441da177e4SLinus Torvalds 	ASSERT(agino != 0);
13451da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
13461da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
134716259e7dSChristoph Hellwig 	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
13481da177e4SLinus Torvalds 
134969ef921bSChristoph Hellwig 	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
13501da177e4SLinus Torvalds 		/*
13511da177e4SLinus Torvalds 		 * There is already another inode in the bucket we need
13521da177e4SLinus Torvalds 		 * to add ourselves to.  Add us at the front of the list.
13531da177e4SLinus Torvalds 		 * Here we put the head pointer into our next pointer,
13541da177e4SLinus Torvalds 		 * and then we fall through to point the head at us.
13551da177e4SLinus Torvalds 		 */
13560cadda1cSChristoph Hellwig 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1357c319b58bSVlad Apostolov 		if (error)
1358c319b58bSVlad Apostolov 			return error;
1359c319b58bSVlad Apostolov 
136069ef921bSChristoph Hellwig 		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
13611da177e4SLinus Torvalds 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
136292bfc6e7SChristoph Hellwig 		offset = ip->i_imap.im_boffset +
13631da177e4SLinus Torvalds 			offsetof(xfs_dinode_t, di_next_unlinked);
13641da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, ibp);
13651da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, ibp, offset,
13661da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
13671da177e4SLinus Torvalds 		xfs_inobp_check(mp, ibp);
13681da177e4SLinus Torvalds 	}
13691da177e4SLinus Torvalds 
13701da177e4SLinus Torvalds 	/*
13711da177e4SLinus Torvalds 	 * Point the bucket head pointer at the inode being inserted.
13721da177e4SLinus Torvalds 	 */
13731da177e4SLinus Torvalds 	ASSERT(agino != 0);
137416259e7dSChristoph Hellwig 	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
13751da177e4SLinus Torvalds 	offset = offsetof(xfs_agi_t, agi_unlinked) +
13761da177e4SLinus Torvalds 		(sizeof(xfs_agino_t) * bucket_index);
13771da177e4SLinus Torvalds 	xfs_trans_log_buf(tp, agibp, offset,
13781da177e4SLinus Torvalds 			  (offset + sizeof(xfs_agino_t) - 1));
13791da177e4SLinus Torvalds 	return 0;
13801da177e4SLinus Torvalds }
13811da177e4SLinus Torvalds 
13821da177e4SLinus Torvalds /*
13831da177e4SLinus Torvalds  * Pull the on-disk inode from the AGI unlinked list.
13841da177e4SLinus Torvalds  */
13851da177e4SLinus Torvalds STATIC int
13861da177e4SLinus Torvalds xfs_iunlink_remove(
13871da177e4SLinus Torvalds 	xfs_trans_t	*tp,
13881da177e4SLinus Torvalds 	xfs_inode_t	*ip)
13891da177e4SLinus Torvalds {
13901da177e4SLinus Torvalds 	xfs_ino_t	next_ino;
13911da177e4SLinus Torvalds 	xfs_mount_t	*mp;
13921da177e4SLinus Torvalds 	xfs_agi_t	*agi;
13931da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
13941da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
13951da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
13961da177e4SLinus Torvalds 	xfs_agnumber_t	agno;
13971da177e4SLinus Torvalds 	xfs_agino_t	agino;
13981da177e4SLinus Torvalds 	xfs_agino_t	next_agino;
13991da177e4SLinus Torvalds 	xfs_buf_t	*last_ibp;
14006fdf8cccSNathan Scott 	xfs_dinode_t	*last_dip = NULL;
14011da177e4SLinus Torvalds 	short		bucket_index;
14026fdf8cccSNathan Scott 	int		offset, last_offset = 0;
14031da177e4SLinus Torvalds 	int		error;
14041da177e4SLinus Torvalds 
14051da177e4SLinus Torvalds 	mp = tp->t_mountp;
14061da177e4SLinus Torvalds 	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
14071da177e4SLinus Torvalds 
14081da177e4SLinus Torvalds 	/*
14091da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
14101da177e4SLinus Torvalds 	 * on the list.
14111da177e4SLinus Torvalds 	 */
14125e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, agno, &agibp);
14135e1be0fbSChristoph Hellwig 	if (error)
14141da177e4SLinus Torvalds 		return error;
14155e1be0fbSChristoph Hellwig 
14161da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
14175e1be0fbSChristoph Hellwig 
14181da177e4SLinus Torvalds 	/*
14191da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
14201da177e4SLinus Torvalds 	 * list this inode will go on.
14211da177e4SLinus Torvalds 	 */
14221da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
14231da177e4SLinus Torvalds 	ASSERT(agino != 0);
14241da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
142569ef921bSChristoph Hellwig 	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
14261da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
14271da177e4SLinus Torvalds 
142816259e7dSChristoph Hellwig 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
14291da177e4SLinus Torvalds 		/*
14301da177e4SLinus Torvalds 		 * We're at the head of the list.  Get the inode's
14311da177e4SLinus Torvalds 		 * on-disk buffer to see if there is anyone after us
14321da177e4SLinus Torvalds 		 * on the list.  Only modify our next pointer if it
14331da177e4SLinus Torvalds 		 * is not already NULLAGINO.  This saves us the overhead
14341da177e4SLinus Torvalds 		 * of dealing with the buffer when there is no need to
14351da177e4SLinus Torvalds 		 * change it.
14361da177e4SLinus Torvalds 		 */
14370cadda1cSChristoph Hellwig 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
14381da177e4SLinus Torvalds 		if (error) {
14390b932cccSDave Chinner 			xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
14400b932cccSDave Chinner 				__func__, error);
14411da177e4SLinus Torvalds 			return error;
14421da177e4SLinus Torvalds 		}
1443347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
14441da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
14451da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1446347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
144792bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
14481da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
14491da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
14501da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
14511da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
14521da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
14531da177e4SLinus Torvalds 		} else {
14541da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
14551da177e4SLinus Torvalds 		}
14561da177e4SLinus Torvalds 		/*
14571da177e4SLinus Torvalds 		 * Point the bucket head pointer at the next inode.
14581da177e4SLinus Torvalds 		 */
14591da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
14601da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
146116259e7dSChristoph Hellwig 		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
14621da177e4SLinus Torvalds 		offset = offsetof(xfs_agi_t, agi_unlinked) +
14631da177e4SLinus Torvalds 			(sizeof(xfs_agino_t) * bucket_index);
14641da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, agibp, offset,
14651da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
14661da177e4SLinus Torvalds 	} else {
14671da177e4SLinus Torvalds 		/*
14681da177e4SLinus Torvalds 		 * We need to search the list for the inode being freed.
14691da177e4SLinus Torvalds 		 */
147016259e7dSChristoph Hellwig 		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
14711da177e4SLinus Torvalds 		last_ibp = NULL;
14721da177e4SLinus Torvalds 		while (next_agino != agino) {
14731da177e4SLinus Torvalds 			/*
14741da177e4SLinus Torvalds 			 * If the last inode wasn't the one pointing to
14751da177e4SLinus Torvalds 			 * us, then release its buffer since we're not
14761da177e4SLinus Torvalds 			 * going to do anything with it.
14771da177e4SLinus Torvalds 			 */
14781da177e4SLinus Torvalds 			if (last_ibp != NULL) {
14791da177e4SLinus Torvalds 				xfs_trans_brelse(tp, last_ibp);
14801da177e4SLinus Torvalds 			}
14811da177e4SLinus Torvalds 			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
14821da177e4SLinus Torvalds 			error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1483c679eef0SChristoph Hellwig 					    &last_ibp, &last_offset, 0);
14841da177e4SLinus Torvalds 			if (error) {
14850b932cccSDave Chinner 				xfs_warn(mp,
14860b932cccSDave Chinner 					"%s: xfs_inotobp() returned error %d.",
14870b932cccSDave Chinner 					__func__, error);
14881da177e4SLinus Torvalds 				return error;
14891da177e4SLinus Torvalds 			}
1490347d1c01SChristoph Hellwig 			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
14911da177e4SLinus Torvalds 			ASSERT(next_agino != NULLAGINO);
14921da177e4SLinus Torvalds 			ASSERT(next_agino != 0);
14931da177e4SLinus Torvalds 		}
14941da177e4SLinus Torvalds 		/*
14951da177e4SLinus Torvalds 		 * Now last_ibp points to the buffer previous to us on
14961da177e4SLinus Torvalds 		 * the unlinked list.  Pull us from the list.
14971da177e4SLinus Torvalds 		 */
14980cadda1cSChristoph Hellwig 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
14991da177e4SLinus Torvalds 		if (error) {
15000b932cccSDave Chinner 			xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
15010b932cccSDave Chinner 				__func__, error);
15021da177e4SLinus Torvalds 			return error;
15031da177e4SLinus Torvalds 		}
1504347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
15051da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
15061da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
15071da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1508347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
150992bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
15101da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
15111da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
15121da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
15131da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
15141da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
15151da177e4SLinus Torvalds 		} else {
15161da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
15171da177e4SLinus Torvalds 		}
15181da177e4SLinus Torvalds 		/*
15191da177e4SLinus Torvalds 		 * Point the previous inode on the list to the next inode.
15201da177e4SLinus Torvalds 		 */
1521347d1c01SChristoph Hellwig 		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
15221da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
15231da177e4SLinus Torvalds 		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
15241da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, last_ibp);
15251da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, last_ibp, offset,
15261da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
15271da177e4SLinus Torvalds 		xfs_inobp_check(mp, last_ibp);
15281da177e4SLinus Torvalds 	}
15291da177e4SLinus Torvalds 	return 0;
15301da177e4SLinus Torvalds }
15311da177e4SLinus Torvalds 
15325b3eed75SDave Chinner /*
15335b3eed75SDave Chinner  * A big issue when freeing the inode cluster is is that we _cannot_ skip any
15345b3eed75SDave Chinner  * inodes that are in memory - they all must be marked stale and attached to
15355b3eed75SDave Chinner  * the cluster buffer.
15365b3eed75SDave Chinner  */
15372a30f36dSChandra Seetharaman STATIC int
15381da177e4SLinus Torvalds xfs_ifree_cluster(
15391da177e4SLinus Torvalds 	xfs_inode_t	*free_ip,
15401da177e4SLinus Torvalds 	xfs_trans_t	*tp,
15411da177e4SLinus Torvalds 	xfs_ino_t	inum)
15421da177e4SLinus Torvalds {
15431da177e4SLinus Torvalds 	xfs_mount_t		*mp = free_ip->i_mount;
15441da177e4SLinus Torvalds 	int			blks_per_cluster;
15451da177e4SLinus Torvalds 	int			nbufs;
15461da177e4SLinus Torvalds 	int			ninodes;
15475b257b4aSDave Chinner 	int			i, j;
15481da177e4SLinus Torvalds 	xfs_daddr_t		blkno;
15491da177e4SLinus Torvalds 	xfs_buf_t		*bp;
15505b257b4aSDave Chinner 	xfs_inode_t		*ip;
15511da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
15521da177e4SLinus Torvalds 	xfs_log_item_t		*lip;
15535017e97dSDave Chinner 	struct xfs_perag	*pag;
15541da177e4SLinus Torvalds 
15555017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
15561da177e4SLinus Torvalds 	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
15571da177e4SLinus Torvalds 		blks_per_cluster = 1;
15581da177e4SLinus Torvalds 		ninodes = mp->m_sb.sb_inopblock;
15591da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp);
15601da177e4SLinus Torvalds 	} else {
15611da177e4SLinus Torvalds 		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
15621da177e4SLinus Torvalds 					mp->m_sb.sb_blocksize;
15631da177e4SLinus Torvalds 		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
15641da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
15651da177e4SLinus Torvalds 	}
15661da177e4SLinus Torvalds 
15671da177e4SLinus Torvalds 	for (j = 0; j < nbufs; j++, inum += ninodes) {
15681da177e4SLinus Torvalds 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
15691da177e4SLinus Torvalds 					 XFS_INO_TO_AGBNO(mp, inum));
15701da177e4SLinus Torvalds 
15711da177e4SLinus Torvalds 		/*
15725b257b4aSDave Chinner 		 * We obtain and lock the backing buffer first in the process
15735b257b4aSDave Chinner 		 * here, as we have to ensure that any dirty inode that we
15745b257b4aSDave Chinner 		 * can't get the flush lock on is attached to the buffer.
15755b257b4aSDave Chinner 		 * If we scan the in-memory inodes first, then buffer IO can
15765b257b4aSDave Chinner 		 * complete before we get a lock on it, and hence we may fail
15775b257b4aSDave Chinner 		 * to mark all the active inodes on the buffer stale.
15781da177e4SLinus Torvalds 		 */
15791da177e4SLinus Torvalds 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
15801da177e4SLinus Torvalds 					mp->m_bsize * blks_per_cluster,
15810cadda1cSChristoph Hellwig 					XBF_LOCK);
15821da177e4SLinus Torvalds 
15832a30f36dSChandra Seetharaman 		if (!bp)
15842a30f36dSChandra Seetharaman 			return ENOMEM;
15855b257b4aSDave Chinner 		/*
15865b257b4aSDave Chinner 		 * Walk the inodes already attached to the buffer and mark them
15875b257b4aSDave Chinner 		 * stale. These will all have the flush locks held, so an
15885b3eed75SDave Chinner 		 * in-memory inode walk can't lock them. By marking them all
15895b3eed75SDave Chinner 		 * stale first, we will not attempt to lock them in the loop
15905b3eed75SDave Chinner 		 * below as the XFS_ISTALE flag will be set.
15915b257b4aSDave Chinner 		 */
1592adadbeefSChristoph Hellwig 		lip = bp->b_fspriv;
15931da177e4SLinus Torvalds 		while (lip) {
15941da177e4SLinus Torvalds 			if (lip->li_type == XFS_LI_INODE) {
15951da177e4SLinus Torvalds 				iip = (xfs_inode_log_item_t *)lip;
15961da177e4SLinus Torvalds 				ASSERT(iip->ili_logged == 1);
1597ca30b2a7SChristoph Hellwig 				lip->li_cb = xfs_istale_done;
15987b2e2a31SDavid Chinner 				xfs_trans_ail_copy_lsn(mp->m_ail,
15997b2e2a31SDavid Chinner 							&iip->ili_flush_lsn,
16007b2e2a31SDavid Chinner 							&iip->ili_item.li_lsn);
1601e5ffd2bbSDavid Chinner 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
16021da177e4SLinus Torvalds 			}
16031da177e4SLinus Torvalds 			lip = lip->li_bio_list;
16041da177e4SLinus Torvalds 		}
16051da177e4SLinus Torvalds 
16065b3eed75SDave Chinner 
16075b257b4aSDave Chinner 		/*
16085b257b4aSDave Chinner 		 * For each inode in memory attempt to add it to the inode
16095b257b4aSDave Chinner 		 * buffer and set it up for being staled on buffer IO
16105b257b4aSDave Chinner 		 * completion.  This is safe as we've locked out tail pushing
16115b257b4aSDave Chinner 		 * and flushing by locking the buffer.
16125b257b4aSDave Chinner 		 *
16135b257b4aSDave Chinner 		 * We have already marked every inode that was part of a
16145b257b4aSDave Chinner 		 * transaction stale above, which means there is no point in
16155b257b4aSDave Chinner 		 * even trying to lock them.
16165b257b4aSDave Chinner 		 */
16175b257b4aSDave Chinner 		for (i = 0; i < ninodes; i++) {
16185b3eed75SDave Chinner retry:
16191a3e8f3dSDave Chinner 			rcu_read_lock();
16205b257b4aSDave Chinner 			ip = radix_tree_lookup(&pag->pag_ici_root,
16215b257b4aSDave Chinner 					XFS_INO_TO_AGINO(mp, (inum + i)));
16221da177e4SLinus Torvalds 
16231a3e8f3dSDave Chinner 			/* Inode not in memory, nothing to do */
16241a3e8f3dSDave Chinner 			if (!ip) {
16251a3e8f3dSDave Chinner 				rcu_read_unlock();
16265b257b4aSDave Chinner 				continue;
16275b257b4aSDave Chinner 			}
16285b257b4aSDave Chinner 
16295b3eed75SDave Chinner 			/*
16301a3e8f3dSDave Chinner 			 * because this is an RCU protected lookup, we could
16311a3e8f3dSDave Chinner 			 * find a recently freed or even reallocated inode
16321a3e8f3dSDave Chinner 			 * during the lookup. We need to check under the
16331a3e8f3dSDave Chinner 			 * i_flags_lock for a valid inode here. Skip it if it
16341a3e8f3dSDave Chinner 			 * is not valid, the wrong inode or stale.
16351a3e8f3dSDave Chinner 			 */
16361a3e8f3dSDave Chinner 			spin_lock(&ip->i_flags_lock);
16371a3e8f3dSDave Chinner 			if (ip->i_ino != inum + i ||
16381a3e8f3dSDave Chinner 			    __xfs_iflags_test(ip, XFS_ISTALE)) {
16391a3e8f3dSDave Chinner 				spin_unlock(&ip->i_flags_lock);
16401a3e8f3dSDave Chinner 				rcu_read_unlock();
16411a3e8f3dSDave Chinner 				continue;
16421a3e8f3dSDave Chinner 			}
16431a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
16441a3e8f3dSDave Chinner 
16451a3e8f3dSDave Chinner 			/*
16465b3eed75SDave Chinner 			 * Don't try to lock/unlock the current inode, but we
16475b3eed75SDave Chinner 			 * _cannot_ skip the other inodes that we did not find
16485b3eed75SDave Chinner 			 * in the list attached to the buffer and are not
16495b3eed75SDave Chinner 			 * already marked stale. If we can't lock it, back off
16505b3eed75SDave Chinner 			 * and retry.
16515b3eed75SDave Chinner 			 */
16525b257b4aSDave Chinner 			if (ip != free_ip &&
16535b257b4aSDave Chinner 			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
16541a3e8f3dSDave Chinner 				rcu_read_unlock();
16555b3eed75SDave Chinner 				delay(1);
16565b3eed75SDave Chinner 				goto retry;
16575b257b4aSDave Chinner 			}
16581a3e8f3dSDave Chinner 			rcu_read_unlock();
16595b257b4aSDave Chinner 
16605b3eed75SDave Chinner 			xfs_iflock(ip);
16615b257b4aSDave Chinner 			xfs_iflags_set(ip, XFS_ISTALE);
16625b257b4aSDave Chinner 
16635b3eed75SDave Chinner 			/*
16645b3eed75SDave Chinner 			 * we don't need to attach clean inodes or those only
16655b3eed75SDave Chinner 			 * with unlogged changes (which we throw away, anyway).
16665b3eed75SDave Chinner 			 */
16675b257b4aSDave Chinner 			iip = ip->i_itemp;
16685b3eed75SDave Chinner 			if (!iip || xfs_inode_clean(ip)) {
16695b257b4aSDave Chinner 				ASSERT(ip != free_ip);
16701da177e4SLinus Torvalds 				ip->i_update_core = 0;
16711da177e4SLinus Torvalds 				xfs_ifunlock(ip);
16721da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
16731da177e4SLinus Torvalds 				continue;
16741da177e4SLinus Torvalds 			}
16751da177e4SLinus Torvalds 
16761da177e4SLinus Torvalds 			iip->ili_last_fields = iip->ili_format.ilf_fields;
16771da177e4SLinus Torvalds 			iip->ili_format.ilf_fields = 0;
16781da177e4SLinus Torvalds 			iip->ili_logged = 1;
16797b2e2a31SDavid Chinner 			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
16807b2e2a31SDavid Chinner 						&iip->ili_item.li_lsn);
16811da177e4SLinus Torvalds 
1682ca30b2a7SChristoph Hellwig 			xfs_buf_attach_iodone(bp, xfs_istale_done,
1683ca30b2a7SChristoph Hellwig 						  &iip->ili_item);
16845b257b4aSDave Chinner 
16855b257b4aSDave Chinner 			if (ip != free_ip)
16861da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
16871da177e4SLinus Torvalds 		}
16881da177e4SLinus Torvalds 
16891da177e4SLinus Torvalds 		xfs_trans_stale_inode_buf(tp, bp);
16901da177e4SLinus Torvalds 		xfs_trans_binval(tp, bp);
16911da177e4SLinus Torvalds 	}
16921da177e4SLinus Torvalds 
16935017e97dSDave Chinner 	xfs_perag_put(pag);
16942a30f36dSChandra Seetharaman 	return 0;
16951da177e4SLinus Torvalds }
16961da177e4SLinus Torvalds 
16971da177e4SLinus Torvalds /*
16981da177e4SLinus Torvalds  * This is called to return an inode to the inode free list.
16991da177e4SLinus Torvalds  * The inode should already be truncated to 0 length and have
17001da177e4SLinus Torvalds  * no pages associated with it.  This routine also assumes that
17011da177e4SLinus Torvalds  * the inode is already a part of the transaction.
17021da177e4SLinus Torvalds  *
17031da177e4SLinus Torvalds  * The on-disk copy of the inode will have been added to the list
17041da177e4SLinus Torvalds  * of unlinked inodes in the AGI. We need to remove the inode from
17051da177e4SLinus Torvalds  * that list atomically with respect to freeing it here.
17061da177e4SLinus Torvalds  */
17071da177e4SLinus Torvalds int
17081da177e4SLinus Torvalds xfs_ifree(
17091da177e4SLinus Torvalds 	xfs_trans_t	*tp,
17101da177e4SLinus Torvalds 	xfs_inode_t	*ip,
17111da177e4SLinus Torvalds 	xfs_bmap_free_t	*flist)
17121da177e4SLinus Torvalds {
17131da177e4SLinus Torvalds 	int			error;
17141da177e4SLinus Torvalds 	int			delete;
17151da177e4SLinus Torvalds 	xfs_ino_t		first_ino;
1716c319b58bSVlad Apostolov 	xfs_dinode_t    	*dip;
1717c319b58bSVlad Apostolov 	xfs_buf_t       	*ibp;
17181da177e4SLinus Torvalds 
1719579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
17201da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
17211da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nextents == 0);
17221da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_anextents == 0);
1723ba87ea69SLachlan McIlroy 	ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
1724abbede1bSAl Viro 	       (!S_ISREG(ip->i_d.di_mode)));
17251da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
17261da177e4SLinus Torvalds 
17271da177e4SLinus Torvalds 	/*
17281da177e4SLinus Torvalds 	 * Pull the on-disk inode from the AGI unlinked list.
17291da177e4SLinus Torvalds 	 */
17301da177e4SLinus Torvalds 	error = xfs_iunlink_remove(tp, ip);
17311da177e4SLinus Torvalds 	if (error != 0) {
17321da177e4SLinus Torvalds 		return error;
17331da177e4SLinus Torvalds 	}
17341da177e4SLinus Torvalds 
17351da177e4SLinus Torvalds 	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
17361da177e4SLinus Torvalds 	if (error != 0) {
17371da177e4SLinus Torvalds 		return error;
17381da177e4SLinus Torvalds 	}
17391da177e4SLinus Torvalds 	ip->i_d.di_mode = 0;		/* mark incore inode as free */
17401da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
17411da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
17421da177e4SLinus Torvalds 	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
17431da177e4SLinus Torvalds 	ip->i_df.if_ext_max =
17441da177e4SLinus Torvalds 		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
17451da177e4SLinus Torvalds 	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
17461da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
17471da177e4SLinus Torvalds 	/*
17481da177e4SLinus Torvalds 	 * Bump the generation count so no one will be confused
17491da177e4SLinus Torvalds 	 * by reincarnations of this inode.
17501da177e4SLinus Torvalds 	 */
17511da177e4SLinus Torvalds 	ip->i_d.di_gen++;
1752c319b58bSVlad Apostolov 
17531da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
17541da177e4SLinus Torvalds 
17550cadda1cSChristoph Hellwig 	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
1756c319b58bSVlad Apostolov 	if (error)
1757c319b58bSVlad Apostolov 		return error;
1758c319b58bSVlad Apostolov 
1759c319b58bSVlad Apostolov         /*
1760c319b58bSVlad Apostolov 	* Clear the on-disk di_mode. This is to prevent xfs_bulkstat
1761c319b58bSVlad Apostolov 	* from picking up this inode when it is reclaimed (its incore state
1762c319b58bSVlad Apostolov 	* initialzed but not flushed to disk yet). The in-core di_mode is
1763c319b58bSVlad Apostolov 	* already cleared  and a corresponding transaction logged.
1764c319b58bSVlad Apostolov 	* The hack here just synchronizes the in-core to on-disk
1765c319b58bSVlad Apostolov 	* di_mode value in advance before the actual inode sync to disk.
1766c319b58bSVlad Apostolov 	* This is OK because the inode is already unlinked and would never
1767c319b58bSVlad Apostolov 	* change its di_mode again for this inode generation.
1768c319b58bSVlad Apostolov 	* This is a temporary hack that would require a proper fix
1769c319b58bSVlad Apostolov 	* in the future.
1770c319b58bSVlad Apostolov 	*/
177181591fe2SChristoph Hellwig 	dip->di_mode = 0;
1772c319b58bSVlad Apostolov 
17731da177e4SLinus Torvalds 	if (delete) {
17742a30f36dSChandra Seetharaman 		error = xfs_ifree_cluster(ip, tp, first_ino);
17751da177e4SLinus Torvalds 	}
17761da177e4SLinus Torvalds 
17772a30f36dSChandra Seetharaman 	return error;
17781da177e4SLinus Torvalds }
17791da177e4SLinus Torvalds 
17801da177e4SLinus Torvalds /*
17811da177e4SLinus Torvalds  * Reallocate the space for if_broot based on the number of records
17821da177e4SLinus Torvalds  * being added or deleted as indicated in rec_diff.  Move the records
17831da177e4SLinus Torvalds  * and pointers in if_broot to fit the new size.  When shrinking this
17841da177e4SLinus Torvalds  * will eliminate holes between the records and pointers created by
17851da177e4SLinus Torvalds  * the caller.  When growing this will create holes to be filled in
17861da177e4SLinus Torvalds  * by the caller.
17871da177e4SLinus Torvalds  *
17881da177e4SLinus Torvalds  * The caller must not request to add more records than would fit in
17891da177e4SLinus Torvalds  * the on-disk inode root.  If the if_broot is currently NULL, then
17901da177e4SLinus Torvalds  * if we adding records one will be allocated.  The caller must also
17911da177e4SLinus Torvalds  * not request that the number of records go below zero, although
17921da177e4SLinus Torvalds  * it can go to zero.
17931da177e4SLinus Torvalds  *
17941da177e4SLinus Torvalds  * ip -- the inode whose if_broot area is changing
17951da177e4SLinus Torvalds  * ext_diff -- the change in the number of records, positive or negative,
17961da177e4SLinus Torvalds  *	 requested for the if_broot array.
17971da177e4SLinus Torvalds  */
17981da177e4SLinus Torvalds void
17991da177e4SLinus Torvalds xfs_iroot_realloc(
18001da177e4SLinus Torvalds 	xfs_inode_t		*ip,
18011da177e4SLinus Torvalds 	int			rec_diff,
18021da177e4SLinus Torvalds 	int			whichfork)
18031da177e4SLinus Torvalds {
180460197e8dSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
18051da177e4SLinus Torvalds 	int			cur_max;
18061da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
18077cc95a82SChristoph Hellwig 	struct xfs_btree_block	*new_broot;
18081da177e4SLinus Torvalds 	int			new_max;
18091da177e4SLinus Torvalds 	size_t			new_size;
18101da177e4SLinus Torvalds 	char			*np;
18111da177e4SLinus Torvalds 	char			*op;
18121da177e4SLinus Torvalds 
18131da177e4SLinus Torvalds 	/*
18141da177e4SLinus Torvalds 	 * Handle the degenerate case quietly.
18151da177e4SLinus Torvalds 	 */
18161da177e4SLinus Torvalds 	if (rec_diff == 0) {
18171da177e4SLinus Torvalds 		return;
18181da177e4SLinus Torvalds 	}
18191da177e4SLinus Torvalds 
18201da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
18211da177e4SLinus Torvalds 	if (rec_diff > 0) {
18221da177e4SLinus Torvalds 		/*
18231da177e4SLinus Torvalds 		 * If there wasn't any memory allocated before, just
18241da177e4SLinus Torvalds 		 * allocate it now and get out.
18251da177e4SLinus Torvalds 		 */
18261da177e4SLinus Torvalds 		if (ifp->if_broot_bytes == 0) {
18271da177e4SLinus Torvalds 			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
18284a7edddcSDave Chinner 			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
18291da177e4SLinus Torvalds 			ifp->if_broot_bytes = (int)new_size;
18301da177e4SLinus Torvalds 			return;
18311da177e4SLinus Torvalds 		}
18321da177e4SLinus Torvalds 
18331da177e4SLinus Torvalds 		/*
18341da177e4SLinus Torvalds 		 * If there is already an existing if_broot, then we need
18351da177e4SLinus Torvalds 		 * to realloc() it and shift the pointers to their new
18361da177e4SLinus Torvalds 		 * location.  The records don't change location because
18371da177e4SLinus Torvalds 		 * they are kept butted up against the btree block header.
18381da177e4SLinus Torvalds 		 */
183960197e8dSChristoph Hellwig 		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
18401da177e4SLinus Torvalds 		new_max = cur_max + rec_diff;
18411da177e4SLinus Torvalds 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
18427cc95a82SChristoph Hellwig 		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
18431da177e4SLinus Torvalds 				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
18444a7edddcSDave Chinner 				KM_SLEEP | KM_NOFS);
184560197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
18461da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
184760197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
18481da177e4SLinus Torvalds 						     (int)new_size);
18491da177e4SLinus Torvalds 		ifp->if_broot_bytes = (int)new_size;
18501da177e4SLinus Torvalds 		ASSERT(ifp->if_broot_bytes <=
18511da177e4SLinus Torvalds 			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
18521da177e4SLinus Torvalds 		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
18531da177e4SLinus Torvalds 		return;
18541da177e4SLinus Torvalds 	}
18551da177e4SLinus Torvalds 
18561da177e4SLinus Torvalds 	/*
18571da177e4SLinus Torvalds 	 * rec_diff is less than 0.  In this case, we are shrinking the
18581da177e4SLinus Torvalds 	 * if_broot buffer.  It must already exist.  If we go to zero
18591da177e4SLinus Torvalds 	 * records, just get rid of the root and clear the status bit.
18601da177e4SLinus Torvalds 	 */
18611da177e4SLinus Torvalds 	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
186260197e8dSChristoph Hellwig 	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
18631da177e4SLinus Torvalds 	new_max = cur_max + rec_diff;
18641da177e4SLinus Torvalds 	ASSERT(new_max >= 0);
18651da177e4SLinus Torvalds 	if (new_max > 0)
18661da177e4SLinus Torvalds 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
18671da177e4SLinus Torvalds 	else
18681da177e4SLinus Torvalds 		new_size = 0;
18691da177e4SLinus Torvalds 	if (new_size > 0) {
18704a7edddcSDave Chinner 		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
18711da177e4SLinus Torvalds 		/*
18721da177e4SLinus Torvalds 		 * First copy over the btree block header.
18731da177e4SLinus Torvalds 		 */
18747cc95a82SChristoph Hellwig 		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
18751da177e4SLinus Torvalds 	} else {
18761da177e4SLinus Torvalds 		new_broot = NULL;
18771da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFBROOT;
18781da177e4SLinus Torvalds 	}
18791da177e4SLinus Torvalds 
18801da177e4SLinus Torvalds 	/*
18811da177e4SLinus Torvalds 	 * Only copy the records and pointers if there are any.
18821da177e4SLinus Torvalds 	 */
18831da177e4SLinus Torvalds 	if (new_max > 0) {
18841da177e4SLinus Torvalds 		/*
18851da177e4SLinus Torvalds 		 * First copy the records.
18861da177e4SLinus Torvalds 		 */
1887136341b4SChristoph Hellwig 		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
1888136341b4SChristoph Hellwig 		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
18891da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
18901da177e4SLinus Torvalds 
18911da177e4SLinus Torvalds 		/*
18921da177e4SLinus Torvalds 		 * Then copy the pointers.
18931da177e4SLinus Torvalds 		 */
189460197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
18951da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
189660197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
18971da177e4SLinus Torvalds 						     (int)new_size);
18981da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
18991da177e4SLinus Torvalds 	}
1900f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_broot);
19011da177e4SLinus Torvalds 	ifp->if_broot = new_broot;
19021da177e4SLinus Torvalds 	ifp->if_broot_bytes = (int)new_size;
19031da177e4SLinus Torvalds 	ASSERT(ifp->if_broot_bytes <=
19041da177e4SLinus Torvalds 		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
19051da177e4SLinus Torvalds 	return;
19061da177e4SLinus Torvalds }
19071da177e4SLinus Torvalds 
19081da177e4SLinus Torvalds 
19091da177e4SLinus Torvalds /*
19101da177e4SLinus Torvalds  * This is called when the amount of space needed for if_data
19111da177e4SLinus Torvalds  * is increased or decreased.  The change in size is indicated by
19121da177e4SLinus Torvalds  * the number of bytes that need to be added or deleted in the
19131da177e4SLinus Torvalds  * byte_diff parameter.
19141da177e4SLinus Torvalds  *
19151da177e4SLinus Torvalds  * If the amount of space needed has decreased below the size of the
19161da177e4SLinus Torvalds  * inline buffer, then switch to using the inline buffer.  Otherwise,
19171da177e4SLinus Torvalds  * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
19181da177e4SLinus Torvalds  * to what is needed.
19191da177e4SLinus Torvalds  *
19201da177e4SLinus Torvalds  * ip -- the inode whose if_data area is changing
19211da177e4SLinus Torvalds  * byte_diff -- the change in the number of bytes, positive or negative,
19221da177e4SLinus Torvalds  *	 requested for the if_data array.
19231da177e4SLinus Torvalds  */
19241da177e4SLinus Torvalds void
19251da177e4SLinus Torvalds xfs_idata_realloc(
19261da177e4SLinus Torvalds 	xfs_inode_t	*ip,
19271da177e4SLinus Torvalds 	int		byte_diff,
19281da177e4SLinus Torvalds 	int		whichfork)
19291da177e4SLinus Torvalds {
19301da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
19311da177e4SLinus Torvalds 	int		new_size;
19321da177e4SLinus Torvalds 	int		real_size;
19331da177e4SLinus Torvalds 
19341da177e4SLinus Torvalds 	if (byte_diff == 0) {
19351da177e4SLinus Torvalds 		return;
19361da177e4SLinus Torvalds 	}
19371da177e4SLinus Torvalds 
19381da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
19391da177e4SLinus Torvalds 	new_size = (int)ifp->if_bytes + byte_diff;
19401da177e4SLinus Torvalds 	ASSERT(new_size >= 0);
19411da177e4SLinus Torvalds 
19421da177e4SLinus Torvalds 	if (new_size == 0) {
19431da177e4SLinus Torvalds 		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
1944f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
19451da177e4SLinus Torvalds 		}
19461da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
19471da177e4SLinus Torvalds 		real_size = 0;
19481da177e4SLinus Torvalds 	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
19491da177e4SLinus Torvalds 		/*
19501da177e4SLinus Torvalds 		 * If the valid extents/data can fit in if_inline_ext/data,
19511da177e4SLinus Torvalds 		 * copy them from the malloc'd vector and free it.
19521da177e4SLinus Torvalds 		 */
19531da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
19541da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
19551da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
19561da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
19571da177e4SLinus Torvalds 			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
19581da177e4SLinus Torvalds 			      new_size);
1959f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
19601da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
19611da177e4SLinus Torvalds 		}
19621da177e4SLinus Torvalds 		real_size = 0;
19631da177e4SLinus Torvalds 	} else {
19641da177e4SLinus Torvalds 		/*
19651da177e4SLinus Torvalds 		 * Stuck with malloc/realloc.
19661da177e4SLinus Torvalds 		 * For inline data, the underlying buffer must be
19671da177e4SLinus Torvalds 		 * a multiple of 4 bytes in size so that it can be
19681da177e4SLinus Torvalds 		 * logged and stay on word boundaries.  We enforce
19691da177e4SLinus Torvalds 		 * that here.
19701da177e4SLinus Torvalds 		 */
19711da177e4SLinus Torvalds 		real_size = roundup(new_size, 4);
19721da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
19731da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
19744a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
19754a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
19761da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
19771da177e4SLinus Torvalds 			/*
19781da177e4SLinus Torvalds 			 * Only do the realloc if the underlying size
19791da177e4SLinus Torvalds 			 * is really changing.
19801da177e4SLinus Torvalds 			 */
19811da177e4SLinus Torvalds 			if (ifp->if_real_bytes != real_size) {
19821da177e4SLinus Torvalds 				ifp->if_u1.if_data =
19831da177e4SLinus Torvalds 					kmem_realloc(ifp->if_u1.if_data,
19841da177e4SLinus Torvalds 							real_size,
19851da177e4SLinus Torvalds 							ifp->if_real_bytes,
19864a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
19871da177e4SLinus Torvalds 			}
19881da177e4SLinus Torvalds 		} else {
19891da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
19904a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
19914a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
19921da177e4SLinus Torvalds 			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
19931da177e4SLinus Torvalds 				ifp->if_bytes);
19941da177e4SLinus Torvalds 		}
19951da177e4SLinus Torvalds 	}
19961da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
19971da177e4SLinus Torvalds 	ifp->if_bytes = new_size;
19981da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
19991da177e4SLinus Torvalds }
20001da177e4SLinus Torvalds 
20011da177e4SLinus Torvalds void
20021da177e4SLinus Torvalds xfs_idestroy_fork(
20031da177e4SLinus Torvalds 	xfs_inode_t	*ip,
20041da177e4SLinus Torvalds 	int		whichfork)
20051da177e4SLinus Torvalds {
20061da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
20071da177e4SLinus Torvalds 
20081da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
20091da177e4SLinus Torvalds 	if (ifp->if_broot != NULL) {
2010f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_broot);
20111da177e4SLinus Torvalds 		ifp->if_broot = NULL;
20121da177e4SLinus Torvalds 	}
20131da177e4SLinus Torvalds 
20141da177e4SLinus Torvalds 	/*
20151da177e4SLinus Torvalds 	 * If the format is local, then we can't have an extents
20161da177e4SLinus Torvalds 	 * array so just look for an inline data array.  If we're
20171da177e4SLinus Torvalds 	 * not local then we may or may not have an extents list,
20181da177e4SLinus Torvalds 	 * so check and free it up if we do.
20191da177e4SLinus Torvalds 	 */
20201da177e4SLinus Torvalds 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
20211da177e4SLinus Torvalds 		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
20221da177e4SLinus Torvalds 		    (ifp->if_u1.if_data != NULL)) {
20231da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
2024f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
20251da177e4SLinus Torvalds 			ifp->if_u1.if_data = NULL;
20261da177e4SLinus Torvalds 			ifp->if_real_bytes = 0;
20271da177e4SLinus Torvalds 		}
20281da177e4SLinus Torvalds 	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
20290293ce3aSMandy Kirkconnell 		   ((ifp->if_flags & XFS_IFEXTIREC) ||
20300293ce3aSMandy Kirkconnell 		    ((ifp->if_u1.if_extents != NULL) &&
20310293ce3aSMandy Kirkconnell 		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
20321da177e4SLinus Torvalds 		ASSERT(ifp->if_real_bytes != 0);
20334eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
20341da177e4SLinus Torvalds 	}
20351da177e4SLinus Torvalds 	ASSERT(ifp->if_u1.if_extents == NULL ||
20361da177e4SLinus Torvalds 	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
20371da177e4SLinus Torvalds 	ASSERT(ifp->if_real_bytes == 0);
20381da177e4SLinus Torvalds 	if (whichfork == XFS_ATTR_FORK) {
20391da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
20401da177e4SLinus Torvalds 		ip->i_afp = NULL;
20411da177e4SLinus Torvalds 	}
20421da177e4SLinus Torvalds }
20431da177e4SLinus Torvalds 
20441da177e4SLinus Torvalds /*
204560ec6783SChristoph Hellwig  * This is called to unpin an inode.  The caller must have the inode locked
204660ec6783SChristoph Hellwig  * in at least shared mode so that the buffer cannot be subsequently pinned
204760ec6783SChristoph Hellwig  * once someone is waiting for it to be unpinned.
20481da177e4SLinus Torvalds  */
204960ec6783SChristoph Hellwig static void
205060ec6783SChristoph Hellwig xfs_iunpin_nowait(
205160ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
2052a3f74ffbSDavid Chinner {
2053579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2054a3f74ffbSDavid Chinner 
20554aaf15d1SDave Chinner 	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
20564aaf15d1SDave Chinner 
2057a3f74ffbSDavid Chinner 	/* Give the log a push to start the unpinning I/O */
205860ec6783SChristoph Hellwig 	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2059a14a348bSChristoph Hellwig 
2060a3f74ffbSDavid Chinner }
2061a3f74ffbSDavid Chinner 
2062777df5afSDave Chinner void
20631da177e4SLinus Torvalds xfs_iunpin_wait(
206460ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
20651da177e4SLinus Torvalds {
206660ec6783SChristoph Hellwig 	if (xfs_ipincount(ip)) {
206760ec6783SChristoph Hellwig 		xfs_iunpin_nowait(ip);
206860ec6783SChristoph Hellwig 		wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
20691da177e4SLinus Torvalds 	}
20701da177e4SLinus Torvalds }
20711da177e4SLinus Torvalds 
20721da177e4SLinus Torvalds /*
20731da177e4SLinus Torvalds  * xfs_iextents_copy()
20741da177e4SLinus Torvalds  *
20751da177e4SLinus Torvalds  * This is called to copy the REAL extents (as opposed to the delayed
20761da177e4SLinus Torvalds  * allocation extents) from the inode into the given buffer.  It
20771da177e4SLinus Torvalds  * returns the number of bytes copied into the buffer.
20781da177e4SLinus Torvalds  *
20791da177e4SLinus Torvalds  * If there are no delayed allocation extents, then we can just
20801da177e4SLinus Torvalds  * memcpy() the extents into the buffer.  Otherwise, we need to
20811da177e4SLinus Torvalds  * examine each extent in turn and skip those which are delayed.
20821da177e4SLinus Torvalds  */
20831da177e4SLinus Torvalds int
20841da177e4SLinus Torvalds xfs_iextents_copy(
20851da177e4SLinus Torvalds 	xfs_inode_t		*ip,
2086a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t		*dp,
20871da177e4SLinus Torvalds 	int			whichfork)
20881da177e4SLinus Torvalds {
20891da177e4SLinus Torvalds 	int			copied;
20901da177e4SLinus Torvalds 	int			i;
20911da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
20921da177e4SLinus Torvalds 	int			nrecs;
20931da177e4SLinus Torvalds 	xfs_fsblock_t		start_block;
20941da177e4SLinus Torvalds 
20951da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
2096579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
20971da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes > 0);
20981da177e4SLinus Torvalds 
20991da177e4SLinus Torvalds 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
21003a59c94cSEric Sandeen 	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
21011da177e4SLinus Torvalds 	ASSERT(nrecs > 0);
21021da177e4SLinus Torvalds 
21031da177e4SLinus Torvalds 	/*
21041da177e4SLinus Torvalds 	 * There are some delayed allocation extents in the
21051da177e4SLinus Torvalds 	 * inode, so copy the extents one at a time and skip
21061da177e4SLinus Torvalds 	 * the delayed ones.  There must be at least one
21071da177e4SLinus Torvalds 	 * non-delayed extent.
21081da177e4SLinus Torvalds 	 */
21091da177e4SLinus Torvalds 	copied = 0;
21101da177e4SLinus Torvalds 	for (i = 0; i < nrecs; i++) {
2111a6f64d4aSChristoph Hellwig 		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
21121da177e4SLinus Torvalds 		start_block = xfs_bmbt_get_startblock(ep);
21139d87c319SEric Sandeen 		if (isnullstartblock(start_block)) {
21141da177e4SLinus Torvalds 			/*
21151da177e4SLinus Torvalds 			 * It's a delayed allocation extent, so skip it.
21161da177e4SLinus Torvalds 			 */
21171da177e4SLinus Torvalds 			continue;
21181da177e4SLinus Torvalds 		}
21191da177e4SLinus Torvalds 
21201da177e4SLinus Torvalds 		/* Translate to on disk format */
2121cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
2122cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
2123a6f64d4aSChristoph Hellwig 		dp++;
21241da177e4SLinus Torvalds 		copied++;
21251da177e4SLinus Torvalds 	}
21261da177e4SLinus Torvalds 	ASSERT(copied != 0);
2127a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
21281da177e4SLinus Torvalds 
21291da177e4SLinus Torvalds 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
21301da177e4SLinus Torvalds }
21311da177e4SLinus Torvalds 
21321da177e4SLinus Torvalds /*
21331da177e4SLinus Torvalds  * Each of the following cases stores data into the same region
21341da177e4SLinus Torvalds  * of the on-disk inode, so only one of them can be valid at
21351da177e4SLinus Torvalds  * any given time. While it is possible to have conflicting formats
21361da177e4SLinus Torvalds  * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
21371da177e4SLinus Torvalds  * in EXTENTS format, this can only happen when the fork has
21381da177e4SLinus Torvalds  * changed formats after being modified but before being flushed.
21391da177e4SLinus Torvalds  * In these cases, the format always takes precedence, because the
21401da177e4SLinus Torvalds  * format indicates the current state of the fork.
21411da177e4SLinus Torvalds  */
21421da177e4SLinus Torvalds /*ARGSUSED*/
2143e4ac967bSDavid Chinner STATIC void
21441da177e4SLinus Torvalds xfs_iflush_fork(
21451da177e4SLinus Torvalds 	xfs_inode_t		*ip,
21461da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
21471da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip,
21481da177e4SLinus Torvalds 	int			whichfork,
21491da177e4SLinus Torvalds 	xfs_buf_t		*bp)
21501da177e4SLinus Torvalds {
21511da177e4SLinus Torvalds 	char			*cp;
21521da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
21531da177e4SLinus Torvalds 	xfs_mount_t		*mp;
21541da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG
21551da177e4SLinus Torvalds 	int			first;
21561da177e4SLinus Torvalds #endif
21571da177e4SLinus Torvalds 	static const short	brootflag[2] =
21581da177e4SLinus Torvalds 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
21591da177e4SLinus Torvalds 	static const short	dataflag[2] =
21601da177e4SLinus Torvalds 		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
21611da177e4SLinus Torvalds 	static const short	extflag[2] =
21621da177e4SLinus Torvalds 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
21631da177e4SLinus Torvalds 
2164e4ac967bSDavid Chinner 	if (!iip)
2165e4ac967bSDavid Chinner 		return;
21661da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
21671da177e4SLinus Torvalds 	/*
21681da177e4SLinus Torvalds 	 * This can happen if we gave up in iformat in an error path,
21691da177e4SLinus Torvalds 	 * for the attribute fork.
21701da177e4SLinus Torvalds 	 */
2171e4ac967bSDavid Chinner 	if (!ifp) {
21721da177e4SLinus Torvalds 		ASSERT(whichfork == XFS_ATTR_FORK);
2173e4ac967bSDavid Chinner 		return;
21741da177e4SLinus Torvalds 	}
21751da177e4SLinus Torvalds 	cp = XFS_DFORK_PTR(dip, whichfork);
21761da177e4SLinus Torvalds 	mp = ip->i_mount;
21771da177e4SLinus Torvalds 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
21781da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
21791da177e4SLinus Torvalds 		if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
21801da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
21811da177e4SLinus Torvalds 			ASSERT(ifp->if_u1.if_data != NULL);
21821da177e4SLinus Torvalds 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
21831da177e4SLinus Torvalds 			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
21841da177e4SLinus Torvalds 		}
21851da177e4SLinus Torvalds 		break;
21861da177e4SLinus Torvalds 
21871da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
21881da177e4SLinus Torvalds 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
21891da177e4SLinus Torvalds 		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
21901da177e4SLinus Torvalds 		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
21911da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
2192ab1908a5SChristoph Hellwig 			ASSERT(xfs_iext_get_ext(ifp, 0));
21931da177e4SLinus Torvalds 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
21941da177e4SLinus Torvalds 			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
21951da177e4SLinus Torvalds 				whichfork);
21961da177e4SLinus Torvalds 		}
21971da177e4SLinus Torvalds 		break;
21981da177e4SLinus Torvalds 
21991da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
22001da177e4SLinus Torvalds 		if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
22011da177e4SLinus Torvalds 		    (ifp->if_broot_bytes > 0)) {
22021da177e4SLinus Torvalds 			ASSERT(ifp->if_broot != NULL);
22031da177e4SLinus Torvalds 			ASSERT(ifp->if_broot_bytes <=
22041da177e4SLinus Torvalds 			       (XFS_IFORK_SIZE(ip, whichfork) +
22051da177e4SLinus Torvalds 				XFS_BROOT_SIZE_ADJ));
220660197e8dSChristoph Hellwig 			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
22071da177e4SLinus Torvalds 				(xfs_bmdr_block_t *)cp,
22081da177e4SLinus Torvalds 				XFS_DFORK_SIZE(dip, mp, whichfork));
22091da177e4SLinus Torvalds 		}
22101da177e4SLinus Torvalds 		break;
22111da177e4SLinus Torvalds 
22121da177e4SLinus Torvalds 	case XFS_DINODE_FMT_DEV:
22131da177e4SLinus Torvalds 		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
22141da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
221581591fe2SChristoph Hellwig 			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
22161da177e4SLinus Torvalds 		}
22171da177e4SLinus Torvalds 		break;
22181da177e4SLinus Torvalds 
22191da177e4SLinus Torvalds 	case XFS_DINODE_FMT_UUID:
22201da177e4SLinus Torvalds 		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
22211da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
222281591fe2SChristoph Hellwig 			memcpy(XFS_DFORK_DPTR(dip),
222381591fe2SChristoph Hellwig 			       &ip->i_df.if_u2.if_uuid,
22241da177e4SLinus Torvalds 			       sizeof(uuid_t));
22251da177e4SLinus Torvalds 		}
22261da177e4SLinus Torvalds 		break;
22271da177e4SLinus Torvalds 
22281da177e4SLinus Torvalds 	default:
22291da177e4SLinus Torvalds 		ASSERT(0);
22301da177e4SLinus Torvalds 		break;
22311da177e4SLinus Torvalds 	}
22321da177e4SLinus Torvalds }
22331da177e4SLinus Torvalds 
2234bad55843SDavid Chinner STATIC int
2235bad55843SDavid Chinner xfs_iflush_cluster(
2236bad55843SDavid Chinner 	xfs_inode_t	*ip,
2237bad55843SDavid Chinner 	xfs_buf_t	*bp)
2238bad55843SDavid Chinner {
2239bad55843SDavid Chinner 	xfs_mount_t		*mp = ip->i_mount;
22405017e97dSDave Chinner 	struct xfs_perag	*pag;
2241bad55843SDavid Chinner 	unsigned long		first_index, mask;
2242c8f5f12eSDavid Chinner 	unsigned long		inodes_per_cluster;
2243bad55843SDavid Chinner 	int			ilist_size;
2244bad55843SDavid Chinner 	xfs_inode_t		**ilist;
2245bad55843SDavid Chinner 	xfs_inode_t		*iq;
2246bad55843SDavid Chinner 	int			nr_found;
2247bad55843SDavid Chinner 	int			clcount = 0;
2248bad55843SDavid Chinner 	int			bufwasdelwri;
2249bad55843SDavid Chinner 	int			i;
2250bad55843SDavid Chinner 
22515017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2252bad55843SDavid Chinner 
2253c8f5f12eSDavid Chinner 	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2254c8f5f12eSDavid Chinner 	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
225549383b0eSDavid Chinner 	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2256bad55843SDavid Chinner 	if (!ilist)
225744b56e0aSDave Chinner 		goto out_put;
2258bad55843SDavid Chinner 
2259bad55843SDavid Chinner 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2260bad55843SDavid Chinner 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
22611a3e8f3dSDave Chinner 	rcu_read_lock();
2262bad55843SDavid Chinner 	/* really need a gang lookup range call here */
2263bad55843SDavid Chinner 	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2264c8f5f12eSDavid Chinner 					first_index, inodes_per_cluster);
2265bad55843SDavid Chinner 	if (nr_found == 0)
2266bad55843SDavid Chinner 		goto out_free;
2267bad55843SDavid Chinner 
2268bad55843SDavid Chinner 	for (i = 0; i < nr_found; i++) {
2269bad55843SDavid Chinner 		iq = ilist[i];
2270bad55843SDavid Chinner 		if (iq == ip)
2271bad55843SDavid Chinner 			continue;
22721a3e8f3dSDave Chinner 
22731a3e8f3dSDave Chinner 		/*
22741a3e8f3dSDave Chinner 		 * because this is an RCU protected lookup, we could find a
22751a3e8f3dSDave Chinner 		 * recently freed or even reallocated inode during the lookup.
22761a3e8f3dSDave Chinner 		 * We need to check under the i_flags_lock for a valid inode
22771a3e8f3dSDave Chinner 		 * here. Skip it if it is not valid or the wrong inode.
22781a3e8f3dSDave Chinner 		 */
22791a3e8f3dSDave Chinner 		spin_lock(&ip->i_flags_lock);
22801a3e8f3dSDave Chinner 		if (!ip->i_ino ||
22811a3e8f3dSDave Chinner 		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
22821a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
22831a3e8f3dSDave Chinner 			continue;
22841a3e8f3dSDave Chinner 		}
22851a3e8f3dSDave Chinner 		spin_unlock(&ip->i_flags_lock);
22861a3e8f3dSDave Chinner 
2287bad55843SDavid Chinner 		/*
2288bad55843SDavid Chinner 		 * Do an un-protected check to see if the inode is dirty and
2289bad55843SDavid Chinner 		 * is a candidate for flushing.  These checks will be repeated
2290bad55843SDavid Chinner 		 * later after the appropriate locks are acquired.
2291bad55843SDavid Chinner 		 */
229233540408SDavid Chinner 		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
2293bad55843SDavid Chinner 			continue;
2294bad55843SDavid Chinner 
2295bad55843SDavid Chinner 		/*
2296bad55843SDavid Chinner 		 * Try to get locks.  If any are unavailable or it is pinned,
2297bad55843SDavid Chinner 		 * then this inode cannot be flushed and is skipped.
2298bad55843SDavid Chinner 		 */
2299bad55843SDavid Chinner 
2300bad55843SDavid Chinner 		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
2301bad55843SDavid Chinner 			continue;
2302bad55843SDavid Chinner 		if (!xfs_iflock_nowait(iq)) {
2303bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2304bad55843SDavid Chinner 			continue;
2305bad55843SDavid Chinner 		}
2306bad55843SDavid Chinner 		if (xfs_ipincount(iq)) {
2307bad55843SDavid Chinner 			xfs_ifunlock(iq);
2308bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2309bad55843SDavid Chinner 			continue;
2310bad55843SDavid Chinner 		}
2311bad55843SDavid Chinner 
2312bad55843SDavid Chinner 		/*
2313bad55843SDavid Chinner 		 * arriving here means that this inode can be flushed.  First
2314bad55843SDavid Chinner 		 * re-check that it's dirty before flushing.
2315bad55843SDavid Chinner 		 */
231633540408SDavid Chinner 		if (!xfs_inode_clean(iq)) {
2317bad55843SDavid Chinner 			int	error;
2318bad55843SDavid Chinner 			error = xfs_iflush_int(iq, bp);
2319bad55843SDavid Chinner 			if (error) {
2320bad55843SDavid Chinner 				xfs_iunlock(iq, XFS_ILOCK_SHARED);
2321bad55843SDavid Chinner 				goto cluster_corrupt_out;
2322bad55843SDavid Chinner 			}
2323bad55843SDavid Chinner 			clcount++;
2324bad55843SDavid Chinner 		} else {
2325bad55843SDavid Chinner 			xfs_ifunlock(iq);
2326bad55843SDavid Chinner 		}
2327bad55843SDavid Chinner 		xfs_iunlock(iq, XFS_ILOCK_SHARED);
2328bad55843SDavid Chinner 	}
2329bad55843SDavid Chinner 
2330bad55843SDavid Chinner 	if (clcount) {
2331bad55843SDavid Chinner 		XFS_STATS_INC(xs_icluster_flushcnt);
2332bad55843SDavid Chinner 		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
2333bad55843SDavid Chinner 	}
2334bad55843SDavid Chinner 
2335bad55843SDavid Chinner out_free:
23361a3e8f3dSDave Chinner 	rcu_read_unlock();
2337f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
233844b56e0aSDave Chinner out_put:
233944b56e0aSDave Chinner 	xfs_perag_put(pag);
2340bad55843SDavid Chinner 	return 0;
2341bad55843SDavid Chinner 
2342bad55843SDavid Chinner 
2343bad55843SDavid Chinner cluster_corrupt_out:
2344bad55843SDavid Chinner 	/*
2345bad55843SDavid Chinner 	 * Corruption detected in the clustering loop.  Invalidate the
2346bad55843SDavid Chinner 	 * inode buffer and shut down the filesystem.
2347bad55843SDavid Chinner 	 */
23481a3e8f3dSDave Chinner 	rcu_read_unlock();
2349bad55843SDavid Chinner 	/*
2350bad55843SDavid Chinner 	 * Clean up the buffer.  If it was B_DELWRI, just release it --
2351bad55843SDavid Chinner 	 * brelse can handle it with no problems.  If not, shut down the
2352bad55843SDavid Chinner 	 * filesystem before releasing the buffer.
2353bad55843SDavid Chinner 	 */
2354bad55843SDavid Chinner 	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
2355bad55843SDavid Chinner 	if (bufwasdelwri)
2356bad55843SDavid Chinner 		xfs_buf_relse(bp);
2357bad55843SDavid Chinner 
2358bad55843SDavid Chinner 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2359bad55843SDavid Chinner 
2360bad55843SDavid Chinner 	if (!bufwasdelwri) {
2361bad55843SDavid Chinner 		/*
2362bad55843SDavid Chinner 		 * Just like incore_relse: if we have b_iodone functions,
2363bad55843SDavid Chinner 		 * mark the buffer as an error and call them.  Otherwise
2364bad55843SDavid Chinner 		 * mark it as stale and brelse.
2365bad55843SDavid Chinner 		 */
2366cb669ca5SChristoph Hellwig 		if (bp->b_iodone) {
2367bad55843SDavid Chinner 			XFS_BUF_UNDONE(bp);
2368c867cb61SChristoph Hellwig 			xfs_buf_stale(bp);
23695a52c2a5SChandra Seetharaman 			xfs_buf_ioerror(bp, EIO);
23701a1a3e97SChristoph Hellwig 			xfs_buf_ioend(bp, 0);
2371bad55843SDavid Chinner 		} else {
2372c867cb61SChristoph Hellwig 			xfs_buf_stale(bp);
2373bad55843SDavid Chinner 			xfs_buf_relse(bp);
2374bad55843SDavid Chinner 		}
2375bad55843SDavid Chinner 	}
2376bad55843SDavid Chinner 
2377bad55843SDavid Chinner 	/*
2378bad55843SDavid Chinner 	 * Unlocks the flush lock
2379bad55843SDavid Chinner 	 */
2380bad55843SDavid Chinner 	xfs_iflush_abort(iq);
2381f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
238244b56e0aSDave Chinner 	xfs_perag_put(pag);
2383bad55843SDavid Chinner 	return XFS_ERROR(EFSCORRUPTED);
2384bad55843SDavid Chinner }
2385bad55843SDavid Chinner 
23861da177e4SLinus Torvalds /*
23871da177e4SLinus Torvalds  * xfs_iflush() will write a modified inode's changes out to the
23881da177e4SLinus Torvalds  * inode's on disk home.  The caller must have the inode lock held
2389c63942d3SDavid Chinner  * in at least shared mode and the inode flush completion must be
2390c63942d3SDavid Chinner  * active as well.  The inode lock will still be held upon return from
23911da177e4SLinus Torvalds  * the call and the caller is free to unlock it.
2392c63942d3SDavid Chinner  * The inode flush will be completed when the inode reaches the disk.
23931da177e4SLinus Torvalds  * The flags indicate how the inode's buffer should be written out.
23941da177e4SLinus Torvalds  */
23951da177e4SLinus Torvalds int
23961da177e4SLinus Torvalds xfs_iflush(
23971da177e4SLinus Torvalds 	xfs_inode_t		*ip,
23981da177e4SLinus Torvalds 	uint			flags)
23991da177e4SLinus Torvalds {
24001da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
24011da177e4SLinus Torvalds 	xfs_buf_t		*bp;
24021da177e4SLinus Torvalds 	xfs_dinode_t		*dip;
24031da177e4SLinus Torvalds 	xfs_mount_t		*mp;
24041da177e4SLinus Torvalds 	int			error;
24051da177e4SLinus Torvalds 
24061da177e4SLinus Torvalds 	XFS_STATS_INC(xs_iflush_count);
24071da177e4SLinus Torvalds 
2408579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2409c63942d3SDavid Chinner 	ASSERT(!completion_done(&ip->i_flush));
24101da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
24111da177e4SLinus Torvalds 	       ip->i_d.di_nextents > ip->i_df.if_ext_max);
24121da177e4SLinus Torvalds 
24131da177e4SLinus Torvalds 	iip = ip->i_itemp;
24141da177e4SLinus Torvalds 	mp = ip->i_mount;
24151da177e4SLinus Torvalds 
24161da177e4SLinus Torvalds 	/*
2417a3f74ffbSDavid Chinner 	 * We can't flush the inode until it is unpinned, so wait for it if we
2418a3f74ffbSDavid Chinner 	 * are allowed to block.  We know no one new can pin it, because we are
2419a3f74ffbSDavid Chinner 	 * holding the inode lock shared and you need to hold it exclusively to
2420a3f74ffbSDavid Chinner 	 * pin the inode.
2421a3f74ffbSDavid Chinner 	 *
2422a3f74ffbSDavid Chinner 	 * If we are not allowed to block, force the log out asynchronously so
2423a3f74ffbSDavid Chinner 	 * that when we come back the inode will be unpinned. If other inodes
2424a3f74ffbSDavid Chinner 	 * in the same cluster are dirty, they will probably write the inode
2425a3f74ffbSDavid Chinner 	 * out for us if they occur after the log force completes.
24261da177e4SLinus Torvalds 	 */
2427c854363eSDave Chinner 	if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2428a3f74ffbSDavid Chinner 		xfs_iunpin_nowait(ip);
2429a3f74ffbSDavid Chinner 		xfs_ifunlock(ip);
2430a3f74ffbSDavid Chinner 		return EAGAIN;
2431a3f74ffbSDavid Chinner 	}
24321da177e4SLinus Torvalds 	xfs_iunpin_wait(ip);
24331da177e4SLinus Torvalds 
24341da177e4SLinus Torvalds 	/*
24354b6a4688SDave Chinner 	 * For stale inodes we cannot rely on the backing buffer remaining
24364b6a4688SDave Chinner 	 * stale in cache for the remaining life of the stale inode and so
24374b6a4688SDave Chinner 	 * xfs_itobp() below may give us a buffer that no longer contains
24384b6a4688SDave Chinner 	 * inodes below. We have to check this after ensuring the inode is
24394b6a4688SDave Chinner 	 * unpinned so that it is safe to reclaim the stale inode after the
24404b6a4688SDave Chinner 	 * flush call.
24414b6a4688SDave Chinner 	 */
24424b6a4688SDave Chinner 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
24434b6a4688SDave Chinner 		xfs_ifunlock(ip);
24444b6a4688SDave Chinner 		return 0;
24454b6a4688SDave Chinner 	}
24464b6a4688SDave Chinner 
24474b6a4688SDave Chinner 	/*
24481da177e4SLinus Torvalds 	 * This may have been unpinned because the filesystem is shutting
24491da177e4SLinus Torvalds 	 * down forcibly. If that's the case we must not write this inode
24501da177e4SLinus Torvalds 	 * to disk, because the log record didn't make it to disk!
24511da177e4SLinus Torvalds 	 */
24521da177e4SLinus Torvalds 	if (XFS_FORCED_SHUTDOWN(mp)) {
24531da177e4SLinus Torvalds 		ip->i_update_core = 0;
24541da177e4SLinus Torvalds 		if (iip)
24551da177e4SLinus Torvalds 			iip->ili_format.ilf_fields = 0;
24561da177e4SLinus Torvalds 		xfs_ifunlock(ip);
24571da177e4SLinus Torvalds 		return XFS_ERROR(EIO);
24581da177e4SLinus Torvalds 	}
24591da177e4SLinus Torvalds 
24601da177e4SLinus Torvalds 	/*
2461a3f74ffbSDavid Chinner 	 * Get the buffer containing the on-disk inode.
2462a3f74ffbSDavid Chinner 	 */
246376d8b277SChristoph Hellwig 	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
24641bfd8d04SDave Chinner 				(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
2465a3f74ffbSDavid Chinner 	if (error || !bp) {
2466a3f74ffbSDavid Chinner 		xfs_ifunlock(ip);
2467a3f74ffbSDavid Chinner 		return error;
2468a3f74ffbSDavid Chinner 	}
2469a3f74ffbSDavid Chinner 
2470a3f74ffbSDavid Chinner 	/*
24711da177e4SLinus Torvalds 	 * First flush out the inode that xfs_iflush was called with.
24721da177e4SLinus Torvalds 	 */
24731da177e4SLinus Torvalds 	error = xfs_iflush_int(ip, bp);
2474bad55843SDavid Chinner 	if (error)
24751da177e4SLinus Torvalds 		goto corrupt_out;
24761da177e4SLinus Torvalds 
24771da177e4SLinus Torvalds 	/*
2478a3f74ffbSDavid Chinner 	 * If the buffer is pinned then push on the log now so we won't
2479a3f74ffbSDavid Chinner 	 * get stuck waiting in the write for too long.
2480a3f74ffbSDavid Chinner 	 */
2481811e64c7SChandra Seetharaman 	if (xfs_buf_ispinned(bp))
2482a14a348bSChristoph Hellwig 		xfs_log_force(mp, 0);
2483a3f74ffbSDavid Chinner 
2484a3f74ffbSDavid Chinner 	/*
24851da177e4SLinus Torvalds 	 * inode clustering:
24861da177e4SLinus Torvalds 	 * see if other inodes can be gathered into this write
24871da177e4SLinus Torvalds 	 */
2488bad55843SDavid Chinner 	error = xfs_iflush_cluster(ip, bp);
2489bad55843SDavid Chinner 	if (error)
24901da177e4SLinus Torvalds 		goto cluster_corrupt_out;
24911da177e4SLinus Torvalds 
2492c854363eSDave Chinner 	if (flags & SYNC_WAIT)
2493c2b006c1SChristoph Hellwig 		error = xfs_bwrite(bp);
2494c2b006c1SChristoph Hellwig 	else
249561551f1eSChristoph Hellwig 		xfs_buf_delwri_queue(bp);
2496c2b006c1SChristoph Hellwig 
249761551f1eSChristoph Hellwig 	xfs_buf_relse(bp);
24981da177e4SLinus Torvalds 	return error;
24991da177e4SLinus Torvalds 
25001da177e4SLinus Torvalds corrupt_out:
25011da177e4SLinus Torvalds 	xfs_buf_relse(bp);
25027d04a335SNathan Scott 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
25031da177e4SLinus Torvalds cluster_corrupt_out:
25041da177e4SLinus Torvalds 	/*
25051da177e4SLinus Torvalds 	 * Unlocks the flush lock
25061da177e4SLinus Torvalds 	 */
2507bad55843SDavid Chinner 	xfs_iflush_abort(ip);
25081da177e4SLinus Torvalds 	return XFS_ERROR(EFSCORRUPTED);
25091da177e4SLinus Torvalds }
25101da177e4SLinus Torvalds 
25111da177e4SLinus Torvalds 
25121da177e4SLinus Torvalds STATIC int
25131da177e4SLinus Torvalds xfs_iflush_int(
25141da177e4SLinus Torvalds 	xfs_inode_t		*ip,
25151da177e4SLinus Torvalds 	xfs_buf_t		*bp)
25161da177e4SLinus Torvalds {
25171da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
25181da177e4SLinus Torvalds 	xfs_dinode_t		*dip;
25191da177e4SLinus Torvalds 	xfs_mount_t		*mp;
25201da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG
25211da177e4SLinus Torvalds 	int			first;
25221da177e4SLinus Torvalds #endif
25231da177e4SLinus Torvalds 
2524579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2525c63942d3SDavid Chinner 	ASSERT(!completion_done(&ip->i_flush));
25261da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
25271da177e4SLinus Torvalds 	       ip->i_d.di_nextents > ip->i_df.if_ext_max);
25281da177e4SLinus Torvalds 
25291da177e4SLinus Torvalds 	iip = ip->i_itemp;
25301da177e4SLinus Torvalds 	mp = ip->i_mount;
25311da177e4SLinus Torvalds 
25321da177e4SLinus Torvalds 	/* set *dip = inode's place in the buffer */
253392bfc6e7SChristoph Hellwig 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
25341da177e4SLinus Torvalds 
25351da177e4SLinus Torvalds 	/*
25361da177e4SLinus Torvalds 	 * Clear i_update_core before copying out the data.
25371da177e4SLinus Torvalds 	 * This is for coordination with our timestamp updates
25381da177e4SLinus Torvalds 	 * that don't hold the inode lock. They will always
25391da177e4SLinus Torvalds 	 * update the timestamps BEFORE setting i_update_core,
25401da177e4SLinus Torvalds 	 * so if we clear i_update_core after they set it we
25411da177e4SLinus Torvalds 	 * are guaranteed to see their updates to the timestamps.
25421da177e4SLinus Torvalds 	 * I believe that this depends on strongly ordered memory
25431da177e4SLinus Torvalds 	 * semantics, but we have that.  We use the SYNCHRONIZE
25441da177e4SLinus Torvalds 	 * macro to make sure that the compiler does not reorder
25451da177e4SLinus Torvalds 	 * the i_update_core access below the data copy below.
25461da177e4SLinus Torvalds 	 */
25471da177e4SLinus Torvalds 	ip->i_update_core = 0;
25481da177e4SLinus Torvalds 	SYNCHRONIZE();
25491da177e4SLinus Torvalds 
255042fe2b1fSChristoph Hellwig 	/*
2551f9581b14SChristoph Hellwig 	 * Make sure to get the latest timestamps from the Linux inode.
255242fe2b1fSChristoph Hellwig 	 */
2553f9581b14SChristoph Hellwig 	xfs_synchronize_times(ip);
255442fe2b1fSChristoph Hellwig 
255569ef921bSChristoph Hellwig 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
25561da177e4SLinus Torvalds 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
25576a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
25586a19d939SDave Chinner 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
25596a19d939SDave Chinner 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
25601da177e4SLinus Torvalds 		goto corrupt_out;
25611da177e4SLinus Torvalds 	}
25621da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
25631da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
25646a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
25656a19d939SDave Chinner 			"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
25666a19d939SDave Chinner 			__func__, ip->i_ino, ip, ip->i_d.di_magic);
25671da177e4SLinus Torvalds 		goto corrupt_out;
25681da177e4SLinus Torvalds 	}
2569abbede1bSAl Viro 	if (S_ISREG(ip->i_d.di_mode)) {
25701da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
25711da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
25721da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
25731da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
25746a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
25756a19d939SDave Chinner 				"%s: Bad regular inode %Lu, ptr 0x%p",
25766a19d939SDave Chinner 				__func__, ip->i_ino, ip);
25771da177e4SLinus Torvalds 			goto corrupt_out;
25781da177e4SLinus Torvalds 		}
2579abbede1bSAl Viro 	} else if (S_ISDIR(ip->i_d.di_mode)) {
25801da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
25811da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
25821da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
25831da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
25841da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
25856a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
25866a19d939SDave Chinner 				"%s: Bad directory inode %Lu, ptr 0x%p",
25876a19d939SDave Chinner 				__func__, ip->i_ino, ip);
25881da177e4SLinus Torvalds 			goto corrupt_out;
25891da177e4SLinus Torvalds 		}
25901da177e4SLinus Torvalds 	}
25911da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
25921da177e4SLinus Torvalds 				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
25931da177e4SLinus Torvalds 				XFS_RANDOM_IFLUSH_5)) {
25946a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
25956a19d939SDave Chinner 			"%s: detected corrupt incore inode %Lu, "
25966a19d939SDave Chinner 			"total extents = %d, nblocks = %Ld, ptr 0x%p",
25976a19d939SDave Chinner 			__func__, ip->i_ino,
25981da177e4SLinus Torvalds 			ip->i_d.di_nextents + ip->i_d.di_anextents,
25996a19d939SDave Chinner 			ip->i_d.di_nblocks, ip);
26001da177e4SLinus Torvalds 		goto corrupt_out;
26011da177e4SLinus Torvalds 	}
26021da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
26031da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
26046a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
26056a19d939SDave Chinner 			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
26066a19d939SDave Chinner 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
26071da177e4SLinus Torvalds 		goto corrupt_out;
26081da177e4SLinus Torvalds 	}
26091da177e4SLinus Torvalds 	/*
26101da177e4SLinus Torvalds 	 * bump the flush iteration count, used to detect flushes which
26111da177e4SLinus Torvalds 	 * postdate a log record during recovery.
26121da177e4SLinus Torvalds 	 */
26131da177e4SLinus Torvalds 
26141da177e4SLinus Torvalds 	ip->i_d.di_flushiter++;
26151da177e4SLinus Torvalds 
26161da177e4SLinus Torvalds 	/*
26171da177e4SLinus Torvalds 	 * Copy the dirty parts of the inode into the on-disk
26181da177e4SLinus Torvalds 	 * inode.  We always copy out the core of the inode,
26191da177e4SLinus Torvalds 	 * because if the inode is dirty at all the core must
26201da177e4SLinus Torvalds 	 * be.
26211da177e4SLinus Torvalds 	 */
262281591fe2SChristoph Hellwig 	xfs_dinode_to_disk(dip, &ip->i_d);
26231da177e4SLinus Torvalds 
26241da177e4SLinus Torvalds 	/* Wrap, we never let the log put out DI_MAX_FLUSH */
26251da177e4SLinus Torvalds 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
26261da177e4SLinus Torvalds 		ip->i_d.di_flushiter = 0;
26271da177e4SLinus Torvalds 
26281da177e4SLinus Torvalds 	/*
26291da177e4SLinus Torvalds 	 * If this is really an old format inode and the superblock version
26301da177e4SLinus Torvalds 	 * has not been updated to support only new format inodes, then
26311da177e4SLinus Torvalds 	 * convert back to the old inode format.  If the superblock version
26321da177e4SLinus Torvalds 	 * has been updated, then make the conversion permanent.
26331da177e4SLinus Torvalds 	 */
263451ce16d5SChristoph Hellwig 	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
263551ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
263662118709SEric Sandeen 		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
26371da177e4SLinus Torvalds 			/*
26381da177e4SLinus Torvalds 			 * Convert it back.
26391da177e4SLinus Torvalds 			 */
26401da177e4SLinus Torvalds 			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
264181591fe2SChristoph Hellwig 			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
26421da177e4SLinus Torvalds 		} else {
26431da177e4SLinus Torvalds 			/*
26441da177e4SLinus Torvalds 			 * The superblock version has already been bumped,
26451da177e4SLinus Torvalds 			 * so just make the conversion to the new inode
26461da177e4SLinus Torvalds 			 * format permanent.
26471da177e4SLinus Torvalds 			 */
264851ce16d5SChristoph Hellwig 			ip->i_d.di_version = 2;
264951ce16d5SChristoph Hellwig 			dip->di_version = 2;
26501da177e4SLinus Torvalds 			ip->i_d.di_onlink = 0;
265181591fe2SChristoph Hellwig 			dip->di_onlink = 0;
26521da177e4SLinus Torvalds 			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
265381591fe2SChristoph Hellwig 			memset(&(dip->di_pad[0]), 0,
265481591fe2SChristoph Hellwig 			      sizeof(dip->di_pad));
26556743099cSArkadiusz Mi?kiewicz 			ASSERT(xfs_get_projid(ip) == 0);
26561da177e4SLinus Torvalds 		}
26571da177e4SLinus Torvalds 	}
26581da177e4SLinus Torvalds 
2659e4ac967bSDavid Chinner 	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
2660e4ac967bSDavid Chinner 	if (XFS_IFORK_Q(ip))
2661e4ac967bSDavid Chinner 		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
26621da177e4SLinus Torvalds 	xfs_inobp_check(mp, bp);
26631da177e4SLinus Torvalds 
26641da177e4SLinus Torvalds 	/*
26651da177e4SLinus Torvalds 	 * We've recorded everything logged in the inode, so we'd
26661da177e4SLinus Torvalds 	 * like to clear the ilf_fields bits so we don't log and
26671da177e4SLinus Torvalds 	 * flush things unnecessarily.  However, we can't stop
26681da177e4SLinus Torvalds 	 * logging all this information until the data we've copied
26691da177e4SLinus Torvalds 	 * into the disk buffer is written to disk.  If we did we might
26701da177e4SLinus Torvalds 	 * overwrite the copy of the inode in the log with all the
26711da177e4SLinus Torvalds 	 * data after re-logging only part of it, and in the face of
26721da177e4SLinus Torvalds 	 * a crash we wouldn't have all the data we need to recover.
26731da177e4SLinus Torvalds 	 *
26741da177e4SLinus Torvalds 	 * What we do is move the bits to the ili_last_fields field.
26751da177e4SLinus Torvalds 	 * When logging the inode, these bits are moved back to the
26761da177e4SLinus Torvalds 	 * ilf_fields field.  In the xfs_iflush_done() routine we
26771da177e4SLinus Torvalds 	 * clear ili_last_fields, since we know that the information
26781da177e4SLinus Torvalds 	 * those bits represent is permanently on disk.  As long as
26791da177e4SLinus Torvalds 	 * the flush completes before the inode is logged again, then
26801da177e4SLinus Torvalds 	 * both ilf_fields and ili_last_fields will be cleared.
26811da177e4SLinus Torvalds 	 *
26821da177e4SLinus Torvalds 	 * We can play with the ilf_fields bits here, because the inode
26831da177e4SLinus Torvalds 	 * lock must be held exclusively in order to set bits there
26841da177e4SLinus Torvalds 	 * and the flush lock protects the ili_last_fields bits.
26851da177e4SLinus Torvalds 	 * Set ili_logged so the flush done
26861da177e4SLinus Torvalds 	 * routine can tell whether or not to look in the AIL.
26871da177e4SLinus Torvalds 	 * Also, store the current LSN of the inode so that we can tell
26881da177e4SLinus Torvalds 	 * whether the item has moved in the AIL from xfs_iflush_done().
26891da177e4SLinus Torvalds 	 * In order to read the lsn we need the AIL lock, because
26901da177e4SLinus Torvalds 	 * it is a 64 bit value that cannot be read atomically.
26911da177e4SLinus Torvalds 	 */
26921da177e4SLinus Torvalds 	if (iip != NULL && iip->ili_format.ilf_fields != 0) {
26931da177e4SLinus Torvalds 		iip->ili_last_fields = iip->ili_format.ilf_fields;
26941da177e4SLinus Torvalds 		iip->ili_format.ilf_fields = 0;
26951da177e4SLinus Torvalds 		iip->ili_logged = 1;
26961da177e4SLinus Torvalds 
26977b2e2a31SDavid Chinner 		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
26987b2e2a31SDavid Chinner 					&iip->ili_item.li_lsn);
26991da177e4SLinus Torvalds 
27001da177e4SLinus Torvalds 		/*
27011da177e4SLinus Torvalds 		 * Attach the function xfs_iflush_done to the inode's
27021da177e4SLinus Torvalds 		 * buffer.  This will remove the inode from the AIL
27031da177e4SLinus Torvalds 		 * and unlock the inode's flush lock when the inode is
27041da177e4SLinus Torvalds 		 * completely written to disk.
27051da177e4SLinus Torvalds 		 */
2706ca30b2a7SChristoph Hellwig 		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
27071da177e4SLinus Torvalds 
2708adadbeefSChristoph Hellwig 		ASSERT(bp->b_fspriv != NULL);
2709cb669ca5SChristoph Hellwig 		ASSERT(bp->b_iodone != NULL);
27101da177e4SLinus Torvalds 	} else {
27111da177e4SLinus Torvalds 		/*
27121da177e4SLinus Torvalds 		 * We're flushing an inode which is not in the AIL and has
27131da177e4SLinus Torvalds 		 * not been logged but has i_update_core set.  For this
27141da177e4SLinus Torvalds 		 * case we can use a B_DELWRI flush and immediately drop
27151da177e4SLinus Torvalds 		 * the inode flush lock because we can avoid the whole
27161da177e4SLinus Torvalds 		 * AIL state thing.  It's OK to drop the flush lock now,
27171da177e4SLinus Torvalds 		 * because we've already locked the buffer and to do anything
27181da177e4SLinus Torvalds 		 * you really need both.
27191da177e4SLinus Torvalds 		 */
27201da177e4SLinus Torvalds 		if (iip != NULL) {
27211da177e4SLinus Torvalds 			ASSERT(iip->ili_logged == 0);
27221da177e4SLinus Torvalds 			ASSERT(iip->ili_last_fields == 0);
27231da177e4SLinus Torvalds 			ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
27241da177e4SLinus Torvalds 		}
27251da177e4SLinus Torvalds 		xfs_ifunlock(ip);
27261da177e4SLinus Torvalds 	}
27271da177e4SLinus Torvalds 
27281da177e4SLinus Torvalds 	return 0;
27291da177e4SLinus Torvalds 
27301da177e4SLinus Torvalds corrupt_out:
27311da177e4SLinus Torvalds 	return XFS_ERROR(EFSCORRUPTED);
27321da177e4SLinus Torvalds }
27331da177e4SLinus Torvalds 
27344dd2cb4aSChristoph Hellwig void
27354dd2cb4aSChristoph Hellwig xfs_promote_inode(
27364dd2cb4aSChristoph Hellwig 	struct xfs_inode	*ip)
27374dd2cb4aSChristoph Hellwig {
27384dd2cb4aSChristoph Hellwig 	struct xfs_buf		*bp;
27394dd2cb4aSChristoph Hellwig 
27404dd2cb4aSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
27414dd2cb4aSChristoph Hellwig 
27424dd2cb4aSChristoph Hellwig 	bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
27434dd2cb4aSChristoph Hellwig 			ip->i_imap.im_len, XBF_TRYLOCK);
27444dd2cb4aSChristoph Hellwig 	if (!bp)
27454dd2cb4aSChristoph Hellwig 		return;
27464dd2cb4aSChristoph Hellwig 
27474dd2cb4aSChristoph Hellwig 	if (XFS_BUF_ISDELAYWRITE(bp)) {
27484dd2cb4aSChristoph Hellwig 		xfs_buf_delwri_promote(bp);
27494dd2cb4aSChristoph Hellwig 		wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
27504dd2cb4aSChristoph Hellwig 	}
27514dd2cb4aSChristoph Hellwig 
27524dd2cb4aSChristoph Hellwig 	xfs_buf_relse(bp);
27534dd2cb4aSChristoph Hellwig }
27544dd2cb4aSChristoph Hellwig 
27554eea22f0SMandy Kirkconnell /*
27564eea22f0SMandy Kirkconnell  * Return a pointer to the extent record at file index idx.
27574eea22f0SMandy Kirkconnell  */
2758a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *
27594eea22f0SMandy Kirkconnell xfs_iext_get_ext(
27604eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
27614eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx)		/* index of target extent */
27624eea22f0SMandy Kirkconnell {
27634eea22f0SMandy Kirkconnell 	ASSERT(idx >= 0);
276487bef181SChristoph Hellwig 	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
276587bef181SChristoph Hellwig 
27660293ce3aSMandy Kirkconnell 	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
27670293ce3aSMandy Kirkconnell 		return ifp->if_u1.if_ext_irec->er_extbuf;
27680293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
27690293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;		/* irec pointer */
27700293ce3aSMandy Kirkconnell 		int		erp_idx = 0;	/* irec index */
27710293ce3aSMandy Kirkconnell 		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
27720293ce3aSMandy Kirkconnell 
27730293ce3aSMandy Kirkconnell 		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
27740293ce3aSMandy Kirkconnell 		return &erp->er_extbuf[page_idx];
27750293ce3aSMandy Kirkconnell 	} else if (ifp->if_bytes) {
27764eea22f0SMandy Kirkconnell 		return &ifp->if_u1.if_extents[idx];
27774eea22f0SMandy Kirkconnell 	} else {
27784eea22f0SMandy Kirkconnell 		return NULL;
27794eea22f0SMandy Kirkconnell 	}
27804eea22f0SMandy Kirkconnell }
27814eea22f0SMandy Kirkconnell 
27824eea22f0SMandy Kirkconnell /*
27834eea22f0SMandy Kirkconnell  * Insert new item(s) into the extent records for incore inode
27844eea22f0SMandy Kirkconnell  * fork 'ifp'.  'count' new items are inserted at index 'idx'.
27854eea22f0SMandy Kirkconnell  */
27864eea22f0SMandy Kirkconnell void
27874eea22f0SMandy Kirkconnell xfs_iext_insert(
27886ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
27894eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* starting index of new items */
27904eea22f0SMandy Kirkconnell 	xfs_extnum_t	count,		/* number of inserted items */
27916ef35544SChristoph Hellwig 	xfs_bmbt_irec_t	*new,		/* items to insert */
27926ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
27934eea22f0SMandy Kirkconnell {
27946ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
27954eea22f0SMandy Kirkconnell 	xfs_extnum_t	i;		/* extent record index */
27964eea22f0SMandy Kirkconnell 
27970b1b213fSChristoph Hellwig 	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
27980b1b213fSChristoph Hellwig 
27994eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
28004eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, idx, count);
2801a6f64d4aSChristoph Hellwig 	for (i = idx; i < idx + count; i++, new++)
2802a6f64d4aSChristoph Hellwig 		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
28034eea22f0SMandy Kirkconnell }
28044eea22f0SMandy Kirkconnell 
28054eea22f0SMandy Kirkconnell /*
28064eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
28074eea22f0SMandy Kirkconnell  * extents needs to be increased. The ext_diff parameter stores the
28084eea22f0SMandy Kirkconnell  * number of new extents being added and the idx parameter contains
28094eea22f0SMandy Kirkconnell  * the extent index where the new extents will be added. If the new
28104eea22f0SMandy Kirkconnell  * extents are being appended, then we just need to (re)allocate and
28114eea22f0SMandy Kirkconnell  * initialize the space. Otherwise, if the new extents are being
28124eea22f0SMandy Kirkconnell  * inserted into the middle of the existing entries, a bit more work
28134eea22f0SMandy Kirkconnell  * is required to make room for the new extents to be inserted. The
28144eea22f0SMandy Kirkconnell  * caller is responsible for filling in the new extent entries upon
28154eea22f0SMandy Kirkconnell  * return.
28164eea22f0SMandy Kirkconnell  */
28174eea22f0SMandy Kirkconnell void
28184eea22f0SMandy Kirkconnell xfs_iext_add(
28194eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
28204eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin adding exts */
2821c41564b5SNathan Scott 	int		ext_diff)	/* number of extents to add */
28224eea22f0SMandy Kirkconnell {
28234eea22f0SMandy Kirkconnell 	int		byte_diff;	/* new bytes being added */
28244eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after adding */
28254eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
28264eea22f0SMandy Kirkconnell 
28274eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
28284eea22f0SMandy Kirkconnell 	ASSERT((idx >= 0) && (idx <= nextents));
28294eea22f0SMandy Kirkconnell 	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
28304eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes + byte_diff;
28314eea22f0SMandy Kirkconnell 	/*
28324eea22f0SMandy Kirkconnell 	 * If the new number of extents (nextents + ext_diff)
28334eea22f0SMandy Kirkconnell 	 * fits inside the inode, then continue to use the inline
28344eea22f0SMandy Kirkconnell 	 * extent buffer.
28354eea22f0SMandy Kirkconnell 	 */
28364eea22f0SMandy Kirkconnell 	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
28374eea22f0SMandy Kirkconnell 		if (idx < nextents) {
28384eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
28394eea22f0SMandy Kirkconnell 				&ifp->if_u2.if_inline_ext[idx],
28404eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
28414eea22f0SMandy Kirkconnell 			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
28424eea22f0SMandy Kirkconnell 		}
28434eea22f0SMandy Kirkconnell 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
28444eea22f0SMandy Kirkconnell 		ifp->if_real_bytes = 0;
28454eea22f0SMandy Kirkconnell 	}
28464eea22f0SMandy Kirkconnell 	/*
28474eea22f0SMandy Kirkconnell 	 * Otherwise use a linear (direct) extent list.
28484eea22f0SMandy Kirkconnell 	 * If the extents are currently inside the inode,
28494eea22f0SMandy Kirkconnell 	 * xfs_iext_realloc_direct will switch us from
28504eea22f0SMandy Kirkconnell 	 * inline to direct extent allocation mode.
28514eea22f0SMandy Kirkconnell 	 */
28520293ce3aSMandy Kirkconnell 	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
28534eea22f0SMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, new_size);
28544eea22f0SMandy Kirkconnell 		if (idx < nextents) {
28554eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
28564eea22f0SMandy Kirkconnell 				&ifp->if_u1.if_extents[idx],
28574eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
28584eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
28594eea22f0SMandy Kirkconnell 		}
28604eea22f0SMandy Kirkconnell 	}
28610293ce3aSMandy Kirkconnell 	/* Indirection array */
28620293ce3aSMandy Kirkconnell 	else {
28630293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;
28640293ce3aSMandy Kirkconnell 		int		erp_idx = 0;
28650293ce3aSMandy Kirkconnell 		int		page_idx = idx;
28660293ce3aSMandy Kirkconnell 
28670293ce3aSMandy Kirkconnell 		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
28680293ce3aSMandy Kirkconnell 		if (ifp->if_flags & XFS_IFEXTIREC) {
28690293ce3aSMandy Kirkconnell 			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
28700293ce3aSMandy Kirkconnell 		} else {
28710293ce3aSMandy Kirkconnell 			xfs_iext_irec_init(ifp);
28720293ce3aSMandy Kirkconnell 			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
28730293ce3aSMandy Kirkconnell 			erp = ifp->if_u1.if_ext_irec;
28740293ce3aSMandy Kirkconnell 		}
28750293ce3aSMandy Kirkconnell 		/* Extents fit in target extent page */
28760293ce3aSMandy Kirkconnell 		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
28770293ce3aSMandy Kirkconnell 			if (page_idx < erp->er_extcount) {
28780293ce3aSMandy Kirkconnell 				memmove(&erp->er_extbuf[page_idx + ext_diff],
28790293ce3aSMandy Kirkconnell 					&erp->er_extbuf[page_idx],
28800293ce3aSMandy Kirkconnell 					(erp->er_extcount - page_idx) *
28810293ce3aSMandy Kirkconnell 					sizeof(xfs_bmbt_rec_t));
28820293ce3aSMandy Kirkconnell 				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
28830293ce3aSMandy Kirkconnell 			}
28840293ce3aSMandy Kirkconnell 			erp->er_extcount += ext_diff;
28850293ce3aSMandy Kirkconnell 			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
28860293ce3aSMandy Kirkconnell 		}
28870293ce3aSMandy Kirkconnell 		/* Insert a new extent page */
28880293ce3aSMandy Kirkconnell 		else if (erp) {
28890293ce3aSMandy Kirkconnell 			xfs_iext_add_indirect_multi(ifp,
28900293ce3aSMandy Kirkconnell 				erp_idx, page_idx, ext_diff);
28910293ce3aSMandy Kirkconnell 		}
28920293ce3aSMandy Kirkconnell 		/*
28930293ce3aSMandy Kirkconnell 		 * If extent(s) are being appended to the last page in
28940293ce3aSMandy Kirkconnell 		 * the indirection array and the new extent(s) don't fit
28950293ce3aSMandy Kirkconnell 		 * in the page, then erp is NULL and erp_idx is set to
28960293ce3aSMandy Kirkconnell 		 * the next index needed in the indirection array.
28970293ce3aSMandy Kirkconnell 		 */
28980293ce3aSMandy Kirkconnell 		else {
28990293ce3aSMandy Kirkconnell 			int	count = ext_diff;
29000293ce3aSMandy Kirkconnell 
29010293ce3aSMandy Kirkconnell 			while (count) {
29020293ce3aSMandy Kirkconnell 				erp = xfs_iext_irec_new(ifp, erp_idx);
29030293ce3aSMandy Kirkconnell 				erp->er_extcount = count;
29040293ce3aSMandy Kirkconnell 				count -= MIN(count, (int)XFS_LINEAR_EXTS);
29050293ce3aSMandy Kirkconnell 				if (count) {
29060293ce3aSMandy Kirkconnell 					erp_idx++;
29070293ce3aSMandy Kirkconnell 				}
29080293ce3aSMandy Kirkconnell 			}
29090293ce3aSMandy Kirkconnell 		}
29100293ce3aSMandy Kirkconnell 	}
29114eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
29124eea22f0SMandy Kirkconnell }
29134eea22f0SMandy Kirkconnell 
29144eea22f0SMandy Kirkconnell /*
29150293ce3aSMandy Kirkconnell  * This is called when incore extents are being added to the indirection
29160293ce3aSMandy Kirkconnell  * array and the new extents do not fit in the target extent list. The
29170293ce3aSMandy Kirkconnell  * erp_idx parameter contains the irec index for the target extent list
29180293ce3aSMandy Kirkconnell  * in the indirection array, and the idx parameter contains the extent
29190293ce3aSMandy Kirkconnell  * index within the list. The number of extents being added is stored
29200293ce3aSMandy Kirkconnell  * in the count parameter.
29210293ce3aSMandy Kirkconnell  *
29220293ce3aSMandy Kirkconnell  *    |-------|   |-------|
29230293ce3aSMandy Kirkconnell  *    |       |   |       |    idx - number of extents before idx
29240293ce3aSMandy Kirkconnell  *    |  idx  |   | count |
29250293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being inserted at idx
29260293ce3aSMandy Kirkconnell  *    |-------|   |-------|
29270293ce3aSMandy Kirkconnell  *    | count |   | nex2  |    nex2 - number of extents after idx + count
29280293ce3aSMandy Kirkconnell  *    |-------|   |-------|
29290293ce3aSMandy Kirkconnell  */
29300293ce3aSMandy Kirkconnell void
29310293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(
29320293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,			/* inode fork pointer */
29330293ce3aSMandy Kirkconnell 	int		erp_idx,		/* target extent irec index */
29340293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,			/* index within target list */
29350293ce3aSMandy Kirkconnell 	int		count)			/* new extents being added */
29360293ce3aSMandy Kirkconnell {
29370293ce3aSMandy Kirkconnell 	int		byte_diff;		/* new bytes being added */
29380293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
29390293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;		/* number of extents to add */
29400293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;		/* new extents still needed */
29410293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;			/* extents after idx + count */
29420293ce3aSMandy Kirkconnell 	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
29430293ce3aSMandy Kirkconnell 	int		nlists;			/* number of irec's (lists) */
29440293ce3aSMandy Kirkconnell 
29450293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
29460293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
29470293ce3aSMandy Kirkconnell 	nex2 = erp->er_extcount - idx;
29480293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
29490293ce3aSMandy Kirkconnell 
29500293ce3aSMandy Kirkconnell 	/*
29510293ce3aSMandy Kirkconnell 	 * Save second part of target extent list
29520293ce3aSMandy Kirkconnell 	 * (all extents past */
29530293ce3aSMandy Kirkconnell 	if (nex2) {
29540293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
29556785073bSDavid Chinner 		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
29560293ce3aSMandy Kirkconnell 		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
29570293ce3aSMandy Kirkconnell 		erp->er_extcount -= nex2;
29580293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
29590293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[idx], 0, byte_diff);
29600293ce3aSMandy Kirkconnell 	}
29610293ce3aSMandy Kirkconnell 
29620293ce3aSMandy Kirkconnell 	/*
29630293ce3aSMandy Kirkconnell 	 * Add the new extents to the end of the target
29640293ce3aSMandy Kirkconnell 	 * list, then allocate new irec record(s) and
29650293ce3aSMandy Kirkconnell 	 * extent buffer(s) as needed to store the rest
29660293ce3aSMandy Kirkconnell 	 * of the new extents.
29670293ce3aSMandy Kirkconnell 	 */
29680293ce3aSMandy Kirkconnell 	ext_cnt = count;
29690293ce3aSMandy Kirkconnell 	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
29700293ce3aSMandy Kirkconnell 	if (ext_diff) {
29710293ce3aSMandy Kirkconnell 		erp->er_extcount += ext_diff;
29720293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
29730293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
29740293ce3aSMandy Kirkconnell 	}
29750293ce3aSMandy Kirkconnell 	while (ext_cnt) {
29760293ce3aSMandy Kirkconnell 		erp_idx++;
29770293ce3aSMandy Kirkconnell 		erp = xfs_iext_irec_new(ifp, erp_idx);
29780293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
29790293ce3aSMandy Kirkconnell 		erp->er_extcount = ext_diff;
29800293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
29810293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
29820293ce3aSMandy Kirkconnell 	}
29830293ce3aSMandy Kirkconnell 
29840293ce3aSMandy Kirkconnell 	/* Add nex2 extents back to indirection array */
29850293ce3aSMandy Kirkconnell 	if (nex2) {
29860293ce3aSMandy Kirkconnell 		xfs_extnum_t	ext_avail;
29870293ce3aSMandy Kirkconnell 		int		i;
29880293ce3aSMandy Kirkconnell 
29890293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
29900293ce3aSMandy Kirkconnell 		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
29910293ce3aSMandy Kirkconnell 		i = 0;
29920293ce3aSMandy Kirkconnell 		/*
29930293ce3aSMandy Kirkconnell 		 * If nex2 extents fit in the current page, append
29940293ce3aSMandy Kirkconnell 		 * nex2_ep after the new extents.
29950293ce3aSMandy Kirkconnell 		 */
29960293ce3aSMandy Kirkconnell 		if (nex2 <= ext_avail) {
29970293ce3aSMandy Kirkconnell 			i = erp->er_extcount;
29980293ce3aSMandy Kirkconnell 		}
29990293ce3aSMandy Kirkconnell 		/*
30000293ce3aSMandy Kirkconnell 		 * Otherwise, check if space is available in the
30010293ce3aSMandy Kirkconnell 		 * next page.
30020293ce3aSMandy Kirkconnell 		 */
30030293ce3aSMandy Kirkconnell 		else if ((erp_idx < nlists - 1) &&
30040293ce3aSMandy Kirkconnell 			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
30050293ce3aSMandy Kirkconnell 			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
30060293ce3aSMandy Kirkconnell 			erp_idx++;
30070293ce3aSMandy Kirkconnell 			erp++;
30080293ce3aSMandy Kirkconnell 			/* Create a hole for nex2 extents */
30090293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
30100293ce3aSMandy Kirkconnell 				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
30110293ce3aSMandy Kirkconnell 		}
30120293ce3aSMandy Kirkconnell 		/*
30130293ce3aSMandy Kirkconnell 		 * Final choice, create a new extent page for
30140293ce3aSMandy Kirkconnell 		 * nex2 extents.
30150293ce3aSMandy Kirkconnell 		 */
30160293ce3aSMandy Kirkconnell 		else {
30170293ce3aSMandy Kirkconnell 			erp_idx++;
30180293ce3aSMandy Kirkconnell 			erp = xfs_iext_irec_new(ifp, erp_idx);
30190293ce3aSMandy Kirkconnell 		}
30200293ce3aSMandy Kirkconnell 		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3021f0e2d93cSDenys Vlasenko 		kmem_free(nex2_ep);
30220293ce3aSMandy Kirkconnell 		erp->er_extcount += nex2;
30230293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
30240293ce3aSMandy Kirkconnell 	}
30250293ce3aSMandy Kirkconnell }
30260293ce3aSMandy Kirkconnell 
30270293ce3aSMandy Kirkconnell /*
30284eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
30294eea22f0SMandy Kirkconnell  * extents needs to be decreased. The ext_diff parameter stores the
30304eea22f0SMandy Kirkconnell  * number of extents to be removed and the idx parameter contains
30314eea22f0SMandy Kirkconnell  * the extent index where the extents will be removed from.
30320293ce3aSMandy Kirkconnell  *
30330293ce3aSMandy Kirkconnell  * If the amount of space needed has decreased below the linear
30340293ce3aSMandy Kirkconnell  * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
30350293ce3aSMandy Kirkconnell  * extent array.  Otherwise, use kmem_realloc() to adjust the
30360293ce3aSMandy Kirkconnell  * size to what is needed.
30374eea22f0SMandy Kirkconnell  */
30384eea22f0SMandy Kirkconnell void
30394eea22f0SMandy Kirkconnell xfs_iext_remove(
30406ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
30414eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
30426ef35544SChristoph Hellwig 	int		ext_diff,	/* number of extents to remove */
30436ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
30444eea22f0SMandy Kirkconnell {
30456ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
30464eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
30474eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
30484eea22f0SMandy Kirkconnell 
30490b1b213fSChristoph Hellwig 	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
30500b1b213fSChristoph Hellwig 
30514eea22f0SMandy Kirkconnell 	ASSERT(ext_diff > 0);
30524eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
30534eea22f0SMandy Kirkconnell 	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
30544eea22f0SMandy Kirkconnell 
30554eea22f0SMandy Kirkconnell 	if (new_size == 0) {
30564eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
30570293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
30580293ce3aSMandy Kirkconnell 		xfs_iext_remove_indirect(ifp, idx, ext_diff);
30594eea22f0SMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
30604eea22f0SMandy Kirkconnell 		xfs_iext_remove_direct(ifp, idx, ext_diff);
30614eea22f0SMandy Kirkconnell 	} else {
30624eea22f0SMandy Kirkconnell 		xfs_iext_remove_inline(ifp, idx, ext_diff);
30634eea22f0SMandy Kirkconnell 	}
30644eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
30654eea22f0SMandy Kirkconnell }
30664eea22f0SMandy Kirkconnell 
30674eea22f0SMandy Kirkconnell /*
30684eea22f0SMandy Kirkconnell  * This removes ext_diff extents from the inline buffer, beginning
30694eea22f0SMandy Kirkconnell  * at extent index idx.
30704eea22f0SMandy Kirkconnell  */
30714eea22f0SMandy Kirkconnell void
30724eea22f0SMandy Kirkconnell xfs_iext_remove_inline(
30734eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
30744eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
30754eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
30764eea22f0SMandy Kirkconnell {
30774eea22f0SMandy Kirkconnell 	int		nextents;	/* number of extents in file */
30784eea22f0SMandy Kirkconnell 
30790293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
30804eea22f0SMandy Kirkconnell 	ASSERT(idx < XFS_INLINE_EXTS);
30814eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
30824eea22f0SMandy Kirkconnell 	ASSERT(((nextents - ext_diff) > 0) &&
30834eea22f0SMandy Kirkconnell 		(nextents - ext_diff) < XFS_INLINE_EXTS);
30844eea22f0SMandy Kirkconnell 
30854eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
30864eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u2.if_inline_ext[idx],
30874eea22f0SMandy Kirkconnell 			&ifp->if_u2.if_inline_ext[idx + ext_diff],
30884eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
30894eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
30904eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
30914eea22f0SMandy Kirkconnell 			0, ext_diff * sizeof(xfs_bmbt_rec_t));
30924eea22f0SMandy Kirkconnell 	} else {
30934eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[idx], 0,
30944eea22f0SMandy Kirkconnell 			ext_diff * sizeof(xfs_bmbt_rec_t));
30954eea22f0SMandy Kirkconnell 	}
30964eea22f0SMandy Kirkconnell }
30974eea22f0SMandy Kirkconnell 
30984eea22f0SMandy Kirkconnell /*
30994eea22f0SMandy Kirkconnell  * This removes ext_diff extents from a linear (direct) extent list,
31004eea22f0SMandy Kirkconnell  * beginning at extent index idx. If the extents are being removed
31014eea22f0SMandy Kirkconnell  * from the end of the list (ie. truncate) then we just need to re-
31024eea22f0SMandy Kirkconnell  * allocate the list to remove the extra space. Otherwise, if the
31034eea22f0SMandy Kirkconnell  * extents are being removed from the middle of the existing extent
31044eea22f0SMandy Kirkconnell  * entries, then we first need to move the extent records beginning
31054eea22f0SMandy Kirkconnell  * at idx + ext_diff up in the list to overwrite the records being
31064eea22f0SMandy Kirkconnell  * removed, then remove the extra space via kmem_realloc.
31074eea22f0SMandy Kirkconnell  */
31084eea22f0SMandy Kirkconnell void
31094eea22f0SMandy Kirkconnell xfs_iext_remove_direct(
31104eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
31114eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
31124eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
31134eea22f0SMandy Kirkconnell {
31144eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
31154eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
31164eea22f0SMandy Kirkconnell 
31170293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
31184eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes -
31194eea22f0SMandy Kirkconnell 		(ext_diff * sizeof(xfs_bmbt_rec_t));
31204eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
31214eea22f0SMandy Kirkconnell 
31224eea22f0SMandy Kirkconnell 	if (new_size == 0) {
31234eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
31244eea22f0SMandy Kirkconnell 		return;
31254eea22f0SMandy Kirkconnell 	}
31264eea22f0SMandy Kirkconnell 	/* Move extents up in the list (if needed) */
31274eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
31284eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u1.if_extents[idx],
31294eea22f0SMandy Kirkconnell 			&ifp->if_u1.if_extents[idx + ext_diff],
31304eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
31314eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
31324eea22f0SMandy Kirkconnell 	}
31334eea22f0SMandy Kirkconnell 	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
31344eea22f0SMandy Kirkconnell 		0, ext_diff * sizeof(xfs_bmbt_rec_t));
31354eea22f0SMandy Kirkconnell 	/*
31364eea22f0SMandy Kirkconnell 	 * Reallocate the direct extent list. If the extents
31374eea22f0SMandy Kirkconnell 	 * will fit inside the inode then xfs_iext_realloc_direct
31384eea22f0SMandy Kirkconnell 	 * will switch from direct to inline extent allocation
31394eea22f0SMandy Kirkconnell 	 * mode for us.
31404eea22f0SMandy Kirkconnell 	 */
31414eea22f0SMandy Kirkconnell 	xfs_iext_realloc_direct(ifp, new_size);
31424eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
31434eea22f0SMandy Kirkconnell }
31444eea22f0SMandy Kirkconnell 
31454eea22f0SMandy Kirkconnell /*
31460293ce3aSMandy Kirkconnell  * This is called when incore extents are being removed from the
31470293ce3aSMandy Kirkconnell  * indirection array and the extents being removed span multiple extent
31480293ce3aSMandy Kirkconnell  * buffers. The idx parameter contains the file extent index where we
31490293ce3aSMandy Kirkconnell  * want to begin removing extents, and the count parameter contains
31500293ce3aSMandy Kirkconnell  * how many extents need to be removed.
31510293ce3aSMandy Kirkconnell  *
31520293ce3aSMandy Kirkconnell  *    |-------|   |-------|
31530293ce3aSMandy Kirkconnell  *    | nex1  |   |       |    nex1 - number of extents before idx
31540293ce3aSMandy Kirkconnell  *    |-------|   | count |
31550293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being removed at idx
31560293ce3aSMandy Kirkconnell  *    | count |   |-------|
31570293ce3aSMandy Kirkconnell  *    |       |   | nex2  |    nex2 - number of extents after idx + count
31580293ce3aSMandy Kirkconnell  *    |-------|   |-------|
31590293ce3aSMandy Kirkconnell  */
31600293ce3aSMandy Kirkconnell void
31610293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(
31620293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
31630293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing extents */
31640293ce3aSMandy Kirkconnell 	int		count)		/* number of extents to remove */
31650293ce3aSMandy Kirkconnell {
31660293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
31670293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
31680293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;	/* extents left to remove */
31690293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
31700293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex1;		/* number of extents before idx */
31710293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;		/* extents after idx + count */
31720293ce3aSMandy Kirkconnell 	int		page_idx = idx;	/* index in target extent list */
31730293ce3aSMandy Kirkconnell 
31740293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
31750293ce3aSMandy Kirkconnell 	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
31760293ce3aSMandy Kirkconnell 	ASSERT(erp != NULL);
31770293ce3aSMandy Kirkconnell 	nex1 = page_idx;
31780293ce3aSMandy Kirkconnell 	ext_cnt = count;
31790293ce3aSMandy Kirkconnell 	while (ext_cnt) {
31800293ce3aSMandy Kirkconnell 		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
31810293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
31820293ce3aSMandy Kirkconnell 		/*
31830293ce3aSMandy Kirkconnell 		 * Check for deletion of entire list;
31840293ce3aSMandy Kirkconnell 		 * xfs_iext_irec_remove() updates extent offsets.
31850293ce3aSMandy Kirkconnell 		 */
31860293ce3aSMandy Kirkconnell 		if (ext_diff == erp->er_extcount) {
31870293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
31880293ce3aSMandy Kirkconnell 			ext_cnt -= ext_diff;
31890293ce3aSMandy Kirkconnell 			nex1 = 0;
31900293ce3aSMandy Kirkconnell 			if (ext_cnt) {
31910293ce3aSMandy Kirkconnell 				ASSERT(erp_idx < ifp->if_real_bytes /
31920293ce3aSMandy Kirkconnell 					XFS_IEXT_BUFSZ);
31930293ce3aSMandy Kirkconnell 				erp = &ifp->if_u1.if_ext_irec[erp_idx];
31940293ce3aSMandy Kirkconnell 				nex1 = 0;
31950293ce3aSMandy Kirkconnell 				continue;
31960293ce3aSMandy Kirkconnell 			} else {
31970293ce3aSMandy Kirkconnell 				break;
31980293ce3aSMandy Kirkconnell 			}
31990293ce3aSMandy Kirkconnell 		}
32000293ce3aSMandy Kirkconnell 		/* Move extents up (if needed) */
32010293ce3aSMandy Kirkconnell 		if (nex2) {
32020293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex1],
32030293ce3aSMandy Kirkconnell 				&erp->er_extbuf[nex1 + ext_diff],
32040293ce3aSMandy Kirkconnell 				nex2 * sizeof(xfs_bmbt_rec_t));
32050293ce3aSMandy Kirkconnell 		}
32060293ce3aSMandy Kirkconnell 		/* Zero out rest of page */
32070293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
32080293ce3aSMandy Kirkconnell 			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
32090293ce3aSMandy Kirkconnell 		/* Update remaining counters */
32100293ce3aSMandy Kirkconnell 		erp->er_extcount -= ext_diff;
32110293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
32120293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
32130293ce3aSMandy Kirkconnell 		nex1 = 0;
32140293ce3aSMandy Kirkconnell 		erp_idx++;
32150293ce3aSMandy Kirkconnell 		erp++;
32160293ce3aSMandy Kirkconnell 	}
32170293ce3aSMandy Kirkconnell 	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
32180293ce3aSMandy Kirkconnell 	xfs_iext_irec_compact(ifp);
32190293ce3aSMandy Kirkconnell }
32200293ce3aSMandy Kirkconnell 
32210293ce3aSMandy Kirkconnell /*
32224eea22f0SMandy Kirkconnell  * Create, destroy, or resize a linear (direct) block of extents.
32234eea22f0SMandy Kirkconnell  */
32244eea22f0SMandy Kirkconnell void
32254eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(
32264eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
32274eea22f0SMandy Kirkconnell 	int		new_size)	/* new size of extents */
32284eea22f0SMandy Kirkconnell {
32294eea22f0SMandy Kirkconnell 	int		rnew_size;	/* real new size of extents */
32304eea22f0SMandy Kirkconnell 
32314eea22f0SMandy Kirkconnell 	rnew_size = new_size;
32324eea22f0SMandy Kirkconnell 
32330293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
32340293ce3aSMandy Kirkconnell 		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
32350293ce3aSMandy Kirkconnell 		 (new_size != ifp->if_real_bytes)));
32360293ce3aSMandy Kirkconnell 
32374eea22f0SMandy Kirkconnell 	/* Free extent records */
32384eea22f0SMandy Kirkconnell 	if (new_size == 0) {
32394eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
32404eea22f0SMandy Kirkconnell 	}
32414eea22f0SMandy Kirkconnell 	/* Resize direct extent list and zero any new bytes */
32424eea22f0SMandy Kirkconnell 	else if (ifp->if_real_bytes) {
32434eea22f0SMandy Kirkconnell 		/* Check if extents will fit inside the inode */
32444eea22f0SMandy Kirkconnell 		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
32454eea22f0SMandy Kirkconnell 			xfs_iext_direct_to_inline(ifp, new_size /
32464eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t));
32474eea22f0SMandy Kirkconnell 			ifp->if_bytes = new_size;
32484eea22f0SMandy Kirkconnell 			return;
32494eea22f0SMandy Kirkconnell 		}
325016a087d8SVignesh Babu 		if (!is_power_of_2(new_size)){
325140ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
32524eea22f0SMandy Kirkconnell 		}
32534eea22f0SMandy Kirkconnell 		if (rnew_size != ifp->if_real_bytes) {
3254a6f64d4aSChristoph Hellwig 			ifp->if_u1.if_extents =
32554eea22f0SMandy Kirkconnell 				kmem_realloc(ifp->if_u1.if_extents,
32564eea22f0SMandy Kirkconnell 						rnew_size,
32576785073bSDavid Chinner 						ifp->if_real_bytes, KM_NOFS);
32584eea22f0SMandy Kirkconnell 		}
32594eea22f0SMandy Kirkconnell 		if (rnew_size > ifp->if_real_bytes) {
32604eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
32614eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t)], 0,
32624eea22f0SMandy Kirkconnell 				rnew_size - ifp->if_real_bytes);
32634eea22f0SMandy Kirkconnell 		}
32644eea22f0SMandy Kirkconnell 	}
32654eea22f0SMandy Kirkconnell 	/*
32664eea22f0SMandy Kirkconnell 	 * Switch from the inline extent buffer to a direct
32674eea22f0SMandy Kirkconnell 	 * extent list. Be sure to include the inline extent
32684eea22f0SMandy Kirkconnell 	 * bytes in new_size.
32694eea22f0SMandy Kirkconnell 	 */
32704eea22f0SMandy Kirkconnell 	else {
32714eea22f0SMandy Kirkconnell 		new_size += ifp->if_bytes;
327216a087d8SVignesh Babu 		if (!is_power_of_2(new_size)) {
327340ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
32744eea22f0SMandy Kirkconnell 		}
32754eea22f0SMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, rnew_size);
32764eea22f0SMandy Kirkconnell 	}
32774eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = rnew_size;
32784eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
32794eea22f0SMandy Kirkconnell }
32804eea22f0SMandy Kirkconnell 
32814eea22f0SMandy Kirkconnell /*
32824eea22f0SMandy Kirkconnell  * Switch from linear (direct) extent records to inline buffer.
32834eea22f0SMandy Kirkconnell  */
32844eea22f0SMandy Kirkconnell void
32854eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(
32864eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
32874eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents)	/* number of extents in file */
32884eea22f0SMandy Kirkconnell {
32894eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
32904eea22f0SMandy Kirkconnell 	ASSERT(nextents <= XFS_INLINE_EXTS);
32914eea22f0SMandy Kirkconnell 	/*
32924eea22f0SMandy Kirkconnell 	 * The inline buffer was zeroed when we switched
32934eea22f0SMandy Kirkconnell 	 * from inline to direct extent allocation mode,
32944eea22f0SMandy Kirkconnell 	 * so we don't need to clear it here.
32954eea22f0SMandy Kirkconnell 	 */
32964eea22f0SMandy Kirkconnell 	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
32974eea22f0SMandy Kirkconnell 		nextents * sizeof(xfs_bmbt_rec_t));
3298f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_extents);
32994eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
33004eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
33014eea22f0SMandy Kirkconnell }
33024eea22f0SMandy Kirkconnell 
33034eea22f0SMandy Kirkconnell /*
33044eea22f0SMandy Kirkconnell  * Switch from inline buffer to linear (direct) extent records.
33054eea22f0SMandy Kirkconnell  * new_size should already be rounded up to the next power of 2
33064eea22f0SMandy Kirkconnell  * by the caller (when appropriate), so use new_size as it is.
33074eea22f0SMandy Kirkconnell  * However, since new_size may be rounded up, we can't update
33084eea22f0SMandy Kirkconnell  * if_bytes here. It is the caller's responsibility to update
33094eea22f0SMandy Kirkconnell  * if_bytes upon return.
33104eea22f0SMandy Kirkconnell  */
33114eea22f0SMandy Kirkconnell void
33124eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(
33134eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33144eea22f0SMandy Kirkconnell 	int		new_size)	/* number of extents in file */
33154eea22f0SMandy Kirkconnell {
33166785073bSDavid Chinner 	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
33174eea22f0SMandy Kirkconnell 	memset(ifp->if_u1.if_extents, 0, new_size);
33184eea22f0SMandy Kirkconnell 	if (ifp->if_bytes) {
33194eea22f0SMandy Kirkconnell 		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
33204eea22f0SMandy Kirkconnell 			ifp->if_bytes);
33214eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
33224eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
33234eea22f0SMandy Kirkconnell 	}
33244eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = new_size;
33254eea22f0SMandy Kirkconnell }
33264eea22f0SMandy Kirkconnell 
33274eea22f0SMandy Kirkconnell /*
33280293ce3aSMandy Kirkconnell  * Resize an extent indirection array to new_size bytes.
33290293ce3aSMandy Kirkconnell  */
3330d96f8f89SEric Sandeen STATIC void
33310293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(
33320293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33330293ce3aSMandy Kirkconnell 	int		new_size)	/* new indirection array size */
33340293ce3aSMandy Kirkconnell {
33350293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
33360293ce3aSMandy Kirkconnell 	int		size;		/* current indirection array size */
33370293ce3aSMandy Kirkconnell 
33380293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
33390293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
33400293ce3aSMandy Kirkconnell 	size = nlists * sizeof(xfs_ext_irec_t);
33410293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes);
33420293ce3aSMandy Kirkconnell 	ASSERT((new_size >= 0) && (new_size != size));
33430293ce3aSMandy Kirkconnell 	if (new_size == 0) {
33440293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
33450293ce3aSMandy Kirkconnell 	} else {
33460293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
33470293ce3aSMandy Kirkconnell 			kmem_realloc(ifp->if_u1.if_ext_irec,
33486785073bSDavid Chinner 				new_size, size, KM_NOFS);
33490293ce3aSMandy Kirkconnell 	}
33500293ce3aSMandy Kirkconnell }
33510293ce3aSMandy Kirkconnell 
33520293ce3aSMandy Kirkconnell /*
33530293ce3aSMandy Kirkconnell  * Switch from indirection array to linear (direct) extent allocations.
33540293ce3aSMandy Kirkconnell  */
3355d96f8f89SEric Sandeen STATIC void
33560293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(
33570293ce3aSMandy Kirkconnell 	 xfs_ifork_t	*ifp)		/* inode fork pointer */
33580293ce3aSMandy Kirkconnell {
3359a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
33600293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
33610293ce3aSMandy Kirkconnell 	int		size;		/* size of file extents */
33620293ce3aSMandy Kirkconnell 
33630293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
33640293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
33650293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
33660293ce3aSMandy Kirkconnell 	size = nextents * sizeof(xfs_bmbt_rec_t);
33670293ce3aSMandy Kirkconnell 
336871a8c87fSLachlan McIlroy 	xfs_iext_irec_compact_pages(ifp);
33690293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
33700293ce3aSMandy Kirkconnell 
33710293ce3aSMandy Kirkconnell 	ep = ifp->if_u1.if_ext_irec->er_extbuf;
3372f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_ext_irec);
33730293ce3aSMandy Kirkconnell 	ifp->if_flags &= ~XFS_IFEXTIREC;
33740293ce3aSMandy Kirkconnell 	ifp->if_u1.if_extents = ep;
33750293ce3aSMandy Kirkconnell 	ifp->if_bytes = size;
33760293ce3aSMandy Kirkconnell 	if (nextents < XFS_LINEAR_EXTS) {
33770293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, size);
33780293ce3aSMandy Kirkconnell 	}
33790293ce3aSMandy Kirkconnell }
33800293ce3aSMandy Kirkconnell 
33810293ce3aSMandy Kirkconnell /*
33824eea22f0SMandy Kirkconnell  * Free incore file extents.
33834eea22f0SMandy Kirkconnell  */
33844eea22f0SMandy Kirkconnell void
33854eea22f0SMandy Kirkconnell xfs_iext_destroy(
33864eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
33874eea22f0SMandy Kirkconnell {
33880293ce3aSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
33890293ce3aSMandy Kirkconnell 		int	erp_idx;
33900293ce3aSMandy Kirkconnell 		int	nlists;
33910293ce3aSMandy Kirkconnell 
33920293ce3aSMandy Kirkconnell 		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
33930293ce3aSMandy Kirkconnell 		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
33940293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
33950293ce3aSMandy Kirkconnell 		}
33960293ce3aSMandy Kirkconnell 		ifp->if_flags &= ~XFS_IFEXTIREC;
33970293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
3398f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_extents);
33994eea22f0SMandy Kirkconnell 	} else if (ifp->if_bytes) {
34004eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
34014eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
34024eea22f0SMandy Kirkconnell 	}
34034eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = NULL;
34044eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
34054eea22f0SMandy Kirkconnell 	ifp->if_bytes = 0;
34064eea22f0SMandy Kirkconnell }
34070293ce3aSMandy Kirkconnell 
34080293ce3aSMandy Kirkconnell /*
34098867bc9bSMandy Kirkconnell  * Return a pointer to the extent record for file system block bno.
34108867bc9bSMandy Kirkconnell  */
3411a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *			/* pointer to found extent record */
34128867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext(
34138867bc9bSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34148867bc9bSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
34158867bc9bSMandy Kirkconnell 	xfs_extnum_t	*idxp)		/* index of target extent */
34168867bc9bSMandy Kirkconnell {
3417a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
34188867bc9bSMandy Kirkconnell 	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
3419a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
34208867bc9bSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
3421c41564b5SNathan Scott 	int		high;		/* upper boundary in search */
34228867bc9bSMandy Kirkconnell 	xfs_extnum_t	idx = 0;	/* index of target extent */
3423c41564b5SNathan Scott 	int		low;		/* lower boundary in search */
34248867bc9bSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of file extents */
34258867bc9bSMandy Kirkconnell 	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
34268867bc9bSMandy Kirkconnell 
34278867bc9bSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
34288867bc9bSMandy Kirkconnell 	if (nextents == 0) {
34298867bc9bSMandy Kirkconnell 		*idxp = 0;
34308867bc9bSMandy Kirkconnell 		return NULL;
34318867bc9bSMandy Kirkconnell 	}
34328867bc9bSMandy Kirkconnell 	low = 0;
34338867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
34348867bc9bSMandy Kirkconnell 		/* Find target extent list */
34358867bc9bSMandy Kirkconnell 		int	erp_idx = 0;
34368867bc9bSMandy Kirkconnell 		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
34378867bc9bSMandy Kirkconnell 		base = erp->er_extbuf;
34388867bc9bSMandy Kirkconnell 		high = erp->er_extcount - 1;
34398867bc9bSMandy Kirkconnell 	} else {
34408867bc9bSMandy Kirkconnell 		base = ifp->if_u1.if_extents;
34418867bc9bSMandy Kirkconnell 		high = nextents - 1;
34428867bc9bSMandy Kirkconnell 	}
34438867bc9bSMandy Kirkconnell 	/* Binary search extent records */
34448867bc9bSMandy Kirkconnell 	while (low <= high) {
34458867bc9bSMandy Kirkconnell 		idx = (low + high) >> 1;
34468867bc9bSMandy Kirkconnell 		ep = base + idx;
34478867bc9bSMandy Kirkconnell 		startoff = xfs_bmbt_get_startoff(ep);
34488867bc9bSMandy Kirkconnell 		blockcount = xfs_bmbt_get_blockcount(ep);
34498867bc9bSMandy Kirkconnell 		if (bno < startoff) {
34508867bc9bSMandy Kirkconnell 			high = idx - 1;
34518867bc9bSMandy Kirkconnell 		} else if (bno >= startoff + blockcount) {
34528867bc9bSMandy Kirkconnell 			low = idx + 1;
34538867bc9bSMandy Kirkconnell 		} else {
34548867bc9bSMandy Kirkconnell 			/* Convert back to file-based extent index */
34558867bc9bSMandy Kirkconnell 			if (ifp->if_flags & XFS_IFEXTIREC) {
34568867bc9bSMandy Kirkconnell 				idx += erp->er_extoff;
34578867bc9bSMandy Kirkconnell 			}
34588867bc9bSMandy Kirkconnell 			*idxp = idx;
34598867bc9bSMandy Kirkconnell 			return ep;
34608867bc9bSMandy Kirkconnell 		}
34618867bc9bSMandy Kirkconnell 	}
34628867bc9bSMandy Kirkconnell 	/* Convert back to file-based extent index */
34638867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
34648867bc9bSMandy Kirkconnell 		idx += erp->er_extoff;
34658867bc9bSMandy Kirkconnell 	}
34668867bc9bSMandy Kirkconnell 	if (bno >= startoff + blockcount) {
34678867bc9bSMandy Kirkconnell 		if (++idx == nextents) {
34688867bc9bSMandy Kirkconnell 			ep = NULL;
34698867bc9bSMandy Kirkconnell 		} else {
34708867bc9bSMandy Kirkconnell 			ep = xfs_iext_get_ext(ifp, idx);
34718867bc9bSMandy Kirkconnell 		}
34728867bc9bSMandy Kirkconnell 	}
34738867bc9bSMandy Kirkconnell 	*idxp = idx;
34748867bc9bSMandy Kirkconnell 	return ep;
34758867bc9bSMandy Kirkconnell }
34768867bc9bSMandy Kirkconnell 
34778867bc9bSMandy Kirkconnell /*
34780293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
34790293ce3aSMandy Kirkconnell  * extent record for filesystem block bno. Store the index of the
34800293ce3aSMandy Kirkconnell  * target irec in *erp_idxp.
34810293ce3aSMandy Kirkconnell  */
34828867bc9bSMandy Kirkconnell xfs_ext_irec_t *			/* pointer to found extent record */
34830293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec(
34840293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34850293ce3aSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
34860293ce3aSMandy Kirkconnell 	int		*erp_idxp)	/* irec index of target ext list */
34870293ce3aSMandy Kirkconnell {
34880293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
34890293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
34908867bc9bSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
34910293ce3aSMandy Kirkconnell 	int		nlists;		/* number of extent irec's (lists) */
34920293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
34930293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
34940293ce3aSMandy Kirkconnell 
34950293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
34960293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
34970293ce3aSMandy Kirkconnell 	erp_idx = 0;
34980293ce3aSMandy Kirkconnell 	low = 0;
34990293ce3aSMandy Kirkconnell 	high = nlists - 1;
35000293ce3aSMandy Kirkconnell 	while (low <= high) {
35010293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
35020293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
35030293ce3aSMandy Kirkconnell 		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
35040293ce3aSMandy Kirkconnell 		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
35050293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
35060293ce3aSMandy Kirkconnell 		} else if (erp_next && bno >=
35070293ce3aSMandy Kirkconnell 			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
35080293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
35090293ce3aSMandy Kirkconnell 		} else {
35100293ce3aSMandy Kirkconnell 			break;
35110293ce3aSMandy Kirkconnell 		}
35120293ce3aSMandy Kirkconnell 	}
35130293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
35140293ce3aSMandy Kirkconnell 	return erp;
35150293ce3aSMandy Kirkconnell }
35160293ce3aSMandy Kirkconnell 
35170293ce3aSMandy Kirkconnell /*
35180293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
35190293ce3aSMandy Kirkconnell  * extent record at file extent index *idxp. Store the index of the
35200293ce3aSMandy Kirkconnell  * target irec in *erp_idxp and store the page index of the target
35210293ce3aSMandy Kirkconnell  * extent record in *idxp.
35220293ce3aSMandy Kirkconnell  */
35230293ce3aSMandy Kirkconnell xfs_ext_irec_t *
35240293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec(
35250293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
35260293ce3aSMandy Kirkconnell 	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
35270293ce3aSMandy Kirkconnell 	int		*erp_idxp,	/* pointer to target irec */
35280293ce3aSMandy Kirkconnell 	int		realloc)	/* new bytes were just added */
35290293ce3aSMandy Kirkconnell {
35300293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
35310293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
35320293ce3aSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
35330293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
35340293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
35350293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
35360293ce3aSMandy Kirkconnell 	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
35370293ce3aSMandy Kirkconnell 
35380293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
353987bef181SChristoph Hellwig 	ASSERT(page_idx >= 0);
354087bef181SChristoph Hellwig 	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
354187bef181SChristoph Hellwig 	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
354287bef181SChristoph Hellwig 
35430293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
35440293ce3aSMandy Kirkconnell 	erp_idx = 0;
35450293ce3aSMandy Kirkconnell 	low = 0;
35460293ce3aSMandy Kirkconnell 	high = nlists - 1;
35470293ce3aSMandy Kirkconnell 
35480293ce3aSMandy Kirkconnell 	/* Binary search extent irec's */
35490293ce3aSMandy Kirkconnell 	while (low <= high) {
35500293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
35510293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
35520293ce3aSMandy Kirkconnell 		prev = erp_idx > 0 ? erp - 1 : NULL;
35530293ce3aSMandy Kirkconnell 		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
35540293ce3aSMandy Kirkconnell 		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
35550293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
35560293ce3aSMandy Kirkconnell 		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
35570293ce3aSMandy Kirkconnell 			   (page_idx == erp->er_extoff + erp->er_extcount &&
35580293ce3aSMandy Kirkconnell 			    !realloc)) {
35590293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
35600293ce3aSMandy Kirkconnell 		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
35610293ce3aSMandy Kirkconnell 			   erp->er_extcount == XFS_LINEAR_EXTS) {
35620293ce3aSMandy Kirkconnell 			ASSERT(realloc);
35630293ce3aSMandy Kirkconnell 			page_idx = 0;
35640293ce3aSMandy Kirkconnell 			erp_idx++;
35650293ce3aSMandy Kirkconnell 			erp = erp_idx < nlists ? erp + 1 : NULL;
35660293ce3aSMandy Kirkconnell 			break;
35670293ce3aSMandy Kirkconnell 		} else {
35680293ce3aSMandy Kirkconnell 			page_idx -= erp->er_extoff;
35690293ce3aSMandy Kirkconnell 			break;
35700293ce3aSMandy Kirkconnell 		}
35710293ce3aSMandy Kirkconnell 	}
35720293ce3aSMandy Kirkconnell 	*idxp = page_idx;
35730293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
35740293ce3aSMandy Kirkconnell 	return(erp);
35750293ce3aSMandy Kirkconnell }
35760293ce3aSMandy Kirkconnell 
35770293ce3aSMandy Kirkconnell /*
35780293ce3aSMandy Kirkconnell  * Allocate and initialize an indirection array once the space needed
35790293ce3aSMandy Kirkconnell  * for incore extents increases above XFS_IEXT_BUFSZ.
35800293ce3aSMandy Kirkconnell  */
35810293ce3aSMandy Kirkconnell void
35820293ce3aSMandy Kirkconnell xfs_iext_irec_init(
35830293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
35840293ce3aSMandy Kirkconnell {
35850293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
35860293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
35870293ce3aSMandy Kirkconnell 
35880293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
35890293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
35900293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
35910293ce3aSMandy Kirkconnell 
35926785073bSDavid Chinner 	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
35930293ce3aSMandy Kirkconnell 
35940293ce3aSMandy Kirkconnell 	if (nextents == 0) {
35956785073bSDavid Chinner 		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
35960293ce3aSMandy Kirkconnell 	} else if (!ifp->if_real_bytes) {
35970293ce3aSMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
35980293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
35990293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
36000293ce3aSMandy Kirkconnell 	}
36010293ce3aSMandy Kirkconnell 	erp->er_extbuf = ifp->if_u1.if_extents;
36020293ce3aSMandy Kirkconnell 	erp->er_extcount = nextents;
36030293ce3aSMandy Kirkconnell 	erp->er_extoff = 0;
36040293ce3aSMandy Kirkconnell 
36050293ce3aSMandy Kirkconnell 	ifp->if_flags |= XFS_IFEXTIREC;
36060293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
36070293ce3aSMandy Kirkconnell 	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
36080293ce3aSMandy Kirkconnell 	ifp->if_u1.if_ext_irec = erp;
36090293ce3aSMandy Kirkconnell 
36100293ce3aSMandy Kirkconnell 	return;
36110293ce3aSMandy Kirkconnell }
36120293ce3aSMandy Kirkconnell 
36130293ce3aSMandy Kirkconnell /*
36140293ce3aSMandy Kirkconnell  * Allocate and initialize a new entry in the indirection array.
36150293ce3aSMandy Kirkconnell  */
36160293ce3aSMandy Kirkconnell xfs_ext_irec_t *
36170293ce3aSMandy Kirkconnell xfs_iext_irec_new(
36180293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
36190293ce3aSMandy Kirkconnell 	int		erp_idx)	/* index for new irec */
36200293ce3aSMandy Kirkconnell {
36210293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
36220293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
36230293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
36240293ce3aSMandy Kirkconnell 
36250293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
36260293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
36270293ce3aSMandy Kirkconnell 
36280293ce3aSMandy Kirkconnell 	/* Resize indirection array */
36290293ce3aSMandy Kirkconnell 	xfs_iext_realloc_indirect(ifp, ++nlists *
36300293ce3aSMandy Kirkconnell 				  sizeof(xfs_ext_irec_t));
36310293ce3aSMandy Kirkconnell 	/*
36320293ce3aSMandy Kirkconnell 	 * Move records down in the array so the
36330293ce3aSMandy Kirkconnell 	 * new page can use erp_idx.
36340293ce3aSMandy Kirkconnell 	 */
36350293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
36360293ce3aSMandy Kirkconnell 	for (i = nlists - 1; i > erp_idx; i--) {
36370293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
36380293ce3aSMandy Kirkconnell 	}
36390293ce3aSMandy Kirkconnell 	ASSERT(i == erp_idx);
36400293ce3aSMandy Kirkconnell 
36410293ce3aSMandy Kirkconnell 	/* Initialize new extent record */
36420293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
36436785073bSDavid Chinner 	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
36440293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
36450293ce3aSMandy Kirkconnell 	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
36460293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extcount = 0;
36470293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extoff = erp_idx > 0 ?
36480293ce3aSMandy Kirkconnell 		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
36490293ce3aSMandy Kirkconnell 	return (&erp[erp_idx]);
36500293ce3aSMandy Kirkconnell }
36510293ce3aSMandy Kirkconnell 
36520293ce3aSMandy Kirkconnell /*
36530293ce3aSMandy Kirkconnell  * Remove a record from the indirection array.
36540293ce3aSMandy Kirkconnell  */
36550293ce3aSMandy Kirkconnell void
36560293ce3aSMandy Kirkconnell xfs_iext_irec_remove(
36570293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
36580293ce3aSMandy Kirkconnell 	int		erp_idx)	/* irec index to remove */
36590293ce3aSMandy Kirkconnell {
36600293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
36610293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
36620293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
36630293ce3aSMandy Kirkconnell 
36640293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
36650293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
36660293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
36670293ce3aSMandy Kirkconnell 	if (erp->er_extbuf) {
36680293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
36690293ce3aSMandy Kirkconnell 			-erp->er_extcount);
3670f0e2d93cSDenys Vlasenko 		kmem_free(erp->er_extbuf);
36710293ce3aSMandy Kirkconnell 	}
36720293ce3aSMandy Kirkconnell 	/* Compact extent records */
36730293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
36740293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists - 1; i++) {
36750293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
36760293ce3aSMandy Kirkconnell 	}
36770293ce3aSMandy Kirkconnell 	/*
36780293ce3aSMandy Kirkconnell 	 * Manually free the last extent record from the indirection
36790293ce3aSMandy Kirkconnell 	 * array.  A call to xfs_iext_realloc_indirect() with a size
36800293ce3aSMandy Kirkconnell 	 * of zero would result in a call to xfs_iext_destroy() which
36810293ce3aSMandy Kirkconnell 	 * would in turn call this function again, creating a nasty
36820293ce3aSMandy Kirkconnell 	 * infinite loop.
36830293ce3aSMandy Kirkconnell 	 */
36840293ce3aSMandy Kirkconnell 	if (--nlists) {
36850293ce3aSMandy Kirkconnell 		xfs_iext_realloc_indirect(ifp,
36860293ce3aSMandy Kirkconnell 			nlists * sizeof(xfs_ext_irec_t));
36870293ce3aSMandy Kirkconnell 	} else {
3688f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_ext_irec);
36890293ce3aSMandy Kirkconnell 	}
36900293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
36910293ce3aSMandy Kirkconnell }
36920293ce3aSMandy Kirkconnell 
36930293ce3aSMandy Kirkconnell /*
36940293ce3aSMandy Kirkconnell  * This is called to clean up large amounts of unused memory allocated
36950293ce3aSMandy Kirkconnell  * by the indirection array.  Before compacting anything though, verify
36960293ce3aSMandy Kirkconnell  * that the indirection array is still needed and switch back to the
36970293ce3aSMandy Kirkconnell  * linear extent list (or even the inline buffer) if possible.  The
36980293ce3aSMandy Kirkconnell  * compaction policy is as follows:
36990293ce3aSMandy Kirkconnell  *
37000293ce3aSMandy Kirkconnell  *    Full Compaction: Extents fit into a single page (or inline buffer)
370171a8c87fSLachlan McIlroy  * Partial Compaction: Extents occupy less than 50% of allocated space
37020293ce3aSMandy Kirkconnell  *      No Compaction: Extents occupy at least 50% of allocated space
37030293ce3aSMandy Kirkconnell  */
37040293ce3aSMandy Kirkconnell void
37050293ce3aSMandy Kirkconnell xfs_iext_irec_compact(
37060293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
37070293ce3aSMandy Kirkconnell {
37080293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
37090293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37100293ce3aSMandy Kirkconnell 
37110293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37120293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37130293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
37140293ce3aSMandy Kirkconnell 
37150293ce3aSMandy Kirkconnell 	if (nextents == 0) {
37160293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
37170293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_INLINE_EXTS) {
37180293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
37190293ce3aSMandy Kirkconnell 		xfs_iext_direct_to_inline(ifp, nextents);
37200293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_LINEAR_EXTS) {
37210293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
37220293ce3aSMandy Kirkconnell 	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
37230293ce3aSMandy Kirkconnell 		xfs_iext_irec_compact_pages(ifp);
37240293ce3aSMandy Kirkconnell 	}
37250293ce3aSMandy Kirkconnell }
37260293ce3aSMandy Kirkconnell 
37270293ce3aSMandy Kirkconnell /*
37280293ce3aSMandy Kirkconnell  * Combine extents from neighboring extent pages.
37290293ce3aSMandy Kirkconnell  */
37300293ce3aSMandy Kirkconnell void
37310293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(
37320293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
37330293ce3aSMandy Kirkconnell {
37340293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
37350293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
37360293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37370293ce3aSMandy Kirkconnell 
37380293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37390293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37400293ce3aSMandy Kirkconnell 	while (erp_idx < nlists - 1) {
37410293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
37420293ce3aSMandy Kirkconnell 		erp_next = erp + 1;
37430293ce3aSMandy Kirkconnell 		if (erp_next->er_extcount <=
37440293ce3aSMandy Kirkconnell 		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
374571a8c87fSLachlan McIlroy 			memcpy(&erp->er_extbuf[erp->er_extcount],
37460293ce3aSMandy Kirkconnell 				erp_next->er_extbuf, erp_next->er_extcount *
37470293ce3aSMandy Kirkconnell 				sizeof(xfs_bmbt_rec_t));
37480293ce3aSMandy Kirkconnell 			erp->er_extcount += erp_next->er_extcount;
37490293ce3aSMandy Kirkconnell 			/*
37500293ce3aSMandy Kirkconnell 			 * Free page before removing extent record
37510293ce3aSMandy Kirkconnell 			 * so er_extoffs don't get modified in
37520293ce3aSMandy Kirkconnell 			 * xfs_iext_irec_remove.
37530293ce3aSMandy Kirkconnell 			 */
3754f0e2d93cSDenys Vlasenko 			kmem_free(erp_next->er_extbuf);
37550293ce3aSMandy Kirkconnell 			erp_next->er_extbuf = NULL;
37560293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx + 1);
37570293ce3aSMandy Kirkconnell 			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37580293ce3aSMandy Kirkconnell 		} else {
37590293ce3aSMandy Kirkconnell 			erp_idx++;
37600293ce3aSMandy Kirkconnell 		}
37610293ce3aSMandy Kirkconnell 	}
37620293ce3aSMandy Kirkconnell }
37630293ce3aSMandy Kirkconnell 
37640293ce3aSMandy Kirkconnell /*
37650293ce3aSMandy Kirkconnell  * This is called to update the er_extoff field in the indirection
37660293ce3aSMandy Kirkconnell  * array when extents have been added or removed from one of the
37670293ce3aSMandy Kirkconnell  * extent lists. erp_idx contains the irec index to begin updating
37680293ce3aSMandy Kirkconnell  * at and ext_diff contains the number of extents that were added
37690293ce3aSMandy Kirkconnell  * or removed.
37700293ce3aSMandy Kirkconnell  */
37710293ce3aSMandy Kirkconnell void
37720293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(
37730293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37740293ce3aSMandy Kirkconnell 	int		erp_idx,	/* irec index to update */
37750293ce3aSMandy Kirkconnell 	int		ext_diff)	/* number of new extents */
37760293ce3aSMandy Kirkconnell {
37770293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
37780293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists */
37790293ce3aSMandy Kirkconnell 
37800293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37810293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37820293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists; i++) {
37830293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
37840293ce3aSMandy Kirkconnell 	}
37850293ce3aSMandy Kirkconnell }
3786