xref: /openbmc/linux/fs/xfs/xfs_inode.c (revision 69ef921b55cc3788d1d2a27b33b27d04acd0090a)
11da177e4SLinus Torvalds /*
23e57ecf6SOlaf Weber  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
37b718769SNathan Scott  * All Rights Reserved.
41da177e4SLinus Torvalds  *
57b718769SNathan Scott  * This program is free software; you can redistribute it and/or
67b718769SNathan Scott  * modify it under the terms of the GNU General Public License as
71da177e4SLinus Torvalds  * published by the Free Software Foundation.
81da177e4SLinus Torvalds  *
97b718769SNathan Scott  * This program is distributed in the hope that it would be useful,
107b718769SNathan Scott  * but WITHOUT ANY WARRANTY; without even the implied warranty of
117b718769SNathan Scott  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
127b718769SNathan Scott  * GNU General Public License for more details.
131da177e4SLinus Torvalds  *
147b718769SNathan Scott  * You should have received a copy of the GNU General Public License
157b718769SNathan Scott  * along with this program; if not, write the Free Software Foundation,
167b718769SNathan Scott  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
171da177e4SLinus Torvalds  */
1840ebd81dSRobert P. J. Day #include <linux/log2.h>
1940ebd81dSRobert P. J. Day 
201da177e4SLinus Torvalds #include "xfs.h"
21a844f451SNathan Scott #include "xfs_fs.h"
221da177e4SLinus Torvalds #include "xfs_types.h"
23a844f451SNathan Scott #include "xfs_bit.h"
241da177e4SLinus Torvalds #include "xfs_log.h"
25a844f451SNathan Scott #include "xfs_inum.h"
261da177e4SLinus Torvalds #include "xfs_trans.h"
271da177e4SLinus Torvalds #include "xfs_trans_priv.h"
281da177e4SLinus Torvalds #include "xfs_sb.h"
291da177e4SLinus Torvalds #include "xfs_ag.h"
301da177e4SLinus Torvalds #include "xfs_mount.h"
311da177e4SLinus Torvalds #include "xfs_bmap_btree.h"
32a844f451SNathan Scott #include "xfs_alloc_btree.h"
331da177e4SLinus Torvalds #include "xfs_ialloc_btree.h"
34a844f451SNathan Scott #include "xfs_attr_sf.h"
351da177e4SLinus Torvalds #include "xfs_dinode.h"
361da177e4SLinus Torvalds #include "xfs_inode.h"
371da177e4SLinus Torvalds #include "xfs_buf_item.h"
38a844f451SNathan Scott #include "xfs_inode_item.h"
39a844f451SNathan Scott #include "xfs_btree.h"
408c4ed633SChristoph Hellwig #include "xfs_btree_trace.h"
41a844f451SNathan Scott #include "xfs_alloc.h"
42a844f451SNathan Scott #include "xfs_ialloc.h"
43a844f451SNathan Scott #include "xfs_bmap.h"
441da177e4SLinus Torvalds #include "xfs_error.h"
451da177e4SLinus Torvalds #include "xfs_utils.h"
461da177e4SLinus Torvalds #include "xfs_quota.h"
472a82b8beSDavid Chinner #include "xfs_filestream.h"
48739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h"
490b1b213fSChristoph Hellwig #include "xfs_trace.h"
501da177e4SLinus Torvalds 
511da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone;
521da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone;
531da177e4SLinus Torvalds 
541da177e4SLinus Torvalds /*
558f04c47aSChristoph Hellwig  * Used in xfs_itruncate_extents().  This is the maximum number of extents
561da177e4SLinus Torvalds  * freed from a file in a single transaction.
571da177e4SLinus Torvalds  */
581da177e4SLinus Torvalds #define	XFS_ITRUNC_MAX_EXTENTS	2
591da177e4SLinus Torvalds 
601da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
611da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
621da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
631da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
641da177e4SLinus Torvalds 
#ifdef DEBUG
/*
 * Debug-only walk over the first nrecs in-core extent records of a fork.
 *
 * Each record is copied out with unaligned loads and unpacked; when the
 * extent format is XFS_EXTFMT_NOSTATE the unpacked record is asserted to be
 * in the XFS_EXT_NORM state.  Non-DEBUG builds compile this away to nothing.
 */
STATIC void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	int			idx;

	for (idx = 0; idx < nrecs; idx++) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);
		xfs_bmbt_rec_host_t	copy;
		xfs_bmbt_irec_t		unpacked;

		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &unpacked);

		/* Only the "no state" format constrains the extent state. */
		if (fmt != XFS_EXTFMT_NOSTATE)
			continue;
		ASSERT(unpacked.br_state == XFS_EXT_NORM);
	}
}
#else /* !DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
921da177e4SLinus Torvalds 
/*
 * Debug-only sanity check of an inode cluster buffer: alert and assert if
 * any inode in the buffer has a next-unlinked field of 0.
 *
 * The number of inodes examined is the mount's inode cluster size divided
 * by the inode size (computed as a shift by sb_inodelog).
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	int		ninodes;
	int		idx;

	ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;

	for (idx = 0; idx < ninodes; idx++) {
		xfs_dinode_t	*dip;

		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					idx * mp->m_sb.sb_inodesize);
		if (dip->di_next_unlinked)
			continue;

		xfs_alert(mp,
	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
			bp);
		ASSERT(dip->di_next_unlinked);
	}
}
#endif
1211da177e4SLinus Torvalds 
1221da177e4SLinus Torvalds /*
1234ae29b43SDavid Chinner  * Find the buffer associated with the given inode map
1244ae29b43SDavid Chinner  * We do basic validation checks on the buffer once it has been
1254ae29b43SDavid Chinner  * retrieved from disk.
1264ae29b43SDavid Chinner  */
1274ae29b43SDavid Chinner STATIC int
1284ae29b43SDavid Chinner xfs_imap_to_bp(
1294ae29b43SDavid Chinner 	xfs_mount_t	*mp,
1304ae29b43SDavid Chinner 	xfs_trans_t	*tp,
13192bfc6e7SChristoph Hellwig 	struct xfs_imap	*imap,
1324ae29b43SDavid Chinner 	xfs_buf_t	**bpp,
1334ae29b43SDavid Chinner 	uint		buf_flags,
134b48d8d64SChristoph Hellwig 	uint		iget_flags)
1354ae29b43SDavid Chinner {
1364ae29b43SDavid Chinner 	int		error;
1374ae29b43SDavid Chinner 	int		i;
1384ae29b43SDavid Chinner 	int		ni;
1394ae29b43SDavid Chinner 	xfs_buf_t	*bp;
1404ae29b43SDavid Chinner 
1414ae29b43SDavid Chinner 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
142a3f74ffbSDavid Chinner 				   (int)imap->im_len, buf_flags, &bp);
1434ae29b43SDavid Chinner 	if (error) {
144a3f74ffbSDavid Chinner 		if (error != EAGAIN) {
1450b932cccSDave Chinner 			xfs_warn(mp,
1460b932cccSDave Chinner 				"%s: xfs_trans_read_buf() returned error %d.",
1470b932cccSDave Chinner 				__func__, error);
148a3f74ffbSDavid Chinner 		} else {
1490cadda1cSChristoph Hellwig 			ASSERT(buf_flags & XBF_TRYLOCK);
150a3f74ffbSDavid Chinner 		}
1514ae29b43SDavid Chinner 		return error;
1524ae29b43SDavid Chinner 	}
1534ae29b43SDavid Chinner 
1544ae29b43SDavid Chinner 	/*
1554ae29b43SDavid Chinner 	 * Validate the magic number and version of every inode in the buffer
1564ae29b43SDavid Chinner 	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
1574ae29b43SDavid Chinner 	 */
1584ae29b43SDavid Chinner #ifdef DEBUG
1594ae29b43SDavid Chinner 	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
1604ae29b43SDavid Chinner #else	/* usual case */
1614ae29b43SDavid Chinner 	ni = 1;
1624ae29b43SDavid Chinner #endif
1634ae29b43SDavid Chinner 
1644ae29b43SDavid Chinner 	for (i = 0; i < ni; i++) {
1654ae29b43SDavid Chinner 		int		di_ok;
1664ae29b43SDavid Chinner 		xfs_dinode_t	*dip;
1674ae29b43SDavid Chinner 
1684ae29b43SDavid Chinner 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
1694ae29b43SDavid Chinner 					(i << mp->m_sb.sb_inodelog));
170*69ef921bSChristoph Hellwig 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
17181591fe2SChristoph Hellwig 			    XFS_DINODE_GOOD_VERSION(dip->di_version);
1724ae29b43SDavid Chinner 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
1734ae29b43SDavid Chinner 						XFS_ERRTAG_ITOBP_INOTOBP,
1744ae29b43SDavid Chinner 						XFS_RANDOM_ITOBP_INOTOBP))) {
1751920779eSDave Chinner 			if (iget_flags & XFS_IGET_UNTRUSTED) {
1764ae29b43SDavid Chinner 				xfs_trans_brelse(tp, bp);
1774ae29b43SDavid Chinner 				return XFS_ERROR(EINVAL);
1784ae29b43SDavid Chinner 			}
1794ae29b43SDavid Chinner 			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
1804ae29b43SDavid Chinner 						XFS_ERRLEVEL_HIGH, mp, dip);
1814ae29b43SDavid Chinner #ifdef DEBUG
1820b932cccSDave Chinner 			xfs_emerg(mp,
1830b932cccSDave Chinner 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
1844ae29b43SDavid Chinner 				(unsigned long long)imap->im_blkno, i,
18581591fe2SChristoph Hellwig 				be16_to_cpu(dip->di_magic));
1860b932cccSDave Chinner 			ASSERT(0);
1874ae29b43SDavid Chinner #endif
1884ae29b43SDavid Chinner 			xfs_trans_brelse(tp, bp);
1894ae29b43SDavid Chinner 			return XFS_ERROR(EFSCORRUPTED);
1904ae29b43SDavid Chinner 		}
1914ae29b43SDavid Chinner 	}
1924ae29b43SDavid Chinner 
1934ae29b43SDavid Chinner 	xfs_inobp_check(mp, bp);
1944ae29b43SDavid Chinner 
1954ae29b43SDavid Chinner 	/*
1964ae29b43SDavid Chinner 	 * Mark the buffer as an inode buffer now that it looks good
1974ae29b43SDavid Chinner 	 */
1984ae29b43SDavid Chinner 	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
1994ae29b43SDavid Chinner 
2004ae29b43SDavid Chinner 	*bpp = bp;
2014ae29b43SDavid Chinner 	return 0;
2024ae29b43SDavid Chinner }
2034ae29b43SDavid Chinner 
2044ae29b43SDavid Chinner /*
2051da177e4SLinus Torvalds  * This routine is called to map an inode number within a file
2061da177e4SLinus Torvalds  * system to the buffer containing the on-disk version of the
2071da177e4SLinus Torvalds  * inode.  It returns a pointer to the buffer containing the
2081da177e4SLinus Torvalds  * on-disk inode in the bpp parameter, and in the dip parameter
2091da177e4SLinus Torvalds  * it returns a pointer to the on-disk inode within that buffer.
2101da177e4SLinus Torvalds  *
2111da177e4SLinus Torvalds  * If a non-zero error is returned, then the contents of bpp and
2121da177e4SLinus Torvalds  * dipp are undefined.
2131da177e4SLinus Torvalds  *
2141da177e4SLinus Torvalds  * Use xfs_imap() to determine the size and location of the
2151da177e4SLinus Torvalds  * buffer to read from disk.
2161da177e4SLinus Torvalds  */
217c679eef0SChristoph Hellwig int
2181da177e4SLinus Torvalds xfs_inotobp(
2191da177e4SLinus Torvalds 	xfs_mount_t	*mp,
2201da177e4SLinus Torvalds 	xfs_trans_t	*tp,
2211da177e4SLinus Torvalds 	xfs_ino_t	ino,
2221da177e4SLinus Torvalds 	xfs_dinode_t	**dipp,
2231da177e4SLinus Torvalds 	xfs_buf_t	**bpp,
224c679eef0SChristoph Hellwig 	int		*offset,
225c679eef0SChristoph Hellwig 	uint		imap_flags)
2261da177e4SLinus Torvalds {
22792bfc6e7SChristoph Hellwig 	struct xfs_imap	imap;
2281da177e4SLinus Torvalds 	xfs_buf_t	*bp;
2291da177e4SLinus Torvalds 	int		error;
2301da177e4SLinus Torvalds 
2311da177e4SLinus Torvalds 	imap.im_blkno = 0;
232a1941895SChristoph Hellwig 	error = xfs_imap(mp, tp, ino, &imap, imap_flags);
2334ae29b43SDavid Chinner 	if (error)
2341da177e4SLinus Torvalds 		return error;
2351da177e4SLinus Torvalds 
2360cadda1cSChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags);
2374ae29b43SDavid Chinner 	if (error)
2381da177e4SLinus Torvalds 		return error;
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
2411da177e4SLinus Torvalds 	*bpp = bp;
2421da177e4SLinus Torvalds 	*offset = imap.im_boffset;
2431da177e4SLinus Torvalds 	return 0;
2441da177e4SLinus Torvalds }
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 
2471da177e4SLinus Torvalds /*
2481da177e4SLinus Torvalds  * This routine is called to map an inode to the buffer containing
2491da177e4SLinus Torvalds  * the on-disk version of the inode.  It returns a pointer to the
2501da177e4SLinus Torvalds  * buffer containing the on-disk inode in the bpp parameter, and in
2511da177e4SLinus Torvalds  * the dip parameter it returns a pointer to the on-disk inode within
2521da177e4SLinus Torvalds  * that buffer.
2531da177e4SLinus Torvalds  *
2541da177e4SLinus Torvalds  * If a non-zero error is returned, then the contents of bpp and
2551da177e4SLinus Torvalds  * dipp are undefined.
2561da177e4SLinus Torvalds  *
25776d8b277SChristoph Hellwig  * The inode is expected to already been mapped to its buffer and read
25876d8b277SChristoph Hellwig  * in once, thus we can use the mapping information stored in the inode
25976d8b277SChristoph Hellwig  * rather than calling xfs_imap().  This allows us to avoid the overhead
26076d8b277SChristoph Hellwig  * of looking at the inode btree for small block file systems
26194e1b69dSChristoph Hellwig  * (see xfs_imap()).
2621da177e4SLinus Torvalds  */
2631da177e4SLinus Torvalds int
2641da177e4SLinus Torvalds xfs_itobp(
2651da177e4SLinus Torvalds 	xfs_mount_t	*mp,
2661da177e4SLinus Torvalds 	xfs_trans_t	*tp,
2671da177e4SLinus Torvalds 	xfs_inode_t	*ip,
2681da177e4SLinus Torvalds 	xfs_dinode_t	**dipp,
2691da177e4SLinus Torvalds 	xfs_buf_t	**bpp,
270a3f74ffbSDavid Chinner 	uint		buf_flags)
2711da177e4SLinus Torvalds {
2721da177e4SLinus Torvalds 	xfs_buf_t	*bp;
2731da177e4SLinus Torvalds 	int		error;
2741da177e4SLinus Torvalds 
27592bfc6e7SChristoph Hellwig 	ASSERT(ip->i_imap.im_blkno != 0);
2761da177e4SLinus Torvalds 
27792bfc6e7SChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
2784ae29b43SDavid Chinner 	if (error)
2791da177e4SLinus Torvalds 		return error;
2804d1a2ed3SNathan Scott 
281a3f74ffbSDavid Chinner 	if (!bp) {
2820cadda1cSChristoph Hellwig 		ASSERT(buf_flags & XBF_TRYLOCK);
283a3f74ffbSDavid Chinner 		ASSERT(tp == NULL);
284a3f74ffbSDavid Chinner 		*bpp = NULL;
285a3f74ffbSDavid Chinner 		return EAGAIN;
286a3f74ffbSDavid Chinner 	}
287a3f74ffbSDavid Chinner 
28892bfc6e7SChristoph Hellwig 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
2891da177e4SLinus Torvalds 	*bpp = bp;
2901da177e4SLinus Torvalds 	return 0;
2911da177e4SLinus Torvalds }
2921da177e4SLinus Torvalds 
2931da177e4SLinus Torvalds /*
2941da177e4SLinus Torvalds  * Move inode type and inode format specific information from the
2951da177e4SLinus Torvalds  * on-disk inode to the in-core inode.  For fifos, devs, and sockets
2961da177e4SLinus Torvalds  * this means set if_rdev to the proper value.  For files, directories,
2971da177e4SLinus Torvalds  * and symlinks this means to bring in the in-line data or extent
2981da177e4SLinus Torvalds  * pointers.  For a file in B-tree format, only the root is immediately
2991da177e4SLinus Torvalds  * brought in-core.  The rest will be in-lined in if_extents when it
3001da177e4SLinus Torvalds  * is first referenced (see xfs_iread_extents()).
3011da177e4SLinus Torvalds  */
3021da177e4SLinus Torvalds STATIC int
3031da177e4SLinus Torvalds xfs_iformat(
3041da177e4SLinus Torvalds 	xfs_inode_t		*ip,
3051da177e4SLinus Torvalds 	xfs_dinode_t		*dip)
3061da177e4SLinus Torvalds {
3071da177e4SLinus Torvalds 	xfs_attr_shortform_t	*atp;
3081da177e4SLinus Torvalds 	int			size;
3091da177e4SLinus Torvalds 	int			error;
3101da177e4SLinus Torvalds 	xfs_fsize_t             di_size;
3111da177e4SLinus Torvalds 	ip->i_df.if_ext_max =
3121da177e4SLinus Torvalds 		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3131da177e4SLinus Torvalds 	error = 0;
3141da177e4SLinus Torvalds 
31581591fe2SChristoph Hellwig 	if (unlikely(be32_to_cpu(dip->di_nextents) +
31681591fe2SChristoph Hellwig 		     be16_to_cpu(dip->di_anextents) >
31781591fe2SChristoph Hellwig 		     be64_to_cpu(dip->di_nblocks))) {
31865333b4cSDave Chinner 		xfs_warn(ip->i_mount,
3193762ec6bSNathan Scott 			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
3201da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
32181591fe2SChristoph Hellwig 			(int)(be32_to_cpu(dip->di_nextents) +
32281591fe2SChristoph Hellwig 			      be16_to_cpu(dip->di_anextents)),
3231da177e4SLinus Torvalds 			(unsigned long long)
32481591fe2SChristoph Hellwig 				be64_to_cpu(dip->di_nblocks));
3251da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
3261da177e4SLinus Torvalds 				     ip->i_mount, dip);
3271da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
3281da177e4SLinus Torvalds 	}
3291da177e4SLinus Torvalds 
33081591fe2SChristoph Hellwig 	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
33165333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
3321da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
33381591fe2SChristoph Hellwig 			dip->di_forkoff);
3341da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
3351da177e4SLinus Torvalds 				     ip->i_mount, dip);
3361da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
3371da177e4SLinus Torvalds 	}
3381da177e4SLinus Torvalds 
339b89d4208SChristoph Hellwig 	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
340b89d4208SChristoph Hellwig 		     !ip->i_mount->m_rtdev_targp)) {
34165333b4cSDave Chinner 		xfs_warn(ip->i_mount,
342b89d4208SChristoph Hellwig 			"corrupt dinode %Lu, has realtime flag set.",
343b89d4208SChristoph Hellwig 			ip->i_ino);
344b89d4208SChristoph Hellwig 		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
345b89d4208SChristoph Hellwig 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
346b89d4208SChristoph Hellwig 		return XFS_ERROR(EFSCORRUPTED);
347b89d4208SChristoph Hellwig 	}
348b89d4208SChristoph Hellwig 
3491da177e4SLinus Torvalds 	switch (ip->i_d.di_mode & S_IFMT) {
3501da177e4SLinus Torvalds 	case S_IFIFO:
3511da177e4SLinus Torvalds 	case S_IFCHR:
3521da177e4SLinus Torvalds 	case S_IFBLK:
3531da177e4SLinus Torvalds 	case S_IFSOCK:
35481591fe2SChristoph Hellwig 		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
3551da177e4SLinus Torvalds 			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
3561da177e4SLinus Torvalds 					      ip->i_mount, dip);
3571da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
3581da177e4SLinus Torvalds 		}
3591da177e4SLinus Torvalds 		ip->i_d.di_size = 0;
360ba87ea69SLachlan McIlroy 		ip->i_size = 0;
36181591fe2SChristoph Hellwig 		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
3621da177e4SLinus Torvalds 		break;
3631da177e4SLinus Torvalds 
3641da177e4SLinus Torvalds 	case S_IFREG:
3651da177e4SLinus Torvalds 	case S_IFLNK:
3661da177e4SLinus Torvalds 	case S_IFDIR:
36781591fe2SChristoph Hellwig 		switch (dip->di_format) {
3681da177e4SLinus Torvalds 		case XFS_DINODE_FMT_LOCAL:
3691da177e4SLinus Torvalds 			/*
3701da177e4SLinus Torvalds 			 * no local regular files yet
3711da177e4SLinus Torvalds 			 */
37281591fe2SChristoph Hellwig 			if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
37365333b4cSDave Chinner 				xfs_warn(ip->i_mount,
37465333b4cSDave Chinner 			"corrupt inode %Lu (local format for regular file).",
3751da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino);
3761da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
3771da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
3781da177e4SLinus Torvalds 						     ip->i_mount, dip);
3791da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
3801da177e4SLinus Torvalds 			}
3811da177e4SLinus Torvalds 
38281591fe2SChristoph Hellwig 			di_size = be64_to_cpu(dip->di_size);
3831da177e4SLinus Torvalds 			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
38465333b4cSDave Chinner 				xfs_warn(ip->i_mount,
38565333b4cSDave Chinner 			"corrupt inode %Lu (bad size %Ld for local inode).",
3861da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino,
3871da177e4SLinus Torvalds 					(long long) di_size);
3881da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
3891da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
3901da177e4SLinus Torvalds 						     ip->i_mount, dip);
3911da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
3921da177e4SLinus Torvalds 			}
3931da177e4SLinus Torvalds 
3941da177e4SLinus Torvalds 			size = (int)di_size;
3951da177e4SLinus Torvalds 			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
3961da177e4SLinus Torvalds 			break;
3971da177e4SLinus Torvalds 		case XFS_DINODE_FMT_EXTENTS:
3981da177e4SLinus Torvalds 			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
3991da177e4SLinus Torvalds 			break;
4001da177e4SLinus Torvalds 		case XFS_DINODE_FMT_BTREE:
4011da177e4SLinus Torvalds 			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
4021da177e4SLinus Torvalds 			break;
4031da177e4SLinus Torvalds 		default:
4041da177e4SLinus Torvalds 			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
4051da177e4SLinus Torvalds 					 ip->i_mount);
4061da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
4071da177e4SLinus Torvalds 		}
4081da177e4SLinus Torvalds 		break;
4091da177e4SLinus Torvalds 
4101da177e4SLinus Torvalds 	default:
4111da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
4121da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
4131da177e4SLinus Torvalds 	}
4141da177e4SLinus Torvalds 	if (error) {
4151da177e4SLinus Torvalds 		return error;
4161da177e4SLinus Torvalds 	}
4171da177e4SLinus Torvalds 	if (!XFS_DFORK_Q(dip))
4181da177e4SLinus Torvalds 		return 0;
4191da177e4SLinus Torvalds 	ASSERT(ip->i_afp == NULL);
4204a7edddcSDave Chinner 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
4211da177e4SLinus Torvalds 	ip->i_afp->if_ext_max =
4221da177e4SLinus Torvalds 		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
42381591fe2SChristoph Hellwig 	switch (dip->di_aformat) {
4241da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
4251da177e4SLinus Torvalds 		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
4263b244aa8SNathan Scott 		size = be16_to_cpu(atp->hdr.totsize);
4272809f76aSChristoph Hellwig 
4282809f76aSChristoph Hellwig 		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
42965333b4cSDave Chinner 			xfs_warn(ip->i_mount,
43065333b4cSDave Chinner 				"corrupt inode %Lu (bad attr fork size %Ld).",
4312809f76aSChristoph Hellwig 				(unsigned long long) ip->i_ino,
4322809f76aSChristoph Hellwig 				(long long) size);
4332809f76aSChristoph Hellwig 			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
4342809f76aSChristoph Hellwig 					     XFS_ERRLEVEL_LOW,
4352809f76aSChristoph Hellwig 					     ip->i_mount, dip);
4362809f76aSChristoph Hellwig 			return XFS_ERROR(EFSCORRUPTED);
4372809f76aSChristoph Hellwig 		}
4382809f76aSChristoph Hellwig 
4391da177e4SLinus Torvalds 		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
4401da177e4SLinus Torvalds 		break;
4411da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
4421da177e4SLinus Torvalds 		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
4431da177e4SLinus Torvalds 		break;
4441da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
4451da177e4SLinus Torvalds 		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
4461da177e4SLinus Torvalds 		break;
4471da177e4SLinus Torvalds 	default:
4481da177e4SLinus Torvalds 		error = XFS_ERROR(EFSCORRUPTED);
4491da177e4SLinus Torvalds 		break;
4501da177e4SLinus Torvalds 	}
4511da177e4SLinus Torvalds 	if (error) {
4521da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
4531da177e4SLinus Torvalds 		ip->i_afp = NULL;
4541da177e4SLinus Torvalds 		xfs_idestroy_fork(ip, XFS_DATA_FORK);
4551da177e4SLinus Torvalds 	}
4561da177e4SLinus Torvalds 	return error;
4571da177e4SLinus Torvalds }
4581da177e4SLinus Torvalds 
4591da177e4SLinus Torvalds /*
4601da177e4SLinus Torvalds  * The file is in-lined in the on-disk inode.
4611da177e4SLinus Torvalds  * If it fits into if_inline_data, then copy
4621da177e4SLinus Torvalds  * it there, otherwise allocate a buffer for it
4631da177e4SLinus Torvalds  * and copy the data there.  Either way, set
4641da177e4SLinus Torvalds  * if_data to point at the data.
4651da177e4SLinus Torvalds  * If we allocate a buffer for the data, make
4661da177e4SLinus Torvalds  * sure that its size is a multiple of 4 and
4671da177e4SLinus Torvalds  * record the real size in i_real_bytes.
4681da177e4SLinus Torvalds  */
4691da177e4SLinus Torvalds STATIC int
4701da177e4SLinus Torvalds xfs_iformat_local(
4711da177e4SLinus Torvalds 	xfs_inode_t	*ip,
4721da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
4731da177e4SLinus Torvalds 	int		whichfork,
4741da177e4SLinus Torvalds 	int		size)
4751da177e4SLinus Torvalds {
4761da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
4771da177e4SLinus Torvalds 	int		real_size;
4781da177e4SLinus Torvalds 
4791da177e4SLinus Torvalds 	/*
4801da177e4SLinus Torvalds 	 * If the size is unreasonable, then something
4811da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
4821da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
4831da177e4SLinus Torvalds 	 */
4841da177e4SLinus Torvalds 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
48565333b4cSDave Chinner 		xfs_warn(ip->i_mount,
48665333b4cSDave Chinner 	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
4871da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, size,
4881da177e4SLinus Torvalds 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
4891da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
4901da177e4SLinus Torvalds 				     ip->i_mount, dip);
4911da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
4921da177e4SLinus Torvalds 	}
4931da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
4941da177e4SLinus Torvalds 	real_size = 0;
4951da177e4SLinus Torvalds 	if (size == 0)
4961da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
4971da177e4SLinus Torvalds 	else if (size <= sizeof(ifp->if_u2.if_inline_data))
4981da177e4SLinus Torvalds 		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
4991da177e4SLinus Torvalds 	else {
5001da177e4SLinus Torvalds 		real_size = roundup(size, 4);
5014a7edddcSDave Chinner 		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
5021da177e4SLinus Torvalds 	}
5031da177e4SLinus Torvalds 	ifp->if_bytes = size;
5041da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
5051da177e4SLinus Torvalds 	if (size)
5061da177e4SLinus Torvalds 		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
5071da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
5081da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFINLINE;
5091da177e4SLinus Torvalds 	return 0;
5101da177e4SLinus Torvalds }
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds /*
5131da177e4SLinus Torvalds  * The file consists of a set of extents all
5141da177e4SLinus Torvalds  * of which fit into the on-disk inode.
5151da177e4SLinus Torvalds  * If there are few enough extents to fit into
5161da177e4SLinus Torvalds  * the if_inline_ext, then copy them there.
5171da177e4SLinus Torvalds  * Otherwise allocate a buffer for them and copy
5181da177e4SLinus Torvalds  * them into it.  Either way, set if_extents
5191da177e4SLinus Torvalds  * to point at the extents.
5201da177e4SLinus Torvalds  */
5211da177e4SLinus Torvalds STATIC int
5221da177e4SLinus Torvalds xfs_iformat_extents(
5231da177e4SLinus Torvalds 	xfs_inode_t	*ip,
5241da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
5251da177e4SLinus Torvalds 	int		whichfork)
5261da177e4SLinus Torvalds {
527a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t	*dp;
5281da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
5291da177e4SLinus Torvalds 	int		nex;
5301da177e4SLinus Torvalds 	int		size;
5311da177e4SLinus Torvalds 	int		i;
5321da177e4SLinus Torvalds 
5331da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
5341da177e4SLinus Torvalds 	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
5351da177e4SLinus Torvalds 	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
5361da177e4SLinus Torvalds 
5371da177e4SLinus Torvalds 	/*
5381da177e4SLinus Torvalds 	 * If the number of extents is unreasonable, then something
5391da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
5401da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
5411da177e4SLinus Torvalds 	 */
5421da177e4SLinus Torvalds 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
54365333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
5441da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, nex);
5451da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
5461da177e4SLinus Torvalds 				     ip->i_mount, dip);
5471da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5481da177e4SLinus Torvalds 	}
5491da177e4SLinus Torvalds 
5504eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
5511da177e4SLinus Torvalds 	if (nex == 0)
5521da177e4SLinus Torvalds 		ifp->if_u1.if_extents = NULL;
5531da177e4SLinus Torvalds 	else if (nex <= XFS_INLINE_EXTS)
5541da177e4SLinus Torvalds 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
5554eea22f0SMandy Kirkconnell 	else
5564eea22f0SMandy Kirkconnell 		xfs_iext_add(ifp, 0, nex);
5574eea22f0SMandy Kirkconnell 
5581da177e4SLinus Torvalds 	ifp->if_bytes = size;
5591da177e4SLinus Torvalds 	if (size) {
5601da177e4SLinus Torvalds 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
561a6f64d4aSChristoph Hellwig 		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
5624eea22f0SMandy Kirkconnell 		for (i = 0; i < nex; i++, dp++) {
563a6f64d4aSChristoph Hellwig 			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
564597bca63SHarvey Harrison 			ep->l0 = get_unaligned_be64(&dp->l0);
565597bca63SHarvey Harrison 			ep->l1 = get_unaligned_be64(&dp->l1);
5661da177e4SLinus Torvalds 		}
5673a59c94cSEric Sandeen 		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
5681da177e4SLinus Torvalds 		if (whichfork != XFS_DATA_FORK ||
5691da177e4SLinus Torvalds 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
5701da177e4SLinus Torvalds 				if (unlikely(xfs_check_nostate_extents(
5714eea22f0SMandy Kirkconnell 				    ifp, 0, nex))) {
5721da177e4SLinus Torvalds 					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
5731da177e4SLinus Torvalds 							 XFS_ERRLEVEL_LOW,
5741da177e4SLinus Torvalds 							 ip->i_mount);
5751da177e4SLinus Torvalds 					return XFS_ERROR(EFSCORRUPTED);
5761da177e4SLinus Torvalds 				}
5771da177e4SLinus Torvalds 	}
5781da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
5791da177e4SLinus Torvalds 	return 0;
5801da177e4SLinus Torvalds }
5811da177e4SLinus Torvalds 
5821da177e4SLinus Torvalds /*
5831da177e4SLinus Torvalds  * The file has too many extents to fit into
5841da177e4SLinus Torvalds  * the inode, so they are in B-tree format.
5851da177e4SLinus Torvalds  * Allocate a buffer for the root of the B-tree
5861da177e4SLinus Torvalds  * and copy the root into it.  The i_extents
5871da177e4SLinus Torvalds  * field will remain NULL until all of the
5881da177e4SLinus Torvalds  * extents are read in (when they are needed).
5891da177e4SLinus Torvalds  */
5901da177e4SLinus Torvalds STATIC int
5911da177e4SLinus Torvalds xfs_iformat_btree(
5921da177e4SLinus Torvalds 	xfs_inode_t		*ip,
5931da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
5941da177e4SLinus Torvalds 	int			whichfork)
5951da177e4SLinus Torvalds {
5961da177e4SLinus Torvalds 	xfs_bmdr_block_t	*dfp;
5971da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
5981da177e4SLinus Torvalds 	/* REFERENCED */
5991da177e4SLinus Torvalds 	int			nrecs;
6001da177e4SLinus Torvalds 	int			size;
6011da177e4SLinus Torvalds 
6021da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
6031da177e4SLinus Torvalds 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
6041da177e4SLinus Torvalds 	size = XFS_BMAP_BROOT_SPACE(dfp);
60560197e8dSChristoph Hellwig 	nrecs = be16_to_cpu(dfp->bb_numrecs);
6061da177e4SLinus Torvalds 
6071da177e4SLinus Torvalds 	/*
6081da177e4SLinus Torvalds 	 * blow out if -- fork has less extents than can fit in
6091da177e4SLinus Torvalds 	 * fork (fork shouldn't be a btree format), root btree
6101da177e4SLinus Torvalds 	 * block has more records than can fit into the fork,
6111da177e4SLinus Torvalds 	 * or the number of extents is greater than the number of
6121da177e4SLinus Torvalds 	 * blocks.
6131da177e4SLinus Torvalds 	 */
6141da177e4SLinus Torvalds 	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
6151da177e4SLinus Torvalds 	    || XFS_BMDR_SPACE_CALC(nrecs) >
6161da177e4SLinus Torvalds 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
6171da177e4SLinus Torvalds 	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
61865333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
6191da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino);
62065333b4cSDave Chinner 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
62165333b4cSDave Chinner 				 ip->i_mount, dip);
6221da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
6231da177e4SLinus Torvalds 	}
6241da177e4SLinus Torvalds 
6251da177e4SLinus Torvalds 	ifp->if_broot_bytes = size;
6264a7edddcSDave Chinner 	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
6271da177e4SLinus Torvalds 	ASSERT(ifp->if_broot != NULL);
6281da177e4SLinus Torvalds 	/*
6291da177e4SLinus Torvalds 	 * Copy and convert from the on-disk structure
6301da177e4SLinus Torvalds 	 * to the in-memory structure.
6311da177e4SLinus Torvalds 	 */
63260197e8dSChristoph Hellwig 	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
63360197e8dSChristoph Hellwig 			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
6341da177e4SLinus Torvalds 			 ifp->if_broot, size);
6351da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
6361da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFBROOT;
6371da177e4SLinus Torvalds 
6381da177e4SLinus Torvalds 	return 0;
6391da177e4SLinus Torvalds }
6401da177e4SLinus Torvalds 
641d96f8f89SEric Sandeen STATIC void
642347d1c01SChristoph Hellwig xfs_dinode_from_disk(
643347d1c01SChristoph Hellwig 	xfs_icdinode_t		*to,
64481591fe2SChristoph Hellwig 	xfs_dinode_t		*from)
6451da177e4SLinus Torvalds {
646347d1c01SChristoph Hellwig 	to->di_magic = be16_to_cpu(from->di_magic);
647347d1c01SChristoph Hellwig 	to->di_mode = be16_to_cpu(from->di_mode);
648347d1c01SChristoph Hellwig 	to->di_version = from ->di_version;
649347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
650347d1c01SChristoph Hellwig 	to->di_onlink = be16_to_cpu(from->di_onlink);
651347d1c01SChristoph Hellwig 	to->di_uid = be32_to_cpu(from->di_uid);
652347d1c01SChristoph Hellwig 	to->di_gid = be32_to_cpu(from->di_gid);
653347d1c01SChristoph Hellwig 	to->di_nlink = be32_to_cpu(from->di_nlink);
6546743099cSArkadiusz Mi?kiewicz 	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
6556743099cSArkadiusz Mi?kiewicz 	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
656347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
657347d1c01SChristoph Hellwig 	to->di_flushiter = be16_to_cpu(from->di_flushiter);
658347d1c01SChristoph Hellwig 	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
659347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
660347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
661347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
662347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
663347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
664347d1c01SChristoph Hellwig 	to->di_size = be64_to_cpu(from->di_size);
665347d1c01SChristoph Hellwig 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
666347d1c01SChristoph Hellwig 	to->di_extsize = be32_to_cpu(from->di_extsize);
667347d1c01SChristoph Hellwig 	to->di_nextents = be32_to_cpu(from->di_nextents);
668347d1c01SChristoph Hellwig 	to->di_anextents = be16_to_cpu(from->di_anextents);
669347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
670347d1c01SChristoph Hellwig 	to->di_aformat	= from->di_aformat;
671347d1c01SChristoph Hellwig 	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
672347d1c01SChristoph Hellwig 	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
673347d1c01SChristoph Hellwig 	to->di_flags	= be16_to_cpu(from->di_flags);
674347d1c01SChristoph Hellwig 	to->di_gen	= be32_to_cpu(from->di_gen);
6751da177e4SLinus Torvalds }
6761da177e4SLinus Torvalds 
677347d1c01SChristoph Hellwig void
678347d1c01SChristoph Hellwig xfs_dinode_to_disk(
67981591fe2SChristoph Hellwig 	xfs_dinode_t		*to,
680347d1c01SChristoph Hellwig 	xfs_icdinode_t		*from)
681347d1c01SChristoph Hellwig {
682347d1c01SChristoph Hellwig 	to->di_magic = cpu_to_be16(from->di_magic);
683347d1c01SChristoph Hellwig 	to->di_mode = cpu_to_be16(from->di_mode);
684347d1c01SChristoph Hellwig 	to->di_version = from ->di_version;
685347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
686347d1c01SChristoph Hellwig 	to->di_onlink = cpu_to_be16(from->di_onlink);
687347d1c01SChristoph Hellwig 	to->di_uid = cpu_to_be32(from->di_uid);
688347d1c01SChristoph Hellwig 	to->di_gid = cpu_to_be32(from->di_gid);
689347d1c01SChristoph Hellwig 	to->di_nlink = cpu_to_be32(from->di_nlink);
6906743099cSArkadiusz Mi?kiewicz 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
6916743099cSArkadiusz Mi?kiewicz 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
692347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
693347d1c01SChristoph Hellwig 	to->di_flushiter = cpu_to_be16(from->di_flushiter);
694347d1c01SChristoph Hellwig 	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
695347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
696347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
697347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
698347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
699347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
700347d1c01SChristoph Hellwig 	to->di_size = cpu_to_be64(from->di_size);
701347d1c01SChristoph Hellwig 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
702347d1c01SChristoph Hellwig 	to->di_extsize = cpu_to_be32(from->di_extsize);
703347d1c01SChristoph Hellwig 	to->di_nextents = cpu_to_be32(from->di_nextents);
704347d1c01SChristoph Hellwig 	to->di_anextents = cpu_to_be16(from->di_anextents);
705347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
706347d1c01SChristoph Hellwig 	to->di_aformat = from->di_aformat;
707347d1c01SChristoph Hellwig 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
708347d1c01SChristoph Hellwig 	to->di_dmstate = cpu_to_be16(from->di_dmstate);
709347d1c01SChristoph Hellwig 	to->di_flags = cpu_to_be16(from->di_flags);
710347d1c01SChristoph Hellwig 	to->di_gen = cpu_to_be32(from->di_gen);
7111da177e4SLinus Torvalds }
7121da177e4SLinus Torvalds 
7131da177e4SLinus Torvalds STATIC uint
7141da177e4SLinus Torvalds _xfs_dic2xflags(
7151da177e4SLinus Torvalds 	__uint16_t		di_flags)
7161da177e4SLinus Torvalds {
7171da177e4SLinus Torvalds 	uint			flags = 0;
7181da177e4SLinus Torvalds 
7191da177e4SLinus Torvalds 	if (di_flags & XFS_DIFLAG_ANY) {
7201da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_REALTIME)
7211da177e4SLinus Torvalds 			flags |= XFS_XFLAG_REALTIME;
7221da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PREALLOC)
7231da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PREALLOC;
7241da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_IMMUTABLE)
7251da177e4SLinus Torvalds 			flags |= XFS_XFLAG_IMMUTABLE;
7261da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_APPEND)
7271da177e4SLinus Torvalds 			flags |= XFS_XFLAG_APPEND;
7281da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_SYNC)
7291da177e4SLinus Torvalds 			flags |= XFS_XFLAG_SYNC;
7301da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOATIME)
7311da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOATIME;
7321da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NODUMP)
7331da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NODUMP;
7341da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_RTINHERIT)
7351da177e4SLinus Torvalds 			flags |= XFS_XFLAG_RTINHERIT;
7361da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PROJINHERIT)
7371da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PROJINHERIT;
7381da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
7391da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOSYMLINKS;
740dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSIZE)
741dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSIZE;
742dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
743dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSZINHERIT;
744d3446eacSBarry Naujok 		if (di_flags & XFS_DIFLAG_NODEFRAG)
745d3446eacSBarry Naujok 			flags |= XFS_XFLAG_NODEFRAG;
7462a82b8beSDavid Chinner 		if (di_flags & XFS_DIFLAG_FILESTREAM)
7472a82b8beSDavid Chinner 			flags |= XFS_XFLAG_FILESTREAM;
7481da177e4SLinus Torvalds 	}
7491da177e4SLinus Torvalds 
7501da177e4SLinus Torvalds 	return flags;
7511da177e4SLinus Torvalds }
7521da177e4SLinus Torvalds 
7531da177e4SLinus Torvalds uint
7541da177e4SLinus Torvalds xfs_ip2xflags(
7551da177e4SLinus Torvalds 	xfs_inode_t		*ip)
7561da177e4SLinus Torvalds {
757347d1c01SChristoph Hellwig 	xfs_icdinode_t		*dic = &ip->i_d;
7581da177e4SLinus Torvalds 
759a916e2bdSNathan Scott 	return _xfs_dic2xflags(dic->di_flags) |
76045ba598eSChristoph Hellwig 				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
7611da177e4SLinus Torvalds }
7621da177e4SLinus Torvalds 
7631da177e4SLinus Torvalds uint
7641da177e4SLinus Torvalds xfs_dic2xflags(
76545ba598eSChristoph Hellwig 	xfs_dinode_t		*dip)
7661da177e4SLinus Torvalds {
76781591fe2SChristoph Hellwig 	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
76845ba598eSChristoph Hellwig 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
7691da177e4SLinus Torvalds }
7701da177e4SLinus Torvalds 
7711da177e4SLinus Torvalds /*
77224f211baSChristoph Hellwig  * Read the disk inode attributes into the in-core inode structure.
7731da177e4SLinus Torvalds  */
7741da177e4SLinus Torvalds int
7751da177e4SLinus Torvalds xfs_iread(
7761da177e4SLinus Torvalds 	xfs_mount_t	*mp,
7771da177e4SLinus Torvalds 	xfs_trans_t	*tp,
77824f211baSChristoph Hellwig 	xfs_inode_t	*ip,
77924f211baSChristoph Hellwig 	uint		iget_flags)
7801da177e4SLinus Torvalds {
7811da177e4SLinus Torvalds 	xfs_buf_t	*bp;
7821da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
7831da177e4SLinus Torvalds 	int		error;
7841da177e4SLinus Torvalds 
7851da177e4SLinus Torvalds 	/*
78692bfc6e7SChristoph Hellwig 	 * Fill in the location information in the in-core inode.
7871da177e4SLinus Torvalds 	 */
78824f211baSChristoph Hellwig 	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
7899ed0451eSChristoph Hellwig 	if (error)
79024f211baSChristoph Hellwig 		return error;
7911da177e4SLinus Torvalds 
7921da177e4SLinus Torvalds 	/*
79392bfc6e7SChristoph Hellwig 	 * Get pointers to the on-disk inode and the buffer containing it.
79476d8b277SChristoph Hellwig 	 */
79592bfc6e7SChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
7960cadda1cSChristoph Hellwig 			       XBF_LOCK, iget_flags);
79776d8b277SChristoph Hellwig 	if (error)
79824f211baSChristoph Hellwig 		return error;
79992bfc6e7SChristoph Hellwig 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
80076d8b277SChristoph Hellwig 
80176d8b277SChristoph Hellwig 	/*
8021da177e4SLinus Torvalds 	 * If we got something that isn't an inode it means someone
8031da177e4SLinus Torvalds 	 * (nfs or dmi) has a stale handle.
8041da177e4SLinus Torvalds 	 */
805*69ef921bSChristoph Hellwig 	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
8061da177e4SLinus Torvalds #ifdef DEBUG
80753487786SDave Chinner 		xfs_alert(mp,
80853487786SDave Chinner 			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
80953487786SDave Chinner 			__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
8101da177e4SLinus Torvalds #endif /* DEBUG */
8119ed0451eSChristoph Hellwig 		error = XFS_ERROR(EINVAL);
8129ed0451eSChristoph Hellwig 		goto out_brelse;
8131da177e4SLinus Torvalds 	}
8141da177e4SLinus Torvalds 
8151da177e4SLinus Torvalds 	/*
8161da177e4SLinus Torvalds 	 * If the on-disk inode is already linked to a directory
8171da177e4SLinus Torvalds 	 * entry, copy all of the inode into the in-core inode.
8181da177e4SLinus Torvalds 	 * xfs_iformat() handles copying in the inode format
8191da177e4SLinus Torvalds 	 * specific information.
8201da177e4SLinus Torvalds 	 * Otherwise, just get the truly permanent information.
8211da177e4SLinus Torvalds 	 */
82281591fe2SChristoph Hellwig 	if (dip->di_mode) {
82381591fe2SChristoph Hellwig 		xfs_dinode_from_disk(&ip->i_d, dip);
8241da177e4SLinus Torvalds 		error = xfs_iformat(ip, dip);
8251da177e4SLinus Torvalds 		if (error)  {
8261da177e4SLinus Torvalds #ifdef DEBUG
82753487786SDave Chinner 			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
82853487786SDave Chinner 				__func__, error);
8291da177e4SLinus Torvalds #endif /* DEBUG */
8309ed0451eSChristoph Hellwig 			goto out_brelse;
8311da177e4SLinus Torvalds 		}
8321da177e4SLinus Torvalds 	} else {
83381591fe2SChristoph Hellwig 		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
83481591fe2SChristoph Hellwig 		ip->i_d.di_version = dip->di_version;
83581591fe2SChristoph Hellwig 		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
83681591fe2SChristoph Hellwig 		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
8371da177e4SLinus Torvalds 		/*
8381da177e4SLinus Torvalds 		 * Make sure to pull in the mode here as well in
8391da177e4SLinus Torvalds 		 * case the inode is released without being used.
8401da177e4SLinus Torvalds 		 * This ensures that xfs_inactive() will see that
8411da177e4SLinus Torvalds 		 * the inode is already free and not try to mess
8421da177e4SLinus Torvalds 		 * with the uninitialized part of it.
8431da177e4SLinus Torvalds 		 */
8441da177e4SLinus Torvalds 		ip->i_d.di_mode = 0;
8451da177e4SLinus Torvalds 		/*
8461da177e4SLinus Torvalds 		 * Initialize the per-fork minima and maxima for a new
8471da177e4SLinus Torvalds 		 * inode here.  xfs_iformat will do it for old inodes.
8481da177e4SLinus Torvalds 		 */
8491da177e4SLinus Torvalds 		ip->i_df.if_ext_max =
8501da177e4SLinus Torvalds 			XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
8511da177e4SLinus Torvalds 	}
8521da177e4SLinus Torvalds 
8531da177e4SLinus Torvalds 	/*
8541da177e4SLinus Torvalds 	 * The inode format changed when we moved the link count and
8551da177e4SLinus Torvalds 	 * made it 32 bits long.  If this is an old format inode,
8561da177e4SLinus Torvalds 	 * convert it in memory to look like a new one.  If it gets
8571da177e4SLinus Torvalds 	 * flushed to disk we will convert back before flushing or
8581da177e4SLinus Torvalds 	 * logging it.  We zero out the new projid field and the old link
8591da177e4SLinus Torvalds 	 * count field.  We'll handle clearing the pad field (the remains
8601da177e4SLinus Torvalds 	 * of the old uuid field) when we actually convert the inode to
8611da177e4SLinus Torvalds 	 * the new format. We don't change the version number so that we
8621da177e4SLinus Torvalds 	 * can distinguish this from a real new format inode.
8631da177e4SLinus Torvalds 	 */
86451ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
8651da177e4SLinus Torvalds 		ip->i_d.di_nlink = ip->i_d.di_onlink;
8661da177e4SLinus Torvalds 		ip->i_d.di_onlink = 0;
8676743099cSArkadiusz Mi?kiewicz 		xfs_set_projid(ip, 0);
8681da177e4SLinus Torvalds 	}
8691da177e4SLinus Torvalds 
8701da177e4SLinus Torvalds 	ip->i_delayed_blks = 0;
871ba87ea69SLachlan McIlroy 	ip->i_size = ip->i_d.di_size;
8721da177e4SLinus Torvalds 
8731da177e4SLinus Torvalds 	/*
8741da177e4SLinus Torvalds 	 * Mark the buffer containing the inode as something to keep
8751da177e4SLinus Torvalds 	 * around for a while.  This helps to keep recently accessed
8761da177e4SLinus Torvalds 	 * meta-data in-core longer.
8771da177e4SLinus Torvalds 	 */
878821eb21dSDave Chinner 	xfs_buf_set_ref(bp, XFS_INO_REF);
8791da177e4SLinus Torvalds 
8801da177e4SLinus Torvalds 	/*
8811da177e4SLinus Torvalds 	 * Use xfs_trans_brelse() to release the buffer containing the
8821da177e4SLinus Torvalds 	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
8831da177e4SLinus Torvalds 	 * in xfs_itobp() above.  If tp is NULL, this is just a normal
8841da177e4SLinus Torvalds 	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
8851da177e4SLinus Torvalds 	 * will only release the buffer if it is not dirty within the
8861da177e4SLinus Torvalds 	 * transaction.  It will be OK to release the buffer in this case,
8871da177e4SLinus Torvalds 	 * because inodes on disk are never destroyed and we will be
8881da177e4SLinus Torvalds 	 * locking the new in-core inode before putting it in the hash
8891da177e4SLinus Torvalds 	 * table where other processes can find it.  Thus we don't have
8901da177e4SLinus Torvalds 	 * to worry about the inode being changed just because we released
8911da177e4SLinus Torvalds 	 * the buffer.
8921da177e4SLinus Torvalds 	 */
8939ed0451eSChristoph Hellwig  out_brelse:
8949ed0451eSChristoph Hellwig 	xfs_trans_brelse(tp, bp);
8959ed0451eSChristoph Hellwig 	return error;
8961da177e4SLinus Torvalds }
8971da177e4SLinus Torvalds 
8981da177e4SLinus Torvalds /*
8991da177e4SLinus Torvalds  * Read in extents from a btree-format inode.
9001da177e4SLinus Torvalds  * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
9011da177e4SLinus Torvalds  */
9021da177e4SLinus Torvalds int
9031da177e4SLinus Torvalds xfs_iread_extents(
9041da177e4SLinus Torvalds 	xfs_trans_t	*tp,
9051da177e4SLinus Torvalds 	xfs_inode_t	*ip,
9061da177e4SLinus Torvalds 	int		whichfork)
9071da177e4SLinus Torvalds {
9081da177e4SLinus Torvalds 	int		error;
9091da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
9104eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;
9111da177e4SLinus Torvalds 
9121da177e4SLinus Torvalds 	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
9131da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
9141da177e4SLinus Torvalds 				 ip->i_mount);
9151da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
9161da177e4SLinus Torvalds 	}
9174eea22f0SMandy Kirkconnell 	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
9181da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
9194eea22f0SMandy Kirkconnell 
9201da177e4SLinus Torvalds 	/*
9211da177e4SLinus Torvalds 	 * We know that the size is valid (it's checked in iformat_btree)
9221da177e4SLinus Torvalds 	 */
9234eea22f0SMandy Kirkconnell 	ifp->if_bytes = ifp->if_real_bytes = 0;
9241da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
9254eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, 0, nextents);
9261da177e4SLinus Torvalds 	error = xfs_bmap_read_extents(tp, ip, whichfork);
9271da177e4SLinus Torvalds 	if (error) {
9284eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
9291da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFEXTENTS;
9301da177e4SLinus Torvalds 		return error;
9311da177e4SLinus Torvalds 	}
932a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
9331da177e4SLinus Torvalds 	return 0;
9341da177e4SLinus Torvalds }
9351da177e4SLinus Torvalds 
9361da177e4SLinus Torvalds /*
9371da177e4SLinus Torvalds  * Allocate an inode on disk and return a copy of its in-core version.
9381da177e4SLinus Torvalds  * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
9391da177e4SLinus Torvalds  * appropriately within the inode.  The uid and gid for the inode are
9401da177e4SLinus Torvalds  * set according to the contents of the given cred structure.
9411da177e4SLinus Torvalds  *
9421da177e4SLinus Torvalds  * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
9431da177e4SLinus Torvalds  * has a free inode available, call xfs_iget()
9441da177e4SLinus Torvalds  * to obtain the in-core version of the allocated inode.  Finally,
9451da177e4SLinus Torvalds  * fill in the inode and log its initial contents.  In this case,
9461da177e4SLinus Torvalds  * ialloc_context would be set to NULL and call_again set to false.
9471da177e4SLinus Torvalds  *
9481da177e4SLinus Torvalds  * If xfs_dialloc() does not have an available inode,
9491da177e4SLinus Torvalds  * it will replenish its supply by doing an allocation. Since we can
9501da177e4SLinus Torvalds  * only do one allocation within a transaction without deadlocks, we
9511da177e4SLinus Torvalds  * must commit the current transaction before returning the inode itself.
9521da177e4SLinus Torvalds  * In this case, therefore, we will set call_again to true and return.
9531da177e4SLinus Torvalds  * The caller should then commit the current transaction, start a new
9541da177e4SLinus Torvalds  * transaction, and call xfs_ialloc() again to actually get the inode.
9551da177e4SLinus Torvalds  *
9561da177e4SLinus Torvalds  * To ensure that some other process does not grab the inode that
9571da177e4SLinus Torvalds  * was allocated during the first call to xfs_ialloc(), this routine
9581da177e4SLinus Torvalds  * also returns the [locked] bp pointing to the head of the freelist
9591da177e4SLinus Torvalds  * as ialloc_context.  The caller should hold this buffer across
9601da177e4SLinus Torvalds  * the commit and pass it back into this routine on the second call.
961b11f94d5SDavid Chinner  *
962b11f94d5SDavid Chinner  * If we are allocating quota inodes, we do not have a parent inode
963b11f94d5SDavid Chinner  * to attach to or associate with (i.e. pip == NULL) because they
964b11f94d5SDavid Chinner  * are not linked into the directory structure - they are attached
965b11f94d5SDavid Chinner  * directly to the superblock - and so have no parent.
9661da177e4SLinus Torvalds  */
9671da177e4SLinus Torvalds int
9681da177e4SLinus Torvalds xfs_ialloc(
9691da177e4SLinus Torvalds 	xfs_trans_t	*tp,
9701da177e4SLinus Torvalds 	xfs_inode_t	*pip,
9711da177e4SLinus Torvalds 	mode_t		mode,
97231b084aeSNathan Scott 	xfs_nlink_t	nlink,
9731da177e4SLinus Torvalds 	xfs_dev_t	rdev,
9746743099cSArkadiusz Mi?kiewicz 	prid_t		prid,
9751da177e4SLinus Torvalds 	int		okalloc,
9761da177e4SLinus Torvalds 	xfs_buf_t	**ialloc_context,
9771da177e4SLinus Torvalds 	boolean_t	*call_again,
9781da177e4SLinus Torvalds 	xfs_inode_t	**ipp)
9791da177e4SLinus Torvalds {
9801da177e4SLinus Torvalds 	xfs_ino_t	ino;
9811da177e4SLinus Torvalds 	xfs_inode_t	*ip;
9821da177e4SLinus Torvalds 	uint		flags;
9831da177e4SLinus Torvalds 	int		error;
984dff35fd4SChristoph Hellwig 	timespec_t	tv;
985bf904248SDavid Chinner 	int		filestreams = 0;
9861da177e4SLinus Torvalds 
9871da177e4SLinus Torvalds 	/*
9881da177e4SLinus Torvalds 	 * Call the space management code to pick
9891da177e4SLinus Torvalds 	 * the on-disk inode to be allocated.
9901da177e4SLinus Torvalds 	 */
991b11f94d5SDavid Chinner 	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
9921da177e4SLinus Torvalds 			    ialloc_context, call_again, &ino);
993bf904248SDavid Chinner 	if (error)
9941da177e4SLinus Torvalds 		return error;
9951da177e4SLinus Torvalds 	if (*call_again || ino == NULLFSINO) {
9961da177e4SLinus Torvalds 		*ipp = NULL;
9971da177e4SLinus Torvalds 		return 0;
9981da177e4SLinus Torvalds 	}
9991da177e4SLinus Torvalds 	ASSERT(*ialloc_context == NULL);
10001da177e4SLinus Torvalds 
10011da177e4SLinus Torvalds 	/*
10021da177e4SLinus Torvalds 	 * Get the in-core inode with the lock held exclusively.
10031da177e4SLinus Torvalds 	 * This is because we're setting fields here we need
10041da177e4SLinus Torvalds 	 * to prevent others from looking at until we're done.
10051da177e4SLinus Torvalds 	 */
1006ec3ba85fSChristoph Hellwig 	error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
1007ec3ba85fSChristoph Hellwig 			 XFS_ILOCK_EXCL, &ip);
1008bf904248SDavid Chinner 	if (error)
10091da177e4SLinus Torvalds 		return error;
10101da177e4SLinus Torvalds 	ASSERT(ip != NULL);
10111da177e4SLinus Torvalds 
10121da177e4SLinus Torvalds 	ip->i_d.di_mode = (__uint16_t)mode;
10131da177e4SLinus Torvalds 	ip->i_d.di_onlink = 0;
10141da177e4SLinus Torvalds 	ip->i_d.di_nlink = nlink;
10151da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == nlink);
10169e2b2dc4SDavid Howells 	ip->i_d.di_uid = current_fsuid();
10179e2b2dc4SDavid Howells 	ip->i_d.di_gid = current_fsgid();
10186743099cSArkadiusz Mi?kiewicz 	xfs_set_projid(ip, prid);
10191da177e4SLinus Torvalds 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
10201da177e4SLinus Torvalds 
10211da177e4SLinus Torvalds 	/*
10221da177e4SLinus Torvalds 	 * If the superblock version is up to where we support new format
10231da177e4SLinus Torvalds 	 * inodes and this is currently an old format inode, then change
10241da177e4SLinus Torvalds 	 * the inode version number now.  This way we only do the conversion
10251da177e4SLinus Torvalds 	 * here rather than here and in the flush/logging code.
10261da177e4SLinus Torvalds 	 */
102762118709SEric Sandeen 	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
102851ce16d5SChristoph Hellwig 	    ip->i_d.di_version == 1) {
102951ce16d5SChristoph Hellwig 		ip->i_d.di_version = 2;
10301da177e4SLinus Torvalds 		/*
10311da177e4SLinus Torvalds 		 * We've already zeroed the old link count, the projid field,
10321da177e4SLinus Torvalds 		 * and the pad field.
10331da177e4SLinus Torvalds 		 */
10341da177e4SLinus Torvalds 	}
10351da177e4SLinus Torvalds 
10361da177e4SLinus Torvalds 	/*
10371da177e4SLinus Torvalds 	 * Project ids won't be stored on disk if we are using a version 1 inode.
10381da177e4SLinus Torvalds 	 */
103951ce16d5SChristoph Hellwig 	if ((prid != 0) && (ip->i_d.di_version == 1))
10401da177e4SLinus Torvalds 		xfs_bump_ino_vers2(tp, ip);
10411da177e4SLinus Torvalds 
1042bd186aa9SChristoph Hellwig 	if (pip && XFS_INHERIT_GID(pip)) {
10431da177e4SLinus Torvalds 		ip->i_d.di_gid = pip->i_d.di_gid;
10441da177e4SLinus Torvalds 		if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
10451da177e4SLinus Torvalds 			ip->i_d.di_mode |= S_ISGID;
10461da177e4SLinus Torvalds 		}
10471da177e4SLinus Torvalds 	}
10481da177e4SLinus Torvalds 
10491da177e4SLinus Torvalds 	/*
10501da177e4SLinus Torvalds 	 * If the group ID of the new file does not match the effective group
10511da177e4SLinus Torvalds 	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
10521da177e4SLinus Torvalds 	 * (and only if the irix_sgid_inherit compatibility variable is set).
10531da177e4SLinus Torvalds 	 */
10541da177e4SLinus Torvalds 	if ((irix_sgid_inherit) &&
10551da177e4SLinus Torvalds 	    (ip->i_d.di_mode & S_ISGID) &&
10561da177e4SLinus Torvalds 	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
10571da177e4SLinus Torvalds 		ip->i_d.di_mode &= ~S_ISGID;
10581da177e4SLinus Torvalds 	}
10591da177e4SLinus Torvalds 
10601da177e4SLinus Torvalds 	ip->i_d.di_size = 0;
1061ba87ea69SLachlan McIlroy 	ip->i_size = 0;
10621da177e4SLinus Torvalds 	ip->i_d.di_nextents = 0;
10631da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
1064dff35fd4SChristoph Hellwig 
1065dff35fd4SChristoph Hellwig 	nanotime(&tv);
1066dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
1067dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
1068dff35fd4SChristoph Hellwig 	ip->i_d.di_atime = ip->i_d.di_mtime;
1069dff35fd4SChristoph Hellwig 	ip->i_d.di_ctime = ip->i_d.di_mtime;
1070dff35fd4SChristoph Hellwig 
10711da177e4SLinus Torvalds 	/*
10721da177e4SLinus Torvalds 	 * di_gen will have been taken care of in xfs_iread.
10731da177e4SLinus Torvalds 	 */
10741da177e4SLinus Torvalds 	ip->i_d.di_extsize = 0;
10751da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
10761da177e4SLinus Torvalds 	ip->i_d.di_dmstate = 0;
10771da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
10781da177e4SLinus Torvalds 	flags = XFS_ILOG_CORE;
10791da177e4SLinus Torvalds 	switch (mode & S_IFMT) {
10801da177e4SLinus Torvalds 	case S_IFIFO:
10811da177e4SLinus Torvalds 	case S_IFCHR:
10821da177e4SLinus Torvalds 	case S_IFBLK:
10831da177e4SLinus Torvalds 	case S_IFSOCK:
10841da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
10851da177e4SLinus Torvalds 		ip->i_df.if_u2.if_rdev = rdev;
10861da177e4SLinus Torvalds 		ip->i_df.if_flags = 0;
10871da177e4SLinus Torvalds 		flags |= XFS_ILOG_DEV;
10881da177e4SLinus Torvalds 		break;
10891da177e4SLinus Torvalds 	case S_IFREG:
1090bf904248SDavid Chinner 		/*
1091bf904248SDavid Chinner 		 * we can't set up filestreams until after the VFS inode
1092bf904248SDavid Chinner 		 * is set up properly.
1093bf904248SDavid Chinner 		 */
1094bf904248SDavid Chinner 		if (pip && xfs_inode_is_filestream(pip))
1095bf904248SDavid Chinner 			filestreams = 1;
10962a82b8beSDavid Chinner 		/* fall through */
10971da177e4SLinus Torvalds 	case S_IFDIR:
1098b11f94d5SDavid Chinner 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1099365ca83dSNathan Scott 			uint	di_flags = 0;
1100365ca83dSNathan Scott 
11011da177e4SLinus Torvalds 			if ((mode & S_IFMT) == S_IFDIR) {
1102365ca83dSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1103365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_RTINHERIT;
1104dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1105dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1106dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1107dd9f438eSNathan Scott 				}
1108dd9f438eSNathan Scott 			} else if ((mode & S_IFMT) == S_IFREG) {
1109613d7043SChristoph Hellwig 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1110365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_REALTIME;
1111dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1112dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSIZE;
1113dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1114dd9f438eSNathan Scott 				}
11151da177e4SLinus Torvalds 			}
11161da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
11171da177e4SLinus Torvalds 			    xfs_inherit_noatime)
1118365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOATIME;
11191da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
11201da177e4SLinus Torvalds 			    xfs_inherit_nodump)
1121365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NODUMP;
11221da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
11231da177e4SLinus Torvalds 			    xfs_inherit_sync)
1124365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_SYNC;
11251da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
11261da177e4SLinus Torvalds 			    xfs_inherit_nosymlinks)
1127365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
1128365ca83dSNathan Scott 			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1129365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_PROJINHERIT;
1130d3446eacSBarry Naujok 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1131d3446eacSBarry Naujok 			    xfs_inherit_nodefrag)
1132d3446eacSBarry Naujok 				di_flags |= XFS_DIFLAG_NODEFRAG;
11332a82b8beSDavid Chinner 			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
11342a82b8beSDavid Chinner 				di_flags |= XFS_DIFLAG_FILESTREAM;
1135365ca83dSNathan Scott 			ip->i_d.di_flags |= di_flags;
11361da177e4SLinus Torvalds 		}
11371da177e4SLinus Torvalds 		/* FALLTHROUGH */
11381da177e4SLinus Torvalds 	case S_IFLNK:
11391da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
11401da177e4SLinus Torvalds 		ip->i_df.if_flags = XFS_IFEXTENTS;
11411da177e4SLinus Torvalds 		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
11421da177e4SLinus Torvalds 		ip->i_df.if_u1.if_extents = NULL;
11431da177e4SLinus Torvalds 		break;
11441da177e4SLinus Torvalds 	default:
11451da177e4SLinus Torvalds 		ASSERT(0);
11461da177e4SLinus Torvalds 	}
11471da177e4SLinus Torvalds 	/*
11481da177e4SLinus Torvalds 	 * Attribute fork settings for new inode.
11491da177e4SLinus Torvalds 	 */
11501da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
11511da177e4SLinus Torvalds 	ip->i_d.di_anextents = 0;
11521da177e4SLinus Torvalds 
11531da177e4SLinus Torvalds 	/*
11541da177e4SLinus Torvalds 	 * Log the new values stuffed into the inode.
11551da177e4SLinus Torvalds 	 */
1156ec3ba85fSChristoph Hellwig 	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
11571da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, flags);
11581da177e4SLinus Torvalds 
1159b83bd138SNathan Scott 	/* now that we have an i_mode we can setup inode ops and unlock */
116041be8bedSChristoph Hellwig 	xfs_setup_inode(ip);
11611da177e4SLinus Torvalds 
1162bf904248SDavid Chinner 	/* now we have set up the vfs inode we can associate the filestream */
1163bf904248SDavid Chinner 	if (filestreams) {
1164bf904248SDavid Chinner 		error = xfs_filestream_associate(pip, ip);
1165bf904248SDavid Chinner 		if (error < 0)
1166bf904248SDavid Chinner 			return -error;
1167bf904248SDavid Chinner 		if (!error)
1168bf904248SDavid Chinner 			xfs_iflags_set(ip, XFS_IFILESTREAM);
1169bf904248SDavid Chinner 	}
1170bf904248SDavid Chinner 
11711da177e4SLinus Torvalds 	*ipp = ip;
11721da177e4SLinus Torvalds 	return 0;
11731da177e4SLinus Torvalds }
11741da177e4SLinus Torvalds 
/*
 * Debug-only sanity check that nothing is mapped past the end of a file.
 *
 * Looks up the data fork mapping from XFS_B_TO_FSB(mp, isize) up to the
 * maximum file offset and asserts that it is a single hole.  Only regular
 * files are checked; realtime files and files carrying an extent size hint
 * are skipped.  On non-DEBUG builds the check expands to nothing.
 */
#ifdef DEBUG
STATIC void
xfs_isize_check(
	struct xfs_inode	*ip,
	xfs_fsize_t		isize)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_bmbt_irec_t		irecs[2];
	xfs_fileoff_t		start_fsb;
	xfs_fileoff_t		end_fsb;
	int			nirecs = 2;
	int			error;

	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG ||
	    XFS_IS_REALTIME_INODE(ip) ||
	    (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))
		return;

	start_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));

	/*
	 * The lookup can legitimately fail, e.g. while the filesystem is
	 * shutting down.  In that case there is nothing to verify.
	 */
	error = xfs_bmapi(NULL, ip, start_fsb, end_fsb - start_fsb,
			  XFS_BMAPI_ENTIRE, NULL, 0, irecs, &nirecs, NULL);
	if (error)
		return;

	/* Exactly one mapping must come back, and it must be a hole. */
	ASSERT(nirecs == 1);
	ASSERT(irecs[0].br_startblock == HOLESTARTBLOCK);
}
#else	/* DEBUG */
#define xfs_isize_check(ip, isize)
#endif	/* DEBUG */
12201da177e4SLinus Torvalds 
12211da177e4SLinus Torvalds /*
12228f04c47aSChristoph Hellwig  * Free up the underlying blocks past new_size.  The new size must be smaller
12238f04c47aSChristoph Hellwig  * than the current size.  This routine can be used both for the attribute and
12248f04c47aSChristoph Hellwig  * data fork, and does not modify the inode size, which is left to the caller.
12251da177e4SLinus Torvalds  *
1226f6485057SDavid Chinner  * The transaction passed to this routine must have made a permanent log
1227f6485057SDavid Chinner  * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
1228f6485057SDavid Chinner  * given transaction and start new ones, so make sure everything involved in
1229f6485057SDavid Chinner  * the transaction is tidy before calling here.  Some transaction will be
1230f6485057SDavid Chinner  * returned to the caller to be committed.  The incoming transaction must
1231f6485057SDavid Chinner  * already include the inode, and both inode locks must be held exclusively.
1232f6485057SDavid Chinner  * The inode must also be "held" within the transaction.  On return the inode
1233f6485057SDavid Chinner  * will be "held" within the returned transaction.  This routine does NOT
1234f6485057SDavid Chinner  * require any disk space to be reserved for it within the transaction.
12351da177e4SLinus Torvalds  *
1236f6485057SDavid Chinner  * If we get an error, we must return with the inode locked and linked into the
1237f6485057SDavid Chinner  * current transaction. This keeps things simple for the higher level code,
1238f6485057SDavid Chinner  * because it always knows that the inode is locked and held in the transaction
1239f6485057SDavid Chinner  * that returns to it whether errors occur or not.  We don't mark the inode
1240f6485057SDavid Chinner  * dirty on error so that transactions can be easily aborted if possible.
12411da177e4SLinus Torvalds  */
12421da177e4SLinus Torvalds int
12438f04c47aSChristoph Hellwig xfs_itruncate_extents(
12448f04c47aSChristoph Hellwig 	struct xfs_trans	**tpp,
12458f04c47aSChristoph Hellwig 	struct xfs_inode	*ip,
12468f04c47aSChristoph Hellwig 	int			whichfork,
12478f04c47aSChristoph Hellwig 	xfs_fsize_t		new_size)
12481da177e4SLinus Torvalds {
12498f04c47aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
12508f04c47aSChristoph Hellwig 	struct xfs_trans	*tp = *tpp;
12518f04c47aSChristoph Hellwig 	struct xfs_trans	*ntp;
12528f04c47aSChristoph Hellwig 	xfs_bmap_free_t		free_list;
12531da177e4SLinus Torvalds 	xfs_fsblock_t		first_block;
12541da177e4SLinus Torvalds 	xfs_fileoff_t		first_unmap_block;
12551da177e4SLinus Torvalds 	xfs_fileoff_t		last_block;
12568f04c47aSChristoph Hellwig 	xfs_filblks_t		unmap_len;
12571da177e4SLinus Torvalds 	int			committed;
12588f04c47aSChristoph Hellwig 	int			error = 0;
12598f04c47aSChristoph Hellwig 	int			done = 0;
12601da177e4SLinus Torvalds 
1261579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
12628f04c47aSChristoph Hellwig 	ASSERT(new_size <= ip->i_size);
12638f04c47aSChristoph Hellwig 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
12641da177e4SLinus Torvalds 	ASSERT(ip->i_itemp != NULL);
1265898621d5SChristoph Hellwig 	ASSERT(ip->i_itemp->ili_lock_flags == 0);
12661da177e4SLinus Torvalds 	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
12671da177e4SLinus Torvalds 
12681da177e4SLinus Torvalds 	/*
12691da177e4SLinus Torvalds 	 * Since it is possible for space to become allocated beyond
12701da177e4SLinus Torvalds 	 * the end of the file (in a crash where the space is allocated
12711da177e4SLinus Torvalds 	 * but the inode size is not yet updated), simply remove any
12721da177e4SLinus Torvalds 	 * blocks which show up between the new EOF and the maximum
12731da177e4SLinus Torvalds 	 * possible file size.  If the first block to be removed is
12741da177e4SLinus Torvalds 	 * beyond the maximum file size (ie it is the same as last_block),
12751da177e4SLinus Torvalds 	 * then there is nothing to do.
12761da177e4SLinus Torvalds 	 */
12778f04c47aSChristoph Hellwig 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
12781da177e4SLinus Torvalds 	last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
12798f04c47aSChristoph Hellwig 	if (first_unmap_block == last_block)
12808f04c47aSChristoph Hellwig 		return 0;
12818f04c47aSChristoph Hellwig 
12828f04c47aSChristoph Hellwig 	ASSERT(first_unmap_block < last_block);
12831da177e4SLinus Torvalds 	unmap_len = last_block - first_unmap_block + 1;
12841da177e4SLinus Torvalds 	while (!done) {
12859d87c319SEric Sandeen 		xfs_bmap_init(&free_list, &first_block);
12868f04c47aSChristoph Hellwig 		error = xfs_bunmapi(tp, ip,
12873e57ecf6SOlaf Weber 				    first_unmap_block, unmap_len,
12888f04c47aSChristoph Hellwig 				    xfs_bmapi_aflag(whichfork),
12891da177e4SLinus Torvalds 				    XFS_ITRUNC_MAX_EXTENTS,
12903e57ecf6SOlaf Weber 				    &first_block, &free_list,
1291b4e9181eSChristoph Hellwig 				    &done);
12928f04c47aSChristoph Hellwig 		if (error)
12938f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
12941da177e4SLinus Torvalds 
12951da177e4SLinus Torvalds 		/*
12961da177e4SLinus Torvalds 		 * Duplicate the transaction that has the permanent
12971da177e4SLinus Torvalds 		 * reservation and commit the old transaction.
12981da177e4SLinus Torvalds 		 */
12998f04c47aSChristoph Hellwig 		error = xfs_bmap_finish(&tp, &free_list, &committed);
1300898621d5SChristoph Hellwig 		if (committed)
13018f04c47aSChristoph Hellwig 			xfs_trans_ijoin(tp, ip);
13028f04c47aSChristoph Hellwig 		if (error)
13038f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
13041da177e4SLinus Torvalds 
13051da177e4SLinus Torvalds 		if (committed) {
13061da177e4SLinus Torvalds 			/*
1307f6485057SDavid Chinner 			 * Mark the inode dirty so it will be logged and
1308e5720eecSDavid Chinner 			 * moved forward in the log as part of every commit.
13091da177e4SLinus Torvalds 			 */
13108f04c47aSChristoph Hellwig 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
13111da177e4SLinus Torvalds 		}
1312f6485057SDavid Chinner 
13138f04c47aSChristoph Hellwig 		ntp = xfs_trans_dup(tp);
13148f04c47aSChristoph Hellwig 		error = xfs_trans_commit(tp, 0);
13158f04c47aSChristoph Hellwig 		tp = ntp;
1316f6485057SDavid Chinner 
13178f04c47aSChristoph Hellwig 		xfs_trans_ijoin(tp, ip);
1318f6485057SDavid Chinner 
1319cc09c0dcSDave Chinner 		if (error)
13208f04c47aSChristoph Hellwig 			goto out;
13218f04c47aSChristoph Hellwig 
1322cc09c0dcSDave Chinner 		/*
13238f04c47aSChristoph Hellwig 		 * Transaction commit worked ok so we can drop the extra ticket
1324cc09c0dcSDave Chinner 		 * reference that we gained in xfs_trans_dup()
1325cc09c0dcSDave Chinner 		 */
13268f04c47aSChristoph Hellwig 		xfs_log_ticket_put(tp->t_ticket);
13278f04c47aSChristoph Hellwig 		error = xfs_trans_reserve(tp, 0,
1328f6485057SDavid Chinner 					XFS_ITRUNCATE_LOG_RES(mp), 0,
13291da177e4SLinus Torvalds 					XFS_TRANS_PERM_LOG_RES,
13301da177e4SLinus Torvalds 					XFS_ITRUNCATE_LOG_COUNT);
13311da177e4SLinus Torvalds 		if (error)
13328f04c47aSChristoph Hellwig 			goto out;
13331da177e4SLinus Torvalds 	}
13348f04c47aSChristoph Hellwig 
13358f04c47aSChristoph Hellwig out:
13368f04c47aSChristoph Hellwig 	*tpp = tp;
13378f04c47aSChristoph Hellwig 	return error;
13388f04c47aSChristoph Hellwig out_bmap_cancel:
13391da177e4SLinus Torvalds 	/*
13408f04c47aSChristoph Hellwig 	 * If the bunmapi call encounters an error, return to the caller where
13418f04c47aSChristoph Hellwig 	 * the transaction can be properly aborted.  We just need to make sure
13428f04c47aSChristoph Hellwig 	 * we're not holding any resources that we were not when we came in.
13431da177e4SLinus Torvalds 	 */
13448f04c47aSChristoph Hellwig 	xfs_bmap_cancel(&free_list);
13458f04c47aSChristoph Hellwig 	goto out;
13468f04c47aSChristoph Hellwig }
13478f04c47aSChristoph Hellwig 
13488f04c47aSChristoph Hellwig int
13498f04c47aSChristoph Hellwig xfs_itruncate_data(
13508f04c47aSChristoph Hellwig 	struct xfs_trans	**tpp,
13518f04c47aSChristoph Hellwig 	struct xfs_inode	*ip,
13528f04c47aSChristoph Hellwig 	xfs_fsize_t		new_size)
13538f04c47aSChristoph Hellwig {
13548f04c47aSChristoph Hellwig 	int			error;
13558f04c47aSChristoph Hellwig 
13568f04c47aSChristoph Hellwig 	trace_xfs_itruncate_data_start(ip, new_size);
13578f04c47aSChristoph Hellwig 
1358ba87ea69SLachlan McIlroy 	/*
13598f04c47aSChristoph Hellwig 	 * The first thing we do is set the size to new_size permanently on
13608f04c47aSChristoph Hellwig 	 * disk.  This way we don't have to worry about anyone ever being able
13618f04c47aSChristoph Hellwig 	 * to look at the data being freed even in the face of a crash.
13628f04c47aSChristoph Hellwig 	 * What we're getting around here is the case where we free a block, it
13638f04c47aSChristoph Hellwig 	 * is allocated to another file, it is written to, and then we crash.
13648f04c47aSChristoph Hellwig 	 * If the new data gets written to the file but the log buffers
13658f04c47aSChristoph Hellwig 	 * containing the free and reallocation don't, then we'd end up with
13668f04c47aSChristoph Hellwig 	 * garbage in the blocks being freed.  As long as we make the new_size
13678f04c47aSChristoph Hellwig 	 * permanent before actually freeing any blocks it doesn't matter if
13688f04c47aSChristoph Hellwig 	 * they get written to.
13698f04c47aSChristoph Hellwig 	 */
13708f04c47aSChristoph Hellwig 	if (ip->i_d.di_nextents > 0) {
13718f04c47aSChristoph Hellwig 		/*
13728f04c47aSChristoph Hellwig 		 * If we are not changing the file size then do not update
13738f04c47aSChristoph Hellwig 		 * the on-disk file size - we may be called from
13748f04c47aSChristoph Hellwig 		 * xfs_inactive_free_eofblocks().  If we update the on-disk
13758f04c47aSChristoph Hellwig 		 * file size and then the system crashes before the contents
13768f04c47aSChristoph Hellwig 		 * of the file are flushed to disk then the files may be
13778f04c47aSChristoph Hellwig 		 * full of holes (ie NULL files bug).
1378ba87ea69SLachlan McIlroy 		 */
1379ba87ea69SLachlan McIlroy 		if (ip->i_size != new_size) {
13801da177e4SLinus Torvalds 			ip->i_d.di_size = new_size;
1381ba87ea69SLachlan McIlroy 			ip->i_size = new_size;
13828f04c47aSChristoph Hellwig 			xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1383ba87ea69SLachlan McIlroy 		}
13841da177e4SLinus Torvalds 	}
13858f04c47aSChristoph Hellwig 
13868f04c47aSChristoph Hellwig 	error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
13878f04c47aSChristoph Hellwig 	if (error)
13888f04c47aSChristoph Hellwig 		return error;
13898f04c47aSChristoph Hellwig 
13908f04c47aSChristoph Hellwig 	/*
13918f04c47aSChristoph Hellwig 	 * If we are not changing the file size then do not update the on-disk
13928f04c47aSChristoph Hellwig 	 * file size - we may be called from xfs_inactive_free_eofblocks().
13938f04c47aSChristoph Hellwig 	 * If we update the on-disk file size and then the system crashes
13948f04c47aSChristoph Hellwig 	 * before the contents of the file are flushed to disk then the files
13958f04c47aSChristoph Hellwig 	 * may be full of holes (ie NULL files bug).
13968f04c47aSChristoph Hellwig 	 */
13978f04c47aSChristoph Hellwig 	xfs_isize_check(ip, new_size);
13988f04c47aSChristoph Hellwig 	if (ip->i_size != new_size) {
13998f04c47aSChristoph Hellwig 		ip->i_d.di_size = new_size;
14008f04c47aSChristoph Hellwig 		ip->i_size = new_size;
14018f04c47aSChristoph Hellwig 	}
14028f04c47aSChristoph Hellwig 
14038f04c47aSChristoph Hellwig 	ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
14048f04c47aSChristoph Hellwig 	ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
14058f04c47aSChristoph Hellwig 
14068f04c47aSChristoph Hellwig 	/*
14078f04c47aSChristoph Hellwig 	 * Always re-log the inode so that our permanent transaction can keep
14088f04c47aSChristoph Hellwig 	 * on rolling it forward in the log.
14098f04c47aSChristoph Hellwig 	 */
14108f04c47aSChristoph Hellwig 	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
14118f04c47aSChristoph Hellwig 
14128f04c47aSChristoph Hellwig 	trace_xfs_itruncate_data_end(ip, new_size);
14131da177e4SLinus Torvalds 	return 0;
14141da177e4SLinus Torvalds }
14151da177e4SLinus Torvalds 
14161da177e4SLinus Torvalds /*
14171da177e4SLinus Torvalds  * This is called when the inode's link count goes to 0.
14181da177e4SLinus Torvalds  * We place the on-disk inode on a list in the AGI.  It
14191da177e4SLinus Torvalds  * will be pulled from this list when the inode is freed.
14201da177e4SLinus Torvalds  */
14211da177e4SLinus Torvalds int
14221da177e4SLinus Torvalds xfs_iunlink(
14231da177e4SLinus Torvalds 	xfs_trans_t	*tp,
14241da177e4SLinus Torvalds 	xfs_inode_t	*ip)
14251da177e4SLinus Torvalds {
14261da177e4SLinus Torvalds 	xfs_mount_t	*mp;
14271da177e4SLinus Torvalds 	xfs_agi_t	*agi;
14281da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
14291da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
14301da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
14311da177e4SLinus Torvalds 	xfs_agino_t	agino;
14321da177e4SLinus Torvalds 	short		bucket_index;
14331da177e4SLinus Torvalds 	int		offset;
14341da177e4SLinus Torvalds 	int		error;
14351da177e4SLinus Torvalds 
14361da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
14371da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_mode != 0);
14381da177e4SLinus Torvalds 
14391da177e4SLinus Torvalds 	mp = tp->t_mountp;
14401da177e4SLinus Torvalds 
14411da177e4SLinus Torvalds 	/*
14421da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
14431da177e4SLinus Torvalds 	 * on the list.
14441da177e4SLinus Torvalds 	 */
14455e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1446859d7182SVlad Apostolov 	if (error)
14471da177e4SLinus Torvalds 		return error;
14481da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
14495e1be0fbSChristoph Hellwig 
14501da177e4SLinus Torvalds 	/*
14511da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
14521da177e4SLinus Torvalds 	 * list this inode will go on.
14531da177e4SLinus Torvalds 	 */
14541da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
14551da177e4SLinus Torvalds 	ASSERT(agino != 0);
14561da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
14571da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
145816259e7dSChristoph Hellwig 	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
14591da177e4SLinus Torvalds 
1460*69ef921bSChristoph Hellwig 	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
14611da177e4SLinus Torvalds 		/*
14621da177e4SLinus Torvalds 		 * There is already another inode in the bucket we need
14631da177e4SLinus Torvalds 		 * to add ourselves to.  Add us at the front of the list.
14641da177e4SLinus Torvalds 		 * Here we put the head pointer into our next pointer,
14651da177e4SLinus Torvalds 		 * and then we fall through to point the head at us.
14661da177e4SLinus Torvalds 		 */
14670cadda1cSChristoph Hellwig 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1468c319b58bSVlad Apostolov 		if (error)
1469c319b58bSVlad Apostolov 			return error;
1470c319b58bSVlad Apostolov 
1471*69ef921bSChristoph Hellwig 		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
14721da177e4SLinus Torvalds 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
147392bfc6e7SChristoph Hellwig 		offset = ip->i_imap.im_boffset +
14741da177e4SLinus Torvalds 			offsetof(xfs_dinode_t, di_next_unlinked);
14751da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, ibp);
14761da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, ibp, offset,
14771da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
14781da177e4SLinus Torvalds 		xfs_inobp_check(mp, ibp);
14791da177e4SLinus Torvalds 	}
14801da177e4SLinus Torvalds 
14811da177e4SLinus Torvalds 	/*
14821da177e4SLinus Torvalds 	 * Point the bucket head pointer at the inode being inserted.
14831da177e4SLinus Torvalds 	 */
14841da177e4SLinus Torvalds 	ASSERT(agino != 0);
148516259e7dSChristoph Hellwig 	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
14861da177e4SLinus Torvalds 	offset = offsetof(xfs_agi_t, agi_unlinked) +
14871da177e4SLinus Torvalds 		(sizeof(xfs_agino_t) * bucket_index);
14881da177e4SLinus Torvalds 	xfs_trans_log_buf(tp, agibp, offset,
14891da177e4SLinus Torvalds 			  (offset + sizeof(xfs_agino_t) - 1));
14901da177e4SLinus Torvalds 	return 0;
14911da177e4SLinus Torvalds }
14921da177e4SLinus Torvalds 
14931da177e4SLinus Torvalds /*
14941da177e4SLinus Torvalds  * Pull the on-disk inode from the AGI unlinked list.
14951da177e4SLinus Torvalds  */
14961da177e4SLinus Torvalds STATIC int
14971da177e4SLinus Torvalds xfs_iunlink_remove(
14981da177e4SLinus Torvalds 	xfs_trans_t	*tp,
14991da177e4SLinus Torvalds 	xfs_inode_t	*ip)
15001da177e4SLinus Torvalds {
15011da177e4SLinus Torvalds 	xfs_ino_t	next_ino;
15021da177e4SLinus Torvalds 	xfs_mount_t	*mp;
15031da177e4SLinus Torvalds 	xfs_agi_t	*agi;
15041da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
15051da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
15061da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
15071da177e4SLinus Torvalds 	xfs_agnumber_t	agno;
15081da177e4SLinus Torvalds 	xfs_agino_t	agino;
15091da177e4SLinus Torvalds 	xfs_agino_t	next_agino;
15101da177e4SLinus Torvalds 	xfs_buf_t	*last_ibp;
15116fdf8cccSNathan Scott 	xfs_dinode_t	*last_dip = NULL;
15121da177e4SLinus Torvalds 	short		bucket_index;
15136fdf8cccSNathan Scott 	int		offset, last_offset = 0;
15141da177e4SLinus Torvalds 	int		error;
15151da177e4SLinus Torvalds 
15161da177e4SLinus Torvalds 	mp = tp->t_mountp;
15171da177e4SLinus Torvalds 	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
15181da177e4SLinus Torvalds 
15191da177e4SLinus Torvalds 	/*
15201da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
15211da177e4SLinus Torvalds 	 * on the list.
15221da177e4SLinus Torvalds 	 */
15235e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, agno, &agibp);
15245e1be0fbSChristoph Hellwig 	if (error)
15251da177e4SLinus Torvalds 		return error;
15265e1be0fbSChristoph Hellwig 
15271da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
15285e1be0fbSChristoph Hellwig 
15291da177e4SLinus Torvalds 	/*
15301da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
15311da177e4SLinus Torvalds 	 * list this inode will go on.
15321da177e4SLinus Torvalds 	 */
15331da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
15341da177e4SLinus Torvalds 	ASSERT(agino != 0);
15351da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
1536*69ef921bSChristoph Hellwig 	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
15371da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
15381da177e4SLinus Torvalds 
153916259e7dSChristoph Hellwig 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
15401da177e4SLinus Torvalds 		/*
15411da177e4SLinus Torvalds 		 * We're at the head of the list.  Get the inode's
15421da177e4SLinus Torvalds 		 * on-disk buffer to see if there is anyone after us
15431da177e4SLinus Torvalds 		 * on the list.  Only modify our next pointer if it
15441da177e4SLinus Torvalds 		 * is not already NULLAGINO.  This saves us the overhead
15451da177e4SLinus Torvalds 		 * of dealing with the buffer when there is no need to
15461da177e4SLinus Torvalds 		 * change it.
15471da177e4SLinus Torvalds 		 */
15480cadda1cSChristoph Hellwig 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
15491da177e4SLinus Torvalds 		if (error) {
15500b932cccSDave Chinner 			xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
15510b932cccSDave Chinner 				__func__, error);
15521da177e4SLinus Torvalds 			return error;
15531da177e4SLinus Torvalds 		}
1554347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
15551da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
15561da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1557347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
155892bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
15591da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
15601da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
15611da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
15621da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
15631da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
15641da177e4SLinus Torvalds 		} else {
15651da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
15661da177e4SLinus Torvalds 		}
15671da177e4SLinus Torvalds 		/*
15681da177e4SLinus Torvalds 		 * Point the bucket head pointer at the next inode.
15691da177e4SLinus Torvalds 		 */
15701da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
15711da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
157216259e7dSChristoph Hellwig 		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
15731da177e4SLinus Torvalds 		offset = offsetof(xfs_agi_t, agi_unlinked) +
15741da177e4SLinus Torvalds 			(sizeof(xfs_agino_t) * bucket_index);
15751da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, agibp, offset,
15761da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
15771da177e4SLinus Torvalds 	} else {
15781da177e4SLinus Torvalds 		/*
15791da177e4SLinus Torvalds 		 * We need to search the list for the inode being freed.
15801da177e4SLinus Torvalds 		 */
158116259e7dSChristoph Hellwig 		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
15821da177e4SLinus Torvalds 		last_ibp = NULL;
15831da177e4SLinus Torvalds 		while (next_agino != agino) {
15841da177e4SLinus Torvalds 			/*
15851da177e4SLinus Torvalds 			 * If the last inode wasn't the one pointing to
15861da177e4SLinus Torvalds 			 * us, then release its buffer since we're not
15871da177e4SLinus Torvalds 			 * going to do anything with it.
15881da177e4SLinus Torvalds 			 */
15891da177e4SLinus Torvalds 			if (last_ibp != NULL) {
15901da177e4SLinus Torvalds 				xfs_trans_brelse(tp, last_ibp);
15911da177e4SLinus Torvalds 			}
15921da177e4SLinus Torvalds 			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
15931da177e4SLinus Torvalds 			error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1594c679eef0SChristoph Hellwig 					    &last_ibp, &last_offset, 0);
15951da177e4SLinus Torvalds 			if (error) {
15960b932cccSDave Chinner 				xfs_warn(mp,
15970b932cccSDave Chinner 					"%s: xfs_inotobp() returned error %d.",
15980b932cccSDave Chinner 					__func__, error);
15991da177e4SLinus Torvalds 				return error;
16001da177e4SLinus Torvalds 			}
1601347d1c01SChristoph Hellwig 			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
16021da177e4SLinus Torvalds 			ASSERT(next_agino != NULLAGINO);
16031da177e4SLinus Torvalds 			ASSERT(next_agino != 0);
16041da177e4SLinus Torvalds 		}
16051da177e4SLinus Torvalds 		/*
16061da177e4SLinus Torvalds 		 * Now last_ibp points to the buffer previous to us on
16071da177e4SLinus Torvalds 		 * the unlinked list.  Pull us from the list.
16081da177e4SLinus Torvalds 		 */
16090cadda1cSChristoph Hellwig 		error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
16101da177e4SLinus Torvalds 		if (error) {
16110b932cccSDave Chinner 			xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
16120b932cccSDave Chinner 				__func__, error);
16131da177e4SLinus Torvalds 			return error;
16141da177e4SLinus Torvalds 		}
1615347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
16161da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
16171da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
16181da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1619347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
162092bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
16211da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
16221da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
16231da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
16241da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
16251da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
16261da177e4SLinus Torvalds 		} else {
16271da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
16281da177e4SLinus Torvalds 		}
16291da177e4SLinus Torvalds 		/*
16301da177e4SLinus Torvalds 		 * Point the previous inode on the list to the next inode.
16311da177e4SLinus Torvalds 		 */
1632347d1c01SChristoph Hellwig 		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
16331da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
16341da177e4SLinus Torvalds 		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
16351da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, last_ibp);
16361da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, last_ibp, offset,
16371da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
16381da177e4SLinus Torvalds 		xfs_inobp_check(mp, last_ibp);
16391da177e4SLinus Torvalds 	}
16401da177e4SLinus Torvalds 	return 0;
16411da177e4SLinus Torvalds }
16421da177e4SLinus Torvalds 
16435b3eed75SDave Chinner /*
16445b3eed75SDave Chinner  * A big issue when freeing the inode cluster is that we _cannot_ skip any
16455b3eed75SDave Chinner  * inodes that are in memory - they all must be marked stale and attached to
16465b3eed75SDave Chinner  * the cluster buffer.
16475b3eed75SDave Chinner  */
1648ba0f32d4SChristoph Hellwig STATIC void
16491da177e4SLinus Torvalds xfs_ifree_cluster(
16501da177e4SLinus Torvalds 	xfs_inode_t	*free_ip,
16511da177e4SLinus Torvalds 	xfs_trans_t	*tp,
16521da177e4SLinus Torvalds 	xfs_ino_t	inum)
16531da177e4SLinus Torvalds {
16541da177e4SLinus Torvalds 	xfs_mount_t		*mp = free_ip->i_mount;
16551da177e4SLinus Torvalds 	int			blks_per_cluster;
16561da177e4SLinus Torvalds 	int			nbufs;
16571da177e4SLinus Torvalds 	int			ninodes;
16585b257b4aSDave Chinner 	int			i, j;
16591da177e4SLinus Torvalds 	xfs_daddr_t		blkno;
16601da177e4SLinus Torvalds 	xfs_buf_t		*bp;
16615b257b4aSDave Chinner 	xfs_inode_t		*ip;
16621da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
16631da177e4SLinus Torvalds 	xfs_log_item_t		*lip;
16645017e97dSDave Chinner 	struct xfs_perag	*pag;
16651da177e4SLinus Torvalds 
16665017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
16671da177e4SLinus Torvalds 	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
16681da177e4SLinus Torvalds 		blks_per_cluster = 1;
16691da177e4SLinus Torvalds 		ninodes = mp->m_sb.sb_inopblock;
16701da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp);
16711da177e4SLinus Torvalds 	} else {
16721da177e4SLinus Torvalds 		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
16731da177e4SLinus Torvalds 					mp->m_sb.sb_blocksize;
16741da177e4SLinus Torvalds 		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
16751da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
16761da177e4SLinus Torvalds 	}
16771da177e4SLinus Torvalds 
16781da177e4SLinus Torvalds 	for (j = 0; j < nbufs; j++, inum += ninodes) {
16791da177e4SLinus Torvalds 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
16801da177e4SLinus Torvalds 					 XFS_INO_TO_AGBNO(mp, inum));
16811da177e4SLinus Torvalds 
16821da177e4SLinus Torvalds 		/*
16835b257b4aSDave Chinner 		 * We obtain and lock the backing buffer first in the process
16845b257b4aSDave Chinner 		 * here, as we have to ensure that any dirty inode that we
16855b257b4aSDave Chinner 		 * can't get the flush lock on is attached to the buffer.
16865b257b4aSDave Chinner 		 * If we scan the in-memory inodes first, then buffer IO can
16875b257b4aSDave Chinner 		 * complete before we get a lock on it, and hence we may fail
16885b257b4aSDave Chinner 		 * to mark all the active inodes on the buffer stale.
16891da177e4SLinus Torvalds 		 */
16901da177e4SLinus Torvalds 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
16911da177e4SLinus Torvalds 					mp->m_bsize * blks_per_cluster,
16920cadda1cSChristoph Hellwig 					XBF_LOCK);
16931da177e4SLinus Torvalds 
16945b257b4aSDave Chinner 		/*
16955b257b4aSDave Chinner 		 * Walk the inodes already attached to the buffer and mark them
16965b257b4aSDave Chinner 		 * stale. These will all have the flush locks held, so an
16975b3eed75SDave Chinner 		 * in-memory inode walk can't lock them. By marking them all
16985b3eed75SDave Chinner 		 * stale first, we will not attempt to lock them in the loop
16995b3eed75SDave Chinner 		 * below as the XFS_ISTALE flag will be set.
17005b257b4aSDave Chinner 		 */
17011da177e4SLinus Torvalds 		lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
17021da177e4SLinus Torvalds 		while (lip) {
17031da177e4SLinus Torvalds 			if (lip->li_type == XFS_LI_INODE) {
17041da177e4SLinus Torvalds 				iip = (xfs_inode_log_item_t *)lip;
17051da177e4SLinus Torvalds 				ASSERT(iip->ili_logged == 1);
1706ca30b2a7SChristoph Hellwig 				lip->li_cb = xfs_istale_done;
17077b2e2a31SDavid Chinner 				xfs_trans_ail_copy_lsn(mp->m_ail,
17087b2e2a31SDavid Chinner 							&iip->ili_flush_lsn,
17097b2e2a31SDavid Chinner 							&iip->ili_item.li_lsn);
1710e5ffd2bbSDavid Chinner 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
17111da177e4SLinus Torvalds 			}
17121da177e4SLinus Torvalds 			lip = lip->li_bio_list;
17131da177e4SLinus Torvalds 		}
17141da177e4SLinus Torvalds 
17155b3eed75SDave Chinner 
17165b257b4aSDave Chinner 		/*
17175b257b4aSDave Chinner 		 * For each inode in memory attempt to add it to the inode
17185b257b4aSDave Chinner 		 * buffer and set it up for being staled on buffer IO
17195b257b4aSDave Chinner 		 * completion.  This is safe as we've locked out tail pushing
17205b257b4aSDave Chinner 		 * and flushing by locking the buffer.
17215b257b4aSDave Chinner 		 *
17225b257b4aSDave Chinner 		 * We have already marked every inode that was part of a
17235b257b4aSDave Chinner 		 * transaction stale above, which means there is no point in
17245b257b4aSDave Chinner 		 * even trying to lock them.
17255b257b4aSDave Chinner 		 */
17265b257b4aSDave Chinner 		for (i = 0; i < ninodes; i++) {
17275b3eed75SDave Chinner retry:
17281a3e8f3dSDave Chinner 			rcu_read_lock();
17295b257b4aSDave Chinner 			ip = radix_tree_lookup(&pag->pag_ici_root,
17305b257b4aSDave Chinner 					XFS_INO_TO_AGINO(mp, (inum + i)));
17311da177e4SLinus Torvalds 
17321a3e8f3dSDave Chinner 			/* Inode not in memory, nothing to do */
17331a3e8f3dSDave Chinner 			if (!ip) {
17341a3e8f3dSDave Chinner 				rcu_read_unlock();
17355b257b4aSDave Chinner 				continue;
17365b257b4aSDave Chinner 			}
17375b257b4aSDave Chinner 
17385b3eed75SDave Chinner 			/*
17391a3e8f3dSDave Chinner 			 * because this is an RCU protected lookup, we could
17401a3e8f3dSDave Chinner 			 * find a recently freed or even reallocated inode
17411a3e8f3dSDave Chinner 			 * during the lookup. We need to check under the
17421a3e8f3dSDave Chinner 			 * i_flags_lock for a valid inode here. Skip it if it
17431a3e8f3dSDave Chinner 			 * is not valid, the wrong inode or stale.
17441a3e8f3dSDave Chinner 			 */
17451a3e8f3dSDave Chinner 			spin_lock(&ip->i_flags_lock);
17461a3e8f3dSDave Chinner 			if (ip->i_ino != inum + i ||
17471a3e8f3dSDave Chinner 			    __xfs_iflags_test(ip, XFS_ISTALE)) {
17481a3e8f3dSDave Chinner 				spin_unlock(&ip->i_flags_lock);
17491a3e8f3dSDave Chinner 				rcu_read_unlock();
17501a3e8f3dSDave Chinner 				continue;
17511a3e8f3dSDave Chinner 			}
17521a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
17531a3e8f3dSDave Chinner 
17541a3e8f3dSDave Chinner 			/*
17555b3eed75SDave Chinner 			 * Don't try to lock/unlock the current inode, but we
17565b3eed75SDave Chinner 			 * _cannot_ skip the other inodes that we did not find
17575b3eed75SDave Chinner 			 * in the list attached to the buffer and are not
17585b3eed75SDave Chinner 			 * already marked stale. If we can't lock it, back off
17595b3eed75SDave Chinner 			 * and retry.
17605b3eed75SDave Chinner 			 */
17615b257b4aSDave Chinner 			if (ip != free_ip &&
17625b257b4aSDave Chinner 			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
17631a3e8f3dSDave Chinner 				rcu_read_unlock();
17645b3eed75SDave Chinner 				delay(1);
17655b3eed75SDave Chinner 				goto retry;
17665b257b4aSDave Chinner 			}
17671a3e8f3dSDave Chinner 			rcu_read_unlock();
17685b257b4aSDave Chinner 
17695b3eed75SDave Chinner 			xfs_iflock(ip);
17705b257b4aSDave Chinner 			xfs_iflags_set(ip, XFS_ISTALE);
17715b257b4aSDave Chinner 
17725b3eed75SDave Chinner 			/*
17735b3eed75SDave Chinner 			 * we don't need to attach clean inodes or those only
17745b3eed75SDave Chinner 			 * with unlogged changes (which we throw away, anyway).
17755b3eed75SDave Chinner 			 */
17765b257b4aSDave Chinner 			iip = ip->i_itemp;
17775b3eed75SDave Chinner 			if (!iip || xfs_inode_clean(ip)) {
17785b257b4aSDave Chinner 				ASSERT(ip != free_ip);
17791da177e4SLinus Torvalds 				ip->i_update_core = 0;
17801da177e4SLinus Torvalds 				xfs_ifunlock(ip);
17811da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
17821da177e4SLinus Torvalds 				continue;
17831da177e4SLinus Torvalds 			}
17841da177e4SLinus Torvalds 
17851da177e4SLinus Torvalds 			iip->ili_last_fields = iip->ili_format.ilf_fields;
17861da177e4SLinus Torvalds 			iip->ili_format.ilf_fields = 0;
17871da177e4SLinus Torvalds 			iip->ili_logged = 1;
17887b2e2a31SDavid Chinner 			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
17897b2e2a31SDavid Chinner 						&iip->ili_item.li_lsn);
17901da177e4SLinus Torvalds 
1791ca30b2a7SChristoph Hellwig 			xfs_buf_attach_iodone(bp, xfs_istale_done,
1792ca30b2a7SChristoph Hellwig 						  &iip->ili_item);
17935b257b4aSDave Chinner 
17945b257b4aSDave Chinner 			if (ip != free_ip)
17951da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
17961da177e4SLinus Torvalds 		}
17971da177e4SLinus Torvalds 
17981da177e4SLinus Torvalds 		xfs_trans_stale_inode_buf(tp, bp);
17991da177e4SLinus Torvalds 		xfs_trans_binval(tp, bp);
18001da177e4SLinus Torvalds 	}
18011da177e4SLinus Torvalds 
18025017e97dSDave Chinner 	xfs_perag_put(pag);
18031da177e4SLinus Torvalds }
18041da177e4SLinus Torvalds 
18051da177e4SLinus Torvalds /*
18061da177e4SLinus Torvalds  * This is called to return an inode to the inode free list.
18071da177e4SLinus Torvalds  * The inode should already be truncated to 0 length and have
18081da177e4SLinus Torvalds  * no pages associated with it.  This routine also assumes that
18091da177e4SLinus Torvalds  * the inode is already a part of the transaction.
18101da177e4SLinus Torvalds  *
18111da177e4SLinus Torvalds  * The on-disk copy of the inode will have been added to the list
18121da177e4SLinus Torvalds  * of unlinked inodes in the AGI. We need to remove the inode from
18131da177e4SLinus Torvalds  * that list atomically with respect to freeing it here.
18141da177e4SLinus Torvalds  */
18151da177e4SLinus Torvalds int
18161da177e4SLinus Torvalds xfs_ifree(
18171da177e4SLinus Torvalds 	xfs_trans_t	*tp,
18181da177e4SLinus Torvalds 	xfs_inode_t	*ip,
18191da177e4SLinus Torvalds 	xfs_bmap_free_t	*flist)
18201da177e4SLinus Torvalds {
18211da177e4SLinus Torvalds 	int			error;
18221da177e4SLinus Torvalds 	int			delete;
18231da177e4SLinus Torvalds 	xfs_ino_t		first_ino;
1824c319b58bSVlad Apostolov 	xfs_dinode_t    	*dip;
1825c319b58bSVlad Apostolov 	xfs_buf_t       	*ibp;
18261da177e4SLinus Torvalds 
1827579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
18281da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
18291da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nextents == 0);
18301da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_anextents == 0);
1831ba87ea69SLachlan McIlroy 	ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
18321da177e4SLinus Torvalds 	       ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
18331da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
18341da177e4SLinus Torvalds 
18351da177e4SLinus Torvalds 	/*
18361da177e4SLinus Torvalds 	 * Pull the on-disk inode from the AGI unlinked list.
18371da177e4SLinus Torvalds 	 */
18381da177e4SLinus Torvalds 	error = xfs_iunlink_remove(tp, ip);
18391da177e4SLinus Torvalds 	if (error != 0) {
18401da177e4SLinus Torvalds 		return error;
18411da177e4SLinus Torvalds 	}
18421da177e4SLinus Torvalds 
18431da177e4SLinus Torvalds 	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
18441da177e4SLinus Torvalds 	if (error != 0) {
18451da177e4SLinus Torvalds 		return error;
18461da177e4SLinus Torvalds 	}
18471da177e4SLinus Torvalds 	ip->i_d.di_mode = 0;		/* mark incore inode as free */
18481da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
18491da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
18501da177e4SLinus Torvalds 	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
18511da177e4SLinus Torvalds 	ip->i_df.if_ext_max =
18521da177e4SLinus Torvalds 		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
18531da177e4SLinus Torvalds 	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
18541da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
18551da177e4SLinus Torvalds 	/*
18561da177e4SLinus Torvalds 	 * Bump the generation count so no one will be confused
18571da177e4SLinus Torvalds 	 * by reincarnations of this inode.
18581da177e4SLinus Torvalds 	 */
18591da177e4SLinus Torvalds 	ip->i_d.di_gen++;
1860c319b58bSVlad Apostolov 
18611da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
18621da177e4SLinus Torvalds 
18630cadda1cSChristoph Hellwig 	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK);
1864c319b58bSVlad Apostolov 	if (error)
1865c319b58bSVlad Apostolov 		return error;
1866c319b58bSVlad Apostolov 
1867c319b58bSVlad Apostolov         /*
1868c319b58bSVlad Apostolov 	* Clear the on-disk di_mode. This is to prevent xfs_bulkstat
1869c319b58bSVlad Apostolov 	* from picking up this inode when it is reclaimed (its incore state
1870c319b58bSVlad Apostolov 	* initialzed but not flushed to disk yet). The in-core di_mode is
1871c319b58bSVlad Apostolov 	* already cleared  and a corresponding transaction logged.
1872c319b58bSVlad Apostolov 	* The hack here just synchronizes the in-core to on-disk
1873c319b58bSVlad Apostolov 	* di_mode value in advance before the actual inode sync to disk.
1874c319b58bSVlad Apostolov 	* This is OK because the inode is already unlinked and would never
1875c319b58bSVlad Apostolov 	* change its di_mode again for this inode generation.
1876c319b58bSVlad Apostolov 	* This is a temporary hack that would require a proper fix
1877c319b58bSVlad Apostolov 	* in the future.
1878c319b58bSVlad Apostolov 	*/
187981591fe2SChristoph Hellwig 	dip->di_mode = 0;
1880c319b58bSVlad Apostolov 
18811da177e4SLinus Torvalds 	if (delete) {
18821da177e4SLinus Torvalds 		xfs_ifree_cluster(ip, tp, first_ino);
18831da177e4SLinus Torvalds 	}
18841da177e4SLinus Torvalds 
18851da177e4SLinus Torvalds 	return 0;
18861da177e4SLinus Torvalds }
18871da177e4SLinus Torvalds 
18881da177e4SLinus Torvalds /*
18891da177e4SLinus Torvalds  * Reallocate the space for if_broot based on the number of records
18901da177e4SLinus Torvalds  * being added or deleted as indicated in rec_diff.  Move the records
18911da177e4SLinus Torvalds  * and pointers in if_broot to fit the new size.  When shrinking this
18921da177e4SLinus Torvalds  * will eliminate holes between the records and pointers created by
18931da177e4SLinus Torvalds  * the caller.  When growing this will create holes to be filled in
18941da177e4SLinus Torvalds  * by the caller.
18951da177e4SLinus Torvalds  *
18961da177e4SLinus Torvalds  * The caller must not request to add more records than would fit in
18971da177e4SLinus Torvalds  * the on-disk inode root.  If the if_broot is currently NULL, then
18981da177e4SLinus Torvalds  * if we adding records one will be allocated.  The caller must also
18991da177e4SLinus Torvalds  * not request that the number of records go below zero, although
19001da177e4SLinus Torvalds  * it can go to zero.
19011da177e4SLinus Torvalds  *
19021da177e4SLinus Torvalds  * ip -- the inode whose if_broot area is changing
19031da177e4SLinus Torvalds  * ext_diff -- the change in the number of records, positive or negative,
19041da177e4SLinus Torvalds  *	 requested for the if_broot array.
19051da177e4SLinus Torvalds  */
19061da177e4SLinus Torvalds void
19071da177e4SLinus Torvalds xfs_iroot_realloc(
19081da177e4SLinus Torvalds 	xfs_inode_t		*ip,
19091da177e4SLinus Torvalds 	int			rec_diff,
19101da177e4SLinus Torvalds 	int			whichfork)
19111da177e4SLinus Torvalds {
191260197e8dSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
19131da177e4SLinus Torvalds 	int			cur_max;
19141da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
19157cc95a82SChristoph Hellwig 	struct xfs_btree_block	*new_broot;
19161da177e4SLinus Torvalds 	int			new_max;
19171da177e4SLinus Torvalds 	size_t			new_size;
19181da177e4SLinus Torvalds 	char			*np;
19191da177e4SLinus Torvalds 	char			*op;
19201da177e4SLinus Torvalds 
19211da177e4SLinus Torvalds 	/*
19221da177e4SLinus Torvalds 	 * Handle the degenerate case quietly.
19231da177e4SLinus Torvalds 	 */
19241da177e4SLinus Torvalds 	if (rec_diff == 0) {
19251da177e4SLinus Torvalds 		return;
19261da177e4SLinus Torvalds 	}
19271da177e4SLinus Torvalds 
19281da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
19291da177e4SLinus Torvalds 	if (rec_diff > 0) {
19301da177e4SLinus Torvalds 		/*
19311da177e4SLinus Torvalds 		 * If there wasn't any memory allocated before, just
19321da177e4SLinus Torvalds 		 * allocate it now and get out.
19331da177e4SLinus Torvalds 		 */
19341da177e4SLinus Torvalds 		if (ifp->if_broot_bytes == 0) {
19351da177e4SLinus Torvalds 			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
19364a7edddcSDave Chinner 			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
19371da177e4SLinus Torvalds 			ifp->if_broot_bytes = (int)new_size;
19381da177e4SLinus Torvalds 			return;
19391da177e4SLinus Torvalds 		}
19401da177e4SLinus Torvalds 
19411da177e4SLinus Torvalds 		/*
19421da177e4SLinus Torvalds 		 * If there is already an existing if_broot, then we need
19431da177e4SLinus Torvalds 		 * to realloc() it and shift the pointers to their new
19441da177e4SLinus Torvalds 		 * location.  The records don't change location because
19451da177e4SLinus Torvalds 		 * they are kept butted up against the btree block header.
19461da177e4SLinus Torvalds 		 */
194760197e8dSChristoph Hellwig 		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
19481da177e4SLinus Torvalds 		new_max = cur_max + rec_diff;
19491da177e4SLinus Torvalds 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
19507cc95a82SChristoph Hellwig 		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
19511da177e4SLinus Torvalds 				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
19524a7edddcSDave Chinner 				KM_SLEEP | KM_NOFS);
195360197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
19541da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
195560197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
19561da177e4SLinus Torvalds 						     (int)new_size);
19571da177e4SLinus Torvalds 		ifp->if_broot_bytes = (int)new_size;
19581da177e4SLinus Torvalds 		ASSERT(ifp->if_broot_bytes <=
19591da177e4SLinus Torvalds 			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
19601da177e4SLinus Torvalds 		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
19611da177e4SLinus Torvalds 		return;
19621da177e4SLinus Torvalds 	}
19631da177e4SLinus Torvalds 
19641da177e4SLinus Torvalds 	/*
19651da177e4SLinus Torvalds 	 * rec_diff is less than 0.  In this case, we are shrinking the
19661da177e4SLinus Torvalds 	 * if_broot buffer.  It must already exist.  If we go to zero
19671da177e4SLinus Torvalds 	 * records, just get rid of the root and clear the status bit.
19681da177e4SLinus Torvalds 	 */
19691da177e4SLinus Torvalds 	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
197060197e8dSChristoph Hellwig 	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
19711da177e4SLinus Torvalds 	new_max = cur_max + rec_diff;
19721da177e4SLinus Torvalds 	ASSERT(new_max >= 0);
19731da177e4SLinus Torvalds 	if (new_max > 0)
19741da177e4SLinus Torvalds 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
19751da177e4SLinus Torvalds 	else
19761da177e4SLinus Torvalds 		new_size = 0;
19771da177e4SLinus Torvalds 	if (new_size > 0) {
19784a7edddcSDave Chinner 		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
19791da177e4SLinus Torvalds 		/*
19801da177e4SLinus Torvalds 		 * First copy over the btree block header.
19811da177e4SLinus Torvalds 		 */
19827cc95a82SChristoph Hellwig 		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
19831da177e4SLinus Torvalds 	} else {
19841da177e4SLinus Torvalds 		new_broot = NULL;
19851da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFBROOT;
19861da177e4SLinus Torvalds 	}
19871da177e4SLinus Torvalds 
19881da177e4SLinus Torvalds 	/*
19891da177e4SLinus Torvalds 	 * Only copy the records and pointers if there are any.
19901da177e4SLinus Torvalds 	 */
19911da177e4SLinus Torvalds 	if (new_max > 0) {
19921da177e4SLinus Torvalds 		/*
19931da177e4SLinus Torvalds 		 * First copy the records.
19941da177e4SLinus Torvalds 		 */
1995136341b4SChristoph Hellwig 		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
1996136341b4SChristoph Hellwig 		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
19971da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
19981da177e4SLinus Torvalds 
19991da177e4SLinus Torvalds 		/*
20001da177e4SLinus Torvalds 		 * Then copy the pointers.
20011da177e4SLinus Torvalds 		 */
200260197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
20031da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
200460197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
20051da177e4SLinus Torvalds 						     (int)new_size);
20061da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
20071da177e4SLinus Torvalds 	}
2008f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_broot);
20091da177e4SLinus Torvalds 	ifp->if_broot = new_broot;
20101da177e4SLinus Torvalds 	ifp->if_broot_bytes = (int)new_size;
20111da177e4SLinus Torvalds 	ASSERT(ifp->if_broot_bytes <=
20121da177e4SLinus Torvalds 		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
20131da177e4SLinus Torvalds 	return;
20141da177e4SLinus Torvalds }
20151da177e4SLinus Torvalds 
20161da177e4SLinus Torvalds 
20171da177e4SLinus Torvalds /*
20181da177e4SLinus Torvalds  * This is called when the amount of space needed for if_data
20191da177e4SLinus Torvalds  * is increased or decreased.  The change in size is indicated by
20201da177e4SLinus Torvalds  * the number of bytes that need to be added or deleted in the
20211da177e4SLinus Torvalds  * byte_diff parameter.
20221da177e4SLinus Torvalds  *
20231da177e4SLinus Torvalds  * If the amount of space needed has decreased below the size of the
20241da177e4SLinus Torvalds  * inline buffer, then switch to using the inline buffer.  Otherwise,
20251da177e4SLinus Torvalds  * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
20261da177e4SLinus Torvalds  * to what is needed.
20271da177e4SLinus Torvalds  *
20281da177e4SLinus Torvalds  * ip -- the inode whose if_data area is changing
20291da177e4SLinus Torvalds  * byte_diff -- the change in the number of bytes, positive or negative,
20301da177e4SLinus Torvalds  *	 requested for the if_data array.
20311da177e4SLinus Torvalds  */
20321da177e4SLinus Torvalds void
20331da177e4SLinus Torvalds xfs_idata_realloc(
20341da177e4SLinus Torvalds 	xfs_inode_t	*ip,
20351da177e4SLinus Torvalds 	int		byte_diff,
20361da177e4SLinus Torvalds 	int		whichfork)
20371da177e4SLinus Torvalds {
20381da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
20391da177e4SLinus Torvalds 	int		new_size;
20401da177e4SLinus Torvalds 	int		real_size;
20411da177e4SLinus Torvalds 
20421da177e4SLinus Torvalds 	if (byte_diff == 0) {
20431da177e4SLinus Torvalds 		return;
20441da177e4SLinus Torvalds 	}
20451da177e4SLinus Torvalds 
20461da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
20471da177e4SLinus Torvalds 	new_size = (int)ifp->if_bytes + byte_diff;
20481da177e4SLinus Torvalds 	ASSERT(new_size >= 0);
20491da177e4SLinus Torvalds 
20501da177e4SLinus Torvalds 	if (new_size == 0) {
20511da177e4SLinus Torvalds 		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2052f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
20531da177e4SLinus Torvalds 		}
20541da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
20551da177e4SLinus Torvalds 		real_size = 0;
20561da177e4SLinus Torvalds 	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
20571da177e4SLinus Torvalds 		/*
20581da177e4SLinus Torvalds 		 * If the valid extents/data can fit in if_inline_ext/data,
20591da177e4SLinus Torvalds 		 * copy them from the malloc'd vector and free it.
20601da177e4SLinus Torvalds 		 */
20611da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
20621da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
20631da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
20641da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
20651da177e4SLinus Torvalds 			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
20661da177e4SLinus Torvalds 			      new_size);
2067f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
20681da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
20691da177e4SLinus Torvalds 		}
20701da177e4SLinus Torvalds 		real_size = 0;
20711da177e4SLinus Torvalds 	} else {
20721da177e4SLinus Torvalds 		/*
20731da177e4SLinus Torvalds 		 * Stuck with malloc/realloc.
20741da177e4SLinus Torvalds 		 * For inline data, the underlying buffer must be
20751da177e4SLinus Torvalds 		 * a multiple of 4 bytes in size so that it can be
20761da177e4SLinus Torvalds 		 * logged and stay on word boundaries.  We enforce
20771da177e4SLinus Torvalds 		 * that here.
20781da177e4SLinus Torvalds 		 */
20791da177e4SLinus Torvalds 		real_size = roundup(new_size, 4);
20801da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
20811da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
20824a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
20834a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
20841da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
20851da177e4SLinus Torvalds 			/*
20861da177e4SLinus Torvalds 			 * Only do the realloc if the underlying size
20871da177e4SLinus Torvalds 			 * is really changing.
20881da177e4SLinus Torvalds 			 */
20891da177e4SLinus Torvalds 			if (ifp->if_real_bytes != real_size) {
20901da177e4SLinus Torvalds 				ifp->if_u1.if_data =
20911da177e4SLinus Torvalds 					kmem_realloc(ifp->if_u1.if_data,
20921da177e4SLinus Torvalds 							real_size,
20931da177e4SLinus Torvalds 							ifp->if_real_bytes,
20944a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
20951da177e4SLinus Torvalds 			}
20961da177e4SLinus Torvalds 		} else {
20971da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
20984a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
20994a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
21001da177e4SLinus Torvalds 			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
21011da177e4SLinus Torvalds 				ifp->if_bytes);
21021da177e4SLinus Torvalds 		}
21031da177e4SLinus Torvalds 	}
21041da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
21051da177e4SLinus Torvalds 	ifp->if_bytes = new_size;
21061da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
21071da177e4SLinus Torvalds }
21081da177e4SLinus Torvalds 
21091da177e4SLinus Torvalds void
21101da177e4SLinus Torvalds xfs_idestroy_fork(
21111da177e4SLinus Torvalds 	xfs_inode_t	*ip,
21121da177e4SLinus Torvalds 	int		whichfork)
21131da177e4SLinus Torvalds {
21141da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
21151da177e4SLinus Torvalds 
21161da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
21171da177e4SLinus Torvalds 	if (ifp->if_broot != NULL) {
2118f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_broot);
21191da177e4SLinus Torvalds 		ifp->if_broot = NULL;
21201da177e4SLinus Torvalds 	}
21211da177e4SLinus Torvalds 
21221da177e4SLinus Torvalds 	/*
21231da177e4SLinus Torvalds 	 * If the format is local, then we can't have an extents
21241da177e4SLinus Torvalds 	 * array so just look for an inline data array.  If we're
21251da177e4SLinus Torvalds 	 * not local then we may or may not have an extents list,
21261da177e4SLinus Torvalds 	 * so check and free it up if we do.
21271da177e4SLinus Torvalds 	 */
21281da177e4SLinus Torvalds 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
21291da177e4SLinus Torvalds 		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
21301da177e4SLinus Torvalds 		    (ifp->if_u1.if_data != NULL)) {
21311da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
2132f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
21331da177e4SLinus Torvalds 			ifp->if_u1.if_data = NULL;
21341da177e4SLinus Torvalds 			ifp->if_real_bytes = 0;
21351da177e4SLinus Torvalds 		}
21361da177e4SLinus Torvalds 	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
21370293ce3aSMandy Kirkconnell 		   ((ifp->if_flags & XFS_IFEXTIREC) ||
21380293ce3aSMandy Kirkconnell 		    ((ifp->if_u1.if_extents != NULL) &&
21390293ce3aSMandy Kirkconnell 		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
21401da177e4SLinus Torvalds 		ASSERT(ifp->if_real_bytes != 0);
21414eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
21421da177e4SLinus Torvalds 	}
21431da177e4SLinus Torvalds 	ASSERT(ifp->if_u1.if_extents == NULL ||
21441da177e4SLinus Torvalds 	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
21451da177e4SLinus Torvalds 	ASSERT(ifp->if_real_bytes == 0);
21461da177e4SLinus Torvalds 	if (whichfork == XFS_ATTR_FORK) {
21471da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
21481da177e4SLinus Torvalds 		ip->i_afp = NULL;
21491da177e4SLinus Torvalds 	}
21501da177e4SLinus Torvalds }
21511da177e4SLinus Torvalds 
21521da177e4SLinus Torvalds /*
215360ec6783SChristoph Hellwig  * This is called to unpin an inode.  The caller must have the inode locked
215460ec6783SChristoph Hellwig  * in at least shared mode so that the buffer cannot be subsequently pinned
215560ec6783SChristoph Hellwig  * once someone is waiting for it to be unpinned.
21561da177e4SLinus Torvalds  */
215760ec6783SChristoph Hellwig static void
215860ec6783SChristoph Hellwig xfs_iunpin_nowait(
215960ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
2160a3f74ffbSDavid Chinner {
2161579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2162a3f74ffbSDavid Chinner 
21634aaf15d1SDave Chinner 	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
21644aaf15d1SDave Chinner 
2165a3f74ffbSDavid Chinner 	/* Give the log a push to start the unpinning I/O */
216660ec6783SChristoph Hellwig 	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2167a14a348bSChristoph Hellwig 
2168a3f74ffbSDavid Chinner }
2169a3f74ffbSDavid Chinner 
2170777df5afSDave Chinner void
21711da177e4SLinus Torvalds xfs_iunpin_wait(
217260ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
21731da177e4SLinus Torvalds {
217460ec6783SChristoph Hellwig 	if (xfs_ipincount(ip)) {
217560ec6783SChristoph Hellwig 		xfs_iunpin_nowait(ip);
217660ec6783SChristoph Hellwig 		wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
21771da177e4SLinus Torvalds 	}
21781da177e4SLinus Torvalds }
21791da177e4SLinus Torvalds 
21801da177e4SLinus Torvalds /*
21811da177e4SLinus Torvalds  * xfs_iextents_copy()
21821da177e4SLinus Torvalds  *
21831da177e4SLinus Torvalds  * This is called to copy the REAL extents (as opposed to the delayed
21841da177e4SLinus Torvalds  * allocation extents) from the inode into the given buffer.  It
21851da177e4SLinus Torvalds  * returns the number of bytes copied into the buffer.
21861da177e4SLinus Torvalds  *
21871da177e4SLinus Torvalds  * If there are no delayed allocation extents, then we can just
21881da177e4SLinus Torvalds  * memcpy() the extents into the buffer.  Otherwise, we need to
21891da177e4SLinus Torvalds  * examine each extent in turn and skip those which are delayed.
21901da177e4SLinus Torvalds  */
21911da177e4SLinus Torvalds int
21921da177e4SLinus Torvalds xfs_iextents_copy(
21931da177e4SLinus Torvalds 	xfs_inode_t		*ip,
2194a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t		*dp,
21951da177e4SLinus Torvalds 	int			whichfork)
21961da177e4SLinus Torvalds {
21971da177e4SLinus Torvalds 	int			copied;
21981da177e4SLinus Torvalds 	int			i;
21991da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
22001da177e4SLinus Torvalds 	int			nrecs;
22011da177e4SLinus Torvalds 	xfs_fsblock_t		start_block;
22021da177e4SLinus Torvalds 
22031da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
2204579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
22051da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes > 0);
22061da177e4SLinus Torvalds 
22071da177e4SLinus Torvalds 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
22083a59c94cSEric Sandeen 	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
22091da177e4SLinus Torvalds 	ASSERT(nrecs > 0);
22101da177e4SLinus Torvalds 
22111da177e4SLinus Torvalds 	/*
22121da177e4SLinus Torvalds 	 * There are some delayed allocation extents in the
22131da177e4SLinus Torvalds 	 * inode, so copy the extents one at a time and skip
22141da177e4SLinus Torvalds 	 * the delayed ones.  There must be at least one
22151da177e4SLinus Torvalds 	 * non-delayed extent.
22161da177e4SLinus Torvalds 	 */
22171da177e4SLinus Torvalds 	copied = 0;
22181da177e4SLinus Torvalds 	for (i = 0; i < nrecs; i++) {
2219a6f64d4aSChristoph Hellwig 		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
22201da177e4SLinus Torvalds 		start_block = xfs_bmbt_get_startblock(ep);
22219d87c319SEric Sandeen 		if (isnullstartblock(start_block)) {
22221da177e4SLinus Torvalds 			/*
22231da177e4SLinus Torvalds 			 * It's a delayed allocation extent, so skip it.
22241da177e4SLinus Torvalds 			 */
22251da177e4SLinus Torvalds 			continue;
22261da177e4SLinus Torvalds 		}
22271da177e4SLinus Torvalds 
22281da177e4SLinus Torvalds 		/* Translate to on disk format */
2229cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
2230cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
2231a6f64d4aSChristoph Hellwig 		dp++;
22321da177e4SLinus Torvalds 		copied++;
22331da177e4SLinus Torvalds 	}
22341da177e4SLinus Torvalds 	ASSERT(copied != 0);
2235a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
22361da177e4SLinus Torvalds 
22371da177e4SLinus Torvalds 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
22381da177e4SLinus Torvalds }
22391da177e4SLinus Torvalds 
22401da177e4SLinus Torvalds /*
22411da177e4SLinus Torvalds  * Each of the following cases stores data into the same region
22421da177e4SLinus Torvalds  * of the on-disk inode, so only one of them can be valid at
22431da177e4SLinus Torvalds  * any given time. While it is possible to have conflicting formats
22441da177e4SLinus Torvalds  * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
22451da177e4SLinus Torvalds  * in EXTENTS format, this can only happen when the fork has
22461da177e4SLinus Torvalds  * changed formats after being modified but before being flushed.
22471da177e4SLinus Torvalds  * In these cases, the format always takes precedence, because the
22481da177e4SLinus Torvalds  * format indicates the current state of the fork.
22491da177e4SLinus Torvalds  */
22501da177e4SLinus Torvalds /*ARGSUSED*/
2251e4ac967bSDavid Chinner STATIC void
22521da177e4SLinus Torvalds xfs_iflush_fork(
22531da177e4SLinus Torvalds 	xfs_inode_t		*ip,
22541da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
22551da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip,
22561da177e4SLinus Torvalds 	int			whichfork,
22571da177e4SLinus Torvalds 	xfs_buf_t		*bp)
22581da177e4SLinus Torvalds {
22591da177e4SLinus Torvalds 	char			*cp;
22601da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
22611da177e4SLinus Torvalds 	xfs_mount_t		*mp;
22621da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG
22631da177e4SLinus Torvalds 	int			first;
22641da177e4SLinus Torvalds #endif
22651da177e4SLinus Torvalds 	static const short	brootflag[2] =
22661da177e4SLinus Torvalds 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
22671da177e4SLinus Torvalds 	static const short	dataflag[2] =
22681da177e4SLinus Torvalds 		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
22691da177e4SLinus Torvalds 	static const short	extflag[2] =
22701da177e4SLinus Torvalds 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
22711da177e4SLinus Torvalds 
2272e4ac967bSDavid Chinner 	if (!iip)
2273e4ac967bSDavid Chinner 		return;
22741da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
22751da177e4SLinus Torvalds 	/*
22761da177e4SLinus Torvalds 	 * This can happen if we gave up in iformat in an error path,
22771da177e4SLinus Torvalds 	 * for the attribute fork.
22781da177e4SLinus Torvalds 	 */
2279e4ac967bSDavid Chinner 	if (!ifp) {
22801da177e4SLinus Torvalds 		ASSERT(whichfork == XFS_ATTR_FORK);
2281e4ac967bSDavid Chinner 		return;
22821da177e4SLinus Torvalds 	}
22831da177e4SLinus Torvalds 	cp = XFS_DFORK_PTR(dip, whichfork);
22841da177e4SLinus Torvalds 	mp = ip->i_mount;
22851da177e4SLinus Torvalds 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
22861da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
22871da177e4SLinus Torvalds 		if ((iip->ili_format.ilf_fields & dataflag[whichfork]) &&
22881da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
22891da177e4SLinus Torvalds 			ASSERT(ifp->if_u1.if_data != NULL);
22901da177e4SLinus Torvalds 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
22911da177e4SLinus Torvalds 			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
22921da177e4SLinus Torvalds 		}
22931da177e4SLinus Torvalds 		break;
22941da177e4SLinus Torvalds 
22951da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
22961da177e4SLinus Torvalds 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
22971da177e4SLinus Torvalds 		       !(iip->ili_format.ilf_fields & extflag[whichfork]));
22981da177e4SLinus Torvalds 		if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
22991da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
2300ab1908a5SChristoph Hellwig 			ASSERT(xfs_iext_get_ext(ifp, 0));
23011da177e4SLinus Torvalds 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
23021da177e4SLinus Torvalds 			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
23031da177e4SLinus Torvalds 				whichfork);
23041da177e4SLinus Torvalds 		}
23051da177e4SLinus Torvalds 		break;
23061da177e4SLinus Torvalds 
23071da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
23081da177e4SLinus Torvalds 		if ((iip->ili_format.ilf_fields & brootflag[whichfork]) &&
23091da177e4SLinus Torvalds 		    (ifp->if_broot_bytes > 0)) {
23101da177e4SLinus Torvalds 			ASSERT(ifp->if_broot != NULL);
23111da177e4SLinus Torvalds 			ASSERT(ifp->if_broot_bytes <=
23121da177e4SLinus Torvalds 			       (XFS_IFORK_SIZE(ip, whichfork) +
23131da177e4SLinus Torvalds 				XFS_BROOT_SIZE_ADJ));
231460197e8dSChristoph Hellwig 			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
23151da177e4SLinus Torvalds 				(xfs_bmdr_block_t *)cp,
23161da177e4SLinus Torvalds 				XFS_DFORK_SIZE(dip, mp, whichfork));
23171da177e4SLinus Torvalds 		}
23181da177e4SLinus Torvalds 		break;
23191da177e4SLinus Torvalds 
23201da177e4SLinus Torvalds 	case XFS_DINODE_FMT_DEV:
23211da177e4SLinus Torvalds 		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
23221da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
232381591fe2SChristoph Hellwig 			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
23241da177e4SLinus Torvalds 		}
23251da177e4SLinus Torvalds 		break;
23261da177e4SLinus Torvalds 
23271da177e4SLinus Torvalds 	case XFS_DINODE_FMT_UUID:
23281da177e4SLinus Torvalds 		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
23291da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
233081591fe2SChristoph Hellwig 			memcpy(XFS_DFORK_DPTR(dip),
233181591fe2SChristoph Hellwig 			       &ip->i_df.if_u2.if_uuid,
23321da177e4SLinus Torvalds 			       sizeof(uuid_t));
23331da177e4SLinus Torvalds 		}
23341da177e4SLinus Torvalds 		break;
23351da177e4SLinus Torvalds 
23361da177e4SLinus Torvalds 	default:
23371da177e4SLinus Torvalds 		ASSERT(0);
23381da177e4SLinus Torvalds 		break;
23391da177e4SLinus Torvalds 	}
23401da177e4SLinus Torvalds }
23411da177e4SLinus Torvalds 
2342bad55843SDavid Chinner STATIC int
2343bad55843SDavid Chinner xfs_iflush_cluster(
2344bad55843SDavid Chinner 	xfs_inode_t	*ip,
2345bad55843SDavid Chinner 	xfs_buf_t	*bp)
2346bad55843SDavid Chinner {
2347bad55843SDavid Chinner 	xfs_mount_t		*mp = ip->i_mount;
23485017e97dSDave Chinner 	struct xfs_perag	*pag;
2349bad55843SDavid Chinner 	unsigned long		first_index, mask;
2350c8f5f12eSDavid Chinner 	unsigned long		inodes_per_cluster;
2351bad55843SDavid Chinner 	int			ilist_size;
2352bad55843SDavid Chinner 	xfs_inode_t		**ilist;
2353bad55843SDavid Chinner 	xfs_inode_t		*iq;
2354bad55843SDavid Chinner 	int			nr_found;
2355bad55843SDavid Chinner 	int			clcount = 0;
2356bad55843SDavid Chinner 	int			bufwasdelwri;
2357bad55843SDavid Chinner 	int			i;
2358bad55843SDavid Chinner 
23595017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2360bad55843SDavid Chinner 
2361c8f5f12eSDavid Chinner 	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2362c8f5f12eSDavid Chinner 	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
236349383b0eSDavid Chinner 	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2364bad55843SDavid Chinner 	if (!ilist)
236544b56e0aSDave Chinner 		goto out_put;
2366bad55843SDavid Chinner 
2367bad55843SDavid Chinner 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2368bad55843SDavid Chinner 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
23691a3e8f3dSDave Chinner 	rcu_read_lock();
2370bad55843SDavid Chinner 	/* really need a gang lookup range call here */
2371bad55843SDavid Chinner 	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2372c8f5f12eSDavid Chinner 					first_index, inodes_per_cluster);
2373bad55843SDavid Chinner 	if (nr_found == 0)
2374bad55843SDavid Chinner 		goto out_free;
2375bad55843SDavid Chinner 
2376bad55843SDavid Chinner 	for (i = 0; i < nr_found; i++) {
2377bad55843SDavid Chinner 		iq = ilist[i];
2378bad55843SDavid Chinner 		if (iq == ip)
2379bad55843SDavid Chinner 			continue;
23801a3e8f3dSDave Chinner 
23811a3e8f3dSDave Chinner 		/*
23821a3e8f3dSDave Chinner 		 * because this is an RCU protected lookup, we could find a
23831a3e8f3dSDave Chinner 		 * recently freed or even reallocated inode during the lookup.
23841a3e8f3dSDave Chinner 		 * We need to check under the i_flags_lock for a valid inode
23851a3e8f3dSDave Chinner 		 * here. Skip it if it is not valid or the wrong inode.
23861a3e8f3dSDave Chinner 		 */
23871a3e8f3dSDave Chinner 		spin_lock(&ip->i_flags_lock);
23881a3e8f3dSDave Chinner 		if (!ip->i_ino ||
23891a3e8f3dSDave Chinner 		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
23901a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
23911a3e8f3dSDave Chinner 			continue;
23921a3e8f3dSDave Chinner 		}
23931a3e8f3dSDave Chinner 		spin_unlock(&ip->i_flags_lock);
23941a3e8f3dSDave Chinner 
2395bad55843SDavid Chinner 		/*
2396bad55843SDavid Chinner 		 * Do an un-protected check to see if the inode is dirty and
2397bad55843SDavid Chinner 		 * is a candidate for flushing.  These checks will be repeated
2398bad55843SDavid Chinner 		 * later after the appropriate locks are acquired.
2399bad55843SDavid Chinner 		 */
240033540408SDavid Chinner 		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
2401bad55843SDavid Chinner 			continue;
2402bad55843SDavid Chinner 
2403bad55843SDavid Chinner 		/*
2404bad55843SDavid Chinner 		 * Try to get locks.  If any are unavailable or it is pinned,
2405bad55843SDavid Chinner 		 * then this inode cannot be flushed and is skipped.
2406bad55843SDavid Chinner 		 */
2407bad55843SDavid Chinner 
2408bad55843SDavid Chinner 		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
2409bad55843SDavid Chinner 			continue;
2410bad55843SDavid Chinner 		if (!xfs_iflock_nowait(iq)) {
2411bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2412bad55843SDavid Chinner 			continue;
2413bad55843SDavid Chinner 		}
2414bad55843SDavid Chinner 		if (xfs_ipincount(iq)) {
2415bad55843SDavid Chinner 			xfs_ifunlock(iq);
2416bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2417bad55843SDavid Chinner 			continue;
2418bad55843SDavid Chinner 		}
2419bad55843SDavid Chinner 
2420bad55843SDavid Chinner 		/*
2421bad55843SDavid Chinner 		 * arriving here means that this inode can be flushed.  First
2422bad55843SDavid Chinner 		 * re-check that it's dirty before flushing.
2423bad55843SDavid Chinner 		 */
242433540408SDavid Chinner 		if (!xfs_inode_clean(iq)) {
2425bad55843SDavid Chinner 			int	error;
2426bad55843SDavid Chinner 			error = xfs_iflush_int(iq, bp);
2427bad55843SDavid Chinner 			if (error) {
2428bad55843SDavid Chinner 				xfs_iunlock(iq, XFS_ILOCK_SHARED);
2429bad55843SDavid Chinner 				goto cluster_corrupt_out;
2430bad55843SDavid Chinner 			}
2431bad55843SDavid Chinner 			clcount++;
2432bad55843SDavid Chinner 		} else {
2433bad55843SDavid Chinner 			xfs_ifunlock(iq);
2434bad55843SDavid Chinner 		}
2435bad55843SDavid Chinner 		xfs_iunlock(iq, XFS_ILOCK_SHARED);
2436bad55843SDavid Chinner 	}
2437bad55843SDavid Chinner 
2438bad55843SDavid Chinner 	if (clcount) {
2439bad55843SDavid Chinner 		XFS_STATS_INC(xs_icluster_flushcnt);
2440bad55843SDavid Chinner 		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
2441bad55843SDavid Chinner 	}
2442bad55843SDavid Chinner 
2443bad55843SDavid Chinner out_free:
24441a3e8f3dSDave Chinner 	rcu_read_unlock();
2445f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
244644b56e0aSDave Chinner out_put:
244744b56e0aSDave Chinner 	xfs_perag_put(pag);
2448bad55843SDavid Chinner 	return 0;
2449bad55843SDavid Chinner 
2450bad55843SDavid Chinner 
2451bad55843SDavid Chinner cluster_corrupt_out:
2452bad55843SDavid Chinner 	/*
2453bad55843SDavid Chinner 	 * Corruption detected in the clustering loop.  Invalidate the
2454bad55843SDavid Chinner 	 * inode buffer and shut down the filesystem.
2455bad55843SDavid Chinner 	 */
24561a3e8f3dSDave Chinner 	rcu_read_unlock();
2457bad55843SDavid Chinner 	/*
2458bad55843SDavid Chinner 	 * Clean up the buffer.  If it was B_DELWRI, just release it --
2459bad55843SDavid Chinner 	 * brelse can handle it with no problems.  If not, shut down the
2460bad55843SDavid Chinner 	 * filesystem before releasing the buffer.
2461bad55843SDavid Chinner 	 */
2462bad55843SDavid Chinner 	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
2463bad55843SDavid Chinner 	if (bufwasdelwri)
2464bad55843SDavid Chinner 		xfs_buf_relse(bp);
2465bad55843SDavid Chinner 
2466bad55843SDavid Chinner 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2467bad55843SDavid Chinner 
2468bad55843SDavid Chinner 	if (!bufwasdelwri) {
2469bad55843SDavid Chinner 		/*
2470bad55843SDavid Chinner 		 * Just like incore_relse: if we have b_iodone functions,
2471bad55843SDavid Chinner 		 * mark the buffer as an error and call them.  Otherwise
2472bad55843SDavid Chinner 		 * mark it as stale and brelse.
2473bad55843SDavid Chinner 		 */
2474bad55843SDavid Chinner 		if (XFS_BUF_IODONE_FUNC(bp)) {
2475bad55843SDavid Chinner 			XFS_BUF_UNDONE(bp);
2476bad55843SDavid Chinner 			XFS_BUF_STALE(bp);
2477bad55843SDavid Chinner 			XFS_BUF_ERROR(bp,EIO);
24781a1a3e97SChristoph Hellwig 			xfs_buf_ioend(bp, 0);
2479bad55843SDavid Chinner 		} else {
2480bad55843SDavid Chinner 			XFS_BUF_STALE(bp);
2481bad55843SDavid Chinner 			xfs_buf_relse(bp);
2482bad55843SDavid Chinner 		}
2483bad55843SDavid Chinner 	}
2484bad55843SDavid Chinner 
2485bad55843SDavid Chinner 	/*
2486bad55843SDavid Chinner 	 * Unlocks the flush lock
2487bad55843SDavid Chinner 	 */
2488bad55843SDavid Chinner 	xfs_iflush_abort(iq);
2489f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
249044b56e0aSDave Chinner 	xfs_perag_put(pag);
2491bad55843SDavid Chinner 	return XFS_ERROR(EFSCORRUPTED);
2492bad55843SDavid Chinner }
2493bad55843SDavid Chinner 
24941da177e4SLinus Torvalds /*
24951da177e4SLinus Torvalds  * xfs_iflush() will write a modified inode's changes out to the
24961da177e4SLinus Torvalds  * inode's on disk home.  The caller must have the inode lock held
2497c63942d3SDavid Chinner  * in at least shared mode and the inode flush completion must be
2498c63942d3SDavid Chinner  * active as well.  The inode lock will still be held upon return from
24991da177e4SLinus Torvalds  * the call and the caller is free to unlock it.
2500c63942d3SDavid Chinner  * The inode flush will be completed when the inode reaches the disk.
25011da177e4SLinus Torvalds  * The flags indicate how the inode's buffer should be written out.
25021da177e4SLinus Torvalds  */
25031da177e4SLinus Torvalds int
25041da177e4SLinus Torvalds xfs_iflush(
25051da177e4SLinus Torvalds 	xfs_inode_t		*ip,
25061da177e4SLinus Torvalds 	uint			flags)
25071da177e4SLinus Torvalds {
25081da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
25091da177e4SLinus Torvalds 	xfs_buf_t		*bp;
25101da177e4SLinus Torvalds 	xfs_dinode_t		*dip;
25111da177e4SLinus Torvalds 	xfs_mount_t		*mp;
25121da177e4SLinus Torvalds 	int			error;
25131da177e4SLinus Torvalds 
25141da177e4SLinus Torvalds 	XFS_STATS_INC(xs_iflush_count);
25151da177e4SLinus Torvalds 
2516579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2517c63942d3SDavid Chinner 	ASSERT(!completion_done(&ip->i_flush));
25181da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
25191da177e4SLinus Torvalds 	       ip->i_d.di_nextents > ip->i_df.if_ext_max);
25201da177e4SLinus Torvalds 
25211da177e4SLinus Torvalds 	iip = ip->i_itemp;
25221da177e4SLinus Torvalds 	mp = ip->i_mount;
25231da177e4SLinus Torvalds 
25241da177e4SLinus Torvalds 	/*
2525a3f74ffbSDavid Chinner 	 * We can't flush the inode until it is unpinned, so wait for it if we
2526a3f74ffbSDavid Chinner 	 * are allowed to block.  We know no one new can pin it, because we are
2527a3f74ffbSDavid Chinner 	 * holding the inode lock shared and you need to hold it exclusively to
2528a3f74ffbSDavid Chinner 	 * pin the inode.
2529a3f74ffbSDavid Chinner 	 *
2530a3f74ffbSDavid Chinner 	 * If we are not allowed to block, force the log out asynchronously so
2531a3f74ffbSDavid Chinner 	 * that when we come back the inode will be unpinned. If other inodes
2532a3f74ffbSDavid Chinner 	 * in the same cluster are dirty, they will probably write the inode
2533a3f74ffbSDavid Chinner 	 * out for us if they occur after the log force completes.
25341da177e4SLinus Torvalds 	 */
2535c854363eSDave Chinner 	if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2536a3f74ffbSDavid Chinner 		xfs_iunpin_nowait(ip);
2537a3f74ffbSDavid Chinner 		xfs_ifunlock(ip);
2538a3f74ffbSDavid Chinner 		return EAGAIN;
2539a3f74ffbSDavid Chinner 	}
25401da177e4SLinus Torvalds 	xfs_iunpin_wait(ip);
25411da177e4SLinus Torvalds 
25421da177e4SLinus Torvalds 	/*
25434b6a4688SDave Chinner 	 * For stale inodes we cannot rely on the backing buffer remaining
25444b6a4688SDave Chinner 	 * stale in cache for the remaining life of the stale inode and so
25454b6a4688SDave Chinner 	 * xfs_itobp() below may give us a buffer that no longer contains
25464b6a4688SDave Chinner 	 * inodes below. We have to check this after ensuring the inode is
25474b6a4688SDave Chinner 	 * unpinned so that it is safe to reclaim the stale inode after the
25484b6a4688SDave Chinner 	 * flush call.
25494b6a4688SDave Chinner 	 */
25504b6a4688SDave Chinner 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
25514b6a4688SDave Chinner 		xfs_ifunlock(ip);
25524b6a4688SDave Chinner 		return 0;
25534b6a4688SDave Chinner 	}
25544b6a4688SDave Chinner 
25554b6a4688SDave Chinner 	/*
25561da177e4SLinus Torvalds 	 * This may have been unpinned because the filesystem is shutting
25571da177e4SLinus Torvalds 	 * down forcibly. If that's the case we must not write this inode
25581da177e4SLinus Torvalds 	 * to disk, because the log record didn't make it to disk!
25591da177e4SLinus Torvalds 	 */
25601da177e4SLinus Torvalds 	if (XFS_FORCED_SHUTDOWN(mp)) {
25611da177e4SLinus Torvalds 		ip->i_update_core = 0;
25621da177e4SLinus Torvalds 		if (iip)
25631da177e4SLinus Torvalds 			iip->ili_format.ilf_fields = 0;
25641da177e4SLinus Torvalds 		xfs_ifunlock(ip);
25651da177e4SLinus Torvalds 		return XFS_ERROR(EIO);
25661da177e4SLinus Torvalds 	}
25671da177e4SLinus Torvalds 
25681da177e4SLinus Torvalds 	/*
2569a3f74ffbSDavid Chinner 	 * Get the buffer containing the on-disk inode.
2570a3f74ffbSDavid Chinner 	 */
257176d8b277SChristoph Hellwig 	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
25721bfd8d04SDave Chinner 				(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
2573a3f74ffbSDavid Chinner 	if (error || !bp) {
2574a3f74ffbSDavid Chinner 		xfs_ifunlock(ip);
2575a3f74ffbSDavid Chinner 		return error;
2576a3f74ffbSDavid Chinner 	}
2577a3f74ffbSDavid Chinner 
2578a3f74ffbSDavid Chinner 	/*
25791da177e4SLinus Torvalds 	 * First flush out the inode that xfs_iflush was called with.
25801da177e4SLinus Torvalds 	 */
25811da177e4SLinus Torvalds 	error = xfs_iflush_int(ip, bp);
2582bad55843SDavid Chinner 	if (error)
25831da177e4SLinus Torvalds 		goto corrupt_out;
25841da177e4SLinus Torvalds 
25851da177e4SLinus Torvalds 	/*
2586a3f74ffbSDavid Chinner 	 * If the buffer is pinned then push on the log now so we won't
2587a3f74ffbSDavid Chinner 	 * get stuck waiting in the write for too long.
2588a3f74ffbSDavid Chinner 	 */
2589a3f74ffbSDavid Chinner 	if (XFS_BUF_ISPINNED(bp))
2590a14a348bSChristoph Hellwig 		xfs_log_force(mp, 0);
2591a3f74ffbSDavid Chinner 
2592a3f74ffbSDavid Chinner 	/*
25931da177e4SLinus Torvalds 	 * inode clustering:
25941da177e4SLinus Torvalds 	 * see if other inodes can be gathered into this write
25951da177e4SLinus Torvalds 	 */
2596bad55843SDavid Chinner 	error = xfs_iflush_cluster(ip, bp);
2597bad55843SDavid Chinner 	if (error)
25981da177e4SLinus Torvalds 		goto cluster_corrupt_out;
25991da177e4SLinus Torvalds 
2600c854363eSDave Chinner 	if (flags & SYNC_WAIT)
26011da177e4SLinus Torvalds 		error = xfs_bwrite(mp, bp);
2602c854363eSDave Chinner 	else
2603c854363eSDave Chinner 		xfs_bdwrite(mp, bp);
26041da177e4SLinus Torvalds 	return error;
26051da177e4SLinus Torvalds 
26061da177e4SLinus Torvalds corrupt_out:
26071da177e4SLinus Torvalds 	xfs_buf_relse(bp);
26087d04a335SNathan Scott 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
26091da177e4SLinus Torvalds cluster_corrupt_out:
26101da177e4SLinus Torvalds 	/*
26111da177e4SLinus Torvalds 	 * Unlocks the flush lock
26121da177e4SLinus Torvalds 	 */
2613bad55843SDavid Chinner 	xfs_iflush_abort(ip);
26141da177e4SLinus Torvalds 	return XFS_ERROR(EFSCORRUPTED);
26151da177e4SLinus Torvalds }
26161da177e4SLinus Torvalds 
26171da177e4SLinus Torvalds 
26181da177e4SLinus Torvalds STATIC int
26191da177e4SLinus Torvalds xfs_iflush_int(
26201da177e4SLinus Torvalds 	xfs_inode_t		*ip,
26211da177e4SLinus Torvalds 	xfs_buf_t		*bp)
26221da177e4SLinus Torvalds {
26231da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
26241da177e4SLinus Torvalds 	xfs_dinode_t		*dip;
26251da177e4SLinus Torvalds 	xfs_mount_t		*mp;
26261da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG
26271da177e4SLinus Torvalds 	int			first;
26281da177e4SLinus Torvalds #endif
26291da177e4SLinus Torvalds 
2630579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2631c63942d3SDavid Chinner 	ASSERT(!completion_done(&ip->i_flush));
26321da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
26331da177e4SLinus Torvalds 	       ip->i_d.di_nextents > ip->i_df.if_ext_max);
26341da177e4SLinus Torvalds 
26351da177e4SLinus Torvalds 	iip = ip->i_itemp;
26361da177e4SLinus Torvalds 	mp = ip->i_mount;
26371da177e4SLinus Torvalds 
26381da177e4SLinus Torvalds 	/* set *dip = inode's place in the buffer */
263992bfc6e7SChristoph Hellwig 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
26401da177e4SLinus Torvalds 
26411da177e4SLinus Torvalds 	/*
26421da177e4SLinus Torvalds 	 * Clear i_update_core before copying out the data.
26431da177e4SLinus Torvalds 	 * This is for coordination with our timestamp updates
26441da177e4SLinus Torvalds 	 * that don't hold the inode lock. They will always
26451da177e4SLinus Torvalds 	 * update the timestamps BEFORE setting i_update_core,
26461da177e4SLinus Torvalds 	 * so if we clear i_update_core after they set it we
26471da177e4SLinus Torvalds 	 * are guaranteed to see their updates to the timestamps.
26481da177e4SLinus Torvalds 	 * I believe that this depends on strongly ordered memory
26491da177e4SLinus Torvalds 	 * semantics, but we have that.  We use the SYNCHRONIZE
26501da177e4SLinus Torvalds 	 * macro to make sure that the compiler does not reorder
26511da177e4SLinus Torvalds 	 * the i_update_core access below the data copy below.
26521da177e4SLinus Torvalds 	 */
26531da177e4SLinus Torvalds 	ip->i_update_core = 0;
26541da177e4SLinus Torvalds 	SYNCHRONIZE();
26551da177e4SLinus Torvalds 
265642fe2b1fSChristoph Hellwig 	/*
2657f9581b14SChristoph Hellwig 	 * Make sure to get the latest timestamps from the Linux inode.
265842fe2b1fSChristoph Hellwig 	 */
2659f9581b14SChristoph Hellwig 	xfs_synchronize_times(ip);
266042fe2b1fSChristoph Hellwig 
2661*69ef921bSChristoph Hellwig 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
26621da177e4SLinus Torvalds 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
26636a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
26646a19d939SDave Chinner 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
26656a19d939SDave Chinner 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
26661da177e4SLinus Torvalds 		goto corrupt_out;
26671da177e4SLinus Torvalds 	}
26681da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
26691da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
26706a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
26716a19d939SDave Chinner 			"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
26726a19d939SDave Chinner 			__func__, ip->i_ino, ip, ip->i_d.di_magic);
26731da177e4SLinus Torvalds 		goto corrupt_out;
26741da177e4SLinus Torvalds 	}
26751da177e4SLinus Torvalds 	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
26761da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
26771da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
26781da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
26791da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
26806a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
26816a19d939SDave Chinner 				"%s: Bad regular inode %Lu, ptr 0x%p",
26826a19d939SDave Chinner 				__func__, ip->i_ino, ip);
26831da177e4SLinus Torvalds 			goto corrupt_out;
26841da177e4SLinus Torvalds 		}
26851da177e4SLinus Torvalds 	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
26861da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
26871da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
26881da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
26891da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
26901da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
26916a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
26926a19d939SDave Chinner 				"%s: Bad directory inode %Lu, ptr 0x%p",
26936a19d939SDave Chinner 				__func__, ip->i_ino, ip);
26941da177e4SLinus Torvalds 			goto corrupt_out;
26951da177e4SLinus Torvalds 		}
26961da177e4SLinus Torvalds 	}
26971da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
26981da177e4SLinus Torvalds 				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
26991da177e4SLinus Torvalds 				XFS_RANDOM_IFLUSH_5)) {
27006a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27016a19d939SDave Chinner 			"%s: detected corrupt incore inode %Lu, "
27026a19d939SDave Chinner 			"total extents = %d, nblocks = %Ld, ptr 0x%p",
27036a19d939SDave Chinner 			__func__, ip->i_ino,
27041da177e4SLinus Torvalds 			ip->i_d.di_nextents + ip->i_d.di_anextents,
27056a19d939SDave Chinner 			ip->i_d.di_nblocks, ip);
27061da177e4SLinus Torvalds 		goto corrupt_out;
27071da177e4SLinus Torvalds 	}
27081da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
27091da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
27106a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27116a19d939SDave Chinner 			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
27126a19d939SDave Chinner 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
27131da177e4SLinus Torvalds 		goto corrupt_out;
27141da177e4SLinus Torvalds 	}
27151da177e4SLinus Torvalds 	/*
27161da177e4SLinus Torvalds 	 * bump the flush iteration count, used to detect flushes which
27171da177e4SLinus Torvalds 	 * postdate a log record during recovery.
27181da177e4SLinus Torvalds 	 */
27191da177e4SLinus Torvalds 
27201da177e4SLinus Torvalds 	ip->i_d.di_flushiter++;
27211da177e4SLinus Torvalds 
27221da177e4SLinus Torvalds 	/*
27231da177e4SLinus Torvalds 	 * Copy the dirty parts of the inode into the on-disk
27241da177e4SLinus Torvalds 	 * inode.  We always copy out the core of the inode,
27251da177e4SLinus Torvalds 	 * because if the inode is dirty at all the core must
27261da177e4SLinus Torvalds 	 * be.
27271da177e4SLinus Torvalds 	 */
272881591fe2SChristoph Hellwig 	xfs_dinode_to_disk(dip, &ip->i_d);
27291da177e4SLinus Torvalds 
27301da177e4SLinus Torvalds 	/* Wrap, we never let the log put out DI_MAX_FLUSH */
27311da177e4SLinus Torvalds 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
27321da177e4SLinus Torvalds 		ip->i_d.di_flushiter = 0;
27331da177e4SLinus Torvalds 
27341da177e4SLinus Torvalds 	/*
27351da177e4SLinus Torvalds 	 * If this is really an old format inode and the superblock version
27361da177e4SLinus Torvalds 	 * has not been updated to support only new format inodes, then
27371da177e4SLinus Torvalds 	 * convert back to the old inode format.  If the superblock version
27381da177e4SLinus Torvalds 	 * has been updated, then make the conversion permanent.
27391da177e4SLinus Torvalds 	 */
274051ce16d5SChristoph Hellwig 	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
274151ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
274262118709SEric Sandeen 		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
27431da177e4SLinus Torvalds 			/*
27441da177e4SLinus Torvalds 			 * Convert it back.
27451da177e4SLinus Torvalds 			 */
27461da177e4SLinus Torvalds 			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
274781591fe2SChristoph Hellwig 			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
27481da177e4SLinus Torvalds 		} else {
27491da177e4SLinus Torvalds 			/*
27501da177e4SLinus Torvalds 			 * The superblock version has already been bumped,
27511da177e4SLinus Torvalds 			 * so just make the conversion to the new inode
27521da177e4SLinus Torvalds 			 * format permanent.
27531da177e4SLinus Torvalds 			 */
275451ce16d5SChristoph Hellwig 			ip->i_d.di_version = 2;
275551ce16d5SChristoph Hellwig 			dip->di_version = 2;
27561da177e4SLinus Torvalds 			ip->i_d.di_onlink = 0;
275781591fe2SChristoph Hellwig 			dip->di_onlink = 0;
27581da177e4SLinus Torvalds 			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
275981591fe2SChristoph Hellwig 			memset(&(dip->di_pad[0]), 0,
276081591fe2SChristoph Hellwig 			      sizeof(dip->di_pad));
27616743099cSArkadiusz Mi?kiewicz 			ASSERT(xfs_get_projid(ip) == 0);
27621da177e4SLinus Torvalds 		}
27631da177e4SLinus Torvalds 	}
27641da177e4SLinus Torvalds 
2765e4ac967bSDavid Chinner 	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
2766e4ac967bSDavid Chinner 	if (XFS_IFORK_Q(ip))
2767e4ac967bSDavid Chinner 		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
27681da177e4SLinus Torvalds 	xfs_inobp_check(mp, bp);
27691da177e4SLinus Torvalds 
27701da177e4SLinus Torvalds 	/*
27711da177e4SLinus Torvalds 	 * We've recorded everything logged in the inode, so we'd
27721da177e4SLinus Torvalds 	 * like to clear the ilf_fields bits so we don't log and
27731da177e4SLinus Torvalds 	 * flush things unnecessarily.  However, we can't stop
27741da177e4SLinus Torvalds 	 * logging all this information until the data we've copied
27751da177e4SLinus Torvalds 	 * into the disk buffer is written to disk.  If we did we might
27761da177e4SLinus Torvalds 	 * overwrite the copy of the inode in the log with all the
27771da177e4SLinus Torvalds 	 * data after re-logging only part of it, and in the face of
27781da177e4SLinus Torvalds 	 * a crash we wouldn't have all the data we need to recover.
27791da177e4SLinus Torvalds 	 *
27801da177e4SLinus Torvalds 	 * What we do is move the bits to the ili_last_fields field.
27811da177e4SLinus Torvalds 	 * When logging the inode, these bits are moved back to the
27821da177e4SLinus Torvalds 	 * ilf_fields field.  In the xfs_iflush_done() routine we
27831da177e4SLinus Torvalds 	 * clear ili_last_fields, since we know that the information
27841da177e4SLinus Torvalds 	 * those bits represent is permanently on disk.  As long as
27851da177e4SLinus Torvalds 	 * the flush completes before the inode is logged again, then
27861da177e4SLinus Torvalds 	 * both ilf_fields and ili_last_fields will be cleared.
27871da177e4SLinus Torvalds 	 *
27881da177e4SLinus Torvalds 	 * We can play with the ilf_fields bits here, because the inode
27891da177e4SLinus Torvalds 	 * lock must be held exclusively in order to set bits there
27901da177e4SLinus Torvalds 	 * and the flush lock protects the ili_last_fields bits.
27911da177e4SLinus Torvalds 	 * Set ili_logged so the flush done
27921da177e4SLinus Torvalds 	 * routine can tell whether or not to look in the AIL.
27931da177e4SLinus Torvalds 	 * Also, store the current LSN of the inode so that we can tell
27941da177e4SLinus Torvalds 	 * whether the item has moved in the AIL from xfs_iflush_done().
27951da177e4SLinus Torvalds 	 * In order to read the lsn we need the AIL lock, because
27961da177e4SLinus Torvalds 	 * it is a 64 bit value that cannot be read atomically.
27971da177e4SLinus Torvalds 	 */
27981da177e4SLinus Torvalds 	if (iip != NULL && iip->ili_format.ilf_fields != 0) {
27991da177e4SLinus Torvalds 		iip->ili_last_fields = iip->ili_format.ilf_fields;
28001da177e4SLinus Torvalds 		iip->ili_format.ilf_fields = 0;
28011da177e4SLinus Torvalds 		iip->ili_logged = 1;
28021da177e4SLinus Torvalds 
28037b2e2a31SDavid Chinner 		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
28047b2e2a31SDavid Chinner 					&iip->ili_item.li_lsn);
28051da177e4SLinus Torvalds 
28061da177e4SLinus Torvalds 		/*
28071da177e4SLinus Torvalds 		 * Attach the function xfs_iflush_done to the inode's
28081da177e4SLinus Torvalds 		 * buffer.  This will remove the inode from the AIL
28091da177e4SLinus Torvalds 		 * and unlock the inode's flush lock when the inode is
28101da177e4SLinus Torvalds 		 * completely written to disk.
28111da177e4SLinus Torvalds 		 */
2812ca30b2a7SChristoph Hellwig 		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
28131da177e4SLinus Torvalds 
28141da177e4SLinus Torvalds 		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
28151da177e4SLinus Torvalds 		ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
28161da177e4SLinus Torvalds 	} else {
28171da177e4SLinus Torvalds 		/*
28181da177e4SLinus Torvalds 		 * We're flushing an inode which is not in the AIL and has
28191da177e4SLinus Torvalds 		 * not been logged but has i_update_core set.  For this
28201da177e4SLinus Torvalds 		 * case we can use a B_DELWRI flush and immediately drop
28211da177e4SLinus Torvalds 		 * the inode flush lock because we can avoid the whole
28221da177e4SLinus Torvalds 		 * AIL state thing.  It's OK to drop the flush lock now,
28231da177e4SLinus Torvalds 		 * because we've already locked the buffer and to do anything
28241da177e4SLinus Torvalds 		 * you really need both.
28251da177e4SLinus Torvalds 		 */
28261da177e4SLinus Torvalds 		if (iip != NULL) {
28271da177e4SLinus Torvalds 			ASSERT(iip->ili_logged == 0);
28281da177e4SLinus Torvalds 			ASSERT(iip->ili_last_fields == 0);
28291da177e4SLinus Torvalds 			ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
28301da177e4SLinus Torvalds 		}
28311da177e4SLinus Torvalds 		xfs_ifunlock(ip);
28321da177e4SLinus Torvalds 	}
28331da177e4SLinus Torvalds 
28341da177e4SLinus Torvalds 	return 0;
28351da177e4SLinus Torvalds 
28361da177e4SLinus Torvalds corrupt_out:
28371da177e4SLinus Torvalds 	return XFS_ERROR(EFSCORRUPTED);
28381da177e4SLinus Torvalds }
28391da177e4SLinus Torvalds 
28404eea22f0SMandy Kirkconnell /*
28414eea22f0SMandy Kirkconnell  * Return a pointer to the extent record at file index idx.
28424eea22f0SMandy Kirkconnell  */
2843a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *
28444eea22f0SMandy Kirkconnell xfs_iext_get_ext(
28454eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
28464eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx)		/* index of target extent */
28474eea22f0SMandy Kirkconnell {
28484eea22f0SMandy Kirkconnell 	ASSERT(idx >= 0);
284987bef181SChristoph Hellwig 	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
285087bef181SChristoph Hellwig 
28510293ce3aSMandy Kirkconnell 	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
28520293ce3aSMandy Kirkconnell 		return ifp->if_u1.if_ext_irec->er_extbuf;
28530293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
28540293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;		/* irec pointer */
28550293ce3aSMandy Kirkconnell 		int		erp_idx = 0;	/* irec index */
28560293ce3aSMandy Kirkconnell 		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
28570293ce3aSMandy Kirkconnell 
28580293ce3aSMandy Kirkconnell 		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
28590293ce3aSMandy Kirkconnell 		return &erp->er_extbuf[page_idx];
28600293ce3aSMandy Kirkconnell 	} else if (ifp->if_bytes) {
28614eea22f0SMandy Kirkconnell 		return &ifp->if_u1.if_extents[idx];
28624eea22f0SMandy Kirkconnell 	} else {
28634eea22f0SMandy Kirkconnell 		return NULL;
28644eea22f0SMandy Kirkconnell 	}
28654eea22f0SMandy Kirkconnell }
28664eea22f0SMandy Kirkconnell 
28674eea22f0SMandy Kirkconnell /*
28684eea22f0SMandy Kirkconnell  * Insert new item(s) into the extent records for incore inode
28694eea22f0SMandy Kirkconnell  * fork 'ifp'.  'count' new items are inserted at index 'idx'.
28704eea22f0SMandy Kirkconnell  */
28714eea22f0SMandy Kirkconnell void
28724eea22f0SMandy Kirkconnell xfs_iext_insert(
28736ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
28744eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* starting index of new items */
28754eea22f0SMandy Kirkconnell 	xfs_extnum_t	count,		/* number of inserted items */
28766ef35544SChristoph Hellwig 	xfs_bmbt_irec_t	*new,		/* items to insert */
28776ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
28784eea22f0SMandy Kirkconnell {
28796ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
28804eea22f0SMandy Kirkconnell 	xfs_extnum_t	i;		/* extent record index */
28814eea22f0SMandy Kirkconnell 
28820b1b213fSChristoph Hellwig 	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
28830b1b213fSChristoph Hellwig 
28844eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
28854eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, idx, count);
2886a6f64d4aSChristoph Hellwig 	for (i = idx; i < idx + count; i++, new++)
2887a6f64d4aSChristoph Hellwig 		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
28884eea22f0SMandy Kirkconnell }
28894eea22f0SMandy Kirkconnell 
28904eea22f0SMandy Kirkconnell /*
28914eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
28924eea22f0SMandy Kirkconnell  * extents needs to be increased. The ext_diff parameter stores the
28934eea22f0SMandy Kirkconnell  * number of new extents being added and the idx parameter contains
28944eea22f0SMandy Kirkconnell  * the extent index where the new extents will be added. If the new
28954eea22f0SMandy Kirkconnell  * extents are being appended, then we just need to (re)allocate and
28964eea22f0SMandy Kirkconnell  * initialize the space. Otherwise, if the new extents are being
28974eea22f0SMandy Kirkconnell  * inserted into the middle of the existing entries, a bit more work
28984eea22f0SMandy Kirkconnell  * is required to make room for the new extents to be inserted. The
28994eea22f0SMandy Kirkconnell  * caller is responsible for filling in the new extent entries upon
29004eea22f0SMandy Kirkconnell  * return.
29014eea22f0SMandy Kirkconnell  */
29024eea22f0SMandy Kirkconnell void
29034eea22f0SMandy Kirkconnell xfs_iext_add(
29044eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
29054eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin adding exts */
2906c41564b5SNathan Scott 	int		ext_diff)	/* number of extents to add */
29074eea22f0SMandy Kirkconnell {
29084eea22f0SMandy Kirkconnell 	int		byte_diff;	/* new bytes being added */
29094eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after adding */
29104eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
29114eea22f0SMandy Kirkconnell 
29124eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
29134eea22f0SMandy Kirkconnell 	ASSERT((idx >= 0) && (idx <= nextents));
29144eea22f0SMandy Kirkconnell 	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
29154eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes + byte_diff;
29164eea22f0SMandy Kirkconnell 	/*
29174eea22f0SMandy Kirkconnell 	 * If the new number of extents (nextents + ext_diff)
29184eea22f0SMandy Kirkconnell 	 * fits inside the inode, then continue to use the inline
29194eea22f0SMandy Kirkconnell 	 * extent buffer.
29204eea22f0SMandy Kirkconnell 	 */
29214eea22f0SMandy Kirkconnell 	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
29224eea22f0SMandy Kirkconnell 		if (idx < nextents) {
29234eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
29244eea22f0SMandy Kirkconnell 				&ifp->if_u2.if_inline_ext[idx],
29254eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
29264eea22f0SMandy Kirkconnell 			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
29274eea22f0SMandy Kirkconnell 		}
29284eea22f0SMandy Kirkconnell 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
29294eea22f0SMandy Kirkconnell 		ifp->if_real_bytes = 0;
29304eea22f0SMandy Kirkconnell 	}
29314eea22f0SMandy Kirkconnell 	/*
29324eea22f0SMandy Kirkconnell 	 * Otherwise use a linear (direct) extent list.
29334eea22f0SMandy Kirkconnell 	 * If the extents are currently inside the inode,
29344eea22f0SMandy Kirkconnell 	 * xfs_iext_realloc_direct will switch us from
29354eea22f0SMandy Kirkconnell 	 * inline to direct extent allocation mode.
29364eea22f0SMandy Kirkconnell 	 */
29370293ce3aSMandy Kirkconnell 	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
29384eea22f0SMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, new_size);
29394eea22f0SMandy Kirkconnell 		if (idx < nextents) {
29404eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
29414eea22f0SMandy Kirkconnell 				&ifp->if_u1.if_extents[idx],
29424eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
29434eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
29444eea22f0SMandy Kirkconnell 		}
29454eea22f0SMandy Kirkconnell 	}
29460293ce3aSMandy Kirkconnell 	/* Indirection array */
29470293ce3aSMandy Kirkconnell 	else {
29480293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;
29490293ce3aSMandy Kirkconnell 		int		erp_idx = 0;
29500293ce3aSMandy Kirkconnell 		int		page_idx = idx;
29510293ce3aSMandy Kirkconnell 
29520293ce3aSMandy Kirkconnell 		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
29530293ce3aSMandy Kirkconnell 		if (ifp->if_flags & XFS_IFEXTIREC) {
29540293ce3aSMandy Kirkconnell 			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
29550293ce3aSMandy Kirkconnell 		} else {
29560293ce3aSMandy Kirkconnell 			xfs_iext_irec_init(ifp);
29570293ce3aSMandy Kirkconnell 			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
29580293ce3aSMandy Kirkconnell 			erp = ifp->if_u1.if_ext_irec;
29590293ce3aSMandy Kirkconnell 		}
29600293ce3aSMandy Kirkconnell 		/* Extents fit in target extent page */
29610293ce3aSMandy Kirkconnell 		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
29620293ce3aSMandy Kirkconnell 			if (page_idx < erp->er_extcount) {
29630293ce3aSMandy Kirkconnell 				memmove(&erp->er_extbuf[page_idx + ext_diff],
29640293ce3aSMandy Kirkconnell 					&erp->er_extbuf[page_idx],
29650293ce3aSMandy Kirkconnell 					(erp->er_extcount - page_idx) *
29660293ce3aSMandy Kirkconnell 					sizeof(xfs_bmbt_rec_t));
29670293ce3aSMandy Kirkconnell 				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
29680293ce3aSMandy Kirkconnell 			}
29690293ce3aSMandy Kirkconnell 			erp->er_extcount += ext_diff;
29700293ce3aSMandy Kirkconnell 			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
29710293ce3aSMandy Kirkconnell 		}
29720293ce3aSMandy Kirkconnell 		/* Insert a new extent page */
29730293ce3aSMandy Kirkconnell 		else if (erp) {
29740293ce3aSMandy Kirkconnell 			xfs_iext_add_indirect_multi(ifp,
29750293ce3aSMandy Kirkconnell 				erp_idx, page_idx, ext_diff);
29760293ce3aSMandy Kirkconnell 		}
29770293ce3aSMandy Kirkconnell 		/*
29780293ce3aSMandy Kirkconnell 		 * If extent(s) are being appended to the last page in
29790293ce3aSMandy Kirkconnell 		 * the indirection array and the new extent(s) don't fit
29800293ce3aSMandy Kirkconnell 		 * in the page, then erp is NULL and erp_idx is set to
29810293ce3aSMandy Kirkconnell 		 * the next index needed in the indirection array.
29820293ce3aSMandy Kirkconnell 		 */
29830293ce3aSMandy Kirkconnell 		else {
29840293ce3aSMandy Kirkconnell 			int	count = ext_diff;
29850293ce3aSMandy Kirkconnell 
29860293ce3aSMandy Kirkconnell 			while (count) {
29870293ce3aSMandy Kirkconnell 				erp = xfs_iext_irec_new(ifp, erp_idx);
29880293ce3aSMandy Kirkconnell 				erp->er_extcount = count;
29890293ce3aSMandy Kirkconnell 				count -= MIN(count, (int)XFS_LINEAR_EXTS);
29900293ce3aSMandy Kirkconnell 				if (count) {
29910293ce3aSMandy Kirkconnell 					erp_idx++;
29920293ce3aSMandy Kirkconnell 				}
29930293ce3aSMandy Kirkconnell 			}
29940293ce3aSMandy Kirkconnell 		}
29950293ce3aSMandy Kirkconnell 	}
29964eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
29974eea22f0SMandy Kirkconnell }
29984eea22f0SMandy Kirkconnell 
29994eea22f0SMandy Kirkconnell /*
30000293ce3aSMandy Kirkconnell  * This is called when incore extents are being added to the indirection
30010293ce3aSMandy Kirkconnell  * array and the new extents do not fit in the target extent list. The
30020293ce3aSMandy Kirkconnell  * erp_idx parameter contains the irec index for the target extent list
30030293ce3aSMandy Kirkconnell  * in the indirection array, and the idx parameter contains the extent
30040293ce3aSMandy Kirkconnell  * index within the list. The number of extents being added is stored
30050293ce3aSMandy Kirkconnell  * in the count parameter.
30060293ce3aSMandy Kirkconnell  *
30070293ce3aSMandy Kirkconnell  *    |-------|   |-------|
30080293ce3aSMandy Kirkconnell  *    |       |   |       |    idx - number of extents before idx
30090293ce3aSMandy Kirkconnell  *    |  idx  |   | count |
30100293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being inserted at idx
30110293ce3aSMandy Kirkconnell  *    |-------|   |-------|
30120293ce3aSMandy Kirkconnell  *    | count |   | nex2  |    nex2 - number of extents after idx + count
30130293ce3aSMandy Kirkconnell  *    |-------|   |-------|
30140293ce3aSMandy Kirkconnell  */
30150293ce3aSMandy Kirkconnell void
30160293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(
30170293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,			/* inode fork pointer */
30180293ce3aSMandy Kirkconnell 	int		erp_idx,		/* target extent irec index */
30190293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,			/* index within target list */
30200293ce3aSMandy Kirkconnell 	int		count)			/* new extents being added */
30210293ce3aSMandy Kirkconnell {
30220293ce3aSMandy Kirkconnell 	int		byte_diff;		/* new bytes being added */
30230293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
30240293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;		/* number of extents to add */
30250293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;		/* new extents still needed */
30260293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;			/* extents after idx + count */
30270293ce3aSMandy Kirkconnell 	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
30280293ce3aSMandy Kirkconnell 	int		nlists;			/* number of irec's (lists) */
30290293ce3aSMandy Kirkconnell 
30300293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
30310293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
30320293ce3aSMandy Kirkconnell 	nex2 = erp->er_extcount - idx;
30330293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
30340293ce3aSMandy Kirkconnell 
30350293ce3aSMandy Kirkconnell 	/*
30360293ce3aSMandy Kirkconnell 	 * Save second part of target extent list
30370293ce3aSMandy Kirkconnell 	 * (all extents past */
30380293ce3aSMandy Kirkconnell 	if (nex2) {
30390293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
30406785073bSDavid Chinner 		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
30410293ce3aSMandy Kirkconnell 		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
30420293ce3aSMandy Kirkconnell 		erp->er_extcount -= nex2;
30430293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
30440293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[idx], 0, byte_diff);
30450293ce3aSMandy Kirkconnell 	}
30460293ce3aSMandy Kirkconnell 
30470293ce3aSMandy Kirkconnell 	/*
30480293ce3aSMandy Kirkconnell 	 * Add the new extents to the end of the target
30490293ce3aSMandy Kirkconnell 	 * list, then allocate new irec record(s) and
30500293ce3aSMandy Kirkconnell 	 * extent buffer(s) as needed to store the rest
30510293ce3aSMandy Kirkconnell 	 * of the new extents.
30520293ce3aSMandy Kirkconnell 	 */
30530293ce3aSMandy Kirkconnell 	ext_cnt = count;
30540293ce3aSMandy Kirkconnell 	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
30550293ce3aSMandy Kirkconnell 	if (ext_diff) {
30560293ce3aSMandy Kirkconnell 		erp->er_extcount += ext_diff;
30570293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
30580293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
30590293ce3aSMandy Kirkconnell 	}
30600293ce3aSMandy Kirkconnell 	while (ext_cnt) {
30610293ce3aSMandy Kirkconnell 		erp_idx++;
30620293ce3aSMandy Kirkconnell 		erp = xfs_iext_irec_new(ifp, erp_idx);
30630293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
30640293ce3aSMandy Kirkconnell 		erp->er_extcount = ext_diff;
30650293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
30660293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
30670293ce3aSMandy Kirkconnell 	}
30680293ce3aSMandy Kirkconnell 
30690293ce3aSMandy Kirkconnell 	/* Add nex2 extents back to indirection array */
30700293ce3aSMandy Kirkconnell 	if (nex2) {
30710293ce3aSMandy Kirkconnell 		xfs_extnum_t	ext_avail;
30720293ce3aSMandy Kirkconnell 		int		i;
30730293ce3aSMandy Kirkconnell 
30740293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
30750293ce3aSMandy Kirkconnell 		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
30760293ce3aSMandy Kirkconnell 		i = 0;
30770293ce3aSMandy Kirkconnell 		/*
30780293ce3aSMandy Kirkconnell 		 * If nex2 extents fit in the current page, append
30790293ce3aSMandy Kirkconnell 		 * nex2_ep after the new extents.
30800293ce3aSMandy Kirkconnell 		 */
30810293ce3aSMandy Kirkconnell 		if (nex2 <= ext_avail) {
30820293ce3aSMandy Kirkconnell 			i = erp->er_extcount;
30830293ce3aSMandy Kirkconnell 		}
30840293ce3aSMandy Kirkconnell 		/*
30850293ce3aSMandy Kirkconnell 		 * Otherwise, check if space is available in the
30860293ce3aSMandy Kirkconnell 		 * next page.
30870293ce3aSMandy Kirkconnell 		 */
30880293ce3aSMandy Kirkconnell 		else if ((erp_idx < nlists - 1) &&
30890293ce3aSMandy Kirkconnell 			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
30900293ce3aSMandy Kirkconnell 			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
30910293ce3aSMandy Kirkconnell 			erp_idx++;
30920293ce3aSMandy Kirkconnell 			erp++;
30930293ce3aSMandy Kirkconnell 			/* Create a hole for nex2 extents */
30940293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
30950293ce3aSMandy Kirkconnell 				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
30960293ce3aSMandy Kirkconnell 		}
30970293ce3aSMandy Kirkconnell 		/*
30980293ce3aSMandy Kirkconnell 		 * Final choice, create a new extent page for
30990293ce3aSMandy Kirkconnell 		 * nex2 extents.
31000293ce3aSMandy Kirkconnell 		 */
31010293ce3aSMandy Kirkconnell 		else {
31020293ce3aSMandy Kirkconnell 			erp_idx++;
31030293ce3aSMandy Kirkconnell 			erp = xfs_iext_irec_new(ifp, erp_idx);
31040293ce3aSMandy Kirkconnell 		}
31050293ce3aSMandy Kirkconnell 		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3106f0e2d93cSDenys Vlasenko 		kmem_free(nex2_ep);
31070293ce3aSMandy Kirkconnell 		erp->er_extcount += nex2;
31080293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
31090293ce3aSMandy Kirkconnell 	}
31100293ce3aSMandy Kirkconnell }
31110293ce3aSMandy Kirkconnell 
31120293ce3aSMandy Kirkconnell /*
31134eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
31144eea22f0SMandy Kirkconnell  * extents needs to be decreased. The ext_diff parameter stores the
31154eea22f0SMandy Kirkconnell  * number of extents to be removed and the idx parameter contains
31164eea22f0SMandy Kirkconnell  * the extent index where the extents will be removed from.
31170293ce3aSMandy Kirkconnell  *
31180293ce3aSMandy Kirkconnell  * If the amount of space needed has decreased below the linear
31190293ce3aSMandy Kirkconnell  * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
31200293ce3aSMandy Kirkconnell  * extent array.  Otherwise, use kmem_realloc() to adjust the
31210293ce3aSMandy Kirkconnell  * size to what is needed.
31224eea22f0SMandy Kirkconnell  */
31234eea22f0SMandy Kirkconnell void
31244eea22f0SMandy Kirkconnell xfs_iext_remove(
31256ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
31264eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
31276ef35544SChristoph Hellwig 	int		ext_diff,	/* number of extents to remove */
31286ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
31294eea22f0SMandy Kirkconnell {
31306ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
31314eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
31324eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
31334eea22f0SMandy Kirkconnell 
31340b1b213fSChristoph Hellwig 	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
31350b1b213fSChristoph Hellwig 
31364eea22f0SMandy Kirkconnell 	ASSERT(ext_diff > 0);
31374eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
31384eea22f0SMandy Kirkconnell 	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
31394eea22f0SMandy Kirkconnell 
31404eea22f0SMandy Kirkconnell 	if (new_size == 0) {
31414eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
31420293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
31430293ce3aSMandy Kirkconnell 		xfs_iext_remove_indirect(ifp, idx, ext_diff);
31444eea22f0SMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
31454eea22f0SMandy Kirkconnell 		xfs_iext_remove_direct(ifp, idx, ext_diff);
31464eea22f0SMandy Kirkconnell 	} else {
31474eea22f0SMandy Kirkconnell 		xfs_iext_remove_inline(ifp, idx, ext_diff);
31484eea22f0SMandy Kirkconnell 	}
31494eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
31504eea22f0SMandy Kirkconnell }
31514eea22f0SMandy Kirkconnell 
31524eea22f0SMandy Kirkconnell /*
31534eea22f0SMandy Kirkconnell  * This removes ext_diff extents from the inline buffer, beginning
31544eea22f0SMandy Kirkconnell  * at extent index idx.
31554eea22f0SMandy Kirkconnell  */
31564eea22f0SMandy Kirkconnell void
31574eea22f0SMandy Kirkconnell xfs_iext_remove_inline(
31584eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
31594eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
31604eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
31614eea22f0SMandy Kirkconnell {
31624eea22f0SMandy Kirkconnell 	int		nextents;	/* number of extents in file */
31634eea22f0SMandy Kirkconnell 
31640293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
31654eea22f0SMandy Kirkconnell 	ASSERT(idx < XFS_INLINE_EXTS);
31664eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
31674eea22f0SMandy Kirkconnell 	ASSERT(((nextents - ext_diff) > 0) &&
31684eea22f0SMandy Kirkconnell 		(nextents - ext_diff) < XFS_INLINE_EXTS);
31694eea22f0SMandy Kirkconnell 
31704eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
31714eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u2.if_inline_ext[idx],
31724eea22f0SMandy Kirkconnell 			&ifp->if_u2.if_inline_ext[idx + ext_diff],
31734eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
31744eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
31754eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
31764eea22f0SMandy Kirkconnell 			0, ext_diff * sizeof(xfs_bmbt_rec_t));
31774eea22f0SMandy Kirkconnell 	} else {
31784eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[idx], 0,
31794eea22f0SMandy Kirkconnell 			ext_diff * sizeof(xfs_bmbt_rec_t));
31804eea22f0SMandy Kirkconnell 	}
31814eea22f0SMandy Kirkconnell }
31824eea22f0SMandy Kirkconnell 
31834eea22f0SMandy Kirkconnell /*
31844eea22f0SMandy Kirkconnell  * This removes ext_diff extents from a linear (direct) extent list,
31854eea22f0SMandy Kirkconnell  * beginning at extent index idx. If the extents are being removed
31864eea22f0SMandy Kirkconnell  * from the end of the list (ie. truncate) then we just need to re-
31874eea22f0SMandy Kirkconnell  * allocate the list to remove the extra space. Otherwise, if the
31884eea22f0SMandy Kirkconnell  * extents are being removed from the middle of the existing extent
31894eea22f0SMandy Kirkconnell  * entries, then we first need to move the extent records beginning
31904eea22f0SMandy Kirkconnell  * at idx + ext_diff up in the list to overwrite the records being
31914eea22f0SMandy Kirkconnell  * removed, then remove the extra space via kmem_realloc.
31924eea22f0SMandy Kirkconnell  */
31934eea22f0SMandy Kirkconnell void
31944eea22f0SMandy Kirkconnell xfs_iext_remove_direct(
31954eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
31964eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
31974eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
31984eea22f0SMandy Kirkconnell {
31994eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
32004eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
32014eea22f0SMandy Kirkconnell 
32020293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
32034eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes -
32044eea22f0SMandy Kirkconnell 		(ext_diff * sizeof(xfs_bmbt_rec_t));
32054eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
32064eea22f0SMandy Kirkconnell 
32074eea22f0SMandy Kirkconnell 	if (new_size == 0) {
32084eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
32094eea22f0SMandy Kirkconnell 		return;
32104eea22f0SMandy Kirkconnell 	}
32114eea22f0SMandy Kirkconnell 	/* Move extents up in the list (if needed) */
32124eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
32134eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u1.if_extents[idx],
32144eea22f0SMandy Kirkconnell 			&ifp->if_u1.if_extents[idx + ext_diff],
32154eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
32164eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
32174eea22f0SMandy Kirkconnell 	}
32184eea22f0SMandy Kirkconnell 	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
32194eea22f0SMandy Kirkconnell 		0, ext_diff * sizeof(xfs_bmbt_rec_t));
32204eea22f0SMandy Kirkconnell 	/*
32214eea22f0SMandy Kirkconnell 	 * Reallocate the direct extent list. If the extents
32224eea22f0SMandy Kirkconnell 	 * will fit inside the inode then xfs_iext_realloc_direct
32234eea22f0SMandy Kirkconnell 	 * will switch from direct to inline extent allocation
32244eea22f0SMandy Kirkconnell 	 * mode for us.
32254eea22f0SMandy Kirkconnell 	 */
32264eea22f0SMandy Kirkconnell 	xfs_iext_realloc_direct(ifp, new_size);
32274eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
32284eea22f0SMandy Kirkconnell }
32294eea22f0SMandy Kirkconnell 
32304eea22f0SMandy Kirkconnell /*
32310293ce3aSMandy Kirkconnell  * This is called when incore extents are being removed from the
32320293ce3aSMandy Kirkconnell  * indirection array and the extents being removed span multiple extent
32330293ce3aSMandy Kirkconnell  * buffers. The idx parameter contains the file extent index where we
32340293ce3aSMandy Kirkconnell  * want to begin removing extents, and the count parameter contains
32350293ce3aSMandy Kirkconnell  * how many extents need to be removed.
32360293ce3aSMandy Kirkconnell  *
32370293ce3aSMandy Kirkconnell  *    |-------|   |-------|
32380293ce3aSMandy Kirkconnell  *    | nex1  |   |       |    nex1 - number of extents before idx
32390293ce3aSMandy Kirkconnell  *    |-------|   | count |
32400293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being removed at idx
32410293ce3aSMandy Kirkconnell  *    | count |   |-------|
32420293ce3aSMandy Kirkconnell  *    |       |   | nex2  |    nex2 - number of extents after idx + count
32430293ce3aSMandy Kirkconnell  *    |-------|   |-------|
32440293ce3aSMandy Kirkconnell  */
32450293ce3aSMandy Kirkconnell void
32460293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(
32470293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
32480293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing extents */
32490293ce3aSMandy Kirkconnell 	int		count)		/* number of extents to remove */
32500293ce3aSMandy Kirkconnell {
32510293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
32520293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
32530293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;	/* extents left to remove */
32540293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
32550293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex1;		/* number of extents before idx */
32560293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;		/* extents after idx + count */
32570293ce3aSMandy Kirkconnell 	int		page_idx = idx;	/* index in target extent list */
32580293ce3aSMandy Kirkconnell 
32590293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
32600293ce3aSMandy Kirkconnell 	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
32610293ce3aSMandy Kirkconnell 	ASSERT(erp != NULL);
32620293ce3aSMandy Kirkconnell 	nex1 = page_idx;
32630293ce3aSMandy Kirkconnell 	ext_cnt = count;
32640293ce3aSMandy Kirkconnell 	while (ext_cnt) {
32650293ce3aSMandy Kirkconnell 		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
32660293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
32670293ce3aSMandy Kirkconnell 		/*
32680293ce3aSMandy Kirkconnell 		 * Check for deletion of entire list;
32690293ce3aSMandy Kirkconnell 		 * xfs_iext_irec_remove() updates extent offsets.
32700293ce3aSMandy Kirkconnell 		 */
32710293ce3aSMandy Kirkconnell 		if (ext_diff == erp->er_extcount) {
32720293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
32730293ce3aSMandy Kirkconnell 			ext_cnt -= ext_diff;
32740293ce3aSMandy Kirkconnell 			nex1 = 0;
32750293ce3aSMandy Kirkconnell 			if (ext_cnt) {
32760293ce3aSMandy Kirkconnell 				ASSERT(erp_idx < ifp->if_real_bytes /
32770293ce3aSMandy Kirkconnell 					XFS_IEXT_BUFSZ);
32780293ce3aSMandy Kirkconnell 				erp = &ifp->if_u1.if_ext_irec[erp_idx];
32790293ce3aSMandy Kirkconnell 				nex1 = 0;
32800293ce3aSMandy Kirkconnell 				continue;
32810293ce3aSMandy Kirkconnell 			} else {
32820293ce3aSMandy Kirkconnell 				break;
32830293ce3aSMandy Kirkconnell 			}
32840293ce3aSMandy Kirkconnell 		}
32850293ce3aSMandy Kirkconnell 		/* Move extents up (if needed) */
32860293ce3aSMandy Kirkconnell 		if (nex2) {
32870293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex1],
32880293ce3aSMandy Kirkconnell 				&erp->er_extbuf[nex1 + ext_diff],
32890293ce3aSMandy Kirkconnell 				nex2 * sizeof(xfs_bmbt_rec_t));
32900293ce3aSMandy Kirkconnell 		}
32910293ce3aSMandy Kirkconnell 		/* Zero out rest of page */
32920293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
32930293ce3aSMandy Kirkconnell 			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
32940293ce3aSMandy Kirkconnell 		/* Update remaining counters */
32950293ce3aSMandy Kirkconnell 		erp->er_extcount -= ext_diff;
32960293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
32970293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
32980293ce3aSMandy Kirkconnell 		nex1 = 0;
32990293ce3aSMandy Kirkconnell 		erp_idx++;
33000293ce3aSMandy Kirkconnell 		erp++;
33010293ce3aSMandy Kirkconnell 	}
33020293ce3aSMandy Kirkconnell 	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
33030293ce3aSMandy Kirkconnell 	xfs_iext_irec_compact(ifp);
33040293ce3aSMandy Kirkconnell }
33050293ce3aSMandy Kirkconnell 
33060293ce3aSMandy Kirkconnell /*
33074eea22f0SMandy Kirkconnell  * Create, destroy, or resize a linear (direct) block of extents.
33084eea22f0SMandy Kirkconnell  */
33094eea22f0SMandy Kirkconnell void
33104eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(
33114eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33124eea22f0SMandy Kirkconnell 	int		new_size)	/* new size of extents */
33134eea22f0SMandy Kirkconnell {
33144eea22f0SMandy Kirkconnell 	int		rnew_size;	/* real new size of extents */
33154eea22f0SMandy Kirkconnell 
33164eea22f0SMandy Kirkconnell 	rnew_size = new_size;
33174eea22f0SMandy Kirkconnell 
33180293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
33190293ce3aSMandy Kirkconnell 		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
33200293ce3aSMandy Kirkconnell 		 (new_size != ifp->if_real_bytes)));
33210293ce3aSMandy Kirkconnell 
33224eea22f0SMandy Kirkconnell 	/* Free extent records */
33234eea22f0SMandy Kirkconnell 	if (new_size == 0) {
33244eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
33254eea22f0SMandy Kirkconnell 	}
33264eea22f0SMandy Kirkconnell 	/* Resize direct extent list and zero any new bytes */
33274eea22f0SMandy Kirkconnell 	else if (ifp->if_real_bytes) {
33284eea22f0SMandy Kirkconnell 		/* Check if extents will fit inside the inode */
33294eea22f0SMandy Kirkconnell 		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
33304eea22f0SMandy Kirkconnell 			xfs_iext_direct_to_inline(ifp, new_size /
33314eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t));
33324eea22f0SMandy Kirkconnell 			ifp->if_bytes = new_size;
33334eea22f0SMandy Kirkconnell 			return;
33344eea22f0SMandy Kirkconnell 		}
333516a087d8SVignesh Babu 		if (!is_power_of_2(new_size)){
333640ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
33374eea22f0SMandy Kirkconnell 		}
33384eea22f0SMandy Kirkconnell 		if (rnew_size != ifp->if_real_bytes) {
3339a6f64d4aSChristoph Hellwig 			ifp->if_u1.if_extents =
33404eea22f0SMandy Kirkconnell 				kmem_realloc(ifp->if_u1.if_extents,
33414eea22f0SMandy Kirkconnell 						rnew_size,
33426785073bSDavid Chinner 						ifp->if_real_bytes, KM_NOFS);
33434eea22f0SMandy Kirkconnell 		}
33444eea22f0SMandy Kirkconnell 		if (rnew_size > ifp->if_real_bytes) {
33454eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
33464eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t)], 0,
33474eea22f0SMandy Kirkconnell 				rnew_size - ifp->if_real_bytes);
33484eea22f0SMandy Kirkconnell 		}
33494eea22f0SMandy Kirkconnell 	}
33504eea22f0SMandy Kirkconnell 	/*
33514eea22f0SMandy Kirkconnell 	 * Switch from the inline extent buffer to a direct
33524eea22f0SMandy Kirkconnell 	 * extent list. Be sure to include the inline extent
33534eea22f0SMandy Kirkconnell 	 * bytes in new_size.
33544eea22f0SMandy Kirkconnell 	 */
33554eea22f0SMandy Kirkconnell 	else {
33564eea22f0SMandy Kirkconnell 		new_size += ifp->if_bytes;
335716a087d8SVignesh Babu 		if (!is_power_of_2(new_size)) {
335840ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
33594eea22f0SMandy Kirkconnell 		}
33604eea22f0SMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, rnew_size);
33614eea22f0SMandy Kirkconnell 	}
33624eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = rnew_size;
33634eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
33644eea22f0SMandy Kirkconnell }
33654eea22f0SMandy Kirkconnell 
33664eea22f0SMandy Kirkconnell /*
33674eea22f0SMandy Kirkconnell  * Switch from linear (direct) extent records to inline buffer.
33684eea22f0SMandy Kirkconnell  */
33694eea22f0SMandy Kirkconnell void
33704eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(
33714eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33724eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents)	/* number of extents in file */
33734eea22f0SMandy Kirkconnell {
33744eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
33754eea22f0SMandy Kirkconnell 	ASSERT(nextents <= XFS_INLINE_EXTS);
33764eea22f0SMandy Kirkconnell 	/*
33774eea22f0SMandy Kirkconnell 	 * The inline buffer was zeroed when we switched
33784eea22f0SMandy Kirkconnell 	 * from inline to direct extent allocation mode,
33794eea22f0SMandy Kirkconnell 	 * so we don't need to clear it here.
33804eea22f0SMandy Kirkconnell 	 */
33814eea22f0SMandy Kirkconnell 	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
33824eea22f0SMandy Kirkconnell 		nextents * sizeof(xfs_bmbt_rec_t));
3383f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_extents);
33844eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
33854eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
33864eea22f0SMandy Kirkconnell }
33874eea22f0SMandy Kirkconnell 
33884eea22f0SMandy Kirkconnell /*
33894eea22f0SMandy Kirkconnell  * Switch from inline buffer to linear (direct) extent records.
33904eea22f0SMandy Kirkconnell  * new_size should already be rounded up to the next power of 2
33914eea22f0SMandy Kirkconnell  * by the caller (when appropriate), so use new_size as it is.
33924eea22f0SMandy Kirkconnell  * However, since new_size may be rounded up, we can't update
33934eea22f0SMandy Kirkconnell  * if_bytes here. It is the caller's responsibility to update
33944eea22f0SMandy Kirkconnell  * if_bytes upon return.
33954eea22f0SMandy Kirkconnell  */
33964eea22f0SMandy Kirkconnell void
33974eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(
33984eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33994eea22f0SMandy Kirkconnell 	int		new_size)	/* number of extents in file */
34004eea22f0SMandy Kirkconnell {
34016785073bSDavid Chinner 	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
34024eea22f0SMandy Kirkconnell 	memset(ifp->if_u1.if_extents, 0, new_size);
34034eea22f0SMandy Kirkconnell 	if (ifp->if_bytes) {
34044eea22f0SMandy Kirkconnell 		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
34054eea22f0SMandy Kirkconnell 			ifp->if_bytes);
34064eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
34074eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
34084eea22f0SMandy Kirkconnell 	}
34094eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = new_size;
34104eea22f0SMandy Kirkconnell }
34114eea22f0SMandy Kirkconnell 
34124eea22f0SMandy Kirkconnell /*
34130293ce3aSMandy Kirkconnell  * Resize an extent indirection array to new_size bytes.
34140293ce3aSMandy Kirkconnell  */
3415d96f8f89SEric Sandeen STATIC void
34160293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(
34170293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34180293ce3aSMandy Kirkconnell 	int		new_size)	/* new indirection array size */
34190293ce3aSMandy Kirkconnell {
34200293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
34210293ce3aSMandy Kirkconnell 	int		size;		/* current indirection array size */
34220293ce3aSMandy Kirkconnell 
34230293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
34240293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
34250293ce3aSMandy Kirkconnell 	size = nlists * sizeof(xfs_ext_irec_t);
34260293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes);
34270293ce3aSMandy Kirkconnell 	ASSERT((new_size >= 0) && (new_size != size));
34280293ce3aSMandy Kirkconnell 	if (new_size == 0) {
34290293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
34300293ce3aSMandy Kirkconnell 	} else {
34310293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
34320293ce3aSMandy Kirkconnell 			kmem_realloc(ifp->if_u1.if_ext_irec,
34336785073bSDavid Chinner 				new_size, size, KM_NOFS);
34340293ce3aSMandy Kirkconnell 	}
34350293ce3aSMandy Kirkconnell }
34360293ce3aSMandy Kirkconnell 
34370293ce3aSMandy Kirkconnell /*
34380293ce3aSMandy Kirkconnell  * Switch from indirection array to linear (direct) extent allocations.
34390293ce3aSMandy Kirkconnell  */
3440d96f8f89SEric Sandeen STATIC void
34410293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(
34420293ce3aSMandy Kirkconnell 	 xfs_ifork_t	*ifp)		/* inode fork pointer */
34430293ce3aSMandy Kirkconnell {
3444a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
34450293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
34460293ce3aSMandy Kirkconnell 	int		size;		/* size of file extents */
34470293ce3aSMandy Kirkconnell 
34480293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
34490293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
34500293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
34510293ce3aSMandy Kirkconnell 	size = nextents * sizeof(xfs_bmbt_rec_t);
34520293ce3aSMandy Kirkconnell 
345371a8c87fSLachlan McIlroy 	xfs_iext_irec_compact_pages(ifp);
34540293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
34550293ce3aSMandy Kirkconnell 
34560293ce3aSMandy Kirkconnell 	ep = ifp->if_u1.if_ext_irec->er_extbuf;
3457f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_ext_irec);
34580293ce3aSMandy Kirkconnell 	ifp->if_flags &= ~XFS_IFEXTIREC;
34590293ce3aSMandy Kirkconnell 	ifp->if_u1.if_extents = ep;
34600293ce3aSMandy Kirkconnell 	ifp->if_bytes = size;
34610293ce3aSMandy Kirkconnell 	if (nextents < XFS_LINEAR_EXTS) {
34620293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, size);
34630293ce3aSMandy Kirkconnell 	}
34640293ce3aSMandy Kirkconnell }
34650293ce3aSMandy Kirkconnell 
34660293ce3aSMandy Kirkconnell /*
34674eea22f0SMandy Kirkconnell  * Free incore file extents.
34684eea22f0SMandy Kirkconnell  */
34694eea22f0SMandy Kirkconnell void
34704eea22f0SMandy Kirkconnell xfs_iext_destroy(
34714eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
34724eea22f0SMandy Kirkconnell {
34730293ce3aSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
34740293ce3aSMandy Kirkconnell 		int	erp_idx;
34750293ce3aSMandy Kirkconnell 		int	nlists;
34760293ce3aSMandy Kirkconnell 
34770293ce3aSMandy Kirkconnell 		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
34780293ce3aSMandy Kirkconnell 		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
34790293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
34800293ce3aSMandy Kirkconnell 		}
34810293ce3aSMandy Kirkconnell 		ifp->if_flags &= ~XFS_IFEXTIREC;
34820293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
3483f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_extents);
34844eea22f0SMandy Kirkconnell 	} else if (ifp->if_bytes) {
34854eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
34864eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
34874eea22f0SMandy Kirkconnell 	}
34884eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = NULL;
34894eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
34904eea22f0SMandy Kirkconnell 	ifp->if_bytes = 0;
34914eea22f0SMandy Kirkconnell }
34920293ce3aSMandy Kirkconnell 
34930293ce3aSMandy Kirkconnell /*
34948867bc9bSMandy Kirkconnell  * Return a pointer to the extent record for file system block bno.
34958867bc9bSMandy Kirkconnell  */
3496a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *			/* pointer to found extent record */
34978867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext(
34988867bc9bSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34998867bc9bSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
35008867bc9bSMandy Kirkconnell 	xfs_extnum_t	*idxp)		/* index of target extent */
35018867bc9bSMandy Kirkconnell {
3502a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
35038867bc9bSMandy Kirkconnell 	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
3504a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
35058867bc9bSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
3506c41564b5SNathan Scott 	int		high;		/* upper boundary in search */
35078867bc9bSMandy Kirkconnell 	xfs_extnum_t	idx = 0;	/* index of target extent */
3508c41564b5SNathan Scott 	int		low;		/* lower boundary in search */
35098867bc9bSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of file extents */
35108867bc9bSMandy Kirkconnell 	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
35118867bc9bSMandy Kirkconnell 
35128867bc9bSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
35138867bc9bSMandy Kirkconnell 	if (nextents == 0) {
35148867bc9bSMandy Kirkconnell 		*idxp = 0;
35158867bc9bSMandy Kirkconnell 		return NULL;
35168867bc9bSMandy Kirkconnell 	}
35178867bc9bSMandy Kirkconnell 	low = 0;
35188867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
35198867bc9bSMandy Kirkconnell 		/* Find target extent list */
35208867bc9bSMandy Kirkconnell 		int	erp_idx = 0;
35218867bc9bSMandy Kirkconnell 		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
35228867bc9bSMandy Kirkconnell 		base = erp->er_extbuf;
35238867bc9bSMandy Kirkconnell 		high = erp->er_extcount - 1;
35248867bc9bSMandy Kirkconnell 	} else {
35258867bc9bSMandy Kirkconnell 		base = ifp->if_u1.if_extents;
35268867bc9bSMandy Kirkconnell 		high = nextents - 1;
35278867bc9bSMandy Kirkconnell 	}
35288867bc9bSMandy Kirkconnell 	/* Binary search extent records */
35298867bc9bSMandy Kirkconnell 	while (low <= high) {
35308867bc9bSMandy Kirkconnell 		idx = (low + high) >> 1;
35318867bc9bSMandy Kirkconnell 		ep = base + idx;
35328867bc9bSMandy Kirkconnell 		startoff = xfs_bmbt_get_startoff(ep);
35338867bc9bSMandy Kirkconnell 		blockcount = xfs_bmbt_get_blockcount(ep);
35348867bc9bSMandy Kirkconnell 		if (bno < startoff) {
35358867bc9bSMandy Kirkconnell 			high = idx - 1;
35368867bc9bSMandy Kirkconnell 		} else if (bno >= startoff + blockcount) {
35378867bc9bSMandy Kirkconnell 			low = idx + 1;
35388867bc9bSMandy Kirkconnell 		} else {
35398867bc9bSMandy Kirkconnell 			/* Convert back to file-based extent index */
35408867bc9bSMandy Kirkconnell 			if (ifp->if_flags & XFS_IFEXTIREC) {
35418867bc9bSMandy Kirkconnell 				idx += erp->er_extoff;
35428867bc9bSMandy Kirkconnell 			}
35438867bc9bSMandy Kirkconnell 			*idxp = idx;
35448867bc9bSMandy Kirkconnell 			return ep;
35458867bc9bSMandy Kirkconnell 		}
35468867bc9bSMandy Kirkconnell 	}
35478867bc9bSMandy Kirkconnell 	/* Convert back to file-based extent index */
35488867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
35498867bc9bSMandy Kirkconnell 		idx += erp->er_extoff;
35508867bc9bSMandy Kirkconnell 	}
35518867bc9bSMandy Kirkconnell 	if (bno >= startoff + blockcount) {
35528867bc9bSMandy Kirkconnell 		if (++idx == nextents) {
35538867bc9bSMandy Kirkconnell 			ep = NULL;
35548867bc9bSMandy Kirkconnell 		} else {
35558867bc9bSMandy Kirkconnell 			ep = xfs_iext_get_ext(ifp, idx);
35568867bc9bSMandy Kirkconnell 		}
35578867bc9bSMandy Kirkconnell 	}
35588867bc9bSMandy Kirkconnell 	*idxp = idx;
35598867bc9bSMandy Kirkconnell 	return ep;
35608867bc9bSMandy Kirkconnell }
35618867bc9bSMandy Kirkconnell 
35628867bc9bSMandy Kirkconnell /*
35630293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
35640293ce3aSMandy Kirkconnell  * extent record for filesystem block bno. Store the index of the
35650293ce3aSMandy Kirkconnell  * target irec in *erp_idxp.
35660293ce3aSMandy Kirkconnell  */
35678867bc9bSMandy Kirkconnell xfs_ext_irec_t *			/* pointer to found extent record */
35680293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec(
35690293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
35700293ce3aSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
35710293ce3aSMandy Kirkconnell 	int		*erp_idxp)	/* irec index of target ext list */
35720293ce3aSMandy Kirkconnell {
35730293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
35740293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
35758867bc9bSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
35760293ce3aSMandy Kirkconnell 	int		nlists;		/* number of extent irec's (lists) */
35770293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
35780293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
35790293ce3aSMandy Kirkconnell 
35800293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
35810293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
35820293ce3aSMandy Kirkconnell 	erp_idx = 0;
35830293ce3aSMandy Kirkconnell 	low = 0;
35840293ce3aSMandy Kirkconnell 	high = nlists - 1;
35850293ce3aSMandy Kirkconnell 	while (low <= high) {
35860293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
35870293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
35880293ce3aSMandy Kirkconnell 		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
35890293ce3aSMandy Kirkconnell 		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
35900293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
35910293ce3aSMandy Kirkconnell 		} else if (erp_next && bno >=
35920293ce3aSMandy Kirkconnell 			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
35930293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
35940293ce3aSMandy Kirkconnell 		} else {
35950293ce3aSMandy Kirkconnell 			break;
35960293ce3aSMandy Kirkconnell 		}
35970293ce3aSMandy Kirkconnell 	}
35980293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
35990293ce3aSMandy Kirkconnell 	return erp;
36000293ce3aSMandy Kirkconnell }
36010293ce3aSMandy Kirkconnell 
36020293ce3aSMandy Kirkconnell /*
36030293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
36040293ce3aSMandy Kirkconnell  * extent record at file extent index *idxp. Store the index of the
36050293ce3aSMandy Kirkconnell  * target irec in *erp_idxp and store the page index of the target
36060293ce3aSMandy Kirkconnell  * extent record in *idxp.
36070293ce3aSMandy Kirkconnell  */
36080293ce3aSMandy Kirkconnell xfs_ext_irec_t *
36090293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec(
36100293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
36110293ce3aSMandy Kirkconnell 	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
36120293ce3aSMandy Kirkconnell 	int		*erp_idxp,	/* pointer to target irec */
36130293ce3aSMandy Kirkconnell 	int		realloc)	/* new bytes were just added */
36140293ce3aSMandy Kirkconnell {
36150293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
36160293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
36170293ce3aSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
36180293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
36190293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
36200293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
36210293ce3aSMandy Kirkconnell 	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
36220293ce3aSMandy Kirkconnell 
36230293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
362487bef181SChristoph Hellwig 	ASSERT(page_idx >= 0);
362587bef181SChristoph Hellwig 	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
362687bef181SChristoph Hellwig 	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
362787bef181SChristoph Hellwig 
36280293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
36290293ce3aSMandy Kirkconnell 	erp_idx = 0;
36300293ce3aSMandy Kirkconnell 	low = 0;
36310293ce3aSMandy Kirkconnell 	high = nlists - 1;
36320293ce3aSMandy Kirkconnell 
36330293ce3aSMandy Kirkconnell 	/* Binary search extent irec's */
36340293ce3aSMandy Kirkconnell 	while (low <= high) {
36350293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
36360293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
36370293ce3aSMandy Kirkconnell 		prev = erp_idx > 0 ? erp - 1 : NULL;
36380293ce3aSMandy Kirkconnell 		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
36390293ce3aSMandy Kirkconnell 		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
36400293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
36410293ce3aSMandy Kirkconnell 		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
36420293ce3aSMandy Kirkconnell 			   (page_idx == erp->er_extoff + erp->er_extcount &&
36430293ce3aSMandy Kirkconnell 			    !realloc)) {
36440293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
36450293ce3aSMandy Kirkconnell 		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
36460293ce3aSMandy Kirkconnell 			   erp->er_extcount == XFS_LINEAR_EXTS) {
36470293ce3aSMandy Kirkconnell 			ASSERT(realloc);
36480293ce3aSMandy Kirkconnell 			page_idx = 0;
36490293ce3aSMandy Kirkconnell 			erp_idx++;
36500293ce3aSMandy Kirkconnell 			erp = erp_idx < nlists ? erp + 1 : NULL;
36510293ce3aSMandy Kirkconnell 			break;
36520293ce3aSMandy Kirkconnell 		} else {
36530293ce3aSMandy Kirkconnell 			page_idx -= erp->er_extoff;
36540293ce3aSMandy Kirkconnell 			break;
36550293ce3aSMandy Kirkconnell 		}
36560293ce3aSMandy Kirkconnell 	}
36570293ce3aSMandy Kirkconnell 	*idxp = page_idx;
36580293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
36590293ce3aSMandy Kirkconnell 	return(erp);
36600293ce3aSMandy Kirkconnell }
36610293ce3aSMandy Kirkconnell 
36620293ce3aSMandy Kirkconnell /*
36630293ce3aSMandy Kirkconnell  * Allocate and initialize an indirection array once the space needed
36640293ce3aSMandy Kirkconnell  * for incore extents increases above XFS_IEXT_BUFSZ.
36650293ce3aSMandy Kirkconnell  */
36660293ce3aSMandy Kirkconnell void
36670293ce3aSMandy Kirkconnell xfs_iext_irec_init(
36680293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
36690293ce3aSMandy Kirkconnell {
36700293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
36710293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
36720293ce3aSMandy Kirkconnell 
36730293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
36740293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
36750293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
36760293ce3aSMandy Kirkconnell 
36776785073bSDavid Chinner 	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
36780293ce3aSMandy Kirkconnell 
36790293ce3aSMandy Kirkconnell 	if (nextents == 0) {
36806785073bSDavid Chinner 		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
36810293ce3aSMandy Kirkconnell 	} else if (!ifp->if_real_bytes) {
36820293ce3aSMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
36830293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
36840293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
36850293ce3aSMandy Kirkconnell 	}
36860293ce3aSMandy Kirkconnell 	erp->er_extbuf = ifp->if_u1.if_extents;
36870293ce3aSMandy Kirkconnell 	erp->er_extcount = nextents;
36880293ce3aSMandy Kirkconnell 	erp->er_extoff = 0;
36890293ce3aSMandy Kirkconnell 
36900293ce3aSMandy Kirkconnell 	ifp->if_flags |= XFS_IFEXTIREC;
36910293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
36920293ce3aSMandy Kirkconnell 	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
36930293ce3aSMandy Kirkconnell 	ifp->if_u1.if_ext_irec = erp;
36940293ce3aSMandy Kirkconnell 
36950293ce3aSMandy Kirkconnell 	return;
36960293ce3aSMandy Kirkconnell }
36970293ce3aSMandy Kirkconnell 
36980293ce3aSMandy Kirkconnell /*
36990293ce3aSMandy Kirkconnell  * Allocate and initialize a new entry in the indirection array.
37000293ce3aSMandy Kirkconnell  */
37010293ce3aSMandy Kirkconnell xfs_ext_irec_t *
37020293ce3aSMandy Kirkconnell xfs_iext_irec_new(
37030293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37040293ce3aSMandy Kirkconnell 	int		erp_idx)	/* index for new irec */
37050293ce3aSMandy Kirkconnell {
37060293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
37070293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
37080293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37090293ce3aSMandy Kirkconnell 
37100293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37110293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37120293ce3aSMandy Kirkconnell 
37130293ce3aSMandy Kirkconnell 	/* Resize indirection array */
37140293ce3aSMandy Kirkconnell 	xfs_iext_realloc_indirect(ifp, ++nlists *
37150293ce3aSMandy Kirkconnell 				  sizeof(xfs_ext_irec_t));
37160293ce3aSMandy Kirkconnell 	/*
37170293ce3aSMandy Kirkconnell 	 * Move records down in the array so the
37180293ce3aSMandy Kirkconnell 	 * new page can use erp_idx.
37190293ce3aSMandy Kirkconnell 	 */
37200293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
37210293ce3aSMandy Kirkconnell 	for (i = nlists - 1; i > erp_idx; i--) {
37220293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
37230293ce3aSMandy Kirkconnell 	}
37240293ce3aSMandy Kirkconnell 	ASSERT(i == erp_idx);
37250293ce3aSMandy Kirkconnell 
37260293ce3aSMandy Kirkconnell 	/* Initialize new extent record */
37270293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
37286785073bSDavid Chinner 	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
37290293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
37300293ce3aSMandy Kirkconnell 	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
37310293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extcount = 0;
37320293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extoff = erp_idx > 0 ?
37330293ce3aSMandy Kirkconnell 		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
37340293ce3aSMandy Kirkconnell 	return (&erp[erp_idx]);
37350293ce3aSMandy Kirkconnell }
37360293ce3aSMandy Kirkconnell 
37370293ce3aSMandy Kirkconnell /*
37380293ce3aSMandy Kirkconnell  * Remove a record from the indirection array.
37390293ce3aSMandy Kirkconnell  */
37400293ce3aSMandy Kirkconnell void
37410293ce3aSMandy Kirkconnell xfs_iext_irec_remove(
37420293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37430293ce3aSMandy Kirkconnell 	int		erp_idx)	/* irec index to remove */
37440293ce3aSMandy Kirkconnell {
37450293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
37460293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
37470293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37480293ce3aSMandy Kirkconnell 
37490293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37500293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37510293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
37520293ce3aSMandy Kirkconnell 	if (erp->er_extbuf) {
37530293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
37540293ce3aSMandy Kirkconnell 			-erp->er_extcount);
3755f0e2d93cSDenys Vlasenko 		kmem_free(erp->er_extbuf);
37560293ce3aSMandy Kirkconnell 	}
37570293ce3aSMandy Kirkconnell 	/* Compact extent records */
37580293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
37590293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists - 1; i++) {
37600293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
37610293ce3aSMandy Kirkconnell 	}
37620293ce3aSMandy Kirkconnell 	/*
37630293ce3aSMandy Kirkconnell 	 * Manually free the last extent record from the indirection
37640293ce3aSMandy Kirkconnell 	 * array.  A call to xfs_iext_realloc_indirect() with a size
37650293ce3aSMandy Kirkconnell 	 * of zero would result in a call to xfs_iext_destroy() which
37660293ce3aSMandy Kirkconnell 	 * would in turn call this function again, creating a nasty
37670293ce3aSMandy Kirkconnell 	 * infinite loop.
37680293ce3aSMandy Kirkconnell 	 */
37690293ce3aSMandy Kirkconnell 	if (--nlists) {
37700293ce3aSMandy Kirkconnell 		xfs_iext_realloc_indirect(ifp,
37710293ce3aSMandy Kirkconnell 			nlists * sizeof(xfs_ext_irec_t));
37720293ce3aSMandy Kirkconnell 	} else {
3773f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_ext_irec);
37740293ce3aSMandy Kirkconnell 	}
37750293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
37760293ce3aSMandy Kirkconnell }
37770293ce3aSMandy Kirkconnell 
37780293ce3aSMandy Kirkconnell /*
37790293ce3aSMandy Kirkconnell  * This is called to clean up large amounts of unused memory allocated
37800293ce3aSMandy Kirkconnell  * by the indirection array.  Before compacting anything though, verify
37810293ce3aSMandy Kirkconnell  * that the indirection array is still needed and switch back to the
37820293ce3aSMandy Kirkconnell  * linear extent list (or even the inline buffer) if possible.  The
37830293ce3aSMandy Kirkconnell  * compaction policy is as follows:
37840293ce3aSMandy Kirkconnell  *
37850293ce3aSMandy Kirkconnell  *    Full Compaction: Extents fit into a single page (or inline buffer)
378671a8c87fSLachlan McIlroy  * Partial Compaction: Extents occupy less than 50% of allocated space
37870293ce3aSMandy Kirkconnell  *      No Compaction: Extents occupy at least 50% of allocated space
37880293ce3aSMandy Kirkconnell  */
37890293ce3aSMandy Kirkconnell void
37900293ce3aSMandy Kirkconnell xfs_iext_irec_compact(
37910293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
37920293ce3aSMandy Kirkconnell {
37930293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
37940293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37950293ce3aSMandy Kirkconnell 
37960293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37970293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37980293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
37990293ce3aSMandy Kirkconnell 
38000293ce3aSMandy Kirkconnell 	if (nextents == 0) {
38010293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
38020293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_INLINE_EXTS) {
38030293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
38040293ce3aSMandy Kirkconnell 		xfs_iext_direct_to_inline(ifp, nextents);
38050293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_LINEAR_EXTS) {
38060293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
38070293ce3aSMandy Kirkconnell 	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
38080293ce3aSMandy Kirkconnell 		xfs_iext_irec_compact_pages(ifp);
38090293ce3aSMandy Kirkconnell 	}
38100293ce3aSMandy Kirkconnell }
38110293ce3aSMandy Kirkconnell 
38120293ce3aSMandy Kirkconnell /*
38130293ce3aSMandy Kirkconnell  * Combine extents from neighboring extent pages.
38140293ce3aSMandy Kirkconnell  */
38150293ce3aSMandy Kirkconnell void
38160293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(
38170293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
38180293ce3aSMandy Kirkconnell {
38190293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
38200293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
38210293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
38220293ce3aSMandy Kirkconnell 
38230293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
38240293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38250293ce3aSMandy Kirkconnell 	while (erp_idx < nlists - 1) {
38260293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
38270293ce3aSMandy Kirkconnell 		erp_next = erp + 1;
38280293ce3aSMandy Kirkconnell 		if (erp_next->er_extcount <=
38290293ce3aSMandy Kirkconnell 		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
383071a8c87fSLachlan McIlroy 			memcpy(&erp->er_extbuf[erp->er_extcount],
38310293ce3aSMandy Kirkconnell 				erp_next->er_extbuf, erp_next->er_extcount *
38320293ce3aSMandy Kirkconnell 				sizeof(xfs_bmbt_rec_t));
38330293ce3aSMandy Kirkconnell 			erp->er_extcount += erp_next->er_extcount;
38340293ce3aSMandy Kirkconnell 			/*
38350293ce3aSMandy Kirkconnell 			 * Free page before removing extent record
38360293ce3aSMandy Kirkconnell 			 * so er_extoffs don't get modified in
38370293ce3aSMandy Kirkconnell 			 * xfs_iext_irec_remove.
38380293ce3aSMandy Kirkconnell 			 */
3839f0e2d93cSDenys Vlasenko 			kmem_free(erp_next->er_extbuf);
38400293ce3aSMandy Kirkconnell 			erp_next->er_extbuf = NULL;
38410293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx + 1);
38420293ce3aSMandy Kirkconnell 			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38430293ce3aSMandy Kirkconnell 		} else {
38440293ce3aSMandy Kirkconnell 			erp_idx++;
38450293ce3aSMandy Kirkconnell 		}
38460293ce3aSMandy Kirkconnell 	}
38470293ce3aSMandy Kirkconnell }
38480293ce3aSMandy Kirkconnell 
38490293ce3aSMandy Kirkconnell /*
38500293ce3aSMandy Kirkconnell  * This is called to update the er_extoff field in the indirection
38510293ce3aSMandy Kirkconnell  * array when extents have been added or removed from one of the
38520293ce3aSMandy Kirkconnell  * extent lists. erp_idx contains the irec index to begin updating
38530293ce3aSMandy Kirkconnell  * at and ext_diff contains the number of extents that were added
38540293ce3aSMandy Kirkconnell  * or removed.
38550293ce3aSMandy Kirkconnell  */
38560293ce3aSMandy Kirkconnell void
38570293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(
38580293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
38590293ce3aSMandy Kirkconnell 	int		erp_idx,	/* irec index to update */
38600293ce3aSMandy Kirkconnell 	int		ext_diff)	/* number of new extents */
38610293ce3aSMandy Kirkconnell {
38620293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
38630293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists */
38640293ce3aSMandy Kirkconnell 
38650293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
38660293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38670293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists; i++) {
38680293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
38690293ce3aSMandy Kirkconnell 	}
38700293ce3aSMandy Kirkconnell }
3871