xref: /openbmc/linux/fs/xfs/xfs_inode.c (revision fa96acadf1eb712fca6d59922ad93787c87e44ec)
11da177e4SLinus Torvalds /*
23e57ecf6SOlaf Weber  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
37b718769SNathan Scott  * All Rights Reserved.
41da177e4SLinus Torvalds  *
57b718769SNathan Scott  * This program is free software; you can redistribute it and/or
67b718769SNathan Scott  * modify it under the terms of the GNU General Public License as
71da177e4SLinus Torvalds  * published by the Free Software Foundation.
81da177e4SLinus Torvalds  *
97b718769SNathan Scott  * This program is distributed in the hope that it would be useful,
107b718769SNathan Scott  * but WITHOUT ANY WARRANTY; without even the implied warranty of
117b718769SNathan Scott  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
127b718769SNathan Scott  * GNU General Public License for more details.
131da177e4SLinus Torvalds  *
147b718769SNathan Scott  * You should have received a copy of the GNU General Public License
157b718769SNathan Scott  * along with this program; if not, write the Free Software Foundation,
167b718769SNathan Scott  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
171da177e4SLinus Torvalds  */
1840ebd81dSRobert P. J. Day #include <linux/log2.h>
1940ebd81dSRobert P. J. Day 
201da177e4SLinus Torvalds #include "xfs.h"
21a844f451SNathan Scott #include "xfs_fs.h"
221da177e4SLinus Torvalds #include "xfs_types.h"
231da177e4SLinus Torvalds #include "xfs_log.h"
24a844f451SNathan Scott #include "xfs_inum.h"
251da177e4SLinus Torvalds #include "xfs_trans.h"
261da177e4SLinus Torvalds #include "xfs_trans_priv.h"
271da177e4SLinus Torvalds #include "xfs_sb.h"
281da177e4SLinus Torvalds #include "xfs_ag.h"
291da177e4SLinus Torvalds #include "xfs_mount.h"
301da177e4SLinus Torvalds #include "xfs_bmap_btree.h"
31a844f451SNathan Scott #include "xfs_alloc_btree.h"
321da177e4SLinus Torvalds #include "xfs_ialloc_btree.h"
33a844f451SNathan Scott #include "xfs_attr_sf.h"
341da177e4SLinus Torvalds #include "xfs_dinode.h"
351da177e4SLinus Torvalds #include "xfs_inode.h"
361da177e4SLinus Torvalds #include "xfs_buf_item.h"
37a844f451SNathan Scott #include "xfs_inode_item.h"
38a844f451SNathan Scott #include "xfs_btree.h"
39a844f451SNathan Scott #include "xfs_alloc.h"
40a844f451SNathan Scott #include "xfs_ialloc.h"
41a844f451SNathan Scott #include "xfs_bmap.h"
421da177e4SLinus Torvalds #include "xfs_error.h"
431da177e4SLinus Torvalds #include "xfs_utils.h"
441da177e4SLinus Torvalds #include "xfs_quota.h"
452a82b8beSDavid Chinner #include "xfs_filestream.h"
46739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h"
470b1b213fSChristoph Hellwig #include "xfs_trace.h"
481da177e4SLinus Torvalds 
491da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone;
501da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone;
511da177e4SLinus Torvalds 
521da177e4SLinus Torvalds /*
538f04c47aSChristoph Hellwig  * Used in xfs_itruncate_extents().  This is the maximum number of extents
541da177e4SLinus Torvalds  * freed from a file in a single transaction.
551da177e4SLinus Torvalds  */
561da177e4SLinus Torvalds #define	XFS_ITRUNC_MAX_EXTENTS	2
571da177e4SLinus Torvalds 
581da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
591da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
601da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
611da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
621da177e4SLinus Torvalds 
632a0ec1d9SDave Chinner /*
642a0ec1d9SDave Chinner  * helper function to extract extent size hint from inode
652a0ec1d9SDave Chinner  */
662a0ec1d9SDave Chinner xfs_extlen_t
672a0ec1d9SDave Chinner xfs_get_extsz_hint(
682a0ec1d9SDave Chinner 	struct xfs_inode	*ip)
692a0ec1d9SDave Chinner {
702a0ec1d9SDave Chinner 	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
712a0ec1d9SDave Chinner 		return ip->i_d.di_extsize;
722a0ec1d9SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip))
732a0ec1d9SDave Chinner 		return ip->i_mount->m_sb.sb_rextsize;
742a0ec1d9SDave Chinner 	return 0;
752a0ec1d9SDave Chinner }
762a0ec1d9SDave Chinner 
77*fa96acadSDave Chinner /*
78*fa96acadSDave Chinner  * This is a wrapper routine around the xfs_ilock() routine used to centralize
79*fa96acadSDave Chinner  * some grungy code.  It is used in places that wish to lock the inode solely
80*fa96acadSDave Chinner  * for reading the extents.  The reason these places can't just call
81*fa96acadSDave Chinner  * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the
82*fa96acadSDave Chinner  * extents from disk for a file in b-tree format.  If the inode is in b-tree
83*fa96acadSDave Chinner  * format, then we need to lock the inode exclusively until the extents are read
84*fa96acadSDave Chinner  * in.  Locking it exclusively all the time would limit our parallelism
85*fa96acadSDave Chinner  * unnecessarily, though.  What we do instead is check to see if the extents
86*fa96acadSDave Chinner  * have been read in yet, and only lock the inode exclusively if they have not.
87*fa96acadSDave Chinner  *
88*fa96acadSDave Chinner  * The function returns a value which should be given to the corresponding
89*fa96acadSDave Chinner  * xfs_iunlock_map_shared().  This value is the mode in which the lock was
90*fa96acadSDave Chinner  * actually taken.
91*fa96acadSDave Chinner  */
92*fa96acadSDave Chinner uint
93*fa96acadSDave Chinner xfs_ilock_map_shared(
94*fa96acadSDave Chinner 	xfs_inode_t	*ip)
95*fa96acadSDave Chinner {
96*fa96acadSDave Chinner 	uint	lock_mode;
97*fa96acadSDave Chinner 
98*fa96acadSDave Chinner 	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
99*fa96acadSDave Chinner 	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
100*fa96acadSDave Chinner 		lock_mode = XFS_ILOCK_EXCL;
101*fa96acadSDave Chinner 	} else {
102*fa96acadSDave Chinner 		lock_mode = XFS_ILOCK_SHARED;
103*fa96acadSDave Chinner 	}
104*fa96acadSDave Chinner 
105*fa96acadSDave Chinner 	xfs_ilock(ip, lock_mode);
106*fa96acadSDave Chinner 
107*fa96acadSDave Chinner 	return lock_mode;
108*fa96acadSDave Chinner }
109*fa96acadSDave Chinner 
110*fa96acadSDave Chinner /*
111*fa96acadSDave Chinner  * This is simply the unlock routine to go with xfs_ilock_map_shared().
112*fa96acadSDave Chinner  * All it does is call xfs_iunlock() with the given lock_mode.
113*fa96acadSDave Chinner  */
114*fa96acadSDave Chinner void
115*fa96acadSDave Chinner xfs_iunlock_map_shared(
116*fa96acadSDave Chinner 	xfs_inode_t	*ip,
117*fa96acadSDave Chinner 	unsigned int	lock_mode)
118*fa96acadSDave Chinner {
119*fa96acadSDave Chinner 	xfs_iunlock(ip, lock_mode);
120*fa96acadSDave Chinner }
121*fa96acadSDave Chinner 
122*fa96acadSDave Chinner /*
123*fa96acadSDave Chinner  * The xfs inode contains 2 locks: a multi-reader lock called the
124*fa96acadSDave Chinner  * i_iolock and a multi-reader lock called the i_lock.  This routine
125*fa96acadSDave Chinner  * allows either or both of the locks to be obtained.
126*fa96acadSDave Chinner  *
127*fa96acadSDave Chinner  * The 2 locks should always be ordered so that the IO lock is
128*fa96acadSDave Chinner  * obtained first in order to prevent deadlock.
129*fa96acadSDave Chinner  *
130*fa96acadSDave Chinner  * ip -- the inode being locked
131*fa96acadSDave Chinner  * lock_flags -- this parameter indicates the inode's locks
132*fa96acadSDave Chinner  *       to be locked.  It can be:
133*fa96acadSDave Chinner  *		XFS_IOLOCK_SHARED,
134*fa96acadSDave Chinner  *		XFS_IOLOCK_EXCL,
135*fa96acadSDave Chinner  *		XFS_ILOCK_SHARED,
136*fa96acadSDave Chinner  *		XFS_ILOCK_EXCL,
137*fa96acadSDave Chinner  *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
138*fa96acadSDave Chinner  *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
139*fa96acadSDave Chinner  *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
140*fa96acadSDave Chinner  *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
141*fa96acadSDave Chinner  */
142*fa96acadSDave Chinner void
143*fa96acadSDave Chinner xfs_ilock(
144*fa96acadSDave Chinner 	xfs_inode_t		*ip,
145*fa96acadSDave Chinner 	uint			lock_flags)
146*fa96acadSDave Chinner {
147*fa96acadSDave Chinner 	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
148*fa96acadSDave Chinner 
149*fa96acadSDave Chinner 	/*
150*fa96acadSDave Chinner 	 * You can't set both SHARED and EXCL for the same lock,
151*fa96acadSDave Chinner 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
152*fa96acadSDave Chinner 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
153*fa96acadSDave Chinner 	 */
154*fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
155*fa96acadSDave Chinner 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
156*fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
157*fa96acadSDave Chinner 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
158*fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
159*fa96acadSDave Chinner 
160*fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
161*fa96acadSDave Chinner 		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
162*fa96acadSDave Chinner 	else if (lock_flags & XFS_IOLOCK_SHARED)
163*fa96acadSDave Chinner 		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
164*fa96acadSDave Chinner 
165*fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL)
166*fa96acadSDave Chinner 		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
167*fa96acadSDave Chinner 	else if (lock_flags & XFS_ILOCK_SHARED)
168*fa96acadSDave Chinner 		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
169*fa96acadSDave Chinner }
170*fa96acadSDave Chinner 
171*fa96acadSDave Chinner /*
172*fa96acadSDave Chinner  * This is just like xfs_ilock(), except that the caller
173*fa96acadSDave Chinner  * is guaranteed not to sleep.  It returns 1 if it gets
174*fa96acadSDave Chinner  * the requested locks and 0 otherwise.  If the IO lock is
175*fa96acadSDave Chinner  * obtained but the inode lock cannot be, then the IO lock
176*fa96acadSDave Chinner  * is dropped before returning.
177*fa96acadSDave Chinner  *
178*fa96acadSDave Chinner  * ip -- the inode being locked
179*fa96acadSDave Chinner  * lock_flags -- this parameter indicates the inode's locks to be
180*fa96acadSDave Chinner  *       to be locked.  See the comment for xfs_ilock() for a list
181*fa96acadSDave Chinner  *	 of valid values.
182*fa96acadSDave Chinner  */
183*fa96acadSDave Chinner int
184*fa96acadSDave Chinner xfs_ilock_nowait(
185*fa96acadSDave Chinner 	xfs_inode_t		*ip,
186*fa96acadSDave Chinner 	uint			lock_flags)
187*fa96acadSDave Chinner {
188*fa96acadSDave Chinner 	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
189*fa96acadSDave Chinner 
190*fa96acadSDave Chinner 	/*
191*fa96acadSDave Chinner 	 * You can't set both SHARED and EXCL for the same lock,
192*fa96acadSDave Chinner 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
193*fa96acadSDave Chinner 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
194*fa96acadSDave Chinner 	 */
195*fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
196*fa96acadSDave Chinner 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
197*fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
198*fa96acadSDave Chinner 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
199*fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
200*fa96acadSDave Chinner 
201*fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL) {
202*fa96acadSDave Chinner 		if (!mrtryupdate(&ip->i_iolock))
203*fa96acadSDave Chinner 			goto out;
204*fa96acadSDave Chinner 	} else if (lock_flags & XFS_IOLOCK_SHARED) {
205*fa96acadSDave Chinner 		if (!mrtryaccess(&ip->i_iolock))
206*fa96acadSDave Chinner 			goto out;
207*fa96acadSDave Chinner 	}
208*fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL) {
209*fa96acadSDave Chinner 		if (!mrtryupdate(&ip->i_lock))
210*fa96acadSDave Chinner 			goto out_undo_iolock;
211*fa96acadSDave Chinner 	} else if (lock_flags & XFS_ILOCK_SHARED) {
212*fa96acadSDave Chinner 		if (!mrtryaccess(&ip->i_lock))
213*fa96acadSDave Chinner 			goto out_undo_iolock;
214*fa96acadSDave Chinner 	}
215*fa96acadSDave Chinner 	return 1;
216*fa96acadSDave Chinner 
217*fa96acadSDave Chinner  out_undo_iolock:
218*fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
219*fa96acadSDave Chinner 		mrunlock_excl(&ip->i_iolock);
220*fa96acadSDave Chinner 	else if (lock_flags & XFS_IOLOCK_SHARED)
221*fa96acadSDave Chinner 		mrunlock_shared(&ip->i_iolock);
222*fa96acadSDave Chinner  out:
223*fa96acadSDave Chinner 	return 0;
224*fa96acadSDave Chinner }
225*fa96acadSDave Chinner 
226*fa96acadSDave Chinner /*
227*fa96acadSDave Chinner  * xfs_iunlock() is used to drop the inode locks acquired with
228*fa96acadSDave Chinner  * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
229*fa96acadSDave Chinner  * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
230*fa96acadSDave Chinner  * that we know which locks to drop.
231*fa96acadSDave Chinner  *
232*fa96acadSDave Chinner  * ip -- the inode being unlocked
233*fa96acadSDave Chinner  * lock_flags -- this parameter indicates the inode's locks to be
234*fa96acadSDave Chinner  *       to be unlocked.  See the comment for xfs_ilock() for a list
235*fa96acadSDave Chinner  *	 of valid values for this parameter.
236*fa96acadSDave Chinner  *
237*fa96acadSDave Chinner  */
238*fa96acadSDave Chinner void
239*fa96acadSDave Chinner xfs_iunlock(
240*fa96acadSDave Chinner 	xfs_inode_t		*ip,
241*fa96acadSDave Chinner 	uint			lock_flags)
242*fa96acadSDave Chinner {
243*fa96acadSDave Chinner 	/*
244*fa96acadSDave Chinner 	 * You can't set both SHARED and EXCL for the same lock,
245*fa96acadSDave Chinner 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
246*fa96acadSDave Chinner 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
247*fa96acadSDave Chinner 	 */
248*fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
249*fa96acadSDave Chinner 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
250*fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
251*fa96acadSDave Chinner 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
252*fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
253*fa96acadSDave Chinner 	ASSERT(lock_flags != 0);
254*fa96acadSDave Chinner 
255*fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
256*fa96acadSDave Chinner 		mrunlock_excl(&ip->i_iolock);
257*fa96acadSDave Chinner 	else if (lock_flags & XFS_IOLOCK_SHARED)
258*fa96acadSDave Chinner 		mrunlock_shared(&ip->i_iolock);
259*fa96acadSDave Chinner 
260*fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL)
261*fa96acadSDave Chinner 		mrunlock_excl(&ip->i_lock);
262*fa96acadSDave Chinner 	else if (lock_flags & XFS_ILOCK_SHARED)
263*fa96acadSDave Chinner 		mrunlock_shared(&ip->i_lock);
264*fa96acadSDave Chinner 
265*fa96acadSDave Chinner 	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
266*fa96acadSDave Chinner }
267*fa96acadSDave Chinner 
268*fa96acadSDave Chinner /*
269*fa96acadSDave Chinner  * give up write locks.  the i/o lock cannot be held nested
270*fa96acadSDave Chinner  * if it is being demoted.
271*fa96acadSDave Chinner  */
272*fa96acadSDave Chinner void
273*fa96acadSDave Chinner xfs_ilock_demote(
274*fa96acadSDave Chinner 	xfs_inode_t		*ip,
275*fa96acadSDave Chinner 	uint			lock_flags)
276*fa96acadSDave Chinner {
277*fa96acadSDave Chinner 	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
278*fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
279*fa96acadSDave Chinner 
280*fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL)
281*fa96acadSDave Chinner 		mrdemote(&ip->i_lock);
282*fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
283*fa96acadSDave Chinner 		mrdemote(&ip->i_iolock);
284*fa96acadSDave Chinner 
285*fa96acadSDave Chinner 	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
286*fa96acadSDave Chinner }
287*fa96acadSDave Chinner 
288*fa96acadSDave Chinner #ifdef DEBUG
289*fa96acadSDave Chinner int
290*fa96acadSDave Chinner xfs_isilocked(
291*fa96acadSDave Chinner 	xfs_inode_t		*ip,
292*fa96acadSDave Chinner 	uint			lock_flags)
293*fa96acadSDave Chinner {
294*fa96acadSDave Chinner 	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
295*fa96acadSDave Chinner 		if (!(lock_flags & XFS_ILOCK_SHARED))
296*fa96acadSDave Chinner 			return !!ip->i_lock.mr_writer;
297*fa96acadSDave Chinner 		return rwsem_is_locked(&ip->i_lock.mr_lock);
298*fa96acadSDave Chinner 	}
299*fa96acadSDave Chinner 
300*fa96acadSDave Chinner 	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
301*fa96acadSDave Chinner 		if (!(lock_flags & XFS_IOLOCK_SHARED))
302*fa96acadSDave Chinner 			return !!ip->i_iolock.mr_writer;
303*fa96acadSDave Chinner 		return rwsem_is_locked(&ip->i_iolock.mr_lock);
304*fa96acadSDave Chinner 	}
305*fa96acadSDave Chinner 
306*fa96acadSDave Chinner 	ASSERT(0);
307*fa96acadSDave Chinner 	return 0;
308*fa96acadSDave Chinner }
309*fa96acadSDave Chinner #endif
310*fa96acadSDave Chinner 
311*fa96acadSDave Chinner void
312*fa96acadSDave Chinner __xfs_iflock(
313*fa96acadSDave Chinner 	struct xfs_inode	*ip)
314*fa96acadSDave Chinner {
315*fa96acadSDave Chinner 	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
316*fa96acadSDave Chinner 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
317*fa96acadSDave Chinner 
318*fa96acadSDave Chinner 	do {
319*fa96acadSDave Chinner 		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
320*fa96acadSDave Chinner 		if (xfs_isiflocked(ip))
321*fa96acadSDave Chinner 			io_schedule();
322*fa96acadSDave Chinner 	} while (!xfs_iflock_nowait(ip));
323*fa96acadSDave Chinner 
324*fa96acadSDave Chinner 	finish_wait(wq, &wait.wait);
325*fa96acadSDave Chinner }
326*fa96acadSDave Chinner 
3271da177e4SLinus Torvalds #ifdef DEBUG
3281da177e4SLinus Torvalds /*
3291da177e4SLinus Torvalds  * Make sure that the extents in the given memory buffer
3301da177e4SLinus Torvalds  * are valid.
3311da177e4SLinus Torvalds  */
3321da177e4SLinus Torvalds STATIC void
3331da177e4SLinus Torvalds xfs_validate_extents(
3344eea22f0SMandy Kirkconnell 	xfs_ifork_t		*ifp,
3351da177e4SLinus Torvalds 	int			nrecs,
3361da177e4SLinus Torvalds 	xfs_exntfmt_t		fmt)
3371da177e4SLinus Torvalds {
3381da177e4SLinus Torvalds 	xfs_bmbt_irec_t		irec;
339a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t	rec;
3401da177e4SLinus Torvalds 	int			i;
3411da177e4SLinus Torvalds 
3421da177e4SLinus Torvalds 	for (i = 0; i < nrecs; i++) {
343a6f64d4aSChristoph Hellwig 		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
344a6f64d4aSChristoph Hellwig 		rec.l0 = get_unaligned(&ep->l0);
345a6f64d4aSChristoph Hellwig 		rec.l1 = get_unaligned(&ep->l1);
3461da177e4SLinus Torvalds 		xfs_bmbt_get_all(&rec, &irec);
3471da177e4SLinus Torvalds 		if (fmt == XFS_EXTFMT_NOSTATE)
3481da177e4SLinus Torvalds 			ASSERT(irec.br_state == XFS_EXT_NORM);
3491da177e4SLinus Torvalds 	}
3501da177e4SLinus Torvalds }
3511da177e4SLinus Torvalds #else /* DEBUG */
352a6f64d4aSChristoph Hellwig #define xfs_validate_extents(ifp, nrecs, fmt)
3531da177e4SLinus Torvalds #endif /* DEBUG */
3541da177e4SLinus Torvalds 
3551da177e4SLinus Torvalds /*
3561da177e4SLinus Torvalds  * Check that none of the inode's in the buffer have a next
3571da177e4SLinus Torvalds  * unlinked field of 0.
3581da177e4SLinus Torvalds  */
3591da177e4SLinus Torvalds #if defined(DEBUG)
3601da177e4SLinus Torvalds void
3611da177e4SLinus Torvalds xfs_inobp_check(
3621da177e4SLinus Torvalds 	xfs_mount_t	*mp,
3631da177e4SLinus Torvalds 	xfs_buf_t	*bp)
3641da177e4SLinus Torvalds {
3651da177e4SLinus Torvalds 	int		i;
3661da177e4SLinus Torvalds 	int		j;
3671da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
3681da177e4SLinus Torvalds 
3691da177e4SLinus Torvalds 	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
3701da177e4SLinus Torvalds 
3711da177e4SLinus Torvalds 	for (i = 0; i < j; i++) {
3721da177e4SLinus Torvalds 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
3731da177e4SLinus Torvalds 					i * mp->m_sb.sb_inodesize);
3741da177e4SLinus Torvalds 		if (!dip->di_next_unlinked)  {
37553487786SDave Chinner 			xfs_alert(mp,
37653487786SDave Chinner 	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
3771da177e4SLinus Torvalds 				bp);
3781da177e4SLinus Torvalds 			ASSERT(dip->di_next_unlinked);
3791da177e4SLinus Torvalds 		}
3801da177e4SLinus Torvalds 	}
3811da177e4SLinus Torvalds }
3821da177e4SLinus Torvalds #endif
3831da177e4SLinus Torvalds 
3841da177e4SLinus Torvalds /*
385475ee413SChristoph Hellwig  * This routine is called to map an inode to the buffer containing the on-disk
386475ee413SChristoph Hellwig  * version of the inode.  It returns a pointer to the buffer containing the
387475ee413SChristoph Hellwig  * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
388475ee413SChristoph Hellwig  * pointer to the on-disk inode within that buffer.
389475ee413SChristoph Hellwig  *
390475ee413SChristoph Hellwig  * If a non-zero error is returned, then the contents of bpp and dipp are
391475ee413SChristoph Hellwig  * undefined.
3924ae29b43SDavid Chinner  */
393475ee413SChristoph Hellwig int
3944ae29b43SDavid Chinner xfs_imap_to_bp(
395475ee413SChristoph Hellwig 	struct xfs_mount	*mp,
396475ee413SChristoph Hellwig 	struct xfs_trans	*tp,
39792bfc6e7SChristoph Hellwig 	struct xfs_imap		*imap,
398475ee413SChristoph Hellwig 	struct xfs_dinode	**dipp,
399475ee413SChristoph Hellwig 	struct xfs_buf		**bpp,
4004ae29b43SDavid Chinner 	uint			buf_flags,
401b48d8d64SChristoph Hellwig 	uint			iget_flags)
4024ae29b43SDavid Chinner {
403475ee413SChristoph Hellwig 	struct xfs_buf		*bp;
4044ae29b43SDavid Chinner 	int			error;
4054ae29b43SDavid Chinner 	int			i;
4064ae29b43SDavid Chinner 	int			ni;
4074ae29b43SDavid Chinner 
408611c9946SDave Chinner 	buf_flags |= XBF_UNMAPPED;
4094ae29b43SDavid Chinner 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
410a3f74ffbSDavid Chinner 				   (int)imap->im_len, buf_flags, &bp);
4114ae29b43SDavid Chinner 	if (error) {
412a3f74ffbSDavid Chinner 		if (error != EAGAIN) {
4130b932cccSDave Chinner 			xfs_warn(mp,
4140b932cccSDave Chinner 				"%s: xfs_trans_read_buf() returned error %d.",
4150b932cccSDave Chinner 				__func__, error);
416a3f74ffbSDavid Chinner 		} else {
4170cadda1cSChristoph Hellwig 			ASSERT(buf_flags & XBF_TRYLOCK);
418a3f74ffbSDavid Chinner 		}
4194ae29b43SDavid Chinner 		return error;
4204ae29b43SDavid Chinner 	}
4214ae29b43SDavid Chinner 
4224ae29b43SDavid Chinner 	/*
4234ae29b43SDavid Chinner 	 * Validate the magic number and version of every inode in the buffer
4244ae29b43SDavid Chinner 	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
4254ae29b43SDavid Chinner 	 */
4264ae29b43SDavid Chinner #ifdef DEBUG
4274ae29b43SDavid Chinner 	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
4284ae29b43SDavid Chinner #else	/* usual case */
4294ae29b43SDavid Chinner 	ni = 1;
4304ae29b43SDavid Chinner #endif
4314ae29b43SDavid Chinner 
4324ae29b43SDavid Chinner 	for (i = 0; i < ni; i++) {
4334ae29b43SDavid Chinner 		int		di_ok;
4344ae29b43SDavid Chinner 		xfs_dinode_t	*dip;
4354ae29b43SDavid Chinner 
4364ae29b43SDavid Chinner 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
4374ae29b43SDavid Chinner 					(i << mp->m_sb.sb_inodelog));
43869ef921bSChristoph Hellwig 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
43981591fe2SChristoph Hellwig 			    XFS_DINODE_GOOD_VERSION(dip->di_version);
4404ae29b43SDavid Chinner 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
4414ae29b43SDavid Chinner 						XFS_ERRTAG_ITOBP_INOTOBP,
4424ae29b43SDavid Chinner 						XFS_RANDOM_ITOBP_INOTOBP))) {
4431920779eSDave Chinner 			if (iget_flags & XFS_IGET_UNTRUSTED) {
4444ae29b43SDavid Chinner 				xfs_trans_brelse(tp, bp);
4454ae29b43SDavid Chinner 				return XFS_ERROR(EINVAL);
4464ae29b43SDavid Chinner 			}
447475ee413SChristoph Hellwig 			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
448475ee413SChristoph Hellwig 					     mp, dip);
4494ae29b43SDavid Chinner #ifdef DEBUG
4500b932cccSDave Chinner 			xfs_emerg(mp,
4510b932cccSDave Chinner 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
4524ae29b43SDavid Chinner 				(unsigned long long)imap->im_blkno, i,
45381591fe2SChristoph Hellwig 				be16_to_cpu(dip->di_magic));
4540b932cccSDave Chinner 			ASSERT(0);
4554ae29b43SDavid Chinner #endif
4564ae29b43SDavid Chinner 			xfs_trans_brelse(tp, bp);
4574ae29b43SDavid Chinner 			return XFS_ERROR(EFSCORRUPTED);
4584ae29b43SDavid Chinner 		}
4594ae29b43SDavid Chinner 	}
4604ae29b43SDavid Chinner 
4614ae29b43SDavid Chinner 	xfs_inobp_check(mp, bp);
462475ee413SChristoph Hellwig 
4634ae29b43SDavid Chinner 	*bpp = bp;
464475ee413SChristoph Hellwig 	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
4654ae29b43SDavid Chinner 	return 0;
4664ae29b43SDavid Chinner }
4674ae29b43SDavid Chinner 
4684ae29b43SDavid Chinner /*
4691da177e4SLinus Torvalds  * Move inode type and inode format specific information from the
4701da177e4SLinus Torvalds  * on-disk inode to the in-core inode.  For fifos, devs, and sockets
4711da177e4SLinus Torvalds  * this means set if_rdev to the proper value.  For files, directories,
4721da177e4SLinus Torvalds  * and symlinks this means to bring in the in-line data or extent
4731da177e4SLinus Torvalds  * pointers.  For a file in B-tree format, only the root is immediately
4741da177e4SLinus Torvalds  * brought in-core.  The rest will be in-lined in if_extents when it
4751da177e4SLinus Torvalds  * is first referenced (see xfs_iread_extents()).
4761da177e4SLinus Torvalds  */
4771da177e4SLinus Torvalds STATIC int
4781da177e4SLinus Torvalds xfs_iformat(
4791da177e4SLinus Torvalds 	xfs_inode_t		*ip,
4801da177e4SLinus Torvalds 	xfs_dinode_t		*dip)
4811da177e4SLinus Torvalds {
4821da177e4SLinus Torvalds 	xfs_attr_shortform_t	*atp;
4831da177e4SLinus Torvalds 	int			size;
4848096b1ebSChristoph Hellwig 	int			error = 0;
4851da177e4SLinus Torvalds 	xfs_fsize_t             di_size;
4861da177e4SLinus Torvalds 
48781591fe2SChristoph Hellwig 	if (unlikely(be32_to_cpu(dip->di_nextents) +
48881591fe2SChristoph Hellwig 		     be16_to_cpu(dip->di_anextents) >
48981591fe2SChristoph Hellwig 		     be64_to_cpu(dip->di_nblocks))) {
49065333b4cSDave Chinner 		xfs_warn(ip->i_mount,
4913762ec6bSNathan Scott 			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
4921da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
49381591fe2SChristoph Hellwig 			(int)(be32_to_cpu(dip->di_nextents) +
49481591fe2SChristoph Hellwig 			      be16_to_cpu(dip->di_anextents)),
4951da177e4SLinus Torvalds 			(unsigned long long)
49681591fe2SChristoph Hellwig 				be64_to_cpu(dip->di_nblocks));
4971da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
4981da177e4SLinus Torvalds 				     ip->i_mount, dip);
4991da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5001da177e4SLinus Torvalds 	}
5011da177e4SLinus Torvalds 
50281591fe2SChristoph Hellwig 	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
50365333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
5041da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
50581591fe2SChristoph Hellwig 			dip->di_forkoff);
5061da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
5071da177e4SLinus Torvalds 				     ip->i_mount, dip);
5081da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5091da177e4SLinus Torvalds 	}
5101da177e4SLinus Torvalds 
511b89d4208SChristoph Hellwig 	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
512b89d4208SChristoph Hellwig 		     !ip->i_mount->m_rtdev_targp)) {
51365333b4cSDave Chinner 		xfs_warn(ip->i_mount,
514b89d4208SChristoph Hellwig 			"corrupt dinode %Lu, has realtime flag set.",
515b89d4208SChristoph Hellwig 			ip->i_ino);
516b89d4208SChristoph Hellwig 		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
517b89d4208SChristoph Hellwig 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
518b89d4208SChristoph Hellwig 		return XFS_ERROR(EFSCORRUPTED);
519b89d4208SChristoph Hellwig 	}
520b89d4208SChristoph Hellwig 
5211da177e4SLinus Torvalds 	switch (ip->i_d.di_mode & S_IFMT) {
5221da177e4SLinus Torvalds 	case S_IFIFO:
5231da177e4SLinus Torvalds 	case S_IFCHR:
5241da177e4SLinus Torvalds 	case S_IFBLK:
5251da177e4SLinus Torvalds 	case S_IFSOCK:
52681591fe2SChristoph Hellwig 		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
5271da177e4SLinus Torvalds 			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
5281da177e4SLinus Torvalds 					      ip->i_mount, dip);
5291da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
5301da177e4SLinus Torvalds 		}
5311da177e4SLinus Torvalds 		ip->i_d.di_size = 0;
53281591fe2SChristoph Hellwig 		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
5331da177e4SLinus Torvalds 		break;
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds 	case S_IFREG:
5361da177e4SLinus Torvalds 	case S_IFLNK:
5371da177e4SLinus Torvalds 	case S_IFDIR:
53881591fe2SChristoph Hellwig 		switch (dip->di_format) {
5391da177e4SLinus Torvalds 		case XFS_DINODE_FMT_LOCAL:
5401da177e4SLinus Torvalds 			/*
5411da177e4SLinus Torvalds 			 * no local regular files yet
5421da177e4SLinus Torvalds 			 */
543abbede1bSAl Viro 			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
54465333b4cSDave Chinner 				xfs_warn(ip->i_mount,
54565333b4cSDave Chinner 			"corrupt inode %Lu (local format for regular file).",
5461da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino);
5471da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
5481da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
5491da177e4SLinus Torvalds 						     ip->i_mount, dip);
5501da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
5511da177e4SLinus Torvalds 			}
5521da177e4SLinus Torvalds 
55381591fe2SChristoph Hellwig 			di_size = be64_to_cpu(dip->di_size);
5541da177e4SLinus Torvalds 			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
55565333b4cSDave Chinner 				xfs_warn(ip->i_mount,
55665333b4cSDave Chinner 			"corrupt inode %Lu (bad size %Ld for local inode).",
5571da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino,
5581da177e4SLinus Torvalds 					(long long) di_size);
5591da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
5601da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
5611da177e4SLinus Torvalds 						     ip->i_mount, dip);
5621da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
5631da177e4SLinus Torvalds 			}
5641da177e4SLinus Torvalds 
5651da177e4SLinus Torvalds 			size = (int)di_size;
5661da177e4SLinus Torvalds 			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
5671da177e4SLinus Torvalds 			break;
5681da177e4SLinus Torvalds 		case XFS_DINODE_FMT_EXTENTS:
5691da177e4SLinus Torvalds 			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
5701da177e4SLinus Torvalds 			break;
5711da177e4SLinus Torvalds 		case XFS_DINODE_FMT_BTREE:
5721da177e4SLinus Torvalds 			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
5731da177e4SLinus Torvalds 			break;
5741da177e4SLinus Torvalds 		default:
5751da177e4SLinus Torvalds 			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
5761da177e4SLinus Torvalds 					 ip->i_mount);
5771da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
5781da177e4SLinus Torvalds 		}
5791da177e4SLinus Torvalds 		break;
5801da177e4SLinus Torvalds 
5811da177e4SLinus Torvalds 	default:
5821da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
5831da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5841da177e4SLinus Torvalds 	}
5851da177e4SLinus Torvalds 	if (error) {
5861da177e4SLinus Torvalds 		return error;
5871da177e4SLinus Torvalds 	}
5881da177e4SLinus Torvalds 	if (!XFS_DFORK_Q(dip))
5891da177e4SLinus Torvalds 		return 0;
5908096b1ebSChristoph Hellwig 
5911da177e4SLinus Torvalds 	ASSERT(ip->i_afp == NULL);
5924a7edddcSDave Chinner 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
5938096b1ebSChristoph Hellwig 
59481591fe2SChristoph Hellwig 	switch (dip->di_aformat) {
5951da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
5961da177e4SLinus Torvalds 		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
5973b244aa8SNathan Scott 		size = be16_to_cpu(atp->hdr.totsize);
5982809f76aSChristoph Hellwig 
5992809f76aSChristoph Hellwig 		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
60065333b4cSDave Chinner 			xfs_warn(ip->i_mount,
60165333b4cSDave Chinner 				"corrupt inode %Lu (bad attr fork size %Ld).",
6022809f76aSChristoph Hellwig 				(unsigned long long) ip->i_ino,
6032809f76aSChristoph Hellwig 				(long long) size);
6042809f76aSChristoph Hellwig 			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
6052809f76aSChristoph Hellwig 					     XFS_ERRLEVEL_LOW,
6062809f76aSChristoph Hellwig 					     ip->i_mount, dip);
6072809f76aSChristoph Hellwig 			return XFS_ERROR(EFSCORRUPTED);
6082809f76aSChristoph Hellwig 		}
6092809f76aSChristoph Hellwig 
6101da177e4SLinus Torvalds 		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
6111da177e4SLinus Torvalds 		break;
6121da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
6131da177e4SLinus Torvalds 		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
6141da177e4SLinus Torvalds 		break;
6151da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
6161da177e4SLinus Torvalds 		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
6171da177e4SLinus Torvalds 		break;
6181da177e4SLinus Torvalds 	default:
6191da177e4SLinus Torvalds 		error = XFS_ERROR(EFSCORRUPTED);
6201da177e4SLinus Torvalds 		break;
6211da177e4SLinus Torvalds 	}
6221da177e4SLinus Torvalds 	if (error) {
6231da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
6241da177e4SLinus Torvalds 		ip->i_afp = NULL;
6251da177e4SLinus Torvalds 		xfs_idestroy_fork(ip, XFS_DATA_FORK);
6261da177e4SLinus Torvalds 	}
6271da177e4SLinus Torvalds 	return error;
6281da177e4SLinus Torvalds }
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds /*
6311da177e4SLinus Torvalds  * The file is in-lined in the on-disk inode.
6321da177e4SLinus Torvalds  * If it fits into if_inline_data, then copy
6331da177e4SLinus Torvalds  * it there, otherwise allocate a buffer for it
6341da177e4SLinus Torvalds  * and copy the data there.  Either way, set
6351da177e4SLinus Torvalds  * if_data to point at the data.
6361da177e4SLinus Torvalds  * If we allocate a buffer for the data, make
6371da177e4SLinus Torvalds  * sure that its size is a multiple of 4 and
6381da177e4SLinus Torvalds  * record the real size in i_real_bytes.
6391da177e4SLinus Torvalds  */
6401da177e4SLinus Torvalds STATIC int
6411da177e4SLinus Torvalds xfs_iformat_local(
6421da177e4SLinus Torvalds 	xfs_inode_t	*ip,
6431da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
6441da177e4SLinus Torvalds 	int		whichfork,
6451da177e4SLinus Torvalds 	int		size)
6461da177e4SLinus Torvalds {
6471da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
6481da177e4SLinus Torvalds 	int		real_size;
6491da177e4SLinus Torvalds 
6501da177e4SLinus Torvalds 	/*
6511da177e4SLinus Torvalds 	 * If the size is unreasonable, then something
6521da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
6531da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
6541da177e4SLinus Torvalds 	 */
6551da177e4SLinus Torvalds 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
65665333b4cSDave Chinner 		xfs_warn(ip->i_mount,
65765333b4cSDave Chinner 	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
6581da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, size,
6591da177e4SLinus Torvalds 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
6601da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
6611da177e4SLinus Torvalds 				     ip->i_mount, dip);
6621da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
6631da177e4SLinus Torvalds 	}
6641da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
6651da177e4SLinus Torvalds 	real_size = 0;
6661da177e4SLinus Torvalds 	if (size == 0)
6671da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
6681da177e4SLinus Torvalds 	else if (size <= sizeof(ifp->if_u2.if_inline_data))
6691da177e4SLinus Torvalds 		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
6701da177e4SLinus Torvalds 	else {
6711da177e4SLinus Torvalds 		real_size = roundup(size, 4);
6724a7edddcSDave Chinner 		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
6731da177e4SLinus Torvalds 	}
6741da177e4SLinus Torvalds 	ifp->if_bytes = size;
6751da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
6761da177e4SLinus Torvalds 	if (size)
6771da177e4SLinus Torvalds 		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
6781da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
6791da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFINLINE;
6801da177e4SLinus Torvalds 	return 0;
6811da177e4SLinus Torvalds }
6821da177e4SLinus Torvalds 
6831da177e4SLinus Torvalds /*
6841da177e4SLinus Torvalds  * The file consists of a set of extents all
6851da177e4SLinus Torvalds  * of which fit into the on-disk inode.
6861da177e4SLinus Torvalds  * If there are few enough extents to fit into
6871da177e4SLinus Torvalds  * the if_inline_ext, then copy them there.
6881da177e4SLinus Torvalds  * Otherwise allocate a buffer for them and copy
6891da177e4SLinus Torvalds  * them into it.  Either way, set if_extents
6901da177e4SLinus Torvalds  * to point at the extents.
6911da177e4SLinus Torvalds  */
6921da177e4SLinus Torvalds STATIC int
6931da177e4SLinus Torvalds xfs_iformat_extents(
6941da177e4SLinus Torvalds 	xfs_inode_t	*ip,
6951da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
6961da177e4SLinus Torvalds 	int		whichfork)
6971da177e4SLinus Torvalds {
698a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t	*dp;
6991da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
7001da177e4SLinus Torvalds 	int		nex;
7011da177e4SLinus Torvalds 	int		size;
7021da177e4SLinus Torvalds 	int		i;
7031da177e4SLinus Torvalds 
7041da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
7051da177e4SLinus Torvalds 	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
7061da177e4SLinus Torvalds 	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
7071da177e4SLinus Torvalds 
7081da177e4SLinus Torvalds 	/*
7091da177e4SLinus Torvalds 	 * If the number of extents is unreasonable, then something
7101da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
7111da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
7121da177e4SLinus Torvalds 	 */
7131da177e4SLinus Torvalds 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
71465333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
7151da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, nex);
7161da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
7171da177e4SLinus Torvalds 				     ip->i_mount, dip);
7181da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
7191da177e4SLinus Torvalds 	}
7201da177e4SLinus Torvalds 
7214eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
7221da177e4SLinus Torvalds 	if (nex == 0)
7231da177e4SLinus Torvalds 		ifp->if_u1.if_extents = NULL;
7241da177e4SLinus Torvalds 	else if (nex <= XFS_INLINE_EXTS)
7251da177e4SLinus Torvalds 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
7264eea22f0SMandy Kirkconnell 	else
7274eea22f0SMandy Kirkconnell 		xfs_iext_add(ifp, 0, nex);
7284eea22f0SMandy Kirkconnell 
7291da177e4SLinus Torvalds 	ifp->if_bytes = size;
7301da177e4SLinus Torvalds 	if (size) {
7311da177e4SLinus Torvalds 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
732a6f64d4aSChristoph Hellwig 		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
7334eea22f0SMandy Kirkconnell 		for (i = 0; i < nex; i++, dp++) {
734a6f64d4aSChristoph Hellwig 			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
735597bca63SHarvey Harrison 			ep->l0 = get_unaligned_be64(&dp->l0);
736597bca63SHarvey Harrison 			ep->l1 = get_unaligned_be64(&dp->l1);
7371da177e4SLinus Torvalds 		}
7383a59c94cSEric Sandeen 		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
7391da177e4SLinus Torvalds 		if (whichfork != XFS_DATA_FORK ||
7401da177e4SLinus Torvalds 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
7411da177e4SLinus Torvalds 				if (unlikely(xfs_check_nostate_extents(
7424eea22f0SMandy Kirkconnell 				    ifp, 0, nex))) {
7431da177e4SLinus Torvalds 					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
7441da177e4SLinus Torvalds 							 XFS_ERRLEVEL_LOW,
7451da177e4SLinus Torvalds 							 ip->i_mount);
7461da177e4SLinus Torvalds 					return XFS_ERROR(EFSCORRUPTED);
7471da177e4SLinus Torvalds 				}
7481da177e4SLinus Torvalds 	}
7491da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
7501da177e4SLinus Torvalds 	return 0;
7511da177e4SLinus Torvalds }
7521da177e4SLinus Torvalds 
7531da177e4SLinus Torvalds /*
7541da177e4SLinus Torvalds  * The file has too many extents to fit into
7551da177e4SLinus Torvalds  * the inode, so they are in B-tree format.
7561da177e4SLinus Torvalds  * Allocate a buffer for the root of the B-tree
7571da177e4SLinus Torvalds  * and copy the root into it.  The i_extents
7581da177e4SLinus Torvalds  * field will remain NULL until all of the
7591da177e4SLinus Torvalds  * extents are read in (when they are needed).
7601da177e4SLinus Torvalds  */
7611da177e4SLinus Torvalds STATIC int
7621da177e4SLinus Torvalds xfs_iformat_btree(
7631da177e4SLinus Torvalds 	xfs_inode_t		*ip,
7641da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
7651da177e4SLinus Torvalds 	int			whichfork)
7661da177e4SLinus Torvalds {
7671da177e4SLinus Torvalds 	xfs_bmdr_block_t	*dfp;
7681da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
7691da177e4SLinus Torvalds 	/* REFERENCED */
7701da177e4SLinus Torvalds 	int			nrecs;
7711da177e4SLinus Torvalds 	int			size;
7721da177e4SLinus Torvalds 
7731da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
7741da177e4SLinus Torvalds 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
7751da177e4SLinus Torvalds 	size = XFS_BMAP_BROOT_SPACE(dfp);
77660197e8dSChristoph Hellwig 	nrecs = be16_to_cpu(dfp->bb_numrecs);
7771da177e4SLinus Torvalds 
7781da177e4SLinus Torvalds 	/*
7791da177e4SLinus Torvalds 	 * blow out if -- fork has less extents than can fit in
7801da177e4SLinus Torvalds 	 * fork (fork shouldn't be a btree format), root btree
7811da177e4SLinus Torvalds 	 * block has more records than can fit into the fork,
7821da177e4SLinus Torvalds 	 * or the number of extents is greater than the number of
7831da177e4SLinus Torvalds 	 * blocks.
7841da177e4SLinus Torvalds 	 */
7858096b1ebSChristoph Hellwig 	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
7868096b1ebSChristoph Hellwig 			XFS_IFORK_MAXEXT(ip, whichfork) ||
7878096b1ebSChristoph Hellwig 		     XFS_BMDR_SPACE_CALC(nrecs) >
7888096b1ebSChristoph Hellwig 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
7898096b1ebSChristoph Hellwig 		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
79065333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
7911da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino);
79265333b4cSDave Chinner 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
79365333b4cSDave Chinner 				 ip->i_mount, dip);
7941da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
7951da177e4SLinus Torvalds 	}
7961da177e4SLinus Torvalds 
7971da177e4SLinus Torvalds 	ifp->if_broot_bytes = size;
7984a7edddcSDave Chinner 	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
7991da177e4SLinus Torvalds 	ASSERT(ifp->if_broot != NULL);
8001da177e4SLinus Torvalds 	/*
8011da177e4SLinus Torvalds 	 * Copy and convert from the on-disk structure
8021da177e4SLinus Torvalds 	 * to the in-memory structure.
8031da177e4SLinus Torvalds 	 */
80460197e8dSChristoph Hellwig 	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
80560197e8dSChristoph Hellwig 			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
8061da177e4SLinus Torvalds 			 ifp->if_broot, size);
8071da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
8081da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFBROOT;
8091da177e4SLinus Torvalds 
8101da177e4SLinus Torvalds 	return 0;
8111da177e4SLinus Torvalds }
8121da177e4SLinus Torvalds 
813d96f8f89SEric Sandeen STATIC void
814347d1c01SChristoph Hellwig xfs_dinode_from_disk(
815347d1c01SChristoph Hellwig 	xfs_icdinode_t		*to,
81681591fe2SChristoph Hellwig 	xfs_dinode_t		*from)
8171da177e4SLinus Torvalds {
818347d1c01SChristoph Hellwig 	to->di_magic = be16_to_cpu(from->di_magic);
819347d1c01SChristoph Hellwig 	to->di_mode = be16_to_cpu(from->di_mode);
820347d1c01SChristoph Hellwig 	to->di_version = from ->di_version;
821347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
822347d1c01SChristoph Hellwig 	to->di_onlink = be16_to_cpu(from->di_onlink);
823347d1c01SChristoph Hellwig 	to->di_uid = be32_to_cpu(from->di_uid);
824347d1c01SChristoph Hellwig 	to->di_gid = be32_to_cpu(from->di_gid);
825347d1c01SChristoph Hellwig 	to->di_nlink = be32_to_cpu(from->di_nlink);
8266743099cSArkadiusz Mi?kiewicz 	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
8276743099cSArkadiusz Mi?kiewicz 	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
828347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
829347d1c01SChristoph Hellwig 	to->di_flushiter = be16_to_cpu(from->di_flushiter);
830347d1c01SChristoph Hellwig 	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
831347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
832347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
833347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
834347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
835347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
836347d1c01SChristoph Hellwig 	to->di_size = be64_to_cpu(from->di_size);
837347d1c01SChristoph Hellwig 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
838347d1c01SChristoph Hellwig 	to->di_extsize = be32_to_cpu(from->di_extsize);
839347d1c01SChristoph Hellwig 	to->di_nextents = be32_to_cpu(from->di_nextents);
840347d1c01SChristoph Hellwig 	to->di_anextents = be16_to_cpu(from->di_anextents);
841347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
842347d1c01SChristoph Hellwig 	to->di_aformat	= from->di_aformat;
843347d1c01SChristoph Hellwig 	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
844347d1c01SChristoph Hellwig 	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
845347d1c01SChristoph Hellwig 	to->di_flags	= be16_to_cpu(from->di_flags);
846347d1c01SChristoph Hellwig 	to->di_gen	= be32_to_cpu(from->di_gen);
8471da177e4SLinus Torvalds }
8481da177e4SLinus Torvalds 
849347d1c01SChristoph Hellwig void
850347d1c01SChristoph Hellwig xfs_dinode_to_disk(
85181591fe2SChristoph Hellwig 	xfs_dinode_t		*to,
852347d1c01SChristoph Hellwig 	xfs_icdinode_t		*from)
853347d1c01SChristoph Hellwig {
854347d1c01SChristoph Hellwig 	to->di_magic = cpu_to_be16(from->di_magic);
855347d1c01SChristoph Hellwig 	to->di_mode = cpu_to_be16(from->di_mode);
856347d1c01SChristoph Hellwig 	to->di_version = from ->di_version;
857347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
858347d1c01SChristoph Hellwig 	to->di_onlink = cpu_to_be16(from->di_onlink);
859347d1c01SChristoph Hellwig 	to->di_uid = cpu_to_be32(from->di_uid);
860347d1c01SChristoph Hellwig 	to->di_gid = cpu_to_be32(from->di_gid);
861347d1c01SChristoph Hellwig 	to->di_nlink = cpu_to_be32(from->di_nlink);
8626743099cSArkadiusz Mi?kiewicz 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
8636743099cSArkadiusz Mi?kiewicz 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
864347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
865347d1c01SChristoph Hellwig 	to->di_flushiter = cpu_to_be16(from->di_flushiter);
866347d1c01SChristoph Hellwig 	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
867347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
868347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
869347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
870347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
871347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
872347d1c01SChristoph Hellwig 	to->di_size = cpu_to_be64(from->di_size);
873347d1c01SChristoph Hellwig 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
874347d1c01SChristoph Hellwig 	to->di_extsize = cpu_to_be32(from->di_extsize);
875347d1c01SChristoph Hellwig 	to->di_nextents = cpu_to_be32(from->di_nextents);
876347d1c01SChristoph Hellwig 	to->di_anextents = cpu_to_be16(from->di_anextents);
877347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
878347d1c01SChristoph Hellwig 	to->di_aformat = from->di_aformat;
879347d1c01SChristoph Hellwig 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
880347d1c01SChristoph Hellwig 	to->di_dmstate = cpu_to_be16(from->di_dmstate);
881347d1c01SChristoph Hellwig 	to->di_flags = cpu_to_be16(from->di_flags);
882347d1c01SChristoph Hellwig 	to->di_gen = cpu_to_be32(from->di_gen);
8831da177e4SLinus Torvalds }
8841da177e4SLinus Torvalds 
8851da177e4SLinus Torvalds STATIC uint
8861da177e4SLinus Torvalds _xfs_dic2xflags(
8871da177e4SLinus Torvalds 	__uint16_t		di_flags)
8881da177e4SLinus Torvalds {
8891da177e4SLinus Torvalds 	uint			flags = 0;
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds 	if (di_flags & XFS_DIFLAG_ANY) {
8921da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_REALTIME)
8931da177e4SLinus Torvalds 			flags |= XFS_XFLAG_REALTIME;
8941da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PREALLOC)
8951da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PREALLOC;
8961da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_IMMUTABLE)
8971da177e4SLinus Torvalds 			flags |= XFS_XFLAG_IMMUTABLE;
8981da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_APPEND)
8991da177e4SLinus Torvalds 			flags |= XFS_XFLAG_APPEND;
9001da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_SYNC)
9011da177e4SLinus Torvalds 			flags |= XFS_XFLAG_SYNC;
9021da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOATIME)
9031da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOATIME;
9041da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NODUMP)
9051da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NODUMP;
9061da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_RTINHERIT)
9071da177e4SLinus Torvalds 			flags |= XFS_XFLAG_RTINHERIT;
9081da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PROJINHERIT)
9091da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PROJINHERIT;
9101da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
9111da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOSYMLINKS;
912dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSIZE)
913dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSIZE;
914dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
915dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSZINHERIT;
916d3446eacSBarry Naujok 		if (di_flags & XFS_DIFLAG_NODEFRAG)
917d3446eacSBarry Naujok 			flags |= XFS_XFLAG_NODEFRAG;
9182a82b8beSDavid Chinner 		if (di_flags & XFS_DIFLAG_FILESTREAM)
9192a82b8beSDavid Chinner 			flags |= XFS_XFLAG_FILESTREAM;
9201da177e4SLinus Torvalds 	}
9211da177e4SLinus Torvalds 
9221da177e4SLinus Torvalds 	return flags;
9231da177e4SLinus Torvalds }
9241da177e4SLinus Torvalds 
9251da177e4SLinus Torvalds uint
9261da177e4SLinus Torvalds xfs_ip2xflags(
9271da177e4SLinus Torvalds 	xfs_inode_t		*ip)
9281da177e4SLinus Torvalds {
929347d1c01SChristoph Hellwig 	xfs_icdinode_t		*dic = &ip->i_d;
9301da177e4SLinus Torvalds 
931a916e2bdSNathan Scott 	return _xfs_dic2xflags(dic->di_flags) |
93245ba598eSChristoph Hellwig 				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
9331da177e4SLinus Torvalds }
9341da177e4SLinus Torvalds 
9351da177e4SLinus Torvalds uint
9361da177e4SLinus Torvalds xfs_dic2xflags(
93745ba598eSChristoph Hellwig 	xfs_dinode_t		*dip)
9381da177e4SLinus Torvalds {
93981591fe2SChristoph Hellwig 	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
94045ba598eSChristoph Hellwig 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
9411da177e4SLinus Torvalds }
9421da177e4SLinus Torvalds 
9431da177e4SLinus Torvalds /*
94424f211baSChristoph Hellwig  * Read the disk inode attributes into the in-core inode structure.
9451da177e4SLinus Torvalds  */
9461da177e4SLinus Torvalds int
9471da177e4SLinus Torvalds xfs_iread(
9481da177e4SLinus Torvalds 	xfs_mount_t	*mp,
9491da177e4SLinus Torvalds 	xfs_trans_t	*tp,
95024f211baSChristoph Hellwig 	xfs_inode_t	*ip,
95124f211baSChristoph Hellwig 	uint		iget_flags)
9521da177e4SLinus Torvalds {
9531da177e4SLinus Torvalds 	xfs_buf_t	*bp;
9541da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
9551da177e4SLinus Torvalds 	int		error;
9561da177e4SLinus Torvalds 
9571da177e4SLinus Torvalds 	/*
95892bfc6e7SChristoph Hellwig 	 * Fill in the location information in the in-core inode.
9591da177e4SLinus Torvalds 	 */
96024f211baSChristoph Hellwig 	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
9619ed0451eSChristoph Hellwig 	if (error)
96224f211baSChristoph Hellwig 		return error;
9631da177e4SLinus Torvalds 
9641da177e4SLinus Torvalds 	/*
96592bfc6e7SChristoph Hellwig 	 * Get pointers to the on-disk inode and the buffer containing it.
96676d8b277SChristoph Hellwig 	 */
967475ee413SChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
96876d8b277SChristoph Hellwig 	if (error)
96924f211baSChristoph Hellwig 		return error;
97076d8b277SChristoph Hellwig 
97176d8b277SChristoph Hellwig 	/*
9721da177e4SLinus Torvalds 	 * If we got something that isn't an inode it means someone
9731da177e4SLinus Torvalds 	 * (nfs or dmi) has a stale handle.
9741da177e4SLinus Torvalds 	 */
97569ef921bSChristoph Hellwig 	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
9761da177e4SLinus Torvalds #ifdef DEBUG
97753487786SDave Chinner 		xfs_alert(mp,
97853487786SDave Chinner 			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
97953487786SDave Chinner 			__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
9801da177e4SLinus Torvalds #endif /* DEBUG */
9819ed0451eSChristoph Hellwig 		error = XFS_ERROR(EINVAL);
9829ed0451eSChristoph Hellwig 		goto out_brelse;
9831da177e4SLinus Torvalds 	}
9841da177e4SLinus Torvalds 
9851da177e4SLinus Torvalds 	/*
9861da177e4SLinus Torvalds 	 * If the on-disk inode is already linked to a directory
9871da177e4SLinus Torvalds 	 * entry, copy all of the inode into the in-core inode.
9881da177e4SLinus Torvalds 	 * xfs_iformat() handles copying in the inode format
9891da177e4SLinus Torvalds 	 * specific information.
9901da177e4SLinus Torvalds 	 * Otherwise, just get the truly permanent information.
9911da177e4SLinus Torvalds 	 */
99281591fe2SChristoph Hellwig 	if (dip->di_mode) {
99381591fe2SChristoph Hellwig 		xfs_dinode_from_disk(&ip->i_d, dip);
9941da177e4SLinus Torvalds 		error = xfs_iformat(ip, dip);
9951da177e4SLinus Torvalds 		if (error)  {
9961da177e4SLinus Torvalds #ifdef DEBUG
99753487786SDave Chinner 			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
99853487786SDave Chinner 				__func__, error);
9991da177e4SLinus Torvalds #endif /* DEBUG */
10009ed0451eSChristoph Hellwig 			goto out_brelse;
10011da177e4SLinus Torvalds 		}
10021da177e4SLinus Torvalds 	} else {
100381591fe2SChristoph Hellwig 		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
100481591fe2SChristoph Hellwig 		ip->i_d.di_version = dip->di_version;
100581591fe2SChristoph Hellwig 		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
100681591fe2SChristoph Hellwig 		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
10071da177e4SLinus Torvalds 		/*
10081da177e4SLinus Torvalds 		 * Make sure to pull in the mode here as well in
10091da177e4SLinus Torvalds 		 * case the inode is released without being used.
10101da177e4SLinus Torvalds 		 * This ensures that xfs_inactive() will see that
10111da177e4SLinus Torvalds 		 * the inode is already free and not try to mess
10121da177e4SLinus Torvalds 		 * with the uninitialized part of it.
10131da177e4SLinus Torvalds 		 */
10141da177e4SLinus Torvalds 		ip->i_d.di_mode = 0;
10151da177e4SLinus Torvalds 	}
10161da177e4SLinus Torvalds 
10171da177e4SLinus Torvalds 	/*
10181da177e4SLinus Torvalds 	 * The inode format changed when we moved the link count and
10191da177e4SLinus Torvalds 	 * made it 32 bits long.  If this is an old format inode,
10201da177e4SLinus Torvalds 	 * convert it in memory to look like a new one.  If it gets
10211da177e4SLinus Torvalds 	 * flushed to disk we will convert back before flushing or
10221da177e4SLinus Torvalds 	 * logging it.  We zero out the new projid field and the old link
10231da177e4SLinus Torvalds 	 * count field.  We'll handle clearing the pad field (the remains
10241da177e4SLinus Torvalds 	 * of the old uuid field) when we actually convert the inode to
10251da177e4SLinus Torvalds 	 * the new format. We don't change the version number so that we
10261da177e4SLinus Torvalds 	 * can distinguish this from a real new format inode.
10271da177e4SLinus Torvalds 	 */
102851ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
10291da177e4SLinus Torvalds 		ip->i_d.di_nlink = ip->i_d.di_onlink;
10301da177e4SLinus Torvalds 		ip->i_d.di_onlink = 0;
10316743099cSArkadiusz Mi?kiewicz 		xfs_set_projid(ip, 0);
10321da177e4SLinus Torvalds 	}
10331da177e4SLinus Torvalds 
10341da177e4SLinus Torvalds 	ip->i_delayed_blks = 0;
10351da177e4SLinus Torvalds 
10361da177e4SLinus Torvalds 	/*
10371da177e4SLinus Torvalds 	 * Mark the buffer containing the inode as something to keep
10381da177e4SLinus Torvalds 	 * around for a while.  This helps to keep recently accessed
10391da177e4SLinus Torvalds 	 * meta-data in-core longer.
10401da177e4SLinus Torvalds 	 */
1041821eb21dSDave Chinner 	xfs_buf_set_ref(bp, XFS_INO_REF);
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds 	/*
10441da177e4SLinus Torvalds 	 * Use xfs_trans_brelse() to release the buffer containing the
10451da177e4SLinus Torvalds 	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
1046475ee413SChristoph Hellwig 	 * in xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
10471da177e4SLinus Torvalds 	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
10481da177e4SLinus Torvalds 	 * will only release the buffer if it is not dirty within the
10491da177e4SLinus Torvalds 	 * transaction.  It will be OK to release the buffer in this case,
10501da177e4SLinus Torvalds 	 * because inodes on disk are never destroyed and we will be
10511da177e4SLinus Torvalds 	 * locking the new in-core inode before putting it in the hash
10521da177e4SLinus Torvalds 	 * table where other processes can find it.  Thus we don't have
10531da177e4SLinus Torvalds 	 * to worry about the inode being changed just because we released
10541da177e4SLinus Torvalds 	 * the buffer.
10551da177e4SLinus Torvalds 	 */
10569ed0451eSChristoph Hellwig  out_brelse:
10579ed0451eSChristoph Hellwig 	xfs_trans_brelse(tp, bp);
10589ed0451eSChristoph Hellwig 	return error;
10591da177e4SLinus Torvalds }
10601da177e4SLinus Torvalds 
10611da177e4SLinus Torvalds /*
10621da177e4SLinus Torvalds  * Read in extents from a btree-format inode.
10631da177e4SLinus Torvalds  * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
10641da177e4SLinus Torvalds  */
10651da177e4SLinus Torvalds int
10661da177e4SLinus Torvalds xfs_iread_extents(
10671da177e4SLinus Torvalds 	xfs_trans_t	*tp,
10681da177e4SLinus Torvalds 	xfs_inode_t	*ip,
10691da177e4SLinus Torvalds 	int		whichfork)
10701da177e4SLinus Torvalds {
10711da177e4SLinus Torvalds 	int		error;
10721da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
10734eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;
10741da177e4SLinus Torvalds 
10751da177e4SLinus Torvalds 	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
10761da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
10771da177e4SLinus Torvalds 				 ip->i_mount);
10781da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
10791da177e4SLinus Torvalds 	}
10804eea22f0SMandy Kirkconnell 	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
10811da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
10824eea22f0SMandy Kirkconnell 
10831da177e4SLinus Torvalds 	/*
10841da177e4SLinus Torvalds 	 * We know that the size is valid (it's checked in iformat_btree)
10851da177e4SLinus Torvalds 	 */
10864eea22f0SMandy Kirkconnell 	ifp->if_bytes = ifp->if_real_bytes = 0;
10871da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
10884eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, 0, nextents);
10891da177e4SLinus Torvalds 	error = xfs_bmap_read_extents(tp, ip, whichfork);
10901da177e4SLinus Torvalds 	if (error) {
10914eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
10921da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFEXTENTS;
10931da177e4SLinus Torvalds 		return error;
10941da177e4SLinus Torvalds 	}
1095a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
10961da177e4SLinus Torvalds 	return 0;
10971da177e4SLinus Torvalds }
10981da177e4SLinus Torvalds 
10991da177e4SLinus Torvalds /*
11001da177e4SLinus Torvalds  * Allocate an inode on disk and return a copy of its in-core version.
11011da177e4SLinus Torvalds  * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
11021da177e4SLinus Torvalds  * appropriately within the inode.  The uid and gid for the inode are
11031da177e4SLinus Torvalds  * set according to the contents of the given cred structure.
11041da177e4SLinus Torvalds  *
11051da177e4SLinus Torvalds  * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
11061da177e4SLinus Torvalds  * has a free inode available, call xfs_iget()
11071da177e4SLinus Torvalds  * to obtain the in-core version of the allocated inode.  Finally,
11081da177e4SLinus Torvalds  * fill in the inode and log its initial contents.  In this case,
11091da177e4SLinus Torvalds  * ialloc_context would be set to NULL and call_again set to false.
11101da177e4SLinus Torvalds  *
11111da177e4SLinus Torvalds  * If xfs_dialloc() does not have an available inode,
11121da177e4SLinus Torvalds  * it will replenish its supply by doing an allocation. Since we can
11131da177e4SLinus Torvalds  * only do one allocation within a transaction without deadlocks, we
11141da177e4SLinus Torvalds  * must commit the current transaction before returning the inode itself.
11151da177e4SLinus Torvalds  * In this case, therefore, we will set call_again to true and return.
11161da177e4SLinus Torvalds  * The caller should then commit the current transaction, start a new
11171da177e4SLinus Torvalds  * transaction, and call xfs_ialloc() again to actually get the inode.
11181da177e4SLinus Torvalds  *
11191da177e4SLinus Torvalds  * To ensure that some other process does not grab the inode that
11201da177e4SLinus Torvalds  * was allocated during the first call to xfs_ialloc(), this routine
11211da177e4SLinus Torvalds  * also returns the [locked] bp pointing to the head of the freelist
11221da177e4SLinus Torvalds  * as ialloc_context.  The caller should hold this buffer across
11231da177e4SLinus Torvalds  * the commit and pass it back into this routine on the second call.
1124b11f94d5SDavid Chinner  *
1125b11f94d5SDavid Chinner  * If we are allocating quota inodes, we do not have a parent inode
1126b11f94d5SDavid Chinner  * to attach to or associate with (i.e. pip == NULL) because they
1127b11f94d5SDavid Chinner  * are not linked into the directory structure - they are attached
1128b11f94d5SDavid Chinner  * directly to the superblock - and so have no parent.
11291da177e4SLinus Torvalds  */
11301da177e4SLinus Torvalds int
11311da177e4SLinus Torvalds xfs_ialloc(
11321da177e4SLinus Torvalds 	xfs_trans_t	*tp,
11331da177e4SLinus Torvalds 	xfs_inode_t	*pip,
1134576b1d67SAl Viro 	umode_t		mode,
113531b084aeSNathan Scott 	xfs_nlink_t	nlink,
11361da177e4SLinus Torvalds 	xfs_dev_t	rdev,
11376743099cSArkadiusz Mi?kiewicz 	prid_t		prid,
11381da177e4SLinus Torvalds 	int		okalloc,
11391da177e4SLinus Torvalds 	xfs_buf_t	**ialloc_context,
11401da177e4SLinus Torvalds 	xfs_inode_t	**ipp)
11411da177e4SLinus Torvalds {
11421da177e4SLinus Torvalds 	xfs_ino_t	ino;
11431da177e4SLinus Torvalds 	xfs_inode_t	*ip;
11441da177e4SLinus Torvalds 	uint		flags;
11451da177e4SLinus Torvalds 	int		error;
1146dff35fd4SChristoph Hellwig 	timespec_t	tv;
1147bf904248SDavid Chinner 	int		filestreams = 0;
11481da177e4SLinus Torvalds 
11491da177e4SLinus Torvalds 	/*
11501da177e4SLinus Torvalds 	 * Call the space management code to pick
11511da177e4SLinus Torvalds 	 * the on-disk inode to be allocated.
11521da177e4SLinus Torvalds 	 */
1153b11f94d5SDavid Chinner 	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
115408358906SChristoph Hellwig 			    ialloc_context, &ino);
1155bf904248SDavid Chinner 	if (error)
11561da177e4SLinus Torvalds 		return error;
115708358906SChristoph Hellwig 	if (*ialloc_context || ino == NULLFSINO) {
11581da177e4SLinus Torvalds 		*ipp = NULL;
11591da177e4SLinus Torvalds 		return 0;
11601da177e4SLinus Torvalds 	}
11611da177e4SLinus Torvalds 	ASSERT(*ialloc_context == NULL);
11621da177e4SLinus Torvalds 
11631da177e4SLinus Torvalds 	/*
11641da177e4SLinus Torvalds 	 * Get the in-core inode with the lock held exclusively.
11651da177e4SLinus Torvalds 	 * This is because we're setting fields here we need
11661da177e4SLinus Torvalds 	 * to prevent others from looking at until we're done.
11671da177e4SLinus Torvalds 	 */
1168ec3ba85fSChristoph Hellwig 	error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
1169ec3ba85fSChristoph Hellwig 			 XFS_ILOCK_EXCL, &ip);
1170bf904248SDavid Chinner 	if (error)
11711da177e4SLinus Torvalds 		return error;
11721da177e4SLinus Torvalds 	ASSERT(ip != NULL);
11731da177e4SLinus Torvalds 
1174576b1d67SAl Viro 	ip->i_d.di_mode = mode;
11751da177e4SLinus Torvalds 	ip->i_d.di_onlink = 0;
11761da177e4SLinus Torvalds 	ip->i_d.di_nlink = nlink;
11771da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == nlink);
11789e2b2dc4SDavid Howells 	ip->i_d.di_uid = current_fsuid();
11799e2b2dc4SDavid Howells 	ip->i_d.di_gid = current_fsgid();
11806743099cSArkadiusz Mi?kiewicz 	xfs_set_projid(ip, prid);
11811da177e4SLinus Torvalds 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
11821da177e4SLinus Torvalds 
11831da177e4SLinus Torvalds 	/*
11841da177e4SLinus Torvalds 	 * If the superblock version is up to where we support new format
11851da177e4SLinus Torvalds 	 * inodes and this is currently an old format inode, then change
11861da177e4SLinus Torvalds 	 * the inode version number now.  This way we only do the conversion
11871da177e4SLinus Torvalds 	 * here rather than here and in the flush/logging code.
11881da177e4SLinus Torvalds 	 */
118962118709SEric Sandeen 	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
119051ce16d5SChristoph Hellwig 	    ip->i_d.di_version == 1) {
119151ce16d5SChristoph Hellwig 		ip->i_d.di_version = 2;
11921da177e4SLinus Torvalds 		/*
11931da177e4SLinus Torvalds 		 * We've already zeroed the old link count, the projid field,
11941da177e4SLinus Torvalds 		 * and the pad field.
11951da177e4SLinus Torvalds 		 */
11961da177e4SLinus Torvalds 	}
11971da177e4SLinus Torvalds 
11981da177e4SLinus Torvalds 	/*
11991da177e4SLinus Torvalds 	 * Project ids won't be stored on disk if we are using a version 1 inode.
12001da177e4SLinus Torvalds 	 */
120151ce16d5SChristoph Hellwig 	if ((prid != 0) && (ip->i_d.di_version == 1))
12021da177e4SLinus Torvalds 		xfs_bump_ino_vers2(tp, ip);
12031da177e4SLinus Torvalds 
1204bd186aa9SChristoph Hellwig 	if (pip && XFS_INHERIT_GID(pip)) {
12051da177e4SLinus Torvalds 		ip->i_d.di_gid = pip->i_d.di_gid;
1206abbede1bSAl Viro 		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
12071da177e4SLinus Torvalds 			ip->i_d.di_mode |= S_ISGID;
12081da177e4SLinus Torvalds 		}
12091da177e4SLinus Torvalds 	}
12101da177e4SLinus Torvalds 
12111da177e4SLinus Torvalds 	/*
12121da177e4SLinus Torvalds 	 * If the group ID of the new file does not match the effective group
12131da177e4SLinus Torvalds 	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
12141da177e4SLinus Torvalds 	 * (and only if the irix_sgid_inherit compatibility variable is set).
12151da177e4SLinus Torvalds 	 */
12161da177e4SLinus Torvalds 	if ((irix_sgid_inherit) &&
12171da177e4SLinus Torvalds 	    (ip->i_d.di_mode & S_ISGID) &&
12181da177e4SLinus Torvalds 	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
12191da177e4SLinus Torvalds 		ip->i_d.di_mode &= ~S_ISGID;
12201da177e4SLinus Torvalds 	}
12211da177e4SLinus Torvalds 
12221da177e4SLinus Torvalds 	ip->i_d.di_size = 0;
12231da177e4SLinus Torvalds 	ip->i_d.di_nextents = 0;
12241da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
1225dff35fd4SChristoph Hellwig 
1226dff35fd4SChristoph Hellwig 	nanotime(&tv);
1227dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
1228dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
1229dff35fd4SChristoph Hellwig 	ip->i_d.di_atime = ip->i_d.di_mtime;
1230dff35fd4SChristoph Hellwig 	ip->i_d.di_ctime = ip->i_d.di_mtime;
1231dff35fd4SChristoph Hellwig 
12321da177e4SLinus Torvalds 	/*
12331da177e4SLinus Torvalds 	 * di_gen will have been taken care of in xfs_iread.
12341da177e4SLinus Torvalds 	 */
12351da177e4SLinus Torvalds 	ip->i_d.di_extsize = 0;
12361da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
12371da177e4SLinus Torvalds 	ip->i_d.di_dmstate = 0;
12381da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
12391da177e4SLinus Torvalds 	flags = XFS_ILOG_CORE;
12401da177e4SLinus Torvalds 	switch (mode & S_IFMT) {
12411da177e4SLinus Torvalds 	case S_IFIFO:
12421da177e4SLinus Torvalds 	case S_IFCHR:
12431da177e4SLinus Torvalds 	case S_IFBLK:
12441da177e4SLinus Torvalds 	case S_IFSOCK:
12451da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
12461da177e4SLinus Torvalds 		ip->i_df.if_u2.if_rdev = rdev;
12471da177e4SLinus Torvalds 		ip->i_df.if_flags = 0;
12481da177e4SLinus Torvalds 		flags |= XFS_ILOG_DEV;
12491da177e4SLinus Torvalds 		break;
12501da177e4SLinus Torvalds 	case S_IFREG:
1251bf904248SDavid Chinner 		/*
1252bf904248SDavid Chinner 		 * we can't set up filestreams until after the VFS inode
1253bf904248SDavid Chinner 		 * is set up properly.
1254bf904248SDavid Chinner 		 */
1255bf904248SDavid Chinner 		if (pip && xfs_inode_is_filestream(pip))
1256bf904248SDavid Chinner 			filestreams = 1;
12572a82b8beSDavid Chinner 		/* fall through */
12581da177e4SLinus Torvalds 	case S_IFDIR:
1259b11f94d5SDavid Chinner 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1260365ca83dSNathan Scott 			uint	di_flags = 0;
1261365ca83dSNathan Scott 
1262abbede1bSAl Viro 			if (S_ISDIR(mode)) {
1263365ca83dSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1264365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_RTINHERIT;
1265dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1266dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1267dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1268dd9f438eSNathan Scott 				}
1269abbede1bSAl Viro 			} else if (S_ISREG(mode)) {
1270613d7043SChristoph Hellwig 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1271365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_REALTIME;
1272dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1273dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSIZE;
1274dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1275dd9f438eSNathan Scott 				}
12761da177e4SLinus Torvalds 			}
12771da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
12781da177e4SLinus Torvalds 			    xfs_inherit_noatime)
1279365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOATIME;
12801da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
12811da177e4SLinus Torvalds 			    xfs_inherit_nodump)
1282365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NODUMP;
12831da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
12841da177e4SLinus Torvalds 			    xfs_inherit_sync)
1285365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_SYNC;
12861da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
12871da177e4SLinus Torvalds 			    xfs_inherit_nosymlinks)
1288365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
1289365ca83dSNathan Scott 			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1290365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_PROJINHERIT;
1291d3446eacSBarry Naujok 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1292d3446eacSBarry Naujok 			    xfs_inherit_nodefrag)
1293d3446eacSBarry Naujok 				di_flags |= XFS_DIFLAG_NODEFRAG;
12942a82b8beSDavid Chinner 			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
12952a82b8beSDavid Chinner 				di_flags |= XFS_DIFLAG_FILESTREAM;
1296365ca83dSNathan Scott 			ip->i_d.di_flags |= di_flags;
12971da177e4SLinus Torvalds 		}
12981da177e4SLinus Torvalds 		/* FALLTHROUGH */
12991da177e4SLinus Torvalds 	case S_IFLNK:
13001da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
13011da177e4SLinus Torvalds 		ip->i_df.if_flags = XFS_IFEXTENTS;
13021da177e4SLinus Torvalds 		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
13031da177e4SLinus Torvalds 		ip->i_df.if_u1.if_extents = NULL;
13041da177e4SLinus Torvalds 		break;
13051da177e4SLinus Torvalds 	default:
13061da177e4SLinus Torvalds 		ASSERT(0);
13071da177e4SLinus Torvalds 	}
13081da177e4SLinus Torvalds 	/*
13091da177e4SLinus Torvalds 	 * Attribute fork settings for new inode.
13101da177e4SLinus Torvalds 	 */
13111da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
13121da177e4SLinus Torvalds 	ip->i_d.di_anextents = 0;
13131da177e4SLinus Torvalds 
13141da177e4SLinus Torvalds 	/*
13151da177e4SLinus Torvalds 	 * Log the new values stuffed into the inode.
13161da177e4SLinus Torvalds 	 */
1317ddc3415aSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
13181da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, flags);
13191da177e4SLinus Torvalds 
1320b83bd138SNathan Scott 	/* now that we have an i_mode we can setup inode ops and unlock */
132141be8bedSChristoph Hellwig 	xfs_setup_inode(ip);
13221da177e4SLinus Torvalds 
1323bf904248SDavid Chinner 	/* now we have set up the vfs inode we can associate the filestream */
1324bf904248SDavid Chinner 	if (filestreams) {
1325bf904248SDavid Chinner 		error = xfs_filestream_associate(pip, ip);
1326bf904248SDavid Chinner 		if (error < 0)
1327bf904248SDavid Chinner 			return -error;
1328bf904248SDavid Chinner 		if (!error)
1329bf904248SDavid Chinner 			xfs_iflags_set(ip, XFS_IFILESTREAM);
1330bf904248SDavid Chinner 	}
1331bf904248SDavid Chinner 
13321da177e4SLinus Torvalds 	*ipp = ip;
13331da177e4SLinus Torvalds 	return 0;
13341da177e4SLinus Torvalds }
13351da177e4SLinus Torvalds 
13361da177e4SLinus Torvalds /*
13378f04c47aSChristoph Hellwig  * Free up the underlying blocks past new_size.  The new size must be smaller
13388f04c47aSChristoph Hellwig  * than the current size.  This routine can be used both for the attribute and
13398f04c47aSChristoph Hellwig  * data fork, and does not modify the inode size, which is left to the caller.
13401da177e4SLinus Torvalds  *
1341f6485057SDavid Chinner  * The transaction passed to this routine must have made a permanent log
1342f6485057SDavid Chinner  * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
1343f6485057SDavid Chinner  * given transaction and start new ones, so make sure everything involved in
1344f6485057SDavid Chinner  * the transaction is tidy before calling here.  Some transaction will be
1345f6485057SDavid Chinner  * returned to the caller to be committed.  The incoming transaction must
1346f6485057SDavid Chinner  * already include the inode, and both inode locks must be held exclusively.
1347f6485057SDavid Chinner  * The inode must also be "held" within the transaction.  On return the inode
1348f6485057SDavid Chinner  * will be "held" within the returned transaction.  This routine does NOT
1349f6485057SDavid Chinner  * require any disk space to be reserved for it within the transaction.
13501da177e4SLinus Torvalds  *
1351f6485057SDavid Chinner  * If we get an error, we must return with the inode locked and linked into the
1352f6485057SDavid Chinner  * current transaction. This keeps things simple for the higher level code,
1353f6485057SDavid Chinner  * because it always knows that the inode is locked and held in the transaction
1354f6485057SDavid Chinner  * that returns to it whether errors occur or not.  We don't mark the inode
1355f6485057SDavid Chinner  * dirty on error so that transactions can be easily aborted if possible.
13561da177e4SLinus Torvalds  */
13571da177e4SLinus Torvalds int
13588f04c47aSChristoph Hellwig xfs_itruncate_extents(
13598f04c47aSChristoph Hellwig 	struct xfs_trans	**tpp,
13608f04c47aSChristoph Hellwig 	struct xfs_inode	*ip,
13618f04c47aSChristoph Hellwig 	int			whichfork,
13628f04c47aSChristoph Hellwig 	xfs_fsize_t		new_size)
13631da177e4SLinus Torvalds {
13648f04c47aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
13658f04c47aSChristoph Hellwig 	struct xfs_trans	*tp = *tpp;
13668f04c47aSChristoph Hellwig 	struct xfs_trans	*ntp;
13678f04c47aSChristoph Hellwig 	xfs_bmap_free_t		free_list;
13681da177e4SLinus Torvalds 	xfs_fsblock_t		first_block;
13691da177e4SLinus Torvalds 	xfs_fileoff_t		first_unmap_block;
13701da177e4SLinus Torvalds 	xfs_fileoff_t		last_block;
13718f04c47aSChristoph Hellwig 	xfs_filblks_t		unmap_len;
13721da177e4SLinus Torvalds 	int			committed;
13738f04c47aSChristoph Hellwig 	int			error = 0;
13748f04c47aSChristoph Hellwig 	int			done = 0;
13751da177e4SLinus Torvalds 
13760b56185bSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
13770b56185bSChristoph Hellwig 	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
13780b56185bSChristoph Hellwig 	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1379ce7ae151SChristoph Hellwig 	ASSERT(new_size <= XFS_ISIZE(ip));
13808f04c47aSChristoph Hellwig 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
13811da177e4SLinus Torvalds 	ASSERT(ip->i_itemp != NULL);
1382898621d5SChristoph Hellwig 	ASSERT(ip->i_itemp->ili_lock_flags == 0);
13831da177e4SLinus Torvalds 	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
13841da177e4SLinus Torvalds 
1385673e8e59SChristoph Hellwig 	trace_xfs_itruncate_extents_start(ip, new_size);
1386673e8e59SChristoph Hellwig 
13871da177e4SLinus Torvalds 	/*
13881da177e4SLinus Torvalds 	 * Since it is possible for space to become allocated beyond
13891da177e4SLinus Torvalds 	 * the end of the file (in a crash where the space is allocated
13901da177e4SLinus Torvalds 	 * but the inode size is not yet updated), simply remove any
13911da177e4SLinus Torvalds 	 * blocks which show up between the new EOF and the maximum
13921da177e4SLinus Torvalds 	 * possible file size.  If the first block to be removed is
13931da177e4SLinus Torvalds 	 * beyond the maximum file size (ie it is the same as last_block),
13941da177e4SLinus Torvalds 	 * then there is nothing to do.
13951da177e4SLinus Torvalds 	 */
13968f04c47aSChristoph Hellwig 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
139732972383SDave Chinner 	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
13988f04c47aSChristoph Hellwig 	if (first_unmap_block == last_block)
13998f04c47aSChristoph Hellwig 		return 0;
14008f04c47aSChristoph Hellwig 
14018f04c47aSChristoph Hellwig 	ASSERT(first_unmap_block < last_block);
14021da177e4SLinus Torvalds 	unmap_len = last_block - first_unmap_block + 1;
14031da177e4SLinus Torvalds 	while (!done) {
14049d87c319SEric Sandeen 		xfs_bmap_init(&free_list, &first_block);
14058f04c47aSChristoph Hellwig 		error = xfs_bunmapi(tp, ip,
14063e57ecf6SOlaf Weber 				    first_unmap_block, unmap_len,
14078f04c47aSChristoph Hellwig 				    xfs_bmapi_aflag(whichfork),
14081da177e4SLinus Torvalds 				    XFS_ITRUNC_MAX_EXTENTS,
14093e57ecf6SOlaf Weber 				    &first_block, &free_list,
1410b4e9181eSChristoph Hellwig 				    &done);
14118f04c47aSChristoph Hellwig 		if (error)
14128f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
14131da177e4SLinus Torvalds 
14141da177e4SLinus Torvalds 		/*
14151da177e4SLinus Torvalds 		 * Duplicate the transaction that has the permanent
14161da177e4SLinus Torvalds 		 * reservation and commit the old transaction.
14171da177e4SLinus Torvalds 		 */
14188f04c47aSChristoph Hellwig 		error = xfs_bmap_finish(&tp, &free_list, &committed);
1419898621d5SChristoph Hellwig 		if (committed)
1420ddc3415aSChristoph Hellwig 			xfs_trans_ijoin(tp, ip, 0);
14218f04c47aSChristoph Hellwig 		if (error)
14228f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
14231da177e4SLinus Torvalds 
14241da177e4SLinus Torvalds 		if (committed) {
14251da177e4SLinus Torvalds 			/*
1426f6485057SDavid Chinner 			 * Mark the inode dirty so it will be logged and
1427e5720eecSDavid Chinner 			 * moved forward in the log as part of every commit.
14281da177e4SLinus Torvalds 			 */
14298f04c47aSChristoph Hellwig 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
14301da177e4SLinus Torvalds 		}
1431f6485057SDavid Chinner 
14328f04c47aSChristoph Hellwig 		ntp = xfs_trans_dup(tp);
14338f04c47aSChristoph Hellwig 		error = xfs_trans_commit(tp, 0);
14348f04c47aSChristoph Hellwig 		tp = ntp;
1435f6485057SDavid Chinner 
1436ddc3415aSChristoph Hellwig 		xfs_trans_ijoin(tp, ip, 0);
1437f6485057SDavid Chinner 
1438cc09c0dcSDave Chinner 		if (error)
14398f04c47aSChristoph Hellwig 			goto out;
14408f04c47aSChristoph Hellwig 
1441cc09c0dcSDave Chinner 		/*
14428f04c47aSChristoph Hellwig 		 * Transaction commit worked ok so we can drop the extra ticket
1443cc09c0dcSDave Chinner 		 * reference that we gained in xfs_trans_dup()
1444cc09c0dcSDave Chinner 		 */
14458f04c47aSChristoph Hellwig 		xfs_log_ticket_put(tp->t_ticket);
14468f04c47aSChristoph Hellwig 		error = xfs_trans_reserve(tp, 0,
1447f6485057SDavid Chinner 					XFS_ITRUNCATE_LOG_RES(mp), 0,
14481da177e4SLinus Torvalds 					XFS_TRANS_PERM_LOG_RES,
14491da177e4SLinus Torvalds 					XFS_ITRUNCATE_LOG_COUNT);
14501da177e4SLinus Torvalds 		if (error)
14518f04c47aSChristoph Hellwig 			goto out;
14521da177e4SLinus Torvalds 	}
14538f04c47aSChristoph Hellwig 
1454673e8e59SChristoph Hellwig 	/*
1455673e8e59SChristoph Hellwig 	 * Always re-log the inode so that our permanent transaction can keep
1456673e8e59SChristoph Hellwig 	 * on rolling it forward in the log.
1457673e8e59SChristoph Hellwig 	 */
1458673e8e59SChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1459673e8e59SChristoph Hellwig 
1460673e8e59SChristoph Hellwig 	trace_xfs_itruncate_extents_end(ip, new_size);
1461673e8e59SChristoph Hellwig 
14628f04c47aSChristoph Hellwig out:
14638f04c47aSChristoph Hellwig 	*tpp = tp;
14648f04c47aSChristoph Hellwig 	return error;
14658f04c47aSChristoph Hellwig out_bmap_cancel:
14661da177e4SLinus Torvalds 	/*
14678f04c47aSChristoph Hellwig 	 * If the bunmapi call encounters an error, return to the caller where
14688f04c47aSChristoph Hellwig 	 * the transaction can be properly aborted.  We just need to make sure
14698f04c47aSChristoph Hellwig 	 * we're not holding any resources that we were not when we came in.
14701da177e4SLinus Torvalds 	 */
14718f04c47aSChristoph Hellwig 	xfs_bmap_cancel(&free_list);
14728f04c47aSChristoph Hellwig 	goto out;
14738f04c47aSChristoph Hellwig }
14748f04c47aSChristoph Hellwig 
14751da177e4SLinus Torvalds /*
14761da177e4SLinus Torvalds  * This is called when the inode's link count goes to 0.
14771da177e4SLinus Torvalds  * We place the on-disk inode on a list in the AGI.  It
14781da177e4SLinus Torvalds  * will be pulled from this list when the inode is freed.
14791da177e4SLinus Torvalds  */
14801da177e4SLinus Torvalds int
14811da177e4SLinus Torvalds xfs_iunlink(
14821da177e4SLinus Torvalds 	xfs_trans_t	*tp,
14831da177e4SLinus Torvalds 	xfs_inode_t	*ip)
14841da177e4SLinus Torvalds {
14851da177e4SLinus Torvalds 	xfs_mount_t	*mp;
14861da177e4SLinus Torvalds 	xfs_agi_t	*agi;
14871da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
14881da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
14891da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
14901da177e4SLinus Torvalds 	xfs_agino_t	agino;
14911da177e4SLinus Torvalds 	short		bucket_index;
14921da177e4SLinus Torvalds 	int		offset;
14931da177e4SLinus Torvalds 	int		error;
14941da177e4SLinus Torvalds 
14951da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
14961da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_mode != 0);
14971da177e4SLinus Torvalds 
14981da177e4SLinus Torvalds 	mp = tp->t_mountp;
14991da177e4SLinus Torvalds 
15001da177e4SLinus Torvalds 	/*
15011da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
15021da177e4SLinus Torvalds 	 * on the list.
15031da177e4SLinus Torvalds 	 */
15045e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1505859d7182SVlad Apostolov 	if (error)
15061da177e4SLinus Torvalds 		return error;
15071da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
15085e1be0fbSChristoph Hellwig 
15091da177e4SLinus Torvalds 	/*
15101da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
15111da177e4SLinus Torvalds 	 * list this inode will go on.
15121da177e4SLinus Torvalds 	 */
15131da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
15141da177e4SLinus Torvalds 	ASSERT(agino != 0);
15151da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
15161da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
151716259e7dSChristoph Hellwig 	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
15181da177e4SLinus Torvalds 
151969ef921bSChristoph Hellwig 	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
15201da177e4SLinus Torvalds 		/*
15211da177e4SLinus Torvalds 		 * There is already another inode in the bucket we need
15221da177e4SLinus Torvalds 		 * to add ourselves to.  Add us at the front of the list.
15231da177e4SLinus Torvalds 		 * Here we put the head pointer into our next pointer,
15241da177e4SLinus Torvalds 		 * and then we fall through to point the head at us.
15251da177e4SLinus Torvalds 		 */
1526475ee413SChristoph Hellwig 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
1527475ee413SChristoph Hellwig 				       0, 0);
1528c319b58bSVlad Apostolov 		if (error)
1529c319b58bSVlad Apostolov 			return error;
1530c319b58bSVlad Apostolov 
153169ef921bSChristoph Hellwig 		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
15321da177e4SLinus Torvalds 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
153392bfc6e7SChristoph Hellwig 		offset = ip->i_imap.im_boffset +
15341da177e4SLinus Torvalds 			offsetof(xfs_dinode_t, di_next_unlinked);
15351da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, ibp);
15361da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, ibp, offset,
15371da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
15381da177e4SLinus Torvalds 		xfs_inobp_check(mp, ibp);
15391da177e4SLinus Torvalds 	}
15401da177e4SLinus Torvalds 
15411da177e4SLinus Torvalds 	/*
15421da177e4SLinus Torvalds 	 * Point the bucket head pointer at the inode being inserted.
15431da177e4SLinus Torvalds 	 */
15441da177e4SLinus Torvalds 	ASSERT(agino != 0);
154516259e7dSChristoph Hellwig 	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
15461da177e4SLinus Torvalds 	offset = offsetof(xfs_agi_t, agi_unlinked) +
15471da177e4SLinus Torvalds 		(sizeof(xfs_agino_t) * bucket_index);
15481da177e4SLinus Torvalds 	xfs_trans_log_buf(tp, agibp, offset,
15491da177e4SLinus Torvalds 			  (offset + sizeof(xfs_agino_t) - 1));
15501da177e4SLinus Torvalds 	return 0;
15511da177e4SLinus Torvalds }
15521da177e4SLinus Torvalds 
15531da177e4SLinus Torvalds /*
15541da177e4SLinus Torvalds  * Pull the on-disk inode from the AGI unlinked list.
15551da177e4SLinus Torvalds  */
15561da177e4SLinus Torvalds STATIC int
15571da177e4SLinus Torvalds xfs_iunlink_remove(
15581da177e4SLinus Torvalds 	xfs_trans_t	*tp,
15591da177e4SLinus Torvalds 	xfs_inode_t	*ip)
15601da177e4SLinus Torvalds {
15611da177e4SLinus Torvalds 	xfs_ino_t	next_ino;
15621da177e4SLinus Torvalds 	xfs_mount_t	*mp;
15631da177e4SLinus Torvalds 	xfs_agi_t	*agi;
15641da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
15651da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
15661da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
15671da177e4SLinus Torvalds 	xfs_agnumber_t	agno;
15681da177e4SLinus Torvalds 	xfs_agino_t	agino;
15691da177e4SLinus Torvalds 	xfs_agino_t	next_agino;
15701da177e4SLinus Torvalds 	xfs_buf_t	*last_ibp;
15716fdf8cccSNathan Scott 	xfs_dinode_t	*last_dip = NULL;
15721da177e4SLinus Torvalds 	short		bucket_index;
15736fdf8cccSNathan Scott 	int		offset, last_offset = 0;
15741da177e4SLinus Torvalds 	int		error;
15751da177e4SLinus Torvalds 
15761da177e4SLinus Torvalds 	mp = tp->t_mountp;
15771da177e4SLinus Torvalds 	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
15781da177e4SLinus Torvalds 
15791da177e4SLinus Torvalds 	/*
15801da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
15811da177e4SLinus Torvalds 	 * on the list.
15821da177e4SLinus Torvalds 	 */
15835e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, agno, &agibp);
15845e1be0fbSChristoph Hellwig 	if (error)
15851da177e4SLinus Torvalds 		return error;
15865e1be0fbSChristoph Hellwig 
15871da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
15885e1be0fbSChristoph Hellwig 
15891da177e4SLinus Torvalds 	/*
15901da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
15911da177e4SLinus Torvalds 	 * list this inode will go on.
15921da177e4SLinus Torvalds 	 */
15931da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
15941da177e4SLinus Torvalds 	ASSERT(agino != 0);
15951da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
159669ef921bSChristoph Hellwig 	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
15971da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
15981da177e4SLinus Torvalds 
159916259e7dSChristoph Hellwig 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
16001da177e4SLinus Torvalds 		/*
1601475ee413SChristoph Hellwig 		 * We're at the head of the list.  Get the inode's on-disk
1602475ee413SChristoph Hellwig 		 * buffer to see if there is anyone after us on the list.
1603475ee413SChristoph Hellwig 		 * Only modify our next pointer if it is not already NULLAGINO.
1604475ee413SChristoph Hellwig 		 * This saves us the overhead of dealing with the buffer when
1605475ee413SChristoph Hellwig 		 * there is no need to change it.
16061da177e4SLinus Torvalds 		 */
1607475ee413SChristoph Hellwig 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
1608475ee413SChristoph Hellwig 				       0, 0);
16091da177e4SLinus Torvalds 		if (error) {
1610475ee413SChristoph Hellwig 			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
16110b932cccSDave Chinner 				__func__, error);
16121da177e4SLinus Torvalds 			return error;
16131da177e4SLinus Torvalds 		}
1614347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
16151da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
16161da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1617347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
161892bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
16191da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
16201da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
16211da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
16221da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
16231da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
16241da177e4SLinus Torvalds 		} else {
16251da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
16261da177e4SLinus Torvalds 		}
16271da177e4SLinus Torvalds 		/*
16281da177e4SLinus Torvalds 		 * Point the bucket head pointer at the next inode.
16291da177e4SLinus Torvalds 		 */
16301da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
16311da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
163216259e7dSChristoph Hellwig 		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
16331da177e4SLinus Torvalds 		offset = offsetof(xfs_agi_t, agi_unlinked) +
16341da177e4SLinus Torvalds 			(sizeof(xfs_agino_t) * bucket_index);
16351da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, agibp, offset,
16361da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
16371da177e4SLinus Torvalds 	} else {
16381da177e4SLinus Torvalds 		/*
16391da177e4SLinus Torvalds 		 * We need to search the list for the inode being freed.
16401da177e4SLinus Torvalds 		 */
164116259e7dSChristoph Hellwig 		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
16421da177e4SLinus Torvalds 		last_ibp = NULL;
16431da177e4SLinus Torvalds 		while (next_agino != agino) {
1644129dbc9aSChristoph Hellwig 			struct xfs_imap	imap;
1645129dbc9aSChristoph Hellwig 
1646129dbc9aSChristoph Hellwig 			if (last_ibp)
16471da177e4SLinus Torvalds 				xfs_trans_brelse(tp, last_ibp);
1648129dbc9aSChristoph Hellwig 
1649129dbc9aSChristoph Hellwig 			imap.im_blkno = 0;
16501da177e4SLinus Torvalds 			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
1651129dbc9aSChristoph Hellwig 
1652129dbc9aSChristoph Hellwig 			error = xfs_imap(mp, tp, next_ino, &imap, 0);
16531da177e4SLinus Torvalds 			if (error) {
16540b932cccSDave Chinner 				xfs_warn(mp,
1655129dbc9aSChristoph Hellwig 	"%s: xfs_imap returned error %d.",
16560b932cccSDave Chinner 					 __func__, error);
16571da177e4SLinus Torvalds 				return error;
16581da177e4SLinus Torvalds 			}
1659129dbc9aSChristoph Hellwig 
1660129dbc9aSChristoph Hellwig 			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
1661129dbc9aSChristoph Hellwig 					       &last_ibp, 0, 0);
1662129dbc9aSChristoph Hellwig 			if (error) {
1663129dbc9aSChristoph Hellwig 				xfs_warn(mp,
1664129dbc9aSChristoph Hellwig 	"%s: xfs_imap_to_bp returned error %d.",
1665129dbc9aSChristoph Hellwig 					__func__, error);
1666129dbc9aSChristoph Hellwig 				return error;
1667129dbc9aSChristoph Hellwig 			}
1668129dbc9aSChristoph Hellwig 
1669129dbc9aSChristoph Hellwig 			last_offset = imap.im_boffset;
1670347d1c01SChristoph Hellwig 			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
16711da177e4SLinus Torvalds 			ASSERT(next_agino != NULLAGINO);
16721da177e4SLinus Torvalds 			ASSERT(next_agino != 0);
16731da177e4SLinus Torvalds 		}
1674475ee413SChristoph Hellwig 
16751da177e4SLinus Torvalds 		/*
1676475ee413SChristoph Hellwig 		 * Now last_ibp points to the buffer previous to us on the
1677475ee413SChristoph Hellwig 		 * unlinked list.  Pull us from the list.
16781da177e4SLinus Torvalds 		 */
1679475ee413SChristoph Hellwig 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
1680475ee413SChristoph Hellwig 				       0, 0);
16811da177e4SLinus Torvalds 		if (error) {
1682475ee413SChristoph Hellwig 			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
16830b932cccSDave Chinner 				__func__, error);
16841da177e4SLinus Torvalds 			return error;
16851da177e4SLinus Torvalds 		}
1686347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
16871da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
16881da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
16891da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1690347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
169192bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
16921da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
16931da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
16941da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
16951da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
16961da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
16971da177e4SLinus Torvalds 		} else {
16981da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
16991da177e4SLinus Torvalds 		}
17001da177e4SLinus Torvalds 		/*
17011da177e4SLinus Torvalds 		 * Point the previous inode on the list to the next inode.
17021da177e4SLinus Torvalds 		 */
1703347d1c01SChristoph Hellwig 		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
17041da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
17051da177e4SLinus Torvalds 		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
17061da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, last_ibp);
17071da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, last_ibp, offset,
17081da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
17091da177e4SLinus Torvalds 		xfs_inobp_check(mp, last_ibp);
17101da177e4SLinus Torvalds 	}
17111da177e4SLinus Torvalds 	return 0;
17121da177e4SLinus Torvalds }
17131da177e4SLinus Torvalds 
17145b3eed75SDave Chinner /*
17155b3eed75SDave Chinner  * A big issue when freeing the inode cluster is is that we _cannot_ skip any
17165b3eed75SDave Chinner  * inodes that are in memory - they all must be marked stale and attached to
17175b3eed75SDave Chinner  * the cluster buffer.
17185b3eed75SDave Chinner  */
17192a30f36dSChandra Seetharaman STATIC int
17201da177e4SLinus Torvalds xfs_ifree_cluster(
17211da177e4SLinus Torvalds 	xfs_inode_t	*free_ip,
17221da177e4SLinus Torvalds 	xfs_trans_t	*tp,
17231da177e4SLinus Torvalds 	xfs_ino_t	inum)
17241da177e4SLinus Torvalds {
17251da177e4SLinus Torvalds 	xfs_mount_t		*mp = free_ip->i_mount;
17261da177e4SLinus Torvalds 	int			blks_per_cluster;
17271da177e4SLinus Torvalds 	int			nbufs;
17281da177e4SLinus Torvalds 	int			ninodes;
17295b257b4aSDave Chinner 	int			i, j;
17301da177e4SLinus Torvalds 	xfs_daddr_t		blkno;
17311da177e4SLinus Torvalds 	xfs_buf_t		*bp;
17325b257b4aSDave Chinner 	xfs_inode_t		*ip;
17331da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
17341da177e4SLinus Torvalds 	xfs_log_item_t		*lip;
17355017e97dSDave Chinner 	struct xfs_perag	*pag;
17361da177e4SLinus Torvalds 
17375017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
17381da177e4SLinus Torvalds 	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
17391da177e4SLinus Torvalds 		blks_per_cluster = 1;
17401da177e4SLinus Torvalds 		ninodes = mp->m_sb.sb_inopblock;
17411da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp);
17421da177e4SLinus Torvalds 	} else {
17431da177e4SLinus Torvalds 		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
17441da177e4SLinus Torvalds 					mp->m_sb.sb_blocksize;
17451da177e4SLinus Torvalds 		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
17461da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
17471da177e4SLinus Torvalds 	}
17481da177e4SLinus Torvalds 
17491da177e4SLinus Torvalds 	for (j = 0; j < nbufs; j++, inum += ninodes) {
17501da177e4SLinus Torvalds 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
17511da177e4SLinus Torvalds 					 XFS_INO_TO_AGBNO(mp, inum));
17521da177e4SLinus Torvalds 
17531da177e4SLinus Torvalds 		/*
17545b257b4aSDave Chinner 		 * We obtain and lock the backing buffer first in the process
17555b257b4aSDave Chinner 		 * here, as we have to ensure that any dirty inode that we
17565b257b4aSDave Chinner 		 * can't get the flush lock on is attached to the buffer.
17575b257b4aSDave Chinner 		 * If we scan the in-memory inodes first, then buffer IO can
17585b257b4aSDave Chinner 		 * complete before we get a lock on it, and hence we may fail
17595b257b4aSDave Chinner 		 * to mark all the active inodes on the buffer stale.
17601da177e4SLinus Torvalds 		 */
17611da177e4SLinus Torvalds 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1762a8acad70SDave Chinner 					mp->m_bsize * blks_per_cluster, 0);
17631da177e4SLinus Torvalds 
17642a30f36dSChandra Seetharaman 		if (!bp)
17652a30f36dSChandra Seetharaman 			return ENOMEM;
17665b257b4aSDave Chinner 		/*
17675b257b4aSDave Chinner 		 * Walk the inodes already attached to the buffer and mark them
17685b257b4aSDave Chinner 		 * stale. These will all have the flush locks held, so an
17695b3eed75SDave Chinner 		 * in-memory inode walk can't lock them. By marking them all
17705b3eed75SDave Chinner 		 * stale first, we will not attempt to lock them in the loop
17715b3eed75SDave Chinner 		 * below as the XFS_ISTALE flag will be set.
17725b257b4aSDave Chinner 		 */
1773adadbeefSChristoph Hellwig 		lip = bp->b_fspriv;
17741da177e4SLinus Torvalds 		while (lip) {
17751da177e4SLinus Torvalds 			if (lip->li_type == XFS_LI_INODE) {
17761da177e4SLinus Torvalds 				iip = (xfs_inode_log_item_t *)lip;
17771da177e4SLinus Torvalds 				ASSERT(iip->ili_logged == 1);
1778ca30b2a7SChristoph Hellwig 				lip->li_cb = xfs_istale_done;
17797b2e2a31SDavid Chinner 				xfs_trans_ail_copy_lsn(mp->m_ail,
17807b2e2a31SDavid Chinner 							&iip->ili_flush_lsn,
17817b2e2a31SDavid Chinner 							&iip->ili_item.li_lsn);
1782e5ffd2bbSDavid Chinner 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
17831da177e4SLinus Torvalds 			}
17841da177e4SLinus Torvalds 			lip = lip->li_bio_list;
17851da177e4SLinus Torvalds 		}
17861da177e4SLinus Torvalds 
17875b3eed75SDave Chinner 
17885b257b4aSDave Chinner 		/*
17895b257b4aSDave Chinner 		 * For each inode in memory attempt to add it to the inode
17905b257b4aSDave Chinner 		 * buffer and set it up for being staled on buffer IO
17915b257b4aSDave Chinner 		 * completion.  This is safe as we've locked out tail pushing
17925b257b4aSDave Chinner 		 * and flushing by locking the buffer.
17935b257b4aSDave Chinner 		 *
17945b257b4aSDave Chinner 		 * We have already marked every inode that was part of a
17955b257b4aSDave Chinner 		 * transaction stale above, which means there is no point in
17965b257b4aSDave Chinner 		 * even trying to lock them.
17975b257b4aSDave Chinner 		 */
17985b257b4aSDave Chinner 		for (i = 0; i < ninodes; i++) {
17995b3eed75SDave Chinner retry:
18001a3e8f3dSDave Chinner 			rcu_read_lock();
18015b257b4aSDave Chinner 			ip = radix_tree_lookup(&pag->pag_ici_root,
18025b257b4aSDave Chinner 					XFS_INO_TO_AGINO(mp, (inum + i)));
18031da177e4SLinus Torvalds 
18041a3e8f3dSDave Chinner 			/* Inode not in memory, nothing to do */
18051a3e8f3dSDave Chinner 			if (!ip) {
18061a3e8f3dSDave Chinner 				rcu_read_unlock();
18075b257b4aSDave Chinner 				continue;
18085b257b4aSDave Chinner 			}
18095b257b4aSDave Chinner 
18105b3eed75SDave Chinner 			/*
18111a3e8f3dSDave Chinner 			 * because this is an RCU protected lookup, we could
18121a3e8f3dSDave Chinner 			 * find a recently freed or even reallocated inode
18131a3e8f3dSDave Chinner 			 * during the lookup. We need to check under the
18141a3e8f3dSDave Chinner 			 * i_flags_lock for a valid inode here. Skip it if it
18151a3e8f3dSDave Chinner 			 * is not valid, the wrong inode or stale.
18161a3e8f3dSDave Chinner 			 */
18171a3e8f3dSDave Chinner 			spin_lock(&ip->i_flags_lock);
18181a3e8f3dSDave Chinner 			if (ip->i_ino != inum + i ||
18191a3e8f3dSDave Chinner 			    __xfs_iflags_test(ip, XFS_ISTALE)) {
18201a3e8f3dSDave Chinner 				spin_unlock(&ip->i_flags_lock);
18211a3e8f3dSDave Chinner 				rcu_read_unlock();
18221a3e8f3dSDave Chinner 				continue;
18231a3e8f3dSDave Chinner 			}
18241a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
18251a3e8f3dSDave Chinner 
18261a3e8f3dSDave Chinner 			/*
18275b3eed75SDave Chinner 			 * Don't try to lock/unlock the current inode, but we
18285b3eed75SDave Chinner 			 * _cannot_ skip the other inodes that we did not find
18295b3eed75SDave Chinner 			 * in the list attached to the buffer and are not
18305b3eed75SDave Chinner 			 * already marked stale. If we can't lock it, back off
18315b3eed75SDave Chinner 			 * and retry.
18325b3eed75SDave Chinner 			 */
18335b257b4aSDave Chinner 			if (ip != free_ip &&
18345b257b4aSDave Chinner 			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
18351a3e8f3dSDave Chinner 				rcu_read_unlock();
18365b3eed75SDave Chinner 				delay(1);
18375b3eed75SDave Chinner 				goto retry;
18385b257b4aSDave Chinner 			}
18391a3e8f3dSDave Chinner 			rcu_read_unlock();
18405b257b4aSDave Chinner 
18415b3eed75SDave Chinner 			xfs_iflock(ip);
18425b257b4aSDave Chinner 			xfs_iflags_set(ip, XFS_ISTALE);
18435b257b4aSDave Chinner 
18445b3eed75SDave Chinner 			/*
18455b3eed75SDave Chinner 			 * we don't need to attach clean inodes or those only
18465b3eed75SDave Chinner 			 * with unlogged changes (which we throw away, anyway).
18475b3eed75SDave Chinner 			 */
18485b257b4aSDave Chinner 			iip = ip->i_itemp;
18495b3eed75SDave Chinner 			if (!iip || xfs_inode_clean(ip)) {
18505b257b4aSDave Chinner 				ASSERT(ip != free_ip);
18511da177e4SLinus Torvalds 				xfs_ifunlock(ip);
18521da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
18531da177e4SLinus Torvalds 				continue;
18541da177e4SLinus Torvalds 			}
18551da177e4SLinus Torvalds 
1856f5d8d5c4SChristoph Hellwig 			iip->ili_last_fields = iip->ili_fields;
1857f5d8d5c4SChristoph Hellwig 			iip->ili_fields = 0;
18581da177e4SLinus Torvalds 			iip->ili_logged = 1;
18597b2e2a31SDavid Chinner 			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
18607b2e2a31SDavid Chinner 						&iip->ili_item.li_lsn);
18611da177e4SLinus Torvalds 
1862ca30b2a7SChristoph Hellwig 			xfs_buf_attach_iodone(bp, xfs_istale_done,
1863ca30b2a7SChristoph Hellwig 						  &iip->ili_item);
18645b257b4aSDave Chinner 
18655b257b4aSDave Chinner 			if (ip != free_ip)
18661da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
18671da177e4SLinus Torvalds 		}
18681da177e4SLinus Torvalds 
18691da177e4SLinus Torvalds 		xfs_trans_stale_inode_buf(tp, bp);
18701da177e4SLinus Torvalds 		xfs_trans_binval(tp, bp);
18711da177e4SLinus Torvalds 	}
18721da177e4SLinus Torvalds 
18735017e97dSDave Chinner 	xfs_perag_put(pag);
18742a30f36dSChandra Seetharaman 	return 0;
18751da177e4SLinus Torvalds }
18761da177e4SLinus Torvalds 
18771da177e4SLinus Torvalds /*
18781da177e4SLinus Torvalds  * This is called to return an inode to the inode free list.
18791da177e4SLinus Torvalds  * The inode should already be truncated to 0 length and have
18801da177e4SLinus Torvalds  * no pages associated with it.  This routine also assumes that
18811da177e4SLinus Torvalds  * the inode is already a part of the transaction.
18821da177e4SLinus Torvalds  *
18831da177e4SLinus Torvalds  * The on-disk copy of the inode will have been added to the list
18841da177e4SLinus Torvalds  * of unlinked inodes in the AGI. We need to remove the inode from
18851da177e4SLinus Torvalds  * that list atomically with respect to freeing it here.
18861da177e4SLinus Torvalds  */
18871da177e4SLinus Torvalds int
18881da177e4SLinus Torvalds xfs_ifree(
18891da177e4SLinus Torvalds 	xfs_trans_t	*tp,
18901da177e4SLinus Torvalds 	xfs_inode_t	*ip,
18911da177e4SLinus Torvalds 	xfs_bmap_free_t	*flist)
18921da177e4SLinus Torvalds {
18931da177e4SLinus Torvalds 	int			error;
18941da177e4SLinus Torvalds 	int			delete;
18951da177e4SLinus Torvalds 	xfs_ino_t		first_ino;
1896c319b58bSVlad Apostolov 	xfs_dinode_t    	*dip;
1897c319b58bSVlad Apostolov 	xfs_buf_t       	*ibp;
18981da177e4SLinus Torvalds 
1899579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
19001da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
19011da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nextents == 0);
19021da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_anextents == 0);
1903ce7ae151SChristoph Hellwig 	ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
19041da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
19051da177e4SLinus Torvalds 
19061da177e4SLinus Torvalds 	/*
19071da177e4SLinus Torvalds 	 * Pull the on-disk inode from the AGI unlinked list.
19081da177e4SLinus Torvalds 	 */
19091da177e4SLinus Torvalds 	error = xfs_iunlink_remove(tp, ip);
19101da177e4SLinus Torvalds 	if (error != 0) {
19111da177e4SLinus Torvalds 		return error;
19121da177e4SLinus Torvalds 	}
19131da177e4SLinus Torvalds 
19141da177e4SLinus Torvalds 	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
19151da177e4SLinus Torvalds 	if (error != 0) {
19161da177e4SLinus Torvalds 		return error;
19171da177e4SLinus Torvalds 	}
19181da177e4SLinus Torvalds 	ip->i_d.di_mode = 0;		/* mark incore inode as free */
19191da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
19201da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
19211da177e4SLinus Torvalds 	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
19221da177e4SLinus Torvalds 	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
19231da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
19241da177e4SLinus Torvalds 	/*
19251da177e4SLinus Torvalds 	 * Bump the generation count so no one will be confused
19261da177e4SLinus Torvalds 	 * by reincarnations of this inode.
19271da177e4SLinus Torvalds 	 */
19281da177e4SLinus Torvalds 	ip->i_d.di_gen++;
1929c319b58bSVlad Apostolov 
19301da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
19311da177e4SLinus Torvalds 
1932475ee413SChristoph Hellwig 	error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
1933475ee413SChristoph Hellwig 			       0, 0);
1934c319b58bSVlad Apostolov 	if (error)
1935c319b58bSVlad Apostolov 		return error;
1936c319b58bSVlad Apostolov 
1937c319b58bSVlad Apostolov         /*
1938c319b58bSVlad Apostolov 	* Clear the on-disk di_mode. This is to prevent xfs_bulkstat
1939c319b58bSVlad Apostolov 	* from picking up this inode when it is reclaimed (its incore state
1940c319b58bSVlad Apostolov 	* initialzed but not flushed to disk yet). The in-core di_mode is
1941c319b58bSVlad Apostolov 	* already cleared  and a corresponding transaction logged.
1942c319b58bSVlad Apostolov 	* The hack here just synchronizes the in-core to on-disk
1943c319b58bSVlad Apostolov 	* di_mode value in advance before the actual inode sync to disk.
1944c319b58bSVlad Apostolov 	* This is OK because the inode is already unlinked and would never
1945c319b58bSVlad Apostolov 	* change its di_mode again for this inode generation.
1946c319b58bSVlad Apostolov 	* This is a temporary hack that would require a proper fix
1947c319b58bSVlad Apostolov 	* in the future.
1948c319b58bSVlad Apostolov 	*/
194981591fe2SChristoph Hellwig 	dip->di_mode = 0;
1950c319b58bSVlad Apostolov 
19511da177e4SLinus Torvalds 	if (delete) {
19522a30f36dSChandra Seetharaman 		error = xfs_ifree_cluster(ip, tp, first_ino);
19531da177e4SLinus Torvalds 	}
19541da177e4SLinus Torvalds 
19552a30f36dSChandra Seetharaman 	return error;
19561da177e4SLinus Torvalds }
19571da177e4SLinus Torvalds 
19581da177e4SLinus Torvalds /*
19591da177e4SLinus Torvalds  * Reallocate the space for if_broot based on the number of records
19601da177e4SLinus Torvalds  * being added or deleted as indicated in rec_diff.  Move the records
19611da177e4SLinus Torvalds  * and pointers in if_broot to fit the new size.  When shrinking this
19621da177e4SLinus Torvalds  * will eliminate holes between the records and pointers created by
19631da177e4SLinus Torvalds  * the caller.  When growing this will create holes to be filled in
19641da177e4SLinus Torvalds  * by the caller.
19651da177e4SLinus Torvalds  *
19661da177e4SLinus Torvalds  * The caller must not request to add more records than would fit in
19671da177e4SLinus Torvalds  * the on-disk inode root.  If the if_broot is currently NULL, then
19681da177e4SLinus Torvalds  * if we adding records one will be allocated.  The caller must also
19691da177e4SLinus Torvalds  * not request that the number of records go below zero, although
19701da177e4SLinus Torvalds  * it can go to zero.
19711da177e4SLinus Torvalds  *
19721da177e4SLinus Torvalds  * ip -- the inode whose if_broot area is changing
19731da177e4SLinus Torvalds  * ext_diff -- the change in the number of records, positive or negative,
19741da177e4SLinus Torvalds  *	 requested for the if_broot array.
19751da177e4SLinus Torvalds  */
19761da177e4SLinus Torvalds void
19771da177e4SLinus Torvalds xfs_iroot_realloc(
19781da177e4SLinus Torvalds 	xfs_inode_t		*ip,
19791da177e4SLinus Torvalds 	int			rec_diff,
19801da177e4SLinus Torvalds 	int			whichfork)
19811da177e4SLinus Torvalds {
198260197e8dSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
19831da177e4SLinus Torvalds 	int			cur_max;
19841da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
19857cc95a82SChristoph Hellwig 	struct xfs_btree_block	*new_broot;
19861da177e4SLinus Torvalds 	int			new_max;
19871da177e4SLinus Torvalds 	size_t			new_size;
19881da177e4SLinus Torvalds 	char			*np;
19891da177e4SLinus Torvalds 	char			*op;
19901da177e4SLinus Torvalds 
19911da177e4SLinus Torvalds 	/*
19921da177e4SLinus Torvalds 	 * Handle the degenerate case quietly.
19931da177e4SLinus Torvalds 	 */
19941da177e4SLinus Torvalds 	if (rec_diff == 0) {
19951da177e4SLinus Torvalds 		return;
19961da177e4SLinus Torvalds 	}
19971da177e4SLinus Torvalds 
19981da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
19991da177e4SLinus Torvalds 	if (rec_diff > 0) {
20001da177e4SLinus Torvalds 		/*
20011da177e4SLinus Torvalds 		 * If there wasn't any memory allocated before, just
20021da177e4SLinus Torvalds 		 * allocate it now and get out.
20031da177e4SLinus Torvalds 		 */
20041da177e4SLinus Torvalds 		if (ifp->if_broot_bytes == 0) {
20051da177e4SLinus Torvalds 			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
20064a7edddcSDave Chinner 			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
20071da177e4SLinus Torvalds 			ifp->if_broot_bytes = (int)new_size;
20081da177e4SLinus Torvalds 			return;
20091da177e4SLinus Torvalds 		}
20101da177e4SLinus Torvalds 
20111da177e4SLinus Torvalds 		/*
20121da177e4SLinus Torvalds 		 * If there is already an existing if_broot, then we need
20131da177e4SLinus Torvalds 		 * to realloc() it and shift the pointers to their new
20141da177e4SLinus Torvalds 		 * location.  The records don't change location because
20151da177e4SLinus Torvalds 		 * they are kept butted up against the btree block header.
20161da177e4SLinus Torvalds 		 */
201760197e8dSChristoph Hellwig 		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
20181da177e4SLinus Torvalds 		new_max = cur_max + rec_diff;
20191da177e4SLinus Torvalds 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
20207cc95a82SChristoph Hellwig 		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
20211da177e4SLinus Torvalds 				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
20224a7edddcSDave Chinner 				KM_SLEEP | KM_NOFS);
202360197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
20241da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
202560197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
20261da177e4SLinus Torvalds 						     (int)new_size);
20271da177e4SLinus Torvalds 		ifp->if_broot_bytes = (int)new_size;
20281da177e4SLinus Torvalds 		ASSERT(ifp->if_broot_bytes <=
20291da177e4SLinus Torvalds 			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
20301da177e4SLinus Torvalds 		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
20311da177e4SLinus Torvalds 		return;
20321da177e4SLinus Torvalds 	}
20331da177e4SLinus Torvalds 
20341da177e4SLinus Torvalds 	/*
20351da177e4SLinus Torvalds 	 * rec_diff is less than 0.  In this case, we are shrinking the
20361da177e4SLinus Torvalds 	 * if_broot buffer.  It must already exist.  If we go to zero
20371da177e4SLinus Torvalds 	 * records, just get rid of the root and clear the status bit.
20381da177e4SLinus Torvalds 	 */
20391da177e4SLinus Torvalds 	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
204060197e8dSChristoph Hellwig 	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
20411da177e4SLinus Torvalds 	new_max = cur_max + rec_diff;
20421da177e4SLinus Torvalds 	ASSERT(new_max >= 0);
20431da177e4SLinus Torvalds 	if (new_max > 0)
20441da177e4SLinus Torvalds 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
20451da177e4SLinus Torvalds 	else
20461da177e4SLinus Torvalds 		new_size = 0;
20471da177e4SLinus Torvalds 	if (new_size > 0) {
20484a7edddcSDave Chinner 		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
20491da177e4SLinus Torvalds 		/*
20501da177e4SLinus Torvalds 		 * First copy over the btree block header.
20511da177e4SLinus Torvalds 		 */
20527cc95a82SChristoph Hellwig 		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
20531da177e4SLinus Torvalds 	} else {
20541da177e4SLinus Torvalds 		new_broot = NULL;
20551da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFBROOT;
20561da177e4SLinus Torvalds 	}
20571da177e4SLinus Torvalds 
20581da177e4SLinus Torvalds 	/*
20591da177e4SLinus Torvalds 	 * Only copy the records and pointers if there are any.
20601da177e4SLinus Torvalds 	 */
20611da177e4SLinus Torvalds 	if (new_max > 0) {
20621da177e4SLinus Torvalds 		/*
20631da177e4SLinus Torvalds 		 * First copy the records.
20641da177e4SLinus Torvalds 		 */
2065136341b4SChristoph Hellwig 		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
2066136341b4SChristoph Hellwig 		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
20671da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
20681da177e4SLinus Torvalds 
20691da177e4SLinus Torvalds 		/*
20701da177e4SLinus Torvalds 		 * Then copy the pointers.
20711da177e4SLinus Torvalds 		 */
207260197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
20731da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
207460197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
20751da177e4SLinus Torvalds 						     (int)new_size);
20761da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
20771da177e4SLinus Torvalds 	}
2078f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_broot);
20791da177e4SLinus Torvalds 	ifp->if_broot = new_broot;
20801da177e4SLinus Torvalds 	ifp->if_broot_bytes = (int)new_size;
20811da177e4SLinus Torvalds 	ASSERT(ifp->if_broot_bytes <=
20821da177e4SLinus Torvalds 		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
20831da177e4SLinus Torvalds 	return;
20841da177e4SLinus Torvalds }
20851da177e4SLinus Torvalds 
20861da177e4SLinus Torvalds 
20871da177e4SLinus Torvalds /*
20881da177e4SLinus Torvalds  * This is called when the amount of space needed for if_data
20891da177e4SLinus Torvalds  * is increased or decreased.  The change in size is indicated by
20901da177e4SLinus Torvalds  * the number of bytes that need to be added or deleted in the
20911da177e4SLinus Torvalds  * byte_diff parameter.
20921da177e4SLinus Torvalds  *
20931da177e4SLinus Torvalds  * If the amount of space needed has decreased below the size of the
20941da177e4SLinus Torvalds  * inline buffer, then switch to using the inline buffer.  Otherwise,
20951da177e4SLinus Torvalds  * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
20961da177e4SLinus Torvalds  * to what is needed.
20971da177e4SLinus Torvalds  *
20981da177e4SLinus Torvalds  * ip -- the inode whose if_data area is changing
20991da177e4SLinus Torvalds  * byte_diff -- the change in the number of bytes, positive or negative,
21001da177e4SLinus Torvalds  *	 requested for the if_data array.
21011da177e4SLinus Torvalds  */
21021da177e4SLinus Torvalds void
21031da177e4SLinus Torvalds xfs_idata_realloc(
21041da177e4SLinus Torvalds 	xfs_inode_t	*ip,
21051da177e4SLinus Torvalds 	int		byte_diff,
21061da177e4SLinus Torvalds 	int		whichfork)
21071da177e4SLinus Torvalds {
21081da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
21091da177e4SLinus Torvalds 	int		new_size;
21101da177e4SLinus Torvalds 	int		real_size;
21111da177e4SLinus Torvalds 
21121da177e4SLinus Torvalds 	if (byte_diff == 0) {
21131da177e4SLinus Torvalds 		return;
21141da177e4SLinus Torvalds 	}
21151da177e4SLinus Torvalds 
21161da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
21171da177e4SLinus Torvalds 	new_size = (int)ifp->if_bytes + byte_diff;
21181da177e4SLinus Torvalds 	ASSERT(new_size >= 0);
21191da177e4SLinus Torvalds 
21201da177e4SLinus Torvalds 	if (new_size == 0) {
21211da177e4SLinus Torvalds 		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2122f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
21231da177e4SLinus Torvalds 		}
21241da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
21251da177e4SLinus Torvalds 		real_size = 0;
21261da177e4SLinus Torvalds 	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
21271da177e4SLinus Torvalds 		/*
21281da177e4SLinus Torvalds 		 * If the valid extents/data can fit in if_inline_ext/data,
21291da177e4SLinus Torvalds 		 * copy them from the malloc'd vector and free it.
21301da177e4SLinus Torvalds 		 */
21311da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
21321da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
21331da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
21341da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
21351da177e4SLinus Torvalds 			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
21361da177e4SLinus Torvalds 			      new_size);
2137f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
21381da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
21391da177e4SLinus Torvalds 		}
21401da177e4SLinus Torvalds 		real_size = 0;
21411da177e4SLinus Torvalds 	} else {
21421da177e4SLinus Torvalds 		/*
21431da177e4SLinus Torvalds 		 * Stuck with malloc/realloc.
21441da177e4SLinus Torvalds 		 * For inline data, the underlying buffer must be
21451da177e4SLinus Torvalds 		 * a multiple of 4 bytes in size so that it can be
21461da177e4SLinus Torvalds 		 * logged and stay on word boundaries.  We enforce
21471da177e4SLinus Torvalds 		 * that here.
21481da177e4SLinus Torvalds 		 */
21491da177e4SLinus Torvalds 		real_size = roundup(new_size, 4);
21501da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
21511da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
21524a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
21534a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
21541da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
21551da177e4SLinus Torvalds 			/*
21561da177e4SLinus Torvalds 			 * Only do the realloc if the underlying size
21571da177e4SLinus Torvalds 			 * is really changing.
21581da177e4SLinus Torvalds 			 */
21591da177e4SLinus Torvalds 			if (ifp->if_real_bytes != real_size) {
21601da177e4SLinus Torvalds 				ifp->if_u1.if_data =
21611da177e4SLinus Torvalds 					kmem_realloc(ifp->if_u1.if_data,
21621da177e4SLinus Torvalds 							real_size,
21631da177e4SLinus Torvalds 							ifp->if_real_bytes,
21644a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
21651da177e4SLinus Torvalds 			}
21661da177e4SLinus Torvalds 		} else {
21671da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
21684a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
21694a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
21701da177e4SLinus Torvalds 			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
21711da177e4SLinus Torvalds 				ifp->if_bytes);
21721da177e4SLinus Torvalds 		}
21731da177e4SLinus Torvalds 	}
21741da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
21751da177e4SLinus Torvalds 	ifp->if_bytes = new_size;
21761da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
21771da177e4SLinus Torvalds }
21781da177e4SLinus Torvalds 
21791da177e4SLinus Torvalds void
21801da177e4SLinus Torvalds xfs_idestroy_fork(
21811da177e4SLinus Torvalds 	xfs_inode_t	*ip,
21821da177e4SLinus Torvalds 	int		whichfork)
21831da177e4SLinus Torvalds {
21841da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
21851da177e4SLinus Torvalds 
21861da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
21871da177e4SLinus Torvalds 	if (ifp->if_broot != NULL) {
2188f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_broot);
21891da177e4SLinus Torvalds 		ifp->if_broot = NULL;
21901da177e4SLinus Torvalds 	}
21911da177e4SLinus Torvalds 
21921da177e4SLinus Torvalds 	/*
21931da177e4SLinus Torvalds 	 * If the format is local, then we can't have an extents
21941da177e4SLinus Torvalds 	 * array so just look for an inline data array.  If we're
21951da177e4SLinus Torvalds 	 * not local then we may or may not have an extents list,
21961da177e4SLinus Torvalds 	 * so check and free it up if we do.
21971da177e4SLinus Torvalds 	 */
21981da177e4SLinus Torvalds 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
21991da177e4SLinus Torvalds 		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
22001da177e4SLinus Torvalds 		    (ifp->if_u1.if_data != NULL)) {
22011da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
2202f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
22031da177e4SLinus Torvalds 			ifp->if_u1.if_data = NULL;
22041da177e4SLinus Torvalds 			ifp->if_real_bytes = 0;
22051da177e4SLinus Torvalds 		}
22061da177e4SLinus Torvalds 	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
22070293ce3aSMandy Kirkconnell 		   ((ifp->if_flags & XFS_IFEXTIREC) ||
22080293ce3aSMandy Kirkconnell 		    ((ifp->if_u1.if_extents != NULL) &&
22090293ce3aSMandy Kirkconnell 		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
22101da177e4SLinus Torvalds 		ASSERT(ifp->if_real_bytes != 0);
22114eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
22121da177e4SLinus Torvalds 	}
22131da177e4SLinus Torvalds 	ASSERT(ifp->if_u1.if_extents == NULL ||
22141da177e4SLinus Torvalds 	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
22151da177e4SLinus Torvalds 	ASSERT(ifp->if_real_bytes == 0);
22161da177e4SLinus Torvalds 	if (whichfork == XFS_ATTR_FORK) {
22171da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
22181da177e4SLinus Torvalds 		ip->i_afp = NULL;
22191da177e4SLinus Torvalds 	}
22201da177e4SLinus Torvalds }
22211da177e4SLinus Torvalds 
22221da177e4SLinus Torvalds /*
222360ec6783SChristoph Hellwig  * This is called to unpin an inode.  The caller must have the inode locked
222460ec6783SChristoph Hellwig  * in at least shared mode so that the buffer cannot be subsequently pinned
222560ec6783SChristoph Hellwig  * once someone is waiting for it to be unpinned.
22261da177e4SLinus Torvalds  */
222760ec6783SChristoph Hellwig static void
2228f392e631SChristoph Hellwig xfs_iunpin(
222960ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
2230a3f74ffbSDavid Chinner {
2231579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2232a3f74ffbSDavid Chinner 
22334aaf15d1SDave Chinner 	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
22344aaf15d1SDave Chinner 
2235a3f74ffbSDavid Chinner 	/* Give the log a push to start the unpinning I/O */
223660ec6783SChristoph Hellwig 	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2237a14a348bSChristoph Hellwig 
2238a3f74ffbSDavid Chinner }
2239a3f74ffbSDavid Chinner 
2240f392e631SChristoph Hellwig static void
2241f392e631SChristoph Hellwig __xfs_iunpin_wait(
2242f392e631SChristoph Hellwig 	struct xfs_inode	*ip)
2243f392e631SChristoph Hellwig {
2244f392e631SChristoph Hellwig 	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
2245f392e631SChristoph Hellwig 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
2246f392e631SChristoph Hellwig 
2247f392e631SChristoph Hellwig 	xfs_iunpin(ip);
2248f392e631SChristoph Hellwig 
2249f392e631SChristoph Hellwig 	do {
2250f392e631SChristoph Hellwig 		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
2251f392e631SChristoph Hellwig 		if (xfs_ipincount(ip))
2252f392e631SChristoph Hellwig 			io_schedule();
2253f392e631SChristoph Hellwig 	} while (xfs_ipincount(ip));
2254f392e631SChristoph Hellwig 	finish_wait(wq, &wait.wait);
2255f392e631SChristoph Hellwig }
2256f392e631SChristoph Hellwig 
2257777df5afSDave Chinner void
22581da177e4SLinus Torvalds xfs_iunpin_wait(
225960ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
22601da177e4SLinus Torvalds {
2261f392e631SChristoph Hellwig 	if (xfs_ipincount(ip))
2262f392e631SChristoph Hellwig 		__xfs_iunpin_wait(ip);
22631da177e4SLinus Torvalds }
22641da177e4SLinus Torvalds 
22651da177e4SLinus Torvalds /*
22661da177e4SLinus Torvalds  * xfs_iextents_copy()
22671da177e4SLinus Torvalds  *
22681da177e4SLinus Torvalds  * This is called to copy the REAL extents (as opposed to the delayed
22691da177e4SLinus Torvalds  * allocation extents) from the inode into the given buffer.  It
22701da177e4SLinus Torvalds  * returns the number of bytes copied into the buffer.
22711da177e4SLinus Torvalds  *
22721da177e4SLinus Torvalds  * If there are no delayed allocation extents, then we can just
22731da177e4SLinus Torvalds  * memcpy() the extents into the buffer.  Otherwise, we need to
22741da177e4SLinus Torvalds  * examine each extent in turn and skip those which are delayed.
22751da177e4SLinus Torvalds  */
22761da177e4SLinus Torvalds int
22771da177e4SLinus Torvalds xfs_iextents_copy(
22781da177e4SLinus Torvalds 	xfs_inode_t		*ip,
2279a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t		*dp,
22801da177e4SLinus Torvalds 	int			whichfork)
22811da177e4SLinus Torvalds {
22821da177e4SLinus Torvalds 	int			copied;
22831da177e4SLinus Torvalds 	int			i;
22841da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
22851da177e4SLinus Torvalds 	int			nrecs;
22861da177e4SLinus Torvalds 	xfs_fsblock_t		start_block;
22871da177e4SLinus Torvalds 
22881da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
2289579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
22901da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes > 0);
22911da177e4SLinus Torvalds 
22921da177e4SLinus Torvalds 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
22933a59c94cSEric Sandeen 	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
22941da177e4SLinus Torvalds 	ASSERT(nrecs > 0);
22951da177e4SLinus Torvalds 
22961da177e4SLinus Torvalds 	/*
22971da177e4SLinus Torvalds 	 * There are some delayed allocation extents in the
22981da177e4SLinus Torvalds 	 * inode, so copy the extents one at a time and skip
22991da177e4SLinus Torvalds 	 * the delayed ones.  There must be at least one
23001da177e4SLinus Torvalds 	 * non-delayed extent.
23011da177e4SLinus Torvalds 	 */
23021da177e4SLinus Torvalds 	copied = 0;
23031da177e4SLinus Torvalds 	for (i = 0; i < nrecs; i++) {
2304a6f64d4aSChristoph Hellwig 		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
23051da177e4SLinus Torvalds 		start_block = xfs_bmbt_get_startblock(ep);
23069d87c319SEric Sandeen 		if (isnullstartblock(start_block)) {
23071da177e4SLinus Torvalds 			/*
23081da177e4SLinus Torvalds 			 * It's a delayed allocation extent, so skip it.
23091da177e4SLinus Torvalds 			 */
23101da177e4SLinus Torvalds 			continue;
23111da177e4SLinus Torvalds 		}
23121da177e4SLinus Torvalds 
23131da177e4SLinus Torvalds 		/* Translate to on disk format */
2314cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
2315cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
2316a6f64d4aSChristoph Hellwig 		dp++;
23171da177e4SLinus Torvalds 		copied++;
23181da177e4SLinus Torvalds 	}
23191da177e4SLinus Torvalds 	ASSERT(copied != 0);
2320a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
23211da177e4SLinus Torvalds 
23221da177e4SLinus Torvalds 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
23231da177e4SLinus Torvalds }
23241da177e4SLinus Torvalds 
23251da177e4SLinus Torvalds /*
23261da177e4SLinus Torvalds  * Each of the following cases stores data into the same region
23271da177e4SLinus Torvalds  * of the on-disk inode, so only one of them can be valid at
23281da177e4SLinus Torvalds  * any given time. While it is possible to have conflicting formats
23291da177e4SLinus Torvalds  * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
23301da177e4SLinus Torvalds  * in EXTENTS format, this can only happen when the fork has
23311da177e4SLinus Torvalds  * changed formats after being modified but before being flushed.
23321da177e4SLinus Torvalds  * In these cases, the format always takes precedence, because the
23331da177e4SLinus Torvalds  * format indicates the current state of the fork.
23341da177e4SLinus Torvalds  */
23351da177e4SLinus Torvalds /*ARGSUSED*/
2336e4ac967bSDavid Chinner STATIC void
23371da177e4SLinus Torvalds xfs_iflush_fork(
23381da177e4SLinus Torvalds 	xfs_inode_t		*ip,
23391da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
23401da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip,
23411da177e4SLinus Torvalds 	int			whichfork,
23421da177e4SLinus Torvalds 	xfs_buf_t		*bp)
23431da177e4SLinus Torvalds {
23441da177e4SLinus Torvalds 	char			*cp;
23451da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
23461da177e4SLinus Torvalds 	xfs_mount_t		*mp;
23471da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG
23481da177e4SLinus Torvalds 	int			first;
23491da177e4SLinus Torvalds #endif
23501da177e4SLinus Torvalds 	static const short	brootflag[2] =
23511da177e4SLinus Torvalds 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
23521da177e4SLinus Torvalds 	static const short	dataflag[2] =
23531da177e4SLinus Torvalds 		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
23541da177e4SLinus Torvalds 	static const short	extflag[2] =
23551da177e4SLinus Torvalds 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
23561da177e4SLinus Torvalds 
2357e4ac967bSDavid Chinner 	if (!iip)
2358e4ac967bSDavid Chinner 		return;
23591da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
23601da177e4SLinus Torvalds 	/*
23611da177e4SLinus Torvalds 	 * This can happen if we gave up in iformat in an error path,
23621da177e4SLinus Torvalds 	 * for the attribute fork.
23631da177e4SLinus Torvalds 	 */
2364e4ac967bSDavid Chinner 	if (!ifp) {
23651da177e4SLinus Torvalds 		ASSERT(whichfork == XFS_ATTR_FORK);
2366e4ac967bSDavid Chinner 		return;
23671da177e4SLinus Torvalds 	}
23681da177e4SLinus Torvalds 	cp = XFS_DFORK_PTR(dip, whichfork);
23691da177e4SLinus Torvalds 	mp = ip->i_mount;
23701da177e4SLinus Torvalds 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
23711da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
2372f5d8d5c4SChristoph Hellwig 		if ((iip->ili_fields & dataflag[whichfork]) &&
23731da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
23741da177e4SLinus Torvalds 			ASSERT(ifp->if_u1.if_data != NULL);
23751da177e4SLinus Torvalds 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
23761da177e4SLinus Torvalds 			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
23771da177e4SLinus Torvalds 		}
23781da177e4SLinus Torvalds 		break;
23791da177e4SLinus Torvalds 
23801da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
23811da177e4SLinus Torvalds 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2382f5d8d5c4SChristoph Hellwig 		       !(iip->ili_fields & extflag[whichfork]));
2383f5d8d5c4SChristoph Hellwig 		if ((iip->ili_fields & extflag[whichfork]) &&
23841da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
2385ab1908a5SChristoph Hellwig 			ASSERT(xfs_iext_get_ext(ifp, 0));
23861da177e4SLinus Torvalds 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
23871da177e4SLinus Torvalds 			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
23881da177e4SLinus Torvalds 				whichfork);
23891da177e4SLinus Torvalds 		}
23901da177e4SLinus Torvalds 		break;
23911da177e4SLinus Torvalds 
23921da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
2393f5d8d5c4SChristoph Hellwig 		if ((iip->ili_fields & brootflag[whichfork]) &&
23941da177e4SLinus Torvalds 		    (ifp->if_broot_bytes > 0)) {
23951da177e4SLinus Torvalds 			ASSERT(ifp->if_broot != NULL);
23961da177e4SLinus Torvalds 			ASSERT(ifp->if_broot_bytes <=
23971da177e4SLinus Torvalds 			       (XFS_IFORK_SIZE(ip, whichfork) +
23981da177e4SLinus Torvalds 				XFS_BROOT_SIZE_ADJ));
239960197e8dSChristoph Hellwig 			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
24001da177e4SLinus Torvalds 				(xfs_bmdr_block_t *)cp,
24011da177e4SLinus Torvalds 				XFS_DFORK_SIZE(dip, mp, whichfork));
24021da177e4SLinus Torvalds 		}
24031da177e4SLinus Torvalds 		break;
24041da177e4SLinus Torvalds 
24051da177e4SLinus Torvalds 	case XFS_DINODE_FMT_DEV:
2406f5d8d5c4SChristoph Hellwig 		if (iip->ili_fields & XFS_ILOG_DEV) {
24071da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
240881591fe2SChristoph Hellwig 			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
24091da177e4SLinus Torvalds 		}
24101da177e4SLinus Torvalds 		break;
24111da177e4SLinus Torvalds 
24121da177e4SLinus Torvalds 	case XFS_DINODE_FMT_UUID:
2413f5d8d5c4SChristoph Hellwig 		if (iip->ili_fields & XFS_ILOG_UUID) {
24141da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
241581591fe2SChristoph Hellwig 			memcpy(XFS_DFORK_DPTR(dip),
241681591fe2SChristoph Hellwig 			       &ip->i_df.if_u2.if_uuid,
24171da177e4SLinus Torvalds 			       sizeof(uuid_t));
24181da177e4SLinus Torvalds 		}
24191da177e4SLinus Torvalds 		break;
24201da177e4SLinus Torvalds 
24211da177e4SLinus Torvalds 	default:
24221da177e4SLinus Torvalds 		ASSERT(0);
24231da177e4SLinus Torvalds 		break;
24241da177e4SLinus Torvalds 	}
24251da177e4SLinus Torvalds }
24261da177e4SLinus Torvalds 
2427bad55843SDavid Chinner STATIC int
2428bad55843SDavid Chinner xfs_iflush_cluster(
2429bad55843SDavid Chinner 	xfs_inode_t	*ip,
2430bad55843SDavid Chinner 	xfs_buf_t	*bp)
2431bad55843SDavid Chinner {
2432bad55843SDavid Chinner 	xfs_mount_t		*mp = ip->i_mount;
24335017e97dSDave Chinner 	struct xfs_perag	*pag;
2434bad55843SDavid Chinner 	unsigned long		first_index, mask;
2435c8f5f12eSDavid Chinner 	unsigned long		inodes_per_cluster;
2436bad55843SDavid Chinner 	int			ilist_size;
2437bad55843SDavid Chinner 	xfs_inode_t		**ilist;
2438bad55843SDavid Chinner 	xfs_inode_t		*iq;
2439bad55843SDavid Chinner 	int			nr_found;
2440bad55843SDavid Chinner 	int			clcount = 0;
2441bad55843SDavid Chinner 	int			bufwasdelwri;
2442bad55843SDavid Chinner 	int			i;
2443bad55843SDavid Chinner 
24445017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2445bad55843SDavid Chinner 
2446c8f5f12eSDavid Chinner 	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2447c8f5f12eSDavid Chinner 	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
244849383b0eSDavid Chinner 	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2449bad55843SDavid Chinner 	if (!ilist)
245044b56e0aSDave Chinner 		goto out_put;
2451bad55843SDavid Chinner 
2452bad55843SDavid Chinner 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2453bad55843SDavid Chinner 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
24541a3e8f3dSDave Chinner 	rcu_read_lock();
2455bad55843SDavid Chinner 	/* really need a gang lookup range call here */
2456bad55843SDavid Chinner 	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2457c8f5f12eSDavid Chinner 					first_index, inodes_per_cluster);
2458bad55843SDavid Chinner 	if (nr_found == 0)
2459bad55843SDavid Chinner 		goto out_free;
2460bad55843SDavid Chinner 
2461bad55843SDavid Chinner 	for (i = 0; i < nr_found; i++) {
2462bad55843SDavid Chinner 		iq = ilist[i];
2463bad55843SDavid Chinner 		if (iq == ip)
2464bad55843SDavid Chinner 			continue;
24651a3e8f3dSDave Chinner 
24661a3e8f3dSDave Chinner 		/*
24671a3e8f3dSDave Chinner 		 * because this is an RCU protected lookup, we could find a
24681a3e8f3dSDave Chinner 		 * recently freed or even reallocated inode during the lookup.
24691a3e8f3dSDave Chinner 		 * We need to check under the i_flags_lock for a valid inode
24701a3e8f3dSDave Chinner 		 * here. Skip it if it is not valid or the wrong inode.
24711a3e8f3dSDave Chinner 		 */
24721a3e8f3dSDave Chinner 		spin_lock(&ip->i_flags_lock);
24731a3e8f3dSDave Chinner 		if (!ip->i_ino ||
24741a3e8f3dSDave Chinner 		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
24751a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
24761a3e8f3dSDave Chinner 			continue;
24771a3e8f3dSDave Chinner 		}
24781a3e8f3dSDave Chinner 		spin_unlock(&ip->i_flags_lock);
24791a3e8f3dSDave Chinner 
2480bad55843SDavid Chinner 		/*
2481bad55843SDavid Chinner 		 * Do an un-protected check to see if the inode is dirty and
2482bad55843SDavid Chinner 		 * is a candidate for flushing.  These checks will be repeated
2483bad55843SDavid Chinner 		 * later after the appropriate locks are acquired.
2484bad55843SDavid Chinner 		 */
248533540408SDavid Chinner 		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
2486bad55843SDavid Chinner 			continue;
2487bad55843SDavid Chinner 
2488bad55843SDavid Chinner 		/*
2489bad55843SDavid Chinner 		 * Try to get locks.  If any are unavailable or it is pinned,
2490bad55843SDavid Chinner 		 * then this inode cannot be flushed and is skipped.
2491bad55843SDavid Chinner 		 */
2492bad55843SDavid Chinner 
2493bad55843SDavid Chinner 		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
2494bad55843SDavid Chinner 			continue;
2495bad55843SDavid Chinner 		if (!xfs_iflock_nowait(iq)) {
2496bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2497bad55843SDavid Chinner 			continue;
2498bad55843SDavid Chinner 		}
2499bad55843SDavid Chinner 		if (xfs_ipincount(iq)) {
2500bad55843SDavid Chinner 			xfs_ifunlock(iq);
2501bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2502bad55843SDavid Chinner 			continue;
2503bad55843SDavid Chinner 		}
2504bad55843SDavid Chinner 
2505bad55843SDavid Chinner 		/*
2506bad55843SDavid Chinner 		 * arriving here means that this inode can be flushed.  First
2507bad55843SDavid Chinner 		 * re-check that it's dirty before flushing.
2508bad55843SDavid Chinner 		 */
250933540408SDavid Chinner 		if (!xfs_inode_clean(iq)) {
2510bad55843SDavid Chinner 			int	error;
2511bad55843SDavid Chinner 			error = xfs_iflush_int(iq, bp);
2512bad55843SDavid Chinner 			if (error) {
2513bad55843SDavid Chinner 				xfs_iunlock(iq, XFS_ILOCK_SHARED);
2514bad55843SDavid Chinner 				goto cluster_corrupt_out;
2515bad55843SDavid Chinner 			}
2516bad55843SDavid Chinner 			clcount++;
2517bad55843SDavid Chinner 		} else {
2518bad55843SDavid Chinner 			xfs_ifunlock(iq);
2519bad55843SDavid Chinner 		}
2520bad55843SDavid Chinner 		xfs_iunlock(iq, XFS_ILOCK_SHARED);
2521bad55843SDavid Chinner 	}
2522bad55843SDavid Chinner 
2523bad55843SDavid Chinner 	if (clcount) {
2524bad55843SDavid Chinner 		XFS_STATS_INC(xs_icluster_flushcnt);
2525bad55843SDavid Chinner 		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
2526bad55843SDavid Chinner 	}
2527bad55843SDavid Chinner 
2528bad55843SDavid Chinner out_free:
25291a3e8f3dSDave Chinner 	rcu_read_unlock();
2530f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
253144b56e0aSDave Chinner out_put:
253244b56e0aSDave Chinner 	xfs_perag_put(pag);
2533bad55843SDavid Chinner 	return 0;
2534bad55843SDavid Chinner 
2535bad55843SDavid Chinner 
2536bad55843SDavid Chinner cluster_corrupt_out:
2537bad55843SDavid Chinner 	/*
2538bad55843SDavid Chinner 	 * Corruption detected in the clustering loop.  Invalidate the
2539bad55843SDavid Chinner 	 * inode buffer and shut down the filesystem.
2540bad55843SDavid Chinner 	 */
25411a3e8f3dSDave Chinner 	rcu_read_unlock();
2542bad55843SDavid Chinner 	/*
254343ff2122SChristoph Hellwig 	 * Clean up the buffer.  If it was delwri, just release it --
2544bad55843SDavid Chinner 	 * brelse can handle it with no problems.  If not, shut down the
2545bad55843SDavid Chinner 	 * filesystem before releasing the buffer.
2546bad55843SDavid Chinner 	 */
254743ff2122SChristoph Hellwig 	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
2548bad55843SDavid Chinner 	if (bufwasdelwri)
2549bad55843SDavid Chinner 		xfs_buf_relse(bp);
2550bad55843SDavid Chinner 
2551bad55843SDavid Chinner 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2552bad55843SDavid Chinner 
2553bad55843SDavid Chinner 	if (!bufwasdelwri) {
2554bad55843SDavid Chinner 		/*
2555bad55843SDavid Chinner 		 * Just like incore_relse: if we have b_iodone functions,
2556bad55843SDavid Chinner 		 * mark the buffer as an error and call them.  Otherwise
2557bad55843SDavid Chinner 		 * mark it as stale and brelse.
2558bad55843SDavid Chinner 		 */
2559cb669ca5SChristoph Hellwig 		if (bp->b_iodone) {
2560bad55843SDavid Chinner 			XFS_BUF_UNDONE(bp);
2561c867cb61SChristoph Hellwig 			xfs_buf_stale(bp);
25625a52c2a5SChandra Seetharaman 			xfs_buf_ioerror(bp, EIO);
25631a1a3e97SChristoph Hellwig 			xfs_buf_ioend(bp, 0);
2564bad55843SDavid Chinner 		} else {
2565c867cb61SChristoph Hellwig 			xfs_buf_stale(bp);
2566bad55843SDavid Chinner 			xfs_buf_relse(bp);
2567bad55843SDavid Chinner 		}
2568bad55843SDavid Chinner 	}
2569bad55843SDavid Chinner 
2570bad55843SDavid Chinner 	/*
2571bad55843SDavid Chinner 	 * Unlocks the flush lock
2572bad55843SDavid Chinner 	 */
257304913fddSDave Chinner 	xfs_iflush_abort(iq, false);
2574f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
257544b56e0aSDave Chinner 	xfs_perag_put(pag);
2576bad55843SDavid Chinner 	return XFS_ERROR(EFSCORRUPTED);
2577bad55843SDavid Chinner }
2578bad55843SDavid Chinner 
25791da177e4SLinus Torvalds /*
25804c46819aSChristoph Hellwig  * Flush dirty inode metadata into the backing buffer.
25814c46819aSChristoph Hellwig  *
25824c46819aSChristoph Hellwig  * The caller must have the inode lock and the inode flush lock held.  The
25834c46819aSChristoph Hellwig  * inode lock will still be held upon return to the caller, and the inode
25844c46819aSChristoph Hellwig  * flush lock will be released after the inode has reached the disk.
25854c46819aSChristoph Hellwig  *
25864c46819aSChristoph Hellwig  * The caller must write out the buffer returned in *bpp and release it.
25871da177e4SLinus Torvalds  */
25881da177e4SLinus Torvalds int
25891da177e4SLinus Torvalds xfs_iflush(
25904c46819aSChristoph Hellwig 	struct xfs_inode	*ip,
25914c46819aSChristoph Hellwig 	struct xfs_buf		**bpp)
25921da177e4SLinus Torvalds {
25934c46819aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
25944c46819aSChristoph Hellwig 	struct xfs_buf		*bp;
25954c46819aSChristoph Hellwig 	struct xfs_dinode	*dip;
25961da177e4SLinus Torvalds 	int			error;
25971da177e4SLinus Torvalds 
25981da177e4SLinus Torvalds 	XFS_STATS_INC(xs_iflush_count);
25991da177e4SLinus Torvalds 
2600579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2601474fce06SChristoph Hellwig 	ASSERT(xfs_isiflocked(ip));
26021da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
26038096b1ebSChristoph Hellwig 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
26041da177e4SLinus Torvalds 
26054c46819aSChristoph Hellwig 	*bpp = NULL;
26061da177e4SLinus Torvalds 
26071da177e4SLinus Torvalds 	xfs_iunpin_wait(ip);
26081da177e4SLinus Torvalds 
26091da177e4SLinus Torvalds 	/*
26104b6a4688SDave Chinner 	 * For stale inodes we cannot rely on the backing buffer remaining
26114b6a4688SDave Chinner 	 * stale in cache for the remaining life of the stale inode and so
2612475ee413SChristoph Hellwig 	 * xfs_imap_to_bp() below may give us a buffer that no longer contains
26134b6a4688SDave Chinner 	 * inodes below. We have to check this after ensuring the inode is
26144b6a4688SDave Chinner 	 * unpinned so that it is safe to reclaim the stale inode after the
26154b6a4688SDave Chinner 	 * flush call.
26164b6a4688SDave Chinner 	 */
26174b6a4688SDave Chinner 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
26184b6a4688SDave Chinner 		xfs_ifunlock(ip);
26194b6a4688SDave Chinner 		return 0;
26204b6a4688SDave Chinner 	}
26214b6a4688SDave Chinner 
26224b6a4688SDave Chinner 	/*
26231da177e4SLinus Torvalds 	 * This may have been unpinned because the filesystem is shutting
26241da177e4SLinus Torvalds 	 * down forcibly. If that's the case we must not write this inode
262532ce90a4SChristoph Hellwig 	 * to disk, because the log record didn't make it to disk.
262632ce90a4SChristoph Hellwig 	 *
262732ce90a4SChristoph Hellwig 	 * We also have to remove the log item from the AIL in this case,
262832ce90a4SChristoph Hellwig 	 * as we wait for an empty AIL as part of the unmount process.
26291da177e4SLinus Torvalds 	 */
26301da177e4SLinus Torvalds 	if (XFS_FORCED_SHUTDOWN(mp)) {
263132ce90a4SChristoph Hellwig 		error = XFS_ERROR(EIO);
263232ce90a4SChristoph Hellwig 		goto abort_out;
26331da177e4SLinus Torvalds 	}
26341da177e4SLinus Torvalds 
26351da177e4SLinus Torvalds 	/*
2636a3f74ffbSDavid Chinner 	 * Get the buffer containing the on-disk inode.
2637a3f74ffbSDavid Chinner 	 */
2638475ee413SChristoph Hellwig 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
2639475ee413SChristoph Hellwig 			       0);
2640a3f74ffbSDavid Chinner 	if (error || !bp) {
2641a3f74ffbSDavid Chinner 		xfs_ifunlock(ip);
2642a3f74ffbSDavid Chinner 		return error;
2643a3f74ffbSDavid Chinner 	}
2644a3f74ffbSDavid Chinner 
2645a3f74ffbSDavid Chinner 	/*
26461da177e4SLinus Torvalds 	 * First flush out the inode that xfs_iflush was called with.
26471da177e4SLinus Torvalds 	 */
26481da177e4SLinus Torvalds 	error = xfs_iflush_int(ip, bp);
2649bad55843SDavid Chinner 	if (error)
26501da177e4SLinus Torvalds 		goto corrupt_out;
26511da177e4SLinus Torvalds 
26521da177e4SLinus Torvalds 	/*
2653a3f74ffbSDavid Chinner 	 * If the buffer is pinned then push on the log now so we won't
2654a3f74ffbSDavid Chinner 	 * get stuck waiting in the write for too long.
2655a3f74ffbSDavid Chinner 	 */
2656811e64c7SChandra Seetharaman 	if (xfs_buf_ispinned(bp))
2657a14a348bSChristoph Hellwig 		xfs_log_force(mp, 0);
2658a3f74ffbSDavid Chinner 
2659a3f74ffbSDavid Chinner 	/*
26601da177e4SLinus Torvalds 	 * inode clustering:
26611da177e4SLinus Torvalds 	 * see if other inodes can be gathered into this write
26621da177e4SLinus Torvalds 	 */
2663bad55843SDavid Chinner 	error = xfs_iflush_cluster(ip, bp);
2664bad55843SDavid Chinner 	if (error)
26651da177e4SLinus Torvalds 		goto cluster_corrupt_out;
26661da177e4SLinus Torvalds 
26674c46819aSChristoph Hellwig 	*bpp = bp;
26684c46819aSChristoph Hellwig 	return 0;
26691da177e4SLinus Torvalds 
26701da177e4SLinus Torvalds corrupt_out:
26711da177e4SLinus Torvalds 	xfs_buf_relse(bp);
26727d04a335SNathan Scott 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
26731da177e4SLinus Torvalds cluster_corrupt_out:
267432ce90a4SChristoph Hellwig 	error = XFS_ERROR(EFSCORRUPTED);
267532ce90a4SChristoph Hellwig abort_out:
26761da177e4SLinus Torvalds 	/*
26771da177e4SLinus Torvalds 	 * Unlocks the flush lock
26781da177e4SLinus Torvalds 	 */
267904913fddSDave Chinner 	xfs_iflush_abort(ip, false);
268032ce90a4SChristoph Hellwig 	return error;
26811da177e4SLinus Torvalds }
26821da177e4SLinus Torvalds 
26831da177e4SLinus Torvalds 
26841da177e4SLinus Torvalds STATIC int
26851da177e4SLinus Torvalds xfs_iflush_int(
26861da177e4SLinus Torvalds 	xfs_inode_t		*ip,
26871da177e4SLinus Torvalds 	xfs_buf_t		*bp)
26881da177e4SLinus Torvalds {
26891da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
26901da177e4SLinus Torvalds 	xfs_dinode_t		*dip;
26911da177e4SLinus Torvalds 	xfs_mount_t		*mp;
26921da177e4SLinus Torvalds #ifdef XFS_TRANS_DEBUG
26931da177e4SLinus Torvalds 	int			first;
26941da177e4SLinus Torvalds #endif
26951da177e4SLinus Torvalds 
2696579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2697474fce06SChristoph Hellwig 	ASSERT(xfs_isiflocked(ip));
26981da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
26998096b1ebSChristoph Hellwig 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
27001da177e4SLinus Torvalds 
27011da177e4SLinus Torvalds 	iip = ip->i_itemp;
27021da177e4SLinus Torvalds 	mp = ip->i_mount;
27031da177e4SLinus Torvalds 
27041da177e4SLinus Torvalds 	/* set *dip = inode's place in the buffer */
270592bfc6e7SChristoph Hellwig 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
27061da177e4SLinus Torvalds 
270769ef921bSChristoph Hellwig 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
27081da177e4SLinus Torvalds 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
27096a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27106a19d939SDave Chinner 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
27116a19d939SDave Chinner 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
27121da177e4SLinus Torvalds 		goto corrupt_out;
27131da177e4SLinus Torvalds 	}
27141da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
27151da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
27166a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27176a19d939SDave Chinner 			"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
27186a19d939SDave Chinner 			__func__, ip->i_ino, ip, ip->i_d.di_magic);
27191da177e4SLinus Torvalds 		goto corrupt_out;
27201da177e4SLinus Torvalds 	}
2721abbede1bSAl Viro 	if (S_ISREG(ip->i_d.di_mode)) {
27221da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
27231da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
27241da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
27251da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
27266a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27276a19d939SDave Chinner 				"%s: Bad regular inode %Lu, ptr 0x%p",
27286a19d939SDave Chinner 				__func__, ip->i_ino, ip);
27291da177e4SLinus Torvalds 			goto corrupt_out;
27301da177e4SLinus Torvalds 		}
2731abbede1bSAl Viro 	} else if (S_ISDIR(ip->i_d.di_mode)) {
27321da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
27331da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
27341da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
27351da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
27361da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
27376a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27386a19d939SDave Chinner 				"%s: Bad directory inode %Lu, ptr 0x%p",
27396a19d939SDave Chinner 				__func__, ip->i_ino, ip);
27401da177e4SLinus Torvalds 			goto corrupt_out;
27411da177e4SLinus Torvalds 		}
27421da177e4SLinus Torvalds 	}
27431da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
27441da177e4SLinus Torvalds 				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
27451da177e4SLinus Torvalds 				XFS_RANDOM_IFLUSH_5)) {
27466a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27476a19d939SDave Chinner 			"%s: detected corrupt incore inode %Lu, "
27486a19d939SDave Chinner 			"total extents = %d, nblocks = %Ld, ptr 0x%p",
27496a19d939SDave Chinner 			__func__, ip->i_ino,
27501da177e4SLinus Torvalds 			ip->i_d.di_nextents + ip->i_d.di_anextents,
27516a19d939SDave Chinner 			ip->i_d.di_nblocks, ip);
27521da177e4SLinus Torvalds 		goto corrupt_out;
27531da177e4SLinus Torvalds 	}
27541da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
27551da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
27566a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
27576a19d939SDave Chinner 			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
27586a19d939SDave Chinner 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
27591da177e4SLinus Torvalds 		goto corrupt_out;
27601da177e4SLinus Torvalds 	}
27611da177e4SLinus Torvalds 	/*
27621da177e4SLinus Torvalds 	 * bump the flush iteration count, used to detect flushes which
27631da177e4SLinus Torvalds 	 * postdate a log record during recovery.
27641da177e4SLinus Torvalds 	 */
27651da177e4SLinus Torvalds 
27661da177e4SLinus Torvalds 	ip->i_d.di_flushiter++;
27671da177e4SLinus Torvalds 
27681da177e4SLinus Torvalds 	/*
27691da177e4SLinus Torvalds 	 * Copy the dirty parts of the inode into the on-disk
27701da177e4SLinus Torvalds 	 * inode.  We always copy out the core of the inode,
27711da177e4SLinus Torvalds 	 * because if the inode is dirty at all the core must
27721da177e4SLinus Torvalds 	 * be.
27731da177e4SLinus Torvalds 	 */
277481591fe2SChristoph Hellwig 	xfs_dinode_to_disk(dip, &ip->i_d);
27751da177e4SLinus Torvalds 
27761da177e4SLinus Torvalds 	/* Wrap, we never let the log put out DI_MAX_FLUSH */
27771da177e4SLinus Torvalds 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
27781da177e4SLinus Torvalds 		ip->i_d.di_flushiter = 0;
27791da177e4SLinus Torvalds 
27801da177e4SLinus Torvalds 	/*
27811da177e4SLinus Torvalds 	 * If this is really an old format inode and the superblock version
27821da177e4SLinus Torvalds 	 * has not been updated to support only new format inodes, then
27831da177e4SLinus Torvalds 	 * convert back to the old inode format.  If the superblock version
27841da177e4SLinus Torvalds 	 * has been updated, then make the conversion permanent.
27851da177e4SLinus Torvalds 	 */
278651ce16d5SChristoph Hellwig 	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
278751ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
278862118709SEric Sandeen 		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
27891da177e4SLinus Torvalds 			/*
27901da177e4SLinus Torvalds 			 * Convert it back.
27911da177e4SLinus Torvalds 			 */
27921da177e4SLinus Torvalds 			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
279381591fe2SChristoph Hellwig 			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
27941da177e4SLinus Torvalds 		} else {
27951da177e4SLinus Torvalds 			/*
27961da177e4SLinus Torvalds 			 * The superblock version has already been bumped,
27971da177e4SLinus Torvalds 			 * so just make the conversion to the new inode
27981da177e4SLinus Torvalds 			 * format permanent.
27991da177e4SLinus Torvalds 			 */
280051ce16d5SChristoph Hellwig 			ip->i_d.di_version = 2;
280151ce16d5SChristoph Hellwig 			dip->di_version = 2;
28021da177e4SLinus Torvalds 			ip->i_d.di_onlink = 0;
280381591fe2SChristoph Hellwig 			dip->di_onlink = 0;
28041da177e4SLinus Torvalds 			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
280581591fe2SChristoph Hellwig 			memset(&(dip->di_pad[0]), 0,
280681591fe2SChristoph Hellwig 			      sizeof(dip->di_pad));
28076743099cSArkadiusz Mi?kiewicz 			ASSERT(xfs_get_projid(ip) == 0);
28081da177e4SLinus Torvalds 		}
28091da177e4SLinus Torvalds 	}
28101da177e4SLinus Torvalds 
2811e4ac967bSDavid Chinner 	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
2812e4ac967bSDavid Chinner 	if (XFS_IFORK_Q(ip))
2813e4ac967bSDavid Chinner 		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
28141da177e4SLinus Torvalds 	xfs_inobp_check(mp, bp);
28151da177e4SLinus Torvalds 
28161da177e4SLinus Torvalds 	/*
2817f5d8d5c4SChristoph Hellwig 	 * We've recorded everything logged in the inode, so we'd like to clear
2818f5d8d5c4SChristoph Hellwig 	 * the ili_fields bits so we don't log and flush things unnecessarily.
2819f5d8d5c4SChristoph Hellwig 	 * However, we can't stop logging all this information until the data
2820f5d8d5c4SChristoph Hellwig 	 * we've copied into the disk buffer is written to disk.  If we did we
2821f5d8d5c4SChristoph Hellwig 	 * might overwrite the copy of the inode in the log with all the data
2822f5d8d5c4SChristoph Hellwig 	 * after re-logging only part of it, and in the face of a crash we
2823f5d8d5c4SChristoph Hellwig 	 * wouldn't have all the data we need to recover.
28241da177e4SLinus Torvalds 	 *
2825f5d8d5c4SChristoph Hellwig 	 * What we do is move the bits to the ili_last_fields field.  When
2826f5d8d5c4SChristoph Hellwig 	 * logging the inode, these bits are moved back to the ili_fields field.
2827f5d8d5c4SChristoph Hellwig 	 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
2828f5d8d5c4SChristoph Hellwig 	 * know that the information those bits represent is permanently on
2829f5d8d5c4SChristoph Hellwig 	 * disk.  As long as the flush completes before the inode is logged
2830f5d8d5c4SChristoph Hellwig 	 * again, then both ili_fields and ili_last_fields will be cleared.
28311da177e4SLinus Torvalds 	 *
2832f5d8d5c4SChristoph Hellwig 	 * We can play with the ili_fields bits here, because the inode lock
2833f5d8d5c4SChristoph Hellwig 	 * must be held exclusively in order to set bits there and the flush
2834f5d8d5c4SChristoph Hellwig 	 * lock protects the ili_last_fields bits.  Set ili_logged so the flush
2835f5d8d5c4SChristoph Hellwig 	 * done routine can tell whether or not to look in the AIL.  Also, store
2836f5d8d5c4SChristoph Hellwig 	 * the current LSN of the inode so that we can tell whether the item has
2837f5d8d5c4SChristoph Hellwig 	 * moved in the AIL from xfs_iflush_done().  In order to read the lsn we
2838f5d8d5c4SChristoph Hellwig 	 * need the AIL lock, because it is a 64 bit value that cannot be read
2839f5d8d5c4SChristoph Hellwig 	 * atomically.
28401da177e4SLinus Torvalds 	 */
2841f5d8d5c4SChristoph Hellwig 	if (iip != NULL && iip->ili_fields != 0) {
2842f5d8d5c4SChristoph Hellwig 		iip->ili_last_fields = iip->ili_fields;
2843f5d8d5c4SChristoph Hellwig 		iip->ili_fields = 0;
28441da177e4SLinus Torvalds 		iip->ili_logged = 1;
28451da177e4SLinus Torvalds 
28467b2e2a31SDavid Chinner 		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
28477b2e2a31SDavid Chinner 					&iip->ili_item.li_lsn);
28481da177e4SLinus Torvalds 
28491da177e4SLinus Torvalds 		/*
28501da177e4SLinus Torvalds 		 * Attach the function xfs_iflush_done to the inode's
28511da177e4SLinus Torvalds 		 * buffer.  This will remove the inode from the AIL
28521da177e4SLinus Torvalds 		 * and unlock the inode's flush lock when the inode is
28531da177e4SLinus Torvalds 		 * completely written to disk.
28541da177e4SLinus Torvalds 		 */
2855ca30b2a7SChristoph Hellwig 		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
28561da177e4SLinus Torvalds 
2857adadbeefSChristoph Hellwig 		ASSERT(bp->b_fspriv != NULL);
2858cb669ca5SChristoph Hellwig 		ASSERT(bp->b_iodone != NULL);
28591da177e4SLinus Torvalds 	} else {
28601da177e4SLinus Torvalds 		/*
28611da177e4SLinus Torvalds 		 * We're flushing an inode which is not in the AIL and has
28628a9c9980SChristoph Hellwig 		 * not been logged.  For this case we can immediately drop
28631da177e4SLinus Torvalds 		 * the inode flush lock because we can avoid the whole
28641da177e4SLinus Torvalds 		 * AIL state thing.  It's OK to drop the flush lock now,
28651da177e4SLinus Torvalds 		 * because we've already locked the buffer and to do anything
28661da177e4SLinus Torvalds 		 * you really need both.
28671da177e4SLinus Torvalds 		 */
28681da177e4SLinus Torvalds 		if (iip != NULL) {
28691da177e4SLinus Torvalds 			ASSERT(iip->ili_logged == 0);
28701da177e4SLinus Torvalds 			ASSERT(iip->ili_last_fields == 0);
28711da177e4SLinus Torvalds 			ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
28721da177e4SLinus Torvalds 		}
28731da177e4SLinus Torvalds 		xfs_ifunlock(ip);
28741da177e4SLinus Torvalds 	}
28751da177e4SLinus Torvalds 
28761da177e4SLinus Torvalds 	return 0;
28771da177e4SLinus Torvalds 
28781da177e4SLinus Torvalds corrupt_out:
28791da177e4SLinus Torvalds 	return XFS_ERROR(EFSCORRUPTED);
28801da177e4SLinus Torvalds }
28811da177e4SLinus Torvalds 
28824eea22f0SMandy Kirkconnell /*
28834eea22f0SMandy Kirkconnell  * Return a pointer to the extent record at file index idx.
28844eea22f0SMandy Kirkconnell  */
2885a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *
28864eea22f0SMandy Kirkconnell xfs_iext_get_ext(
28874eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
28884eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx)		/* index of target extent */
28894eea22f0SMandy Kirkconnell {
28904eea22f0SMandy Kirkconnell 	ASSERT(idx >= 0);
289187bef181SChristoph Hellwig 	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
289287bef181SChristoph Hellwig 
28930293ce3aSMandy Kirkconnell 	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
28940293ce3aSMandy Kirkconnell 		return ifp->if_u1.if_ext_irec->er_extbuf;
28950293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
28960293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;		/* irec pointer */
28970293ce3aSMandy Kirkconnell 		int		erp_idx = 0;	/* irec index */
28980293ce3aSMandy Kirkconnell 		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
28990293ce3aSMandy Kirkconnell 
29000293ce3aSMandy Kirkconnell 		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
29010293ce3aSMandy Kirkconnell 		return &erp->er_extbuf[page_idx];
29020293ce3aSMandy Kirkconnell 	} else if (ifp->if_bytes) {
29034eea22f0SMandy Kirkconnell 		return &ifp->if_u1.if_extents[idx];
29044eea22f0SMandy Kirkconnell 	} else {
29054eea22f0SMandy Kirkconnell 		return NULL;
29064eea22f0SMandy Kirkconnell 	}
29074eea22f0SMandy Kirkconnell }
29084eea22f0SMandy Kirkconnell 
29094eea22f0SMandy Kirkconnell /*
29104eea22f0SMandy Kirkconnell  * Insert new item(s) into the extent records for incore inode
29114eea22f0SMandy Kirkconnell  * fork 'ifp'.  'count' new items are inserted at index 'idx'.
29124eea22f0SMandy Kirkconnell  */
29134eea22f0SMandy Kirkconnell void
29144eea22f0SMandy Kirkconnell xfs_iext_insert(
29156ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
29164eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* starting index of new items */
29174eea22f0SMandy Kirkconnell 	xfs_extnum_t	count,		/* number of inserted items */
29186ef35544SChristoph Hellwig 	xfs_bmbt_irec_t	*new,		/* items to insert */
29196ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
29204eea22f0SMandy Kirkconnell {
29216ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
29224eea22f0SMandy Kirkconnell 	xfs_extnum_t	i;		/* extent record index */
29234eea22f0SMandy Kirkconnell 
29240b1b213fSChristoph Hellwig 	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
29250b1b213fSChristoph Hellwig 
29264eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
29274eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, idx, count);
2928a6f64d4aSChristoph Hellwig 	for (i = idx; i < idx + count; i++, new++)
2929a6f64d4aSChristoph Hellwig 		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
29304eea22f0SMandy Kirkconnell }
29314eea22f0SMandy Kirkconnell 
29324eea22f0SMandy Kirkconnell /*
29334eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
29344eea22f0SMandy Kirkconnell  * extents needs to be increased. The ext_diff parameter stores the
29354eea22f0SMandy Kirkconnell  * number of new extents being added and the idx parameter contains
29364eea22f0SMandy Kirkconnell  * the extent index where the new extents will be added. If the new
29374eea22f0SMandy Kirkconnell  * extents are being appended, then we just need to (re)allocate and
29384eea22f0SMandy Kirkconnell  * initialize the space. Otherwise, if the new extents are being
29394eea22f0SMandy Kirkconnell  * inserted into the middle of the existing entries, a bit more work
29404eea22f0SMandy Kirkconnell  * is required to make room for the new extents to be inserted. The
29414eea22f0SMandy Kirkconnell  * caller is responsible for filling in the new extent entries upon
29424eea22f0SMandy Kirkconnell  * return.
29434eea22f0SMandy Kirkconnell  */
29444eea22f0SMandy Kirkconnell void
29454eea22f0SMandy Kirkconnell xfs_iext_add(
29464eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
29474eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin adding exts */
2948c41564b5SNathan Scott 	int		ext_diff)	/* number of extents to add */
29494eea22f0SMandy Kirkconnell {
29504eea22f0SMandy Kirkconnell 	int		byte_diff;	/* new bytes being added */
29514eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after adding */
29524eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
29534eea22f0SMandy Kirkconnell 
29544eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
29554eea22f0SMandy Kirkconnell 	ASSERT((idx >= 0) && (idx <= nextents));
29564eea22f0SMandy Kirkconnell 	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
29574eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes + byte_diff;
29584eea22f0SMandy Kirkconnell 	/*
29594eea22f0SMandy Kirkconnell 	 * If the new number of extents (nextents + ext_diff)
29604eea22f0SMandy Kirkconnell 	 * fits inside the inode, then continue to use the inline
29614eea22f0SMandy Kirkconnell 	 * extent buffer.
29624eea22f0SMandy Kirkconnell 	 */
29634eea22f0SMandy Kirkconnell 	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
29644eea22f0SMandy Kirkconnell 		if (idx < nextents) {
29654eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
29664eea22f0SMandy Kirkconnell 				&ifp->if_u2.if_inline_ext[idx],
29674eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
29684eea22f0SMandy Kirkconnell 			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
29694eea22f0SMandy Kirkconnell 		}
29704eea22f0SMandy Kirkconnell 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
29714eea22f0SMandy Kirkconnell 		ifp->if_real_bytes = 0;
29724eea22f0SMandy Kirkconnell 	}
29734eea22f0SMandy Kirkconnell 	/*
29744eea22f0SMandy Kirkconnell 	 * Otherwise use a linear (direct) extent list.
29754eea22f0SMandy Kirkconnell 	 * If the extents are currently inside the inode,
29764eea22f0SMandy Kirkconnell 	 * xfs_iext_realloc_direct will switch us from
29774eea22f0SMandy Kirkconnell 	 * inline to direct extent allocation mode.
29784eea22f0SMandy Kirkconnell 	 */
29790293ce3aSMandy Kirkconnell 	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
29804eea22f0SMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, new_size);
29814eea22f0SMandy Kirkconnell 		if (idx < nextents) {
29824eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
29834eea22f0SMandy Kirkconnell 				&ifp->if_u1.if_extents[idx],
29844eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
29854eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
29864eea22f0SMandy Kirkconnell 		}
29874eea22f0SMandy Kirkconnell 	}
29880293ce3aSMandy Kirkconnell 	/* Indirection array */
29890293ce3aSMandy Kirkconnell 	else {
29900293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;
29910293ce3aSMandy Kirkconnell 		int		erp_idx = 0;
29920293ce3aSMandy Kirkconnell 		int		page_idx = idx;
29930293ce3aSMandy Kirkconnell 
29940293ce3aSMandy Kirkconnell 		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
29950293ce3aSMandy Kirkconnell 		if (ifp->if_flags & XFS_IFEXTIREC) {
29960293ce3aSMandy Kirkconnell 			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
29970293ce3aSMandy Kirkconnell 		} else {
29980293ce3aSMandy Kirkconnell 			xfs_iext_irec_init(ifp);
29990293ce3aSMandy Kirkconnell 			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
30000293ce3aSMandy Kirkconnell 			erp = ifp->if_u1.if_ext_irec;
30010293ce3aSMandy Kirkconnell 		}
30020293ce3aSMandy Kirkconnell 		/* Extents fit in target extent page */
30030293ce3aSMandy Kirkconnell 		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
30040293ce3aSMandy Kirkconnell 			if (page_idx < erp->er_extcount) {
30050293ce3aSMandy Kirkconnell 				memmove(&erp->er_extbuf[page_idx + ext_diff],
30060293ce3aSMandy Kirkconnell 					&erp->er_extbuf[page_idx],
30070293ce3aSMandy Kirkconnell 					(erp->er_extcount - page_idx) *
30080293ce3aSMandy Kirkconnell 					sizeof(xfs_bmbt_rec_t));
30090293ce3aSMandy Kirkconnell 				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
30100293ce3aSMandy Kirkconnell 			}
30110293ce3aSMandy Kirkconnell 			erp->er_extcount += ext_diff;
30120293ce3aSMandy Kirkconnell 			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
30130293ce3aSMandy Kirkconnell 		}
30140293ce3aSMandy Kirkconnell 		/* Insert a new extent page */
30150293ce3aSMandy Kirkconnell 		else if (erp) {
30160293ce3aSMandy Kirkconnell 			xfs_iext_add_indirect_multi(ifp,
30170293ce3aSMandy Kirkconnell 				erp_idx, page_idx, ext_diff);
30180293ce3aSMandy Kirkconnell 		}
30190293ce3aSMandy Kirkconnell 		/*
30200293ce3aSMandy Kirkconnell 		 * If extent(s) are being appended to the last page in
30210293ce3aSMandy Kirkconnell 		 * the indirection array and the new extent(s) don't fit
30220293ce3aSMandy Kirkconnell 		 * in the page, then erp is NULL and erp_idx is set to
30230293ce3aSMandy Kirkconnell 		 * the next index needed in the indirection array.
30240293ce3aSMandy Kirkconnell 		 */
30250293ce3aSMandy Kirkconnell 		else {
30260293ce3aSMandy Kirkconnell 			int	count = ext_diff;
30270293ce3aSMandy Kirkconnell 
30280293ce3aSMandy Kirkconnell 			while (count) {
30290293ce3aSMandy Kirkconnell 				erp = xfs_iext_irec_new(ifp, erp_idx);
30300293ce3aSMandy Kirkconnell 				erp->er_extcount = count;
30310293ce3aSMandy Kirkconnell 				count -= MIN(count, (int)XFS_LINEAR_EXTS);
30320293ce3aSMandy Kirkconnell 				if (count) {
30330293ce3aSMandy Kirkconnell 					erp_idx++;
30340293ce3aSMandy Kirkconnell 				}
30350293ce3aSMandy Kirkconnell 			}
30360293ce3aSMandy Kirkconnell 		}
30370293ce3aSMandy Kirkconnell 	}
30384eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
30394eea22f0SMandy Kirkconnell }
30404eea22f0SMandy Kirkconnell 
30414eea22f0SMandy Kirkconnell /*
30420293ce3aSMandy Kirkconnell  * This is called when incore extents are being added to the indirection
30430293ce3aSMandy Kirkconnell  * array and the new extents do not fit in the target extent list. The
30440293ce3aSMandy Kirkconnell  * erp_idx parameter contains the irec index for the target extent list
30450293ce3aSMandy Kirkconnell  * in the indirection array, and the idx parameter contains the extent
30460293ce3aSMandy Kirkconnell  * index within the list. The number of extents being added is stored
30470293ce3aSMandy Kirkconnell  * in the count parameter.
30480293ce3aSMandy Kirkconnell  *
30490293ce3aSMandy Kirkconnell  *    |-------|   |-------|
30500293ce3aSMandy Kirkconnell  *    |       |   |       |    idx - number of extents before idx
30510293ce3aSMandy Kirkconnell  *    |  idx  |   | count |
30520293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being inserted at idx
30530293ce3aSMandy Kirkconnell  *    |-------|   |-------|
30540293ce3aSMandy Kirkconnell  *    | count |   | nex2  |    nex2 - number of extents after idx + count
30550293ce3aSMandy Kirkconnell  *    |-------|   |-------|
30560293ce3aSMandy Kirkconnell  */
30570293ce3aSMandy Kirkconnell void
30580293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(
30590293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,			/* inode fork pointer */
30600293ce3aSMandy Kirkconnell 	int		erp_idx,		/* target extent irec index */
30610293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,			/* index within target list */
30620293ce3aSMandy Kirkconnell 	int		count)			/* new extents being added */
30630293ce3aSMandy Kirkconnell {
30640293ce3aSMandy Kirkconnell 	int		byte_diff;		/* new bytes being added */
30650293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
30660293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;		/* number of extents to add */
30670293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;		/* new extents still needed */
30680293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;			/* extents after idx + count */
30690293ce3aSMandy Kirkconnell 	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
30700293ce3aSMandy Kirkconnell 	int		nlists;			/* number of irec's (lists) */
30710293ce3aSMandy Kirkconnell 
30720293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
30730293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
30740293ce3aSMandy Kirkconnell 	nex2 = erp->er_extcount - idx;
30750293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
30760293ce3aSMandy Kirkconnell 
30770293ce3aSMandy Kirkconnell 	/*
30780293ce3aSMandy Kirkconnell 	 * Save second part of target extent list
30790293ce3aSMandy Kirkconnell 	 * (all extents past */
30800293ce3aSMandy Kirkconnell 	if (nex2) {
30810293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
30826785073bSDavid Chinner 		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
30830293ce3aSMandy Kirkconnell 		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
30840293ce3aSMandy Kirkconnell 		erp->er_extcount -= nex2;
30850293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
30860293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[idx], 0, byte_diff);
30870293ce3aSMandy Kirkconnell 	}
30880293ce3aSMandy Kirkconnell 
30890293ce3aSMandy Kirkconnell 	/*
30900293ce3aSMandy Kirkconnell 	 * Add the new extents to the end of the target
30910293ce3aSMandy Kirkconnell 	 * list, then allocate new irec record(s) and
30920293ce3aSMandy Kirkconnell 	 * extent buffer(s) as needed to store the rest
30930293ce3aSMandy Kirkconnell 	 * of the new extents.
30940293ce3aSMandy Kirkconnell 	 */
30950293ce3aSMandy Kirkconnell 	ext_cnt = count;
30960293ce3aSMandy Kirkconnell 	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
30970293ce3aSMandy Kirkconnell 	if (ext_diff) {
30980293ce3aSMandy Kirkconnell 		erp->er_extcount += ext_diff;
30990293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
31000293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
31010293ce3aSMandy Kirkconnell 	}
31020293ce3aSMandy Kirkconnell 	while (ext_cnt) {
31030293ce3aSMandy Kirkconnell 		erp_idx++;
31040293ce3aSMandy Kirkconnell 		erp = xfs_iext_irec_new(ifp, erp_idx);
31050293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
31060293ce3aSMandy Kirkconnell 		erp->er_extcount = ext_diff;
31070293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
31080293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
31090293ce3aSMandy Kirkconnell 	}
31100293ce3aSMandy Kirkconnell 
31110293ce3aSMandy Kirkconnell 	/* Add nex2 extents back to indirection array */
31120293ce3aSMandy Kirkconnell 	if (nex2) {
31130293ce3aSMandy Kirkconnell 		xfs_extnum_t	ext_avail;
31140293ce3aSMandy Kirkconnell 		int		i;
31150293ce3aSMandy Kirkconnell 
31160293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
31170293ce3aSMandy Kirkconnell 		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
31180293ce3aSMandy Kirkconnell 		i = 0;
31190293ce3aSMandy Kirkconnell 		/*
31200293ce3aSMandy Kirkconnell 		 * If nex2 extents fit in the current page, append
31210293ce3aSMandy Kirkconnell 		 * nex2_ep after the new extents.
31220293ce3aSMandy Kirkconnell 		 */
31230293ce3aSMandy Kirkconnell 		if (nex2 <= ext_avail) {
31240293ce3aSMandy Kirkconnell 			i = erp->er_extcount;
31250293ce3aSMandy Kirkconnell 		}
31260293ce3aSMandy Kirkconnell 		/*
31270293ce3aSMandy Kirkconnell 		 * Otherwise, check if space is available in the
31280293ce3aSMandy Kirkconnell 		 * next page.
31290293ce3aSMandy Kirkconnell 		 */
31300293ce3aSMandy Kirkconnell 		else if ((erp_idx < nlists - 1) &&
31310293ce3aSMandy Kirkconnell 			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
31320293ce3aSMandy Kirkconnell 			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
31330293ce3aSMandy Kirkconnell 			erp_idx++;
31340293ce3aSMandy Kirkconnell 			erp++;
31350293ce3aSMandy Kirkconnell 			/* Create a hole for nex2 extents */
31360293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
31370293ce3aSMandy Kirkconnell 				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
31380293ce3aSMandy Kirkconnell 		}
31390293ce3aSMandy Kirkconnell 		/*
31400293ce3aSMandy Kirkconnell 		 * Final choice, create a new extent page for
31410293ce3aSMandy Kirkconnell 		 * nex2 extents.
31420293ce3aSMandy Kirkconnell 		 */
31430293ce3aSMandy Kirkconnell 		else {
31440293ce3aSMandy Kirkconnell 			erp_idx++;
31450293ce3aSMandy Kirkconnell 			erp = xfs_iext_irec_new(ifp, erp_idx);
31460293ce3aSMandy Kirkconnell 		}
31470293ce3aSMandy Kirkconnell 		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3148f0e2d93cSDenys Vlasenko 		kmem_free(nex2_ep);
31490293ce3aSMandy Kirkconnell 		erp->er_extcount += nex2;
31500293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
31510293ce3aSMandy Kirkconnell 	}
31520293ce3aSMandy Kirkconnell }
31530293ce3aSMandy Kirkconnell 
31540293ce3aSMandy Kirkconnell /*
31554eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
31564eea22f0SMandy Kirkconnell  * extents needs to be decreased. The ext_diff parameter stores the
31574eea22f0SMandy Kirkconnell  * number of extents to be removed and the idx parameter contains
31584eea22f0SMandy Kirkconnell  * the extent index where the extents will be removed from.
31590293ce3aSMandy Kirkconnell  *
31600293ce3aSMandy Kirkconnell  * If the amount of space needed has decreased below the linear
31610293ce3aSMandy Kirkconnell  * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
31620293ce3aSMandy Kirkconnell  * extent array.  Otherwise, use kmem_realloc() to adjust the
31630293ce3aSMandy Kirkconnell  * size to what is needed.
31644eea22f0SMandy Kirkconnell  */
31654eea22f0SMandy Kirkconnell void
31664eea22f0SMandy Kirkconnell xfs_iext_remove(
31676ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
31684eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
31696ef35544SChristoph Hellwig 	int		ext_diff,	/* number of extents to remove */
31706ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
31714eea22f0SMandy Kirkconnell {
31726ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
31734eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
31744eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
31754eea22f0SMandy Kirkconnell 
31760b1b213fSChristoph Hellwig 	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
31770b1b213fSChristoph Hellwig 
31784eea22f0SMandy Kirkconnell 	ASSERT(ext_diff > 0);
31794eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
31804eea22f0SMandy Kirkconnell 	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
31814eea22f0SMandy Kirkconnell 
31824eea22f0SMandy Kirkconnell 	if (new_size == 0) {
31834eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
31840293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
31850293ce3aSMandy Kirkconnell 		xfs_iext_remove_indirect(ifp, idx, ext_diff);
31864eea22f0SMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
31874eea22f0SMandy Kirkconnell 		xfs_iext_remove_direct(ifp, idx, ext_diff);
31884eea22f0SMandy Kirkconnell 	} else {
31894eea22f0SMandy Kirkconnell 		xfs_iext_remove_inline(ifp, idx, ext_diff);
31904eea22f0SMandy Kirkconnell 	}
31914eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
31924eea22f0SMandy Kirkconnell }
31934eea22f0SMandy Kirkconnell 
31944eea22f0SMandy Kirkconnell /*
31954eea22f0SMandy Kirkconnell  * This removes ext_diff extents from the inline buffer, beginning
31964eea22f0SMandy Kirkconnell  * at extent index idx.
31974eea22f0SMandy Kirkconnell  */
31984eea22f0SMandy Kirkconnell void
31994eea22f0SMandy Kirkconnell xfs_iext_remove_inline(
32004eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
32014eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
32024eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
32034eea22f0SMandy Kirkconnell {
32044eea22f0SMandy Kirkconnell 	int		nextents;	/* number of extents in file */
32054eea22f0SMandy Kirkconnell 
32060293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
32074eea22f0SMandy Kirkconnell 	ASSERT(idx < XFS_INLINE_EXTS);
32084eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
32094eea22f0SMandy Kirkconnell 	ASSERT(((nextents - ext_diff) > 0) &&
32104eea22f0SMandy Kirkconnell 		(nextents - ext_diff) < XFS_INLINE_EXTS);
32114eea22f0SMandy Kirkconnell 
32124eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
32134eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u2.if_inline_ext[idx],
32144eea22f0SMandy Kirkconnell 			&ifp->if_u2.if_inline_ext[idx + ext_diff],
32154eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
32164eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
32174eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
32184eea22f0SMandy Kirkconnell 			0, ext_diff * sizeof(xfs_bmbt_rec_t));
32194eea22f0SMandy Kirkconnell 	} else {
32204eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[idx], 0,
32214eea22f0SMandy Kirkconnell 			ext_diff * sizeof(xfs_bmbt_rec_t));
32224eea22f0SMandy Kirkconnell 	}
32234eea22f0SMandy Kirkconnell }
32244eea22f0SMandy Kirkconnell 
32254eea22f0SMandy Kirkconnell /*
32264eea22f0SMandy Kirkconnell  * This removes ext_diff extents from a linear (direct) extent list,
32274eea22f0SMandy Kirkconnell  * beginning at extent index idx. If the extents are being removed
32284eea22f0SMandy Kirkconnell  * from the end of the list (ie. truncate) then we just need to re-
32294eea22f0SMandy Kirkconnell  * allocate the list to remove the extra space. Otherwise, if the
32304eea22f0SMandy Kirkconnell  * extents are being removed from the middle of the existing extent
32314eea22f0SMandy Kirkconnell  * entries, then we first need to move the extent records beginning
32324eea22f0SMandy Kirkconnell  * at idx + ext_diff up in the list to overwrite the records being
32334eea22f0SMandy Kirkconnell  * removed, then remove the extra space via kmem_realloc.
32344eea22f0SMandy Kirkconnell  */
32354eea22f0SMandy Kirkconnell void
32364eea22f0SMandy Kirkconnell xfs_iext_remove_direct(
32374eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
32384eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
32394eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
32404eea22f0SMandy Kirkconnell {
32414eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
32424eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
32434eea22f0SMandy Kirkconnell 
32440293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
32454eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes -
32464eea22f0SMandy Kirkconnell 		(ext_diff * sizeof(xfs_bmbt_rec_t));
32474eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
32484eea22f0SMandy Kirkconnell 
32494eea22f0SMandy Kirkconnell 	if (new_size == 0) {
32504eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
32514eea22f0SMandy Kirkconnell 		return;
32524eea22f0SMandy Kirkconnell 	}
32534eea22f0SMandy Kirkconnell 	/* Move extents up in the list (if needed) */
32544eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
32554eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u1.if_extents[idx],
32564eea22f0SMandy Kirkconnell 			&ifp->if_u1.if_extents[idx + ext_diff],
32574eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
32584eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
32594eea22f0SMandy Kirkconnell 	}
32604eea22f0SMandy Kirkconnell 	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
32614eea22f0SMandy Kirkconnell 		0, ext_diff * sizeof(xfs_bmbt_rec_t));
32624eea22f0SMandy Kirkconnell 	/*
32634eea22f0SMandy Kirkconnell 	 * Reallocate the direct extent list. If the extents
32644eea22f0SMandy Kirkconnell 	 * will fit inside the inode then xfs_iext_realloc_direct
32654eea22f0SMandy Kirkconnell 	 * will switch from direct to inline extent allocation
32664eea22f0SMandy Kirkconnell 	 * mode for us.
32674eea22f0SMandy Kirkconnell 	 */
32684eea22f0SMandy Kirkconnell 	xfs_iext_realloc_direct(ifp, new_size);
32694eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
32704eea22f0SMandy Kirkconnell }
32714eea22f0SMandy Kirkconnell 
32724eea22f0SMandy Kirkconnell /*
32730293ce3aSMandy Kirkconnell  * This is called when incore extents are being removed from the
32740293ce3aSMandy Kirkconnell  * indirection array and the extents being removed span multiple extent
32750293ce3aSMandy Kirkconnell  * buffers. The idx parameter contains the file extent index where we
32760293ce3aSMandy Kirkconnell  * want to begin removing extents, and the count parameter contains
32770293ce3aSMandy Kirkconnell  * how many extents need to be removed.
32780293ce3aSMandy Kirkconnell  *
32790293ce3aSMandy Kirkconnell  *    |-------|   |-------|
32800293ce3aSMandy Kirkconnell  *    | nex1  |   |       |    nex1 - number of extents before idx
32810293ce3aSMandy Kirkconnell  *    |-------|   | count |
32820293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being removed at idx
32830293ce3aSMandy Kirkconnell  *    | count |   |-------|
32840293ce3aSMandy Kirkconnell  *    |       |   | nex2  |    nex2 - number of extents after idx + count
32850293ce3aSMandy Kirkconnell  *    |-------|   |-------|
32860293ce3aSMandy Kirkconnell  */
32870293ce3aSMandy Kirkconnell void
32880293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(
32890293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
32900293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing extents */
32910293ce3aSMandy Kirkconnell 	int		count)		/* number of extents to remove */
32920293ce3aSMandy Kirkconnell {
32930293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
32940293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
32950293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;	/* extents left to remove */
32960293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
32970293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex1;		/* number of extents before idx */
32980293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;		/* extents after idx + count */
32990293ce3aSMandy Kirkconnell 	int		page_idx = idx;	/* index in target extent list */
33000293ce3aSMandy Kirkconnell 
33010293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
33020293ce3aSMandy Kirkconnell 	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
33030293ce3aSMandy Kirkconnell 	ASSERT(erp != NULL);
33040293ce3aSMandy Kirkconnell 	nex1 = page_idx;
33050293ce3aSMandy Kirkconnell 	ext_cnt = count;
33060293ce3aSMandy Kirkconnell 	while (ext_cnt) {
33070293ce3aSMandy Kirkconnell 		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
33080293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
33090293ce3aSMandy Kirkconnell 		/*
33100293ce3aSMandy Kirkconnell 		 * Check for deletion of entire list;
33110293ce3aSMandy Kirkconnell 		 * xfs_iext_irec_remove() updates extent offsets.
33120293ce3aSMandy Kirkconnell 		 */
33130293ce3aSMandy Kirkconnell 		if (ext_diff == erp->er_extcount) {
33140293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
33150293ce3aSMandy Kirkconnell 			ext_cnt -= ext_diff;
33160293ce3aSMandy Kirkconnell 			nex1 = 0;
33170293ce3aSMandy Kirkconnell 			if (ext_cnt) {
33180293ce3aSMandy Kirkconnell 				ASSERT(erp_idx < ifp->if_real_bytes /
33190293ce3aSMandy Kirkconnell 					XFS_IEXT_BUFSZ);
33200293ce3aSMandy Kirkconnell 				erp = &ifp->if_u1.if_ext_irec[erp_idx];
33210293ce3aSMandy Kirkconnell 				nex1 = 0;
33220293ce3aSMandy Kirkconnell 				continue;
33230293ce3aSMandy Kirkconnell 			} else {
33240293ce3aSMandy Kirkconnell 				break;
33250293ce3aSMandy Kirkconnell 			}
33260293ce3aSMandy Kirkconnell 		}
33270293ce3aSMandy Kirkconnell 		/* Move extents up (if needed) */
33280293ce3aSMandy Kirkconnell 		if (nex2) {
33290293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex1],
33300293ce3aSMandy Kirkconnell 				&erp->er_extbuf[nex1 + ext_diff],
33310293ce3aSMandy Kirkconnell 				nex2 * sizeof(xfs_bmbt_rec_t));
33320293ce3aSMandy Kirkconnell 		}
33330293ce3aSMandy Kirkconnell 		/* Zero out rest of page */
33340293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
33350293ce3aSMandy Kirkconnell 			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
33360293ce3aSMandy Kirkconnell 		/* Update remaining counters */
33370293ce3aSMandy Kirkconnell 		erp->er_extcount -= ext_diff;
33380293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
33390293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
33400293ce3aSMandy Kirkconnell 		nex1 = 0;
33410293ce3aSMandy Kirkconnell 		erp_idx++;
33420293ce3aSMandy Kirkconnell 		erp++;
33430293ce3aSMandy Kirkconnell 	}
33440293ce3aSMandy Kirkconnell 	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
33450293ce3aSMandy Kirkconnell 	xfs_iext_irec_compact(ifp);
33460293ce3aSMandy Kirkconnell }
33470293ce3aSMandy Kirkconnell 
33480293ce3aSMandy Kirkconnell /*
33494eea22f0SMandy Kirkconnell  * Create, destroy, or resize a linear (direct) block of extents.
33504eea22f0SMandy Kirkconnell  */
33514eea22f0SMandy Kirkconnell void
33524eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(
33534eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33544eea22f0SMandy Kirkconnell 	int		new_size)	/* new size of extents */
33554eea22f0SMandy Kirkconnell {
33564eea22f0SMandy Kirkconnell 	int		rnew_size;	/* real new size of extents */
33574eea22f0SMandy Kirkconnell 
33584eea22f0SMandy Kirkconnell 	rnew_size = new_size;
33594eea22f0SMandy Kirkconnell 
33600293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
33610293ce3aSMandy Kirkconnell 		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
33620293ce3aSMandy Kirkconnell 		 (new_size != ifp->if_real_bytes)));
33630293ce3aSMandy Kirkconnell 
33644eea22f0SMandy Kirkconnell 	/* Free extent records */
33654eea22f0SMandy Kirkconnell 	if (new_size == 0) {
33664eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
33674eea22f0SMandy Kirkconnell 	}
33684eea22f0SMandy Kirkconnell 	/* Resize direct extent list and zero any new bytes */
33694eea22f0SMandy Kirkconnell 	else if (ifp->if_real_bytes) {
33704eea22f0SMandy Kirkconnell 		/* Check if extents will fit inside the inode */
33714eea22f0SMandy Kirkconnell 		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
33724eea22f0SMandy Kirkconnell 			xfs_iext_direct_to_inline(ifp, new_size /
33734eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t));
33744eea22f0SMandy Kirkconnell 			ifp->if_bytes = new_size;
33754eea22f0SMandy Kirkconnell 			return;
33764eea22f0SMandy Kirkconnell 		}
337716a087d8SVignesh Babu 		if (!is_power_of_2(new_size)){
337840ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
33794eea22f0SMandy Kirkconnell 		}
33804eea22f0SMandy Kirkconnell 		if (rnew_size != ifp->if_real_bytes) {
3381a6f64d4aSChristoph Hellwig 			ifp->if_u1.if_extents =
33824eea22f0SMandy Kirkconnell 				kmem_realloc(ifp->if_u1.if_extents,
33834eea22f0SMandy Kirkconnell 						rnew_size,
33846785073bSDavid Chinner 						ifp->if_real_bytes, KM_NOFS);
33854eea22f0SMandy Kirkconnell 		}
33864eea22f0SMandy Kirkconnell 		if (rnew_size > ifp->if_real_bytes) {
33874eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
33884eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t)], 0,
33894eea22f0SMandy Kirkconnell 				rnew_size - ifp->if_real_bytes);
33904eea22f0SMandy Kirkconnell 		}
33914eea22f0SMandy Kirkconnell 	}
33924eea22f0SMandy Kirkconnell 	/*
33934eea22f0SMandy Kirkconnell 	 * Switch from the inline extent buffer to a direct
33944eea22f0SMandy Kirkconnell 	 * extent list. Be sure to include the inline extent
33954eea22f0SMandy Kirkconnell 	 * bytes in new_size.
33964eea22f0SMandy Kirkconnell 	 */
33974eea22f0SMandy Kirkconnell 	else {
33984eea22f0SMandy Kirkconnell 		new_size += ifp->if_bytes;
339916a087d8SVignesh Babu 		if (!is_power_of_2(new_size)) {
340040ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
34014eea22f0SMandy Kirkconnell 		}
34024eea22f0SMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, rnew_size);
34034eea22f0SMandy Kirkconnell 	}
34044eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = rnew_size;
34054eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
34064eea22f0SMandy Kirkconnell }
34074eea22f0SMandy Kirkconnell 
34084eea22f0SMandy Kirkconnell /*
34094eea22f0SMandy Kirkconnell  * Switch from linear (direct) extent records to inline buffer.
34104eea22f0SMandy Kirkconnell  */
34114eea22f0SMandy Kirkconnell void
34124eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(
34134eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34144eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents)	/* number of extents in file */
34154eea22f0SMandy Kirkconnell {
34164eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
34174eea22f0SMandy Kirkconnell 	ASSERT(nextents <= XFS_INLINE_EXTS);
34184eea22f0SMandy Kirkconnell 	/*
34194eea22f0SMandy Kirkconnell 	 * The inline buffer was zeroed when we switched
34204eea22f0SMandy Kirkconnell 	 * from inline to direct extent allocation mode,
34214eea22f0SMandy Kirkconnell 	 * so we don't need to clear it here.
34224eea22f0SMandy Kirkconnell 	 */
34234eea22f0SMandy Kirkconnell 	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
34244eea22f0SMandy Kirkconnell 		nextents * sizeof(xfs_bmbt_rec_t));
3425f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_extents);
34264eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
34274eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
34284eea22f0SMandy Kirkconnell }
34294eea22f0SMandy Kirkconnell 
34304eea22f0SMandy Kirkconnell /*
34314eea22f0SMandy Kirkconnell  * Switch from inline buffer to linear (direct) extent records.
34324eea22f0SMandy Kirkconnell  * new_size should already be rounded up to the next power of 2
34334eea22f0SMandy Kirkconnell  * by the caller (when appropriate), so use new_size as it is.
34344eea22f0SMandy Kirkconnell  * However, since new_size may be rounded up, we can't update
34354eea22f0SMandy Kirkconnell  * if_bytes here. It is the caller's responsibility to update
34364eea22f0SMandy Kirkconnell  * if_bytes upon return.
34374eea22f0SMandy Kirkconnell  */
34384eea22f0SMandy Kirkconnell void
34394eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(
34404eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34414eea22f0SMandy Kirkconnell 	int		new_size)	/* number of extents in file */
34424eea22f0SMandy Kirkconnell {
34436785073bSDavid Chinner 	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
34444eea22f0SMandy Kirkconnell 	memset(ifp->if_u1.if_extents, 0, new_size);
34454eea22f0SMandy Kirkconnell 	if (ifp->if_bytes) {
34464eea22f0SMandy Kirkconnell 		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
34474eea22f0SMandy Kirkconnell 			ifp->if_bytes);
34484eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
34494eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
34504eea22f0SMandy Kirkconnell 	}
34514eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = new_size;
34524eea22f0SMandy Kirkconnell }
34534eea22f0SMandy Kirkconnell 
34544eea22f0SMandy Kirkconnell /*
34550293ce3aSMandy Kirkconnell  * Resize an extent indirection array to new_size bytes.
34560293ce3aSMandy Kirkconnell  */
3457d96f8f89SEric Sandeen STATIC void
34580293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(
34590293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34600293ce3aSMandy Kirkconnell 	int		new_size)	/* new indirection array size */
34610293ce3aSMandy Kirkconnell {
34620293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
34630293ce3aSMandy Kirkconnell 	int		size;		/* current indirection array size */
34640293ce3aSMandy Kirkconnell 
34650293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
34660293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
34670293ce3aSMandy Kirkconnell 	size = nlists * sizeof(xfs_ext_irec_t);
34680293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes);
34690293ce3aSMandy Kirkconnell 	ASSERT((new_size >= 0) && (new_size != size));
34700293ce3aSMandy Kirkconnell 	if (new_size == 0) {
34710293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
34720293ce3aSMandy Kirkconnell 	} else {
34730293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
34740293ce3aSMandy Kirkconnell 			kmem_realloc(ifp->if_u1.if_ext_irec,
34756785073bSDavid Chinner 				new_size, size, KM_NOFS);
34760293ce3aSMandy Kirkconnell 	}
34770293ce3aSMandy Kirkconnell }
34780293ce3aSMandy Kirkconnell 
34790293ce3aSMandy Kirkconnell /*
34800293ce3aSMandy Kirkconnell  * Switch from indirection array to linear (direct) extent allocations.
34810293ce3aSMandy Kirkconnell  */
3482d96f8f89SEric Sandeen STATIC void
34830293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(
34840293ce3aSMandy Kirkconnell 	 xfs_ifork_t	*ifp)		/* inode fork pointer */
34850293ce3aSMandy Kirkconnell {
3486a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
34870293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
34880293ce3aSMandy Kirkconnell 	int		size;		/* size of file extents */
34890293ce3aSMandy Kirkconnell 
34900293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
34910293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
34920293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
34930293ce3aSMandy Kirkconnell 	size = nextents * sizeof(xfs_bmbt_rec_t);
34940293ce3aSMandy Kirkconnell 
349571a8c87fSLachlan McIlroy 	xfs_iext_irec_compact_pages(ifp);
34960293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
34970293ce3aSMandy Kirkconnell 
34980293ce3aSMandy Kirkconnell 	ep = ifp->if_u1.if_ext_irec->er_extbuf;
3499f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_ext_irec);
35000293ce3aSMandy Kirkconnell 	ifp->if_flags &= ~XFS_IFEXTIREC;
35010293ce3aSMandy Kirkconnell 	ifp->if_u1.if_extents = ep;
35020293ce3aSMandy Kirkconnell 	ifp->if_bytes = size;
35030293ce3aSMandy Kirkconnell 	if (nextents < XFS_LINEAR_EXTS) {
35040293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, size);
35050293ce3aSMandy Kirkconnell 	}
35060293ce3aSMandy Kirkconnell }
35070293ce3aSMandy Kirkconnell 
35080293ce3aSMandy Kirkconnell /*
35094eea22f0SMandy Kirkconnell  * Free incore file extents.
35104eea22f0SMandy Kirkconnell  */
35114eea22f0SMandy Kirkconnell void
35124eea22f0SMandy Kirkconnell xfs_iext_destroy(
35134eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
35144eea22f0SMandy Kirkconnell {
35150293ce3aSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
35160293ce3aSMandy Kirkconnell 		int	erp_idx;
35170293ce3aSMandy Kirkconnell 		int	nlists;
35180293ce3aSMandy Kirkconnell 
35190293ce3aSMandy Kirkconnell 		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
35200293ce3aSMandy Kirkconnell 		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
35210293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
35220293ce3aSMandy Kirkconnell 		}
35230293ce3aSMandy Kirkconnell 		ifp->if_flags &= ~XFS_IFEXTIREC;
35240293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
3525f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_extents);
35264eea22f0SMandy Kirkconnell 	} else if (ifp->if_bytes) {
35274eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
35284eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
35294eea22f0SMandy Kirkconnell 	}
35304eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = NULL;
35314eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
35324eea22f0SMandy Kirkconnell 	ifp->if_bytes = 0;
35334eea22f0SMandy Kirkconnell }
35340293ce3aSMandy Kirkconnell 
35350293ce3aSMandy Kirkconnell /*
35368867bc9bSMandy Kirkconnell  * Return a pointer to the extent record for file system block bno.
35378867bc9bSMandy Kirkconnell  */
3538a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *			/* pointer to found extent record */
35398867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext(
35408867bc9bSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
35418867bc9bSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
35428867bc9bSMandy Kirkconnell 	xfs_extnum_t	*idxp)		/* index of target extent */
35438867bc9bSMandy Kirkconnell {
3544a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
35458867bc9bSMandy Kirkconnell 	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
3546a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
35478867bc9bSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
3548c41564b5SNathan Scott 	int		high;		/* upper boundary in search */
35498867bc9bSMandy Kirkconnell 	xfs_extnum_t	idx = 0;	/* index of target extent */
3550c41564b5SNathan Scott 	int		low;		/* lower boundary in search */
35518867bc9bSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of file extents */
35528867bc9bSMandy Kirkconnell 	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
35538867bc9bSMandy Kirkconnell 
35548867bc9bSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
35558867bc9bSMandy Kirkconnell 	if (nextents == 0) {
35568867bc9bSMandy Kirkconnell 		*idxp = 0;
35578867bc9bSMandy Kirkconnell 		return NULL;
35588867bc9bSMandy Kirkconnell 	}
35598867bc9bSMandy Kirkconnell 	low = 0;
35608867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
35618867bc9bSMandy Kirkconnell 		/* Find target extent list */
35628867bc9bSMandy Kirkconnell 		int	erp_idx = 0;
35638867bc9bSMandy Kirkconnell 		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
35648867bc9bSMandy Kirkconnell 		base = erp->er_extbuf;
35658867bc9bSMandy Kirkconnell 		high = erp->er_extcount - 1;
35668867bc9bSMandy Kirkconnell 	} else {
35678867bc9bSMandy Kirkconnell 		base = ifp->if_u1.if_extents;
35688867bc9bSMandy Kirkconnell 		high = nextents - 1;
35698867bc9bSMandy Kirkconnell 	}
35708867bc9bSMandy Kirkconnell 	/* Binary search extent records */
35718867bc9bSMandy Kirkconnell 	while (low <= high) {
35728867bc9bSMandy Kirkconnell 		idx = (low + high) >> 1;
35738867bc9bSMandy Kirkconnell 		ep = base + idx;
35748867bc9bSMandy Kirkconnell 		startoff = xfs_bmbt_get_startoff(ep);
35758867bc9bSMandy Kirkconnell 		blockcount = xfs_bmbt_get_blockcount(ep);
35768867bc9bSMandy Kirkconnell 		if (bno < startoff) {
35778867bc9bSMandy Kirkconnell 			high = idx - 1;
35788867bc9bSMandy Kirkconnell 		} else if (bno >= startoff + blockcount) {
35798867bc9bSMandy Kirkconnell 			low = idx + 1;
35808867bc9bSMandy Kirkconnell 		} else {
35818867bc9bSMandy Kirkconnell 			/* Convert back to file-based extent index */
35828867bc9bSMandy Kirkconnell 			if (ifp->if_flags & XFS_IFEXTIREC) {
35838867bc9bSMandy Kirkconnell 				idx += erp->er_extoff;
35848867bc9bSMandy Kirkconnell 			}
35858867bc9bSMandy Kirkconnell 			*idxp = idx;
35868867bc9bSMandy Kirkconnell 			return ep;
35878867bc9bSMandy Kirkconnell 		}
35888867bc9bSMandy Kirkconnell 	}
35898867bc9bSMandy Kirkconnell 	/* Convert back to file-based extent index */
35908867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
35918867bc9bSMandy Kirkconnell 		idx += erp->er_extoff;
35928867bc9bSMandy Kirkconnell 	}
35938867bc9bSMandy Kirkconnell 	if (bno >= startoff + blockcount) {
35948867bc9bSMandy Kirkconnell 		if (++idx == nextents) {
35958867bc9bSMandy Kirkconnell 			ep = NULL;
35968867bc9bSMandy Kirkconnell 		} else {
35978867bc9bSMandy Kirkconnell 			ep = xfs_iext_get_ext(ifp, idx);
35988867bc9bSMandy Kirkconnell 		}
35998867bc9bSMandy Kirkconnell 	}
36008867bc9bSMandy Kirkconnell 	*idxp = idx;
36018867bc9bSMandy Kirkconnell 	return ep;
36028867bc9bSMandy Kirkconnell }
36038867bc9bSMandy Kirkconnell 
36048867bc9bSMandy Kirkconnell /*
36050293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
36060293ce3aSMandy Kirkconnell  * extent record for filesystem block bno. Store the index of the
36070293ce3aSMandy Kirkconnell  * target irec in *erp_idxp.
36080293ce3aSMandy Kirkconnell  */
36098867bc9bSMandy Kirkconnell xfs_ext_irec_t *			/* pointer to found extent record */
36100293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec(
36110293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
36120293ce3aSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
36130293ce3aSMandy Kirkconnell 	int		*erp_idxp)	/* irec index of target ext list */
36140293ce3aSMandy Kirkconnell {
36150293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
36160293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
36178867bc9bSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
36180293ce3aSMandy Kirkconnell 	int		nlists;		/* number of extent irec's (lists) */
36190293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
36200293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
36210293ce3aSMandy Kirkconnell 
36220293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
36230293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
36240293ce3aSMandy Kirkconnell 	erp_idx = 0;
36250293ce3aSMandy Kirkconnell 	low = 0;
36260293ce3aSMandy Kirkconnell 	high = nlists - 1;
36270293ce3aSMandy Kirkconnell 	while (low <= high) {
36280293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
36290293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
36300293ce3aSMandy Kirkconnell 		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
36310293ce3aSMandy Kirkconnell 		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
36320293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
36330293ce3aSMandy Kirkconnell 		} else if (erp_next && bno >=
36340293ce3aSMandy Kirkconnell 			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
36350293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
36360293ce3aSMandy Kirkconnell 		} else {
36370293ce3aSMandy Kirkconnell 			break;
36380293ce3aSMandy Kirkconnell 		}
36390293ce3aSMandy Kirkconnell 	}
36400293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
36410293ce3aSMandy Kirkconnell 	return erp;
36420293ce3aSMandy Kirkconnell }
36430293ce3aSMandy Kirkconnell 
36440293ce3aSMandy Kirkconnell /*
36450293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
36460293ce3aSMandy Kirkconnell  * extent record at file extent index *idxp. Store the index of the
36470293ce3aSMandy Kirkconnell  * target irec in *erp_idxp and store the page index of the target
36480293ce3aSMandy Kirkconnell  * extent record in *idxp.
36490293ce3aSMandy Kirkconnell  */
36500293ce3aSMandy Kirkconnell xfs_ext_irec_t *
36510293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec(
36520293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
36530293ce3aSMandy Kirkconnell 	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
36540293ce3aSMandy Kirkconnell 	int		*erp_idxp,	/* pointer to target irec */
36550293ce3aSMandy Kirkconnell 	int		realloc)	/* new bytes were just added */
36560293ce3aSMandy Kirkconnell {
36570293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
36580293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
36590293ce3aSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
36600293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
36610293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
36620293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
36630293ce3aSMandy Kirkconnell 	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
36640293ce3aSMandy Kirkconnell 
36650293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
366687bef181SChristoph Hellwig 	ASSERT(page_idx >= 0);
366787bef181SChristoph Hellwig 	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
366887bef181SChristoph Hellwig 	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
366987bef181SChristoph Hellwig 
36700293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
36710293ce3aSMandy Kirkconnell 	erp_idx = 0;
36720293ce3aSMandy Kirkconnell 	low = 0;
36730293ce3aSMandy Kirkconnell 	high = nlists - 1;
36740293ce3aSMandy Kirkconnell 
36750293ce3aSMandy Kirkconnell 	/* Binary search extent irec's */
36760293ce3aSMandy Kirkconnell 	while (low <= high) {
36770293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
36780293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
36790293ce3aSMandy Kirkconnell 		prev = erp_idx > 0 ? erp - 1 : NULL;
36800293ce3aSMandy Kirkconnell 		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
36810293ce3aSMandy Kirkconnell 		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
36820293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
36830293ce3aSMandy Kirkconnell 		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
36840293ce3aSMandy Kirkconnell 			   (page_idx == erp->er_extoff + erp->er_extcount &&
36850293ce3aSMandy Kirkconnell 			    !realloc)) {
36860293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
36870293ce3aSMandy Kirkconnell 		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
36880293ce3aSMandy Kirkconnell 			   erp->er_extcount == XFS_LINEAR_EXTS) {
36890293ce3aSMandy Kirkconnell 			ASSERT(realloc);
36900293ce3aSMandy Kirkconnell 			page_idx = 0;
36910293ce3aSMandy Kirkconnell 			erp_idx++;
36920293ce3aSMandy Kirkconnell 			erp = erp_idx < nlists ? erp + 1 : NULL;
36930293ce3aSMandy Kirkconnell 			break;
36940293ce3aSMandy Kirkconnell 		} else {
36950293ce3aSMandy Kirkconnell 			page_idx -= erp->er_extoff;
36960293ce3aSMandy Kirkconnell 			break;
36970293ce3aSMandy Kirkconnell 		}
36980293ce3aSMandy Kirkconnell 	}
36990293ce3aSMandy Kirkconnell 	*idxp = page_idx;
37000293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
37010293ce3aSMandy Kirkconnell 	return(erp);
37020293ce3aSMandy Kirkconnell }
37030293ce3aSMandy Kirkconnell 
37040293ce3aSMandy Kirkconnell /*
37050293ce3aSMandy Kirkconnell  * Allocate and initialize an indirection array once the space needed
37060293ce3aSMandy Kirkconnell  * for incore extents increases above XFS_IEXT_BUFSZ.
37070293ce3aSMandy Kirkconnell  */
37080293ce3aSMandy Kirkconnell void
37090293ce3aSMandy Kirkconnell xfs_iext_irec_init(
37100293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
37110293ce3aSMandy Kirkconnell {
37120293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
37130293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
37140293ce3aSMandy Kirkconnell 
37150293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
37160293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
37170293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
37180293ce3aSMandy Kirkconnell 
37196785073bSDavid Chinner 	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
37200293ce3aSMandy Kirkconnell 
37210293ce3aSMandy Kirkconnell 	if (nextents == 0) {
37226785073bSDavid Chinner 		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
37230293ce3aSMandy Kirkconnell 	} else if (!ifp->if_real_bytes) {
37240293ce3aSMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
37250293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
37260293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
37270293ce3aSMandy Kirkconnell 	}
37280293ce3aSMandy Kirkconnell 	erp->er_extbuf = ifp->if_u1.if_extents;
37290293ce3aSMandy Kirkconnell 	erp->er_extcount = nextents;
37300293ce3aSMandy Kirkconnell 	erp->er_extoff = 0;
37310293ce3aSMandy Kirkconnell 
37320293ce3aSMandy Kirkconnell 	ifp->if_flags |= XFS_IFEXTIREC;
37330293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
37340293ce3aSMandy Kirkconnell 	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
37350293ce3aSMandy Kirkconnell 	ifp->if_u1.if_ext_irec = erp;
37360293ce3aSMandy Kirkconnell 
37370293ce3aSMandy Kirkconnell 	return;
37380293ce3aSMandy Kirkconnell }
37390293ce3aSMandy Kirkconnell 
37400293ce3aSMandy Kirkconnell /*
37410293ce3aSMandy Kirkconnell  * Allocate and initialize a new entry in the indirection array.
37420293ce3aSMandy Kirkconnell  */
37430293ce3aSMandy Kirkconnell xfs_ext_irec_t *
37440293ce3aSMandy Kirkconnell xfs_iext_irec_new(
37450293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37460293ce3aSMandy Kirkconnell 	int		erp_idx)	/* index for new irec */
37470293ce3aSMandy Kirkconnell {
37480293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
37490293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
37500293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37510293ce3aSMandy Kirkconnell 
37520293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37530293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37540293ce3aSMandy Kirkconnell 
37550293ce3aSMandy Kirkconnell 	/* Resize indirection array */
37560293ce3aSMandy Kirkconnell 	xfs_iext_realloc_indirect(ifp, ++nlists *
37570293ce3aSMandy Kirkconnell 				  sizeof(xfs_ext_irec_t));
37580293ce3aSMandy Kirkconnell 	/*
37590293ce3aSMandy Kirkconnell 	 * Move records down in the array so the
37600293ce3aSMandy Kirkconnell 	 * new page can use erp_idx.
37610293ce3aSMandy Kirkconnell 	 */
37620293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
37630293ce3aSMandy Kirkconnell 	for (i = nlists - 1; i > erp_idx; i--) {
37640293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
37650293ce3aSMandy Kirkconnell 	}
37660293ce3aSMandy Kirkconnell 	ASSERT(i == erp_idx);
37670293ce3aSMandy Kirkconnell 
37680293ce3aSMandy Kirkconnell 	/* Initialize new extent record */
37690293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
37706785073bSDavid Chinner 	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
37710293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
37720293ce3aSMandy Kirkconnell 	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
37730293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extcount = 0;
37740293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extoff = erp_idx > 0 ?
37750293ce3aSMandy Kirkconnell 		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
37760293ce3aSMandy Kirkconnell 	return (&erp[erp_idx]);
37770293ce3aSMandy Kirkconnell }
37780293ce3aSMandy Kirkconnell 
37790293ce3aSMandy Kirkconnell /*
37800293ce3aSMandy Kirkconnell  * Remove a record from the indirection array.
37810293ce3aSMandy Kirkconnell  */
37820293ce3aSMandy Kirkconnell void
37830293ce3aSMandy Kirkconnell xfs_iext_irec_remove(
37840293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37850293ce3aSMandy Kirkconnell 	int		erp_idx)	/* irec index to remove */
37860293ce3aSMandy Kirkconnell {
37870293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
37880293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
37890293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37900293ce3aSMandy Kirkconnell 
37910293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37920293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37930293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
37940293ce3aSMandy Kirkconnell 	if (erp->er_extbuf) {
37950293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
37960293ce3aSMandy Kirkconnell 			-erp->er_extcount);
3797f0e2d93cSDenys Vlasenko 		kmem_free(erp->er_extbuf);
37980293ce3aSMandy Kirkconnell 	}
37990293ce3aSMandy Kirkconnell 	/* Compact extent records */
38000293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
38010293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists - 1; i++) {
38020293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
38030293ce3aSMandy Kirkconnell 	}
38040293ce3aSMandy Kirkconnell 	/*
38050293ce3aSMandy Kirkconnell 	 * Manually free the last extent record from the indirection
38060293ce3aSMandy Kirkconnell 	 * array.  A call to xfs_iext_realloc_indirect() with a size
38070293ce3aSMandy Kirkconnell 	 * of zero would result in a call to xfs_iext_destroy() which
38080293ce3aSMandy Kirkconnell 	 * would in turn call this function again, creating a nasty
38090293ce3aSMandy Kirkconnell 	 * infinite loop.
38100293ce3aSMandy Kirkconnell 	 */
38110293ce3aSMandy Kirkconnell 	if (--nlists) {
38120293ce3aSMandy Kirkconnell 		xfs_iext_realloc_indirect(ifp,
38130293ce3aSMandy Kirkconnell 			nlists * sizeof(xfs_ext_irec_t));
38140293ce3aSMandy Kirkconnell 	} else {
3815f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_ext_irec);
38160293ce3aSMandy Kirkconnell 	}
38170293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
38180293ce3aSMandy Kirkconnell }
38190293ce3aSMandy Kirkconnell 
38200293ce3aSMandy Kirkconnell /*
38210293ce3aSMandy Kirkconnell  * This is called to clean up large amounts of unused memory allocated
38220293ce3aSMandy Kirkconnell  * by the indirection array.  Before compacting anything though, verify
38230293ce3aSMandy Kirkconnell  * that the indirection array is still needed and switch back to the
38240293ce3aSMandy Kirkconnell  * linear extent list (or even the inline buffer) if possible.  The
38250293ce3aSMandy Kirkconnell  * compaction policy is as follows:
38260293ce3aSMandy Kirkconnell  *
38270293ce3aSMandy Kirkconnell  *    Full Compaction: Extents fit into a single page (or inline buffer)
382871a8c87fSLachlan McIlroy  * Partial Compaction: Extents occupy less than 50% of allocated space
38290293ce3aSMandy Kirkconnell  *      No Compaction: Extents occupy at least 50% of allocated space
38300293ce3aSMandy Kirkconnell  */
38310293ce3aSMandy Kirkconnell void
38320293ce3aSMandy Kirkconnell xfs_iext_irec_compact(
38330293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
38340293ce3aSMandy Kirkconnell {
38350293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
38360293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
38370293ce3aSMandy Kirkconnell 
38380293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
38390293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38400293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
38410293ce3aSMandy Kirkconnell 
38420293ce3aSMandy Kirkconnell 	if (nextents == 0) {
38430293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
38440293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_INLINE_EXTS) {
38450293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
38460293ce3aSMandy Kirkconnell 		xfs_iext_direct_to_inline(ifp, nextents);
38470293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_LINEAR_EXTS) {
38480293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
38490293ce3aSMandy Kirkconnell 	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
38500293ce3aSMandy Kirkconnell 		xfs_iext_irec_compact_pages(ifp);
38510293ce3aSMandy Kirkconnell 	}
38520293ce3aSMandy Kirkconnell }
38530293ce3aSMandy Kirkconnell 
38540293ce3aSMandy Kirkconnell /*
38550293ce3aSMandy Kirkconnell  * Combine extents from neighboring extent pages.
38560293ce3aSMandy Kirkconnell  */
38570293ce3aSMandy Kirkconnell void
38580293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(
38590293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
38600293ce3aSMandy Kirkconnell {
38610293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
38620293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
38630293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
38640293ce3aSMandy Kirkconnell 
38650293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
38660293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38670293ce3aSMandy Kirkconnell 	while (erp_idx < nlists - 1) {
38680293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
38690293ce3aSMandy Kirkconnell 		erp_next = erp + 1;
38700293ce3aSMandy Kirkconnell 		if (erp_next->er_extcount <=
38710293ce3aSMandy Kirkconnell 		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
387271a8c87fSLachlan McIlroy 			memcpy(&erp->er_extbuf[erp->er_extcount],
38730293ce3aSMandy Kirkconnell 				erp_next->er_extbuf, erp_next->er_extcount *
38740293ce3aSMandy Kirkconnell 				sizeof(xfs_bmbt_rec_t));
38750293ce3aSMandy Kirkconnell 			erp->er_extcount += erp_next->er_extcount;
38760293ce3aSMandy Kirkconnell 			/*
38770293ce3aSMandy Kirkconnell 			 * Free page before removing extent record
38780293ce3aSMandy Kirkconnell 			 * so er_extoffs don't get modified in
38790293ce3aSMandy Kirkconnell 			 * xfs_iext_irec_remove.
38800293ce3aSMandy Kirkconnell 			 */
3881f0e2d93cSDenys Vlasenko 			kmem_free(erp_next->er_extbuf);
38820293ce3aSMandy Kirkconnell 			erp_next->er_extbuf = NULL;
38830293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx + 1);
38840293ce3aSMandy Kirkconnell 			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38850293ce3aSMandy Kirkconnell 		} else {
38860293ce3aSMandy Kirkconnell 			erp_idx++;
38870293ce3aSMandy Kirkconnell 		}
38880293ce3aSMandy Kirkconnell 	}
38890293ce3aSMandy Kirkconnell }
38900293ce3aSMandy Kirkconnell 
38910293ce3aSMandy Kirkconnell /*
38920293ce3aSMandy Kirkconnell  * This is called to update the er_extoff field in the indirection
38930293ce3aSMandy Kirkconnell  * array when extents have been added or removed from one of the
38940293ce3aSMandy Kirkconnell  * extent lists. erp_idx contains the irec index to begin updating
38950293ce3aSMandy Kirkconnell  * at and ext_diff contains the number of extents that were added
38960293ce3aSMandy Kirkconnell  * or removed.
38970293ce3aSMandy Kirkconnell  */
38980293ce3aSMandy Kirkconnell void
38990293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(
39000293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
39010293ce3aSMandy Kirkconnell 	int		erp_idx,	/* irec index to update */
39020293ce3aSMandy Kirkconnell 	int		ext_diff)	/* number of new extents */
39030293ce3aSMandy Kirkconnell {
39040293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
39050293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists */
39060293ce3aSMandy Kirkconnell 
39070293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
39080293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
39090293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists; i++) {
39100293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
39110293ce3aSMandy Kirkconnell 	}
39120293ce3aSMandy Kirkconnell }
3913