xref: /openbmc/linux/fs/xfs/xfs_inode.c (revision 93848a999cf9b9e4f4f77dba843a48c393f33c59)
11da177e4SLinus Torvalds /*
23e57ecf6SOlaf Weber  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
37b718769SNathan Scott  * All Rights Reserved.
41da177e4SLinus Torvalds  *
57b718769SNathan Scott  * This program is free software; you can redistribute it and/or
67b718769SNathan Scott  * modify it under the terms of the GNU General Public License as
71da177e4SLinus Torvalds  * published by the Free Software Foundation.
81da177e4SLinus Torvalds  *
97b718769SNathan Scott  * This program is distributed in the hope that it would be useful,
107b718769SNathan Scott  * but WITHOUT ANY WARRANTY; without even the implied warranty of
117b718769SNathan Scott  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
127b718769SNathan Scott  * GNU General Public License for more details.
131da177e4SLinus Torvalds  *
147b718769SNathan Scott  * You should have received a copy of the GNU General Public License
157b718769SNathan Scott  * along with this program; if not, write the Free Software Foundation,
167b718769SNathan Scott  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
171da177e4SLinus Torvalds  */
1840ebd81dSRobert P. J. Day #include <linux/log2.h>
1940ebd81dSRobert P. J. Day 
201da177e4SLinus Torvalds #include "xfs.h"
21a844f451SNathan Scott #include "xfs_fs.h"
221da177e4SLinus Torvalds #include "xfs_types.h"
231da177e4SLinus Torvalds #include "xfs_log.h"
24a844f451SNathan Scott #include "xfs_inum.h"
251da177e4SLinus Torvalds #include "xfs_trans.h"
261da177e4SLinus Torvalds #include "xfs_trans_priv.h"
271da177e4SLinus Torvalds #include "xfs_sb.h"
281da177e4SLinus Torvalds #include "xfs_ag.h"
291da177e4SLinus Torvalds #include "xfs_mount.h"
301da177e4SLinus Torvalds #include "xfs_bmap_btree.h"
31a844f451SNathan Scott #include "xfs_alloc_btree.h"
321da177e4SLinus Torvalds #include "xfs_ialloc_btree.h"
33a844f451SNathan Scott #include "xfs_attr_sf.h"
341da177e4SLinus Torvalds #include "xfs_dinode.h"
351da177e4SLinus Torvalds #include "xfs_inode.h"
361da177e4SLinus Torvalds #include "xfs_buf_item.h"
37a844f451SNathan Scott #include "xfs_inode_item.h"
38a844f451SNathan Scott #include "xfs_btree.h"
39a844f451SNathan Scott #include "xfs_alloc.h"
40a844f451SNathan Scott #include "xfs_ialloc.h"
41a844f451SNathan Scott #include "xfs_bmap.h"
421da177e4SLinus Torvalds #include "xfs_error.h"
431da177e4SLinus Torvalds #include "xfs_utils.h"
441da177e4SLinus Torvalds #include "xfs_quota.h"
452a82b8beSDavid Chinner #include "xfs_filestream.h"
46739bfb2aSChristoph Hellwig #include "xfs_vnodeops.h"
47*93848a99SChristoph Hellwig #include "xfs_cksum.h"
480b1b213fSChristoph Hellwig #include "xfs_trace.h"
4933479e05SDave Chinner #include "xfs_icache.h"
501da177e4SLinus Torvalds 
511da177e4SLinus Torvalds kmem_zone_t *xfs_ifork_zone;
521da177e4SLinus Torvalds kmem_zone_t *xfs_inode_zone;
531da177e4SLinus Torvalds 
541da177e4SLinus Torvalds /*
558f04c47aSChristoph Hellwig  * Used in xfs_itruncate_extents().  This is the maximum number of extents
561da177e4SLinus Torvalds  * freed from a file in a single transaction.
571da177e4SLinus Torvalds  */
581da177e4SLinus Torvalds #define	XFS_ITRUNC_MAX_EXTENTS	2
591da177e4SLinus Torvalds 
601da177e4SLinus Torvalds STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
611da177e4SLinus Torvalds STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
621da177e4SLinus Torvalds STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
631da177e4SLinus Torvalds STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
641da177e4SLinus Torvalds 
652a0ec1d9SDave Chinner /*
662a0ec1d9SDave Chinner  * Helper function to extract the extent size hint from an inode.
672a0ec1d9SDave Chinner  */
682a0ec1d9SDave Chinner xfs_extlen_t
692a0ec1d9SDave Chinner xfs_get_extsz_hint(
702a0ec1d9SDave Chinner 	struct xfs_inode	*ip)
712a0ec1d9SDave Chinner {
722a0ec1d9SDave Chinner 	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
732a0ec1d9SDave Chinner 		return ip->i_d.di_extsize;
742a0ec1d9SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip))
752a0ec1d9SDave Chinner 		return ip->i_mount->m_sb.sb_rextsize;
762a0ec1d9SDave Chinner 	return 0;
772a0ec1d9SDave Chinner }
782a0ec1d9SDave Chinner 
79fa96acadSDave Chinner /*
80fa96acadSDave Chinner  * This is a wrapper routine around the xfs_ilock() routine used to centralize
81fa96acadSDave Chinner  * some grungy code.  It is used in places that wish to lock the inode solely
82fa96acadSDave Chinner  * for reading the extents.  The reason these places can't just call
83fa96acadSDave Chinner  * xfs_ilock(SHARED) is that the inode lock also guards the bringing in of the
84fa96acadSDave Chinner  * extents from disk for a file in b-tree format.  If the inode is in b-tree
85fa96acadSDave Chinner  * format, then we need to lock the inode exclusively until the extents are read
86fa96acadSDave Chinner  * in.  Locking it exclusively all the time would limit our parallelism
87fa96acadSDave Chinner  * unnecessarily, though.  What we do instead is check to see if the extents
88fa96acadSDave Chinner  * have been read in yet, and only lock the inode exclusively if they have not.
89fa96acadSDave Chinner  *
90fa96acadSDave Chinner  * The function returns a value which should be given to the corresponding
91fa96acadSDave Chinner  * xfs_iunlock_map_shared().  This value is the mode in which the lock was
92fa96acadSDave Chinner  * actually taken.
93fa96acadSDave Chinner  */
94fa96acadSDave Chinner uint
95fa96acadSDave Chinner xfs_ilock_map_shared(
96fa96acadSDave Chinner 	xfs_inode_t	*ip)
97fa96acadSDave Chinner {
98fa96acadSDave Chinner 	uint	lock_mode;
99fa96acadSDave Chinner 
100fa96acadSDave Chinner 	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
101fa96acadSDave Chinner 	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
102fa96acadSDave Chinner 		lock_mode = XFS_ILOCK_EXCL;
103fa96acadSDave Chinner 	} else {
104fa96acadSDave Chinner 		lock_mode = XFS_ILOCK_SHARED;
105fa96acadSDave Chinner 	}
106fa96acadSDave Chinner 
107fa96acadSDave Chinner 	xfs_ilock(ip, lock_mode);
108fa96acadSDave Chinner 
109fa96acadSDave Chinner 	return lock_mode;
110fa96acadSDave Chinner }
111fa96acadSDave Chinner 
112fa96acadSDave Chinner /*
113fa96acadSDave Chinner  * This is simply the unlock routine to go with xfs_ilock_map_shared().
114fa96acadSDave Chinner  * All it does is call xfs_iunlock() with the given lock_mode.
115fa96acadSDave Chinner  */
116fa96acadSDave Chinner void
117fa96acadSDave Chinner xfs_iunlock_map_shared(
118fa96acadSDave Chinner 	xfs_inode_t	*ip,
119fa96acadSDave Chinner 	unsigned int	lock_mode)
120fa96acadSDave Chinner {
121fa96acadSDave Chinner 	xfs_iunlock(ip, lock_mode);
122fa96acadSDave Chinner }
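
/*
 * Illustrative sketch, not part of the original file: how a caller that only
 * needs to read the data fork extents might pair the two helpers above.  The
 * function name and the elided extent walk are placeholders.
 */
static inline void
xfs_example_walk_extents(
	struct xfs_inode	*ip)
{
	uint			lock_mode;

	lock_mode = xfs_ilock_map_shared(ip);	/* EXCL only if extents unread */
	/* ... walk ip->i_df extents here ... */
	xfs_iunlock_map_shared(ip, lock_mode);	/* drop with the returned mode */
}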
123fa96acadSDave Chinner 
124fa96acadSDave Chinner /*
125fa96acadSDave Chinner  * The xfs inode contains 2 locks: a multi-reader lock called the
126fa96acadSDave Chinner  * i_iolock and a multi-reader lock called the i_lock.  This routine
127fa96acadSDave Chinner  * allows either or both of the locks to be obtained.
128fa96acadSDave Chinner  *
129fa96acadSDave Chinner  * The 2 locks should always be ordered so that the IO lock is
130fa96acadSDave Chinner  * obtained first in order to prevent deadlock.
131fa96acadSDave Chinner  *
132fa96acadSDave Chinner  * ip -- the inode being locked
133fa96acadSDave Chinner  * lock_flags -- this parameter indicates the inode's locks
134fa96acadSDave Chinner  *       to be locked.  It can be:
135fa96acadSDave Chinner  *		XFS_IOLOCK_SHARED,
136fa96acadSDave Chinner  *		XFS_IOLOCK_EXCL,
137fa96acadSDave Chinner  *		XFS_ILOCK_SHARED,
138fa96acadSDave Chinner  *		XFS_ILOCK_EXCL,
139fa96acadSDave Chinner  *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
140fa96acadSDave Chinner  *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
141fa96acadSDave Chinner  *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
142fa96acadSDave Chinner  *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
143fa96acadSDave Chinner  */
144fa96acadSDave Chinner void
145fa96acadSDave Chinner xfs_ilock(
146fa96acadSDave Chinner 	xfs_inode_t		*ip,
147fa96acadSDave Chinner 	uint			lock_flags)
148fa96acadSDave Chinner {
149fa96acadSDave Chinner 	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
150fa96acadSDave Chinner 
151fa96acadSDave Chinner 	/*
152fa96acadSDave Chinner 	 * You can't set both SHARED and EXCL for the same lock,
153fa96acadSDave Chinner 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
154fa96acadSDave Chinner 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
155fa96acadSDave Chinner 	 */
156fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
157fa96acadSDave Chinner 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
158fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
159fa96acadSDave Chinner 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
160fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
161fa96acadSDave Chinner 
162fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
163fa96acadSDave Chinner 		mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
164fa96acadSDave Chinner 	else if (lock_flags & XFS_IOLOCK_SHARED)
165fa96acadSDave Chinner 		mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
166fa96acadSDave Chinner 
167fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL)
168fa96acadSDave Chinner 		mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
169fa96acadSDave Chinner 	else if (lock_flags & XFS_ILOCK_SHARED)
170fa96acadSDave Chinner 		mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
171fa96acadSDave Chinner }
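
/*
 * Illustrative sketch, not part of the original file: taking the IO lock and
 * the inode lock in one xfs_ilock() call preserves the documented ordering
 * (IO lock first), and the identical flags must be handed back to
 * xfs_iunlock().  The function name and the elided work are placeholders.
 */
static inline void
xfs_example_lock_both(
	struct xfs_inode	*ip)
{
	uint			lock_flags = XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL;

	xfs_ilock(ip, lock_flags);
	/* ... modify the inode while holding both locks ... */
	xfs_iunlock(ip, lock_flags);
}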
172fa96acadSDave Chinner 
173fa96acadSDave Chinner /*
174fa96acadSDave Chinner  * This is just like xfs_ilock(), except that the caller
175fa96acadSDave Chinner  * is guaranteed not to sleep.  It returns 1 if it gets
176fa96acadSDave Chinner  * the requested locks and 0 otherwise.  If the IO lock is
177fa96acadSDave Chinner  * obtained but the inode lock cannot be, then the IO lock
178fa96acadSDave Chinner  * is dropped before returning.
179fa96acadSDave Chinner  *
180fa96acadSDave Chinner  * ip -- the inode being locked
181fa96acadSDave Chinner  * lock_flags -- this parameter indicates the inode's locks to be
182fa96acadSDave Chinner  *       locked.  See the comment for xfs_ilock() for a list
183fa96acadSDave Chinner  *	 of valid values.
184fa96acadSDave Chinner  */
185fa96acadSDave Chinner int
186fa96acadSDave Chinner xfs_ilock_nowait(
187fa96acadSDave Chinner 	xfs_inode_t		*ip,
188fa96acadSDave Chinner 	uint			lock_flags)
189fa96acadSDave Chinner {
190fa96acadSDave Chinner 	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
191fa96acadSDave Chinner 
192fa96acadSDave Chinner 	/*
193fa96acadSDave Chinner 	 * You can't set both SHARED and EXCL for the same lock,
194fa96acadSDave Chinner 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
195fa96acadSDave Chinner 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
196fa96acadSDave Chinner 	 */
197fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
198fa96acadSDave Chinner 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
199fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
200fa96acadSDave Chinner 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
201fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
202fa96acadSDave Chinner 
203fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL) {
204fa96acadSDave Chinner 		if (!mrtryupdate(&ip->i_iolock))
205fa96acadSDave Chinner 			goto out;
206fa96acadSDave Chinner 	} else if (lock_flags & XFS_IOLOCK_SHARED) {
207fa96acadSDave Chinner 		if (!mrtryaccess(&ip->i_iolock))
208fa96acadSDave Chinner 			goto out;
209fa96acadSDave Chinner 	}
210fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL) {
211fa96acadSDave Chinner 		if (!mrtryupdate(&ip->i_lock))
212fa96acadSDave Chinner 			goto out_undo_iolock;
213fa96acadSDave Chinner 	} else if (lock_flags & XFS_ILOCK_SHARED) {
214fa96acadSDave Chinner 		if (!mrtryaccess(&ip->i_lock))
215fa96acadSDave Chinner 			goto out_undo_iolock;
216fa96acadSDave Chinner 	}
217fa96acadSDave Chinner 	return 1;
218fa96acadSDave Chinner 
219fa96acadSDave Chinner  out_undo_iolock:
220fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
221fa96acadSDave Chinner 		mrunlock_excl(&ip->i_iolock);
222fa96acadSDave Chinner 	else if (lock_flags & XFS_IOLOCK_SHARED)
223fa96acadSDave Chinner 		mrunlock_shared(&ip->i_iolock);
224fa96acadSDave Chinner  out:
225fa96acadSDave Chinner 	return 0;
226fa96acadSDave Chinner }
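
/*
 * Illustrative sketch, not part of the original file: a non-blocking caller
 * checks the 0/1 return value of xfs_ilock_nowait() and backs off instead of
 * sleeping.  The function name and the EAGAIN convention are placeholders.
 */
static inline int
xfs_example_trylock_peek(
	struct xfs_inode	*ip)
{
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return EAGAIN;			/* caller retries or defers */
	/* ... short read-only look at the inode ... */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return 0;
}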
227fa96acadSDave Chinner 
228fa96acadSDave Chinner /*
229fa96acadSDave Chinner  * xfs_iunlock() is used to drop the inode locks acquired with
230fa96acadSDave Chinner  * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
231fa96acadSDave Chinner  * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
232fa96acadSDave Chinner  * that we know which locks to drop.
233fa96acadSDave Chinner  *
234fa96acadSDave Chinner  * ip -- the inode being unlocked
235fa96acadSDave Chinner  * lock_flags -- this parameter indicates the inode's locks to be
236fa96acadSDave Chinner  *       unlocked.  See the comment for xfs_ilock() for a list
237fa96acadSDave Chinner  *	 of valid values for this parameter.
238fa96acadSDave Chinner  *
239fa96acadSDave Chinner  */
240fa96acadSDave Chinner void
241fa96acadSDave Chinner xfs_iunlock(
242fa96acadSDave Chinner 	xfs_inode_t		*ip,
243fa96acadSDave Chinner 	uint			lock_flags)
244fa96acadSDave Chinner {
245fa96acadSDave Chinner 	/*
246fa96acadSDave Chinner 	 * You can't set both SHARED and EXCL for the same lock,
247fa96acadSDave Chinner 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
248fa96acadSDave Chinner 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
249fa96acadSDave Chinner 	 */
250fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
251fa96acadSDave Chinner 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
252fa96acadSDave Chinner 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
253fa96acadSDave Chinner 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
254fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
255fa96acadSDave Chinner 	ASSERT(lock_flags != 0);
256fa96acadSDave Chinner 
257fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
258fa96acadSDave Chinner 		mrunlock_excl(&ip->i_iolock);
259fa96acadSDave Chinner 	else if (lock_flags & XFS_IOLOCK_SHARED)
260fa96acadSDave Chinner 		mrunlock_shared(&ip->i_iolock);
261fa96acadSDave Chinner 
262fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL)
263fa96acadSDave Chinner 		mrunlock_excl(&ip->i_lock);
264fa96acadSDave Chinner 	else if (lock_flags & XFS_ILOCK_SHARED)
265fa96acadSDave Chinner 		mrunlock_shared(&ip->i_lock);
266fa96acadSDave Chinner 
267fa96acadSDave Chinner 	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
268fa96acadSDave Chinner }
269fa96acadSDave Chinner 
270fa96acadSDave Chinner /*
271fa96acadSDave Chinner  * Give up write locks.  The I/O lock cannot be held nested
272fa96acadSDave Chinner  * if it is being demoted.
273fa96acadSDave Chinner  */
274fa96acadSDave Chinner void
275fa96acadSDave Chinner xfs_ilock_demote(
276fa96acadSDave Chinner 	xfs_inode_t		*ip,
277fa96acadSDave Chinner 	uint			lock_flags)
278fa96acadSDave Chinner {
279fa96acadSDave Chinner 	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
280fa96acadSDave Chinner 	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
281fa96acadSDave Chinner 
282fa96acadSDave Chinner 	if (lock_flags & XFS_ILOCK_EXCL)
283fa96acadSDave Chinner 		mrdemote(&ip->i_lock);
284fa96acadSDave Chinner 	if (lock_flags & XFS_IOLOCK_EXCL)
285fa96acadSDave Chinner 		mrdemote(&ip->i_iolock);
286fa96acadSDave Chinner 
287fa96acadSDave Chinner 	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
288fa96acadSDave Chinner }
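
/*
 * Illustrative sketch, not part of the original file: after the exclusive
 * phase of an operation, demoting to shared lets readers in while the caller
 * keeps the lock; it must then be dropped as SHARED.  The function name and
 * the elided work are placeholders.
 */
static inline void
xfs_example_demote(
	struct xfs_inode	*ip)
{
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* ... work that needs exclusive access ... */
	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);	/* now held shared */
	/* ... work that only needs shared access ... */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
}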
289fa96acadSDave Chinner 
290fa96acadSDave Chinner #ifdef DEBUG
291fa96acadSDave Chinner int
292fa96acadSDave Chinner xfs_isilocked(
293fa96acadSDave Chinner 	xfs_inode_t		*ip,
294fa96acadSDave Chinner 	uint			lock_flags)
295fa96acadSDave Chinner {
296fa96acadSDave Chinner 	if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
297fa96acadSDave Chinner 		if (!(lock_flags & XFS_ILOCK_SHARED))
298fa96acadSDave Chinner 			return !!ip->i_lock.mr_writer;
299fa96acadSDave Chinner 		return rwsem_is_locked(&ip->i_lock.mr_lock);
300fa96acadSDave Chinner 	}
301fa96acadSDave Chinner 
302fa96acadSDave Chinner 	if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
303fa96acadSDave Chinner 		if (!(lock_flags & XFS_IOLOCK_SHARED))
304fa96acadSDave Chinner 			return !!ip->i_iolock.mr_writer;
305fa96acadSDave Chinner 		return rwsem_is_locked(&ip->i_iolock.mr_lock);
306fa96acadSDave Chinner 	}
307fa96acadSDave Chinner 
308fa96acadSDave Chinner 	ASSERT(0);
309fa96acadSDave Chinner 	return 0;
310fa96acadSDave Chinner }
311fa96acadSDave Chinner #endif
312fa96acadSDave Chinner 
313fa96acadSDave Chinner void
314fa96acadSDave Chinner __xfs_iflock(
315fa96acadSDave Chinner 	struct xfs_inode	*ip)
316fa96acadSDave Chinner {
317fa96acadSDave Chinner 	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
318fa96acadSDave Chinner 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
319fa96acadSDave Chinner 
320fa96acadSDave Chinner 	do {
321fa96acadSDave Chinner 		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
322fa96acadSDave Chinner 		if (xfs_isiflocked(ip))
323fa96acadSDave Chinner 			io_schedule();
324fa96acadSDave Chinner 	} while (!xfs_iflock_nowait(ip));
325fa96acadSDave Chinner 
326fa96acadSDave Chinner 	finish_wait(wq, &wait.wait);
327fa96acadSDave Chinner }
328fa96acadSDave Chinner 
3291da177e4SLinus Torvalds #ifdef DEBUG
3301da177e4SLinus Torvalds /*
3311da177e4SLinus Torvalds  * Make sure that the extents in the given memory buffer
3321da177e4SLinus Torvalds  * are valid.
3331da177e4SLinus Torvalds  */
3341da177e4SLinus Torvalds STATIC void
3351da177e4SLinus Torvalds xfs_validate_extents(
3364eea22f0SMandy Kirkconnell 	xfs_ifork_t		*ifp,
3371da177e4SLinus Torvalds 	int			nrecs,
3381da177e4SLinus Torvalds 	xfs_exntfmt_t		fmt)
3391da177e4SLinus Torvalds {
3401da177e4SLinus Torvalds 	xfs_bmbt_irec_t		irec;
341a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t	rec;
3421da177e4SLinus Torvalds 	int			i;
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds 	for (i = 0; i < nrecs; i++) {
345a6f64d4aSChristoph Hellwig 		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
346a6f64d4aSChristoph Hellwig 		rec.l0 = get_unaligned(&ep->l0);
347a6f64d4aSChristoph Hellwig 		rec.l1 = get_unaligned(&ep->l1);
3481da177e4SLinus Torvalds 		xfs_bmbt_get_all(&rec, &irec);
3491da177e4SLinus Torvalds 		if (fmt == XFS_EXTFMT_NOSTATE)
3501da177e4SLinus Torvalds 			ASSERT(irec.br_state == XFS_EXT_NORM);
3511da177e4SLinus Torvalds 	}
3521da177e4SLinus Torvalds }
3531da177e4SLinus Torvalds #else /* DEBUG */
354a6f64d4aSChristoph Hellwig #define xfs_validate_extents(ifp, nrecs, fmt)
3551da177e4SLinus Torvalds #endif /* DEBUG */
3561da177e4SLinus Torvalds 
3571da177e4SLinus Torvalds /*
3581da177e4SLinus Torvalds  * Check that none of the inodes in the buffer have a next
3591da177e4SLinus Torvalds  * unlinked field of 0.
3601da177e4SLinus Torvalds  */
3611da177e4SLinus Torvalds #if defined(DEBUG)
3621da177e4SLinus Torvalds void
3631da177e4SLinus Torvalds xfs_inobp_check(
3641da177e4SLinus Torvalds 	xfs_mount_t	*mp,
3651da177e4SLinus Torvalds 	xfs_buf_t	*bp)
3661da177e4SLinus Torvalds {
3671da177e4SLinus Torvalds 	int		i;
3681da177e4SLinus Torvalds 	int		j;
3691da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
3701da177e4SLinus Torvalds 
3711da177e4SLinus Torvalds 	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
3721da177e4SLinus Torvalds 
3731da177e4SLinus Torvalds 	for (i = 0; i < j; i++) {
3741da177e4SLinus Torvalds 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
3751da177e4SLinus Torvalds 					i * mp->m_sb.sb_inodesize);
3761da177e4SLinus Torvalds 		if (!dip->di_next_unlinked)  {
37753487786SDave Chinner 			xfs_alert(mp,
37853487786SDave Chinner 	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
3791da177e4SLinus Torvalds 				bp);
3801da177e4SLinus Torvalds 			ASSERT(dip->di_next_unlinked);
3811da177e4SLinus Torvalds 		}
3821da177e4SLinus Torvalds 	}
3831da177e4SLinus Torvalds }
3841da177e4SLinus Torvalds #endif
3851da177e4SLinus Torvalds 
386612cfbfeSDave Chinner static void
387af133e86SDave Chinner xfs_inode_buf_verify(
388af133e86SDave Chinner 	struct xfs_buf	*bp)
389af133e86SDave Chinner {
390af133e86SDave Chinner 	struct xfs_mount *mp = bp->b_target->bt_mount;
391af133e86SDave Chinner 	int		i;
392af133e86SDave Chinner 	int		ni;
393af133e86SDave Chinner 
394af133e86SDave Chinner 	/*
395af133e86SDave Chinner 	 * Validate the magic number and version of every inode in the buffer
396af133e86SDave Chinner 	 */
397af133e86SDave Chinner 	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
398af133e86SDave Chinner 	for (i = 0; i < ni; i++) {
399af133e86SDave Chinner 		int		di_ok;
400af133e86SDave Chinner 		xfs_dinode_t	*dip;
401af133e86SDave Chinner 
402af133e86SDave Chinner 		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
403af133e86SDave Chinner 					(i << mp->m_sb.sb_inodelog));
404af133e86SDave Chinner 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
405af133e86SDave Chinner 			    XFS_DINODE_GOOD_VERSION(dip->di_version);
406af133e86SDave Chinner 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
407af133e86SDave Chinner 						XFS_ERRTAG_ITOBP_INOTOBP,
408af133e86SDave Chinner 						XFS_RANDOM_ITOBP_INOTOBP))) {
409af133e86SDave Chinner 			xfs_buf_ioerror(bp, EFSCORRUPTED);
410af133e86SDave Chinner 			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
411af133e86SDave Chinner 					     mp, dip);
412af133e86SDave Chinner #ifdef DEBUG
413af133e86SDave Chinner 			xfs_emerg(mp,
414af133e86SDave Chinner 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
415af133e86SDave Chinner 				(unsigned long long)bp->b_bn, i,
416af133e86SDave Chinner 				be16_to_cpu(dip->di_magic));
417af133e86SDave Chinner 			ASSERT(0);
418af133e86SDave Chinner #endif
419af133e86SDave Chinner 		}
420af133e86SDave Chinner 	}
421af133e86SDave Chinner 	xfs_inobp_check(mp, bp);
422612cfbfeSDave Chinner }
423612cfbfeSDave Chinner 
4241813dd64SDave Chinner 
4251813dd64SDave Chinner static void
4261813dd64SDave Chinner xfs_inode_buf_read_verify(
4271813dd64SDave Chinner 	struct xfs_buf	*bp)
4281813dd64SDave Chinner {
4291813dd64SDave Chinner 	xfs_inode_buf_verify(bp);
4301813dd64SDave Chinner }
4311813dd64SDave Chinner 
4321813dd64SDave Chinner static void
433612cfbfeSDave Chinner xfs_inode_buf_write_verify(
434612cfbfeSDave Chinner 	struct xfs_buf	*bp)
435612cfbfeSDave Chinner {
436612cfbfeSDave Chinner 	xfs_inode_buf_verify(bp);
437612cfbfeSDave Chinner }
438612cfbfeSDave Chinner 
4391813dd64SDave Chinner const struct xfs_buf_ops xfs_inode_buf_ops = {
4401813dd64SDave Chinner 	.verify_read = xfs_inode_buf_read_verify,
4411813dd64SDave Chinner 	.verify_write = xfs_inode_buf_write_verify,
4421813dd64SDave Chinner };
4431813dd64SDave Chinner 
444af133e86SDave Chinner 
4451da177e4SLinus Torvalds /*
446475ee413SChristoph Hellwig  * This routine is called to map an inode to the buffer containing the on-disk
447475ee413SChristoph Hellwig  * version of the inode.  It returns a pointer to the buffer containing the
448475ee413SChristoph Hellwig  * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
449475ee413SChristoph Hellwig  * pointer to the on-disk inode within that buffer.
450475ee413SChristoph Hellwig  *
451475ee413SChristoph Hellwig  * If a non-zero error is returned, then the contents of bpp and dipp are
452475ee413SChristoph Hellwig  * undefined.
4534ae29b43SDavid Chinner  */
454475ee413SChristoph Hellwig int
4554ae29b43SDavid Chinner xfs_imap_to_bp(
456475ee413SChristoph Hellwig 	struct xfs_mount	*mp,
457475ee413SChristoph Hellwig 	struct xfs_trans	*tp,
45892bfc6e7SChristoph Hellwig 	struct xfs_imap		*imap,
459475ee413SChristoph Hellwig 	struct xfs_dinode       **dipp,
460475ee413SChristoph Hellwig 	struct xfs_buf		**bpp,
4614ae29b43SDavid Chinner 	uint			buf_flags,
462b48d8d64SChristoph Hellwig 	uint			iget_flags)
4634ae29b43SDavid Chinner {
464475ee413SChristoph Hellwig 	struct xfs_buf		*bp;
4654ae29b43SDavid Chinner 	int			error;
4664ae29b43SDavid Chinner 
467611c9946SDave Chinner 	buf_flags |= XBF_UNMAPPED;
4684ae29b43SDavid Chinner 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
469af133e86SDave Chinner 				   (int)imap->im_len, buf_flags, &bp,
4701813dd64SDave Chinner 				   &xfs_inode_buf_ops);
4714ae29b43SDavid Chinner 	if (error) {
472af133e86SDave Chinner 		if (error == EAGAIN) {
4730cadda1cSChristoph Hellwig 			ASSERT(buf_flags & XBF_TRYLOCK);
4744ae29b43SDavid Chinner 			return error;
4754ae29b43SDavid Chinner 		}
4764ae29b43SDavid Chinner 
477af133e86SDave Chinner 		if (error == EFSCORRUPTED &&
478af133e86SDave Chinner 		    (iget_flags & XFS_IGET_UNTRUSTED))
4794ae29b43SDavid Chinner 			return XFS_ERROR(EINVAL);
4804ae29b43SDavid Chinner 
481af133e86SDave Chinner 		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
482af133e86SDave Chinner 			__func__, error);
483af133e86SDave Chinner 		return error;
484af133e86SDave Chinner 	}
485475ee413SChristoph Hellwig 
4864ae29b43SDavid Chinner 	*bpp = bp;
487475ee413SChristoph Hellwig 	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
4884ae29b43SDavid Chinner 	return 0;
4894ae29b43SDavid Chinner }
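
/*
 * Illustrative sketch, not part of the original file: mapping an inode to its
 * backing buffer and releasing that buffer again, much as xfs_iread() does
 * further down.  It assumes ip->i_imap has already been filled in by
 * xfs_imap(); the function name is a placeholder.
 */
static inline int
xfs_example_peek_dinode(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	struct xfs_dinode	*dip;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, 0);
	if (error)
		return error;
	/* ... inspect the on-disk inode through dip ... */
	xfs_trans_brelse(tp, bp);		/* give the buffer back */
	return 0;
}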
4904ae29b43SDavid Chinner 
4914ae29b43SDavid Chinner /*
4921da177e4SLinus Torvalds  * Move inode type and inode format specific information from the
4931da177e4SLinus Torvalds  * on-disk inode to the in-core inode.  For fifos, devs, and sockets
4941da177e4SLinus Torvalds  * this means setting if_rdev to the proper value.  For files, directories,
4951da177e4SLinus Torvalds  * and symlinks this means to bring in the in-line data or extent
4961da177e4SLinus Torvalds  * pointers.  For a file in B-tree format, only the root is immediately
4971da177e4SLinus Torvalds  * brought in-core.  The rest will be in-lined in if_extents when it
4981da177e4SLinus Torvalds  * is first referenced (see xfs_iread_extents()).
4991da177e4SLinus Torvalds  */
5001da177e4SLinus Torvalds STATIC int
5011da177e4SLinus Torvalds xfs_iformat(
5021da177e4SLinus Torvalds 	xfs_inode_t		*ip,
5031da177e4SLinus Torvalds 	xfs_dinode_t		*dip)
5041da177e4SLinus Torvalds {
5051da177e4SLinus Torvalds 	xfs_attr_shortform_t	*atp;
5061da177e4SLinus Torvalds 	int			size;
5078096b1ebSChristoph Hellwig 	int			error = 0;
5081da177e4SLinus Torvalds 	xfs_fsize_t             di_size;
5091da177e4SLinus Torvalds 
51081591fe2SChristoph Hellwig 	if (unlikely(be32_to_cpu(dip->di_nextents) +
51181591fe2SChristoph Hellwig 		     be16_to_cpu(dip->di_anextents) >
51281591fe2SChristoph Hellwig 		     be64_to_cpu(dip->di_nblocks))) {
51365333b4cSDave Chinner 		xfs_warn(ip->i_mount,
5143762ec6bSNathan Scott 			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
5151da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
51681591fe2SChristoph Hellwig 			(int)(be32_to_cpu(dip->di_nextents) +
51781591fe2SChristoph Hellwig 			      be16_to_cpu(dip->di_anextents)),
5181da177e4SLinus Torvalds 			(unsigned long long)
51981591fe2SChristoph Hellwig 				be64_to_cpu(dip->di_nblocks));
5201da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
5211da177e4SLinus Torvalds 				     ip->i_mount, dip);
5221da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5231da177e4SLinus Torvalds 	}
5241da177e4SLinus Torvalds 
52581591fe2SChristoph Hellwig 	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
52665333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
5271da177e4SLinus Torvalds 			(unsigned long long)ip->i_ino,
52881591fe2SChristoph Hellwig 			dip->di_forkoff);
5291da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
5301da177e4SLinus Torvalds 				     ip->i_mount, dip);
5311da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
5321da177e4SLinus Torvalds 	}
5331da177e4SLinus Torvalds 
534b89d4208SChristoph Hellwig 	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
535b89d4208SChristoph Hellwig 		     !ip->i_mount->m_rtdev_targp)) {
53665333b4cSDave Chinner 		xfs_warn(ip->i_mount,
537b89d4208SChristoph Hellwig 			"corrupt dinode %Lu, has realtime flag set.",
538b89d4208SChristoph Hellwig 			ip->i_ino);
539b89d4208SChristoph Hellwig 		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
540b89d4208SChristoph Hellwig 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
541b89d4208SChristoph Hellwig 		return XFS_ERROR(EFSCORRUPTED);
542b89d4208SChristoph Hellwig 	}
543b89d4208SChristoph Hellwig 
5441da177e4SLinus Torvalds 	switch (ip->i_d.di_mode & S_IFMT) {
5451da177e4SLinus Torvalds 	case S_IFIFO:
5461da177e4SLinus Torvalds 	case S_IFCHR:
5471da177e4SLinus Torvalds 	case S_IFBLK:
5481da177e4SLinus Torvalds 	case S_IFSOCK:
54981591fe2SChristoph Hellwig 		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
5501da177e4SLinus Torvalds 			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
5511da177e4SLinus Torvalds 					      ip->i_mount, dip);
5521da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
5531da177e4SLinus Torvalds 		}
5541da177e4SLinus Torvalds 		ip->i_d.di_size = 0;
55581591fe2SChristoph Hellwig 		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
5561da177e4SLinus Torvalds 		break;
5571da177e4SLinus Torvalds 
5581da177e4SLinus Torvalds 	case S_IFREG:
5591da177e4SLinus Torvalds 	case S_IFLNK:
5601da177e4SLinus Torvalds 	case S_IFDIR:
56181591fe2SChristoph Hellwig 		switch (dip->di_format) {
5621da177e4SLinus Torvalds 		case XFS_DINODE_FMT_LOCAL:
5631da177e4SLinus Torvalds 			/*
5641da177e4SLinus Torvalds 			 * no local regular files yet
5651da177e4SLinus Torvalds 			 */
566abbede1bSAl Viro 			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
56765333b4cSDave Chinner 				xfs_warn(ip->i_mount,
56865333b4cSDave Chinner 			"corrupt inode %Lu (local format for regular file).",
5691da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino);
5701da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
5711da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
5721da177e4SLinus Torvalds 						     ip->i_mount, dip);
5731da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
5741da177e4SLinus Torvalds 			}
5751da177e4SLinus Torvalds 
57681591fe2SChristoph Hellwig 			di_size = be64_to_cpu(dip->di_size);
5771da177e4SLinus Torvalds 			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
57865333b4cSDave Chinner 				xfs_warn(ip->i_mount,
57965333b4cSDave Chinner 			"corrupt inode %Lu (bad size %Ld for local inode).",
5801da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino,
5811da177e4SLinus Torvalds 					(long long) di_size);
5821da177e4SLinus Torvalds 				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
5831da177e4SLinus Torvalds 						     XFS_ERRLEVEL_LOW,
5841da177e4SLinus Torvalds 						     ip->i_mount, dip);
5851da177e4SLinus Torvalds 				return XFS_ERROR(EFSCORRUPTED);
5861da177e4SLinus Torvalds 			}
5871da177e4SLinus Torvalds 
5881da177e4SLinus Torvalds 			size = (int)di_size;
5891da177e4SLinus Torvalds 			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
5901da177e4SLinus Torvalds 			break;
5911da177e4SLinus Torvalds 		case XFS_DINODE_FMT_EXTENTS:
5921da177e4SLinus Torvalds 			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
5931da177e4SLinus Torvalds 			break;
5941da177e4SLinus Torvalds 		case XFS_DINODE_FMT_BTREE:
5951da177e4SLinus Torvalds 			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
5961da177e4SLinus Torvalds 			break;
5971da177e4SLinus Torvalds 		default:
5981da177e4SLinus Torvalds 			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
5991da177e4SLinus Torvalds 					 ip->i_mount);
6001da177e4SLinus Torvalds 			return XFS_ERROR(EFSCORRUPTED);
6011da177e4SLinus Torvalds 		}
6021da177e4SLinus Torvalds 		break;
6031da177e4SLinus Torvalds 
6041da177e4SLinus Torvalds 	default:
6051da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
6061da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
6071da177e4SLinus Torvalds 	}
6081da177e4SLinus Torvalds 	if (error) {
6091da177e4SLinus Torvalds 		return error;
6101da177e4SLinus Torvalds 	}
6111da177e4SLinus Torvalds 	if (!XFS_DFORK_Q(dip))
6121da177e4SLinus Torvalds 		return 0;
6138096b1ebSChristoph Hellwig 
6141da177e4SLinus Torvalds 	ASSERT(ip->i_afp == NULL);
6154a7edddcSDave Chinner 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
6168096b1ebSChristoph Hellwig 
61781591fe2SChristoph Hellwig 	switch (dip->di_aformat) {
6181da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
6191da177e4SLinus Torvalds 		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
6203b244aa8SNathan Scott 		size = be16_to_cpu(atp->hdr.totsize);
6212809f76aSChristoph Hellwig 
6222809f76aSChristoph Hellwig 		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
62365333b4cSDave Chinner 			xfs_warn(ip->i_mount,
62465333b4cSDave Chinner 				"corrupt inode %Lu (bad attr fork size %Ld).",
6252809f76aSChristoph Hellwig 				(unsigned long long) ip->i_ino,
6262809f76aSChristoph Hellwig 				(long long) size);
6272809f76aSChristoph Hellwig 			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
6282809f76aSChristoph Hellwig 					     XFS_ERRLEVEL_LOW,
6292809f76aSChristoph Hellwig 					     ip->i_mount, dip);
6302809f76aSChristoph Hellwig 			return XFS_ERROR(EFSCORRUPTED);
6312809f76aSChristoph Hellwig 		}
6322809f76aSChristoph Hellwig 
6331da177e4SLinus Torvalds 		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
6341da177e4SLinus Torvalds 		break;
6351da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
6361da177e4SLinus Torvalds 		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
6371da177e4SLinus Torvalds 		break;
6381da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
6391da177e4SLinus Torvalds 		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
6401da177e4SLinus Torvalds 		break;
6411da177e4SLinus Torvalds 	default:
6421da177e4SLinus Torvalds 		error = XFS_ERROR(EFSCORRUPTED);
6431da177e4SLinus Torvalds 		break;
6441da177e4SLinus Torvalds 	}
6451da177e4SLinus Torvalds 	if (error) {
6461da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
6471da177e4SLinus Torvalds 		ip->i_afp = NULL;
6481da177e4SLinus Torvalds 		xfs_idestroy_fork(ip, XFS_DATA_FORK);
6491da177e4SLinus Torvalds 	}
6501da177e4SLinus Torvalds 	return error;
6511da177e4SLinus Torvalds }
6521da177e4SLinus Torvalds 
6531da177e4SLinus Torvalds /*
6541da177e4SLinus Torvalds  * The file is in-lined in the on-disk inode.
6551da177e4SLinus Torvalds  * If it fits into if_inline_data, then copy
6561da177e4SLinus Torvalds  * it there, otherwise allocate a buffer for it
6571da177e4SLinus Torvalds  * and copy the data there.  Either way, set
6581da177e4SLinus Torvalds  * if_data to point at the data.
6591da177e4SLinus Torvalds  * If we allocate a buffer for the data, make
6601da177e4SLinus Torvalds  * sure that its size is a multiple of 4 and
6611da177e4SLinus Torvalds  * record the real size in i_real_bytes.
6621da177e4SLinus Torvalds  */
6631da177e4SLinus Torvalds STATIC int
6641da177e4SLinus Torvalds xfs_iformat_local(
6651da177e4SLinus Torvalds 	xfs_inode_t	*ip,
6661da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
6671da177e4SLinus Torvalds 	int		whichfork,
6681da177e4SLinus Torvalds 	int		size)
6691da177e4SLinus Torvalds {
6701da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
6711da177e4SLinus Torvalds 	int		real_size;
6721da177e4SLinus Torvalds 
6731da177e4SLinus Torvalds 	/*
6741da177e4SLinus Torvalds 	 * If the size is unreasonable, then something
6751da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
6761da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
6771da177e4SLinus Torvalds 	 */
6781da177e4SLinus Torvalds 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
67965333b4cSDave Chinner 		xfs_warn(ip->i_mount,
68065333b4cSDave Chinner 	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
6811da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, size,
6821da177e4SLinus Torvalds 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
6831da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
6841da177e4SLinus Torvalds 				     ip->i_mount, dip);
6851da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
6861da177e4SLinus Torvalds 	}
6871da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
6881da177e4SLinus Torvalds 	real_size = 0;
6891da177e4SLinus Torvalds 	if (size == 0)
6901da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
6911da177e4SLinus Torvalds 	else if (size <= sizeof(ifp->if_u2.if_inline_data))
6921da177e4SLinus Torvalds 		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
6931da177e4SLinus Torvalds 	else {
6941da177e4SLinus Torvalds 		real_size = roundup(size, 4);
6954a7edddcSDave Chinner 		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
6961da177e4SLinus Torvalds 	}
6971da177e4SLinus Torvalds 	ifp->if_bytes = size;
6981da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
6991da177e4SLinus Torvalds 	if (size)
7001da177e4SLinus Torvalds 		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
7011da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
7021da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFINLINE;
7031da177e4SLinus Torvalds 	return 0;
7041da177e4SLinus Torvalds }
7051da177e4SLinus Torvalds 
7061da177e4SLinus Torvalds /*
7071da177e4SLinus Torvalds  * The file consists of a set of extents all
7081da177e4SLinus Torvalds  * of which fit into the on-disk inode.
7091da177e4SLinus Torvalds  * If there are few enough extents to fit into
7101da177e4SLinus Torvalds  * the if_inline_ext, then copy them there.
7111da177e4SLinus Torvalds  * Otherwise allocate a buffer for them and copy
7121da177e4SLinus Torvalds  * them into it.  Either way, set if_extents
7131da177e4SLinus Torvalds  * to point at the extents.
7141da177e4SLinus Torvalds  */
7151da177e4SLinus Torvalds STATIC int
7161da177e4SLinus Torvalds xfs_iformat_extents(
7171da177e4SLinus Torvalds 	xfs_inode_t	*ip,
7181da177e4SLinus Torvalds 	xfs_dinode_t	*dip,
7191da177e4SLinus Torvalds 	int		whichfork)
7201da177e4SLinus Torvalds {
721a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t	*dp;
7221da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
7231da177e4SLinus Torvalds 	int		nex;
7241da177e4SLinus Torvalds 	int		size;
7251da177e4SLinus Torvalds 	int		i;
7261da177e4SLinus Torvalds 
7271da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
7281da177e4SLinus Torvalds 	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
7291da177e4SLinus Torvalds 	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
7301da177e4SLinus Torvalds 
7311da177e4SLinus Torvalds 	/*
7321da177e4SLinus Torvalds 	 * If the number of extents is unreasonable, then something
7331da177e4SLinus Torvalds 	 * is wrong and we just bail out rather than crash in
7341da177e4SLinus Torvalds 	 * kmem_alloc() or memcpy() below.
7351da177e4SLinus Torvalds 	 */
7361da177e4SLinus Torvalds 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
73765333b4cSDave Chinner 		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
7381da177e4SLinus Torvalds 			(unsigned long long) ip->i_ino, nex);
7391da177e4SLinus Torvalds 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
7401da177e4SLinus Torvalds 				     ip->i_mount, dip);
7411da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
7421da177e4SLinus Torvalds 	}
7431da177e4SLinus Torvalds 
7444eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
7451da177e4SLinus Torvalds 	if (nex == 0)
7461da177e4SLinus Torvalds 		ifp->if_u1.if_extents = NULL;
7471da177e4SLinus Torvalds 	else if (nex <= XFS_INLINE_EXTS)
7481da177e4SLinus Torvalds 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
7494eea22f0SMandy Kirkconnell 	else
7504eea22f0SMandy Kirkconnell 		xfs_iext_add(ifp, 0, nex);
7514eea22f0SMandy Kirkconnell 
7521da177e4SLinus Torvalds 	ifp->if_bytes = size;
7531da177e4SLinus Torvalds 	if (size) {
7541da177e4SLinus Torvalds 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
755a6f64d4aSChristoph Hellwig 		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
7564eea22f0SMandy Kirkconnell 		for (i = 0; i < nex; i++, dp++) {
757a6f64d4aSChristoph Hellwig 			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
758597bca63SHarvey Harrison 			ep->l0 = get_unaligned_be64(&dp->l0);
759597bca63SHarvey Harrison 			ep->l1 = get_unaligned_be64(&dp->l1);
7601da177e4SLinus Torvalds 		}
7613a59c94cSEric Sandeen 		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
7621da177e4SLinus Torvalds 		if (whichfork != XFS_DATA_FORK ||
7631da177e4SLinus Torvalds 			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
7641da177e4SLinus Torvalds 				if (unlikely(xfs_check_nostate_extents(
7654eea22f0SMandy Kirkconnell 				    ifp, 0, nex))) {
7661da177e4SLinus Torvalds 					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
7671da177e4SLinus Torvalds 							 XFS_ERRLEVEL_LOW,
7681da177e4SLinus Torvalds 							 ip->i_mount);
7691da177e4SLinus Torvalds 					return XFS_ERROR(EFSCORRUPTED);
7701da177e4SLinus Torvalds 				}
7711da177e4SLinus Torvalds 	}
7721da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
7731da177e4SLinus Torvalds 	return 0;
7741da177e4SLinus Torvalds }
7751da177e4SLinus Torvalds 
7761da177e4SLinus Torvalds /*
7771da177e4SLinus Torvalds  * The file has too many extents to fit into
7781da177e4SLinus Torvalds  * the inode, so they are in B-tree format.
7791da177e4SLinus Torvalds  * Allocate a buffer for the root of the B-tree
7801da177e4SLinus Torvalds  * and copy the root into it.  The i_extents
7811da177e4SLinus Torvalds  * field will remain NULL until all of the
7821da177e4SLinus Torvalds  * extents are read in (when they are needed).
7831da177e4SLinus Torvalds  */
7841da177e4SLinus Torvalds STATIC int
7851da177e4SLinus Torvalds xfs_iformat_btree(
7861da177e4SLinus Torvalds 	xfs_inode_t		*ip,
7871da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
7881da177e4SLinus Torvalds 	int			whichfork)
7891da177e4SLinus Torvalds {
790ee1a47abSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
7911da177e4SLinus Torvalds 	xfs_bmdr_block_t	*dfp;
7921da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
7931da177e4SLinus Torvalds 	/* REFERENCED */
7941da177e4SLinus Torvalds 	int			nrecs;
7951da177e4SLinus Torvalds 	int			size;
7961da177e4SLinus Torvalds 
7971da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
7981da177e4SLinus Torvalds 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
799ee1a47abSChristoph Hellwig 	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
80060197e8dSChristoph Hellwig 	nrecs = be16_to_cpu(dfp->bb_numrecs);
8011da177e4SLinus Torvalds 
8021da177e4SLinus Torvalds 	/*
8031da177e4SLinus Torvalds 	 * Blow out if the fork has fewer extents than can fit in the
8041da177e4SLinus Torvalds 	 * fork (the fork shouldn't be in btree format), the root btree
8051da177e4SLinus Torvalds 	 * block has more records than can fit into the fork,
8061da177e4SLinus Torvalds 	 * or the number of extents is greater than the number of
8071da177e4SLinus Torvalds 	 * blocks.
8081da177e4SLinus Torvalds 	 */
8098096b1ebSChristoph Hellwig 	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
8108096b1ebSChristoph Hellwig 					XFS_IFORK_MAXEXT(ip, whichfork) ||
8118096b1ebSChristoph Hellwig 		     XFS_BMDR_SPACE_CALC(nrecs) >
812ee1a47abSChristoph Hellwig 					XFS_DFORK_SIZE(dip, mp, whichfork) ||
8138096b1ebSChristoph Hellwig 		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
814ee1a47abSChristoph Hellwig 		xfs_warn(mp, "corrupt inode %Lu (btree).",
8151da177e4SLinus Torvalds 					(unsigned long long) ip->i_ino);
81665333b4cSDave Chinner 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
817ee1a47abSChristoph Hellwig 					 mp, dip);
8181da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
8191da177e4SLinus Torvalds 	}
8201da177e4SLinus Torvalds 
8211da177e4SLinus Torvalds 	ifp->if_broot_bytes = size;
8224a7edddcSDave Chinner 	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
8231da177e4SLinus Torvalds 	ASSERT(ifp->if_broot != NULL);
8241da177e4SLinus Torvalds 	/*
8251da177e4SLinus Torvalds 	 * Copy and convert from the on-disk structure
8261da177e4SLinus Torvalds 	 * to the in-memory structure.
8271da177e4SLinus Torvalds 	 */
828ee1a47abSChristoph Hellwig 	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
8291da177e4SLinus Torvalds 			 ifp->if_broot, size);
8301da177e4SLinus Torvalds 	ifp->if_flags &= ~XFS_IFEXTENTS;
8311da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFBROOT;
8321da177e4SLinus Torvalds 
8331da177e4SLinus Torvalds 	return 0;
8341da177e4SLinus Torvalds }
8351da177e4SLinus Torvalds 
836d96f8f89SEric Sandeen STATIC void
837347d1c01SChristoph Hellwig xfs_dinode_from_disk(
838347d1c01SChristoph Hellwig 	xfs_icdinode_t		*to,
83981591fe2SChristoph Hellwig 	xfs_dinode_t		*from)
8401da177e4SLinus Torvalds {
841347d1c01SChristoph Hellwig 	to->di_magic = be16_to_cpu(from->di_magic);
842347d1c01SChristoph Hellwig 	to->di_mode = be16_to_cpu(from->di_mode);
843347d1c01SChristoph Hellwig 	to->di_version = from->di_version;
844347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
845347d1c01SChristoph Hellwig 	to->di_onlink = be16_to_cpu(from->di_onlink);
846347d1c01SChristoph Hellwig 	to->di_uid = be32_to_cpu(from->di_uid);
847347d1c01SChristoph Hellwig 	to->di_gid = be32_to_cpu(from->di_gid);
848347d1c01SChristoph Hellwig 	to->di_nlink = be32_to_cpu(from->di_nlink);
8496743099cSArkadiusz Miśkiewicz 	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
8506743099cSArkadiusz Miśkiewicz 	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
851347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
852347d1c01SChristoph Hellwig 	to->di_flushiter = be16_to_cpu(from->di_flushiter);
853347d1c01SChristoph Hellwig 	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
854347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
855347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
856347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
857347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
858347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
859347d1c01SChristoph Hellwig 	to->di_size = be64_to_cpu(from->di_size);
860347d1c01SChristoph Hellwig 	to->di_nblocks = be64_to_cpu(from->di_nblocks);
861347d1c01SChristoph Hellwig 	to->di_extsize = be32_to_cpu(from->di_extsize);
862347d1c01SChristoph Hellwig 	to->di_nextents = be32_to_cpu(from->di_nextents);
863347d1c01SChristoph Hellwig 	to->di_anextents = be16_to_cpu(from->di_anextents);
864347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
865347d1c01SChristoph Hellwig 	to->di_aformat	= from->di_aformat;
866347d1c01SChristoph Hellwig 	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
867347d1c01SChristoph Hellwig 	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
868347d1c01SChristoph Hellwig 	to->di_flags	= be16_to_cpu(from->di_flags);
869347d1c01SChristoph Hellwig 	to->di_gen	= be32_to_cpu(from->di_gen);
870*93848a99SChristoph Hellwig 
871*93848a99SChristoph Hellwig 	if (to->di_version == 3) {
872*93848a99SChristoph Hellwig 		to->di_changecount = be64_to_cpu(from->di_changecount);
873*93848a99SChristoph Hellwig 		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
874*93848a99SChristoph Hellwig 		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
875*93848a99SChristoph Hellwig 		to->di_flags2 = be64_to_cpu(from->di_flags2);
876*93848a99SChristoph Hellwig 		to->di_ino = be64_to_cpu(from->di_ino);
877*93848a99SChristoph Hellwig 		to->di_lsn = be64_to_cpu(from->di_lsn);
878*93848a99SChristoph Hellwig 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
879*93848a99SChristoph Hellwig 		uuid_copy(&to->di_uuid, &from->di_uuid);
880*93848a99SChristoph Hellwig 	}
8811da177e4SLinus Torvalds }
8821da177e4SLinus Torvalds 
883347d1c01SChristoph Hellwig void
884347d1c01SChristoph Hellwig xfs_dinode_to_disk(
88581591fe2SChristoph Hellwig 	xfs_dinode_t		*to,
886347d1c01SChristoph Hellwig 	xfs_icdinode_t		*from)
887347d1c01SChristoph Hellwig {
888347d1c01SChristoph Hellwig 	to->di_magic = cpu_to_be16(from->di_magic);
889347d1c01SChristoph Hellwig 	to->di_mode = cpu_to_be16(from->di_mode);
890347d1c01SChristoph Hellwig 	to->di_version = from->di_version;
891347d1c01SChristoph Hellwig 	to->di_format = from->di_format;
892347d1c01SChristoph Hellwig 	to->di_onlink = cpu_to_be16(from->di_onlink);
893347d1c01SChristoph Hellwig 	to->di_uid = cpu_to_be32(from->di_uid);
894347d1c01SChristoph Hellwig 	to->di_gid = cpu_to_be32(from->di_gid);
895347d1c01SChristoph Hellwig 	to->di_nlink = cpu_to_be32(from->di_nlink);
8966743099cSArkadiusz Miśkiewicz 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
8976743099cSArkadiusz Miśkiewicz 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
898347d1c01SChristoph Hellwig 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
899347d1c01SChristoph Hellwig 	to->di_flushiter = cpu_to_be16(from->di_flushiter);
900347d1c01SChristoph Hellwig 	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
901347d1c01SChristoph Hellwig 	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
902347d1c01SChristoph Hellwig 	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
903347d1c01SChristoph Hellwig 	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
904347d1c01SChristoph Hellwig 	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
905347d1c01SChristoph Hellwig 	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
906347d1c01SChristoph Hellwig 	to->di_size = cpu_to_be64(from->di_size);
907347d1c01SChristoph Hellwig 	to->di_nblocks = cpu_to_be64(from->di_nblocks);
908347d1c01SChristoph Hellwig 	to->di_extsize = cpu_to_be32(from->di_extsize);
909347d1c01SChristoph Hellwig 	to->di_nextents = cpu_to_be32(from->di_nextents);
910347d1c01SChristoph Hellwig 	to->di_anextents = cpu_to_be16(from->di_anextents);
911347d1c01SChristoph Hellwig 	to->di_forkoff = from->di_forkoff;
912347d1c01SChristoph Hellwig 	to->di_aformat = from->di_aformat;
913347d1c01SChristoph Hellwig 	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
914347d1c01SChristoph Hellwig 	to->di_dmstate = cpu_to_be16(from->di_dmstate);
915347d1c01SChristoph Hellwig 	to->di_flags = cpu_to_be16(from->di_flags);
916347d1c01SChristoph Hellwig 	to->di_gen = cpu_to_be32(from->di_gen);
917*93848a99SChristoph Hellwig 
918*93848a99SChristoph Hellwig 	if (from->di_version == 3) {
919*93848a99SChristoph Hellwig 		to->di_changecount = cpu_to_be64(from->di_changecount);
920*93848a99SChristoph Hellwig 		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
921*93848a99SChristoph Hellwig 		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
922*93848a99SChristoph Hellwig 		to->di_flags2 = cpu_to_be64(from->di_flags2);
923*93848a99SChristoph Hellwig 		to->di_ino = cpu_to_be64(from->di_ino);
924*93848a99SChristoph Hellwig 		to->di_lsn = cpu_to_be64(from->di_lsn);
925*93848a99SChristoph Hellwig 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
926*93848a99SChristoph Hellwig 		uuid_copy(&to->di_uuid, &from->di_uuid);
927*93848a99SChristoph Hellwig 	}
9281da177e4SLinus Torvalds }
9291da177e4SLinus Torvalds 
9301da177e4SLinus Torvalds STATIC uint
9311da177e4SLinus Torvalds _xfs_dic2xflags(
9321da177e4SLinus Torvalds 	__uint16_t		di_flags)
9331da177e4SLinus Torvalds {
9341da177e4SLinus Torvalds 	uint			flags = 0;
9351da177e4SLinus Torvalds 
9361da177e4SLinus Torvalds 	if (di_flags & XFS_DIFLAG_ANY) {
9371da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_REALTIME)
9381da177e4SLinus Torvalds 			flags |= XFS_XFLAG_REALTIME;
9391da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PREALLOC)
9401da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PREALLOC;
9411da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_IMMUTABLE)
9421da177e4SLinus Torvalds 			flags |= XFS_XFLAG_IMMUTABLE;
9431da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_APPEND)
9441da177e4SLinus Torvalds 			flags |= XFS_XFLAG_APPEND;
9451da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_SYNC)
9461da177e4SLinus Torvalds 			flags |= XFS_XFLAG_SYNC;
9471da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOATIME)
9481da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOATIME;
9491da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NODUMP)
9501da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NODUMP;
9511da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_RTINHERIT)
9521da177e4SLinus Torvalds 			flags |= XFS_XFLAG_RTINHERIT;
9531da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_PROJINHERIT)
9541da177e4SLinus Torvalds 			flags |= XFS_XFLAG_PROJINHERIT;
9551da177e4SLinus Torvalds 		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
9561da177e4SLinus Torvalds 			flags |= XFS_XFLAG_NOSYMLINKS;
957dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSIZE)
958dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSIZE;
959dd9f438eSNathan Scott 		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
960dd9f438eSNathan Scott 			flags |= XFS_XFLAG_EXTSZINHERIT;
961d3446eacSBarry Naujok 		if (di_flags & XFS_DIFLAG_NODEFRAG)
962d3446eacSBarry Naujok 			flags |= XFS_XFLAG_NODEFRAG;
9632a82b8beSDavid Chinner 		if (di_flags & XFS_DIFLAG_FILESTREAM)
9642a82b8beSDavid Chinner 			flags |= XFS_XFLAG_FILESTREAM;
9651da177e4SLinus Torvalds 	}
9661da177e4SLinus Torvalds 
9671da177e4SLinus Torvalds 	return flags;
9681da177e4SLinus Torvalds }
9691da177e4SLinus Torvalds 
9701da177e4SLinus Torvalds uint
9711da177e4SLinus Torvalds xfs_ip2xflags(
9721da177e4SLinus Torvalds 	xfs_inode_t		*ip)
9731da177e4SLinus Torvalds {
974347d1c01SChristoph Hellwig 	xfs_icdinode_t		*dic = &ip->i_d;
9751da177e4SLinus Torvalds 
976a916e2bdSNathan Scott 	return _xfs_dic2xflags(dic->di_flags) |
97745ba598eSChristoph Hellwig 				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
9781da177e4SLinus Torvalds }
9791da177e4SLinus Torvalds 
9801da177e4SLinus Torvalds uint
9811da177e4SLinus Torvalds xfs_dic2xflags(
98245ba598eSChristoph Hellwig 	xfs_dinode_t		*dip)
9831da177e4SLinus Torvalds {
98481591fe2SChristoph Hellwig 	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
98545ba598eSChristoph Hellwig 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
9861da177e4SLinus Torvalds }
9871da177e4SLinus Torvalds 
988*93848a99SChristoph Hellwig static bool
989*93848a99SChristoph Hellwig xfs_dinode_verify(
990*93848a99SChristoph Hellwig 	struct xfs_mount	*mp,
991*93848a99SChristoph Hellwig 	struct xfs_inode	*ip,
992*93848a99SChristoph Hellwig 	struct xfs_dinode	*dip)
993*93848a99SChristoph Hellwig {
994*93848a99SChristoph Hellwig 	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
995*93848a99SChristoph Hellwig 		return false;
996*93848a99SChristoph Hellwig 
997*93848a99SChristoph Hellwig 	/* only version 3 or greater inodes are extensively verified here */
998*93848a99SChristoph Hellwig 	if (dip->di_version < 3)
999*93848a99SChristoph Hellwig 		return true;
1000*93848a99SChristoph Hellwig 
1001*93848a99SChristoph Hellwig 	if (!xfs_sb_version_hascrc(&mp->m_sb))
1002*93848a99SChristoph Hellwig 		return false;
1003*93848a99SChristoph Hellwig 	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
1004*93848a99SChristoph Hellwig 			      offsetof(struct xfs_dinode, di_crc)))
1005*93848a99SChristoph Hellwig 		return false;
1006*93848a99SChristoph Hellwig 	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
1007*93848a99SChristoph Hellwig 		return false;
1008*93848a99SChristoph Hellwig 	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
1009*93848a99SChristoph Hellwig 		return false;
1010*93848a99SChristoph Hellwig 	return true;
1011*93848a99SChristoph Hellwig }
1012*93848a99SChristoph Hellwig 
1013*93848a99SChristoph Hellwig void
1014*93848a99SChristoph Hellwig xfs_dinode_calc_crc(
1015*93848a99SChristoph Hellwig 	struct xfs_mount	*mp,
1016*93848a99SChristoph Hellwig 	struct xfs_dinode	*dip)
1017*93848a99SChristoph Hellwig {
1018*93848a99SChristoph Hellwig 	__uint32_t		crc;
1019*93848a99SChristoph Hellwig 
1020*93848a99SChristoph Hellwig 	if (dip->di_version < 3)
1021*93848a99SChristoph Hellwig 		return;
1022*93848a99SChristoph Hellwig 
1023*93848a99SChristoph Hellwig 	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
1024*93848a99SChristoph Hellwig 	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
1025*93848a99SChristoph Hellwig 			      offsetof(struct xfs_dinode, di_crc));
1026*93848a99SChristoph Hellwig 	dip->di_crc = xfs_end_cksum(crc);
1027*93848a99SChristoph Hellwig }
1028*93848a99SChristoph Hellwig 
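/*
 * Editor's illustration -- not part of the original file.  A minimal
 * sketch of how the two helpers above pair up when a v3 on-disk inode is
 * patched in a buffer: verify before trusting the contents, recompute
 * di_crc afterwards so the checksum stays valid.  The function name and
 * the particular field touched are hypothetical.
 */
static bool
xfs_dinode_touch_example(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	if (!xfs_dinode_verify(mp, ip, dip))
		return false;			/* don't touch corrupt inodes */

	dip->di_flushiter = 0;			/* hypothetical modification */
	xfs_dinode_calc_crc(mp, dip);		/* CRC covers the whole inode */
	return true;
}
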
10291da177e4SLinus Torvalds /*
103024f211baSChristoph Hellwig  * Read the disk inode attributes into the in-core inode structure.
10311da177e4SLinus Torvalds  */
10321da177e4SLinus Torvalds int
10331da177e4SLinus Torvalds xfs_iread(
10341da177e4SLinus Torvalds 	xfs_mount_t	*mp,
10351da177e4SLinus Torvalds 	xfs_trans_t	*tp,
103624f211baSChristoph Hellwig 	xfs_inode_t	*ip,
103724f211baSChristoph Hellwig 	uint		iget_flags)
10381da177e4SLinus Torvalds {
10391da177e4SLinus Torvalds 	xfs_buf_t	*bp;
10401da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
10411da177e4SLinus Torvalds 	int		error;
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds 	/*
104492bfc6e7SChristoph Hellwig 	 * Fill in the location information in the in-core inode.
10451da177e4SLinus Torvalds 	 */
104624f211baSChristoph Hellwig 	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
10479ed0451eSChristoph Hellwig 	if (error)
104824f211baSChristoph Hellwig 		return error;
10491da177e4SLinus Torvalds 
10501da177e4SLinus Torvalds 	/*
105192bfc6e7SChristoph Hellwig 	 * Get pointers to the on-disk inode and the buffer containing it.
105276d8b277SChristoph Hellwig 	 */
1053475ee413SChristoph Hellwig 	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
105476d8b277SChristoph Hellwig 	if (error)
105524f211baSChristoph Hellwig 		return error;
105676d8b277SChristoph Hellwig 
1057*93848a99SChristoph Hellwig 	/* even unallocated inodes are verified */
1058*93848a99SChristoph Hellwig 	if (!xfs_dinode_verify(mp, ip, dip)) {
1059*93848a99SChristoph Hellwig 		xfs_alert(mp, "%s: validation failed for inode %lld",
1060*93848a99SChristoph Hellwig 				__func__, ip->i_ino);
1061*93848a99SChristoph Hellwig 
1062*93848a99SChristoph Hellwig 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
1063*93848a99SChristoph Hellwig 		error = XFS_ERROR(EFSCORRUPTED);
10649ed0451eSChristoph Hellwig 		goto out_brelse;
10651da177e4SLinus Torvalds 	}
10661da177e4SLinus Torvalds 
10671da177e4SLinus Torvalds 	/*
10681da177e4SLinus Torvalds 	 * If the on-disk inode is already linked to a directory
10691da177e4SLinus Torvalds 	 * entry, copy all of the inode into the in-core inode.
10701da177e4SLinus Torvalds 	 * xfs_iformat() handles copying in the inode format
10711da177e4SLinus Torvalds 	 * specific information.
10721da177e4SLinus Torvalds 	 * Otherwise, just get the truly permanent information.
10731da177e4SLinus Torvalds 	 */
107481591fe2SChristoph Hellwig 	if (dip->di_mode) {
107581591fe2SChristoph Hellwig 		xfs_dinode_from_disk(&ip->i_d, dip);
10761da177e4SLinus Torvalds 		error = xfs_iformat(ip, dip);
10771da177e4SLinus Torvalds 		if (error)  {
10781da177e4SLinus Torvalds #ifdef DEBUG
107953487786SDave Chinner 			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
108053487786SDave Chinner 				__func__, error);
10811da177e4SLinus Torvalds #endif /* DEBUG */
10829ed0451eSChristoph Hellwig 			goto out_brelse;
10831da177e4SLinus Torvalds 		}
10841da177e4SLinus Torvalds 	} else {
1085*93848a99SChristoph Hellwig 		/*
1086*93848a99SChristoph Hellwig 		 * Partial initialisation of the in-core inode. Just the bits that
1087*93848a99SChristoph Hellwig 		 * xfs_ialloc won't overwrite, or that it relies on being correct.
1088*93848a99SChristoph Hellwig 		 */
108981591fe2SChristoph Hellwig 		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
109081591fe2SChristoph Hellwig 		ip->i_d.di_version = dip->di_version;
109181591fe2SChristoph Hellwig 		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
109281591fe2SChristoph Hellwig 		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
1093*93848a99SChristoph Hellwig 
1094*93848a99SChristoph Hellwig 		if (dip->di_version == 3) {
1095*93848a99SChristoph Hellwig 			ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
1096*93848a99SChristoph Hellwig 			uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
1097*93848a99SChristoph Hellwig 		}
1098*93848a99SChristoph Hellwig 
10991da177e4SLinus Torvalds 		/*
11001da177e4SLinus Torvalds 		 * Make sure to pull in the mode here as well in
11011da177e4SLinus Torvalds 		 * case the inode is released without being used.
11021da177e4SLinus Torvalds 		 * This ensures that xfs_inactive() will see that
11031da177e4SLinus Torvalds 		 * the inode is already free and not try to mess
11041da177e4SLinus Torvalds 		 * with the uninitialized part of it.
11051da177e4SLinus Torvalds 		 */
11061da177e4SLinus Torvalds 		ip->i_d.di_mode = 0;
11071da177e4SLinus Torvalds 	}
11081da177e4SLinus Torvalds 
11091da177e4SLinus Torvalds 	/*
11101da177e4SLinus Torvalds 	 * The inode format changed when we moved the link count and
11111da177e4SLinus Torvalds 	 * made it 32 bits long.  If this is an old format inode,
11121da177e4SLinus Torvalds 	 * convert it in memory to look like a new one.  If it gets
11131da177e4SLinus Torvalds 	 * flushed to disk we will convert back before flushing or
11141da177e4SLinus Torvalds 	 * logging it.  We zero out the new projid field and the old link
11151da177e4SLinus Torvalds 	 * count field.  We'll handle clearing the pad field (the remains
11161da177e4SLinus Torvalds 	 * of the old uuid field) when we actually convert the inode to
11171da177e4SLinus Torvalds 	 * the new format. We don't change the version number so that we
11181da177e4SLinus Torvalds 	 * can distinguish this from a real new format inode.
11191da177e4SLinus Torvalds 	 */
112051ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
11211da177e4SLinus Torvalds 		ip->i_d.di_nlink = ip->i_d.di_onlink;
11221da177e4SLinus Torvalds 		ip->i_d.di_onlink = 0;
11236743099cSArkadiusz Mi?kiewicz 		xfs_set_projid(ip, 0);
11241da177e4SLinus Torvalds 	}
11251da177e4SLinus Torvalds 
11261da177e4SLinus Torvalds 	ip->i_delayed_blks = 0;
11271da177e4SLinus Torvalds 
11281da177e4SLinus Torvalds 	/*
11291da177e4SLinus Torvalds 	 * Mark the buffer containing the inode as something to keep
11301da177e4SLinus Torvalds 	 * around for a while.  This helps to keep recently accessed
11311da177e4SLinus Torvalds 	 * meta-data in-core longer.
11321da177e4SLinus Torvalds 	 */
1133821eb21dSDave Chinner 	xfs_buf_set_ref(bp, XFS_INO_REF);
11341da177e4SLinus Torvalds 
11351da177e4SLinus Torvalds 	/*
11361da177e4SLinus Torvalds 	 * Use xfs_trans_brelse() to release the buffer containing the
11371da177e4SLinus Torvalds 	 * on-disk inode, because it was acquired with xfs_trans_read_buf()
1138475ee413SChristoph Hellwig 	 * in xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
11391da177e4SLinus Torvalds 	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
11401da177e4SLinus Torvalds 	 * will only release the buffer if it is not dirty within the
11411da177e4SLinus Torvalds 	 * transaction.  It will be OK to release the buffer in this case,
11421da177e4SLinus Torvalds 	 * because inodes on disk are never destroyed and we will be
11431da177e4SLinus Torvalds 	 * locking the new in-core inode before putting it in the hash
11441da177e4SLinus Torvalds 	 * table where other processes can find it.  Thus we don't have
11451da177e4SLinus Torvalds 	 * to worry about the inode being changed just because we released
11461da177e4SLinus Torvalds 	 * the buffer.
11471da177e4SLinus Torvalds 	 */
11489ed0451eSChristoph Hellwig  out_brelse:
11499ed0451eSChristoph Hellwig 	xfs_trans_brelse(tp, bp);
11509ed0451eSChristoph Hellwig 	return error;
11511da177e4SLinus Torvalds }
11521da177e4SLinus Torvalds 
11531da177e4SLinus Torvalds /*
11541da177e4SLinus Torvalds  * Read in extents from a btree-format inode.
11551da177e4SLinus Torvalds  * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
11561da177e4SLinus Torvalds  */
11571da177e4SLinus Torvalds int
11581da177e4SLinus Torvalds xfs_iread_extents(
11591da177e4SLinus Torvalds 	xfs_trans_t	*tp,
11601da177e4SLinus Torvalds 	xfs_inode_t	*ip,
11611da177e4SLinus Torvalds 	int		whichfork)
11621da177e4SLinus Torvalds {
11631da177e4SLinus Torvalds 	int		error;
11641da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
11654eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;
11661da177e4SLinus Torvalds 
11671da177e4SLinus Torvalds 	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
11681da177e4SLinus Torvalds 		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
11691da177e4SLinus Torvalds 				 ip->i_mount);
11701da177e4SLinus Torvalds 		return XFS_ERROR(EFSCORRUPTED);
11711da177e4SLinus Torvalds 	}
11724eea22f0SMandy Kirkconnell 	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
11731da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
11744eea22f0SMandy Kirkconnell 
11751da177e4SLinus Torvalds 	/*
11761da177e4SLinus Torvalds 	 * We know that the size is valid (it's checked in iformat_btree)
11771da177e4SLinus Torvalds 	 */
11784eea22f0SMandy Kirkconnell 	ifp->if_bytes = ifp->if_real_bytes = 0;
11791da177e4SLinus Torvalds 	ifp->if_flags |= XFS_IFEXTENTS;
11804eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, 0, nextents);
11811da177e4SLinus Torvalds 	error = xfs_bmap_read_extents(tp, ip, whichfork);
11821da177e4SLinus Torvalds 	if (error) {
11834eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
11841da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFEXTENTS;
11851da177e4SLinus Torvalds 		return error;
11861da177e4SLinus Torvalds 	}
1187a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
11881da177e4SLinus Torvalds 	return 0;
11891da177e4SLinus Torvalds }
11901da177e4SLinus Torvalds 
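/*
 * Editor's illustration -- not part of the original file.  The extent
 * list of a btree-format fork is loaded lazily, so callers (mostly in
 * xfs_bmap.c) typically guard their extent walks with a check like the
 * sketch below.  The helper name is made up for illustration.
 */
static int
xfs_iread_extents_example(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	xfs_ifork_t	*ifp = XFS_IFORK_PTR(ip, whichfork);

	if (ifp->if_flags & XFS_IFEXTENTS)
		return 0;	/* extents already in memory */
	return xfs_iread_extents(tp, ip, whichfork);
}
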
11911da177e4SLinus Torvalds /*
11921da177e4SLinus Torvalds  * Allocate an inode on disk and return a copy of its in-core version.
11931da177e4SLinus Torvalds  * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
11941da177e4SLinus Torvalds  * appropriately within the inode.  The uid and gid for the inode are
11951da177e4SLinus Torvalds  * set from the current task's fsuid and fsgid.
11961da177e4SLinus Torvalds  *
11971da177e4SLinus Torvalds  * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
1198cd856db6SCarlos Maiolino  * has a free inode available, call xfs_iget() to obtain the in-core
1199cd856db6SCarlos Maiolino  * version of the allocated inode.  Finally, fill in the inode and
1200cd856db6SCarlos Maiolino  * log its initial contents.  In this case, ialloc_context would be
1201cd856db6SCarlos Maiolino  * set to NULL.
12021da177e4SLinus Torvalds  *
1203cd856db6SCarlos Maiolino  * If xfs_dialloc() does not have an available inode, it will replenish
1204cd856db6SCarlos Maiolino  * its supply by doing an allocation. Since we can only do one
1205cd856db6SCarlos Maiolino  * allocation within a transaction without deadlocks, we must commit
1206cd856db6SCarlos Maiolino  * the current transaction before returning the inode itself.
1207cd856db6SCarlos Maiolino  * In this case, therefore, we will set ialloc_context and return.
12081da177e4SLinus Torvalds  * The caller should then commit the current transaction, start a new
12091da177e4SLinus Torvalds  * transaction, and call xfs_ialloc() again to actually get the inode.
12101da177e4SLinus Torvalds  *
12111da177e4SLinus Torvalds  * To ensure that some other process does not grab the inode that
12121da177e4SLinus Torvalds  * was allocated during the first call to xfs_ialloc(), this routine
12131da177e4SLinus Torvalds  * also returns the [locked] bp pointing to the head of the freelist
12141da177e4SLinus Torvalds  * as ialloc_context.  The caller should hold this buffer across
12151da177e4SLinus Torvalds  * the commit and pass it back into this routine on the second call.
1216b11f94d5SDavid Chinner  *
1217b11f94d5SDavid Chinner  * If we are allocating quota inodes, we do not have a parent inode
1218b11f94d5SDavid Chinner  * to attach to or associate with (i.e. pip == NULL) because they
1219b11f94d5SDavid Chinner  * are not linked into the directory structure - they are attached
1220b11f94d5SDavid Chinner  * directly to the superblock - and so have no parent.
12211da177e4SLinus Torvalds  */
12221da177e4SLinus Torvalds int
12231da177e4SLinus Torvalds xfs_ialloc(
12241da177e4SLinus Torvalds 	xfs_trans_t	*tp,
12251da177e4SLinus Torvalds 	xfs_inode_t	*pip,
1226576b1d67SAl Viro 	umode_t		mode,
122731b084aeSNathan Scott 	xfs_nlink_t	nlink,
12281da177e4SLinus Torvalds 	xfs_dev_t	rdev,
12296743099cSArkadiusz Mi?kiewicz 	prid_t		prid,
12301da177e4SLinus Torvalds 	int		okalloc,
12311da177e4SLinus Torvalds 	xfs_buf_t	**ialloc_context,
12321da177e4SLinus Torvalds 	xfs_inode_t	**ipp)
12331da177e4SLinus Torvalds {
1234*93848a99SChristoph Hellwig 	struct xfs_mount *mp = tp->t_mountp;
12351da177e4SLinus Torvalds 	xfs_ino_t	ino;
12361da177e4SLinus Torvalds 	xfs_inode_t	*ip;
12371da177e4SLinus Torvalds 	uint		flags;
12381da177e4SLinus Torvalds 	int		error;
1239dff35fd4SChristoph Hellwig 	timespec_t	tv;
1240bf904248SDavid Chinner 	int		filestreams = 0;
12411da177e4SLinus Torvalds 
12421da177e4SLinus Torvalds 	/*
12431da177e4SLinus Torvalds 	 * Call the space management code to pick
12441da177e4SLinus Torvalds 	 * the on-disk inode to be allocated.
12451da177e4SLinus Torvalds 	 */
1246b11f94d5SDavid Chinner 	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
124708358906SChristoph Hellwig 			    ialloc_context, &ino);
1248bf904248SDavid Chinner 	if (error)
12491da177e4SLinus Torvalds 		return error;
125008358906SChristoph Hellwig 	if (*ialloc_context || ino == NULLFSINO) {
12511da177e4SLinus Torvalds 		*ipp = NULL;
12521da177e4SLinus Torvalds 		return 0;
12531da177e4SLinus Torvalds 	}
12541da177e4SLinus Torvalds 	ASSERT(*ialloc_context == NULL);
12551da177e4SLinus Torvalds 
12561da177e4SLinus Torvalds 	/*
12571da177e4SLinus Torvalds 	 * Get the in-core inode with the lock held exclusively.
12581da177e4SLinus Torvalds 	 * This is because we're setting fields here we need
12591da177e4SLinus Torvalds 	 * to prevent others from looking at until we're done.
12601da177e4SLinus Torvalds 	 */
1261*93848a99SChristoph Hellwig 	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
1262ec3ba85fSChristoph Hellwig 			 XFS_ILOCK_EXCL, &ip);
1263bf904248SDavid Chinner 	if (error)
12641da177e4SLinus Torvalds 		return error;
12651da177e4SLinus Torvalds 	ASSERT(ip != NULL);
12661da177e4SLinus Torvalds 
1267576b1d67SAl Viro 	ip->i_d.di_mode = mode;
12681da177e4SLinus Torvalds 	ip->i_d.di_onlink = 0;
12691da177e4SLinus Torvalds 	ip->i_d.di_nlink = nlink;
12701da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == nlink);
12719e2b2dc4SDavid Howells 	ip->i_d.di_uid = current_fsuid();
12729e2b2dc4SDavid Howells 	ip->i_d.di_gid = current_fsgid();
12736743099cSArkadiusz Mi?kiewicz 	xfs_set_projid(ip, prid);
12741da177e4SLinus Torvalds 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
12751da177e4SLinus Torvalds 
12761da177e4SLinus Torvalds 	/*
12771da177e4SLinus Torvalds 	 * If the superblock version is up to where we support new format
12781da177e4SLinus Torvalds 	 * inodes and this is currently an old format inode, then change
12791da177e4SLinus Torvalds 	 * the inode version number now.  This way we only do the conversion
12801da177e4SLinus Torvalds 	 * here rather than here and in the flush/logging code.
12811da177e4SLinus Torvalds 	 */
1282*93848a99SChristoph Hellwig 	if (xfs_sb_version_hasnlink(&mp->m_sb) &&
128351ce16d5SChristoph Hellwig 	    ip->i_d.di_version == 1) {
128451ce16d5SChristoph Hellwig 		ip->i_d.di_version = 2;
12851da177e4SLinus Torvalds 		/*
12861da177e4SLinus Torvalds 		 * We've already zeroed the old link count, the projid field,
12871da177e4SLinus Torvalds 		 * and the pad field.
12881da177e4SLinus Torvalds 		 */
12891da177e4SLinus Torvalds 	}
12901da177e4SLinus Torvalds 
12911da177e4SLinus Torvalds 	/*
12921da177e4SLinus Torvalds 	 * Project ids won't be stored on disk if we are using a version 1 inode.
12931da177e4SLinus Torvalds 	 */
129451ce16d5SChristoph Hellwig 	if ((prid != 0) && (ip->i_d.di_version == 1))
12951da177e4SLinus Torvalds 		xfs_bump_ino_vers2(tp, ip);
12961da177e4SLinus Torvalds 
1297bd186aa9SChristoph Hellwig 	if (pip && XFS_INHERIT_GID(pip)) {
12981da177e4SLinus Torvalds 		ip->i_d.di_gid = pip->i_d.di_gid;
1299abbede1bSAl Viro 		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
13001da177e4SLinus Torvalds 			ip->i_d.di_mode |= S_ISGID;
13011da177e4SLinus Torvalds 		}
13021da177e4SLinus Torvalds 	}
13031da177e4SLinus Torvalds 
13041da177e4SLinus Torvalds 	/*
13051da177e4SLinus Torvalds 	 * If the group ID of the new file does not match the effective group
13061da177e4SLinus Torvalds 	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
13071da177e4SLinus Torvalds 	 * (and only if the irix_sgid_inherit compatibility variable is set).
13081da177e4SLinus Torvalds 	 */
13091da177e4SLinus Torvalds 	if ((irix_sgid_inherit) &&
13101da177e4SLinus Torvalds 	    (ip->i_d.di_mode & S_ISGID) &&
13111da177e4SLinus Torvalds 	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
13121da177e4SLinus Torvalds 		ip->i_d.di_mode &= ~S_ISGID;
13131da177e4SLinus Torvalds 	}
13141da177e4SLinus Torvalds 
13151da177e4SLinus Torvalds 	ip->i_d.di_size = 0;
13161da177e4SLinus Torvalds 	ip->i_d.di_nextents = 0;
13171da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
1318dff35fd4SChristoph Hellwig 
1319dff35fd4SChristoph Hellwig 	nanotime(&tv);
1320dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
1321dff35fd4SChristoph Hellwig 	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
1322dff35fd4SChristoph Hellwig 	ip->i_d.di_atime = ip->i_d.di_mtime;
1323dff35fd4SChristoph Hellwig 	ip->i_d.di_ctime = ip->i_d.di_mtime;
1324dff35fd4SChristoph Hellwig 
13251da177e4SLinus Torvalds 	/*
13261da177e4SLinus Torvalds 	 * di_gen will have been taken care of in xfs_iread.
13271da177e4SLinus Torvalds 	 */
13281da177e4SLinus Torvalds 	ip->i_d.di_extsize = 0;
13291da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
13301da177e4SLinus Torvalds 	ip->i_d.di_dmstate = 0;
13311da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
1332*93848a99SChristoph Hellwig 
1333*93848a99SChristoph Hellwig 	if (ip->i_d.di_version == 3) {
1334*93848a99SChristoph Hellwig 		ASSERT(ip->i_d.di_ino == ino);
1335*93848a99SChristoph Hellwig 		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
1336*93848a99SChristoph Hellwig 		ip->i_d.di_crc = 0;
1337*93848a99SChristoph Hellwig 		ip->i_d.di_changecount = 1;
1338*93848a99SChristoph Hellwig 		ip->i_d.di_lsn = 0;
1339*93848a99SChristoph Hellwig 		ip->i_d.di_flags2 = 0;
1340*93848a99SChristoph Hellwig 		memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
1341*93848a99SChristoph Hellwig 		ip->i_d.di_crtime = ip->i_d.di_mtime;
1342*93848a99SChristoph Hellwig 	}
1343*93848a99SChristoph Hellwig 
13451da177e4SLinus Torvalds 	flags = XFS_ILOG_CORE;
13461da177e4SLinus Torvalds 	switch (mode & S_IFMT) {
13471da177e4SLinus Torvalds 	case S_IFIFO:
13481da177e4SLinus Torvalds 	case S_IFCHR:
13491da177e4SLinus Torvalds 	case S_IFBLK:
13501da177e4SLinus Torvalds 	case S_IFSOCK:
13511da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
13521da177e4SLinus Torvalds 		ip->i_df.if_u2.if_rdev = rdev;
13531da177e4SLinus Torvalds 		ip->i_df.if_flags = 0;
13541da177e4SLinus Torvalds 		flags |= XFS_ILOG_DEV;
13551da177e4SLinus Torvalds 		break;
13561da177e4SLinus Torvalds 	case S_IFREG:
1357bf904248SDavid Chinner 		/*
1358bf904248SDavid Chinner 		 * we can't set up filestreams until after the VFS inode
1359bf904248SDavid Chinner 		 * is set up properly.
1360bf904248SDavid Chinner 		 */
1361bf904248SDavid Chinner 		if (pip && xfs_inode_is_filestream(pip))
1362bf904248SDavid Chinner 			filestreams = 1;
13632a82b8beSDavid Chinner 		/* fall through */
13641da177e4SLinus Torvalds 	case S_IFDIR:
1365b11f94d5SDavid Chinner 		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
1366365ca83dSNathan Scott 			uint	di_flags = 0;
1367365ca83dSNathan Scott 
1368abbede1bSAl Viro 			if (S_ISDIR(mode)) {
1369365ca83dSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1370365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_RTINHERIT;
1371dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1372dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
1373dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1374dd9f438eSNathan Scott 				}
1375abbede1bSAl Viro 			} else if (S_ISREG(mode)) {
1376613d7043SChristoph Hellwig 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
1377365ca83dSNathan Scott 					di_flags |= XFS_DIFLAG_REALTIME;
1378dd9f438eSNathan Scott 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
1379dd9f438eSNathan Scott 					di_flags |= XFS_DIFLAG_EXTSIZE;
1380dd9f438eSNathan Scott 					ip->i_d.di_extsize = pip->i_d.di_extsize;
1381dd9f438eSNathan Scott 				}
13821da177e4SLinus Torvalds 			}
13831da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
13841da177e4SLinus Torvalds 			    xfs_inherit_noatime)
1385365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOATIME;
13861da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
13871da177e4SLinus Torvalds 			    xfs_inherit_nodump)
1388365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NODUMP;
13891da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
13901da177e4SLinus Torvalds 			    xfs_inherit_sync)
1391365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_SYNC;
13921da177e4SLinus Torvalds 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
13931da177e4SLinus Torvalds 			    xfs_inherit_nosymlinks)
1394365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_NOSYMLINKS;
1395365ca83dSNathan Scott 			if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
1396365ca83dSNathan Scott 				di_flags |= XFS_DIFLAG_PROJINHERIT;
1397d3446eacSBarry Naujok 			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
1398d3446eacSBarry Naujok 			    xfs_inherit_nodefrag)
1399d3446eacSBarry Naujok 				di_flags |= XFS_DIFLAG_NODEFRAG;
14002a82b8beSDavid Chinner 			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
14012a82b8beSDavid Chinner 				di_flags |= XFS_DIFLAG_FILESTREAM;
1402365ca83dSNathan Scott 			ip->i_d.di_flags |= di_flags;
14031da177e4SLinus Torvalds 		}
14041da177e4SLinus Torvalds 		/* FALLTHROUGH */
14051da177e4SLinus Torvalds 	case S_IFLNK:
14061da177e4SLinus Torvalds 		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
14071da177e4SLinus Torvalds 		ip->i_df.if_flags = XFS_IFEXTENTS;
14081da177e4SLinus Torvalds 		ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
14091da177e4SLinus Torvalds 		ip->i_df.if_u1.if_extents = NULL;
14101da177e4SLinus Torvalds 		break;
14111da177e4SLinus Torvalds 	default:
14121da177e4SLinus Torvalds 		ASSERT(0);
14131da177e4SLinus Torvalds 	}
14141da177e4SLinus Torvalds 	/*
14151da177e4SLinus Torvalds 	 * Attribute fork settings for new inode.
14161da177e4SLinus Torvalds 	 */
14171da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
14181da177e4SLinus Torvalds 	ip->i_d.di_anextents = 0;
14191da177e4SLinus Torvalds 
14201da177e4SLinus Torvalds 	/*
14211da177e4SLinus Torvalds 	 * Log the new values stuffed into the inode.
14221da177e4SLinus Torvalds 	 */
1423ddc3415aSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
14241da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, flags);
14251da177e4SLinus Torvalds 
1426b83bd138SNathan Scott 	/* now that we have an i_mode we can set up inode ops and unlock */
142741be8bedSChristoph Hellwig 	xfs_setup_inode(ip);
14281da177e4SLinus Torvalds 
1429bf904248SDavid Chinner 	/* now we have set up the vfs inode we can associate the filestream */
1430bf904248SDavid Chinner 	if (filestreams) {
1431bf904248SDavid Chinner 		error = xfs_filestream_associate(pip, ip);
1432bf904248SDavid Chinner 		if (error < 0)
1433bf904248SDavid Chinner 			return -error;
1434bf904248SDavid Chinner 		if (!error)
1435bf904248SDavid Chinner 			xfs_iflags_set(ip, XFS_IFILESTREAM);
1436bf904248SDavid Chinner 	}
1437bf904248SDavid Chinner 
14381da177e4SLinus Torvalds 	*ipp = ip;
14391da177e4SLinus Torvalds 	return 0;
14401da177e4SLinus Torvalds }
14411da177e4SLinus Torvalds 
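/*
 * Editor's illustration -- not part of the original file.  A condensed
 * sketch of the caller-side retry protocol described in the comment above
 * xfs_ialloc() (compare xfs_dir_ialloc() in xfs_utils.c at this point in
 * history).  Quota handling, the "committed" bookkeeping and most error
 * unwinding are omitted, and the reservation macros are the ones a
 * create-path caller is assumed to use; treat this as a sketch, not as
 * the canonical implementation.
 */
static int
xfs_ialloc_roll_example(
	struct xfs_trans	**tpp,
	struct xfs_inode	*dp,
	umode_t			mode,
	struct xfs_inode	**ipp)
{
	struct xfs_trans	*tp = *tpp;
	struct xfs_trans	*ntp;
	struct xfs_buf		*ialloc_context = NULL;
	int			error;

	error = xfs_ialloc(tp, dp, mode, 1, 0, 0, 1, &ialloc_context, ipp);
	if (error || !ialloc_context)
		goto out;

	/*
	 * No inode was handed back.  Hold the returned buffer across the
	 * commit so nobody else can claim the inode we are about to
	 * allocate, roll to a fresh transaction and call xfs_ialloc()
	 * again.
	 */
	xfs_trans_bhold(tp, ialloc_context);
	ntp = xfs_trans_dup(tp);
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error)
		goto out;

	/* drop the extra ticket reference gained in xfs_trans_dup() */
	xfs_log_ticket_put(tp->t_ticket);
	error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(tp->t_mountp), 0,
				  XFS_TRANS_PERM_LOG_RES,
				  XFS_CREATE_LOG_COUNT);
	if (error)
		goto out;
	xfs_trans_bjoin(tp, ialloc_context);

	/* the second call now returns the locked in-core inode */
	error = xfs_ialloc(tp, dp, mode, 1, 0, 0, 1, &ialloc_context, ipp);
out:
	*tpp = tp;
	return error;
}
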
14421da177e4SLinus Torvalds /*
14438f04c47aSChristoph Hellwig  * Free up the underlying blocks past new_size.  The new size must be smaller
14448f04c47aSChristoph Hellwig  * than the current size.  This routine can be used both for the attribute and
14458f04c47aSChristoph Hellwig  * data fork, and does not modify the inode size, which is left to the caller.
14461da177e4SLinus Torvalds  *
1447f6485057SDavid Chinner  * The transaction passed to this routine must have made a permanent log
1448f6485057SDavid Chinner  * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
1449f6485057SDavid Chinner  * given transaction and start new ones, so make sure everything involved in
1450f6485057SDavid Chinner  * the transaction is tidy before calling here.  Some transaction will be
1451f6485057SDavid Chinner  * returned to the caller to be committed.  The incoming transaction must
1452f6485057SDavid Chinner  * already include the inode, and both inode locks must be held exclusively.
1453f6485057SDavid Chinner  * The inode must also be "held" within the transaction.  On return the inode
1454f6485057SDavid Chinner  * will be "held" within the returned transaction.  This routine does NOT
1455f6485057SDavid Chinner  * require any disk space to be reserved for it within the transaction.
14561da177e4SLinus Torvalds  *
1457f6485057SDavid Chinner  * If we get an error, we must return with the inode locked and linked into the
1458f6485057SDavid Chinner  * current transaction. This keeps things simple for the higher level code,
1459f6485057SDavid Chinner  * because it always knows that the inode is locked and held in the transaction
1460f6485057SDavid Chinner  * that returns to it whether errors occur or not.  We don't mark the inode
1461f6485057SDavid Chinner  * dirty on error so that transactions can be easily aborted if possible.
14621da177e4SLinus Torvalds  */
14631da177e4SLinus Torvalds int
14648f04c47aSChristoph Hellwig xfs_itruncate_extents(
14658f04c47aSChristoph Hellwig 	struct xfs_trans	**tpp,
14668f04c47aSChristoph Hellwig 	struct xfs_inode	*ip,
14678f04c47aSChristoph Hellwig 	int			whichfork,
14688f04c47aSChristoph Hellwig 	xfs_fsize_t		new_size)
14691da177e4SLinus Torvalds {
14708f04c47aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
14718f04c47aSChristoph Hellwig 	struct xfs_trans	*tp = *tpp;
14728f04c47aSChristoph Hellwig 	struct xfs_trans	*ntp;
14738f04c47aSChristoph Hellwig 	xfs_bmap_free_t		free_list;
14741da177e4SLinus Torvalds 	xfs_fsblock_t		first_block;
14751da177e4SLinus Torvalds 	xfs_fileoff_t		first_unmap_block;
14761da177e4SLinus Torvalds 	xfs_fileoff_t		last_block;
14778f04c47aSChristoph Hellwig 	xfs_filblks_t		unmap_len;
14781da177e4SLinus Torvalds 	int			committed;
14798f04c47aSChristoph Hellwig 	int			error = 0;
14808f04c47aSChristoph Hellwig 	int			done = 0;
14811da177e4SLinus Torvalds 
14820b56185bSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
14830b56185bSChristoph Hellwig 	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
14840b56185bSChristoph Hellwig 	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1485ce7ae151SChristoph Hellwig 	ASSERT(new_size <= XFS_ISIZE(ip));
14868f04c47aSChristoph Hellwig 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
14871da177e4SLinus Torvalds 	ASSERT(ip->i_itemp != NULL);
1488898621d5SChristoph Hellwig 	ASSERT(ip->i_itemp->ili_lock_flags == 0);
14891da177e4SLinus Torvalds 	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
14901da177e4SLinus Torvalds 
1491673e8e59SChristoph Hellwig 	trace_xfs_itruncate_extents_start(ip, new_size);
1492673e8e59SChristoph Hellwig 
14931da177e4SLinus Torvalds 	/*
14941da177e4SLinus Torvalds 	 * Since it is possible for space to become allocated beyond
14951da177e4SLinus Torvalds 	 * the end of the file (in a crash where the space is allocated
14961da177e4SLinus Torvalds 	 * but the inode size is not yet updated), simply remove any
14971da177e4SLinus Torvalds 	 * blocks which show up between the new EOF and the maximum
14981da177e4SLinus Torvalds 	 * possible file size.  If the first block to be removed is
14991da177e4SLinus Torvalds 	 * beyond the maximum file size (ie it is the same as last_block),
15001da177e4SLinus Torvalds 	 * then there is nothing to do.
15011da177e4SLinus Torvalds 	 */
15028f04c47aSChristoph Hellwig 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
150332972383SDave Chinner 	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
15048f04c47aSChristoph Hellwig 	if (first_unmap_block == last_block)
15058f04c47aSChristoph Hellwig 		return 0;
15068f04c47aSChristoph Hellwig 
15078f04c47aSChristoph Hellwig 	ASSERT(first_unmap_block < last_block);
15081da177e4SLinus Torvalds 	unmap_len = last_block - first_unmap_block + 1;
15091da177e4SLinus Torvalds 	while (!done) {
15109d87c319SEric Sandeen 		xfs_bmap_init(&free_list, &first_block);
15118f04c47aSChristoph Hellwig 		error = xfs_bunmapi(tp, ip,
15123e57ecf6SOlaf Weber 				    first_unmap_block, unmap_len,
15138f04c47aSChristoph Hellwig 				    xfs_bmapi_aflag(whichfork),
15141da177e4SLinus Torvalds 				    XFS_ITRUNC_MAX_EXTENTS,
15153e57ecf6SOlaf Weber 				    &first_block, &free_list,
1516b4e9181eSChristoph Hellwig 				    &done);
15178f04c47aSChristoph Hellwig 		if (error)
15188f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
15191da177e4SLinus Torvalds 
15201da177e4SLinus Torvalds 		/*
15211da177e4SLinus Torvalds 		 * Duplicate the transaction that has the permanent
15221da177e4SLinus Torvalds 		 * reservation and commit the old transaction.
15231da177e4SLinus Torvalds 		 */
15248f04c47aSChristoph Hellwig 		error = xfs_bmap_finish(&tp, &free_list, &committed);
1525898621d5SChristoph Hellwig 		if (committed)
1526ddc3415aSChristoph Hellwig 			xfs_trans_ijoin(tp, ip, 0);
15278f04c47aSChristoph Hellwig 		if (error)
15288f04c47aSChristoph Hellwig 			goto out_bmap_cancel;
15291da177e4SLinus Torvalds 
15301da177e4SLinus Torvalds 		if (committed) {
15311da177e4SLinus Torvalds 			/*
1532f6485057SDavid Chinner 			 * Mark the inode dirty so it will be logged and
1533e5720eecSDavid Chinner 			 * moved forward in the log as part of every commit.
15341da177e4SLinus Torvalds 			 */
15358f04c47aSChristoph Hellwig 			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
15361da177e4SLinus Torvalds 		}
1537f6485057SDavid Chinner 
15388f04c47aSChristoph Hellwig 		ntp = xfs_trans_dup(tp);
15398f04c47aSChristoph Hellwig 		error = xfs_trans_commit(tp, 0);
15408f04c47aSChristoph Hellwig 		tp = ntp;
1541f6485057SDavid Chinner 
1542ddc3415aSChristoph Hellwig 		xfs_trans_ijoin(tp, ip, 0);
1543f6485057SDavid Chinner 
1544cc09c0dcSDave Chinner 		if (error)
15458f04c47aSChristoph Hellwig 			goto out;
15468f04c47aSChristoph Hellwig 
1547cc09c0dcSDave Chinner 		/*
15488f04c47aSChristoph Hellwig 		 * Transaction commit worked ok so we can drop the extra ticket
1549cc09c0dcSDave Chinner 		 * reference that we gained in xfs_trans_dup()
1550cc09c0dcSDave Chinner 		 */
15518f04c47aSChristoph Hellwig 		xfs_log_ticket_put(tp->t_ticket);
15528f04c47aSChristoph Hellwig 		error = xfs_trans_reserve(tp, 0,
1553f6485057SDavid Chinner 					XFS_ITRUNCATE_LOG_RES(mp), 0,
15541da177e4SLinus Torvalds 					XFS_TRANS_PERM_LOG_RES,
15551da177e4SLinus Torvalds 					XFS_ITRUNCATE_LOG_COUNT);
15561da177e4SLinus Torvalds 		if (error)
15578f04c47aSChristoph Hellwig 			goto out;
15581da177e4SLinus Torvalds 	}
15598f04c47aSChristoph Hellwig 
1560673e8e59SChristoph Hellwig 	/*
1561673e8e59SChristoph Hellwig 	 * Always re-log the inode so that our permanent transaction can keep
1562673e8e59SChristoph Hellwig 	 * on rolling it forward in the log.
1563673e8e59SChristoph Hellwig 	 */
1564673e8e59SChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1565673e8e59SChristoph Hellwig 
1566673e8e59SChristoph Hellwig 	trace_xfs_itruncate_extents_end(ip, new_size);
1567673e8e59SChristoph Hellwig 
15688f04c47aSChristoph Hellwig out:
15698f04c47aSChristoph Hellwig 	*tpp = tp;
15708f04c47aSChristoph Hellwig 	return error;
15718f04c47aSChristoph Hellwig out_bmap_cancel:
15721da177e4SLinus Torvalds 	/*
15738f04c47aSChristoph Hellwig 	 * If the bunmapi call encounters an error, return to the caller where
15748f04c47aSChristoph Hellwig 	 * the transaction can be properly aborted.  We just need to make sure
15758f04c47aSChristoph Hellwig 	 * we're not holding any resources that we were not when we came in.
15761da177e4SLinus Torvalds 	 */
15778f04c47aSChristoph Hellwig 	xfs_bmap_cancel(&free_list);
15788f04c47aSChristoph Hellwig 	goto out;
15798f04c47aSChristoph Hellwig }
15808f04c47aSChristoph Hellwig 
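/*
 * Editor's illustration -- not part of the original file.  The calling
 * convention described above, reduced to its essentials.  Real callers
 * (the truncate paths in xfs_inactive() and xfs_setattr_size()) also deal
 * with quota, page cache truncation and finer-grained error unwinding,
 * all omitted here.  The sketch assumes the caller already holds
 * XFS_IOLOCK_EXCL, as the assertions in xfs_itruncate_extents() require
 * for a referenced VFS inode.
 */
static int
xfs_itruncate_data_example(
	struct xfs_inode	*ip,
	xfs_fsize_t		new_size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_ITRUNCATE);
	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);	/* inode stays locked and held */

	/* may roll the transaction; tp is updated through the pointer */
	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, new_size);
	if (error) {
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
				     XFS_TRANS_ABORT);
	} else {
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
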
15811da177e4SLinus Torvalds /*
15821da177e4SLinus Torvalds  * This is called when the inode's link count goes to 0.
15831da177e4SLinus Torvalds  * We place the on-disk inode on a list in the AGI.  It
15841da177e4SLinus Torvalds  * will be pulled from this list when the inode is freed.
15851da177e4SLinus Torvalds  */
15861da177e4SLinus Torvalds int
15871da177e4SLinus Torvalds xfs_iunlink(
15881da177e4SLinus Torvalds 	xfs_trans_t	*tp,
15891da177e4SLinus Torvalds 	xfs_inode_t	*ip)
15901da177e4SLinus Torvalds {
15911da177e4SLinus Torvalds 	xfs_mount_t	*mp;
15921da177e4SLinus Torvalds 	xfs_agi_t	*agi;
15931da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
15941da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
15951da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
15961da177e4SLinus Torvalds 	xfs_agino_t	agino;
15971da177e4SLinus Torvalds 	short		bucket_index;
15981da177e4SLinus Torvalds 	int		offset;
15991da177e4SLinus Torvalds 	int		error;
16001da177e4SLinus Torvalds 
16011da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
16021da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_mode != 0);
16031da177e4SLinus Torvalds 
16041da177e4SLinus Torvalds 	mp = tp->t_mountp;
16051da177e4SLinus Torvalds 
16061da177e4SLinus Torvalds 	/*
16071da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
16081da177e4SLinus Torvalds 	 * on the list.
16091da177e4SLinus Torvalds 	 */
16105e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1611859d7182SVlad Apostolov 	if (error)
16121da177e4SLinus Torvalds 		return error;
16131da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
16145e1be0fbSChristoph Hellwig 
16151da177e4SLinus Torvalds 	/*
16161da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
16171da177e4SLinus Torvalds 	 * list this inode will go on.
16181da177e4SLinus Torvalds 	 */
16191da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
16201da177e4SLinus Torvalds 	ASSERT(agino != 0);
16211da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
16221da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
162316259e7dSChristoph Hellwig 	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
16241da177e4SLinus Torvalds 
162569ef921bSChristoph Hellwig 	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
16261da177e4SLinus Torvalds 		/*
16271da177e4SLinus Torvalds 		 * There is already another inode in the bucket we need
16281da177e4SLinus Torvalds 		 * to add ourselves to.  Add us at the front of the list.
16291da177e4SLinus Torvalds 		 * Here we put the head pointer into our next pointer,
16301da177e4SLinus Torvalds 		 * and then we fall through to point the head at us.
16311da177e4SLinus Torvalds 		 */
1632475ee413SChristoph Hellwig 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
1633475ee413SChristoph Hellwig 				       0, 0);
1634c319b58bSVlad Apostolov 		if (error)
1635c319b58bSVlad Apostolov 			return error;
1636c319b58bSVlad Apostolov 
163769ef921bSChristoph Hellwig 		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
16381da177e4SLinus Torvalds 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
163992bfc6e7SChristoph Hellwig 		offset = ip->i_imap.im_boffset +
16401da177e4SLinus Torvalds 			offsetof(xfs_dinode_t, di_next_unlinked);
16411da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, ibp);
16421da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, ibp, offset,
16431da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
16441da177e4SLinus Torvalds 		xfs_inobp_check(mp, ibp);
16451da177e4SLinus Torvalds 	}
16461da177e4SLinus Torvalds 
16471da177e4SLinus Torvalds 	/*
16481da177e4SLinus Torvalds 	 * Point the bucket head pointer at the inode being inserted.
16491da177e4SLinus Torvalds 	 */
16501da177e4SLinus Torvalds 	ASSERT(agino != 0);
165116259e7dSChristoph Hellwig 	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
16521da177e4SLinus Torvalds 	offset = offsetof(xfs_agi_t, agi_unlinked) +
16531da177e4SLinus Torvalds 		(sizeof(xfs_agino_t) * bucket_index);
16541da177e4SLinus Torvalds 	xfs_trans_log_buf(tp, agibp, offset,
16551da177e4SLinus Torvalds 			  (offset + sizeof(xfs_agino_t) - 1));
16561da177e4SLinus Torvalds 	return 0;
16571da177e4SLinus Torvalds }
16581da177e4SLinus Torvalds 
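/*
 * Editor's note -- illustrative only, not part of the original file.
 * Both xfs_iunlink() above and xfs_iunlink_remove() below choose the
 * list to use by hashing the AG-relative inode number over the AGI's
 * unlinked buckets (XFS_AGI_UNLINKED_BUCKETS, 64 at this point in
 * history), so e.g. agino 4103 lands in bucket 7:
 */
static inline short
xfs_iunlink_bucket_example(xfs_agino_t agino)
{
	return agino % XFS_AGI_UNLINKED_BUCKETS;	/* 4103 % 64 == 7 */
}
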
16591da177e4SLinus Torvalds /*
16601da177e4SLinus Torvalds  * Pull the on-disk inode from the AGI unlinked list.
16611da177e4SLinus Torvalds  */
16621da177e4SLinus Torvalds STATIC int
16631da177e4SLinus Torvalds xfs_iunlink_remove(
16641da177e4SLinus Torvalds 	xfs_trans_t	*tp,
16651da177e4SLinus Torvalds 	xfs_inode_t	*ip)
16661da177e4SLinus Torvalds {
16671da177e4SLinus Torvalds 	xfs_ino_t	next_ino;
16681da177e4SLinus Torvalds 	xfs_mount_t	*mp;
16691da177e4SLinus Torvalds 	xfs_agi_t	*agi;
16701da177e4SLinus Torvalds 	xfs_dinode_t	*dip;
16711da177e4SLinus Torvalds 	xfs_buf_t	*agibp;
16721da177e4SLinus Torvalds 	xfs_buf_t	*ibp;
16731da177e4SLinus Torvalds 	xfs_agnumber_t	agno;
16741da177e4SLinus Torvalds 	xfs_agino_t	agino;
16751da177e4SLinus Torvalds 	xfs_agino_t	next_agino;
16761da177e4SLinus Torvalds 	xfs_buf_t	*last_ibp;
16776fdf8cccSNathan Scott 	xfs_dinode_t	*last_dip = NULL;
16781da177e4SLinus Torvalds 	short		bucket_index;
16796fdf8cccSNathan Scott 	int		offset, last_offset = 0;
16801da177e4SLinus Torvalds 	int		error;
16811da177e4SLinus Torvalds 
16821da177e4SLinus Torvalds 	mp = tp->t_mountp;
16831da177e4SLinus Torvalds 	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
16841da177e4SLinus Torvalds 
16851da177e4SLinus Torvalds 	/*
16861da177e4SLinus Torvalds 	 * Get the agi buffer first.  It ensures lock ordering
16871da177e4SLinus Torvalds 	 * on the list.
16881da177e4SLinus Torvalds 	 */
16895e1be0fbSChristoph Hellwig 	error = xfs_read_agi(mp, tp, agno, &agibp);
16905e1be0fbSChristoph Hellwig 	if (error)
16911da177e4SLinus Torvalds 		return error;
16925e1be0fbSChristoph Hellwig 
16931da177e4SLinus Torvalds 	agi = XFS_BUF_TO_AGI(agibp);
16945e1be0fbSChristoph Hellwig 
16951da177e4SLinus Torvalds 	/*
16961da177e4SLinus Torvalds 	 * Get the index into the agi hash table for the
16971da177e4SLinus Torvalds 	 * list this inode will go on.
16981da177e4SLinus Torvalds 	 */
16991da177e4SLinus Torvalds 	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
17001da177e4SLinus Torvalds 	ASSERT(agino != 0);
17011da177e4SLinus Torvalds 	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
170269ef921bSChristoph Hellwig 	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
17031da177e4SLinus Torvalds 	ASSERT(agi->agi_unlinked[bucket_index]);
17041da177e4SLinus Torvalds 
170516259e7dSChristoph Hellwig 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
17061da177e4SLinus Torvalds 		/*
1707475ee413SChristoph Hellwig 		 * We're at the head of the list.  Get the inode's on-disk
1708475ee413SChristoph Hellwig 		 * buffer to see if there is anyone after us on the list.
1709475ee413SChristoph Hellwig 		 * Only modify our next pointer if it is not already NULLAGINO.
1710475ee413SChristoph Hellwig 		 * This saves us the overhead of dealing with the buffer when
1711475ee413SChristoph Hellwig 		 * there is no need to change it.
17121da177e4SLinus Torvalds 		 */
1713475ee413SChristoph Hellwig 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
1714475ee413SChristoph Hellwig 				       0, 0);
17151da177e4SLinus Torvalds 		if (error) {
1716475ee413SChristoph Hellwig 			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
17170b932cccSDave Chinner 				__func__, error);
17181da177e4SLinus Torvalds 			return error;
17191da177e4SLinus Torvalds 		}
1720347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
17211da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
17221da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1723347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
172492bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
17251da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
17261da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
17271da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
17281da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
17291da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
17301da177e4SLinus Torvalds 		} else {
17311da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
17321da177e4SLinus Torvalds 		}
17331da177e4SLinus Torvalds 		/*
17341da177e4SLinus Torvalds 		 * Point the bucket head pointer at the next inode.
17351da177e4SLinus Torvalds 		 */
17361da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
17371da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
173816259e7dSChristoph Hellwig 		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
17391da177e4SLinus Torvalds 		offset = offsetof(xfs_agi_t, agi_unlinked) +
17401da177e4SLinus Torvalds 			(sizeof(xfs_agino_t) * bucket_index);
17411da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, agibp, offset,
17421da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
17431da177e4SLinus Torvalds 	} else {
17441da177e4SLinus Torvalds 		/*
17451da177e4SLinus Torvalds 		 * We need to search the list for the inode being freed.
17461da177e4SLinus Torvalds 		 */
174716259e7dSChristoph Hellwig 		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
17481da177e4SLinus Torvalds 		last_ibp = NULL;
17491da177e4SLinus Torvalds 		while (next_agino != agino) {
1750129dbc9aSChristoph Hellwig 			struct xfs_imap	imap;
1751129dbc9aSChristoph Hellwig 
1752129dbc9aSChristoph Hellwig 			if (last_ibp)
17531da177e4SLinus Torvalds 				xfs_trans_brelse(tp, last_ibp);
1754129dbc9aSChristoph Hellwig 
1755129dbc9aSChristoph Hellwig 			imap.im_blkno = 0;
17561da177e4SLinus Torvalds 			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
1757129dbc9aSChristoph Hellwig 
1758129dbc9aSChristoph Hellwig 			error = xfs_imap(mp, tp, next_ino, &imap, 0);
17591da177e4SLinus Torvalds 			if (error) {
17600b932cccSDave Chinner 				xfs_warn(mp,
1761129dbc9aSChristoph Hellwig 	"%s: xfs_imap returned error %d.",
17620b932cccSDave Chinner 					 __func__, error);
17631da177e4SLinus Torvalds 				return error;
17641da177e4SLinus Torvalds 			}
1765129dbc9aSChristoph Hellwig 
1766129dbc9aSChristoph Hellwig 			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
1767129dbc9aSChristoph Hellwig 					       &last_ibp, 0, 0);
1768129dbc9aSChristoph Hellwig 			if (error) {
1769129dbc9aSChristoph Hellwig 				xfs_warn(mp,
1770129dbc9aSChristoph Hellwig 	"%s: xfs_imap_to_bp returned error %d.",
1771129dbc9aSChristoph Hellwig 					__func__, error);
1772129dbc9aSChristoph Hellwig 				return error;
1773129dbc9aSChristoph Hellwig 			}
1774129dbc9aSChristoph Hellwig 
1775129dbc9aSChristoph Hellwig 			last_offset = imap.im_boffset;
1776347d1c01SChristoph Hellwig 			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
17771da177e4SLinus Torvalds 			ASSERT(next_agino != NULLAGINO);
17781da177e4SLinus Torvalds 			ASSERT(next_agino != 0);
17791da177e4SLinus Torvalds 		}
1780475ee413SChristoph Hellwig 
17811da177e4SLinus Torvalds 		/*
1782475ee413SChristoph Hellwig 		 * Now last_ibp points to the buffer previous to us on the
1783475ee413SChristoph Hellwig 		 * unlinked list.  Pull us from the list.
17841da177e4SLinus Torvalds 		 */
1785475ee413SChristoph Hellwig 		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
1786475ee413SChristoph Hellwig 				       0, 0);
17871da177e4SLinus Torvalds 		if (error) {
1788475ee413SChristoph Hellwig 			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
17890b932cccSDave Chinner 				__func__, error);
17901da177e4SLinus Torvalds 			return error;
17911da177e4SLinus Torvalds 		}
1792347d1c01SChristoph Hellwig 		next_agino = be32_to_cpu(dip->di_next_unlinked);
17931da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
17941da177e4SLinus Torvalds 		ASSERT(next_agino != agino);
17951da177e4SLinus Torvalds 		if (next_agino != NULLAGINO) {
1796347d1c01SChristoph Hellwig 			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
179792bfc6e7SChristoph Hellwig 			offset = ip->i_imap.im_boffset +
17981da177e4SLinus Torvalds 				offsetof(xfs_dinode_t, di_next_unlinked);
17991da177e4SLinus Torvalds 			xfs_trans_inode_buf(tp, ibp);
18001da177e4SLinus Torvalds 			xfs_trans_log_buf(tp, ibp, offset,
18011da177e4SLinus Torvalds 					  (offset + sizeof(xfs_agino_t) - 1));
18021da177e4SLinus Torvalds 			xfs_inobp_check(mp, ibp);
18031da177e4SLinus Torvalds 		} else {
18041da177e4SLinus Torvalds 			xfs_trans_brelse(tp, ibp);
18051da177e4SLinus Torvalds 		}
18061da177e4SLinus Torvalds 		/*
18071da177e4SLinus Torvalds 		 * Point the previous inode on the list to the next inode.
18081da177e4SLinus Torvalds 		 */
1809347d1c01SChristoph Hellwig 		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
18101da177e4SLinus Torvalds 		ASSERT(next_agino != 0);
18111da177e4SLinus Torvalds 		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
18121da177e4SLinus Torvalds 		xfs_trans_inode_buf(tp, last_ibp);
18131da177e4SLinus Torvalds 		xfs_trans_log_buf(tp, last_ibp, offset,
18141da177e4SLinus Torvalds 				  (offset + sizeof(xfs_agino_t) - 1));
18151da177e4SLinus Torvalds 		xfs_inobp_check(mp, last_ibp);
18161da177e4SLinus Torvalds 	}
18171da177e4SLinus Torvalds 	return 0;
18181da177e4SLinus Torvalds }
18191da177e4SLinus Torvalds 
18205b3eed75SDave Chinner /*
18215b3eed75SDave Chinner  * A big issue when freeing the inode cluster is that we _cannot_ skip any
18225b3eed75SDave Chinner  * inodes that are in memory - they all must be marked stale and attached to
18235b3eed75SDave Chinner  * the cluster buffer.
18245b3eed75SDave Chinner  */
18252a30f36dSChandra Seetharaman STATIC int
18261da177e4SLinus Torvalds xfs_ifree_cluster(
18271da177e4SLinus Torvalds 	xfs_inode_t	*free_ip,
18281da177e4SLinus Torvalds 	xfs_trans_t	*tp,
18291da177e4SLinus Torvalds 	xfs_ino_t	inum)
18301da177e4SLinus Torvalds {
18311da177e4SLinus Torvalds 	xfs_mount_t		*mp = free_ip->i_mount;
18321da177e4SLinus Torvalds 	int			blks_per_cluster;
18331da177e4SLinus Torvalds 	int			nbufs;
18341da177e4SLinus Torvalds 	int			ninodes;
18355b257b4aSDave Chinner 	int			i, j;
18361da177e4SLinus Torvalds 	xfs_daddr_t		blkno;
18371da177e4SLinus Torvalds 	xfs_buf_t		*bp;
18385b257b4aSDave Chinner 	xfs_inode_t		*ip;
18391da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip;
18401da177e4SLinus Torvalds 	xfs_log_item_t		*lip;
18415017e97dSDave Chinner 	struct xfs_perag	*pag;
18421da177e4SLinus Torvalds 
18435017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
18441da177e4SLinus Torvalds 	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
18451da177e4SLinus Torvalds 		blks_per_cluster = 1;
18461da177e4SLinus Torvalds 		ninodes = mp->m_sb.sb_inopblock;
18471da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp);
18481da177e4SLinus Torvalds 	} else {
18491da177e4SLinus Torvalds 		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
18501da177e4SLinus Torvalds 					mp->m_sb.sb_blocksize;
18511da177e4SLinus Torvalds 		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
18521da177e4SLinus Torvalds 		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
18531da177e4SLinus Torvalds 	}
18541da177e4SLinus Torvalds 
18551da177e4SLinus Torvalds 	for (j = 0; j < nbufs; j++, inum += ninodes) {
18561da177e4SLinus Torvalds 		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
18571da177e4SLinus Torvalds 					 XFS_INO_TO_AGBNO(mp, inum));
18581da177e4SLinus Torvalds 
18591da177e4SLinus Torvalds 		/*
18605b257b4aSDave Chinner 		 * We obtain and lock the backing buffer first in the process
18615b257b4aSDave Chinner 		 * here, as we have to ensure that any dirty inode that we
18625b257b4aSDave Chinner 		 * can't get the flush lock on is attached to the buffer.
18635b257b4aSDave Chinner 		 * If we scan the in-memory inodes first, then buffer IO can
18645b257b4aSDave Chinner 		 * complete before we get a lock on it, and hence we may fail
18655b257b4aSDave Chinner 		 * to mark all the active inodes on the buffer stale.
18661da177e4SLinus Torvalds 		 */
18671da177e4SLinus Torvalds 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1868b6aff29fSDave Chinner 					mp->m_bsize * blks_per_cluster,
1869b6aff29fSDave Chinner 					XBF_UNMAPPED);
18701da177e4SLinus Torvalds 
18712a30f36dSChandra Seetharaman 		if (!bp)
18722a30f36dSChandra Seetharaman 			return ENOMEM;
1873b0f539deSDave Chinner 
1874b0f539deSDave Chinner 		/*
1875b0f539deSDave Chinner 		 * This buffer may not have been correctly initialised as we
1876b0f539deSDave Chinner 		 * didn't read it from disk. That's not important because we are
1877b0f539deSDave Chinner 		 * only using it to mark the buffer as stale in the log, and to
1878b0f539deSDave Chinner 		 * attach stale cached inodes on it. That means it will never be
1879b0f539deSDave Chinner 		 * dispatched for IO. If it is, we want to know about it, and we
1880b0f539deSDave Chinner 		 * want it to fail. We can achieve this by adding a write
1881b0f539deSDave Chinner 		 * verifier to the buffer.
1882b0f539deSDave Chinner 		 */
18831813dd64SDave Chinner 		bp->b_ops = &xfs_inode_buf_ops;
1884b0f539deSDave Chinner 
18855b257b4aSDave Chinner 		/*
18865b257b4aSDave Chinner 		 * Walk the inodes already attached to the buffer and mark them
18875b257b4aSDave Chinner 		 * stale. These will all have the flush locks held, so an
18885b3eed75SDave Chinner 		 * in-memory inode walk can't lock them. By marking them all
18895b3eed75SDave Chinner 		 * stale first, we will not attempt to lock them in the loop
18905b3eed75SDave Chinner 		 * below as the XFS_ISTALE flag will be set.
18915b257b4aSDave Chinner 		 */
1892adadbeefSChristoph Hellwig 		lip = bp->b_fspriv;
18931da177e4SLinus Torvalds 		while (lip) {
18941da177e4SLinus Torvalds 			if (lip->li_type == XFS_LI_INODE) {
18951da177e4SLinus Torvalds 				iip = (xfs_inode_log_item_t *)lip;
18961da177e4SLinus Torvalds 				ASSERT(iip->ili_logged == 1);
1897ca30b2a7SChristoph Hellwig 				lip->li_cb = xfs_istale_done;
18987b2e2a31SDavid Chinner 				xfs_trans_ail_copy_lsn(mp->m_ail,
18997b2e2a31SDavid Chinner 							&iip->ili_flush_lsn,
19007b2e2a31SDavid Chinner 							&iip->ili_item.li_lsn);
1901e5ffd2bbSDavid Chinner 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
19021da177e4SLinus Torvalds 			}
19031da177e4SLinus Torvalds 			lip = lip->li_bio_list;
19041da177e4SLinus Torvalds 		}
19051da177e4SLinus Torvalds 
19075b257b4aSDave Chinner 		/*
19085b257b4aSDave Chinner 		 * For each inode in memory attempt to add it to the inode
19095b257b4aSDave Chinner 		 * buffer and set it up for being staled on buffer IO
19105b257b4aSDave Chinner 		 * completion.  This is safe as we've locked out tail pushing
19115b257b4aSDave Chinner 		 * and flushing by locking the buffer.
19125b257b4aSDave Chinner 		 *
19135b257b4aSDave Chinner 		 * We have already marked every inode that was part of a
19145b257b4aSDave Chinner 		 * transaction stale above, which means there is no point in
19155b257b4aSDave Chinner 		 * even trying to lock them.
19165b257b4aSDave Chinner 		 */
19175b257b4aSDave Chinner 		for (i = 0; i < ninodes; i++) {
19185b3eed75SDave Chinner retry:
19191a3e8f3dSDave Chinner 			rcu_read_lock();
19205b257b4aSDave Chinner 			ip = radix_tree_lookup(&pag->pag_ici_root,
19215b257b4aSDave Chinner 					XFS_INO_TO_AGINO(mp, (inum + i)));
19221da177e4SLinus Torvalds 
19231a3e8f3dSDave Chinner 			/* Inode not in memory, nothing to do */
19241a3e8f3dSDave Chinner 			if (!ip) {
19251a3e8f3dSDave Chinner 				rcu_read_unlock();
19265b257b4aSDave Chinner 				continue;
19275b257b4aSDave Chinner 			}
19285b257b4aSDave Chinner 
19295b3eed75SDave Chinner 			/*
19301a3e8f3dSDave Chinner 			 * because this is an RCU protected lookup, we could
19311a3e8f3dSDave Chinner 			 * find a recently freed or even reallocated inode
19321a3e8f3dSDave Chinner 			 * during the lookup. We need to check under the
19331a3e8f3dSDave Chinner 			 * i_flags_lock for a valid inode here. Skip it if it
19341a3e8f3dSDave Chinner 			 * is not valid, the wrong inode or stale.
19351a3e8f3dSDave Chinner 			 */
19361a3e8f3dSDave Chinner 			spin_lock(&ip->i_flags_lock);
19371a3e8f3dSDave Chinner 			if (ip->i_ino != inum + i ||
19381a3e8f3dSDave Chinner 			    __xfs_iflags_test(ip, XFS_ISTALE)) {
19391a3e8f3dSDave Chinner 				spin_unlock(&ip->i_flags_lock);
19401a3e8f3dSDave Chinner 				rcu_read_unlock();
19411a3e8f3dSDave Chinner 				continue;
19421a3e8f3dSDave Chinner 			}
19431a3e8f3dSDave Chinner 			spin_unlock(&ip->i_flags_lock);
19441a3e8f3dSDave Chinner 
19451a3e8f3dSDave Chinner 			/*
19465b3eed75SDave Chinner 			 * Don't try to lock/unlock the current inode, but we
19475b3eed75SDave Chinner 			 * _cannot_ skip the other inodes that we did not find
19485b3eed75SDave Chinner 			 * in the list attached to the buffer and are not
19495b3eed75SDave Chinner 			 * already marked stale. If we can't lock it, back off
19505b3eed75SDave Chinner 			 * and retry.
19515b3eed75SDave Chinner 			 */
19525b257b4aSDave Chinner 			if (ip != free_ip &&
19535b257b4aSDave Chinner 			    !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
19541a3e8f3dSDave Chinner 				rcu_read_unlock();
19555b3eed75SDave Chinner 				delay(1);
19565b3eed75SDave Chinner 				goto retry;
19575b257b4aSDave Chinner 			}
19581a3e8f3dSDave Chinner 			rcu_read_unlock();
19595b257b4aSDave Chinner 
19605b3eed75SDave Chinner 			xfs_iflock(ip);
19615b257b4aSDave Chinner 			xfs_iflags_set(ip, XFS_ISTALE);
19625b257b4aSDave Chinner 
19635b3eed75SDave Chinner 			/*
19645b3eed75SDave Chinner 			 * we don't need to attach clean inodes or those only
19655b3eed75SDave Chinner 			 * with unlogged changes (which we throw away, anyway).
19665b3eed75SDave Chinner 			 */
19675b257b4aSDave Chinner 			iip = ip->i_itemp;
19685b3eed75SDave Chinner 			if (!iip || xfs_inode_clean(ip)) {
19695b257b4aSDave Chinner 				ASSERT(ip != free_ip);
19701da177e4SLinus Torvalds 				xfs_ifunlock(ip);
19711da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
19721da177e4SLinus Torvalds 				continue;
19731da177e4SLinus Torvalds 			}
19741da177e4SLinus Torvalds 
1975f5d8d5c4SChristoph Hellwig 			iip->ili_last_fields = iip->ili_fields;
1976f5d8d5c4SChristoph Hellwig 			iip->ili_fields = 0;
19771da177e4SLinus Torvalds 			iip->ili_logged = 1;
19787b2e2a31SDavid Chinner 			xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
19797b2e2a31SDavid Chinner 						&iip->ili_item.li_lsn);
19801da177e4SLinus Torvalds 
1981ca30b2a7SChristoph Hellwig 			xfs_buf_attach_iodone(bp, xfs_istale_done,
1982ca30b2a7SChristoph Hellwig 						  &iip->ili_item);
19835b257b4aSDave Chinner 
19845b257b4aSDave Chinner 			if (ip != free_ip)
19851da177e4SLinus Torvalds 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
19861da177e4SLinus Torvalds 		}
19871da177e4SLinus Torvalds 
19881da177e4SLinus Torvalds 		xfs_trans_stale_inode_buf(tp, bp);
19891da177e4SLinus Torvalds 		xfs_trans_binval(tp, bp);
19901da177e4SLinus Torvalds 	}
19911da177e4SLinus Torvalds 
19925017e97dSDave Chinner 	xfs_perag_put(pag);
19932a30f36dSChandra Seetharaman 	return 0;
19941da177e4SLinus Torvalds }
19951da177e4SLinus Torvalds 
19961da177e4SLinus Torvalds /*
19971da177e4SLinus Torvalds  * This is called to return an inode to the inode free list.
19981da177e4SLinus Torvalds  * The inode should already be truncated to 0 length and have
19991da177e4SLinus Torvalds  * no pages associated with it.  This routine also assumes that
20001da177e4SLinus Torvalds  * the inode is already a part of the transaction.
20011da177e4SLinus Torvalds  *
20021da177e4SLinus Torvalds  * The on-disk copy of the inode will have been added to the list
20031da177e4SLinus Torvalds  * of unlinked inodes in the AGI. We need to remove the inode from
20041da177e4SLinus Torvalds  * that list atomically with respect to freeing it here.
20051da177e4SLinus Torvalds  */
20061da177e4SLinus Torvalds int
20071da177e4SLinus Torvalds xfs_ifree(
20081da177e4SLinus Torvalds 	xfs_trans_t	*tp,
20091da177e4SLinus Torvalds 	xfs_inode_t	*ip,
20101da177e4SLinus Torvalds 	xfs_bmap_free_t	*flist)
20111da177e4SLinus Torvalds {
20121da177e4SLinus Torvalds 	int			error;
20131da177e4SLinus Torvalds 	int			delete;
20141da177e4SLinus Torvalds 	xfs_ino_t		first_ino;
2015c319b58bSVlad Apostolov 	xfs_dinode_t		*dip;
2016c319b58bSVlad Apostolov 	xfs_buf_t		*ibp;
20171da177e4SLinus Torvalds 
2018579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
20191da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nlink == 0);
20201da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nextents == 0);
20211da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_anextents == 0);
2022ce7ae151SChristoph Hellwig 	ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
20231da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_nblocks == 0);
20241da177e4SLinus Torvalds 
20251da177e4SLinus Torvalds 	/*
20261da177e4SLinus Torvalds 	 * Pull the on-disk inode from the AGI unlinked list.
20271da177e4SLinus Torvalds 	 */
20281da177e4SLinus Torvalds 	error = xfs_iunlink_remove(tp, ip);
20291da177e4SLinus Torvalds 	if (error != 0) {
20301da177e4SLinus Torvalds 		return error;
20311da177e4SLinus Torvalds 	}
20321da177e4SLinus Torvalds 
20331da177e4SLinus Torvalds 	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
20341da177e4SLinus Torvalds 	if (error != 0) {
20351da177e4SLinus Torvalds 		return error;
20361da177e4SLinus Torvalds 	}
20371da177e4SLinus Torvalds 	ip->i_d.di_mode = 0;		/* mark incore inode as free */
20381da177e4SLinus Torvalds 	ip->i_d.di_flags = 0;
20391da177e4SLinus Torvalds 	ip->i_d.di_dmevmask = 0;
20401da177e4SLinus Torvalds 	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
20411da177e4SLinus Torvalds 	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
20421da177e4SLinus Torvalds 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
20431da177e4SLinus Torvalds 	/*
20441da177e4SLinus Torvalds 	 * Bump the generation count so no one will be confused
20451da177e4SLinus Torvalds 	 * by reincarnations of this inode.
20461da177e4SLinus Torvalds 	 */
20471da177e4SLinus Torvalds 	ip->i_d.di_gen++;
2048c319b58bSVlad Apostolov 
20491da177e4SLinus Torvalds 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
20501da177e4SLinus Torvalds 
2051475ee413SChristoph Hellwig 	error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
2052475ee413SChristoph Hellwig 			       0, 0);
2053c319b58bSVlad Apostolov 	if (error)
2054c319b58bSVlad Apostolov 		return error;
2055c319b58bSVlad Apostolov 
2056c319b58bSVlad Apostolov 	/*
2057c319b58bSVlad Apostolov 	 * Clear the on-disk di_mode. This is to prevent xfs_bulkstat
2058c319b58bSVlad Apostolov 	 * from picking up this inode when it is reclaimed (its incore state
2059c319b58bSVlad Apostolov 	 * is initialized but not yet flushed to disk). The in-core di_mode is
2060c319b58bSVlad Apostolov 	 * already cleared and a corresponding transaction has been logged.
2061c319b58bSVlad Apostolov 	 * The hack here just synchronizes the in-core to on-disk
2062c319b58bSVlad Apostolov 	 * di_mode value in advance, before the actual inode sync to disk.
2063c319b58bSVlad Apostolov 	 * This is OK because the inode is already unlinked and will never
2064c319b58bSVlad Apostolov 	 * change its di_mode again for this inode generation.
2065c319b58bSVlad Apostolov 	 * This is a temporary hack that would require a proper fix
2066c319b58bSVlad Apostolov 	 * in the future.
2067c319b58bSVlad Apostolov 	 */
206881591fe2SChristoph Hellwig 	dip->di_mode = 0;
2069c319b58bSVlad Apostolov 
20701da177e4SLinus Torvalds 	if (delete) {
20712a30f36dSChandra Seetharaman 		error = xfs_ifree_cluster(ip, tp, first_ino);
20721da177e4SLinus Torvalds 	}
20731da177e4SLinus Torvalds 
20742a30f36dSChandra Seetharaman 	return error;
20751da177e4SLinus Torvalds }
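
/*
 * A minimal sketch (not part of the original file) of the sequence a caller
 * such as xfs_inactive() follows around xfs_ifree(): the caller already
 * holds XFS_ILOCK_EXCL and has joined the inode to a dirty transaction, and
 * any blocks queued on the free list by xfs_ifree()/xfs_difree() must be
 * finished before the transaction commits.  The use of xfs_bmap_init() and
 * xfs_bmap_finish() is assumed from the rest of XFS; error handling and log
 * reservation setup are omitted.
 */
STATIC int
xfs_ifree_usage_sketch(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			committed;
	int			error;

	xfs_bmap_init(&free_list, &first_block);

	error = xfs_ifree(tp, ip, &free_list);
	if (error)
		return error;

	/* free any blocks queued by xfs_ifree(), e.g. an emptied inode chunk */
	return xfs_bmap_finish(&tp, &free_list, &committed);
}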
20761da177e4SLinus Torvalds 
20771da177e4SLinus Torvalds /*
20781da177e4SLinus Torvalds  * Reallocate the space for if_broot based on the number of records
20791da177e4SLinus Torvalds  * being added or deleted as indicated in rec_diff.  Move the records
20801da177e4SLinus Torvalds  * and pointers in if_broot to fit the new size.  When shrinking this
20811da177e4SLinus Torvalds  * will eliminate holes between the records and pointers created by
20821da177e4SLinus Torvalds  * the caller.  When growing this will create holes to be filled in
20831da177e4SLinus Torvalds  * by the caller.
20841da177e4SLinus Torvalds  *
20851da177e4SLinus Torvalds  * The caller must not request to add more records than would fit in
20861da177e4SLinus Torvalds  * the on-disk inode root.  If the if_broot is currently NULL, then
20871da177e4SLinus Torvalds  * if we are adding records, one will be allocated.  The caller must also
20881da177e4SLinus Torvalds  * not request that the number of records go below zero, although
20891da177e4SLinus Torvalds  * it can go to zero.
20901da177e4SLinus Torvalds  *
20911da177e4SLinus Torvalds  * ip -- the inode whose if_broot area is changing
20921da177e4SLinus Torvalds  * ext_diff -- the change in the number of records, positive or negative,
20931da177e4SLinus Torvalds  *	 requested for the if_broot array.
20941da177e4SLinus Torvalds  */
20951da177e4SLinus Torvalds void
20961da177e4SLinus Torvalds xfs_iroot_realloc(
20971da177e4SLinus Torvalds 	xfs_inode_t		*ip,
20981da177e4SLinus Torvalds 	int			rec_diff,
20991da177e4SLinus Torvalds 	int			whichfork)
21001da177e4SLinus Torvalds {
210160197e8dSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
21021da177e4SLinus Torvalds 	int			cur_max;
21031da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
21047cc95a82SChristoph Hellwig 	struct xfs_btree_block	*new_broot;
21051da177e4SLinus Torvalds 	int			new_max;
21061da177e4SLinus Torvalds 	size_t			new_size;
21071da177e4SLinus Torvalds 	char			*np;
21081da177e4SLinus Torvalds 	char			*op;
21091da177e4SLinus Torvalds 
21101da177e4SLinus Torvalds 	/*
21111da177e4SLinus Torvalds 	 * Handle the degenerate case quietly.
21121da177e4SLinus Torvalds 	 */
21131da177e4SLinus Torvalds 	if (rec_diff == 0) {
21141da177e4SLinus Torvalds 		return;
21151da177e4SLinus Torvalds 	}
21161da177e4SLinus Torvalds 
21171da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
21181da177e4SLinus Torvalds 	if (rec_diff > 0) {
21191da177e4SLinus Torvalds 		/*
21201da177e4SLinus Torvalds 		 * If there wasn't any memory allocated before, just
21211da177e4SLinus Torvalds 		 * allocate it now and get out.
21221da177e4SLinus Torvalds 		 */
21231da177e4SLinus Torvalds 		if (ifp->if_broot_bytes == 0) {
2124ee1a47abSChristoph Hellwig 			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
21254a7edddcSDave Chinner 			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
21261da177e4SLinus Torvalds 			ifp->if_broot_bytes = (int)new_size;
21271da177e4SLinus Torvalds 			return;
21281da177e4SLinus Torvalds 		}
21291da177e4SLinus Torvalds 
21301da177e4SLinus Torvalds 		/*
21311da177e4SLinus Torvalds 		 * If there is already an existing if_broot, then we need
21321da177e4SLinus Torvalds 		 * to realloc() it and shift the pointers to their new
21331da177e4SLinus Torvalds 		 * location.  The records don't change location because
21341da177e4SLinus Torvalds 		 * they are kept butted up against the btree block header.
21351da177e4SLinus Torvalds 		 */
213660197e8dSChristoph Hellwig 		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
21371da177e4SLinus Torvalds 		new_max = cur_max + rec_diff;
2138ee1a47abSChristoph Hellwig 		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
21397cc95a82SChristoph Hellwig 		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
2140ee1a47abSChristoph Hellwig 				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
21414a7edddcSDave Chinner 				KM_SLEEP | KM_NOFS);
214260197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
21431da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
214460197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
21451da177e4SLinus Torvalds 						     (int)new_size);
21461da177e4SLinus Torvalds 		ifp->if_broot_bytes = (int)new_size;
21471da177e4SLinus Torvalds 		ASSERT(ifp->if_broot_bytes <=
2148ee1a47abSChristoph Hellwig 			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
21491da177e4SLinus Torvalds 		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
21501da177e4SLinus Torvalds 		return;
21511da177e4SLinus Torvalds 	}
21521da177e4SLinus Torvalds 
21531da177e4SLinus Torvalds 	/*
21541da177e4SLinus Torvalds 	 * rec_diff is less than 0.  In this case, we are shrinking the
21551da177e4SLinus Torvalds 	 * if_broot buffer.  It must already exist.  If we go to zero
21561da177e4SLinus Torvalds 	 * records, just get rid of the root and clear the status bit.
21571da177e4SLinus Torvalds 	 */
21581da177e4SLinus Torvalds 	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
215960197e8dSChristoph Hellwig 	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
21601da177e4SLinus Torvalds 	new_max = cur_max + rec_diff;
21611da177e4SLinus Torvalds 	ASSERT(new_max >= 0);
21621da177e4SLinus Torvalds 	if (new_max > 0)
2163ee1a47abSChristoph Hellwig 		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
21641da177e4SLinus Torvalds 	else
21651da177e4SLinus Torvalds 		new_size = 0;
21661da177e4SLinus Torvalds 	if (new_size > 0) {
21674a7edddcSDave Chinner 		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
21681da177e4SLinus Torvalds 		/*
21691da177e4SLinus Torvalds 		 * First copy over the btree block header.
21701da177e4SLinus Torvalds 		 */
2171ee1a47abSChristoph Hellwig 		memcpy(new_broot, ifp->if_broot,
2172ee1a47abSChristoph Hellwig 			XFS_BMBT_BLOCK_LEN(ip->i_mount));
21731da177e4SLinus Torvalds 	} else {
21741da177e4SLinus Torvalds 		new_broot = NULL;
21751da177e4SLinus Torvalds 		ifp->if_flags &= ~XFS_IFBROOT;
21761da177e4SLinus Torvalds 	}
21771da177e4SLinus Torvalds 
21781da177e4SLinus Torvalds 	/*
21791da177e4SLinus Torvalds 	 * Only copy the records and pointers if there are any.
21801da177e4SLinus Torvalds 	 */
21811da177e4SLinus Torvalds 	if (new_max > 0) {
21821da177e4SLinus Torvalds 		/*
21831da177e4SLinus Torvalds 		 * First copy the records.
21841da177e4SLinus Torvalds 		 */
2185136341b4SChristoph Hellwig 		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
2186136341b4SChristoph Hellwig 		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
21871da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
21881da177e4SLinus Torvalds 
21891da177e4SLinus Torvalds 		/*
21901da177e4SLinus Torvalds 		 * Then copy the pointers.
21911da177e4SLinus Torvalds 		 */
219260197e8dSChristoph Hellwig 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
21931da177e4SLinus Torvalds 						     ifp->if_broot_bytes);
219460197e8dSChristoph Hellwig 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
21951da177e4SLinus Torvalds 						     (int)new_size);
21961da177e4SLinus Torvalds 		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
21971da177e4SLinus Torvalds 	}
2198f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_broot);
21991da177e4SLinus Torvalds 	ifp->if_broot = new_broot;
22001da177e4SLinus Torvalds 	ifp->if_broot_bytes = (int)new_size;
22011da177e4SLinus Torvalds 	ASSERT(ifp->if_broot_bytes <=
2202ee1a47abSChristoph Hellwig 		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
22031da177e4SLinus Torvalds 	return;
22041da177e4SLinus Torvalds }
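
/*
 * Illustrative sketch (not in the original source): growing the incore bmap
 * btree root by one record slot.  xfs_iroot_realloc() resizes the buffer and
 * shifts the pointer area; the new record and pointer slots are left for the
 * caller to fill in, as the comment above the function notes.
 */
STATIC void
xfs_iroot_grow_one_sketch(
	struct xfs_inode	*ip,
	int			whichfork)
{
	xfs_ifork_t	*ifp = XFS_IFORK_PTR(ip, whichfork);
	int		old_bytes = ifp->if_broot_bytes;

	/* make room for one more record; the new slots are uninitialized */
	xfs_iroot_realloc(ip, 1, whichfork);

	ASSERT(ifp->if_broot_bytes > old_bytes);
}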
22051da177e4SLinus Torvalds 
22061da177e4SLinus Torvalds 
22071da177e4SLinus Torvalds /*
22081da177e4SLinus Torvalds  * This is called when the amount of space needed for if_data
22091da177e4SLinus Torvalds  * is increased or decreased.  The change in size is indicated by
22101da177e4SLinus Torvalds  * the number of bytes that need to be added or deleted in the
22111da177e4SLinus Torvalds  * byte_diff parameter.
22121da177e4SLinus Torvalds  *
22131da177e4SLinus Torvalds  * If the amount of space needed has decreased below the size of the
22141da177e4SLinus Torvalds  * inline buffer, then switch to using the inline buffer.  Otherwise,
22151da177e4SLinus Torvalds  * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
22161da177e4SLinus Torvalds  * to what is needed.
22171da177e4SLinus Torvalds  *
22181da177e4SLinus Torvalds  * ip -- the inode whose if_data area is changing
22191da177e4SLinus Torvalds  * byte_diff -- the change in the number of bytes, positive or negative,
22201da177e4SLinus Torvalds  *	 requested for the if_data array.
22211da177e4SLinus Torvalds  */
22221da177e4SLinus Torvalds void
22231da177e4SLinus Torvalds xfs_idata_realloc(
22241da177e4SLinus Torvalds 	xfs_inode_t	*ip,
22251da177e4SLinus Torvalds 	int		byte_diff,
22261da177e4SLinus Torvalds 	int		whichfork)
22271da177e4SLinus Torvalds {
22281da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
22291da177e4SLinus Torvalds 	int		new_size;
22301da177e4SLinus Torvalds 	int		real_size;
22311da177e4SLinus Torvalds 
22321da177e4SLinus Torvalds 	if (byte_diff == 0) {
22331da177e4SLinus Torvalds 		return;
22341da177e4SLinus Torvalds 	}
22351da177e4SLinus Torvalds 
22361da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
22371da177e4SLinus Torvalds 	new_size = (int)ifp->if_bytes + byte_diff;
22381da177e4SLinus Torvalds 	ASSERT(new_size >= 0);
22391da177e4SLinus Torvalds 
22401da177e4SLinus Torvalds 	if (new_size == 0) {
22411da177e4SLinus Torvalds 		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2242f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
22431da177e4SLinus Torvalds 		}
22441da177e4SLinus Torvalds 		ifp->if_u1.if_data = NULL;
22451da177e4SLinus Torvalds 		real_size = 0;
22461da177e4SLinus Torvalds 	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
22471da177e4SLinus Torvalds 		/*
22481da177e4SLinus Torvalds 		 * If the valid extents/data can fit in if_inline_ext/data,
22491da177e4SLinus Torvalds 		 * copy them from the malloc'd vector and free it.
22501da177e4SLinus Torvalds 		 */
22511da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
22521da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
22531da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
22541da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
22551da177e4SLinus Torvalds 			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
22561da177e4SLinus Torvalds 			      new_size);
2257f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
22581da177e4SLinus Torvalds 			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
22591da177e4SLinus Torvalds 		}
22601da177e4SLinus Torvalds 		real_size = 0;
22611da177e4SLinus Torvalds 	} else {
22621da177e4SLinus Torvalds 		/*
22631da177e4SLinus Torvalds 		 * Stuck with malloc/realloc.
22641da177e4SLinus Torvalds 		 * For inline data, the underlying buffer must be
22651da177e4SLinus Torvalds 		 * a multiple of 4 bytes in size so that it can be
22661da177e4SLinus Torvalds 		 * logged and stay on word boundaries.  We enforce
22671da177e4SLinus Torvalds 		 * that here.
22681da177e4SLinus Torvalds 		 */
22691da177e4SLinus Torvalds 		real_size = roundup(new_size, 4);
22701da177e4SLinus Torvalds 		if (ifp->if_u1.if_data == NULL) {
22711da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
22724a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
22734a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
22741da177e4SLinus Torvalds 		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
22751da177e4SLinus Torvalds 			/*
22761da177e4SLinus Torvalds 			 * Only do the realloc if the underlying size
22771da177e4SLinus Torvalds 			 * is really changing.
22781da177e4SLinus Torvalds 			 */
22791da177e4SLinus Torvalds 			if (ifp->if_real_bytes != real_size) {
22801da177e4SLinus Torvalds 				ifp->if_u1.if_data =
22811da177e4SLinus Torvalds 					kmem_realloc(ifp->if_u1.if_data,
22821da177e4SLinus Torvalds 							real_size,
22831da177e4SLinus Torvalds 							ifp->if_real_bytes,
22844a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
22851da177e4SLinus Torvalds 			}
22861da177e4SLinus Torvalds 		} else {
22871da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes == 0);
22884a7edddcSDave Chinner 			ifp->if_u1.if_data = kmem_alloc(real_size,
22894a7edddcSDave Chinner 							KM_SLEEP | KM_NOFS);
22901da177e4SLinus Torvalds 			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
22911da177e4SLinus Torvalds 				ifp->if_bytes);
22921da177e4SLinus Torvalds 		}
22931da177e4SLinus Torvalds 	}
22941da177e4SLinus Torvalds 	ifp->if_real_bytes = real_size;
22951da177e4SLinus Torvalds 	ifp->if_bytes = new_size;
22961da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
22971da177e4SLinus Torvalds }
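
/*
 * Illustrative sketch (not in the original source): appending bytes to a
 * local format data fork.  xfs_idata_realloc() grows the fork buffer first
 * (rounding the underlying allocation up to 4 bytes as described above),
 * after which the new bytes can be copied into place.
 */
STATIC void
xfs_ilocal_append_sketch(
	struct xfs_inode	*ip,
	const void		*data,
	int			len)
{
	xfs_ifork_t	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	int		old_bytes = ifp->if_bytes;

	ASSERT(len > 0);
	ASSERT(ifp->if_flags & XFS_IFINLINE);

	xfs_idata_realloc(ip, len, XFS_DATA_FORK);
	memcpy(ifp->if_u1.if_data + old_bytes, data, len);
}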
22981da177e4SLinus Torvalds 
22991da177e4SLinus Torvalds void
23001da177e4SLinus Torvalds xfs_idestroy_fork(
23011da177e4SLinus Torvalds 	xfs_inode_t	*ip,
23021da177e4SLinus Torvalds 	int		whichfork)
23031da177e4SLinus Torvalds {
23041da177e4SLinus Torvalds 	xfs_ifork_t	*ifp;
23051da177e4SLinus Torvalds 
23061da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
23071da177e4SLinus Torvalds 	if (ifp->if_broot != NULL) {
2308f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_broot);
23091da177e4SLinus Torvalds 		ifp->if_broot = NULL;
23101da177e4SLinus Torvalds 	}
23111da177e4SLinus Torvalds 
23121da177e4SLinus Torvalds 	/*
23131da177e4SLinus Torvalds 	 * If the format is local, then we can't have an extents
23141da177e4SLinus Torvalds 	 * array so just look for an inline data array.  If we're
23151da177e4SLinus Torvalds 	 * not local then we may or may not have an extents list,
23161da177e4SLinus Torvalds 	 * so check and free it up if we do.
23171da177e4SLinus Torvalds 	 */
23181da177e4SLinus Torvalds 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
23191da177e4SLinus Torvalds 		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
23201da177e4SLinus Torvalds 		    (ifp->if_u1.if_data != NULL)) {
23211da177e4SLinus Torvalds 			ASSERT(ifp->if_real_bytes != 0);
2322f0e2d93cSDenys Vlasenko 			kmem_free(ifp->if_u1.if_data);
23231da177e4SLinus Torvalds 			ifp->if_u1.if_data = NULL;
23241da177e4SLinus Torvalds 			ifp->if_real_bytes = 0;
23251da177e4SLinus Torvalds 		}
23261da177e4SLinus Torvalds 	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
23270293ce3aSMandy Kirkconnell 		   ((ifp->if_flags & XFS_IFEXTIREC) ||
23280293ce3aSMandy Kirkconnell 		    ((ifp->if_u1.if_extents != NULL) &&
23290293ce3aSMandy Kirkconnell 		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
23301da177e4SLinus Torvalds 		ASSERT(ifp->if_real_bytes != 0);
23314eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
23321da177e4SLinus Torvalds 	}
23331da177e4SLinus Torvalds 	ASSERT(ifp->if_u1.if_extents == NULL ||
23341da177e4SLinus Torvalds 	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
23351da177e4SLinus Torvalds 	ASSERT(ifp->if_real_bytes == 0);
23361da177e4SLinus Torvalds 	if (whichfork == XFS_ATTR_FORK) {
23371da177e4SLinus Torvalds 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
23381da177e4SLinus Torvalds 		ip->i_afp = NULL;
23391da177e4SLinus Torvalds 	}
23401da177e4SLinus Torvalds }
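
/*
 * Sketch (not in the original source) of fork teardown when an inode is
 * finally being freed: the data fork is always destroyed, the attribute fork
 * only if one was ever allocated (xfs_idestroy_fork() frees i_afp itself in
 * that case).  The real teardown lives in the reclaim path in xfs_icache.c;
 * this only shows the shape of it.
 */
STATIC void
xfs_idestroy_forks_sketch(
	struct xfs_inode	*ip)
{
	xfs_idestroy_fork(ip, XFS_DATA_FORK);
	if (ip->i_afp)
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
}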
23411da177e4SLinus Torvalds 
23421da177e4SLinus Torvalds /*
234360ec6783SChristoph Hellwig  * This is called to unpin an inode.  The caller must have the inode locked
234460ec6783SChristoph Hellwig  * in at least shared mode so that the buffer cannot be subsequently pinned
234560ec6783SChristoph Hellwig  * once someone is waiting for it to be unpinned.
23461da177e4SLinus Torvalds  */
234760ec6783SChristoph Hellwig static void
2348f392e631SChristoph Hellwig xfs_iunpin(
234960ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
2350a3f74ffbSDavid Chinner {
2351579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2352a3f74ffbSDavid Chinner 
23534aaf15d1SDave Chinner 	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
23544aaf15d1SDave Chinner 
2355a3f74ffbSDavid Chinner 	/* Give the log a push to start the unpinning I/O */
235660ec6783SChristoph Hellwig 	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2357a14a348bSChristoph Hellwig 
2358a3f74ffbSDavid Chinner }
2359a3f74ffbSDavid Chinner 
2360f392e631SChristoph Hellwig static void
2361f392e631SChristoph Hellwig __xfs_iunpin_wait(
2362f392e631SChristoph Hellwig 	struct xfs_inode	*ip)
2363f392e631SChristoph Hellwig {
2364f392e631SChristoph Hellwig 	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
2365f392e631SChristoph Hellwig 	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
2366f392e631SChristoph Hellwig 
2367f392e631SChristoph Hellwig 	xfs_iunpin(ip);
2368f392e631SChristoph Hellwig 
2369f392e631SChristoph Hellwig 	do {
2370f392e631SChristoph Hellwig 		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
2371f392e631SChristoph Hellwig 		if (xfs_ipincount(ip))
2372f392e631SChristoph Hellwig 			io_schedule();
2373f392e631SChristoph Hellwig 	} while (xfs_ipincount(ip));
2374f392e631SChristoph Hellwig 	finish_wait(wq, &wait.wait);
2375f392e631SChristoph Hellwig }
2376f392e631SChristoph Hellwig 
2377777df5afSDave Chinner void
23781da177e4SLinus Torvalds xfs_iunpin_wait(
237960ec6783SChristoph Hellwig 	struct xfs_inode	*ip)
23801da177e4SLinus Torvalds {
2381f392e631SChristoph Hellwig 	if (xfs_ipincount(ip))
2382f392e631SChristoph Hellwig 		__xfs_iunpin_wait(ip);
23831da177e4SLinus Torvalds }
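
/*
 * Sketch (not in the original source) of the wake-up half of the pin count
 * protocol used by __xfs_iunpin_wait() above: when the last pin reference is
 * dropped (this happens from the inode log item code in xfs_inode_item.c),
 * anyone sleeping on __XFS_IPINNED_BIT is woken.
 */
STATIC void
xfs_iunpin_wake_sketch(
	struct xfs_inode	*ip)
{
	if (atomic_dec_and_test(&ip->i_pincount))
		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
}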
23841da177e4SLinus Torvalds 
23851da177e4SLinus Torvalds /*
23861da177e4SLinus Torvalds  * xfs_iextents_copy()
23871da177e4SLinus Torvalds  *
23881da177e4SLinus Torvalds  * This is called to copy the REAL extents (as opposed to the delayed
23891da177e4SLinus Torvalds  * allocation extents) from the inode into the given buffer.  It
23901da177e4SLinus Torvalds  * returns the number of bytes copied into the buffer.
23911da177e4SLinus Torvalds  *
23921da177e4SLinus Torvalds  * If there are no delayed allocation extents, then we can just
23931da177e4SLinus Torvalds  * memcpy() the extents into the buffer.  Otherwise, we need to
23941da177e4SLinus Torvalds  * examine each extent in turn and skip those which are delayed.
23951da177e4SLinus Torvalds  */
23961da177e4SLinus Torvalds int
23971da177e4SLinus Torvalds xfs_iextents_copy(
23981da177e4SLinus Torvalds 	xfs_inode_t		*ip,
2399a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_t		*dp,
24001da177e4SLinus Torvalds 	int			whichfork)
24011da177e4SLinus Torvalds {
24021da177e4SLinus Torvalds 	int			copied;
24031da177e4SLinus Torvalds 	int			i;
24041da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
24051da177e4SLinus Torvalds 	int			nrecs;
24061da177e4SLinus Torvalds 	xfs_fsblock_t		start_block;
24071da177e4SLinus Torvalds 
24081da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
2409579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
24101da177e4SLinus Torvalds 	ASSERT(ifp->if_bytes > 0);
24111da177e4SLinus Torvalds 
24121da177e4SLinus Torvalds 	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
24133a59c94cSEric Sandeen 	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
24141da177e4SLinus Torvalds 	ASSERT(nrecs > 0);
24151da177e4SLinus Torvalds 
24161da177e4SLinus Torvalds 	/*
24171da177e4SLinus Torvalds 	 * There are some delayed allocation extents in the
24181da177e4SLinus Torvalds 	 * inode, so copy the extents one at a time and skip
24191da177e4SLinus Torvalds 	 * the delayed ones.  There must be at least one
24201da177e4SLinus Torvalds 	 * non-delayed extent.
24211da177e4SLinus Torvalds 	 */
24221da177e4SLinus Torvalds 	copied = 0;
24231da177e4SLinus Torvalds 	for (i = 0; i < nrecs; i++) {
2424a6f64d4aSChristoph Hellwig 		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
24251da177e4SLinus Torvalds 		start_block = xfs_bmbt_get_startblock(ep);
24269d87c319SEric Sandeen 		if (isnullstartblock(start_block)) {
24271da177e4SLinus Torvalds 			/*
24281da177e4SLinus Torvalds 			 * It's a delayed allocation extent, so skip it.
24291da177e4SLinus Torvalds 			 */
24301da177e4SLinus Torvalds 			continue;
24311da177e4SLinus Torvalds 		}
24321da177e4SLinus Torvalds 
24331da177e4SLinus Torvalds 		/* Translate to on disk format */
2434cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
2435cd8b0a97SChristoph Hellwig 		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
2436a6f64d4aSChristoph Hellwig 		dp++;
24371da177e4SLinus Torvalds 		copied++;
24381da177e4SLinus Torvalds 	}
24391da177e4SLinus Torvalds 	ASSERT(copied != 0);
2440a6f64d4aSChristoph Hellwig 	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
24411da177e4SLinus Torvalds 
24421da177e4SLinus Torvalds 	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
24431da177e4SLinus Torvalds }
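
/*
 * Sketch (not in the original source) of the same walk xfs_iextents_copy()
 * does, but only counting how many real (non-delayed) extents it would copy;
 * multiplying the result by sizeof(xfs_bmbt_rec_t) matches the byte count the
 * function returns.
 */
STATIC int
xfs_iext_count_real_sketch(
	xfs_ifork_t	*ifp)
{
	xfs_extnum_t	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	xfs_extnum_t	i;
	int		nreal = 0;

	for (i = 0; i < nrecs; i++) {
		xfs_bmbt_rec_host_t	*ep = xfs_iext_get_ext(ifp, i);

		if (!isnullstartblock(xfs_bmbt_get_startblock(ep)))
			nreal++;
	}
	return nreal;
}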
24441da177e4SLinus Torvalds 
24451da177e4SLinus Torvalds /*
24461da177e4SLinus Torvalds  * Each of the following cases stores data into the same region
24471da177e4SLinus Torvalds  * of the on-disk inode, so only one of them can be valid at
24481da177e4SLinus Torvalds  * any given time. While it is possible to have conflicting formats
24491da177e4SLinus Torvalds  * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
24501da177e4SLinus Torvalds  * in EXTENTS format, this can only happen when the fork has
24511da177e4SLinus Torvalds  * changed formats after being modified but before being flushed.
24521da177e4SLinus Torvalds  * In these cases, the format always takes precedence, because the
24531da177e4SLinus Torvalds  * format indicates the current state of the fork.
24541da177e4SLinus Torvalds  */
24551da177e4SLinus Torvalds /*ARGSUSED*/
2456e4ac967bSDavid Chinner STATIC void
24571da177e4SLinus Torvalds xfs_iflush_fork(
24581da177e4SLinus Torvalds 	xfs_inode_t		*ip,
24591da177e4SLinus Torvalds 	xfs_dinode_t		*dip,
24601da177e4SLinus Torvalds 	xfs_inode_log_item_t	*iip,
24611da177e4SLinus Torvalds 	int			whichfork,
24621da177e4SLinus Torvalds 	xfs_buf_t		*bp)
24631da177e4SLinus Torvalds {
24641da177e4SLinus Torvalds 	char			*cp;
24651da177e4SLinus Torvalds 	xfs_ifork_t		*ifp;
24661da177e4SLinus Torvalds 	xfs_mount_t		*mp;
24671da177e4SLinus Torvalds 	static const short	brootflag[2] =
24681da177e4SLinus Torvalds 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
24691da177e4SLinus Torvalds 	static const short	dataflag[2] =
24701da177e4SLinus Torvalds 		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
24711da177e4SLinus Torvalds 	static const short	extflag[2] =
24721da177e4SLinus Torvalds 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
24731da177e4SLinus Torvalds 
2474e4ac967bSDavid Chinner 	if (!iip)
2475e4ac967bSDavid Chinner 		return;
24761da177e4SLinus Torvalds 	ifp = XFS_IFORK_PTR(ip, whichfork);
24771da177e4SLinus Torvalds 	/*
24781da177e4SLinus Torvalds 	 * This can happen if we gave up in xfs_iformat() in an error path
24791da177e4SLinus Torvalds 	 * for the attribute fork.
24801da177e4SLinus Torvalds 	 */
2481e4ac967bSDavid Chinner 	if (!ifp) {
24821da177e4SLinus Torvalds 		ASSERT(whichfork == XFS_ATTR_FORK);
2483e4ac967bSDavid Chinner 		return;
24841da177e4SLinus Torvalds 	}
24851da177e4SLinus Torvalds 	cp = XFS_DFORK_PTR(dip, whichfork);
24861da177e4SLinus Torvalds 	mp = ip->i_mount;
24871da177e4SLinus Torvalds 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
24881da177e4SLinus Torvalds 	case XFS_DINODE_FMT_LOCAL:
2489f5d8d5c4SChristoph Hellwig 		if ((iip->ili_fields & dataflag[whichfork]) &&
24901da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
24911da177e4SLinus Torvalds 			ASSERT(ifp->if_u1.if_data != NULL);
24921da177e4SLinus Torvalds 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
24931da177e4SLinus Torvalds 			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
24941da177e4SLinus Torvalds 		}
24951da177e4SLinus Torvalds 		break;
24961da177e4SLinus Torvalds 
24971da177e4SLinus Torvalds 	case XFS_DINODE_FMT_EXTENTS:
24981da177e4SLinus Torvalds 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
2499f5d8d5c4SChristoph Hellwig 		       !(iip->ili_fields & extflag[whichfork]));
2500f5d8d5c4SChristoph Hellwig 		if ((iip->ili_fields & extflag[whichfork]) &&
25011da177e4SLinus Torvalds 		    (ifp->if_bytes > 0)) {
2502ab1908a5SChristoph Hellwig 			ASSERT(xfs_iext_get_ext(ifp, 0));
25031da177e4SLinus Torvalds 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
25041da177e4SLinus Torvalds 			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
25051da177e4SLinus Torvalds 				whichfork);
25061da177e4SLinus Torvalds 		}
25071da177e4SLinus Torvalds 		break;
25081da177e4SLinus Torvalds 
25091da177e4SLinus Torvalds 	case XFS_DINODE_FMT_BTREE:
2510f5d8d5c4SChristoph Hellwig 		if ((iip->ili_fields & brootflag[whichfork]) &&
25111da177e4SLinus Torvalds 		    (ifp->if_broot_bytes > 0)) {
25121da177e4SLinus Torvalds 			ASSERT(ifp->if_broot != NULL);
25131da177e4SLinus Torvalds 			ASSERT(ifp->if_broot_bytes <=
25141da177e4SLinus Torvalds 			       (XFS_IFORK_SIZE(ip, whichfork) +
2515ee1a47abSChristoph Hellwig 				XFS_BROOT_SIZE_ADJ(ip)));
251660197e8dSChristoph Hellwig 			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
25171da177e4SLinus Torvalds 				(xfs_bmdr_block_t *)cp,
25181da177e4SLinus Torvalds 				XFS_DFORK_SIZE(dip, mp, whichfork));
25191da177e4SLinus Torvalds 		}
25201da177e4SLinus Torvalds 		break;
25211da177e4SLinus Torvalds 
25221da177e4SLinus Torvalds 	case XFS_DINODE_FMT_DEV:
2523f5d8d5c4SChristoph Hellwig 		if (iip->ili_fields & XFS_ILOG_DEV) {
25241da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
252581591fe2SChristoph Hellwig 			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
25261da177e4SLinus Torvalds 		}
25271da177e4SLinus Torvalds 		break;
25281da177e4SLinus Torvalds 
25291da177e4SLinus Torvalds 	case XFS_DINODE_FMT_UUID:
2530f5d8d5c4SChristoph Hellwig 		if (iip->ili_fields & XFS_ILOG_UUID) {
25311da177e4SLinus Torvalds 			ASSERT(whichfork == XFS_DATA_FORK);
253281591fe2SChristoph Hellwig 			memcpy(XFS_DFORK_DPTR(dip),
253381591fe2SChristoph Hellwig 			       &ip->i_df.if_u2.if_uuid,
25341da177e4SLinus Torvalds 			       sizeof(uuid_t));
25351da177e4SLinus Torvalds 		}
25361da177e4SLinus Torvalds 		break;
25371da177e4SLinus Torvalds 
25381da177e4SLinus Torvalds 	default:
25391da177e4SLinus Torvalds 		ASSERT(0);
25401da177e4SLinus Torvalds 		break;
25411da177e4SLinus Torvalds 	}
25421da177e4SLinus Torvalds }
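
/*
 * Sketch (not in the original source) making the brootflag/dataflag/extflag
 * tables above explicit: each inode log flag has a data-fork and an
 * attr-fork variant, selected by whichfork.  Comparable helpers exist
 * elsewhere in XFS; this one is purely for illustration.
 */
STATIC int
xfs_ilog_fdata_sketch(
	int	whichfork)
{
	return whichfork == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA;
}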
25431da177e4SLinus Torvalds 
2544bad55843SDavid Chinner STATIC int
2545bad55843SDavid Chinner xfs_iflush_cluster(
2546bad55843SDavid Chinner 	xfs_inode_t	*ip,
2547bad55843SDavid Chinner 	xfs_buf_t	*bp)
2548bad55843SDavid Chinner {
2549bad55843SDavid Chinner 	xfs_mount_t		*mp = ip->i_mount;
25505017e97dSDave Chinner 	struct xfs_perag	*pag;
2551bad55843SDavid Chinner 	unsigned long		first_index, mask;
2552c8f5f12eSDavid Chinner 	unsigned long		inodes_per_cluster;
2553bad55843SDavid Chinner 	int			ilist_size;
2554bad55843SDavid Chinner 	xfs_inode_t		**ilist;
2555bad55843SDavid Chinner 	xfs_inode_t		*iq;
2556bad55843SDavid Chinner 	int			nr_found;
2557bad55843SDavid Chinner 	int			clcount = 0;
2558bad55843SDavid Chinner 	int			bufwasdelwri;
2559bad55843SDavid Chinner 	int			i;
2560bad55843SDavid Chinner 
25615017e97dSDave Chinner 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
2562bad55843SDavid Chinner 
2563c8f5f12eSDavid Chinner 	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2564c8f5f12eSDavid Chinner 	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
256549383b0eSDavid Chinner 	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2566bad55843SDavid Chinner 	if (!ilist)
256744b56e0aSDave Chinner 		goto out_put;
2568bad55843SDavid Chinner 
2569bad55843SDavid Chinner 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
2570bad55843SDavid Chinner 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
25711a3e8f3dSDave Chinner 	rcu_read_lock();
2572bad55843SDavid Chinner 	/* really need a gang lookup range call here */
2573bad55843SDavid Chinner 	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2574c8f5f12eSDavid Chinner 					first_index, inodes_per_cluster);
2575bad55843SDavid Chinner 	if (nr_found == 0)
2576bad55843SDavid Chinner 		goto out_free;
2577bad55843SDavid Chinner 
2578bad55843SDavid Chinner 	for (i = 0; i < nr_found; i++) {
2579bad55843SDavid Chinner 		iq = ilist[i];
2580bad55843SDavid Chinner 		if (iq == ip)
2581bad55843SDavid Chinner 			continue;
25821a3e8f3dSDave Chinner 
25831a3e8f3dSDave Chinner 		/*
25841a3e8f3dSDave Chinner 		 * because this is an RCU protected lookup, we could find a
25851a3e8f3dSDave Chinner 		 * recently freed or even reallocated inode during the lookup.
25861a3e8f3dSDave Chinner 		 * We need to check under the i_flags_lock for a valid inode
25871a3e8f3dSDave Chinner 		 * here. Skip it if it is not valid or the wrong inode.
25881a3e8f3dSDave Chinner 		 */
25891a3e8f3dSDave Chinner 		spin_lock(&iq->i_flags_lock);
25901a3e8f3dSDave Chinner 		if (!iq->i_ino ||
25911a3e8f3dSDave Chinner 		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
25921a3e8f3dSDave Chinner 			spin_unlock(&iq->i_flags_lock);
25931a3e8f3dSDave Chinner 			continue;
25941a3e8f3dSDave Chinner 		}
25951a3e8f3dSDave Chinner 		spin_unlock(&iq->i_flags_lock);
25961a3e8f3dSDave Chinner 
2597bad55843SDavid Chinner 		/*
2598bad55843SDavid Chinner 		 * Do an unprotected check to see if the inode is dirty and
2599bad55843SDavid Chinner 		 * is a candidate for flushing.  These checks will be repeated
2600bad55843SDavid Chinner 		 * later after the appropriate locks are acquired.
2601bad55843SDavid Chinner 		 */
260233540408SDavid Chinner 		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
2603bad55843SDavid Chinner 			continue;
2604bad55843SDavid Chinner 
2605bad55843SDavid Chinner 		/*
2606bad55843SDavid Chinner 		 * Try to get locks.  If any are unavailable or it is pinned,
2607bad55843SDavid Chinner 		 * then this inode cannot be flushed and is skipped.
2608bad55843SDavid Chinner 		 */
2609bad55843SDavid Chinner 
2610bad55843SDavid Chinner 		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
2611bad55843SDavid Chinner 			continue;
2612bad55843SDavid Chinner 		if (!xfs_iflock_nowait(iq)) {
2613bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2614bad55843SDavid Chinner 			continue;
2615bad55843SDavid Chinner 		}
2616bad55843SDavid Chinner 		if (xfs_ipincount(iq)) {
2617bad55843SDavid Chinner 			xfs_ifunlock(iq);
2618bad55843SDavid Chinner 			xfs_iunlock(iq, XFS_ILOCK_SHARED);
2619bad55843SDavid Chinner 			continue;
2620bad55843SDavid Chinner 		}
2621bad55843SDavid Chinner 
2622bad55843SDavid Chinner 		/*
2623bad55843SDavid Chinner 		 * arriving here means that this inode can be flushed.  First
2624bad55843SDavid Chinner 		 * re-check that it's dirty before flushing.
2625bad55843SDavid Chinner 		 */
262633540408SDavid Chinner 		if (!xfs_inode_clean(iq)) {
2627bad55843SDavid Chinner 			int	error;
2628bad55843SDavid Chinner 			error = xfs_iflush_int(iq, bp);
2629bad55843SDavid Chinner 			if (error) {
2630bad55843SDavid Chinner 				xfs_iunlock(iq, XFS_ILOCK_SHARED);
2631bad55843SDavid Chinner 				goto cluster_corrupt_out;
2632bad55843SDavid Chinner 			}
2633bad55843SDavid Chinner 			clcount++;
2634bad55843SDavid Chinner 		} else {
2635bad55843SDavid Chinner 			xfs_ifunlock(iq);
2636bad55843SDavid Chinner 		}
2637bad55843SDavid Chinner 		xfs_iunlock(iq, XFS_ILOCK_SHARED);
2638bad55843SDavid Chinner 	}
2639bad55843SDavid Chinner 
2640bad55843SDavid Chinner 	if (clcount) {
2641bad55843SDavid Chinner 		XFS_STATS_INC(xs_icluster_flushcnt);
2642bad55843SDavid Chinner 		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
2643bad55843SDavid Chinner 	}
2644bad55843SDavid Chinner 
2645bad55843SDavid Chinner out_free:
26461a3e8f3dSDave Chinner 	rcu_read_unlock();
2647f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
264844b56e0aSDave Chinner out_put:
264944b56e0aSDave Chinner 	xfs_perag_put(pag);
2650bad55843SDavid Chinner 	return 0;
2651bad55843SDavid Chinner 
2652bad55843SDavid Chinner 
2653bad55843SDavid Chinner cluster_corrupt_out:
2654bad55843SDavid Chinner 	/*
2655bad55843SDavid Chinner 	 * Corruption detected in the clustering loop.  Invalidate the
2656bad55843SDavid Chinner 	 * inode buffer and shut down the filesystem.
2657bad55843SDavid Chinner 	 */
26581a3e8f3dSDave Chinner 	rcu_read_unlock();
2659bad55843SDavid Chinner 	/*
266043ff2122SChristoph Hellwig 	 * Clean up the buffer.  If it was delwri, just release it --
2661bad55843SDavid Chinner 	 * brelse can handle it with no problems.  If not, shut down the
2662bad55843SDavid Chinner 	 * filesystem before releasing the buffer.
2663bad55843SDavid Chinner 	 */
266443ff2122SChristoph Hellwig 	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
2665bad55843SDavid Chinner 	if (bufwasdelwri)
2666bad55843SDavid Chinner 		xfs_buf_relse(bp);
2667bad55843SDavid Chinner 
2668bad55843SDavid Chinner 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2669bad55843SDavid Chinner 
2670bad55843SDavid Chinner 	if (!bufwasdelwri) {
2671bad55843SDavid Chinner 		/*
2672bad55843SDavid Chinner 		 * Just like incore_relse: if we have b_iodone functions,
2673bad55843SDavid Chinner 		 * mark the buffer as an error and call them.  Otherwise
2674bad55843SDavid Chinner 		 * mark it as stale and brelse.
2675bad55843SDavid Chinner 		 */
2676cb669ca5SChristoph Hellwig 		if (bp->b_iodone) {
2677bad55843SDavid Chinner 			XFS_BUF_UNDONE(bp);
2678c867cb61SChristoph Hellwig 			xfs_buf_stale(bp);
26795a52c2a5SChandra Seetharaman 			xfs_buf_ioerror(bp, EIO);
26801a1a3e97SChristoph Hellwig 			xfs_buf_ioend(bp, 0);
2681bad55843SDavid Chinner 		} else {
2682c867cb61SChristoph Hellwig 			xfs_buf_stale(bp);
2683bad55843SDavid Chinner 			xfs_buf_relse(bp);
2684bad55843SDavid Chinner 		}
2685bad55843SDavid Chinner 	}
2686bad55843SDavid Chinner 
2687bad55843SDavid Chinner 	/*
2688bad55843SDavid Chinner 	 * Unlocks the flush lock
2689bad55843SDavid Chinner 	 */
269004913fddSDave Chinner 	xfs_iflush_abort(iq, false);
2691f0e2d93cSDenys Vlasenko 	kmem_free(ilist);
269244b56e0aSDave Chinner 	xfs_perag_put(pag);
2693bad55843SDavid Chinner 	return XFS_ERROR(EFSCORRUPTED);
2694bad55843SDavid Chinner }
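
/*
 * Worked example (not in the original source) of the cluster index
 * arithmetic used in xfs_iflush_cluster() above, assuming for the sake of
 * illustration an 8k inode cluster and 256 byte inodes (sb_inodelog == 8):
 *
 *	inodes_per_cluster = 8192 >> 8 = 32
 *	mask               = ~(32 - 1) = ~0x1f
 *	first_index        = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask
 *
 * Every inode sharing the same cluster buffer maps to the same first_index,
 * so one radix tree gang lookup of length inodes_per_cluster finds all of
 * the in-core inodes that might be flushed with this buffer.
 */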
2695bad55843SDavid Chinner 
26961da177e4SLinus Torvalds /*
26974c46819aSChristoph Hellwig  * Flush dirty inode metadata into the backing buffer.
26984c46819aSChristoph Hellwig  *
26994c46819aSChristoph Hellwig  * The caller must have the inode lock and the inode flush lock held.  The
27004c46819aSChristoph Hellwig  * inode lock will still be held upon return to the caller, and the inode
27014c46819aSChristoph Hellwig  * flush lock will be released after the inode has reached the disk.
27024c46819aSChristoph Hellwig  *
27034c46819aSChristoph Hellwig  * The caller must write out the buffer returned in *bpp and release it.
27041da177e4SLinus Torvalds  */
27051da177e4SLinus Torvalds int
27061da177e4SLinus Torvalds xfs_iflush(
27074c46819aSChristoph Hellwig 	struct xfs_inode	*ip,
27084c46819aSChristoph Hellwig 	struct xfs_buf		**bpp)
27091da177e4SLinus Torvalds {
27104c46819aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
27114c46819aSChristoph Hellwig 	struct xfs_buf		*bp;
27124c46819aSChristoph Hellwig 	struct xfs_dinode	*dip;
27131da177e4SLinus Torvalds 	int			error;
27141da177e4SLinus Torvalds 
27151da177e4SLinus Torvalds 	XFS_STATS_INC(xs_iflush_count);
27161da177e4SLinus Torvalds 
2717579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2718474fce06SChristoph Hellwig 	ASSERT(xfs_isiflocked(ip));
27191da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
27208096b1ebSChristoph Hellwig 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
27211da177e4SLinus Torvalds 
27224c46819aSChristoph Hellwig 	*bpp = NULL;
27231da177e4SLinus Torvalds 
27241da177e4SLinus Torvalds 	xfs_iunpin_wait(ip);
27251da177e4SLinus Torvalds 
27261da177e4SLinus Torvalds 	/*
27274b6a4688SDave Chinner 	 * For stale inodes we cannot rely on the backing buffer remaining
27284b6a4688SDave Chinner 	 * stale in cache for the remaining life of the stale inode and so
2729475ee413SChristoph Hellwig 	 * xfs_imap_to_bp() below may give us a buffer that no longer contains
27304b6a4688SDave Chinner 	 * inodes. We have to check this after ensuring the inode is
27314b6a4688SDave Chinner 	 * unpinned so that it is safe to reclaim the stale inode after the
27324b6a4688SDave Chinner 	 * flush call.
27334b6a4688SDave Chinner 	 */
27344b6a4688SDave Chinner 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
27354b6a4688SDave Chinner 		xfs_ifunlock(ip);
27364b6a4688SDave Chinner 		return 0;
27374b6a4688SDave Chinner 	}
27384b6a4688SDave Chinner 
27394b6a4688SDave Chinner 	/*
27401da177e4SLinus Torvalds 	 * This may have been unpinned because the filesystem is shutting
27411da177e4SLinus Torvalds 	 * down forcibly. If that's the case we must not write this inode
274232ce90a4SChristoph Hellwig 	 * to disk, because the log record didn't make it to disk.
274332ce90a4SChristoph Hellwig 	 *
274432ce90a4SChristoph Hellwig 	 * We also have to remove the log item from the AIL in this case,
274532ce90a4SChristoph Hellwig 	 * as we wait for an empty AIL as part of the unmount process.
27461da177e4SLinus Torvalds 	 */
27471da177e4SLinus Torvalds 	if (XFS_FORCED_SHUTDOWN(mp)) {
274832ce90a4SChristoph Hellwig 		error = XFS_ERROR(EIO);
274932ce90a4SChristoph Hellwig 		goto abort_out;
27501da177e4SLinus Torvalds 	}
27511da177e4SLinus Torvalds 
27521da177e4SLinus Torvalds 	/*
2753a3f74ffbSDavid Chinner 	 * Get the buffer containing the on-disk inode.
2754a3f74ffbSDavid Chinner 	 */
2755475ee413SChristoph Hellwig 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
2756475ee413SChristoph Hellwig 			       0);
2757a3f74ffbSDavid Chinner 	if (error || !bp) {
2758a3f74ffbSDavid Chinner 		xfs_ifunlock(ip);
2759a3f74ffbSDavid Chinner 		return error;
2760a3f74ffbSDavid Chinner 	}
2761a3f74ffbSDavid Chinner 
2762a3f74ffbSDavid Chinner 	/*
27631da177e4SLinus Torvalds 	 * First flush out the inode that xfs_iflush was called with.
27641da177e4SLinus Torvalds 	 */
27651da177e4SLinus Torvalds 	error = xfs_iflush_int(ip, bp);
2766bad55843SDavid Chinner 	if (error)
27671da177e4SLinus Torvalds 		goto corrupt_out;
27681da177e4SLinus Torvalds 
27691da177e4SLinus Torvalds 	/*
2770a3f74ffbSDavid Chinner 	 * If the buffer is pinned then push on the log now so we won't
2771a3f74ffbSDavid Chinner 	 * get stuck waiting in the write for too long.
2772a3f74ffbSDavid Chinner 	 */
2773811e64c7SChandra Seetharaman 	if (xfs_buf_ispinned(bp))
2774a14a348bSChristoph Hellwig 		xfs_log_force(mp, 0);
2775a3f74ffbSDavid Chinner 
2776a3f74ffbSDavid Chinner 	/*
27771da177e4SLinus Torvalds 	 * inode clustering:
27781da177e4SLinus Torvalds 	 * see if other inodes can be gathered into this write
27791da177e4SLinus Torvalds 	 */
2780bad55843SDavid Chinner 	error = xfs_iflush_cluster(ip, bp);
2781bad55843SDavid Chinner 	if (error)
27821da177e4SLinus Torvalds 		goto cluster_corrupt_out;
27831da177e4SLinus Torvalds 
27844c46819aSChristoph Hellwig 	*bpp = bp;
27854c46819aSChristoph Hellwig 	return 0;
27861da177e4SLinus Torvalds 
27871da177e4SLinus Torvalds corrupt_out:
27881da177e4SLinus Torvalds 	xfs_buf_relse(bp);
27897d04a335SNathan Scott 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
27901da177e4SLinus Torvalds cluster_corrupt_out:
279132ce90a4SChristoph Hellwig 	error = XFS_ERROR(EFSCORRUPTED);
279232ce90a4SChristoph Hellwig abort_out:
27931da177e4SLinus Torvalds 	/*
27941da177e4SLinus Torvalds 	 * Unlocks the flush lock
27951da177e4SLinus Torvalds 	 */
279604913fddSDave Chinner 	xfs_iflush_abort(ip, false);
279732ce90a4SChristoph Hellwig 	return error;
27981da177e4SLinus Torvalds }
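
/*
 * Sketch (not in the original source) of the caller-side contract spelled
 * out in the comment above xfs_iflush(): on success the returned buffer is
 * queued for delayed write on a caller-owned list and then released.  The
 * buffer_list argument here is illustrative; the real caller is the inode
 * item push code in xfs_inode_item.c.
 */
STATIC int
xfs_iflush_usage_sketch(
	struct xfs_inode	*ip,
	struct list_head	*buffer_list)
{
	struct xfs_buf	*bp;
	int		error;

	error = xfs_iflush(ip, &bp);
	if (error)
		return error;

	/* a stale inode is flushed "for free": no buffer is returned */
	if (!bp)
		return 0;

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);
	return 0;
}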
27991da177e4SLinus Torvalds 
28001da177e4SLinus Torvalds 
28011da177e4SLinus Torvalds STATIC int
28021da177e4SLinus Torvalds xfs_iflush_int(
2803*93848a99SChristoph Hellwig 	struct xfs_inode	*ip,
2804*93848a99SChristoph Hellwig 	struct xfs_buf		*bp)
28051da177e4SLinus Torvalds {
2806*93848a99SChristoph Hellwig 	struct xfs_inode_log_item *iip = ip->i_itemp;
2807*93848a99SChristoph Hellwig 	struct xfs_dinode	*dip;
2808*93848a99SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
28091da177e4SLinus Torvalds 
2810579aa9caSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2811474fce06SChristoph Hellwig 	ASSERT(xfs_isiflocked(ip));
28121da177e4SLinus Torvalds 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
28138096b1ebSChristoph Hellwig 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2814*93848a99SChristoph Hellwig 	ASSERT(iip != NULL && iip->ili_fields != 0);
28151da177e4SLinus Torvalds 
28161da177e4SLinus Torvalds 	/* set *dip = inode's place in the buffer */
281792bfc6e7SChristoph Hellwig 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
28181da177e4SLinus Torvalds 
281969ef921bSChristoph Hellwig 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
28201da177e4SLinus Torvalds 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
28216a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
28226a19d939SDave Chinner 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
28236a19d939SDave Chinner 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
28241da177e4SLinus Torvalds 		goto corrupt_out;
28251da177e4SLinus Torvalds 	}
28261da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
28271da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
28286a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
28296a19d939SDave Chinner 			"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
28306a19d939SDave Chinner 			__func__, ip->i_ino, ip, ip->i_d.di_magic);
28311da177e4SLinus Torvalds 		goto corrupt_out;
28321da177e4SLinus Torvalds 	}
2833abbede1bSAl Viro 	if (S_ISREG(ip->i_d.di_mode)) {
28341da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
28351da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
28361da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
28371da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
28386a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
28396a19d939SDave Chinner 				"%s: Bad regular inode %Lu, ptr 0x%p",
28406a19d939SDave Chinner 				__func__, ip->i_ino, ip);
28411da177e4SLinus Torvalds 			goto corrupt_out;
28421da177e4SLinus Torvalds 		}
2843abbede1bSAl Viro 	} else if (S_ISDIR(ip->i_d.di_mode)) {
28441da177e4SLinus Torvalds 		if (XFS_TEST_ERROR(
28451da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
28461da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
28471da177e4SLinus Torvalds 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
28481da177e4SLinus Torvalds 		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
28496a19d939SDave Chinner 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
28506a19d939SDave Chinner 				"%s: Bad directory inode %Lu, ptr 0x%p",
28516a19d939SDave Chinner 				__func__, ip->i_ino, ip);
28521da177e4SLinus Torvalds 			goto corrupt_out;
28531da177e4SLinus Torvalds 		}
28541da177e4SLinus Torvalds 	}
28551da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
28561da177e4SLinus Torvalds 				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
28571da177e4SLinus Torvalds 				XFS_RANDOM_IFLUSH_5)) {
28586a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
28596a19d939SDave Chinner 			"%s: detected corrupt incore inode %Lu, "
28606a19d939SDave Chinner 			"total extents = %d, nblocks = %Ld, ptr 0x%p",
28616a19d939SDave Chinner 			__func__, ip->i_ino,
28621da177e4SLinus Torvalds 			ip->i_d.di_nextents + ip->i_d.di_anextents,
28636a19d939SDave Chinner 			ip->i_d.di_nblocks, ip);
28641da177e4SLinus Torvalds 		goto corrupt_out;
28651da177e4SLinus Torvalds 	}
28661da177e4SLinus Torvalds 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
28671da177e4SLinus Torvalds 				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
28686a19d939SDave Chinner 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
28696a19d939SDave Chinner 			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
28706a19d939SDave Chinner 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
28711da177e4SLinus Torvalds 		goto corrupt_out;
28721da177e4SLinus Torvalds 	}
28731da177e4SLinus Torvalds 	/*
28741da177e4SLinus Torvalds 	 * bump the flush iteration count, used to detect flushes which
2875*93848a99SChristoph Hellwig 	 * postdate a log record during recovery. This is redundant as we now
2876*93848a99SChristoph Hellwig 	 * log every change and hence this can't happen. Still, it doesn't hurt.
28771da177e4SLinus Torvalds 	 */
28781da177e4SLinus Torvalds 	ip->i_d.di_flushiter++;
28791da177e4SLinus Torvalds 
28801da177e4SLinus Torvalds 	/*
28811da177e4SLinus Torvalds 	 * Copy the dirty parts of the inode into the on-disk
28821da177e4SLinus Torvalds 	 * inode.  We always copy out the core of the inode,
28831da177e4SLinus Torvalds 	 * because if the inode is dirty at all the core must
28841da177e4SLinus Torvalds 	 * be.
28851da177e4SLinus Torvalds 	 */
288681591fe2SChristoph Hellwig 	xfs_dinode_to_disk(dip, &ip->i_d);
28871da177e4SLinus Torvalds 
28881da177e4SLinus Torvalds 	/* Wrap, we never let the log put out DI_MAX_FLUSH */
28891da177e4SLinus Torvalds 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
28901da177e4SLinus Torvalds 		ip->i_d.di_flushiter = 0;
28911da177e4SLinus Torvalds 
28921da177e4SLinus Torvalds 	/*
28931da177e4SLinus Torvalds 	 * If this is really an old format inode and the superblock version
28941da177e4SLinus Torvalds 	 * has not been updated to support only new format inodes, then
28951da177e4SLinus Torvalds 	 * convert back to the old inode format.  If the superblock version
28961da177e4SLinus Torvalds 	 * has been updated, then make the conversion permanent.
28971da177e4SLinus Torvalds 	 */
289851ce16d5SChristoph Hellwig 	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
289951ce16d5SChristoph Hellwig 	if (ip->i_d.di_version == 1) {
290062118709SEric Sandeen 		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
29011da177e4SLinus Torvalds 			/*
29021da177e4SLinus Torvalds 			 * Convert it back.
29031da177e4SLinus Torvalds 			 */
29041da177e4SLinus Torvalds 			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
290581591fe2SChristoph Hellwig 			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
29061da177e4SLinus Torvalds 		} else {
29071da177e4SLinus Torvalds 			/*
29081da177e4SLinus Torvalds 			 * The superblock version has already been bumped,
29091da177e4SLinus Torvalds 			 * so just make the conversion to the new inode
29101da177e4SLinus Torvalds 			 * format permanent.
29111da177e4SLinus Torvalds 			 */
291251ce16d5SChristoph Hellwig 			ip->i_d.di_version = 2;
291351ce16d5SChristoph Hellwig 			dip->di_version = 2;
29141da177e4SLinus Torvalds 			ip->i_d.di_onlink = 0;
291581591fe2SChristoph Hellwig 			dip->di_onlink = 0;
29161da177e4SLinus Torvalds 			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
291781591fe2SChristoph Hellwig 			memset(&(dip->di_pad[0]), 0,
291881591fe2SChristoph Hellwig 			      sizeof(dip->di_pad));
29196743099cSArkadiusz Miśkiewicz 			ASSERT(xfs_get_projid(ip) == 0);
29201da177e4SLinus Torvalds 		}
29211da177e4SLinus Torvalds 	}
29221da177e4SLinus Torvalds 
2923e4ac967bSDavid Chinner 	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp);
2924e4ac967bSDavid Chinner 	if (XFS_IFORK_Q(ip))
2925e4ac967bSDavid Chinner 		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp);
29261da177e4SLinus Torvalds 	xfs_inobp_check(mp, bp);
29271da177e4SLinus Torvalds 
29281da177e4SLinus Torvalds 	/*
2929f5d8d5c4SChristoph Hellwig 	 * We've recorded everything logged in the inode, so we'd like to clear
2930f5d8d5c4SChristoph Hellwig 	 * the ili_fields bits so we don't log and flush things unnecessarily.
2931f5d8d5c4SChristoph Hellwig 	 * However, we can't stop logging all this information until the data
2932f5d8d5c4SChristoph Hellwig 	 * we've copied into the disk buffer is written to disk.  If we did we
2933f5d8d5c4SChristoph Hellwig 	 * might overwrite the copy of the inode in the log with all the data
2934f5d8d5c4SChristoph Hellwig 	 * after re-logging only part of it, and in the face of a crash we
2935f5d8d5c4SChristoph Hellwig 	 * wouldn't have all the data we need to recover.
29361da177e4SLinus Torvalds 	 *
2937f5d8d5c4SChristoph Hellwig 	 * What we do is move the bits to the ili_last_fields field.  When
2938f5d8d5c4SChristoph Hellwig 	 * logging the inode, these bits are moved back to the ili_fields field.
2939f5d8d5c4SChristoph Hellwig 	 * In the xfs_iflush_done() routine we clear ili_last_fields, since we
2940f5d8d5c4SChristoph Hellwig 	 * know that the information those bits represent is permanently on
2941f5d8d5c4SChristoph Hellwig 	 * disk.  As long as the flush completes before the inode is logged
2942f5d8d5c4SChristoph Hellwig 	 * again, then both ili_fields and ili_last_fields will be cleared.
29431da177e4SLinus Torvalds 	 *
2944f5d8d5c4SChristoph Hellwig 	 * We can play with the ili_fields bits here, because the inode lock
2945f5d8d5c4SChristoph Hellwig 	 * must be held exclusively in order to set bits there and the flush
2946f5d8d5c4SChristoph Hellwig 	 * lock protects the ili_last_fields bits.  Set ili_logged so the flush
2947f5d8d5c4SChristoph Hellwig 	 * done routine can tell whether or not to look in the AIL.  Also, store
2948f5d8d5c4SChristoph Hellwig 	 * the current LSN of the inode so that we can tell whether the item has
2949f5d8d5c4SChristoph Hellwig 	 * moved in the AIL from xfs_iflush_done().  In order to read the lsn we
2950f5d8d5c4SChristoph Hellwig 	 * need the AIL lock, because it is a 64 bit value that cannot be read
2951f5d8d5c4SChristoph Hellwig 	 * atomically.
29521da177e4SLinus Torvalds 	 */
2953f5d8d5c4SChristoph Hellwig 	iip->ili_last_fields = iip->ili_fields;
2954f5d8d5c4SChristoph Hellwig 	iip->ili_fields = 0;
29551da177e4SLinus Torvalds 	iip->ili_logged = 1;
29561da177e4SLinus Torvalds 
29577b2e2a31SDavid Chinner 	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
29587b2e2a31SDavid Chinner 				&iip->ili_item.li_lsn);
29591da177e4SLinus Torvalds 
29601da177e4SLinus Torvalds 	/*
29611da177e4SLinus Torvalds 	 * Attach the function xfs_iflush_done to the inode's
29621da177e4SLinus Torvalds 	 * buffer.  This will remove the inode from the AIL
29631da177e4SLinus Torvalds 	 * and unlock the inode's flush lock when the inode is
29641da177e4SLinus Torvalds 	 * completely written to disk.
29651da177e4SLinus Torvalds 	 */
2966ca30b2a7SChristoph Hellwig 	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
29671da177e4SLinus Torvalds 
2968*93848a99SChristoph Hellwig 	/* update the lsn in the on disk inode if required */
2969*93848a99SChristoph Hellwig 	if (ip->i_d.di_version == 3)
2970*93848a99SChristoph Hellwig 		dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
2971*93848a99SChristoph Hellwig 
2972*93848a99SChristoph Hellwig 	/* generate the checksum. */
2973*93848a99SChristoph Hellwig 	xfs_dinode_calc_crc(mp, dip);
2974*93848a99SChristoph Hellwig 
2975adadbeefSChristoph Hellwig 	ASSERT(bp->b_fspriv != NULL);
2976cb669ca5SChristoph Hellwig 	ASSERT(bp->b_iodone != NULL);
29771da177e4SLinus Torvalds 	return 0;
29781da177e4SLinus Torvalds 
29791da177e4SLinus Torvalds corrupt_out:
29801da177e4SLinus Torvalds 	return XFS_ERROR(EFSCORRUPTED);
29811da177e4SLinus Torvalds }
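
/*
 * Minimal sketch (not in the original source) of the completion side of the
 * ili_fields/ili_last_fields handoff described above: once the buffer I/O
 * has completed, the flushed state is on disk, so ili_last_fields can be
 * dropped and the flush lock released.  The real xfs_iflush_done() in
 * xfs_inode_item.c also removes the item from the AIL using the saved
 * ili_flush_lsn; that part is omitted here.
 */
STATIC void
xfs_iflush_done_sketch(
	struct xfs_inode_log_item	*iip)
{
	struct xfs_inode	*ip = iip->ili_inode;

	iip->ili_last_fields = 0;
	iip->ili_logged = 0;
	xfs_ifunlock(ip);
}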
29821da177e4SLinus Torvalds 
29834eea22f0SMandy Kirkconnell /*
29844eea22f0SMandy Kirkconnell  * Return a pointer to the extent record at file index idx.
29854eea22f0SMandy Kirkconnell  */
2986a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *
29874eea22f0SMandy Kirkconnell xfs_iext_get_ext(
29884eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
29894eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx)		/* index of target extent */
29904eea22f0SMandy Kirkconnell {
29914eea22f0SMandy Kirkconnell 	ASSERT(idx >= 0);
299287bef181SChristoph Hellwig 	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
299387bef181SChristoph Hellwig 
29940293ce3aSMandy Kirkconnell 	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
29950293ce3aSMandy Kirkconnell 		return ifp->if_u1.if_ext_irec->er_extbuf;
29960293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
29970293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;		/* irec pointer */
29980293ce3aSMandy Kirkconnell 		int		erp_idx = 0;	/* irec index */
29990293ce3aSMandy Kirkconnell 		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
30000293ce3aSMandy Kirkconnell 
30010293ce3aSMandy Kirkconnell 		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
30020293ce3aSMandy Kirkconnell 		return &erp->er_extbuf[page_idx];
30030293ce3aSMandy Kirkconnell 	} else if (ifp->if_bytes) {
30044eea22f0SMandy Kirkconnell 		return &ifp->if_u1.if_extents[idx];
30054eea22f0SMandy Kirkconnell 	} else {
30064eea22f0SMandy Kirkconnell 		return NULL;
30074eea22f0SMandy Kirkconnell 	}
30084eea22f0SMandy Kirkconnell }
30094eea22f0SMandy Kirkconnell 
30104eea22f0SMandy Kirkconnell /*
30114eea22f0SMandy Kirkconnell  * Insert new item(s) into the extent records for incore inode
30124eea22f0SMandy Kirkconnell  * fork 'ifp'.  'count' new items are inserted at index 'idx'.
30134eea22f0SMandy Kirkconnell  */
30144eea22f0SMandy Kirkconnell void
30154eea22f0SMandy Kirkconnell xfs_iext_insert(
30166ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
30174eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* starting index of new items */
30184eea22f0SMandy Kirkconnell 	xfs_extnum_t	count,		/* number of inserted items */
30196ef35544SChristoph Hellwig 	xfs_bmbt_irec_t	*new,		/* items to insert */
30206ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
30214eea22f0SMandy Kirkconnell {
30226ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
30234eea22f0SMandy Kirkconnell 	xfs_extnum_t	i;		/* extent record index */
30244eea22f0SMandy Kirkconnell 
30250b1b213fSChristoph Hellwig 	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
30260b1b213fSChristoph Hellwig 
30274eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
30284eea22f0SMandy Kirkconnell 	xfs_iext_add(ifp, idx, count);
3029a6f64d4aSChristoph Hellwig 	for (i = idx; i < idx + count; i++, new++)
3030a6f64d4aSChristoph Hellwig 		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
30314eea22f0SMandy Kirkconnell }
30324eea22f0SMandy Kirkconnell 
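/*
 * Usage sketch (hypothetical, illustration only): build a single
 * incore record and insert it into the data fork at index idx.  The
 * br_* field names and XFS_EXT_NORM are assumed from the usual
 * xfs_bmbt_irec_t definition in the XFS headers.
 */
STATIC void
example_iext_insert_one(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extnum_t	idx,		/* index for the new record */
	xfs_fileoff_t	off,		/* file offset of the extent */
	xfs_fsblock_t	bno,		/* first filesystem block */
	xfs_filblks_t	len)		/* length in blocks */
{
	xfs_bmbt_irec_t	new;		/* record to insert */

	new.br_startoff = off;
	new.br_startblock = bno;
	new.br_blockcount = len;
	new.br_state = XFS_EXT_NORM;
	xfs_iext_insert(ip, idx, 1, &new, 0);	/* 0 == data fork */
}
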
30334eea22f0SMandy Kirkconnell /*
30344eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
30354eea22f0SMandy Kirkconnell  * extents needs to be increased. The ext_diff parameter stores the
30364eea22f0SMandy Kirkconnell  * number of new extents being added and the idx parameter contains
30374eea22f0SMandy Kirkconnell  * the extent index where the new extents will be added. If the new
30384eea22f0SMandy Kirkconnell  * extents are being appended, then we just need to (re)allocate and
30394eea22f0SMandy Kirkconnell  * initialize the space. Otherwise, if the new extents are being
30404eea22f0SMandy Kirkconnell  * inserted into the middle of the existing entries, a bit more work
30414eea22f0SMandy Kirkconnell  * is required to make room for the new extents to be inserted. The
30424eea22f0SMandy Kirkconnell  * caller is responsible for filling in the new extent entries upon
30434eea22f0SMandy Kirkconnell  * return.
30444eea22f0SMandy Kirkconnell  */
30454eea22f0SMandy Kirkconnell void
30464eea22f0SMandy Kirkconnell xfs_iext_add(
30474eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
30484eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin adding exts */
3049c41564b5SNathan Scott 	int		ext_diff)	/* number of extents to add */
30504eea22f0SMandy Kirkconnell {
30514eea22f0SMandy Kirkconnell 	int		byte_diff;	/* new bytes being added */
30524eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after adding */
30534eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
30544eea22f0SMandy Kirkconnell 
30554eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
30564eea22f0SMandy Kirkconnell 	ASSERT((idx >= 0) && (idx <= nextents));
30574eea22f0SMandy Kirkconnell 	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
30584eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes + byte_diff;
30594eea22f0SMandy Kirkconnell 	/*
30604eea22f0SMandy Kirkconnell 	 * If the new number of extents (nextents + ext_diff)
30614eea22f0SMandy Kirkconnell 	 * fits inside the inode, then continue to use the inline
30624eea22f0SMandy Kirkconnell 	 * extent buffer.
30634eea22f0SMandy Kirkconnell 	 */
30644eea22f0SMandy Kirkconnell 	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
30654eea22f0SMandy Kirkconnell 		if (idx < nextents) {
30664eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
30674eea22f0SMandy Kirkconnell 				&ifp->if_u2.if_inline_ext[idx],
30684eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
30694eea22f0SMandy Kirkconnell 			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
30704eea22f0SMandy Kirkconnell 		}
30714eea22f0SMandy Kirkconnell 		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
30724eea22f0SMandy Kirkconnell 		ifp->if_real_bytes = 0;
30734eea22f0SMandy Kirkconnell 	}
30744eea22f0SMandy Kirkconnell 	/*
30754eea22f0SMandy Kirkconnell 	 * Otherwise use a linear (direct) extent list.
30764eea22f0SMandy Kirkconnell 	 * If the extents are currently inside the inode,
30774eea22f0SMandy Kirkconnell 	 * xfs_iext_realloc_direct will switch us from
30784eea22f0SMandy Kirkconnell 	 * inline to direct extent allocation mode.
30794eea22f0SMandy Kirkconnell 	 */
30800293ce3aSMandy Kirkconnell 	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
30814eea22f0SMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, new_size);
30824eea22f0SMandy Kirkconnell 		if (idx < nextents) {
30834eea22f0SMandy Kirkconnell 			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
30844eea22f0SMandy Kirkconnell 				&ifp->if_u1.if_extents[idx],
30854eea22f0SMandy Kirkconnell 				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
30864eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
30874eea22f0SMandy Kirkconnell 		}
30884eea22f0SMandy Kirkconnell 	}
30890293ce3aSMandy Kirkconnell 	/* Indirection array */
30900293ce3aSMandy Kirkconnell 	else {
30910293ce3aSMandy Kirkconnell 		xfs_ext_irec_t	*erp;
30920293ce3aSMandy Kirkconnell 		int		erp_idx = 0;
30930293ce3aSMandy Kirkconnell 		int		page_idx = idx;
30940293ce3aSMandy Kirkconnell 
30950293ce3aSMandy Kirkconnell 		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
30960293ce3aSMandy Kirkconnell 		if (ifp->if_flags & XFS_IFEXTIREC) {
30970293ce3aSMandy Kirkconnell 			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
30980293ce3aSMandy Kirkconnell 		} else {
30990293ce3aSMandy Kirkconnell 			xfs_iext_irec_init(ifp);
31000293ce3aSMandy Kirkconnell 			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
31010293ce3aSMandy Kirkconnell 			erp = ifp->if_u1.if_ext_irec;
31020293ce3aSMandy Kirkconnell 		}
31030293ce3aSMandy Kirkconnell 		/* Extents fit in target extent page */
31040293ce3aSMandy Kirkconnell 		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
31050293ce3aSMandy Kirkconnell 			if (page_idx < erp->er_extcount) {
31060293ce3aSMandy Kirkconnell 				memmove(&erp->er_extbuf[page_idx + ext_diff],
31070293ce3aSMandy Kirkconnell 					&erp->er_extbuf[page_idx],
31080293ce3aSMandy Kirkconnell 					(erp->er_extcount - page_idx) *
31090293ce3aSMandy Kirkconnell 					sizeof(xfs_bmbt_rec_t));
31100293ce3aSMandy Kirkconnell 				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
31110293ce3aSMandy Kirkconnell 			}
31120293ce3aSMandy Kirkconnell 			erp->er_extcount += ext_diff;
31130293ce3aSMandy Kirkconnell 			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
31140293ce3aSMandy Kirkconnell 		}
31150293ce3aSMandy Kirkconnell 		/* Insert a new extent page */
31160293ce3aSMandy Kirkconnell 		else if (erp) {
31170293ce3aSMandy Kirkconnell 			xfs_iext_add_indirect_multi(ifp,
31180293ce3aSMandy Kirkconnell 				erp_idx, page_idx, ext_diff);
31190293ce3aSMandy Kirkconnell 		}
31200293ce3aSMandy Kirkconnell 		/*
31210293ce3aSMandy Kirkconnell 		 * If extent(s) are being appended to the last page in
31220293ce3aSMandy Kirkconnell 		 * the indirection array and the new extent(s) don't fit
31230293ce3aSMandy Kirkconnell 		 * in the page, then erp is NULL and erp_idx is set to
31240293ce3aSMandy Kirkconnell 		 * the next index needed in the indirection array.
31250293ce3aSMandy Kirkconnell 		 */
31260293ce3aSMandy Kirkconnell 		else {
31270293ce3aSMandy Kirkconnell 			int	count = ext_diff;
31280293ce3aSMandy Kirkconnell 
31290293ce3aSMandy Kirkconnell 			while (count) {
31300293ce3aSMandy Kirkconnell 				erp = xfs_iext_irec_new(ifp, erp_idx);
31310293ce3aSMandy Kirkconnell 				erp->er_extcount = count;
31320293ce3aSMandy Kirkconnell 				count -= MIN(count, (int)XFS_LINEAR_EXTS);
31330293ce3aSMandy Kirkconnell 				if (count) {
31340293ce3aSMandy Kirkconnell 					erp_idx++;
31350293ce3aSMandy Kirkconnell 				}
31360293ce3aSMandy Kirkconnell 			}
31370293ce3aSMandy Kirkconnell 		}
31380293ce3aSMandy Kirkconnell 	}
31394eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
31404eea22f0SMandy Kirkconnell }
31414eea22f0SMandy Kirkconnell 
31424eea22f0SMandy Kirkconnell /*
31430293ce3aSMandy Kirkconnell  * This is called when incore extents are being added to the indirection
31440293ce3aSMandy Kirkconnell  * array and the new extents do not fit in the target extent list. The
31450293ce3aSMandy Kirkconnell  * erp_idx parameter contains the irec index for the target extent list
31460293ce3aSMandy Kirkconnell  * in the indirection array, and the idx parameter contains the extent
31470293ce3aSMandy Kirkconnell  * index within the list. The number of extents being added is stored
31480293ce3aSMandy Kirkconnell  * in the count parameter.
31490293ce3aSMandy Kirkconnell  *
31500293ce3aSMandy Kirkconnell  *    |-------|   |-------|
31510293ce3aSMandy Kirkconnell  *    |       |   |       |    idx - number of extents before idx
31520293ce3aSMandy Kirkconnell  *    |  idx  |   | count |
31530293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being inserted at idx
31540293ce3aSMandy Kirkconnell  *    |-------|   |-------|
31550293ce3aSMandy Kirkconnell  *    | count |   | nex2  |    nex2 - number of extents after idx + count
31560293ce3aSMandy Kirkconnell  *    |-------|   |-------|
31570293ce3aSMandy Kirkconnell  */
31580293ce3aSMandy Kirkconnell void
31590293ce3aSMandy Kirkconnell xfs_iext_add_indirect_multi(
31600293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,			/* inode fork pointer */
31610293ce3aSMandy Kirkconnell 	int		erp_idx,		/* target extent irec index */
31620293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,			/* index within target list */
31630293ce3aSMandy Kirkconnell 	int		count)			/* new extents being added */
31640293ce3aSMandy Kirkconnell {
31650293ce3aSMandy Kirkconnell 	int		byte_diff;		/* new bytes being added */
31660293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
31670293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;		/* number of extents to add */
31680293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;		/* new extents still needed */
31690293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;			/* extents after idx + count */
31700293ce3aSMandy Kirkconnell 	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
31710293ce3aSMandy Kirkconnell 	int		nlists;			/* number of irec's (lists) */
31720293ce3aSMandy Kirkconnell 
31730293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
31740293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
31750293ce3aSMandy Kirkconnell 	nex2 = erp->er_extcount - idx;
31760293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
31770293ce3aSMandy Kirkconnell 
31780293ce3aSMandy Kirkconnell 	/*
31790293ce3aSMandy Kirkconnell 	 * Save second part of target extent list
31800293ce3aSMandy Kirkconnell 	 * (all extents from index idx to the end of the target list). */
31810293ce3aSMandy Kirkconnell 	if (nex2) {
31820293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
31836785073bSDavid Chinner 		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
31840293ce3aSMandy Kirkconnell 		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
31850293ce3aSMandy Kirkconnell 		erp->er_extcount -= nex2;
31860293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
31870293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[idx], 0, byte_diff);
31880293ce3aSMandy Kirkconnell 	}
31890293ce3aSMandy Kirkconnell 
31900293ce3aSMandy Kirkconnell 	/*
31910293ce3aSMandy Kirkconnell 	 * Add the new extents to the end of the target
31920293ce3aSMandy Kirkconnell 	 * list, then allocate new irec record(s) and
31930293ce3aSMandy Kirkconnell 	 * extent buffer(s) as needed to store the rest
31940293ce3aSMandy Kirkconnell 	 * of the new extents.
31950293ce3aSMandy Kirkconnell 	 */
31960293ce3aSMandy Kirkconnell 	ext_cnt = count;
31970293ce3aSMandy Kirkconnell 	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
31980293ce3aSMandy Kirkconnell 	if (ext_diff) {
31990293ce3aSMandy Kirkconnell 		erp->er_extcount += ext_diff;
32000293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
32010293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
32020293ce3aSMandy Kirkconnell 	}
32030293ce3aSMandy Kirkconnell 	while (ext_cnt) {
32040293ce3aSMandy Kirkconnell 		erp_idx++;
32050293ce3aSMandy Kirkconnell 		erp = xfs_iext_irec_new(ifp, erp_idx);
32060293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
32070293ce3aSMandy Kirkconnell 		erp->er_extcount = ext_diff;
32080293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
32090293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
32100293ce3aSMandy Kirkconnell 	}
32110293ce3aSMandy Kirkconnell 
32120293ce3aSMandy Kirkconnell 	/* Add nex2 extents back to indirection array */
32130293ce3aSMandy Kirkconnell 	if (nex2) {
32140293ce3aSMandy Kirkconnell 		xfs_extnum_t	ext_avail;
32150293ce3aSMandy Kirkconnell 		int		i;
32160293ce3aSMandy Kirkconnell 
32170293ce3aSMandy Kirkconnell 		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
32180293ce3aSMandy Kirkconnell 		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
32190293ce3aSMandy Kirkconnell 		i = 0;
32200293ce3aSMandy Kirkconnell 		/*
32210293ce3aSMandy Kirkconnell 		 * If nex2 extents fit in the current page, append
32220293ce3aSMandy Kirkconnell 		 * nex2_ep after the new extents.
32230293ce3aSMandy Kirkconnell 		 */
32240293ce3aSMandy Kirkconnell 		if (nex2 <= ext_avail) {
32250293ce3aSMandy Kirkconnell 			i = erp->er_extcount;
32260293ce3aSMandy Kirkconnell 		}
32270293ce3aSMandy Kirkconnell 		/*
32280293ce3aSMandy Kirkconnell 		 * Otherwise, check if space is available in the
32290293ce3aSMandy Kirkconnell 		 * next page.
32300293ce3aSMandy Kirkconnell 		 */
32310293ce3aSMandy Kirkconnell 		else if ((erp_idx < nlists - 1) &&
32320293ce3aSMandy Kirkconnell 			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
32330293ce3aSMandy Kirkconnell 			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
32340293ce3aSMandy Kirkconnell 			erp_idx++;
32350293ce3aSMandy Kirkconnell 			erp++;
32360293ce3aSMandy Kirkconnell 			/* Create a hole for nex2 extents */
32370293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
32380293ce3aSMandy Kirkconnell 				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
32390293ce3aSMandy Kirkconnell 		}
32400293ce3aSMandy Kirkconnell 		/*
32410293ce3aSMandy Kirkconnell 		 * Final choice, create a new extent page for
32420293ce3aSMandy Kirkconnell 		 * nex2 extents.
32430293ce3aSMandy Kirkconnell 		 */
32440293ce3aSMandy Kirkconnell 		else {
32450293ce3aSMandy Kirkconnell 			erp_idx++;
32460293ce3aSMandy Kirkconnell 			erp = xfs_iext_irec_new(ifp, erp_idx);
32470293ce3aSMandy Kirkconnell 		}
32480293ce3aSMandy Kirkconnell 		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3249f0e2d93cSDenys Vlasenko 		kmem_free(nex2_ep);
32500293ce3aSMandy Kirkconnell 		erp->er_extcount += nex2;
32510293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
32520293ce3aSMandy Kirkconnell 	}
32530293ce3aSMandy Kirkconnell }
32540293ce3aSMandy Kirkconnell 
32550293ce3aSMandy Kirkconnell /*
32564eea22f0SMandy Kirkconnell  * This is called when the amount of space required for incore file
32574eea22f0SMandy Kirkconnell  * extents needs to be decreased. The ext_diff parameter stores the
32584eea22f0SMandy Kirkconnell  * number of extents to be removed and the idx parameter contains
32594eea22f0SMandy Kirkconnell  * the extent index where the extents will be removed from.
32600293ce3aSMandy Kirkconnell  *
32610293ce3aSMandy Kirkconnell  * If the amount of space needed has decreased below the linear
32620293ce3aSMandy Kirkconnell  * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
32630293ce3aSMandy Kirkconnell  * extent array.  Otherwise, use kmem_realloc() to adjust the
32640293ce3aSMandy Kirkconnell  * size to what is needed.
32654eea22f0SMandy Kirkconnell  */
32664eea22f0SMandy Kirkconnell void
32674eea22f0SMandy Kirkconnell xfs_iext_remove(
32686ef35544SChristoph Hellwig 	xfs_inode_t	*ip,		/* incore inode pointer */
32694eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
32706ef35544SChristoph Hellwig 	int		ext_diff,	/* number of extents to remove */
32716ef35544SChristoph Hellwig 	int		state)		/* type of extent conversion */
32724eea22f0SMandy Kirkconnell {
32736ef35544SChristoph Hellwig 	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
32744eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
32754eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
32764eea22f0SMandy Kirkconnell 
32770b1b213fSChristoph Hellwig 	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
32780b1b213fSChristoph Hellwig 
32794eea22f0SMandy Kirkconnell 	ASSERT(ext_diff > 0);
32804eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
32814eea22f0SMandy Kirkconnell 	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
32824eea22f0SMandy Kirkconnell 
32834eea22f0SMandy Kirkconnell 	if (new_size == 0) {
32844eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
32850293ce3aSMandy Kirkconnell 	} else if (ifp->if_flags & XFS_IFEXTIREC) {
32860293ce3aSMandy Kirkconnell 		xfs_iext_remove_indirect(ifp, idx, ext_diff);
32874eea22f0SMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
32884eea22f0SMandy Kirkconnell 		xfs_iext_remove_direct(ifp, idx, ext_diff);
32894eea22f0SMandy Kirkconnell 	} else {
32904eea22f0SMandy Kirkconnell 		xfs_iext_remove_inline(ifp, idx, ext_diff);
32914eea22f0SMandy Kirkconnell 	}
32924eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
32934eea22f0SMandy Kirkconnell }
32944eea22f0SMandy Kirkconnell 
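/*
 * Usage sketch (hypothetical, illustration only): drop the single
 * data-fork record at index idx, mirroring example_iext_insert_one()
 * above.
 */
STATIC void
example_iext_remove_one(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extnum_t	idx)		/* index of record to drop */
{
	xfs_iext_remove(ip, idx, 1, 0);		/* 0 == data fork */
}
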
32954eea22f0SMandy Kirkconnell /*
32964eea22f0SMandy Kirkconnell  * This removes ext_diff extents from the inline buffer, beginning
32974eea22f0SMandy Kirkconnell  * at extent index idx.
32984eea22f0SMandy Kirkconnell  */
32994eea22f0SMandy Kirkconnell void
33004eea22f0SMandy Kirkconnell xfs_iext_remove_inline(
33014eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33024eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
33034eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
33044eea22f0SMandy Kirkconnell {
33054eea22f0SMandy Kirkconnell 	int		nextents;	/* number of extents in file */
33064eea22f0SMandy Kirkconnell 
33070293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
33084eea22f0SMandy Kirkconnell 	ASSERT(idx < XFS_INLINE_EXTS);
33094eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
33104eea22f0SMandy Kirkconnell 	ASSERT(((nextents - ext_diff) > 0) &&
33114eea22f0SMandy Kirkconnell 		(nextents - ext_diff) < XFS_INLINE_EXTS);
33124eea22f0SMandy Kirkconnell 
33134eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
33144eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u2.if_inline_ext[idx],
33154eea22f0SMandy Kirkconnell 			&ifp->if_u2.if_inline_ext[idx + ext_diff],
33164eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
33174eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
33184eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
33194eea22f0SMandy Kirkconnell 			0, ext_diff * sizeof(xfs_bmbt_rec_t));
33204eea22f0SMandy Kirkconnell 	} else {
33214eea22f0SMandy Kirkconnell 		memset(&ifp->if_u2.if_inline_ext[idx], 0,
33224eea22f0SMandy Kirkconnell 			ext_diff * sizeof(xfs_bmbt_rec_t));
33234eea22f0SMandy Kirkconnell 	}
33244eea22f0SMandy Kirkconnell }
33254eea22f0SMandy Kirkconnell 
33264eea22f0SMandy Kirkconnell /*
33274eea22f0SMandy Kirkconnell  * This removes ext_diff extents from a linear (direct) extent list,
33284eea22f0SMandy Kirkconnell  * beginning at extent index idx. If the extents are being removed
33294eea22f0SMandy Kirkconnell  * from the end of the list (i.e. truncate) then we just need to re-
33304eea22f0SMandy Kirkconnell  * allocate the list to remove the extra space. Otherwise, if the
33314eea22f0SMandy Kirkconnell  * extents are being removed from the middle of the existing extent
33324eea22f0SMandy Kirkconnell  * entries, then we first need to move the extent records beginning
33334eea22f0SMandy Kirkconnell  * at idx + ext_diff up in the list to overwrite the records being
33344eea22f0SMandy Kirkconnell  * removed, then remove the extra space via kmem_realloc.
33354eea22f0SMandy Kirkconnell  */
33364eea22f0SMandy Kirkconnell void
33374eea22f0SMandy Kirkconnell xfs_iext_remove_direct(
33384eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33394eea22f0SMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing exts */
33404eea22f0SMandy Kirkconnell 	int		ext_diff)	/* number of extents to remove */
33414eea22f0SMandy Kirkconnell {
33424eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
33434eea22f0SMandy Kirkconnell 	int		new_size;	/* size of extents after removal */
33444eea22f0SMandy Kirkconnell 
33450293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
33464eea22f0SMandy Kirkconnell 	new_size = ifp->if_bytes -
33474eea22f0SMandy Kirkconnell 		(ext_diff * sizeof(xfs_bmbt_rec_t));
33484eea22f0SMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
33494eea22f0SMandy Kirkconnell 
33504eea22f0SMandy Kirkconnell 	if (new_size == 0) {
33514eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
33524eea22f0SMandy Kirkconnell 		return;
33534eea22f0SMandy Kirkconnell 	}
33544eea22f0SMandy Kirkconnell 	/* Move extents up in the list (if needed) */
33554eea22f0SMandy Kirkconnell 	if (idx + ext_diff < nextents) {
33564eea22f0SMandy Kirkconnell 		memmove(&ifp->if_u1.if_extents[idx],
33574eea22f0SMandy Kirkconnell 			&ifp->if_u1.if_extents[idx + ext_diff],
33584eea22f0SMandy Kirkconnell 			(nextents - (idx + ext_diff)) *
33594eea22f0SMandy Kirkconnell 			 sizeof(xfs_bmbt_rec_t));
33604eea22f0SMandy Kirkconnell 	}
33614eea22f0SMandy Kirkconnell 	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
33624eea22f0SMandy Kirkconnell 		0, ext_diff * sizeof(xfs_bmbt_rec_t));
33634eea22f0SMandy Kirkconnell 	/*
33644eea22f0SMandy Kirkconnell 	 * Reallocate the direct extent list. If the extents
33654eea22f0SMandy Kirkconnell 	 * will fit inside the inode then xfs_iext_realloc_direct
33664eea22f0SMandy Kirkconnell 	 * will switch from direct to inline extent allocation
33674eea22f0SMandy Kirkconnell 	 * mode for us.
33684eea22f0SMandy Kirkconnell 	 */
33694eea22f0SMandy Kirkconnell 	xfs_iext_realloc_direct(ifp, new_size);
33704eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
33714eea22f0SMandy Kirkconnell }
33724eea22f0SMandy Kirkconnell 
33734eea22f0SMandy Kirkconnell /*
33740293ce3aSMandy Kirkconnell  * This is called when incore extents are being removed from the
33750293ce3aSMandy Kirkconnell  * indirection array and the extents being removed span multiple extent
33760293ce3aSMandy Kirkconnell  * buffers. The idx parameter contains the file extent index where we
33770293ce3aSMandy Kirkconnell  * want to begin removing extents, and the count parameter contains
33780293ce3aSMandy Kirkconnell  * how many extents need to be removed.
33790293ce3aSMandy Kirkconnell  *
33800293ce3aSMandy Kirkconnell  *    |-------|   |-------|
33810293ce3aSMandy Kirkconnell  *    | nex1  |   |       |    nex1 - number of extents before idx
33820293ce3aSMandy Kirkconnell  *    |-------|   | count |
33830293ce3aSMandy Kirkconnell  *    |       |   |       |    count - number of extents being removed at idx
33840293ce3aSMandy Kirkconnell  *    | count |   |-------|
33850293ce3aSMandy Kirkconnell  *    |       |   | nex2  |    nex2 - number of extents after idx + count
33860293ce3aSMandy Kirkconnell  *    |-------|   |-------|
33870293ce3aSMandy Kirkconnell  */
33880293ce3aSMandy Kirkconnell void
33890293ce3aSMandy Kirkconnell xfs_iext_remove_indirect(
33900293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
33910293ce3aSMandy Kirkconnell 	xfs_extnum_t	idx,		/* index to begin removing extents */
33920293ce3aSMandy Kirkconnell 	int		count)		/* number of extents to remove */
33930293ce3aSMandy Kirkconnell {
33940293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
33950293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
33960293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_cnt;	/* extents left to remove */
33970293ce3aSMandy Kirkconnell 	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
33980293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex1;		/* number of extents before idx */
33990293ce3aSMandy Kirkconnell 	xfs_extnum_t	nex2;		/* extents after idx + count */
34000293ce3aSMandy Kirkconnell 	int		page_idx = idx;	/* index in target extent list */
34010293ce3aSMandy Kirkconnell 
34020293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
34030293ce3aSMandy Kirkconnell 	erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
34040293ce3aSMandy Kirkconnell 	ASSERT(erp != NULL);
34050293ce3aSMandy Kirkconnell 	nex1 = page_idx;
34060293ce3aSMandy Kirkconnell 	ext_cnt = count;
34070293ce3aSMandy Kirkconnell 	while (ext_cnt) {
34080293ce3aSMandy Kirkconnell 		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
34090293ce3aSMandy Kirkconnell 		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
34100293ce3aSMandy Kirkconnell 		/*
34110293ce3aSMandy Kirkconnell 		 * Check for deletion of entire list;
34120293ce3aSMandy Kirkconnell 		 * xfs_iext_irec_remove() updates extent offsets.
34130293ce3aSMandy Kirkconnell 		 */
34140293ce3aSMandy Kirkconnell 		if (ext_diff == erp->er_extcount) {
34150293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
34160293ce3aSMandy Kirkconnell 			ext_cnt -= ext_diff;
34170293ce3aSMandy Kirkconnell 			nex1 = 0;
34180293ce3aSMandy Kirkconnell 			if (ext_cnt) {
34190293ce3aSMandy Kirkconnell 				ASSERT(erp_idx < ifp->if_real_bytes /
34200293ce3aSMandy Kirkconnell 					XFS_IEXT_BUFSZ);
34210293ce3aSMandy Kirkconnell 				erp = &ifp->if_u1.if_ext_irec[erp_idx];
34220293ce3aSMandy Kirkconnell 				nex1 = 0;
34230293ce3aSMandy Kirkconnell 				continue;
34240293ce3aSMandy Kirkconnell 			} else {
34250293ce3aSMandy Kirkconnell 				break;
34260293ce3aSMandy Kirkconnell 			}
34270293ce3aSMandy Kirkconnell 		}
34280293ce3aSMandy Kirkconnell 		/* Move extents up (if needed) */
34290293ce3aSMandy Kirkconnell 		if (nex2) {
34300293ce3aSMandy Kirkconnell 			memmove(&erp->er_extbuf[nex1],
34310293ce3aSMandy Kirkconnell 				&erp->er_extbuf[nex1 + ext_diff],
34320293ce3aSMandy Kirkconnell 				nex2 * sizeof(xfs_bmbt_rec_t));
34330293ce3aSMandy Kirkconnell 		}
34340293ce3aSMandy Kirkconnell 		/* Zero out rest of page */
34350293ce3aSMandy Kirkconnell 		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
34360293ce3aSMandy Kirkconnell 			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
34370293ce3aSMandy Kirkconnell 		/* Update remaining counters */
34380293ce3aSMandy Kirkconnell 		erp->er_extcount -= ext_diff;
34390293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
34400293ce3aSMandy Kirkconnell 		ext_cnt -= ext_diff;
34410293ce3aSMandy Kirkconnell 		nex1 = 0;
34420293ce3aSMandy Kirkconnell 		erp_idx++;
34430293ce3aSMandy Kirkconnell 		erp++;
34440293ce3aSMandy Kirkconnell 	}
34450293ce3aSMandy Kirkconnell 	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
34460293ce3aSMandy Kirkconnell 	xfs_iext_irec_compact(ifp);
34470293ce3aSMandy Kirkconnell }
34480293ce3aSMandy Kirkconnell 
34490293ce3aSMandy Kirkconnell /*
34504eea22f0SMandy Kirkconnell  * Create, destroy, or resize a linear (direct) block of extents.
34514eea22f0SMandy Kirkconnell  */
34524eea22f0SMandy Kirkconnell void
34534eea22f0SMandy Kirkconnell xfs_iext_realloc_direct(
34544eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
34554eea22f0SMandy Kirkconnell 	int		new_size)	/* new size of extent records, in bytes */
34564eea22f0SMandy Kirkconnell {
34574eea22f0SMandy Kirkconnell 	int		rnew_size;	/* real new size of extents */
34584eea22f0SMandy Kirkconnell 
34594eea22f0SMandy Kirkconnell 	rnew_size = new_size;
34604eea22f0SMandy Kirkconnell 
34610293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
34620293ce3aSMandy Kirkconnell 		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
34630293ce3aSMandy Kirkconnell 		 (new_size != ifp->if_real_bytes)));
34640293ce3aSMandy Kirkconnell 
34654eea22f0SMandy Kirkconnell 	/* Free extent records */
34664eea22f0SMandy Kirkconnell 	if (new_size == 0) {
34674eea22f0SMandy Kirkconnell 		xfs_iext_destroy(ifp);
34684eea22f0SMandy Kirkconnell 	}
34694eea22f0SMandy Kirkconnell 	/* Resize direct extent list and zero any new bytes */
34704eea22f0SMandy Kirkconnell 	else if (ifp->if_real_bytes) {
34714eea22f0SMandy Kirkconnell 		/* Check if extents will fit inside the inode */
34724eea22f0SMandy Kirkconnell 		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
34734eea22f0SMandy Kirkconnell 			xfs_iext_direct_to_inline(ifp, new_size /
34744eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t));
34754eea22f0SMandy Kirkconnell 			ifp->if_bytes = new_size;
34764eea22f0SMandy Kirkconnell 			return;
34774eea22f0SMandy Kirkconnell 		}
347816a087d8SVignesh Babu 		if (!is_power_of_2(new_size)) {
347940ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
34804eea22f0SMandy Kirkconnell 		}
34814eea22f0SMandy Kirkconnell 		if (rnew_size != ifp->if_real_bytes) {
3482a6f64d4aSChristoph Hellwig 			ifp->if_u1.if_extents =
34834eea22f0SMandy Kirkconnell 				kmem_realloc(ifp->if_u1.if_extents,
34844eea22f0SMandy Kirkconnell 						rnew_size,
34856785073bSDavid Chinner 						ifp->if_real_bytes, KM_NOFS);
34864eea22f0SMandy Kirkconnell 		}
34874eea22f0SMandy Kirkconnell 		if (rnew_size > ifp->if_real_bytes) {
34884eea22f0SMandy Kirkconnell 			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
34894eea22f0SMandy Kirkconnell 				(uint)sizeof(xfs_bmbt_rec_t)], 0,
34904eea22f0SMandy Kirkconnell 				rnew_size - ifp->if_real_bytes);
34914eea22f0SMandy Kirkconnell 		}
34924eea22f0SMandy Kirkconnell 	}
34934eea22f0SMandy Kirkconnell 	/*
34944eea22f0SMandy Kirkconnell 	 * Switch from the inline extent buffer to a direct
34954eea22f0SMandy Kirkconnell 	 * extent list. Be sure to include the inline extent
34964eea22f0SMandy Kirkconnell 	 * bytes in new_size.
34974eea22f0SMandy Kirkconnell 	 */
34984eea22f0SMandy Kirkconnell 	else {
34994eea22f0SMandy Kirkconnell 		new_size += ifp->if_bytes;
350016a087d8SVignesh Babu 		if (!is_power_of_2(new_size)) {
350140ebd81dSRobert P. J. Day 			rnew_size = roundup_pow_of_two(new_size);
35024eea22f0SMandy Kirkconnell 		}
35034eea22f0SMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, rnew_size);
35044eea22f0SMandy Kirkconnell 	}
35054eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = rnew_size;
35064eea22f0SMandy Kirkconnell 	ifp->if_bytes = new_size;
35074eea22f0SMandy Kirkconnell }
35084eea22f0SMandy Kirkconnell 
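/*
 * Sizing sketch (hypothetical, illustration only): a simplified mirror
 * of the policy above -- nothing is allocated while the records still
 * fit in the inline buffer, otherwise the allocation is new_size
 * rounded up to a power of two.  (The inline-to-direct switch, which
 * folds the inline bytes into new_size first, is ignored here.)
 */
STATIC int
example_iext_direct_alloc_size(
	int		new_size)	/* bytes of extent records needed */
{
	if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t))
		return 0;		/* stays in if_u2.if_inline_ext */
	if (is_power_of_2(new_size))
		return new_size;
	return roundup_pow_of_two(new_size);
}
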
35094eea22f0SMandy Kirkconnell /*
35104eea22f0SMandy Kirkconnell  * Switch from linear (direct) extent records to inline buffer.
35114eea22f0SMandy Kirkconnell  */
35124eea22f0SMandy Kirkconnell void
35134eea22f0SMandy Kirkconnell xfs_iext_direct_to_inline(
35144eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
35154eea22f0SMandy Kirkconnell 	xfs_extnum_t	nextents)	/* number of extents in file */
35164eea22f0SMandy Kirkconnell {
35174eea22f0SMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
35184eea22f0SMandy Kirkconnell 	ASSERT(nextents <= XFS_INLINE_EXTS);
35194eea22f0SMandy Kirkconnell 	/*
35204eea22f0SMandy Kirkconnell 	 * The inline buffer was zeroed when we switched
35214eea22f0SMandy Kirkconnell 	 * from inline to direct extent allocation mode,
35224eea22f0SMandy Kirkconnell 	 * so we don't need to clear it here.
35234eea22f0SMandy Kirkconnell 	 */
35244eea22f0SMandy Kirkconnell 	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
35254eea22f0SMandy Kirkconnell 		nextents * sizeof(xfs_bmbt_rec_t));
3526f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_extents);
35274eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
35284eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
35294eea22f0SMandy Kirkconnell }
35304eea22f0SMandy Kirkconnell 
35314eea22f0SMandy Kirkconnell /*
35324eea22f0SMandy Kirkconnell  * Switch from inline buffer to linear (direct) extent records.
35334eea22f0SMandy Kirkconnell  * new_size should already be rounded up to the next power of 2
35344eea22f0SMandy Kirkconnell  * by the caller (when appropriate), so use new_size as it is.
35354eea22f0SMandy Kirkconnell  * However, since new_size may be rounded up, we can't update
35364eea22f0SMandy Kirkconnell  * if_bytes here. It is the caller's responsibility to update
35374eea22f0SMandy Kirkconnell  * if_bytes upon return.
35384eea22f0SMandy Kirkconnell  */
35394eea22f0SMandy Kirkconnell void
35404eea22f0SMandy Kirkconnell xfs_iext_inline_to_direct(
35414eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
35424eea22f0SMandy Kirkconnell 	int		new_size)	/* new size of extent list, in bytes */
35434eea22f0SMandy Kirkconnell {
35446785073bSDavid Chinner 	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
35454eea22f0SMandy Kirkconnell 	memset(ifp->if_u1.if_extents, 0, new_size);
35464eea22f0SMandy Kirkconnell 	if (ifp->if_bytes) {
35474eea22f0SMandy Kirkconnell 		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
35484eea22f0SMandy Kirkconnell 			ifp->if_bytes);
35494eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
35504eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
35514eea22f0SMandy Kirkconnell 	}
35524eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = new_size;
35534eea22f0SMandy Kirkconnell }
35544eea22f0SMandy Kirkconnell 
35554eea22f0SMandy Kirkconnell /*
35560293ce3aSMandy Kirkconnell  * Resize an extent indirection array to new_size bytes.
35570293ce3aSMandy Kirkconnell  */
3558d96f8f89SEric Sandeen STATIC void
35590293ce3aSMandy Kirkconnell xfs_iext_realloc_indirect(
35600293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
35610293ce3aSMandy Kirkconnell 	int		new_size)	/* new indirection array size */
35620293ce3aSMandy Kirkconnell {
35630293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
35640293ce3aSMandy Kirkconnell 	int		size;		/* current indirection array size */
35650293ce3aSMandy Kirkconnell 
35660293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
35670293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
35680293ce3aSMandy Kirkconnell 	size = nlists * sizeof(xfs_ext_irec_t);
35690293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes);
35700293ce3aSMandy Kirkconnell 	ASSERT((new_size >= 0) && (new_size != size));
35710293ce3aSMandy Kirkconnell 	if (new_size == 0) {
35720293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
35730293ce3aSMandy Kirkconnell 	} else {
35740293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
35750293ce3aSMandy Kirkconnell 			kmem_realloc(ifp->if_u1.if_ext_irec,
35766785073bSDavid Chinner 				new_size, size, KM_NOFS);
35770293ce3aSMandy Kirkconnell 	}
35780293ce3aSMandy Kirkconnell }
35790293ce3aSMandy Kirkconnell 
35800293ce3aSMandy Kirkconnell /*
35810293ce3aSMandy Kirkconnell  * Switch from indirection array to linear (direct) extent allocations.
35820293ce3aSMandy Kirkconnell  */
3583d96f8f89SEric Sandeen STATIC void
35840293ce3aSMandy Kirkconnell xfs_iext_indirect_to_direct(
35850293ce3aSMandy Kirkconnell 	 xfs_ifork_t	*ifp)		/* inode fork pointer */
35860293ce3aSMandy Kirkconnell {
3587a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
35880293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
35890293ce3aSMandy Kirkconnell 	int		size;		/* size of file extents */
35900293ce3aSMandy Kirkconnell 
35910293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
35920293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
35930293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
35940293ce3aSMandy Kirkconnell 	size = nextents * sizeof(xfs_bmbt_rec_t);
35950293ce3aSMandy Kirkconnell 
359671a8c87fSLachlan McIlroy 	xfs_iext_irec_compact_pages(ifp);
35970293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
35980293ce3aSMandy Kirkconnell 
35990293ce3aSMandy Kirkconnell 	ep = ifp->if_u1.if_ext_irec->er_extbuf;
3600f0e2d93cSDenys Vlasenko 	kmem_free(ifp->if_u1.if_ext_irec);
36010293ce3aSMandy Kirkconnell 	ifp->if_flags &= ~XFS_IFEXTIREC;
36020293ce3aSMandy Kirkconnell 	ifp->if_u1.if_extents = ep;
36030293ce3aSMandy Kirkconnell 	ifp->if_bytes = size;
36040293ce3aSMandy Kirkconnell 	if (nextents < XFS_LINEAR_EXTS) {
36050293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, size);
36060293ce3aSMandy Kirkconnell 	}
36070293ce3aSMandy Kirkconnell }
36080293ce3aSMandy Kirkconnell 
36090293ce3aSMandy Kirkconnell /*
36104eea22f0SMandy Kirkconnell  * Free incore file extents.
36114eea22f0SMandy Kirkconnell  */
36124eea22f0SMandy Kirkconnell void
36134eea22f0SMandy Kirkconnell xfs_iext_destroy(
36144eea22f0SMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
36154eea22f0SMandy Kirkconnell {
36160293ce3aSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
36170293ce3aSMandy Kirkconnell 		int	erp_idx;
36180293ce3aSMandy Kirkconnell 		int	nlists;
36190293ce3aSMandy Kirkconnell 
36200293ce3aSMandy Kirkconnell 		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
36210293ce3aSMandy Kirkconnell 		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
36220293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx);
36230293ce3aSMandy Kirkconnell 		}
36240293ce3aSMandy Kirkconnell 		ifp->if_flags &= ~XFS_IFEXTIREC;
36250293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes) {
3626f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_extents);
36274eea22f0SMandy Kirkconnell 	} else if (ifp->if_bytes) {
36284eea22f0SMandy Kirkconnell 		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
36294eea22f0SMandy Kirkconnell 			sizeof(xfs_bmbt_rec_t));
36304eea22f0SMandy Kirkconnell 	}
36314eea22f0SMandy Kirkconnell 	ifp->if_u1.if_extents = NULL;
36324eea22f0SMandy Kirkconnell 	ifp->if_real_bytes = 0;
36334eea22f0SMandy Kirkconnell 	ifp->if_bytes = 0;
36344eea22f0SMandy Kirkconnell }
36350293ce3aSMandy Kirkconnell 
36360293ce3aSMandy Kirkconnell /*
36378867bc9bSMandy Kirkconnell  * Return a pointer to the extent record for file system block bno.
36388867bc9bSMandy Kirkconnell  */
3639a6f64d4aSChristoph Hellwig xfs_bmbt_rec_host_t *			/* pointer to found extent record */
36408867bc9bSMandy Kirkconnell xfs_iext_bno_to_ext(
36418867bc9bSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
36428867bc9bSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
36438867bc9bSMandy Kirkconnell 	xfs_extnum_t	*idxp)		/* index of target extent */
36448867bc9bSMandy Kirkconnell {
3645a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
36468867bc9bSMandy Kirkconnell 	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
3647a6f64d4aSChristoph Hellwig 	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
36488867bc9bSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
3649c41564b5SNathan Scott 	int		high;		/* upper boundary in search */
36508867bc9bSMandy Kirkconnell 	xfs_extnum_t	idx = 0;	/* index of target extent */
3651c41564b5SNathan Scott 	int		low;		/* lower boundary in search */
36528867bc9bSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of file extents */
36538867bc9bSMandy Kirkconnell 	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
36548867bc9bSMandy Kirkconnell 
36558867bc9bSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
36568867bc9bSMandy Kirkconnell 	if (nextents == 0) {
36578867bc9bSMandy Kirkconnell 		*idxp = 0;
36588867bc9bSMandy Kirkconnell 		return NULL;
36598867bc9bSMandy Kirkconnell 	}
36608867bc9bSMandy Kirkconnell 	low = 0;
36618867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
36628867bc9bSMandy Kirkconnell 		/* Find target extent list */
36638867bc9bSMandy Kirkconnell 		int	erp_idx = 0;
36648867bc9bSMandy Kirkconnell 		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
36658867bc9bSMandy Kirkconnell 		base = erp->er_extbuf;
36668867bc9bSMandy Kirkconnell 		high = erp->er_extcount - 1;
36678867bc9bSMandy Kirkconnell 	} else {
36688867bc9bSMandy Kirkconnell 		base = ifp->if_u1.if_extents;
36698867bc9bSMandy Kirkconnell 		high = nextents - 1;
36708867bc9bSMandy Kirkconnell 	}
36718867bc9bSMandy Kirkconnell 	/* Binary search extent records */
36728867bc9bSMandy Kirkconnell 	while (low <= high) {
36738867bc9bSMandy Kirkconnell 		idx = (low + high) >> 1;
36748867bc9bSMandy Kirkconnell 		ep = base + idx;
36758867bc9bSMandy Kirkconnell 		startoff = xfs_bmbt_get_startoff(ep);
36768867bc9bSMandy Kirkconnell 		blockcount = xfs_bmbt_get_blockcount(ep);
36778867bc9bSMandy Kirkconnell 		if (bno < startoff) {
36788867bc9bSMandy Kirkconnell 			high = idx - 1;
36798867bc9bSMandy Kirkconnell 		} else if (bno >= startoff + blockcount) {
36808867bc9bSMandy Kirkconnell 			low = idx + 1;
36818867bc9bSMandy Kirkconnell 		} else {
36828867bc9bSMandy Kirkconnell 			/* Convert back to file-based extent index */
36838867bc9bSMandy Kirkconnell 			if (ifp->if_flags & XFS_IFEXTIREC) {
36848867bc9bSMandy Kirkconnell 				idx += erp->er_extoff;
36858867bc9bSMandy Kirkconnell 			}
36868867bc9bSMandy Kirkconnell 			*idxp = idx;
36878867bc9bSMandy Kirkconnell 			return ep;
36888867bc9bSMandy Kirkconnell 		}
36898867bc9bSMandy Kirkconnell 	}
36908867bc9bSMandy Kirkconnell 	/* Convert back to file-based extent index */
36918867bc9bSMandy Kirkconnell 	if (ifp->if_flags & XFS_IFEXTIREC) {
36928867bc9bSMandy Kirkconnell 		idx += erp->er_extoff;
36938867bc9bSMandy Kirkconnell 	}
36948867bc9bSMandy Kirkconnell 	if (bno >= startoff + blockcount) {
36958867bc9bSMandy Kirkconnell 		if (++idx == nextents) {
36968867bc9bSMandy Kirkconnell 			ep = NULL;
36978867bc9bSMandy Kirkconnell 		} else {
36988867bc9bSMandy Kirkconnell 			ep = xfs_iext_get_ext(ifp, idx);
36998867bc9bSMandy Kirkconnell 		}
37008867bc9bSMandy Kirkconnell 	}
37018867bc9bSMandy Kirkconnell 	*idxp = idx;
37028867bc9bSMandy Kirkconnell 	return ep;
37038867bc9bSMandy Kirkconnell }
37048867bc9bSMandy Kirkconnell 
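/*
 * Lookup sketch (hypothetical, illustration only): decide whether file
 * block bno is mapped by an incore extent.  xfs_iext_bno_to_ext()
 * returns the extent containing bno, or the next extent beyond it when
 * bno falls in a hole, or NULL when bno is past the last extent.
 */
STATIC int
example_iext_block_is_mapped(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno)		/* file block to look up */
{
	xfs_bmbt_rec_host_t *ep;	/* candidate extent record */
	xfs_extnum_t	idx;		/* index of that record */

	ep = xfs_iext_bno_to_ext(ifp, bno, &idx);
	if (!ep)
		return 0;		/* beyond the last extent */
	/* if bno sits in a hole, ep starts beyond it */
	return bno >= xfs_bmbt_get_startoff(ep);
}
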
37058867bc9bSMandy Kirkconnell /*
37060293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
37070293ce3aSMandy Kirkconnell  * extent record for filesystem block bno. Store the index of the
37080293ce3aSMandy Kirkconnell  * target irec in *erp_idxp.
37090293ce3aSMandy Kirkconnell  */
37108867bc9bSMandy Kirkconnell xfs_ext_irec_t *			/* pointer to found extent record */
37110293ce3aSMandy Kirkconnell xfs_iext_bno_to_irec(
37120293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37130293ce3aSMandy Kirkconnell 	xfs_fileoff_t	bno,		/* block number to search for */
37140293ce3aSMandy Kirkconnell 	int		*erp_idxp)	/* irec index of target ext list */
37150293ce3aSMandy Kirkconnell {
37160293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
37170293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
37188867bc9bSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
37190293ce3aSMandy Kirkconnell 	int		nlists;		/* number of extent irec's (lists) */
37200293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
37210293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
37220293ce3aSMandy Kirkconnell 
37230293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
37240293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37250293ce3aSMandy Kirkconnell 	erp_idx = 0;
37260293ce3aSMandy Kirkconnell 	low = 0;
37270293ce3aSMandy Kirkconnell 	high = nlists - 1;
37280293ce3aSMandy Kirkconnell 	while (low <= high) {
37290293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
37300293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
37310293ce3aSMandy Kirkconnell 		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
37320293ce3aSMandy Kirkconnell 		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
37330293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
37340293ce3aSMandy Kirkconnell 		} else if (erp_next && bno >=
37350293ce3aSMandy Kirkconnell 			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
37360293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
37370293ce3aSMandy Kirkconnell 		} else {
37380293ce3aSMandy Kirkconnell 			break;
37390293ce3aSMandy Kirkconnell 		}
37400293ce3aSMandy Kirkconnell 	}
37410293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
37420293ce3aSMandy Kirkconnell 	return erp;
37430293ce3aSMandy Kirkconnell }
37440293ce3aSMandy Kirkconnell 
37450293ce3aSMandy Kirkconnell /*
37460293ce3aSMandy Kirkconnell  * Return a pointer to the indirection array entry containing the
37470293ce3aSMandy Kirkconnell  * extent record at file extent index *idxp. Store the index of the
37480293ce3aSMandy Kirkconnell  * target irec in *erp_idxp and store the page index of the target
37490293ce3aSMandy Kirkconnell  * extent record in *idxp.
37500293ce3aSMandy Kirkconnell  */
37510293ce3aSMandy Kirkconnell xfs_ext_irec_t *
37520293ce3aSMandy Kirkconnell xfs_iext_idx_to_irec(
37530293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
37540293ce3aSMandy Kirkconnell 	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
37550293ce3aSMandy Kirkconnell 	int		*erp_idxp,	/* irec index of target ext list */
37560293ce3aSMandy Kirkconnell 	int		realloc)	/* new bytes were just added */
37570293ce3aSMandy Kirkconnell {
37580293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
37590293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
37600293ce3aSMandy Kirkconnell 	int		erp_idx;	/* indirection array index */
37610293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
37620293ce3aSMandy Kirkconnell 	int		high;		/* binary search upper limit */
37630293ce3aSMandy Kirkconnell 	int		low;		/* binary search lower limit */
37640293ce3aSMandy Kirkconnell 	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
37650293ce3aSMandy Kirkconnell 
37660293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
376787bef181SChristoph Hellwig 	ASSERT(page_idx >= 0);
376887bef181SChristoph Hellwig 	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
376987bef181SChristoph Hellwig 	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
377087bef181SChristoph Hellwig 
37710293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
37720293ce3aSMandy Kirkconnell 	erp_idx = 0;
37730293ce3aSMandy Kirkconnell 	low = 0;
37740293ce3aSMandy Kirkconnell 	high = nlists - 1;
37750293ce3aSMandy Kirkconnell 
37760293ce3aSMandy Kirkconnell 	/* Binary search extent irec's */
37770293ce3aSMandy Kirkconnell 	while (low <= high) {
37780293ce3aSMandy Kirkconnell 		erp_idx = (low + high) >> 1;
37790293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
37800293ce3aSMandy Kirkconnell 		prev = erp_idx > 0 ? erp - 1 : NULL;
37810293ce3aSMandy Kirkconnell 		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
37820293ce3aSMandy Kirkconnell 		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
37830293ce3aSMandy Kirkconnell 			high = erp_idx - 1;
37840293ce3aSMandy Kirkconnell 		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
37850293ce3aSMandy Kirkconnell 			   (page_idx == erp->er_extoff + erp->er_extcount &&
37860293ce3aSMandy Kirkconnell 			    !realloc)) {
37870293ce3aSMandy Kirkconnell 			low = erp_idx + 1;
37880293ce3aSMandy Kirkconnell 		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
37890293ce3aSMandy Kirkconnell 			   erp->er_extcount == XFS_LINEAR_EXTS) {
37900293ce3aSMandy Kirkconnell 			ASSERT(realloc);
37910293ce3aSMandy Kirkconnell 			page_idx = 0;
37920293ce3aSMandy Kirkconnell 			erp_idx++;
37930293ce3aSMandy Kirkconnell 			erp = erp_idx < nlists ? erp + 1 : NULL;
37940293ce3aSMandy Kirkconnell 			break;
37950293ce3aSMandy Kirkconnell 		} else {
37960293ce3aSMandy Kirkconnell 			page_idx -= erp->er_extoff;
37970293ce3aSMandy Kirkconnell 			break;
37980293ce3aSMandy Kirkconnell 		}
37990293ce3aSMandy Kirkconnell 	}
38000293ce3aSMandy Kirkconnell 	*idxp = page_idx;
38010293ce3aSMandy Kirkconnell 	*erp_idxp = erp_idx;
38020293ce3aSMandy Kirkconnell 	return erp;
38030293ce3aSMandy Kirkconnell }
38040293ce3aSMandy Kirkconnell 
38050293ce3aSMandy Kirkconnell /*
38060293ce3aSMandy Kirkconnell  * Allocate and initialize an indirection array once the space needed
38070293ce3aSMandy Kirkconnell  * for incore extents increases above XFS_IEXT_BUFSZ.
38080293ce3aSMandy Kirkconnell  */
38090293ce3aSMandy Kirkconnell void
38100293ce3aSMandy Kirkconnell xfs_iext_irec_init(
38110293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
38120293ce3aSMandy Kirkconnell {
38130293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
38140293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
38150293ce3aSMandy Kirkconnell 
38160293ce3aSMandy Kirkconnell 	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
38170293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
38180293ce3aSMandy Kirkconnell 	ASSERT(nextents <= XFS_LINEAR_EXTS);
38190293ce3aSMandy Kirkconnell 
38206785073bSDavid Chinner 	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
38210293ce3aSMandy Kirkconnell 
38220293ce3aSMandy Kirkconnell 	if (nextents == 0) {
38236785073bSDavid Chinner 		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
38240293ce3aSMandy Kirkconnell 	} else if (!ifp->if_real_bytes) {
38250293ce3aSMandy Kirkconnell 		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
38260293ce3aSMandy Kirkconnell 	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
38270293ce3aSMandy Kirkconnell 		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
38280293ce3aSMandy Kirkconnell 	}
38290293ce3aSMandy Kirkconnell 	erp->er_extbuf = ifp->if_u1.if_extents;
38300293ce3aSMandy Kirkconnell 	erp->er_extcount = nextents;
38310293ce3aSMandy Kirkconnell 	erp->er_extoff = 0;
38320293ce3aSMandy Kirkconnell 
38330293ce3aSMandy Kirkconnell 	ifp->if_flags |= XFS_IFEXTIREC;
38340293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
38350293ce3aSMandy Kirkconnell 	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
38360293ce3aSMandy Kirkconnell 	ifp->if_u1.if_ext_irec = erp;
38370293ce3aSMandy Kirkconnell 
38380293ce3aSMandy Kirkconnell 	return;
38390293ce3aSMandy Kirkconnell }
38400293ce3aSMandy Kirkconnell 
38410293ce3aSMandy Kirkconnell /*
38420293ce3aSMandy Kirkconnell  * Allocate and initialize a new entry in the indirection array.
38430293ce3aSMandy Kirkconnell  */
38440293ce3aSMandy Kirkconnell xfs_ext_irec_t *
38450293ce3aSMandy Kirkconnell xfs_iext_irec_new(
38460293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
38470293ce3aSMandy Kirkconnell 	int		erp_idx)	/* index for new irec */
38480293ce3aSMandy Kirkconnell {
38490293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
38500293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
38510293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
38520293ce3aSMandy Kirkconnell 
38530293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
38540293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38550293ce3aSMandy Kirkconnell 
38560293ce3aSMandy Kirkconnell 	/* Resize indirection array */
38570293ce3aSMandy Kirkconnell 	xfs_iext_realloc_indirect(ifp, ++nlists *
38580293ce3aSMandy Kirkconnell 				  sizeof(xfs_ext_irec_t));
38590293ce3aSMandy Kirkconnell 	/*
38600293ce3aSMandy Kirkconnell 	 * Move records down in the array so the
38610293ce3aSMandy Kirkconnell 	 * new page can use erp_idx.
38620293ce3aSMandy Kirkconnell 	 */
38630293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
38640293ce3aSMandy Kirkconnell 	for (i = nlists - 1; i > erp_idx; i--) {
38650293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
38660293ce3aSMandy Kirkconnell 	}
38670293ce3aSMandy Kirkconnell 	ASSERT(i == erp_idx);
38680293ce3aSMandy Kirkconnell 
38690293ce3aSMandy Kirkconnell 	/* Initialize new extent record */
38700293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
38716785073bSDavid Chinner 	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
38720293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
38730293ce3aSMandy Kirkconnell 	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
38740293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extcount = 0;
38750293ce3aSMandy Kirkconnell 	erp[erp_idx].er_extoff = erp_idx > 0 ?
38760293ce3aSMandy Kirkconnell 		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
38770293ce3aSMandy Kirkconnell 	return &erp[erp_idx];
38780293ce3aSMandy Kirkconnell }
38790293ce3aSMandy Kirkconnell 
38800293ce3aSMandy Kirkconnell /*
38810293ce3aSMandy Kirkconnell  * Remove a record from the indirection array.
38820293ce3aSMandy Kirkconnell  */
38830293ce3aSMandy Kirkconnell void
38840293ce3aSMandy Kirkconnell xfs_iext_irec_remove(
38850293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
38860293ce3aSMandy Kirkconnell 	int		erp_idx)	/* irec index to remove */
38870293ce3aSMandy Kirkconnell {
38880293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp;		/* indirection array pointer */
38890293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
38900293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
38910293ce3aSMandy Kirkconnell 
38920293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
38930293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
38940293ce3aSMandy Kirkconnell 	erp = &ifp->if_u1.if_ext_irec[erp_idx];
38950293ce3aSMandy Kirkconnell 	if (erp->er_extbuf) {
38960293ce3aSMandy Kirkconnell 		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
38970293ce3aSMandy Kirkconnell 			-erp->er_extcount);
3898f0e2d93cSDenys Vlasenko 		kmem_free(erp->er_extbuf);
38990293ce3aSMandy Kirkconnell 	}
39000293ce3aSMandy Kirkconnell 	/* Compact extent records */
39010293ce3aSMandy Kirkconnell 	erp = ifp->if_u1.if_ext_irec;
39020293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists - 1; i++) {
39030293ce3aSMandy Kirkconnell 		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
39040293ce3aSMandy Kirkconnell 	}
39050293ce3aSMandy Kirkconnell 	/*
39060293ce3aSMandy Kirkconnell 	 * Manually free the last extent record from the indirection
39070293ce3aSMandy Kirkconnell 	 * array.  A call to xfs_iext_realloc_indirect() with a size
39080293ce3aSMandy Kirkconnell 	 * of zero would result in a call to xfs_iext_destroy() which
39090293ce3aSMandy Kirkconnell 	 * would in turn call this function again, creating a nasty
39100293ce3aSMandy Kirkconnell 	 * infinite loop.
39110293ce3aSMandy Kirkconnell 	 */
39120293ce3aSMandy Kirkconnell 	if (--nlists) {
39130293ce3aSMandy Kirkconnell 		xfs_iext_realloc_indirect(ifp,
39140293ce3aSMandy Kirkconnell 			nlists * sizeof(xfs_ext_irec_t));
39150293ce3aSMandy Kirkconnell 	} else {
3916f0e2d93cSDenys Vlasenko 		kmem_free(ifp->if_u1.if_ext_irec);
39170293ce3aSMandy Kirkconnell 	}
39180293ce3aSMandy Kirkconnell 	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
39190293ce3aSMandy Kirkconnell }
39200293ce3aSMandy Kirkconnell 
39210293ce3aSMandy Kirkconnell /*
39220293ce3aSMandy Kirkconnell  * This is called to clean up large amounts of unused memory allocated
39230293ce3aSMandy Kirkconnell  * by the indirection array.  Before compacting anything though, verify
39240293ce3aSMandy Kirkconnell  * that the indirection array is still needed and switch back to the
39250293ce3aSMandy Kirkconnell  * linear extent list (or even the inline buffer) if possible.  The
39260293ce3aSMandy Kirkconnell  * compaction policy is as follows:
39270293ce3aSMandy Kirkconnell  *
39280293ce3aSMandy Kirkconnell  *    Full Compaction: Extents fit into a single page (or inline buffer)
392971a8c87fSLachlan McIlroy  * Partial Compaction: Extents occupy less than 50% of allocated space
39300293ce3aSMandy Kirkconnell  *      No Compaction: Extents occupy at least 50% of allocated space
39310293ce3aSMandy Kirkconnell  */
39320293ce3aSMandy Kirkconnell void
39330293ce3aSMandy Kirkconnell xfs_iext_irec_compact(
39340293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
39350293ce3aSMandy Kirkconnell {
39360293ce3aSMandy Kirkconnell 	xfs_extnum_t	nextents;	/* number of extents in file */
39370293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
39380293ce3aSMandy Kirkconnell 
39390293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
39400293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
39410293ce3aSMandy Kirkconnell 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
39420293ce3aSMandy Kirkconnell 
39430293ce3aSMandy Kirkconnell 	if (nextents == 0) {
39440293ce3aSMandy Kirkconnell 		xfs_iext_destroy(ifp);
39450293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_INLINE_EXTS) {
39460293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
39470293ce3aSMandy Kirkconnell 		xfs_iext_direct_to_inline(ifp, nextents);
39480293ce3aSMandy Kirkconnell 	} else if (nextents <= XFS_LINEAR_EXTS) {
39490293ce3aSMandy Kirkconnell 		xfs_iext_indirect_to_direct(ifp);
39500293ce3aSMandy Kirkconnell 	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
39510293ce3aSMandy Kirkconnell 		xfs_iext_irec_compact_pages(ifp);
39520293ce3aSMandy Kirkconnell 	}
39530293ce3aSMandy Kirkconnell }
39540293ce3aSMandy Kirkconnell 
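/*
 * A standalone sketch of the compaction policy implemented above, assuming
 * the usual sizes (a 4096-byte extent buffer and 16-byte extent records,
 * i.e. roughly 256 extents per list and 2 inline extents).  The MODEL_*
 * names and values are illustrative assumptions, not taken from the XFS
 * headers; only the shape of the threshold checks is mirrored.
 */
#if 0	/* illustrative sketch only */
#include <stdio.h>

#define MODEL_INLINE_EXTS	2	/* assumed inline capacity */
#define MODEL_LINEAR_EXTS	256	/* assumed extents per buffer */

static const char *compact_choice(int nextents, int nlists)
{
	if (nextents == 0)
		return "destroy extent list";
	if (nextents <= MODEL_INLINE_EXTS)
		return "full compaction to inline buffer";
	if (nextents <= MODEL_LINEAR_EXTS)
		return "full compaction to a single linear list";
	if (nextents < (nlists * MODEL_LINEAR_EXTS) >> 1)
		return "partial compaction (merge pages)";
	return "no compaction (at least 50% of space used)";
}

int main(void)
{
	/* 300 extents spread over 4 pages is below 50% of 1024 slots */
	printf("%s\n", compact_choice(300, 4));
	return 0;
}
#endif
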
39550293ce3aSMandy Kirkconnell /*
39560293ce3aSMandy Kirkconnell  * Combine extents from neighboring extent pages.
39570293ce3aSMandy Kirkconnell  */
39580293ce3aSMandy Kirkconnell void
39590293ce3aSMandy Kirkconnell xfs_iext_irec_compact_pages(
39600293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp)		/* inode fork pointer */
39610293ce3aSMandy Kirkconnell {
39620293ce3aSMandy Kirkconnell 	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
39630293ce3aSMandy Kirkconnell 	int		erp_idx = 0;	/* indirection array index */
39640293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
39650293ce3aSMandy Kirkconnell 
39660293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
39670293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
39680293ce3aSMandy Kirkconnell 	while (erp_idx < nlists - 1) {
39690293ce3aSMandy Kirkconnell 		erp = &ifp->if_u1.if_ext_irec[erp_idx];
39700293ce3aSMandy Kirkconnell 		erp_next = erp + 1;
39710293ce3aSMandy Kirkconnell 		if (erp_next->er_extcount <=
39720293ce3aSMandy Kirkconnell 		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
397371a8c87fSLachlan McIlroy 			memcpy(&erp->er_extbuf[erp->er_extcount],
39740293ce3aSMandy Kirkconnell 				erp_next->er_extbuf, erp_next->er_extcount *
39750293ce3aSMandy Kirkconnell 				sizeof(xfs_bmbt_rec_t));
39760293ce3aSMandy Kirkconnell 			erp->er_extcount += erp_next->er_extcount;
39770293ce3aSMandy Kirkconnell 			/*
39780293ce3aSMandy Kirkconnell 			 * Free page before removing extent record
39790293ce3aSMandy Kirkconnell 			 * so er_extoffs don't get modified in
39800293ce3aSMandy Kirkconnell 			 * xfs_iext_irec_remove.
39810293ce3aSMandy Kirkconnell 			 */
3982f0e2d93cSDenys Vlasenko 			kmem_free(erp_next->er_extbuf);
39830293ce3aSMandy Kirkconnell 			erp_next->er_extbuf = NULL;
39840293ce3aSMandy Kirkconnell 			xfs_iext_irec_remove(ifp, erp_idx + 1);
39850293ce3aSMandy Kirkconnell 			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
39860293ce3aSMandy Kirkconnell 		} else {
39870293ce3aSMandy Kirkconnell 			erp_idx++;
39880293ce3aSMandy Kirkconnell 		}
39890293ce3aSMandy Kirkconnell 	}
39900293ce3aSMandy Kirkconnell }
39910293ce3aSMandy Kirkconnell 
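/*
 * A small userspace sketch of the neighbour-merge test used in
 * xfs_iext_irec_compact_pages() above: list i+1 is folded into list i only
 * when it fits in the space left in list i's fixed-size buffer.  The types
 * and the capacity value are assumptions made for illustration.
 */
#if 0	/* illustrative sketch only */
#include <string.h>

#define MODEL_CAP	256		/* assumed records per buffer */

struct list_model {
	long	rec[MODEL_CAP];		/* stand-in for extent records */
	int	count;			/* records currently used */
};

/* Returns 1 and merges when src fits behind dst's used records. */
static int merge_neighbour(struct list_model *dst, struct list_model *src)
{
	if (src->count > MODEL_CAP - dst->count)
		return 0;		/* would overflow, leave alone */
	memcpy(&dst->rec[dst->count], src->rec,
	       src->count * sizeof(src->rec[0]));
	dst->count += src->count;
	src->count = 0;
	return 1;
}
#endif
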
39920293ce3aSMandy Kirkconnell /*
39930293ce3aSMandy Kirkconnell  * This is called to update the er_extoff field in the indirection
39940293ce3aSMandy Kirkconnell  * array when extents have been added or removed from one of the
39950293ce3aSMandy Kirkconnell  * extent lists. erp_idx contains the irec index to begin updating
39960293ce3aSMandy Kirkconnell  * at and ext_diff contains the number of extents that were added
39970293ce3aSMandy Kirkconnell  * or removed.
39980293ce3aSMandy Kirkconnell  */
39990293ce3aSMandy Kirkconnell void
40000293ce3aSMandy Kirkconnell xfs_iext_irec_update_extoffs(
40010293ce3aSMandy Kirkconnell 	xfs_ifork_t	*ifp,		/* inode fork pointer */
40020293ce3aSMandy Kirkconnell 	int		erp_idx,	/* irec index to update */
40030293ce3aSMandy Kirkconnell 	int		ext_diff)	/* number of new extents */
40040293ce3aSMandy Kirkconnell {
40050293ce3aSMandy Kirkconnell 	int		i;		/* loop counter */
40060293ce3aSMandy Kirkconnell 	int		nlists;		/* number of irec's (ex lists) */
40070293ce3aSMandy Kirkconnell 
40080293ce3aSMandy Kirkconnell 	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
40090293ce3aSMandy Kirkconnell 	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
40100293ce3aSMandy Kirkconnell 	for (i = erp_idx; i < nlists; i++) {
40110293ce3aSMandy Kirkconnell 		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
40120293ce3aSMandy Kirkconnell 	}
40130293ce3aSMandy Kirkconnell }
401472b53efaSBrian Foster 
401572b53efaSBrian Foster /*
401672b53efaSBrian Foster  * Test whether it is appropriate to check an inode for and free post EOF
401772b53efaSBrian Foster  * blocks. The 'force' parameter determines whether we should also consider
401872b53efaSBrian Foster  * regular files that are marked preallocated or append-only.
401972b53efaSBrian Foster  */
402072b53efaSBrian Foster bool
402172b53efaSBrian Foster xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
402272b53efaSBrian Foster {
402372b53efaSBrian Foster 	/* prealloc/delalloc exists only on regular files */
402472b53efaSBrian Foster 	if (!S_ISREG(ip->i_d.di_mode))
402572b53efaSBrian Foster 		return false;
402672b53efaSBrian Foster 
402772b53efaSBrian Foster 	/*
402872b53efaSBrian Foster 	 * Zero sized files with no cached pages and delalloc blocks will not
402972b53efaSBrian Foster 	 * have speculative prealloc/delalloc blocks to remove.
403072b53efaSBrian Foster 	 */
403172b53efaSBrian Foster 	if (VFS_I(ip)->i_size == 0 &&
403272b53efaSBrian Foster 	    VN_CACHED(VFS_I(ip)) == 0 &&
403372b53efaSBrian Foster 	    ip->i_delayed_blks == 0)
403472b53efaSBrian Foster 		return false;
403572b53efaSBrian Foster 
403672b53efaSBrian Foster 	/* If we haven't read in the extent list, then don't do it now. */
403772b53efaSBrian Foster 	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
403872b53efaSBrian Foster 		return false;
403972b53efaSBrian Foster 
404072b53efaSBrian Foster 	/*
404172b53efaSBrian Foster 	 * Do not free real preallocated or append-only files unless the file
404272b53efaSBrian Foster 	 * has delalloc blocks and we are forced to remove them.
404372b53efaSBrian Foster 	 */
404472b53efaSBrian Foster 	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
404572b53efaSBrian Foster 		if (!force || ip->i_delayed_blks == 0)
404672b53efaSBrian Foster 			return false;
404772b53efaSBrian Foster 
404872b53efaSBrian Foster 	return true;
404972b53efaSBrian Foster }
405072b53efaSBrian Foster 
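/*
 * A compact userspace model of the decision made by xfs_can_free_eofblocks()
 * above, using an invented struct and field names purely for illustration.
 * It mirrors the order of the checks: regular file, any speculative blocks
 * at all, extent list read in, and the forced case for preallocated or
 * append-only files.
 */
#if 0	/* illustrative sketch only */
#include <stdbool.h>

struct eof_model {
	bool	is_regular;		/* S_ISREG() equivalent */
	long	size;			/* VFS inode size */
	long	cached_pages;		/* pagecache pages */
	long	delayed_blks;		/* delalloc block count */
	bool	extents_read;		/* extent list in memory */
	bool	prealloc_or_append;	/* PREALLOC or APPEND flag set */
};

static bool can_free_eofblocks_model(const struct eof_model *m, bool force)
{
	if (!m->is_regular)
		return false;
	if (m->size == 0 && m->cached_pages == 0 && m->delayed_blks == 0)
		return false;
	if (!m->extents_read)
		return false;
	if (m->prealloc_or_append && (!force || m->delayed_blks == 0))
		return false;
	return true;
}
#endif
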
4051