xref: /openbmc/linux/fs/xfs/xfs_inode_item.c (revision 94c7b6fc)
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_format.h"
21 #include "xfs_log_format.h"
22 #include "xfs_trans_resv.h"
23 #include "xfs_sb.h"
24 #include "xfs_ag.h"
25 #include "xfs_mount.h"
26 #include "xfs_inode.h"
27 #include "xfs_trans.h"
28 #include "xfs_inode_item.h"
29 #include "xfs_error.h"
30 #include "xfs_trace.h"
31 #include "xfs_trans_priv.h"
32 #include "xfs_dinode.h"
33 #include "xfs_log.h"
34 
35 
kmem_zone_t	*xfs_ili_zone;		/* zone that inode log items are allocated from and freed to */
37 
38 static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
39 {
40 	return container_of(lip, struct xfs_inode_log_item, ili_item);
41 }
42 
43 STATIC void
44 xfs_inode_item_data_fork_size(
45 	struct xfs_inode_log_item *iip,
46 	int			*nvecs,
47 	int			*nbytes)
48 {
49 	struct xfs_inode	*ip = iip->ili_inode;
50 
51 	switch (ip->i_d.di_format) {
52 	case XFS_DINODE_FMT_EXTENTS:
53 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
54 		    ip->i_d.di_nextents > 0 &&
55 		    ip->i_df.if_bytes > 0) {
56 			/* worst case, doesn't subtract delalloc extents */
57 			*nbytes += XFS_IFORK_DSIZE(ip);
58 			*nvecs += 1;
59 		}
60 		break;
61 	case XFS_DINODE_FMT_BTREE:
62 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
63 		    ip->i_df.if_broot_bytes > 0) {
64 			*nbytes += ip->i_df.if_broot_bytes;
65 			*nvecs += 1;
66 		}
67 		break;
68 	case XFS_DINODE_FMT_LOCAL:
69 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
70 		    ip->i_df.if_bytes > 0) {
71 			*nbytes += roundup(ip->i_df.if_bytes, 4);
72 			*nvecs += 1;
73 		}
74 		break;
75 
76 	case XFS_DINODE_FMT_DEV:
77 	case XFS_DINODE_FMT_UUID:
78 		break;
79 	default:
80 		ASSERT(0);
81 		break;
82 	}
83 }
84 
85 STATIC void
86 xfs_inode_item_attr_fork_size(
87 	struct xfs_inode_log_item *iip,
88 	int			*nvecs,
89 	int			*nbytes)
90 {
91 	struct xfs_inode	*ip = iip->ili_inode;
92 
93 	switch (ip->i_d.di_aformat) {
94 	case XFS_DINODE_FMT_EXTENTS:
95 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
96 		    ip->i_d.di_anextents > 0 &&
97 		    ip->i_afp->if_bytes > 0) {
98 			/* worst case, doesn't subtract unused space */
99 			*nbytes += XFS_IFORK_ASIZE(ip);
100 			*nvecs += 1;
101 		}
102 		break;
103 	case XFS_DINODE_FMT_BTREE:
104 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
105 		    ip->i_afp->if_broot_bytes > 0) {
106 			*nbytes += ip->i_afp->if_broot_bytes;
107 			*nvecs += 1;
108 		}
109 		break;
110 	case XFS_DINODE_FMT_LOCAL:
111 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
112 		    ip->i_afp->if_bytes > 0) {
113 			*nbytes += roundup(ip->i_afp->if_bytes, 4);
114 			*nvecs += 1;
115 		}
116 		break;
117 	default:
118 		ASSERT(0);
119 		break;
120 	}
121 }
122 
123 /*
124  * This returns the number of iovecs needed to log the given inode item.
125  *
126  * We need one iovec for the inode log format structure, one for the
127  * inode core, and possibly one for the inode data/extents/b-tree root
128  * and one for the inode attribute data/extents/b-tree root.
129  */
130 STATIC void
131 xfs_inode_item_size(
132 	struct xfs_log_item	*lip,
133 	int			*nvecs,
134 	int			*nbytes)
135 {
136 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
137 	struct xfs_inode	*ip = iip->ili_inode;
138 
139 	*nvecs += 2;
140 	*nbytes += sizeof(struct xfs_inode_log_format) +
141 		   xfs_icdinode_size(ip->i_d.di_version);
142 
143 	xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
144 	if (XFS_IFORK_Q(ip))
145 		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
146 }
147 
148 STATIC void
149 xfs_inode_item_format_data_fork(
150 	struct xfs_inode_log_item *iip,
151 	struct xfs_inode_log_format *ilf,
152 	struct xfs_log_vec	*lv,
153 	struct xfs_log_iovec	**vecp)
154 {
155 	struct xfs_inode	*ip = iip->ili_inode;
156 	size_t			data_bytes;
157 
158 	switch (ip->i_d.di_format) {
159 	case XFS_DINODE_FMT_EXTENTS:
160 		iip->ili_fields &=
161 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
162 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
163 
164 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
165 		    ip->i_d.di_nextents > 0 &&
166 		    ip->i_df.if_bytes > 0) {
167 			struct xfs_bmbt_rec *p;
168 
169 			ASSERT(ip->i_df.if_u1.if_extents != NULL);
170 			ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
171 
172 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
173 			data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
174 			xlog_finish_iovec(lv, *vecp, data_bytes);
175 
176 			ASSERT(data_bytes <= ip->i_df.if_bytes);
177 
178 			ilf->ilf_dsize = data_bytes;
179 			ilf->ilf_size++;
180 		} else {
181 			iip->ili_fields &= ~XFS_ILOG_DEXT;
182 		}
183 		break;
184 	case XFS_DINODE_FMT_BTREE:
185 		iip->ili_fields &=
186 			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
187 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
188 
189 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
190 		    ip->i_df.if_broot_bytes > 0) {
191 			ASSERT(ip->i_df.if_broot != NULL);
192 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
193 					ip->i_df.if_broot,
194 					ip->i_df.if_broot_bytes);
195 			ilf->ilf_dsize = ip->i_df.if_broot_bytes;
196 			ilf->ilf_size++;
197 		} else {
198 			ASSERT(!(iip->ili_fields &
199 				 XFS_ILOG_DBROOT));
200 			iip->ili_fields &= ~XFS_ILOG_DBROOT;
201 		}
202 		break;
203 	case XFS_DINODE_FMT_LOCAL:
204 		iip->ili_fields &=
205 			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
206 			  XFS_ILOG_DEV | XFS_ILOG_UUID);
207 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
208 		    ip->i_df.if_bytes > 0) {
209 			/*
210 			 * Round i_bytes up to a word boundary.
211 			 * The underlying memory is guaranteed to
212 			 * to be there by xfs_idata_realloc().
213 			 */
214 			data_bytes = roundup(ip->i_df.if_bytes, 4);
215 			ASSERT(ip->i_df.if_real_bytes == 0 ||
216 			       ip->i_df.if_real_bytes == data_bytes);
217 			ASSERT(ip->i_df.if_u1.if_data != NULL);
218 			ASSERT(ip->i_d.di_size > 0);
219 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
220 					ip->i_df.if_u1.if_data, data_bytes);
221 			ilf->ilf_dsize = (unsigned)data_bytes;
222 			ilf->ilf_size++;
223 		} else {
224 			iip->ili_fields &= ~XFS_ILOG_DDATA;
225 		}
226 		break;
227 	case XFS_DINODE_FMT_DEV:
228 		iip->ili_fields &=
229 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
230 			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
231 		if (iip->ili_fields & XFS_ILOG_DEV)
232 			ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev;
233 		break;
234 	case XFS_DINODE_FMT_UUID:
235 		iip->ili_fields &=
236 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
237 			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
238 		if (iip->ili_fields & XFS_ILOG_UUID)
239 			ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid;
240 		break;
241 	default:
242 		ASSERT(0);
243 		break;
244 	}
245 }
246 
247 STATIC void
248 xfs_inode_item_format_attr_fork(
249 	struct xfs_inode_log_item *iip,
250 	struct xfs_inode_log_format *ilf,
251 	struct xfs_log_vec	*lv,
252 	struct xfs_log_iovec	**vecp)
253 {
254 	struct xfs_inode	*ip = iip->ili_inode;
255 	size_t			data_bytes;
256 
257 	switch (ip->i_d.di_aformat) {
258 	case XFS_DINODE_FMT_EXTENTS:
259 		iip->ili_fields &=
260 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
261 
262 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
263 		    ip->i_d.di_anextents > 0 &&
264 		    ip->i_afp->if_bytes > 0) {
265 			struct xfs_bmbt_rec *p;
266 
267 			ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
268 				ip->i_d.di_anextents);
269 			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
270 
271 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
272 			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
273 			xlog_finish_iovec(lv, *vecp, data_bytes);
274 
275 			ilf->ilf_asize = data_bytes;
276 			ilf->ilf_size++;
277 		} else {
278 			iip->ili_fields &= ~XFS_ILOG_AEXT;
279 		}
280 		break;
281 	case XFS_DINODE_FMT_BTREE:
282 		iip->ili_fields &=
283 			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
284 
285 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
286 		    ip->i_afp->if_broot_bytes > 0) {
287 			ASSERT(ip->i_afp->if_broot != NULL);
288 
289 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
290 					ip->i_afp->if_broot,
291 					ip->i_afp->if_broot_bytes);
292 			ilf->ilf_asize = ip->i_afp->if_broot_bytes;
293 			ilf->ilf_size++;
294 		} else {
295 			iip->ili_fields &= ~XFS_ILOG_ABROOT;
296 		}
297 		break;
298 	case XFS_DINODE_FMT_LOCAL:
299 		iip->ili_fields &=
300 			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
301 
302 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
303 		    ip->i_afp->if_bytes > 0) {
304 			/*
305 			 * Round i_bytes up to a word boundary.
306 			 * The underlying memory is guaranteed to
307 			 * to be there by xfs_idata_realloc().
308 			 */
309 			data_bytes = roundup(ip->i_afp->if_bytes, 4);
310 			ASSERT(ip->i_afp->if_real_bytes == 0 ||
311 			       ip->i_afp->if_real_bytes == data_bytes);
312 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
313 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
314 					ip->i_afp->if_u1.if_data,
315 					data_bytes);
316 			ilf->ilf_asize = (unsigned)data_bytes;
317 			ilf->ilf_size++;
318 		} else {
319 			iip->ili_fields &= ~XFS_ILOG_ADATA;
320 		}
321 		break;
322 	default:
323 		ASSERT(0);
324 		break;
325 	}
326 }
327 
328 /*
329  * This is called to fill in the vector of log iovecs for the given inode
330  * log item.  It fills the first item with an inode log format structure,
331  * the second with the on-disk inode structure, and a possible third and/or
332  * fourth with the inode data/extents/b-tree root and inode attributes
333  * data/extents/b-tree root.
334  */
335 STATIC void
336 xfs_inode_item_format(
337 	struct xfs_log_item	*lip,
338 	struct xfs_log_vec	*lv)
339 {
340 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
341 	struct xfs_inode	*ip = iip->ili_inode;
342 	struct xfs_inode_log_format *ilf;
343 	struct xfs_log_iovec	*vecp = NULL;
344 
345 	ASSERT(ip->i_d.di_version > 1);
346 
347 	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
348 	ilf->ilf_type = XFS_LI_INODE;
349 	ilf->ilf_ino = ip->i_ino;
350 	ilf->ilf_blkno = ip->i_imap.im_blkno;
351 	ilf->ilf_len = ip->i_imap.im_len;
352 	ilf->ilf_boffset = ip->i_imap.im_boffset;
353 	ilf->ilf_fields = XFS_ILOG_CORE;
354 	ilf->ilf_size = 2; /* format + core */
355 	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
356 
357 	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
358 			&ip->i_d,
359 			xfs_icdinode_size(ip->i_d.di_version));
360 
361 	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
362 	if (XFS_IFORK_Q(ip)) {
363 		xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
364 	} else {
365 		iip->ili_fields &=
366 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
367 	}
368 
369 	/* update the format with the exact fields we actually logged */
370 	ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
371 }
372 
373 /*
374  * This is called to pin the inode associated with the inode log
375  * item in memory so it cannot be written out.
376  */
377 STATIC void
378 xfs_inode_item_pin(
379 	struct xfs_log_item	*lip)
380 {
381 	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
382 
383 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
384 
385 	trace_xfs_inode_pin(ip, _RET_IP_);
386 	atomic_inc(&ip->i_pincount);
387 }
388 
389 
390 /*
391  * This is called to unpin the inode associated with the inode log
392  * item which was previously pinned with a call to xfs_inode_item_pin().
393  *
394  * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
395  */
396 STATIC void
397 xfs_inode_item_unpin(
398 	struct xfs_log_item	*lip,
399 	int			remove)
400 {
401 	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
402 
403 	trace_xfs_inode_unpin(ip, _RET_IP_);
404 	ASSERT(atomic_read(&ip->i_pincount) > 0);
405 	if (atomic_dec_and_test(&ip->i_pincount))
406 		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
407 }
408 
409 STATIC uint
410 xfs_inode_item_push(
411 	struct xfs_log_item	*lip,
412 	struct list_head	*buffer_list)
413 {
414 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
415 	struct xfs_inode	*ip = iip->ili_inode;
416 	struct xfs_buf		*bp = NULL;
417 	uint			rval = XFS_ITEM_SUCCESS;
418 	int			error;
419 
420 	if (xfs_ipincount(ip) > 0)
421 		return XFS_ITEM_PINNED;
422 
423 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
424 		return XFS_ITEM_LOCKED;
425 
426 	/*
427 	 * Re-check the pincount now that we stabilized the value by
428 	 * taking the ilock.
429 	 */
430 	if (xfs_ipincount(ip) > 0) {
431 		rval = XFS_ITEM_PINNED;
432 		goto out_unlock;
433 	}
434 
435 	/*
436 	 * Stale inode items should force out the iclog.
437 	 */
438 	if (ip->i_flags & XFS_ISTALE) {
439 		rval = XFS_ITEM_PINNED;
440 		goto out_unlock;
441 	}
442 
443 	/*
444 	 * Someone else is already flushing the inode.  Nothing we can do
445 	 * here but wait for the flush to finish and remove the item from
446 	 * the AIL.
447 	 */
448 	if (!xfs_iflock_nowait(ip)) {
449 		rval = XFS_ITEM_FLUSHING;
450 		goto out_unlock;
451 	}
452 
453 	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
454 	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
455 
456 	spin_unlock(&lip->li_ailp->xa_lock);
457 
458 	error = xfs_iflush(ip, &bp);
459 	if (!error) {
460 		if (!xfs_buf_delwri_queue(bp, buffer_list))
461 			rval = XFS_ITEM_FLUSHING;
462 		xfs_buf_relse(bp);
463 	}
464 
465 	spin_lock(&lip->li_ailp->xa_lock);
466 out_unlock:
467 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
468 	return rval;
469 }
470 
471 /*
472  * Unlock the inode associated with the inode log item.
473  * Clear the fields of the inode and inode log item that
474  * are specific to the current transaction.  If the
475  * hold flags is set, do not unlock the inode.
476  */
477 STATIC void
478 xfs_inode_item_unlock(
479 	struct xfs_log_item	*lip)
480 {
481 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
482 	struct xfs_inode	*ip = iip->ili_inode;
483 	unsigned short		lock_flags;
484 
485 	ASSERT(ip->i_itemp != NULL);
486 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
487 
488 	lock_flags = iip->ili_lock_flags;
489 	iip->ili_lock_flags = 0;
490 	if (lock_flags)
491 		xfs_iunlock(ip, lock_flags);
492 }
493 
494 /*
495  * This is called to find out where the oldest active copy of the inode log
496  * item in the on disk log resides now that the last log write of it completed
497  * at the given lsn.  Since we always re-log all dirty data in an inode, the
498  * latest copy in the on disk log is the only one that matters.  Therefore,
499  * simply return the given lsn.
500  *
501  * If the inode has been marked stale because the cluster is being freed, we
502  * don't want to (re-)insert this inode into the AIL. There is a race condition
503  * where the cluster buffer may be unpinned before the inode is inserted into
504  * the AIL during transaction committed processing. If the buffer is unpinned
505  * before the inode item has been committed and inserted, then it is possible
506  * for the buffer to be written and IO completes before the inode is inserted
507  * into the AIL. In that case, we'd be inserting a clean, stale inode into the
508  * AIL which will never get removed. It will, however, get reclaimed which
509  * triggers an assert in xfs_inode_free() complaining about freein an inode
510  * still in the AIL.
511  *
512  * To avoid this, just unpin the inode directly and return a LSN of -1 so the
513  * transaction committed code knows that it does not need to do any further
514  * processing on the item.
515  */
516 STATIC xfs_lsn_t
517 xfs_inode_item_committed(
518 	struct xfs_log_item	*lip,
519 	xfs_lsn_t		lsn)
520 {
521 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
522 	struct xfs_inode	*ip = iip->ili_inode;
523 
524 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
525 		xfs_inode_item_unpin(lip, 0);
526 		return -1;
527 	}
528 	return lsn;
529 }
530 
531 /*
532  * XXX rcc - this one really has to do something.  Probably needs
533  * to stamp in a new field in the incore inode.
534  */
535 STATIC void
536 xfs_inode_item_committing(
537 	struct xfs_log_item	*lip,
538 	xfs_lsn_t		lsn)
539 {
540 	INODE_ITEM(lip)->ili_last_lsn = lsn;
541 }
542 
543 /*
544  * This is the ops vector shared by all buf log items.
545  */
546 static const struct xfs_item_ops xfs_inode_item_ops = {
547 	.iop_size	= xfs_inode_item_size,
548 	.iop_format	= xfs_inode_item_format,
549 	.iop_pin	= xfs_inode_item_pin,
550 	.iop_unpin	= xfs_inode_item_unpin,
551 	.iop_unlock	= xfs_inode_item_unlock,
552 	.iop_committed	= xfs_inode_item_committed,
553 	.iop_push	= xfs_inode_item_push,
554 	.iop_committing = xfs_inode_item_committing
555 };
556 
557 
558 /*
559  * Initialize the inode log item for a newly allocated (in-core) inode.
560  */
561 void
562 xfs_inode_item_init(
563 	struct xfs_inode	*ip,
564 	struct xfs_mount	*mp)
565 {
566 	struct xfs_inode_log_item *iip;
567 
568 	ASSERT(ip->i_itemp == NULL);
569 	iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
570 
571 	iip->ili_inode = ip;
572 	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
573 						&xfs_inode_item_ops);
574 }
575 
576 /*
577  * Free the inode log item and any memory hanging off of it.
578  */
579 void
580 xfs_inode_item_destroy(
581 	xfs_inode_t	*ip)
582 {
583 	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
584 }
585 
586 
587 /*
588  * This is the inode flushing I/O completion routine.  It is called
589  * from interrupt level when the buffer containing the inode is
590  * flushed to disk.  It is responsible for removing the inode item
591  * from the AIL if it has not been re-logged, and unlocking the inode's
592  * flush lock.
593  *
594  * To reduce AIL lock traffic as much as possible, we scan the buffer log item
595  * list for other inodes that will run this function. We remove them from the
596  * buffer list so we can process all the inode IO completions in one AIL lock
597  * traversal.
598  */
599 void
600 xfs_iflush_done(
601 	struct xfs_buf		*bp,
602 	struct xfs_log_item	*lip)
603 {
604 	struct xfs_inode_log_item *iip;
605 	struct xfs_log_item	*blip;
606 	struct xfs_log_item	*next;
607 	struct xfs_log_item	*prev;
608 	struct xfs_ail		*ailp = lip->li_ailp;
609 	int			need_ail = 0;
610 
611 	/*
612 	 * Scan the buffer IO completions for other inodes being completed and
613 	 * attach them to the current inode log item.
614 	 */
615 	blip = bp->b_fspriv;
616 	prev = NULL;
617 	while (blip != NULL) {
618 		if (lip->li_cb != xfs_iflush_done) {
619 			prev = blip;
620 			blip = blip->li_bio_list;
621 			continue;
622 		}
623 
624 		/* remove from list */
625 		next = blip->li_bio_list;
626 		if (!prev) {
627 			bp->b_fspriv = next;
628 		} else {
629 			prev->li_bio_list = next;
630 		}
631 
632 		/* add to current list */
633 		blip->li_bio_list = lip->li_bio_list;
634 		lip->li_bio_list = blip;
635 
636 		/*
637 		 * while we have the item, do the unlocked check for needing
638 		 * the AIL lock.
639 		 */
640 		iip = INODE_ITEM(blip);
641 		if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
642 			need_ail++;
643 
644 		blip = next;
645 	}
646 
647 	/* make sure we capture the state of the initial inode. */
648 	iip = INODE_ITEM(lip);
649 	if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
650 		need_ail++;
651 
652 	/*
653 	 * We only want to pull the item from the AIL if it is
654 	 * actually there and its location in the log has not
655 	 * changed since we started the flush.  Thus, we only bother
656 	 * if the ili_logged flag is set and the inode's lsn has not
657 	 * changed.  First we check the lsn outside
658 	 * the lock since it's cheaper, and then we recheck while
659 	 * holding the lock before removing the inode from the AIL.
660 	 */
661 	if (need_ail) {
662 		struct xfs_log_item *log_items[need_ail];
663 		int i = 0;
664 		spin_lock(&ailp->xa_lock);
665 		for (blip = lip; blip; blip = blip->li_bio_list) {
666 			iip = INODE_ITEM(blip);
667 			if (iip->ili_logged &&
668 			    blip->li_lsn == iip->ili_flush_lsn) {
669 				log_items[i++] = blip;
670 			}
671 			ASSERT(i <= need_ail);
672 		}
673 		/* xfs_trans_ail_delete_bulk() drops the AIL lock. */
674 		xfs_trans_ail_delete_bulk(ailp, log_items, i,
675 					  SHUTDOWN_CORRUPT_INCORE);
676 	}
677 
678 
679 	/*
680 	 * clean up and unlock the flush lock now we are done. We can clear the
681 	 * ili_last_fields bits now that we know that the data corresponding to
682 	 * them is safely on disk.
683 	 */
684 	for (blip = lip; blip; blip = next) {
685 		next = blip->li_bio_list;
686 		blip->li_bio_list = NULL;
687 
688 		iip = INODE_ITEM(blip);
689 		iip->ili_logged = 0;
690 		iip->ili_last_fields = 0;
691 		xfs_ifunlock(iip->ili_inode);
692 	}
693 }
694 
695 /*
696  * This is the inode flushing abort routine.  It is called from xfs_iflush when
697  * the filesystem is shutting down to clean up the inode state.  It is
698  * responsible for removing the inode item from the AIL if it has not been
699  * re-logged, and unlocking the inode's flush lock.
700  */
701 void
702 xfs_iflush_abort(
703 	xfs_inode_t		*ip,
704 	bool			stale)
705 {
706 	xfs_inode_log_item_t	*iip = ip->i_itemp;
707 
708 	if (iip) {
709 		struct xfs_ail	*ailp = iip->ili_item.li_ailp;
710 		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
711 			spin_lock(&ailp->xa_lock);
712 			if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
713 				/* xfs_trans_ail_delete() drops the AIL lock. */
714 				xfs_trans_ail_delete(ailp, &iip->ili_item,
715 						stale ?
716 						     SHUTDOWN_LOG_IO_ERROR :
717 						     SHUTDOWN_CORRUPT_INCORE);
718 			} else
719 				spin_unlock(&ailp->xa_lock);
720 		}
721 		iip->ili_logged = 0;
722 		/*
723 		 * Clear the ili_last_fields bits now that we know that the
724 		 * data corresponding to them is safely on disk.
725 		 */
726 		iip->ili_last_fields = 0;
727 		/*
728 		 * Clear the inode logging fields so no more flushes are
729 		 * attempted.
730 		 */
731 		iip->ili_fields = 0;
732 	}
733 	/*
734 	 * Release the inode's flush lock since we're done with it.
735 	 */
736 	xfs_ifunlock(ip);
737 }
738 
739 void
740 xfs_istale_done(
741 	struct xfs_buf		*bp,
742 	struct xfs_log_item	*lip)
743 {
744 	xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
745 }
746 
747 /*
748  * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
749  * (which can have different field alignments) to the native version
750  */
751 int
752 xfs_inode_item_format_convert(
753 	xfs_log_iovec_t		*buf,
754 	xfs_inode_log_format_t	*in_f)
755 {
756 	if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
757 		xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
758 
759 		in_f->ilf_type = in_f32->ilf_type;
760 		in_f->ilf_size = in_f32->ilf_size;
761 		in_f->ilf_fields = in_f32->ilf_fields;
762 		in_f->ilf_asize = in_f32->ilf_asize;
763 		in_f->ilf_dsize = in_f32->ilf_dsize;
764 		in_f->ilf_ino = in_f32->ilf_ino;
765 		/* copy biggest field of ilf_u */
766 		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
767 		       in_f32->ilf_u.ilfu_uuid.__u_bits,
768 		       sizeof(uuid_t));
769 		in_f->ilf_blkno = in_f32->ilf_blkno;
770 		in_f->ilf_len = in_f32->ilf_len;
771 		in_f->ilf_boffset = in_f32->ilf_boffset;
772 		return 0;
773 	} else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
774 		xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
775 
776 		in_f->ilf_type = in_f64->ilf_type;
777 		in_f->ilf_size = in_f64->ilf_size;
778 		in_f->ilf_fields = in_f64->ilf_fields;
779 		in_f->ilf_asize = in_f64->ilf_asize;
780 		in_f->ilf_dsize = in_f64->ilf_dsize;
781 		in_f->ilf_ino = in_f64->ilf_ino;
782 		/* copy biggest field of ilf_u */
783 		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
784 		       in_f64->ilf_u.ilfu_uuid.__u_bits,
785 		       sizeof(uuid_t));
786 		in_f->ilf_blkno = in_f64->ilf_blkno;
787 		in_f->ilf_len = in_f64->ilf_len;
788 		in_f->ilf_boffset = in_f64->ilf_boffset;
789 		return 0;
790 	}
791 	return EFSCORRUPTED;
792 }
793