xref: /openbmc/linux/fs/xfs/xfs_inode_item.c (revision d2ba09c1)
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_format.h"
21 #include "xfs_log_format.h"
22 #include "xfs_trans_resv.h"
23 #include "xfs_mount.h"
24 #include "xfs_inode.h"
25 #include "xfs_trans.h"
26 #include "xfs_inode_item.h"
27 #include "xfs_error.h"
28 #include "xfs_trace.h"
29 #include "xfs_trans_priv.h"
30 #include "xfs_buf_item.h"
31 #include "xfs_log.h"
32 
33 #include <linux/iversion.h>
34 
35 kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */
36 
37 static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
38 {
39 	return container_of(lip, struct xfs_inode_log_item, ili_item);
40 }
41 
42 STATIC void
43 xfs_inode_item_data_fork_size(
44 	struct xfs_inode_log_item *iip,
45 	int			*nvecs,
46 	int			*nbytes)
47 {
48 	struct xfs_inode	*ip = iip->ili_inode;
49 
50 	switch (ip->i_d.di_format) {
51 	case XFS_DINODE_FMT_EXTENTS:
52 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
53 		    ip->i_d.di_nextents > 0 &&
54 		    ip->i_df.if_bytes > 0) {
55 			/* worst case, doesn't subtract delalloc extents */
56 			*nbytes += XFS_IFORK_DSIZE(ip);
57 			*nvecs += 1;
58 		}
59 		break;
60 	case XFS_DINODE_FMT_BTREE:
61 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
62 		    ip->i_df.if_broot_bytes > 0) {
63 			*nbytes += ip->i_df.if_broot_bytes;
64 			*nvecs += 1;
65 		}
66 		break;
67 	case XFS_DINODE_FMT_LOCAL:
68 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
69 		    ip->i_df.if_bytes > 0) {
70 			*nbytes += roundup(ip->i_df.if_bytes, 4);
71 			*nvecs += 1;
72 		}
73 		break;
74 
75 	case XFS_DINODE_FMT_DEV:
76 		break;
77 	default:
78 		ASSERT(0);
79 		break;
80 	}
81 }
82 
83 STATIC void
84 xfs_inode_item_attr_fork_size(
85 	struct xfs_inode_log_item *iip,
86 	int			*nvecs,
87 	int			*nbytes)
88 {
89 	struct xfs_inode	*ip = iip->ili_inode;
90 
91 	switch (ip->i_d.di_aformat) {
92 	case XFS_DINODE_FMT_EXTENTS:
93 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
94 		    ip->i_d.di_anextents > 0 &&
95 		    ip->i_afp->if_bytes > 0) {
96 			/* worst case, doesn't subtract unused space */
97 			*nbytes += XFS_IFORK_ASIZE(ip);
98 			*nvecs += 1;
99 		}
100 		break;
101 	case XFS_DINODE_FMT_BTREE:
102 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
103 		    ip->i_afp->if_broot_bytes > 0) {
104 			*nbytes += ip->i_afp->if_broot_bytes;
105 			*nvecs += 1;
106 		}
107 		break;
108 	case XFS_DINODE_FMT_LOCAL:
109 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
110 		    ip->i_afp->if_bytes > 0) {
111 			*nbytes += roundup(ip->i_afp->if_bytes, 4);
112 			*nvecs += 1;
113 		}
114 		break;
115 	default:
116 		ASSERT(0);
117 		break;
118 	}
119 }
120 
121 /*
122  * This returns the number of iovecs needed to log the given inode item.
123  *
124  * We need one iovec for the inode log format structure, one for the
125  * inode core, and possibly one for the inode data/extents/b-tree root
126  * and one for the inode attribute data/extents/b-tree root.
127  */
128 STATIC void
129 xfs_inode_item_size(
130 	struct xfs_log_item	*lip,
131 	int			*nvecs,
132 	int			*nbytes)
133 {
134 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
135 	struct xfs_inode	*ip = iip->ili_inode;
136 
137 	*nvecs += 2;
138 	*nbytes += sizeof(struct xfs_inode_log_format) +
139 		   xfs_log_dinode_size(ip->i_d.di_version);
140 
141 	xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
142 	if (XFS_IFORK_Q(ip))
143 		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
144 }
145 
146 STATIC void
147 xfs_inode_item_format_data_fork(
148 	struct xfs_inode_log_item *iip,
149 	struct xfs_inode_log_format *ilf,
150 	struct xfs_log_vec	*lv,
151 	struct xfs_log_iovec	**vecp)
152 {
153 	struct xfs_inode	*ip = iip->ili_inode;
154 	size_t			data_bytes;
155 
156 	switch (ip->i_d.di_format) {
157 	case XFS_DINODE_FMT_EXTENTS:
158 		iip->ili_fields &=
159 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
160 
161 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
162 		    ip->i_d.di_nextents > 0 &&
163 		    ip->i_df.if_bytes > 0) {
164 			struct xfs_bmbt_rec *p;
165 
166 			ASSERT(xfs_iext_count(&ip->i_df) > 0);
167 
168 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
169 			data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
170 			xlog_finish_iovec(lv, *vecp, data_bytes);
171 
172 			ASSERT(data_bytes <= ip->i_df.if_bytes);
173 
174 			ilf->ilf_dsize = data_bytes;
175 			ilf->ilf_size++;
176 		} else {
177 			iip->ili_fields &= ~XFS_ILOG_DEXT;
178 		}
179 		break;
180 	case XFS_DINODE_FMT_BTREE:
181 		iip->ili_fields &=
182 			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
183 
184 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
185 		    ip->i_df.if_broot_bytes > 0) {
186 			ASSERT(ip->i_df.if_broot != NULL);
187 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
188 					ip->i_df.if_broot,
189 					ip->i_df.if_broot_bytes);
190 			ilf->ilf_dsize = ip->i_df.if_broot_bytes;
191 			ilf->ilf_size++;
192 		} else {
193 			ASSERT(!(iip->ili_fields &
194 				 XFS_ILOG_DBROOT));
195 			iip->ili_fields &= ~XFS_ILOG_DBROOT;
196 		}
197 		break;
198 	case XFS_DINODE_FMT_LOCAL:
199 		iip->ili_fields &=
200 			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
201 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
202 		    ip->i_df.if_bytes > 0) {
203 			/*
204 			 * Round i_bytes up to a word boundary.
205 			 * The underlying memory is guaranteed to
206 			 * to be there by xfs_idata_realloc().
207 			 */
208 			data_bytes = roundup(ip->i_df.if_bytes, 4);
209 			ASSERT(ip->i_df.if_real_bytes == 0 ||
210 			       ip->i_df.if_real_bytes >= data_bytes);
211 			ASSERT(ip->i_df.if_u1.if_data != NULL);
212 			ASSERT(ip->i_d.di_size > 0);
213 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
214 					ip->i_df.if_u1.if_data, data_bytes);
215 			ilf->ilf_dsize = (unsigned)data_bytes;
216 			ilf->ilf_size++;
217 		} else {
218 			iip->ili_fields &= ~XFS_ILOG_DDATA;
219 		}
220 		break;
221 	case XFS_DINODE_FMT_DEV:
222 		iip->ili_fields &=
223 			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
224 		if (iip->ili_fields & XFS_ILOG_DEV)
225 			ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
226 		break;
227 	default:
228 		ASSERT(0);
229 		break;
230 	}
231 }
232 
233 STATIC void
234 xfs_inode_item_format_attr_fork(
235 	struct xfs_inode_log_item *iip,
236 	struct xfs_inode_log_format *ilf,
237 	struct xfs_log_vec	*lv,
238 	struct xfs_log_iovec	**vecp)
239 {
240 	struct xfs_inode	*ip = iip->ili_inode;
241 	size_t			data_bytes;
242 
243 	switch (ip->i_d.di_aformat) {
244 	case XFS_DINODE_FMT_EXTENTS:
245 		iip->ili_fields &=
246 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
247 
248 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
249 		    ip->i_d.di_anextents > 0 &&
250 		    ip->i_afp->if_bytes > 0) {
251 			struct xfs_bmbt_rec *p;
252 
253 			ASSERT(xfs_iext_count(ip->i_afp) ==
254 				ip->i_d.di_anextents);
255 
256 			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
257 			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
258 			xlog_finish_iovec(lv, *vecp, data_bytes);
259 
260 			ilf->ilf_asize = data_bytes;
261 			ilf->ilf_size++;
262 		} else {
263 			iip->ili_fields &= ~XFS_ILOG_AEXT;
264 		}
265 		break;
266 	case XFS_DINODE_FMT_BTREE:
267 		iip->ili_fields &=
268 			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
269 
270 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
271 		    ip->i_afp->if_broot_bytes > 0) {
272 			ASSERT(ip->i_afp->if_broot != NULL);
273 
274 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
275 					ip->i_afp->if_broot,
276 					ip->i_afp->if_broot_bytes);
277 			ilf->ilf_asize = ip->i_afp->if_broot_bytes;
278 			ilf->ilf_size++;
279 		} else {
280 			iip->ili_fields &= ~XFS_ILOG_ABROOT;
281 		}
282 		break;
283 	case XFS_DINODE_FMT_LOCAL:
284 		iip->ili_fields &=
285 			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
286 
287 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
288 		    ip->i_afp->if_bytes > 0) {
289 			/*
290 			 * Round i_bytes up to a word boundary.
291 			 * The underlying memory is guaranteed to
292 			 * to be there by xfs_idata_realloc().
293 			 */
294 			data_bytes = roundup(ip->i_afp->if_bytes, 4);
295 			ASSERT(ip->i_afp->if_real_bytes == 0 ||
296 			       ip->i_afp->if_real_bytes >= data_bytes);
297 			ASSERT(ip->i_afp->if_u1.if_data != NULL);
298 			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
299 					ip->i_afp->if_u1.if_data,
300 					data_bytes);
301 			ilf->ilf_asize = (unsigned)data_bytes;
302 			ilf->ilf_size++;
303 		} else {
304 			iip->ili_fields &= ~XFS_ILOG_ADATA;
305 		}
306 		break;
307 	default:
308 		ASSERT(0);
309 		break;
310 	}
311 }
312 
313 static void
314 xfs_inode_to_log_dinode(
315 	struct xfs_inode	*ip,
316 	struct xfs_log_dinode	*to,
317 	xfs_lsn_t		lsn)
318 {
319 	struct xfs_icdinode	*from = &ip->i_d;
320 	struct inode		*inode = VFS_I(ip);
321 
322 	to->di_magic = XFS_DINODE_MAGIC;
323 
324 	to->di_version = from->di_version;
325 	to->di_format = from->di_format;
326 	to->di_uid = from->di_uid;
327 	to->di_gid = from->di_gid;
328 	to->di_projid_lo = from->di_projid_lo;
329 	to->di_projid_hi = from->di_projid_hi;
330 
331 	memset(to->di_pad, 0, sizeof(to->di_pad));
332 	memset(to->di_pad3, 0, sizeof(to->di_pad3));
333 	to->di_atime.t_sec = inode->i_atime.tv_sec;
334 	to->di_atime.t_nsec = inode->i_atime.tv_nsec;
335 	to->di_mtime.t_sec = inode->i_mtime.tv_sec;
336 	to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
337 	to->di_ctime.t_sec = inode->i_ctime.tv_sec;
338 	to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
339 	to->di_nlink = inode->i_nlink;
340 	to->di_gen = inode->i_generation;
341 	to->di_mode = inode->i_mode;
342 
343 	to->di_size = from->di_size;
344 	to->di_nblocks = from->di_nblocks;
345 	to->di_extsize = from->di_extsize;
346 	to->di_nextents = from->di_nextents;
347 	to->di_anextents = from->di_anextents;
348 	to->di_forkoff = from->di_forkoff;
349 	to->di_aformat = from->di_aformat;
350 	to->di_dmevmask = from->di_dmevmask;
351 	to->di_dmstate = from->di_dmstate;
352 	to->di_flags = from->di_flags;
353 
354 	/* log a dummy value to ensure log structure is fully initialised */
355 	to->di_next_unlinked = NULLAGINO;
356 
357 	if (from->di_version == 3) {
358 		to->di_changecount = inode_peek_iversion(inode);
359 		to->di_crtime.t_sec = from->di_crtime.t_sec;
360 		to->di_crtime.t_nsec = from->di_crtime.t_nsec;
361 		to->di_flags2 = from->di_flags2;
362 		to->di_cowextsize = from->di_cowextsize;
363 		to->di_ino = ip->i_ino;
364 		to->di_lsn = lsn;
365 		memset(to->di_pad2, 0, sizeof(to->di_pad2));
366 		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
367 		to->di_flushiter = 0;
368 	} else {
369 		to->di_flushiter = from->di_flushiter;
370 	}
371 }
372 
373 /*
374  * Format the inode core. Current timestamp data is only in the VFS inode
375  * fields, so we need to grab them from there. Hence rather than just copying
376  * the XFS inode core structure, format the fields directly into the iovec.
377  */
378 static void
379 xfs_inode_item_format_core(
380 	struct xfs_inode	*ip,
381 	struct xfs_log_vec	*lv,
382 	struct xfs_log_iovec	**vecp)
383 {
384 	struct xfs_log_dinode	*dic;
385 
386 	dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
387 	xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
388 	xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
389 }
390 
391 /*
392  * This is called to fill in the vector of log iovecs for the given inode
393  * log item.  It fills the first item with an inode log format structure,
394  * the second with the on-disk inode structure, and a possible third and/or
395  * fourth with the inode data/extents/b-tree root and inode attributes
396  * data/extents/b-tree root.
397  *
398  * Note: Always use the 64 bit inode log format structure so we don't
399  * leave an uninitialised hole in the format item on 64 bit systems. Log
400  * recovery on 32 bit systems handles this just fine, so there's no reason
401  * for not using an initialising the properly padded structure all the time.
402  */
403 STATIC void
404 xfs_inode_item_format(
405 	struct xfs_log_item	*lip,
406 	struct xfs_log_vec	*lv)
407 {
408 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
409 	struct xfs_inode	*ip = iip->ili_inode;
410 	struct xfs_log_iovec	*vecp = NULL;
411 	struct xfs_inode_log_format *ilf;
412 
413 	ASSERT(ip->i_d.di_version > 1);
414 
415 	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
416 	ilf->ilf_type = XFS_LI_INODE;
417 	ilf->ilf_ino = ip->i_ino;
418 	ilf->ilf_blkno = ip->i_imap.im_blkno;
419 	ilf->ilf_len = ip->i_imap.im_len;
420 	ilf->ilf_boffset = ip->i_imap.im_boffset;
421 	ilf->ilf_fields = XFS_ILOG_CORE;
422 	ilf->ilf_size = 2; /* format + core */
423 
424 	/*
425 	 * make sure we don't leak uninitialised data into the log in the case
426 	 * when we don't log every field in the inode.
427 	 */
428 	ilf->ilf_dsize = 0;
429 	ilf->ilf_asize = 0;
430 	ilf->ilf_pad = 0;
431 	memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
432 
433 	xlog_finish_iovec(lv, vecp, sizeof(*ilf));
434 
435 	xfs_inode_item_format_core(ip, lv, &vecp);
436 	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
437 	if (XFS_IFORK_Q(ip)) {
438 		xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
439 	} else {
440 		iip->ili_fields &=
441 			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
442 	}
443 
444 	/* update the format with the exact fields we actually logged */
445 	ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
446 }
447 
448 /*
449  * This is called to pin the inode associated with the inode log
450  * item in memory so it cannot be written out.
451  */
452 STATIC void
453 xfs_inode_item_pin(
454 	struct xfs_log_item	*lip)
455 {
456 	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
457 
458 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
459 
460 	trace_xfs_inode_pin(ip, _RET_IP_);
461 	atomic_inc(&ip->i_pincount);
462 }
463 
464 
465 /*
466  * This is called to unpin the inode associated with the inode log
467  * item which was previously pinned with a call to xfs_inode_item_pin().
468  *
469  * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
470  */
471 STATIC void
472 xfs_inode_item_unpin(
473 	struct xfs_log_item	*lip,
474 	int			remove)
475 {
476 	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
477 
478 	trace_xfs_inode_unpin(ip, _RET_IP_);
479 	ASSERT(atomic_read(&ip->i_pincount) > 0);
480 	if (atomic_dec_and_test(&ip->i_pincount))
481 		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
482 }
483 
484 /*
485  * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
486  * have been failed during writeback
487  *
488  * This informs the AIL that the inode is already flush locked on the next push,
489  * and acquires a hold on the buffer to ensure that it isn't reclaimed before
490  * dirty data makes it to disk.
491  */
492 STATIC void
493 xfs_inode_item_error(
494 	struct xfs_log_item	*lip,
495 	struct xfs_buf		*bp)
496 {
497 	ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
498 	xfs_set_li_failed(lip, bp);
499 }
500 
501 STATIC uint
502 xfs_inode_item_push(
503 	struct xfs_log_item	*lip,
504 	struct list_head	*buffer_list)
505 		__releases(&lip->li_ailp->ail_lock)
506 		__acquires(&lip->li_ailp->ail_lock)
507 {
508 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
509 	struct xfs_inode	*ip = iip->ili_inode;
510 	struct xfs_buf		*bp = lip->li_buf;
511 	uint			rval = XFS_ITEM_SUCCESS;
512 	int			error;
513 
514 	if (xfs_ipincount(ip) > 0)
515 		return XFS_ITEM_PINNED;
516 
517 	/*
518 	 * The buffer containing this item failed to be written back
519 	 * previously. Resubmit the buffer for IO.
520 	 */
521 	if (lip->li_flags & XFS_LI_FAILED) {
522 		if (!xfs_buf_trylock(bp))
523 			return XFS_ITEM_LOCKED;
524 
525 		if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
526 			rval = XFS_ITEM_FLUSHING;
527 
528 		xfs_buf_unlock(bp);
529 		return rval;
530 	}
531 
532 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
533 		return XFS_ITEM_LOCKED;
534 
535 	/*
536 	 * Re-check the pincount now that we stabilized the value by
537 	 * taking the ilock.
538 	 */
539 	if (xfs_ipincount(ip) > 0) {
540 		rval = XFS_ITEM_PINNED;
541 		goto out_unlock;
542 	}
543 
544 	/*
545 	 * Stale inode items should force out the iclog.
546 	 */
547 	if (ip->i_flags & XFS_ISTALE) {
548 		rval = XFS_ITEM_PINNED;
549 		goto out_unlock;
550 	}
551 
552 	/*
553 	 * Someone else is already flushing the inode.  Nothing we can do
554 	 * here but wait for the flush to finish and remove the item from
555 	 * the AIL.
556 	 */
557 	if (!xfs_iflock_nowait(ip)) {
558 		rval = XFS_ITEM_FLUSHING;
559 		goto out_unlock;
560 	}
561 
562 	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
563 	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
564 
565 	spin_unlock(&lip->li_ailp->ail_lock);
566 
567 	error = xfs_iflush(ip, &bp);
568 	if (!error) {
569 		if (!xfs_buf_delwri_queue(bp, buffer_list))
570 			rval = XFS_ITEM_FLUSHING;
571 		xfs_buf_relse(bp);
572 	}
573 
574 	spin_lock(&lip->li_ailp->ail_lock);
575 out_unlock:
576 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
577 	return rval;
578 }
579 
580 /*
581  * Unlock the inode associated with the inode log item.
582  */
583 STATIC void
584 xfs_inode_item_unlock(
585 	struct xfs_log_item	*lip)
586 {
587 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
588 	struct xfs_inode	*ip = iip->ili_inode;
589 	unsigned short		lock_flags;
590 
591 	ASSERT(ip->i_itemp != NULL);
592 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
593 
594 	lock_flags = iip->ili_lock_flags;
595 	iip->ili_lock_flags = 0;
596 	if (lock_flags)
597 		xfs_iunlock(ip, lock_flags);
598 }
599 
600 /*
601  * This is called to find out where the oldest active copy of the inode log
602  * item in the on disk log resides now that the last log write of it completed
603  * at the given lsn.  Since we always re-log all dirty data in an inode, the
604  * latest copy in the on disk log is the only one that matters.  Therefore,
605  * simply return the given lsn.
606  *
607  * If the inode has been marked stale because the cluster is being freed, we
608  * don't want to (re-)insert this inode into the AIL. There is a race condition
609  * where the cluster buffer may be unpinned before the inode is inserted into
610  * the AIL during transaction committed processing. If the buffer is unpinned
611  * before the inode item has been committed and inserted, then it is possible
612  * for the buffer to be written and IO completes before the inode is inserted
613  * into the AIL. In that case, we'd be inserting a clean, stale inode into the
614  * AIL which will never get removed. It will, however, get reclaimed which
615  * triggers an assert in xfs_inode_free() complaining about freein an inode
616  * still in the AIL.
617  *
618  * To avoid this, just unpin the inode directly and return a LSN of -1 so the
619  * transaction committed code knows that it does not need to do any further
620  * processing on the item.
621  */
622 STATIC xfs_lsn_t
623 xfs_inode_item_committed(
624 	struct xfs_log_item	*lip,
625 	xfs_lsn_t		lsn)
626 {
627 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
628 	struct xfs_inode	*ip = iip->ili_inode;
629 
630 	if (xfs_iflags_test(ip, XFS_ISTALE)) {
631 		xfs_inode_item_unpin(lip, 0);
632 		return -1;
633 	}
634 	return lsn;
635 }
636 
637 STATIC void
638 xfs_inode_item_committing(
639 	struct xfs_log_item	*lip,
640 	xfs_lsn_t		lsn)
641 {
642 	INODE_ITEM(lip)->ili_last_lsn = lsn;
643 }
644 
/*
 * This is the ops vector shared by all inode log items.
 */
648 static const struct xfs_item_ops xfs_inode_item_ops = {
649 	.iop_size	= xfs_inode_item_size,
650 	.iop_format	= xfs_inode_item_format,
651 	.iop_pin	= xfs_inode_item_pin,
652 	.iop_unpin	= xfs_inode_item_unpin,
653 	.iop_unlock	= xfs_inode_item_unlock,
654 	.iop_committed	= xfs_inode_item_committed,
655 	.iop_push	= xfs_inode_item_push,
656 	.iop_committing = xfs_inode_item_committing,
657 	.iop_error	= xfs_inode_item_error
658 };
659 
660 
661 /*
662  * Initialize the inode log item for a newly allocated (in-core) inode.
663  */
664 void
665 xfs_inode_item_init(
666 	struct xfs_inode	*ip,
667 	struct xfs_mount	*mp)
668 {
669 	struct xfs_inode_log_item *iip;
670 
671 	ASSERT(ip->i_itemp == NULL);
672 	iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
673 
674 	iip->ili_inode = ip;
675 	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
676 						&xfs_inode_item_ops);
677 }
678 
679 /*
680  * Free the inode log item and any memory hanging off of it.
681  */
682 void
683 xfs_inode_item_destroy(
684 	xfs_inode_t	*ip)
685 {
686 	kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
687 	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
688 }
689 
690 
691 /*
692  * This is the inode flushing I/O completion routine.  It is called
693  * from interrupt level when the buffer containing the inode is
694  * flushed to disk.  It is responsible for removing the inode item
695  * from the AIL if it has not been re-logged, and unlocking the inode's
696  * flush lock.
697  *
698  * To reduce AIL lock traffic as much as possible, we scan the buffer log item
699  * list for other inodes that will run this function. We remove them from the
700  * buffer list so we can process all the inode IO completions in one AIL lock
701  * traversal.
702  */
703 void
704 xfs_iflush_done(
705 	struct xfs_buf		*bp,
706 	struct xfs_log_item	*lip)
707 {
708 	struct xfs_inode_log_item *iip;
709 	struct xfs_log_item	*blip, *n;
710 	struct xfs_ail		*ailp = lip->li_ailp;
711 	int			need_ail = 0;
712 	LIST_HEAD(tmp);
713 
714 	/*
715 	 * Scan the buffer IO completions for other inodes being completed and
716 	 * attach them to the current inode log item.
717 	 */
718 
719 	list_add_tail(&lip->li_bio_list, &tmp);
720 
721 	list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
722 		if (lip->li_cb != xfs_iflush_done)
723 			continue;
724 
725 		list_move_tail(&blip->li_bio_list, &tmp);
726 		/*
727 		 * while we have the item, do the unlocked check for needing
728 		 * the AIL lock.
729 		 */
730 		iip = INODE_ITEM(blip);
731 		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
732 		    (blip->li_flags & XFS_LI_FAILED))
733 			need_ail++;
734 	}
735 
736 	/* make sure we capture the state of the initial inode. */
737 	iip = INODE_ITEM(lip);
738 	if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
739 	    lip->li_flags & XFS_LI_FAILED)
740 		need_ail++;
741 
742 	/*
743 	 * We only want to pull the item from the AIL if it is
744 	 * actually there and its location in the log has not
745 	 * changed since we started the flush.  Thus, we only bother
746 	 * if the ili_logged flag is set and the inode's lsn has not
747 	 * changed.  First we check the lsn outside
748 	 * the lock since it's cheaper, and then we recheck while
749 	 * holding the lock before removing the inode from the AIL.
750 	 */
751 	if (need_ail) {
752 		bool			mlip_changed = false;
753 
754 		/* this is an opencoded batch version of xfs_trans_ail_delete */
755 		spin_lock(&ailp->ail_lock);
756 		list_for_each_entry(blip, &tmp, li_bio_list) {
757 			if (INODE_ITEM(blip)->ili_logged &&
758 			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
759 				mlip_changed |= xfs_ail_delete_one(ailp, blip);
760 			else {
761 				xfs_clear_li_failed(blip);
762 			}
763 		}
764 
765 		if (mlip_changed) {
766 			if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
767 				xlog_assign_tail_lsn_locked(ailp->ail_mount);
768 			if (list_empty(&ailp->ail_head))
769 				wake_up_all(&ailp->ail_empty);
770 		}
771 		spin_unlock(&ailp->ail_lock);
772 
773 		if (mlip_changed)
774 			xfs_log_space_wake(ailp->ail_mount);
775 	}
776 
777 	/*
778 	 * clean up and unlock the flush lock now we are done. We can clear the
779 	 * ili_last_fields bits now that we know that the data corresponding to
780 	 * them is safely on disk.
781 	 */
782 	list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
783 		list_del_init(&blip->li_bio_list);
784 		iip = INODE_ITEM(blip);
785 		iip->ili_logged = 0;
786 		iip->ili_last_fields = 0;
787 		xfs_ifunlock(iip->ili_inode);
788 	}
789 	list_del(&tmp);
790 }
791 
792 /*
793  * This is the inode flushing abort routine.  It is called from xfs_iflush when
794  * the filesystem is shutting down to clean up the inode state.  It is
795  * responsible for removing the inode item from the AIL if it has not been
796  * re-logged, and unlocking the inode's flush lock.
797  */
798 void
799 xfs_iflush_abort(
800 	xfs_inode_t		*ip,
801 	bool			stale)
802 {
803 	xfs_inode_log_item_t	*iip = ip->i_itemp;
804 
805 	if (iip) {
806 		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
807 			xfs_trans_ail_remove(&iip->ili_item,
808 					     stale ? SHUTDOWN_LOG_IO_ERROR :
809 						     SHUTDOWN_CORRUPT_INCORE);
810 		}
811 		iip->ili_logged = 0;
812 		/*
813 		 * Clear the ili_last_fields bits now that we know that the
814 		 * data corresponding to them is safely on disk.
815 		 */
816 		iip->ili_last_fields = 0;
817 		/*
818 		 * Clear the inode logging fields so no more flushes are
819 		 * attempted.
820 		 */
821 		iip->ili_fields = 0;
822 		iip->ili_fsync_fields = 0;
823 	}
824 	/*
825 	 * Release the inode's flush lock since we're done with it.
826 	 */
827 	xfs_ifunlock(ip);
828 }
829 
830 void
831 xfs_istale_done(
832 	struct xfs_buf		*bp,
833 	struct xfs_log_item	*lip)
834 {
835 	xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
836 }
837 
838 /*
839  * convert an xfs_inode_log_format struct from the old 32 bit version
840  * (which can have different field alignments) to the native 64 bit version
841  */
842 int
843 xfs_inode_item_format_convert(
844 	struct xfs_log_iovec		*buf,
845 	struct xfs_inode_log_format	*in_f)
846 {
847 	struct xfs_inode_log_format_32	*in_f32 = buf->i_addr;
848 
849 	if (buf->i_len != sizeof(*in_f32))
850 		return -EFSCORRUPTED;
851 
852 	in_f->ilf_type = in_f32->ilf_type;
853 	in_f->ilf_size = in_f32->ilf_size;
854 	in_f->ilf_fields = in_f32->ilf_fields;
855 	in_f->ilf_asize = in_f32->ilf_asize;
856 	in_f->ilf_dsize = in_f32->ilf_dsize;
857 	in_f->ilf_ino = in_f32->ilf_ino;
858 	memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
859 	in_f->ilf_blkno = in_f32->ilf_blkno;
860 	in_f->ilf_len = in_f32->ilf_len;
861 	in_f->ilf_boffset = in_f32->ilf_boffset;
862 	return 0;
863 }
864