xref: /openbmc/linux/fs/xfs/libxfs/xfs_trans_inode.c (revision a5d46d9a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000,2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_trans.h"
15 #include "xfs_trans_priv.h"
16 #include "xfs_inode_item.h"
17 
18 #include <linux/iversion.h>
19 
20 /*
21  * Add a locked inode to the transaction.
22  *
23  * The inode must be locked, and it cannot be associated with any transaction.
24  * If lock_flags is non-zero the inode will be unlocked on transaction commit.
25  */
26 void
27 xfs_trans_ijoin(
28 	struct xfs_trans	*tp,
29 	struct xfs_inode	*ip,
30 	uint			lock_flags)
31 {
32 	struct xfs_inode_log_item *iip;
33 
34 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
35 	if (ip->i_itemp == NULL)
36 		xfs_inode_item_init(ip, ip->i_mount);
37 	iip = ip->i_itemp;
38 
39 	ASSERT(iip->ili_lock_flags == 0);
40 	iip->ili_lock_flags = lock_flags;
41 	ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
42 
43 	/*
44 	 * Get a log_item_desc to point at the new item.
45 	 */
46 	xfs_trans_add_item(tp, &iip->ili_item);
47 }
48 
49 /*
50  * Transactional inode timestamp update. Requires the inode to be locked and
51  * joined to the transaction supplied. Relies on the transaction subsystem to
52  * track dirty state and update/writeback the inode accordingly.
53  */
54 void
55 xfs_trans_ichgtime(
56 	struct xfs_trans	*tp,
57 	struct xfs_inode	*ip,
58 	int			flags)
59 {
60 	struct inode		*inode = VFS_I(ip);
61 	struct timespec64	tv;
62 
63 	ASSERT(tp);
64 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
65 
66 	tv = current_time(inode);
67 
68 	if (flags & XFS_ICHGTIME_MOD)
69 		inode->i_mtime = tv;
70 	if (flags & XFS_ICHGTIME_CHG)
71 		inode->i_ctime = tv;
72 	if (flags & XFS_ICHGTIME_CREATE)
73 		ip->i_crtime = tv;
74 }
75 
76 /*
77  * This is called to mark the fields indicated in fieldmask as needing to be
78  * logged when the transaction is committed.  The inode must already be
79  * associated with the given transaction.
80  *
81  * The values for fieldmask are defined in xfs_inode_item.h.  We always log all
82  * of the core inode if any of it has changed, and we always log all of the
83  * inline data/extents/b-tree root if any of them has changed.
84  *
85  * Grab and pin the cluster buffer associated with this inode to avoid RMW
86  * cycles at inode writeback time. Avoid the need to add error handling to every
87  * xfs_trans_log_inode() call by shutting down on read error.  This will cause
88  * transactions to fail and everything to error out, just like if we return a
89  * read error in a dirty transaction and cancel it.
90  */
91 void
92 xfs_trans_log_inode(
93 	struct xfs_trans	*tp,
94 	struct xfs_inode	*ip,
95 	uint			flags)
96 {
97 	struct xfs_inode_log_item *iip = ip->i_itemp;
98 	struct inode		*inode = VFS_I(ip);
99 	uint			iversion_flags = 0;
100 
101 	ASSERT(iip);
102 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
103 	ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
104 
105 	tp->t_flags |= XFS_TRANS_DIRTY;
106 
107 	/*
108 	 * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
109 	 * don't matter - we either will need an extra transaction in 24 hours
110 	 * to log the timestamps, or will clear already cleared fields in the
111 	 * worst case.
112 	 */
113 	if (inode->i_state & I_DIRTY_TIME) {
114 		spin_lock(&inode->i_lock);
115 		inode->i_state &= ~I_DIRTY_TIME;
116 		spin_unlock(&inode->i_lock);
117 	}
118 
119 	/*
120 	 * First time we log the inode in a transaction, bump the inode change
121 	 * counter if it is configured for this to occur. While we have the
122 	 * inode locked exclusively for metadata modification, we can usually
123 	 * avoid setting XFS_ILOG_CORE if no one has queried the value since
124 	 * the last time it was incremented. If we have XFS_ILOG_CORE already
125 	 * set however, then go ahead and bump the i_version counter
126 	 * unconditionally.
127 	 */
128 	if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
129 		if (IS_I_VERSION(inode) &&
130 		    inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
131 			iversion_flags = XFS_ILOG_CORE;
132 	}
133 
134 	/*
135 	 * If we're updating the inode core or the timestamps and it's possible
136 	 * to upgrade this inode to bigtime format, do so now.
137 	 */
138 	if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
139 	    xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) &&
140 	    !xfs_inode_has_bigtime(ip)) {
141 		ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
142 		flags |= XFS_ILOG_CORE;
143 	}
144 
145 	/*
146 	 * Inode verifiers on older kernels don't check that the extent size
147 	 * hint is an integer multiple of the rt extent size on a directory
148 	 * with both rtinherit and extszinherit flags set.  If we're logging a
149 	 * directory that is misconfigured in this way, clear the hint.
150 	 */
151 	if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
152 	    (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
153 	    (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
154 		xfs_info_once(ip->i_mount,
155 	"Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
156 		ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
157 				   XFS_DIFLAG_EXTSZINHERIT);
158 		ip->i_extsize = 0;
159 		flags |= XFS_ILOG_CORE;
160 	}
161 
162 	/*
163 	 * Record the specific change for fdatasync optimisation. This allows
164 	 * fdatasync to skip log forces for inodes that are only timestamp
165 	 * dirty.
166 	 */
167 	spin_lock(&iip->ili_lock);
168 	iip->ili_fsync_fields |= flags;
169 
170 	if (!iip->ili_item.li_buf) {
171 		struct xfs_buf	*bp;
172 		int		error;
173 
174 		/*
175 		 * We hold the ILOCK here, so this inode is not going to be
176 		 * flushed while we are here. Further, because there is no
177 		 * buffer attached to the item, we know that there is no IO in
178 		 * progress, so nothing will clear the ili_fields while we read
179 		 * in the buffer. Hence we can safely drop the spin lock and
180 		 * read the buffer knowing that the state will not change from
181 		 * here.
182 		 */
183 		spin_unlock(&iip->ili_lock);
184 		error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
185 		if (error) {
186 			xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR);
187 			return;
188 		}
189 
190 		/*
191 		 * We need an explicit buffer reference for the log item but
192 		 * don't want the buffer to remain attached to the transaction.
193 		 * Hold the buffer but release the transaction reference once
194 		 * we've attached the inode log item to the buffer log item
195 		 * list.
196 		 */
197 		xfs_buf_hold(bp);
198 		spin_lock(&iip->ili_lock);
199 		iip->ili_item.li_buf = bp;
200 		bp->b_flags |= _XBF_INODES;
201 		list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
202 		xfs_trans_brelse(tp, bp);
203 	}
204 
205 	/*
206 	 * Always OR in the bits from the ili_last_fields field.  This is to
207 	 * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
208 	 * in the eventual clearing of the ili_fields bits.  See the big comment
209 	 * in xfs_iflush() for an explanation of this coordination mechanism.
210 	 */
211 	iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
212 	spin_unlock(&iip->ili_lock);
213 }
214 
215 int
216 xfs_trans_roll_inode(
217 	struct xfs_trans	**tpp,
218 	struct xfs_inode	*ip)
219 {
220 	int			error;
221 
222 	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
223 	error = xfs_trans_roll(tpp);
224 	if (!error)
225 		xfs_trans_ijoin(*tpp, ip, 0);
226 	return error;
227 }
228