xref: /openbmc/linux/fs/xfs/libxfs/xfs_inode_buf.c (revision c6fddb28)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
#include "xfs_dir2.h"

#include <linux/iversion.h>

/*
 * Check that none of the inodes in the buffer have a next
 * unlinked field of 0.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	int		i;
	xfs_dinode_t	*dip;

	for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
		dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
		if (!dip->di_next_unlinked)  {
			xfs_alert(mp,
	"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
				i, (long long)bp->b_bn);
		}
	}
}
#endif

/*
 * If we are doing readahead on an inode buffer, we might be in log recovery
 * reading an inode allocation buffer that hasn't yet been replayed, and hence
 * has not had the inode cores stamped into it. Hence for readahead, the buffer
 * may be potentially invalid.
 *
 * If the readahead buffer is invalid, we need to mark it with an error and
 * clear the DONE status of the buffer so that a follow-up read will re-read it
 * from disk. We don't report the error otherwise to avoid warnings during log
 * recovery and so that we don't get unnecessary panics on debug kernels. We
 * use EIO here because all we want to do is say readahead failed; there is
 * no one to report the error to, so this will distinguish it from a non-ra
 * verifier failure. Changes to this readahead error behaviour also need to be
 * reflected in xfs_dquot_buf_readahead_verify().
 */
static void
xfs_inode_buf_verify(
	struct xfs_buf	*bp,
	bool		readahead)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_agnumber_t	agno;
	int		i;
	int		ni;

	/*
	 * Validate the magic number and version of every inode in the buffer
	 */
	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
	for (i = 0; i < ni; i++) {
		int		di_ok;
		xfs_dinode_t	*dip;
		xfs_agino_t	unlinked_ino;

		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
		di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
			xfs_dinode_good_version(&mp->m_sb, dip->di_version) &&
			xfs_verify_agino_or_null(mp, agno, unlinked_ino);
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP))) {
			if (readahead) {
				bp->b_flags &= ~XBF_DONE;
				xfs_buf_ioerror(bp, -EIO);
				return;
			}

#ifdef DEBUG
			xfs_alert(mp,
				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
				(unsigned long long)bp->b_bn, i,
				be16_to_cpu(dip->di_magic));
#endif
			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
					__func__, dip, sizeof(*dip),
					NULL);
			return;
		}
	}
}


static void
xfs_inode_buf_read_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, false);
}

static void
xfs_inode_buf_readahead_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, true);
}

static void
xfs_inode_buf_write_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, false);
}

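/*
 * Verifier tables for inode cluster buffers. The _ra_ops variant differs only
 * in using the readahead read verifier, which fails quietly with EIO as
 * described above.
 */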
const struct xfs_buf_ops xfs_inode_buf_ops = {
	.name = "xfs_inode",
	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
		     cpu_to_be16(XFS_DINODE_MAGIC) },
	.verify_read = xfs_inode_buf_read_verify,
	.verify_write = xfs_inode_buf_write_verify,
};

const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
	.name = "xfs_inode_ra",
	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
		     cpu_to_be16(XFS_DINODE_MAGIC) },
	.verify_read = xfs_inode_buf_readahead_verify,
	.verify_write = xfs_inode_buf_write_verify,
};


/*
 * This routine is called to map an inode to the buffer containing the on-disk
 * version of the inode.  It returns a pointer to the buffer containing the
 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
 * pointer to the on-disk inode within that buffer.
 *
 * If a non-zero error is returned, then the contents of bpp and dipp are
 * undefined.
 */
int
xfs_imap_to_bp(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_imap		*imap,
	struct xfs_dinode	**dipp,
	struct xfs_buf		**bpp,
	uint			buf_flags,
	uint			iget_flags)
{
	struct xfs_buf		*bp;
	int			error;

	buf_flags |= XBF_UNMAPPED;
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
				   (int)imap->im_len, buf_flags, &bp,
				   &xfs_inode_buf_ops);
	if (error) {
		if (error == -EAGAIN) {
			ASSERT(buf_flags & XBF_TRYLOCK);
			return error;
		}
		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
			__func__, error);
		return error;
	}

	*bpp = bp;
	*dipp = xfs_buf_offset(bp, imap->im_boffset);
	return 0;
}

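/*
 * Copy the on-disk inode core into the in-core inode and the associated VFS
 * inode, byte-swapping from the big-endian on-disk order and converting v1
 * inodes to the v2 layout (see below).
 */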
void
xfs_inode_from_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*from)
{
	struct xfs_icdinode	*to = &ip->i_d;
	struct inode		*inode = VFS_I(ip);

	/*
	 * Convert v1 inodes immediately to v2 inode format as this is the
	 * minimum inode version format we support in the rest of the code.
	 * They will also be unconditionally written back to disk as v2 inodes.
	 */
	if (unlikely(from->di_version == 1)) {
		set_nlink(inode, be16_to_cpu(from->di_onlink));
		to->di_projid = 0;
	} else {
		set_nlink(inode, be32_to_cpu(from->di_nlink));
		to->di_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
					be16_to_cpu(from->di_projid_lo);
	}

	to->di_format = from->di_format;
	i_uid_write(inode, be32_to_cpu(from->di_uid));
	i_gid_write(inode, be32_to_cpu(from->di_gid));
	to->di_flushiter = be16_to_cpu(from->di_flushiter);

	/*
	 * Time is signed, so need to convert to signed 32 bit before
	 * storing in inode timestamp which may be 64 bit. Otherwise
	 * a time before epoch is converted to a time long after epoch
	 * on 64 bit systems.
	 */
	inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
	inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
	inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
	inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
	inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
	inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
	inode->i_generation = be32_to_cpu(from->di_gen);
	inode->i_mode = be16_to_cpu(from->di_mode);

	to->di_size = be64_to_cpu(from->di_size);
	to->di_nblocks = be64_to_cpu(from->di_nblocks);
	to->di_extsize = be32_to_cpu(from->di_extsize);
	to->di_nextents = be32_to_cpu(from->di_nextents);
	to->di_anextents = be16_to_cpu(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat	= from->di_aformat;
	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
	to->di_flags	= be16_to_cpu(from->di_flags);

	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
		inode_set_iversion_queried(inode,
					   be64_to_cpu(from->di_changecount));
		to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec);
		to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec);
		to->di_flags2 = be64_to_cpu(from->di_flags2);
		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
	}
}

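/*
 * Copy the in-core inode and VFS inode state back into the on-disk inode
 * format, stamping the v3-only fields (change count, inode number, LSN,
 * metadata UUID) when the superblock supports v3 inodes.
 */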
void
xfs_inode_to_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct xfs_icdinode	*from = &ip->i_d;
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	to->di_onlink = 0;

	to->di_format = from->di_format;
	to->di_uid = cpu_to_be32(i_uid_read(inode));
	to->di_gid = cpu_to_be32(i_gid_read(inode));
	to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff);
	to->di_projid_hi = cpu_to_be16(from->di_projid >> 16);

	memset(to->di_pad, 0, sizeof(to->di_pad));
	to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
	to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
	to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
	to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
	to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
	to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
	to->di_nlink = cpu_to_be32(inode->i_nlink);
	to->di_gen = cpu_to_be32(inode->i_generation);
	to->di_mode = cpu_to_be16(inode->i_mode);

	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);

	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
		to->di_version = 3;
		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec);
		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec);
		to->di_flags2 = cpu_to_be64(from->di_flags2);
		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
		to->di_ino = cpu_to_be64(ip->i_ino);
		to->di_lsn = cpu_to_be64(lsn);
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_flushiter = 0;
	} else {
		to->di_version = 2;
		to->di_flushiter = cpu_to_be16(from->di_flushiter);
	}
}

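/*
 * Translate a logged inode core (struct xfs_log_dinode) back into the on-disk
 * dinode format so that log recovery can copy a recovered inode item into the
 * inode buffer.
 */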
void
xfs_log_dinode_to_disk(
	struct xfs_log_dinode	*from,
	struct xfs_dinode	*to)
{
	to->di_magic = cpu_to_be16(from->di_magic);
	to->di_mode = cpu_to_be16(from->di_mode);
	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_onlink = 0;
	to->di_uid = cpu_to_be32(from->di_uid);
	to->di_gid = cpu_to_be32(from->di_gid);
	to->di_nlink = cpu_to_be32(from->di_nlink);
	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));

	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);

	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);
	to->di_gen = cpu_to_be32(from->di_gen);

	if (from->di_version == 3) {
		to->di_changecount = cpu_to_be64(from->di_changecount);
		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
		to->di_flags2 = cpu_to_be64(from->di_flags2);
		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
		to->di_ino = cpu_to_be64(from->di_ino);
		to->di_lsn = cpu_to_be64(from->di_lsn);
		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &from->di_uuid);
		to->di_flushiter = 0;
	} else {
		to->di_flushiter = cpu_to_be16(from->di_flushiter);
	}
}

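/*
 * Sanity check a single fork: the fork format must be valid for the inode
 * type and the extent count must fit within the limits for that format.
 */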
static xfs_failaddr_t
xfs_dinode_verify_fork(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp,
	int			whichfork)
{
	uint32_t		di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);

	switch (XFS_DFORK_FORMAT(dip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * no local regular files yet
		 */
		if (whichfork == XFS_DATA_FORK) {
			if (S_ISREG(be16_to_cpu(dip->di_mode)))
				return __this_address;
			if (be64_to_cpu(dip->di_size) >
					XFS_DFORK_SIZE(dip, mp, whichfork))
				return __this_address;
		}
		if (di_nextents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
			return __this_address;
		break;
	case XFS_DINODE_FMT_BTREE:
		if (whichfork == XFS_ATTR_FORK) {
			if (di_nextents > MAXAEXTNUM)
				return __this_address;
		} else if (di_nextents > MAXEXTNUM) {
			return __this_address;
		}
		break;
	default:
		return __this_address;
	}
	return NULL;
}

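/*
 * Check that di_forkoff is consistent with the data fork format when an
 * attribute fork is present.
 */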
static xfs_failaddr_t
xfs_dinode_verify_forkoff(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp)
{
	if (!XFS_DFORK_Q(dip))
		return NULL;

	switch (dip->di_format)  {
	case XFS_DINODE_FMT_DEV:
		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
			return __this_address;
		break;
	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
	case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
	case XFS_DINODE_FMT_BTREE:
		if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

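/*
 * Verify an on-disk inode. On failure this returns the address of the check
 * that tripped (via __this_address) so callers can report exactly which
 * verification failed; a NULL return means the inode passed.
 */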
xfs_failaddr_t
xfs_dinode_verify(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_dinode	*dip)
{
	xfs_failaddr_t		fa;
	uint16_t		mode;
	uint16_t		flags;
	uint64_t		flags2;
	uint64_t		di_size;

	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return __this_address;

	/* Verify v3 integrity information first */
	if (dip->di_version >= 3) {
		if (!xfs_sb_version_has_v3inode(&mp->m_sb))
			return __this_address;
		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
				      XFS_DINODE_CRC_OFF))
			return __this_address;
		if (be64_to_cpu(dip->di_ino) != ino)
			return __this_address;
		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
	}

	/* don't allow invalid i_size */
	di_size = be64_to_cpu(dip->di_size);
	if (di_size & (1ULL << 63))
		return __this_address;

	mode = be16_to_cpu(dip->di_mode);
	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
		return __this_address;

	/* No zero-length symlinks/dirs. */
	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
		return __this_address;

	/* Fork checks carried over from xfs_iformat_fork */
	if (mode &&
	    be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
			be64_to_cpu(dip->di_nblocks))
		return __this_address;

	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
		return __this_address;

	flags = be16_to_cpu(dip->di_flags);

	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
		return __this_address;

	/* check for illegal values of forkoff */
	fa = xfs_dinode_verify_forkoff(dip, mp);
	if (fa)
		return fa;

	/* Do we have appropriate data fork formats for the mode? */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (dip->di_format != XFS_DINODE_FMT_DEV)
			return __this_address;
		break;
	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
		if (fa)
			return fa;
		break;
	case 0:
		/* Uninitialized inode ok. */
		break;
	default:
		return __this_address;
	}

	if (XFS_DFORK_Q(dip)) {
		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
		if (fa)
			return fa;
	} else {
		/*
		 * If there is no fork offset, this may be a freshly-made inode
		 * in a new disk cluster, in which case di_aformat is zeroed.
		 * Otherwise, such an inode must be in EXTENTS format; this goes
		 * for freed inodes as well.
		 */
		switch (dip->di_aformat) {
		case 0:
		case XFS_DINODE_FMT_EXTENTS:
			break;
		default:
			return __this_address;
		}
		if (dip->di_anextents)
			return __this_address;
	}

	/* extent size hint validation */
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
			mode, flags);
	if (fa)
		return fa;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return NULL;

	flags2 = be64_to_cpu(dip->di_flags2);

	/* don't allow reflink/cowextsize if we don't have reflink */
	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
	     !xfs_sb_version_hasreflink(&mp->m_sb))
		return __this_address;

	/* only regular files get reflink */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
		return __this_address;

	/* don't let reflink and realtime mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
		return __this_address;

	/* don't let reflink and dax mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
		return __this_address;

	/* COW extent size hint validation */
	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
			mode, flags, flags2);
	if (fa)
		return fa;

	return NULL;
}

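/*
 * Compute and store the CRC of a v3 on-disk inode. v1/v2 inodes carry no CRC,
 * so this is a no-op for them.
 */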
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	uint32_t		crc;

	if (dip->di_version < 3)
		return;

	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
			      XFS_DINODE_CRC_OFF);
	dip->di_crc = xfs_end_cksum(crc);
}

/*
 * Read the disk inode attributes into the in-core inode structure.
 *
 * For version 5 superblocks, if we are initialising a new inode and we are not
 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
 * inode core with a random generation number. If we are keeping inodes around,
 * we need to read the inode cluster to get the existing generation number off
 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
 * format) then log recovery is dependent on the di_flushiter field being
 * initialised from the current on-disk value and hence we must also read the
 * inode off disk.
 */
int
xfs_iread(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		iget_flags)
{
	xfs_buf_t	*bp;
	xfs_dinode_t	*dip;
	xfs_failaddr_t	fa;
	int		error;

	/*
	 * Fill in the location information in the in-core inode.
	 */
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
	if (error)
		return error;

	/* shortcut IO on inode allocation if possible */
	if ((iget_flags & XFS_IGET_CREATE) &&
	    xfs_sb_version_has_v3inode(&mp->m_sb) &&
	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		VFS_I(ip)->i_generation = prandom_u32();
		return 0;
	}

	/*
	 * Get pointers to the on-disk inode and the buffer containing it.
	 */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
	if (error)
		return error;

	/* even unallocated inodes are verified */
	fa = xfs_dinode_verify(mp, ip->i_ino, dip);
	if (fa) {
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
				sizeof(*dip), fa);
		error = -EFSCORRUPTED;
		goto out_brelse;
	}

	/*
	 * If the on-disk inode is already linked to a directory
	 * entry, copy all of the inode into the in-core inode.
	 * xfs_iformat_fork() handles copying in the inode format
	 * specific information.
	 * Otherwise, just get the truly permanent information.
	 */
	if (dip->di_mode) {
		xfs_inode_from_disk(ip, dip);
		error = xfs_iformat_fork(ip, dip);
		if (error)  {
#ifdef DEBUG
			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
				__func__, error);
#endif /* DEBUG */
			goto out_brelse;
		}
	} else {
		/*
		 * Partial initialisation of the in-core inode. Just the bits
		 * that xfs_ialloc won't overwrite or that it relies on being
		 * correct.
		 */
		VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);

		/*
		 * Make sure to pull in the mode here as well in
		 * case the inode is released without being used.
		 * This ensures that xfs_inactive() will see that
		 * the inode is already free and not try to mess
		 * with the uninitialized part of it.
		 */
		VFS_I(ip)->i_mode = 0;
	}

	ip->i_delayed_blks = 0;

	/*
	 * Mark the buffer containing the inode as something to keep
	 * around for a while.  This helps to keep recently accessed
	 * meta-data in-core longer.
	 */
	xfs_buf_set_ref(bp, XFS_INO_REF);

	/*
	 * Use xfs_trans_brelse() to release the buffer containing the on-disk
	 * inode, because it was acquired with xfs_trans_read_buf() in
	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
	 * will only release the buffer if it is not dirty within the
	 * transaction.  It will be OK to release the buffer in this case,
	 * because inodes on disk are never destroyed and we will be locking the
	 * new in-core inode before putting it in the cache where other
	 * processes can find it.  Thus we don't have to worry about the inode
	 * being changed just because we released the buffer.
	 */
 out_brelse:
	xfs_trans_brelse(tp, bp);
	return error;
}

/*
 * Validate di_extsize hint.
 *
 * The rules are documented at xfs_ioctl_setattr_check_extsize().
 * These functions must be kept in sync with each other.
 */
xfs_failaddr_t
xfs_inode_validate_extsize(
	struct xfs_mount		*mp,
	uint32_t			extsize,
	uint16_t			mode,
	uint16_t			flags)
{
	bool				rt_flag;
	bool				hint_flag;
	bool				inherit_flag;
	uint32_t			extsize_bytes;
	uint32_t			blocksize_bytes;

	rt_flag = (flags & XFS_DIFLAG_REALTIME);
	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
	extsize_bytes = XFS_FSB_TO_B(mp, extsize);

	if (rt_flag)
		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
	else
		blocksize_bytes = mp->m_sb.sb_blocksize;

	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
		return __this_address;

	if (hint_flag && !S_ISREG(mode))
		return __this_address;

	if (inherit_flag && !S_ISDIR(mode))
		return __this_address;

	if ((hint_flag || inherit_flag) && extsize == 0)
		return __this_address;

	/* free inodes get flags set to zero but extsize remains */
	if (mode && !(hint_flag || inherit_flag) && extsize != 0)
		return __this_address;

	if (extsize_bytes % blocksize_bytes)
		return __this_address;

	if (extsize > MAXEXTLEN)
		return __this_address;

	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
		return __this_address;

	return NULL;
}

/*
 * Validate di_cowextsize hint.
 *
 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
 * These functions must be kept in sync with each other.
 */
xfs_failaddr_t
xfs_inode_validate_cowextsize(
	struct xfs_mount		*mp,
	uint32_t			cowextsize,
	uint16_t			mode,
	uint16_t			flags,
	uint64_t			flags2)
{
	bool				rt_flag;
	bool				hint_flag;
	uint32_t			cowextsize_bytes;

	rt_flag = (flags & XFS_DIFLAG_REALTIME);
	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
	cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);

	if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
		return __this_address;

	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
		return __this_address;

	if (hint_flag && cowextsize == 0)
		return __this_address;

	/* free inodes get flags set to zero but cowextsize remains */
	if (mode && !hint_flag && cowextsize != 0)
		return __this_address;

	if (hint_flag && rt_flag)
		return __this_address;

	if (cowextsize_bytes % mp->m_sb.sb_blocksize)
		return __this_address;

	if (cowextsize > MAXEXTLEN)
		return __this_address;

	if (cowextsize > mp->m_sb.sb_agblocks / 2)
		return __this_address;

	return NULL;
}