// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
#include "xfs_dir2.h"

#include <linux/iversion.h>

/*
 * Check that none of the inodes in the buffer have a next
 * unlinked field of 0.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	int		i;
	xfs_dinode_t	*dip;

	for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
		dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
		if (!dip->di_next_unlinked) {
			xfs_alert(mp,
	"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
				i, (long long)bp->b_bn);
		}
	}
}
#endif

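/*
 * Check that the on-disk inode version is valid for this filesystem:
 * V5 (CRC-enabled) filesystems require v3 inodes, older filesystems
 * accept v1 or v2.
 */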
bool
xfs_dinode_good_version(
	struct xfs_mount *mp,
	__u8		version)
{
	if (xfs_sb_version_hascrc(&mp->m_sb))
		return version == 3;

	return version == 1 || version == 2;
}

/*
 * If we are doing readahead on an inode buffer, we might be in log recovery
 * reading an inode allocation buffer that hasn't yet been replayed, and hence
 * has not had the inode cores stamped into it. For readahead, therefore, the
 * buffer may be invalid.
 *
 * If the readahead buffer is invalid, we need to mark it with an error and
 * clear the DONE status of the buffer so that a followup read will re-read it
 * from disk. We don't report the error otherwise, to avoid warnings during log
 * recovery and unnecessary panics on debug kernels. We use EIO here because
 * all we want to do is say readahead failed; there is no-one to report the
 * error to, so this will distinguish it from a non-readahead verifier failure.
 * Changes to this readahead error behaviour also need to be reflected in
 * xfs_dquot_buf_readahead_verify().
 */
static void
xfs_inode_buf_verify(
	struct xfs_buf	*bp,
	bool		readahead)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_agnumber_t	agno;
	int		i;
	int		ni;

	/*
	 * Validate the magic number and version of every inode in the buffer
	 */
	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
	for (i = 0; i < ni; i++) {
		int		di_ok;
		xfs_dinode_t	*dip;
		xfs_agino_t	unlinked_ino;

		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
		di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
			xfs_dinode_good_version(mp, dip->di_version) &&
			xfs_verify_agino_or_null(mp, agno, unlinked_ino);
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP))) {
			if (readahead) {
				bp->b_flags &= ~XBF_DONE;
				xfs_buf_ioerror(bp, -EIO);
				return;
			}

#ifdef DEBUG
			xfs_alert(mp,
				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
				(unsigned long long)bp->b_bn, i,
				be16_to_cpu(dip->di_magic));
#endif
			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
					__func__, dip, sizeof(*dip),
					NULL);
			return;
		}
	}
}

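/*
 * Thin wrappers that connect xfs_inode_buf_verify() to the buffer ops.  Only
 * the readahead path passes readahead == true, so its verification failures
 * are squashed and the buffer is re-read by a later blocking read.
 */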
static void
xfs_inode_buf_read_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, false);
}

static void
xfs_inode_buf_readahead_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, true);
}

static void
xfs_inode_buf_write_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, false);
}

const struct xfs_buf_ops xfs_inode_buf_ops = {
	.name = "xfs_inode",
	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
		     cpu_to_be16(XFS_DINODE_MAGIC) },
	.verify_read = xfs_inode_buf_read_verify,
	.verify_write = xfs_inode_buf_write_verify,
};

const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
	.name = "xfs_inode_ra",
	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
		     cpu_to_be16(XFS_DINODE_MAGIC) },
	.verify_read = xfs_inode_buf_readahead_verify,
	.verify_write = xfs_inode_buf_write_verify,
};

/*
 * This routine is called to map an inode to the buffer containing the on-disk
 * version of the inode.  It returns a pointer to the buffer containing the
 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
 * pointer to the on-disk inode within that buffer.
 *
 * If a non-zero error is returned, then the contents of bpp and dipp are
 * undefined.
 */
int
xfs_imap_to_bp(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_imap		*imap,
	struct xfs_dinode	**dipp,
	struct xfs_buf		**bpp,
	uint			buf_flags,
	uint			iget_flags)
{
	struct xfs_buf		*bp;
	int			error;

	buf_flags |= XBF_UNMAPPED;
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
				   (int)imap->im_len, buf_flags, &bp,
				   &xfs_inode_buf_ops);
	if (error) {
		if (error == -EAGAIN) {
			ASSERT(buf_flags & XBF_TRYLOCK);
			return error;
		}
		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
			__func__, error);
		return error;
	}

	*bpp = bp;
	*dipp = xfs_buf_offset(bp, imap->im_boffset);
	return 0;
}

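/*
 * Copy the fields of an on-disk inode core into the in-core inode, converting
 * from big-endian disk format and filling in both the xfs_icdinode and the
 * embedded VFS inode.
 */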
void
xfs_inode_from_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*from)
{
	struct xfs_icdinode	*to = &ip->i_d;
	struct inode		*inode = VFS_I(ip);

	/*
	 * Convert v1 inodes immediately to v2 inode format as this is the
	 * minimum inode version format we support in the rest of the code.
	 */
	to->di_version = from->di_version;
	if (to->di_version == 1) {
		set_nlink(inode, be16_to_cpu(from->di_onlink));
		to->di_projid = 0;
		to->di_version = 2;
	} else {
		set_nlink(inode, be32_to_cpu(from->di_nlink));
		to->di_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
					be16_to_cpu(from->di_projid_lo);
	}

	to->di_format = from->di_format;
	to->di_uid = be32_to_cpu(from->di_uid);
	to->di_gid = be32_to_cpu(from->di_gid);
	to->di_flushiter = be16_to_cpu(from->di_flushiter);

	/*
	 * Time is signed, so need to convert to signed 32 bit before
	 * storing in inode timestamp which may be 64 bit. Otherwise
	 * a time before epoch is converted to a time long after epoch
	 * on 64 bit systems.
	 */
	inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
	inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
	inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
	inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
	inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
	inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
	inode->i_generation = be32_to_cpu(from->di_gen);
	inode->i_mode = be16_to_cpu(from->di_mode);

	to->di_size = be64_to_cpu(from->di_size);
	to->di_nblocks = be64_to_cpu(from->di_nblocks);
	to->di_extsize = be32_to_cpu(from->di_extsize);
	to->di_nextents = be32_to_cpu(from->di_nextents);
	to->di_anextents = be16_to_cpu(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat	= from->di_aformat;
	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
	to->di_flags	= be16_to_cpu(from->di_flags);

	if (to->di_version == 3) {
		inode_set_iversion_queried(inode,
					   be64_to_cpu(from->di_changecount));
		to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec);
		to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec);
		to->di_flags2 = be64_to_cpu(from->di_flags2);
		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
	}
}

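/*
 * Copy the in-core inode back into on-disk (big-endian) format.  @lsn is only
 * stamped into v3 inodes; older inode versions don't carry an LSN field.
 */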
void
xfs_inode_to_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct xfs_icdinode	*from = &ip->i_d;
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	to->di_onlink = 0;

	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_uid = cpu_to_be32(from->di_uid);
	to->di_gid = cpu_to_be32(from->di_gid);
	to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff);
	to->di_projid_hi = cpu_to_be16(from->di_projid >> 16);

	memset(to->di_pad, 0, sizeof(to->di_pad));
	to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
	to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
	to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
	to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
	to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
	to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
	to->di_nlink = cpu_to_be32(inode->i_nlink);
	to->di_gen = cpu_to_be32(inode->i_generation);
	to->di_mode = cpu_to_be16(inode->i_mode);

	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);
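
	/*
	 * V3 inodes are self describing: they carry the inode number, the
	 * filesystem UUID and the LSN of the last write.  Log recovery uses
	 * that LSN to decide whether changes need to be replayed, so
	 * di_flushiter is no longer needed and is zeroed for v3 inodes.
	 */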
	if (from->di_version == 3) {
		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec);
		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec);
		to->di_flags2 = cpu_to_be64(from->di_flags2);
		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
		to->di_ino = cpu_to_be64(ip->i_ino);
		to->di_lsn = cpu_to_be64(lsn);
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_flushiter = 0;
	} else {
		to->di_flushiter = cpu_to_be16(from->di_flushiter);
	}
}

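/*
 * Convert a logged inode core (struct xfs_log_dinode, host-endian) back into
 * on-disk format.  This is used by log recovery when replaying inode log
 * items.
 */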
void
xfs_log_dinode_to_disk(
	struct xfs_log_dinode	*from,
	struct xfs_dinode	*to)
{
	to->di_magic = cpu_to_be16(from->di_magic);
	to->di_mode = cpu_to_be16(from->di_mode);
	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_onlink = 0;
	to->di_uid = cpu_to_be32(from->di_uid);
	to->di_gid = cpu_to_be32(from->di_gid);
	to->di_nlink = cpu_to_be32(from->di_nlink);
	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));

	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);

	to->di_size = cpu_to_be64(from->di_size);
	to->di_nblocks = cpu_to_be64(from->di_nblocks);
	to->di_extsize = cpu_to_be32(from->di_extsize);
	to->di_nextents = cpu_to_be32(from->di_nextents);
	to->di_anextents = cpu_to_be16(from->di_anextents);
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
	to->di_dmstate = cpu_to_be16(from->di_dmstate);
	to->di_flags = cpu_to_be16(from->di_flags);
	to->di_gen = cpu_to_be32(from->di_gen);

	if (from->di_version == 3) {
		to->di_changecount = cpu_to_be64(from->di_changecount);
		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
		to->di_flags2 = cpu_to_be64(from->di_flags2);
		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
		to->di_ino = cpu_to_be64(from->di_ino);
		to->di_lsn = cpu_to_be64(from->di_lsn);
		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &from->di_uuid);
		to->di_flushiter = 0;
	} else {
		to->di_flushiter = cpu_to_be16(from->di_flushiter);
	}
}

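/*
 * Sanity check one fork of an on-disk inode: local-format data forks cannot
 * hold regular files and must fit in the fork area, and extent counts must
 * not exceed the limits for the fork format.
 */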
static xfs_failaddr_t
xfs_dinode_verify_fork(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp,
	int			whichfork)
{
	uint32_t		di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);

	switch (XFS_DFORK_FORMAT(dip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * no local regular files yet
		 */
		if (whichfork == XFS_DATA_FORK) {
			if (S_ISREG(be16_to_cpu(dip->di_mode)))
				return __this_address;
			if (be64_to_cpu(dip->di_size) >
					XFS_DFORK_SIZE(dip, mp, whichfork))
				return __this_address;
		}
		if (di_nextents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
			return __this_address;
		break;
	case XFS_DINODE_FMT_BTREE:
		if (whichfork == XFS_ATTR_FORK) {
			if (di_nextents > MAXAEXTNUM)
				return __this_address;
		} else if (di_nextents > MAXEXTNUM) {
			return __this_address;
		}
		break;
	default:
		return __this_address;
	}
	return NULL;
}

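/*
 * Check that di_forkoff (the attr fork offset, in 8-byte units) is consistent
 * with the data fork format and fits inside the inode literal area.
 */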
static xfs_failaddr_t
xfs_dinode_verify_forkoff(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp)
{
	if (!XFS_DFORK_Q(dip))
		return NULL;

	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
			return __this_address;
		break;
	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
	case XFS_DINODE_FMT_EXTENTS:	/* fall through ... */
	case XFS_DINODE_FMT_BTREE:
		if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

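/*
 * Verify an on-disk inode.  Returns the address of the failed check, or NULL
 * if the inode passes; callers such as xfs_iread() turn a non-NULL return
 * into -EFSCORRUPTED.
 */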
xfs_failaddr_t
xfs_dinode_verify(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_dinode	*dip)
{
	xfs_failaddr_t		fa;
	uint16_t		mode;
	uint16_t		flags;
	uint64_t		flags2;
	uint64_t		di_size;

	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return __this_address;

	/* Verify v3 integrity information first */
	if (dip->di_version >= 3) {
		if (!xfs_sb_version_hascrc(&mp->m_sb))
			return __this_address;
		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
				      XFS_DINODE_CRC_OFF))
			return __this_address;
		if (be64_to_cpu(dip->di_ino) != ino)
			return __this_address;
		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
	}

	/* don't allow invalid i_size */
	di_size = be64_to_cpu(dip->di_size);
	if (di_size & (1ULL << 63))
		return __this_address;

	mode = be16_to_cpu(dip->di_mode);
	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
		return __this_address;

	/* No zero-length symlinks/dirs. */
	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
		return __this_address;

	/* Fork checks carried over from xfs_iformat_fork */
	if (mode &&
	    be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
			be64_to_cpu(dip->di_nblocks))
		return __this_address;

	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
		return __this_address;

	flags = be16_to_cpu(dip->di_flags);

	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
		return __this_address;

	/* check for illegal values of forkoff */
	fa = xfs_dinode_verify_forkoff(dip, mp);
	if (fa)
		return fa;

	/* Do we have appropriate data fork formats for the mode? */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (dip->di_format != XFS_DINODE_FMT_DEV)
			return __this_address;
		break;
	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
		if (fa)
			return fa;
		break;
	case 0:
		/* Uninitialized inode ok. */
		break;
	default:
		return __this_address;
	}

	if (XFS_DFORK_Q(dip)) {
		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
		if (fa)
			return fa;
	} else {
		/*
		 * If there is no fork offset, this may be a freshly-made inode
		 * in a new disk cluster, in which case di_aformat is zeroed.
		 * Otherwise, such an inode must be in EXTENTS format; this goes
		 * for freed inodes as well.
		 */
		switch (dip->di_aformat) {
		case 0:
		case XFS_DINODE_FMT_EXTENTS:
			break;
		default:
			return __this_address;
		}
		if (dip->di_anextents)
			return __this_address;
	}

	/* extent size hint validation */
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
			mode, flags);
	if (fa)
		return fa;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return NULL;

	flags2 = be64_to_cpu(dip->di_flags2);

	/* don't allow reflink/cowextsize if we don't have reflink */
	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
	     !xfs_sb_version_hasreflink(&mp->m_sb))
		return __this_address;

	/* only regular files get reflink */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
		return __this_address;

	/* don't let reflink and realtime mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
		return __this_address;

	/* don't let reflink and dax mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
		return __this_address;

	/* COW extent size hint validation */
	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
			mode, flags, flags2);
	if (fa)
		return fa;

	return NULL;
}

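/*
 * Compute and stamp the CRC of the on-disk inode.  Only v3 inodes carry a
 * CRC; this is a no-op for older inode versions.
 */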
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	uint32_t		crc;

	if (dip->di_version < 3)
		return;

	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
			      XFS_DINODE_CRC_OFF);
	dip->di_crc = xfs_end_cksum(crc);
}

/*
 * Read the disk inode attributes into the in-core inode structure.
 *
 * For version 5 superblocks, if we are initialising a new inode and we are not
 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
 * inode core with a random generation number. If we are keeping inodes around,
 * we need to read the inode cluster to get the existing generation number off
 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
 * format) then log recovery is dependent on the di_flushiter field being
 * initialised from the current on-disk value and hence we must also read the
 * inode off disk.
 */
int
xfs_iread(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		iget_flags)
{
	xfs_buf_t	*bp;
	xfs_dinode_t	*dip;
	xfs_failaddr_t	fa;
	int		error;

	/*
	 * Fill in the location information in the in-core inode.
	 */
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
	if (error)
		return error;

	/* shortcut IO on inode allocation if possible */
	if ((iget_flags & XFS_IGET_CREATE) &&
	    xfs_sb_version_hascrc(&mp->m_sb) &&
	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		VFS_I(ip)->i_generation = prandom_u32();
		ip->i_d.di_version = 3;
		return 0;
	}

	/*
	 * Get pointers to the on-disk inode and the buffer containing it.
	 */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
	if (error)
		return error;

	/* even unallocated inodes are verified */
	fa = xfs_dinode_verify(mp, ip->i_ino, dip);
	if (fa) {
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
				sizeof(*dip), fa);
		error = -EFSCORRUPTED;
		goto out_brelse;
	}

	/*
	 * If the on-disk inode is already linked to a directory
	 * entry, copy all of the inode into the in-core inode.
	 * xfs_iformat_fork() handles copying in the inode format
	 * specific information.
	 * Otherwise, just get the truly permanent information.
	 */
	if (dip->di_mode) {
		xfs_inode_from_disk(ip, dip);
		error = xfs_iformat_fork(ip, dip);
		if (error) {
#ifdef DEBUG
			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
				__func__, error);
#endif /* DEBUG */
			goto out_brelse;
		}
	} else {
		/*
		 * Partial initialisation of the in-core inode. Just the bits
		 * that xfs_ialloc won't overwrite or relies on being correct.
		 */
		ip->i_d.di_version = dip->di_version;
		VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);

		/*
		 * Make sure to pull in the mode here as well in
		 * case the inode is released without being used.
		 * This ensures that xfs_inactive() will see that
		 * the inode is already free and not try to mess
		 * with the uninitialized part of it.
		 */
		VFS_I(ip)->i_mode = 0;
	}

	ASSERT(ip->i_d.di_version >= 2);
	ip->i_delayed_blks = 0;

	/*
	 * Mark the buffer containing the inode as something to keep
	 * around for a while.  This helps to keep recently accessed
	 * meta-data in-core longer.
	 */
	xfs_buf_set_ref(bp, XFS_INO_REF);

	/*
	 * Use xfs_trans_brelse() to release the buffer containing the on-disk
	 * inode, because it was acquired with xfs_trans_read_buf() in
	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
	 * will only release the buffer if it is not dirty within the
	 * transaction.  It will be OK to release the buffer in this case,
	 * because inodes on disk are never destroyed and we will be locking the
	 * new in-core inode before putting it in the cache where other
	 * processes can find it.  Thus we don't have to worry about the inode
	 * being changed just because we released the buffer.
	 */
 out_brelse:
	xfs_trans_brelse(tp, bp);
	return error;
}

/*
 * Validate di_extsize hint.
 *
 * The rules are documented at xfs_ioctl_setattr_check_extsize().
 * These functions must be kept in sync with each other.
 */
xfs_failaddr_t
xfs_inode_validate_extsize(
	struct xfs_mount		*mp,
	uint32_t			extsize,
	uint16_t			mode,
	uint16_t			flags)
{
	bool				rt_flag;
	bool				hint_flag;
	bool				inherit_flag;
	uint32_t			extsize_bytes;
	uint32_t			blocksize_bytes;

	rt_flag = (flags & XFS_DIFLAG_REALTIME);
	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
	extsize_bytes = XFS_FSB_TO_B(mp, extsize);

	if (rt_flag)
		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
	else
		blocksize_bytes = mp->m_sb.sb_blocksize;

	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
		return __this_address;

	if (hint_flag && !S_ISREG(mode))
		return __this_address;

	if (inherit_flag && !S_ISDIR(mode))
		return __this_address;

	if ((hint_flag || inherit_flag) && extsize == 0)
		return __this_address;

	/* free inodes get flags set to zero but extsize remains */
	if (mode && !(hint_flag || inherit_flag) && extsize != 0)
		return __this_address;

	if (extsize_bytes % blocksize_bytes)
		return __this_address;

	if (extsize > MAXEXTLEN)
		return __this_address;

	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
		return __this_address;

	return NULL;
}

/*
 * Validate di_cowextsize hint.
 *
 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
 * These functions must be kept in sync with each other.
 */
xfs_failaddr_t
xfs_inode_validate_cowextsize(
	struct xfs_mount		*mp,
	uint32_t			cowextsize,
	uint16_t			mode,
	uint16_t			flags,
	uint64_t			flags2)
{
	bool				rt_flag;
	bool				hint_flag;
	uint32_t			cowextsize_bytes;

	rt_flag = (flags & XFS_DIFLAG_REALTIME);
	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
	cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);

	if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
		return __this_address;

	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
		return __this_address;

	if (hint_flag && cowextsize == 0)
		return __this_address;

	/* free inodes get flags set to zero but cowextsize remains */
	if (mode && !hint_flag && cowextsize != 0)
		return __this_address;

	if (hint_flag && rt_flag)
		return __this_address;

	if (cowextsize_bytes % mp->m_sb.sb_blocksize)
		return __this_address;

	if (cowextsize > MAXEXTLEN)
		return __this_address;

	if (cowextsize > mp->m_sb.sb_agblocks / 2)
		return __this_address;

	return NULL;
}