xref: /openbmc/linux/fs/xfs/libxfs/xfs_inode_buf.c (revision 09138ba68c1487a42c400485e999386a74911dbc)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4   * All Rights Reserved.
5   */
6  #include "xfs.h"
7  #include "xfs_fs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_log_format.h"
11  #include "xfs_trans_resv.h"
12  #include "xfs_mount.h"
13  #include "xfs_ag.h"
14  #include "xfs_inode.h"
15  #include "xfs_errortag.h"
16  #include "xfs_error.h"
17  #include "xfs_icache.h"
18  #include "xfs_trans.h"
19  #include "xfs_ialloc.h"
20  #include "xfs_dir2.h"
21  
22  #include <linux/iversion.h>
23  
24  /*
25   * If we are doing readahead on an inode buffer, we might be in log recovery
26   * reading an inode allocation buffer that hasn't yet been replayed, and hence
27   * has not had the inode cores stamped into it. Hence for readahead, the buffer
28   * may be potentially invalid.
29   *
30   * If the readahead buffer is invalid, we need to mark it with an error and
31   * clear the DONE status of the buffer so that a followup read will re-read it
32   * from disk. We don't report the error otherwise to avoid warnings during log
33   * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
34   * because all we want to do is say readahead failed; there is no-one to report
35   * the error to, so this will distinguish it from a non-ra verifier failure.
36   * Changes to this readahead error behaviour also need to be reflected in
37   * xfs_dquot_buf_readahead_verify().
38   */
39  static void
xfs_inode_buf_verify(struct xfs_buf * bp,bool readahead)40  xfs_inode_buf_verify(
41  	struct xfs_buf	*bp,
42  	bool		readahead)
43  {
44  	struct xfs_mount *mp = bp->b_mount;
45  	int		i;
46  	int		ni;
47  
48  	/*
49  	 * Validate the magic number and version of every inode in the buffer
50  	 */
51  	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
52  	for (i = 0; i < ni; i++) {
53  		struct xfs_dinode	*dip;
54  		xfs_agino_t		unlinked_ino;
55  		int			di_ok;
56  
57  		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
58  		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
59  		di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
60  			xfs_dinode_good_version(mp, dip->di_version) &&
61  			xfs_verify_agino_or_null(bp->b_pag, unlinked_ino);
62  		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
63  						XFS_ERRTAG_ITOBP_INOTOBP))) {
64  			if (readahead) {
65  				bp->b_flags &= ~XBF_DONE;
66  				xfs_buf_ioerror(bp, -EIO);
67  				return;
68  			}
69  
70  #ifdef DEBUG
71  			xfs_alert(mp,
72  				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
73  				(unsigned long long)xfs_buf_daddr(bp), i,
74  				be16_to_cpu(dip->di_magic));
75  #endif
76  			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
77  					__func__, dip, sizeof(*dip),
78  					NULL);
79  			return;
80  		}
81  	}
82  }
83  
84  
85  static void
xfs_inode_buf_read_verify(struct xfs_buf * bp)86  xfs_inode_buf_read_verify(
87  	struct xfs_buf	*bp)
88  {
89  	xfs_inode_buf_verify(bp, false);
90  }
91  
92  static void
xfs_inode_buf_readahead_verify(struct xfs_buf * bp)93  xfs_inode_buf_readahead_verify(
94  	struct xfs_buf	*bp)
95  {
96  	xfs_inode_buf_verify(bp, true);
97  }
98  
99  static void
xfs_inode_buf_write_verify(struct xfs_buf * bp)100  xfs_inode_buf_write_verify(
101  	struct xfs_buf	*bp)
102  {
103  	xfs_inode_buf_verify(bp, false);
104  }
105  
106  const struct xfs_buf_ops xfs_inode_buf_ops = {
107  	.name = "xfs_inode",
108  	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
109  		     cpu_to_be16(XFS_DINODE_MAGIC) },
110  	.verify_read = xfs_inode_buf_read_verify,
111  	.verify_write = xfs_inode_buf_write_verify,
112  };
113  
114  const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
115  	.name = "xfs_inode_ra",
116  	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
117  		     cpu_to_be16(XFS_DINODE_MAGIC) },
118  	.verify_read = xfs_inode_buf_readahead_verify,
119  	.verify_write = xfs_inode_buf_write_verify,
120  };
121  
122  
123  /*
124   * This routine is called to map an inode to the buffer containing the on-disk
125   * version of the inode.  It returns a pointer to the buffer containing the
126   * on-disk inode in the bpp parameter.
127   */
128  int
xfs_imap_to_bp(struct xfs_mount * mp,struct xfs_trans * tp,struct xfs_imap * imap,struct xfs_buf ** bpp)129  xfs_imap_to_bp(
130  	struct xfs_mount	*mp,
131  	struct xfs_trans	*tp,
132  	struct xfs_imap		*imap,
133  	struct xfs_buf		**bpp)
134  {
135  	return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
136  				   imap->im_len, XBF_UNMAPPED, bpp,
137  				   &xfs_inode_buf_ops);
138  }
139  
/*
 * Convert an ondisk bigtime value to an incore timestamp.
 *
 * @ts is split by NSEC_PER_SEC: the quotient is translated with
 * xfs_bigtime_to_unix() to give tv_sec, and the remainder becomes tv_nsec.
 */
static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
{
	uint32_t		nsec;
	uint64_t		bigtime_sec = div_u64_rem(ts, NSEC_PER_SEC, &nsec);
	struct timespec64	tv = {
		.tv_sec		= xfs_bigtime_to_unix(bigtime_sec),
		.tv_nsec	= nsec,
	};

	return tv;
}
150  
151  /* Convert an ondisk timestamp to an incore timestamp. */
152  struct timespec64
xfs_inode_from_disk_ts(struct xfs_dinode * dip,const xfs_timestamp_t ts)153  xfs_inode_from_disk_ts(
154  	struct xfs_dinode		*dip,
155  	const xfs_timestamp_t		ts)
156  {
157  	struct timespec64		tv;
158  	struct xfs_legacy_timestamp	*lts;
159  
160  	if (xfs_dinode_has_bigtime(dip))
161  		return xfs_inode_decode_bigtime(be64_to_cpu(ts));
162  
163  	lts = (struct xfs_legacy_timestamp *)&ts;
164  	tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
165  	tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
166  
167  	return tv;
168  }
169  
170  int
xfs_inode_from_disk(struct xfs_inode * ip,struct xfs_dinode * from)171  xfs_inode_from_disk(
172  	struct xfs_inode	*ip,
173  	struct xfs_dinode	*from)
174  {
175  	struct inode		*inode = VFS_I(ip);
176  	int			error;
177  	xfs_failaddr_t		fa;
178  
179  	ASSERT(ip->i_cowfp == NULL);
180  
181  	fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
182  	if (fa) {
183  		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
184  				sizeof(*from), fa);
185  		return -EFSCORRUPTED;
186  	}
187  
188  	/*
189  	 * First get the permanent information that is needed to allocate an
190  	 * inode. If the inode is unused, mode is zero and we shouldn't mess
191  	 * with the uninitialized part of it.
192  	 */
193  	if (!xfs_has_v3inodes(ip->i_mount))
194  		ip->i_flushiter = be16_to_cpu(from->di_flushiter);
195  	inode->i_generation = be32_to_cpu(from->di_gen);
196  	inode->i_mode = be16_to_cpu(from->di_mode);
197  	if (!inode->i_mode)
198  		return 0;
199  
200  	/*
201  	 * Convert v1 inodes immediately to v2 inode format as this is the
202  	 * minimum inode version format we support in the rest of the code.
203  	 * They will also be unconditionally written back to disk as v2 inodes.
204  	 */
205  	if (unlikely(from->di_version == 1)) {
206  		set_nlink(inode, be16_to_cpu(from->di_onlink));
207  		ip->i_projid = 0;
208  	} else {
209  		set_nlink(inode, be32_to_cpu(from->di_nlink));
210  		ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
211  					be16_to_cpu(from->di_projid_lo);
212  	}
213  
214  	i_uid_write(inode, be32_to_cpu(from->di_uid));
215  	i_gid_write(inode, be32_to_cpu(from->di_gid));
216  
217  	/*
218  	 * Time is signed, so need to convert to signed 32 bit before
219  	 * storing in inode timestamp which may be 64 bit. Otherwise
220  	 * a time before epoch is converted to a time long after epoch
221  	 * on 64 bit systems.
222  	 */
223  	inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
224  	inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
225  	inode_set_ctime_to_ts(inode,
226  			      xfs_inode_from_disk_ts(from, from->di_ctime));
227  
228  	ip->i_disk_size = be64_to_cpu(from->di_size);
229  	ip->i_nblocks = be64_to_cpu(from->di_nblocks);
230  	ip->i_extsize = be32_to_cpu(from->di_extsize);
231  	ip->i_forkoff = from->di_forkoff;
232  	ip->i_diflags = be16_to_cpu(from->di_flags);
233  	ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked);
234  
235  	if (from->di_dmevmask || from->di_dmstate)
236  		xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);
237  
238  	if (xfs_has_v3inodes(ip->i_mount)) {
239  		inode_set_iversion_queried(inode,
240  					   be64_to_cpu(from->di_changecount));
241  		ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
242  		ip->i_diflags2 = be64_to_cpu(from->di_flags2);
243  		ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
244  	}
245  
246  	error = xfs_iformat_data_fork(ip, from);
247  	if (error)
248  		return error;
249  	if (from->di_forkoff) {
250  		error = xfs_iformat_attr_fork(ip, from);
251  		if (error)
252  			goto out_destroy_data_fork;
253  	}
254  	if (xfs_is_reflink_inode(ip))
255  		xfs_ifork_init_cow(ip);
256  	return 0;
257  
258  out_destroy_data_fork:
259  	xfs_idestroy_fork(&ip->i_df);
260  	return error;
261  }
262  
263  /* Convert an incore timestamp to an ondisk timestamp. */
264  static inline xfs_timestamp_t
xfs_inode_to_disk_ts(struct xfs_inode * ip,const struct timespec64 tv)265  xfs_inode_to_disk_ts(
266  	struct xfs_inode		*ip,
267  	const struct timespec64		tv)
268  {
269  	struct xfs_legacy_timestamp	*lts;
270  	xfs_timestamp_t			ts;
271  
272  	if (xfs_inode_has_bigtime(ip))
273  		return cpu_to_be64(xfs_inode_encode_bigtime(tv));
274  
275  	lts = (struct xfs_legacy_timestamp *)&ts;
276  	lts->t_sec = cpu_to_be32(tv.tv_sec);
277  	lts->t_nsec = cpu_to_be32(tv.tv_nsec);
278  
279  	return ts;
280  }
281  
282  static inline void
xfs_inode_to_disk_iext_counters(struct xfs_inode * ip,struct xfs_dinode * to)283  xfs_inode_to_disk_iext_counters(
284  	struct xfs_inode	*ip,
285  	struct xfs_dinode	*to)
286  {
287  	if (xfs_inode_has_large_extent_counts(ip)) {
288  		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
289  		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af));
290  		/*
291  		 * We might be upgrading the inode to use larger extent counters
292  		 * than was previously used. Hence zero the unused field.
293  		 */
294  		to->di_nrext64_pad = cpu_to_be16(0);
295  	} else {
296  		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
297  		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af));
298  	}
299  }
300  
301  void
xfs_inode_to_disk(struct xfs_inode * ip,struct xfs_dinode * to,xfs_lsn_t lsn)302  xfs_inode_to_disk(
303  	struct xfs_inode	*ip,
304  	struct xfs_dinode	*to,
305  	xfs_lsn_t		lsn)
306  {
307  	struct inode		*inode = VFS_I(ip);
308  
309  	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
310  	to->di_onlink = 0;
311  
312  	to->di_format = xfs_ifork_format(&ip->i_df);
313  	to->di_uid = cpu_to_be32(i_uid_read(inode));
314  	to->di_gid = cpu_to_be32(i_gid_read(inode));
315  	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
316  	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
317  
318  	to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
319  	to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
320  	to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
321  	to->di_nlink = cpu_to_be32(inode->i_nlink);
322  	to->di_gen = cpu_to_be32(inode->i_generation);
323  	to->di_mode = cpu_to_be16(inode->i_mode);
324  
325  	to->di_size = cpu_to_be64(ip->i_disk_size);
326  	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
327  	to->di_extsize = cpu_to_be32(ip->i_extsize);
328  	to->di_forkoff = ip->i_forkoff;
329  	to->di_aformat = xfs_ifork_format(&ip->i_af);
330  	to->di_flags = cpu_to_be16(ip->i_diflags);
331  
332  	if (xfs_has_v3inodes(ip->i_mount)) {
333  		to->di_version = 3;
334  		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
335  		to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
336  		to->di_flags2 = cpu_to_be64(ip->i_diflags2);
337  		to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
338  		to->di_ino = cpu_to_be64(ip->i_ino);
339  		to->di_lsn = cpu_to_be64(lsn);
340  		memset(to->di_pad2, 0, sizeof(to->di_pad2));
341  		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
342  		to->di_v3_pad = 0;
343  	} else {
344  		to->di_version = 2;
345  		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
346  		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
347  	}
348  
349  	xfs_inode_to_disk_iext_counters(ip, to);
350  }
351  
352  static xfs_failaddr_t
xfs_dinode_verify_fork(struct xfs_dinode * dip,struct xfs_mount * mp,int whichfork)353  xfs_dinode_verify_fork(
354  	struct xfs_dinode	*dip,
355  	struct xfs_mount	*mp,
356  	int			whichfork)
357  {
358  	xfs_extnum_t		di_nextents;
359  	xfs_extnum_t		max_extents;
360  	mode_t			mode = be16_to_cpu(dip->di_mode);
361  	uint32_t		fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
362  	uint32_t		fork_format = XFS_DFORK_FORMAT(dip, whichfork);
363  
364  	di_nextents = xfs_dfork_nextents(dip, whichfork);
365  
366  	/*
367  	 * For fork types that can contain local data, check that the fork
368  	 * format matches the size of local data contained within the fork.
369  	 */
370  	if (whichfork == XFS_DATA_FORK) {
371  		/*
372  		 * A directory small enough to fit in the inode must be stored
373  		 * in local format.  The directory sf <-> extents conversion
374  		 * code updates the directory size accordingly.  Directories
375  		 * being truncated have zero size and are not subject to this
376  		 * check.
377  		 */
378  		if (S_ISDIR(mode)) {
379  			if (dip->di_size &&
380  			    be64_to_cpu(dip->di_size) <= fork_size &&
381  			    fork_format != XFS_DINODE_FMT_LOCAL)
382  				return __this_address;
383  		}
384  
385  		/*
386  		 * A symlink with a target small enough to fit in the inode can
387  		 * be stored in extents format if xattrs were added (thus
388  		 * converting the data fork from shortform to remote format)
389  		 * and then removed.
390  		 */
391  		if (S_ISLNK(mode)) {
392  			if (be64_to_cpu(dip->di_size) <= fork_size &&
393  			    fork_format != XFS_DINODE_FMT_EXTENTS &&
394  			    fork_format != XFS_DINODE_FMT_LOCAL)
395  				return __this_address;
396  		}
397  
398  		/*
399  		 * For all types, check that when the size says the fork should
400  		 * be in extent or btree format, the inode isn't claiming to be
401  		 * in local format.
402  		 */
403  		if (be64_to_cpu(dip->di_size) > fork_size &&
404  		    fork_format == XFS_DINODE_FMT_LOCAL)
405  			return __this_address;
406  	}
407  
408  	switch (fork_format) {
409  	case XFS_DINODE_FMT_LOCAL:
410  		/*
411  		 * No local regular files yet.
412  		 */
413  		if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
414  			return __this_address;
415  		if (di_nextents)
416  			return __this_address;
417  		break;
418  	case XFS_DINODE_FMT_EXTENTS:
419  		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
420  			return __this_address;
421  		break;
422  	case XFS_DINODE_FMT_BTREE:
423  		max_extents = xfs_iext_max_nextents(
424  					xfs_dinode_has_large_extent_counts(dip),
425  					whichfork);
426  		if (di_nextents > max_extents)
427  			return __this_address;
428  		break;
429  	default:
430  		return __this_address;
431  	}
432  	return NULL;
433  }
434  
435  static xfs_failaddr_t
xfs_dinode_verify_forkoff(struct xfs_dinode * dip,struct xfs_mount * mp)436  xfs_dinode_verify_forkoff(
437  	struct xfs_dinode	*dip,
438  	struct xfs_mount	*mp)
439  {
440  	if (!dip->di_forkoff)
441  		return NULL;
442  
443  	switch (dip->di_format)  {
444  	case XFS_DINODE_FMT_DEV:
445  		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
446  			return __this_address;
447  		break;
448  	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
449  	case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
450  	case XFS_DINODE_FMT_BTREE:
451  		if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
452  			return __this_address;
453  		break;
454  	default:
455  		return __this_address;
456  	}
457  	return NULL;
458  }
459  
460  static xfs_failaddr_t
xfs_dinode_verify_nrext64(struct xfs_mount * mp,struct xfs_dinode * dip)461  xfs_dinode_verify_nrext64(
462  	struct xfs_mount	*mp,
463  	struct xfs_dinode	*dip)
464  {
465  	if (xfs_dinode_has_large_extent_counts(dip)) {
466  		if (!xfs_has_large_extent_counts(mp))
467  			return __this_address;
468  		if (dip->di_nrext64_pad != 0)
469  			return __this_address;
470  	} else if (dip->di_version >= 3) {
471  		if (dip->di_v3_pad != 0)
472  			return __this_address;
473  	}
474  
475  	return NULL;
476  }
477  
478  xfs_failaddr_t
xfs_dinode_verify(struct xfs_mount * mp,xfs_ino_t ino,struct xfs_dinode * dip)479  xfs_dinode_verify(
480  	struct xfs_mount	*mp,
481  	xfs_ino_t		ino,
482  	struct xfs_dinode	*dip)
483  {
484  	xfs_failaddr_t		fa;
485  	uint16_t		mode;
486  	uint16_t		flags;
487  	uint64_t		flags2;
488  	uint64_t		di_size;
489  	xfs_extnum_t		nextents;
490  	xfs_extnum_t		naextents;
491  	xfs_filblks_t		nblocks;
492  
493  	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
494  		return __this_address;
495  
496  	/* Verify v3 integrity information first */
497  	if (dip->di_version >= 3) {
498  		if (!xfs_has_v3inodes(mp))
499  			return __this_address;
500  		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
501  				      XFS_DINODE_CRC_OFF))
502  			return __this_address;
503  		if (be64_to_cpu(dip->di_ino) != ino)
504  			return __this_address;
505  		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
506  			return __this_address;
507  	}
508  
509  	/* don't allow invalid i_size */
510  	di_size = be64_to_cpu(dip->di_size);
511  	if (di_size & (1ULL << 63))
512  		return __this_address;
513  
514  	mode = be16_to_cpu(dip->di_mode);
515  	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
516  		return __this_address;
517  
518  	/*
519  	 * No zero-length symlinks/dirs unless they're unlinked and hence being
520  	 * inactivated.
521  	 */
522  	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
523  		if (dip->di_version > 1) {
524  			if (dip->di_nlink)
525  				return __this_address;
526  		} else {
527  			if (dip->di_onlink)
528  				return __this_address;
529  		}
530  	}
531  
532  	fa = xfs_dinode_verify_nrext64(mp, dip);
533  	if (fa)
534  		return fa;
535  
536  	nextents = xfs_dfork_data_extents(dip);
537  	naextents = xfs_dfork_attr_extents(dip);
538  	nblocks = be64_to_cpu(dip->di_nblocks);
539  
540  	/* Fork checks carried over from xfs_iformat_fork */
541  	if (mode && nextents + naextents > nblocks)
542  		return __this_address;
543  
544  	if (nextents + naextents == 0 && nblocks != 0)
545  		return __this_address;
546  
547  	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
548  		return __this_address;
549  
550  	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
551  		return __this_address;
552  
553  	flags = be16_to_cpu(dip->di_flags);
554  
555  	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
556  		return __this_address;
557  
558  	/* check for illegal values of forkoff */
559  	fa = xfs_dinode_verify_forkoff(dip, mp);
560  	if (fa)
561  		return fa;
562  
563  	/* Do we have appropriate data fork formats for the mode? */
564  	switch (mode & S_IFMT) {
565  	case S_IFIFO:
566  	case S_IFCHR:
567  	case S_IFBLK:
568  	case S_IFSOCK:
569  		if (dip->di_format != XFS_DINODE_FMT_DEV)
570  			return __this_address;
571  		break;
572  	case S_IFREG:
573  	case S_IFLNK:
574  	case S_IFDIR:
575  		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
576  		if (fa)
577  			return fa;
578  		break;
579  	case 0:
580  		/* Uninitialized inode ok. */
581  		break;
582  	default:
583  		return __this_address;
584  	}
585  
586  	if (dip->di_forkoff) {
587  		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
588  		if (fa)
589  			return fa;
590  	} else {
591  		/*
592  		 * If there is no fork offset, this may be a freshly-made inode
593  		 * in a new disk cluster, in which case di_aformat is zeroed.
594  		 * Otherwise, such an inode must be in EXTENTS format; this goes
595  		 * for freed inodes as well.
596  		 */
597  		switch (dip->di_aformat) {
598  		case 0:
599  		case XFS_DINODE_FMT_EXTENTS:
600  			break;
601  		default:
602  			return __this_address;
603  		}
604  		if (naextents)
605  			return __this_address;
606  	}
607  
608  	/* extent size hint validation */
609  	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
610  			mode, flags);
611  	if (fa)
612  		return fa;
613  
614  	/* only version 3 or greater inodes are extensively verified here */
615  	if (dip->di_version < 3)
616  		return NULL;
617  
618  	flags2 = be64_to_cpu(dip->di_flags2);
619  
620  	/* don't allow reflink/cowextsize if we don't have reflink */
621  	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
622  	     !xfs_has_reflink(mp))
623  		return __this_address;
624  
625  	/* only regular files get reflink */
626  	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
627  		return __this_address;
628  
629  	/* don't let reflink and realtime mix */
630  	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
631  		return __this_address;
632  
633  	/* COW extent size hint validation */
634  	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
635  			mode, flags, flags2);
636  	if (fa)
637  		return fa;
638  
639  	/* bigtime iflag can only happen on bigtime filesystems */
640  	if (xfs_dinode_has_bigtime(dip) &&
641  	    !xfs_has_bigtime(mp))
642  		return __this_address;
643  
644  	return NULL;
645  }
646  
647  void
xfs_dinode_calc_crc(struct xfs_mount * mp,struct xfs_dinode * dip)648  xfs_dinode_calc_crc(
649  	struct xfs_mount	*mp,
650  	struct xfs_dinode	*dip)
651  {
652  	uint32_t		crc;
653  
654  	if (dip->di_version < 3)
655  		return;
656  
657  	ASSERT(xfs_has_crc(mp));
658  	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
659  			      XFS_DINODE_CRC_OFF);
660  	dip->di_crc = xfs_end_cksum(crc);
661  }
662  
663  /*
664   * Validate di_extsize hint.
665   *
666   * 1. Extent size hint is only valid for directories and regular files.
667   * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
668   * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
669   * 4. Hint cannot be larger than MAXTEXTLEN.
670   * 5. Can be changed on directories at any time.
671   * 6. Hint value of 0 turns off hints, clears inode flags.
672   * 7. Extent size must be a multiple of the appropriate block size.
673   *    For realtime files, this is the rt extent size.
674   * 8. For non-realtime files, the extent size hint must be limited
675   *    to half the AG size to avoid alignment extending the extent beyond the
676   *    limits of the AG.
677   */
678  xfs_failaddr_t
xfs_inode_validate_extsize(struct xfs_mount * mp,uint32_t extsize,uint16_t mode,uint16_t flags)679  xfs_inode_validate_extsize(
680  	struct xfs_mount		*mp,
681  	uint32_t			extsize,
682  	uint16_t			mode,
683  	uint16_t			flags)
684  {
685  	bool				rt_flag;
686  	bool				hint_flag;
687  	bool				inherit_flag;
688  	uint32_t			extsize_bytes;
689  	uint32_t			blocksize_bytes;
690  
691  	rt_flag = (flags & XFS_DIFLAG_REALTIME);
692  	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
693  	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
694  	extsize_bytes = XFS_FSB_TO_B(mp, extsize);
695  
696  	/*
697  	 * This comment describes a historic gap in this verifier function.
698  	 *
699  	 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
700  	 * function has never checked that the extent size hint is an integer
701  	 * multiple of the realtime extent size.  Since we allow users to set
702  	 * this combination  on non-rt filesystems /and/ to change the rt
703  	 * extent size when adding a rt device to a filesystem, the net effect
704  	 * is that users can configure a filesystem anticipating one rt
705  	 * geometry and change their minds later.  Directories do not use the
706  	 * extent size hint, so this is harmless for them.
707  	 *
708  	 * If a directory with a misaligned extent size hint is allowed to
709  	 * propagate that hint into a new regular realtime file, the result
710  	 * is that the inode cluster buffer verifier will trigger a corruption
711  	 * shutdown the next time it is run, because the verifier has always
712  	 * enforced the alignment rule for regular files.
713  	 *
714  	 * Because we allow administrators to set a new rt extent size when
715  	 * adding a rt section, we cannot add a check to this verifier because
716  	 * that will result a new source of directory corruption errors when
717  	 * reading an existing filesystem.  Instead, we rely on callers to
718  	 * decide when alignment checks are appropriate, and fix things up as
719  	 * needed.
720  	 */
721  
722  	if (rt_flag)
723  		blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
724  	else
725  		blocksize_bytes = mp->m_sb.sb_blocksize;
726  
727  	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
728  		return __this_address;
729  
730  	if (hint_flag && !S_ISREG(mode))
731  		return __this_address;
732  
733  	if (inherit_flag && !S_ISDIR(mode))
734  		return __this_address;
735  
736  	if ((hint_flag || inherit_flag) && extsize == 0)
737  		return __this_address;
738  
739  	/* free inodes get flags set to zero but extsize remains */
740  	if (mode && !(hint_flag || inherit_flag) && extsize != 0)
741  		return __this_address;
742  
743  	if (extsize_bytes % blocksize_bytes)
744  		return __this_address;
745  
746  	if (extsize > XFS_MAX_BMBT_EXTLEN)
747  		return __this_address;
748  
749  	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
750  		return __this_address;
751  
752  	return NULL;
753  }
754  
755  /*
756   * Validate di_cowextsize hint.
757   *
758   * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
759   *    The inode does not have to have any shared blocks, but it must be a v3.
760   * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
761   *    for a directory, the hint is propagated to new files.
762   * 3. Can be changed on files & directories at any time.
763   * 4. Hint value of 0 turns off hints, clears inode flags.
764   * 5. Extent size must be a multiple of the appropriate block size.
765   * 6. The extent size hint must be limited to half the AG size to avoid
766   *    alignment extending the extent beyond the limits of the AG.
767   */
768  xfs_failaddr_t
xfs_inode_validate_cowextsize(struct xfs_mount * mp,uint32_t cowextsize,uint16_t mode,uint16_t flags,uint64_t flags2)769  xfs_inode_validate_cowextsize(
770  	struct xfs_mount		*mp,
771  	uint32_t			cowextsize,
772  	uint16_t			mode,
773  	uint16_t			flags,
774  	uint64_t			flags2)
775  {
776  	bool				rt_flag;
777  	bool				hint_flag;
778  	uint32_t			cowextsize_bytes;
779  
780  	rt_flag = (flags & XFS_DIFLAG_REALTIME);
781  	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
782  	cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
783  
784  	if (hint_flag && !xfs_has_reflink(mp))
785  		return __this_address;
786  
787  	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
788  		return __this_address;
789  
790  	if (hint_flag && cowextsize == 0)
791  		return __this_address;
792  
793  	/* free inodes get flags set to zero but cowextsize remains */
794  	if (mode && !hint_flag && cowextsize != 0)
795  		return __this_address;
796  
797  	if (hint_flag && rt_flag)
798  		return __this_address;
799  
800  	if (cowextsize_bytes % mp->m_sb.sb_blocksize)
801  		return __this_address;
802  
803  	if (cowextsize > XFS_MAX_BMBT_EXTLEN)
804  		return __this_address;
805  
806  	if (cowextsize > mp->m_sb.sb_agblocks / 2)
807  		return __this_address;
808  
809  	return NULL;
810  }
811