1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_defer.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_cksum.h" 18 #include "xfs_icache.h" 19 #include "xfs_trans.h" 20 #include "xfs_ialloc.h" 21 #include "xfs_dir2.h" 22 23 #include <linux/iversion.h> 24 25 /* 26 * Check that none of the inode's in the buffer have a next 27 * unlinked field of 0. 28 */ 29 #if defined(DEBUG) 30 void 31 xfs_inobp_check( 32 xfs_mount_t *mp, 33 xfs_buf_t *bp) 34 { 35 int i; 36 int j; 37 xfs_dinode_t *dip; 38 39 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 40 41 for (i = 0; i < j; i++) { 42 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); 43 if (!dip->di_next_unlinked) { 44 xfs_alert(mp, 45 "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", 46 i, (long long)bp->b_bn); 47 } 48 } 49 } 50 #endif 51 52 bool 53 xfs_dinode_good_version( 54 struct xfs_mount *mp, 55 __u8 version) 56 { 57 if (xfs_sb_version_hascrc(&mp->m_sb)) 58 return version == 3; 59 60 return version == 1 || version == 2; 61 } 62 63 /* 64 * If we are doing readahead on an inode buffer, we might be in log recovery 65 * reading an inode allocation buffer that hasn't yet been replayed, and hence 66 * has not had the inode cores stamped into it. Hence for readahead, the buffer 67 * may be potentially invalid. 68 * 69 * If the readahead buffer is invalid, we need to mark it with an error and 70 * clear the DONE status of the buffer so that a followup read will re-read it 71 * from disk. We don't report the error otherwise to avoid warnings during log 72 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here 73 * because all we want to do is say readahead failed; there is no-one to report 74 * the error to, so this will distinguish it from a non-ra verifier failure. 75 * Changes to this readahead error behavour also need to be reflected in 76 * xfs_dquot_buf_readahead_verify(). 77 */ 78 static void 79 xfs_inode_buf_verify( 80 struct xfs_buf *bp, 81 bool readahead) 82 { 83 struct xfs_mount *mp = bp->b_target->bt_mount; 84 xfs_agnumber_t agno; 85 int i; 86 int ni; 87 88 /* 89 * Validate the magic number and version of every inode in the buffer 90 */ 91 agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); 92 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 93 for (i = 0; i < ni; i++) { 94 int di_ok; 95 xfs_dinode_t *dip; 96 xfs_agino_t unlinked_ino; 97 98 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 99 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 100 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 101 xfs_dinode_good_version(mp, dip->di_version) && 102 xfs_verify_agino_or_null(mp, agno, unlinked_ino); 103 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 104 XFS_ERRTAG_ITOBP_INOTOBP))) { 105 if (readahead) { 106 bp->b_flags &= ~XBF_DONE; 107 xfs_buf_ioerror(bp, -EIO); 108 return; 109 } 110 111 #ifdef DEBUG 112 xfs_alert(mp, 113 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 114 (unsigned long long)bp->b_bn, i, 115 be16_to_cpu(dip->di_magic)); 116 #endif 117 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 118 __func__, dip, sizeof(*dip), 119 NULL); 120 return; 121 } 122 } 123 } 124 125 126 static void 127 xfs_inode_buf_read_verify( 128 struct xfs_buf *bp) 129 { 130 xfs_inode_buf_verify(bp, false); 131 } 132 133 static void 134 xfs_inode_buf_readahead_verify( 135 struct xfs_buf *bp) 136 { 137 xfs_inode_buf_verify(bp, true); 138 } 139 140 static void 141 xfs_inode_buf_write_verify( 142 struct xfs_buf *bp) 143 { 144 xfs_inode_buf_verify(bp, false); 145 } 146 147 const struct xfs_buf_ops xfs_inode_buf_ops = { 148 .name = "xfs_inode", 149 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 150 cpu_to_be16(XFS_DINODE_MAGIC) }, 151 .verify_read = xfs_inode_buf_read_verify, 152 .verify_write = xfs_inode_buf_write_verify, 153 }; 154 155 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 156 .name = "xfs_inode_ra", 157 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 158 cpu_to_be16(XFS_DINODE_MAGIC) }, 159 .verify_read = xfs_inode_buf_readahead_verify, 160 .verify_write = xfs_inode_buf_write_verify, 161 }; 162 163 164 /* 165 * This routine is called to map an inode to the buffer containing the on-disk 166 * version of the inode. It returns a pointer to the buffer containing the 167 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a 168 * pointer to the on-disk inode within that buffer. 169 * 170 * If a non-zero error is returned, then the contents of bpp and dipp are 171 * undefined. 172 */ 173 int 174 xfs_imap_to_bp( 175 struct xfs_mount *mp, 176 struct xfs_trans *tp, 177 struct xfs_imap *imap, 178 struct xfs_dinode **dipp, 179 struct xfs_buf **bpp, 180 uint buf_flags, 181 uint iget_flags) 182 { 183 struct xfs_buf *bp; 184 int error; 185 186 buf_flags |= XBF_UNMAPPED; 187 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 188 (int)imap->im_len, buf_flags, &bp, 189 &xfs_inode_buf_ops); 190 if (error) { 191 if (error == -EAGAIN) { 192 ASSERT(buf_flags & XBF_TRYLOCK); 193 return error; 194 } 195 xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", 196 __func__, error); 197 return error; 198 } 199 200 *bpp = bp; 201 *dipp = xfs_buf_offset(bp, imap->im_boffset); 202 return 0; 203 } 204 205 void 206 xfs_inode_from_disk( 207 struct xfs_inode *ip, 208 struct xfs_dinode *from) 209 { 210 struct xfs_icdinode *to = &ip->i_d; 211 struct inode *inode = VFS_I(ip); 212 213 214 /* 215 * Convert v1 inodes immediately to v2 inode format as this is the 216 * minimum inode version format we support in the rest of the code. 217 */ 218 to->di_version = from->di_version; 219 if (to->di_version == 1) { 220 set_nlink(inode, be16_to_cpu(from->di_onlink)); 221 to->di_projid_lo = 0; 222 to->di_projid_hi = 0; 223 to->di_version = 2; 224 } else { 225 set_nlink(inode, be32_to_cpu(from->di_nlink)); 226 to->di_projid_lo = be16_to_cpu(from->di_projid_lo); 227 to->di_projid_hi = be16_to_cpu(from->di_projid_hi); 228 } 229 230 to->di_format = from->di_format; 231 to->di_uid = be32_to_cpu(from->di_uid); 232 to->di_gid = be32_to_cpu(from->di_gid); 233 to->di_flushiter = be16_to_cpu(from->di_flushiter); 234 235 /* 236 * Time is signed, so need to convert to signed 32 bit before 237 * storing in inode timestamp which may be 64 bit. Otherwise 238 * a time before epoch is converted to a time long after epoch 239 * on 64 bit systems. 240 */ 241 inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec); 242 inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec); 243 inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec); 244 inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec); 245 inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec); 246 inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec); 247 inode->i_generation = be32_to_cpu(from->di_gen); 248 inode->i_mode = be16_to_cpu(from->di_mode); 249 250 to->di_size = be64_to_cpu(from->di_size); 251 to->di_nblocks = be64_to_cpu(from->di_nblocks); 252 to->di_extsize = be32_to_cpu(from->di_extsize); 253 to->di_nextents = be32_to_cpu(from->di_nextents); 254 to->di_anextents = be16_to_cpu(from->di_anextents); 255 to->di_forkoff = from->di_forkoff; 256 to->di_aformat = from->di_aformat; 257 to->di_dmevmask = be32_to_cpu(from->di_dmevmask); 258 to->di_dmstate = be16_to_cpu(from->di_dmstate); 259 to->di_flags = be16_to_cpu(from->di_flags); 260 261 if (to->di_version == 3) { 262 inode_set_iversion_queried(inode, 263 be64_to_cpu(from->di_changecount)); 264 to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); 265 to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); 266 to->di_flags2 = be64_to_cpu(from->di_flags2); 267 to->di_cowextsize = be32_to_cpu(from->di_cowextsize); 268 } 269 } 270 271 void 272 xfs_inode_to_disk( 273 struct xfs_inode *ip, 274 struct xfs_dinode *to, 275 xfs_lsn_t lsn) 276 { 277 struct xfs_icdinode *from = &ip->i_d; 278 struct inode *inode = VFS_I(ip); 279 280 to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 281 to->di_onlink = 0; 282 283 to->di_version = from->di_version; 284 to->di_format = from->di_format; 285 to->di_uid = cpu_to_be32(from->di_uid); 286 to->di_gid = cpu_to_be32(from->di_gid); 287 to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 288 to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 289 290 memset(to->di_pad, 0, sizeof(to->di_pad)); 291 to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec); 292 to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec); 293 to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec); 294 to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); 295 to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec); 296 to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); 297 to->di_nlink = cpu_to_be32(inode->i_nlink); 298 to->di_gen = cpu_to_be32(inode->i_generation); 299 to->di_mode = cpu_to_be16(inode->i_mode); 300 301 to->di_size = cpu_to_be64(from->di_size); 302 to->di_nblocks = cpu_to_be64(from->di_nblocks); 303 to->di_extsize = cpu_to_be32(from->di_extsize); 304 to->di_nextents = cpu_to_be32(from->di_nextents); 305 to->di_anextents = cpu_to_be16(from->di_anextents); 306 to->di_forkoff = from->di_forkoff; 307 to->di_aformat = from->di_aformat; 308 to->di_dmevmask = cpu_to_be32(from->di_dmevmask); 309 to->di_dmstate = cpu_to_be16(from->di_dmstate); 310 to->di_flags = cpu_to_be16(from->di_flags); 311 312 if (from->di_version == 3) { 313 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); 314 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); 315 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); 316 to->di_flags2 = cpu_to_be64(from->di_flags2); 317 to->di_cowextsize = cpu_to_be32(from->di_cowextsize); 318 to->di_ino = cpu_to_be64(ip->i_ino); 319 to->di_lsn = cpu_to_be64(lsn); 320 memset(to->di_pad2, 0, sizeof(to->di_pad2)); 321 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); 322 to->di_flushiter = 0; 323 } else { 324 to->di_flushiter = cpu_to_be16(from->di_flushiter); 325 } 326 } 327 328 void 329 xfs_log_dinode_to_disk( 330 struct xfs_log_dinode *from, 331 struct xfs_dinode *to) 332 { 333 to->di_magic = cpu_to_be16(from->di_magic); 334 to->di_mode = cpu_to_be16(from->di_mode); 335 to->di_version = from->di_version; 336 to->di_format = from->di_format; 337 to->di_onlink = 0; 338 to->di_uid = cpu_to_be32(from->di_uid); 339 to->di_gid = cpu_to_be32(from->di_gid); 340 to->di_nlink = cpu_to_be32(from->di_nlink); 341 to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 342 to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 343 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 344 345 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 346 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 347 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); 348 to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec); 349 to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec); 350 to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec); 351 352 to->di_size = cpu_to_be64(from->di_size); 353 to->di_nblocks = cpu_to_be64(from->di_nblocks); 354 to->di_extsize = cpu_to_be32(from->di_extsize); 355 to->di_nextents = cpu_to_be32(from->di_nextents); 356 to->di_anextents = cpu_to_be16(from->di_anextents); 357 to->di_forkoff = from->di_forkoff; 358 to->di_aformat = from->di_aformat; 359 to->di_dmevmask = cpu_to_be32(from->di_dmevmask); 360 to->di_dmstate = cpu_to_be16(from->di_dmstate); 361 to->di_flags = cpu_to_be16(from->di_flags); 362 to->di_gen = cpu_to_be32(from->di_gen); 363 364 if (from->di_version == 3) { 365 to->di_changecount = cpu_to_be64(from->di_changecount); 366 to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); 367 to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); 368 to->di_flags2 = cpu_to_be64(from->di_flags2); 369 to->di_cowextsize = cpu_to_be32(from->di_cowextsize); 370 to->di_ino = cpu_to_be64(from->di_ino); 371 to->di_lsn = cpu_to_be64(from->di_lsn); 372 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); 373 uuid_copy(&to->di_uuid, &from->di_uuid); 374 to->di_flushiter = 0; 375 } else { 376 to->di_flushiter = cpu_to_be16(from->di_flushiter); 377 } 378 } 379 380 static xfs_failaddr_t 381 xfs_dinode_verify_fork( 382 struct xfs_dinode *dip, 383 struct xfs_mount *mp, 384 int whichfork) 385 { 386 uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); 387 388 switch (XFS_DFORK_FORMAT(dip, whichfork)) { 389 case XFS_DINODE_FMT_LOCAL: 390 /* 391 * no local regular files yet 392 */ 393 if (whichfork == XFS_DATA_FORK) { 394 if (S_ISREG(be16_to_cpu(dip->di_mode))) 395 return __this_address; 396 if (be64_to_cpu(dip->di_size) > 397 XFS_DFORK_SIZE(dip, mp, whichfork)) 398 return __this_address; 399 } 400 if (di_nextents) 401 return __this_address; 402 break; 403 case XFS_DINODE_FMT_EXTENTS: 404 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) 405 return __this_address; 406 break; 407 case XFS_DINODE_FMT_BTREE: 408 if (whichfork == XFS_ATTR_FORK) { 409 if (di_nextents > MAXAEXTNUM) 410 return __this_address; 411 } else if (di_nextents > MAXEXTNUM) { 412 return __this_address; 413 } 414 break; 415 default: 416 return __this_address; 417 } 418 return NULL; 419 } 420 421 static xfs_failaddr_t 422 xfs_dinode_verify_forkoff( 423 struct xfs_dinode *dip, 424 struct xfs_mount *mp) 425 { 426 if (!XFS_DFORK_Q(dip)) 427 return NULL; 428 429 switch (dip->di_format) { 430 case XFS_DINODE_FMT_DEV: 431 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) 432 return __this_address; 433 break; 434 case XFS_DINODE_FMT_LOCAL: /* fall through ... */ 435 case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ 436 case XFS_DINODE_FMT_BTREE: 437 if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) 438 return __this_address; 439 break; 440 default: 441 return __this_address; 442 } 443 return NULL; 444 } 445 446 xfs_failaddr_t 447 xfs_dinode_verify( 448 struct xfs_mount *mp, 449 xfs_ino_t ino, 450 struct xfs_dinode *dip) 451 { 452 xfs_failaddr_t fa; 453 uint16_t mode; 454 uint16_t flags; 455 uint64_t flags2; 456 uint64_t di_size; 457 458 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 459 return __this_address; 460 461 /* Verify v3 integrity information first */ 462 if (dip->di_version >= 3) { 463 if (!xfs_sb_version_hascrc(&mp->m_sb)) 464 return __this_address; 465 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 466 XFS_DINODE_CRC_OFF)) 467 return __this_address; 468 if (be64_to_cpu(dip->di_ino) != ino) 469 return __this_address; 470 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 471 return __this_address; 472 } 473 474 /* don't allow invalid i_size */ 475 di_size = be64_to_cpu(dip->di_size); 476 if (di_size & (1ULL << 63)) 477 return __this_address; 478 479 mode = be16_to_cpu(dip->di_mode); 480 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 481 return __this_address; 482 483 /* No zero-length symlinks/dirs. */ 484 if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) 485 return __this_address; 486 487 /* Fork checks carried over from xfs_iformat_fork */ 488 if (mode && 489 be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > 490 be64_to_cpu(dip->di_nblocks)) 491 return __this_address; 492 493 if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 494 return __this_address; 495 496 flags = be16_to_cpu(dip->di_flags); 497 498 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 499 return __this_address; 500 501 /* check for illegal values of forkoff */ 502 fa = xfs_dinode_verify_forkoff(dip, mp); 503 if (fa) 504 return fa; 505 506 /* Do we have appropriate data fork formats for the mode? */ 507 switch (mode & S_IFMT) { 508 case S_IFIFO: 509 case S_IFCHR: 510 case S_IFBLK: 511 case S_IFSOCK: 512 if (dip->di_format != XFS_DINODE_FMT_DEV) 513 return __this_address; 514 break; 515 case S_IFREG: 516 case S_IFLNK: 517 case S_IFDIR: 518 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); 519 if (fa) 520 return fa; 521 break; 522 case 0: 523 /* Uninitialized inode ok. */ 524 break; 525 default: 526 return __this_address; 527 } 528 529 if (XFS_DFORK_Q(dip)) { 530 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); 531 if (fa) 532 return fa; 533 } else { 534 /* 535 * If there is no fork offset, this may be a freshly-made inode 536 * in a new disk cluster, in which case di_aformat is zeroed. 537 * Otherwise, such an inode must be in EXTENTS format; this goes 538 * for freed inodes as well. 539 */ 540 switch (dip->di_aformat) { 541 case 0: 542 case XFS_DINODE_FMT_EXTENTS: 543 break; 544 default: 545 return __this_address; 546 } 547 if (dip->di_anextents) 548 return __this_address; 549 } 550 551 /* extent size hint validation */ 552 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 553 mode, flags); 554 if (fa) 555 return fa; 556 557 /* only version 3 or greater inodes are extensively verified here */ 558 if (dip->di_version < 3) 559 return NULL; 560 561 flags2 = be64_to_cpu(dip->di_flags2); 562 563 /* don't allow reflink/cowextsize if we don't have reflink */ 564 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 565 !xfs_sb_version_hasreflink(&mp->m_sb)) 566 return __this_address; 567 568 /* only regular files get reflink */ 569 if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 570 return __this_address; 571 572 /* don't let reflink and realtime mix */ 573 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 574 return __this_address; 575 576 /* don't let reflink and dax mix */ 577 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) 578 return __this_address; 579 580 /* COW extent size hint validation */ 581 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 582 mode, flags, flags2); 583 if (fa) 584 return fa; 585 586 return NULL; 587 } 588 589 void 590 xfs_dinode_calc_crc( 591 struct xfs_mount *mp, 592 struct xfs_dinode *dip) 593 { 594 uint32_t crc; 595 596 if (dip->di_version < 3) 597 return; 598 599 ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); 600 crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, 601 XFS_DINODE_CRC_OFF); 602 dip->di_crc = xfs_end_cksum(crc); 603 } 604 605 /* 606 * Read the disk inode attributes into the in-core inode structure. 607 * 608 * For version 5 superblocks, if we are initialising a new inode and we are not 609 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new 610 * inode core with a random generation number. If we are keeping inodes around, 611 * we need to read the inode cluster to get the existing generation number off 612 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode 613 * format) then log recovery is dependent on the di_flushiter field being 614 * initialised from the current on-disk value and hence we must also read the 615 * inode off disk. 616 */ 617 int 618 xfs_iread( 619 xfs_mount_t *mp, 620 xfs_trans_t *tp, 621 xfs_inode_t *ip, 622 uint iget_flags) 623 { 624 xfs_buf_t *bp; 625 xfs_dinode_t *dip; 626 xfs_failaddr_t fa; 627 int error; 628 629 /* 630 * Fill in the location information in the in-core inode. 631 */ 632 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 633 if (error) 634 return error; 635 636 /* shortcut IO on inode allocation if possible */ 637 if ((iget_flags & XFS_IGET_CREATE) && 638 xfs_sb_version_hascrc(&mp->m_sb) && 639 !(mp->m_flags & XFS_MOUNT_IKEEP)) { 640 /* initialise the on-disk inode core */ 641 memset(&ip->i_d, 0, sizeof(ip->i_d)); 642 VFS_I(ip)->i_generation = prandom_u32(); 643 ip->i_d.di_version = 3; 644 return 0; 645 } 646 647 /* 648 * Get pointers to the on-disk inode and the buffer containing it. 649 */ 650 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); 651 if (error) 652 return error; 653 654 /* even unallocated inodes are verified */ 655 fa = xfs_dinode_verify(mp, ip->i_ino, dip); 656 if (fa) { 657 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, 658 sizeof(*dip), fa); 659 error = -EFSCORRUPTED; 660 goto out_brelse; 661 } 662 663 /* 664 * If the on-disk inode is already linked to a directory 665 * entry, copy all of the inode into the in-core inode. 666 * xfs_iformat_fork() handles copying in the inode format 667 * specific information. 668 * Otherwise, just get the truly permanent information. 669 */ 670 if (dip->di_mode) { 671 xfs_inode_from_disk(ip, dip); 672 error = xfs_iformat_fork(ip, dip); 673 if (error) { 674 #ifdef DEBUG 675 xfs_alert(mp, "%s: xfs_iformat() returned error %d", 676 __func__, error); 677 #endif /* DEBUG */ 678 goto out_brelse; 679 } 680 } else { 681 /* 682 * Partial initialisation of the in-core inode. Just the bits 683 * that xfs_ialloc won't overwrite or relies on being correct. 684 */ 685 ip->i_d.di_version = dip->di_version; 686 VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen); 687 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); 688 689 /* 690 * Make sure to pull in the mode here as well in 691 * case the inode is released without being used. 692 * This ensures that xfs_inactive() will see that 693 * the inode is already free and not try to mess 694 * with the uninitialized part of it. 695 */ 696 VFS_I(ip)->i_mode = 0; 697 } 698 699 ASSERT(ip->i_d.di_version >= 2); 700 ip->i_delayed_blks = 0; 701 702 /* 703 * Mark the buffer containing the inode as something to keep 704 * around for a while. This helps to keep recently accessed 705 * meta-data in-core longer. 706 */ 707 xfs_buf_set_ref(bp, XFS_INO_REF); 708 709 /* 710 * Use xfs_trans_brelse() to release the buffer containing the on-disk 711 * inode, because it was acquired with xfs_trans_read_buf() in 712 * xfs_imap_to_bp() above. If tp is NULL, this is just a normal 713 * brelse(). If we're within a transaction, then xfs_trans_brelse() 714 * will only release the buffer if it is not dirty within the 715 * transaction. It will be OK to release the buffer in this case, 716 * because inodes on disk are never destroyed and we will be locking the 717 * new in-core inode before putting it in the cache where other 718 * processes can find it. Thus we don't have to worry about the inode 719 * being changed just because we released the buffer. 720 */ 721 out_brelse: 722 xfs_trans_brelse(tp, bp); 723 return error; 724 } 725 726 /* 727 * Validate di_extsize hint. 728 * 729 * The rules are documented at xfs_ioctl_setattr_check_extsize(). 730 * These functions must be kept in sync with each other. 731 */ 732 xfs_failaddr_t 733 xfs_inode_validate_extsize( 734 struct xfs_mount *mp, 735 uint32_t extsize, 736 uint16_t mode, 737 uint16_t flags) 738 { 739 bool rt_flag; 740 bool hint_flag; 741 bool inherit_flag; 742 uint32_t extsize_bytes; 743 uint32_t blocksize_bytes; 744 745 rt_flag = (flags & XFS_DIFLAG_REALTIME); 746 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 747 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 748 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 749 750 if (rt_flag) 751 blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; 752 else 753 blocksize_bytes = mp->m_sb.sb_blocksize; 754 755 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 756 return __this_address; 757 758 if (hint_flag && !S_ISREG(mode)) 759 return __this_address; 760 761 if (inherit_flag && !S_ISDIR(mode)) 762 return __this_address; 763 764 if ((hint_flag || inherit_flag) && extsize == 0) 765 return __this_address; 766 767 /* free inodes get flags set to zero but extsize remains */ 768 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 769 return __this_address; 770 771 if (extsize_bytes % blocksize_bytes) 772 return __this_address; 773 774 if (extsize > MAXEXTLEN) 775 return __this_address; 776 777 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 778 return __this_address; 779 780 return NULL; 781 } 782 783 /* 784 * Validate di_cowextsize hint. 785 * 786 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 787 * These functions must be kept in sync with each other. 788 */ 789 xfs_failaddr_t 790 xfs_inode_validate_cowextsize( 791 struct xfs_mount *mp, 792 uint32_t cowextsize, 793 uint16_t mode, 794 uint16_t flags, 795 uint64_t flags2) 796 { 797 bool rt_flag; 798 bool hint_flag; 799 uint32_t cowextsize_bytes; 800 801 rt_flag = (flags & XFS_DIFLAG_REALTIME); 802 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 803 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 804 805 if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) 806 return __this_address; 807 808 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 809 return __this_address; 810 811 if (hint_flag && cowextsize == 0) 812 return __this_address; 813 814 /* free inodes get flags set to zero but cowextsize remains */ 815 if (mode && !hint_flag && cowextsize != 0) 816 return __this_address; 817 818 if (hint_flag && rt_flag) 819 return __this_address; 820 821 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 822 return __this_address; 823 824 if (cowextsize > MAXEXTLEN) 825 return __this_address; 826 827 if (cowextsize > mp->m_sb.sb_agblocks / 2) 828 return __this_address; 829 830 return NULL; 831 } 832