1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_log_format.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_da_format.h" 17 #include "xfs_reflink.h" 18 #include "xfs_rmap.h" 19 #include "xfs_bmap_util.h" 20 #include "scrub/scrub.h" 21 #include "scrub/common.h" 22 #include "scrub/btree.h" 23 24 /* 25 * Grab total control of the inode metadata. It doesn't matter here if 26 * the file data is still changing; exclusive access to the metadata is 27 * the goal. 28 */ 29 int 30 xchk_setup_inode( 31 struct xfs_scrub *sc, 32 struct xfs_inode *ip) 33 { 34 int error; 35 36 /* 37 * Try to get the inode. If the verifiers fail, we try again 38 * in raw mode. 39 */ 40 error = xchk_get_inode(sc, ip); 41 switch (error) { 42 case 0: 43 break; 44 case -EFSCORRUPTED: 45 case -EFSBADCRC: 46 return xchk_trans_alloc(sc, 0); 47 default: 48 return error; 49 } 50 51 /* Got the inode, lock it and we're ready to go. */ 52 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 53 xfs_ilock(sc->ip, sc->ilock_flags); 54 error = xchk_trans_alloc(sc, 0); 55 if (error) 56 goto out; 57 sc->ilock_flags |= XFS_ILOCK_EXCL; 58 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 59 60 out: 61 /* scrub teardown will unlock and release the inode for us */ 62 return error; 63 } 64 65 /* Inode core */ 66 67 /* Validate di_extsize hint. */ 68 STATIC void 69 xchk_inode_extsize( 70 struct xfs_scrub *sc, 71 struct xfs_dinode *dip, 72 xfs_ino_t ino, 73 uint16_t mode, 74 uint16_t flags) 75 { 76 xfs_failaddr_t fa; 77 78 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 79 mode, flags); 80 if (fa) 81 xchk_ino_set_corrupt(sc, ino); 82 } 83 84 /* 85 * Validate di_cowextsize hint. 86 * 87 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 88 * These functions must be kept in sync with each other. 89 */ 90 STATIC void 91 xchk_inode_cowextsize( 92 struct xfs_scrub *sc, 93 struct xfs_dinode *dip, 94 xfs_ino_t ino, 95 uint16_t mode, 96 uint16_t flags, 97 uint64_t flags2) 98 { 99 xfs_failaddr_t fa; 100 101 fa = xfs_inode_validate_cowextsize(sc->mp, 102 be32_to_cpu(dip->di_cowextsize), mode, flags, 103 flags2); 104 if (fa) 105 xchk_ino_set_corrupt(sc, ino); 106 } 107 108 /* Make sure the di_flags make sense for the inode. */ 109 STATIC void 110 xchk_inode_flags( 111 struct xfs_scrub *sc, 112 struct xfs_dinode *dip, 113 xfs_ino_t ino, 114 uint16_t mode, 115 uint16_t flags) 116 { 117 struct xfs_mount *mp = sc->mp; 118 119 /* di_flags are all taken, last bit cannot be used */ 120 if (flags & ~XFS_DIFLAG_ANY) 121 goto bad; 122 123 /* rt flags require rt device */ 124 if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 125 goto bad; 126 127 /* new rt bitmap flag only valid for rbmino */ 128 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 129 goto bad; 130 131 /* directory-only flags */ 132 if ((flags & (XFS_DIFLAG_RTINHERIT | 133 XFS_DIFLAG_EXTSZINHERIT | 134 XFS_DIFLAG_PROJINHERIT | 135 XFS_DIFLAG_NOSYMLINKS)) && 136 !S_ISDIR(mode)) 137 goto bad; 138 139 /* file-only flags */ 140 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 141 !S_ISREG(mode)) 142 goto bad; 143 144 /* filestreams and rt make no sense */ 145 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 146 goto bad; 147 148 return; 149 bad: 150 xchk_ino_set_corrupt(sc, ino); 151 } 152 153 /* Make sure the di_flags2 make sense for the inode. */ 154 STATIC void 155 xchk_inode_flags2( 156 struct xfs_scrub *sc, 157 struct xfs_dinode *dip, 158 xfs_ino_t ino, 159 uint16_t mode, 160 uint16_t flags, 161 uint64_t flags2) 162 { 163 struct xfs_mount *mp = sc->mp; 164 165 /* Unknown di_flags2 could be from a future kernel */ 166 if (flags2 & ~XFS_DIFLAG2_ANY) 167 xchk_ino_set_warning(sc, ino); 168 169 /* reflink flag requires reflink feature */ 170 if ((flags2 & XFS_DIFLAG2_REFLINK) && 171 !xfs_sb_version_hasreflink(&mp->m_sb)) 172 goto bad; 173 174 /* cowextsize flag is checked w.r.t. mode separately */ 175 176 /* file/dir-only flags */ 177 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 178 goto bad; 179 180 /* file-only flags */ 181 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 182 goto bad; 183 184 /* realtime and reflink make no sense, currently */ 185 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 186 goto bad; 187 188 /* dax and reflink make no sense, currently */ 189 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 190 goto bad; 191 192 /* no bigtime iflag without the bigtime feature */ 193 if (xfs_dinode_has_bigtime(dip) && 194 !xfs_sb_version_hasbigtime(&mp->m_sb)) 195 goto bad; 196 197 return; 198 bad: 199 xchk_ino_set_corrupt(sc, ino); 200 } 201 202 static inline void 203 xchk_dinode_nsec( 204 struct xfs_scrub *sc, 205 xfs_ino_t ino, 206 struct xfs_dinode *dip, 207 const xfs_timestamp_t ts) 208 { 209 struct timespec64 tv; 210 211 tv = xfs_inode_from_disk_ts(dip, ts); 212 if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) 213 xchk_ino_set_corrupt(sc, ino); 214 } 215 216 /* Scrub all the ondisk inode fields. */ 217 STATIC void 218 xchk_dinode( 219 struct xfs_scrub *sc, 220 struct xfs_dinode *dip, 221 xfs_ino_t ino) 222 { 223 struct xfs_mount *mp = sc->mp; 224 size_t fork_recs; 225 unsigned long long isize; 226 uint64_t flags2; 227 uint32_t nextents; 228 uint16_t flags; 229 uint16_t mode; 230 231 flags = be16_to_cpu(dip->di_flags); 232 if (dip->di_version >= 3) 233 flags2 = be64_to_cpu(dip->di_flags2); 234 else 235 flags2 = 0; 236 237 /* di_mode */ 238 mode = be16_to_cpu(dip->di_mode); 239 switch (mode & S_IFMT) { 240 case S_IFLNK: 241 case S_IFREG: 242 case S_IFDIR: 243 case S_IFCHR: 244 case S_IFBLK: 245 case S_IFIFO: 246 case S_IFSOCK: 247 /* mode is recognized */ 248 break; 249 default: 250 xchk_ino_set_corrupt(sc, ino); 251 break; 252 } 253 254 /* v1/v2 fields */ 255 switch (dip->di_version) { 256 case 1: 257 /* 258 * We autoconvert v1 inodes into v2 inodes on writeout, 259 * so just mark this inode for preening. 260 */ 261 xchk_ino_set_preen(sc, ino); 262 break; 263 case 2: 264 case 3: 265 if (dip->di_onlink != 0) 266 xchk_ino_set_corrupt(sc, ino); 267 268 if (dip->di_mode == 0 && sc->ip) 269 xchk_ino_set_corrupt(sc, ino); 270 271 if (dip->di_projid_hi != 0 && 272 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 273 xchk_ino_set_corrupt(sc, ino); 274 break; 275 default: 276 xchk_ino_set_corrupt(sc, ino); 277 return; 278 } 279 280 /* 281 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 282 * userspace could have created that. 283 */ 284 if (dip->di_uid == cpu_to_be32(-1U) || 285 dip->di_gid == cpu_to_be32(-1U)) 286 xchk_ino_set_warning(sc, ino); 287 288 /* di_format */ 289 switch (dip->di_format) { 290 case XFS_DINODE_FMT_DEV: 291 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 292 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 293 xchk_ino_set_corrupt(sc, ino); 294 break; 295 case XFS_DINODE_FMT_LOCAL: 296 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 297 xchk_ino_set_corrupt(sc, ino); 298 break; 299 case XFS_DINODE_FMT_EXTENTS: 300 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 301 xchk_ino_set_corrupt(sc, ino); 302 break; 303 case XFS_DINODE_FMT_BTREE: 304 if (!S_ISREG(mode) && !S_ISDIR(mode)) 305 xchk_ino_set_corrupt(sc, ino); 306 break; 307 case XFS_DINODE_FMT_UUID: 308 default: 309 xchk_ino_set_corrupt(sc, ino); 310 break; 311 } 312 313 /* di_[amc]time.nsec */ 314 xchk_dinode_nsec(sc, ino, dip, dip->di_atime); 315 xchk_dinode_nsec(sc, ino, dip, dip->di_mtime); 316 xchk_dinode_nsec(sc, ino, dip, dip->di_ctime); 317 318 /* 319 * di_size. xfs_dinode_verify checks for things that screw up 320 * the VFS such as the upper bit being set and zero-length 321 * symlinks/directories, but we can do more here. 322 */ 323 isize = be64_to_cpu(dip->di_size); 324 if (isize & (1ULL << 63)) 325 xchk_ino_set_corrupt(sc, ino); 326 327 /* Devices, fifos, and sockets must have zero size */ 328 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 329 xchk_ino_set_corrupt(sc, ino); 330 331 /* Directories can't be larger than the data section size (32G) */ 332 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 333 xchk_ino_set_corrupt(sc, ino); 334 335 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 336 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 337 xchk_ino_set_corrupt(sc, ino); 338 339 /* 340 * Warn if the running kernel can't handle the kinds of offsets 341 * needed to deal with the file size. In other words, if the 342 * pagecache can't cache all the blocks in this file due to 343 * overly large offsets, flag the inode for admin review. 344 */ 345 if (isize >= mp->m_super->s_maxbytes) 346 xchk_ino_set_warning(sc, ino); 347 348 /* di_nblocks */ 349 if (flags2 & XFS_DIFLAG2_REFLINK) { 350 ; /* nblocks can exceed dblocks */ 351 } else if (flags & XFS_DIFLAG_REALTIME) { 352 /* 353 * nblocks is the sum of data extents (in the rtdev), 354 * attr extents (in the datadev), and both forks' bmbt 355 * blocks (in the datadev). This clumsy check is the 356 * best we can do without cross-referencing with the 357 * inode forks. 358 */ 359 if (be64_to_cpu(dip->di_nblocks) >= 360 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 361 xchk_ino_set_corrupt(sc, ino); 362 } else { 363 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 364 xchk_ino_set_corrupt(sc, ino); 365 } 366 367 xchk_inode_flags(sc, dip, ino, mode, flags); 368 369 xchk_inode_extsize(sc, dip, ino, mode, flags); 370 371 /* di_nextents */ 372 nextents = be32_to_cpu(dip->di_nextents); 373 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 374 switch (dip->di_format) { 375 case XFS_DINODE_FMT_EXTENTS: 376 if (nextents > fork_recs) 377 xchk_ino_set_corrupt(sc, ino); 378 break; 379 case XFS_DINODE_FMT_BTREE: 380 if (nextents <= fork_recs) 381 xchk_ino_set_corrupt(sc, ino); 382 break; 383 default: 384 if (nextents != 0) 385 xchk_ino_set_corrupt(sc, ino); 386 break; 387 } 388 389 /* di_forkoff */ 390 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 391 xchk_ino_set_corrupt(sc, ino); 392 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 393 xchk_ino_set_corrupt(sc, ino); 394 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 395 xchk_ino_set_corrupt(sc, ino); 396 397 /* di_aformat */ 398 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 399 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 400 dip->di_aformat != XFS_DINODE_FMT_BTREE) 401 xchk_ino_set_corrupt(sc, ino); 402 403 /* di_anextents */ 404 nextents = be16_to_cpu(dip->di_anextents); 405 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 406 switch (dip->di_aformat) { 407 case XFS_DINODE_FMT_EXTENTS: 408 if (nextents > fork_recs) 409 xchk_ino_set_corrupt(sc, ino); 410 break; 411 case XFS_DINODE_FMT_BTREE: 412 if (nextents <= fork_recs) 413 xchk_ino_set_corrupt(sc, ino); 414 break; 415 default: 416 if (nextents != 0) 417 xchk_ino_set_corrupt(sc, ino); 418 } 419 420 if (dip->di_version >= 3) { 421 xchk_dinode_nsec(sc, ino, dip, dip->di_crtime); 422 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 423 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 424 flags2); 425 } 426 } 427 428 /* 429 * Make sure the finobt doesn't think this inode is free. 430 * We don't have to check the inobt ourselves because we got the inode via 431 * IGET_UNTRUSTED, which checks the inobt for us. 432 */ 433 static void 434 xchk_inode_xref_finobt( 435 struct xfs_scrub *sc, 436 xfs_ino_t ino) 437 { 438 struct xfs_inobt_rec_incore rec; 439 xfs_agino_t agino; 440 int has_record; 441 int error; 442 443 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 444 return; 445 446 agino = XFS_INO_TO_AGINO(sc->mp, ino); 447 448 /* 449 * Try to get the finobt record. If we can't get it, then we're 450 * in good shape. 451 */ 452 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 453 &has_record); 454 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 455 !has_record) 456 return; 457 458 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 459 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 460 !has_record) 461 return; 462 463 /* 464 * Otherwise, make sure this record either doesn't cover this inode, 465 * or that it does but it's marked present. 466 */ 467 if (rec.ir_startino > agino || 468 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 469 return; 470 471 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 472 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 473 } 474 475 /* Cross reference the inode fields with the forks. */ 476 STATIC void 477 xchk_inode_xref_bmap( 478 struct xfs_scrub *sc, 479 struct xfs_dinode *dip) 480 { 481 xfs_extnum_t nextents; 482 xfs_filblks_t count; 483 xfs_filblks_t acount; 484 int error; 485 486 if (xchk_skip_xref(sc->sm)) 487 return; 488 489 /* Walk all the extents to check nextents/naextents/nblocks. */ 490 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 491 &nextents, &count); 492 if (!xchk_should_check_xref(sc, &error, NULL)) 493 return; 494 if (nextents < be32_to_cpu(dip->di_nextents)) 495 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 496 497 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 498 &nextents, &acount); 499 if (!xchk_should_check_xref(sc, &error, NULL)) 500 return; 501 if (nextents != be16_to_cpu(dip->di_anextents)) 502 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 503 504 /* Check nblocks against the inode. */ 505 if (count + acount != be64_to_cpu(dip->di_nblocks)) 506 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 507 } 508 509 /* Cross-reference with the other btrees. */ 510 STATIC void 511 xchk_inode_xref( 512 struct xfs_scrub *sc, 513 xfs_ino_t ino, 514 struct xfs_dinode *dip) 515 { 516 xfs_agnumber_t agno; 517 xfs_agblock_t agbno; 518 int error; 519 520 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 521 return; 522 523 agno = XFS_INO_TO_AGNO(sc->mp, ino); 524 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 525 526 error = xchk_ag_init(sc, agno, &sc->sa); 527 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 528 return; 529 530 xchk_xref_is_used_space(sc, agbno, 1); 531 xchk_inode_xref_finobt(sc, ino); 532 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 533 xchk_xref_is_not_shared(sc, agbno, 1); 534 xchk_inode_xref_bmap(sc, dip); 535 536 xchk_ag_free(sc, &sc->sa); 537 } 538 539 /* 540 * If the reflink iflag disagrees with a scan for shared data fork extents, 541 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 542 * any shared extents). We already checked for reflink iflag set on a non 543 * reflink filesystem. 544 */ 545 static void 546 xchk_inode_check_reflink_iflag( 547 struct xfs_scrub *sc, 548 xfs_ino_t ino) 549 { 550 struct xfs_mount *mp = sc->mp; 551 bool has_shared; 552 int error; 553 554 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 555 return; 556 557 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 558 &has_shared); 559 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 560 XFS_INO_TO_AGBNO(mp, ino), &error)) 561 return; 562 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 563 xchk_ino_set_preen(sc, ino); 564 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 565 xchk_ino_set_corrupt(sc, ino); 566 } 567 568 /* Scrub an inode. */ 569 int 570 xchk_inode( 571 struct xfs_scrub *sc) 572 { 573 struct xfs_dinode di; 574 int error = 0; 575 576 /* 577 * If sc->ip is NULL, that means that the setup function called 578 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 579 * and a NULL inode, so flag the corruption error and return. 580 */ 581 if (!sc->ip) { 582 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 583 return 0; 584 } 585 586 /* Scrub the inode core. */ 587 xfs_inode_to_disk(sc->ip, &di, 0); 588 xchk_dinode(sc, &di, sc->ip->i_ino); 589 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 590 goto out; 591 592 /* 593 * Look for discrepancies between file's data blocks and the reflink 594 * iflag. We already checked the iflag against the file mode when 595 * we scrubbed the dinode. 596 */ 597 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 598 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 599 600 xchk_inode_xref(sc, sc->ip->i_ino, &di); 601 out: 602 return error; 603 } 604