1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_log_format.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_da_format.h" 17 #include "xfs_reflink.h" 18 #include "xfs_rmap.h" 19 #include "xfs_bmap_util.h" 20 #include "scrub/scrub.h" 21 #include "scrub/common.h" 22 #include "scrub/btree.h" 23 24 /* 25 * Grab total control of the inode metadata. It doesn't matter here if 26 * the file data is still changing; exclusive access to the metadata is 27 * the goal. 28 */ 29 int 30 xchk_setup_inode( 31 struct xfs_scrub *sc, 32 struct xfs_inode *ip) 33 { 34 int error; 35 36 /* 37 * Try to get the inode. If the verifiers fail, we try again 38 * in raw mode. 39 */ 40 error = xchk_get_inode(sc, ip); 41 switch (error) { 42 case 0: 43 break; 44 case -EFSCORRUPTED: 45 case -EFSBADCRC: 46 return xchk_trans_alloc(sc, 0); 47 default: 48 return error; 49 } 50 51 /* Got the inode, lock it and we're ready to go. */ 52 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 53 xfs_ilock(sc->ip, sc->ilock_flags); 54 error = xchk_trans_alloc(sc, 0); 55 if (error) 56 goto out; 57 sc->ilock_flags |= XFS_ILOCK_EXCL; 58 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 59 60 out: 61 /* scrub teardown will unlock and release the inode for us */ 62 return error; 63 } 64 65 /* Inode core */ 66 67 /* Validate di_extsize hint. */ 68 STATIC void 69 xchk_inode_extsize( 70 struct xfs_scrub *sc, 71 struct xfs_dinode *dip, 72 xfs_ino_t ino, 73 uint16_t mode, 74 uint16_t flags) 75 { 76 xfs_failaddr_t fa; 77 78 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 79 mode, flags); 80 if (fa) 81 xchk_ino_set_corrupt(sc, ino); 82 } 83 84 /* 85 * Validate di_cowextsize hint. 86 * 87 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 88 * These functions must be kept in sync with each other. 89 */ 90 STATIC void 91 xchk_inode_cowextsize( 92 struct xfs_scrub *sc, 93 struct xfs_dinode *dip, 94 xfs_ino_t ino, 95 uint16_t mode, 96 uint16_t flags, 97 uint64_t flags2) 98 { 99 xfs_failaddr_t fa; 100 101 fa = xfs_inode_validate_cowextsize(sc->mp, 102 be32_to_cpu(dip->di_cowextsize), mode, flags, 103 flags2); 104 if (fa) 105 xchk_ino_set_corrupt(sc, ino); 106 } 107 108 /* Make sure the di_flags make sense for the inode. */ 109 STATIC void 110 xchk_inode_flags( 111 struct xfs_scrub *sc, 112 struct xfs_dinode *dip, 113 xfs_ino_t ino, 114 uint16_t mode, 115 uint16_t flags) 116 { 117 struct xfs_mount *mp = sc->mp; 118 119 /* di_flags are all taken, last bit cannot be used */ 120 if (flags & ~XFS_DIFLAG_ANY) 121 goto bad; 122 123 /* rt flags require rt device */ 124 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 125 !mp->m_rtdev_targp) 126 goto bad; 127 128 /* new rt bitmap flag only valid for rbmino */ 129 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 130 goto bad; 131 132 /* directory-only flags */ 133 if ((flags & (XFS_DIFLAG_RTINHERIT | 134 XFS_DIFLAG_EXTSZINHERIT | 135 XFS_DIFLAG_PROJINHERIT | 136 XFS_DIFLAG_NOSYMLINKS)) && 137 !S_ISDIR(mode)) 138 goto bad; 139 140 /* file-only flags */ 141 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 142 !S_ISREG(mode)) 143 goto bad; 144 145 /* filestreams and rt make no sense */ 146 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 147 goto bad; 148 149 return; 150 bad: 151 xchk_ino_set_corrupt(sc, ino); 152 } 153 154 /* Make sure the di_flags2 make sense for the inode. */ 155 STATIC void 156 xchk_inode_flags2( 157 struct xfs_scrub *sc, 158 struct xfs_dinode *dip, 159 xfs_ino_t ino, 160 uint16_t mode, 161 uint16_t flags, 162 uint64_t flags2) 163 { 164 struct xfs_mount *mp = sc->mp; 165 166 /* Unknown di_flags2 could be from a future kernel */ 167 if (flags2 & ~XFS_DIFLAG2_ANY) 168 xchk_ino_set_warning(sc, ino); 169 170 /* reflink flag requires reflink feature */ 171 if ((flags2 & XFS_DIFLAG2_REFLINK) && 172 !xfs_sb_version_hasreflink(&mp->m_sb)) 173 goto bad; 174 175 /* cowextsize flag is checked w.r.t. mode separately */ 176 177 /* file/dir-only flags */ 178 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 179 goto bad; 180 181 /* file-only flags */ 182 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 183 goto bad; 184 185 /* realtime and reflink make no sense, currently */ 186 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 187 goto bad; 188 189 /* dax and reflink make no sense, currently */ 190 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 191 goto bad; 192 193 /* no bigtime iflag without the bigtime feature */ 194 if (xfs_dinode_has_bigtime(dip) && 195 !xfs_sb_version_hasbigtime(&mp->m_sb)) 196 goto bad; 197 198 return; 199 bad: 200 xchk_ino_set_corrupt(sc, ino); 201 } 202 203 static inline void 204 xchk_dinode_nsec( 205 struct xfs_scrub *sc, 206 xfs_ino_t ino, 207 struct xfs_dinode *dip, 208 const xfs_timestamp_t ts) 209 { 210 struct timespec64 tv; 211 212 tv = xfs_inode_from_disk_ts(dip, ts); 213 if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) 214 xchk_ino_set_corrupt(sc, ino); 215 } 216 217 /* Scrub all the ondisk inode fields. */ 218 STATIC void 219 xchk_dinode( 220 struct xfs_scrub *sc, 221 struct xfs_dinode *dip, 222 xfs_ino_t ino) 223 { 224 struct xfs_mount *mp = sc->mp; 225 size_t fork_recs; 226 unsigned long long isize; 227 uint64_t flags2; 228 uint32_t nextents; 229 uint16_t flags; 230 uint16_t mode; 231 232 flags = be16_to_cpu(dip->di_flags); 233 if (dip->di_version >= 3) 234 flags2 = be64_to_cpu(dip->di_flags2); 235 else 236 flags2 = 0; 237 238 /* di_mode */ 239 mode = be16_to_cpu(dip->di_mode); 240 switch (mode & S_IFMT) { 241 case S_IFLNK: 242 case S_IFREG: 243 case S_IFDIR: 244 case S_IFCHR: 245 case S_IFBLK: 246 case S_IFIFO: 247 case S_IFSOCK: 248 /* mode is recognized */ 249 break; 250 default: 251 xchk_ino_set_corrupt(sc, ino); 252 break; 253 } 254 255 /* v1/v2 fields */ 256 switch (dip->di_version) { 257 case 1: 258 /* 259 * We autoconvert v1 inodes into v2 inodes on writeout, 260 * so just mark this inode for preening. 261 */ 262 xchk_ino_set_preen(sc, ino); 263 break; 264 case 2: 265 case 3: 266 if (dip->di_onlink != 0) 267 xchk_ino_set_corrupt(sc, ino); 268 269 if (dip->di_mode == 0 && sc->ip) 270 xchk_ino_set_corrupt(sc, ino); 271 272 if (dip->di_projid_hi != 0 && 273 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 274 xchk_ino_set_corrupt(sc, ino); 275 break; 276 default: 277 xchk_ino_set_corrupt(sc, ino); 278 return; 279 } 280 281 /* 282 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 283 * userspace could have created that. 284 */ 285 if (dip->di_uid == cpu_to_be32(-1U) || 286 dip->di_gid == cpu_to_be32(-1U)) 287 xchk_ino_set_warning(sc, ino); 288 289 /* di_format */ 290 switch (dip->di_format) { 291 case XFS_DINODE_FMT_DEV: 292 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 293 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 294 xchk_ino_set_corrupt(sc, ino); 295 break; 296 case XFS_DINODE_FMT_LOCAL: 297 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 298 xchk_ino_set_corrupt(sc, ino); 299 break; 300 case XFS_DINODE_FMT_EXTENTS: 301 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 302 xchk_ino_set_corrupt(sc, ino); 303 break; 304 case XFS_DINODE_FMT_BTREE: 305 if (!S_ISREG(mode) && !S_ISDIR(mode)) 306 xchk_ino_set_corrupt(sc, ino); 307 break; 308 case XFS_DINODE_FMT_UUID: 309 default: 310 xchk_ino_set_corrupt(sc, ino); 311 break; 312 } 313 314 /* di_[amc]time.nsec */ 315 xchk_dinode_nsec(sc, ino, dip, dip->di_atime); 316 xchk_dinode_nsec(sc, ino, dip, dip->di_mtime); 317 xchk_dinode_nsec(sc, ino, dip, dip->di_ctime); 318 319 /* 320 * di_size. xfs_dinode_verify checks for things that screw up 321 * the VFS such as the upper bit being set and zero-length 322 * symlinks/directories, but we can do more here. 323 */ 324 isize = be64_to_cpu(dip->di_size); 325 if (isize & (1ULL << 63)) 326 xchk_ino_set_corrupt(sc, ino); 327 328 /* Devices, fifos, and sockets must have zero size */ 329 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 330 xchk_ino_set_corrupt(sc, ino); 331 332 /* Directories can't be larger than the data section size (32G) */ 333 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 334 xchk_ino_set_corrupt(sc, ino); 335 336 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 337 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 338 xchk_ino_set_corrupt(sc, ino); 339 340 /* 341 * Warn if the running kernel can't handle the kinds of offsets 342 * needed to deal with the file size. In other words, if the 343 * pagecache can't cache all the blocks in this file due to 344 * overly large offsets, flag the inode for admin review. 345 */ 346 if (isize >= mp->m_super->s_maxbytes) 347 xchk_ino_set_warning(sc, ino); 348 349 /* di_nblocks */ 350 if (flags2 & XFS_DIFLAG2_REFLINK) { 351 ; /* nblocks can exceed dblocks */ 352 } else if (flags & XFS_DIFLAG_REALTIME) { 353 /* 354 * nblocks is the sum of data extents (in the rtdev), 355 * attr extents (in the datadev), and both forks' bmbt 356 * blocks (in the datadev). This clumsy check is the 357 * best we can do without cross-referencing with the 358 * inode forks. 359 */ 360 if (be64_to_cpu(dip->di_nblocks) >= 361 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 362 xchk_ino_set_corrupt(sc, ino); 363 } else { 364 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 365 xchk_ino_set_corrupt(sc, ino); 366 } 367 368 xchk_inode_flags(sc, dip, ino, mode, flags); 369 370 xchk_inode_extsize(sc, dip, ino, mode, flags); 371 372 /* di_nextents */ 373 nextents = be32_to_cpu(dip->di_nextents); 374 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 375 switch (dip->di_format) { 376 case XFS_DINODE_FMT_EXTENTS: 377 if (nextents > fork_recs) 378 xchk_ino_set_corrupt(sc, ino); 379 break; 380 case XFS_DINODE_FMT_BTREE: 381 if (nextents <= fork_recs) 382 xchk_ino_set_corrupt(sc, ino); 383 break; 384 default: 385 if (nextents != 0) 386 xchk_ino_set_corrupt(sc, ino); 387 break; 388 } 389 390 /* di_forkoff */ 391 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 392 xchk_ino_set_corrupt(sc, ino); 393 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 394 xchk_ino_set_corrupt(sc, ino); 395 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 396 xchk_ino_set_corrupt(sc, ino); 397 398 /* di_aformat */ 399 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 400 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 401 dip->di_aformat != XFS_DINODE_FMT_BTREE) 402 xchk_ino_set_corrupt(sc, ino); 403 404 /* di_anextents */ 405 nextents = be16_to_cpu(dip->di_anextents); 406 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 407 switch (dip->di_aformat) { 408 case XFS_DINODE_FMT_EXTENTS: 409 if (nextents > fork_recs) 410 xchk_ino_set_corrupt(sc, ino); 411 break; 412 case XFS_DINODE_FMT_BTREE: 413 if (nextents <= fork_recs) 414 xchk_ino_set_corrupt(sc, ino); 415 break; 416 default: 417 if (nextents != 0) 418 xchk_ino_set_corrupt(sc, ino); 419 } 420 421 if (dip->di_version >= 3) { 422 xchk_dinode_nsec(sc, ino, dip, dip->di_crtime); 423 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 424 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 425 flags2); 426 } 427 } 428 429 /* 430 * Make sure the finobt doesn't think this inode is free. 431 * We don't have to check the inobt ourselves because we got the inode via 432 * IGET_UNTRUSTED, which checks the inobt for us. 433 */ 434 static void 435 xchk_inode_xref_finobt( 436 struct xfs_scrub *sc, 437 xfs_ino_t ino) 438 { 439 struct xfs_inobt_rec_incore rec; 440 xfs_agino_t agino; 441 int has_record; 442 int error; 443 444 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 445 return; 446 447 agino = XFS_INO_TO_AGINO(sc->mp, ino); 448 449 /* 450 * Try to get the finobt record. If we can't get it, then we're 451 * in good shape. 452 */ 453 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 454 &has_record); 455 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 456 !has_record) 457 return; 458 459 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 460 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 461 !has_record) 462 return; 463 464 /* 465 * Otherwise, make sure this record either doesn't cover this inode, 466 * or that it does but it's marked present. 467 */ 468 if (rec.ir_startino > agino || 469 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 470 return; 471 472 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 473 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 474 } 475 476 /* Cross reference the inode fields with the forks. */ 477 STATIC void 478 xchk_inode_xref_bmap( 479 struct xfs_scrub *sc, 480 struct xfs_dinode *dip) 481 { 482 xfs_extnum_t nextents; 483 xfs_filblks_t count; 484 xfs_filblks_t acount; 485 int error; 486 487 if (xchk_skip_xref(sc->sm)) 488 return; 489 490 /* Walk all the extents to check nextents/naextents/nblocks. */ 491 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 492 &nextents, &count); 493 if (!xchk_should_check_xref(sc, &error, NULL)) 494 return; 495 if (nextents < be32_to_cpu(dip->di_nextents)) 496 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 497 498 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 499 &nextents, &acount); 500 if (!xchk_should_check_xref(sc, &error, NULL)) 501 return; 502 if (nextents != be16_to_cpu(dip->di_anextents)) 503 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 504 505 /* Check nblocks against the inode. */ 506 if (count + acount != be64_to_cpu(dip->di_nblocks)) 507 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 508 } 509 510 /* Cross-reference with the other btrees. */ 511 STATIC void 512 xchk_inode_xref( 513 struct xfs_scrub *sc, 514 xfs_ino_t ino, 515 struct xfs_dinode *dip) 516 { 517 xfs_agnumber_t agno; 518 xfs_agblock_t agbno; 519 int error; 520 521 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 522 return; 523 524 agno = XFS_INO_TO_AGNO(sc->mp, ino); 525 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 526 527 error = xchk_ag_init(sc, agno, &sc->sa); 528 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 529 return; 530 531 xchk_xref_is_used_space(sc, agbno, 1); 532 xchk_inode_xref_finobt(sc, ino); 533 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 534 xchk_xref_is_not_shared(sc, agbno, 1); 535 xchk_inode_xref_bmap(sc, dip); 536 537 xchk_ag_free(sc, &sc->sa); 538 } 539 540 /* 541 * If the reflink iflag disagrees with a scan for shared data fork extents, 542 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 543 * any shared extents). We already checked for reflink iflag set on a non 544 * reflink filesystem. 545 */ 546 static void 547 xchk_inode_check_reflink_iflag( 548 struct xfs_scrub *sc, 549 xfs_ino_t ino) 550 { 551 struct xfs_mount *mp = sc->mp; 552 bool has_shared; 553 int error; 554 555 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 556 return; 557 558 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 559 &has_shared); 560 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 561 XFS_INO_TO_AGBNO(mp, ino), &error)) 562 return; 563 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 564 xchk_ino_set_preen(sc, ino); 565 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 566 xchk_ino_set_corrupt(sc, ino); 567 } 568 569 /* Scrub an inode. */ 570 int 571 xchk_inode( 572 struct xfs_scrub *sc) 573 { 574 struct xfs_dinode di; 575 int error = 0; 576 577 /* 578 * If sc->ip is NULL, that means that the setup function called 579 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 580 * and a NULL inode, so flag the corruption error and return. 581 */ 582 if (!sc->ip) { 583 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 584 return 0; 585 } 586 587 /* Scrub the inode core. */ 588 xfs_inode_to_disk(sc->ip, &di, 0); 589 xchk_dinode(sc, &di, sc->ip->i_ino); 590 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 591 goto out; 592 593 /* 594 * Look for discrepancies between file's data blocks and the reflink 595 * iflag. We already checked the iflag against the file mode when 596 * we scrubbed the dinode. 597 */ 598 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 599 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 600 601 xchk_inode_xref(sc, sc->ip->i_ino, &di); 602 out: 603 return error; 604 } 605