1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_icache.h" 20 #include "xfs_inode_buf.h" 21 #include "xfs_inode_fork.h" 22 #include "xfs_ialloc.h" 23 #include "xfs_da_format.h" 24 #include "xfs_reflink.h" 25 #include "xfs_rmap.h" 26 #include "xfs_bmap.h" 27 #include "xfs_bmap_util.h" 28 #include "scrub/xfs_scrub.h" 29 #include "scrub/scrub.h" 30 #include "scrub/common.h" 31 #include "scrub/btree.h" 32 #include "scrub/trace.h" 33 34 /* 35 * Grab total control of the inode metadata. It doesn't matter here if 36 * the file data is still changing; exclusive access to the metadata is 37 * the goal. 38 */ 39 int 40 xchk_setup_inode( 41 struct xfs_scrub *sc, 42 struct xfs_inode *ip) 43 { 44 int error; 45 46 /* 47 * Try to get the inode. If the verifiers fail, we try again 48 * in raw mode. 49 */ 50 error = xchk_get_inode(sc, ip); 51 switch (error) { 52 case 0: 53 break; 54 case -EFSCORRUPTED: 55 case -EFSBADCRC: 56 return xchk_trans_alloc(sc, 0); 57 default: 58 return error; 59 } 60 61 /* Got the inode, lock it and we're ready to go. */ 62 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 63 xfs_ilock(sc->ip, sc->ilock_flags); 64 error = xchk_trans_alloc(sc, 0); 65 if (error) 66 goto out; 67 sc->ilock_flags |= XFS_ILOCK_EXCL; 68 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 69 70 out: 71 /* scrub teardown will unlock and release the inode for us */ 72 return error; 73 } 74 75 /* Inode core */ 76 77 /* Validate di_extsize hint. */ 78 STATIC void 79 xchk_inode_extsize( 80 struct xfs_scrub *sc, 81 struct xfs_dinode *dip, 82 xfs_ino_t ino, 83 uint16_t mode, 84 uint16_t flags) 85 { 86 xfs_failaddr_t fa; 87 88 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 89 mode, flags); 90 if (fa) 91 xchk_ino_set_corrupt(sc, ino); 92 } 93 94 /* 95 * Validate di_cowextsize hint. 96 * 97 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 98 * These functions must be kept in sync with each other. 99 */ 100 STATIC void 101 xchk_inode_cowextsize( 102 struct xfs_scrub *sc, 103 struct xfs_dinode *dip, 104 xfs_ino_t ino, 105 uint16_t mode, 106 uint16_t flags, 107 uint64_t flags2) 108 { 109 xfs_failaddr_t fa; 110 111 fa = xfs_inode_validate_cowextsize(sc->mp, 112 be32_to_cpu(dip->di_cowextsize), mode, flags, 113 flags2); 114 if (fa) 115 xchk_ino_set_corrupt(sc, ino); 116 } 117 118 /* Make sure the di_flags make sense for the inode. */ 119 STATIC void 120 xchk_inode_flags( 121 struct xfs_scrub *sc, 122 struct xfs_dinode *dip, 123 xfs_ino_t ino, 124 uint16_t mode, 125 uint16_t flags) 126 { 127 struct xfs_mount *mp = sc->mp; 128 129 /* di_flags are all taken, last bit cannot be used */ 130 if (flags & ~XFS_DIFLAG_ANY) 131 goto bad; 132 133 /* rt flags require rt device */ 134 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 135 !mp->m_rtdev_targp) 136 goto bad; 137 138 /* new rt bitmap flag only valid for rbmino */ 139 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 140 goto bad; 141 142 /* directory-only flags */ 143 if ((flags & (XFS_DIFLAG_RTINHERIT | 144 XFS_DIFLAG_EXTSZINHERIT | 145 XFS_DIFLAG_PROJINHERIT | 146 XFS_DIFLAG_NOSYMLINKS)) && 147 !S_ISDIR(mode)) 148 goto bad; 149 150 /* file-only flags */ 151 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 152 !S_ISREG(mode)) 153 goto bad; 154 155 /* filestreams and rt make no sense */ 156 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 157 goto bad; 158 159 return; 160 bad: 161 xchk_ino_set_corrupt(sc, ino); 162 } 163 164 /* Make sure the di_flags2 make sense for the inode. */ 165 STATIC void 166 xchk_inode_flags2( 167 struct xfs_scrub *sc, 168 struct xfs_dinode *dip, 169 xfs_ino_t ino, 170 uint16_t mode, 171 uint16_t flags, 172 uint64_t flags2) 173 { 174 struct xfs_mount *mp = sc->mp; 175 176 /* Unknown di_flags2 could be from a future kernel */ 177 if (flags2 & ~XFS_DIFLAG2_ANY) 178 xchk_ino_set_warning(sc, ino); 179 180 /* reflink flag requires reflink feature */ 181 if ((flags2 & XFS_DIFLAG2_REFLINK) && 182 !xfs_sb_version_hasreflink(&mp->m_sb)) 183 goto bad; 184 185 /* cowextsize flag is checked w.r.t. mode separately */ 186 187 /* file/dir-only flags */ 188 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 189 goto bad; 190 191 /* file-only flags */ 192 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 193 goto bad; 194 195 /* realtime and reflink make no sense, currently */ 196 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 197 goto bad; 198 199 /* dax and reflink make no sense, currently */ 200 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 201 goto bad; 202 203 return; 204 bad: 205 xchk_ino_set_corrupt(sc, ino); 206 } 207 208 /* Scrub all the ondisk inode fields. */ 209 STATIC void 210 xchk_dinode( 211 struct xfs_scrub *sc, 212 struct xfs_dinode *dip, 213 xfs_ino_t ino) 214 { 215 struct xfs_mount *mp = sc->mp; 216 size_t fork_recs; 217 unsigned long long isize; 218 uint64_t flags2; 219 uint32_t nextents; 220 uint16_t flags; 221 uint16_t mode; 222 223 flags = be16_to_cpu(dip->di_flags); 224 if (dip->di_version >= 3) 225 flags2 = be64_to_cpu(dip->di_flags2); 226 else 227 flags2 = 0; 228 229 /* di_mode */ 230 mode = be16_to_cpu(dip->di_mode); 231 switch (mode & S_IFMT) { 232 case S_IFLNK: 233 case S_IFREG: 234 case S_IFDIR: 235 case S_IFCHR: 236 case S_IFBLK: 237 case S_IFIFO: 238 case S_IFSOCK: 239 /* mode is recognized */ 240 break; 241 default: 242 xchk_ino_set_corrupt(sc, ino); 243 break; 244 } 245 246 /* v1/v2 fields */ 247 switch (dip->di_version) { 248 case 1: 249 /* 250 * We autoconvert v1 inodes into v2 inodes on writeout, 251 * so just mark this inode for preening. 252 */ 253 xchk_ino_set_preen(sc, ino); 254 break; 255 case 2: 256 case 3: 257 if (dip->di_onlink != 0) 258 xchk_ino_set_corrupt(sc, ino); 259 260 if (dip->di_mode == 0 && sc->ip) 261 xchk_ino_set_corrupt(sc, ino); 262 263 if (dip->di_projid_hi != 0 && 264 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 265 xchk_ino_set_corrupt(sc, ino); 266 break; 267 default: 268 xchk_ino_set_corrupt(sc, ino); 269 return; 270 } 271 272 /* 273 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 274 * userspace could have created that. 275 */ 276 if (dip->di_uid == cpu_to_be32(-1U) || 277 dip->di_gid == cpu_to_be32(-1U)) 278 xchk_ino_set_warning(sc, ino); 279 280 /* di_format */ 281 switch (dip->di_format) { 282 case XFS_DINODE_FMT_DEV: 283 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 284 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 285 xchk_ino_set_corrupt(sc, ino); 286 break; 287 case XFS_DINODE_FMT_LOCAL: 288 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 289 xchk_ino_set_corrupt(sc, ino); 290 break; 291 case XFS_DINODE_FMT_EXTENTS: 292 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 293 xchk_ino_set_corrupt(sc, ino); 294 break; 295 case XFS_DINODE_FMT_BTREE: 296 if (!S_ISREG(mode) && !S_ISDIR(mode)) 297 xchk_ino_set_corrupt(sc, ino); 298 break; 299 case XFS_DINODE_FMT_UUID: 300 default: 301 xchk_ino_set_corrupt(sc, ino); 302 break; 303 } 304 305 /* di_[amc]time.nsec */ 306 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) 307 xchk_ino_set_corrupt(sc, ino); 308 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) 309 xchk_ino_set_corrupt(sc, ino); 310 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) 311 xchk_ino_set_corrupt(sc, ino); 312 313 /* 314 * di_size. xfs_dinode_verify checks for things that screw up 315 * the VFS such as the upper bit being set and zero-length 316 * symlinks/directories, but we can do more here. 317 */ 318 isize = be64_to_cpu(dip->di_size); 319 if (isize & (1ULL << 63)) 320 xchk_ino_set_corrupt(sc, ino); 321 322 /* Devices, fifos, and sockets must have zero size */ 323 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 324 xchk_ino_set_corrupt(sc, ino); 325 326 /* Directories can't be larger than the data section size (32G) */ 327 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 328 xchk_ino_set_corrupt(sc, ino); 329 330 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 331 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 332 xchk_ino_set_corrupt(sc, ino); 333 334 /* 335 * Warn if the running kernel can't handle the kinds of offsets 336 * needed to deal with the file size. In other words, if the 337 * pagecache can't cache all the blocks in this file due to 338 * overly large offsets, flag the inode for admin review. 339 */ 340 if (isize >= mp->m_super->s_maxbytes) 341 xchk_ino_set_warning(sc, ino); 342 343 /* di_nblocks */ 344 if (flags2 & XFS_DIFLAG2_REFLINK) { 345 ; /* nblocks can exceed dblocks */ 346 } else if (flags & XFS_DIFLAG_REALTIME) { 347 /* 348 * nblocks is the sum of data extents (in the rtdev), 349 * attr extents (in the datadev), and both forks' bmbt 350 * blocks (in the datadev). This clumsy check is the 351 * best we can do without cross-referencing with the 352 * inode forks. 353 */ 354 if (be64_to_cpu(dip->di_nblocks) >= 355 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 356 xchk_ino_set_corrupt(sc, ino); 357 } else { 358 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 359 xchk_ino_set_corrupt(sc, ino); 360 } 361 362 xchk_inode_flags(sc, dip, ino, mode, flags); 363 364 xchk_inode_extsize(sc, dip, ino, mode, flags); 365 366 /* di_nextents */ 367 nextents = be32_to_cpu(dip->di_nextents); 368 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 369 switch (dip->di_format) { 370 case XFS_DINODE_FMT_EXTENTS: 371 if (nextents > fork_recs) 372 xchk_ino_set_corrupt(sc, ino); 373 break; 374 case XFS_DINODE_FMT_BTREE: 375 if (nextents <= fork_recs) 376 xchk_ino_set_corrupt(sc, ino); 377 break; 378 default: 379 if (nextents != 0) 380 xchk_ino_set_corrupt(sc, ino); 381 break; 382 } 383 384 /* di_forkoff */ 385 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 386 xchk_ino_set_corrupt(sc, ino); 387 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 388 xchk_ino_set_corrupt(sc, ino); 389 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 390 xchk_ino_set_corrupt(sc, ino); 391 392 /* di_aformat */ 393 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 394 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 395 dip->di_aformat != XFS_DINODE_FMT_BTREE) 396 xchk_ino_set_corrupt(sc, ino); 397 398 /* di_anextents */ 399 nextents = be16_to_cpu(dip->di_anextents); 400 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 401 switch (dip->di_aformat) { 402 case XFS_DINODE_FMT_EXTENTS: 403 if (nextents > fork_recs) 404 xchk_ino_set_corrupt(sc, ino); 405 break; 406 case XFS_DINODE_FMT_BTREE: 407 if (nextents <= fork_recs) 408 xchk_ino_set_corrupt(sc, ino); 409 break; 410 default: 411 if (nextents != 0) 412 xchk_ino_set_corrupt(sc, ino); 413 } 414 415 if (dip->di_version >= 3) { 416 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) 417 xchk_ino_set_corrupt(sc, ino); 418 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 419 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 420 flags2); 421 } 422 } 423 424 /* 425 * Make sure the finobt doesn't think this inode is free. 426 * We don't have to check the inobt ourselves because we got the inode via 427 * IGET_UNTRUSTED, which checks the inobt for us. 428 */ 429 static void 430 xchk_inode_xref_finobt( 431 struct xfs_scrub *sc, 432 xfs_ino_t ino) 433 { 434 struct xfs_inobt_rec_incore rec; 435 xfs_agino_t agino; 436 int has_record; 437 int error; 438 439 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 440 return; 441 442 agino = XFS_INO_TO_AGINO(sc->mp, ino); 443 444 /* 445 * Try to get the finobt record. If we can't get it, then we're 446 * in good shape. 447 */ 448 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 449 &has_record); 450 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 451 !has_record) 452 return; 453 454 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 455 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 456 !has_record) 457 return; 458 459 /* 460 * Otherwise, make sure this record either doesn't cover this inode, 461 * or that it does but it's marked present. 462 */ 463 if (rec.ir_startino > agino || 464 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 465 return; 466 467 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 468 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 469 } 470 471 /* Cross reference the inode fields with the forks. */ 472 STATIC void 473 xchk_inode_xref_bmap( 474 struct xfs_scrub *sc, 475 struct xfs_dinode *dip) 476 { 477 xfs_extnum_t nextents; 478 xfs_filblks_t count; 479 xfs_filblks_t acount; 480 int error; 481 482 if (xchk_skip_xref(sc->sm)) 483 return; 484 485 /* Walk all the extents to check nextents/naextents/nblocks. */ 486 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 487 &nextents, &count); 488 if (!xchk_should_check_xref(sc, &error, NULL)) 489 return; 490 if (nextents < be32_to_cpu(dip->di_nextents)) 491 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 492 493 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 494 &nextents, &acount); 495 if (!xchk_should_check_xref(sc, &error, NULL)) 496 return; 497 if (nextents != be16_to_cpu(dip->di_anextents)) 498 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 499 500 /* Check nblocks against the inode. */ 501 if (count + acount != be64_to_cpu(dip->di_nblocks)) 502 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 503 } 504 505 /* Cross-reference with the other btrees. */ 506 STATIC void 507 xchk_inode_xref( 508 struct xfs_scrub *sc, 509 xfs_ino_t ino, 510 struct xfs_dinode *dip) 511 { 512 xfs_agnumber_t agno; 513 xfs_agblock_t agbno; 514 int error; 515 516 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 517 return; 518 519 agno = XFS_INO_TO_AGNO(sc->mp, ino); 520 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 521 522 error = xchk_ag_init(sc, agno, &sc->sa); 523 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 524 return; 525 526 xchk_xref_is_used_space(sc, agbno, 1); 527 xchk_inode_xref_finobt(sc, ino); 528 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 529 xchk_xref_is_not_shared(sc, agbno, 1); 530 xchk_inode_xref_bmap(sc, dip); 531 532 xchk_ag_free(sc, &sc->sa); 533 } 534 535 /* 536 * If the reflink iflag disagrees with a scan for shared data fork extents, 537 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 538 * any shared extents). We already checked for reflink iflag set on a non 539 * reflink filesystem. 540 */ 541 static void 542 xchk_inode_check_reflink_iflag( 543 struct xfs_scrub *sc, 544 xfs_ino_t ino) 545 { 546 struct xfs_mount *mp = sc->mp; 547 bool has_shared; 548 int error; 549 550 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 551 return; 552 553 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 554 &has_shared); 555 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 556 XFS_INO_TO_AGBNO(mp, ino), &error)) 557 return; 558 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 559 xchk_ino_set_preen(sc, ino); 560 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 561 xchk_ino_set_corrupt(sc, ino); 562 } 563 564 /* Scrub an inode. */ 565 int 566 xchk_inode( 567 struct xfs_scrub *sc) 568 { 569 struct xfs_dinode di; 570 int error = 0; 571 572 /* 573 * If sc->ip is NULL, that means that the setup function called 574 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 575 * and a NULL inode, so flag the corruption error and return. 576 */ 577 if (!sc->ip) { 578 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 579 return 0; 580 } 581 582 /* Scrub the inode core. */ 583 xfs_inode_to_disk(sc->ip, &di, 0); 584 xchk_dinode(sc, &di, sc->ip->i_ino); 585 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 586 goto out; 587 588 /* 589 * Look for discrepancies between file's data blocks and the reflink 590 * iflag. We already checked the iflag against the file mode when 591 * we scrubbed the dinode. 592 */ 593 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 594 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 595 596 xchk_inode_xref(sc, sc->ip->i_ino, &di); 597 out: 598 return error; 599 } 600