1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_log_format.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_da_format.h" 17 #include "xfs_reflink.h" 18 #include "xfs_rmap.h" 19 #include "xfs_bmap_util.h" 20 #include "scrub/scrub.h" 21 #include "scrub/common.h" 22 #include "scrub/btree.h" 23 24 /* 25 * Grab total control of the inode metadata. It doesn't matter here if 26 * the file data is still changing; exclusive access to the metadata is 27 * the goal. 28 */ 29 int 30 xchk_setup_inode( 31 struct xfs_scrub *sc, 32 struct xfs_inode *ip) 33 { 34 int error; 35 36 /* 37 * Try to get the inode. If the verifiers fail, we try again 38 * in raw mode. 39 */ 40 error = xchk_get_inode(sc, ip); 41 switch (error) { 42 case 0: 43 break; 44 case -EFSCORRUPTED: 45 case -EFSBADCRC: 46 return xchk_trans_alloc(sc, 0); 47 default: 48 return error; 49 } 50 51 /* Got the inode, lock it and we're ready to go. */ 52 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 53 xfs_ilock(sc->ip, sc->ilock_flags); 54 error = xchk_trans_alloc(sc, 0); 55 if (error) 56 goto out; 57 sc->ilock_flags |= XFS_ILOCK_EXCL; 58 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 59 60 out: 61 /* scrub teardown will unlock and release the inode for us */ 62 return error; 63 } 64 65 /* Inode core */ 66 67 /* Validate di_extsize hint. */ 68 STATIC void 69 xchk_inode_extsize( 70 struct xfs_scrub *sc, 71 struct xfs_dinode *dip, 72 xfs_ino_t ino, 73 uint16_t mode, 74 uint16_t flags) 75 { 76 xfs_failaddr_t fa; 77 78 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 79 mode, flags); 80 if (fa) 81 xchk_ino_set_corrupt(sc, ino); 82 } 83 84 /* 85 * Validate di_cowextsize hint. 86 * 87 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 88 * These functions must be kept in sync with each other. 89 */ 90 STATIC void 91 xchk_inode_cowextsize( 92 struct xfs_scrub *sc, 93 struct xfs_dinode *dip, 94 xfs_ino_t ino, 95 uint16_t mode, 96 uint16_t flags, 97 uint64_t flags2) 98 { 99 xfs_failaddr_t fa; 100 101 fa = xfs_inode_validate_cowextsize(sc->mp, 102 be32_to_cpu(dip->di_cowextsize), mode, flags, 103 flags2); 104 if (fa) 105 xchk_ino_set_corrupt(sc, ino); 106 } 107 108 /* Make sure the di_flags make sense for the inode. */ 109 STATIC void 110 xchk_inode_flags( 111 struct xfs_scrub *sc, 112 struct xfs_dinode *dip, 113 xfs_ino_t ino, 114 uint16_t mode, 115 uint16_t flags) 116 { 117 struct xfs_mount *mp = sc->mp; 118 119 /* di_flags are all taken, last bit cannot be used */ 120 if (flags & ~XFS_DIFLAG_ANY) 121 goto bad; 122 123 /* rt flags require rt device */ 124 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 125 !mp->m_rtdev_targp) 126 goto bad; 127 128 /* new rt bitmap flag only valid for rbmino */ 129 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 130 goto bad; 131 132 /* directory-only flags */ 133 if ((flags & (XFS_DIFLAG_RTINHERIT | 134 XFS_DIFLAG_EXTSZINHERIT | 135 XFS_DIFLAG_PROJINHERIT | 136 XFS_DIFLAG_NOSYMLINKS)) && 137 !S_ISDIR(mode)) 138 goto bad; 139 140 /* file-only flags */ 141 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 142 !S_ISREG(mode)) 143 goto bad; 144 145 /* filestreams and rt make no sense */ 146 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 147 goto bad; 148 149 return; 150 bad: 151 xchk_ino_set_corrupt(sc, ino); 152 } 153 154 /* Make sure the di_flags2 make sense for the inode. */ 155 STATIC void 156 xchk_inode_flags2( 157 struct xfs_scrub *sc, 158 struct xfs_dinode *dip, 159 xfs_ino_t ino, 160 uint16_t mode, 161 uint16_t flags, 162 uint64_t flags2) 163 { 164 struct xfs_mount *mp = sc->mp; 165 166 /* Unknown di_flags2 could be from a future kernel */ 167 if (flags2 & ~XFS_DIFLAG2_ANY) 168 xchk_ino_set_warning(sc, ino); 169 170 /* reflink flag requires reflink feature */ 171 if ((flags2 & XFS_DIFLAG2_REFLINK) && 172 !xfs_sb_version_hasreflink(&mp->m_sb)) 173 goto bad; 174 175 /* cowextsize flag is checked w.r.t. mode separately */ 176 177 /* file/dir-only flags */ 178 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 179 goto bad; 180 181 /* file-only flags */ 182 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 183 goto bad; 184 185 /* realtime and reflink make no sense, currently */ 186 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 187 goto bad; 188 189 /* dax and reflink make no sense, currently */ 190 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 191 goto bad; 192 193 return; 194 bad: 195 xchk_ino_set_corrupt(sc, ino); 196 } 197 198 /* Scrub all the ondisk inode fields. */ 199 STATIC void 200 xchk_dinode( 201 struct xfs_scrub *sc, 202 struct xfs_dinode *dip, 203 xfs_ino_t ino) 204 { 205 struct xfs_mount *mp = sc->mp; 206 size_t fork_recs; 207 unsigned long long isize; 208 uint64_t flags2; 209 uint32_t nextents; 210 uint16_t flags; 211 uint16_t mode; 212 213 flags = be16_to_cpu(dip->di_flags); 214 if (dip->di_version >= 3) 215 flags2 = be64_to_cpu(dip->di_flags2); 216 else 217 flags2 = 0; 218 219 /* di_mode */ 220 mode = be16_to_cpu(dip->di_mode); 221 switch (mode & S_IFMT) { 222 case S_IFLNK: 223 case S_IFREG: 224 case S_IFDIR: 225 case S_IFCHR: 226 case S_IFBLK: 227 case S_IFIFO: 228 case S_IFSOCK: 229 /* mode is recognized */ 230 break; 231 default: 232 xchk_ino_set_corrupt(sc, ino); 233 break; 234 } 235 236 /* v1/v2 fields */ 237 switch (dip->di_version) { 238 case 1: 239 /* 240 * We autoconvert v1 inodes into v2 inodes on writeout, 241 * so just mark this inode for preening. 242 */ 243 xchk_ino_set_preen(sc, ino); 244 break; 245 case 2: 246 case 3: 247 if (dip->di_onlink != 0) 248 xchk_ino_set_corrupt(sc, ino); 249 250 if (dip->di_mode == 0 && sc->ip) 251 xchk_ino_set_corrupt(sc, ino); 252 253 if (dip->di_projid_hi != 0 && 254 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 255 xchk_ino_set_corrupt(sc, ino); 256 break; 257 default: 258 xchk_ino_set_corrupt(sc, ino); 259 return; 260 } 261 262 /* 263 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 264 * userspace could have created that. 265 */ 266 if (dip->di_uid == cpu_to_be32(-1U) || 267 dip->di_gid == cpu_to_be32(-1U)) 268 xchk_ino_set_warning(sc, ino); 269 270 /* di_format */ 271 switch (dip->di_format) { 272 case XFS_DINODE_FMT_DEV: 273 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 274 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 275 xchk_ino_set_corrupt(sc, ino); 276 break; 277 case XFS_DINODE_FMT_LOCAL: 278 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 279 xchk_ino_set_corrupt(sc, ino); 280 break; 281 case XFS_DINODE_FMT_EXTENTS: 282 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 283 xchk_ino_set_corrupt(sc, ino); 284 break; 285 case XFS_DINODE_FMT_BTREE: 286 if (!S_ISREG(mode) && !S_ISDIR(mode)) 287 xchk_ino_set_corrupt(sc, ino); 288 break; 289 case XFS_DINODE_FMT_UUID: 290 default: 291 xchk_ino_set_corrupt(sc, ino); 292 break; 293 } 294 295 /* di_[amc]time.nsec */ 296 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) 297 xchk_ino_set_corrupt(sc, ino); 298 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) 299 xchk_ino_set_corrupt(sc, ino); 300 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) 301 xchk_ino_set_corrupt(sc, ino); 302 303 /* 304 * di_size. xfs_dinode_verify checks for things that screw up 305 * the VFS such as the upper bit being set and zero-length 306 * symlinks/directories, but we can do more here. 307 */ 308 isize = be64_to_cpu(dip->di_size); 309 if (isize & (1ULL << 63)) 310 xchk_ino_set_corrupt(sc, ino); 311 312 /* Devices, fifos, and sockets must have zero size */ 313 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 314 xchk_ino_set_corrupt(sc, ino); 315 316 /* Directories can't be larger than the data section size (32G) */ 317 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 318 xchk_ino_set_corrupt(sc, ino); 319 320 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 321 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 322 xchk_ino_set_corrupt(sc, ino); 323 324 /* 325 * Warn if the running kernel can't handle the kinds of offsets 326 * needed to deal with the file size. In other words, if the 327 * pagecache can't cache all the blocks in this file due to 328 * overly large offsets, flag the inode for admin review. 329 */ 330 if (isize >= mp->m_super->s_maxbytes) 331 xchk_ino_set_warning(sc, ino); 332 333 /* di_nblocks */ 334 if (flags2 & XFS_DIFLAG2_REFLINK) { 335 ; /* nblocks can exceed dblocks */ 336 } else if (flags & XFS_DIFLAG_REALTIME) { 337 /* 338 * nblocks is the sum of data extents (in the rtdev), 339 * attr extents (in the datadev), and both forks' bmbt 340 * blocks (in the datadev). This clumsy check is the 341 * best we can do without cross-referencing with the 342 * inode forks. 343 */ 344 if (be64_to_cpu(dip->di_nblocks) >= 345 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 346 xchk_ino_set_corrupt(sc, ino); 347 } else { 348 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 349 xchk_ino_set_corrupt(sc, ino); 350 } 351 352 xchk_inode_flags(sc, dip, ino, mode, flags); 353 354 xchk_inode_extsize(sc, dip, ino, mode, flags); 355 356 /* di_nextents */ 357 nextents = be32_to_cpu(dip->di_nextents); 358 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 359 switch (dip->di_format) { 360 case XFS_DINODE_FMT_EXTENTS: 361 if (nextents > fork_recs) 362 xchk_ino_set_corrupt(sc, ino); 363 break; 364 case XFS_DINODE_FMT_BTREE: 365 if (nextents <= fork_recs) 366 xchk_ino_set_corrupt(sc, ino); 367 break; 368 default: 369 if (nextents != 0) 370 xchk_ino_set_corrupt(sc, ino); 371 break; 372 } 373 374 /* di_forkoff */ 375 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 376 xchk_ino_set_corrupt(sc, ino); 377 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 378 xchk_ino_set_corrupt(sc, ino); 379 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 380 xchk_ino_set_corrupt(sc, ino); 381 382 /* di_aformat */ 383 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 384 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 385 dip->di_aformat != XFS_DINODE_FMT_BTREE) 386 xchk_ino_set_corrupt(sc, ino); 387 388 /* di_anextents */ 389 nextents = be16_to_cpu(dip->di_anextents); 390 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 391 switch (dip->di_aformat) { 392 case XFS_DINODE_FMT_EXTENTS: 393 if (nextents > fork_recs) 394 xchk_ino_set_corrupt(sc, ino); 395 break; 396 case XFS_DINODE_FMT_BTREE: 397 if (nextents <= fork_recs) 398 xchk_ino_set_corrupt(sc, ino); 399 break; 400 default: 401 if (nextents != 0) 402 xchk_ino_set_corrupt(sc, ino); 403 } 404 405 if (dip->di_version >= 3) { 406 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) 407 xchk_ino_set_corrupt(sc, ino); 408 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 409 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 410 flags2); 411 } 412 } 413 414 /* 415 * Make sure the finobt doesn't think this inode is free. 416 * We don't have to check the inobt ourselves because we got the inode via 417 * IGET_UNTRUSTED, which checks the inobt for us. 418 */ 419 static void 420 xchk_inode_xref_finobt( 421 struct xfs_scrub *sc, 422 xfs_ino_t ino) 423 { 424 struct xfs_inobt_rec_incore rec; 425 xfs_agino_t agino; 426 int has_record; 427 int error; 428 429 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 430 return; 431 432 agino = XFS_INO_TO_AGINO(sc->mp, ino); 433 434 /* 435 * Try to get the finobt record. If we can't get it, then we're 436 * in good shape. 437 */ 438 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 439 &has_record); 440 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 441 !has_record) 442 return; 443 444 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 445 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 446 !has_record) 447 return; 448 449 /* 450 * Otherwise, make sure this record either doesn't cover this inode, 451 * or that it does but it's marked present. 452 */ 453 if (rec.ir_startino > agino || 454 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 455 return; 456 457 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 458 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 459 } 460 461 /* Cross reference the inode fields with the forks. */ 462 STATIC void 463 xchk_inode_xref_bmap( 464 struct xfs_scrub *sc, 465 struct xfs_dinode *dip) 466 { 467 xfs_extnum_t nextents; 468 xfs_filblks_t count; 469 xfs_filblks_t acount; 470 int error; 471 472 if (xchk_skip_xref(sc->sm)) 473 return; 474 475 /* Walk all the extents to check nextents/naextents/nblocks. */ 476 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 477 &nextents, &count); 478 if (!xchk_should_check_xref(sc, &error, NULL)) 479 return; 480 if (nextents < be32_to_cpu(dip->di_nextents)) 481 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 482 483 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 484 &nextents, &acount); 485 if (!xchk_should_check_xref(sc, &error, NULL)) 486 return; 487 if (nextents != be16_to_cpu(dip->di_anextents)) 488 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 489 490 /* Check nblocks against the inode. */ 491 if (count + acount != be64_to_cpu(dip->di_nblocks)) 492 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 493 } 494 495 /* Cross-reference with the other btrees. */ 496 STATIC void 497 xchk_inode_xref( 498 struct xfs_scrub *sc, 499 xfs_ino_t ino, 500 struct xfs_dinode *dip) 501 { 502 xfs_agnumber_t agno; 503 xfs_agblock_t agbno; 504 int error; 505 506 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 507 return; 508 509 agno = XFS_INO_TO_AGNO(sc->mp, ino); 510 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 511 512 error = xchk_ag_init(sc, agno, &sc->sa); 513 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 514 return; 515 516 xchk_xref_is_used_space(sc, agbno, 1); 517 xchk_inode_xref_finobt(sc, ino); 518 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 519 xchk_xref_is_not_shared(sc, agbno, 1); 520 xchk_inode_xref_bmap(sc, dip); 521 522 xchk_ag_free(sc, &sc->sa); 523 } 524 525 /* 526 * If the reflink iflag disagrees with a scan for shared data fork extents, 527 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 528 * any shared extents). We already checked for reflink iflag set on a non 529 * reflink filesystem. 530 */ 531 static void 532 xchk_inode_check_reflink_iflag( 533 struct xfs_scrub *sc, 534 xfs_ino_t ino) 535 { 536 struct xfs_mount *mp = sc->mp; 537 bool has_shared; 538 int error; 539 540 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 541 return; 542 543 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 544 &has_shared); 545 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 546 XFS_INO_TO_AGBNO(mp, ino), &error)) 547 return; 548 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 549 xchk_ino_set_preen(sc, ino); 550 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 551 xchk_ino_set_corrupt(sc, ino); 552 } 553 554 /* Scrub an inode. */ 555 int 556 xchk_inode( 557 struct xfs_scrub *sc) 558 { 559 struct xfs_dinode di; 560 int error = 0; 561 562 /* 563 * If sc->ip is NULL, that means that the setup function called 564 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 565 * and a NULL inode, so flag the corruption error and return. 566 */ 567 if (!sc->ip) { 568 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 569 return 0; 570 } 571 572 /* Scrub the inode core. */ 573 xfs_inode_to_disk(sc->ip, &di, 0); 574 xchk_dinode(sc, &di, sc->ip->i_ino); 575 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 576 goto out; 577 578 /* 579 * Look for discrepancies between file's data blocks and the reflink 580 * iflag. We already checked the iflag against the file mode when 581 * we scrubbed the dinode. 582 */ 583 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 584 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 585 586 xchk_inode_xref(sc, sc->ip->i_ino, &di); 587 out: 588 return error; 589 } 590