1 /* 2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_btree.h" 28 #include "xfs_bit.h" 29 #include "xfs_log_format.h" 30 #include "xfs_trans.h" 31 #include "xfs_sb.h" 32 #include "xfs_inode.h" 33 #include "xfs_icache.h" 34 #include "xfs_inode_buf.h" 35 #include "xfs_inode_fork.h" 36 #include "xfs_ialloc.h" 37 #include "xfs_da_format.h" 38 #include "xfs_reflink.h" 39 #include "xfs_rmap.h" 40 #include "xfs_bmap.h" 41 #include "xfs_bmap_util.h" 42 #include "scrub/xfs_scrub.h" 43 #include "scrub/scrub.h" 44 #include "scrub/common.h" 45 #include "scrub/btree.h" 46 #include "scrub/trace.h" 47 48 /* 49 * Grab total control of the inode metadata. It doesn't matter here if 50 * the file data is still changing; exclusive access to the metadata is 51 * the goal. 52 */ 53 int 54 xfs_scrub_setup_inode( 55 struct xfs_scrub_context *sc, 56 struct xfs_inode *ip) 57 { 58 struct xfs_mount *mp = sc->mp; 59 int error; 60 61 /* 62 * Try to get the inode. If the verifiers fail, we try again 63 * in raw mode. 64 */ 65 error = xfs_scrub_get_inode(sc, ip); 66 switch (error) { 67 case 0: 68 break; 69 case -EFSCORRUPTED: 70 case -EFSBADCRC: 71 return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 72 default: 73 return error; 74 } 75 76 /* Got the inode, lock it and we're ready to go. */ 77 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 78 xfs_ilock(sc->ip, sc->ilock_flags); 79 error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 80 if (error) 81 goto out; 82 sc->ilock_flags |= XFS_ILOCK_EXCL; 83 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 84 85 out: 86 /* scrub teardown will unlock and release the inode for us */ 87 return error; 88 } 89 90 /* Inode core */ 91 92 /* Validate di_extsize hint. */ 93 STATIC void 94 xfs_scrub_inode_extsize( 95 struct xfs_scrub_context *sc, 96 struct xfs_dinode *dip, 97 xfs_ino_t ino, 98 uint16_t mode, 99 uint16_t flags) 100 { 101 xfs_failaddr_t fa; 102 103 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 104 mode, flags); 105 if (fa) 106 xfs_scrub_ino_set_corrupt(sc, ino); 107 } 108 109 /* 110 * Validate di_cowextsize hint. 111 * 112 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 113 * These functions must be kept in sync with each other. 114 */ 115 STATIC void 116 xfs_scrub_inode_cowextsize( 117 struct xfs_scrub_context *sc, 118 struct xfs_dinode *dip, 119 xfs_ino_t ino, 120 uint16_t mode, 121 uint16_t flags, 122 uint64_t flags2) 123 { 124 xfs_failaddr_t fa; 125 126 fa = xfs_inode_validate_cowextsize(sc->mp, 127 be32_to_cpu(dip->di_cowextsize), mode, flags, 128 flags2); 129 if (fa) 130 xfs_scrub_ino_set_corrupt(sc, ino); 131 } 132 133 /* Make sure the di_flags make sense for the inode. */ 134 STATIC void 135 xfs_scrub_inode_flags( 136 struct xfs_scrub_context *sc, 137 struct xfs_dinode *dip, 138 xfs_ino_t ino, 139 uint16_t mode, 140 uint16_t flags) 141 { 142 struct xfs_mount *mp = sc->mp; 143 144 if (flags & ~XFS_DIFLAG_ANY) 145 goto bad; 146 147 /* rt flags require rt device */ 148 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 149 !mp->m_rtdev_targp) 150 goto bad; 151 152 /* new rt bitmap flag only valid for rbmino */ 153 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 154 goto bad; 155 156 /* directory-only flags */ 157 if ((flags & (XFS_DIFLAG_RTINHERIT | 158 XFS_DIFLAG_EXTSZINHERIT | 159 XFS_DIFLAG_PROJINHERIT | 160 XFS_DIFLAG_NOSYMLINKS)) && 161 !S_ISDIR(mode)) 162 goto bad; 163 164 /* file-only flags */ 165 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 166 !S_ISREG(mode)) 167 goto bad; 168 169 /* filestreams and rt make no sense */ 170 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 171 goto bad; 172 173 return; 174 bad: 175 xfs_scrub_ino_set_corrupt(sc, ino); 176 } 177 178 /* Make sure the di_flags2 make sense for the inode. */ 179 STATIC void 180 xfs_scrub_inode_flags2( 181 struct xfs_scrub_context *sc, 182 struct xfs_dinode *dip, 183 xfs_ino_t ino, 184 uint16_t mode, 185 uint16_t flags, 186 uint64_t flags2) 187 { 188 struct xfs_mount *mp = sc->mp; 189 190 if (flags2 & ~XFS_DIFLAG2_ANY) 191 goto bad; 192 193 /* reflink flag requires reflink feature */ 194 if ((flags2 & XFS_DIFLAG2_REFLINK) && 195 !xfs_sb_version_hasreflink(&mp->m_sb)) 196 goto bad; 197 198 /* cowextsize flag is checked w.r.t. mode separately */ 199 200 /* file/dir-only flags */ 201 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 202 goto bad; 203 204 /* file-only flags */ 205 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 206 goto bad; 207 208 /* realtime and reflink make no sense, currently */ 209 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 210 goto bad; 211 212 /* dax and reflink make no sense, currently */ 213 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 214 goto bad; 215 216 return; 217 bad: 218 xfs_scrub_ino_set_corrupt(sc, ino); 219 } 220 221 /* Scrub all the ondisk inode fields. */ 222 STATIC void 223 xfs_scrub_dinode( 224 struct xfs_scrub_context *sc, 225 struct xfs_dinode *dip, 226 xfs_ino_t ino) 227 { 228 struct xfs_mount *mp = sc->mp; 229 size_t fork_recs; 230 unsigned long long isize; 231 uint64_t flags2; 232 uint32_t nextents; 233 uint16_t flags; 234 uint16_t mode; 235 236 flags = be16_to_cpu(dip->di_flags); 237 if (dip->di_version >= 3) 238 flags2 = be64_to_cpu(dip->di_flags2); 239 else 240 flags2 = 0; 241 242 /* di_mode */ 243 mode = be16_to_cpu(dip->di_mode); 244 switch (mode & S_IFMT) { 245 case S_IFLNK: 246 case S_IFREG: 247 case S_IFDIR: 248 case S_IFCHR: 249 case S_IFBLK: 250 case S_IFIFO: 251 case S_IFSOCK: 252 /* mode is recognized */ 253 break; 254 default: 255 xfs_scrub_ino_set_corrupt(sc, ino); 256 break; 257 } 258 259 /* v1/v2 fields */ 260 switch (dip->di_version) { 261 case 1: 262 /* 263 * We autoconvert v1 inodes into v2 inodes on writeout, 264 * so just mark this inode for preening. 265 */ 266 xfs_scrub_ino_set_preen(sc, ino); 267 break; 268 case 2: 269 case 3: 270 if (dip->di_onlink != 0) 271 xfs_scrub_ino_set_corrupt(sc, ino); 272 273 if (dip->di_mode == 0 && sc->ip) 274 xfs_scrub_ino_set_corrupt(sc, ino); 275 276 if (dip->di_projid_hi != 0 && 277 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 278 xfs_scrub_ino_set_corrupt(sc, ino); 279 break; 280 default: 281 xfs_scrub_ino_set_corrupt(sc, ino); 282 return; 283 } 284 285 /* 286 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 287 * userspace could have created that. 288 */ 289 if (dip->di_uid == cpu_to_be32(-1U) || 290 dip->di_gid == cpu_to_be32(-1U)) 291 xfs_scrub_ino_set_warning(sc, ino); 292 293 /* di_format */ 294 switch (dip->di_format) { 295 case XFS_DINODE_FMT_DEV: 296 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 297 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 298 xfs_scrub_ino_set_corrupt(sc, ino); 299 break; 300 case XFS_DINODE_FMT_LOCAL: 301 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 302 xfs_scrub_ino_set_corrupt(sc, ino); 303 break; 304 case XFS_DINODE_FMT_EXTENTS: 305 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 306 xfs_scrub_ino_set_corrupt(sc, ino); 307 break; 308 case XFS_DINODE_FMT_BTREE: 309 if (!S_ISREG(mode) && !S_ISDIR(mode)) 310 xfs_scrub_ino_set_corrupt(sc, ino); 311 break; 312 case XFS_DINODE_FMT_UUID: 313 default: 314 xfs_scrub_ino_set_corrupt(sc, ino); 315 break; 316 } 317 318 /* di_[amc]time.nsec */ 319 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) 320 xfs_scrub_ino_set_corrupt(sc, ino); 321 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) 322 xfs_scrub_ino_set_corrupt(sc, ino); 323 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) 324 xfs_scrub_ino_set_corrupt(sc, ino); 325 326 /* 327 * di_size. xfs_dinode_verify checks for things that screw up 328 * the VFS such as the upper bit being set and zero-length 329 * symlinks/directories, but we can do more here. 330 */ 331 isize = be64_to_cpu(dip->di_size); 332 if (isize & (1ULL << 63)) 333 xfs_scrub_ino_set_corrupt(sc, ino); 334 335 /* Devices, fifos, and sockets must have zero size */ 336 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 337 xfs_scrub_ino_set_corrupt(sc, ino); 338 339 /* Directories can't be larger than the data section size (32G) */ 340 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 341 xfs_scrub_ino_set_corrupt(sc, ino); 342 343 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 344 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 345 xfs_scrub_ino_set_corrupt(sc, ino); 346 347 /* 348 * Warn if the running kernel can't handle the kinds of offsets 349 * needed to deal with the file size. In other words, if the 350 * pagecache can't cache all the blocks in this file due to 351 * overly large offsets, flag the inode for admin review. 352 */ 353 if (isize >= mp->m_super->s_maxbytes) 354 xfs_scrub_ino_set_warning(sc, ino); 355 356 /* di_nblocks */ 357 if (flags2 & XFS_DIFLAG2_REFLINK) { 358 ; /* nblocks can exceed dblocks */ 359 } else if (flags & XFS_DIFLAG_REALTIME) { 360 /* 361 * nblocks is the sum of data extents (in the rtdev), 362 * attr extents (in the datadev), and both forks' bmbt 363 * blocks (in the datadev). This clumsy check is the 364 * best we can do without cross-referencing with the 365 * inode forks. 366 */ 367 if (be64_to_cpu(dip->di_nblocks) >= 368 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 369 xfs_scrub_ino_set_corrupt(sc, ino); 370 } else { 371 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 372 xfs_scrub_ino_set_corrupt(sc, ino); 373 } 374 375 xfs_scrub_inode_flags(sc, dip, ino, mode, flags); 376 377 xfs_scrub_inode_extsize(sc, dip, ino, mode, flags); 378 379 /* di_nextents */ 380 nextents = be32_to_cpu(dip->di_nextents); 381 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 382 switch (dip->di_format) { 383 case XFS_DINODE_FMT_EXTENTS: 384 if (nextents > fork_recs) 385 xfs_scrub_ino_set_corrupt(sc, ino); 386 break; 387 case XFS_DINODE_FMT_BTREE: 388 if (nextents <= fork_recs) 389 xfs_scrub_ino_set_corrupt(sc, ino); 390 break; 391 default: 392 if (nextents != 0) 393 xfs_scrub_ino_set_corrupt(sc, ino); 394 break; 395 } 396 397 /* di_forkoff */ 398 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 399 xfs_scrub_ino_set_corrupt(sc, ino); 400 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 401 xfs_scrub_ino_set_corrupt(sc, ino); 402 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 403 xfs_scrub_ino_set_corrupt(sc, ino); 404 405 /* di_aformat */ 406 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 407 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 408 dip->di_aformat != XFS_DINODE_FMT_BTREE) 409 xfs_scrub_ino_set_corrupt(sc, ino); 410 411 /* di_anextents */ 412 nextents = be16_to_cpu(dip->di_anextents); 413 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 414 switch (dip->di_aformat) { 415 case XFS_DINODE_FMT_EXTENTS: 416 if (nextents > fork_recs) 417 xfs_scrub_ino_set_corrupt(sc, ino); 418 break; 419 case XFS_DINODE_FMT_BTREE: 420 if (nextents <= fork_recs) 421 xfs_scrub_ino_set_corrupt(sc, ino); 422 break; 423 default: 424 if (nextents != 0) 425 xfs_scrub_ino_set_corrupt(sc, ino); 426 } 427 428 if (dip->di_version >= 3) { 429 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) 430 xfs_scrub_ino_set_corrupt(sc, ino); 431 xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2); 432 xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags, 433 flags2); 434 } 435 } 436 437 /* 438 * Make sure the finobt doesn't think this inode is free. 439 * We don't have to check the inobt ourselves because we got the inode via 440 * IGET_UNTRUSTED, which checks the inobt for us. 441 */ 442 static void 443 xfs_scrub_inode_xref_finobt( 444 struct xfs_scrub_context *sc, 445 xfs_ino_t ino) 446 { 447 struct xfs_inobt_rec_incore rec; 448 xfs_agino_t agino; 449 int has_record; 450 int error; 451 452 if (!sc->sa.fino_cur) 453 return; 454 455 agino = XFS_INO_TO_AGINO(sc->mp, ino); 456 457 /* 458 * Try to get the finobt record. If we can't get it, then we're 459 * in good shape. 460 */ 461 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 462 &has_record); 463 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 464 !has_record) 465 return; 466 467 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 468 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 469 !has_record) 470 return; 471 472 /* 473 * Otherwise, make sure this record either doesn't cover this inode, 474 * or that it does but it's marked present. 475 */ 476 if (rec.ir_startino > agino || 477 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 478 return; 479 480 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 481 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 482 } 483 484 /* Cross reference the inode fields with the forks. */ 485 STATIC void 486 xfs_scrub_inode_xref_bmap( 487 struct xfs_scrub_context *sc, 488 struct xfs_dinode *dip) 489 { 490 xfs_extnum_t nextents; 491 xfs_filblks_t count; 492 xfs_filblks_t acount; 493 int error; 494 495 /* Walk all the extents to check nextents/naextents/nblocks. */ 496 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 497 &nextents, &count); 498 if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 499 return; 500 if (nextents < be32_to_cpu(dip->di_nextents)) 501 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); 502 503 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 504 &nextents, &acount); 505 if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 506 return; 507 if (nextents != be16_to_cpu(dip->di_anextents)) 508 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); 509 510 /* Check nblocks against the inode. */ 511 if (count + acount != be64_to_cpu(dip->di_nblocks)) 512 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); 513 } 514 515 /* Cross-reference with the other btrees. */ 516 STATIC void 517 xfs_scrub_inode_xref( 518 struct xfs_scrub_context *sc, 519 xfs_ino_t ino, 520 struct xfs_dinode *dip) 521 { 522 struct xfs_owner_info oinfo; 523 xfs_agnumber_t agno; 524 xfs_agblock_t agbno; 525 int error; 526 527 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 528 return; 529 530 agno = XFS_INO_TO_AGNO(sc->mp, ino); 531 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 532 533 error = xfs_scrub_ag_init(sc, agno, &sc->sa); 534 if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) 535 return; 536 537 xfs_scrub_xref_is_used_space(sc, agbno, 1); 538 xfs_scrub_inode_xref_finobt(sc, ino); 539 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 540 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 541 xfs_scrub_xref_is_not_shared(sc, agbno, 1); 542 xfs_scrub_inode_xref_bmap(sc, dip); 543 544 xfs_scrub_ag_free(sc, &sc->sa); 545 } 546 547 /* 548 * If the reflink iflag disagrees with a scan for shared data fork extents, 549 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 550 * any shared extents). We already checked for reflink iflag set on a non 551 * reflink filesystem. 552 */ 553 static void 554 xfs_scrub_inode_check_reflink_iflag( 555 struct xfs_scrub_context *sc, 556 xfs_ino_t ino) 557 { 558 struct xfs_mount *mp = sc->mp; 559 bool has_shared; 560 int error; 561 562 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 563 return; 564 565 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 566 &has_shared); 567 if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 568 XFS_INO_TO_AGBNO(mp, ino), &error)) 569 return; 570 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 571 xfs_scrub_ino_set_preen(sc, ino); 572 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 573 xfs_scrub_ino_set_corrupt(sc, ino); 574 } 575 576 /* Scrub an inode. */ 577 int 578 xfs_scrub_inode( 579 struct xfs_scrub_context *sc) 580 { 581 struct xfs_dinode di; 582 int error = 0; 583 584 /* 585 * If sc->ip is NULL, that means that the setup function called 586 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 587 * and a NULL inode, so flag the corruption error and return. 588 */ 589 if (!sc->ip) { 590 xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino); 591 return 0; 592 } 593 594 /* Scrub the inode core. */ 595 xfs_inode_to_disk(sc->ip, &di, 0); 596 xfs_scrub_dinode(sc, &di, sc->ip->i_ino); 597 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 598 goto out; 599 600 /* 601 * Look for discrepancies between file's data blocks and the reflink 602 * iflag. We already checked the iflag against the file mode when 603 * we scrubbed the dinode. 604 */ 605 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 606 xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino); 607 608 xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di); 609 out: 610 return error; 611 } 612