1 /* 2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_btree.h" 28 #include "xfs_bit.h" 29 #include "xfs_log_format.h" 30 #include "xfs_trans.h" 31 #include "xfs_sb.h" 32 #include "xfs_inode.h" 33 #include "xfs_icache.h" 34 #include "xfs_inode_buf.h" 35 #include "xfs_inode_fork.h" 36 #include "xfs_ialloc.h" 37 #include "xfs_da_format.h" 38 #include "xfs_reflink.h" 39 #include "scrub/xfs_scrub.h" 40 #include "scrub/scrub.h" 41 #include "scrub/common.h" 42 #include "scrub/trace.h" 43 44 /* 45 * Grab total control of the inode metadata. It doesn't matter here if 46 * the file data is still changing; exclusive access to the metadata is 47 * the goal. 48 */ 49 int 50 xfs_scrub_setup_inode( 51 struct xfs_scrub_context *sc, 52 struct xfs_inode *ip) 53 { 54 struct xfs_mount *mp = sc->mp; 55 int error; 56 57 /* 58 * Try to get the inode. If the verifiers fail, we try again 59 * in raw mode. 60 */ 61 error = xfs_scrub_get_inode(sc, ip); 62 switch (error) { 63 case 0: 64 break; 65 case -EFSCORRUPTED: 66 case -EFSBADCRC: 67 return 0; 68 default: 69 return error; 70 } 71 72 /* Got the inode, lock it and we're ready to go. */ 73 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 74 xfs_ilock(sc->ip, sc->ilock_flags); 75 error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 76 if (error) 77 goto out; 78 sc->ilock_flags |= XFS_ILOCK_EXCL; 79 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 80 81 out: 82 /* scrub teardown will unlock and release the inode for us */ 83 return error; 84 } 85 86 /* Inode core */ 87 88 /* 89 * Validate di_extsize hint. 90 * 91 * The rules are documented at xfs_ioctl_setattr_check_extsize(). 92 * These functions must be kept in sync with each other. 93 */ 94 STATIC void 95 xfs_scrub_inode_extsize( 96 struct xfs_scrub_context *sc, 97 struct xfs_buf *bp, 98 struct xfs_dinode *dip, 99 xfs_ino_t ino, 100 uint16_t mode, 101 uint16_t flags) 102 { 103 struct xfs_mount *mp = sc->mp; 104 bool rt_flag; 105 bool hint_flag; 106 bool inherit_flag; 107 uint32_t extsize; 108 uint32_t extsize_bytes; 109 uint32_t blocksize_bytes; 110 111 rt_flag = (flags & XFS_DIFLAG_REALTIME); 112 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 113 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 114 extsize = be32_to_cpu(dip->di_extsize); 115 extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); 116 117 if (rt_flag) 118 blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; 119 else 120 blocksize_bytes = mp->m_sb.sb_blocksize; 121 122 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 123 goto bad; 124 125 if (hint_flag && !S_ISREG(mode)) 126 goto bad; 127 128 if (inherit_flag && !S_ISDIR(mode)) 129 goto bad; 130 131 if ((hint_flag || inherit_flag) && extsize == 0) 132 goto bad; 133 134 if (!(hint_flag || inherit_flag) && extsize != 0) 135 goto bad; 136 137 if (extsize_bytes % blocksize_bytes) 138 goto bad; 139 140 if (extsize > MAXEXTLEN) 141 goto bad; 142 143 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 144 goto bad; 145 146 return; 147 bad: 148 xfs_scrub_ino_set_corrupt(sc, ino, bp); 149 } 150 151 /* 152 * Validate di_cowextsize hint. 153 * 154 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 155 * These functions must be kept in sync with each other. 156 */ 157 STATIC void 158 xfs_scrub_inode_cowextsize( 159 struct xfs_scrub_context *sc, 160 struct xfs_buf *bp, 161 struct xfs_dinode *dip, 162 xfs_ino_t ino, 163 uint16_t mode, 164 uint16_t flags, 165 uint64_t flags2) 166 { 167 struct xfs_mount *mp = sc->mp; 168 bool rt_flag; 169 bool hint_flag; 170 uint32_t extsize; 171 uint32_t extsize_bytes; 172 173 rt_flag = (flags & XFS_DIFLAG_REALTIME); 174 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 175 extsize = be32_to_cpu(dip->di_cowextsize); 176 extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); 177 178 if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) 179 goto bad; 180 181 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 182 goto bad; 183 184 if (hint_flag && extsize == 0) 185 goto bad; 186 187 if (!hint_flag && extsize != 0) 188 goto bad; 189 190 if (hint_flag && rt_flag) 191 goto bad; 192 193 if (extsize_bytes % mp->m_sb.sb_blocksize) 194 goto bad; 195 196 if (extsize > MAXEXTLEN) 197 goto bad; 198 199 if (extsize > mp->m_sb.sb_agblocks / 2) 200 goto bad; 201 202 return; 203 bad: 204 xfs_scrub_ino_set_corrupt(sc, ino, bp); 205 } 206 207 /* Make sure the di_flags make sense for the inode. */ 208 STATIC void 209 xfs_scrub_inode_flags( 210 struct xfs_scrub_context *sc, 211 struct xfs_buf *bp, 212 struct xfs_dinode *dip, 213 xfs_ino_t ino, 214 uint16_t mode, 215 uint16_t flags) 216 { 217 struct xfs_mount *mp = sc->mp; 218 219 if (flags & ~XFS_DIFLAG_ANY) 220 goto bad; 221 222 /* rt flags require rt device */ 223 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 224 !mp->m_rtdev_targp) 225 goto bad; 226 227 /* new rt bitmap flag only valid for rbmino */ 228 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 229 goto bad; 230 231 /* directory-only flags */ 232 if ((flags & (XFS_DIFLAG_RTINHERIT | 233 XFS_DIFLAG_EXTSZINHERIT | 234 XFS_DIFLAG_PROJINHERIT | 235 XFS_DIFLAG_NOSYMLINKS)) && 236 !S_ISDIR(mode)) 237 goto bad; 238 239 /* file-only flags */ 240 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 241 !S_ISREG(mode)) 242 goto bad; 243 244 /* filestreams and rt make no sense */ 245 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 246 goto bad; 247 248 return; 249 bad: 250 xfs_scrub_ino_set_corrupt(sc, ino, bp); 251 } 252 253 /* Make sure the di_flags2 make sense for the inode. */ 254 STATIC void 255 xfs_scrub_inode_flags2( 256 struct xfs_scrub_context *sc, 257 struct xfs_buf *bp, 258 struct xfs_dinode *dip, 259 xfs_ino_t ino, 260 uint16_t mode, 261 uint16_t flags, 262 uint64_t flags2) 263 { 264 struct xfs_mount *mp = sc->mp; 265 266 if (flags2 & ~XFS_DIFLAG2_ANY) 267 goto bad; 268 269 /* reflink flag requires reflink feature */ 270 if ((flags2 & XFS_DIFLAG2_REFLINK) && 271 !xfs_sb_version_hasreflink(&mp->m_sb)) 272 goto bad; 273 274 /* cowextsize flag is checked w.r.t. mode separately */ 275 276 /* file/dir-only flags */ 277 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 278 goto bad; 279 280 /* file-only flags */ 281 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 282 goto bad; 283 284 /* realtime and reflink make no sense, currently */ 285 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 286 goto bad; 287 288 /* dax and reflink make no sense, currently */ 289 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 290 goto bad; 291 292 return; 293 bad: 294 xfs_scrub_ino_set_corrupt(sc, ino, bp); 295 } 296 297 /* Scrub all the ondisk inode fields. */ 298 STATIC void 299 xfs_scrub_dinode( 300 struct xfs_scrub_context *sc, 301 struct xfs_buf *bp, 302 struct xfs_dinode *dip, 303 xfs_ino_t ino) 304 { 305 struct xfs_mount *mp = sc->mp; 306 size_t fork_recs; 307 unsigned long long isize; 308 uint64_t flags2; 309 uint32_t nextents; 310 uint16_t flags; 311 uint16_t mode; 312 313 flags = be16_to_cpu(dip->di_flags); 314 if (dip->di_version >= 3) 315 flags2 = be64_to_cpu(dip->di_flags2); 316 else 317 flags2 = 0; 318 319 /* di_mode */ 320 mode = be16_to_cpu(dip->di_mode); 321 if (mode & ~(S_IALLUGO | S_IFMT)) 322 xfs_scrub_ino_set_corrupt(sc, ino, bp); 323 324 /* v1/v2 fields */ 325 switch (dip->di_version) { 326 case 1: 327 /* 328 * We autoconvert v1 inodes into v2 inodes on writeout, 329 * so just mark this inode for preening. 330 */ 331 xfs_scrub_ino_set_preen(sc, ino, bp); 332 break; 333 case 2: 334 case 3: 335 if (dip->di_onlink != 0) 336 xfs_scrub_ino_set_corrupt(sc, ino, bp); 337 338 if (dip->di_mode == 0 && sc->ip) 339 xfs_scrub_ino_set_corrupt(sc, ino, bp); 340 341 if (dip->di_projid_hi != 0 && 342 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 343 xfs_scrub_ino_set_corrupt(sc, ino, bp); 344 break; 345 default: 346 xfs_scrub_ino_set_corrupt(sc, ino, bp); 347 return; 348 } 349 350 /* 351 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 352 * userspace could have created that. 353 */ 354 if (dip->di_uid == cpu_to_be32(-1U) || 355 dip->di_gid == cpu_to_be32(-1U)) 356 xfs_scrub_ino_set_warning(sc, ino, bp); 357 358 /* di_format */ 359 switch (dip->di_format) { 360 case XFS_DINODE_FMT_DEV: 361 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 362 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 363 xfs_scrub_ino_set_corrupt(sc, ino, bp); 364 break; 365 case XFS_DINODE_FMT_LOCAL: 366 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 367 xfs_scrub_ino_set_corrupt(sc, ino, bp); 368 break; 369 case XFS_DINODE_FMT_EXTENTS: 370 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 371 xfs_scrub_ino_set_corrupt(sc, ino, bp); 372 break; 373 case XFS_DINODE_FMT_BTREE: 374 if (!S_ISREG(mode) && !S_ISDIR(mode)) 375 xfs_scrub_ino_set_corrupt(sc, ino, bp); 376 break; 377 case XFS_DINODE_FMT_UUID: 378 default: 379 xfs_scrub_ino_set_corrupt(sc, ino, bp); 380 break; 381 } 382 383 /* 384 * di_size. xfs_dinode_verify checks for things that screw up 385 * the VFS such as the upper bit being set and zero-length 386 * symlinks/directories, but we can do more here. 387 */ 388 isize = be64_to_cpu(dip->di_size); 389 if (isize & (1ULL << 63)) 390 xfs_scrub_ino_set_corrupt(sc, ino, bp); 391 392 /* Devices, fifos, and sockets must have zero size */ 393 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 394 xfs_scrub_ino_set_corrupt(sc, ino, bp); 395 396 /* Directories can't be larger than the data section size (32G) */ 397 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 398 xfs_scrub_ino_set_corrupt(sc, ino, bp); 399 400 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 401 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 402 xfs_scrub_ino_set_corrupt(sc, ino, bp); 403 404 /* 405 * Warn if the running kernel can't handle the kinds of offsets 406 * needed to deal with the file size. In other words, if the 407 * pagecache can't cache all the blocks in this file due to 408 * overly large offsets, flag the inode for admin review. 409 */ 410 if (isize >= mp->m_super->s_maxbytes) 411 xfs_scrub_ino_set_warning(sc, ino, bp); 412 413 /* di_nblocks */ 414 if (flags2 & XFS_DIFLAG2_REFLINK) { 415 ; /* nblocks can exceed dblocks */ 416 } else if (flags & XFS_DIFLAG_REALTIME) { 417 /* 418 * nblocks is the sum of data extents (in the rtdev), 419 * attr extents (in the datadev), and both forks' bmbt 420 * blocks (in the datadev). This clumsy check is the 421 * best we can do without cross-referencing with the 422 * inode forks. 423 */ 424 if (be64_to_cpu(dip->di_nblocks) >= 425 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 426 xfs_scrub_ino_set_corrupt(sc, ino, bp); 427 } else { 428 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 429 xfs_scrub_ino_set_corrupt(sc, ino, bp); 430 } 431 432 xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags); 433 434 xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags); 435 436 /* di_nextents */ 437 nextents = be32_to_cpu(dip->di_nextents); 438 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 439 switch (dip->di_format) { 440 case XFS_DINODE_FMT_EXTENTS: 441 if (nextents > fork_recs) 442 xfs_scrub_ino_set_corrupt(sc, ino, bp); 443 break; 444 case XFS_DINODE_FMT_BTREE: 445 if (nextents <= fork_recs) 446 xfs_scrub_ino_set_corrupt(sc, ino, bp); 447 break; 448 default: 449 if (nextents != 0) 450 xfs_scrub_ino_set_corrupt(sc, ino, bp); 451 break; 452 } 453 454 /* di_forkoff */ 455 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 456 xfs_scrub_ino_set_corrupt(sc, ino, bp); 457 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 458 xfs_scrub_ino_set_corrupt(sc, ino, bp); 459 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 460 xfs_scrub_ino_set_corrupt(sc, ino, bp); 461 462 /* di_aformat */ 463 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 464 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 465 dip->di_aformat != XFS_DINODE_FMT_BTREE) 466 xfs_scrub_ino_set_corrupt(sc, ino, bp); 467 468 /* di_anextents */ 469 nextents = be16_to_cpu(dip->di_anextents); 470 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 471 switch (dip->di_aformat) { 472 case XFS_DINODE_FMT_EXTENTS: 473 if (nextents > fork_recs) 474 xfs_scrub_ino_set_corrupt(sc, ino, bp); 475 break; 476 case XFS_DINODE_FMT_BTREE: 477 if (nextents <= fork_recs) 478 xfs_scrub_ino_set_corrupt(sc, ino, bp); 479 break; 480 default: 481 if (nextents != 0) 482 xfs_scrub_ino_set_corrupt(sc, ino, bp); 483 } 484 485 if (dip->di_version >= 3) { 486 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); 487 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, 488 flags2); 489 } 490 } 491 492 /* Map and read a raw inode. */ 493 STATIC int 494 xfs_scrub_inode_map_raw( 495 struct xfs_scrub_context *sc, 496 xfs_ino_t ino, 497 struct xfs_buf **bpp, 498 struct xfs_dinode **dipp) 499 { 500 struct xfs_imap imap; 501 struct xfs_mount *mp = sc->mp; 502 struct xfs_buf *bp = NULL; 503 struct xfs_dinode *dip; 504 int error; 505 506 error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED); 507 if (error == -EINVAL) { 508 /* 509 * Inode could have gotten deleted out from under us; 510 * just forget about it. 511 */ 512 error = -ENOENT; 513 goto out; 514 } 515 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 516 XFS_INO_TO_AGBNO(mp, ino), &error)) 517 goto out; 518 519 error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, 520 imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp, 521 NULL); 522 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 523 XFS_INO_TO_AGBNO(mp, ino), &error)) 524 goto out; 525 526 /* 527 * Is this really an inode? We disabled verifiers in the above 528 * xfs_trans_read_buf call because the inode buffer verifier 529 * fails on /any/ inode record in the inode cluster with a bad 530 * magic or version number, not just the one that we're 531 * checking. Therefore, grab the buffer unconditionally, attach 532 * the inode verifiers by hand, and run the inode verifier only 533 * on the one inode we want. 534 */ 535 bp->b_ops = &xfs_inode_buf_ops; 536 dip = xfs_buf_offset(bp, imap.im_boffset); 537 if (!xfs_dinode_verify(mp, ino, dip) || 538 !xfs_dinode_good_version(mp, dip->di_version)) { 539 xfs_scrub_ino_set_corrupt(sc, ino, bp); 540 goto out_buf; 541 } 542 543 /* ...and is it the one we asked for? */ 544 if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) { 545 error = -ENOENT; 546 goto out_buf; 547 } 548 549 *dipp = dip; 550 *bpp = bp; 551 out: 552 return error; 553 out_buf: 554 xfs_trans_brelse(sc->tp, bp); 555 return error; 556 } 557 558 /* Scrub an inode. */ 559 int 560 xfs_scrub_inode( 561 struct xfs_scrub_context *sc) 562 { 563 struct xfs_dinode di; 564 struct xfs_mount *mp = sc->mp; 565 struct xfs_buf *bp = NULL; 566 struct xfs_dinode *dip; 567 xfs_ino_t ino; 568 569 bool has_shared; 570 int error = 0; 571 572 /* Did we get the in-core inode, or are we doing this manually? */ 573 if (sc->ip) { 574 ino = sc->ip->i_ino; 575 xfs_inode_to_disk(sc->ip, &di, 0); 576 dip = &di; 577 } else { 578 /* Map & read inode. */ 579 ino = sc->sm->sm_ino; 580 error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip); 581 if (error || !bp) 582 goto out; 583 } 584 585 xfs_scrub_dinode(sc, bp, dip, ino); 586 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 587 goto out; 588 589 /* Now let's do the things that require a live inode. */ 590 if (!sc->ip) 591 goto out; 592 593 /* 594 * Does this inode have the reflink flag set but no shared extents? 595 * Set the preening flag if this is the case. 596 */ 597 if (xfs_is_reflink_inode(sc->ip)) { 598 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 599 &has_shared); 600 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 601 XFS_INO_TO_AGBNO(mp, ino), &error)) 602 goto out; 603 if (!has_shared) 604 xfs_scrub_ino_set_preen(sc, ino, bp); 605 } 606 607 out: 608 if (bp) 609 xfs_trans_brelse(sc->tp, bp); 610 return error; 611 } 612