1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_icache.h" 20 #include "xfs_itable.h" 21 #include "xfs_alloc.h" 22 #include "xfs_alloc_btree.h" 23 #include "xfs_bmap.h" 24 #include "xfs_bmap_btree.h" 25 #include "xfs_ialloc.h" 26 #include "xfs_ialloc_btree.h" 27 #include "xfs_refcount.h" 28 #include "xfs_refcount_btree.h" 29 #include "xfs_rmap.h" 30 #include "xfs_rmap_btree.h" 31 #include "xfs_log.h" 32 #include "xfs_trans_priv.h" 33 #include "xfs_attr.h" 34 #include "xfs_reflink.h" 35 #include "scrub/xfs_scrub.h" 36 #include "scrub/scrub.h" 37 #include "scrub/common.h" 38 #include "scrub/trace.h" 39 #include "scrub/btree.h" 40 #include "scrub/repair.h" 41 #include "scrub/health.h" 42 43 /* Common code for the metadata scrubbers. */ 44 45 /* 46 * Handling operational errors. 47 * 48 * The *_process_error() family of functions are used to process error return 49 * codes from functions called as part of a scrub operation. 50 * 51 * If there's no error, we return true to tell the caller that it's ok 52 * to move on to the next check in its list. 53 * 54 * For non-verifier errors (e.g. ENOMEM) we return false to tell the 55 * caller that something bad happened, and we preserve *error so that 56 * the caller can return the *error up the stack to userspace. 57 * 58 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting 59 * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words, 60 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT, 61 * not via return codes. We return false to tell the caller that 62 * something bad happened. Since the error has been cleared, the caller 63 * will (presumably) return that zero and scrubbing will move on to 64 * whatever's next. 65 * 66 * ftrace can be used to record the precise metadata location and the 67 * approximate code location of the failed operation. 68 */ 69 70 /* Check for operational errors. */ 71 static bool 72 __xchk_process_error( 73 struct xfs_scrub *sc, 74 xfs_agnumber_t agno, 75 xfs_agblock_t bno, 76 int *error, 77 __u32 errflag, 78 void *ret_ip) 79 { 80 switch (*error) { 81 case 0: 82 return true; 83 case -EDEADLOCK: 84 /* Used to restart an op with deadlock avoidance. */ 85 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 86 break; 87 case -EFSBADCRC: 88 case -EFSCORRUPTED: 89 /* Note the badness but don't abort. */ 90 sc->sm->sm_flags |= errflag; 91 *error = 0; 92 /* fall through */ 93 default: 94 trace_xchk_op_error(sc, agno, bno, *error, 95 ret_ip); 96 break; 97 } 98 return false; 99 } 100 101 bool 102 xchk_process_error( 103 struct xfs_scrub *sc, 104 xfs_agnumber_t agno, 105 xfs_agblock_t bno, 106 int *error) 107 { 108 return __xchk_process_error(sc, agno, bno, error, 109 XFS_SCRUB_OFLAG_CORRUPT, __return_address); 110 } 111 112 bool 113 xchk_xref_process_error( 114 struct xfs_scrub *sc, 115 xfs_agnumber_t agno, 116 xfs_agblock_t bno, 117 int *error) 118 { 119 return __xchk_process_error(sc, agno, bno, error, 120 XFS_SCRUB_OFLAG_XFAIL, __return_address); 121 } 122 123 /* Check for operational errors for a file offset. */ 124 static bool 125 __xchk_fblock_process_error( 126 struct xfs_scrub *sc, 127 int whichfork, 128 xfs_fileoff_t offset, 129 int *error, 130 __u32 errflag, 131 void *ret_ip) 132 { 133 switch (*error) { 134 case 0: 135 return true; 136 case -EDEADLOCK: 137 /* Used to restart an op with deadlock avoidance. */ 138 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 139 break; 140 case -EFSBADCRC: 141 case -EFSCORRUPTED: 142 /* Note the badness but don't abort. */ 143 sc->sm->sm_flags |= errflag; 144 *error = 0; 145 /* fall through */ 146 default: 147 trace_xchk_file_op_error(sc, whichfork, offset, *error, 148 ret_ip); 149 break; 150 } 151 return false; 152 } 153 154 bool 155 xchk_fblock_process_error( 156 struct xfs_scrub *sc, 157 int whichfork, 158 xfs_fileoff_t offset, 159 int *error) 160 { 161 return __xchk_fblock_process_error(sc, whichfork, offset, error, 162 XFS_SCRUB_OFLAG_CORRUPT, __return_address); 163 } 164 165 bool 166 xchk_fblock_xref_process_error( 167 struct xfs_scrub *sc, 168 int whichfork, 169 xfs_fileoff_t offset, 170 int *error) 171 { 172 return __xchk_fblock_process_error(sc, whichfork, offset, error, 173 XFS_SCRUB_OFLAG_XFAIL, __return_address); 174 } 175 176 /* 177 * Handling scrub corruption/optimization/warning checks. 178 * 179 * The *_set_{corrupt,preen,warning}() family of functions are used to 180 * record the presence of metadata that is incorrect (corrupt), could be 181 * optimized somehow (preen), or should be flagged for administrative 182 * review but is not incorrect (warn). 183 * 184 * ftrace can be used to record the precise metadata location and 185 * approximate code location of the failed check. 186 */ 187 188 /* Record a block which could be optimized. */ 189 void 190 xchk_block_set_preen( 191 struct xfs_scrub *sc, 192 struct xfs_buf *bp) 193 { 194 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 195 trace_xchk_block_preen(sc, bp->b_bn, __return_address); 196 } 197 198 /* 199 * Record an inode which could be optimized. The trace data will 200 * include the block given by bp if bp is given; otherwise it will use 201 * the block location of the inode record itself. 202 */ 203 void 204 xchk_ino_set_preen( 205 struct xfs_scrub *sc, 206 xfs_ino_t ino) 207 { 208 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 209 trace_xchk_ino_preen(sc, ino, __return_address); 210 } 211 212 /* Record something being wrong with the filesystem primary superblock. */ 213 void 214 xchk_set_corrupt( 215 struct xfs_scrub *sc) 216 { 217 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 218 trace_xchk_fs_error(sc, 0, __return_address); 219 } 220 221 /* Record a corrupt block. */ 222 void 223 xchk_block_set_corrupt( 224 struct xfs_scrub *sc, 225 struct xfs_buf *bp) 226 { 227 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 228 trace_xchk_block_error(sc, bp->b_bn, __return_address); 229 } 230 231 /* Record a corruption while cross-referencing. */ 232 void 233 xchk_block_xref_set_corrupt( 234 struct xfs_scrub *sc, 235 struct xfs_buf *bp) 236 { 237 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 238 trace_xchk_block_error(sc, bp->b_bn, __return_address); 239 } 240 241 /* 242 * Record a corrupt inode. The trace data will include the block given 243 * by bp if bp is given; otherwise it will use the block location of the 244 * inode record itself. 245 */ 246 void 247 xchk_ino_set_corrupt( 248 struct xfs_scrub *sc, 249 xfs_ino_t ino) 250 { 251 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 252 trace_xchk_ino_error(sc, ino, __return_address); 253 } 254 255 /* Record a corruption while cross-referencing with an inode. */ 256 void 257 xchk_ino_xref_set_corrupt( 258 struct xfs_scrub *sc, 259 xfs_ino_t ino) 260 { 261 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 262 trace_xchk_ino_error(sc, ino, __return_address); 263 } 264 265 /* Record corruption in a block indexed by a file fork. */ 266 void 267 xchk_fblock_set_corrupt( 268 struct xfs_scrub *sc, 269 int whichfork, 270 xfs_fileoff_t offset) 271 { 272 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 273 trace_xchk_fblock_error(sc, whichfork, offset, __return_address); 274 } 275 276 /* Record a corruption while cross-referencing a fork block. */ 277 void 278 xchk_fblock_xref_set_corrupt( 279 struct xfs_scrub *sc, 280 int whichfork, 281 xfs_fileoff_t offset) 282 { 283 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 284 trace_xchk_fblock_error(sc, whichfork, offset, __return_address); 285 } 286 287 /* 288 * Warn about inodes that need administrative review but is not 289 * incorrect. 290 */ 291 void 292 xchk_ino_set_warning( 293 struct xfs_scrub *sc, 294 xfs_ino_t ino) 295 { 296 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; 297 trace_xchk_ino_warning(sc, ino, __return_address); 298 } 299 300 /* Warn about a block indexed by a file fork that needs review. */ 301 void 302 xchk_fblock_set_warning( 303 struct xfs_scrub *sc, 304 int whichfork, 305 xfs_fileoff_t offset) 306 { 307 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; 308 trace_xchk_fblock_warning(sc, whichfork, offset, __return_address); 309 } 310 311 /* Signal an incomplete scrub. */ 312 void 313 xchk_set_incomplete( 314 struct xfs_scrub *sc) 315 { 316 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; 317 trace_xchk_incomplete(sc, __return_address); 318 } 319 320 /* 321 * rmap scrubbing -- compute the number of blocks with a given owner, 322 * at least according to the reverse mapping data. 323 */ 324 325 struct xchk_rmap_ownedby_info { 326 const struct xfs_owner_info *oinfo; 327 xfs_filblks_t *blocks; 328 }; 329 330 STATIC int 331 xchk_count_rmap_ownedby_irec( 332 struct xfs_btree_cur *cur, 333 struct xfs_rmap_irec *rec, 334 void *priv) 335 { 336 struct xchk_rmap_ownedby_info *sroi = priv; 337 bool irec_attr; 338 bool oinfo_attr; 339 340 irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; 341 oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; 342 343 if (rec->rm_owner != sroi->oinfo->oi_owner) 344 return 0; 345 346 if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) 347 (*sroi->blocks) += rec->rm_blockcount; 348 349 return 0; 350 } 351 352 /* 353 * Calculate the number of blocks the rmap thinks are owned by something. 354 * The caller should pass us an rmapbt cursor. 355 */ 356 int 357 xchk_count_rmap_ownedby_ag( 358 struct xfs_scrub *sc, 359 struct xfs_btree_cur *cur, 360 const struct xfs_owner_info *oinfo, 361 xfs_filblks_t *blocks) 362 { 363 struct xchk_rmap_ownedby_info sroi = { 364 .oinfo = oinfo, 365 .blocks = blocks, 366 }; 367 368 *blocks = 0; 369 return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec, 370 &sroi); 371 } 372 373 /* 374 * AG scrubbing 375 * 376 * These helpers facilitate locking an allocation group's header 377 * buffers, setting up cursors for all btrees that are present, and 378 * cleaning everything up once we're through. 379 */ 380 381 /* Decide if we want to return an AG header read failure. */ 382 static inline bool 383 want_ag_read_header_failure( 384 struct xfs_scrub *sc, 385 unsigned int type) 386 { 387 /* Return all AG header read failures when scanning btrees. */ 388 if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF && 389 sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL && 390 sc->sm->sm_type != XFS_SCRUB_TYPE_AGI) 391 return true; 392 /* 393 * If we're scanning a given type of AG header, we only want to 394 * see read failures from that specific header. We'd like the 395 * other headers to cross-check them, but this isn't required. 396 */ 397 if (sc->sm->sm_type == type) 398 return true; 399 return false; 400 } 401 402 /* 403 * Grab all the headers for an AG. 404 * 405 * The headers should be released by xchk_ag_free, but as a fail 406 * safe we attach all the buffers we grab to the scrub transaction so 407 * they'll all be freed when we cancel it. 408 */ 409 int 410 xchk_ag_read_headers( 411 struct xfs_scrub *sc, 412 xfs_agnumber_t agno, 413 struct xfs_buf **agi, 414 struct xfs_buf **agf, 415 struct xfs_buf **agfl) 416 { 417 struct xfs_mount *mp = sc->mp; 418 int error; 419 420 error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi); 421 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) 422 goto out; 423 424 error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf); 425 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) 426 goto out; 427 428 error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); 429 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) 430 goto out; 431 error = 0; 432 out: 433 return error; 434 } 435 436 /* Release all the AG btree cursors. */ 437 void 438 xchk_ag_btcur_free( 439 struct xchk_ag *sa) 440 { 441 if (sa->refc_cur) 442 xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR); 443 if (sa->rmap_cur) 444 xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR); 445 if (sa->fino_cur) 446 xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR); 447 if (sa->ino_cur) 448 xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR); 449 if (sa->cnt_cur) 450 xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR); 451 if (sa->bno_cur) 452 xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR); 453 454 sa->refc_cur = NULL; 455 sa->rmap_cur = NULL; 456 sa->fino_cur = NULL; 457 sa->ino_cur = NULL; 458 sa->bno_cur = NULL; 459 sa->cnt_cur = NULL; 460 } 461 462 /* Initialize all the btree cursors for an AG. */ 463 int 464 xchk_ag_btcur_init( 465 struct xfs_scrub *sc, 466 struct xchk_ag *sa) 467 { 468 struct xfs_mount *mp = sc->mp; 469 xfs_agnumber_t agno = sa->agno; 470 471 xchk_perag_get(sc->mp, sa); 472 if (sa->agf_bp && 473 xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) { 474 /* Set up a bnobt cursor for cross-referencing. */ 475 sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 476 agno, XFS_BTNUM_BNO); 477 if (!sa->bno_cur) 478 goto err; 479 } 480 481 if (sa->agf_bp && 482 xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) { 483 /* Set up a cntbt cursor for cross-referencing. */ 484 sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 485 agno, XFS_BTNUM_CNT); 486 if (!sa->cnt_cur) 487 goto err; 488 } 489 490 /* Set up a inobt cursor for cross-referencing. */ 491 if (sa->agi_bp && 492 xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { 493 sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, 494 agno, XFS_BTNUM_INO); 495 if (!sa->ino_cur) 496 goto err; 497 } 498 499 /* Set up a finobt cursor for cross-referencing. */ 500 if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb) && 501 xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { 502 sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, 503 agno, XFS_BTNUM_FINO); 504 if (!sa->fino_cur) 505 goto err; 506 } 507 508 /* Set up a rmapbt cursor for cross-referencing. */ 509 if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb) && 510 xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) { 511 sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, 512 agno); 513 if (!sa->rmap_cur) 514 goto err; 515 } 516 517 /* Set up a refcountbt cursor for cross-referencing. */ 518 if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb) && 519 xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) { 520 sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, 521 sa->agf_bp, agno); 522 if (!sa->refc_cur) 523 goto err; 524 } 525 526 return 0; 527 err: 528 return -ENOMEM; 529 } 530 531 /* Release the AG header context and btree cursors. */ 532 void 533 xchk_ag_free( 534 struct xfs_scrub *sc, 535 struct xchk_ag *sa) 536 { 537 xchk_ag_btcur_free(sa); 538 if (sa->agfl_bp) { 539 xfs_trans_brelse(sc->tp, sa->agfl_bp); 540 sa->agfl_bp = NULL; 541 } 542 if (sa->agf_bp) { 543 xfs_trans_brelse(sc->tp, sa->agf_bp); 544 sa->agf_bp = NULL; 545 } 546 if (sa->agi_bp) { 547 xfs_trans_brelse(sc->tp, sa->agi_bp); 548 sa->agi_bp = NULL; 549 } 550 if (sa->pag) { 551 xfs_perag_put(sa->pag); 552 sa->pag = NULL; 553 } 554 sa->agno = NULLAGNUMBER; 555 } 556 557 /* 558 * For scrub, grab the AGI and the AGF headers, in that order. Locking 559 * order requires us to get the AGI before the AGF. We use the 560 * transaction to avoid deadlocking on crosslinked metadata buffers; 561 * either the caller passes one in (bmap scrub) or we have to create a 562 * transaction ourselves. 563 */ 564 int 565 xchk_ag_init( 566 struct xfs_scrub *sc, 567 xfs_agnumber_t agno, 568 struct xchk_ag *sa) 569 { 570 int error; 571 572 sa->agno = agno; 573 error = xchk_ag_read_headers(sc, agno, &sa->agi_bp, 574 &sa->agf_bp, &sa->agfl_bp); 575 if (error) 576 return error; 577 578 return xchk_ag_btcur_init(sc, sa); 579 } 580 581 /* 582 * Grab the per-ag structure if we haven't already gotten it. Teardown of the 583 * xchk_ag will release it for us. 584 */ 585 void 586 xchk_perag_get( 587 struct xfs_mount *mp, 588 struct xchk_ag *sa) 589 { 590 if (!sa->pag) 591 sa->pag = xfs_perag_get(mp, sa->agno); 592 } 593 594 /* Per-scrubber setup functions */ 595 596 /* 597 * Grab an empty transaction so that we can re-grab locked buffers if 598 * one of our btrees turns out to be cyclic. 599 * 600 * If we're going to repair something, we need to ask for the largest possible 601 * log reservation so that we can handle the worst case scenario for metadata 602 * updates while rebuilding a metadata item. We also need to reserve as many 603 * blocks in the head transaction as we think we're going to need to rebuild 604 * the metadata object. 605 */ 606 int 607 xchk_trans_alloc( 608 struct xfs_scrub *sc, 609 uint resblks) 610 { 611 if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 612 return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, 613 resblks, 0, 0, &sc->tp); 614 615 return xfs_trans_alloc_empty(sc->mp, &sc->tp); 616 } 617 618 /* Set us up with a transaction and an empty context. */ 619 int 620 xchk_setup_fs( 621 struct xfs_scrub *sc, 622 struct xfs_inode *ip) 623 { 624 uint resblks; 625 626 resblks = xrep_calc_ag_resblks(sc); 627 return xchk_trans_alloc(sc, resblks); 628 } 629 630 /* Set us up with AG headers and btree cursors. */ 631 int 632 xchk_setup_ag_btree( 633 struct xfs_scrub *sc, 634 struct xfs_inode *ip, 635 bool force_log) 636 { 637 struct xfs_mount *mp = sc->mp; 638 int error; 639 640 /* 641 * If the caller asks us to checkpont the log, do so. This 642 * expensive operation should be performed infrequently and only 643 * as a last resort. Any caller that sets force_log should 644 * document why they need to do so. 645 */ 646 if (force_log) { 647 error = xchk_checkpoint_log(mp); 648 if (error) 649 return error; 650 } 651 652 error = xchk_setup_fs(sc, ip); 653 if (error) 654 return error; 655 656 return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa); 657 } 658 659 /* Push everything out of the log onto disk. */ 660 int 661 xchk_checkpoint_log( 662 struct xfs_mount *mp) 663 { 664 int error; 665 666 error = xfs_log_force(mp, XFS_LOG_SYNC); 667 if (error) 668 return error; 669 xfs_ail_push_all_sync(mp->m_ail); 670 return 0; 671 } 672 673 /* 674 * Given an inode and the scrub control structure, grab either the 675 * inode referenced in the control structure or the inode passed in. 676 * The inode is not locked. 677 */ 678 int 679 xchk_get_inode( 680 struct xfs_scrub *sc, 681 struct xfs_inode *ip_in) 682 { 683 struct xfs_imap imap; 684 struct xfs_mount *mp = sc->mp; 685 struct xfs_inode *ip = NULL; 686 int error; 687 688 /* We want to scan the inode we already had opened. */ 689 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { 690 sc->ip = ip_in; 691 return 0; 692 } 693 694 /* Look up the inode, see if the generation number matches. */ 695 if (xfs_internal_inum(mp, sc->sm->sm_ino)) 696 return -ENOENT; 697 error = xfs_iget(mp, NULL, sc->sm->sm_ino, 698 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); 699 switch (error) { 700 case -ENOENT: 701 /* Inode doesn't exist, just bail out. */ 702 return error; 703 case 0: 704 /* Got an inode, continue. */ 705 break; 706 case -EINVAL: 707 /* 708 * -EINVAL with IGET_UNTRUSTED could mean one of several 709 * things: userspace gave us an inode number that doesn't 710 * correspond to fs space, or doesn't have an inobt entry; 711 * or it could simply mean that the inode buffer failed the 712 * read verifiers. 713 * 714 * Try just the inode mapping lookup -- if it succeeds, then 715 * the inode buffer verifier failed and something needs fixing. 716 * Otherwise, we really couldn't find it so tell userspace 717 * that it no longer exists. 718 */ 719 error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, 720 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); 721 if (error) 722 return -ENOENT; 723 error = -EFSCORRUPTED; 724 /* fall through */ 725 default: 726 trace_xchk_op_error(sc, 727 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), 728 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 729 error, __return_address); 730 return error; 731 } 732 if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { 733 xfs_irele(ip); 734 return -ENOENT; 735 } 736 737 sc->ip = ip; 738 return 0; 739 } 740 741 /* Set us up to scrub a file's contents. */ 742 int 743 xchk_setup_inode_contents( 744 struct xfs_scrub *sc, 745 struct xfs_inode *ip, 746 unsigned int resblks) 747 { 748 int error; 749 750 error = xchk_get_inode(sc, ip); 751 if (error) 752 return error; 753 754 /* Got the inode, lock it and we're ready to go. */ 755 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 756 xfs_ilock(sc->ip, sc->ilock_flags); 757 error = xchk_trans_alloc(sc, resblks); 758 if (error) 759 goto out; 760 sc->ilock_flags |= XFS_ILOCK_EXCL; 761 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 762 763 out: 764 /* scrub teardown will unlock and release the inode for us */ 765 return error; 766 } 767 768 /* 769 * Predicate that decides if we need to evaluate the cross-reference check. 770 * If there was an error accessing the cross-reference btree, just delete 771 * the cursor and skip the check. 772 */ 773 bool 774 xchk_should_check_xref( 775 struct xfs_scrub *sc, 776 int *error, 777 struct xfs_btree_cur **curpp) 778 { 779 /* No point in xref if we already know we're corrupt. */ 780 if (xchk_skip_xref(sc->sm)) 781 return false; 782 783 if (*error == 0) 784 return true; 785 786 if (curpp) { 787 /* If we've already given up on xref, just bail out. */ 788 if (!*curpp) 789 return false; 790 791 /* xref error, delete cursor and bail out. */ 792 xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); 793 *curpp = NULL; 794 } 795 796 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; 797 trace_xchk_xref_error(sc, *error, __return_address); 798 799 /* 800 * Errors encountered during cross-referencing with another 801 * data structure should not cause this scrubber to abort. 802 */ 803 *error = 0; 804 return false; 805 } 806 807 /* Run the structure verifiers on in-memory buffers to detect bad memory. */ 808 void 809 xchk_buffer_recheck( 810 struct xfs_scrub *sc, 811 struct xfs_buf *bp) 812 { 813 xfs_failaddr_t fa; 814 815 if (bp->b_ops == NULL) { 816 xchk_block_set_corrupt(sc, bp); 817 return; 818 } 819 if (bp->b_ops->verify_struct == NULL) { 820 xchk_set_incomplete(sc); 821 return; 822 } 823 fa = bp->b_ops->verify_struct(bp); 824 if (!fa) 825 return; 826 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 827 trace_xchk_block_error(sc, bp->b_bn, fa); 828 } 829 830 /* 831 * Scrub the attr/data forks of a metadata inode. The metadata inode must be 832 * pointed to by sc->ip and the ILOCK must be held. 833 */ 834 int 835 xchk_metadata_inode_forks( 836 struct xfs_scrub *sc) 837 { 838 __u32 smtype; 839 bool shared; 840 int error; 841 842 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 843 return 0; 844 845 /* Metadata inodes don't live on the rt device. */ 846 if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) { 847 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 848 return 0; 849 } 850 851 /* They should never participate in reflink. */ 852 if (xfs_is_reflink_inode(sc->ip)) { 853 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 854 return 0; 855 } 856 857 /* They also should never have extended attributes. */ 858 if (xfs_inode_hasattr(sc->ip)) { 859 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 860 return 0; 861 } 862 863 /* Invoke the data fork scrubber. */ 864 smtype = sc->sm->sm_type; 865 sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD; 866 error = xchk_bmap_data(sc); 867 sc->sm->sm_type = smtype; 868 if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 869 return error; 870 871 /* Look for incorrect shared blocks. */ 872 if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) { 873 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 874 &shared); 875 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, 876 &error)) 877 return error; 878 if (shared) 879 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 880 } 881 882 return error; 883 } 884 885 /* 886 * Try to lock an inode in violation of the usual locking order rules. For 887 * example, trying to get the IOLOCK while in transaction context, or just 888 * plain breaking AG-order or inode-order inode locking rules. Either way, 889 * the only way to avoid an ABBA deadlock is to use trylock and back off if 890 * we can't. 891 */ 892 int 893 xchk_ilock_inverted( 894 struct xfs_inode *ip, 895 uint lock_mode) 896 { 897 int i; 898 899 for (i = 0; i < 20; i++) { 900 if (xfs_ilock_nowait(ip, lock_mode)) 901 return 0; 902 delay(1); 903 } 904 return -EDEADLOCK; 905 } 906 907 /* Pause background reaping of resources. */ 908 void 909 xchk_stop_reaping( 910 struct xfs_scrub *sc) 911 { 912 sc->flags |= XCHK_REAPING_DISABLED; 913 xfs_stop_block_reaping(sc->mp); 914 } 915 916 /* Restart background reaping of resources. */ 917 void 918 xchk_start_reaping( 919 struct xfs_scrub *sc) 920 { 921 xfs_start_block_reaping(sc->mp); 922 sc->flags &= ~XCHK_REAPING_DISABLED; 923 } 924