1 /* 2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_btree.h" 28 #include "xfs_bit.h" 29 #include "xfs_log_format.h" 30 #include "xfs_trans.h" 31 #include "xfs_sb.h" 32 #include "xfs_inode.h" 33 #include "xfs_icache.h" 34 #include "xfs_itable.h" 35 #include "xfs_alloc.h" 36 #include "xfs_alloc_btree.h" 37 #include "xfs_bmap.h" 38 #include "xfs_bmap_btree.h" 39 #include "xfs_ialloc.h" 40 #include "xfs_ialloc_btree.h" 41 #include "xfs_refcount.h" 42 #include "xfs_refcount_btree.h" 43 #include "xfs_rmap.h" 44 #include "xfs_rmap_btree.h" 45 #include "xfs_log.h" 46 #include "xfs_trans_priv.h" 47 #include "scrub/xfs_scrub.h" 48 #include "scrub/scrub.h" 49 #include "scrub/common.h" 50 #include "scrub/trace.h" 51 #include "scrub/btree.h" 52 53 /* Common code for the metadata scrubbers. */ 54 55 /* 56 * Handling operational errors. 57 * 58 * The *_process_error() family of functions are used to process error return 59 * codes from functions called as part of a scrub operation. 60 * 61 * If there's no error, we return true to tell the caller that it's ok 62 * to move on to the next check in its list. 63 * 64 * For non-verifier errors (e.g. ENOMEM) we return false to tell the 65 * caller that something bad happened, and we preserve *error so that 66 * the caller can return the *error up the stack to userspace. 67 * 68 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting 69 * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words, 70 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT, 71 * not via return codes. We return false to tell the caller that 72 * something bad happened. Since the error has been cleared, the caller 73 * will (presumably) return that zero and scrubbing will move on to 74 * whatever's next. 75 * 76 * ftrace can be used to record the precise metadata location and the 77 * approximate code location of the failed operation. 78 */ 79 80 /* Check for operational errors. */ 81 bool 82 xfs_scrub_process_error( 83 struct xfs_scrub_context *sc, 84 xfs_agnumber_t agno, 85 xfs_agblock_t bno, 86 int *error) 87 { 88 switch (*error) { 89 case 0: 90 return true; 91 case -EDEADLOCK: 92 /* Used to restart an op with deadlock avoidance. */ 93 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 94 break; 95 case -EFSBADCRC: 96 case -EFSCORRUPTED: 97 /* Note the badness but don't abort. */ 98 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 99 *error = 0; 100 /* fall through */ 101 default: 102 trace_xfs_scrub_op_error(sc, agno, bno, *error, 103 __return_address); 104 break; 105 } 106 return false; 107 } 108 109 /* Check for operational errors for a file offset. */ 110 bool 111 xfs_scrub_fblock_process_error( 112 struct xfs_scrub_context *sc, 113 int whichfork, 114 xfs_fileoff_t offset, 115 int *error) 116 { 117 switch (*error) { 118 case 0: 119 return true; 120 case -EDEADLOCK: 121 /* Used to restart an op with deadlock avoidance. */ 122 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 123 break; 124 case -EFSBADCRC: 125 case -EFSCORRUPTED: 126 /* Note the badness but don't abort. */ 127 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 128 *error = 0; 129 /* fall through */ 130 default: 131 trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error, 132 __return_address); 133 break; 134 } 135 return false; 136 } 137 138 /* 139 * Handling scrub corruption/optimization/warning checks. 140 * 141 * The *_set_{corrupt,preen,warning}() family of functions are used to 142 * record the presence of metadata that is incorrect (corrupt), could be 143 * optimized somehow (preen), or should be flagged for administrative 144 * review but is not incorrect (warn). 145 * 146 * ftrace can be used to record the precise metadata location and 147 * approximate code location of the failed check. 148 */ 149 150 /* Record a block which could be optimized. */ 151 void 152 xfs_scrub_block_set_preen( 153 struct xfs_scrub_context *sc, 154 struct xfs_buf *bp) 155 { 156 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 157 trace_xfs_scrub_block_preen(sc, bp->b_bn, __return_address); 158 } 159 160 /* 161 * Record an inode which could be optimized. The trace data will 162 * include the block given by bp if bp is given; otherwise it will use 163 * the block location of the inode record itself. 164 */ 165 void 166 xfs_scrub_ino_set_preen( 167 struct xfs_scrub_context *sc, 168 xfs_ino_t ino, 169 struct xfs_buf *bp) 170 { 171 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 172 trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0, 173 __return_address); 174 } 175 176 /* Record a corrupt block. */ 177 void 178 xfs_scrub_block_set_corrupt( 179 struct xfs_scrub_context *sc, 180 struct xfs_buf *bp) 181 { 182 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 183 trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address); 184 } 185 186 /* 187 * Record a corrupt inode. The trace data will include the block given 188 * by bp if bp is given; otherwise it will use the block location of the 189 * inode record itself. 190 */ 191 void 192 xfs_scrub_ino_set_corrupt( 193 struct xfs_scrub_context *sc, 194 xfs_ino_t ino, 195 struct xfs_buf *bp) 196 { 197 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 198 trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address); 199 } 200 201 /* Record corruption in a block indexed by a file fork. */ 202 void 203 xfs_scrub_fblock_set_corrupt( 204 struct xfs_scrub_context *sc, 205 int whichfork, 206 xfs_fileoff_t offset) 207 { 208 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 209 trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address); 210 } 211 212 /* 213 * Warn about inodes that need administrative review but is not 214 * incorrect. 215 */ 216 void 217 xfs_scrub_ino_set_warning( 218 struct xfs_scrub_context *sc, 219 xfs_ino_t ino, 220 struct xfs_buf *bp) 221 { 222 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; 223 trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0, 224 __return_address); 225 } 226 227 /* Warn about a block indexed by a file fork that needs review. */ 228 void 229 xfs_scrub_fblock_set_warning( 230 struct xfs_scrub_context *sc, 231 int whichfork, 232 xfs_fileoff_t offset) 233 { 234 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; 235 trace_xfs_scrub_fblock_warning(sc, whichfork, offset, __return_address); 236 } 237 238 /* Signal an incomplete scrub. */ 239 void 240 xfs_scrub_set_incomplete( 241 struct xfs_scrub_context *sc) 242 { 243 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; 244 trace_xfs_scrub_incomplete(sc, __return_address); 245 } 246 247 /* 248 * AG scrubbing 249 * 250 * These helpers facilitate locking an allocation group's header 251 * buffers, setting up cursors for all btrees that are present, and 252 * cleaning everything up once we're through. 253 */ 254 255 /* Decide if we want to return an AG header read failure. */ 256 static inline bool 257 want_ag_read_header_failure( 258 struct xfs_scrub_context *sc, 259 unsigned int type) 260 { 261 /* Return all AG header read failures when scanning btrees. */ 262 if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF && 263 sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL && 264 sc->sm->sm_type != XFS_SCRUB_TYPE_AGI) 265 return true; 266 /* 267 * If we're scanning a given type of AG header, we only want to 268 * see read failures from that specific header. We'd like the 269 * other headers to cross-check them, but this isn't required. 270 */ 271 if (sc->sm->sm_type == type) 272 return true; 273 return false; 274 } 275 276 /* 277 * Grab all the headers for an AG. 278 * 279 * The headers should be released by xfs_scrub_ag_free, but as a fail 280 * safe we attach all the buffers we grab to the scrub transaction so 281 * they'll all be freed when we cancel it. 282 */ 283 int 284 xfs_scrub_ag_read_headers( 285 struct xfs_scrub_context *sc, 286 xfs_agnumber_t agno, 287 struct xfs_buf **agi, 288 struct xfs_buf **agf, 289 struct xfs_buf **agfl) 290 { 291 struct xfs_mount *mp = sc->mp; 292 int error; 293 294 error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi); 295 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) 296 goto out; 297 298 error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf); 299 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) 300 goto out; 301 302 error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl); 303 if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) 304 goto out; 305 306 out: 307 return error; 308 } 309 310 /* Release all the AG btree cursors. */ 311 void 312 xfs_scrub_ag_btcur_free( 313 struct xfs_scrub_ag *sa) 314 { 315 if (sa->refc_cur) 316 xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR); 317 if (sa->rmap_cur) 318 xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR); 319 if (sa->fino_cur) 320 xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR); 321 if (sa->ino_cur) 322 xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR); 323 if (sa->cnt_cur) 324 xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR); 325 if (sa->bno_cur) 326 xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR); 327 328 sa->refc_cur = NULL; 329 sa->rmap_cur = NULL; 330 sa->fino_cur = NULL; 331 sa->ino_cur = NULL; 332 sa->bno_cur = NULL; 333 sa->cnt_cur = NULL; 334 } 335 336 /* Initialize all the btree cursors for an AG. */ 337 int 338 xfs_scrub_ag_btcur_init( 339 struct xfs_scrub_context *sc, 340 struct xfs_scrub_ag *sa) 341 { 342 struct xfs_mount *mp = sc->mp; 343 xfs_agnumber_t agno = sa->agno; 344 345 if (sa->agf_bp) { 346 /* Set up a bnobt cursor for cross-referencing. */ 347 sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 348 agno, XFS_BTNUM_BNO); 349 if (!sa->bno_cur) 350 goto err; 351 352 /* Set up a cntbt cursor for cross-referencing. */ 353 sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 354 agno, XFS_BTNUM_CNT); 355 if (!sa->cnt_cur) 356 goto err; 357 } 358 359 /* Set up a inobt cursor for cross-referencing. */ 360 if (sa->agi_bp) { 361 sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, 362 agno, XFS_BTNUM_INO); 363 if (!sa->ino_cur) 364 goto err; 365 } 366 367 /* Set up a finobt cursor for cross-referencing. */ 368 if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) { 369 sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, 370 agno, XFS_BTNUM_FINO); 371 if (!sa->fino_cur) 372 goto err; 373 } 374 375 /* Set up a rmapbt cursor for cross-referencing. */ 376 if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) { 377 sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, 378 agno); 379 if (!sa->rmap_cur) 380 goto err; 381 } 382 383 /* Set up a refcountbt cursor for cross-referencing. */ 384 if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) { 385 sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, 386 sa->agf_bp, agno, NULL); 387 if (!sa->refc_cur) 388 goto err; 389 } 390 391 return 0; 392 err: 393 return -ENOMEM; 394 } 395 396 /* Release the AG header context and btree cursors. */ 397 void 398 xfs_scrub_ag_free( 399 struct xfs_scrub_context *sc, 400 struct xfs_scrub_ag *sa) 401 { 402 xfs_scrub_ag_btcur_free(sa); 403 if (sa->agfl_bp) { 404 xfs_trans_brelse(sc->tp, sa->agfl_bp); 405 sa->agfl_bp = NULL; 406 } 407 if (sa->agf_bp) { 408 xfs_trans_brelse(sc->tp, sa->agf_bp); 409 sa->agf_bp = NULL; 410 } 411 if (sa->agi_bp) { 412 xfs_trans_brelse(sc->tp, sa->agi_bp); 413 sa->agi_bp = NULL; 414 } 415 sa->agno = NULLAGNUMBER; 416 } 417 418 /* 419 * For scrub, grab the AGI and the AGF headers, in that order. Locking 420 * order requires us to get the AGI before the AGF. We use the 421 * transaction to avoid deadlocking on crosslinked metadata buffers; 422 * either the caller passes one in (bmap scrub) or we have to create a 423 * transaction ourselves. 424 */ 425 int 426 xfs_scrub_ag_init( 427 struct xfs_scrub_context *sc, 428 xfs_agnumber_t agno, 429 struct xfs_scrub_ag *sa) 430 { 431 int error; 432 433 sa->agno = agno; 434 error = xfs_scrub_ag_read_headers(sc, agno, &sa->agi_bp, 435 &sa->agf_bp, &sa->agfl_bp); 436 if (error) 437 return error; 438 439 return xfs_scrub_ag_btcur_init(sc, sa); 440 } 441 442 /* Per-scrubber setup functions */ 443 444 /* Set us up with a transaction and an empty context. */ 445 int 446 xfs_scrub_setup_fs( 447 struct xfs_scrub_context *sc, 448 struct xfs_inode *ip) 449 { 450 return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp); 451 } 452 453 /* Set us up with AG headers and btree cursors. */ 454 int 455 xfs_scrub_setup_ag_btree( 456 struct xfs_scrub_context *sc, 457 struct xfs_inode *ip, 458 bool force_log) 459 { 460 struct xfs_mount *mp = sc->mp; 461 int error; 462 463 /* 464 * If the caller asks us to checkpont the log, do so. This 465 * expensive operation should be performed infrequently and only 466 * as a last resort. Any caller that sets force_log should 467 * document why they need to do so. 468 */ 469 if (force_log) { 470 error = xfs_scrub_checkpoint_log(mp); 471 if (error) 472 return error; 473 } 474 475 error = xfs_scrub_setup_ag_header(sc, ip); 476 if (error) 477 return error; 478 479 return xfs_scrub_ag_init(sc, sc->sm->sm_agno, &sc->sa); 480 } 481 482 /* Push everything out of the log onto disk. */ 483 int 484 xfs_scrub_checkpoint_log( 485 struct xfs_mount *mp) 486 { 487 int error; 488 489 error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); 490 if (error) 491 return error; 492 xfs_ail_push_all_sync(mp->m_ail); 493 return 0; 494 } 495 496 /* 497 * Given an inode and the scrub control structure, grab either the 498 * inode referenced in the control structure or the inode passed in. 499 * The inode is not locked. 500 */ 501 int 502 xfs_scrub_get_inode( 503 struct xfs_scrub_context *sc, 504 struct xfs_inode *ip_in) 505 { 506 struct xfs_mount *mp = sc->mp; 507 struct xfs_inode *ip = NULL; 508 int error; 509 510 /* 511 * If userspace passed us an AG number or a generation number 512 * without an inode number, they haven't got a clue so bail out 513 * immediately. 514 */ 515 if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino)) 516 return -EINVAL; 517 518 /* We want to scan the inode we already had opened. */ 519 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { 520 sc->ip = ip_in; 521 return 0; 522 } 523 524 /* Look up the inode, see if the generation number matches. */ 525 if (xfs_internal_inum(mp, sc->sm->sm_ino)) 526 return -ENOENT; 527 error = xfs_iget(mp, NULL, sc->sm->sm_ino, 528 XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); 529 if (error == -ENOENT || error == -EINVAL) { 530 /* inode doesn't exist... */ 531 return -ENOENT; 532 } else if (error) { 533 trace_xfs_scrub_op_error(sc, 534 XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), 535 XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 536 error, __return_address); 537 return error; 538 } 539 if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { 540 iput(VFS_I(ip)); 541 return -ENOENT; 542 } 543 544 sc->ip = ip; 545 return 0; 546 } 547 548 /* Set us up to scrub a file's contents. */ 549 int 550 xfs_scrub_setup_inode_contents( 551 struct xfs_scrub_context *sc, 552 struct xfs_inode *ip, 553 unsigned int resblks) 554 { 555 struct xfs_mount *mp = sc->mp; 556 int error; 557 558 error = xfs_scrub_get_inode(sc, ip); 559 if (error) 560 return error; 561 562 /* Got the inode, lock it and we're ready to go. */ 563 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 564 xfs_ilock(sc->ip, sc->ilock_flags); 565 error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 566 if (error) 567 goto out; 568 sc->ilock_flags |= XFS_ILOCK_EXCL; 569 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 570 571 out: 572 /* scrub teardown will unlock and release the inode for us */ 573 return error; 574 } 575