1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_bit.h" 14 #include "xfs_log_format.h" 15 #include "xfs_trans.h" 16 #include "xfs_inode.h" 17 #include "xfs_alloc.h" 18 #include "xfs_bmap.h" 19 #include "xfs_bmap_btree.h" 20 #include "xfs_rmap.h" 21 #include "xfs_rmap_btree.h" 22 #include "scrub/scrub.h" 23 #include "scrub/common.h" 24 #include "scrub/btree.h" 25 #include "xfs_ag.h" 26 27 /* Set us up with an inode's bmap. */ 28 int 29 xchk_setup_inode_bmap( 30 struct xfs_scrub *sc) 31 { 32 int error; 33 34 if (xchk_need_intent_drain(sc)) 35 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 36 37 error = xchk_iget_for_scrubbing(sc); 38 if (error) 39 goto out; 40 41 xchk_ilock(sc, XFS_IOLOCK_EXCL); 42 43 /* 44 * We don't want any ephemeral data/cow fork updates sitting around 45 * while we inspect block mappings, so wait for directio to finish 46 * and flush dirty data if we have delalloc reservations. 47 */ 48 if (S_ISREG(VFS_I(sc->ip)->i_mode) && 49 sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { 50 struct address_space *mapping = VFS_I(sc->ip)->i_mapping; 51 52 xchk_ilock(sc, XFS_MMAPLOCK_EXCL); 53 54 inode_dio_wait(VFS_I(sc->ip)); 55 56 /* 57 * Try to flush all incore state to disk before we examine the 58 * space mappings for the data fork. Leave accumulated errors 59 * in the mapping for the writer threads to consume. 60 * 61 * On ENOSPC or EIO writeback errors, we continue into the 62 * extent mapping checks because write failures do not 63 * necessarily imply anything about the correctness of the file 64 * metadata. The metadata and the file data could be on 65 * completely separate devices; a media failure might only 66 * affect a subset of the disk, etc. We can handle delalloc 67 * extents in the scrubber, so leaving them in memory is fine. 68 */ 69 error = filemap_fdatawrite(mapping); 70 if (!error) 71 error = filemap_fdatawait_keep_errors(mapping); 72 if (error && (error != -ENOSPC && error != -EIO)) 73 goto out; 74 } 75 76 /* Got the inode, lock it and we're ready to go. */ 77 error = xchk_trans_alloc(sc, 0); 78 if (error) 79 goto out; 80 81 xchk_ilock(sc, XFS_ILOCK_EXCL); 82 out: 83 /* scrub teardown will unlock and release the inode */ 84 return error; 85 } 86 87 /* 88 * Inode fork block mapping (BMBT) scrubber. 89 * More complex than the others because we have to scrub 90 * all the extents regardless of whether or not the fork 91 * is in btree format. 92 */ 93 94 struct xchk_bmap_info { 95 struct xfs_scrub *sc; 96 97 /* Incore extent tree cursor */ 98 struct xfs_iext_cursor icur; 99 100 /* Previous fork mapping that we examined */ 101 struct xfs_bmbt_irec prev_rec; 102 103 /* Is this a realtime fork? */ 104 bool is_rt; 105 106 /* May mappings point to shared space? */ 107 bool is_shared; 108 109 /* Was the incore extent tree loaded? */ 110 bool was_loaded; 111 112 /* Which inode fork are we checking? */ 113 int whichfork; 114 }; 115 116 /* Look for a corresponding rmap for this irec. */ 117 static inline bool 118 xchk_bmap_get_rmap( 119 struct xchk_bmap_info *info, 120 struct xfs_bmbt_irec *irec, 121 xfs_agblock_t agbno, 122 uint64_t owner, 123 struct xfs_rmap_irec *rmap) 124 { 125 xfs_fileoff_t offset; 126 unsigned int rflags = 0; 127 int has_rmap; 128 int error; 129 130 if (info->whichfork == XFS_ATTR_FORK) 131 rflags |= XFS_RMAP_ATTR_FORK; 132 if (irec->br_state == XFS_EXT_UNWRITTEN) 133 rflags |= XFS_RMAP_UNWRITTEN; 134 135 /* 136 * CoW staging extents are owned (on disk) by the refcountbt, so 137 * their rmaps do not have offsets. 138 */ 139 if (info->whichfork == XFS_COW_FORK) 140 offset = 0; 141 else 142 offset = irec->br_startoff; 143 144 /* 145 * If the caller thinks this could be a shared bmbt extent (IOWs, 146 * any data fork extent of a reflink inode) then we have to use the 147 * range rmap lookup to make sure we get the correct owner/offset. 148 */ 149 if (info->is_shared) { 150 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, 151 owner, offset, rflags, rmap, &has_rmap); 152 } else { 153 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 154 owner, offset, rflags, rmap, &has_rmap); 155 } 156 if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) 157 return false; 158 159 if (!has_rmap) 160 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 161 irec->br_startoff); 162 return has_rmap; 163 } 164 165 /* Make sure that we have rmapbt records for this data/attr fork extent. */ 166 STATIC void 167 xchk_bmap_xref_rmap( 168 struct xchk_bmap_info *info, 169 struct xfs_bmbt_irec *irec, 170 xfs_agblock_t agbno) 171 { 172 struct xfs_rmap_irec rmap; 173 unsigned long long rmap_end; 174 uint64_t owner = info->sc->ip->i_ino; 175 176 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 177 return; 178 179 /* Find the rmap record for this irec. */ 180 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 181 return; 182 183 /* 184 * The rmap must be an exact match for this incore file mapping record, 185 * which may have arisen from multiple ondisk records. 186 */ 187 if (rmap.rm_startblock != agbno) 188 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 189 irec->br_startoff); 190 191 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 192 if (rmap_end != agbno + irec->br_blockcount) 193 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 194 irec->br_startoff); 195 196 /* Check the logical offsets. */ 197 if (rmap.rm_offset != irec->br_startoff) 198 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 199 irec->br_startoff); 200 201 rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount; 202 if (rmap_end != irec->br_startoff + irec->br_blockcount) 203 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 204 irec->br_startoff); 205 206 /* Check the owner */ 207 if (rmap.rm_owner != owner) 208 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 209 irec->br_startoff); 210 211 /* 212 * Check for discrepancies between the unwritten flag in the irec and 213 * the rmap. Note that the (in-memory) CoW fork distinguishes between 214 * unwritten and written extents, but we don't track that in the rmap 215 * records because the blocks are owned (on-disk) by the refcountbt, 216 * which doesn't track unwritten state. 217 */ 218 if (!!(irec->br_state == XFS_EXT_UNWRITTEN) != 219 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) 220 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 221 irec->br_startoff); 222 223 if (!!(info->whichfork == XFS_ATTR_FORK) != 224 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) 225 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 226 irec->br_startoff); 227 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 228 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 229 irec->br_startoff); 230 } 231 232 /* Make sure that we have rmapbt records for this COW fork extent. */ 233 STATIC void 234 xchk_bmap_xref_rmap_cow( 235 struct xchk_bmap_info *info, 236 struct xfs_bmbt_irec *irec, 237 xfs_agblock_t agbno) 238 { 239 struct xfs_rmap_irec rmap; 240 unsigned long long rmap_end; 241 uint64_t owner = XFS_RMAP_OWN_COW; 242 243 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 244 return; 245 246 /* Find the rmap record for this irec. */ 247 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 248 return; 249 250 /* 251 * CoW staging extents are owned by the refcount btree, so the rmap 252 * can start before and end after the physical space allocated to this 253 * mapping. There are no offsets to check. 254 */ 255 if (rmap.rm_startblock > agbno) 256 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 257 irec->br_startoff); 258 259 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 260 if (rmap_end < agbno + irec->br_blockcount) 261 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 262 irec->br_startoff); 263 264 /* Check the owner */ 265 if (rmap.rm_owner != owner) 266 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 267 irec->br_startoff); 268 269 /* 270 * No flags allowed. Note that the (in-memory) CoW fork distinguishes 271 * between unwritten and written extents, but we don't track that in 272 * the rmap records because the blocks are owned (on-disk) by the 273 * refcountbt, which doesn't track unwritten state. 274 */ 275 if (rmap.rm_flags & XFS_RMAP_ATTR_FORK) 276 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 277 irec->br_startoff); 278 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 279 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 280 irec->br_startoff); 281 if (rmap.rm_flags & XFS_RMAP_UNWRITTEN) 282 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 283 irec->br_startoff); 284 } 285 286 /* Cross-reference a single rtdev extent record. */ 287 STATIC void 288 xchk_bmap_rt_iextent_xref( 289 struct xfs_inode *ip, 290 struct xchk_bmap_info *info, 291 struct xfs_bmbt_irec *irec) 292 { 293 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, 294 irec->br_blockcount); 295 } 296 297 /* Cross-reference a single datadev extent record. */ 298 STATIC void 299 xchk_bmap_iextent_xref( 300 struct xfs_inode *ip, 301 struct xchk_bmap_info *info, 302 struct xfs_bmbt_irec *irec) 303 { 304 struct xfs_owner_info oinfo; 305 struct xfs_mount *mp = info->sc->mp; 306 xfs_agnumber_t agno; 307 xfs_agblock_t agbno; 308 xfs_extlen_t len; 309 int error; 310 311 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); 312 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); 313 len = irec->br_blockcount; 314 315 error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa); 316 if (!xchk_fblock_process_error(info->sc, info->whichfork, 317 irec->br_startoff, &error)) 318 goto out_free; 319 320 xchk_xref_is_used_space(info->sc, agbno, len); 321 xchk_xref_is_not_inode_chunk(info->sc, agbno, len); 322 switch (info->whichfork) { 323 case XFS_DATA_FORK: 324 xchk_bmap_xref_rmap(info, irec, agbno); 325 if (!xfs_is_reflink_inode(info->sc->ip)) { 326 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 327 info->whichfork, irec->br_startoff); 328 xchk_xref_is_only_owned_by(info->sc, agbno, 329 irec->br_blockcount, &oinfo); 330 xchk_xref_is_not_shared(info->sc, agbno, 331 irec->br_blockcount); 332 } 333 xchk_xref_is_not_cow_staging(info->sc, agbno, 334 irec->br_blockcount); 335 break; 336 case XFS_ATTR_FORK: 337 xchk_bmap_xref_rmap(info, irec, agbno); 338 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 339 info->whichfork, irec->br_startoff); 340 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 341 &oinfo); 342 xchk_xref_is_not_shared(info->sc, agbno, 343 irec->br_blockcount); 344 xchk_xref_is_not_cow_staging(info->sc, agbno, 345 irec->br_blockcount); 346 break; 347 case XFS_COW_FORK: 348 xchk_bmap_xref_rmap_cow(info, irec, agbno); 349 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 350 &XFS_RMAP_OINFO_COW); 351 xchk_xref_is_cow_staging(info->sc, agbno, 352 irec->br_blockcount); 353 xchk_xref_is_not_shared(info->sc, agbno, 354 irec->br_blockcount); 355 break; 356 } 357 358 out_free: 359 xchk_ag_free(info->sc, &info->sc->sa); 360 } 361 362 /* 363 * Directories and attr forks should never have blocks that can't be addressed 364 * by a xfs_dablk_t. 365 */ 366 STATIC void 367 xchk_bmap_dirattr_extent( 368 struct xfs_inode *ip, 369 struct xchk_bmap_info *info, 370 struct xfs_bmbt_irec *irec) 371 { 372 struct xfs_mount *mp = ip->i_mount; 373 xfs_fileoff_t off; 374 375 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK) 376 return; 377 378 if (!xfs_verify_dablk(mp, irec->br_startoff)) 379 xchk_fblock_set_corrupt(info->sc, info->whichfork, 380 irec->br_startoff); 381 382 off = irec->br_startoff + irec->br_blockcount - 1; 383 if (!xfs_verify_dablk(mp, off)) 384 xchk_fblock_set_corrupt(info->sc, info->whichfork, off); 385 } 386 387 /* Scrub a single extent record. */ 388 STATIC void 389 xchk_bmap_iextent( 390 struct xfs_inode *ip, 391 struct xchk_bmap_info *info, 392 struct xfs_bmbt_irec *irec) 393 { 394 struct xfs_mount *mp = info->sc->mp; 395 396 /* 397 * Check for out-of-order extents. This record could have come 398 * from the incore list, for which there is no ordering check. 399 */ 400 if (irec->br_startoff < info->prev_rec.br_startoff + 401 info->prev_rec.br_blockcount) 402 xchk_fblock_set_corrupt(info->sc, info->whichfork, 403 irec->br_startoff); 404 405 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 406 xchk_fblock_set_corrupt(info->sc, info->whichfork, 407 irec->br_startoff); 408 409 xchk_bmap_dirattr_extent(ip, info, irec); 410 411 /* Make sure the extent points to a valid place. */ 412 if (info->is_rt && 413 !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) 414 xchk_fblock_set_corrupt(info->sc, info->whichfork, 415 irec->br_startoff); 416 if (!info->is_rt && 417 !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) 418 xchk_fblock_set_corrupt(info->sc, info->whichfork, 419 irec->br_startoff); 420 421 /* We don't allow unwritten extents on attr forks. */ 422 if (irec->br_state == XFS_EXT_UNWRITTEN && 423 info->whichfork == XFS_ATTR_FORK) 424 xchk_fblock_set_corrupt(info->sc, info->whichfork, 425 irec->br_startoff); 426 427 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 428 return; 429 430 if (info->is_rt) 431 xchk_bmap_rt_iextent_xref(ip, info, irec); 432 else 433 xchk_bmap_iextent_xref(ip, info, irec); 434 } 435 436 /* Scrub a bmbt record. */ 437 STATIC int 438 xchk_bmapbt_rec( 439 struct xchk_btree *bs, 440 const union xfs_btree_rec *rec) 441 { 442 struct xfs_bmbt_irec irec; 443 struct xfs_bmbt_irec iext_irec; 444 struct xfs_iext_cursor icur; 445 struct xchk_bmap_info *info = bs->private; 446 struct xfs_inode *ip = bs->cur->bc_ino.ip; 447 struct xfs_buf *bp = NULL; 448 struct xfs_btree_block *block; 449 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork); 450 uint64_t owner; 451 int i; 452 453 /* 454 * Check the owners of the btree blocks up to the level below 455 * the root since the verifiers don't do that. 456 */ 457 if (xfs_has_crc(bs->cur->bc_mp) && 458 bs->cur->bc_levels[0].ptr == 1) { 459 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { 460 block = xfs_btree_get_block(bs->cur, i, &bp); 461 owner = be64_to_cpu(block->bb_u.l.bb_owner); 462 if (owner != ip->i_ino) 463 xchk_fblock_set_corrupt(bs->sc, 464 info->whichfork, 0); 465 } 466 } 467 468 /* 469 * Check that the incore extent tree contains an extent that matches 470 * this one exactly. We validate those cached bmaps later, so we don't 471 * need to check them here. If the incore extent tree was just loaded 472 * from disk by the scrubber, we assume that its contents match what's 473 * on disk (we still hold the ILOCK) and skip the equivalence check. 474 */ 475 if (!info->was_loaded) 476 return 0; 477 478 xfs_bmbt_disk_get_all(&rec->bmbt, &irec); 479 if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) { 480 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 481 irec.br_startoff); 482 return 0; 483 } 484 485 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, 486 &iext_irec) || 487 irec.br_startoff != iext_irec.br_startoff || 488 irec.br_startblock != iext_irec.br_startblock || 489 irec.br_blockcount != iext_irec.br_blockcount || 490 irec.br_state != iext_irec.br_state) 491 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 492 irec.br_startoff); 493 return 0; 494 } 495 496 /* Scan the btree records. */ 497 STATIC int 498 xchk_bmap_btree( 499 struct xfs_scrub *sc, 500 int whichfork, 501 struct xchk_bmap_info *info) 502 { 503 struct xfs_owner_info oinfo; 504 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork); 505 struct xfs_mount *mp = sc->mp; 506 struct xfs_inode *ip = sc->ip; 507 struct xfs_btree_cur *cur; 508 int error; 509 510 /* Load the incore bmap cache if it's not loaded. */ 511 info->was_loaded = !xfs_need_iread_extents(ifp); 512 513 error = xfs_iread_extents(sc->tp, ip, whichfork); 514 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 515 goto out; 516 517 /* Check the btree structure. */ 518 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); 519 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 520 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); 521 xfs_btree_del_cursor(cur, error); 522 out: 523 return error; 524 } 525 526 struct xchk_bmap_check_rmap_info { 527 struct xfs_scrub *sc; 528 int whichfork; 529 struct xfs_iext_cursor icur; 530 }; 531 532 /* Can we find bmaps that fit this rmap? */ 533 STATIC int 534 xchk_bmap_check_rmap( 535 struct xfs_btree_cur *cur, 536 const struct xfs_rmap_irec *rec, 537 void *priv) 538 { 539 struct xfs_bmbt_irec irec; 540 struct xfs_rmap_irec check_rec; 541 struct xchk_bmap_check_rmap_info *sbcri = priv; 542 struct xfs_ifork *ifp; 543 struct xfs_scrub *sc = sbcri->sc; 544 bool have_map; 545 546 /* Is this even the right fork? */ 547 if (rec->rm_owner != sc->ip->i_ino) 548 return 0; 549 if ((sbcri->whichfork == XFS_ATTR_FORK) ^ 550 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) 551 return 0; 552 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) 553 return 0; 554 555 /* Now look up the bmbt record. */ 556 ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork); 557 if (!ifp) { 558 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 559 rec->rm_offset); 560 goto out; 561 } 562 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, 563 &sbcri->icur, &irec); 564 if (!have_map) 565 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 566 rec->rm_offset); 567 /* 568 * bmap extent record lengths are constrained to 2^21 blocks in length 569 * because of space constraints in the on-disk metadata structure. 570 * However, rmap extent record lengths are constrained only by AG 571 * length, so we have to loop through the bmbt to make sure that the 572 * entire rmap is covered by bmbt records. 573 */ 574 check_rec = *rec; 575 while (have_map) { 576 if (irec.br_startoff != check_rec.rm_offset) 577 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 578 check_rec.rm_offset); 579 if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, 580 cur->bc_ag.pag->pag_agno, 581 check_rec.rm_startblock)) 582 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 583 check_rec.rm_offset); 584 if (irec.br_blockcount > check_rec.rm_blockcount) 585 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 586 check_rec.rm_offset); 587 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 588 break; 589 check_rec.rm_startblock += irec.br_blockcount; 590 check_rec.rm_offset += irec.br_blockcount; 591 check_rec.rm_blockcount -= irec.br_blockcount; 592 if (check_rec.rm_blockcount == 0) 593 break; 594 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); 595 if (!have_map) 596 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 597 check_rec.rm_offset); 598 } 599 600 out: 601 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 602 return -ECANCELED; 603 return 0; 604 } 605 606 /* Make sure each rmap has a corresponding bmbt entry. */ 607 STATIC int 608 xchk_bmap_check_ag_rmaps( 609 struct xfs_scrub *sc, 610 int whichfork, 611 struct xfs_perag *pag) 612 { 613 struct xchk_bmap_check_rmap_info sbcri; 614 struct xfs_btree_cur *cur; 615 struct xfs_buf *agf; 616 int error; 617 618 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf); 619 if (error) 620 return error; 621 622 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag); 623 624 sbcri.sc = sc; 625 sbcri.whichfork = whichfork; 626 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); 627 if (error == -ECANCELED) 628 error = 0; 629 630 xfs_btree_del_cursor(cur, error); 631 xfs_trans_brelse(sc->tp, agf); 632 return error; 633 } 634 635 /* 636 * Decide if we want to walk every rmap btree in the fs to make sure that each 637 * rmap for this file fork has corresponding bmbt entries. 638 */ 639 static bool 640 xchk_bmap_want_check_rmaps( 641 struct xchk_bmap_info *info) 642 { 643 struct xfs_scrub *sc = info->sc; 644 struct xfs_ifork *ifp; 645 646 if (!xfs_has_rmapbt(sc->mp)) 647 return false; 648 if (info->whichfork == XFS_COW_FORK) 649 return false; 650 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 651 return false; 652 653 /* Don't support realtime rmap checks yet. */ 654 if (info->is_rt) 655 return false; 656 657 /* 658 * The inode repair code zaps broken inode forks by resetting them back 659 * to EXTENTS format and zero extent records. If we encounter a fork 660 * in this state along with evidence that the fork isn't supposed to be 661 * empty, we need to scan the reverse mappings to decide if we're going 662 * to rebuild the fork. Data forks with nonzero file size are scanned. 663 * xattr forks are never empty of content, so they are always scanned. 664 */ 665 ifp = xfs_ifork_ptr(sc->ip, info->whichfork); 666 if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) { 667 if (info->whichfork == XFS_DATA_FORK && 668 i_size_read(VFS_I(sc->ip)) == 0) 669 return false; 670 671 return true; 672 } 673 674 return false; 675 } 676 677 /* Make sure each rmap has a corresponding bmbt entry. */ 678 STATIC int 679 xchk_bmap_check_rmaps( 680 struct xfs_scrub *sc, 681 int whichfork) 682 { 683 struct xfs_perag *pag; 684 xfs_agnumber_t agno; 685 int error; 686 687 for_each_perag(sc->mp, agno, pag) { 688 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); 689 if (error || 690 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { 691 xfs_perag_rele(pag); 692 return error; 693 } 694 } 695 696 return 0; 697 } 698 699 /* Scrub a delalloc reservation from the incore extent map tree. */ 700 STATIC void 701 xchk_bmap_iextent_delalloc( 702 struct xfs_inode *ip, 703 struct xchk_bmap_info *info, 704 struct xfs_bmbt_irec *irec) 705 { 706 struct xfs_mount *mp = info->sc->mp; 707 708 /* 709 * Check for out-of-order extents. This record could have come 710 * from the incore list, for which there is no ordering check. 711 */ 712 if (irec->br_startoff < info->prev_rec.br_startoff + 713 info->prev_rec.br_blockcount) 714 xchk_fblock_set_corrupt(info->sc, info->whichfork, 715 irec->br_startoff); 716 717 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 718 xchk_fblock_set_corrupt(info->sc, info->whichfork, 719 irec->br_startoff); 720 721 /* Make sure the extent points to a valid place. */ 722 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 723 xchk_fblock_set_corrupt(info->sc, info->whichfork, 724 irec->br_startoff); 725 } 726 727 /* Decide if this individual fork mapping is ok. */ 728 static bool 729 xchk_bmap_iext_mapping( 730 struct xchk_bmap_info *info, 731 const struct xfs_bmbt_irec *irec) 732 { 733 /* There should never be a "hole" extent in either extent list. */ 734 if (irec->br_startblock == HOLESTARTBLOCK) 735 return false; 736 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 737 return false; 738 return true; 739 } 740 741 /* Are these two mappings contiguous with each other? */ 742 static inline bool 743 xchk_are_bmaps_contiguous( 744 const struct xfs_bmbt_irec *b1, 745 const struct xfs_bmbt_irec *b2) 746 { 747 /* Don't try to combine unallocated mappings. */ 748 if (!xfs_bmap_is_real_extent(b1)) 749 return false; 750 if (!xfs_bmap_is_real_extent(b2)) 751 return false; 752 753 /* Does b2 come right after b1 in the logical and physical range? */ 754 if (b1->br_startoff + b1->br_blockcount != b2->br_startoff) 755 return false; 756 if (b1->br_startblock + b1->br_blockcount != b2->br_startblock) 757 return false; 758 if (b1->br_state != b2->br_state) 759 return false; 760 return true; 761 } 762 763 /* 764 * Walk the incore extent records, accumulating consecutive contiguous records 765 * into a single incore mapping. Returns true if @irec has been set to a 766 * mapping or false if there are no more mappings. Caller must ensure that 767 * @info.icur is zeroed before the first call. 768 */ 769 static bool 770 xchk_bmap_iext_iter( 771 struct xchk_bmap_info *info, 772 struct xfs_bmbt_irec *irec) 773 { 774 struct xfs_bmbt_irec got; 775 struct xfs_ifork *ifp; 776 unsigned int nr = 0; 777 778 ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); 779 780 /* Advance to the next iextent record and check the mapping. */ 781 xfs_iext_next(ifp, &info->icur); 782 if (!xfs_iext_get_extent(ifp, &info->icur, irec)) 783 return false; 784 785 if (!xchk_bmap_iext_mapping(info, irec)) { 786 xchk_fblock_set_corrupt(info->sc, info->whichfork, 787 irec->br_startoff); 788 return false; 789 } 790 nr++; 791 792 /* 793 * Iterate subsequent iextent records and merge them with the one 794 * that we just read, if possible. 795 */ 796 while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) { 797 if (!xchk_are_bmaps_contiguous(irec, &got)) 798 break; 799 800 if (!xchk_bmap_iext_mapping(info, &got)) { 801 xchk_fblock_set_corrupt(info->sc, info->whichfork, 802 got.br_startoff); 803 return false; 804 } 805 nr++; 806 807 irec->br_blockcount += got.br_blockcount; 808 xfs_iext_next(ifp, &info->icur); 809 } 810 811 /* 812 * If the merged mapping could be expressed with fewer bmbt records 813 * than we actually found, notify the user that this fork could be 814 * optimized. CoW forks only exist in memory so we ignore them. 815 */ 816 if (nr > 1 && info->whichfork != XFS_COW_FORK && 817 howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr) 818 xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); 819 820 return true; 821 } 822 823 /* 824 * Scrub an inode fork's block mappings. 825 * 826 * First we scan every record in every btree block, if applicable. 827 * Then we unconditionally scan the incore extent cache. 828 */ 829 STATIC int 830 xchk_bmap( 831 struct xfs_scrub *sc, 832 int whichfork) 833 { 834 struct xfs_bmbt_irec irec; 835 struct xchk_bmap_info info = { NULL }; 836 struct xfs_mount *mp = sc->mp; 837 struct xfs_inode *ip = sc->ip; 838 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 839 xfs_fileoff_t endoff; 840 int error = 0; 841 842 /* Non-existent forks can be ignored. */ 843 if (!ifp) 844 return -ENOENT; 845 846 info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); 847 info.whichfork = whichfork; 848 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); 849 info.sc = sc; 850 851 switch (whichfork) { 852 case XFS_COW_FORK: 853 /* No CoW forks on non-reflink filesystems. */ 854 if (!xfs_has_reflink(mp)) { 855 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 856 return 0; 857 } 858 break; 859 case XFS_ATTR_FORK: 860 if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) 861 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 862 break; 863 default: 864 ASSERT(whichfork == XFS_DATA_FORK); 865 break; 866 } 867 868 /* Check the fork values */ 869 switch (ifp->if_format) { 870 case XFS_DINODE_FMT_UUID: 871 case XFS_DINODE_FMT_DEV: 872 case XFS_DINODE_FMT_LOCAL: 873 /* No mappings to check. */ 874 if (whichfork == XFS_COW_FORK) 875 xchk_fblock_set_corrupt(sc, whichfork, 0); 876 return 0; 877 case XFS_DINODE_FMT_EXTENTS: 878 break; 879 case XFS_DINODE_FMT_BTREE: 880 if (whichfork == XFS_COW_FORK) { 881 xchk_fblock_set_corrupt(sc, whichfork, 0); 882 return 0; 883 } 884 885 error = xchk_bmap_btree(sc, whichfork, &info); 886 if (error) 887 return error; 888 break; 889 default: 890 xchk_fblock_set_corrupt(sc, whichfork, 0); 891 return 0; 892 } 893 894 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 895 return 0; 896 897 /* Find the offset of the last extent in the mapping. */ 898 error = xfs_bmap_last_offset(ip, &endoff, whichfork); 899 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 900 return error; 901 902 /* 903 * Scrub extent records. We use a special iterator function here that 904 * combines adjacent mappings if they are logically and physically 905 * contiguous. For large allocations that require multiple bmbt 906 * records, this reduces the number of cross-referencing calls, which 907 * reduces runtime. Cross referencing with the rmap is simpler because 908 * the rmap must match the combined mapping exactly. 909 */ 910 while (xchk_bmap_iext_iter(&info, &irec)) { 911 if (xchk_should_terminate(sc, &error) || 912 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 913 return 0; 914 915 if (irec.br_startoff >= endoff) { 916 xchk_fblock_set_corrupt(sc, whichfork, 917 irec.br_startoff); 918 return 0; 919 } 920 921 if (isnullstartblock(irec.br_startblock)) 922 xchk_bmap_iextent_delalloc(ip, &info, &irec); 923 else 924 xchk_bmap_iextent(ip, &info, &irec); 925 memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec)); 926 } 927 928 if (xchk_bmap_want_check_rmaps(&info)) { 929 error = xchk_bmap_check_rmaps(sc, whichfork); 930 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) 931 return error; 932 } 933 934 return 0; 935 } 936 937 /* Scrub an inode's data fork. */ 938 int 939 xchk_bmap_data( 940 struct xfs_scrub *sc) 941 { 942 return xchk_bmap(sc, XFS_DATA_FORK); 943 } 944 945 /* Scrub an inode's attr fork. */ 946 int 947 xchk_bmap_attr( 948 struct xfs_scrub *sc) 949 { 950 return xchk_bmap(sc, XFS_ATTR_FORK); 951 } 952 953 /* Scrub an inode's CoW fork. */ 954 int 955 xchk_bmap_cow( 956 struct xfs_scrub *sc) 957 { 958 return xchk_bmap(sc, XFS_COW_FORK); 959 } 960