1 /* 2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_btree.h" 28 #include "xfs_bit.h" 29 #include "xfs_log_format.h" 30 #include "xfs_trans.h" 31 #include "xfs_sb.h" 32 #include "xfs_inode.h" 33 #include "xfs_alloc.h" 34 #include "scrub/scrub.h" 35 #include "scrub/common.h" 36 #include "scrub/btree.h" 37 #include "scrub/trace.h" 38 39 /* btree scrubbing */ 40 41 /* 42 * Check for btree operation errors. See the section about handling 43 * operational errors in common.c. 44 */ 45 static bool 46 __xfs_scrub_btree_process_error( 47 struct xfs_scrub_context *sc, 48 struct xfs_btree_cur *cur, 49 int level, 50 int *error, 51 __u32 errflag, 52 void *ret_ip) 53 { 54 if (*error == 0) 55 return true; 56 57 switch (*error) { 58 case -EDEADLOCK: 59 /* Used to restart an op with deadlock avoidance. */ 60 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error); 61 break; 62 case -EFSBADCRC: 63 case -EFSCORRUPTED: 64 /* Note the badness but don't abort. */ 65 sc->sm->sm_flags |= errflag; 66 *error = 0; 67 /* fall through */ 68 default: 69 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 70 trace_xfs_scrub_ifork_btree_op_error(sc, cur, level, 71 *error, ret_ip); 72 else 73 trace_xfs_scrub_btree_op_error(sc, cur, level, 74 *error, ret_ip); 75 break; 76 } 77 return false; 78 } 79 80 bool 81 xfs_scrub_btree_process_error( 82 struct xfs_scrub_context *sc, 83 struct xfs_btree_cur *cur, 84 int level, 85 int *error) 86 { 87 return __xfs_scrub_btree_process_error(sc, cur, level, error, 88 XFS_SCRUB_OFLAG_CORRUPT, __return_address); 89 } 90 91 bool 92 xfs_scrub_btree_xref_process_error( 93 struct xfs_scrub_context *sc, 94 struct xfs_btree_cur *cur, 95 int level, 96 int *error) 97 { 98 return __xfs_scrub_btree_process_error(sc, cur, level, error, 99 XFS_SCRUB_OFLAG_XFAIL, __return_address); 100 } 101 102 /* Record btree block corruption. */ 103 static void 104 __xfs_scrub_btree_set_corrupt( 105 struct xfs_scrub_context *sc, 106 struct xfs_btree_cur *cur, 107 int level, 108 __u32 errflag, 109 void *ret_ip) 110 { 111 sc->sm->sm_flags |= errflag; 112 113 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 114 trace_xfs_scrub_ifork_btree_error(sc, cur, level, 115 ret_ip); 116 else 117 trace_xfs_scrub_btree_error(sc, cur, level, 118 ret_ip); 119 } 120 121 void 122 xfs_scrub_btree_set_corrupt( 123 struct xfs_scrub_context *sc, 124 struct xfs_btree_cur *cur, 125 int level) 126 { 127 __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, 128 __return_address); 129 } 130 131 void 132 xfs_scrub_btree_xref_set_corrupt( 133 struct xfs_scrub_context *sc, 134 struct xfs_btree_cur *cur, 135 int level) 136 { 137 __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, 138 __return_address); 139 } 140 141 /* 142 * Make sure this record is in order and doesn't stray outside of the parent 143 * keys. 144 */ 145 STATIC void 146 xfs_scrub_btree_rec( 147 struct xfs_scrub_btree *bs) 148 { 149 struct xfs_btree_cur *cur = bs->cur; 150 union xfs_btree_rec *rec; 151 union xfs_btree_key key; 152 union xfs_btree_key hkey; 153 union xfs_btree_key *keyp; 154 struct xfs_btree_block *block; 155 struct xfs_btree_block *keyblock; 156 struct xfs_buf *bp; 157 158 block = xfs_btree_get_block(cur, 0, &bp); 159 rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 160 161 trace_xfs_scrub_btree_rec(bs->sc, cur, 0); 162 163 /* If this isn't the first record, are they in order? */ 164 if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) 165 xfs_scrub_btree_set_corrupt(bs->sc, cur, 0); 166 bs->firstrec = false; 167 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); 168 169 if (cur->bc_nlevels == 1) 170 return; 171 172 /* Is this at least as large as the parent low key? */ 173 cur->bc_ops->init_key_from_rec(&key, rec); 174 keyblock = xfs_btree_get_block(cur, 1, &bp); 175 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); 176 if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) 177 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 178 179 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 180 return; 181 182 /* Is this no larger than the parent high key? */ 183 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 184 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); 185 if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) 186 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 187 } 188 189 /* 190 * Make sure this key is in order and doesn't stray outside of the parent 191 * keys. 192 */ 193 STATIC void 194 xfs_scrub_btree_key( 195 struct xfs_scrub_btree *bs, 196 int level) 197 { 198 struct xfs_btree_cur *cur = bs->cur; 199 union xfs_btree_key *key; 200 union xfs_btree_key *keyp; 201 struct xfs_btree_block *block; 202 struct xfs_btree_block *keyblock; 203 struct xfs_buf *bp; 204 205 block = xfs_btree_get_block(cur, level, &bp); 206 key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 207 208 trace_xfs_scrub_btree_key(bs->sc, cur, level); 209 210 /* If this isn't the first key, are they in order? */ 211 if (!bs->firstkey[level] && 212 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) 213 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 214 bs->firstkey[level] = false; 215 memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); 216 217 if (level + 1 >= cur->bc_nlevels) 218 return; 219 220 /* Is this at least as large as the parent low key? */ 221 keyblock = xfs_btree_get_block(cur, level + 1, &bp); 222 keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); 223 if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) 224 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 225 226 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 227 return; 228 229 /* Is this no larger than the parent high key? */ 230 key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 231 keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); 232 if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) 233 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 234 } 235 236 /* 237 * Check a btree pointer. Returns true if it's ok to use this pointer. 238 * Callers do not need to set the corrupt flag. 239 */ 240 static bool 241 xfs_scrub_btree_ptr_ok( 242 struct xfs_scrub_btree *bs, 243 int level, 244 union xfs_btree_ptr *ptr) 245 { 246 bool res; 247 248 /* A btree rooted in an inode has no block pointer to the root. */ 249 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 250 level == bs->cur->bc_nlevels) 251 return true; 252 253 /* Otherwise, check the pointers. */ 254 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) 255 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level); 256 else 257 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level); 258 if (!res) 259 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 260 261 return res; 262 } 263 264 /* Check that a btree block's sibling matches what we expect it. */ 265 STATIC int 266 xfs_scrub_btree_block_check_sibling( 267 struct xfs_scrub_btree *bs, 268 int level, 269 int direction, 270 union xfs_btree_ptr *sibling) 271 { 272 struct xfs_btree_cur *cur = bs->cur; 273 struct xfs_btree_block *pblock; 274 struct xfs_buf *pbp; 275 struct xfs_btree_cur *ncur = NULL; 276 union xfs_btree_ptr *pp; 277 int success; 278 int error; 279 280 error = xfs_btree_dup_cursor(cur, &ncur); 281 if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) || 282 !ncur) 283 return error; 284 285 /* 286 * If the pointer is null, we shouldn't be able to move the upper 287 * level pointer anywhere. 288 */ 289 if (xfs_btree_ptr_is_null(cur, sibling)) { 290 if (direction > 0) 291 error = xfs_btree_increment(ncur, level + 1, &success); 292 else 293 error = xfs_btree_decrement(ncur, level + 1, &success); 294 if (error == 0 && success) 295 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 296 error = 0; 297 goto out; 298 } 299 300 /* Increment upper level pointer. */ 301 if (direction > 0) 302 error = xfs_btree_increment(ncur, level + 1, &success); 303 else 304 error = xfs_btree_decrement(ncur, level + 1, &success); 305 if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error)) 306 goto out; 307 if (!success) { 308 xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1); 309 goto out; 310 } 311 312 /* Compare upper level pointer to sibling pointer. */ 313 pblock = xfs_btree_get_block(ncur, level + 1, &pbp); 314 pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); 315 if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp)) 316 goto out; 317 318 if (xfs_btree_diff_two_ptrs(cur, pp, sibling)) 319 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 320 out: 321 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR); 322 return error; 323 } 324 325 /* Check the siblings of a btree block. */ 326 STATIC int 327 xfs_scrub_btree_block_check_siblings( 328 struct xfs_scrub_btree *bs, 329 struct xfs_btree_block *block) 330 { 331 struct xfs_btree_cur *cur = bs->cur; 332 union xfs_btree_ptr leftsib; 333 union xfs_btree_ptr rightsib; 334 int level; 335 int error = 0; 336 337 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB); 338 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB); 339 level = xfs_btree_get_level(block); 340 341 /* Root block should never have siblings. */ 342 if (level == cur->bc_nlevels - 1) { 343 if (!xfs_btree_ptr_is_null(cur, &leftsib) || 344 !xfs_btree_ptr_is_null(cur, &rightsib)) 345 xfs_scrub_btree_set_corrupt(bs->sc, cur, level); 346 goto out; 347 } 348 349 /* 350 * Does the left & right sibling pointers match the adjacent 351 * parent level pointers? 352 * (These function absorbs error codes for us.) 353 */ 354 error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib); 355 if (error) 356 return error; 357 error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib); 358 if (error) 359 return error; 360 out: 361 return error; 362 } 363 364 struct check_owner { 365 struct list_head list; 366 xfs_daddr_t daddr; 367 int level; 368 }; 369 370 /* 371 * Make sure this btree block isn't in the free list and that there's 372 * an rmap record for it. 373 */ 374 STATIC int 375 xfs_scrub_btree_check_block_owner( 376 struct xfs_scrub_btree *bs, 377 int level, 378 xfs_daddr_t daddr) 379 { 380 xfs_agnumber_t agno; 381 xfs_agblock_t agbno; 382 xfs_btnum_t btnum; 383 bool init_sa; 384 int error = 0; 385 386 if (!bs->cur) 387 return 0; 388 389 btnum = bs->cur->bc_btnum; 390 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); 391 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); 392 393 init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; 394 if (init_sa) { 395 error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa); 396 if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 397 level, &error)) 398 return error; 399 } 400 401 xfs_scrub_xref_is_used_space(bs->sc, agbno, 1); 402 /* 403 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we 404 * have to nullify it (to shut down further block owner checks) if 405 * self-xref encounters problems. 406 */ 407 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO) 408 bs->cur = NULL; 409 410 if (init_sa) 411 xfs_scrub_ag_free(bs->sc, &bs->sc->sa); 412 413 return error; 414 } 415 416 /* Check the owner of a btree block. */ 417 STATIC int 418 xfs_scrub_btree_check_owner( 419 struct xfs_scrub_btree *bs, 420 int level, 421 struct xfs_buf *bp) 422 { 423 struct xfs_btree_cur *cur = bs->cur; 424 struct check_owner *co; 425 426 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) 427 return 0; 428 429 /* 430 * We want to cross-reference each btree block with the bnobt 431 * and the rmapbt. We cannot cross-reference the bnobt or 432 * rmapbt while scanning the bnobt or rmapbt, respectively, 433 * because we cannot alter the cursor and we'd prefer not to 434 * duplicate cursors. Therefore, save the buffer daddr for 435 * later scanning. 436 */ 437 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) { 438 co = kmem_alloc(sizeof(struct check_owner), 439 KM_MAYFAIL | KM_NOFS); 440 if (!co) 441 return -ENOMEM; 442 co->level = level; 443 co->daddr = XFS_BUF_ADDR(bp); 444 list_add_tail(&co->list, &bs->to_check); 445 return 0; 446 } 447 448 return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); 449 } 450 451 /* 452 * Grab and scrub a btree block given a btree pointer. Returns block 453 * and buffer pointers (if applicable) if they're ok to use. 454 */ 455 STATIC int 456 xfs_scrub_btree_get_block( 457 struct xfs_scrub_btree *bs, 458 int level, 459 union xfs_btree_ptr *pp, 460 struct xfs_btree_block **pblock, 461 struct xfs_buf **pbp) 462 { 463 void *failed_at; 464 int error; 465 466 *pblock = NULL; 467 *pbp = NULL; 468 469 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock); 470 if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) || 471 !*pblock) 472 return error; 473 474 xfs_btree_get_block(bs->cur, level, pbp); 475 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS) 476 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock, 477 level, *pbp); 478 else 479 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock, 480 level, *pbp); 481 if (failed_at) { 482 xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level); 483 return 0; 484 } 485 486 /* 487 * Check the block's owner; this function absorbs error codes 488 * for us. 489 */ 490 error = xfs_scrub_btree_check_owner(bs, level, *pbp); 491 if (error) 492 return error; 493 494 /* 495 * Check the block's siblings; this function absorbs error codes 496 * for us. 497 */ 498 return xfs_scrub_btree_block_check_siblings(bs, *pblock); 499 } 500 501 /* 502 * Check that the low and high keys of this block match the keys stored 503 * in the parent block. 504 */ 505 STATIC void 506 xfs_scrub_btree_block_keys( 507 struct xfs_scrub_btree *bs, 508 int level, 509 struct xfs_btree_block *block) 510 { 511 union xfs_btree_key block_keys; 512 struct xfs_btree_cur *cur = bs->cur; 513 union xfs_btree_key *high_bk; 514 union xfs_btree_key *parent_keys; 515 union xfs_btree_key *high_pk; 516 struct xfs_btree_block *parent_block; 517 struct xfs_buf *bp; 518 519 if (level >= cur->bc_nlevels - 1) 520 return; 521 522 /* Calculate the keys for this block. */ 523 xfs_btree_get_keys(cur, block, &block_keys); 524 525 /* Obtain the parent's copy of the keys for this block. */ 526 parent_block = xfs_btree_get_block(cur, level + 1, &bp); 527 parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], 528 parent_block); 529 530 if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) 531 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 532 533 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 534 return; 535 536 /* Get high keys */ 537 high_bk = xfs_btree_high_key_from_key(cur, &block_keys); 538 high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], 539 parent_block); 540 541 if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) 542 xfs_scrub_btree_set_corrupt(bs->sc, cur, 1); 543 } 544 545 /* 546 * Visit all nodes and leaves of a btree. Check that all pointers and 547 * records are in order, that the keys reflect the records, and use a callback 548 * so that the caller can verify individual records. 549 */ 550 int 551 xfs_scrub_btree( 552 struct xfs_scrub_context *sc, 553 struct xfs_btree_cur *cur, 554 xfs_scrub_btree_rec_fn scrub_fn, 555 struct xfs_owner_info *oinfo, 556 void *private) 557 { 558 struct xfs_scrub_btree bs = { NULL }; 559 union xfs_btree_ptr ptr; 560 union xfs_btree_ptr *pp; 561 union xfs_btree_rec *recp; 562 struct xfs_btree_block *block; 563 int level; 564 struct xfs_buf *bp; 565 struct check_owner *co; 566 struct check_owner *n; 567 int i; 568 int error = 0; 569 570 /* Initialize scrub state */ 571 bs.cur = cur; 572 bs.scrub_rec = scrub_fn; 573 bs.oinfo = oinfo; 574 bs.firstrec = true; 575 bs.private = private; 576 bs.sc = sc; 577 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) 578 bs.firstkey[i] = true; 579 INIT_LIST_HEAD(&bs.to_check); 580 581 /* Don't try to check a tree with a height we can't handle. */ 582 if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { 583 xfs_scrub_btree_set_corrupt(sc, cur, 0); 584 goto out; 585 } 586 587 /* 588 * Load the root of the btree. The helper function absorbs 589 * error codes for us. 590 */ 591 level = cur->bc_nlevels - 1; 592 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 593 if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) 594 goto out; 595 error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp); 596 if (error || !block) 597 goto out; 598 599 cur->bc_ptrs[level] = 1; 600 601 while (level < cur->bc_nlevels) { 602 block = xfs_btree_get_block(cur, level, &bp); 603 604 if (level == 0) { 605 /* End of leaf, pop back towards the root. */ 606 if (cur->bc_ptrs[level] > 607 be16_to_cpu(block->bb_numrecs)) { 608 xfs_scrub_btree_block_keys(&bs, level, block); 609 if (level < cur->bc_nlevels - 1) 610 cur->bc_ptrs[level + 1]++; 611 level++; 612 continue; 613 } 614 615 /* Records in order for scrub? */ 616 xfs_scrub_btree_rec(&bs); 617 618 /* Call out to the record checker. */ 619 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 620 error = bs.scrub_rec(&bs, recp); 621 if (error) 622 break; 623 if (xfs_scrub_should_terminate(sc, &error) || 624 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 625 break; 626 627 cur->bc_ptrs[level]++; 628 continue; 629 } 630 631 /* End of node, pop back towards the root. */ 632 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 633 xfs_scrub_btree_block_keys(&bs, level, block); 634 if (level < cur->bc_nlevels - 1) 635 cur->bc_ptrs[level + 1]++; 636 level++; 637 continue; 638 } 639 640 /* Keys in order for scrub? */ 641 xfs_scrub_btree_key(&bs, level); 642 643 /* Drill another level deeper. */ 644 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 645 if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) { 646 cur->bc_ptrs[level]++; 647 continue; 648 } 649 level--; 650 error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp); 651 if (error || !block) 652 goto out; 653 654 cur->bc_ptrs[level] = 1; 655 } 656 657 out: 658 /* Process deferred owner checks on btree blocks. */ 659 list_for_each_entry_safe(co, n, &bs.to_check, list) { 660 if (!error && bs.cur) 661 error = xfs_scrub_btree_check_block_owner(&bs, 662 co->level, co->daddr); 663 list_del(&co->list); 664 kmem_free(co); 665 } 666 667 return error; 668 } 669