/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_error.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log_recover.h"
#include "xfs_extfree_item.h"
#include "xfs_trans_priv.h"
#include "xfs_quota.h"
#include "xfs_rw.h"
#include "xfs_utils.h"
#include "xfs_trace.h"

STATIC int	xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
STATIC int	xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
#if defined(DEBUG)
STATIC void	xlog_recover_check_summary(xlog_t *);
#else
#define	xlog_recover_check_summary(log)
#endif

/*
 * This structure is used during recovery to record the buf log items which
 * have been canceled and should not be replayed.
 */
struct xfs_buf_cancel {
	xfs_daddr_t		bc_blkno;
	uint			bc_len;
	int			bc_refcount;
	struct list_head	bc_list;
};

/*
 * Sector aligned buffer routines for buffer create/read/write/access
 */

/*
 * Verify the given count of basic blocks is valid number of blocks
 * to specify for an operation involving the given XFS log buffer.
 * Returns nonzero if the count is valid, 0 otherwise.
 */

static inline int
xlog_buf_bbcount_valid(
	xlog_t	*log,
	int	bbcount)
{
	return bbcount > 0 && bbcount <= log->l_logBBsize;
}

/*
 * Allocate a buffer to hold log data.  The buffer needs to be able
 * to map to a range of nbblks basic blocks at any valid (basic
 * block) offset within the log.
 */
STATIC xfs_buf_t *
xlog_get_bp(
	xlog_t		*log,
	int		nbblks)
{
	struct xfs_buf	*bp;

	if (!xlog_buf_bbcount_valid(log, nbblks)) {
		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
		return NULL;
	}

	/*
	 * We do log I/O in units of log sectors (a power-of-2
	 * multiple of the basic block size), so we round up the
	 * requested size to accommodate the basic blocks required
	 * for complete log sectors.
	 *
	 * In addition, the buffer may be used for a non-sector-
	 * aligned block offset, in which case an I/O of the
	 * requested size could extend beyond the end of the
	 * buffer.  If the requested size is only 1 basic block it
	 * will never straddle a sector boundary, so this won't be
	 * an issue.  Nor will this be a problem if the log I/O is
	 * done in basic blocks (sector size 1).  But otherwise we
	 * extend the buffer by one extra log sector to ensure
	 * there's space to accommodate this possibility.
	 */
	if (nbblks > 1 && log->l_sectBBsize > 1)
		nbblks += log->l_sectBBsize;
	nbblks = round_up(nbblks, log->l_sectBBsize);

	bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
	if (bp)
		xfs_buf_unlock(bp);
	return bp;
}

STATIC void
xlog_put_bp(
	xfs_buf_t	*bp)
{
	xfs_buf_free(bp);
}

/*
 * Return the address of the start of the given block number's data
 * in a log buffer.  The buffer covers a log sector-aligned region.
 */
STATIC xfs_caddr_t
xlog_align(
	xlog_t		*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	xfs_buf_t	*bp)
{
	xfs_daddr_t	offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);

	ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
	return XFS_BUF_PTR(bp) + BBTOB(offset);
}


/*
 * nbblks should be uint, but oh well.  Just want to catch that 32-bit length.
 */
STATIC int
xlog_bread_noalign(
	xlog_t		*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	xfs_buf_t	*bp)
{
	int		error;

	if (!xlog_buf_bbcount_valid(log, nbblks)) {
		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
		return EFSCORRUPTED;
	}

	blk_no = round_down(blk_no, log->l_sectBBsize);
	nbblks = round_up(nbblks, log->l_sectBBsize);

	ASSERT(nbblks > 0);
	ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));

	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
	XFS_BUF_READ(bp);
	XFS_BUF_BUSY(bp);
	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);

	xfsbdstrat(log->l_mp, bp);
	error = xfs_buf_iowait(bp);
	if (error)
		xfs_ioerror_alert("xlog_bread", log->l_mp,
				  bp, XFS_BUF_ADDR(bp));
	return error;
}

STATIC int
xlog_bread(
	xlog_t		*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	xfs_buf_t	*bp,
	xfs_caddr_t	*offset)
{
	int		error;

	error = xlog_bread_noalign(log, blk_no, nbblks, bp);
	if (error)
		return error;

	*offset = xlog_align(log, blk_no, nbblks, bp);
	return 0;
}

/*
 * Read at an offset into the buffer.  Returns with the buffer in its original
 * state regardless of the result of the read.
 */
STATIC int
xlog_bread_offset(
	xlog_t		*log,
	xfs_daddr_t	blk_no,		/* block to read from */
	int		nbblks,		/* blocks to read */
	xfs_buf_t	*bp,
	xfs_caddr_t	offset)
{
	xfs_caddr_t	orig_offset = XFS_BUF_PTR(bp);
	int		orig_len = bp->b_buffer_length;
	int		error, error2;

	error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
	if (error)
		return error;

	error = xlog_bread_noalign(log, blk_no, nbblks, bp);

	/* must reset buffer pointer even on error */
	error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
	if (error)
		return error;
	return error2;
}

/*
 * Write out the buffer at the given block for the given number of blocks.
 * The buffer is kept locked across the write and is returned locked.
 * This can only be used for synchronous log writes.
 */
STATIC int
xlog_bwrite(
	xlog_t		*log,
	xfs_daddr_t	blk_no,
	int		nbblks,
	xfs_buf_t	*bp)
{
	int		error;

	if (!xlog_buf_bbcount_valid(log, nbblks)) {
		xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
			nbblks);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
		return EFSCORRUPTED;
	}

	blk_no = round_down(blk_no, log->l_sectBBsize);
	nbblks = round_up(nbblks, log->l_sectBBsize);

	ASSERT(nbblks > 0);
	ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));

	XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
	XFS_BUF_ZEROFLAGS(bp);
	XFS_BUF_BUSY(bp);
	XFS_BUF_HOLD(bp);
	xfs_buf_lock(bp);
	XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
	XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);

	if ((error = xfs_bwrite(log->l_mp, bp)))
		xfs_ioerror_alert("xlog_bwrite", log->l_mp,
				  bp, XFS_BUF_ADDR(bp));
	return error;
}

#ifdef DEBUG
/*
 * dump debug superblock and log record information
 */
STATIC void
xlog_header_check_dump(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n",
		__func__, &mp->m_sb.sb_uuid, XLOG_FMT);
	xfs_debug(mp, "    log : uuid = %pU, fmt = %d\n",
		&head->h_fs_uuid, be32_to_cpu(head->h_fmt));
}
#else
#define xlog_header_check_dump(mp, head)
#endif

/*
 * check log record header for recovery
 */
STATIC int
xlog_header_check_recover(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));

	/*
	 * IRIX doesn't write the h_fmt field and leaves it zeroed
	 * (XLOG_FMT_UNKNOWN).  This stops us from trying to recover
	 * a dirty log created in IRIX.
	 */
	if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
		xfs_warn(mp,
	"dirty log written in incompatible format - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_recover(1)",
				 XFS_ERRLEVEL_HIGH, mp);
		return XFS_ERROR(EFSCORRUPTED);
	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
		xfs_warn(mp,
	"dirty log entry has mismatched uuid - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_recover(2)",
				 XFS_ERRLEVEL_HIGH, mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	return 0;
}

/*
 * read the head block of the log and check the header
 */
STATIC int
xlog_header_check_mount(
	xfs_mount_t		*mp,
	xlog_rec_header_t	*head)
{
	ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));

	if (uuid_is_nil(&head->h_fs_uuid)) {
		/*
		 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
		 * h_fs_uuid is nil, we assume this log was last mounted
		 * by IRIX and continue.
		 */
		xfs_warn(mp, "nil uuid in log - IRIX style log");
	} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
		xfs_warn(mp, "log has mismatched uuid - can't recover");
		xlog_header_check_dump(mp, head);
		XFS_ERROR_REPORT("xlog_header_check_mount",
				 XFS_ERRLEVEL_HIGH, mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	return 0;
}

STATIC void
xlog_recover_iodone(
	struct xfs_buf	*bp)
{
	if (XFS_BUF_GETERROR(bp)) {
		/*
		 * We're not going to bother about retrying
		 * this during recovery. One strike!
		 */
		xfs_ioerror_alert("xlog_recover_iodone",
					bp->b_target->bt_mount, bp,
					XFS_BUF_ADDR(bp));
		xfs_force_shutdown(bp->b_target->bt_mount,
					SHUTDOWN_META_IO_ERROR);
	}
	bp->b_iodone = NULL;
	xfs_buf_ioend(bp, 0);
}

/*
 * This routine finds (to an approximation) the first block in the physical
 * log which contains the given cycle.  It uses a binary search algorithm.
 * Note that the algorithm can not be perfect because the disk will not
 * necessarily be perfect.
 */
STATIC int
xlog_find_cycle_start(
	xlog_t		*log,
	xfs_buf_t	*bp,
	xfs_daddr_t	first_blk,
	xfs_daddr_t	*last_blk,
	uint		cycle)
{
	xfs_caddr_t	offset;
	xfs_daddr_t	mid_blk;
	xfs_daddr_t	end_blk;
	uint		mid_cycle;
	int		error;

	end_blk = *last_blk;
	mid_blk = BLK_AVG(first_blk, end_blk);
	while (mid_blk != first_blk && mid_blk != end_blk) {
		error = xlog_bread(log, mid_blk, 1, bp, &offset);
		if (error)
			return error;
		mid_cycle = xlog_get_cycle(offset);
		if (mid_cycle == cycle)
			end_blk = mid_blk;   /* last_half_cycle == mid_cycle */
		else
			first_blk = mid_blk; /* first_half_cycle == mid_cycle */
		mid_blk = BLK_AVG(first_blk, end_blk);
	}
	ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
	       (mid_blk == end_blk && mid_blk-1 == first_blk));

	*last_blk = end_blk;

	return 0;
}

/*
 * Check that a range of blocks does not contain stop_on_cycle_no.
 * Fill in *new_blk with the block offset where such a block is
 * found, or with -1 (an invalid block number) if there is no such
 * block in the range.  The scan needs to occur from front to back
 * and the pointer into the region must be updated since a later
 * routine will need to perform another test.
 */
STATIC int
xlog_find_verify_cycle(
	xlog_t		*log,
	xfs_daddr_t	start_blk,
	int		nbblks,
	uint		stop_on_cycle_no,
	xfs_daddr_t	*new_blk)
{
	xfs_daddr_t	i, j;
	uint		cycle;
	xfs_buf_t	*bp;
	xfs_daddr_t	bufblks;
	xfs_caddr_t	buf = NULL;
	int		error = 0;

	/*
	 * Greedily allocate a buffer big enough to handle the full
	 * range of basic blocks we'll be examining.  If that fails,
	 * try a smaller size.  We need to be able to read at least
	 * a log sector, or we're out of luck.
	 */
	bufblks = 1 << ffs(nbblks);
	while (!(bp = xlog_get_bp(log, bufblks))) {
		bufblks >>= 1;
		if (bufblks < log->l_sectBBsize)
			return ENOMEM;
	}

	for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
		int	bcount;

		bcount = min(bufblks, (start_blk + nbblks - i));

		error = xlog_bread(log, i, bcount, bp, &buf);
		if (error)
			goto out;

		for (j = 0; j < bcount; j++) {
			cycle = xlog_get_cycle(buf);
			if (cycle == stop_on_cycle_no) {
				*new_blk = i+j;
				goto out;
			}

			buf += BBSIZE;
		}
	}

	*new_blk = -1;

out:
	xlog_put_bp(bp);
	return error;
}

/*
 * Potentially backup over partial log record write.
 *
 * In the typical case, last_blk is the number of the block directly after
 * a good log record.  Therefore, we subtract one to get the block number
 * of the last block in the given buffer.  extra_bblks contains the number
 * of blocks we would have read on a previous read.  This happens when the
 * last log record is split over the end of the physical log.
 *
 * extra_bblks is the number of blocks potentially verified on a previous
 * call to this routine.
 */
STATIC int
xlog_find_verify_log_record(
	xlog_t			*log,
	xfs_daddr_t		start_blk,
	xfs_daddr_t		*last_blk,
	int			extra_bblks)
{
	xfs_daddr_t		i;
	xfs_buf_t		*bp;
	xfs_caddr_t		offset = NULL;
	xlog_rec_header_t	*head = NULL;
	int			error = 0;
	int			smallmem = 0;
	int			num_blks = *last_blk - start_blk;
	int			xhdrs;

	ASSERT(start_blk != 0 || *last_blk != start_blk);

	if (!(bp = xlog_get_bp(log, num_blks))) {
		if (!(bp = xlog_get_bp(log, 1)))
			return ENOMEM;
		smallmem = 1;
	} else {
		error = xlog_bread(log, start_blk, num_blks, bp, &offset);
		if (error)
			goto out;
		offset += ((num_blks - 1) << BBSHIFT);
	}

	for (i = (*last_blk) - 1; i >= 0; i--) {
		if (i < start_blk) {
			/* valid log record not found */
			xfs_warn(log->l_mp,
		"Log inconsistent (didn't find previous header)");
			ASSERT(0);
			error = XFS_ERROR(EIO);
			goto out;
		}

		if (smallmem) {
			error = xlog_bread(log, i, 1, bp, &offset);
			if (error)
				goto out;
		}

		head = (xlog_rec_header_t *)offset;

		if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
			break;

		if (!smallmem)
			offset -= BBSIZE;
	}

	/*
	 * We hit the beginning of the physical log & still no header.  Return
	 * to caller.  If caller can handle a return of -1, then this routine
	 * will be called again for the end of the physical log.
	 */
	if (i == -1) {
		error = -1;
		goto out;
	}

	/*
	 * We have the final block of the good log (the first block
	 * of the log record _before_ the head).  So we check the uuid.
	 */
	if ((error = xlog_header_check_mount(log->l_mp, head)))
		goto out;

	/*
	 * We may have found a log record header before we expected one.
	 * last_blk will be the 1st block # with a given cycle #.  We may end
	 * up reading an entire log record.  In this case, we don't want to
	 * reset last_blk.  Only when last_blk points in the middle of a log
	 * record do we update last_blk.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		uint	h_size = be32_to_cpu(head->h_size);

		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
		if (h_size % XLOG_HEADER_CYCLE_SIZE)
			xhdrs++;
	} else {
		xhdrs = 1;
	}

	if (*last_blk - i + extra_bblks !=
	    BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
		*last_blk = i;

out:
	xlog_put_bp(bp);
	return error;
}

/*
 * Head is defined to be the point of the log where the next log write
 * could go.  This means that incomplete LR writes at the end are
 * eliminated when calculating the head.  We aren't guaranteed that previous
 * LR have complete transactions.  We only know that a cycle number of
 * current cycle number -1 won't be present in the log if we start writing
 * from our current block number.
 *
 * last_blk contains the block number of the first block with a given
 * cycle number.
 *
 * Return: zero if normal, non-zero if error.
 */
STATIC int
xlog_find_head(
	xlog_t		*log,
	xfs_daddr_t	*return_head_blk)
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	xfs_daddr_t	new_blk, first_blk, start_blk, last_blk, head_blk;
	int		num_scan_bblks;
	uint		first_half_cycle, last_half_cycle;
	uint		stop_on_cycle;
	int		error, log_bbnum = log->l_logBBsize;

	/* Is the end of the log device zeroed? */
	if ((error = xlog_find_zeroed(log, &first_blk)) == -1) {
		*return_head_blk = first_blk;

		/* Is the whole lot zeroed? */
		if (!first_blk) {
			/* Linux XFS shouldn't generate totally zeroed logs -
			 * mkfs etc write a dummy unmount record to a fresh
			 * log so we can store the uuid in there
			 */
			xfs_warn(log->l_mp, "totally zeroed log");
		}

		return 0;
	} else if (error) {
		xfs_warn(log->l_mp, "empty log check failed");
		return error;
	}

	first_blk = 0;			/* get cycle # of 1st block */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;

	error = xlog_bread(log, 0, 1, bp, &offset);
	if (error)
		goto bp_err;

	first_half_cycle = xlog_get_cycle(offset);

	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
	error = xlog_bread(log, last_blk, 1, bp, &offset);
	if (error)
		goto bp_err;

	last_half_cycle = xlog_get_cycle(offset);
	ASSERT(last_half_cycle != 0);

	/*
	 * If the 1st half cycle number is equal to the last half cycle number,
	 * then the entire log is stamped with the same cycle number.  In this
	 * case, head_blk can't be set to zero (which makes sense).  The below
	 * math doesn't work out properly with head_blk equal to zero.  Instead,
	 * we set it to log_bbnum which is an invalid block number, but this
	 * value makes the math correct.  If head_blk doesn't change through
	 * all the tests below, *head_blk is set to zero at the very end rather
	 * than log_bbnum.  In a sense, log_bbnum and zero are the same block
	 * in a circular file.
	 */
	if (first_half_cycle == last_half_cycle) {
		/*
		 * In this case we believe that the entire log should have
		 * cycle number last_half_cycle.  We need to scan backwards
		 * from the end verifying that there are no holes still
		 * containing last_half_cycle - 1.  If we find such a hole,
		 * then the start of that hole will be the new head.  The
		 * simple case looks like
		 *        x | x ... | x - 1 | x
		 * Another case that fits this picture would be
		 *        x | x + 1 | x ... | x
		 * In this case the head really is somewhere at the end of the
		 * log, as one of the latest writes at the beginning was
		 * incomplete.
		 * One more case is
		 *        x | x + 1 | x ... | x - 1 | x
		 * This is really the combination of the above two cases, and
		 * the head has to end up at the start of the x-1 hole at the
		 * end of the log.
		 *
		 * In the 256k log case, we will read from the beginning to the
		 * end of the log and search for cycle numbers equal to x-1.
		 * We don't worry about the x+1 blocks that we encounter,
		 * because we know that they cannot be the head since the log
		 * started with x.
		 */
		head_blk = log_bbnum;
		stop_on_cycle = last_half_cycle - 1;
	} else {
		/*
		 * In this case we want to find the first block with cycle
		 * number matching last_half_cycle.  We expect the log to be
		 * some variation on
		 *        x + 1 ... | x ... | x
		 * The first block with cycle number x (last_half_cycle) will
		 * be where the new head belongs.  First we do a binary search
		 * for the first occurrence of last_half_cycle.  The binary
		 * search may not be totally accurate, so then we scan back
		 * from there looking for occurrences of last_half_cycle before
		 * us.  If that backwards scan wraps around the beginning of
		 * the log, then we look for occurrences of last_half_cycle - 1
		 * at the end of the log.
		 * The cases we're looking for look like
		 *        v binary search stopped here
		 *        x + 1 ... | x | x + 1 | x ... | x
		 *                   ^ but we want to locate this spot
		 * or
		 *        <---------> less than scan distance
		 *        x + 1 ... | x ... | x - 1 | x
		 *                           ^ we want to locate this spot
		 */
		stop_on_cycle = last_half_cycle;
		if ((error = xlog_find_cycle_start(log, bp, first_blk,
						&head_blk, last_half_cycle)))
			goto bp_err;
	}

	/*
	 * Now validate the answer.  Scan back some number of maximum possible
	 * blocks and make sure each one has the expected cycle number.  The
	 * maximum is determined by the total possible amount of buffering
	 * in the in-core log.  The following number can be made tighter if
	 * we actually look at the block size of the filesystem.
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		/*
		 * We are guaranteed that the entire check can be performed
		 * in one buffer.
		 */
		start_blk = head_blk - num_scan_bblks;
		if ((error = xlog_find_verify_cycle(log,
						start_blk, num_scan_bblks,
						stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	} else {		/* need to read 2 parts of log */
		/*
		 * We are going to scan backwards in the log in two parts.
		 * First we scan the physical end of the log.  In this part
		 * of the log, we are looking for blocks with cycle number
		 * last_half_cycle - 1.
		 * If we find one, then we know that the log starts there, as
		 * we've found a hole that didn't get written in going around
		 * the end of the physical log.  The simple case for this is
		 *        x + 1 ... | x ... | x - 1 | x
		 *        <---------> less than scan distance
		 * If all of the blocks at the end of the log have cycle number
		 * last_half_cycle, then we check the blocks at the start of
		 * the log looking for occurrences of last_half_cycle.  If we
		 * find one, then our current estimate for the location of the
		 * first occurrence of last_half_cycle is wrong and we move
		 * back to the hole we've found.  This case looks like
		 *        x + 1 ... | x | x + 1 | x ...
		 *                       ^ binary search stopped here
		 * Another case we need to handle that only occurs in 256k
		 * logs is
		 *        x + 1 ... | x ... | x+1 | x ...
		 *                   ^ binary search stops here
		 * In a 256k log, the scan at the end of the log will see the
		 * x + 1 blocks.  We need to skip past those since that is
		 * certainly not the head of the log.  By searching for
		 * last_half_cycle-1 we accomplish that.
		 */
		ASSERT(head_blk <= INT_MAX &&
			(xfs_daddr_t) num_scan_bblks >= head_blk);
		start_blk = log_bbnum - (num_scan_bblks - head_blk);
		if ((error = xlog_find_verify_cycle(log, start_blk,
					num_scan_bblks - (int)head_blk,
					(stop_on_cycle - 1), &new_blk)))
			goto bp_err;
		if (new_blk != -1) {
			head_blk = new_blk;
			goto validate_head;
		}

		/*
		 * Scan beginning of log now.  The last part of the physical
		 * log is good.  This scan needs to verify that it doesn't find
		 * the last_half_cycle.
		 */
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_cycle(log,
					start_blk, (int)head_blk,
					stop_on_cycle, &new_blk)))
			goto bp_err;
		if (new_blk != -1)
			head_blk = new_blk;
	}

validate_head:
	/*
	 * Now we need to make sure head_blk is not pointing to a block in
	 * the middle of a log record.
	 */
	num_scan_bblks = XLOG_REC_SHIFT(log);
	if (head_blk >= num_scan_bblks) {
		start_blk = head_blk - num_scan_bblks; /* don't read head_blk */

		/* start ptr at last block ptr before head_blk */
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			error = XFS_ERROR(EIO);
			goto bp_err;
		} else if (error)
			goto bp_err;
	} else {
		start_blk = 0;
		ASSERT(head_blk <= INT_MAX);
		if ((error = xlog_find_verify_log_record(log, start_blk,
							&head_blk, 0)) == -1) {
			/* We hit the beginning of the log during our search */
			start_blk = log_bbnum - (num_scan_bblks - head_blk);
			new_blk = log_bbnum;
			ASSERT(start_blk <= INT_MAX &&
				(xfs_daddr_t) log_bbnum-start_blk >= 0);
			ASSERT(head_blk <= INT_MAX);
			if ((error = xlog_find_verify_log_record(log,
							start_blk, &new_blk,
							(int)head_blk)) == -1) {
				error = XFS_ERROR(EIO);
				goto bp_err;
			} else if (error)
				goto bp_err;
			if (new_blk != log_bbnum)
				head_blk = new_blk;
		} else if (error)
			goto bp_err;
	}

	xlog_put_bp(bp);
	if (head_blk == log_bbnum)
		*return_head_blk = 0;
	else
		*return_head_blk = head_blk;
	/*
	 * When returning here, we have a good block number.  Bad block
	 * means that during a previous crash, we didn't have a clean break
	 * from cycle number N to cycle number N-1.  In this case, we need
	 * to find the first block with cycle number N-1.
	 */
	return 0;

bp_err:
	xlog_put_bp(bp);

	if (error)
		xfs_warn(log->l_mp, "failed to find log head");
	return error;
}

/*
 * Find the sync block number or the tail of the log.
 *
 * This will be the block number of the last record to have its
 * associated buffers synced to disk.  Every log record header has
 * a sync lsn embedded in it.  LSNs hold block numbers, so it is easy
 * to get a sync block number.  The only concern is to figure out which
 * log record header to believe.
 *
 * The following algorithm uses the log record header with the largest
 * lsn.  The entire log record does not need to be valid.  We only care
 * that the header is valid.
 *
 * We could speed up search by using current head_blk buffer, but it is not
 * available.
 */
STATIC int
xlog_find_tail(
	xlog_t			*log,
	xfs_daddr_t		*head_blk,
	xfs_daddr_t		*tail_blk)
{
	xlog_rec_header_t	*rhead;
	xlog_op_header_t	*op_head;
	xfs_caddr_t		offset = NULL;
	xfs_buf_t		*bp;
	int			error, i, found;
	xfs_daddr_t		umount_data_blk;
	xfs_daddr_t		after_umount_blk;
	xfs_lsn_t		tail_lsn;
	int			hblks;

	found = 0;

	/*
	 * Find previous log record
	 */
	if ((error = xlog_find_head(log, head_blk)))
		return error;

	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	if (*head_blk == 0) {				/* special case */
		error = xlog_bread(log, 0, 1, bp, &offset);
		if (error)
			goto done;

		if (xlog_get_cycle(offset) == 0) {
			*tail_blk = 0;
			/* leave all other log inited values alone */
			goto done;
		}
	}

	/*
	 * Search backwards looking for log record header block
	 */
	ASSERT(*head_blk < INT_MAX);
	for (i = (int)(*head_blk) - 1; i >= 0; i--) {
		error = xlog_bread(log, i, 1, bp, &offset);
		if (error)
			goto done;

		if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
			found = 1;
			break;
		}
	}
	/*
	 * If we haven't found the log record header block, start looking
	 * again from the end of the physical log.  XXXmiken: There should be
	 * a check here to make sure we didn't search more than N blocks in
	 * the previous code.
	 */
	if (!found) {
		for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
			error = xlog_bread(log, i, 1, bp, &offset);
			if (error)
				goto done;

			if (*(__be32 *)offset ==
			    cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
				found = 2;
				break;
			}
		}
	}
	if (!found) {
		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
		ASSERT(0);
		return XFS_ERROR(EIO);
	}

	/* find blk_no of tail of log */
	rhead = (xlog_rec_header_t *)offset;
	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));

	/*
	 * Reset log values according to the state of the log when we
	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
	 * one because the next write starts a new cycle rather than
	 * continuing the cycle of the last good log record.  At this
	 * point we have guaranteed that all partial log records have been
	 * accounted for.  Therefore, we know that the last good log record
	 * written was complete and ended exactly on the end boundary
	 * of the physical log.
	 */
	log->l_prev_block = i;
	log->l_curr_block = (int)*head_blk;
	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
	if (found == 2)
		log->l_curr_cycle++;
	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
	xlog_assign_grant_head(&log->l_grant_reserve_head, log->l_curr_cycle,
					BBTOB(log->l_curr_block));
	xlog_assign_grant_head(&log->l_grant_write_head, log->l_curr_cycle,
					BBTOB(log->l_curr_block));

	/*
	 * Look for unmount record.  If we find it, then we know there
	 * was a clean unmount.  Since 'i' could be the last block in
	 * the physical log, we convert to a log block before comparing
	 * to the head_blk.
	 *
	 * Save the current tail lsn to use to pass to
	 * xlog_clear_stale_blocks() below.  We won't want to clear the
	 * unmount record if there is one, so we pass the lsn of the
	 * unmount record rather than the block after it.
	 */
	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
		int	h_size = be32_to_cpu(rhead->h_size);
		int	h_version = be32_to_cpu(rhead->h_version);

		if ((h_version & XLOG_VERSION_2) &&
		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
			if (h_size % XLOG_HEADER_CYCLE_SIZE)
				hblks++;
		} else {
			hblks = 1;
		}
	} else {
		hblks = 1;
	}
	after_umount_blk = (i + hblks + (int)
		BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
	tail_lsn = atomic64_read(&log->l_tail_lsn);
	if (*head_blk == after_umount_blk &&
	    be32_to_cpu(rhead->h_num_logops) == 1) {
		umount_data_blk = (i + hblks) % log->l_logBBsize;
		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
		if (error)
			goto done;

		op_head = (xlog_op_header_t *)offset;
		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
			/*
			 * Set tail and last sync so that newly written
			 * log records will point recovery to after the
			 * current unmount record.
			 */
			xlog_assign_atomic_lsn(&log->l_tail_lsn,
					log->l_curr_cycle, after_umount_blk);
			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
					log->l_curr_cycle, after_umount_blk);
			*tail_blk = after_umount_blk;

			/*
			 * Note that the unmount was clean. If the unmount
			 * was not clean, we need to know this to rebuild the
			 * superblock counters from the perag headers if we
			 * have a filesystem using non-persistent counters.
			 */
			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
		}
	}

	/*
	 * Make sure that there are no blocks in front of the head
	 * with the same cycle number as the head.  This can happen
	 * because we allow multiple outstanding log writes concurrently,
	 * and the later writes might make it out before earlier ones.
	 *
	 * We use the lsn from before modifying it so that we'll never
	 * overwrite the unmount record after a clean unmount.
	 *
	 * Do this only if we are going to recover the filesystem
	 *
	 * NOTE: This used to say "if (!readonly)"
	 * However on Linux, we can & do recover a read-only filesystem.
	 * We only skip recovery if NORECOVERY is specified on mount,
	 * in which case we would not be here.
	 *
	 * But... if the -device- itself is readonly, just skip this.
	 * We can't recover this device anyway, so it won't matter.
	 */
	if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
		error = xlog_clear_stale_blocks(log, tail_lsn);

done:
	xlog_put_bp(bp);

	if (error)
		xfs_warn(log->l_mp, "failed to locate log tail");
	return error;
}

/*
 * Is the log zeroed at all?
 *
 * The last binary search should be changed to perform an X block read
 * once X becomes small enough.  You can then search linearly through
 * the X blocks.  This will cut down on the number of reads we need to do.
 *
 * If the log is partially zeroed, this routine will pass back the blkno
 * of the first block with cycle number 0.  It won't have a complete LR
 * preceding it.
 *
 * Return:
 *	0  => the log is completely written to
 *	-1 => use *blk_no as the first block of the log
 *	>0 => error has occurred
 */
STATIC int
xlog_find_zeroed(
	xlog_t		*log,
	xfs_daddr_t	*blk_no)
{
	xfs_buf_t	*bp;
	xfs_caddr_t	offset;
	uint		first_cycle, last_cycle;
	xfs_daddr_t	new_blk, last_blk, start_blk;
	xfs_daddr_t	num_scan_bblks;
	int		error, log_bbnum = log->l_logBBsize;

	*blk_no = 0;

	/* check totally zeroed log */
	bp = xlog_get_bp(log, 1);
	if (!bp)
		return ENOMEM;
	error = xlog_bread(log, 0, 1, bp, &offset);
	if (error)
		goto bp_err;

	first_cycle = xlog_get_cycle(offset);
	if (first_cycle == 0) {		/* completely zeroed log */
		*blk_no = 0;
		xlog_put_bp(bp);
		return -1;
	}

	/* check partially zeroed log */
	error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
	if (error)
		goto bp_err;

	last_cycle = xlog_get_cycle(offset);
	if (last_cycle != 0) {		/* log completely written to */
		xlog_put_bp(bp);
		return 0;
	} else if (first_cycle != 1) {
		/*
		 * If the cycle of the last block is zero, the cycle of
		 * the first block must be 1. If it's not, maybe we're
		 * not looking at a log... Bail out.
		 */
		xfs_warn(log->l_mp,
			"Log inconsistent or not a log (last==0, first!=1)");
		return XFS_ERROR(EINVAL);
	}

	/* we have a partially zeroed log */
	last_blk = log_bbnum-1;
	if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
		goto bp_err;

	/*
	 * Validate the answer.  Because there is no way to guarantee that
	 * the entire log is made up of log records which are the same size,
	 * we scan over the defined maximum blocks.  At this point, the
	 * maximum is not chosen to mean anything special.  XXXmiken
	 */
	num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
	ASSERT(num_scan_bblks <= INT_MAX);

	if (last_blk < num_scan_bblks)
		num_scan_bblks = last_blk;
	start_blk = last_blk - num_scan_bblks;

	/*
	 * We search for any instances of cycle number 0 that occur before
	 * our current estimate of the head.  What we're trying to detect is
	 *        1 ... | 0 | 1 | 0...
	 *                       ^ binary search ends here
	 */
	if ((error = xlog_find_verify_cycle(log, start_blk,
					(int)num_scan_bblks, 0, &new_blk)))
		goto bp_err;
	if (new_blk != -1)
		last_blk = new_blk;

	/*
	 * Potentially backup over partial log record write.  We don't need
	 * to search the end of the log because we know it is zero.
	 */
	if ((error = xlog_find_verify_log_record(log, start_blk,
				&last_blk, 0)) == -1) {
		error = XFS_ERROR(EIO);
		goto bp_err;
	} else if (error)
		goto bp_err;

	*blk_no = last_blk;
bp_err:
	xlog_put_bp(bp);
	if (error)
		return error;
	return -1;
}

/*
 * These are simple subroutines used by xlog_clear_stale_blocks() below
 * to initialize a buffer full of empty log record headers and write
 * them into the log.
 */
STATIC void
xlog_add_record(
	xlog_t			*log,
	xfs_caddr_t		buf,
	int			cycle,
	int			block,
	int			tail_cycle,
	int			tail_block)
{
	xlog_rec_header_t	*recp = (xlog_rec_header_t *)buf;

	memset(buf, 0, BBSIZE);
	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
	recp->h_cycle = cpu_to_be32(cycle);
	recp->h_version = cpu_to_be32(
			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
	recp->h_fmt = cpu_to_be32(XLOG_FMT);
	memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
}

STATIC int
xlog_write_log_records(
	xlog_t		*log,
	int		cycle,
	int		start_block,
	int		blocks,
	int		tail_cycle,
	int		tail_block)
{
	xfs_caddr_t	offset;
	xfs_buf_t	*bp;
	int		balign, ealign;
	int		sectbb = log->l_sectBBsize;
	int		end_block = start_block + blocks;
	int		bufblks;
	int		error = 0;
	int		i, j = 0;

	/*
	 * Greedily allocate a buffer big enough to handle the full
	 * range of basic blocks to be written.  If that fails, try
	 * a smaller size.  We need to be able to write at least a
	 * log sector, or we're out of luck.
	 */
	bufblks = 1 << ffs(blocks);
	while (!(bp = xlog_get_bp(log, bufblks))) {
		bufblks >>= 1;
		if (bufblks < sectbb)
			return ENOMEM;
	}

	/* We may need to do a read at the start to fill in part of
	 * the buffer in the starting sector not covered by the first
	 * write below.
	 */
	balign = round_down(start_block, sectbb);
	if (balign != start_block) {
		error = xlog_bread_noalign(log, start_block, 1, bp);
		if (error)
			goto out_put_bp;

		j = start_block - balign;
	}

	for (i = start_block; i < end_block; i += bufblks) {
		int		bcount, endcount;

		bcount = min(bufblks, end_block - start_block);
		endcount = bcount - j;

		/* We may need to do a read at the end to fill in part of
		 * the buffer in the final sector not covered by the write.
		 * If this is the same sector as the above read, skip it.
		 */
		ealign = round_down(end_block, sectbb);
		if (j == 0 && (start_block + endcount > ealign)) {
			offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
			error = xlog_bread_offset(log, ealign, sectbb,
							bp, offset);
			if (error)
				break;

		}

		offset = xlog_align(log, start_block, endcount, bp);
		for (; j < endcount; j++) {
			xlog_add_record(log, offset, cycle, i+j,
					tail_cycle, tail_block);
			offset += BBSIZE;
		}
		error = xlog_bwrite(log, start_block, endcount, bp);
		if (error)
			break;
		start_block += endcount;
		j = 0;
	}

out_put_bp:
	xlog_put_bp(bp);
	return error;
}

/*
 * This routine is called to blow away any incomplete log writes out
 * in front of the log head.  We do this so that we won't become confused
 * if we come up, write only a little bit more, and then crash again.
 * If we leave the partial log records out there, this situation could
 * cause us to think those partial writes are valid blocks since they
 * have the current cycle number.  We get rid of them by overwriting them
 * with empty log records with the old cycle number rather than the
 * current one.
 *
 * The tail lsn is passed in rather than taken from
 * the log so that we will not write over the unmount record after a
 * clean unmount in a 512 block log.  Doing so would leave the log without
 * any valid log records in it until a new one was written.  If we crashed
 * during that time we would not be able to recover.
 */
STATIC int
xlog_clear_stale_blocks(
	xlog_t		*log,
	xfs_lsn_t	tail_lsn)
{
	int		tail_cycle, head_cycle;
	int		tail_block, head_block;
	int		tail_distance, max_distance;
	int		distance;
	int		error;

	tail_cycle = CYCLE_LSN(tail_lsn);
	tail_block = BLOCK_LSN(tail_lsn);
	head_cycle = log->l_curr_cycle;
	head_block = log->l_curr_block;

	/*
	 * Figure out the distance between the new head of the log
	 * and the tail.  We want to write over any blocks beyond the
	 * head that we may have written just before the crash, but
	 * we don't want to overwrite the tail of the log.
	 */
	if (head_cycle == tail_cycle) {
		/*
		 * The tail is behind the head in the physical log,
		 * so the distance from the head to the tail is the
		 * distance from the head to the end of the log plus
		 * the distance from the beginning of the log to the
		 * tail.
		 */
		if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
					 XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block + (log->l_logBBsize - head_block);
	} else {
		/*
		 * The head is behind the tail in the physical log,
		 * so the distance from the head to the tail is just
		 * the tail block minus the head block.
		 */
		if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
			XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
					 XFS_ERRLEVEL_LOW, log->l_mp);
			return XFS_ERROR(EFSCORRUPTED);
		}
		tail_distance = tail_block - head_block;
	}

	/*
	 * If the head is right up against the tail, we can't clear
	 * anything.
	 */
	if (tail_distance <= 0) {
		ASSERT(tail_distance == 0);
		return 0;
	}

	max_distance = XLOG_TOTAL_REC_SHIFT(log);
	/*
	 * Take the smaller of the maximum amount of outstanding I/O
	 * we could have and the distance to the tail to clear out.
	 * We take the smaller so that we don't overwrite the tail and
	 * we don't waste all day writing from the head to the tail
	 * for no reason.
	 */
	max_distance = MIN(max_distance, tail_distance);

	if ((head_block + max_distance) <= log->l_logBBsize) {
		/*
		 * We can stomp all the blocks we need to without
		 * wrapping around the end of the log.  Just do it
		 * in a single write.  Use the cycle number of the
		 * current cycle minus one so that the log will look like:
		 *     n ... | n - 1 ...
		 */
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, max_distance, tail_cycle,
				tail_block);
		if (error)
			return error;
	} else {
		/*
		 * We need to wrap around the end of the physical log in
		 * order to clear all the blocks.  Do it in two separate
		 * I/Os.  The first write should be from the head to the
		 * end of the physical log, and it should use the current
		 * cycle number minus one just like above.
		 */
		distance = log->l_logBBsize - head_block;
		error = xlog_write_log_records(log, (head_cycle - 1),
				head_block, distance, tail_cycle,
				tail_block);

		if (error)
			return error;

		/*
		 * Now write the blocks at the start of the physical log.
		 * This writes the remainder of the blocks we want to clear.
		 * It uses the current cycle number since we're now on the
		 * same cycle as the head so that we get:
		 *    n ... n ... | n - 1 ...
		 *    ^^^^^ blocks we're writing
		 */
		distance = max_distance - (log->l_logBBsize - head_block);
		error = xlog_write_log_records(log, head_cycle, 0, distance,
				tail_cycle, tail_block);
		if (error)
			return error;
	}

	return 0;
}

/******************************************************************************
 *
 *		Log recover routines
 *
 ******************************************************************************
 */

STATIC xlog_recover_t *
xlog_recover_find_tid(
	struct hlist_head	*head,
	xlog_tid_t		tid)
{
	xlog_recover_t		*trans;
	struct hlist_node	*n;

	hlist_for_each_entry(trans, n, head, r_list) {
		if (trans->r_log_tid == tid)
			return trans;
	}
	return NULL;
}

STATIC void
xlog_recover_new_tid(
	struct hlist_head	*head,
	xlog_tid_t		tid,
	xfs_lsn_t		lsn)
{
	xlog_recover_t		*trans;

	trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP);
	trans->r_log_tid = tid;
	trans->r_lsn = lsn;
	INIT_LIST_HEAD(&trans->r_itemq);

	INIT_HLIST_NODE(&trans->r_list);
	hlist_add_head(&trans->r_list, head);
}

STATIC void
xlog_recover_add_item(
	struct list_head	*head)
{
	xlog_recover_item_t	*item;

	item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
	INIT_LIST_HEAD(&item->ri_list);
	list_add_tail(&item->ri_list, head);
}

STATIC int
xlog_recover_add_to_cont_trans(
	struct log		*log,
	xlog_recover_t		*trans,
	xfs_caddr_t		dp,
	int			len)
{
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr, old_ptr;
	int			old_len;

	if (list_empty(&trans->r_itemq)) {
		/* finish copying rest of trans header */
		xlog_recover_add_item(&trans->r_itemq);
		ptr = (xfs_caddr_t) &trans->r_theader +
				sizeof(xfs_trans_header_t) - len;
		memcpy(ptr, dp, len); /* d, s, l */
		return 0;
	}
	/* take the tail entry */
	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);

	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
	old_len = item->ri_buf[item->ri_cnt-1].i_len;

	ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
	memcpy(&ptr[old_len], dp, len); /* d, s, l */
	item->ri_buf[item->ri_cnt-1].i_len += len;
	item->ri_buf[item->ri_cnt-1].i_addr = ptr;
	trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
	return 0;
}

/*
 * The next region to add is the start of a new region.  It could be
 * a whole region or it could be the first part of a new region.  Because
 * of this, the assumption here is that the type and size fields of all
 * format structures fit into the first 32 bits of the structure.
 *
 * This works because all regions must be 32 bit aligned.  Therefore, we
 * either have both fields or we have neither field.  In the case we have
 * neither field, the data part of the region is zero length.
 * We only have a log_op_header and can throw away the header since a new
 * one will appear later.  If we have at least 4 bytes, then we can determine
 * how many regions will appear in the current log item.
 */
STATIC int
xlog_recover_add_to_trans(
	struct log		*log,
	xlog_recover_t		*trans,
	xfs_caddr_t		dp,
	int			len)
{
	xfs_inode_log_format_t	*in_f;			/* any will do */
	xlog_recover_item_t	*item;
	xfs_caddr_t		ptr;

	if (!len)
		return 0;
	if (list_empty(&trans->r_itemq)) {
		/* we need to catch log corruptions here */
		if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
			xfs_warn(log->l_mp, "%s: bad header magic number",
				__func__);
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
		if (len == sizeof(xfs_trans_header_t))
			xlog_recover_add_item(&trans->r_itemq);
		memcpy(&trans->r_theader, dp, len); /* d, s, l */
		return 0;
	}

	ptr = kmem_alloc(len, KM_SLEEP);
	memcpy(ptr, dp, len);
	in_f = (xfs_inode_log_format_t *)ptr;

	/* take the tail entry */
	item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
	if (item->ri_total != 0 &&
	    item->ri_total == item->ri_cnt) {
		/* tail item is in use, get a new one */
		xlog_recover_add_item(&trans->r_itemq);
		item = list_entry(trans->r_itemq.prev,
					xlog_recover_item_t, ri_list);
	}

	if (item->ri_total == 0) {		/* first region to be added */
		if (in_f->ilf_size == 0 ||
		    in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
			xfs_warn(log->l_mp,
		"bad number of regions (%d) in inode log format",
				  in_f->ilf_size);
			ASSERT(0);
			return XFS_ERROR(EIO);
		}

		item->ri_total = in_f->ilf_size;
		item->ri_buf =
			kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
				    KM_SLEEP);
	}
	ASSERT(item->ri_total > item->ri_cnt);
	/* Description region is ri_buf[0] */
	item->ri_buf[item->ri_cnt].i_addr = ptr;
	item->ri_buf[item->ri_cnt].i_len = len;
	item->ri_cnt++;
	trace_xfs_log_recover_item_add(log, trans, item, 0);
	return 0;
}

/*
 * Sort the log items in the transaction.  Cancelled buffers need
 * to be put first so they are processed before any items that might
 * modify the buffers.  If they are cancelled, then the modifications
 * don't need to be replayed.
 */
STATIC int
xlog_recover_reorder_trans(
	struct log		*log,
	xlog_recover_t		*trans,
	int			pass)
{
	xlog_recover_item_t	*item, *n;
	LIST_HEAD(sort_list);

	list_splice_init(&trans->r_itemq, &sort_list);
	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
		xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;

		switch (ITEM_TYPE(item)) {
		case XFS_LI_BUF:
			if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
				trace_xfs_log_recover_item_reorder_head(log,
							trans, item, pass);
				list_move(&item->ri_list, &trans->r_itemq);
				break;
			}
		case XFS_LI_INODE:
		case XFS_LI_DQUOT:
		case XFS_LI_QUOTAOFF:
		case XFS_LI_EFD:
		case XFS_LI_EFI:
			trace_xfs_log_recover_item_reorder_tail(log,
							trans, item, pass);
			list_move_tail(&item->ri_list, &trans->r_itemq);
			break;
		default:
			xfs_warn(log->l_mp,
				"%s: unrecognized type of log operation",
				__func__);
			ASSERT(0);
			return XFS_ERROR(EIO);
		}
	}
	ASSERT(list_empty(&sort_list));
	return 0;
}

/*
 * Build up the table of buf cancel records so that we don't replay
 * cancelled data in the second pass.  For buffer records that are
 * not cancel records, there is nothing to do here so we just return.
 *
 * If we get a cancel record which is already in the table, this indicates
 * that the buffer was cancelled multiple times.  In order to ensure
 * that during pass 2 we keep the record in the table until we reach its
 * last occurrence in the log, we keep a reference count in the cancel
 * record in the table to tell us how many times we expect to see this
 * record during the second pass.
 */
STATIC int
xlog_recover_buffer_pass1(
	struct log		*log,
	xlog_recover_item_t	*item)
{
	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
	struct list_head	*bucket;
	struct xfs_buf_cancel	*bcp;

	/*
	 * If this isn't a cancel buffer item, then just return.
	 */
	if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
		trace_xfs_log_recover_buf_not_cancel(log, buf_f);
		return 0;
	}

	/*
	 * Insert an xfs_buf_cancel record into the hash table of them.
	 * If there is already an identical record, bump its reference count.
	 */
	bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno);
	list_for_each_entry(bcp, bucket, bc_list) {
		if (bcp->bc_blkno == buf_f->blf_blkno &&
		    bcp->bc_len == buf_f->blf_len) {
			bcp->bc_refcount++;
			trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
			return 0;
		}
	}

	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
	bcp->bc_blkno = buf_f->blf_blkno;
	bcp->bc_len = buf_f->blf_len;
	bcp->bc_refcount = 1;
	list_add_tail(&bcp->bc_list, bucket);

	trace_xfs_log_recover_buf_cancel_add(log, buf_f);
	return 0;
}

/*
 * Check to see whether the buffer being recovered has a corresponding
 * entry in the buffer cancel record table.  If it does then return 1
 * so that it will be cancelled, otherwise return 0.  If the buffer is
 * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement
 * the refcount on the entry in the table and remove it from the table
 * if this is the last reference.
 *
 * We remove the cancel record from the table when we encounter its
 * last occurrence in the log so that if the same buffer is re-used
 * again after its last cancellation we actually replay the changes
 * made at that point.
 */
STATIC int
xlog_check_buffer_cancelled(
	struct log		*log,
	xfs_daddr_t		blkno,
	uint			len,
	ushort			flags)
{
	struct list_head	*bucket;
	struct xfs_buf_cancel	*bcp;

	if (log->l_buf_cancel_table == NULL) {
		/*
		 * There is nothing in the table built in pass one,
		 * so this buffer must not be cancelled.
		 */
		ASSERT(!(flags & XFS_BLF_CANCEL));
		return 0;
	}

	/*
	 * Search for an entry in the cancel table that matches our buffer.
	 */
	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
	list_for_each_entry(bcp, bucket, bc_list) {
		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
			goto found;
	}

	/*
	 * We didn't find a corresponding entry in the table, so return 0 so
	 * that the buffer is NOT cancelled.
	 */
	ASSERT(!(flags & XFS_BLF_CANCEL));
	return 0;

found:
	/*
	 * We've got a match, so return 1 so that the recovery of this buffer
	 * is cancelled.  If this buffer is actually a buffer cancel log
	 * item, then decrement the refcount on the one in the table and
	 * remove it if this is the last reference.
	 */
	if (flags & XFS_BLF_CANCEL) {
		if (--bcp->bc_refcount == 0) {
			list_del(&bcp->bc_list);
			kmem_free(bcp);
		}
	}
	return 1;
}

/*
 * Perform recovery for a buffer full of inodes.  In these buffers, the only
 * data which should be recovered is that which corresponds to the
 * di_next_unlinked pointers in the on disk inode structures.  The rest of the
 * data for the inodes is always logged through the inodes themselves rather
 * than the inode buffer and is recovered in xlog_recover_inode_pass2().
 *
 * The only time when buffers full of inodes are fully recovered is when the
 * buffer is full of newly allocated inodes.  In this case the buffer will
 * not be marked as an inode buffer and so will be sent to
 * xlog_recover_do_reg_buffer() below during recovery.
 */
STATIC int
xlog_recover_do_inode_buffer(
	struct xfs_mount	*mp,
	xlog_recover_item_t	*item,
	struct xfs_buf		*bp,
	xfs_buf_log_format_t	*buf_f)
{
	int			i;
	int			item_index = 0;
	int			bit = 0;
	int			nbits = 0;
	int			reg_buf_offset = 0;
	int			reg_buf_bytes = 0;
	int			next_unlinked_offset;
	int			inodes_per_buf;
	xfs_agino_t		*logged_nextp;
	xfs_agino_t		*buffer_nextp;

	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);

	inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
	for (i = 0; i < inodes_per_buf; i++) {
		next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
			offsetof(xfs_dinode_t, di_next_unlinked);

		while (next_unlinked_offset >=
		       (reg_buf_offset + reg_buf_bytes)) {
			/*
			 * The next di_next_unlinked field is beyond
			 * the current logged region.  Find the next
			 * logged region that contains or is beyond
			 * the current di_next_unlinked field.
			 */
			bit += nbits;
			bit = xfs_next_bit(buf_f->blf_data_map,
					   buf_f->blf_map_size, bit);

			/*
			 * If there are no more logged regions in the
			 * buffer, then we're done.
			 */
			if (bit == -1)
				return 0;

			nbits = xfs_contig_bits(buf_f->blf_data_map,
						buf_f->blf_map_size, bit);
			ASSERT(nbits > 0);
			reg_buf_offset = bit << XFS_BLF_SHIFT;
			reg_buf_bytes = nbits << XFS_BLF_SHIFT;
			item_index++;
		}

		/*
		 * If the current logged region starts after the current
		 * di_next_unlinked field, then move on to the next
		 * di_next_unlinked field.
		 */
		if (next_unlinked_offset < reg_buf_offset)
			continue;

		ASSERT(item->ri_buf[item_index].i_addr != NULL);
		ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
		ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp));

		/*
		 * The current logged region contains a copy of the
		 * current di_next_unlinked field.  Extract its value
		 * and copy it to the buffer copy.
		 */
		logged_nextp = item->ri_buf[item_index].i_addr +
				next_unlinked_offset - reg_buf_offset;
		if (unlikely(*logged_nextp == 0)) {
			xfs_alert(mp,
		"Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
		"Trying to replay bad (0) inode di_next_unlinked field.",
				item, bp);
			XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
					 XFS_ERRLEVEL_LOW, mp);
			return XFS_ERROR(EFSCORRUPTED);
		}

		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
					      next_unlinked_offset);
		*buffer_nextp = *logged_nextp;
	}

	return 0;
}

/*
 * Perform a 'normal' buffer recovery.  Each logged region of the
 * buffer should be copied over the corresponding region in the
 * given buffer.  The bitmap in the buf log format structure indicates
 * where to place the logged data.
 */
STATIC void
xlog_recover_do_reg_buffer(
	struct xfs_mount	*mp,
	xlog_recover_item_t	*item,
	struct xfs_buf		*bp,
	xfs_buf_log_format_t	*buf_f)
{
	int			i;
	int			bit;
	int			nbits;
	int			error;

	trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);

	bit = 0;
	i = 1;  /* 0 is the buf format structure */
	while (1) {
		bit = xfs_next_bit(buf_f->blf_data_map,
				   buf_f->blf_map_size, bit);
		if (bit == -1)
			break;
		nbits = xfs_contig_bits(buf_f->blf_data_map,
					buf_f->blf_map_size, bit);
		ASSERT(nbits > 0);
		ASSERT(item->ri_buf[i].i_addr != NULL);
		ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
		ASSERT(XFS_BUF_COUNT(bp) >=
		       ((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT));

		/*
		 * Do a sanity check if this is a dquot buffer. Just checking
		 * the first dquot in the buffer should do. XXXThis is
		 * probably a good thing to do for other buf types also.
1889 */ 1890 error = 0; 1891 if (buf_f->blf_flags & 1892 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 1893 if (item->ri_buf[i].i_addr == NULL) { 1894 xfs_alert(mp, 1895 "XFS: NULL dquot in %s.", __func__); 1896 goto next; 1897 } 1898 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { 1899 xfs_alert(mp, 1900 "XFS: dquot too small (%d) in %s.", 1901 item->ri_buf[i].i_len, __func__); 1902 goto next; 1903 } 1904 error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, 1905 -1, 0, XFS_QMOPT_DOWARN, 1906 "dquot_buf_recover"); 1907 if (error) 1908 goto next; 1909 } 1910 1911 memcpy(xfs_buf_offset(bp, 1912 (uint)bit << XFS_BLF_SHIFT), /* dest */ 1913 item->ri_buf[i].i_addr, /* source */ 1914 nbits<<XFS_BLF_SHIFT); /* length */ 1915 next: 1916 i++; 1917 bit += nbits; 1918 } 1919 1920 /* Shouldn't be any more regions */ 1921 ASSERT(i == item->ri_total); 1922 } 1923 1924 /* 1925 * Do some primitive error checking on ondisk dquot data structures. 1926 */ 1927 int 1928 xfs_qm_dqcheck( 1929 struct xfs_mount *mp, 1930 xfs_disk_dquot_t *ddq, 1931 xfs_dqid_t id, 1932 uint type, /* used only when IO_dorepair is true */ 1933 uint flags, 1934 char *str) 1935 { 1936 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 1937 int errs = 0; 1938 1939 /* 1940 * We can encounter an uninitialized dquot buffer for 2 reasons: 1941 * 1. If we crash while deleting the quotainode(s), and those blks got 1942 * used for user data. This is because we take the path of regular 1943 * file deletion; however, the size field of quotainodes is never 1944 * updated, so all the tricks that we play in itruncate_finish 1945 * don't quite matter. 1946 * 1947 * 2. We don't play the quota buffers when there's a quotaoff logitem. 1948 * But the allocation will be replayed so we'll end up with an 1949 * uninitialized quota block. 1950 * 1951 * This is all fine; things are still consistent, and we haven't lost 1952 * any quota information. Just don't complain about bad dquot blks. 
1953 */ 1954 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { 1955 if (flags & XFS_QMOPT_DOWARN) 1956 xfs_alert(mp, 1957 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 1958 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 1959 errs++; 1960 } 1961 if (ddq->d_version != XFS_DQUOT_VERSION) { 1962 if (flags & XFS_QMOPT_DOWARN) 1963 xfs_alert(mp, 1964 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 1965 str, id, ddq->d_version, XFS_DQUOT_VERSION); 1966 errs++; 1967 } 1968 1969 if (ddq->d_flags != XFS_DQ_USER && 1970 ddq->d_flags != XFS_DQ_PROJ && 1971 ddq->d_flags != XFS_DQ_GROUP) { 1972 if (flags & XFS_QMOPT_DOWARN) 1973 xfs_alert(mp, 1974 "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 1975 str, id, ddq->d_flags); 1976 errs++; 1977 } 1978 1979 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 1980 if (flags & XFS_QMOPT_DOWARN) 1981 xfs_alert(mp, 1982 "%s : ondisk-dquot 0x%p, ID mismatch: " 1983 "0x%x expected, found id 0x%x", 1984 str, ddq, id, be32_to_cpu(ddq->d_id)); 1985 errs++; 1986 } 1987 1988 if (!errs && ddq->d_id) { 1989 if (ddq->d_blk_softlimit && 1990 be64_to_cpu(ddq->d_bcount) >= 1991 be64_to_cpu(ddq->d_blk_softlimit)) { 1992 if (!ddq->d_btimer) { 1993 if (flags & XFS_QMOPT_DOWARN) 1994 xfs_alert(mp, 1995 "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", 1996 str, (int)be32_to_cpu(ddq->d_id), ddq); 1997 errs++; 1998 } 1999 } 2000 if (ddq->d_ino_softlimit && 2001 be64_to_cpu(ddq->d_icount) >= 2002 be64_to_cpu(ddq->d_ino_softlimit)) { 2003 if (!ddq->d_itimer) { 2004 if (flags & XFS_QMOPT_DOWARN) 2005 xfs_alert(mp, 2006 "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", 2007 str, (int)be32_to_cpu(ddq->d_id), ddq); 2008 errs++; 2009 } 2010 } 2011 if (ddq->d_rtb_softlimit && 2012 be64_to_cpu(ddq->d_rtbcount) >= 2013 be64_to_cpu(ddq->d_rtb_softlimit)) { 2014 if (!ddq->d_rtbtimer) { 2015 if (flags & XFS_QMOPT_DOWARN) 2016 xfs_alert(mp, 2017 "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", 2018 str, (int)be32_to_cpu(ddq->d_id), ddq); 2019 errs++; 2020 } 2021 } 2022 } 2023 2024 if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) 2025 return errs; 2026 2027 if (flags & XFS_QMOPT_DOWARN) 2028 xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); 2029 2030 /* 2031 * Typically, a repair is only requested by quotacheck. 2032 */ 2033 ASSERT(id != -1); 2034 ASSERT(flags & XFS_QMOPT_DQREPAIR); 2035 memset(d, 0, sizeof(xfs_dqblk_t)); 2036 2037 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 2038 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 2039 d->dd_diskdq.d_flags = type; 2040 d->dd_diskdq.d_id = cpu_to_be32(id); 2041 2042 return errs; 2043 } 2044 2045 /* 2046 * Perform a dquot buffer recovery. 2047 * Simple algorithm: if we have found a QUOTAOFF logitem of the same type 2048 * (ie. USR or GRP), then just toss this buffer away; don't recover it. 2049 * Else, treat it as a regular buffer and do recovery. 2050 */ 2051 STATIC void 2052 xlog_recover_do_dquot_buffer( 2053 xfs_mount_t *mp, 2054 xlog_t *log, 2055 xlog_recover_item_t *item, 2056 xfs_buf_t *bp, 2057 xfs_buf_log_format_t *buf_f) 2058 { 2059 uint type; 2060 2061 trace_xfs_log_recover_buf_dquot_buf(log, buf_f); 2062 2063 /* 2064 * Filesystems are required to send in quota flags at mount time. 
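 *
 * (Illustrative note on how this ties in with pass 1: if, say, a
 * user quotaoff logitem was seen earlier in the log,
 * xlog_recover_quotaoff_pass1() will have set XFS_DQ_USER in
 * log->l_quotaoffs_flag, so a buffer flagged XFS_BLF_UDQUOT_BUF is
 * skipped by the l_quotaoffs_flag check below rather than replayed.)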
2065 */ 2066 if (mp->m_qflags == 0) { 2067 return; 2068 } 2069 2070 type = 0; 2071 if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) 2072 type |= XFS_DQ_USER; 2073 if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) 2074 type |= XFS_DQ_PROJ; 2075 if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) 2076 type |= XFS_DQ_GROUP; 2077 /* 2078 * This type of quotas was turned off, so ignore this buffer 2079 */ 2080 if (log->l_quotaoffs_flag & type) 2081 return; 2082 2083 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2084 } 2085 2086 /* 2087 * This routine replays a modification made to a buffer at runtime. 2088 * There are actually two types of buffer, regular and inode, which 2089 * are handled differently. Inode buffers are handled differently 2090 * in that we only recover a specific set of data from them, namely 2091 * the inode di_next_unlinked fields. This is because all other inode 2092 * data is actually logged via inode records and any data we replay 2093 * here which overlaps that may be stale. 2094 * 2095 * When meta-data buffers are freed at run time we log a buffer item 2096 * with the XFS_BLF_CANCEL bit set to indicate that previous copies 2097 * of the buffer in the log should not be replayed at recovery time. 2098 * This is so that if the blocks covered by the buffer are reused for 2099 * file data before we crash we don't end up replaying old, freed 2100 * meta-data into a user's file. 2101 * 2102 * To handle the cancellation of buffer log items, we make two passes 2103 * over the log during recovery. During the first we build a table of 2104 * those buffers which have been cancelled, and during the second we 2105 * only replay those buffers which do not have corresponding cancel 2106 * records in the table. See xlog_recover_do_buffer_pass[1,2] above 2107 * for more details on the implementation of the table of cancel records. 2108 */ 2109 STATIC int 2110 xlog_recover_buffer_pass2( 2111 xlog_t *log, 2112 xlog_recover_item_t *item) 2113 { 2114 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 2115 xfs_mount_t *mp = log->l_mp; 2116 xfs_buf_t *bp; 2117 int error; 2118 uint buf_flags; 2119 2120 /* 2121 * In this pass we only want to recover all the buffers which have 2122 * not been cancelled and are not cancellation buffers themselves. 2123 */ 2124 if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, 2125 buf_f->blf_len, buf_f->blf_flags)) { 2126 trace_xfs_log_recover_buf_cancel(log, buf_f); 2127 return 0; 2128 } 2129 2130 trace_xfs_log_recover_buf_recover(log, buf_f); 2131 2132 buf_flags = XBF_LOCK; 2133 if (!(buf_f->blf_flags & XFS_BLF_INODE_BUF)) 2134 buf_flags |= XBF_MAPPED; 2135 2136 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, 2137 buf_flags); 2138 if (XFS_BUF_ISERROR(bp)) { 2139 xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, 2140 bp, buf_f->blf_blkno); 2141 error = XFS_BUF_GETERROR(bp); 2142 xfs_buf_relse(bp); 2143 return error; 2144 } 2145 2146 error = 0; 2147 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 2148 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2149 } else if (buf_f->blf_flags & 2150 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2151 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2152 } else { 2153 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2154 } 2155 if (error) 2156 return XFS_ERROR(error); 2157 2158 /* 2159 * Perform delayed write on the buffer. Asynchronous writes will be 2160 * slower when taking into account all the buffers to be flushed. 
2161 * 2162 * Also make sure that only inode buffers with good sizes stay in 2163 * the buffer cache. The kernel moves inodes in buffers of 1 block 2164 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode 2165 * buffers in the log can be a different size if the log was generated 2166 * by an older kernel using unclustered inode buffers or a newer kernel 2167 * running with a different inode cluster size. Regardless, if the 2168 * inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) 2169 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep 2170 * the buffer out of the buffer cache so that the buffer won't 2171 * overlap with future reads of those inodes. 2172 */ 2173 if (XFS_DINODE_MAGIC == 2174 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 2175 (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize, 2176 (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { 2177 XFS_BUF_STALE(bp); 2178 error = xfs_bwrite(mp, bp); 2179 } else { 2180 ASSERT(bp->b_target->bt_mount == mp); 2181 bp->b_iodone = xlog_recover_iodone; 2182 xfs_bdwrite(mp, bp); 2183 } 2184 2185 return (error); 2186 } 2187 2188 STATIC int 2189 xlog_recover_inode_pass2( 2190 xlog_t *log, 2191 xlog_recover_item_t *item) 2192 { 2193 xfs_inode_log_format_t *in_f; 2194 xfs_mount_t *mp = log->l_mp; 2195 xfs_buf_t *bp; 2196 xfs_dinode_t *dip; 2197 int len; 2198 xfs_caddr_t src; 2199 xfs_caddr_t dest; 2200 int error; 2201 int attr_index; 2202 uint fields; 2203 xfs_icdinode_t *dicp; 2204 int need_free = 0; 2205 2206 if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) { 2207 in_f = item->ri_buf[0].i_addr; 2208 } else { 2209 in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP); 2210 need_free = 1; 2211 error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); 2212 if (error) 2213 goto error; 2214 } 2215 2216 /* 2217 * Inode buffers can be freed, look out for it, 2218 * and do not replay the inode. 2219 */ 2220 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, 2221 in_f->ilf_len, 0)) { 2222 error = 0; 2223 trace_xfs_log_recover_inode_cancel(log, in_f); 2224 goto error; 2225 } 2226 trace_xfs_log_recover_inode_recover(log, in_f); 2227 2228 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 2229 XBF_LOCK); 2230 if (XFS_BUF_ISERROR(bp)) { 2231 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2232 bp, in_f->ilf_blkno); 2233 error = XFS_BUF_GETERROR(bp); 2234 xfs_buf_relse(bp); 2235 goto error; 2236 } 2237 error = 0; 2238 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2239 dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); 2240 2241 /* 2242 * Make sure the place we're flushing out to really looks 2243 * like an inode!
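 *
 * Note that the two magic number checks below are intentionally
 * different: the on-disk inode is big-endian, so dip->di_magic is
 * compared against cpu_to_be16(XFS_DINODE_MAGIC), while the logged
 * copy in item->ri_buf[1] is an in-core xfs_icdinode_t whose
 * di_magic is already in host byte order.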
2244 */ 2245 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { 2246 xfs_buf_relse(bp); 2247 xfs_alert(mp, 2248 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 2249 __func__, dip, bp, in_f->ilf_ino); 2250 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 2251 XFS_ERRLEVEL_LOW, mp); 2252 error = EFSCORRUPTED; 2253 goto error; 2254 } 2255 dicp = item->ri_buf[1].i_addr; 2256 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2257 xfs_buf_relse(bp); 2258 xfs_alert(mp, 2259 "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", 2260 __func__, item, in_f->ilf_ino); 2261 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 2262 XFS_ERRLEVEL_LOW, mp); 2263 error = EFSCORRUPTED; 2264 goto error; 2265 } 2266 2267 /* Skip replay when the on disk inode is newer than the log one */ 2268 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2269 /* 2270 * Deal with the wrap case, DI_MAX_FLUSH is less 2271 * than smaller numbers 2272 */ 2273 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && 2274 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { 2275 /* do nothing */ 2276 } else { 2277 xfs_buf_relse(bp); 2278 trace_xfs_log_recover_inode_skip(log, in_f); 2279 error = 0; 2280 goto error; 2281 } 2282 } 2283 /* Take the opportunity to reset the flush iteration count */ 2284 dicp->di_flushiter = 0; 2285 2286 if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) { 2287 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2288 (dicp->di_format != XFS_DINODE_FMT_BTREE)) { 2289 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 2290 XFS_ERRLEVEL_LOW, mp, dicp); 2291 xfs_buf_relse(bp); 2292 xfs_alert(mp, 2293 "%s: Bad regular inode log record, rec ptr 0x%p, " 2294 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2295 __func__, item, dip, bp, in_f->ilf_ino); 2296 error = EFSCORRUPTED; 2297 goto error; 2298 } 2299 } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) { 2300 if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && 2301 (dicp->di_format != XFS_DINODE_FMT_BTREE) && 2302 (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { 2303 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 2304 XFS_ERRLEVEL_LOW, mp, dicp); 2305 xfs_buf_relse(bp); 2306 xfs_alert(mp, 2307 "%s: Bad dir inode log record, rec ptr 0x%p, " 2308 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2309 __func__, item, dip, bp, in_f->ilf_ino); 2310 error = EFSCORRUPTED; 2311 goto error; 2312 } 2313 } 2314 if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ 2315 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 2316 XFS_ERRLEVEL_LOW, mp, dicp); 2317 xfs_buf_relse(bp); 2318 xfs_alert(mp, 2319 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 2320 "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2321 __func__, item, dip, bp, in_f->ilf_ino, 2322 dicp->di_nextents + dicp->di_anextents, 2323 dicp->di_nblocks); 2324 error = EFSCORRUPTED; 2325 goto error; 2326 } 2327 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2328 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 2329 XFS_ERRLEVEL_LOW, mp, dicp); 2330 xfs_buf_relse(bp); 2331 xfs_alert(mp, 2332 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 2333 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, 2334 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); 2335 error = EFSCORRUPTED; 2336 goto error; 2337 } 2338 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { 2339 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 2340 XFS_ERRLEVEL_LOW, mp, dicp); 2341 xfs_buf_relse(bp); 2342 xfs_alert(mp, 2343 "%s: Bad inode log record 
length %d, rec ptr 0x%p", 2344 __func__, item->ri_buf[1].i_len, item); 2345 error = EFSCORRUPTED; 2346 goto error; 2347 } 2348 2349 /* The core is in in-core format */ 2350 xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr); 2351 2352 /* the rest is in on-disk format */ 2353 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { 2354 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), 2355 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), 2356 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); 2357 } 2358 2359 fields = in_f->ilf_fields; 2360 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { 2361 case XFS_ILOG_DEV: 2362 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); 2363 break; 2364 case XFS_ILOG_UUID: 2365 memcpy(XFS_DFORK_DPTR(dip), 2366 &in_f->ilf_u.ilfu_uuid, 2367 sizeof(uuid_t)); 2368 break; 2369 } 2370 2371 if (in_f->ilf_size == 2) 2372 goto write_inode_buffer; 2373 len = item->ri_buf[2].i_len; 2374 src = item->ri_buf[2].i_addr; 2375 ASSERT(in_f->ilf_size <= 4); 2376 ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); 2377 ASSERT(!(fields & XFS_ILOG_DFORK) || 2378 (len == in_f->ilf_dsize)); 2379 2380 switch (fields & XFS_ILOG_DFORK) { 2381 case XFS_ILOG_DDATA: 2382 case XFS_ILOG_DEXT: 2383 memcpy(XFS_DFORK_DPTR(dip), src, len); 2384 break; 2385 2386 case XFS_ILOG_DBROOT: 2387 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, 2388 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), 2389 XFS_DFORK_DSIZE(dip, mp)); 2390 break; 2391 2392 default: 2393 /* 2394 * There are no data fork flags set. 2395 */ 2396 ASSERT((fields & XFS_ILOG_DFORK) == 0); 2397 break; 2398 } 2399 2400 /* 2401 * If we logged any attribute data, recover it. There may or 2402 * may not have been any other non-core data logged in this 2403 * transaction. 2404 */ 2405 if (in_f->ilf_fields & XFS_ILOG_AFORK) { 2406 if (in_f->ilf_fields & XFS_ILOG_DFORK) { 2407 attr_index = 3; 2408 } else { 2409 attr_index = 2; 2410 } 2411 len = item->ri_buf[attr_index].i_len; 2412 src = item->ri_buf[attr_index].i_addr; 2413 ASSERT(len == in_f->ilf_asize); 2414 2415 switch (in_f->ilf_fields & XFS_ILOG_AFORK) { 2416 case XFS_ILOG_ADATA: 2417 case XFS_ILOG_AEXT: 2418 dest = XFS_DFORK_APTR(dip); 2419 ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); 2420 memcpy(dest, src, len); 2421 break; 2422 2423 case XFS_ILOG_ABROOT: 2424 dest = XFS_DFORK_APTR(dip); 2425 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, 2426 len, (xfs_bmdr_block_t*)dest, 2427 XFS_DFORK_ASIZE(dip, mp)); 2428 break; 2429 2430 default: 2431 xfs_warn(log->l_mp, "%s: Invalid flag", __func__); 2432 ASSERT(0); 2433 xfs_buf_relse(bp); 2434 error = EIO; 2435 goto error; 2436 } 2437 } 2438 2439 write_inode_buffer: 2440 ASSERT(bp->b_target->bt_mount == mp); 2441 bp->b_iodone = xlog_recover_iodone; 2442 xfs_bdwrite(mp, bp); 2443 error: 2444 if (need_free) 2445 kmem_free(in_f); 2446 return XFS_ERROR(error); 2447 } 2448 2449 /* 2450 * Recover QUOTAOFF records. We simply make a note of it in the xlog_t 2451 * structure, so that we know not to do any dquot item or dquot buffer recovery, 2452 * of that type. 2453 */ 2454 STATIC int 2455 xlog_recover_quotaoff_pass1( 2456 xlog_t *log, 2457 xlog_recover_item_t *item) 2458 { 2459 xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; 2460 ASSERT(qoff_f); 2461 2462 /* 2463 * The logitem format's flag tells us if this was user quotaoff, 2464 * group/project quotaoff or both. 
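 *
 * For example (illustrative only): turning off both user and group
 * quotas logs a qoff_f with XFS_UQUOTA_ACCT and XFS_GQUOTA_ACCT set,
 * so both XFS_DQ_USER and XFS_DQ_GROUP end up in l_quotaoffs_flag
 * and neither kind of dquot is replayed in pass 2.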
2465 */ 2466 if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) 2467 log->l_quotaoffs_flag |= XFS_DQ_USER; 2468 if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) 2469 log->l_quotaoffs_flag |= XFS_DQ_PROJ; 2470 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2471 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2472 2473 return (0); 2474 } 2475 2476 /* 2477 * Recover a dquot record 2478 */ 2479 STATIC int 2480 xlog_recover_dquot_pass2( 2481 xlog_t *log, 2482 xlog_recover_item_t *item) 2483 { 2484 xfs_mount_t *mp = log->l_mp; 2485 xfs_buf_t *bp; 2486 struct xfs_disk_dquot *ddq, *recddq; 2487 int error; 2488 xfs_dq_logformat_t *dq_f; 2489 uint type; 2490 2491 2492 /* 2493 * Filesystems are required to send in quota flags at mount time. 2494 */ 2495 if (mp->m_qflags == 0) 2496 return (0); 2497 2498 recddq = item->ri_buf[1].i_addr; 2499 if (recddq == NULL) { 2500 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); 2501 return XFS_ERROR(EIO); 2502 } 2503 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { 2504 xfs_alert(log->l_mp, "dquot too small (%d) in %s.", 2505 item->ri_buf[1].i_len, __func__); 2506 return XFS_ERROR(EIO); 2507 } 2508 2509 /* 2510 * This type of quotas was turned off, so ignore this record. 2511 */ 2512 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); 2513 ASSERT(type); 2514 if (log->l_quotaoffs_flag & type) 2515 return (0); 2516 2517 /* 2518 * At this point we know that quota was _not_ turned off. 2519 * Since the mount flags are not indicating to us otherwise, this 2520 * must mean that quota is on, and the dquot needs to be replayed. 2521 * Remember that we may not have fully recovered the superblock yet, 2522 * so we can't do the usual trick of looking at the SB quota bits. 2523 * 2524 * The other possibility, of course, is that the quota subsystem was 2525 * removed since the last mount - ENOSYS. 2526 */ 2527 dq_f = item->ri_buf[0].i_addr; 2528 ASSERT(dq_f); 2529 error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2530 "xlog_recover_dquot_pass2 (log copy)"); 2531 if (error) 2532 return XFS_ERROR(EIO); 2533 ASSERT(dq_f->qlf_len == 1); 2534 2535 error = xfs_read_buf(mp, mp->m_ddev_targp, 2536 dq_f->qlf_blkno, 2537 XFS_FSB_TO_BB(mp, dq_f->qlf_len), 2538 0, &bp); 2539 if (error) { 2540 xfs_ioerror_alert("xlog_recover_do..(read#3)", mp, 2541 bp, dq_f->qlf_blkno); 2542 return error; 2543 } 2544 ASSERT(bp); 2545 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); 2546 2547 /* 2548 * At least the magic num portion should be on disk because this 2549 * was among a chunk of dquots created earlier, and we did some 2550 * minimal initialization then. 2551 */ 2552 error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2553 "xlog_recover_dquot_pass2"); 2554 if (error) { 2555 xfs_buf_relse(bp); 2556 return XFS_ERROR(EIO); 2557 } 2558 2559 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2560 2561 ASSERT(dq_f->qlf_size == 2); 2562 ASSERT(bp->b_target->bt_mount == mp); 2563 bp->b_iodone = xlog_recover_iodone; 2564 xfs_bdwrite(mp, bp); 2565 2566 return (0); 2567 } 2568 2569 /* 2570 * This routine is called to create an in-core extent free intent 2571 * item from the efi format structure which was logged on disk. 2572 * It allocates an in-core efi, copies the extents from the format 2573 * structure into it, and adds the efi to the AIL with the given 2574 * LSN. 
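 *
 * Simplified sketch of the intent/done pairing: the efi parked in
 * the AIL here is only acted upon if no matching done record shows
 * up later in the log.  xlog_recover_efd_pass2() below cancels it
 * again when it finds an AIL item with
 *
 *	lip->li_type == XFS_LI_EFI &&
 *	efip->efi_format.efi_id == efd_formatp->efd_efi_id
 *
 * and any efi still left in the AIL after replay is finished off
 * by xlog_recover_process_efis().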
2575 */ 2576 STATIC int 2577 xlog_recover_efi_pass2( 2578 xlog_t *log, 2579 xlog_recover_item_t *item, 2580 xfs_lsn_t lsn) 2581 { 2582 int error; 2583 xfs_mount_t *mp = log->l_mp; 2584 xfs_efi_log_item_t *efip; 2585 xfs_efi_log_format_t *efi_formatp; 2586 2587 efi_formatp = item->ri_buf[0].i_addr; 2588 2589 efip = xfs_efi_init(mp, efi_formatp->efi_nextents); 2590 if ((error = xfs_efi_copy_format(&(item->ri_buf[0]), 2591 &(efip->efi_format)))) { 2592 xfs_efi_item_free(efip); 2593 return error; 2594 } 2595 atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); 2596 2597 spin_lock(&log->l_ailp->xa_lock); 2598 /* 2599 * xfs_trans_ail_update() drops the AIL lock. 2600 */ 2601 xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); 2602 return 0; 2603 } 2604 2605 2606 /* 2607 * This routine is called when an efd format structure is found in 2608 * a committed transaction in the log. Its purpose is to cancel 2609 * the corresponding efi if it was still in the log. To do this 2610 * it searches the AIL for the efi with an id equal to that in the 2611 * efd format structure. If we find it, we remove the efi from the 2612 * AIL and free it. 2613 */ 2614 STATIC int 2615 xlog_recover_efd_pass2( 2616 xlog_t *log, 2617 xlog_recover_item_t *item) 2618 { 2619 xfs_efd_log_format_t *efd_formatp; 2620 xfs_efi_log_item_t *efip = NULL; 2621 xfs_log_item_t *lip; 2622 __uint64_t efi_id; 2623 struct xfs_ail_cursor cur; 2624 struct xfs_ail *ailp = log->l_ailp; 2625 2626 efd_formatp = item->ri_buf[0].i_addr; 2627 ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + 2628 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || 2629 (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + 2630 ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); 2631 efi_id = efd_formatp->efd_efi_id; 2632 2633 /* 2634 * Search for the efi with the id in the efd format structure 2635 * in the AIL. 2636 */ 2637 spin_lock(&ailp->xa_lock); 2638 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 2639 while (lip != NULL) { 2640 if (lip->li_type == XFS_LI_EFI) { 2641 efip = (xfs_efi_log_item_t *)lip; 2642 if (efip->efi_format.efi_id == efi_id) { 2643 /* 2644 * xfs_trans_ail_delete() drops the 2645 * AIL lock. 2646 */ 2647 xfs_trans_ail_delete(ailp, lip); 2648 xfs_efi_item_free(efip); 2649 spin_lock(&ailp->xa_lock); 2650 break; 2651 } 2652 } 2653 lip = xfs_trans_ail_cursor_next(ailp, &cur); 2654 } 2655 xfs_trans_ail_cursor_done(ailp, &cur); 2656 spin_unlock(&ailp->xa_lock); 2657 2658 return 0; 2659 } 2660 2661 /* 2662 * Free up any resources allocated by the transaction 2663 * 2664 * Remember that EFIs, EFDs, and IUNLINKs are handled later. 2665 */ 2666 STATIC void 2667 xlog_recover_free_trans( 2668 struct xlog_recover *trans) 2669 { 2670 xlog_recover_item_t *item, *n; 2671 int i; 2672 2673 list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { 2674 /* Free the regions in the item.
*/ 2675 list_del(&item->ri_list); 2676 for (i = 0; i < item->ri_cnt; i++) 2677 kmem_free(item->ri_buf[i].i_addr); 2678 /* Free the item itself */ 2679 kmem_free(item->ri_buf); 2680 kmem_free(item); 2681 } 2682 /* Free the transaction recover structure */ 2683 kmem_free(trans); 2684 } 2685 2686 STATIC int 2687 xlog_recover_commit_pass1( 2688 struct log *log, 2689 struct xlog_recover *trans, 2690 xlog_recover_item_t *item) 2691 { 2692 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); 2693 2694 switch (ITEM_TYPE(item)) { 2695 case XFS_LI_BUF: 2696 return xlog_recover_buffer_pass1(log, item); 2697 case XFS_LI_QUOTAOFF: 2698 return xlog_recover_quotaoff_pass1(log, item); 2699 case XFS_LI_INODE: 2700 case XFS_LI_EFI: 2701 case XFS_LI_EFD: 2702 case XFS_LI_DQUOT: 2703 /* nothing to do in pass 1 */ 2704 return 0; 2705 default: 2706 xfs_warn(log->l_mp, "%s: invalid item type (%d)", 2707 __func__, ITEM_TYPE(item)); 2708 ASSERT(0); 2709 return XFS_ERROR(EIO); 2710 } 2711 } 2712 2713 STATIC int 2714 xlog_recover_commit_pass2( 2715 struct log *log, 2716 struct xlog_recover *trans, 2717 xlog_recover_item_t *item) 2718 { 2719 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); 2720 2721 switch (ITEM_TYPE(item)) { 2722 case XFS_LI_BUF: 2723 return xlog_recover_buffer_pass2(log, item); 2724 case XFS_LI_INODE: 2725 return xlog_recover_inode_pass2(log, item); 2726 case XFS_LI_EFI: 2727 return xlog_recover_efi_pass2(log, item, trans->r_lsn); 2728 case XFS_LI_EFD: 2729 return xlog_recover_efd_pass2(log, item); 2730 case XFS_LI_DQUOT: 2731 return xlog_recover_dquot_pass2(log, item); 2732 case XFS_LI_QUOTAOFF: 2733 /* nothing to do in pass2 */ 2734 return 0; 2735 default: 2736 xfs_warn(log->l_mp, "%s: invalid item type (%d)", 2737 __func__, ITEM_TYPE(item)); 2738 ASSERT(0); 2739 return XFS_ERROR(EIO); 2740 } 2741 } 2742 2743 /* 2744 * Perform the transaction. 2745 * 2746 * If the transaction modifies a buffer or inode, do it now. Otherwise, 2747 * EFIs and EFDs get queued up by adding entries into the AIL for them. 2748 */ 2749 STATIC int 2750 xlog_recover_commit_trans( 2751 struct log *log, 2752 struct xlog_recover *trans, 2753 int pass) 2754 { 2755 int error = 0; 2756 xlog_recover_item_t *item; 2757 2758 hlist_del(&trans->r_list); 2759 2760 error = xlog_recover_reorder_trans(log, trans, pass); 2761 if (error) 2762 return error; 2763 2764 list_for_each_entry(item, &trans->r_itemq, ri_list) { 2765 if (pass == XLOG_RECOVER_PASS1) 2766 error = xlog_recover_commit_pass1(log, trans, item); 2767 else 2768 error = xlog_recover_commit_pass2(log, trans, item); 2769 if (error) 2770 return error; 2771 } 2772 2773 xlog_recover_free_trans(trans); 2774 return 0; 2775 } 2776 2777 STATIC int 2778 xlog_recover_unmount_trans( 2779 struct log *log, 2780 xlog_recover_t *trans) 2781 { 2782 /* Do nothing now */ 2783 xfs_warn(log->l_mp, "%s: Unmount LR", __func__); 2784 return 0; 2785 } 2786 2787 /* 2788 * There are two valid states of the r_state field. 0 indicates that the 2789 * transaction structure is in a normal state. We have either seen the 2790 * start of the transaction or the last operation we added was not a partial 2791 * operation. If the last operation we added to the transaction was a 2792 * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. 2793 * 2794 * NOTE: skip LRs with 0 data length. 
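 *
 * As a rough summary of the dispatch below (once XLOG_END_TRANS is
 * masked off and XLOG_WAS_CONT_TRANS has cleared
 * XLOG_CONTINUE_TRANS):
 *
 *	XLOG_START_TRANS        - new tid added to the hash, or an
 *	                          error if the tid is already known
 *	0 / XLOG_CONTINUE_TRANS - xlog_recover_add_to_trans()
 *	XLOG_WAS_CONT_TRANS     - xlog_recover_add_to_cont_trans()
 *	XLOG_COMMIT_TRANS       - xlog_recover_commit_trans()
 *	XLOG_UNMOUNT_TRANS      - xlog_recover_unmount_trans()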
2795 */ 2796 STATIC int 2797 xlog_recover_process_data( 2798 xlog_t *log, 2799 struct hlist_head rhash[], 2800 xlog_rec_header_t *rhead, 2801 xfs_caddr_t dp, 2802 int pass) 2803 { 2804 xfs_caddr_t lp; 2805 int num_logops; 2806 xlog_op_header_t *ohead; 2807 xlog_recover_t *trans; 2808 xlog_tid_t tid; 2809 int error; 2810 unsigned long hash; 2811 uint flags; 2812 2813 lp = dp + be32_to_cpu(rhead->h_len); 2814 num_logops = be32_to_cpu(rhead->h_num_logops); 2815 2816 /* check the log format matches our own - else we can't recover */ 2817 if (xlog_header_check_recover(log->l_mp, rhead)) 2818 return (XFS_ERROR(EIO)); 2819 2820 while ((dp < lp) && num_logops) { 2821 ASSERT(dp + sizeof(xlog_op_header_t) <= lp); 2822 ohead = (xlog_op_header_t *)dp; 2823 dp += sizeof(xlog_op_header_t); 2824 if (ohead->oh_clientid != XFS_TRANSACTION && 2825 ohead->oh_clientid != XFS_LOG) { 2826 xfs_warn(log->l_mp, "%s: bad clientid 0x%x", 2827 __func__, ohead->oh_clientid); 2828 ASSERT(0); 2829 return (XFS_ERROR(EIO)); 2830 } 2831 tid = be32_to_cpu(ohead->oh_tid); 2832 hash = XLOG_RHASH(tid); 2833 trans = xlog_recover_find_tid(&rhash[hash], tid); 2834 if (trans == NULL) { /* not found; add new tid */ 2835 if (ohead->oh_flags & XLOG_START_TRANS) 2836 xlog_recover_new_tid(&rhash[hash], tid, 2837 be64_to_cpu(rhead->h_lsn)); 2838 } else { 2839 if (dp + be32_to_cpu(ohead->oh_len) > lp) { 2840 xfs_warn(log->l_mp, "%s: bad length 0x%x", 2841 __func__, be32_to_cpu(ohead->oh_len)); 2842 WARN_ON(1); 2843 return (XFS_ERROR(EIO)); 2844 } 2845 flags = ohead->oh_flags & ~XLOG_END_TRANS; 2846 if (flags & XLOG_WAS_CONT_TRANS) 2847 flags &= ~XLOG_CONTINUE_TRANS; 2848 switch (flags) { 2849 case XLOG_COMMIT_TRANS: 2850 error = xlog_recover_commit_trans(log, 2851 trans, pass); 2852 break; 2853 case XLOG_UNMOUNT_TRANS: 2854 error = xlog_recover_unmount_trans(log, trans); 2855 break; 2856 case XLOG_WAS_CONT_TRANS: 2857 error = xlog_recover_add_to_cont_trans(log, 2858 trans, dp, 2859 be32_to_cpu(ohead->oh_len)); 2860 break; 2861 case XLOG_START_TRANS: 2862 xfs_warn(log->l_mp, "%s: bad transaction", 2863 __func__); 2864 ASSERT(0); 2865 error = XFS_ERROR(EIO); 2866 break; 2867 case 0: 2868 case XLOG_CONTINUE_TRANS: 2869 error = xlog_recover_add_to_trans(log, trans, 2870 dp, be32_to_cpu(ohead->oh_len)); 2871 break; 2872 default: 2873 xfs_warn(log->l_mp, "%s: bad flag 0x%x", 2874 __func__, flags); 2875 ASSERT(0); 2876 error = XFS_ERROR(EIO); 2877 break; 2878 } 2879 if (error) 2880 return error; 2881 } 2882 dp += be32_to_cpu(ohead->oh_len); 2883 num_logops--; 2884 } 2885 return 0; 2886 } 2887 2888 /* 2889 * Process an extent free intent item that was recovered from 2890 * the log. We need to free the extents that it describes. 2891 */ 2892 STATIC int 2893 xlog_recover_process_efi( 2894 xfs_mount_t *mp, 2895 xfs_efi_log_item_t *efip) 2896 { 2897 xfs_efd_log_item_t *efdp; 2898 xfs_trans_t *tp; 2899 int i; 2900 int error = 0; 2901 xfs_extent_t *extp; 2902 xfs_fsblock_t startblock_fsb; 2903 2904 ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); 2905 2906 /* 2907 * First check the validity of the extents described by the 2908 * EFI. If any are bad, then assume that all are bad and 2909 * just toss the EFI. 
2910 */ 2911 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 2912 extp = &(efip->efi_format.efi_extents[i]); 2913 startblock_fsb = XFS_BB_TO_FSB(mp, 2914 XFS_FSB_TO_DADDR(mp, extp->ext_start)); 2915 if ((startblock_fsb == 0) || 2916 (extp->ext_len == 0) || 2917 (startblock_fsb >= mp->m_sb.sb_dblocks) || 2918 (extp->ext_len >= mp->m_sb.sb_agblocks)) { 2919 /* 2920 * This will pull the EFI from the AIL and 2921 * free the memory associated with it. 2922 */ 2923 xfs_efi_release(efip, efip->efi_format.efi_nextents); 2924 return XFS_ERROR(EIO); 2925 } 2926 } 2927 2928 tp = xfs_trans_alloc(mp, 0); 2929 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); 2930 if (error) 2931 goto abort_error; 2932 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 2933 2934 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 2935 extp = &(efip->efi_format.efi_extents[i]); 2936 error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); 2937 if (error) 2938 goto abort_error; 2939 xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, 2940 extp->ext_len); 2941 } 2942 2943 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 2944 error = xfs_trans_commit(tp, 0); 2945 return error; 2946 2947 abort_error: 2948 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 2949 return error; 2950 } 2951 2952 /* 2953 * When this is called, all of the EFIs which did not have 2954 * corresponding EFDs should be in the AIL. What we do now 2955 * is free the extents associated with each one. 2956 * 2957 * Since we process the EFIs in normal transactions, they 2958 * will be removed at some point after the commit. This prevents 2959 * us from just walking down the list processing each one. 2960 * We'll use a flag in the EFI to skip those that we've already 2961 * processed and use the AIL iteration mechanism's generation 2962 * count to try to speed this up at least a bit. 2963 * 2964 * When we start, we know that the EFIs are the only things in 2965 * the AIL. As we process them, however, other items are added 2966 * to the AIL. Since everything added to the AIL must come after 2967 * everything already in the AIL, we stop processing as soon as 2968 * we see something other than an EFI in the AIL. 2969 */ 2970 STATIC int 2971 xlog_recover_process_efis( 2972 xlog_t *log) 2973 { 2974 xfs_log_item_t *lip; 2975 xfs_efi_log_item_t *efip; 2976 int error = 0; 2977 struct xfs_ail_cursor cur; 2978 struct xfs_ail *ailp; 2979 2980 ailp = log->l_ailp; 2981 spin_lock(&ailp->xa_lock); 2982 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 2983 while (lip != NULL) { 2984 /* 2985 * We're done when we see something other than an EFI. 2986 * There should be no EFIs left in the AIL now. 2987 */ 2988 if (lip->li_type != XFS_LI_EFI) { 2989 #ifdef DEBUG 2990 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 2991 ASSERT(lip->li_type != XFS_LI_EFI); 2992 #endif 2993 break; 2994 } 2995 2996 /* 2997 * Skip EFIs that we've already processed. 
2998 */ 2999 efip = (xfs_efi_log_item_t *)lip; 3000 if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { 3001 lip = xfs_trans_ail_cursor_next(ailp, &cur); 3002 continue; 3003 } 3004 3005 spin_unlock(&ailp->xa_lock); 3006 error = xlog_recover_process_efi(log->l_mp, efip); 3007 spin_lock(&ailp->xa_lock); 3008 if (error) 3009 goto out; 3010 lip = xfs_trans_ail_cursor_next(ailp, &cur); 3011 } 3012 out: 3013 xfs_trans_ail_cursor_done(ailp, &cur); 3014 spin_unlock(&ailp->xa_lock); 3015 return error; 3016 } 3017 3018 /* 3019 * This routine performs a transaction to null out a bad inode pointer 3020 * in an agi unlinked inode hash bucket. 3021 */ 3022 STATIC void 3023 xlog_recover_clear_agi_bucket( 3024 xfs_mount_t *mp, 3025 xfs_agnumber_t agno, 3026 int bucket) 3027 { 3028 xfs_trans_t *tp; 3029 xfs_agi_t *agi; 3030 xfs_buf_t *agibp; 3031 int offset; 3032 int error; 3033 3034 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 3035 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 3036 0, 0, 0); 3037 if (error) 3038 goto out_abort; 3039 3040 error = xfs_read_agi(mp, tp, agno, &agibp); 3041 if (error) 3042 goto out_abort; 3043 3044 agi = XFS_BUF_TO_AGI(agibp); 3045 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 3046 offset = offsetof(xfs_agi_t, agi_unlinked) + 3047 (sizeof(xfs_agino_t) * bucket); 3048 xfs_trans_log_buf(tp, agibp, offset, 3049 (offset + sizeof(xfs_agino_t) - 1)); 3050 3051 error = xfs_trans_commit(tp, 0); 3052 if (error) 3053 goto out_error; 3054 return; 3055 3056 out_abort: 3057 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3058 out_error: 3059 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); 3060 return; 3061 } 3062 3063 STATIC xfs_agino_t 3064 xlog_recover_process_one_iunlink( 3065 struct xfs_mount *mp, 3066 xfs_agnumber_t agno, 3067 xfs_agino_t agino, 3068 int bucket) 3069 { 3070 struct xfs_buf *ibp; 3071 struct xfs_dinode *dip; 3072 struct xfs_inode *ip; 3073 xfs_ino_t ino; 3074 int error; 3075 3076 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3077 error = xfs_iget(mp, NULL, ino, 0, 0, &ip); 3078 if (error) 3079 goto fail; 3080 3081 /* 3082 * Get the on disk inode to find the next inode in the bucket. 3083 */ 3084 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); 3085 if (error) 3086 goto fail_iput; 3087 3088 ASSERT(ip->i_d.di_nlink == 0); 3089 ASSERT(ip->i_d.di_mode != 0); 3090 3091 /* setup for the next pass */ 3092 agino = be32_to_cpu(dip->di_next_unlinked); 3093 xfs_buf_relse(ibp); 3094 3095 /* 3096 * Prevent any DMAPI event from being sent when the reference on 3097 * the inode is dropped. 3098 */ 3099 ip->i_d.di_dmevmask = 0; 3100 3101 IRELE(ip); 3102 return agino; 3103 3104 fail_iput: 3105 IRELE(ip); 3106 fail: 3107 /* 3108 * We can't read in the inode this bucket points to, or this inode 3109 * is messed up. Just ditch this bucket of inodes. We will lose 3110 * some inodes and space, but at least we won't hang. 3111 * 3112 * Call xlog_recover_clear_agi_bucket() to perform a transaction to 3113 * clear the inode pointer in the bucket. 3114 */ 3115 xlog_recover_clear_agi_bucket(mp, agno, bucket); 3116 return NULLAGINO; 3117 } 3118 3119 /* 3120 * xlog_iunlink_recover 3121 * 3122 * This is called during recovery to process any inodes which 3123 * we unlinked but not freed when the system crashed. These 3124 * inodes will be on the lists in the AGI blocks. What we do 3125 * here is scan all the AGIs and fully truncate and free any 3126 * inodes found on the lists. 
Each inode is removed from the 3127 * lists when it has been fully truncated and is freed. The 3128 * freeing of the inode and its removal from the list must be 3129 * atomic. 3130 */ 3131 STATIC void 3132 xlog_recover_process_iunlinks( 3133 xlog_t *log) 3134 { 3135 xfs_mount_t *mp; 3136 xfs_agnumber_t agno; 3137 xfs_agi_t *agi; 3138 xfs_buf_t *agibp; 3139 xfs_agino_t agino; 3140 int bucket; 3141 int error; 3142 uint mp_dmevmask; 3143 3144 mp = log->l_mp; 3145 3146 /* 3147 * Prevent any DMAPI event from being sent while in this function. 3148 */ 3149 mp_dmevmask = mp->m_dmevmask; 3150 mp->m_dmevmask = 0; 3151 3152 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3153 /* 3154 * Find the agi for this ag. 3155 */ 3156 error = xfs_read_agi(mp, NULL, agno, &agibp); 3157 if (error) { 3158 /* 3159 * AGI is b0rked. Don't process it. 3160 * 3161 * We should probably mark the filesystem as corrupt 3162 * after we've recovered all the ag's we can.... 3163 */ 3164 continue; 3165 } 3166 agi = XFS_BUF_TO_AGI(agibp); 3167 3168 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 3169 agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3170 while (agino != NULLAGINO) { 3171 /* 3172 * Release the agi buffer so that it can 3173 * be acquired in the normal course of the 3174 * transaction to truncate and free the inode. 3175 */ 3176 xfs_buf_relse(agibp); 3177 3178 agino = xlog_recover_process_one_iunlink(mp, 3179 agno, agino, bucket); 3180 3181 /* 3182 * Reacquire the agibuffer and continue around 3183 * the loop. This should never fail as we know 3184 * the buffer was good earlier on. 3185 */ 3186 error = xfs_read_agi(mp, NULL, agno, &agibp); 3187 ASSERT(error == 0); 3188 agi = XFS_BUF_TO_AGI(agibp); 3189 } 3190 } 3191 3192 /* 3193 * Release the buffer for the current agi so we can 3194 * go on to the next one. 
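 *
 * As a rough picture of the chain that was just walked
 * (illustrative):
 *
 *	agi_unlinked[bucket] -> agino A -> agino B -> ... -> NULLAGINO
 *
 * where each arrow is the on-disk di_next_unlinked field of the
 * previous inode; xlog_recover_process_one_iunlink() returns the
 * next agino in the chain (or NULLAGINO after clearing a bad
 * bucket), which is what terminates the while loop above.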
3195 */ 3196 xfs_buf_relse(agibp); 3197 } 3198 3199 mp->m_dmevmask = mp_dmevmask; 3200 } 3201 3202 3203 #ifdef DEBUG 3204 STATIC void 3205 xlog_pack_data_checksum( 3206 xlog_t *log, 3207 xlog_in_core_t *iclog, 3208 int size) 3209 { 3210 int i; 3211 __be32 *up; 3212 uint chksum = 0; 3213 3214 up = (__be32 *)iclog->ic_datap; 3215 /* divide length by 4 to get # words */ 3216 for (i = 0; i < (size >> 2); i++) { 3217 chksum ^= be32_to_cpu(*up); 3218 up++; 3219 } 3220 iclog->ic_header.h_chksum = cpu_to_be32(chksum); 3221 } 3222 #else 3223 #define xlog_pack_data_checksum(log, iclog, size) 3224 #endif 3225 3226 /* 3227 * Stamp cycle number in every block 3228 */ 3229 void 3230 xlog_pack_data( 3231 xlog_t *log, 3232 xlog_in_core_t *iclog, 3233 int roundoff) 3234 { 3235 int i, j, k; 3236 int size = iclog->ic_offset + roundoff; 3237 __be32 cycle_lsn; 3238 xfs_caddr_t dp; 3239 3240 xlog_pack_data_checksum(log, iclog, size); 3241 3242 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); 3243 3244 dp = iclog->ic_datap; 3245 for (i = 0; i < BTOBB(size) && 3246 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3247 iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; 3248 *(__be32 *)dp = cycle_lsn; 3249 dp += BBSIZE; 3250 } 3251 3252 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3253 xlog_in_core_2_t *xhdr = iclog->ic_data; 3254 3255 for ( ; i < BTOBB(size); i++) { 3256 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3257 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3258 xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; 3259 *(__be32 *)dp = cycle_lsn; 3260 dp += BBSIZE; 3261 } 3262 3263 for (i = 1; i < log->l_iclog_heads; i++) { 3264 xhdr[i].hic_xheader.xh_cycle = cycle_lsn; 3265 } 3266 } 3267 } 3268 3269 STATIC void 3270 xlog_unpack_data( 3271 xlog_rec_header_t *rhead, 3272 xfs_caddr_t dp, 3273 xlog_t *log) 3274 { 3275 int i, j, k; 3276 3277 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 3278 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3279 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; 3280 dp += BBSIZE; 3281 } 3282 3283 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3284 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; 3285 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 3286 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3287 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3288 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; 3289 dp += BBSIZE; 3290 } 3291 } 3292 } 3293 3294 STATIC int 3295 xlog_valid_rec_header( 3296 xlog_t *log, 3297 xlog_rec_header_t *rhead, 3298 xfs_daddr_t blkno) 3299 { 3300 int hlen; 3301 3302 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { 3303 XFS_ERROR_REPORT("xlog_valid_rec_header(1)", 3304 XFS_ERRLEVEL_LOW, log->l_mp); 3305 return XFS_ERROR(EFSCORRUPTED); 3306 } 3307 if (unlikely( 3308 (!rhead->h_version || 3309 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3310 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", 3311 __func__, be32_to_cpu(rhead->h_version)); 3312 return XFS_ERROR(EIO); 3313 } 3314 3315 /* LR body must have data or it wouldn't have been written */ 3316 hlen = be32_to_cpu(rhead->h_len); 3317 if (unlikely( hlen <= 0 || hlen > INT_MAX )) { 3318 XFS_ERROR_REPORT("xlog_valid_rec_header(2)", 3319 XFS_ERRLEVEL_LOW, log->l_mp); 3320 return XFS_ERROR(EFSCORRUPTED); 3321 } 3322 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { 3323 XFS_ERROR_REPORT("xlog_valid_rec_header(3)", 3324 XFS_ERRLEVEL_LOW, log->l_mp); 3325 return XFS_ERROR(EFSCORRUPTED); 3326 } 3327 return 0; 3328 } 3329 3330 /* 3331 * Read 
the log from tail to head and process the log records found. 3332 * Handle the two cases where the tail and head are in the same cycle 3333 * and where the active portion of the log wraps around the end of 3334 * the physical log separately. The pass parameter is passed through 3335 * to the routines called to process the data and is not looked at 3336 * here. 3337 */ 3338 STATIC int 3339 xlog_do_recovery_pass( 3340 xlog_t *log, 3341 xfs_daddr_t head_blk, 3342 xfs_daddr_t tail_blk, 3343 int pass) 3344 { 3345 xlog_rec_header_t *rhead; 3346 xfs_daddr_t blk_no; 3347 xfs_caddr_t offset; 3348 xfs_buf_t *hbp, *dbp; 3349 int error = 0, h_size; 3350 int bblks, split_bblks; 3351 int hblks, split_hblks, wrapped_hblks; 3352 struct hlist_head rhash[XLOG_RHASH_SIZE]; 3353 3354 ASSERT(head_blk != tail_blk); 3355 3356 /* 3357 * Read the header of the tail block and get the iclog buffer size from 3358 * h_size. Use this to tell how many sectors make up the log header. 3359 */ 3360 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3361 /* 3362 * When using variable length iclogs, read first sector of 3363 * iclog header and extract the header size from it. Get a 3364 * new hbp that is the correct size. 3365 */ 3366 hbp = xlog_get_bp(log, 1); 3367 if (!hbp) 3368 return ENOMEM; 3369 3370 error = xlog_bread(log, tail_blk, 1, hbp, &offset); 3371 if (error) 3372 goto bread_err1; 3373 3374 rhead = (xlog_rec_header_t *)offset; 3375 error = xlog_valid_rec_header(log, rhead, tail_blk); 3376 if (error) 3377 goto bread_err1; 3378 h_size = be32_to_cpu(rhead->h_size); 3379 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && 3380 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 3381 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 3382 if (h_size % XLOG_HEADER_CYCLE_SIZE) 3383 hblks++; 3384 xlog_put_bp(hbp); 3385 hbp = xlog_get_bp(log, hblks); 3386 } else { 3387 hblks = 1; 3388 } 3389 } else { 3390 ASSERT(log->l_sectBBsize == 1); 3391 hblks = 1; 3392 hbp = xlog_get_bp(log, 1); 3393 h_size = XLOG_BIG_RECORD_BSIZE; 3394 } 3395 3396 if (!hbp) 3397 return ENOMEM; 3398 dbp = xlog_get_bp(log, BTOBB(h_size)); 3399 if (!dbp) { 3400 xlog_put_bp(hbp); 3401 return ENOMEM; 3402 } 3403 3404 memset(rhash, 0, sizeof(rhash)); 3405 if (tail_blk <= head_blk) { 3406 for (blk_no = tail_blk; blk_no < head_blk; ) { 3407 error = xlog_bread(log, blk_no, hblks, hbp, &offset); 3408 if (error) 3409 goto bread_err2; 3410 3411 rhead = (xlog_rec_header_t *)offset; 3412 error = xlog_valid_rec_header(log, rhead, blk_no); 3413 if (error) 3414 goto bread_err2; 3415 3416 /* blocks in data section */ 3417 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3418 error = xlog_bread(log, blk_no + hblks, bblks, dbp, 3419 &offset); 3420 if (error) 3421 goto bread_err2; 3422 3423 xlog_unpack_data(rhead, offset, log); 3424 if ((error = xlog_recover_process_data(log, 3425 rhash, rhead, offset, pass))) 3426 goto bread_err2; 3427 blk_no += bblks + hblks; 3428 } 3429 } else { 3430 /* 3431 * Perform recovery around the end of the physical log. 3432 * When the head is not on the same cycle number as the tail, 3433 * we can't do a sequential recovery as above. 
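 *
 * Pictorially (illustrative only), with the active region wrapping
 * the physical end of the log:
 *
 *	0           head_blk          tail_blk         l_logBBsize
 *	|--records--|.......free.......|------records------|
 *
 * so the loop below first walks tail_blk..l_logBBsize-1 and then
 * wraps to block 0 and continues up to head_blk.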
3434 */ 3435 blk_no = tail_blk; 3436 while (blk_no < log->l_logBBsize) { 3437 /* 3438 * Check for header wrapping around physical end-of-log 3439 */ 3440 offset = XFS_BUF_PTR(hbp); 3441 split_hblks = 0; 3442 wrapped_hblks = 0; 3443 if (blk_no + hblks <= log->l_logBBsize) { 3444 /* Read header in one read */ 3445 error = xlog_bread(log, blk_no, hblks, hbp, 3446 &offset); 3447 if (error) 3448 goto bread_err2; 3449 } else { 3450 /* This LR is split across physical log end */ 3451 if (blk_no != log->l_logBBsize) { 3452 /* some data before physical log end */ 3453 ASSERT(blk_no <= INT_MAX); 3454 split_hblks = log->l_logBBsize - (int)blk_no; 3455 ASSERT(split_hblks > 0); 3456 error = xlog_bread(log, blk_no, 3457 split_hblks, hbp, 3458 &offset); 3459 if (error) 3460 goto bread_err2; 3461 } 3462 3463 /* 3464 * Note: this black magic still works with 3465 * large sector sizes (non-512) only because: 3466 * - we increased the buffer size originally 3467 * by 1 sector giving us enough extra space 3468 * for the second read; 3469 * - the log start is guaranteed to be sector 3470 * aligned; 3471 * - we read the log end (LR header start) 3472 * _first_, then the log start (LR header end) 3473 * - order is important. 3474 */ 3475 wrapped_hblks = hblks - split_hblks; 3476 error = xlog_bread_offset(log, 0, 3477 wrapped_hblks, hbp, 3478 offset + BBTOB(split_hblks)); 3479 if (error) 3480 goto bread_err2; 3481 } 3482 rhead = (xlog_rec_header_t *)offset; 3483 error = xlog_valid_rec_header(log, rhead, 3484 split_hblks ? blk_no : 0); 3485 if (error) 3486 goto bread_err2; 3487 3488 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3489 blk_no += hblks; 3490 3491 /* Read in data for log record */ 3492 if (blk_no + bblks <= log->l_logBBsize) { 3493 error = xlog_bread(log, blk_no, bblks, dbp, 3494 &offset); 3495 if (error) 3496 goto bread_err2; 3497 } else { 3498 /* This log record is split across the 3499 * physical end of log */ 3500 offset = XFS_BUF_PTR(dbp); 3501 split_bblks = 0; 3502 if (blk_no != log->l_logBBsize) { 3503 /* some data is before the physical 3504 * end of log */ 3505 ASSERT(!wrapped_hblks); 3506 ASSERT(blk_no <= INT_MAX); 3507 split_bblks = 3508 log->l_logBBsize - (int)blk_no; 3509 ASSERT(split_bblks > 0); 3510 error = xlog_bread(log, blk_no, 3511 split_bblks, dbp, 3512 &offset); 3513 if (error) 3514 goto bread_err2; 3515 } 3516 3517 /* 3518 * Note: this black magic still works with 3519 * large sector sizes (non-512) only because: 3520 * - we increased the buffer size originally 3521 * by 1 sector giving us enough extra space 3522 * for the second read; 3523 * - the log start is guaranteed to be sector 3524 * aligned; 3525 * - we read the log end (LR header start) 3526 * _first_, then the log start (LR header end) 3527 * - order is important. 
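 *
 * Worked example with made-up numbers: if l_logBBsize = 1000,
 * blk_no = 998 and bblks = 6, then split_bblks = 2, the read above
 * filled in blocks 998-999, and the xlog_bread_offset() call below
 * reads the remaining 4 blocks from the start of the physical log
 * into offset + BBTOB(2), leaving one contiguous 6-block record
 * image at 'offset'.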
3528 */ 3529 error = xlog_bread_offset(log, 0, 3530 bblks - split_bblks, hbp, 3531 offset + BBTOB(split_bblks)); 3532 if (error) 3533 goto bread_err2; 3534 } 3535 xlog_unpack_data(rhead, offset, log); 3536 if ((error = xlog_recover_process_data(log, rhash, 3537 rhead, offset, pass))) 3538 goto bread_err2; 3539 blk_no += bblks; 3540 } 3541 3542 ASSERT(blk_no >= log->l_logBBsize); 3543 blk_no -= log->l_logBBsize; 3544 3545 /* read first part of physical log */ 3546 while (blk_no < head_blk) { 3547 error = xlog_bread(log, blk_no, hblks, hbp, &offset); 3548 if (error) 3549 goto bread_err2; 3550 3551 rhead = (xlog_rec_header_t *)offset; 3552 error = xlog_valid_rec_header(log, rhead, blk_no); 3553 if (error) 3554 goto bread_err2; 3555 3556 bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 3557 error = xlog_bread(log, blk_no+hblks, bblks, dbp, 3558 &offset); 3559 if (error) 3560 goto bread_err2; 3561 3562 xlog_unpack_data(rhead, offset, log); 3563 if ((error = xlog_recover_process_data(log, rhash, 3564 rhead, offset, pass))) 3565 goto bread_err2; 3566 blk_no += bblks + hblks; 3567 } 3568 } 3569 3570 bread_err2: 3571 xlog_put_bp(dbp); 3572 bread_err1: 3573 xlog_put_bp(hbp); 3574 return error; 3575 } 3576 3577 /* 3578 * Do the recovery of the log. We actually do this in two phases. 3579 * The two passes are necessary in order to implement the function 3580 * of cancelling a record written into the log. The first pass 3581 * determines those things which have been cancelled, and the 3582 * second pass replays log items normally except for those which 3583 * have been cancelled. The handling of the replay and cancellations 3584 * takes place in the log item type specific routines. 3585 * 3586 * The table of items which have cancel records in the log is allocated 3587 * and freed at this level, since only here do we know when all of 3588 * the log recovery has been completed. 3589 */ 3590 STATIC int 3591 xlog_do_log_recovery( 3592 xlog_t *log, 3593 xfs_daddr_t head_blk, 3594 xfs_daddr_t tail_blk) 3595 { 3596 int error, i; 3597 3598 ASSERT(head_blk != tail_blk); 3599 3600 /* 3601 * First do a pass to find all of the cancelled buf log items. 3602 * Store them in the buf_cancel_table for use in the second pass. 3603 */ 3604 log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * 3605 sizeof(struct list_head), 3606 KM_SLEEP); 3607 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3608 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); 3609 3610 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3611 XLOG_RECOVER_PASS1); 3612 if (error != 0) { 3613 kmem_free(log->l_buf_cancel_table); 3614 log->l_buf_cancel_table = NULL; 3615 return error; 3616 } 3617 /* 3618 * Then do a second pass to actually recover the items in the log. 3619 * When it is complete free the table of buf cancel items. 3620 */ 3621 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3622 XLOG_RECOVER_PASS2); 3623 #ifdef DEBUG 3624 if (!error) { 3625 int i; 3626 3627 for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) 3628 ASSERT(list_empty(&log->l_buf_cancel_table[i])); 3629 } 3630 #endif /* DEBUG */ 3631 3632 kmem_free(log->l_buf_cancel_table); 3633 log->l_buf_cancel_table = NULL; 3634 3635 return error; 3636 } 3637 3638 /* 3639 * Do the actual recovery 3640 */ 3641 STATIC int 3642 xlog_do_recover( 3643 xlog_t *log, 3644 xfs_daddr_t head_blk, 3645 xfs_daddr_t tail_blk) 3646 { 3647 int error; 3648 xfs_buf_t *bp; 3649 xfs_sb_t *sbp; 3650 3651 /* 3652 * First replay the images in the log. 
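 *
 * (xlog_do_log_recovery() runs the two passes described above:
 * pass 1 only builds the table of cancelled buffers and notes any
 * quotaoff items, pass 2 does the actual replay of buffers, inodes,
 * dquots, EFIs and EFDs.)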
3653 */ 3654 error = xlog_do_log_recovery(log, head_blk, tail_blk); 3655 if (error) { 3656 return error; 3657 } 3658 3659 XFS_bflush(log->l_mp->m_ddev_targp); 3660 3661 /* 3662 * If IO errors happened during recovery, bail out. 3663 */ 3664 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 3665 return (EIO); 3666 } 3667 3668 /* 3669 * We now update the tail_lsn since much of the recovery has completed 3670 * and there may be space available to use. If there were no extent 3671 * or iunlinks, we can free up the entire log and set the tail_lsn to 3672 * be the last_sync_lsn. This was set in xlog_find_tail to be the 3673 * lsn of the last known good LR on disk. If there are extent frees 3674 * or iunlinks they will have some entries in the AIL; so we look at 3675 * the AIL to determine how to set the tail_lsn. 3676 */ 3677 xlog_assign_tail_lsn(log->l_mp); 3678 3679 /* 3680 * Now that we've finished replaying all buffer and inode 3681 * updates, re-read in the superblock. 3682 */ 3683 bp = xfs_getsb(log->l_mp, 0); 3684 XFS_BUF_UNDONE(bp); 3685 ASSERT(!(XFS_BUF_ISWRITE(bp))); 3686 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 3687 XFS_BUF_READ(bp); 3688 XFS_BUF_UNASYNC(bp); 3689 xfsbdstrat(log->l_mp, bp); 3690 error = xfs_buf_iowait(bp); 3691 if (error) { 3692 xfs_ioerror_alert("xlog_do_recover", 3693 log->l_mp, bp, XFS_BUF_ADDR(bp)); 3694 ASSERT(0); 3695 xfs_buf_relse(bp); 3696 return error; 3697 } 3698 3699 /* Convert superblock from on-disk format */ 3700 sbp = &log->l_mp->m_sb; 3701 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 3702 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 3703 ASSERT(xfs_sb_good_version(sbp)); 3704 xfs_buf_relse(bp); 3705 3706 /* We've re-read the superblock so re-initialize per-cpu counters */ 3707 xfs_icsb_reinit_counters(log->l_mp); 3708 3709 xlog_recover_check_summary(log); 3710 3711 /* Normal transactions can now occur */ 3712 log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 3713 return 0; 3714 } 3715 3716 /* 3717 * Perform recovery and re-initialize some log variables in xlog_find_tail. 3718 * 3719 * Return error or zero. 3720 */ 3721 int 3722 xlog_recover( 3723 xlog_t *log) 3724 { 3725 xfs_daddr_t head_blk, tail_blk; 3726 int error; 3727 3728 /* find the tail of the log */ 3729 if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 3730 return error; 3731 3732 if (tail_blk != head_blk) { 3733 /* There used to be a comment here: 3734 * 3735 * disallow recovery on read-only mounts. note -- mount 3736 * checks for ENOSPC and turns it into an intelligent 3737 * error message. 3738 * ...but this is no longer true. Now, unless you specify 3739 * NORECOVERY (in which case this function would never be 3740 * called), we just go ahead and recover. We do this all 3741 * under the vfs layer, so we can get away with it unless 3742 * the device itself is read-only, in which case we fail. 3743 */ 3744 if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { 3745 return error; 3746 } 3747 3748 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 3749 log->l_mp->m_logname ? log->l_mp->m_logname 3750 : "internal"); 3751 3752 error = xlog_do_recover(log, head_blk, tail_blk); 3753 log->l_flags |= XLOG_RECOVERY_NEEDED; 3754 } 3755 return error; 3756 } 3757 3758 /* 3759 * In the first part of recovery we replay inodes and buffers and build 3760 * up the list of extent free items which need to be processed. Here 3761 * we process the extent free items and clean up the on disk unlinked 3762 * inode lists. 
This is separated from the first part of recovery so 3763 * that the root and real-time bitmap inodes can be read in from disk in 3764 * between the two stages. This is necessary so that we can free space 3765 * in the real-time portion of the file system. 3766 */ 3767 int 3768 xlog_recover_finish( 3769 xlog_t *log) 3770 { 3771 /* 3772 * Now we're ready to do the transactions needed for the 3773 * rest of recovery. Start with completing all the extent 3774 * free intent records and then process the unlinked inode 3775 * lists. At this point, we essentially run in normal mode 3776 * except that we're still performing recovery actions 3777 * rather than accepting new requests. 3778 */ 3779 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 3780 int error; 3781 error = xlog_recover_process_efis(log); 3782 if (error) { 3783 xfs_alert(log->l_mp, "Failed to recover EFIs"); 3784 return error; 3785 } 3786 /* 3787 * Sync the log to get all the EFIs out of the AIL. 3788 * This isn't absolutely necessary, but it helps in 3789 * case the unlink transactions would have problems 3790 * pushing the EFIs out of the way. 3791 */ 3792 xfs_log_force(log->l_mp, XFS_LOG_SYNC); 3793 3794 xlog_recover_process_iunlinks(log); 3795 3796 xlog_recover_check_summary(log); 3797 3798 xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", 3799 log->l_mp->m_logname ? log->l_mp->m_logname 3800 : "internal"); 3801 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3802 } else { 3803 xfs_info(log->l_mp, "Ending clean mount"); 3804 } 3805 return 0; 3806 } 3807 3808 3809 #if defined(DEBUG) 3810 /* 3811 * Read all of the agf and agi counters and check that they 3812 * are consistent with the superblock counters. 3813 */ 3814 void 3815 xlog_recover_check_summary( 3816 xlog_t *log) 3817 { 3818 xfs_mount_t *mp; 3819 xfs_agf_t *agfp; 3820 xfs_buf_t *agfbp; 3821 xfs_buf_t *agibp; 3822 xfs_agnumber_t agno; 3823 __uint64_t freeblks; 3824 __uint64_t itotal; 3825 __uint64_t ifree; 3826 int error; 3827 3828 mp = log->l_mp; 3829 3830 freeblks = 0LL; 3831 itotal = 0LL; 3832 ifree = 0LL; 3833 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3834 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); 3835 if (error) { 3836 xfs_alert(mp, "%s agf read failed agno %d error %d", 3837 __func__, agno, error); 3838 } else { 3839 agfp = XFS_BUF_TO_AGF(agfbp); 3840 freeblks += be32_to_cpu(agfp->agf_freeblks) + 3841 be32_to_cpu(agfp->agf_flcount); 3842 xfs_buf_relse(agfbp); 3843 } 3844 3845 error = xfs_read_agi(mp, NULL, agno, &agibp); 3846 if (error) { 3847 xfs_alert(mp, "%s agi read failed agno %d error %d", 3848 __func__, agno, error); 3849 } else { 3850 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); 3851 3852 itotal += be32_to_cpu(agi->agi_count); 3853 ifree += be32_to_cpu(agi->agi_freecount); 3854 xfs_buf_relse(agibp); 3855 } 3856 } 3857 } 3858 #endif /* DEBUG */ 3859