// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/checkpoint.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1999 Red Hat Software --- All Rights Reserved
 *
 * Checkpoint routines for the generic filesystem journaling code.
 * Part of the ext2fs journaling system.
 *
 * Checkpointing is the process of ensuring that a section of the log is
 * committed fully to disk, so that that portion of the log can be
 * reused.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <trace/events/jbd2.h>

/*
 * Unlink a buffer from a transaction checkpoint list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink_first(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	jh->b_cpnext->b_cpprev = jh->b_cpprev;
	jh->b_cpprev->b_cpnext = jh->b_cpnext;
	if (transaction->t_checkpoint_list == jh) {
		transaction->t_checkpoint_list = jh->b_cpnext;
		if (transaction->t_checkpoint_list == jh)
			transaction->t_checkpoint_list = NULL;
	}
}

/*
 * Unlink a buffer from a transaction checkpoint(io) list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	__buffer_unlink_first(jh);
	if (transaction->t_checkpoint_io_list == jh) {
		transaction->t_checkpoint_io_list = jh->b_cpnext;
		if (transaction->t_checkpoint_io_list == jh)
			transaction->t_checkpoint_io_list = NULL;
	}
}

/*
 * Move a buffer from the checkpoint list to the checkpoint io list
 *
 * Called with j_list_lock held
 */
static inline void __buffer_relink_io(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	__buffer_unlink_first(jh);

	if (!transaction->t_checkpoint_io_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_io_list;
		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_io_list = jh;
}
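
/*
 * Note on the list representation used by the helpers above: both
 * t_checkpoint_list and t_checkpoint_io_list are circular, doubly linked
 * lists of journal_heads chained through b_cpnext/b_cpprev.  An empty list
 * is a NULL head pointer, and a single element points at itself, which is
 * why the unlink helpers re-test the head after advancing it so that the
 * last element collapses the list back to NULL.
 */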

/*
 * Try to release a checkpointed buffer from its transaction.
 * Returns 1 if we released it and 2 if we also released the
 * whole transaction.
 *
 * Requires j_list_lock
 */
static int __try_to_free_cp_buf(struct journal_head *jh)
{
	int ret = 0;
	struct buffer_head *bh = jh2bh(jh);

	if (jh->b_transaction == NULL && !buffer_locked(bh) &&
	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
		JBUFFER_TRACE(jh, "remove from checkpoint list");
		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
	}
	return ret;
}

/*
 * __jbd2_log_wait_for_space: wait until there is space in the journal.
 *
 * Called under j_state_lock *only*.  It will be unlocked if we have to wait
 * for a checkpoint to free up some space in the log.
 */
void __jbd2_log_wait_for_space(journal_t *journal)
{
	int nblocks, space_left;
	/* assert_spin_locked(&journal->j_state_lock); */

	nblocks = jbd2_space_needed(journal);
	while (jbd2_log_space_left(journal) < nblocks) {
		write_unlock(&journal->j_state_lock);
		mutex_lock(&journal->j_checkpoint_mutex);

		/*
		 * Test again, another process may have checkpointed while we
		 * were waiting for the checkpoint lock.  If there are no
		 * transactions ready to be checkpointed, try to recover
		 * journal space by calling cleanup_journal_tail(), and if
		 * that doesn't work, by waiting for the currently committing
		 * transaction to complete.  If there is absolutely no way
		 * to make progress, this is either a BUG or corrupted
		 * filesystem, so abort the journal and leave a stack
		 * trace for forensic evidence.
		 */
		write_lock(&journal->j_state_lock);
		if (journal->j_flags & JBD2_ABORT) {
			mutex_unlock(&journal->j_checkpoint_mutex);
			return;
		}
		spin_lock(&journal->j_list_lock);
		nblocks = jbd2_space_needed(journal);
		space_left = jbd2_log_space_left(journal);
		if (space_left < nblocks) {
			int chkpt = journal->j_checkpoint_transactions != NULL;
			tid_t tid = 0;

			if (journal->j_committing_transaction)
				tid = journal->j_committing_transaction->t_tid;
			spin_unlock(&journal->j_list_lock);
			write_unlock(&journal->j_state_lock);
			if (chkpt) {
				jbd2_log_do_checkpoint(journal);
			} else if (jbd2_cleanup_journal_tail(journal) == 0) {
				/* We were able to recover space; yay! */
				;
			} else if (tid) {
				/*
				 * jbd2_journal_commit_transaction() may want
				 * to take the checkpoint_mutex if JBD2_FLUSHED
				 * is set.  So we need to temporarily drop it.
				 */
				mutex_unlock(&journal->j_checkpoint_mutex);
				jbd2_log_wait_commit(journal, tid);
				write_lock(&journal->j_state_lock);
				continue;
			} else {
				printk(KERN_ERR "%s: needed %d blocks and "
				       "only had %d space available\n",
				       __func__, nblocks, space_left);
				printk(KERN_ERR "%s: no way to get more "
				       "journal space in %s\n", __func__,
				       journal->j_devname);
				WARN_ON(1);
				jbd2_journal_abort(journal, 0);
			}
			write_lock(&journal->j_state_lock);
		} else {
			spin_unlock(&journal->j_list_lock);
		}
		mutex_unlock(&journal->j_checkpoint_mutex);
	}
}

static void
__flush_batch(journal_t *journal, int *batch_count)
{
	int i;
	struct blk_plug plug;

	blk_start_plug(&plug);
	for (i = 0; i < *batch_count; i++)
		write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
	blk_finish_plug(&plug);

	for (i = 0; i < *batch_count; i++) {
		struct buffer_head *bh = journal->j_chkpt_bhs[i];
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	*batch_count = 0;
}
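
/*
 * A batch holds at most JBD2_NR_BATCH buffer_heads in j_chkpt_bhs[].
 * Submitting all of them inside a single blk_plug section gives the block
 * layer a chance to merge adjacent buffers into larger requests before the
 * plug is released.
 */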

/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	transaction_t *transaction;
	tid_t this_tid;
	int result, batch_count = 0;

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Take one transaction
	 * and write it.
	 */
	result = 0;
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	/* checkpoint all of the transaction's buffers */
	while (transaction->t_checkpoint_list) {
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);

		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto retry;
		}
		if (jh->b_transaction != NULL) {
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			transaction->t_chp_stats.cs_forced_to_close++;
			spin_unlock(&journal->j_list_lock);
			if (unlikely(journal->j_flags & JBD2_UNMOUNT))
				/*
				 * The journal thread is dead; so
				 * starting and waiting for a commit
				 * to finish will cause us to wait for
				 * a _very_ long time.
				 */
				printk(KERN_ERR
				       "JBD2: %s: Waiting for Godot: block %llu\n",
				       journal->j_devname, (unsigned long long) bh->b_blocknr);

			jbd2_log_start_commit(journal, tid);
			jbd2_log_wait_commit(journal, tid);
			goto retry;
		}
		if (!buffer_dirty(bh)) {
			if (unlikely(buffer_write_io_error(bh)) && !result)
				result = -EIO;
			BUFFER_TRACE(bh, "remove from checkpoint");
			if (__jbd2_journal_remove_checkpoint(jh))
				/* The transaction was released; we're done */
				goto out;
			continue;
		}
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal
		 * lock.  We cannot afford to let the transaction
		 * logic start messing around with this buffer before
		 * we write it to disk, as that would break
		 * recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		journal->j_chkpt_bhs[batch_count++] = bh;
		__buffer_relink_io(jh);
		transaction->t_chp_stats.cs_written++;
		if ((batch_count == JBD2_NR_BATCH) ||
		    need_resched() ||
		    spin_needbreak(&journal->j_list_lock))
			goto unlock_and_flush;
	}

	if (batch_count) {
		unlock_and_flush:
		spin_unlock(&journal->j_list_lock);
		retry:
		if (batch_count)
			__flush_batch(journal, &batch_count);
		spin_lock(&journal->j_list_lock);
		goto restart;
	}

	/*
	 * Now that we have issued all of the transaction's buffers, let's
	 * deal with the buffers that are out for I/O.
	 */
restart2:
	/* Did somebody clean up the transaction in the meanwhile? */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	while (transaction->t_checkpoint_io_list) {
		jh = transaction->t_checkpoint_io_list;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			spin_lock(&journal->j_list_lock);
			goto restart2;
		}
		if (unlikely(buffer_write_io_error(bh)) && !result)
			result = -EIO;

		/*
		 * Now in whatever state the buffer currently is, we
		 * know that it has been written out and so we can
		 * drop it from the list
		 */
		if (__jbd2_journal_remove_checkpoint(jh))
			break;
	}
out:
	spin_unlock(&journal->j_list_lock);
	if (result < 0)
		jbd2_journal_abort(journal, result);
	else
		result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
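
/*
 * For illustration only: callers are expected to hold j_checkpoint_mutex
 * around this function, roughly as __jbd2_log_wait_for_space() above does.
 * A sketch of the calling pattern (not a verbatim call site):
 *
 *	mutex_lock(&journal->j_checkpoint_mutex);
 *	jbd2_log_do_checkpoint(journal);
 *	mutex_unlock(&journal->j_checkpoint_mutex);
 */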

/*
 * Check the list of checkpoint transactions for the journal to see if
 * we have already got rid of any since the last update of the log tail
 * in the journal superblock.  If so, we can instantly roll the
 * superblock forward to remove those transactions from the log.
 *
 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
 *
 * Called with the journal lock held.
 *
 * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts.  Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
 * even in abort state, but we must not update the super block if
 * checkpointing may have failed.  Otherwise, we would lose some metadata
 * buffers which should be written back to the filesystem.
 */

int jbd2_cleanup_journal_tail(journal_t *journal)
{
	tid_t first_tid;
	unsigned long blocknr;

	if (is_journal_aborted(journal))
		return -EIO;

	if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
		return 1;
	J_ASSERT(blocknr != 0);

	/*
	 * We need to make sure that any blocks that were recently written out
	 * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
	 * we drop the transactions from the journal. It's unlikely this will
	 * be necessary, especially with an appropriately sized journal, but we
	 * need this to guarantee correctness.  Fortunately
	 * jbd2_cleanup_journal_tail() doesn't get called all that often.
	 */
	if (journal->j_flags & JBD2_BARRIER)
		blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);

	return __jbd2_update_log_tail(journal, first_tid, blocknr);
}
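
/*
 * The return convention above matters to jbd2_log_do_checkpoint(): it bails
 * out on a return of <= 0 and only goes on to write checkpoint buffers when
 * this function returns 1, i.e. when nothing could be cleaned up just by
 * rolling the log tail forward.
 */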

/* Checkpoint list management */

/*
 * journal_clean_one_cp_list
 *
 * Find all the written-back checkpoint buffers in the given list and
 * release them. If 'destroy' is set, clean all buffers unconditionally.
 *
 * Called with j_list_lock held.
 * Returns 1 if we freed the transaction, 0 otherwise.
 */
static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
{
	struct journal_head *last_jh;
	struct journal_head *next_jh = jh;
	int ret;

	if (!jh)
		return 0;

	last_jh = jh->b_cpprev;
	do {
		jh = next_jh;
		next_jh = jh->b_cpnext;
		if (!destroy)
			ret = __try_to_free_cp_buf(jh);
		else
			ret = __jbd2_journal_remove_checkpoint(jh) + 1;
		if (!ret)
			return 0;
		if (ret == 2)
			return 1;
		/*
		 * This function only frees up some memory
		 * if possible so we don't have an obligation
		 * to finish processing. Bail out if preemption
		 * requested:
		 */
		if (need_resched())
			return 0;
	} while (jh != last_jh);

	return 0;
}

/*
 * journal_clean_checkpoint_list
 *
 * Find all the written-back checkpoint buffers in the journal and release them.
 * If 'destroy' is set, release all buffers unconditionally.
 *
 * Called with j_list_lock held.
 */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	int ret;

	transaction = journal->j_checkpoint_transactions;
	if (!transaction)
		return;

	last_transaction = transaction->t_cpprev;
	next_transaction = transaction;
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
						destroy);
		/*
		 * This function only frees up some memory if possible so we
		 * don't have an obligation to finish processing. Bail out if
		 * preemption requested:
		 */
		if (need_resched())
			return;
		if (ret)
			continue;
		/*
		 * It is essential that we are as careful as in the case of
		 * t_checkpoint_list with removing the buffer from the list as
		 * we can possibly see not yet submitted buffers on io_list
		 */
		ret = journal_clean_one_cp_list(transaction->t_checkpoint_io_list,
						destroy);
		if (need_resched())
			return;
		/*
		 * Stop scanning if we couldn't free the transaction. This
		 * avoids pointless scanning of transactions which still
		 * weren't checkpointed.
		 */
		if (!ret)
			return;
	} while (transaction != last_transaction);
}

/*
 * Remove buffers from all checkpoint lists as journal is aborted and we just
 * need to free memory
 */
void jbd2_journal_destroy_checkpoint(journal_t *journal)
{
	/*
	 * We loop because __jbd2_journal_clean_checkpoint_list() may abort
	 * early due to a need of rescheduling.
	 */
	while (1) {
		spin_lock(&journal->j_list_lock);
		if (!journal->j_checkpoint_transactions) {
			spin_unlock(&journal->j_list_lock);
			break;
		}
		__jbd2_journal_clean_checkpoint_list(journal, true);
		spin_unlock(&journal->j_list_lock);
		cond_resched();
	}
}
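
/*
 * jbd2_journal_destroy_checkpoint() is typically used on journal teardown
 * (for instance from jbd2_journal_destroy() once checkpointing can no longer
 * make progress on an aborted journal), where the checkpointed buffers only
 * need to be forgotten, not written back.
 */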

/*
 * journal_remove_checkpoint: called after a buffer has been committed
 * to disk (either by being write-back flushed to disk, or being
 * committed to the log).
 *
 * We cannot safely clean a transaction out of the log until all of the
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk.  To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
 * lists until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise.
 * The function can free jh and bh.
 *
 * This function is called with j_list_lock held.
 */
int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
	struct transaction_chp_stats_s *stats;
	transaction_t *transaction;
	journal_t *journal;
	int ret = 0;

	JBUFFER_TRACE(jh, "entry");

	if ((transaction = jh->b_cp_transaction) == NULL) {
		JBUFFER_TRACE(jh, "not on transaction");
		goto out;
	}
	journal = transaction->t_journal;

	JBUFFER_TRACE(jh, "removing from transaction");
	__buffer_unlink(jh);
	jh->b_cp_transaction = NULL;
	jbd2_journal_put_journal_head(jh);

	if (transaction->t_checkpoint_list != NULL ||
	    transaction->t_checkpoint_io_list != NULL)
		goto out;

	/*
	 * There is one special case to worry about: if we have just pulled the
	 * buffer off a running or committing transaction's checkpoint list,
	 * then even if the checkpoint list is empty, the transaction obviously
	 * cannot be dropped!
	 *
	 * The locking here around t_state is a bit sleazy.
	 * See the comment at the end of jbd2_journal_commit_transaction().
	 */
	if (transaction->t_state != T_FINISHED)
		goto out;

	/*
	 * OK, that was the last buffer for the transaction: we can now
	 * safely remove this transaction from the log.
	 */
	stats = &transaction->t_chp_stats;
	if (stats->cs_chp_time)
		stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
						    jiffies);
	trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
				    transaction->t_tid, stats);

	__jbd2_journal_drop_transaction(journal, transaction);
	jbd2_journal_free_transaction(transaction);
	ret = 1;
out:
	return ret;
}

/*
 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 * list so that we know when it is safe to clean the transaction out of
 * the log.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
				      transaction_t *transaction)
{
	JBUFFER_TRACE(jh, "entry");
	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

	/* Get reference for checkpointing transaction */
	jbd2_journal_grab_journal_head(jh2bh(jh));
	jh->b_cp_transaction = transaction;

	if (!transaction->t_checkpoint_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_list;
		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_list = jh;
}
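
/*
 * Note the reference pairing between the two functions above:
 * __jbd2_journal_insert_checkpoint() takes a journal_head reference via
 * jbd2_journal_grab_journal_head(), and the matching
 * jbd2_journal_put_journal_head() is done in
 * __jbd2_journal_remove_checkpoint() once the buffer leaves the checkpoint
 * lists.
 */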

/*
 * We've finished with this transaction structure: adios...
 *
 * The transaction must have no links except for the checkpoint by this
 * point.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */

void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
	assert_spin_locked(&journal->j_list_lock);
	if (transaction->t_cpnext) {
		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions =
				transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions = NULL;
	}

	J_ASSERT(transaction->t_state == T_FINISHED);
	J_ASSERT(transaction->t_buffers == NULL);
	J_ASSERT(transaction->t_forget == NULL);
	J_ASSERT(transaction->t_shadow_list == NULL);
	J_ASSERT(transaction->t_checkpoint_list == NULL);
	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
	J_ASSERT(journal->j_committing_transaction != transaction);
	J_ASSERT(journal->j_running_transaction != transaction);

	trace_jbd2_drop_transaction(journal, transaction);

	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
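
/*
 * __jbd2_journal_drop_transaction() only unlinks the transaction from the
 * journal and checks its invariants; the memory itself is released by the
 * caller, as __jbd2_journal_remove_checkpoint() does with
 * jbd2_journal_free_transaction().
 */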