// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/checkpoint.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1999 Red Hat Software --- All Rights Reserved
 *
 * Checkpoint routines for the generic filesystem journaling code.
 * Part of the ext2fs journaling system.
 *
 * Checkpointing is the process of ensuring that a section of the log is
 * committed fully to disk, so that that portion of the log can be
 * reused.
 */
16470decc6SDave Kleikamp
17470decc6SDave Kleikamp #include <linux/time.h>
18470decc6SDave Kleikamp #include <linux/fs.h>
19f7f4bccbSMingming Cao #include <linux/jbd2.h>
20470decc6SDave Kleikamp #include <linux/errno.h>
21470decc6SDave Kleikamp #include <linux/slab.h>
22cc3e1beaSTheodore Ts'o #include <linux/blkdev.h>
23879c5e6bSTheodore Ts'o #include <trace/events/jbd2.h>
24470decc6SDave Kleikamp
25470decc6SDave Kleikamp /*
26470decc6SDave Kleikamp * Unlink a buffer from a transaction checkpoint list.
27470decc6SDave Kleikamp *
28470decc6SDave Kleikamp * Called with j_list_lock held.
29470decc6SDave Kleikamp */
__buffer_unlink(struct journal_head * jh)30be222553SZhang Yi static inline void __buffer_unlink(struct journal_head *jh)
31470decc6SDave Kleikamp {
32470decc6SDave Kleikamp transaction_t *transaction = jh->b_cp_transaction;
33470decc6SDave Kleikamp
34470decc6SDave Kleikamp jh->b_cpnext->b_cpprev = jh->b_cpprev;
35470decc6SDave Kleikamp jh->b_cpprev->b_cpnext = jh->b_cpnext;
36470decc6SDave Kleikamp if (transaction->t_checkpoint_list == jh) {
37470decc6SDave Kleikamp transaction->t_checkpoint_list = jh->b_cpnext;
38470decc6SDave Kleikamp if (transaction->t_checkpoint_list == jh)
39470decc6SDave Kleikamp transaction->t_checkpoint_list = NULL;
40470decc6SDave Kleikamp }
41470decc6SDave Kleikamp }
42470decc6SDave Kleikamp
43470decc6SDave Kleikamp /*
44f7f4bccbSMingming Cao * __jbd2_log_wait_for_space: wait until there is space in the journal.
45470decc6SDave Kleikamp *
46470decc6SDave Kleikamp * Called under j-state_lock *only*. It will be unlocked if we have to wait
47470decc6SDave Kleikamp * for a checkpoint to free up some space in the log.
48470decc6SDave Kleikamp */
__jbd2_log_wait_for_space(journal_t * journal)49f7f4bccbSMingming Cao void __jbd2_log_wait_for_space(journal_t *journal)
5005d5233dSTheodore Ts'o __acquires(&journal->j_state_lock)
5105d5233dSTheodore Ts'o __releases(&journal->j_state_lock)
52470decc6SDave Kleikamp {
538c3f25d8STheodore Ts'o int nblocks, space_left;
54a931da6aSTheodore Ts'o /* assert_spin_locked(&journal->j_state_lock); */
55470decc6SDave Kleikamp
5677444ac4SJan Kara nblocks = journal->j_max_transaction_buffers;
5776c39904SJan Kara while (jbd2_log_space_left(journal) < nblocks) {
58a931da6aSTheodore Ts'o write_unlock(&journal->j_state_lock);
5953cf9784SXiaoguang Wang mutex_lock_io(&journal->j_checkpoint_mutex);
60470decc6SDave Kleikamp
61470decc6SDave Kleikamp /*
62470decc6SDave Kleikamp * Test again, another process may have checkpointed while we
6323f8b79eSDuane Griffin * were waiting for the checkpoint lock. If there are no
648c3f25d8STheodore Ts'o * transactions ready to be checkpointed, try to recover
658c3f25d8STheodore Ts'o * journal space by calling cleanup_journal_tail(), and if
668c3f25d8STheodore Ts'o * that doesn't work, by waiting for the currently committing
678c3f25d8STheodore Ts'o * transaction to complete. If there is absolutely no way
688c3f25d8STheodore Ts'o * to make progress, this is either a BUG or corrupted
698c3f25d8STheodore Ts'o * filesystem, so abort the journal and leave a stack
708c3f25d8STheodore Ts'o * trace for forensic evidence.
71470decc6SDave Kleikamp */
72a931da6aSTheodore Ts'o write_lock(&journal->j_state_lock);
731245799fSDmitry Monakhov if (journal->j_flags & JBD2_ABORT) {
741245799fSDmitry Monakhov mutex_unlock(&journal->j_checkpoint_mutex);
751245799fSDmitry Monakhov return;
761245799fSDmitry Monakhov }
7723f8b79eSDuane Griffin spin_lock(&journal->j_list_lock);
7876c39904SJan Kara space_left = jbd2_log_space_left(journal);
798c3f25d8STheodore Ts'o if (space_left < nblocks) {
8023f8b79eSDuane Griffin int chkpt = journal->j_checkpoint_transactions != NULL;
818c3f25d8STheodore Ts'o tid_t tid = 0;
8293051d16SLuis Henriques (SUSE) bool has_transaction = false;
8323f8b79eSDuane Griffin
8493051d16SLuis Henriques (SUSE) if (journal->j_committing_transaction) {
858c3f25d8STheodore Ts'o tid = journal->j_committing_transaction->t_tid;
8693051d16SLuis Henriques (SUSE) has_transaction = true;
8793051d16SLuis Henriques (SUSE) }
8823f8b79eSDuane Griffin spin_unlock(&journal->j_list_lock);
89a931da6aSTheodore Ts'o write_unlock(&journal->j_state_lock);
9023f8b79eSDuane Griffin if (chkpt) {
91f7f4bccbSMingming Cao jbd2_log_do_checkpoint(journal);
92*1c62dc0dSBaokun Li } else if (jbd2_cleanup_journal_tail(journal) <= 0) {
93*1c62dc0dSBaokun Li /*
94*1c62dc0dSBaokun Li * We were able to recover space or the
95*1c62dc0dSBaokun Li * journal was aborted due to an error.
96*1c62dc0dSBaokun Li */
978c3f25d8STheodore Ts'o ;
9893051d16SLuis Henriques (SUSE) } else if (has_transaction) {
990ef54180SPaul Gortmaker /*
1000ef54180SPaul Gortmaker * jbd2_journal_commit_transaction() may want
1010ef54180SPaul Gortmaker * to take the checkpoint_mutex if JBD2_FLUSHED
1020ef54180SPaul Gortmaker * is set. So we need to temporarily drop it.
1030ef54180SPaul Gortmaker */
1040ef54180SPaul Gortmaker mutex_unlock(&journal->j_checkpoint_mutex);
1058c3f25d8STheodore Ts'o jbd2_log_wait_commit(journal, tid);
1060ef54180SPaul Gortmaker write_lock(&journal->j_state_lock);
1070ef54180SPaul Gortmaker continue;
10823f8b79eSDuane Griffin } else {
1098c3f25d8STheodore Ts'o printk(KERN_ERR "%s: needed %d blocks and "
1108c3f25d8STheodore Ts'o "only had %d space available\n",
1118c3f25d8STheodore Ts'o __func__, nblocks, space_left);
1128c3f25d8STheodore Ts'o printk(KERN_ERR "%s: no way to get more "
1138c3f25d8STheodore Ts'o "journal space in %s\n", __func__,
1148c3f25d8STheodore Ts'o journal->j_devname);
1158c3f25d8STheodore Ts'o WARN_ON(1);
11651f57b01Szhangyi (F) jbd2_journal_abort(journal, -EIO);
11723f8b79eSDuane Griffin }
118a931da6aSTheodore Ts'o write_lock(&journal->j_state_lock);
11923f8b79eSDuane Griffin } else {
12023f8b79eSDuane Griffin spin_unlock(&journal->j_list_lock);
121470decc6SDave Kleikamp }
122470decc6SDave Kleikamp mutex_unlock(&journal->j_checkpoint_mutex);
123470decc6SDave Kleikamp }
124470decc6SDave Kleikamp }
125470decc6SDave Kleikamp
126470decc6SDave Kleikamp static void
__flush_batch(journal_t * journal,int * batch_count)1271a0d3786STheodore Ts'o __flush_batch(journal_t *journal, int *batch_count)
128470decc6SDave Kleikamp {
129470decc6SDave Kleikamp int i;
130d3ad8434STao Ma struct blk_plug plug;
131470decc6SDave Kleikamp
132d3ad8434STao Ma blk_start_plug(&plug);
1339cb569d6SChristoph Hellwig for (i = 0; i < *batch_count; i++)
13470fd7614SChristoph Hellwig write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
135d3ad8434STao Ma blk_finish_plug(&plug);
1369cb569d6SChristoph Hellwig
137470decc6SDave Kleikamp for (i = 0; i < *batch_count; i++) {
1381a0d3786STheodore Ts'o struct buffer_head *bh = journal->j_chkpt_bhs[i];
139470decc6SDave Kleikamp BUFFER_TRACE(bh, "brelse");
140470decc6SDave Kleikamp __brelse(bh);
141c2d6fd9dSZhang Yi journal->j_chkpt_bhs[i] = NULL;
142470decc6SDave Kleikamp }
143470decc6SDave Kleikamp *batch_count = 0;
144470decc6SDave Kleikamp }
145470decc6SDave Kleikamp
146470decc6SDave Kleikamp /*
147470decc6SDave Kleikamp * Perform an actual checkpoint. We take the first transaction on the
148470decc6SDave Kleikamp * list of transactions to be checkpointed and send all its buffers
149470decc6SDave Kleikamp * to disk. We submit larger chunks of data at once.
150470decc6SDave Kleikamp *
151470decc6SDave Kleikamp * The journal should be locked before calling this function.
15244519fafSHidehiro Kawai * Called with j_checkpoint_mutex held.
153470decc6SDave Kleikamp */
jbd2_log_do_checkpoint(journal_t * journal)154f7f4bccbSMingming Cao int jbd2_log_do_checkpoint(journal_t *journal)
155470decc6SDave Kleikamp {
156be1158ccSTheodore Ts'o struct journal_head *jh;
157be1158ccSTheodore Ts'o struct buffer_head *bh;
158470decc6SDave Kleikamp transaction_t *transaction;
159470decc6SDave Kleikamp tid_t this_tid;
160dc6e8d66STheodore Ts'o int result, batch_count = 0;
161470decc6SDave Kleikamp
162cb3b3bf2SJan Kara jbd2_debug(1, "Start checkpoint\n");
163470decc6SDave Kleikamp
164470decc6SDave Kleikamp /*
165470decc6SDave Kleikamp * First thing: if there are any transactions in the log which
166470decc6SDave Kleikamp * don't need checkpointing, just eliminate them from the
167470decc6SDave Kleikamp * journal straight away.
168470decc6SDave Kleikamp */
169f7f4bccbSMingming Cao result = jbd2_cleanup_journal_tail(journal);
170879c5e6bSTheodore Ts'o trace_jbd2_checkpoint(journal, result);
171cb3b3bf2SJan Kara jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
172470decc6SDave Kleikamp if (result <= 0)
173470decc6SDave Kleikamp return result;
174470decc6SDave Kleikamp
175470decc6SDave Kleikamp /*
176470decc6SDave Kleikamp * OK, we need to start writing disk blocks. Take one transaction
177470decc6SDave Kleikamp * and write it.
178470decc6SDave Kleikamp */
179470decc6SDave Kleikamp spin_lock(&journal->j_list_lock);
180470decc6SDave Kleikamp if (!journal->j_checkpoint_transactions)
181470decc6SDave Kleikamp goto out;
182470decc6SDave Kleikamp transaction = journal->j_checkpoint_transactions;
1838e85fb3fSJohann Lombardi if (transaction->t_chp_stats.cs_chp_time == 0)
1848e85fb3fSJohann Lombardi transaction->t_chp_stats.cs_chp_time = jiffies;
185470decc6SDave Kleikamp this_tid = transaction->t_tid;
186470decc6SDave Kleikamp restart:
187470decc6SDave Kleikamp /*
188470decc6SDave Kleikamp * If someone cleaned up this transaction while we slept, we're
189470decc6SDave Kleikamp * done (maybe it's a new transaction, but it fell at the same
190470decc6SDave Kleikamp * address).
191470decc6SDave Kleikamp */
192be1158ccSTheodore Ts'o if (journal->j_checkpoint_transactions != transaction ||
193be1158ccSTheodore Ts'o transaction->t_tid != this_tid)
194be1158ccSTheodore Ts'o goto out;
195470decc6SDave Kleikamp
196be1158ccSTheodore Ts'o /* checkpoint all of the transaction's buffers */
197be1158ccSTheodore Ts'o while (transaction->t_checkpoint_list) {
198470decc6SDave Kleikamp jh = transaction->t_checkpoint_list;
199be1158ccSTheodore Ts'o bh = jh2bh(jh);
200be1158ccSTheodore Ts'o
201be1158ccSTheodore Ts'o if (jh->b_transaction != NULL) {
202be1158ccSTheodore Ts'o transaction_t *t = jh->b_transaction;
203be1158ccSTheodore Ts'o tid_t tid = t->t_tid;
204be1158ccSTheodore Ts'o
205be1158ccSTheodore Ts'o transaction->t_chp_stats.cs_forced_to_close++;
206be1158ccSTheodore Ts'o spin_unlock(&journal->j_list_lock);
207be1158ccSTheodore Ts'o if (unlikely(journal->j_flags & JBD2_UNMOUNT))
208be1158ccSTheodore Ts'o /*
209be1158ccSTheodore Ts'o * The journal thread is dead; so
210be1158ccSTheodore Ts'o * starting and waiting for a commit
211be1158ccSTheodore Ts'o * to finish will cause us to wait for
212be1158ccSTheodore Ts'o * a _very_ long time.
213be1158ccSTheodore Ts'o */
214be1158ccSTheodore Ts'o printk(KERN_ERR
215be1158ccSTheodore Ts'o "JBD2: %s: Waiting for Godot: block %llu\n",
216be1158ccSTheodore Ts'o journal->j_devname, (unsigned long long) bh->b_blocknr);
217be1158ccSTheodore Ts'o
21853cf9784SXiaoguang Wang if (batch_count)
21953cf9784SXiaoguang Wang __flush_batch(journal, &batch_count);
220be1158ccSTheodore Ts'o jbd2_log_start_commit(journal, tid);
22153cf9784SXiaoguang Wang /*
22253cf9784SXiaoguang Wang * jbd2_journal_commit_transaction() may want
22353cf9784SXiaoguang Wang * to take the checkpoint_mutex if JBD2_FLUSHED
22453cf9784SXiaoguang Wang * is set, jbd2_update_log_tail() called by
22553cf9784SXiaoguang Wang * jbd2_journal_commit_transaction() may also take
22653cf9784SXiaoguang Wang * checkpoint_mutex. So we need to temporarily
22753cf9784SXiaoguang Wang * drop it.
22853cf9784SXiaoguang Wang */
22953cf9784SXiaoguang Wang mutex_unlock(&journal->j_checkpoint_mutex);
230be1158ccSTheodore Ts'o jbd2_log_wait_commit(journal, tid);
23153cf9784SXiaoguang Wang mutex_lock_io(&journal->j_checkpoint_mutex);
23253cf9784SXiaoguang Wang spin_lock(&journal->j_list_lock);
23353cf9784SXiaoguang Wang goto restart;
234be1158ccSTheodore Ts'o }
235e34c8dd2SZhihao Cheng if (!trylock_buffer(bh)) {
236e34c8dd2SZhihao Cheng /*
237e34c8dd2SZhihao Cheng * The buffer is locked, it may be writing back, or
238e34c8dd2SZhihao Cheng * flushing out in the last couple of cycles, or
239e34c8dd2SZhihao Cheng * re-adding into a new transaction, need to check
240e34c8dd2SZhihao Cheng * it again until it's unlocked.
241e34c8dd2SZhihao Cheng */
242e34c8dd2SZhihao Cheng get_bh(bh);
243e34c8dd2SZhihao Cheng spin_unlock(&journal->j_list_lock);
244e34c8dd2SZhihao Cheng wait_on_buffer(bh);
245e34c8dd2SZhihao Cheng /* the journal_head may have gone by now */
246e34c8dd2SZhihao Cheng BUFFER_TRACE(bh, "brelse");
247e34c8dd2SZhihao Cheng __brelse(bh);
248e34c8dd2SZhihao Cheng goto retry;
249e34c8dd2SZhihao Cheng } else if (!buffer_dirty(bh)) {
250e34c8dd2SZhihao Cheng unlock_buffer(bh);
251be1158ccSTheodore Ts'o BUFFER_TRACE(bh, "remove from checkpoint");
252be1158ccSTheodore Ts'o /*
253c2d6fd9dSZhang Yi * If the transaction was released or the checkpoint
254c2d6fd9dSZhang Yi * list was empty, we're done.
255c2d6fd9dSZhang Yi */
256c2d6fd9dSZhang Yi if (__jbd2_journal_remove_checkpoint(jh) ||
257c2d6fd9dSZhang Yi !transaction->t_checkpoint_list)
258c2d6fd9dSZhang Yi goto out;
259c2d6fd9dSZhang Yi } else {
260e34c8dd2SZhihao Cheng unlock_buffer(bh);
261c2d6fd9dSZhang Yi /*
262c2d6fd9dSZhang Yi * We are about to write the buffer, it could be
263c2d6fd9dSZhang Yi * raced by some other transaction shrink or buffer
264c2d6fd9dSZhang Yi * re-log logic once we release the j_list_lock,
265c2d6fd9dSZhang Yi * leave it on the checkpoint list and check status
266c2d6fd9dSZhang Yi * again to make sure it's clean.
267be1158ccSTheodore Ts'o */
268be1158ccSTheodore Ts'o BUFFER_TRACE(bh, "queue");
269be1158ccSTheodore Ts'o get_bh(bh);
270be1158ccSTheodore Ts'o J_ASSERT_BH(bh, !buffer_jwrite(bh));
271be1158ccSTheodore Ts'o journal->j_chkpt_bhs[batch_count++] = bh;
272be1158ccSTheodore Ts'o transaction->t_chp_stats.cs_written++;
273c2d6fd9dSZhang Yi transaction->t_checkpoint_list = jh->b_cpnext;
274c2d6fd9dSZhang Yi }
275c2d6fd9dSZhang Yi
276be1158ccSTheodore Ts'o if ((batch_count == JBD2_NR_BATCH) ||
277c2d6fd9dSZhang Yi need_resched() || spin_needbreak(&journal->j_list_lock) ||
278c2d6fd9dSZhang Yi jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
279be1158ccSTheodore Ts'o goto unlock_and_flush;
280470decc6SDave Kleikamp }
281470decc6SDave Kleikamp
282470decc6SDave Kleikamp if (batch_count) {
283be1158ccSTheodore Ts'o unlock_and_flush:
284470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock);
285be1158ccSTheodore Ts'o retry:
286be1158ccSTheodore Ts'o if (batch_count)
2871a0d3786STheodore Ts'o __flush_batch(journal, &batch_count);
288470decc6SDave Kleikamp spin_lock(&journal->j_list_lock);
289470decc6SDave Kleikamp goto restart;
290470decc6SDave Kleikamp }
291be1158ccSTheodore Ts'o
292470decc6SDave Kleikamp out:
293470decc6SDave Kleikamp spin_unlock(&journal->j_list_lock);
29444519fafSHidehiro Kawai result = jbd2_cleanup_journal_tail(journal);
29544519fafSHidehiro Kawai
29644519fafSHidehiro Kawai return (result < 0) ? result : 0;
297470decc6SDave Kleikamp }
298470decc6SDave Kleikamp
299470decc6SDave Kleikamp /*
300470decc6SDave Kleikamp * Check the list of checkpoint transactions for the journal to see if
301470decc6SDave Kleikamp * we have already got rid of any since the last update of the log tail
302470decc6SDave Kleikamp * in the journal superblock. If so, we can instantly roll the
303470decc6SDave Kleikamp * superblock forward to remove those transactions from the log.
304470decc6SDave Kleikamp *
305470decc6SDave Kleikamp * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
306470decc6SDave Kleikamp *
307470decc6SDave Kleikamp * Called with the journal lock held.
308470decc6SDave Kleikamp *
309470decc6SDave Kleikamp * This is the only part of the journaling code which really needs to be
310470decc6SDave Kleikamp * aware of transaction aborts. Checkpointing involves writing to the
311470decc6SDave Kleikamp * main filesystem area rather than to the journal, so it can proceed
31244519fafSHidehiro Kawai * even in abort state, but we must not update the super block if
31344519fafSHidehiro Kawai * checkpointing may have failed. Otherwise, we would lose some metadata
31444519fafSHidehiro Kawai * buffers which should be written-back to the filesystem.
315470decc6SDave Kleikamp */
316470decc6SDave Kleikamp
jbd2_cleanup_journal_tail(journal_t * journal)317f7f4bccbSMingming Cao int jbd2_cleanup_journal_tail(journal_t *journal)
318470decc6SDave Kleikamp {
319470decc6SDave Kleikamp tid_t first_tid;
32079feb521SJan Kara unsigned long blocknr;
321470decc6SDave Kleikamp
32244519fafSHidehiro Kawai if (is_journal_aborted(journal))
3236f6a6fdaSJoseph Qi return -EIO;
32444519fafSHidehiro Kawai
32579feb521SJan Kara if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
32679feb521SJan Kara return 1;
327470decc6SDave Kleikamp J_ASSERT(blocknr != 0);
328470decc6SDave Kleikamp
329cc3e1beaSTheodore Ts'o /*
33079feb521SJan Kara * We need to make sure that any blocks that were recently written out
33179feb521SJan Kara * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
33279feb521SJan Kara * we drop the transactions from the journal. It's unlikely this will
33379feb521SJan Kara * be necessary, especially with an appropriately sized journal, but we
33479feb521SJan Kara * need this to guarantee correctness. Fortunately
33579feb521SJan Kara * jbd2_cleanup_journal_tail() doesn't get called all that often.
336cc3e1beaSTheodore Ts'o */
33779feb521SJan Kara if (journal->j_flags & JBD2_BARRIER)
338c6bf3f0eSChristoph Hellwig blkdev_issue_flush(journal->j_fs_dev);
33979feb521SJan Kara
3406f6a6fdaSJoseph Qi return __jbd2_update_log_tail(journal, first_tid, blocknr);
341470decc6SDave Kleikamp }
342470decc6SDave Kleikamp
343470decc6SDave Kleikamp
/* Checkpoint list management */

/*
 * How journal_shrink_one_cp_list() treats the buffers on a checkpoint list:
 *
 * SHRINK_DESTROY   - remove every buffer with
 *                    __jbd2_journal_remove_checkpoint(), without checking
 *                    whether it is busy.
 * SHRINK_BUSY_STOP - use jbd2_journal_try_remove_checkpoint() and stop
 *                    scanning the list at the first buffer it refuses
 *                    (negative return).
 * SHRINK_BUSY_SKIP - use jbd2_journal_try_remove_checkpoint() and skip
 *                    over buffers it refuses, continuing with the rest.
 */
enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
347373ac521SZhang Yi
348470decc6SDave Kleikamp /*
349b98dba27SZhang Yi * journal_shrink_one_cp_list
350470decc6SDave Kleikamp *
351b98dba27SZhang Yi * Find all the written-back checkpoint buffers in the given list
352b98dba27SZhang Yi * and try to release them. If the whole transaction is released, set
353b98dba27SZhang Yi * the 'released' parameter. Return the number of released checkpointed
354b98dba27SZhang Yi * buffers.
355470decc6SDave Kleikamp *
356470decc6SDave Kleikamp * Called with j_list_lock held.
357470decc6SDave Kleikamp */
journal_shrink_one_cp_list(struct journal_head * jh,enum shrink_type type,bool * released)358b98dba27SZhang Yi static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
359373ac521SZhang Yi enum shrink_type type,
360373ac521SZhang Yi bool *released)
361470decc6SDave Kleikamp {
362470decc6SDave Kleikamp struct journal_head *last_jh;
363470decc6SDave Kleikamp struct journal_head *next_jh = jh;
364b98dba27SZhang Yi unsigned long nr_freed = 0;
365b98dba27SZhang Yi int ret;
366470decc6SDave Kleikamp
367b98dba27SZhang Yi *released = false;
368470decc6SDave Kleikamp if (!jh)
369470decc6SDave Kleikamp return 0;
370470decc6SDave Kleikamp
371470decc6SDave Kleikamp last_jh = jh->b_cpprev;
372470decc6SDave Kleikamp do {
373470decc6SDave Kleikamp jh = next_jh;
374470decc6SDave Kleikamp next_jh = jh->b_cpnext;
375dbf2bab7SZhang Yi
376373ac521SZhang Yi if (type == SHRINK_DESTROY) {
37746f881b5SZhang Yi ret = __jbd2_journal_remove_checkpoint(jh);
37846f881b5SZhang Yi } else {
37946f881b5SZhang Yi ret = jbd2_journal_try_remove_checkpoint(jh);
380373ac521SZhang Yi if (ret < 0) {
381373ac521SZhang Yi if (type == SHRINK_BUSY_SKIP)
3824ba3fcddSZhang Yi continue;
383373ac521SZhang Yi break;
384373ac521SZhang Yi }
38546f881b5SZhang Yi }
3864ba3fcddSZhang Yi
3874ba3fcddSZhang Yi nr_freed++;
3884ba3fcddSZhang Yi if (ret) {
3894ba3fcddSZhang Yi *released = true;
3904ba3fcddSZhang Yi break;
3914ba3fcddSZhang Yi }
3924ba3fcddSZhang Yi
3934ba3fcddSZhang Yi if (need_resched())
3944ba3fcddSZhang Yi break;
395b98dba27SZhang Yi } while (jh != last_jh);
3964ba3fcddSZhang Yi
3974ba3fcddSZhang Yi return nr_freed;
3984ba3fcddSZhang Yi }
3994ba3fcddSZhang Yi
4004ba3fcddSZhang Yi /*
4014ba3fcddSZhang Yi * jbd2_journal_shrink_checkpoint_list
4024ba3fcddSZhang Yi *
4034ba3fcddSZhang Yi * Find 'nr_to_scan' written-back checkpoint buffers in the journal
4044ba3fcddSZhang Yi * and try to release them. Return the number of released checkpointed
4054ba3fcddSZhang Yi * buffers.
4064ba3fcddSZhang Yi *
4074ba3fcddSZhang Yi * Called with j_list_lock held.
4084ba3fcddSZhang Yi */
jbd2_journal_shrink_checkpoint_list(journal_t * journal,unsigned long * nr_to_scan)4094ba3fcddSZhang Yi unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
4104ba3fcddSZhang Yi unsigned long *nr_to_scan)
4114ba3fcddSZhang Yi {
4124ba3fcddSZhang Yi transaction_t *transaction, *last_transaction, *next_transaction;
413b98dba27SZhang Yi bool __maybe_unused released;
4144ba3fcddSZhang Yi tid_t first_tid = 0, last_tid = 0, next_tid = 0;
4154ba3fcddSZhang Yi tid_t tid = 0;
4164ba3fcddSZhang Yi unsigned long nr_freed = 0;
417b98dba27SZhang Yi unsigned long freed;
4181552199aSLuis Henriques (SUSE) bool first_set = false;
4194ba3fcddSZhang Yi
4204ba3fcddSZhang Yi again:
4214ba3fcddSZhang Yi spin_lock(&journal->j_list_lock);
4224ba3fcddSZhang Yi if (!journal->j_checkpoint_transactions) {
4234ba3fcddSZhang Yi spin_unlock(&journal->j_list_lock);
4244ba3fcddSZhang Yi goto out;
4254ba3fcddSZhang Yi }
4264ba3fcddSZhang Yi
4274ba3fcddSZhang Yi /*
4284ba3fcddSZhang Yi * Get next shrink transaction, resume previous scan or start
4294ba3fcddSZhang Yi * over again. If some others do checkpoint and drop transaction
4304ba3fcddSZhang Yi * from the checkpoint list, we ignore saved j_shrink_transaction
4314ba3fcddSZhang Yi * and start over unconditionally.
4324ba3fcddSZhang Yi */
4334ba3fcddSZhang Yi if (journal->j_shrink_transaction)
4344ba3fcddSZhang Yi transaction = journal->j_shrink_transaction;
4354ba3fcddSZhang Yi else
4364ba3fcddSZhang Yi transaction = journal->j_checkpoint_transactions;
4374ba3fcddSZhang Yi
4381552199aSLuis Henriques (SUSE) if (!first_set) {
4394ba3fcddSZhang Yi first_tid = transaction->t_tid;
4401552199aSLuis Henriques (SUSE) first_set = true;
4411552199aSLuis Henriques (SUSE) }
4424ba3fcddSZhang Yi last_transaction = journal->j_checkpoint_transactions->t_cpprev;
4434ba3fcddSZhang Yi next_transaction = transaction;
4444ba3fcddSZhang Yi last_tid = last_transaction->t_tid;
4454ba3fcddSZhang Yi do {
4464ba3fcddSZhang Yi transaction = next_transaction;
4474ba3fcddSZhang Yi next_transaction = transaction->t_cpnext;
4484ba3fcddSZhang Yi tid = transaction->t_tid;
4494ba3fcddSZhang Yi
450b98dba27SZhang Yi freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
451373ac521SZhang Yi SHRINK_BUSY_SKIP, &released);
452b98dba27SZhang Yi nr_freed += freed;
453b98dba27SZhang Yi (*nr_to_scan) -= min(*nr_to_scan, freed);
4544ba3fcddSZhang Yi if (*nr_to_scan == 0)
4554ba3fcddSZhang Yi break;
4564ba3fcddSZhang Yi if (need_resched() || spin_needbreak(&journal->j_list_lock))
4574ba3fcddSZhang Yi break;
4584ba3fcddSZhang Yi } while (transaction != last_transaction);
4594ba3fcddSZhang Yi
4604ba3fcddSZhang Yi if (transaction != last_transaction) {
4614ba3fcddSZhang Yi journal->j_shrink_transaction = next_transaction;
4624ba3fcddSZhang Yi next_tid = next_transaction->t_tid;
4634ba3fcddSZhang Yi } else {
4644ba3fcddSZhang Yi journal->j_shrink_transaction = NULL;
4654ba3fcddSZhang Yi next_tid = 0;
4664ba3fcddSZhang Yi }
4674ba3fcddSZhang Yi
4684ba3fcddSZhang Yi spin_unlock(&journal->j_list_lock);
4694ba3fcddSZhang Yi cond_resched();
4704ba3fcddSZhang Yi
4711552199aSLuis Henriques (SUSE) if (*nr_to_scan && journal->j_shrink_transaction)
4724ba3fcddSZhang Yi goto again;
4734ba3fcddSZhang Yi out:
4744ba3fcddSZhang Yi trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
475b98dba27SZhang Yi nr_freed, next_tid);
4764ba3fcddSZhang Yi
4774ba3fcddSZhang Yi return nr_freed;
4784ba3fcddSZhang Yi }
4794ba3fcddSZhang Yi
4804ba3fcddSZhang Yi /*
481470decc6SDave Kleikamp * journal_clean_checkpoint_list
482470decc6SDave Kleikamp *
483470decc6SDave Kleikamp * Find all the written-back checkpoint buffers in the journal and release them.
484841df7dfSJan Kara * If 'destroy' is set, release all buffers unconditionally.
485470decc6SDave Kleikamp *
486470decc6SDave Kleikamp * Called with j_list_lock held.
487470decc6SDave Kleikamp */
__jbd2_journal_clean_checkpoint_list(journal_t * journal,bool destroy)488841df7dfSJan Kara void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
489470decc6SDave Kleikamp {
490470decc6SDave Kleikamp transaction_t *transaction, *last_transaction, *next_transaction;
491373ac521SZhang Yi enum shrink_type type;
492b98dba27SZhang Yi bool released;
493470decc6SDave Kleikamp
494470decc6SDave Kleikamp transaction = journal->j_checkpoint_transactions;
495470decc6SDave Kleikamp if (!transaction)
49650849db3SJan Kara return;
497470decc6SDave Kleikamp
498373ac521SZhang Yi type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
499470decc6SDave Kleikamp last_transaction = transaction->t_cpprev;
500470decc6SDave Kleikamp next_transaction = transaction;
501470decc6SDave Kleikamp do {
502470decc6SDave Kleikamp transaction = next_transaction;
503470decc6SDave Kleikamp next_transaction = transaction->t_cpnext;
504b98dba27SZhang Yi journal_shrink_one_cp_list(transaction->t_checkpoint_list,
505373ac521SZhang Yi type, &released);
506470decc6SDave Kleikamp /*
507470decc6SDave Kleikamp * This function only frees up some memory if possible so we
508470decc6SDave Kleikamp * dont have an obligation to finish processing. Bail out if
509470decc6SDave Kleikamp * preemption requested:
510470decc6SDave Kleikamp */
51150849db3SJan Kara if (need_resched())
51250849db3SJan Kara return;
51350849db3SJan Kara /*
51450849db3SJan Kara * Stop scanning if we couldn't free the transaction. This
51550849db3SJan Kara * avoids pointless scanning of transactions which still
51650849db3SJan Kara * weren't checkpointed.
51750849db3SJan Kara */
518b98dba27SZhang Yi if (!released)
51950849db3SJan Kara return;
520470decc6SDave Kleikamp } while (transaction != last_transaction);
521470decc6SDave Kleikamp }
522470decc6SDave Kleikamp
523470decc6SDave Kleikamp /*
524841df7dfSJan Kara * Remove buffers from all checkpoint lists as journal is aborted and we just
525841df7dfSJan Kara * need to free memory
526841df7dfSJan Kara */
jbd2_journal_destroy_checkpoint(journal_t * journal)527841df7dfSJan Kara void jbd2_journal_destroy_checkpoint(journal_t *journal)
528841df7dfSJan Kara {
529841df7dfSJan Kara /*
530841df7dfSJan Kara * We loop because __jbd2_journal_clean_checkpoint_list() may abort
531841df7dfSJan Kara * early due to a need of rescheduling.
532841df7dfSJan Kara */
533841df7dfSJan Kara while (1) {
534841df7dfSJan Kara spin_lock(&journal->j_list_lock);
535841df7dfSJan Kara if (!journal->j_checkpoint_transactions) {
536841df7dfSJan Kara spin_unlock(&journal->j_list_lock);
537841df7dfSJan Kara break;
538841df7dfSJan Kara }
539841df7dfSJan Kara __jbd2_journal_clean_checkpoint_list(journal, true);
540841df7dfSJan Kara spin_unlock(&journal->j_list_lock);
541841df7dfSJan Kara cond_resched();
542841df7dfSJan Kara }
543841df7dfSJan Kara }
544841df7dfSJan Kara
545841df7dfSJan Kara /*
546470decc6SDave Kleikamp * journal_remove_checkpoint: called after a buffer has been committed
547470decc6SDave Kleikamp * to disk (either by being write-back flushed to disk, or being
548470decc6SDave Kleikamp * committed to the log).
549470decc6SDave Kleikamp *
550470decc6SDave Kleikamp * We cannot safely clean a transaction out of the log until all of the
551470decc6SDave Kleikamp * buffer updates committed in that transaction have safely been stored
552470decc6SDave Kleikamp * elsewhere on disk. To achieve this, all of the buffers in a
553470decc6SDave Kleikamp * transaction need to be maintained on the transaction's checkpoint
554470decc6SDave Kleikamp * lists until they have been rewritten, at which point this function is
555470decc6SDave Kleikamp * called to remove the buffer from the existing transaction's
556470decc6SDave Kleikamp * checkpoint lists.
557470decc6SDave Kleikamp *
558470decc6SDave Kleikamp * The function returns 1 if it frees the transaction, 0 otherwise.
559de1b7941SJan Kara * The function can free jh and bh.
560470decc6SDave Kleikamp *
561470decc6SDave Kleikamp * This function is called with j_list_lock held.
562470decc6SDave Kleikamp */
__jbd2_journal_remove_checkpoint(struct journal_head * jh)563f7f4bccbSMingming Cao int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
564470decc6SDave Kleikamp {
565bf699327STheodore Ts'o struct transaction_chp_stats_s *stats;
566470decc6SDave Kleikamp transaction_t *transaction;
567470decc6SDave Kleikamp journal_t *journal;
568fcf37549SZhang Yi struct buffer_head *bh = jh2bh(jh);
569470decc6SDave Kleikamp
570470decc6SDave Kleikamp JBUFFER_TRACE(jh, "entry");
571470decc6SDave Kleikamp
5721866cba8SZhang Yi transaction = jh->b_cp_transaction;
5731866cba8SZhang Yi if (!transaction) {
574470decc6SDave Kleikamp JBUFFER_TRACE(jh, "not on transaction");
5751866cba8SZhang Yi return 0;
576470decc6SDave Kleikamp }
577470decc6SDave Kleikamp journal = transaction->t_journal;
578470decc6SDave Kleikamp
579de1b7941SJan Kara JBUFFER_TRACE(jh, "removing from transaction");
580fcf37549SZhang Yi
581fcf37549SZhang Yi /*
582fcf37549SZhang Yi * If we have failed to write the buffer out to disk, the filesystem
583fcf37549SZhang Yi * may become inconsistent. We cannot abort the journal here since
584fcf37549SZhang Yi * we hold j_list_lock and we have to be careful about races with
585fcf37549SZhang Yi * jbd2_journal_destroy(). So mark the writeback IO error in the
586fcf37549SZhang Yi * journal here and we abort the journal later from a better context.
587fcf37549SZhang Yi */
588fcf37549SZhang Yi if (buffer_write_io_error(bh))
589fcf37549SZhang Yi set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
590fcf37549SZhang Yi
591470decc6SDave Kleikamp __buffer_unlink(jh);
592470decc6SDave Kleikamp jh->b_cp_transaction = NULL;
5930705e8d1STheodore Ts'o percpu_counter_dec(&journal->j_checkpoint_jh_count);
594de1b7941SJan Kara jbd2_journal_put_journal_head(jh);
595470decc6SDave Kleikamp
5961866cba8SZhang Yi /* Is this transaction empty? */
597be222553SZhang Yi if (transaction->t_checkpoint_list)
5981866cba8SZhang Yi return 0;
599470decc6SDave Kleikamp
600470decc6SDave Kleikamp /*
601470decc6SDave Kleikamp * There is one special case to worry about: if we have just pulled the
602f5a7a6b0SJan Kara * buffer off a running or committing transaction's checkpoing list,
603f5a7a6b0SJan Kara * then even if the checkpoint list is empty, the transaction obviously
604f5a7a6b0SJan Kara * cannot be dropped!
605470decc6SDave Kleikamp *
606f5a7a6b0SJan Kara * The locking here around t_state is a bit sleazy.
607f7f4bccbSMingming Cao * See the comment at the end of jbd2_journal_commit_transaction().
608470decc6SDave Kleikamp */
609de1b7941SJan Kara if (transaction->t_state != T_FINISHED)
6101866cba8SZhang Yi return 0;
611470decc6SDave Kleikamp
6121866cba8SZhang Yi /*
6131866cba8SZhang Yi * OK, that was the last buffer for the transaction, we can now
6141866cba8SZhang Yi * safely remove this transaction from the log.
6151866cba8SZhang Yi */
616bf699327STheodore Ts'o stats = &transaction->t_chp_stats;
617bf699327STheodore Ts'o if (stats->cs_chp_time)
618bf699327STheodore Ts'o stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
619bf699327STheodore Ts'o jiffies);
620bf699327STheodore Ts'o trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
621bf699327STheodore Ts'o transaction->t_tid, stats);
622470decc6SDave Kleikamp
623f7f4bccbSMingming Cao __jbd2_journal_drop_transaction(journal, transaction);
6240c2022ecSYongqiang Yang jbd2_journal_free_transaction(transaction);
6251866cba8SZhang Yi return 1;
626470decc6SDave Kleikamp }
627470decc6SDave Kleikamp
628470decc6SDave Kleikamp /*
62946f881b5SZhang Yi * Check the checkpoint buffer and try to remove it from the checkpoint
63046f881b5SZhang Yi * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
63146f881b5SZhang Yi * it frees the transaction, 0 otherwise.
63246f881b5SZhang Yi *
63346f881b5SZhang Yi * This function is called with j_list_lock held.
63446f881b5SZhang Yi */
jbd2_journal_try_remove_checkpoint(struct journal_head * jh)63546f881b5SZhang Yi int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
63646f881b5SZhang Yi {
63746f881b5SZhang Yi struct buffer_head *bh = jh2bh(jh);
63846f881b5SZhang Yi
639590a809fSZhihao Cheng if (jh->b_transaction)
640590a809fSZhihao Cheng return -EBUSY;
64146f881b5SZhang Yi if (!trylock_buffer(bh))
64246f881b5SZhang Yi return -EBUSY;
64346f881b5SZhang Yi if (buffer_dirty(bh)) {
64446f881b5SZhang Yi unlock_buffer(bh);
64546f881b5SZhang Yi return -EBUSY;
64646f881b5SZhang Yi }
64746f881b5SZhang Yi unlock_buffer(bh);
64846f881b5SZhang Yi
64946f881b5SZhang Yi /*
65046f881b5SZhang Yi * Buffer is clean and the IO has finished (we held the buffer
65146f881b5SZhang Yi * lock) so the checkpoint is done. We can safely remove the
65246f881b5SZhang Yi * buffer from this transaction.
65346f881b5SZhang Yi */
65446f881b5SZhang Yi JBUFFER_TRACE(jh, "remove from checkpoint list");
65546f881b5SZhang Yi return __jbd2_journal_remove_checkpoint(jh);
65646f881b5SZhang Yi }
65746f881b5SZhang Yi
65846f881b5SZhang Yi /*
659470decc6SDave Kleikamp * journal_insert_checkpoint: put a committed buffer onto a checkpoint
660470decc6SDave Kleikamp * list so that we know when it is safe to clean the transaction out of
661470decc6SDave Kleikamp * the log.
662470decc6SDave Kleikamp *
663470decc6SDave Kleikamp * Called with the journal locked.
664470decc6SDave Kleikamp * Called with j_list_lock held.
665470decc6SDave Kleikamp */
__jbd2_journal_insert_checkpoint(struct journal_head * jh,transaction_t * transaction)666f7f4bccbSMingming Cao void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
667470decc6SDave Kleikamp transaction_t *transaction)
668470decc6SDave Kleikamp {
669470decc6SDave Kleikamp JBUFFER_TRACE(jh, "entry");
670470decc6SDave Kleikamp J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
671470decc6SDave Kleikamp J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
672470decc6SDave Kleikamp
673de1b7941SJan Kara /* Get reference for checkpointing transaction */
674de1b7941SJan Kara jbd2_journal_grab_journal_head(jh2bh(jh));
675470decc6SDave Kleikamp jh->b_cp_transaction = transaction;
676470decc6SDave Kleikamp
677470decc6SDave Kleikamp if (!transaction->t_checkpoint_list) {
678470decc6SDave Kleikamp jh->b_cpnext = jh->b_cpprev = jh;
679470decc6SDave Kleikamp } else {
680470decc6SDave Kleikamp jh->b_cpnext = transaction->t_checkpoint_list;
681470decc6SDave Kleikamp jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
682470decc6SDave Kleikamp jh->b_cpprev->b_cpnext = jh;
683470decc6SDave Kleikamp jh->b_cpnext->b_cpprev = jh;
684470decc6SDave Kleikamp }
685470decc6SDave Kleikamp transaction->t_checkpoint_list = jh;
6860705e8d1STheodore Ts'o percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count);
687470decc6SDave Kleikamp }
688470decc6SDave Kleikamp
689470decc6SDave Kleikamp /*
690470decc6SDave Kleikamp * We've finished with this transaction structure: adios...
691470decc6SDave Kleikamp *
692470decc6SDave Kleikamp * The transaction must have no links except for the checkpoint by this
693470decc6SDave Kleikamp * point.
694470decc6SDave Kleikamp *
695470decc6SDave Kleikamp * Called with the journal locked.
696470decc6SDave Kleikamp * Called with j_list_lock held.
697470decc6SDave Kleikamp */
698470decc6SDave Kleikamp
__jbd2_journal_drop_transaction(journal_t * journal,transaction_t * transaction)699f7f4bccbSMingming Cao void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
700470decc6SDave Kleikamp {
701470decc6SDave Kleikamp assert_spin_locked(&journal->j_list_lock);
7024ba3fcddSZhang Yi
7034ba3fcddSZhang Yi journal->j_shrink_transaction = NULL;
704470decc6SDave Kleikamp if (transaction->t_cpnext) {
705470decc6SDave Kleikamp transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
706470decc6SDave Kleikamp transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
707470decc6SDave Kleikamp if (journal->j_checkpoint_transactions == transaction)
708470decc6SDave Kleikamp journal->j_checkpoint_transactions =
709470decc6SDave Kleikamp transaction->t_cpnext;
710470decc6SDave Kleikamp if (journal->j_checkpoint_transactions == transaction)
711470decc6SDave Kleikamp journal->j_checkpoint_transactions = NULL;
712470decc6SDave Kleikamp }
713470decc6SDave Kleikamp
714470decc6SDave Kleikamp J_ASSERT(transaction->t_state == T_FINISHED);
715470decc6SDave Kleikamp J_ASSERT(transaction->t_buffers == NULL);
716470decc6SDave Kleikamp J_ASSERT(transaction->t_forget == NULL);
717470decc6SDave Kleikamp J_ASSERT(transaction->t_shadow_list == NULL);
718470decc6SDave Kleikamp J_ASSERT(transaction->t_checkpoint_list == NULL);
719a51dca9cSTheodore Ts'o J_ASSERT(atomic_read(&transaction->t_updates) == 0);
720470decc6SDave Kleikamp J_ASSERT(journal->j_committing_transaction != transaction);
721470decc6SDave Kleikamp J_ASSERT(journal->j_running_transaction != transaction);
722470decc6SDave Kleikamp
7232201c590SSeiji Aguchi trace_jbd2_drop_transaction(journal, transaction);
7242201c590SSeiji Aguchi
725cb3b3bf2SJan Kara jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
726470decc6SDave Kleikamp }
727