xref: /openbmc/linux/fs/reiserfs/journal.c (revision 033369d1)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds ** Write ahead logging implementation copyright Chris Mason 2000
31da177e4SLinus Torvalds **
425985edcSLucas De Marchi ** The background commits make this code very interrelated, and
51da177e4SLinus Torvalds ** overly complex.  I need to rethink things a bit....The major players:
61da177e4SLinus Torvalds **
71da177e4SLinus Torvalds ** journal_begin -- call with the number of blocks you expect to log.
81da177e4SLinus Torvalds **                  If the current transaction is too
91da177e4SLinus Torvalds ** 		    old, it will block until the current transaction is
101da177e4SLinus Torvalds ** 		    finished, and then start a new one.
111da177e4SLinus Torvalds **		    Usually, your transaction will get joined in with
121da177e4SLinus Torvalds **                  previous ones for speed.
131da177e4SLinus Torvalds **
141da177e4SLinus Torvalds ** journal_join  -- same as journal_begin, but won't block on the current
151da177e4SLinus Torvalds **                  transaction regardless of age.  Don't ever call
161da177e4SLinus Torvalds **                  this.  Ever.  There are only two places it should be
171da177e4SLinus Torvalds **                  called from, and they are both inside this file.
181da177e4SLinus Torvalds **
191da177e4SLinus Torvalds ** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
201da177e4SLinus Torvalds **                       that might make them get sent to disk
211da177e4SLinus Torvalds **                       and then marks them BH_JDirty.  Puts the buffer head
221da177e4SLinus Torvalds **                       into the current transaction hash.
231da177e4SLinus Torvalds **
241da177e4SLinus Torvalds ** journal_end -- if the current transaction is batchable, it does nothing
251da177e4SLinus Torvalds **                   otherwise, it could do an async/synchronous commit, or
261da177e4SLinus Torvalds **                   a full flush of all log and real blocks in the
271da177e4SLinus Torvalds **                   transaction.
281da177e4SLinus Torvalds **
291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and
301da177e4SLinus Torvalds **                      commit blocks are sent to disk.  Forces commit blocks
311da177e4SLinus Torvalds **                      to disk for all backgrounded commits that have been
321da177e4SLinus Torvalds **                      around too long.
**		     -- Note, if you call this as an immediate flush
**		        from within kupdate, it will ignore the immediate flag
351da177e4SLinus Torvalds */
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <linux/time.h>
386188e10dSMatthew Wilcox #include <linux/semaphore.h>
391da177e4SLinus Torvalds #include <linux/vmalloc.h>
40f466c6fdSAl Viro #include "reiserfs.h"
411da177e4SLinus Torvalds #include <linux/kernel.h>
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/fcntl.h>
441da177e4SLinus Torvalds #include <linux/stat.h>
451da177e4SLinus Torvalds #include <linux/string.h>
461da177e4SLinus Torvalds #include <linux/buffer_head.h>
471da177e4SLinus Torvalds #include <linux/workqueue.h>
481da177e4SLinus Torvalds #include <linux/writeback.h>
491da177e4SLinus Torvalds #include <linux/blkdev.h>
503fcfab16SAndrew Morton #include <linux/backing-dev.h>
5190415deaSJeff Mahoney #include <linux/uaccess.h>
525a0e3ad6STejun Heo #include <linux/slab.h>
5390415deaSJeff Mahoney 
541da177e4SLinus Torvalds 
/*
 * Convert a pointer to an embedded list_head back into the
 * struct reiserfs_journal_list that contains it.
 * JOURNAL_LIST_ENTRY is for the j_list member, JOURNAL_WORK_ENTRY is
 * for the j_working_list member.
 */
#define JOURNAL_LIST_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_list))
#define JOURNAL_WORK_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_working_list))
601da177e4SLinus Torvalds 
/*
 * The number of mounted filesystems.  This is used to decide when to
 * start and kill the commit workqueue.
 */
static int reiserfs_mounted_fs_count;

/* the commit workqueue referred to above */
static struct workqueue_struct *commit_wq;
671da177e4SLinus Torvalds 
/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018
/* read ahead */
#define BUFNR 64

/* cnode stat bits.  Move these into reiserfs_fs.h */

/* this block was freed, and can't be written. */
#define BLOCK_FREED 2
/* this block was freed during this transaction, and can't be written */
#define BLOCK_FREED_HOLDER 3

/* used in flush_journal_list */
#define BLOCK_NEEDS_FLUSH 4
#define BLOCK_DIRTIED 5

/* journal list state bits */
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
/* someone will commit this list */
#define LIST_COMMIT_PENDING  4

/* flags for do_journal_end */
/* flush commit and real blocks */
#define FLUSH_ALL   1
/* end and commit this transaction */
#define COMMIT_NOW  2
/* wait for the log blocks to hit the disk */
#define WAIT        4
891da177e4SLinus Torvalds 
/* forward declarations for helpers defined later in this file */
static int do_journal_end(struct reiserfs_transaction_handle *,
			  struct super_block *,
			  unsigned long nblocks, int flags);
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl,
			      int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl,
			     int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
			struct super_block *sb,
			unsigned long nblocks);
static int release_journal_dev(struct super_block *super,
			       struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);

/* values for the "join" argument of do_journal_begin_r */
enum {
	/* regular journal begin */
	JBEGIN_REG = 0,
	/* join the running transaction if at all possible */
	JBEGIN_JOIN = 1,
	/* called from cleanup code, ignores aborted flag */
	JBEGIN_ABORT = 2,
};

static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *sb,
			      unsigned long nblocks,
			      int join);
1171da177e4SLinus Torvalds 
118a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb)
119bd4c625cSLinus Torvalds {
120a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
121bd4c625cSLinus Torvalds 	memset(journal->j_hash_table, 0,
122bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
1231da177e4SLinus Torvalds }
1241da177e4SLinus Torvalds 
/*
 * Clear the dirty bit and the journal_test bit of a buffer head.
 * A NULL bh is accepted and ignored.  Always returns 0.
 *
 * Historical note from the original author: this exists because
 * refile_buffer must not be allowed to schedule after a block has been
 * freed.  Look at remove_from_transaction and journal_mark_freed for
 * more details.
 */
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}
1381da177e4SLinus Torvalds 
139bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
140a9dd3643SJeff Mahoney 							 *sb)
141bd4c625cSLinus Torvalds {
1421da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
1431da177e4SLinus Torvalds 	static int id;
1441da177e4SLinus Torvalds 
145d739b42bSPekka Enberg 	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
1461da177e4SLinus Torvalds 	if (!bn) {
1471da177e4SLinus Torvalds 		return NULL;
1481da177e4SLinus Torvalds 	}
149a9dd3643SJeff Mahoney 	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
1501da177e4SLinus Torvalds 	if (!bn->data) {
151d739b42bSPekka Enberg 		kfree(bn);
1521da177e4SLinus Torvalds 		return NULL;
1531da177e4SLinus Torvalds 	}
1541da177e4SLinus Torvalds 	bn->id = id++;
1551da177e4SLinus Torvalds 	INIT_LIST_HEAD(&bn->list);
1561da177e4SLinus Torvalds 	return bn;
1571da177e4SLinus Torvalds }
1581da177e4SLinus Torvalds 
159a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
160bd4c625cSLinus Torvalds {
161a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1621da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
1631da177e4SLinus Torvalds 	struct list_head *entry = journal->j_bitmap_nodes.next;
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes++;
1661da177e4SLinus Torvalds       repeat:
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds 	if (entry != &journal->j_bitmap_nodes) {
1691da177e4SLinus Torvalds 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
1701da177e4SLinus Torvalds 		list_del(entry);
171a9dd3643SJeff Mahoney 		memset(bn->data, 0, sb->s_blocksize);
1721da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
1731da177e4SLinus Torvalds 		return bn;
1741da177e4SLinus Torvalds 	}
175a9dd3643SJeff Mahoney 	bn = allocate_bitmap_node(sb);
1761da177e4SLinus Torvalds 	if (!bn) {
1771da177e4SLinus Torvalds 		yield();
1781da177e4SLinus Torvalds 		goto repeat;
1791da177e4SLinus Torvalds 	}
1801da177e4SLinus Torvalds 	return bn;
1811da177e4SLinus Torvalds }
182a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb,
183bd4c625cSLinus Torvalds 				    struct reiserfs_bitmap_node *bn)
184bd4c625cSLinus Torvalds {
185a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1861da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes--;
1871da177e4SLinus Torvalds 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
188d739b42bSPekka Enberg 		kfree(bn->data);
189d739b42bSPekka Enberg 		kfree(bn);
1901da177e4SLinus Torvalds 	} else {
1911da177e4SLinus Torvalds 		list_add(&bn->list, &journal->j_bitmap_nodes);
1921da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes++;
1931da177e4SLinus Torvalds 	}
1941da177e4SLinus Torvalds }
1951da177e4SLinus Torvalds 
196a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb)
197bd4c625cSLinus Torvalds {
1981da177e4SLinus Torvalds 	int i;
199a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2001da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
2011da177e4SLinus Torvalds 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
202a9dd3643SJeff Mahoney 		bn = allocate_bitmap_node(sb);
2031da177e4SLinus Torvalds 		if (bn) {
2041da177e4SLinus Torvalds 			list_add(&bn->list, &journal->j_bitmap_nodes);
2051da177e4SLinus Torvalds 			journal->j_free_bitmap_nodes++;
2061da177e4SLinus Torvalds 		} else {
2070222e657SJeff Mahoney 			break;	/* this is ok, we'll try again when more are needed */
2081da177e4SLinus Torvalds 		}
2091da177e4SLinus Torvalds 	}
2101da177e4SLinus Torvalds }
2111da177e4SLinus Torvalds 
212a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb,
2133ee16670SJeff Mahoney 				  b_blocknr_t block,
214bd4c625cSLinus Torvalds 				  struct reiserfs_list_bitmap *jb)
215bd4c625cSLinus Torvalds {
216a9dd3643SJeff Mahoney 	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
217a9dd3643SJeff Mahoney 	unsigned int bit_nr = block % (sb->s_blocksize << 3);
2181da177e4SLinus Torvalds 
2191da177e4SLinus Torvalds 	if (!jb->bitmaps[bmap_nr]) {
220a9dd3643SJeff Mahoney 		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
2211da177e4SLinus Torvalds 	}
2221da177e4SLinus Torvalds 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
2231da177e4SLinus Torvalds 	return 0;
2241da177e4SLinus Torvalds }
2251da177e4SLinus Torvalds 
226a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb,
227bd4c625cSLinus Torvalds 				struct reiserfs_list_bitmap *jb)
228bd4c625cSLinus Torvalds {
2291da177e4SLinus Torvalds 	int i;
2301da177e4SLinus Torvalds 	if (jb->bitmaps == NULL)
2311da177e4SLinus Torvalds 		return;
2321da177e4SLinus Torvalds 
233a9dd3643SJeff Mahoney 	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
2341da177e4SLinus Torvalds 		if (jb->bitmaps[i]) {
235a9dd3643SJeff Mahoney 			free_bitmap_node(sb, jb->bitmaps[i]);
2361da177e4SLinus Torvalds 			jb->bitmaps[i] = NULL;
2371da177e4SLinus Torvalds 		}
2381da177e4SLinus Torvalds 	}
2391da177e4SLinus Torvalds }
2401da177e4SLinus Torvalds 
2411da177e4SLinus Torvalds /*
2421da177e4SLinus Torvalds ** only call this on FS unmount.
2431da177e4SLinus Torvalds */
244a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb,
245bd4c625cSLinus Torvalds 			     struct reiserfs_list_bitmap *jb_array)
246bd4c625cSLinus Torvalds {
2471da177e4SLinus Torvalds 	int i;
2481da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2491da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2501da177e4SLinus Torvalds 		jb = jb_array + i;
2511da177e4SLinus Torvalds 		jb->journal_list = NULL;
252a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
2531da177e4SLinus Torvalds 		vfree(jb->bitmaps);
2541da177e4SLinus Torvalds 		jb->bitmaps = NULL;
2551da177e4SLinus Torvalds 	}
2561da177e4SLinus Torvalds 	return 0;
2571da177e4SLinus Torvalds }
2581da177e4SLinus Torvalds 
259a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb)
260bd4c625cSLinus Torvalds {
261a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2621da177e4SLinus Torvalds 	struct list_head *next = journal->j_bitmap_nodes.next;
2631da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
2641da177e4SLinus Torvalds 
2651da177e4SLinus Torvalds 	while (next != &journal->j_bitmap_nodes) {
2661da177e4SLinus Torvalds 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
2671da177e4SLinus Torvalds 		list_del(next);
268d739b42bSPekka Enberg 		kfree(bn->data);
269d739b42bSPekka Enberg 		kfree(bn);
2701da177e4SLinus Torvalds 		next = journal->j_bitmap_nodes.next;
2711da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
2721da177e4SLinus Torvalds 	}
2731da177e4SLinus Torvalds 
2741da177e4SLinus Torvalds 	return 0;
2751da177e4SLinus Torvalds }
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds /*
2781da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
2791da177e4SLinus Torvalds ** jb_array is the array to be filled in.
2801da177e4SLinus Torvalds */
281a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb,
2821da177e4SLinus Torvalds 				   struct reiserfs_list_bitmap *jb_array,
2833ee16670SJeff Mahoney 				   unsigned int bmap_nr)
284bd4c625cSLinus Torvalds {
2851da177e4SLinus Torvalds 	int i;
2861da177e4SLinus Torvalds 	int failed = 0;
2871da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2881da177e4SLinus Torvalds 	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
2891da177e4SLinus Torvalds 
2901da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2911da177e4SLinus Torvalds 		jb = jb_array + i;
2921da177e4SLinus Torvalds 		jb->journal_list = NULL;
293558feb08SJoe Perches 		jb->bitmaps = vzalloc(mem);
2941da177e4SLinus Torvalds 		if (!jb->bitmaps) {
295a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2000", "unable to "
29645b03d5eSJeff Mahoney 					 "allocate bitmaps for journal lists");
2971da177e4SLinus Torvalds 			failed = 1;
2981da177e4SLinus Torvalds 			break;
2991da177e4SLinus Torvalds 		}
3001da177e4SLinus Torvalds 	}
3011da177e4SLinus Torvalds 	if (failed) {
302a9dd3643SJeff Mahoney 		free_list_bitmaps(sb, jb_array);
3031da177e4SLinus Torvalds 		return -1;
3041da177e4SLinus Torvalds 	}
3051da177e4SLinus Torvalds 	return 0;
3061da177e4SLinus Torvalds }
3071da177e4SLinus Torvalds 
3081da177e4SLinus Torvalds /*
3091da177e4SLinus Torvalds ** find an available list bitmap.  If you can't find one, flush a commit list
3101da177e4SLinus Torvalds ** and try again
3111da177e4SLinus Torvalds */
312a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
313bd4c625cSLinus Torvalds 						    struct reiserfs_journal_list
314bd4c625cSLinus Torvalds 						    *jl)
315bd4c625cSLinus Torvalds {
3161da177e4SLinus Torvalds 	int i, j;
317a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3181da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
3191da177e4SLinus Torvalds 
3201da177e4SLinus Torvalds 	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
3211da177e4SLinus Torvalds 		i = journal->j_list_bitmap_index;
3221da177e4SLinus Torvalds 		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
3231da177e4SLinus Torvalds 		jb = journal->j_list_bitmap + i;
3241da177e4SLinus Torvalds 		if (journal->j_list_bitmap[i].journal_list) {
325a9dd3643SJeff Mahoney 			flush_commit_list(sb,
326bd4c625cSLinus Torvalds 					  journal->j_list_bitmap[i].
327bd4c625cSLinus Torvalds 					  journal_list, 1);
3281da177e4SLinus Torvalds 			if (!journal->j_list_bitmap[i].journal_list) {
3291da177e4SLinus Torvalds 				break;
3301da177e4SLinus Torvalds 			}
3311da177e4SLinus Torvalds 		} else {
3321da177e4SLinus Torvalds 			break;
3331da177e4SLinus Torvalds 		}
3341da177e4SLinus Torvalds 	}
3351da177e4SLinus Torvalds 	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
3361da177e4SLinus Torvalds 		return NULL;
3371da177e4SLinus Torvalds 	}
3381da177e4SLinus Torvalds 	jb->journal_list = jl;
3391da177e4SLinus Torvalds 	return jb;
3401da177e4SLinus Torvalds }
3411da177e4SLinus Torvalds 
3421da177e4SLinus Torvalds /*
3431da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list.
3441da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers
3451da177e4SLinus Torvalds ** returns NULL on failure
3461da177e4SLinus Torvalds */
347bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
348bd4c625cSLinus Torvalds {
3491da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *head;
3501da177e4SLinus Torvalds 	int i;
3511da177e4SLinus Torvalds 	if (num_cnodes <= 0) {
3521da177e4SLinus Torvalds 		return NULL;
3531da177e4SLinus Torvalds 	}
354558feb08SJoe Perches 	head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
3551da177e4SLinus Torvalds 	if (!head) {
3561da177e4SLinus Torvalds 		return NULL;
3571da177e4SLinus Torvalds 	}
3581da177e4SLinus Torvalds 	head[0].prev = NULL;
3591da177e4SLinus Torvalds 	head[0].next = head + 1;
3601da177e4SLinus Torvalds 	for (i = 1; i < num_cnodes; i++) {
3611da177e4SLinus Torvalds 		head[i].prev = head + (i - 1);
3621da177e4SLinus Torvalds 		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
3631da177e4SLinus Torvalds 	}
3641da177e4SLinus Torvalds 	head[num_cnodes - 1].next = NULL;
3651da177e4SLinus Torvalds 	return head;
3661da177e4SLinus Torvalds }
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds /*
3691da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure
3701da177e4SLinus Torvalds */
371a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
372bd4c625cSLinus Torvalds {
3731da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
374a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3751da177e4SLinus Torvalds 
376a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "get_cnode");
3771da177e4SLinus Torvalds 
3781da177e4SLinus Torvalds 	if (journal->j_cnode_free <= 0) {
3791da177e4SLinus Torvalds 		return NULL;
3801da177e4SLinus Torvalds 	}
3811da177e4SLinus Torvalds 	journal->j_cnode_used++;
3821da177e4SLinus Torvalds 	journal->j_cnode_free--;
3831da177e4SLinus Torvalds 	cn = journal->j_cnode_free_list;
3841da177e4SLinus Torvalds 	if (!cn) {
3851da177e4SLinus Torvalds 		return cn;
3861da177e4SLinus Torvalds 	}
3871da177e4SLinus Torvalds 	if (cn->next) {
3881da177e4SLinus Torvalds 		cn->next->prev = NULL;
3891da177e4SLinus Torvalds 	}
3901da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn->next;
3911da177e4SLinus Torvalds 	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
3921da177e4SLinus Torvalds 	return cn;
3931da177e4SLinus Torvalds }
3941da177e4SLinus Torvalds 
3951da177e4SLinus Torvalds /*
3961da177e4SLinus Torvalds ** returns a cnode to the free list
3971da177e4SLinus Torvalds */
398a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb,
399bd4c625cSLinus Torvalds 		       struct reiserfs_journal_cnode *cn)
400bd4c625cSLinus Torvalds {
401a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4021da177e4SLinus Torvalds 
403a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "free_cnode");
4041da177e4SLinus Torvalds 
4051da177e4SLinus Torvalds 	journal->j_cnode_used--;
4061da177e4SLinus Torvalds 	journal->j_cnode_free++;
4071da177e4SLinus Torvalds 	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
4081da177e4SLinus Torvalds 	cn->next = journal->j_cnode_free_list;
4091da177e4SLinus Torvalds 	if (journal->j_cnode_free_list) {
4101da177e4SLinus Torvalds 		journal->j_cnode_free_list->prev = cn;
4111da177e4SLinus Torvalds 	}
4121da177e4SLinus Torvalds 	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
4131da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn;
4141da177e4SLinus Torvalds }
4151da177e4SLinus Torvalds 
/*
 * Drop the journal_prepared and journal_restore_dirty flags from a
 * buffer head.
 */
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);

	clear_buffer_journal_restore_dirty(bh);
}
4211da177e4SLinus Torvalds 
4221da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */
423bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
424bd4c625cSLinus Torvalds 								  super_block
425bd4c625cSLinus Torvalds 								  *sb,
426bd4c625cSLinus Torvalds 								  struct
427bd4c625cSLinus Torvalds 								  reiserfs_journal_cnode
428bd4c625cSLinus Torvalds 								  **table,
4291da177e4SLinus Torvalds 								  long bl)
4301da177e4SLinus Torvalds {
4311da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4321da177e4SLinus Torvalds 	cn = journal_hash(table, sb, bl);
4331da177e4SLinus Torvalds 	while (cn) {
4341da177e4SLinus Torvalds 		if (cn->blocknr == bl && cn->sb == sb)
4351da177e4SLinus Torvalds 			return cn;
4361da177e4SLinus Torvalds 		cn = cn->hnext;
4371da177e4SLinus Torvalds 	}
4381da177e4SLinus Torvalds 	return (struct reiserfs_journal_cnode *)0;
4391da177e4SLinus Torvalds }
4401da177e4SLinus Torvalds 
4411da177e4SLinus Torvalds /*
4421da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
4431da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
4441da177e4SLinus Torvalds ** being overwritten by a replay after crashing.
4451da177e4SLinus Torvalds **
4461da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
4471da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
4481da177e4SLinus Torvalds ** sure you never write the block without logging it.
4491da177e4SLinus Torvalds **
4501da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward.
4511da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search
4521da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl.  Then, we return that
4531da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try.
4541da177e4SLinus Torvalds **
4551da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't
4561da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal
4571da177e4SLinus Torvalds **
4581da177e4SLinus Torvalds */
459a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb,
4603ee16670SJeff Mahoney 			unsigned int bmap_nr, int bit_nr, int search_all,
461bd4c625cSLinus Torvalds 			b_blocknr_t * next_zero_bit)
462bd4c625cSLinus Torvalds {
463a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4641da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4651da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
4661da177e4SLinus Torvalds 	int i;
4671da177e4SLinus Torvalds 	unsigned long bl;
4681da177e4SLinus Torvalds 
4691da177e4SLinus Torvalds 	*next_zero_bit = 0;	/* always start this at zero. */
4701da177e4SLinus Torvalds 
471a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal);
4721da177e4SLinus Torvalds 	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
4731da177e4SLinus Torvalds 	 ** if we crash before the transaction that freed it commits,  this transaction won't
4741da177e4SLinus Torvalds 	 ** have committed either, and the block will never be written
4751da177e4SLinus Torvalds 	 */
4761da177e4SLinus Torvalds 	if (search_all) {
4771da177e4SLinus Torvalds 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
478a9dd3643SJeff Mahoney 			PROC_INFO_INC(sb, journal.in_journal_bitmap);
4791da177e4SLinus Torvalds 			jb = journal->j_list_bitmap + i;
4801da177e4SLinus Torvalds 			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
481bd4c625cSLinus Torvalds 			    test_bit(bit_nr,
482bd4c625cSLinus Torvalds 				     (unsigned long *)jb->bitmaps[bmap_nr]->
483bd4c625cSLinus Torvalds 				     data)) {
484bd4c625cSLinus Torvalds 				*next_zero_bit =
485bd4c625cSLinus Torvalds 				    find_next_zero_bit((unsigned long *)
486bd4c625cSLinus Torvalds 						       (jb->bitmaps[bmap_nr]->
487bd4c625cSLinus Torvalds 							data),
488a9dd3643SJeff Mahoney 						       sb->s_blocksize << 3,
489bd4c625cSLinus Torvalds 						       bit_nr + 1);
4901da177e4SLinus Torvalds 				return 1;
4911da177e4SLinus Torvalds 			}
4921da177e4SLinus Torvalds 		}
4931da177e4SLinus Torvalds 	}
4941da177e4SLinus Torvalds 
495a9dd3643SJeff Mahoney 	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
4961da177e4SLinus Torvalds 	/* is it in any old transactions? */
497bd4c625cSLinus Torvalds 	if (search_all
498bd4c625cSLinus Torvalds 	    && (cn =
499a9dd3643SJeff Mahoney 		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
5001da177e4SLinus Torvalds 		return 1;
5011da177e4SLinus Torvalds 	}
5021da177e4SLinus Torvalds 
5031da177e4SLinus Torvalds 	/* is it in the current transaction.  This should never happen */
504a9dd3643SJeff Mahoney 	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
5051da177e4SLinus Torvalds 		BUG();
5061da177e4SLinus Torvalds 		return 1;
5071da177e4SLinus Torvalds 	}
5081da177e4SLinus Torvalds 
509a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal_reusable);
5101da177e4SLinus Torvalds 	/* safe for reuse */
5111da177e4SLinus Torvalds 	return 0;
5121da177e4SLinus Torvalds }
5131da177e4SLinus Torvalds 
5141da177e4SLinus Torvalds /* insert cn into table
5151da177e4SLinus Torvalds */
516bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
517bd4c625cSLinus Torvalds 				       struct reiserfs_journal_cnode *cn)
518bd4c625cSLinus Torvalds {
5191da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn_orig;
5201da177e4SLinus Torvalds 
5211da177e4SLinus Torvalds 	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
5221da177e4SLinus Torvalds 	cn->hnext = cn_orig;
5231da177e4SLinus Torvalds 	cn->hprev = NULL;
5241da177e4SLinus Torvalds 	if (cn_orig) {
5251da177e4SLinus Torvalds 		cn_orig->hprev = cn;
5261da177e4SLinus Torvalds 	}
5271da177e4SLinus Torvalds 	journal_hash(table, cn->sb, cn->blocknr) = cn;
5281da177e4SLinus Torvalds }
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds /* lock the current transaction */
531a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb)
532bd4c625cSLinus Torvalds {
533a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.lock_journal);
5348ebc4232SFrederic Weisbecker 
5358ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
5361da177e4SLinus Torvalds }
5371da177e4SLinus Torvalds 
5381da177e4SLinus Torvalds /* unlock the current transaction */
539a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb)
540bd4c625cSLinus Torvalds {
541a9dd3643SJeff Mahoney 	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
5421da177e4SLinus Torvalds }
5431da177e4SLinus Torvalds 
5441da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl)
5451da177e4SLinus Torvalds {
5461da177e4SLinus Torvalds 	jl->j_refcount++;
5471da177e4SLinus Torvalds }
5481da177e4SLinus Torvalds 
5491da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s,
5501da177e4SLinus Torvalds 				    struct reiserfs_journal_list *jl)
5511da177e4SLinus Torvalds {
5521da177e4SLinus Torvalds 	if (jl->j_refcount < 1) {
553c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
554bd4c625cSLinus Torvalds 			       jl->j_trans_id, jl->j_refcount);
5551da177e4SLinus Torvalds 	}
5561da177e4SLinus Torvalds 	if (--jl->j_refcount == 0)
557d739b42bSPekka Enberg 		kfree(jl);
5581da177e4SLinus Torvalds }
5591da177e4SLinus Torvalds 
5601da177e4SLinus Torvalds /*
5611da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
5621da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
5631da177e4SLinus Torvalds ** transaction.
5641da177e4SLinus Torvalds */
565a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb,
566bd4c625cSLinus Torvalds 					   struct reiserfs_journal_list *jl)
567bd4c625cSLinus Torvalds {
5681da177e4SLinus Torvalds 
5691da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
5701da177e4SLinus Torvalds 	if (jb) {
571a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
5721da177e4SLinus Torvalds 	}
5731da177e4SLinus Torvalds 	jl->j_list_bitmap->journal_list = NULL;
5741da177e4SLinus Torvalds 	jl->j_list_bitmap = NULL;
5751da177e4SLinus Torvalds }
5761da177e4SLinus Torvalds 
5771da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s,
578600ed416SJeff Mahoney 				    unsigned int trans_id)
5791da177e4SLinus Torvalds {
5801da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
5811da177e4SLinus Torvalds 	struct list_head *entry = &journal->j_journal_list;
5821da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
5831da177e4SLinus Torvalds 
5841da177e4SLinus Torvalds 	if (!list_empty(entry)) {
5851da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry->next);
5861da177e4SLinus Torvalds 		if (jl->j_trans_id <= trans_id) {
5871da177e4SLinus Torvalds 			return 1;
5881da177e4SLinus Torvalds 		}
5891da177e4SLinus Torvalds 	}
5901da177e4SLinus Torvalds 	return 0;
5911da177e4SLinus Torvalds }
5921da177e4SLinus Torvalds 
593398c95bdSChris Mason /*
594398c95bdSChris Mason  * If page->mapping was null, we failed to truncate this page for
595398c95bdSChris Mason  * some reason.  Most likely because it was truncated after being
596398c95bdSChris Mason  * logged via data=journal.
597398c95bdSChris Mason  *
598398c95bdSChris Mason  * This does a check to see if the buffer belongs to one of these
599398c95bdSChris Mason  * lost pages before doing the final put_bh.  If page->mapping was
600398c95bdSChris Mason  * null, it tries to free buffers on the page, which should make the
601398c95bdSChris Mason  * final page_cache_release drop the page from the lru.
602398c95bdSChris Mason  */
603398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh)
604398c95bdSChris Mason {
605398c95bdSChris Mason 	struct page *page = bh->b_page;
606529ae9aaSNick Piggin 	if (!page->mapping && trylock_page(page)) {
607398c95bdSChris Mason 		page_cache_get(page);
608398c95bdSChris Mason 		put_bh(bh);
609398c95bdSChris Mason 		if (!page->mapping)
610398c95bdSChris Mason 			try_to_free_buffers(page);
611398c95bdSChris Mason 		unlock_page(page);
612398c95bdSChris Mason 		page_cache_release(page);
613398c95bdSChris Mason 	} else {
614398c95bdSChris Mason 		put_bh(bh);
615398c95bdSChris Mason 	}
616398c95bdSChris Mason }
617398c95bdSChris Mason 
618bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
619bd4c625cSLinus Torvalds {
6201da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
6211da177e4SLinus Torvalds 
6221da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
62345b03d5eSJeff Mahoney 		reiserfs_warning(NULL, "clm-2084",
62445b03d5eSJeff Mahoney 				 "pinned buffer %lu:%s sent to disk",
6251da177e4SLinus Torvalds 				 bh->b_blocknr, bdevname(bh->b_bdev, b));
6261da177e4SLinus Torvalds 	}
6271da177e4SLinus Torvalds 	if (uptodate)
6281da177e4SLinus Torvalds 		set_buffer_uptodate(bh);
6291da177e4SLinus Torvalds 	else
6301da177e4SLinus Torvalds 		clear_buffer_uptodate(bh);
631398c95bdSChris Mason 
6321da177e4SLinus Torvalds 	unlock_buffer(bh);
633398c95bdSChris Mason 	release_buffer_page(bh);
6341da177e4SLinus Torvalds }
6351da177e4SLinus Torvalds 
/*
 * b_end_io handler installed by submit_ordered_buffer: record the I/O
 * result in the uptodate bit, unlock the buffer and drop one reference.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
6451da177e4SLinus Torvalds 
646bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh)
647bd4c625cSLinus Torvalds {
6481da177e4SLinus Torvalds 	get_bh(bh);
6491da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_buffer_io_sync;
6501da177e4SLinus Torvalds 	clear_buffer_journal_new(bh);
6511da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6521da177e4SLinus Torvalds 	if (!test_clear_buffer_journal_test(bh))
6531da177e4SLinus Torvalds 		BUG();
6541da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6551da177e4SLinus Torvalds 		BUG();
6561da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6571da177e4SLinus Torvalds }
6581da177e4SLinus Torvalds 
659bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh)
660bd4c625cSLinus Torvalds {
6611da177e4SLinus Torvalds 	get_bh(bh);
6621da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6631da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6641da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6651da177e4SLinus Torvalds 		BUG();
6661da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6671da177e4SLinus Torvalds }
6681da177e4SLinus Torvalds 
/*
 * Buffer heads are collected into a buffer_chunk and submitted as a batch.
 * CHUNK_SIZE is the capacity of one chunk: add_to_chunk() invokes its flush
 * callback as soon as this many buffers have been queued.
 */
6691da177e4SLinus Torvalds #define CHUNK_SIZE 32
6701da177e4SLinus Torvalds struct buffer_chunk {
	/* queued buffers; entries bh[0] .. bh[nr - 1] are valid */
6711da177e4SLinus Torvalds 	struct buffer_head *bh[CHUNK_SIZE];
	/* number of buffers currently queued in bh[] */
6721da177e4SLinus Torvalds 	int nr;
6731da177e4SLinus Torvalds };
6741da177e4SLinus Torvalds 
675bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk)
676bd4c625cSLinus Torvalds {
6771da177e4SLinus Torvalds 	int i;
6781da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
6791da177e4SLinus Torvalds 		submit_logged_buffer(chunk->bh[i]);
6801da177e4SLinus Torvalds 	}
6811da177e4SLinus Torvalds 	chunk->nr = 0;
6821da177e4SLinus Torvalds }
6831da177e4SLinus Torvalds 
684bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk)
685bd4c625cSLinus Torvalds {
6861da177e4SLinus Torvalds 	int i;
6871da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
6881da177e4SLinus Torvalds 		submit_ordered_buffer(chunk->bh[i]);
6891da177e4SLinus Torvalds 	}
6901da177e4SLinus Torvalds 	chunk->nr = 0;
6911da177e4SLinus Torvalds }
6921da177e4SLinus Torvalds 
6931da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
694bd4c625cSLinus Torvalds 			spinlock_t * lock, void (fn) (struct buffer_chunk *))
6951da177e4SLinus Torvalds {
6961da177e4SLinus Torvalds 	int ret = 0;
69714a61442SEric Sesterhenn 	BUG_ON(chunk->nr >= CHUNK_SIZE);
6981da177e4SLinus Torvalds 	chunk->bh[chunk->nr++] = bh;
6991da177e4SLinus Torvalds 	if (chunk->nr >= CHUNK_SIZE) {
7001da177e4SLinus Torvalds 		ret = 1;
7011da177e4SLinus Torvalds 		if (lock)
7021da177e4SLinus Torvalds 			spin_unlock(lock);
7031da177e4SLinus Torvalds 		fn(chunk);
7041da177e4SLinus Torvalds 		if (lock)
7051da177e4SLinus Torvalds 			spin_lock(lock);
7061da177e4SLinus Torvalds 	}
7071da177e4SLinus Torvalds 	return ret;
7081da177e4SLinus Torvalds }
7091da177e4SLinus Torvalds 
7101da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
711bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void)
712bd4c625cSLinus Torvalds {
7131da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7141da177e4SLinus Torvalds 	while (1) {
7151da177e4SLinus Torvalds 		jh = kmalloc(sizeof(*jh), GFP_NOFS);
7161da177e4SLinus Torvalds 		if (jh) {
7171da177e4SLinus Torvalds 			atomic_inc(&nr_reiserfs_jh);
7181da177e4SLinus Torvalds 			return jh;
7191da177e4SLinus Torvalds 		}
7201da177e4SLinus Torvalds 		yield();
7211da177e4SLinus Torvalds 	}
7221da177e4SLinus Torvalds }
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds /*
7251da177e4SLinus Torvalds  * we want to free the jh when the buffer has been written
7261da177e4SLinus Torvalds  * and waited on
7271da177e4SLinus Torvalds  */
728bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh)
729bd4c625cSLinus Torvalds {
7301da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7311da177e4SLinus Torvalds 
7321da177e4SLinus Torvalds 	jh = bh->b_private;
7331da177e4SLinus Torvalds 	if (jh) {
7341da177e4SLinus Torvalds 		bh->b_private = NULL;
7351da177e4SLinus Torvalds 		jh->bh = NULL;
7361da177e4SLinus Torvalds 		list_del_init(&jh->list);
7371da177e4SLinus Torvalds 		kfree(jh);
7381da177e4SLinus Torvalds 		if (atomic_read(&nr_reiserfs_jh) <= 0)
7391da177e4SLinus Torvalds 			BUG();
7401da177e4SLinus Torvalds 		atomic_dec(&nr_reiserfs_jh);
7411da177e4SLinus Torvalds 		put_bh(bh);
7421da177e4SLinus Torvalds 	}
7431da177e4SLinus Torvalds }
7441da177e4SLinus Torvalds 
7451da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
7461da177e4SLinus Torvalds 			   int tail)
7471da177e4SLinus Torvalds {
7481da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7491da177e4SLinus Torvalds 
7501da177e4SLinus Torvalds 	if (bh->b_private) {
7511da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
7521da177e4SLinus Torvalds 		if (!bh->b_private) {
7531da177e4SLinus Torvalds 			spin_unlock(&j->j_dirty_buffers_lock);
7541da177e4SLinus Torvalds 			goto no_jh;
7551da177e4SLinus Torvalds 		}
7561da177e4SLinus Torvalds 		jh = bh->b_private;
7571da177e4SLinus Torvalds 		list_del_init(&jh->list);
7581da177e4SLinus Torvalds 	} else {
7591da177e4SLinus Torvalds 	      no_jh:
7601da177e4SLinus Torvalds 		get_bh(bh);
7611da177e4SLinus Torvalds 		jh = alloc_jh();
7621da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
7631da177e4SLinus Torvalds 		/* buffer must be locked for __add_jh, should be able to have
7641da177e4SLinus Torvalds 		 * two adds at the same time
7651da177e4SLinus Torvalds 		 */
76614a61442SEric Sesterhenn 		BUG_ON(bh->b_private);
7671da177e4SLinus Torvalds 		jh->bh = bh;
7681da177e4SLinus Torvalds 		bh->b_private = jh;
7691da177e4SLinus Torvalds 	}
7701da177e4SLinus Torvalds 	jh->jl = j->j_current_jl;
7711da177e4SLinus Torvalds 	if (tail)
7721da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
7731da177e4SLinus Torvalds 	else {
7741da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_bh_list);
7751da177e4SLinus Torvalds 	}
7761da177e4SLinus Torvalds 	spin_unlock(&j->j_dirty_buffers_lock);
7771da177e4SLinus Torvalds 	return 0;
7781da177e4SLinus Torvalds }
7791da177e4SLinus Torvalds 
780bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
781bd4c625cSLinus Torvalds {
7821da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
7831da177e4SLinus Torvalds }
784bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
785bd4c625cSLinus Torvalds {
7861da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
7871da177e4SLinus Torvalds }
7881da177e4SLinus Torvalds 
/* map a list_head embedded in a reiserfs_jh back to the reiserfs_jh */
7891da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
/*
 * Write out and wait for every buffer whose reiserfs_jh is queued on @list.
 *
 * @lock: spinlock taken on entry and released before returning.  It is
 *        dropped around every operation below that submits or waits for
 *        I/O and re-taken afterwards.
 * @j:    journal; its j_errno is the initial return value.
 * @jl:   not referenced by the body.
 * @list: list of reiserfs_jh entries; empty when the function returns.
 *
 * The work is done in two passes.  The first pass drains @list: dirty
 * buffers are batched into a buffer_chunk and submitted, and their jh is
 * parked on a private list; clean buffers have their jh freed right away.
 * The second pass walks the private list, frees each jh and waits for the
 * buffer to be unlocked.
 *
 * Returns the j_errno value sampled on entry, or -EIO if a buffer was
 * found dirty but not uptodate, or not uptodate after it was waited on.
 */
7901da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock,
7911da177e4SLinus Torvalds 				 struct reiserfs_journal *j,
7921da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
7931da177e4SLinus Torvalds 				 struct list_head *list)
7941da177e4SLinus Torvalds {
7951da177e4SLinus Torvalds 	struct buffer_head *bh;
7961da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7971da177e4SLinus Torvalds 	int ret = j->j_errno;
7981da177e4SLinus Torvalds 	struct buffer_chunk chunk;
	/* jh entries whose buffer still has to be waited on in pass two */
7991da177e4SLinus Torvalds 	struct list_head tmp;
8001da177e4SLinus Torvalds 	INIT_LIST_HEAD(&tmp);
8011da177e4SLinus Torvalds 
8021da177e4SLinus Torvalds 	chunk.nr = 0;
8031da177e4SLinus Torvalds 	spin_lock(lock);
	/* pass one: always look at the head of @list until it is empty */
8041da177e4SLinus Torvalds 	while (!list_empty(list)) {
8051da177e4SLinus Torvalds 		jh = JH_ENTRY(list->next);
8061da177e4SLinus Torvalds 		bh = jh->bh;
		/* hold the buffer for this iteration; dropped at loop_next */
8071da177e4SLinus Torvalds 		get_bh(bh);
808ca5de404SNick Piggin 		if (!trylock_buffer(bh)) {
			/*
			 * Buffer lock is held elsewhere.  A clean buffer only
			 * needs to be waited on, so park it for pass two.
			 */
8091da177e4SLinus Torvalds 			if (!buffer_dirty(bh)) {
810f116629dSAkinobu Mita 				list_move(&jh->list, &tmp);
8111da177e4SLinus Torvalds 				goto loop_next;
8121da177e4SLinus Torvalds 			}
			/*
			 * Locked and dirty: submit what has been batched so
			 * far, wait for the buffer without holding the
			 * spinlock, and retry.  The jh stays on @list, so the
			 * next iteration looks at it again.
			 */
8131da177e4SLinus Torvalds 			spin_unlock(lock);
8141da177e4SLinus Torvalds 			if (chunk.nr)
8151da177e4SLinus Torvalds 				write_ordered_chunk(&chunk);
8161da177e4SLinus Torvalds 			wait_on_buffer(bh);
8171da177e4SLinus Torvalds 			cond_resched();
8181da177e4SLinus Torvalds 			spin_lock(lock);
8191da177e4SLinus Torvalds 			goto loop_next;
8201da177e4SLinus Torvalds 		}
8213d4492f8SChris Mason 		/* in theory, dirty non-uptodate buffers should never get here,
8223d4492f8SChris Mason 		 * but the upper layer io error paths still have a few quirks.
8233d4492f8SChris Mason 		 * Handle them here as gracefully as we can
8243d4492f8SChris Mason 		 */
8253d4492f8SChris Mason 		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
8263d4492f8SChris Mason 			clear_buffer_dirty(bh);
8273d4492f8SChris Mason 			ret = -EIO;
8283d4492f8SChris Mason 		}
8291da177e4SLinus Torvalds 		if (buffer_dirty(bh)) {
			/*
			 * We hold the buffer lock: queue it for submission.
			 * add_to_chunk() may drop and re-take @lock when the
			 * chunk fills up and is written out.
			 */
830f116629dSAkinobu Mita 			list_move(&jh->list, &tmp);
8311da177e4SLinus Torvalds 			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
8321da177e4SLinus Torvalds 		} else {
			/* nothing to write: release the jh and the buffer lock */
8331da177e4SLinus Torvalds 			reiserfs_free_jh(bh);
8341da177e4SLinus Torvalds 			unlock_buffer(bh);
8351da177e4SLinus Torvalds 		}
8361da177e4SLinus Torvalds 	      loop_next:
8371da177e4SLinus Torvalds 		put_bh(bh);
8381da177e4SLinus Torvalds 		cond_resched_lock(lock);
8391da177e4SLinus Torvalds 	}
	/* submit the last, partially filled chunk */
8401da177e4SLinus Torvalds 	if (chunk.nr) {
8411da177e4SLinus Torvalds 		spin_unlock(lock);
8421da177e4SLinus Torvalds 		write_ordered_chunk(&chunk);
8431da177e4SLinus Torvalds 		spin_lock(lock);
8441da177e4SLinus Torvalds 	}
	/*
	 * pass two: take entries from the tail of the private list.
	 * reiserfs_free_jh() unlinks the jh, which is what empties the list.
	 */
8451da177e4SLinus Torvalds 	while (!list_empty(&tmp)) {
8461da177e4SLinus Torvalds 		jh = JH_ENTRY(tmp.prev);
8471da177e4SLinus Torvalds 		bh = jh->bh;
8481da177e4SLinus Torvalds 		get_bh(bh);
8491da177e4SLinus Torvalds 		reiserfs_free_jh(bh);
8501da177e4SLinus Torvalds 
		/* a locked buffer is waited on with the spinlock released */
8511da177e4SLinus Torvalds 		if (buffer_locked(bh)) {
8521da177e4SLinus Torvalds 			spin_unlock(lock);
8531da177e4SLinus Torvalds 			wait_on_buffer(bh);
8541da177e4SLinus Torvalds 			spin_lock(lock);
8551da177e4SLinus Torvalds 		}
8561da177e4SLinus Torvalds 		if (!buffer_uptodate(bh)) {
8571da177e4SLinus Torvalds 			ret = -EIO;
8581da177e4SLinus Torvalds 		}
859d62b1b87SChris Mason 		/* ugly interaction with invalidatepage here.
860d62b1b87SChris Mason 		 * reiserfs_invalidate_page will pin any buffer that has a valid
861d62b1b87SChris Mason 		 * journal head from an older transaction.  If someone else sets
862d62b1b87SChris Mason 		 * our buffer dirty after we write it in the first loop, and
863d62b1b87SChris Mason 		 * then someone truncates the page away, nobody will ever write
864d62b1b87SChris Mason 		 * the buffer. We're safe if we write the page one last time
865d62b1b87SChris Mason 		 * after freeing the journal header.
866d62b1b87SChris Mason 		 */
867d62b1b87SChris Mason 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
868d62b1b87SChris Mason 			spin_unlock(lock);
869d62b1b87SChris Mason 			ll_rw_block(WRITE, 1, &bh);
870d62b1b87SChris Mason 			spin_lock(lock);
871d62b1b87SChris Mason 		}
8721da177e4SLinus Torvalds 		put_bh(bh);
8731da177e4SLinus Torvalds 		cond_resched_lock(lock);
8741da177e4SLinus Torvalds 	}
8751da177e4SLinus Torvalds 	spin_unlock(lock);
8761da177e4SLinus Torvalds 	return ret;
8771da177e4SLinus Torvalds }
8781da177e4SLinus Torvalds 
879bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s,
880bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
881bd4c625cSLinus Torvalds {
8821da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
8831da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
8841da177e4SLinus Torvalds 	struct reiserfs_journal_list *first_jl;
8851da177e4SLinus Torvalds 	struct list_head *entry;
886600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
887600ed416SJeff Mahoney 	unsigned int other_trans_id;
888600ed416SJeff Mahoney 	unsigned int first_trans_id;
8891da177e4SLinus Torvalds 
8901da177e4SLinus Torvalds       find_first:
8911da177e4SLinus Torvalds 	/*
8921da177e4SLinus Torvalds 	 * first we walk backwards to find the oldest uncommitted transation
8931da177e4SLinus Torvalds 	 */
8941da177e4SLinus Torvalds 	first_jl = jl;
8951da177e4SLinus Torvalds 	entry = jl->j_list.prev;
8961da177e4SLinus Torvalds 	while (1) {
8971da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
8981da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list ||
8991da177e4SLinus Torvalds 		    atomic_read(&other_jl->j_older_commits_done))
9001da177e4SLinus Torvalds 			break;
9011da177e4SLinus Torvalds 
9021da177e4SLinus Torvalds 		first_jl = other_jl;
9031da177e4SLinus Torvalds 		entry = other_jl->j_list.prev;
9041da177e4SLinus Torvalds 	}
9051da177e4SLinus Torvalds 
9061da177e4SLinus Torvalds 	/* if we didn't find any older uncommitted transactions, return now */
9071da177e4SLinus Torvalds 	if (first_jl == jl) {
9081da177e4SLinus Torvalds 		return 0;
9091da177e4SLinus Torvalds 	}
9101da177e4SLinus Torvalds 
9111da177e4SLinus Torvalds 	first_trans_id = first_jl->j_trans_id;
9121da177e4SLinus Torvalds 
9131da177e4SLinus Torvalds 	entry = &first_jl->j_list;
9141da177e4SLinus Torvalds 	while (1) {
9151da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9161da177e4SLinus Torvalds 		other_trans_id = other_jl->j_trans_id;
9171da177e4SLinus Torvalds 
9181da177e4SLinus Torvalds 		if (other_trans_id < trans_id) {
9191da177e4SLinus Torvalds 			if (atomic_read(&other_jl->j_commit_left) != 0) {
9201da177e4SLinus Torvalds 				flush_commit_list(s, other_jl, 0);
9211da177e4SLinus Torvalds 
9221da177e4SLinus Torvalds 				/* list we were called with is gone, return */
9231da177e4SLinus Torvalds 				if (!journal_list_still_alive(s, trans_id))
9241da177e4SLinus Torvalds 					return 1;
9251da177e4SLinus Torvalds 
9261da177e4SLinus Torvalds 				/* the one we just flushed is gone, this means all
9271da177e4SLinus Torvalds 				 * older lists are also gone, so first_jl is no longer
9281da177e4SLinus Torvalds 				 * valid either.  Go back to the beginning.
9291da177e4SLinus Torvalds 				 */
930bd4c625cSLinus Torvalds 				if (!journal_list_still_alive
931bd4c625cSLinus Torvalds 				    (s, other_trans_id)) {
9321da177e4SLinus Torvalds 					goto find_first;
9331da177e4SLinus Torvalds 				}
9341da177e4SLinus Torvalds 			}
9351da177e4SLinus Torvalds 			entry = entry->next;
9361da177e4SLinus Torvalds 			if (entry == &journal->j_journal_list)
9371da177e4SLinus Torvalds 				return 0;
9381da177e4SLinus Torvalds 		} else {
9391da177e4SLinus Torvalds 			return 0;
9401da177e4SLinus Torvalds 		}
9411da177e4SLinus Torvalds 	}
9421da177e4SLinus Torvalds 	return 0;
9431da177e4SLinus Torvalds }
944deba0f49SAdrian Bunk 
945deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s)
946bd4c625cSLinus Torvalds {
9471da177e4SLinus Torvalds 	struct reiserfs_journal *j = SB_JOURNAL(s);
9488ebc4232SFrederic Weisbecker 
9498ebc4232SFrederic Weisbecker 	if (atomic_read(&j->j_async_throttle)) {
9508ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
9518aa7e847SJens Axboe 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
9528ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
9538ebc4232SFrederic Weisbecker 	}
9548ebc4232SFrederic Weisbecker 
9551da177e4SLinus Torvalds 	return 0;
9561da177e4SLinus Torvalds }
9571da177e4SLinus Torvalds 
9581da177e4SLinus Torvalds /*
9591da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk.
9601da177e4SLinus Torvalds **
9611da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
9621da177e4SLinus Torvalds ** Before the commit block can be written, every other log block must be safely on disk
9631da177e4SLinus Torvalds **
9641da177e4SLinus Torvalds */
/*
 * @s:        super block of the filesystem
 * @jl:       journal list whose log and commit blocks are to be written
 * @flushall: when non-zero, flush the commits of all older journal lists
 *            first and mark j_older_commits_done on @jl afterwards
 *
 * A reference on @jl is held for the duration of the call and
 * jl->j_commit_mutex serializes concurrent flushers of the same list.
 * The reiserfs write lock is released around the calls below that submit
 * or wait for I/O and is re-taken afterwards.
 *
 * Returns 0 on success or when there was nothing left to do, otherwise a
 * negative error (-EIO for a failed buffer write, or journal->j_errno);
 * a non-zero result is also reported through reiserfs_abort().
 */
965bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
966bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall)
967bd4c625cSLinus Torvalds {
9681da177e4SLinus Torvalds 	int i;
9693ee16670SJeff Mahoney 	b_blocknr_t bn;
9701da177e4SLinus Torvalds 	struct buffer_head *tbh = NULL;
	/* remembered so liveness can be checked after jl may have gone away */
971600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
9721da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
9731da177e4SLinus Torvalds 	int retval = 0;
974e0e851cfSChris Mason 	int write_len;
9751da177e4SLinus Torvalds 
9761da177e4SLinus Torvalds 	reiserfs_check_lock_depth(s, "flush_commit_list");
9771da177e4SLinus Torvalds 
	/* this list and everything older is already committed */
9781da177e4SLinus Torvalds 	if (atomic_read(&jl->j_older_commits_done)) {
9791da177e4SLinus Torvalds 		return 0;
9801da177e4SLinus Torvalds 	}
9811da177e4SLinus Torvalds 
9821da177e4SLinus Torvalds 	/* before we can put our commit blocks on disk, we have to make sure everyone older than
9831da177e4SLinus Torvalds 	 ** us is on disk too
9841da177e4SLinus Torvalds 	 */
9851da177e4SLinus Torvalds 	BUG_ON(jl->j_len <= 0);
9861da177e4SLinus Torvalds 	BUG_ON(trans_id == journal->j_trans_id);
9871da177e4SLinus Torvalds 
	/* pin jl; the matching put_journal_list() is at the put_jl label */
9881da177e4SLinus Torvalds 	get_journal_list(jl);
9891da177e4SLinus Torvalds 	if (flushall) {
9901da177e4SLinus Torvalds 		if (flush_older_commits(s, jl) == 1) {
9911da177e4SLinus Torvalds 			/* list disappeared during flush_older_commits.  return */
9921da177e4SLinus Torvalds 			goto put_jl;
9931da177e4SLinus Torvalds 		}
9941da177e4SLinus Torvalds 	}
9951da177e4SLinus Torvalds 
9961da177e4SLinus Torvalds 	/* make sure nobody is trying to flush this one at the same time */
9978ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
9988ebc4232SFrederic Weisbecker 
	/* re-check now that the mutex is held: the list may be gone by now */
9991da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, trans_id)) {
100090415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10011da177e4SLinus Torvalds 		goto put_jl;
10021da177e4SLinus Torvalds 	}
10031da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
10041da177e4SLinus Torvalds 
10051da177e4SLinus Torvalds 	/* this commit is done, exit */
10061da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_commit_left)) <= 0) {
10071da177e4SLinus Torvalds 		if (flushall) {
10081da177e4SLinus Torvalds 			atomic_set(&(jl->j_older_commits_done), 1);
10091da177e4SLinus Torvalds 		}
101090415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10111da177e4SLinus Torvalds 		goto put_jl;
10121da177e4SLinus Torvalds 	}
10131da177e4SLinus Torvalds 
	/* step 1: write the ordered data buffers queued on j_bh_list */
10141da177e4SLinus Torvalds 	if (!list_empty(&jl->j_bh_list)) {
10153d4492f8SChris Mason 		int ret;
10168ebc4232SFrederic Weisbecker 
10178ebc4232SFrederic Weisbecker 		/*
10188ebc4232SFrederic Weisbecker 		 * We might sleep in numerous places inside
10198ebc4232SFrederic Weisbecker 		 * write_ordered_buffers. Relax the write lock.
10208ebc4232SFrederic Weisbecker 		 */
10218ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10223d4492f8SChris Mason 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
10231da177e4SLinus Torvalds 					    journal, jl, &jl->j_bh_list);
10243d4492f8SChris Mason 		if (ret < 0 && retval == 0)
10253d4492f8SChris Mason 			retval = ret;
10268ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10271da177e4SLinus Torvalds 	}
10281da177e4SLinus Torvalds 	BUG_ON(!list_empty(&jl->j_bh_list));
10291da177e4SLinus Torvalds 	/*
10301da177e4SLinus Torvalds 	 * for the description block and all the log blocks, submit any buffers
1031e0e851cfSChris Mason 	 * that haven't already reached the disk.  Try to write at least 256
1032e0e851cfSChris Mason 	 * log blocks. later on, we will only wait on blocks that correspond
1033e0e851cfSChris Mason 	 * to this transaction, but while we're unplugging we might as well
1034e0e851cfSChris Mason 	 * get a chunk of data on there.
10351da177e4SLinus Torvalds 	 */
10361da177e4SLinus Torvalds 	atomic_inc(&journal->j_async_throttle);
1037e0e851cfSChris Mason 	write_len = jl->j_len + 1;
1038e0e851cfSChris Mason 	if (write_len < 256)
1039e0e851cfSChris Mason 		write_len = 256;
	/* step 2: block numbers wrap around at the on-disk journal size */
1040e0e851cfSChris Mason 	for (i = 0 ; i < write_len ; i++) {
10411da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
10421da177e4SLinus Torvalds 		    SB_ONDISK_JOURNAL_SIZE(s);
10431da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
1044e0e851cfSChris Mason 		if (tbh) {
10456e3647acSFrederic Weisbecker 			if (buffer_dirty(tbh)) {
10466e3647acSFrederic Weisbecker 		            reiserfs_write_unlock(s);
1047e0e851cfSChris Mason 			    ll_rw_block(WRITE, 1, &tbh);
10486e3647acSFrederic Weisbecker 			    reiserfs_write_lock(s);
10496e3647acSFrederic Weisbecker 			}
10501da177e4SLinus Torvalds 			put_bh(tbh) ;
10511da177e4SLinus Torvalds 		}
1052e0e851cfSChris Mason 	}
10531da177e4SLinus Torvalds 	atomic_dec(&journal->j_async_throttle);
10541da177e4SLinus Torvalds 
	/*
	 * step 3: wait for the j_len + 1 blocks that belong to this
	 * transaction.  Each block waited on decrements j_commit_left.
	 * NOTE(review): unlike the loop above, tbh is used here without a
	 * NULL check; this presumably relies on the reference from the
	 * original getblk in do_journal_end keeping the buffer findable.
	 * TODO confirm.
	 */
10551da177e4SLinus Torvalds 	for (i = 0; i < (jl->j_len + 1); i++) {
10561da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
10571da177e4SLinus Torvalds 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
10581da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
10598ebc4232SFrederic Weisbecker 
10608ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10611da177e4SLinus Torvalds 		wait_on_buffer(tbh);
10628ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10631da177e4SLinus Torvalds 		// since we're using ll_rw_blk above, it might have skipped over
10641da177e4SLinus Torvalds 		// a locked buffer.  Double check here
10651da177e4SLinus Torvalds 		//
10668ebc4232SFrederic Weisbecker 		/* redundant, sync_dirty_buffer() checks */
10678ebc4232SFrederic Weisbecker 		if (buffer_dirty(tbh)) {
10688ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
10691da177e4SLinus Torvalds 			sync_dirty_buffer(tbh);
10708ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
10718ebc4232SFrederic Weisbecker 		}
10721da177e4SLinus Torvalds 		if (unlikely(!buffer_uptodate(tbh))) {
10731da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
107445b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-601",
107545b03d5eSJeff Mahoney 					 "buffer write failed");
10761da177e4SLinus Torvalds #endif
10771da177e4SLinus Torvalds 			retval = -EIO;
10781da177e4SLinus Torvalds 		}
10791da177e4SLinus Torvalds 		put_bh(tbh);	/* once for journal_find_get_block */
10801da177e4SLinus Torvalds 		put_bh(tbh);	/* once due to original getblk in do_journal_end */
10811da177e4SLinus Torvalds 		atomic_dec(&(jl->j_commit_left));
10821da177e4SLinus Torvalds 	}
10831da177e4SLinus Torvalds 
	/* exactly one count must remain; it is dropped after the commit block is handled below */
10841da177e4SLinus Torvalds 	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
10851da177e4SLinus Torvalds 
10865d5e8156SJeff Mahoney 	/* If there was a write error in the journal - we can't commit
10875d5e8156SJeff Mahoney 	 * this transaction - it will be invalid and, if successful,
1088beb7dd86SRobert P. J. Day 	 * will just end up propagating the write error out to
10895d5e8156SJeff Mahoney 	 * the file system. */
	/* step 4: write the commit block synchronously, write lock released */
10905d5e8156SJeff Mahoney 	if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
10911da177e4SLinus Torvalds 		if (buffer_dirty(jl->j_commit_bh))
10921da177e4SLinus Torvalds 			BUG();
10931da177e4SLinus Torvalds 		mark_buffer_dirty(jl->j_commit_bh) ;
10948ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10957cd33ad2SChristoph Hellwig 		if (reiserfs_barrier_flush(s))
10967cd33ad2SChristoph Hellwig 			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
10977cd33ad2SChristoph Hellwig 		else
10981da177e4SLinus Torvalds 			sync_dirty_buffer(jl->j_commit_bh);
10998ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11005d5e8156SJeff Mahoney 	}
11011da177e4SLinus Torvalds 
11021da177e4SLinus Torvalds 	/* If there was a write error in the journal - we can't commit this
11031da177e4SLinus Torvalds 	 * transaction - it will be invalid and, if successful, will just end
1104beb7dd86SRobert P. J. Day 	 * up propagating the write error out to the filesystem. */
11051da177e4SLinus Torvalds 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
11061da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
110745b03d5eSJeff Mahoney 		reiserfs_warning(s, "journal-615", "buffer write failed");
11081da177e4SLinus Torvalds #endif
11091da177e4SLinus Torvalds 		retval = -EIO;
11101da177e4SLinus Torvalds 	}
11111da177e4SLinus Torvalds 	bforget(jl->j_commit_bh);
	/*
	 * commit ids are expected to advance by exactly one; warn otherwise.
	 * NOTE(review): j_trans_id is printed with %u in put_journal_list
	 * and is read into an unsigned int at the top of this function, so
	 * the %lu specifiers below look inconsistent -- confirm the field
	 * types before changing the format string.
	 */
11121da177e4SLinus Torvalds 	if (journal->j_last_commit_id != 0 &&
11131da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
111445b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1115bd4c625cSLinus Torvalds 				 journal->j_last_commit_id, jl->j_trans_id);
11161da177e4SLinus Torvalds 	}
11171da177e4SLinus Torvalds 	journal->j_last_commit_id = jl->j_trans_id;
11181da177e4SLinus Torvalds 
11191da177e4SLinus Torvalds 	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
11201da177e4SLinus Torvalds 	cleanup_freed_for_journal_list(s, jl);
11211da177e4SLinus Torvalds 
	/* an error recorded in the journal takes effect if none was seen here */
11221da177e4SLinus Torvalds 	retval = retval ? retval : journal->j_errno;
11231da177e4SLinus Torvalds 
11241da177e4SLinus Torvalds 	/* mark the metadata dirty */
11251da177e4SLinus Torvalds 	if (!retval)
11261da177e4SLinus Torvalds 		dirty_one_transaction(s, jl);
	/* drops the last remaining count checked by the BUG_ON above */
11271da177e4SLinus Torvalds 	atomic_dec(&(jl->j_commit_left));
11281da177e4SLinus Torvalds 
11291da177e4SLinus Torvalds 	if (flushall) {
11301da177e4SLinus Torvalds 		atomic_set(&(jl->j_older_commits_done), 1);
11311da177e4SLinus Torvalds 	}
113290415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
11331da177e4SLinus Torvalds       put_jl:
11341da177e4SLinus Torvalds 	put_journal_list(s, jl);
11351da177e4SLinus Torvalds 
11361da177e4SLinus Torvalds 	if (retval)
1137bd4c625cSLinus Torvalds 		reiserfs_abort(s, retval, "Journal write error in %s",
1138fbe5498bSHarvey Harrison 			       __func__);
11391da177e4SLinus Torvalds 	return retval;
11401da177e4SLinus Torvalds }
11411da177e4SLinus Torvalds 
11421da177e4SLinus Torvalds /*
11431da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
11441da177e4SLinus Torvalds ** returns NULL if it can't find anything
11451da177e4SLinus Torvalds */
1146bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1147bd4c625cSLinus Torvalds 							  reiserfs_journal_cnode
1148bd4c625cSLinus Torvalds 							  *cn)
1149bd4c625cSLinus Torvalds {
11501da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
11511da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
11521da177e4SLinus Torvalds 
11531da177e4SLinus Torvalds 	cn = cn->hprev;
11541da177e4SLinus Torvalds 	while (cn) {
11551da177e4SLinus Torvalds 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
11561da177e4SLinus Torvalds 			return cn->jlist;
11571da177e4SLinus Torvalds 		}
11581da177e4SLinus Torvalds 		cn = cn->hprev;
11591da177e4SLinus Torvalds 	}
11601da177e4SLinus Torvalds 	return NULL;
11611da177e4SLinus Torvalds }
11621da177e4SLinus Torvalds 
1163a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1164a3172027SChris Mason {
1165a3172027SChris Mason 	struct super_block *sb = cn->sb;
1166a3172027SChris Mason 	b_blocknr_t blocknr = cn->blocknr;
1167a3172027SChris Mason 
1168a3172027SChris Mason 	cn = cn->hprev;
1169a3172027SChris Mason 	while (cn) {
1170a3172027SChris Mason 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1171a3172027SChris Mason 		    atomic_read(&cn->jlist->j_commit_left) != 0)
1172a3172027SChris Mason 				    return 0;
1173a3172027SChris Mason 		cn = cn->hprev;
1174a3172027SChris Mason 	}
1175a3172027SChris Mason 	return 1;
1176a3172027SChris Mason }
1177a3172027SChris Mason 
/*
 * Forward declaration: remove_all_from_journal_list() below calls this
 * before its definition appears.  It is called there with the super block,
 * the journal's j_list_hash_table, the journal list, a block number and 1.
 */
1178bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *,
1179bd4c625cSLinus Torvalds 				struct reiserfs_journal_cnode **,
1180bd4c625cSLinus Torvalds 				struct reiserfs_journal_list *, unsigned long,
1181bd4c625cSLinus Torvalds 				int);
11821da177e4SLinus Torvalds 
11831da177e4SLinus Torvalds /*
11841da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the
11851da177e4SLinus Torvalds ** journal list for this transaction.  Aside from freeing the cnode, this also allows the
11861da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted.
11871da177e4SLinus Torvalds */
1188a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb,
1189bd4c625cSLinus Torvalds 					 struct reiserfs_journal_list *jl,
1190bd4c625cSLinus Torvalds 					 int debug)
1191bd4c625cSLinus Torvalds {
1192a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
11931da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
11941da177e4SLinus Torvalds 	cn = jl->j_realblock;
11951da177e4SLinus Torvalds 
11961da177e4SLinus Torvalds 	/* which is better, to lock once around the whole loop, or
11971da177e4SLinus Torvalds 	 ** to lock for each call to remove_journal_hash?
11981da177e4SLinus Torvalds 	 */
11991da177e4SLinus Torvalds 	while (cn) {
12001da177e4SLinus Torvalds 		if (cn->blocknr != 0) {
12011da177e4SLinus Torvalds 			if (debug) {
1202a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2201",
1203bd4c625cSLinus Torvalds 						 "block %u, bh is %d, state %ld",
1204bd4c625cSLinus Torvalds 						 cn->blocknr, cn->bh ? 1 : 0,
1205bd4c625cSLinus Torvalds 						 cn->state);
12061da177e4SLinus Torvalds 			}
12071da177e4SLinus Torvalds 			cn->state = 0;
1208a9dd3643SJeff Mahoney 			remove_journal_hash(sb, journal->j_list_hash_table,
1209bd4c625cSLinus Torvalds 					    jl, cn->blocknr, 1);
12101da177e4SLinus Torvalds 		}
12111da177e4SLinus Torvalds 		last = cn;
12121da177e4SLinus Torvalds 		cn = cn->next;
1213a9dd3643SJeff Mahoney 		free_cnode(sb, last);
12141da177e4SLinus Torvalds 	}
12151da177e4SLinus Torvalds 	jl->j_realblock = NULL;
12161da177e4SLinus Torvalds }
12171da177e4SLinus Torvalds 
12181da177e4SLinus Torvalds /*
12191da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
12201da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
12211da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks.
12221da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list
12231da177e4SLinus Torvalds **
12241da177e4SLinus Torvalds */
1225a9dd3643SJeff Mahoney static int _update_journal_header_block(struct super_block *sb,
1226bd4c625cSLinus Torvalds 					unsigned long offset,
1227600ed416SJeff Mahoney 					unsigned int trans_id)
1228bd4c625cSLinus Torvalds {
12291da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
1230a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12311da177e4SLinus Torvalds 
12321da177e4SLinus Torvalds 	if (reiserfs_is_journal_aborted(journal))
12331da177e4SLinus Torvalds 		return -EIO;
12341da177e4SLinus Torvalds 
12351da177e4SLinus Torvalds 	if (trans_id >= journal->j_last_flush_trans_id) {
12361da177e4SLinus Torvalds 		if (buffer_locked((journal->j_header_bh))) {
12378ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
12381da177e4SLinus Torvalds 			wait_on_buffer((journal->j_header_bh));
12398ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
12401da177e4SLinus Torvalds 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
12411da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1242a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "journal-699",
124345b03d5eSJeff Mahoney 						 "buffer write failed");
12441da177e4SLinus Torvalds #endif
12451da177e4SLinus Torvalds 				return -EIO;
12461da177e4SLinus Torvalds 			}
12471da177e4SLinus Torvalds 		}
12481da177e4SLinus Torvalds 		journal->j_last_flush_trans_id = trans_id;
12491da177e4SLinus Torvalds 		journal->j_first_unflushed_offset = offset;
1250bd4c625cSLinus Torvalds 		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1251bd4c625cSLinus Torvalds 							b_data);
12521da177e4SLinus Torvalds 		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
12531da177e4SLinus Torvalds 		jh->j_first_unflushed_offset = cpu_to_le32(offset);
12541da177e4SLinus Torvalds 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
12551da177e4SLinus Torvalds 
12561da177e4SLinus Torvalds 		set_buffer_dirty(journal->j_header_bh);
12578ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
12587cd33ad2SChristoph Hellwig 
12597cd33ad2SChristoph Hellwig 		if (reiserfs_barrier_flush(sb))
12607cd33ad2SChristoph Hellwig 			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
12617cd33ad2SChristoph Hellwig 		else
12621da177e4SLinus Torvalds 			sync_dirty_buffer(journal->j_header_bh);
12637cd33ad2SChristoph Hellwig 
12648ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
12651da177e4SLinus Torvalds 		if (!buffer_uptodate(journal->j_header_bh)) {
1266a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-837",
126745b03d5eSJeff Mahoney 					 "IO error during journal replay");
12681da177e4SLinus Torvalds 			return -EIO;
12691da177e4SLinus Torvalds 		}
12701da177e4SLinus Torvalds 	}
12711da177e4SLinus Torvalds 	return 0;
12721da177e4SLinus Torvalds }
12731da177e4SLinus Torvalds 
/*
 * Thin wrapper: forwards its arguments unchanged to
 * _update_journal_header_block() and returns that function's result.
 */
static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	int status = _update_journal_header_block(sb, offset, trans_id);

	return status;
}
1280bd4c625cSLinus Torvalds 
12811da177e4SLinus Torvalds /*
12821da177e4SLinus Torvalds ** flush any and all journal lists older than you are
12831da177e4SLinus Torvalds ** can only be called from flush_journal_list
12841da177e4SLinus Torvalds */
1285a9dd3643SJeff Mahoney static int flush_older_journal_lists(struct super_block *sb,
12861da177e4SLinus Torvalds 				     struct reiserfs_journal_list *jl)
12871da177e4SLinus Torvalds {
12881da177e4SLinus Torvalds 	struct list_head *entry;
12891da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
1290a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1291600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
12921da177e4SLinus Torvalds 
12931da177e4SLinus Torvalds 	/* we know we are the only ones flushing things, no extra race
12941da177e4SLinus Torvalds 	 * protection is required.
12951da177e4SLinus Torvalds 	 */
12961da177e4SLinus Torvalds       restart:
12971da177e4SLinus Torvalds 	entry = journal->j_journal_list.next;
12981da177e4SLinus Torvalds 	/* Did we wrap? */
12991da177e4SLinus Torvalds 	if (entry == &journal->j_journal_list)
13001da177e4SLinus Torvalds 		return 0;
13011da177e4SLinus Torvalds 	other_jl = JOURNAL_LIST_ENTRY(entry);
13021da177e4SLinus Torvalds 	if (other_jl->j_trans_id < trans_id) {
13031da177e4SLinus Torvalds 		BUG_ON(other_jl->j_refcount <= 0);
13041da177e4SLinus Torvalds 		/* do not flush all */
1305a9dd3643SJeff Mahoney 		flush_journal_list(sb, other_jl, 0);
13061da177e4SLinus Torvalds 
13071da177e4SLinus Torvalds 		/* other_jl is now deleted from the list */
13081da177e4SLinus Torvalds 		goto restart;
13091da177e4SLinus Torvalds 	}
13101da177e4SLinus Torvalds 	return 0;
13111da177e4SLinus Torvalds }
13121da177e4SLinus Torvalds 
13131da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s,
1314bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
1315bd4c625cSLinus Torvalds {
13161da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13171da177e4SLinus Torvalds 	if (!list_empty(&jl->j_working_list)) {
13181da177e4SLinus Torvalds 		list_del_init(&jl->j_working_list);
13191da177e4SLinus Torvalds 		journal->j_num_work_lists--;
13201da177e4SLinus Torvalds 	}
13211da177e4SLinus Torvalds }
13221da177e4SLinus Torvalds 
13231da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks
13241da177e4SLinus Torvalds **
13251da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside
13261da177e4SLinus Torvalds ** flush_journal_list
13271da177e4SLinus Torvalds **
13281da177e4SLinus Torvalds ** IMPORTANT.  This can only be called while there are no journal writers,
13291da177e4SLinus Torvalds ** and the journal is locked.  That means it can only be called from
13301da177e4SLinus Torvalds ** do_journal_end, or by journal_release
13311da177e4SLinus Torvalds */
13321da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s,
1333bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall)
1334bd4c625cSLinus Torvalds {
13351da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
13361da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
13371da177e4SLinus Torvalds 	int count;
13381da177e4SLinus Torvalds 	int was_jwait = 0;
13391da177e4SLinus Torvalds 	int was_dirty = 0;
13401da177e4SLinus Torvalds 	struct buffer_head *saved_bh;
13411da177e4SLinus Torvalds 	unsigned long j_len_saved = jl->j_len;
13421da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13431da177e4SLinus Torvalds 	int err = 0;
13441da177e4SLinus Torvalds 
13451da177e4SLinus Torvalds 	BUG_ON(j_len_saved <= 0);
13461da177e4SLinus Torvalds 
13471da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) != 0) {
134845b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2048", "called with wcount %d",
13491da177e4SLinus Torvalds 				 atomic_read(&journal->j_wcount));
13501da177e4SLinus Torvalds 	}
13511da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
13521da177e4SLinus Torvalds 
13531da177e4SLinus Torvalds 	/* if flushall == 0, the lock is already held */
13541da177e4SLinus Torvalds 	if (flushall) {
13558ebc4232SFrederic Weisbecker 		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1356afe70259SJeff Mahoney 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
13571da177e4SLinus Torvalds 		BUG();
13581da177e4SLinus Torvalds 	}
13591da177e4SLinus Torvalds 
13601da177e4SLinus Torvalds 	count = 0;
13611da177e4SLinus Torvalds 	if (j_len_saved > journal->j_trans_max) {
1362c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1363bd4c625cSLinus Torvalds 			       j_len_saved, jl->j_trans_id);
13641da177e4SLinus Torvalds 		return 0;
13651da177e4SLinus Torvalds 	}
13661da177e4SLinus Torvalds 
13671da177e4SLinus Torvalds 	/* if all the work is already done, get out of here */
13681da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
13691da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
13701da177e4SLinus Torvalds 		goto flush_older_and_return;
13711da177e4SLinus Torvalds 	}
13721da177e4SLinus Torvalds 
13731da177e4SLinus Torvalds 	/* start by putting the commit list on disk.  This will also flush
13741da177e4SLinus Torvalds 	 ** the commit lists of any olders transactions
13751da177e4SLinus Torvalds 	 */
13761da177e4SLinus Torvalds 	flush_commit_list(s, jl, 1);
13771da177e4SLinus Torvalds 
1378bd4c625cSLinus Torvalds 	if (!(jl->j_state & LIST_DIRTY)
1379bd4c625cSLinus Torvalds 	    && !reiserfs_is_journal_aborted(journal))
13801da177e4SLinus Torvalds 		BUG();
13811da177e4SLinus Torvalds 
13821da177e4SLinus Torvalds 	/* are we done now? */
13831da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
13841da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
13851da177e4SLinus Torvalds 		goto flush_older_and_return;
13861da177e4SLinus Torvalds 	}
13871da177e4SLinus Torvalds 
13881da177e4SLinus Torvalds 	/* loop through each cnode, see if we need to write it,
13891da177e4SLinus Torvalds 	 ** or wait on a more recent transaction, or just ignore it
13901da177e4SLinus Torvalds 	 */
13911da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) != 0) {
1392c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-844", "journal list is flushing, "
1393c3a9c210SJeff Mahoney 			       "wcount is not 0");
13941da177e4SLinus Torvalds 	}
13951da177e4SLinus Torvalds 	cn = jl->j_realblock;
13961da177e4SLinus Torvalds 	while (cn) {
13971da177e4SLinus Torvalds 		was_jwait = 0;
13981da177e4SLinus Torvalds 		was_dirty = 0;
13991da177e4SLinus Torvalds 		saved_bh = NULL;
14001da177e4SLinus Torvalds 		/* blocknr of 0 is no longer in the hash, ignore it */
14011da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
14021da177e4SLinus Torvalds 			goto free_cnode;
14031da177e4SLinus Torvalds 		}
14041da177e4SLinus Torvalds 
14051da177e4SLinus Torvalds 		/* This transaction failed commit. Don't write out to the disk */
14061da177e4SLinus Torvalds 		if (!(jl->j_state & LIST_DIRTY))
14071da177e4SLinus Torvalds 			goto free_cnode;
14081da177e4SLinus Torvalds 
14091da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
14101da177e4SLinus Torvalds 		/* the order is important here.  We check pjl to make sure we
14111da177e4SLinus Torvalds 		 ** don't clear BH_JDirty_wait if we aren't the one writing this
14121da177e4SLinus Torvalds 		 ** block to disk
14131da177e4SLinus Torvalds 		 */
14141da177e4SLinus Torvalds 		if (!pjl && cn->bh) {
14151da177e4SLinus Torvalds 			saved_bh = cn->bh;
14161da177e4SLinus Torvalds 
14171da177e4SLinus Torvalds 			/* we do this to make sure nobody releases the buffer while
14181da177e4SLinus Torvalds 			 ** we are working with it
14191da177e4SLinus Torvalds 			 */
14201da177e4SLinus Torvalds 			get_bh(saved_bh);
14211da177e4SLinus Torvalds 
14221da177e4SLinus Torvalds 			if (buffer_journal_dirty(saved_bh)) {
14231da177e4SLinus Torvalds 				BUG_ON(!can_dirty(cn));
14241da177e4SLinus Torvalds 				was_jwait = 1;
14251da177e4SLinus Torvalds 				was_dirty = 1;
14261da177e4SLinus Torvalds 			} else if (can_dirty(cn)) {
14271da177e4SLinus Torvalds 				/* everything with !pjl && jwait should be writable */
14281da177e4SLinus Torvalds 				BUG();
14291da177e4SLinus Torvalds 			}
14301da177e4SLinus Torvalds 		}
14311da177e4SLinus Torvalds 
14321da177e4SLinus Torvalds 		/* if someone has this block in a newer transaction, just make
14330779bf2dSMatt LaPlante 		 ** sure they are committed, and don't try writing it to disk
14341da177e4SLinus Torvalds 		 */
14351da177e4SLinus Torvalds 		if (pjl) {
14361da177e4SLinus Torvalds 			if (atomic_read(&pjl->j_commit_left))
14371da177e4SLinus Torvalds 				flush_commit_list(s, pjl, 1);
14381da177e4SLinus Torvalds 			goto free_cnode;
14391da177e4SLinus Torvalds 		}
14401da177e4SLinus Torvalds 
14411da177e4SLinus Torvalds 		/* bh == NULL when the block got to disk on its own, OR,
14421da177e4SLinus Torvalds 		 ** the block got freed in a future transaction
14431da177e4SLinus Torvalds 		 */
14441da177e4SLinus Torvalds 		if (saved_bh == NULL) {
14451da177e4SLinus Torvalds 			goto free_cnode;
14461da177e4SLinus Torvalds 		}
14471da177e4SLinus Torvalds 
14481da177e4SLinus Torvalds 		/* this should never happen.  kupdate_one_transaction has this list
14491da177e4SLinus Torvalds 		 ** locked while it works, so we should never see a buffer here that
14501da177e4SLinus Torvalds 		 ** is not marked JDirty_wait
14511da177e4SLinus Torvalds 		 */
14521da177e4SLinus Torvalds 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
145345b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-813",
145445b03d5eSJeff Mahoney 					 "BAD! buffer %llu %cdirty %cjwait, "
14551da177e4SLinus Torvalds 					 "not in a newer tranasction",
1456bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1457bd4c625cSLinus Torvalds 					 b_blocknr, was_dirty ? ' ' : '!',
1458bd4c625cSLinus Torvalds 					 was_jwait ? ' ' : '!');
14591da177e4SLinus Torvalds 		}
14601da177e4SLinus Torvalds 		if (was_dirty) {
14611da177e4SLinus Torvalds 			/* we inc again because saved_bh gets decremented at free_cnode */
14621da177e4SLinus Torvalds 			get_bh(saved_bh);
14631da177e4SLinus Torvalds 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
14641da177e4SLinus Torvalds 			lock_buffer(saved_bh);
14651da177e4SLinus Torvalds 			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
14661da177e4SLinus Torvalds 			if (buffer_dirty(saved_bh))
14671da177e4SLinus Torvalds 				submit_logged_buffer(saved_bh);
14681da177e4SLinus Torvalds 			else
14691da177e4SLinus Torvalds 				unlock_buffer(saved_bh);
14701da177e4SLinus Torvalds 			count++;
14711da177e4SLinus Torvalds 		} else {
147245b03d5eSJeff Mahoney 			reiserfs_warning(s, "clm-2082",
147345b03d5eSJeff Mahoney 					 "Unable to flush buffer %llu in %s",
1474bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1475fbe5498bSHarvey Harrison 					 b_blocknr, __func__);
14761da177e4SLinus Torvalds 		}
14771da177e4SLinus Torvalds 	      free_cnode:
14781da177e4SLinus Torvalds 		last = cn;
14791da177e4SLinus Torvalds 		cn = cn->next;
14801da177e4SLinus Torvalds 		if (saved_bh) {
14811da177e4SLinus Torvalds 			/* we incremented this to keep others from taking the buffer head away */
14821da177e4SLinus Torvalds 			put_bh(saved_bh);
14831da177e4SLinus Torvalds 			if (atomic_read(&(saved_bh->b_count)) < 0) {
148445b03d5eSJeff Mahoney 				reiserfs_warning(s, "journal-945",
148545b03d5eSJeff Mahoney 						 "saved_bh->b_count < 0");
14861da177e4SLinus Torvalds 			}
14871da177e4SLinus Torvalds 		}
14881da177e4SLinus Torvalds 	}
14891da177e4SLinus Torvalds 	if (count > 0) {
14901da177e4SLinus Torvalds 		cn = jl->j_realblock;
14911da177e4SLinus Torvalds 		while (cn) {
14921da177e4SLinus Torvalds 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
14931da177e4SLinus Torvalds 				if (!cn->bh) {
1494c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1011",
1495c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
14961da177e4SLinus Torvalds 				}
14978ebc4232SFrederic Weisbecker 
14988ebc4232SFrederic Weisbecker 				reiserfs_write_unlock(s);
14991da177e4SLinus Torvalds 				wait_on_buffer(cn->bh);
15008ebc4232SFrederic Weisbecker 				reiserfs_write_lock(s);
15018ebc4232SFrederic Weisbecker 
15021da177e4SLinus Torvalds 				if (!cn->bh) {
1503c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1012",
1504c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
15051da177e4SLinus Torvalds 				}
15061da177e4SLinus Torvalds 				if (unlikely(!buffer_uptodate(cn->bh))) {
15071da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
150845b03d5eSJeff Mahoney 					reiserfs_warning(s, "journal-949",
150945b03d5eSJeff Mahoney 							 "buffer write failed");
15101da177e4SLinus Torvalds #endif
15111da177e4SLinus Torvalds 					err = -EIO;
15121da177e4SLinus Torvalds 				}
15131da177e4SLinus Torvalds 				/* note, we must clear the JDirty_wait bit after the up to date
15141da177e4SLinus Torvalds 				 ** check, otherwise we race against our flushpage routine
15151da177e4SLinus Torvalds 				 */
1516bd4c625cSLinus Torvalds 				BUG_ON(!test_clear_buffer_journal_dirty
1517bd4c625cSLinus Torvalds 				       (cn->bh));
15181da177e4SLinus Torvalds 
1519398c95bdSChris Mason 				/* drop one ref for us */
15201da177e4SLinus Torvalds 				put_bh(cn->bh);
1521398c95bdSChris Mason 				/* drop one ref for journal_mark_dirty */
1522398c95bdSChris Mason 				release_buffer_page(cn->bh);
15231da177e4SLinus Torvalds 			}
15241da177e4SLinus Torvalds 			cn = cn->next;
15251da177e4SLinus Torvalds 		}
15261da177e4SLinus Torvalds 	}
15271da177e4SLinus Torvalds 
15281da177e4SLinus Torvalds 	if (err)
1529bd4c625cSLinus Torvalds 		reiserfs_abort(s, -EIO,
1530bd4c625cSLinus Torvalds 			       "Write error while pushing transaction to disk in %s",
1531fbe5498bSHarvey Harrison 			       __func__);
15321da177e4SLinus Torvalds       flush_older_and_return:
15331da177e4SLinus Torvalds 
15341da177e4SLinus Torvalds 	/* before we can update the journal header block, we _must_ flush all
15351da177e4SLinus Torvalds 	 ** real blocks from all older transactions to disk.  This is because
15361da177e4SLinus Torvalds 	 ** once the header block is updated, this transaction will not be
15371da177e4SLinus Torvalds 	 ** replayed after a crash
15381da177e4SLinus Torvalds 	 */
15391da177e4SLinus Torvalds 	if (flushall) {
15401da177e4SLinus Torvalds 		flush_older_journal_lists(s, jl);
15411da177e4SLinus Torvalds 	}
15421da177e4SLinus Torvalds 
15431da177e4SLinus Torvalds 	err = journal->j_errno;
15441da177e4SLinus Torvalds 	/* before we can remove everything from the hash tables for this
15451da177e4SLinus Torvalds 	 ** transaction, we must make sure it can never be replayed
15461da177e4SLinus Torvalds 	 **
15471da177e4SLinus Torvalds 	 ** since we are only called from do_journal_end, we know for sure there
15481da177e4SLinus Torvalds 	 ** are no allocations going on while we are flushing journal lists.  So,
15491da177e4SLinus Torvalds 	 ** we only need to update the journal header block for the last list
15501da177e4SLinus Torvalds 	 ** being flushed
15511da177e4SLinus Torvalds 	 */
15521da177e4SLinus Torvalds 	if (!err && flushall) {
1553bd4c625cSLinus Torvalds 		err =
1554bd4c625cSLinus Torvalds 		    update_journal_header_block(s,
1555bd4c625cSLinus Torvalds 						(jl->j_start + jl->j_len +
1556bd4c625cSLinus Torvalds 						 2) % SB_ONDISK_JOURNAL_SIZE(s),
1557bd4c625cSLinus Torvalds 						jl->j_trans_id);
15581da177e4SLinus Torvalds 		if (err)
1559bd4c625cSLinus Torvalds 			reiserfs_abort(s, -EIO,
1560bd4c625cSLinus Torvalds 				       "Write error while updating journal header in %s",
1561fbe5498bSHarvey Harrison 				       __func__);
15621da177e4SLinus Torvalds 	}
15631da177e4SLinus Torvalds 	remove_all_from_journal_list(s, jl, 0);
15641da177e4SLinus Torvalds 	list_del_init(&jl->j_list);
15651da177e4SLinus Torvalds 	journal->j_num_lists--;
15661da177e4SLinus Torvalds 	del_from_work_list(s, jl);
15671da177e4SLinus Torvalds 
15681da177e4SLinus Torvalds 	if (journal->j_last_flush_id != 0 &&
15691da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
157045b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1571bd4c625cSLinus Torvalds 				 journal->j_last_flush_id, jl->j_trans_id);
15721da177e4SLinus Torvalds 	}
15731da177e4SLinus Torvalds 	journal->j_last_flush_id = jl->j_trans_id;
15741da177e4SLinus Torvalds 
15751da177e4SLinus Torvalds 	/* not strictly required since we are freeing the list, but it should
15761da177e4SLinus Torvalds 	 * help find code using dead lists later on
15771da177e4SLinus Torvalds 	 */
15781da177e4SLinus Torvalds 	jl->j_len = 0;
15791da177e4SLinus Torvalds 	atomic_set(&(jl->j_nonzerolen), 0);
15801da177e4SLinus Torvalds 	jl->j_start = 0;
15811da177e4SLinus Torvalds 	jl->j_realblock = NULL;
15821da177e4SLinus Torvalds 	jl->j_commit_bh = NULL;
15831da177e4SLinus Torvalds 	jl->j_trans_id = 0;
15841da177e4SLinus Torvalds 	jl->j_state = 0;
15851da177e4SLinus Torvalds 	put_journal_list(s, jl);
15861da177e4SLinus Torvalds 	if (flushall)
1587afe70259SJeff Mahoney 		mutex_unlock(&journal->j_flush_mutex);
15881da177e4SLinus Torvalds 	return err;
15891da177e4SLinus Torvalds }
15901da177e4SLinus Torvalds 
1591a3172027SChris Mason static int test_transaction(struct super_block *s,
1592a3172027SChris Mason                             struct reiserfs_journal_list *jl)
1593a3172027SChris Mason {
1594a3172027SChris Mason 	struct reiserfs_journal_cnode *cn;
1595a3172027SChris Mason 
1596a3172027SChris Mason 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1597a3172027SChris Mason 		return 1;
1598a3172027SChris Mason 
1599a3172027SChris Mason 	cn = jl->j_realblock;
1600a3172027SChris Mason 	while (cn) {
1601a3172027SChris Mason 		/* if the blocknr == 0, this has been cleared from the hash,
1602a3172027SChris Mason 		 ** skip it
1603a3172027SChris Mason 		 */
1604a3172027SChris Mason 		if (cn->blocknr == 0) {
1605a3172027SChris Mason 			goto next;
1606a3172027SChris Mason 		}
1607a3172027SChris Mason 		if (cn->bh && !newer_jl_done(cn))
1608a3172027SChris Mason 			return 0;
1609a3172027SChris Mason 	      next:
1610a3172027SChris Mason 		cn = cn->next;
1611a3172027SChris Mason 		cond_resched();
1612a3172027SChris Mason 	}
1613a3172027SChris Mason 	return 0;
1614a3172027SChris Mason }
1615a3172027SChris Mason 
16161da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s,
16171da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
16181da177e4SLinus Torvalds 				 struct buffer_chunk *chunk)
16191da177e4SLinus Torvalds {
16201da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16211da177e4SLinus Torvalds 	int ret = 0;
16221da177e4SLinus Torvalds 
16231da177e4SLinus Torvalds 	jl->j_state |= LIST_TOUCHED;
16241da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16251da177e4SLinus Torvalds 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
16261da177e4SLinus Torvalds 		return 0;
16271da177e4SLinus Torvalds 	}
16281da177e4SLinus Torvalds 
16291da177e4SLinus Torvalds 	cn = jl->j_realblock;
16301da177e4SLinus Torvalds 	while (cn) {
16311da177e4SLinus Torvalds 		/* if the blocknr == 0, this has been cleared from the hash,
16321da177e4SLinus Torvalds 		 ** skip it
16331da177e4SLinus Torvalds 		 */
16341da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
16351da177e4SLinus Torvalds 			goto next;
16361da177e4SLinus Torvalds 		}
16371da177e4SLinus Torvalds 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
16381da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
16391da177e4SLinus Torvalds 			/* we can race against journal_mark_freed when we try
16401da177e4SLinus Torvalds 			 * to lock_buffer(cn->bh), so we have to inc the buffer
16411da177e4SLinus Torvalds 			 * count, and recheck things after locking
16421da177e4SLinus Torvalds 			 */
16431da177e4SLinus Torvalds 			tmp_bh = cn->bh;
16441da177e4SLinus Torvalds 			get_bh(tmp_bh);
16451da177e4SLinus Torvalds 			lock_buffer(tmp_bh);
16461da177e4SLinus Torvalds 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
16471da177e4SLinus Torvalds 				if (!buffer_journal_dirty(tmp_bh) ||
16481da177e4SLinus Torvalds 				    buffer_journal_prepared(tmp_bh))
16491da177e4SLinus Torvalds 					BUG();
16501da177e4SLinus Torvalds 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
16511da177e4SLinus Torvalds 				ret++;
16521da177e4SLinus Torvalds 			} else {
16531da177e4SLinus Torvalds 				/* note, cn->bh might be null now */
16541da177e4SLinus Torvalds 				unlock_buffer(tmp_bh);
16551da177e4SLinus Torvalds 			}
16561da177e4SLinus Torvalds 			put_bh(tmp_bh);
16571da177e4SLinus Torvalds 		}
16581da177e4SLinus Torvalds 	      next:
16591da177e4SLinus Torvalds 		cn = cn->next;
16601da177e4SLinus Torvalds 		cond_resched();
16611da177e4SLinus Torvalds 	}
16621da177e4SLinus Torvalds 	return ret;
16631da177e4SLinus Torvalds }
16641da177e4SLinus Torvalds 
16651da177e4SLinus Torvalds /* used by flush_commit_list */
16661da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
16671da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl)
16681da177e4SLinus Torvalds {
16691da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16701da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
16711da177e4SLinus Torvalds 	int ret = 0;
16721da177e4SLinus Torvalds 
16731da177e4SLinus Torvalds 	jl->j_state |= LIST_DIRTY;
16741da177e4SLinus Torvalds 	cn = jl->j_realblock;
16751da177e4SLinus Torvalds 	while (cn) {
16761da177e4SLinus Torvalds 		/* look for a more recent transaction that logged this
16771da177e4SLinus Torvalds 		 ** buffer.  Only the most recent transaction with a buffer in
16781da177e4SLinus Torvalds 		 ** it is allowed to send that buffer to disk
16791da177e4SLinus Torvalds 		 */
16801da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
1681bd4c625cSLinus Torvalds 		if (!pjl && cn->blocknr && cn->bh
1682bd4c625cSLinus Torvalds 		    && buffer_journal_dirty(cn->bh)) {
16831da177e4SLinus Torvalds 			BUG_ON(!can_dirty(cn));
16841da177e4SLinus Torvalds 			/* if the buffer is prepared, it will either be logged
16851da177e4SLinus Torvalds 			 * or restored.  If restored, we need to make sure
16861da177e4SLinus Torvalds 			 * it actually gets marked dirty
16871da177e4SLinus Torvalds 			 */
16881da177e4SLinus Torvalds 			clear_buffer_journal_new(cn->bh);
16891da177e4SLinus Torvalds 			if (buffer_journal_prepared(cn->bh)) {
16901da177e4SLinus Torvalds 				set_buffer_journal_restore_dirty(cn->bh);
16911da177e4SLinus Torvalds 			} else {
16921da177e4SLinus Torvalds 				set_buffer_journal_test(cn->bh);
16931da177e4SLinus Torvalds 				mark_buffer_dirty(cn->bh);
16941da177e4SLinus Torvalds 			}
16951da177e4SLinus Torvalds 		}
16961da177e4SLinus Torvalds 		cn = cn->next;
16971da177e4SLinus Torvalds 	}
16981da177e4SLinus Torvalds 	return ret;
16991da177e4SLinus Torvalds }
17001da177e4SLinus Torvalds 
17011da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s,
17021da177e4SLinus Torvalds 				struct reiserfs_journal_list *jl,
17031da177e4SLinus Torvalds 				struct reiserfs_journal_list **next_jl,
1704600ed416SJeff Mahoney 				unsigned int *next_trans_id,
1705bd4c625cSLinus Torvalds 				int num_blocks, int num_trans)
1706bd4c625cSLinus Torvalds {
17071da177e4SLinus Torvalds 	int ret = 0;
17081da177e4SLinus Torvalds 	int written = 0;
17091da177e4SLinus Torvalds 	int transactions_flushed = 0;
1710600ed416SJeff Mahoney 	unsigned int orig_trans_id = jl->j_trans_id;
17111da177e4SLinus Torvalds 	struct buffer_chunk chunk;
17121da177e4SLinus Torvalds 	struct list_head *entry;
17131da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17141da177e4SLinus Torvalds 	chunk.nr = 0;
17151da177e4SLinus Torvalds 
1716a412f9efSFrederic Weisbecker 	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
17171da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, orig_trans_id)) {
17181da177e4SLinus Torvalds 		goto done;
17191da177e4SLinus Torvalds 	}
17201da177e4SLinus Torvalds 
1721afe70259SJeff Mahoney 	/* we've got j_flush_mutex held, nobody is going to delete any
17221da177e4SLinus Torvalds 	 * of these lists out from underneath us
17231da177e4SLinus Torvalds 	 */
17241da177e4SLinus Torvalds 	while ((num_trans && transactions_flushed < num_trans) ||
17251da177e4SLinus Torvalds 	       (!num_trans && written < num_blocks)) {
17261da177e4SLinus Torvalds 
17271da177e4SLinus Torvalds 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1728bd4c625cSLinus Torvalds 		    atomic_read(&jl->j_commit_left)
1729bd4c625cSLinus Torvalds 		    || !(jl->j_state & LIST_DIRTY)) {
17301da177e4SLinus Torvalds 			del_from_work_list(s, jl);
17311da177e4SLinus Torvalds 			break;
17321da177e4SLinus Torvalds 		}
17331da177e4SLinus Torvalds 		ret = write_one_transaction(s, jl, &chunk);
17341da177e4SLinus Torvalds 
17351da177e4SLinus Torvalds 		if (ret < 0)
17361da177e4SLinus Torvalds 			goto done;
17371da177e4SLinus Torvalds 		transactions_flushed++;
17381da177e4SLinus Torvalds 		written += ret;
17391da177e4SLinus Torvalds 		entry = jl->j_list.next;
17401da177e4SLinus Torvalds 
17411da177e4SLinus Torvalds 		/* did we wrap? */
17421da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list) {
17431da177e4SLinus Torvalds 			break;
17441da177e4SLinus Torvalds 		}
17451da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
17461da177e4SLinus Torvalds 
17471da177e4SLinus Torvalds 		/* don't bother with older transactions */
17481da177e4SLinus Torvalds 		if (jl->j_trans_id <= orig_trans_id)
17491da177e4SLinus Torvalds 			break;
17501da177e4SLinus Torvalds 	}
17511da177e4SLinus Torvalds 	if (chunk.nr) {
17521da177e4SLinus Torvalds 		write_chunk(&chunk);
17531da177e4SLinus Torvalds 	}
17541da177e4SLinus Torvalds 
17551da177e4SLinus Torvalds       done:
1756afe70259SJeff Mahoney 	mutex_unlock(&journal->j_flush_mutex);
17571da177e4SLinus Torvalds 	return ret;
17581da177e4SLinus Torvalds }
17591da177e4SLinus Torvalds 
/* for o_sync and fsync heavy applications, they tend to use
** all the journal list slots with tiny transactions.  These
** trigger lots and lots of calls to update the header block, which
** adds seeks and slows things down.
**
** This function tries to clear out a large chunk of the journal lists
** at once, which makes everything faster since only the newest journal
** list updates the header block
*/
17691da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s,
1770bd4c625cSLinus Torvalds 				    struct reiserfs_journal_list *jl)
1771bd4c625cSLinus Torvalds {
17721da177e4SLinus Torvalds 	unsigned long len = 0;
17731da177e4SLinus Torvalds 	unsigned long cur_len;
17741da177e4SLinus Torvalds 	int ret;
17751da177e4SLinus Torvalds 	int i;
17761da177e4SLinus Torvalds 	int limit = 256;
17771da177e4SLinus Torvalds 	struct reiserfs_journal_list *tjl;
17781da177e4SLinus Torvalds 	struct reiserfs_journal_list *flush_jl;
1779600ed416SJeff Mahoney 	unsigned int trans_id;
17801da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17811da177e4SLinus Torvalds 
17821da177e4SLinus Torvalds 	flush_jl = tjl = jl;
17831da177e4SLinus Torvalds 
17841da177e4SLinus Torvalds 	/* in data logging mode, try harder to flush a lot of blocks */
17851da177e4SLinus Torvalds 	if (reiserfs_data_log(s))
17861da177e4SLinus Torvalds 		limit = 1024;
17871da177e4SLinus Torvalds 	/* flush for 256 transactions or limit blocks, whichever comes first */
17881da177e4SLinus Torvalds 	for (i = 0; i < 256 && len < limit; i++) {
17891da177e4SLinus Torvalds 		if (atomic_read(&tjl->j_commit_left) ||
17901da177e4SLinus Torvalds 		    tjl->j_trans_id < jl->j_trans_id) {
17911da177e4SLinus Torvalds 			break;
17921da177e4SLinus Torvalds 		}
17931da177e4SLinus Torvalds 		cur_len = atomic_read(&tjl->j_nonzerolen);
17941da177e4SLinus Torvalds 		if (cur_len > 0) {
17951da177e4SLinus Torvalds 			tjl->j_state &= ~LIST_TOUCHED;
17961da177e4SLinus Torvalds 		}
17971da177e4SLinus Torvalds 		len += cur_len;
17981da177e4SLinus Torvalds 		flush_jl = tjl;
17991da177e4SLinus Torvalds 		if (tjl->j_list.next == &journal->j_journal_list)
18001da177e4SLinus Torvalds 			break;
18011da177e4SLinus Torvalds 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
18021da177e4SLinus Torvalds 	}
18031da177e4SLinus Torvalds 	/* try to find a group of blocks we can flush across all the
18041da177e4SLinus Torvalds 	 ** transactions, but only bother if we've actually spanned
18051da177e4SLinus Torvalds 	 ** across multiple lists
18061da177e4SLinus Torvalds 	 */
18071da177e4SLinus Torvalds 	if (flush_jl != jl) {
18081da177e4SLinus Torvalds 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
18091da177e4SLinus Torvalds 	}
18101da177e4SLinus Torvalds 	flush_journal_list(s, flush_jl, 1);
18111da177e4SLinus Torvalds 	return 0;
18121da177e4SLinus Torvalds }
18131da177e4SLinus Torvalds 
18141da177e4SLinus Torvalds /*
18151da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh.
18161da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers.
18171da177e4SLinus Torvalds */
18181da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb,
18191da177e4SLinus Torvalds 			 struct reiserfs_journal_cnode **table,
18201da177e4SLinus Torvalds 			 struct reiserfs_journal_list *jl,
18211da177e4SLinus Torvalds 			 unsigned long block, int remove_freed)
18221da177e4SLinus Torvalds {
18231da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur;
18241da177e4SLinus Torvalds 	struct reiserfs_journal_cnode **head;
18251da177e4SLinus Torvalds 
18261da177e4SLinus Torvalds 	head = &(journal_hash(table, sb, block));
18271da177e4SLinus Torvalds 	if (!head) {
18281da177e4SLinus Torvalds 		return;
18291da177e4SLinus Torvalds 	}
18301da177e4SLinus Torvalds 	cur = *head;
18311da177e4SLinus Torvalds 	while (cur) {
1832bd4c625cSLinus Torvalds 		if (cur->blocknr == block && cur->sb == sb
1833bd4c625cSLinus Torvalds 		    && (jl == NULL || jl == cur->jlist)
1834bd4c625cSLinus Torvalds 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
18351da177e4SLinus Torvalds 			if (cur->hnext) {
18361da177e4SLinus Torvalds 				cur->hnext->hprev = cur->hprev;
18371da177e4SLinus Torvalds 			}
18381da177e4SLinus Torvalds 			if (cur->hprev) {
18391da177e4SLinus Torvalds 				cur->hprev->hnext = cur->hnext;
18401da177e4SLinus Torvalds 			} else {
18411da177e4SLinus Torvalds 				*head = cur->hnext;
18421da177e4SLinus Torvalds 			}
18431da177e4SLinus Torvalds 			cur->blocknr = 0;
18441da177e4SLinus Torvalds 			cur->sb = NULL;
18451da177e4SLinus Torvalds 			cur->state = 0;
18461da177e4SLinus Torvalds 			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
18471da177e4SLinus Torvalds 				atomic_dec(&(cur->jlist->j_nonzerolen));
18481da177e4SLinus Torvalds 			cur->bh = NULL;
18491da177e4SLinus Torvalds 			cur->jlist = NULL;
18501da177e4SLinus Torvalds 		}
18511da177e4SLinus Torvalds 		cur = cur->hnext;
18521da177e4SLinus Torvalds 	}
18531da177e4SLinus Torvalds }
18541da177e4SLinus Torvalds 
1855a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb)
1856bd4c625cSLinus Torvalds {
1857a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1858d739b42bSPekka Enberg 	kfree(journal->j_current_jl);
18591da177e4SLinus Torvalds 	journal->j_num_lists--;
18601da177e4SLinus Torvalds 
18611da177e4SLinus Torvalds 	vfree(journal->j_cnode_free_orig);
1862a9dd3643SJeff Mahoney 	free_list_bitmaps(sb, journal->j_list_bitmap);
1863a9dd3643SJeff Mahoney 	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */
18641da177e4SLinus Torvalds 	if (journal->j_header_bh) {
18651da177e4SLinus Torvalds 		brelse(journal->j_header_bh);
18661da177e4SLinus Torvalds 	}
18671da177e4SLinus Torvalds 	/* j_header_bh is on the journal dev, make sure not to release the journal
18681da177e4SLinus Torvalds 	 * dev until we brelse j_header_bh
18691da177e4SLinus Torvalds 	 */
1870a9dd3643SJeff Mahoney 	release_journal_dev(sb, journal);
18711da177e4SLinus Torvalds 	vfree(journal);
18721da177e4SLinus Torvalds }
18731da177e4SLinus Torvalds 
18741da177e4SLinus Torvalds /*
18751da177e4SLinus Torvalds ** call on unmount.  Only set error to 1 if you haven't made your way out
18761da177e4SLinus Torvalds ** of read_super() yet.  Any other caller must keep error at 0.
18771da177e4SLinus Torvalds */
1878bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th,
1879a9dd3643SJeff Mahoney 			      struct super_block *sb, int error)
1880bd4c625cSLinus Torvalds {
18811da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
18821da177e4SLinus Torvalds 	int flushed = 0;
1883a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
18841da177e4SLinus Torvalds 
18851da177e4SLinus Torvalds 	/* we only want to flush out transactions if we were called with error == 0
18861da177e4SLinus Torvalds 	 */
1887a9dd3643SJeff Mahoney 	if (!error && !(sb->s_flags & MS_RDONLY)) {
18881da177e4SLinus Torvalds 		/* end the current trans */
18891da177e4SLinus Torvalds 		BUG_ON(!th->t_trans_id);
1890a9dd3643SJeff Mahoney 		do_journal_end(th, sb, 10, FLUSH_ALL);
18911da177e4SLinus Torvalds 
18921da177e4SLinus Torvalds 		/* make sure something gets logged to force our way into the flush code */
1893a9dd3643SJeff Mahoney 		if (!journal_join(&myth, sb, 1)) {
1894a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1895a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1896bd4c625cSLinus Torvalds 						     1);
1897a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1898a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1899a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19001da177e4SLinus Torvalds 			flushed = 1;
19011da177e4SLinus Torvalds 		}
19021da177e4SLinus Torvalds 	}
19031da177e4SLinus Torvalds 
19041da177e4SLinus Torvalds 	/* this also catches errors during the do_journal_end above */
19051da177e4SLinus Torvalds 	if (!error && reiserfs_is_journal_aborted(journal)) {
19061da177e4SLinus Torvalds 		memset(&myth, 0, sizeof(myth));
1907a9dd3643SJeff Mahoney 		if (!journal_join_abort(&myth, sb, 1)) {
1908a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1909a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1910bd4c625cSLinus Torvalds 						     1);
1911a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1912a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1913a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19141da177e4SLinus Torvalds 		}
19151da177e4SLinus Torvalds 	}
19161da177e4SLinus Torvalds 
19171da177e4SLinus Torvalds 	reiserfs_mounted_fs_count--;
19181da177e4SLinus Torvalds 	/* wait for all commits to finish */
1919a9dd3643SJeff Mahoney 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
19208ebc4232SFrederic Weisbecker 
19218ebc4232SFrederic Weisbecker 	/*
19228ebc4232SFrederic Weisbecker 	 * We must release the write lock here because
19238ebc4232SFrederic Weisbecker 	 * the workqueue job (flush_async_commit) needs this lock
19248ebc4232SFrederic Weisbecker 	 */
19258ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
1926033369d1SArtem Bityutskiy 
1927033369d1SArtem Bityutskiy 	cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
19281da177e4SLinus Torvalds 	flush_workqueue(commit_wq);
19298ebc4232SFrederic Weisbecker 
19301da177e4SLinus Torvalds 	if (!reiserfs_mounted_fs_count) {
19311da177e4SLinus Torvalds 		destroy_workqueue(commit_wq);
19321da177e4SLinus Torvalds 		commit_wq = NULL;
19331da177e4SLinus Torvalds 	}
19341da177e4SLinus Torvalds 
1935a9dd3643SJeff Mahoney 	free_journal_ram(sb);
19361da177e4SLinus Torvalds 
19370523676dSFrederic Weisbecker 	reiserfs_write_lock(sb);
19380523676dSFrederic Weisbecker 
19391da177e4SLinus Torvalds 	return 0;
19401da177e4SLinus Torvalds }
19411da177e4SLinus Torvalds 
/*
 * Call on unmount.  Flushes all journal transactions and releases all
 * allocated ram by running do_journal_release() with error == 0.
 */
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *sb)
{
	const int error = 0;	/* regular unmount */

	return do_journal_release(th, sb, error);
}
1950bd4c625cSLinus Torvalds 
/*
 * Only call from an error condition inside reiserfs_read_super!
 * Runs do_journal_release() with error == 1, so no transactions are
 * flushed on the way out.
 */
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *sb)
{
	const int error = 1;	/* failed mount */

	return do_journal_release(th, sb, error);
}
19591da177e4SLinus Torvalds 
19601da177e4SLinus Torvalds /* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
1961a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb,
1962bd4c625cSLinus Torvalds 				       struct reiserfs_journal_desc *desc,
1963bd4c625cSLinus Torvalds 				       struct reiserfs_journal_commit *commit)
1964bd4c625cSLinus Torvalds {
19651da177e4SLinus Torvalds 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
19661da177e4SLinus Torvalds 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
1967a9dd3643SJeff Mahoney 	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
1968bd4c625cSLinus Torvalds 	    get_commit_trans_len(commit) <= 0) {
19691da177e4SLinus Torvalds 		return 1;
19701da177e4SLinus Torvalds 	}
19711da177e4SLinus Torvalds 	return 0;
19721da177e4SLinus Torvalds }
1973bd4c625cSLinus Torvalds 
19741da177e4SLinus Torvalds /* returns 0 if it did not find a description block
19751da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block
19761da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid
19771da177e4SLinus Torvalds */
1978a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb,
1979bd4c625cSLinus Torvalds 					struct buffer_head *d_bh,
1980600ed416SJeff Mahoney 					unsigned int *oldest_invalid_trans_id,
1981bd4c625cSLinus Torvalds 					unsigned long *newest_mount_id)
1982bd4c625cSLinus Torvalds {
19831da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
19841da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
19851da177e4SLinus Torvalds 	struct buffer_head *c_bh;
19861da177e4SLinus Torvalds 	unsigned long offset;
19871da177e4SLinus Torvalds 
19881da177e4SLinus Torvalds 	if (!d_bh)
19891da177e4SLinus Torvalds 		return 0;
19901da177e4SLinus Torvalds 
19911da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1992bd4c625cSLinus Torvalds 	if (get_desc_trans_len(desc) > 0
1993bd4c625cSLinus Torvalds 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
1994bd4c625cSLinus Torvalds 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
1995bd4c625cSLinus Torvalds 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
1996a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1997bd4c625cSLinus Torvalds 				       "journal-986: transaction "
19981da177e4SLinus Torvalds 				       "is valid returning because trans_id %d is greater than "
1999bd4c625cSLinus Torvalds 				       "oldest_invalid %lu",
2000bd4c625cSLinus Torvalds 				       get_desc_trans_id(desc),
20011da177e4SLinus Torvalds 				       *oldest_invalid_trans_id);
20021da177e4SLinus Torvalds 			return 0;
20031da177e4SLinus Torvalds 		}
2004bd4c625cSLinus Torvalds 		if (newest_mount_id
2005bd4c625cSLinus Torvalds 		    && *newest_mount_id > get_desc_mount_id(desc)) {
2006a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2007bd4c625cSLinus Torvalds 				       "journal-1087: transaction "
20081da177e4SLinus Torvalds 				       "is valid returning because mount_id %d is less than "
2009bd4c625cSLinus Torvalds 				       "newest_mount_id %lu",
2010bd4c625cSLinus Torvalds 				       get_desc_mount_id(desc),
20111da177e4SLinus Torvalds 				       *newest_mount_id);
20121da177e4SLinus Torvalds 			return -1;
20131da177e4SLinus Torvalds 		}
2014a9dd3643SJeff Mahoney 		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2015a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2018",
201645b03d5eSJeff Mahoney 					 "Bad transaction length %d "
201745b03d5eSJeff Mahoney 					 "encountered, ignoring transaction",
2018bd4c625cSLinus Torvalds 					 get_desc_trans_len(desc));
20191da177e4SLinus Torvalds 			return -1;
20201da177e4SLinus Torvalds 		}
2021a9dd3643SJeff Mahoney 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
20221da177e4SLinus Torvalds 
20231da177e4SLinus Torvalds 		/* ok, we have a journal description block, lets see if the transaction was valid */
2024bd4c625cSLinus Torvalds 		c_bh =
2025a9dd3643SJeff Mahoney 		    journal_bread(sb,
2026a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2027bd4c625cSLinus Torvalds 				  ((offset + get_desc_trans_len(desc) +
2028a9dd3643SJeff Mahoney 				    1) % SB_ONDISK_JOURNAL_SIZE(sb)));
20291da177e4SLinus Torvalds 		if (!c_bh)
20301da177e4SLinus Torvalds 			return 0;
20311da177e4SLinus Torvalds 		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2032a9dd3643SJeff Mahoney 		if (journal_compare_desc_commit(sb, desc, commit)) {
2033a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
20341da177e4SLinus Torvalds 				       "journal_transaction_is_valid, commit offset %ld had bad "
20351da177e4SLinus Torvalds 				       "time %d or length %d",
2036bd4c625cSLinus Torvalds 				       c_bh->b_blocknr -
2037a9dd3643SJeff Mahoney 				       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
20381da177e4SLinus Torvalds 				       get_commit_trans_id(commit),
20391da177e4SLinus Torvalds 				       get_commit_trans_len(commit));
20401da177e4SLinus Torvalds 			brelse(c_bh);
20411da177e4SLinus Torvalds 			if (oldest_invalid_trans_id) {
2042bd4c625cSLinus Torvalds 				*oldest_invalid_trans_id =
2043bd4c625cSLinus Torvalds 				    get_desc_trans_id(desc);
2044a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2045bd4c625cSLinus Torvalds 					       "journal-1004: "
20461da177e4SLinus Torvalds 					       "transaction_is_valid setting oldest invalid trans_id "
2047bd4c625cSLinus Torvalds 					       "to %d",
2048bd4c625cSLinus Torvalds 					       get_desc_trans_id(desc));
20491da177e4SLinus Torvalds 			}
20501da177e4SLinus Torvalds 			return -1;
20511da177e4SLinus Torvalds 		}
20521da177e4SLinus Torvalds 		brelse(c_bh);
2053a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2054bd4c625cSLinus Torvalds 			       "journal-1006: found valid "
20551da177e4SLinus Torvalds 			       "transaction start offset %llu, len %d id %d",
2056bd4c625cSLinus Torvalds 			       d_bh->b_blocknr -
2057a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2058bd4c625cSLinus Torvalds 			       get_desc_trans_len(desc),
2059bd4c625cSLinus Torvalds 			       get_desc_trans_id(desc));
20601da177e4SLinus Torvalds 		return 1;
20611da177e4SLinus Torvalds 	} else {
20621da177e4SLinus Torvalds 		return 0;
20631da177e4SLinus Torvalds 	}
20641da177e4SLinus Torvalds }
20651da177e4SLinus Torvalds 
/* brelse() the first @num buffer heads of @heads, in ascending order */
static void brelse_array(struct buffer_head **heads, int num)
{
	while (num-- > 0)
		brelse(*heads++);
}
20731da177e4SLinus Torvalds 
20741da177e4SLinus Torvalds /*
20751da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions,
20761da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction
20771da177e4SLinus Torvalds ** is invalid, or too old.
20781da177e4SLinus Torvalds */
2079a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb,
2080bd4c625cSLinus Torvalds 				    unsigned long cur_dblock,
2081bd4c625cSLinus Torvalds 				    unsigned long oldest_start,
2082600ed416SJeff Mahoney 				    unsigned int oldest_trans_id,
2083bd4c625cSLinus Torvalds 				    unsigned long newest_mount_id)
2084bd4c625cSLinus Torvalds {
2085a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
20861da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
20871da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
2088600ed416SJeff Mahoney 	unsigned int trans_id = 0;
20891da177e4SLinus Torvalds 	struct buffer_head *c_bh;
20901da177e4SLinus Torvalds 	struct buffer_head *d_bh;
20911da177e4SLinus Torvalds 	struct buffer_head **log_blocks = NULL;
20921da177e4SLinus Torvalds 	struct buffer_head **real_blocks = NULL;
2093600ed416SJeff Mahoney 	unsigned int trans_offset;
20941da177e4SLinus Torvalds 	int i;
20951da177e4SLinus Torvalds 	int trans_half;
20961da177e4SLinus Torvalds 
2097a9dd3643SJeff Mahoney 	d_bh = journal_bread(sb, cur_dblock);
20981da177e4SLinus Torvalds 	if (!d_bh)
20991da177e4SLinus Torvalds 		return 1;
21001da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2101a9dd3643SJeff Mahoney 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2102a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
21031da177e4SLinus Torvalds 		       "journal_read_transaction, offset %llu, len %d mount_id %d",
2104a9dd3643SJeff Mahoney 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
21051da177e4SLinus Torvalds 		       get_desc_trans_len(desc), get_desc_mount_id(desc));
21061da177e4SLinus Torvalds 	if (get_desc_trans_id(desc) < oldest_trans_id) {
2107a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
21081da177e4SLinus Torvalds 			       "journal_read_trans skipping because %lu is too old",
2109bd4c625cSLinus Torvalds 			       cur_dblock -
2110a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb));
21111da177e4SLinus Torvalds 		brelse(d_bh);
21121da177e4SLinus Torvalds 		return 1;
21131da177e4SLinus Torvalds 	}
21141da177e4SLinus Torvalds 	if (get_desc_mount_id(desc) != newest_mount_id) {
2115a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
21161da177e4SLinus Torvalds 			       "journal_read_trans skipping because %d is != "
21171da177e4SLinus Torvalds 			       "newest_mount_id %lu", get_desc_mount_id(desc),
21181da177e4SLinus Torvalds 			       newest_mount_id);
21191da177e4SLinus Torvalds 		brelse(d_bh);
21201da177e4SLinus Torvalds 		return 1;
21211da177e4SLinus Torvalds 	}
2122a9dd3643SJeff Mahoney 	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
21231da177e4SLinus Torvalds 			     ((trans_offset + get_desc_trans_len(desc) + 1) %
2124a9dd3643SJeff Mahoney 			      SB_ONDISK_JOURNAL_SIZE(sb)));
21251da177e4SLinus Torvalds 	if (!c_bh) {
21261da177e4SLinus Torvalds 		brelse(d_bh);
21271da177e4SLinus Torvalds 		return 1;
21281da177e4SLinus Torvalds 	}
21291da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2130a9dd3643SJeff Mahoney 	if (journal_compare_desc_commit(sb, desc, commit)) {
2131a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2132bd4c625cSLinus Torvalds 			       "journal_read_transaction, "
21331da177e4SLinus Torvalds 			       "commit offset %llu had bad time %d or length %d",
2134bd4c625cSLinus Torvalds 			       c_bh->b_blocknr -
2135a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2136bd4c625cSLinus Torvalds 			       get_commit_trans_id(commit),
2137bd4c625cSLinus Torvalds 			       get_commit_trans_len(commit));
21381da177e4SLinus Torvalds 		brelse(c_bh);
21391da177e4SLinus Torvalds 		brelse(d_bh);
21401da177e4SLinus Torvalds 		return 1;
21411da177e4SLinus Torvalds 	}
21423f8b5ee3SJeff Mahoney 
21433f8b5ee3SJeff Mahoney 	if (bdev_read_only(sb->s_bdev)) {
21443f8b5ee3SJeff Mahoney 		reiserfs_warning(sb, "clm-2076",
21453f8b5ee3SJeff Mahoney 				 "device is readonly, unable to replay log");
21463f8b5ee3SJeff Mahoney 		brelse(c_bh);
21473f8b5ee3SJeff Mahoney 		brelse(d_bh);
21483f8b5ee3SJeff Mahoney 		return -EROFS;
21493f8b5ee3SJeff Mahoney 	}
21503f8b5ee3SJeff Mahoney 
21511da177e4SLinus Torvalds 	trans_id = get_desc_trans_id(desc);
21521da177e4SLinus Torvalds 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
2153d739b42bSPekka Enberg 	log_blocks = kmalloc(get_desc_trans_len(desc) *
2154d739b42bSPekka Enberg 			     sizeof(struct buffer_head *), GFP_NOFS);
2155d739b42bSPekka Enberg 	real_blocks = kmalloc(get_desc_trans_len(desc) *
2156d739b42bSPekka Enberg 			      sizeof(struct buffer_head *), GFP_NOFS);
21571da177e4SLinus Torvalds 	if (!log_blocks || !real_blocks) {
21581da177e4SLinus Torvalds 		brelse(c_bh);
21591da177e4SLinus Torvalds 		brelse(d_bh);
2160d739b42bSPekka Enberg 		kfree(log_blocks);
2161d739b42bSPekka Enberg 		kfree(real_blocks);
2162a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1169",
216345b03d5eSJeff Mahoney 				 "kmalloc failed, unable to mount FS");
21641da177e4SLinus Torvalds 		return -1;
21651da177e4SLinus Torvalds 	}
21661da177e4SLinus Torvalds 	/* get all the buffer heads */
2167a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
21681da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
2169bd4c625cSLinus Torvalds 		log_blocks[i] =
2170a9dd3643SJeff Mahoney 		    journal_getblk(sb,
2171a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2172bd4c625cSLinus Torvalds 				   (trans_offset + 1 +
2173a9dd3643SJeff Mahoney 				    i) % SB_ONDISK_JOURNAL_SIZE(sb));
21741da177e4SLinus Torvalds 		if (i < trans_half) {
2175bd4c625cSLinus Torvalds 			real_blocks[i] =
2176a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2177bd4c625cSLinus Torvalds 				      le32_to_cpu(desc->j_realblock[i]));
21781da177e4SLinus Torvalds 		} else {
2179bd4c625cSLinus Torvalds 			real_blocks[i] =
2180a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2181bd4c625cSLinus Torvalds 				      le32_to_cpu(commit->
2182bd4c625cSLinus Torvalds 						  j_realblock[i - trans_half]));
21831da177e4SLinus Torvalds 		}
2184a9dd3643SJeff Mahoney 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2185a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1207",
218645b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
218745b03d5eSJeff Mahoney 					 "Block to replay is outside of "
218845b03d5eSJeff Mahoney 					 "filesystem");
21891da177e4SLinus Torvalds 			goto abort_replay;
21901da177e4SLinus Torvalds 		}
21911da177e4SLinus Torvalds 		/* make sure we don't try to replay onto log or reserved area */
2192bd4c625cSLinus Torvalds 		if (is_block_in_log_or_reserved_area
2193a9dd3643SJeff Mahoney 		    (sb, real_blocks[i]->b_blocknr)) {
2194a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1204",
219545b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
219645b03d5eSJeff Mahoney 					 "Trying to replay onto a log block");
21971da177e4SLinus Torvalds 		      abort_replay:
21981da177e4SLinus Torvalds 			brelse_array(log_blocks, i);
21991da177e4SLinus Torvalds 			brelse_array(real_blocks, i);
22001da177e4SLinus Torvalds 			brelse(c_bh);
22011da177e4SLinus Torvalds 			brelse(d_bh);
2202d739b42bSPekka Enberg 			kfree(log_blocks);
2203d739b42bSPekka Enberg 			kfree(real_blocks);
22041da177e4SLinus Torvalds 			return -1;
22051da177e4SLinus Torvalds 		}
22061da177e4SLinus Torvalds 	}
22071da177e4SLinus Torvalds 	/* read in the log blocks, memcpy to the corresponding real block */
22081da177e4SLinus Torvalds 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
22091da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22108ebc4232SFrederic Weisbecker 
22118ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
22121da177e4SLinus Torvalds 		wait_on_buffer(log_blocks[i]);
22138ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
22148ebc4232SFrederic Weisbecker 
22151da177e4SLinus Torvalds 		if (!buffer_uptodate(log_blocks[i])) {
2216a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1212",
221745b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
221845b03d5eSJeff Mahoney 					 "buffer write failed");
2219bd4c625cSLinus Torvalds 			brelse_array(log_blocks + i,
2220bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22211da177e4SLinus Torvalds 			brelse_array(real_blocks, get_desc_trans_len(desc));
22221da177e4SLinus Torvalds 			brelse(c_bh);
22231da177e4SLinus Torvalds 			brelse(d_bh);
2224d739b42bSPekka Enberg 			kfree(log_blocks);
2225d739b42bSPekka Enberg 			kfree(real_blocks);
22261da177e4SLinus Torvalds 			return -1;
22271da177e4SLinus Torvalds 		}
2228bd4c625cSLinus Torvalds 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2229bd4c625cSLinus Torvalds 		       real_blocks[i]->b_size);
22301da177e4SLinus Torvalds 		set_buffer_uptodate(real_blocks[i]);
22311da177e4SLinus Torvalds 		brelse(log_blocks[i]);
22321da177e4SLinus Torvalds 	}
22331da177e4SLinus Torvalds 	/* flush out the real blocks */
22341da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22351da177e4SLinus Torvalds 		set_buffer_dirty(real_blocks[i]);
22369cb569d6SChristoph Hellwig 		write_dirty_buffer(real_blocks[i], WRITE);
22371da177e4SLinus Torvalds 	}
22381da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22391da177e4SLinus Torvalds 		wait_on_buffer(real_blocks[i]);
22401da177e4SLinus Torvalds 		if (!buffer_uptodate(real_blocks[i])) {
2241a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1226",
224245b03d5eSJeff Mahoney 					 "REPLAY FAILURE, fsck required! "
224345b03d5eSJeff Mahoney 					 "buffer write failed");
2244bd4c625cSLinus Torvalds 			brelse_array(real_blocks + i,
2245bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22461da177e4SLinus Torvalds 			brelse(c_bh);
22471da177e4SLinus Torvalds 			brelse(d_bh);
2248d739b42bSPekka Enberg 			kfree(log_blocks);
2249d739b42bSPekka Enberg 			kfree(real_blocks);
22501da177e4SLinus Torvalds 			return -1;
22511da177e4SLinus Torvalds 		}
22521da177e4SLinus Torvalds 		brelse(real_blocks[i]);
22531da177e4SLinus Torvalds 	}
2254bd4c625cSLinus Torvalds 	cur_dblock =
2255a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2256bd4c625cSLinus Torvalds 	    ((trans_offset + get_desc_trans_len(desc) +
2257a9dd3643SJeff Mahoney 	      2) % SB_ONDISK_JOURNAL_SIZE(sb));
2258a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2259bd4c625cSLinus Torvalds 		       "journal-1095: setting journal " "start to offset %ld",
2260a9dd3643SJeff Mahoney 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
22611da177e4SLinus Torvalds 
22621da177e4SLinus Torvalds 	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2263a9dd3643SJeff Mahoney 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
22641da177e4SLinus Torvalds 	journal->j_last_flush_trans_id = trans_id;
22651da177e4SLinus Torvalds 	journal->j_trans_id = trans_id + 1;
2266a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
2267a44c94a7SAlexander Zarochentsev 	if (journal->j_trans_id == 0)
2268a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
22691da177e4SLinus Torvalds 	brelse(c_bh);
22701da177e4SLinus Torvalds 	brelse(d_bh);
2271d739b42bSPekka Enberg 	kfree(log_blocks);
2272d739b42bSPekka Enberg 	kfree(real_blocks);
22731da177e4SLinus Torvalds 	return 0;
22741da177e4SLinus Torvalds }
22751da177e4SLinus Torvalds 
22761da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize
22771da177e4SLinus Torvalds    size (but no more than BUFNR blocks at a time). This proved to improve
22781da177e4SLinus Torvalds    mounting speed on self-rebuilding raid5 arrays at least.
22791da177e4SLinus Torvalds    Right now it is only used from journal code. But later we might use it
22801da177e4SLinus Torvalds    from other places.
22811da177e4SLinus Torvalds    Note: Do not use journal_getblk/sb_getblk functions here! */
22823ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev,
22833ee16670SJeff Mahoney 					   b_blocknr_t block, int bufsize,
22843ee16670SJeff Mahoney 					   b_blocknr_t max_block)
22851da177e4SLinus Torvalds {
22861da177e4SLinus Torvalds 	struct buffer_head *bhlist[BUFNR];
22871da177e4SLinus Torvalds 	unsigned int blocks = BUFNR;
22881da177e4SLinus Torvalds 	struct buffer_head *bh;
22891da177e4SLinus Torvalds 	int i, j;
22901da177e4SLinus Torvalds 
22911da177e4SLinus Torvalds 	bh = __getblk(dev, block, bufsize);
22921da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
22931da177e4SLinus Torvalds 		return (bh);
22941da177e4SLinus Torvalds 
22951da177e4SLinus Torvalds 	if (block + BUFNR > max_block) {
22961da177e4SLinus Torvalds 		blocks = max_block - block;
22971da177e4SLinus Torvalds 	}
22981da177e4SLinus Torvalds 	bhlist[0] = bh;
22991da177e4SLinus Torvalds 	j = 1;
23001da177e4SLinus Torvalds 	for (i = 1; i < blocks; i++) {
23011da177e4SLinus Torvalds 		bh = __getblk(dev, block + i, bufsize);
23021da177e4SLinus Torvalds 		if (buffer_uptodate(bh)) {
23031da177e4SLinus Torvalds 			brelse(bh);
23041da177e4SLinus Torvalds 			break;
2305bd4c625cSLinus Torvalds 		} else
2306bd4c625cSLinus Torvalds 			bhlist[j++] = bh;
23071da177e4SLinus Torvalds 	}
23081da177e4SLinus Torvalds 	ll_rw_block(READ, j, bhlist);
23091da177e4SLinus Torvalds 	for (i = 1; i < j; i++)
23101da177e4SLinus Torvalds 		brelse(bhlist[i]);
23111da177e4SLinus Torvalds 	bh = bhlist[0];
23121da177e4SLinus Torvalds 	wait_on_buffer(bh);
23131da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23141da177e4SLinus Torvalds 		return bh;
23151da177e4SLinus Torvalds 	brelse(bh);
23161da177e4SLinus Torvalds 	return NULL;
23171da177e4SLinus Torvalds }
23181da177e4SLinus Torvalds 
23191da177e4SLinus Torvalds /*
23201da177e4SLinus Torvalds ** read and replay the log
23211da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
23221da177e4SLinus Torvalds ** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
23231da177e4SLinus Torvalds **
23241da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
23251da177e4SLinus Torvalds **
23261da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly.
23271da177e4SLinus Torvalds */
2328a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb)
2329bd4c625cSLinus Torvalds {
2330a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
23311da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
2332600ed416SJeff Mahoney 	unsigned int oldest_trans_id = 0;
2333600ed416SJeff Mahoney 	unsigned int oldest_invalid_trans_id = 0;
23341da177e4SLinus Torvalds 	time_t start;
23351da177e4SLinus Torvalds 	unsigned long oldest_start = 0;
23361da177e4SLinus Torvalds 	unsigned long cur_dblock = 0;
23371da177e4SLinus Torvalds 	unsigned long newest_mount_id = 9;
23381da177e4SLinus Torvalds 	struct buffer_head *d_bh;
23391da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
23401da177e4SLinus Torvalds 	int valid_journal_header = 0;
23411da177e4SLinus Torvalds 	int replay_count = 0;
23421da177e4SLinus Torvalds 	int continue_replay = 1;
23431da177e4SLinus Torvalds 	int ret;
23441da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
23451da177e4SLinus Torvalds 
2346a9dd3643SJeff Mahoney 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2347a9dd3643SJeff Mahoney 	reiserfs_info(sb, "checking transaction log (%s)\n",
23481da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b));
23491da177e4SLinus Torvalds 	start = get_seconds();
23501da177e4SLinus Torvalds 
23511da177e4SLinus Torvalds 	/* step 1, read in the journal header block.  Check the transaction it says
23521da177e4SLinus Torvalds 	 ** is the first unflushed, and if that transaction is not valid,
23531da177e4SLinus Torvalds 	 ** replay is done
23541da177e4SLinus Torvalds 	 */
2355a9dd3643SJeff Mahoney 	journal->j_header_bh = journal_bread(sb,
2356a9dd3643SJeff Mahoney 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2357a9dd3643SJeff Mahoney 					     + SB_ONDISK_JOURNAL_SIZE(sb));
23581da177e4SLinus Torvalds 	if (!journal->j_header_bh) {
23591da177e4SLinus Torvalds 		return 1;
23601da177e4SLinus Torvalds 	}
23611da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2362c499ec24SVladimir V. Saveliev 	if (le32_to_cpu(jh->j_first_unflushed_offset) <
2363a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_SIZE(sb)
2364bd4c625cSLinus Torvalds 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2365bd4c625cSLinus Torvalds 		oldest_start =
2366a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
23671da177e4SLinus Torvalds 		    le32_to_cpu(jh->j_first_unflushed_offset);
23681da177e4SLinus Torvalds 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
23691da177e4SLinus Torvalds 		newest_mount_id = le32_to_cpu(jh->j_mount_id);
2370a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2371bd4c625cSLinus Torvalds 			       "journal-1153: found in "
23721da177e4SLinus Torvalds 			       "header: first_unflushed_offset %d, last_flushed_trans_id "
23731da177e4SLinus Torvalds 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
23741da177e4SLinus Torvalds 			       le32_to_cpu(jh->j_last_flush_trans_id));
23751da177e4SLinus Torvalds 		valid_journal_header = 1;
23761da177e4SLinus Torvalds 
23771da177e4SLinus Torvalds 		/* now, we try to read the first unflushed offset.  If it is not valid,
23781da177e4SLinus Torvalds 		 ** there is nothing more we can do, and it makes no sense to read
23791da177e4SLinus Torvalds 		 ** through the whole log.
23801da177e4SLinus Torvalds 		 */
2381bd4c625cSLinus Torvalds 		d_bh =
2382a9dd3643SJeff Mahoney 		    journal_bread(sb,
2383a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2384bd4c625cSLinus Torvalds 				  le32_to_cpu(jh->j_first_unflushed_offset));
2385a9dd3643SJeff Mahoney 		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
23861da177e4SLinus Torvalds 		if (!ret) {
23871da177e4SLinus Torvalds 			continue_replay = 0;
23881da177e4SLinus Torvalds 		}
23891da177e4SLinus Torvalds 		brelse(d_bh);
23901da177e4SLinus Torvalds 		goto start_log_replay;
23911da177e4SLinus Torvalds 	}
23921da177e4SLinus Torvalds 
23931da177e4SLinus Torvalds 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
23941da177e4SLinus Torvalds 	 ** all the valid transactions, and pick out the oldest.
23951da177e4SLinus Torvalds 	 */
2396bd4c625cSLinus Torvalds 	while (continue_replay
2397bd4c625cSLinus Torvalds 	       && cur_dblock <
2398a9dd3643SJeff Mahoney 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2399a9dd3643SJeff Mahoney 		SB_ONDISK_JOURNAL_SIZE(sb))) {
24001da177e4SLinus Torvalds 		/* Note that it is required for blocksize of primary fs device and journal
24011da177e4SLinus Torvalds 		   device to be the same */
2402bd4c625cSLinus Torvalds 		d_bh =
2403bd4c625cSLinus Torvalds 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
2404a9dd3643SJeff Mahoney 				    sb->s_blocksize,
2405a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2406a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_SIZE(sb));
2407bd4c625cSLinus Torvalds 		ret =
2408a9dd3643SJeff Mahoney 		    journal_transaction_is_valid(sb, d_bh,
2409bd4c625cSLinus Torvalds 						 &oldest_invalid_trans_id,
2410bd4c625cSLinus Torvalds 						 &newest_mount_id);
24111da177e4SLinus Torvalds 		if (ret == 1) {
24121da177e4SLinus Torvalds 			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
24131da177e4SLinus Torvalds 			if (oldest_start == 0) {	/* init all oldest_ values */
24141da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24151da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
24161da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2417a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2418bd4c625cSLinus Torvalds 					       "journal-1179: Setting "
24191da177e4SLinus Torvalds 					       "oldest_start to offset %llu, trans_id %lu",
2420bd4c625cSLinus Torvalds 					       oldest_start -
2421bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2422a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
24231da177e4SLinus Torvalds 			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
24241da177e4SLinus Torvalds 				/* one we just read was older */
24251da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24261da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
2427a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2428bd4c625cSLinus Torvalds 					       "journal-1180: Resetting "
24291da177e4SLinus Torvalds 					       "oldest_start to offset %lu, trans_id %lu",
2430bd4c625cSLinus Torvalds 					       oldest_start -
2431bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2432a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
24331da177e4SLinus Torvalds 			}
24341da177e4SLinus Torvalds 			if (newest_mount_id < get_desc_mount_id(desc)) {
24351da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2436a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2437bd4c625cSLinus Torvalds 					       "journal-1299: Setting "
2438bd4c625cSLinus Torvalds 					       "newest_mount_id to %d",
2439bd4c625cSLinus Torvalds 					       get_desc_mount_id(desc));
24401da177e4SLinus Torvalds 			}
24411da177e4SLinus Torvalds 			cur_dblock += get_desc_trans_len(desc) + 2;
24421da177e4SLinus Torvalds 		} else {
24431da177e4SLinus Torvalds 			cur_dblock++;
24441da177e4SLinus Torvalds 		}
24451da177e4SLinus Torvalds 		brelse(d_bh);
24461da177e4SLinus Torvalds 	}
24471da177e4SLinus Torvalds 
24481da177e4SLinus Torvalds       start_log_replay:
24491da177e4SLinus Torvalds 	cur_dblock = oldest_start;
24501da177e4SLinus Torvalds 	if (oldest_trans_id) {
2451a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2452bd4c625cSLinus Torvalds 			       "journal-1206: Starting replay "
24531da177e4SLinus Torvalds 			       "from offset %llu, trans_id %lu",
2454a9dd3643SJeff Mahoney 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
24551da177e4SLinus Torvalds 			       oldest_trans_id);
24561da177e4SLinus Torvalds 
24571da177e4SLinus Torvalds 	}
24581da177e4SLinus Torvalds 	replay_count = 0;
24591da177e4SLinus Torvalds 	while (continue_replay && oldest_trans_id > 0) {
2460bd4c625cSLinus Torvalds 		ret =
2461a9dd3643SJeff Mahoney 		    journal_read_transaction(sb, cur_dblock, oldest_start,
2462bd4c625cSLinus Torvalds 					     oldest_trans_id, newest_mount_id);
24631da177e4SLinus Torvalds 		if (ret < 0) {
24641da177e4SLinus Torvalds 			return ret;
24651da177e4SLinus Torvalds 		} else if (ret != 0) {
24661da177e4SLinus Torvalds 			break;
24671da177e4SLinus Torvalds 		}
2468bd4c625cSLinus Torvalds 		cur_dblock =
2469a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
24701da177e4SLinus Torvalds 		replay_count++;
24711da177e4SLinus Torvalds 		if (cur_dblock == oldest_start)
24721da177e4SLinus Torvalds 			break;
24731da177e4SLinus Torvalds 	}
24741da177e4SLinus Torvalds 
24751da177e4SLinus Torvalds 	if (oldest_trans_id == 0) {
2476a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2477bd4c625cSLinus Torvalds 			       "journal-1225: No valid " "transactions found");
24781da177e4SLinus Torvalds 	}
24791da177e4SLinus Torvalds 	/* j_start does not get set correctly if we don't replay any transactions.
24801da177e4SLinus Torvalds 	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
24811da177e4SLinus Torvalds 	 ** copy the trans_id from the header
24821da177e4SLinus Torvalds 	 */
24831da177e4SLinus Torvalds 	if (valid_journal_header && replay_count == 0) {
24841da177e4SLinus Torvalds 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2485bd4c625cSLinus Torvalds 		journal->j_trans_id =
2486bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2487a44c94a7SAlexander Zarochentsev 		/* check for trans_id overflow */
2488a44c94a7SAlexander Zarochentsev 		if (journal->j_trans_id == 0)
2489a44c94a7SAlexander Zarochentsev 			journal->j_trans_id = 10;
2490bd4c625cSLinus Torvalds 		journal->j_last_flush_trans_id =
2491bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id);
24921da177e4SLinus Torvalds 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
24931da177e4SLinus Torvalds 	} else {
24941da177e4SLinus Torvalds 		journal->j_mount_id = newest_mount_id + 1;
24951da177e4SLinus Torvalds 	}
2496a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
24971da177e4SLinus Torvalds 		       "newest_mount_id to %lu", journal->j_mount_id);
24981da177e4SLinus Torvalds 	journal->j_first_unflushed_offset = journal->j_start;
24991da177e4SLinus Torvalds 	if (replay_count > 0) {
2500a9dd3643SJeff Mahoney 		reiserfs_info(sb,
2501bd4c625cSLinus Torvalds 			      "replayed %d transactions in %lu seconds\n",
25021da177e4SLinus Torvalds 			      replay_count, get_seconds() - start);
25031da177e4SLinus Torvalds 	}
2504a9dd3643SJeff Mahoney 	if (!bdev_read_only(sb->s_bdev) &&
2505a9dd3643SJeff Mahoney 	    _update_journal_header_block(sb, journal->j_start,
2506bd4c625cSLinus Torvalds 					 journal->j_last_flush_trans_id)) {
25071da177e4SLinus Torvalds 		/* replay failed, caller must call free_journal_ram and abort
25081da177e4SLinus Torvalds 		 ** the mount
25091da177e4SLinus Torvalds 		 */
25101da177e4SLinus Torvalds 		return -1;
25111da177e4SLinus Torvalds 	}
25121da177e4SLinus Torvalds 	return 0;
25131da177e4SLinus Torvalds }
25141da177e4SLinus Torvalds 
25151da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
25161da177e4SLinus Torvalds {
25171da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
25188c777cc4SPekka Enberg 	jl = kzalloc(sizeof(struct reiserfs_journal_list),
25198c777cc4SPekka Enberg 		     GFP_NOFS | __GFP_NOFAIL);
25201da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_list);
25211da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_working_list);
25221da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
25231da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_bh_list);
252490415deaSJeff Mahoney 	mutex_init(&jl->j_commit_mutex);
25251da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_num_lists++;
25261da177e4SLinus Torvalds 	get_journal_list(jl);
25271da177e4SLinus Torvalds 	return jl;
25281da177e4SLinus Torvalds }
25291da177e4SLinus Torvalds 
2530a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb)
2531bd4c625cSLinus Torvalds {
2532a9dd3643SJeff Mahoney 	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
25331da177e4SLinus Torvalds }
25341da177e4SLinus Torvalds 
25351da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
25361da177e4SLinus Torvalds 			       struct reiserfs_journal *journal)
25371da177e4SLinus Torvalds {
25381da177e4SLinus Torvalds 	int result;
25391da177e4SLinus Torvalds 
25401da177e4SLinus Torvalds 	result = 0;
25411da177e4SLinus Torvalds 
254286098fa0SChristoph Hellwig 	if (journal->j_dev_bd != NULL) {
2543e5eb8caaSAl Viro 		result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
25441da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
25451da177e4SLinus Torvalds 	}
25461da177e4SLinus Torvalds 
25471da177e4SLinus Torvalds 	if (result != 0) {
254845b03d5eSJeff Mahoney 		reiserfs_warning(super, "sh-457",
254945b03d5eSJeff Mahoney 				 "Cannot release journal device: %i", result);
25501da177e4SLinus Torvalds 	}
25511da177e4SLinus Torvalds 	return result;
25521da177e4SLinus Torvalds }
25531da177e4SLinus Torvalds 
25541da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super,
25551da177e4SLinus Torvalds 			    struct reiserfs_journal *journal,
25561da177e4SLinus Torvalds 			    const char *jdev_name)
25571da177e4SLinus Torvalds {
25581da177e4SLinus Torvalds 	int result;
25591da177e4SLinus Torvalds 	dev_t jdev;
2560e525fd89STejun Heo 	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
25611da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
25621da177e4SLinus Torvalds 
25631da177e4SLinus Torvalds 	result = 0;
25641da177e4SLinus Torvalds 
25651da177e4SLinus Torvalds 	journal->j_dev_bd = NULL;
25661da177e4SLinus Torvalds 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
25671da177e4SLinus Torvalds 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
25681da177e4SLinus Torvalds 
25691da177e4SLinus Torvalds 	if (bdev_read_only(super->s_bdev))
25701da177e4SLinus Torvalds 		blkdev_mode = FMODE_READ;
25711da177e4SLinus Torvalds 
25721da177e4SLinus Torvalds 	/* there is no "jdev" option and journal is on separate device */
25731da177e4SLinus Torvalds 	if ((!jdev_name || !jdev_name[0])) {
2574e525fd89STejun Heo 		if (jdev == super->s_dev)
2575e525fd89STejun Heo 			blkdev_mode &= ~FMODE_EXCL;
2576d4d77629STejun Heo 		journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
2577d4d77629STejun Heo 						      journal);
2578e5eb8caaSAl Viro 		journal->j_dev_mode = blkdev_mode;
25791da177e4SLinus Torvalds 		if (IS_ERR(journal->j_dev_bd)) {
25801da177e4SLinus Torvalds 			result = PTR_ERR(journal->j_dev_bd);
25811da177e4SLinus Torvalds 			journal->j_dev_bd = NULL;
258245b03d5eSJeff Mahoney 			reiserfs_warning(super, "sh-458",
25831da177e4SLinus Torvalds 					 "cannot init journal device '%s': %i",
25841da177e4SLinus Torvalds 					 __bdevname(jdev, b), result);
25851da177e4SLinus Torvalds 			return result;
2586e525fd89STejun Heo 		} else if (jdev != super->s_dev)
25871da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
258886098fa0SChristoph Hellwig 
25891da177e4SLinus Torvalds 		return 0;
25901da177e4SLinus Torvalds 	}
25911da177e4SLinus Torvalds 
2592e5eb8caaSAl Viro 	journal->j_dev_mode = blkdev_mode;
2593d4d77629STejun Heo 	journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
259486098fa0SChristoph Hellwig 	if (IS_ERR(journal->j_dev_bd)) {
259586098fa0SChristoph Hellwig 		result = PTR_ERR(journal->j_dev_bd);
259686098fa0SChristoph Hellwig 		journal->j_dev_bd = NULL;
259786098fa0SChristoph Hellwig 		reiserfs_warning(super,
259886098fa0SChristoph Hellwig 				 "journal_init_dev: Cannot open '%s': %i",
259986098fa0SChristoph Hellwig 				 jdev_name, result);
260086098fa0SChristoph Hellwig 		return result;
260186098fa0SChristoph Hellwig 	}
260286098fa0SChristoph Hellwig 
26031da177e4SLinus Torvalds 	set_blocksize(journal->j_dev_bd, super->s_blocksize);
2604bd4c625cSLinus Torvalds 	reiserfs_info(super,
2605bd4c625cSLinus Torvalds 		      "journal_init_dev: journal device: %s\n",
260674f9f974SEdward Shishkin 		      bdevname(journal->j_dev_bd, b));
260786098fa0SChristoph Hellwig 	return 0;
26081da177e4SLinus Torvalds }
26091da177e4SLinus Torvalds 
/*
 * When creating/tuning a file system the user can assign some
 * journal params within boundaries which depend on the ratio
 * blocksize/standard_blocksize.
 *
 * For blocks >= standard_blocksize the transaction size should
 * be not less than JOURNAL_TRANS_MIN_DEFAULT, and not more
 * than JOURNAL_TRANS_MAX_DEFAULT.
 *
 * For blocks < standard_blocksize these boundaries should be
 * decreased proportionally.
 */
2622cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096)
2623cf3d0b81SEdward Shishkin 
2624a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb,
2625cf3d0b81SEdward Shishkin 				     struct reiserfs_journal *journal)
2626cf3d0b81SEdward Shishkin {
2627cf3d0b81SEdward Shishkin         if (journal->j_trans_max) {
2628cf3d0b81SEdward Shishkin 	        /* Non-default journal params.
2629cf3d0b81SEdward Shishkin 		   Do sanity check for them. */
2630cf3d0b81SEdward Shishkin 	        int ratio = 1;
2631a9dd3643SJeff Mahoney 		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2632a9dd3643SJeff Mahoney 		        ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2633cf3d0b81SEdward Shishkin 
2634cf3d0b81SEdward Shishkin 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2635cf3d0b81SEdward Shishkin 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2636a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2637cf3d0b81SEdward Shishkin 		    JOURNAL_MIN_RATIO) {
2638a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-462",
263945b03d5eSJeff Mahoney 					 "bad transaction max size (%u). "
264045b03d5eSJeff Mahoney 					 "FSCK?", journal->j_trans_max);
2641cf3d0b81SEdward Shishkin 			return 1;
2642cf3d0b81SEdward Shishkin 		}
2643cf3d0b81SEdward Shishkin 		if (journal->j_max_batch != (journal->j_trans_max) *
2644cf3d0b81SEdward Shishkin 		        JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2645a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-463",
264645b03d5eSJeff Mahoney 					 "bad transaction max batch (%u). "
264745b03d5eSJeff Mahoney 					 "FSCK?", journal->j_max_batch);
2648cf3d0b81SEdward Shishkin 			return 1;
2649cf3d0b81SEdward Shishkin 		}
2650cf3d0b81SEdward Shishkin 	} else {
2651cf3d0b81SEdward Shishkin 		/* Default journal params.
2652cf3d0b81SEdward Shishkin                    The file system was created by old version
2653cf3d0b81SEdward Shishkin 		   of mkreiserfs, so some fields contain zeros,
2654cf3d0b81SEdward Shishkin 		   and we need to advise proper values for them */
2655a9dd3643SJeff Mahoney 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2656a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2657a9dd3643SJeff Mahoney 					 sb->s_blocksize);
265845b03d5eSJeff Mahoney 			return 1;
265945b03d5eSJeff Mahoney 		}
2660cf3d0b81SEdward Shishkin 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2661cf3d0b81SEdward Shishkin 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2662cf3d0b81SEdward Shishkin 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2663cf3d0b81SEdward Shishkin 	}
2664cf3d0b81SEdward Shishkin 	return 0;
2665cf3d0b81SEdward Shishkin }
2666cf3d0b81SEdward Shishkin 
26671da177e4SLinus Torvalds /*
26681da177e4SLinus Torvalds ** must be called once on fs mount.  calls journal_read for you
26691da177e4SLinus Torvalds */
2670a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name,
2671bd4c625cSLinus Torvalds 		 int old_format, unsigned int commit_max_age)
2672bd4c625cSLinus Torvalds {
2673a9dd3643SJeff Mahoney 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
26741da177e4SLinus Torvalds 	struct buffer_head *bhjh;
26751da177e4SLinus Torvalds 	struct reiserfs_super_block *rs;
26761da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
26771da177e4SLinus Torvalds 	struct reiserfs_journal *journal;
26781da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
26791da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
268098ea3f50SFrederic Weisbecker 	int ret;
26811da177e4SLinus Torvalds 
2682558feb08SJoe Perches 	journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
26831da177e4SLinus Torvalds 	if (!journal) {
2684a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1256",
268545b03d5eSJeff Mahoney 				 "unable to get memory for journal structure");
26861da177e4SLinus Torvalds 		return 1;
26871da177e4SLinus Torvalds 	}
26881da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
26891da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_prealloc_list);
26901da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_working_list);
26911da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_journal_list);
26921da177e4SLinus Torvalds 	journal->j_persistent_trans = 0;
269337c69b98SFrederic Weisbecker 	if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
269437c69b98SFrederic Weisbecker 					   reiserfs_bmap_count(sb)))
26951da177e4SLinus Torvalds 		goto free_and_return;
269698ea3f50SFrederic Weisbecker 
2697a9dd3643SJeff Mahoney 	allocate_bitmap_nodes(sb);
26981da177e4SLinus Torvalds 
26991da177e4SLinus Torvalds 	/* reserved for journal area support */
2700a9dd3643SJeff Mahoney 	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2701bd4c625cSLinus Torvalds 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2702a9dd3643SJeff Mahoney 						 / sb->s_blocksize +
2703a9dd3643SJeff Mahoney 						 reiserfs_bmap_count(sb) +
2704bd4c625cSLinus Torvalds 						 1 :
2705bd4c625cSLinus Torvalds 						 REISERFS_DISK_OFFSET_IN_BYTES /
2706a9dd3643SJeff Mahoney 						 sb->s_blocksize + 2);
27071da177e4SLinus Torvalds 
270825985edcSLucas De Marchi 	/* Sanity check to see is the standard journal fitting within first bitmap
27091da177e4SLinus Torvalds 	   (actual for small blocksizes) */
2710a9dd3643SJeff Mahoney 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2711a9dd3643SJeff Mahoney 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2712a9dd3643SJeff Mahoney 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2713a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1393",
271445b03d5eSJeff Mahoney 				 "journal does not fit for area addressed "
271545b03d5eSJeff Mahoney 				 "by first of bitmap blocks. It starts at "
27161da177e4SLinus Torvalds 				 "%u and its size is %u. Block size %ld",
2717a9dd3643SJeff Mahoney 				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2718a9dd3643SJeff Mahoney 				 SB_ONDISK_JOURNAL_SIZE(sb),
2719a9dd3643SJeff Mahoney 				 sb->s_blocksize);
27201da177e4SLinus Torvalds 		goto free_and_return;
27211da177e4SLinus Torvalds 	}
27221da177e4SLinus Torvalds 
2723a9dd3643SJeff Mahoney 	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2724a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-462",
272545b03d5eSJeff Mahoney 				 "unable to initialize jornal device");
27261da177e4SLinus Torvalds 		goto free_and_return;
27271da177e4SLinus Torvalds 	}
27281da177e4SLinus Torvalds 
2729a9dd3643SJeff Mahoney 	rs = SB_DISK_SUPER_BLOCK(sb);
27301da177e4SLinus Torvalds 
27311da177e4SLinus Torvalds 	/* read journal header */
2732a9dd3643SJeff Mahoney 	bhjh = journal_bread(sb,
2733a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2734a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb));
27351da177e4SLinus Torvalds 	if (!bhjh) {
2736a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-459",
273745b03d5eSJeff Mahoney 				 "unable to read journal header");
27381da177e4SLinus Torvalds 		goto free_and_return;
27391da177e4SLinus Torvalds 	}
27401da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
27411da177e4SLinus Torvalds 
27421da177e4SLinus Torvalds 	/* make sure that journal matches to the super block */
2743bd4c625cSLinus Torvalds 	if (is_reiserfs_jr(rs)
2744bd4c625cSLinus Torvalds 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2745bd4c625cSLinus Torvalds 		sb_jp_journal_magic(rs))) {
2746a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-460",
274745b03d5eSJeff Mahoney 				 "journal header magic %x (device %s) does "
274845b03d5eSJeff Mahoney 				 "not match to magic found in super block %x",
274945b03d5eSJeff Mahoney 				 jh->jh_journal.jp_journal_magic,
27501da177e4SLinus Torvalds 				 bdevname(journal->j_dev_bd, b),
27511da177e4SLinus Torvalds 				 sb_jp_journal_magic(rs));
27521da177e4SLinus Torvalds 		brelse(bhjh);
27531da177e4SLinus Torvalds 		goto free_and_return;
27541da177e4SLinus Torvalds 	}
27551da177e4SLinus Torvalds 
27561da177e4SLinus Torvalds 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
27571da177e4SLinus Torvalds 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2758bd4c625cSLinus Torvalds 	journal->j_max_commit_age =
2759bd4c625cSLinus Torvalds 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
27601da177e4SLinus Torvalds 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
27611da177e4SLinus Torvalds 
2762a9dd3643SJeff Mahoney 	if (check_advise_trans_params(sb, journal) != 0)
2763cf3d0b81SEdward Shishkin 	        goto free_and_return;
27641da177e4SLinus Torvalds 	journal->j_default_max_commit_age = journal->j_max_commit_age;
27651da177e4SLinus Torvalds 
27661da177e4SLinus Torvalds 	if (commit_max_age != 0) {
27671da177e4SLinus Torvalds 		journal->j_max_commit_age = commit_max_age;
27681da177e4SLinus Torvalds 		journal->j_max_trans_age = commit_max_age;
27691da177e4SLinus Torvalds 	}
27701da177e4SLinus Torvalds 
2771a9dd3643SJeff Mahoney 	reiserfs_info(sb, "journal params: device %s, size %u, "
27721da177e4SLinus Torvalds 		      "journal first block %u, max trans len %u, max batch %u, "
27731da177e4SLinus Torvalds 		      "max commit age %u, max trans age %u\n",
27741da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b),
2775a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_SIZE(sb),
2776a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_1st_BLOCK(sb),
27771da177e4SLinus Torvalds 		      journal->j_trans_max,
27781da177e4SLinus Torvalds 		      journal->j_max_batch,
2779bd4c625cSLinus Torvalds 		      journal->j_max_commit_age, journal->j_max_trans_age);
27801da177e4SLinus Torvalds 
27811da177e4SLinus Torvalds 	brelse(bhjh);
27821da177e4SLinus Torvalds 
27831da177e4SLinus Torvalds 	journal->j_list_bitmap_index = 0;
2784a9dd3643SJeff Mahoney 	journal_list_init(sb);
27851da177e4SLinus Torvalds 
2786bd4c625cSLinus Torvalds 	memset(journal->j_list_hash_table, 0,
2787bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
27881da177e4SLinus Torvalds 
27891da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_dirty_buffers);
27901da177e4SLinus Torvalds 	spin_lock_init(&journal->j_dirty_buffers_lock);
27911da177e4SLinus Torvalds 
27921da177e4SLinus Torvalds 	journal->j_start = 0;
27931da177e4SLinus Torvalds 	journal->j_len = 0;
27941da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
27951da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
27961da177e4SLinus Torvalds 	atomic_set(&(journal->j_async_throttle), 0);
27971da177e4SLinus Torvalds 	journal->j_bcount = 0;
27981da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
27991da177e4SLinus Torvalds 	journal->j_last = NULL;
28001da177e4SLinus Torvalds 	journal->j_first = NULL;
28011da177e4SLinus Torvalds 	init_waitqueue_head(&(journal->j_join_wait));
2802f68215c4SJeff Mahoney 	mutex_init(&journal->j_mutex);
2803afe70259SJeff Mahoney 	mutex_init(&journal->j_flush_mutex);
28041da177e4SLinus Torvalds 
28051da177e4SLinus Torvalds 	journal->j_trans_id = 10;
28061da177e4SLinus Torvalds 	journal->j_mount_id = 10;
28071da177e4SLinus Torvalds 	journal->j_state = 0;
28081da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
28091da177e4SLinus Torvalds 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
28101da177e4SLinus Torvalds 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
28111da177e4SLinus Torvalds 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
28121da177e4SLinus Torvalds 	journal->j_cnode_used = 0;
28131da177e4SLinus Torvalds 	journal->j_must_wait = 0;
28141da177e4SLinus Torvalds 
2815576f6d79SJeff Mahoney 	if (journal->j_cnode_free == 0) {
2816a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2817576f6d79SJeff Mahoney 		                 "allocation failed (%ld bytes). Journal is "
2818576f6d79SJeff Mahoney 		                 "too large for available memory. Usually "
2819576f6d79SJeff Mahoney 		                 "this is due to a journal that is too large.",
2820576f6d79SJeff Mahoney 		                 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2821576f6d79SJeff Mahoney         	goto free_and_return;
2822576f6d79SJeff Mahoney 	}
2823576f6d79SJeff Mahoney 
2824a9dd3643SJeff Mahoney 	init_journal_hash(sb);
28251da177e4SLinus Torvalds 	jl = journal->j_current_jl;
282637c69b98SFrederic Weisbecker 
282737c69b98SFrederic Weisbecker 	/*
282837c69b98SFrederic Weisbecker 	 * get_list_bitmap() may call flush_commit_list() which
282937c69b98SFrederic Weisbecker 	 * requires the lock. Calling flush_commit_list() shouldn't happen
283037c69b98SFrederic Weisbecker 	 * this early but I like to be paranoid.
283137c69b98SFrederic Weisbecker 	 */
283237c69b98SFrederic Weisbecker 	reiserfs_write_lock(sb);
2833a9dd3643SJeff Mahoney 	jl->j_list_bitmap = get_list_bitmap(sb, jl);
283437c69b98SFrederic Weisbecker 	reiserfs_write_unlock(sb);
28351da177e4SLinus Torvalds 	if (!jl->j_list_bitmap) {
2836a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2005",
283745b03d5eSJeff Mahoney 				 "get_list_bitmap failed for journal list 0");
28381da177e4SLinus Torvalds 		goto free_and_return;
28391da177e4SLinus Torvalds 	}
284037c69b98SFrederic Weisbecker 
284137c69b98SFrederic Weisbecker 	/*
284237c69b98SFrederic Weisbecker 	 * Journal_read needs to be inspected in order to push down
284337c69b98SFrederic Weisbecker 	 * the lock further inside (or even remove it).
284437c69b98SFrederic Weisbecker 	 */
284537c69b98SFrederic Weisbecker 	reiserfs_write_lock(sb);
284637c69b98SFrederic Weisbecker 	ret = journal_read(sb);
284737c69b98SFrederic Weisbecker 	reiserfs_write_unlock(sb);
284837c69b98SFrederic Weisbecker 	if (ret < 0) {
2849a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "reiserfs-2006",
285045b03d5eSJeff Mahoney 				 "Replay Failure, unable to mount");
28511da177e4SLinus Torvalds 		goto free_and_return;
28521da177e4SLinus Torvalds 	}
28531da177e4SLinus Torvalds 
28541da177e4SLinus Torvalds 	reiserfs_mounted_fs_count++;
285537c69b98SFrederic Weisbecker 	if (reiserfs_mounted_fs_count <= 1)
285628aadf51STejun Heo 		commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
28571da177e4SLinus Torvalds 
2858c4028958SDavid Howells 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2859a9dd3643SJeff Mahoney 	journal->j_work_sb = sb;
28601da177e4SLinus Torvalds 	return 0;
28611da177e4SLinus Torvalds       free_and_return:
2862a9dd3643SJeff Mahoney 	free_journal_ram(sb);
28631da177e4SLinus Torvalds 	return 1;
28641da177e4SLinus Torvalds }
28651da177e4SLinus Torvalds 
28661da177e4SLinus Torvalds /*
28671da177e4SLinus Torvalds ** test for a polite end of the current transaction.  Used by file_write, and should
28681da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single
28691da177e4SLinus Torvalds ** transaction
28701da177e4SLinus Torvalds */
2871bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2872bd4c625cSLinus Torvalds 				   int new_alloc)
2873bd4c625cSLinus Torvalds {
28741da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
28751da177e4SLinus Torvalds 	time_t now = get_seconds();
28761da177e4SLinus Torvalds 	/* cannot restart while nested */
28771da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
28781da177e4SLinus Torvalds 	if (th->t_refcount > 1)
28791da177e4SLinus Torvalds 		return 0;
28801da177e4SLinus Torvalds 	if (journal->j_must_wait > 0 ||
28811da177e4SLinus Torvalds 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
28821da177e4SLinus Torvalds 	    atomic_read(&(journal->j_jlock)) ||
28831da177e4SLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
28841da177e4SLinus Torvalds 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
28851da177e4SLinus Torvalds 		return 1;
28861da177e4SLinus Torvalds 	}
2887b18c1c6eSDavidlohr Bueso 
28886ae1ea44SChris Mason 	journal->j_len_alloc += new_alloc;
28896ae1ea44SChris Mason 	th->t_blocks_allocated += new_alloc ;
28901da177e4SLinus Torvalds 	return 0;
28911da177e4SLinus Torvalds }
28921da177e4SLinus Torvalds 
2893b18c1c6eSDavidlohr Bueso /* this must be called inside a transaction
28941da177e4SLinus Torvalds */
2895bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2896bd4c625cSLinus Torvalds {
28971da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
28981da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
28991da177e4SLinus Torvalds 	journal->j_must_wait = 1;
29001da177e4SLinus Torvalds 	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
29011da177e4SLinus Torvalds 	return;
29021da177e4SLinus Torvalds }
29031da177e4SLinus Torvalds 
2904b18c1c6eSDavidlohr Bueso /* this must be called without a transaction started
29051da177e4SLinus Torvalds */
2906bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s)
2907bd4c625cSLinus Torvalds {
29081da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29091da177e4SLinus Torvalds 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
29101da177e4SLinus Torvalds 	wake_up(&journal->j_join_wait);
29111da177e4SLinus Torvalds }
29121da177e4SLinus Torvalds 
2913b18c1c6eSDavidlohr Bueso /* this must be called without a transaction started
29141da177e4SLinus Torvalds */
2915bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s)
2916bd4c625cSLinus Torvalds {
29171da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29181da177e4SLinus Torvalds 	wait_event(journal->j_join_wait,
29191da177e4SLinus Torvalds 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
29201da177e4SLinus Torvalds }
29211da177e4SLinus Torvalds 
2922bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s)
2923bd4c625cSLinus Torvalds {
29241da177e4SLinus Torvalds 	wait_queue_t wait;
29251da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29261da177e4SLinus Torvalds 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
29271da177e4SLinus Torvalds 
29281da177e4SLinus Torvalds 	/*
29291da177e4SLinus Torvalds 	 * we don't want to use wait_event here because
29301da177e4SLinus Torvalds 	 * we only want to wait once.
29311da177e4SLinus Torvalds 	 */
29321da177e4SLinus Torvalds 	init_waitqueue_entry(&wait, current);
29331da177e4SLinus Torvalds 	add_wait_queue(&journal->j_join_wait, &wait);
29341da177e4SLinus Torvalds 	set_current_state(TASK_UNINTERRUPTIBLE);
29358ebc4232SFrederic Weisbecker 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
29368ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
29371da177e4SLinus Torvalds 		schedule();
29388ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
29398ebc4232SFrederic Weisbecker 	}
29405ab2f7e0SMilind Arun Choudhary 	__set_current_state(TASK_RUNNING);
29411da177e4SLinus Torvalds 	remove_wait_queue(&journal->j_join_wait, &wait);
29421da177e4SLinus Torvalds }
29431da177e4SLinus Torvalds 
2944bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s)
2945bd4c625cSLinus Torvalds {
29461da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29471da177e4SLinus Torvalds 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
29481da177e4SLinus Torvalds 		wake_up(&journal->j_join_wait);
29491da177e4SLinus Torvalds }
29501da177e4SLinus Torvalds 
2951600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
29521da177e4SLinus Torvalds {
29531da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
29541da177e4SLinus Torvalds 	unsigned long bcount = journal->j_bcount;
29551da177e4SLinus Torvalds 	while (1) {
29568ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
2957041e0e3bSNishanth Aravamudan 		schedule_timeout_uninterruptible(1);
29588ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
29591da177e4SLinus Torvalds 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
29601da177e4SLinus Torvalds 		while ((atomic_read(&journal->j_wcount) > 0 ||
29611da177e4SLinus Torvalds 			atomic_read(&journal->j_jlock)) &&
29621da177e4SLinus Torvalds 		       journal->j_trans_id == trans_id) {
29631da177e4SLinus Torvalds 			queue_log_writer(sb);
29641da177e4SLinus Torvalds 		}
29651da177e4SLinus Torvalds 		if (journal->j_trans_id != trans_id)
29661da177e4SLinus Torvalds 			break;
29671da177e4SLinus Torvalds 		if (bcount == journal->j_bcount)
29681da177e4SLinus Torvalds 			break;
29691da177e4SLinus Torvalds 		bcount = journal->j_bcount;
29701da177e4SLinus Torvalds 	}
29711da177e4SLinus Torvalds }
29721da177e4SLinus Torvalds 
29731da177e4SLinus Torvalds /* join == true if you must join an existing transaction.
29741da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish
29751da177e4SLinus Torvalds **
29761da177e4SLinus Torvalds ** this will block until the transaction is joinable.  send the number of blocks you
29771da177e4SLinus Torvalds ** expect to use in nblocks.
29781da177e4SLinus Torvalds */
2979bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2980a9dd3643SJeff Mahoney 			      struct super_block *sb, unsigned long nblocks,
2981bd4c625cSLinus Torvalds 			      int join)
2982bd4c625cSLinus Torvalds {
29831da177e4SLinus Torvalds 	time_t now = get_seconds();
2984600ed416SJeff Mahoney 	unsigned int old_trans_id;
2985a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
29861da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
29871da177e4SLinus Torvalds 	int sched_count = 0;
29881da177e4SLinus Torvalds 	int retval;
29891da177e4SLinus Torvalds 
2990a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal_begin");
299114a61442SEric Sesterhenn 	BUG_ON(nblocks > journal->j_trans_max);
29921da177e4SLinus Torvalds 
2993a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.journal_being);
29941da177e4SLinus Torvalds 	/* set here for journal_join */
29951da177e4SLinus Torvalds 	th->t_refcount = 1;
2996a9dd3643SJeff Mahoney 	th->t_super = sb;
29971da177e4SLinus Torvalds 
29981da177e4SLinus Torvalds       relock:
2999a9dd3643SJeff Mahoney 	lock_journal(sb);
30001da177e4SLinus Torvalds 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3001a9dd3643SJeff Mahoney 		unlock_journal(sb);
30021da177e4SLinus Torvalds 		retval = journal->j_errno;
30031da177e4SLinus Torvalds 		goto out_fail;
30041da177e4SLinus Torvalds 	}
30051da177e4SLinus Torvalds 	journal->j_bcount++;
30061da177e4SLinus Torvalds 
30071da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3008a9dd3643SJeff Mahoney 		unlock_journal(sb);
30098ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3010a9dd3643SJeff Mahoney 		reiserfs_wait_on_write_block(sb);
30118ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
3012a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_writers);
30131da177e4SLinus Torvalds 		goto relock;
30141da177e4SLinus Torvalds 	}
30151da177e4SLinus Torvalds 	now = get_seconds();
30161da177e4SLinus Torvalds 
30171da177e4SLinus Torvalds 	/* if there is no room in the journal OR
30181da177e4SLinus Torvalds 	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
30191da177e4SLinus Torvalds 	 ** we don't sleep if there aren't other writers
30201da177e4SLinus Torvalds 	 */
30211da177e4SLinus Torvalds 
30221da177e4SLinus Torvalds 	if ((!join && journal->j_must_wait > 0) ||
3023bd4c625cSLinus Torvalds 	    (!join
3024bd4c625cSLinus Torvalds 	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
3025bd4c625cSLinus Torvalds 	    || (!join && atomic_read(&journal->j_wcount) > 0
3026bd4c625cSLinus Torvalds 		&& journal->j_trans_start_time > 0
3027bd4c625cSLinus Torvalds 		&& (now - journal->j_trans_start_time) >
3028bd4c625cSLinus Torvalds 		journal->j_max_trans_age) || (!join
3029bd4c625cSLinus Torvalds 					      && atomic_read(&journal->j_jlock))
3030bd4c625cSLinus Torvalds 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
30311da177e4SLinus Torvalds 
30321da177e4SLinus Torvalds 		old_trans_id = journal->j_trans_id;
3033a9dd3643SJeff Mahoney 		unlock_journal(sb);	/* allow others to finish this transaction */
30341da177e4SLinus Torvalds 
30351da177e4SLinus Torvalds 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
30361da177e4SLinus Torvalds 		    journal->j_max_batch &&
3037bd4c625cSLinus Torvalds 		    ((journal->j_len + nblocks + 2) * 100) <
3038bd4c625cSLinus Torvalds 		    (journal->j_len_alloc * 75)) {
30391da177e4SLinus Torvalds 			if (atomic_read(&journal->j_wcount) > 10) {
30401da177e4SLinus Torvalds 				sched_count++;
3041a9dd3643SJeff Mahoney 				queue_log_writer(sb);
30421da177e4SLinus Torvalds 				goto relock;
30431da177e4SLinus Torvalds 			}
30441da177e4SLinus Torvalds 		}
30451da177e4SLinus Torvalds 		/* don't mess with joining the transaction if all we have to do is
30461da177e4SLinus Torvalds 		 * wait for someone else to do a commit
30471da177e4SLinus Torvalds 		 */
30481da177e4SLinus Torvalds 		if (atomic_read(&journal->j_jlock)) {
30491da177e4SLinus Torvalds 			while (journal->j_trans_id == old_trans_id &&
30501da177e4SLinus Torvalds 			       atomic_read(&journal->j_jlock)) {
3051a9dd3643SJeff Mahoney 				queue_log_writer(sb);
30521da177e4SLinus Torvalds 			}
30531da177e4SLinus Torvalds 			goto relock;
30541da177e4SLinus Torvalds 		}
3055a9dd3643SJeff Mahoney 		retval = journal_join(&myth, sb, 1);
30561da177e4SLinus Torvalds 		if (retval)
30571da177e4SLinus Torvalds 			goto out_fail;
30581da177e4SLinus Torvalds 
30591da177e4SLinus Torvalds 		/* someone might have ended the transaction while we joined */
30601da177e4SLinus Torvalds 		if (old_trans_id != journal->j_trans_id) {
3061a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, 0);
30621da177e4SLinus Torvalds 		} else {
3063a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
30641da177e4SLinus Torvalds 		}
30651da177e4SLinus Torvalds 
30661da177e4SLinus Torvalds 		if (retval)
30671da177e4SLinus Torvalds 			goto out_fail;
30681da177e4SLinus Torvalds 
3069a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_wcount);
30701da177e4SLinus Torvalds 		goto relock;
30711da177e4SLinus Torvalds 	}
30721da177e4SLinus Torvalds 	/* we are the first writer, set trans_id */
30731da177e4SLinus Torvalds 	if (journal->j_trans_start_time == 0) {
30741da177e4SLinus Torvalds 		journal->j_trans_start_time = get_seconds();
30751da177e4SLinus Torvalds 	}
30761da177e4SLinus Torvalds 	atomic_inc(&(journal->j_wcount));
30771da177e4SLinus Torvalds 	journal->j_len_alloc += nblocks;
30781da177e4SLinus Torvalds 	th->t_blocks_logged = 0;
30791da177e4SLinus Torvalds 	th->t_blocks_allocated = nblocks;
30801da177e4SLinus Torvalds 	th->t_trans_id = journal->j_trans_id;
3081a9dd3643SJeff Mahoney 	unlock_journal(sb);
30821da177e4SLinus Torvalds 	INIT_LIST_HEAD(&th->t_list);
30831da177e4SLinus Torvalds 	return 0;
30841da177e4SLinus Torvalds 
30851da177e4SLinus Torvalds       out_fail:
30861da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
30871da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
30881da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
30891da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
3090a9dd3643SJeff Mahoney 	th->t_super = sb;
30911da177e4SLinus Torvalds 	return retval;
30921da177e4SLinus Torvalds }
30931da177e4SLinus Torvalds 
3094bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3095bd4c625cSLinus Torvalds 								    super_block
3096bd4c625cSLinus Torvalds 								    *s,
3097bd4c625cSLinus Torvalds 								    int nblocks)
3098bd4c625cSLinus Torvalds {
30991da177e4SLinus Torvalds 	int ret;
31001da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *th;
31011da177e4SLinus Torvalds 
31021da177e4SLinus Torvalds 	/* if we're nesting into an existing transaction.  It will be
31031da177e4SLinus Torvalds 	 ** persistent on its own
31041da177e4SLinus Torvalds 	 */
31051da177e4SLinus Torvalds 	if (reiserfs_transaction_running(s)) {
31061da177e4SLinus Torvalds 		th = current->journal_info;
31071da177e4SLinus Torvalds 		th->t_refcount++;
310814a61442SEric Sesterhenn 		BUG_ON(th->t_refcount < 2);
310914a61442SEric Sesterhenn 
31101da177e4SLinus Torvalds 		return th;
31111da177e4SLinus Torvalds 	}
3112d739b42bSPekka Enberg 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
31131da177e4SLinus Torvalds 	if (!th)
31141da177e4SLinus Torvalds 		return NULL;
31151da177e4SLinus Torvalds 	ret = journal_begin(th, s, nblocks);
31161da177e4SLinus Torvalds 	if (ret) {
3117d739b42bSPekka Enberg 		kfree(th);
31181da177e4SLinus Torvalds 		return NULL;
31191da177e4SLinus Torvalds 	}
31201da177e4SLinus Torvalds 
31211da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_persistent_trans++;
31221da177e4SLinus Torvalds 	return th;
31231da177e4SLinus Torvalds }
31241da177e4SLinus Torvalds 
3125bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3126bd4c625cSLinus Torvalds {
31271da177e4SLinus Torvalds 	struct super_block *s = th->t_super;
31281da177e4SLinus Torvalds 	int ret = 0;
31291da177e4SLinus Torvalds 	if (th->t_trans_id)
31301da177e4SLinus Torvalds 		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
31311da177e4SLinus Torvalds 	else
31321da177e4SLinus Torvalds 		ret = -EIO;
31331da177e4SLinus Torvalds 	if (th->t_refcount == 0) {
31341da177e4SLinus Torvalds 		SB_JOURNAL(s)->j_persistent_trans--;
3135d739b42bSPekka Enberg 		kfree(th);
31361da177e4SLinus Torvalds 	}
31371da177e4SLinus Torvalds 	return ret;
31381da177e4SLinus Torvalds }
31391da177e4SLinus Torvalds 
3140bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
3141a9dd3643SJeff Mahoney 			struct super_block *sb, unsigned long nblocks)
3142bd4c625cSLinus Torvalds {
31431da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31441da177e4SLinus Torvalds 
31451da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31461da177e4SLinus Torvalds 	 ** pointer
31471da177e4SLinus Torvalds 	 */
31481da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
314914a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3150a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
31511da177e4SLinus Torvalds }
31521da177e4SLinus Torvalds 
3153bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th,
3154a9dd3643SJeff Mahoney 		       struct super_block *sb, unsigned long nblocks)
3155bd4c625cSLinus Torvalds {
31561da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31571da177e4SLinus Torvalds 
31581da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31591da177e4SLinus Torvalds 	 ** pointer
31601da177e4SLinus Torvalds 	 */
31611da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
316214a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3163a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
31641da177e4SLinus Torvalds }
31651da177e4SLinus Torvalds 
3166bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th,
3167a9dd3643SJeff Mahoney 		  struct super_block *sb, unsigned long nblocks)
3168bd4c625cSLinus Torvalds {
31691da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31701da177e4SLinus Torvalds 	int ret;
31711da177e4SLinus Torvalds 
31721da177e4SLinus Torvalds 	th->t_handle_save = NULL;
31731da177e4SLinus Torvalds 	if (cur_th) {
31741da177e4SLinus Torvalds 		/* we are nesting into the current transaction */
3175a9dd3643SJeff Mahoney 		if (cur_th->t_super == sb) {
31761da177e4SLinus Torvalds 			BUG_ON(!cur_th->t_refcount);
31771da177e4SLinus Torvalds 			cur_th->t_refcount++;
31781da177e4SLinus Torvalds 			memcpy(th, cur_th, sizeof(*th));
31791da177e4SLinus Torvalds 			if (th->t_refcount <= 1)
3180a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2005",
318145b03d5eSJeff Mahoney 						 "BAD: refcount <= 1, but "
318245b03d5eSJeff Mahoney 						 "journal_info != 0");
31831da177e4SLinus Torvalds 			return 0;
31841da177e4SLinus Torvalds 		} else {
31851da177e4SLinus Torvalds 			/* we've ended up with a handle from a different filesystem.
31861da177e4SLinus Torvalds 			 ** save it and restore on journal_end.  This should never
31871da177e4SLinus Torvalds 			 ** really happen...
31881da177e4SLinus Torvalds 			 */
3189a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2100",
319045b03d5eSJeff Mahoney 					 "nesting info a different FS");
31911da177e4SLinus Torvalds 			th->t_handle_save = current->journal_info;
31921da177e4SLinus Torvalds 			current->journal_info = th;
31931da177e4SLinus Torvalds 		}
31941da177e4SLinus Torvalds 	} else {
31951da177e4SLinus Torvalds 		current->journal_info = th;
31961da177e4SLinus Torvalds 	}
3197a9dd3643SJeff Mahoney 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
319814a61442SEric Sesterhenn 	BUG_ON(current->journal_info != th);
31991da177e4SLinus Torvalds 
32001da177e4SLinus Torvalds 	/* I guess this boils down to being the reciprocal of clm-2100 above.
32011da177e4SLinus Torvalds 	 * If do_journal_begin_r fails, we need to put it back, since journal_end
32021da177e4SLinus Torvalds 	 * won't be called to do it. */
32031da177e4SLinus Torvalds 	if (ret)
32041da177e4SLinus Torvalds 		current->journal_info = th->t_handle_save;
32051da177e4SLinus Torvalds 	else
32061da177e4SLinus Torvalds 		BUG_ON(!th->t_refcount);
32071da177e4SLinus Torvalds 
32081da177e4SLinus Torvalds 	return ret;
32091da177e4SLinus Torvalds }
32101da177e4SLinus Torvalds 
32111da177e4SLinus Torvalds /*
32121da177e4SLinus Torvalds ** puts bh into the current transaction.  If it was already there, reorders removes the
32131da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
32141da177e4SLinus Torvalds **
32151da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
32161da177e4SLinus Torvalds ** transaction is committed.
32171da177e4SLinus Torvalds **
32181da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
32191da177e4SLinus Torvalds */
3220bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3221a9dd3643SJeff Mahoney 		       struct super_block *sb, struct buffer_head *bh)
3222bd4c625cSLinus Torvalds {
3223a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
32241da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
32251da177e4SLinus Torvalds 	int count_already_incd = 0;
32261da177e4SLinus Torvalds 	int prepared = 0;
32271da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
32281da177e4SLinus Torvalds 
3229a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.mark_dirty);
32301da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3231c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3232c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
32331da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
32341da177e4SLinus Torvalds 	}
32351da177e4SLinus Torvalds 
32361da177e4SLinus Torvalds 	prepared = test_clear_buffer_journal_prepared(bh);
32371da177e4SLinus Torvalds 	clear_buffer_journal_restore_dirty(bh);
32381da177e4SLinus Torvalds 	/* already in this transaction, we are done */
32391da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
3240a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_already);
32411da177e4SLinus Torvalds 		return 0;
32421da177e4SLinus Torvalds 	}
32431da177e4SLinus Torvalds 
32441da177e4SLinus Torvalds 	/* this must be turned into a panic instead of a warning.  We can't allow
32451da177e4SLinus Torvalds 	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
32461da177e4SLinus Torvalds 	 ** could get to disk too early.  NOT GOOD.
32471da177e4SLinus Torvalds 	 */
32481da177e4SLinus Torvalds 	if (!prepared || buffer_dirty(bh)) {
3249a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1777",
325045b03d5eSJeff Mahoney 				 "buffer %llu bad state "
32511da177e4SLinus Torvalds 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3252bd4c625cSLinus Torvalds 				 (unsigned long long)bh->b_blocknr,
3253bd4c625cSLinus Torvalds 				 prepared ? ' ' : '!',
32541da177e4SLinus Torvalds 				 buffer_locked(bh) ? ' ' : '!',
32551da177e4SLinus Torvalds 				 buffer_dirty(bh) ? ' ' : '!',
32561da177e4SLinus Torvalds 				 buffer_journal_dirty(bh) ? ' ' : '!');
32571da177e4SLinus Torvalds 	}
32581da177e4SLinus Torvalds 
32591da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) <= 0) {
3260a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1409",
326145b03d5eSJeff Mahoney 				 "returning because j_wcount was %d",
3262bd4c625cSLinus Torvalds 				 atomic_read(&(journal->j_wcount)));
32631da177e4SLinus Torvalds 		return 1;
32641da177e4SLinus Torvalds 	}
32651da177e4SLinus Torvalds 	/* this error means I've screwed up, and we've overflowed the transaction.
32661da177e4SLinus Torvalds 	 ** Nothing can be done here, except make the FS readonly or panic.
32671da177e4SLinus Torvalds 	 */
32681da177e4SLinus Torvalds 	if (journal->j_len >= journal->j_trans_max) {
3269c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1413",
3270c3a9c210SJeff Mahoney 			       "j_len (%lu) is too big",
3271bd4c625cSLinus Torvalds 			       journal->j_len);
32721da177e4SLinus Torvalds 	}
32731da177e4SLinus Torvalds 
32741da177e4SLinus Torvalds 	if (buffer_journal_dirty(bh)) {
32751da177e4SLinus Torvalds 		count_already_incd = 1;
3276a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
32771da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
32781da177e4SLinus Torvalds 	}
32791da177e4SLinus Torvalds 
32801da177e4SLinus Torvalds 	if (journal->j_len > journal->j_len_alloc) {
32811da177e4SLinus Torvalds 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
32821da177e4SLinus Torvalds 	}
32831da177e4SLinus Torvalds 
32841da177e4SLinus Torvalds 	set_buffer_journaled(bh);
32851da177e4SLinus Torvalds 
32861da177e4SLinus Torvalds 	/* now put this guy on the end */
32871da177e4SLinus Torvalds 	if (!cn) {
3288a9dd3643SJeff Mahoney 		cn = get_cnode(sb);
32891da177e4SLinus Torvalds 		if (!cn) {
3290a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-4", "get_cnode failed!");
32911da177e4SLinus Torvalds 		}
32921da177e4SLinus Torvalds 
32931da177e4SLinus Torvalds 		if (th->t_blocks_logged == th->t_blocks_allocated) {
32941da177e4SLinus Torvalds 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
32951da177e4SLinus Torvalds 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
32961da177e4SLinus Torvalds 		}
32971da177e4SLinus Torvalds 		th->t_blocks_logged++;
32981da177e4SLinus Torvalds 		journal->j_len++;
32991da177e4SLinus Torvalds 
33001da177e4SLinus Torvalds 		cn->bh = bh;
33011da177e4SLinus Torvalds 		cn->blocknr = bh->b_blocknr;
3302a9dd3643SJeff Mahoney 		cn->sb = sb;
33031da177e4SLinus Torvalds 		cn->jlist = NULL;
33041da177e4SLinus Torvalds 		insert_journal_hash(journal->j_hash_table, cn);
33051da177e4SLinus Torvalds 		if (!count_already_incd) {
33061da177e4SLinus Torvalds 			get_bh(bh);
33071da177e4SLinus Torvalds 		}
33081da177e4SLinus Torvalds 	}
33091da177e4SLinus Torvalds 	cn->next = NULL;
33101da177e4SLinus Torvalds 	cn->prev = journal->j_last;
33111da177e4SLinus Torvalds 	cn->bh = bh;
33121da177e4SLinus Torvalds 	if (journal->j_last) {
33131da177e4SLinus Torvalds 		journal->j_last->next = cn;
33141da177e4SLinus Torvalds 		journal->j_last = cn;
33151da177e4SLinus Torvalds 	} else {
33161da177e4SLinus Torvalds 		journal->j_first = cn;
33171da177e4SLinus Torvalds 		journal->j_last = cn;
33181da177e4SLinus Torvalds 	}
3319033369d1SArtem Bityutskiy 	reiserfs_schedule_old_flush(sb);
33201da177e4SLinus Torvalds 	return 0;
33211da177e4SLinus Torvalds }
33221da177e4SLinus Torvalds 
3323bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th,
3324a9dd3643SJeff Mahoney 		struct super_block *sb, unsigned long nblocks)
3325bd4c625cSLinus Torvalds {
33261da177e4SLinus Torvalds 	if (!current->journal_info && th->t_refcount > 1)
3327a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "REISER-NESTING",
332845b03d5eSJeff Mahoney 				 "th NULL, refcount %d", th->t_refcount);
33291da177e4SLinus Torvalds 
33301da177e4SLinus Torvalds 	if (!th->t_trans_id) {
33311da177e4SLinus Torvalds 		WARN_ON(1);
33321da177e4SLinus Torvalds 		return -EIO;
33331da177e4SLinus Torvalds 	}
33341da177e4SLinus Torvalds 
33351da177e4SLinus Torvalds 	th->t_refcount--;
33361da177e4SLinus Torvalds 	if (th->t_refcount > 0) {
3337bd4c625cSLinus Torvalds 		struct reiserfs_transaction_handle *cur_th =
3338bd4c625cSLinus Torvalds 		    current->journal_info;
33391da177e4SLinus Torvalds 
33401da177e4SLinus Torvalds 		/* we aren't allowed to close a nested transaction on a different
33411da177e4SLinus Torvalds 		 ** filesystem from the one in the task struct
33421da177e4SLinus Torvalds 		 */
334314a61442SEric Sesterhenn 		BUG_ON(cur_th->t_super != th->t_super);
33441da177e4SLinus Torvalds 
33451da177e4SLinus Torvalds 		if (th != cur_th) {
33461da177e4SLinus Torvalds 			memcpy(current->journal_info, th, sizeof(*th));
33471da177e4SLinus Torvalds 			th->t_trans_id = 0;
33481da177e4SLinus Torvalds 		}
33491da177e4SLinus Torvalds 		return 0;
33501da177e4SLinus Torvalds 	} else {
3351a9dd3643SJeff Mahoney 		return do_journal_end(th, sb, nblocks, 0);
33521da177e4SLinus Torvalds 	}
33531da177e4SLinus Torvalds }
33541da177e4SLinus Torvalds 
33551da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters.
33561da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list
33571da177e4SLinus Torvalds **
33581da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted
33591da177e4SLinus Torvalds **
33601da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise
33611da177e4SLinus Torvalds */
3362a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb,
3363bd4c625cSLinus Torvalds 				   b_blocknr_t blocknr, int already_cleaned)
3364bd4c625cSLinus Torvalds {
33651da177e4SLinus Torvalds 	struct buffer_head *bh;
33661da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
3367a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
33681da177e4SLinus Torvalds 	int ret = 0;
33691da177e4SLinus Torvalds 
3370a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
33711da177e4SLinus Torvalds 	if (!cn || !cn->bh) {
33721da177e4SLinus Torvalds 		return ret;
33731da177e4SLinus Torvalds 	}
33741da177e4SLinus Torvalds 	bh = cn->bh;
33751da177e4SLinus Torvalds 	if (cn->prev) {
33761da177e4SLinus Torvalds 		cn->prev->next = cn->next;
33771da177e4SLinus Torvalds 	}
33781da177e4SLinus Torvalds 	if (cn->next) {
33791da177e4SLinus Torvalds 		cn->next->prev = cn->prev;
33801da177e4SLinus Torvalds 	}
33811da177e4SLinus Torvalds 	if (cn == journal->j_first) {
33821da177e4SLinus Torvalds 		journal->j_first = cn->next;
33831da177e4SLinus Torvalds 	}
33841da177e4SLinus Torvalds 	if (cn == journal->j_last) {
33851da177e4SLinus Torvalds 		journal->j_last = cn->prev;
33861da177e4SLinus Torvalds 	}
33871da177e4SLinus Torvalds 	if (bh)
3388a9dd3643SJeff Mahoney 		remove_journal_hash(sb, journal->j_hash_table, NULL,
3389bd4c625cSLinus Torvalds 				    bh->b_blocknr, 0);
33901da177e4SLinus Torvalds 	clear_buffer_journaled(bh);	/* don't log this one */
33911da177e4SLinus Torvalds 
33921da177e4SLinus Torvalds 	if (!already_cleaned) {
33931da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
33941da177e4SLinus Torvalds 		clear_buffer_dirty(bh);
33951da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
33961da177e4SLinus Torvalds 		put_bh(bh);
33971da177e4SLinus Torvalds 		if (atomic_read(&(bh->b_count)) < 0) {
3398a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1752",
339945b03d5eSJeff Mahoney 					 "b_count < 0");
34001da177e4SLinus Torvalds 		}
34011da177e4SLinus Torvalds 		ret = 1;
34021da177e4SLinus Torvalds 	}
34031da177e4SLinus Torvalds 	journal->j_len--;
34041da177e4SLinus Torvalds 	journal->j_len_alloc--;
3405a9dd3643SJeff Mahoney 	free_cnode(sb, cn);
34061da177e4SLinus Torvalds 	return ret;
34071da177e4SLinus Torvalds }
34081da177e4SLinus Torvalds 
34091da177e4SLinus Torvalds /*
34101da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the
34110779bf2dSMatt LaPlante ** transactions that include it are committed to disk.
34121da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty,
34131da177e4SLinus Torvalds ** and 0 if you aren't
34141da177e4SLinus Torvalds **
34151da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
34161da177e4SLinus Torvalds ** blocks for a given transaction on disk
34171da177e4SLinus Torvalds **
34181da177e4SLinus Torvalds */
3419bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn)
3420bd4c625cSLinus Torvalds {
34211da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
34221da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
34231da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur = cn->hprev;
34241da177e4SLinus Torvalds 	int can_dirty = 1;
34251da177e4SLinus Torvalds 
34261da177e4SLinus Torvalds 	/* first test hprev.  These are all newer than cn, so any node here
34271da177e4SLinus Torvalds 	 ** with the same block number and dev means this node can't be sent
34281da177e4SLinus Torvalds 	 ** to disk right now.
34291da177e4SLinus Torvalds 	 */
34301da177e4SLinus Torvalds 	while (cur && can_dirty) {
34311da177e4SLinus Torvalds 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
34321da177e4SLinus Torvalds 		    cur->blocknr == blocknr) {
34331da177e4SLinus Torvalds 			can_dirty = 0;
34341da177e4SLinus Torvalds 		}
34351da177e4SLinus Torvalds 		cur = cur->hprev;
34361da177e4SLinus Torvalds 	}
34371da177e4SLinus Torvalds 	/* then test hnext.  These are all older than cn.  As long as they
34381da177e4SLinus Torvalds 	 ** are committed to the log, it is safe to write cn to disk
34391da177e4SLinus Torvalds 	 */
34401da177e4SLinus Torvalds 	cur = cn->hnext;
34411da177e4SLinus Torvalds 	while (cur && can_dirty) {
34421da177e4SLinus Torvalds 		if (cur->jlist && cur->jlist->j_len > 0 &&
34431da177e4SLinus Torvalds 		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
34441da177e4SLinus Torvalds 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
34451da177e4SLinus Torvalds 			can_dirty = 0;
34461da177e4SLinus Torvalds 		}
34471da177e4SLinus Torvalds 		cur = cur->hnext;
34481da177e4SLinus Torvalds 	}
34491da177e4SLinus Torvalds 	return can_dirty;
34501da177e4SLinus Torvalds }
34511da177e4SLinus Torvalds 
34521da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk
34530779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning
34541da177e4SLinus Torvalds */
3455bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th,
3456a9dd3643SJeff Mahoney 		     struct super_block *sb, unsigned long nblocks)
3457bd4c625cSLinus Torvalds {
3458a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
34591da177e4SLinus Torvalds 
34601da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
34611da177e4SLinus Torvalds 	/* you can sync while nested, very, very bad */
346214a61442SEric Sesterhenn 	BUG_ON(th->t_refcount > 1);
34631da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3464a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3465bd4c625cSLinus Torvalds 					     1);
3466a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
34671da177e4SLinus Torvalds 	}
3468a9dd3643SJeff Mahoney 	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
34691da177e4SLinus Torvalds }
34701da177e4SLinus Torvalds 
34711da177e4SLinus Torvalds /*
34721da177e4SLinus Torvalds ** writeback the pending async commits to disk
34731da177e4SLinus Torvalds */
3474c4028958SDavid Howells static void flush_async_commits(struct work_struct *work)
3475bd4c625cSLinus Torvalds {
3476c4028958SDavid Howells 	struct reiserfs_journal *journal =
3477c4028958SDavid Howells 		container_of(work, struct reiserfs_journal, j_work.work);
3478a9dd3643SJeff Mahoney 	struct super_block *sb = journal->j_work_sb;
34791da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
34801da177e4SLinus Torvalds 	struct list_head *entry;
34811da177e4SLinus Torvalds 
34828ebc4232SFrederic Weisbecker 	reiserfs_write_lock(sb);
34831da177e4SLinus Torvalds 	if (!list_empty(&journal->j_journal_list)) {
34841da177e4SLinus Torvalds 		/* last entry is the youngest, commit it and you get everything */
34851da177e4SLinus Torvalds 		entry = journal->j_journal_list.prev;
34861da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
3487a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
34881da177e4SLinus Torvalds 	}
34898ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
34901da177e4SLinus Torvalds }
34911da177e4SLinus Torvalds 
34921da177e4SLinus Torvalds /*
34931da177e4SLinus Torvalds ** flushes any old transactions to disk
34941da177e4SLinus Torvalds ** ends the current transaction if it is too old
34951da177e4SLinus Torvalds */
349625729b0eSArtem Bityutskiy void reiserfs_flush_old_commits(struct super_block *sb)
3497bd4c625cSLinus Torvalds {
34981da177e4SLinus Torvalds 	time_t now;
34991da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
3500a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35011da177e4SLinus Torvalds 
35021da177e4SLinus Torvalds 	now = get_seconds();
35031da177e4SLinus Torvalds 	/* safety check so we don't flush while we are replaying the log during
35041da177e4SLinus Torvalds 	 * mount
35051da177e4SLinus Torvalds 	 */
350625729b0eSArtem Bityutskiy 	if (list_empty(&journal->j_journal_list))
350725729b0eSArtem Bityutskiy 		return;
35081da177e4SLinus Torvalds 
35091da177e4SLinus Torvalds 	/* check the current transaction.  If there are no writers, and it is
35101da177e4SLinus Torvalds 	 * too old, finish it, and force the commit blocks to disk
35111da177e4SLinus Torvalds 	 */
35121da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) <= 0 &&
35131da177e4SLinus Torvalds 	    journal->j_trans_start_time > 0 &&
35141da177e4SLinus Torvalds 	    journal->j_len > 0 &&
3515bd4c625cSLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3516a9dd3643SJeff Mahoney 		if (!journal_join(&th, sb, 1)) {
3517a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
3518a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
3519bd4c625cSLinus Torvalds 						     1);
3520a9dd3643SJeff Mahoney 			journal_mark_dirty(&th, sb,
3521a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
35221da177e4SLinus Torvalds 
35231da177e4SLinus Torvalds 			/* we're only being called from kreiserfsd, it makes no sense to do
35241da177e4SLinus Torvalds 			 ** an async commit so that kreiserfsd can do it later
35251da177e4SLinus Torvalds 			 */
3526a9dd3643SJeff Mahoney 			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
35271da177e4SLinus Torvalds 		}
35281da177e4SLinus Torvalds 	}
35291da177e4SLinus Torvalds }
35301da177e4SLinus Torvalds 
35311da177e4SLinus Torvalds /*
35321da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
35331da177e4SLinus Torvalds **
35341da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
35351da177e4SLinus Torvalds ** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
35361da177e4SLinus Torvalds ** flushes the commit list and returns 0.
35371da177e4SLinus Torvalds **
35381da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
35391da177e4SLinus Torvalds **
35401da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
35411da177e4SLinus Torvalds */
3542bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th,
3543a9dd3643SJeff Mahoney 			     struct super_block *sb, unsigned long nblocks,
3544bd4c625cSLinus Torvalds 			     int flags)
3545bd4c625cSLinus Torvalds {
35461da177e4SLinus Torvalds 
35471da177e4SLinus Torvalds 	time_t now;
35481da177e4SLinus Torvalds 	int flush = flags & FLUSH_ALL;
35491da177e4SLinus Torvalds 	int commit_now = flags & COMMIT_NOW;
35501da177e4SLinus Torvalds 	int wait_on_commit = flags & WAIT;
35511da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
3552a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35531da177e4SLinus Torvalds 
35541da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
35551da177e4SLinus Torvalds 
35561da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3557c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3558c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
35591da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
35601da177e4SLinus Torvalds 	}
35611da177e4SLinus Torvalds 
35621da177e4SLinus Torvalds 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
35631da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
35641da177e4SLinus Torvalds 		atomic_dec(&(journal->j_wcount));
35651da177e4SLinus Torvalds 	}
35661da177e4SLinus Torvalds 
35671da177e4SLinus Torvalds 	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
35681da177e4SLinus Torvalds 	 ** will be dealt with by next transaction that actually writes something, but should be taken
35691da177e4SLinus Torvalds 	 ** care of in this trans
35701da177e4SLinus Torvalds 	 */
357114a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
357214a61442SEric Sesterhenn 
35731da177e4SLinus Torvalds 	/* if wcount > 0, and we are called to with flush or commit_now,
35741da177e4SLinus Torvalds 	 ** we wait on j_join_wait.  We will wake up when the last writer has
35751da177e4SLinus Torvalds 	 ** finished the transaction, and started it on its way to the disk.
35761da177e4SLinus Torvalds 	 ** Then, we flush the commit or journal list, and just return 0
35771da177e4SLinus Torvalds 	 ** because the rest of journal end was already done for this transaction.
35781da177e4SLinus Torvalds 	 */
35791da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {
35801da177e4SLinus Torvalds 		if (flush || commit_now) {
35811da177e4SLinus Torvalds 			unsigned trans_id;
35821da177e4SLinus Torvalds 
35831da177e4SLinus Torvalds 			jl = journal->j_current_jl;
35841da177e4SLinus Torvalds 			trans_id = jl->j_trans_id;
35851da177e4SLinus Torvalds 			if (wait_on_commit)
35861da177e4SLinus Torvalds 				jl->j_state |= LIST_COMMIT_PENDING;
35871da177e4SLinus Torvalds 			atomic_set(&(journal->j_jlock), 1);
35881da177e4SLinus Torvalds 			if (flush) {
35891da177e4SLinus Torvalds 				journal->j_next_full_flush = 1;
35901da177e4SLinus Torvalds 			}
3591a9dd3643SJeff Mahoney 			unlock_journal(sb);
35921da177e4SLinus Torvalds 
35931da177e4SLinus Torvalds 			/* sleep while the current transaction is still j_jlocked */
35941da177e4SLinus Torvalds 			while (journal->j_trans_id == trans_id) {
35951da177e4SLinus Torvalds 				if (atomic_read(&journal->j_jlock)) {
3596a9dd3643SJeff Mahoney 					queue_log_writer(sb);
35971da177e4SLinus Torvalds 				} else {
3598a9dd3643SJeff Mahoney 					lock_journal(sb);
35991da177e4SLinus Torvalds 					if (journal->j_trans_id == trans_id) {
3600bd4c625cSLinus Torvalds 						atomic_set(&(journal->j_jlock),
3601bd4c625cSLinus Torvalds 							   1);
36021da177e4SLinus Torvalds 					}
3603a9dd3643SJeff Mahoney 					unlock_journal(sb);
36041da177e4SLinus Torvalds 				}
36051da177e4SLinus Torvalds 			}
360614a61442SEric Sesterhenn 			BUG_ON(journal->j_trans_id == trans_id);
360714a61442SEric Sesterhenn 
3608bd4c625cSLinus Torvalds 			if (commit_now
3609a9dd3643SJeff Mahoney 			    && journal_list_still_alive(sb, trans_id)
3610bd4c625cSLinus Torvalds 			    && wait_on_commit) {
3611a9dd3643SJeff Mahoney 				flush_commit_list(sb, jl, 1);
36121da177e4SLinus Torvalds 			}
36131da177e4SLinus Torvalds 			return 0;
36141da177e4SLinus Torvalds 		}
3615a9dd3643SJeff Mahoney 		unlock_journal(sb);
36161da177e4SLinus Torvalds 		return 0;
36171da177e4SLinus Torvalds 	}
36181da177e4SLinus Torvalds 
36191da177e4SLinus Torvalds 	/* deal with old transactions where we are the last writers */
36201da177e4SLinus Torvalds 	now = get_seconds();
36211da177e4SLinus Torvalds 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
36221da177e4SLinus Torvalds 		commit_now = 1;
36231da177e4SLinus Torvalds 		journal->j_next_async_flush = 1;
36241da177e4SLinus Torvalds 	}
36251da177e4SLinus Torvalds 	/* don't batch when someone is waiting on j_join_wait */
36261da177e4SLinus Torvalds 	/* don't batch when syncing the commit or flushing the whole trans */
3627bd4c625cSLinus Torvalds 	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3628bd4c625cSLinus Torvalds 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3629bd4c625cSLinus Torvalds 	    && journal->j_len_alloc < journal->j_max_batch
3630bd4c625cSLinus Torvalds 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
36311da177e4SLinus Torvalds 		journal->j_bcount++;
3632a9dd3643SJeff Mahoney 		unlock_journal(sb);
36331da177e4SLinus Torvalds 		return 0;
36341da177e4SLinus Torvalds 	}
36351da177e4SLinus Torvalds 
3636a9dd3643SJeff Mahoney 	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3637a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-003",
3638c3a9c210SJeff Mahoney 			       "j_start (%ld) is too high",
3639bd4c625cSLinus Torvalds 			       journal->j_start);
36401da177e4SLinus Torvalds 	}
36411da177e4SLinus Torvalds 	return 1;
36421da177e4SLinus Torvalds }
36431da177e4SLinus Torvalds 
36441da177e4SLinus Torvalds /*
36451da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe.
36461da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
36471da177e4SLinus Torvalds **
36481da177e4SLinus Torvalds ** otherwise:
36491da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
36501da177e4SLinus Torvalds ** before this transaction has finished.
36511da177e4SLinus Torvalds **
36521da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
36531da177e4SLinus Torvalds ** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
36541da177e4SLinus Torvalds ** the block can't be reallocated yet.
36551da177e4SLinus Torvalds **
36561da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
36571da177e4SLinus Torvalds */
3658bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th,
3659a9dd3643SJeff Mahoney 		       struct super_block *sb, b_blocknr_t blocknr)
3660bd4c625cSLinus Torvalds {
3661a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
36621da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
36631da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
36641da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
36651da177e4SLinus Torvalds 	int cleaned = 0;
36661da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
36671da177e4SLinus Torvalds 
3668a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
36691da177e4SLinus Torvalds 	if (cn && cn->bh) {
36701da177e4SLinus Torvalds 		bh = cn->bh;
36711da177e4SLinus Torvalds 		get_bh(bh);
36721da177e4SLinus Torvalds 	}
36731da177e4SLinus Torvalds 	/* if it is journal new, we just remove it from this transaction */
36741da177e4SLinus Torvalds 	if (bh && buffer_journal_new(bh)) {
36751da177e4SLinus Torvalds 		clear_buffer_journal_new(bh);
36761da177e4SLinus Torvalds 		clear_prepared_bits(bh);
36771da177e4SLinus Torvalds 		reiserfs_clean_and_file_buffer(bh);
3678a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
36791da177e4SLinus Torvalds 	} else {
36801da177e4SLinus Torvalds 		/* set the bit for this block in the journal bitmap for this transaction */
36811da177e4SLinus Torvalds 		jb = journal->j_current_jl->j_list_bitmap;
36821da177e4SLinus Torvalds 		if (!jb) {
3683a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-1702",
3684c3a9c210SJeff Mahoney 				       "journal_list_bitmap is NULL");
36851da177e4SLinus Torvalds 		}
3686a9dd3643SJeff Mahoney 		set_bit_in_list_bitmap(sb, blocknr, jb);
36871da177e4SLinus Torvalds 
36881da177e4SLinus Torvalds 		/* Note, the entire while loop is not allowed to schedule.  */
36891da177e4SLinus Torvalds 
36901da177e4SLinus Torvalds 		if (bh) {
36911da177e4SLinus Torvalds 			clear_prepared_bits(bh);
36921da177e4SLinus Torvalds 			reiserfs_clean_and_file_buffer(bh);
36931da177e4SLinus Torvalds 		}
3694a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
36951da177e4SLinus Torvalds 
36961da177e4SLinus Torvalds 		/* find all older transactions with this block, make sure they don't try to write it out */
3697a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3698bd4c625cSLinus Torvalds 					  blocknr);
36991da177e4SLinus Torvalds 		while (cn) {
3700a9dd3643SJeff Mahoney 			if (sb == cn->sb && blocknr == cn->blocknr) {
37011da177e4SLinus Torvalds 				set_bit(BLOCK_FREED, &cn->state);
37021da177e4SLinus Torvalds 				if (cn->bh) {
37031da177e4SLinus Torvalds 					if (!cleaned) {
37041da177e4SLinus Torvalds 						/* remove_from_transaction will brelse the buffer if it was
37051da177e4SLinus Torvalds 						 ** in the current trans
37061da177e4SLinus Torvalds 						 */
3707bd4c625cSLinus Torvalds 						clear_buffer_journal_dirty(cn->
3708bd4c625cSLinus Torvalds 									   bh);
37091da177e4SLinus Torvalds 						clear_buffer_dirty(cn->bh);
3710bd4c625cSLinus Torvalds 						clear_buffer_journal_test(cn->
3711bd4c625cSLinus Torvalds 									  bh);
37121da177e4SLinus Torvalds 						cleaned = 1;
37131da177e4SLinus Torvalds 						put_bh(cn->bh);
3714bd4c625cSLinus Torvalds 						if (atomic_read
3715bd4c625cSLinus Torvalds 						    (&(cn->bh->b_count)) < 0) {
3716a9dd3643SJeff Mahoney 							reiserfs_warning(sb,
371745b03d5eSJeff Mahoney 								 "journal-2138",
371845b03d5eSJeff Mahoney 								 "cn->bh->b_count < 0");
37191da177e4SLinus Torvalds 						}
37201da177e4SLinus Torvalds 					}
37211da177e4SLinus Torvalds 					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
3722bd4c625cSLinus Torvalds 						atomic_dec(&
3723bd4c625cSLinus Torvalds 							   (cn->jlist->
3724bd4c625cSLinus Torvalds 							    j_nonzerolen));
37251da177e4SLinus Torvalds 					}
37261da177e4SLinus Torvalds 					cn->bh = NULL;
37271da177e4SLinus Torvalds 				}
37281da177e4SLinus Torvalds 			}
37291da177e4SLinus Torvalds 			cn = cn->hnext;
37301da177e4SLinus Torvalds 		}
37311da177e4SLinus Torvalds 	}
37321da177e4SLinus Torvalds 
3733398c95bdSChris Mason 	if (bh)
3734398c95bdSChris Mason 		release_buffer_page(bh); /* get_hash grabs the buffer */
37351da177e4SLinus Torvalds 	return 0;
37361da177e4SLinus Torvalds }
37371da177e4SLinus Torvalds 
3738bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode)
3739bd4c625cSLinus Torvalds {
37401da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
37411da177e4SLinus Torvalds 	REISERFS_I(inode)->i_jl = journal->j_current_jl;
37421da177e4SLinus Torvalds 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
37431da177e4SLinus Torvalds }
37441da177e4SLinus Torvalds 
37451da177e4SLinus Torvalds /*
37461da177e4SLinus Torvalds  * returns -1 on error, 0 if no commits/barriers were done and 1
37471da177e4SLinus Torvalds  * if a transaction was actually committed and the barrier was done
37481da177e4SLinus Torvalds  */
37491da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id,
37501da177e4SLinus Torvalds 			     struct reiserfs_journal_list *jl)
37511da177e4SLinus Torvalds {
37521da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
37531da177e4SLinus Torvalds 	struct super_block *sb = inode->i_sb;
37541da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37551da177e4SLinus Torvalds 	int ret = 0;
37561da177e4SLinus Torvalds 
37571da177e4SLinus Torvalds 	/* is it from the current transaction, or from an unknown transaction? */
37581da177e4SLinus Torvalds 	if (id == journal->j_trans_id) {
37591da177e4SLinus Torvalds 		jl = journal->j_current_jl;
37601da177e4SLinus Torvalds 		/* try to let other writers come in and grow this transaction */
37611da177e4SLinus Torvalds 		let_transaction_grow(sb, id);
37621da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
37631da177e4SLinus Torvalds 			goto flush_commit_only;
37641da177e4SLinus Torvalds 		}
37651da177e4SLinus Torvalds 
37661da177e4SLinus Torvalds 		ret = journal_begin(&th, sb, 1);
37671da177e4SLinus Torvalds 		if (ret)
37681da177e4SLinus Torvalds 			return ret;
37691da177e4SLinus Torvalds 
37701da177e4SLinus Torvalds 		/* someone might have ended this transaction while we joined */
37711da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
3772bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3773bd4c625cSLinus Torvalds 						     1);
37741da177e4SLinus Torvalds 			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
37751da177e4SLinus Torvalds 			ret = journal_end(&th, sb, 1);
37761da177e4SLinus Torvalds 			goto flush_commit_only;
37771da177e4SLinus Torvalds 		}
37781da177e4SLinus Torvalds 
37791da177e4SLinus Torvalds 		ret = journal_end_sync(&th, sb, 1);
37801da177e4SLinus Torvalds 		if (!ret)
37811da177e4SLinus Torvalds 			ret = 1;
37821da177e4SLinus Torvalds 
37831da177e4SLinus Torvalds 	} else {
37841da177e4SLinus Torvalds 		/* this gets tricky, we have to make sure the journal list in
37851da177e4SLinus Torvalds 		 * the inode still exists.  We know the list is still around
37861da177e4SLinus Torvalds 		 * if we've got a larger transaction id than the oldest list
37871da177e4SLinus Torvalds 		 */
37881da177e4SLinus Torvalds 	      flush_commit_only:
37891da177e4SLinus Torvalds 		if (journal_list_still_alive(inode->i_sb, id)) {
37901da177e4SLinus Torvalds 			/*
37911da177e4SLinus Torvalds 			 * we only set ret to 1 when we know for sure
37921da177e4SLinus Torvalds 			 * the barrier hasn't been started yet on the commit
37931da177e4SLinus Torvalds 			 * block.
37941da177e4SLinus Torvalds 			 */
37951da177e4SLinus Torvalds 			if (atomic_read(&jl->j_commit_left) > 1)
37961da177e4SLinus Torvalds 				ret = 1;
37971da177e4SLinus Torvalds 			flush_commit_list(sb, jl, 1);
37981da177e4SLinus Torvalds 			if (journal->j_errno)
37991da177e4SLinus Torvalds 				ret = journal->j_errno;
38001da177e4SLinus Torvalds 		}
38011da177e4SLinus Torvalds 	}
38021da177e4SLinus Torvalds 	/* otherwise the list is gone, and long since committed */
38031da177e4SLinus Torvalds 	return ret;
38041da177e4SLinus Torvalds }
38051da177e4SLinus Torvalds 
3806bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode)
3807bd4c625cSLinus Torvalds {
3808600ed416SJeff Mahoney 	unsigned int id = REISERFS_I(inode)->i_trans_id;
38091da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
38101da177e4SLinus Torvalds 
38111da177e4SLinus Torvalds 	/* for the whole inode, assume unset id means it was
38121da177e4SLinus Torvalds 	 * changed in the current transaction.  More conservative
38131da177e4SLinus Torvalds 	 */
38141da177e4SLinus Torvalds 	if (!id || !jl) {
38151da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
38161da177e4SLinus Torvalds 		id = REISERFS_I(inode)->i_trans_id;
38171da177e4SLinus Torvalds 		/* jl will be updated in __commit_trans_jl */
38181da177e4SLinus Torvalds 	}
38191da177e4SLinus Torvalds 
38201da177e4SLinus Torvalds 	return __commit_trans_jl(inode, id, jl);
38211da177e4SLinus Torvalds }
38221da177e4SLinus Torvalds 
3823a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb,
3824bd4c625cSLinus Torvalds 				      struct buffer_head *bh)
3825bd4c625cSLinus Torvalds {
3826a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3827a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.restore_prepared);
38281da177e4SLinus Torvalds 	if (!bh) {
38291da177e4SLinus Torvalds 		return;
38301da177e4SLinus Torvalds 	}
38311da177e4SLinus Torvalds 	if (test_clear_buffer_journal_restore_dirty(bh) &&
38321da177e4SLinus Torvalds 	    buffer_journal_dirty(bh)) {
38331da177e4SLinus Torvalds 		struct reiserfs_journal_cnode *cn;
3834a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb,
38351da177e4SLinus Torvalds 					  journal->j_list_hash_table,
38361da177e4SLinus Torvalds 					  bh->b_blocknr);
38371da177e4SLinus Torvalds 		if (cn && can_dirty(cn)) {
38381da177e4SLinus Torvalds 			set_buffer_journal_test(bh);
38391da177e4SLinus Torvalds 			mark_buffer_dirty(bh);
38401da177e4SLinus Torvalds 		}
38411da177e4SLinus Torvalds 	}
38421da177e4SLinus Torvalds 	clear_buffer_journal_prepared(bh);
38431da177e4SLinus Torvalds }
38441da177e4SLinus Torvalds 
38451da177e4SLinus Torvalds extern struct tree_balance *cur_tb;
38461da177e4SLinus Torvalds /*
38471da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't
38481da177e4SLinus Torvalds ** be written to disk while we are altering it.  So, we must:
38491da177e4SLinus Torvalds ** clean it
38501da177e4SLinus Torvalds ** wait on it.
38511da177e4SLinus Torvalds **
38521da177e4SLinus Torvalds */
3853a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb,
3854bd4c625cSLinus Torvalds 				 struct buffer_head *bh, int wait)
3855bd4c625cSLinus Torvalds {
3856a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.prepare);
38571da177e4SLinus Torvalds 
3858ca5de404SNick Piggin 	if (!trylock_buffer(bh)) {
38591da177e4SLinus Torvalds 		if (!wait)
38601da177e4SLinus Torvalds 			return 0;
38611da177e4SLinus Torvalds 		lock_buffer(bh);
38621da177e4SLinus Torvalds 	}
38631da177e4SLinus Torvalds 	set_buffer_journal_prepared(bh);
38641da177e4SLinus Torvalds 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
38651da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
38661da177e4SLinus Torvalds 		set_buffer_journal_restore_dirty(bh);
38671da177e4SLinus Torvalds 	}
38681da177e4SLinus Torvalds 	unlock_buffer(bh);
38691da177e4SLinus Torvalds 	return 1;
38701da177e4SLinus Torvalds }
38711da177e4SLinus Torvalds 
3872bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s)
3873bd4c625cSLinus Torvalds {
38741da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
38751da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
38761da177e4SLinus Torvalds 	struct list_head *entry;
38771da177e4SLinus Torvalds 	time_t now = get_seconds();
38781da177e4SLinus Torvalds 
38791da177e4SLinus Torvalds 	while (!list_empty(&journal->j_journal_list)) {
38801da177e4SLinus Torvalds 		entry = journal->j_journal_list.next;
38811da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
38821da177e4SLinus Torvalds 		/* this check should always be run, to send old lists to disk */
3883a3172027SChris Mason 		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3884a3172027SChris Mason 		    atomic_read(&jl->j_commit_left) == 0 &&
3885a3172027SChris Mason 		    test_transaction(s, jl)) {
38861da177e4SLinus Torvalds 			flush_used_journal_lists(s, jl);
38871da177e4SLinus Torvalds 		} else {
38881da177e4SLinus Torvalds 			break;
38891da177e4SLinus Torvalds 		}
38901da177e4SLinus Torvalds 	}
38911da177e4SLinus Torvalds }
38921da177e4SLinus Torvalds 
38931da177e4SLinus Torvalds /*
38941da177e4SLinus Torvalds ** long and ugly.  If flush, will not return until all commit
38951da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk.
38961da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk.
38971da177e4SLinus Torvalds **
38981da177e4SLinus Torvalds ** keep reading, there are comments as you go along
38991da177e4SLinus Torvalds **
39001da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing
39011da177e4SLinus Torvalds ** journal lists, etc just won't happen.
39021da177e4SLinus Torvalds */
3903bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th,
3904a9dd3643SJeff Mahoney 			  struct super_block *sb, unsigned long nblocks,
3905bd4c625cSLinus Torvalds 			  int flags)
3906bd4c625cSLinus Torvalds {
3907a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
39081da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
39091da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *last_cn = NULL;
39101da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
39111da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
39121da177e4SLinus Torvalds 	struct buffer_head *c_bh;	/* commit bh */
39131da177e4SLinus Torvalds 	struct buffer_head *d_bh;	/* desc bh */
39141da177e4SLinus Torvalds 	int cur_write_start = 0;	/* start index of current log write */
39151da177e4SLinus Torvalds 	int old_start;
39161da177e4SLinus Torvalds 	int i;
3917a44c94a7SAlexander Zarochentsev 	int flush;
3918a44c94a7SAlexander Zarochentsev 	int wait_on_commit;
39191da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl, *temp_jl;
39201da177e4SLinus Torvalds 	struct list_head *entry, *safe;
39211da177e4SLinus Torvalds 	unsigned long jindex;
3922600ed416SJeff Mahoney 	unsigned int commit_trans_id;
39231da177e4SLinus Torvalds 	int trans_half;
39241da177e4SLinus Torvalds 
39251da177e4SLinus Torvalds 	BUG_ON(th->t_refcount > 1);
39261da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
39271da177e4SLinus Torvalds 
3928a44c94a7SAlexander Zarochentsev 	/* protect flush_older_commits from doing mistakes if the
3929a44c94a7SAlexander Zarochentsev            transaction ID counter gets overflowed.  */
3930600ed416SJeff Mahoney 	if (th->t_trans_id == ~0U)
3931a44c94a7SAlexander Zarochentsev 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
3932a44c94a7SAlexander Zarochentsev 	flush = flags & FLUSH_ALL;
3933a44c94a7SAlexander Zarochentsev 	wait_on_commit = flags & WAIT;
3934a44c94a7SAlexander Zarochentsev 
39351da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
3936a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end");
39371da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3938a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3939bd4c625cSLinus Torvalds 					     1);
3940a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
39411da177e4SLinus Torvalds 	}
39421da177e4SLinus Torvalds 
3943a9dd3643SJeff Mahoney 	lock_journal(sb);
39441da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39451da177e4SLinus Torvalds 		flags |= FLUSH_ALL;
39461da177e4SLinus Torvalds 		flush = 1;
39471da177e4SLinus Torvalds 	}
39481da177e4SLinus Torvalds 	if (journal->j_next_async_flush) {
39491da177e4SLinus Torvalds 		flags |= COMMIT_NOW | WAIT;
39501da177e4SLinus Torvalds 		wait_on_commit = 1;
39511da177e4SLinus Torvalds 	}
39521da177e4SLinus Torvalds 
39531da177e4SLinus Torvalds 	/* check_journal_end locks the journal, and unlocks if it does not return 1
39541da177e4SLinus Torvalds 	 ** it tells us if we should continue with the journal_end, or just return
39551da177e4SLinus Torvalds 	 */
3956a9dd3643SJeff Mahoney 	if (!check_journal_end(th, sb, nblocks, flags)) {
3957033369d1SArtem Bityutskiy 		reiserfs_schedule_old_flush(sb);
3958a9dd3643SJeff Mahoney 		wake_queued_writers(sb);
3959a9dd3643SJeff Mahoney 		reiserfs_async_progress_wait(sb);
39601da177e4SLinus Torvalds 		goto out;
39611da177e4SLinus Torvalds 	}
39621da177e4SLinus Torvalds 
39631da177e4SLinus Torvalds 	/* check_journal_end might set these, check again */
39641da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39651da177e4SLinus Torvalds 		flush = 1;
39661da177e4SLinus Torvalds 	}
39671da177e4SLinus Torvalds 
39681da177e4SLinus Torvalds 	/*
39691da177e4SLinus Torvalds 	 ** j must wait means we have to flush the log blocks, and the real blocks for
39701da177e4SLinus Torvalds 	 ** this transaction
39711da177e4SLinus Torvalds 	 */
39721da177e4SLinus Torvalds 	if (journal->j_must_wait > 0) {
39731da177e4SLinus Torvalds 		flush = 1;
39741da177e4SLinus Torvalds 	}
39751da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
3976ef43bc4fSJan Kara 	/* quota ops might need to nest, setup the journal_info pointer for them
3977ef43bc4fSJan Kara 	 * and raise the refcount so that it is > 0. */
39781da177e4SLinus Torvalds 	current->journal_info = th;
3979ef43bc4fSJan Kara 	th->t_refcount++;
39801da177e4SLinus Torvalds 	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
39811da177e4SLinus Torvalds 						 * the transaction */
3982ef43bc4fSJan Kara 	th->t_refcount--;
39831da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
39841da177e4SLinus Torvalds #endif
39851da177e4SLinus Torvalds 
39861da177e4SLinus Torvalds 	/* setup description block */
3987bd4c625cSLinus Torvalds 	d_bh =
3988a9dd3643SJeff Mahoney 	    journal_getblk(sb,
3989a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
3990bd4c625cSLinus Torvalds 			   journal->j_start);
39911da177e4SLinus Torvalds 	set_buffer_uptodate(d_bh);
39921da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
39931da177e4SLinus Torvalds 	memset(d_bh->b_data, 0, d_bh->b_size);
39941da177e4SLinus Torvalds 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
39951da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
39961da177e4SLinus Torvalds 
39971da177e4SLinus Torvalds 	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
3998a9dd3643SJeff Mahoney 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
3999bd4c625cSLinus Torvalds 			      ((journal->j_start + journal->j_len +
4000a9dd3643SJeff Mahoney 				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
40011da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
40021da177e4SLinus Torvalds 	memset(c_bh->b_data, 0, c_bh->b_size);
40031da177e4SLinus Torvalds 	set_commit_trans_id(commit, journal->j_trans_id);
40041da177e4SLinus Torvalds 	set_buffer_uptodate(c_bh);
40051da177e4SLinus Torvalds 
40061da177e4SLinus Torvalds 	/* init this journal list */
40071da177e4SLinus Torvalds 	jl = journal->j_current_jl;
40081da177e4SLinus Torvalds 
40091da177e4SLinus Torvalds 	/* we lock the commit before doing anything because
40101da177e4SLinus Torvalds 	 * we want to make sure nobody tries to run flush_commit_list until
40111da177e4SLinus Torvalds 	 * the new transaction is fully setup, and we've already flushed the
40121da177e4SLinus Torvalds 	 * ordered bh list
40131da177e4SLinus Torvalds 	 */
40148ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
40151da177e4SLinus Torvalds 
40161da177e4SLinus Torvalds 	/* save the transaction id in case we need to commit it later */
40171da177e4SLinus Torvalds 	commit_trans_id = jl->j_trans_id;
40181da177e4SLinus Torvalds 
40191da177e4SLinus Torvalds 	atomic_set(&jl->j_older_commits_done, 0);
40201da177e4SLinus Torvalds 	jl->j_trans_id = journal->j_trans_id;
40211da177e4SLinus Torvalds 	jl->j_timestamp = journal->j_trans_start_time;
40221da177e4SLinus Torvalds 	jl->j_commit_bh = c_bh;
40231da177e4SLinus Torvalds 	jl->j_start = journal->j_start;
40241da177e4SLinus Torvalds 	jl->j_len = journal->j_len;
40251da177e4SLinus Torvalds 	atomic_set(&jl->j_nonzerolen, journal->j_len);
40261da177e4SLinus Torvalds 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
40271da177e4SLinus Torvalds 	jl->j_realblock = NULL;
40281da177e4SLinus Torvalds 
40291da177e4SLinus Torvalds 	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
40301da177e4SLinus Torvalds 	 **  for each real block, add it to the journal list hash,
40311da177e4SLinus Torvalds 	 ** copy into real block index array in the commit or desc block
40321da177e4SLinus Torvalds 	 */
4033a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
40341da177e4SLinus Torvalds 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
40351da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
4036a9dd3643SJeff Mahoney 			jl_cn = get_cnode(sb);
40371da177e4SLinus Torvalds 			if (!jl_cn) {
4038a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-1676",
4039c3a9c210SJeff Mahoney 					       "get_cnode returned NULL");
40401da177e4SLinus Torvalds 			}
40411da177e4SLinus Torvalds 			if (i == 0) {
40421da177e4SLinus Torvalds 				jl->j_realblock = jl_cn;
40431da177e4SLinus Torvalds 			}
40441da177e4SLinus Torvalds 			jl_cn->prev = last_cn;
40451da177e4SLinus Torvalds 			jl_cn->next = NULL;
40461da177e4SLinus Torvalds 			if (last_cn) {
40471da177e4SLinus Torvalds 				last_cn->next = jl_cn;
40481da177e4SLinus Torvalds 			}
40491da177e4SLinus Torvalds 			last_cn = jl_cn;
40501da177e4SLinus Torvalds 			/* make sure the block we are trying to log is not a block
40511da177e4SLinus Torvalds 			   of journal or reserved area */
40521da177e4SLinus Torvalds 
4053bd4c625cSLinus Torvalds 			if (is_block_in_log_or_reserved_area
4054a9dd3643SJeff Mahoney 			    (sb, cn->bh->b_blocknr)) {
4055a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-2332",
4056c3a9c210SJeff Mahoney 					       "Trying to log block %lu, "
4057c3a9c210SJeff Mahoney 					       "which is a log block",
4058bd4c625cSLinus Torvalds 					       cn->bh->b_blocknr);
40591da177e4SLinus Torvalds 			}
40601da177e4SLinus Torvalds 			jl_cn->blocknr = cn->bh->b_blocknr;
40611da177e4SLinus Torvalds 			jl_cn->state = 0;
4062a9dd3643SJeff Mahoney 			jl_cn->sb = sb;
40631da177e4SLinus Torvalds 			jl_cn->bh = cn->bh;
40641da177e4SLinus Torvalds 			jl_cn->jlist = jl;
40651da177e4SLinus Torvalds 			insert_journal_hash(journal->j_list_hash_table, jl_cn);
40661da177e4SLinus Torvalds 			if (i < trans_half) {
4067bd4c625cSLinus Torvalds 				desc->j_realblock[i] =
4068bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
40691da177e4SLinus Torvalds 			} else {
4070bd4c625cSLinus Torvalds 				commit->j_realblock[i - trans_half] =
4071bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
40721da177e4SLinus Torvalds 			}
40731da177e4SLinus Torvalds 		} else {
40741da177e4SLinus Torvalds 			i--;
40751da177e4SLinus Torvalds 		}
40761da177e4SLinus Torvalds 	}
40771da177e4SLinus Torvalds 	set_desc_trans_len(desc, journal->j_len);
40781da177e4SLinus Torvalds 	set_desc_mount_id(desc, journal->j_mount_id);
40791da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
40801da177e4SLinus Torvalds 	set_commit_trans_len(commit, journal->j_len);
40811da177e4SLinus Torvalds 
40821da177e4SLinus Torvalds 	/* special check in case all buffers in the journal were marked for not logging */
408314a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
40841da177e4SLinus Torvalds 
40851da177e4SLinus Torvalds 	/* we're about to dirty all the log blocks, mark the description block
40861da177e4SLinus Torvalds 	 * dirty now too.  Don't mark the commit block dirty until all the
40871da177e4SLinus Torvalds 	 * others are on disk
40881da177e4SLinus Torvalds 	 */
40891da177e4SLinus Torvalds 	mark_buffer_dirty(d_bh);
40901da177e4SLinus Torvalds 
40911da177e4SLinus Torvalds 	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
40921da177e4SLinus Torvalds 	cur_write_start = journal->j_start;
40931da177e4SLinus Torvalds 	cn = journal->j_first;
40941da177e4SLinus Torvalds 	jindex = 1;		/* start at one so we don't get the desc again */
40951da177e4SLinus Torvalds 	while (cn) {
40961da177e4SLinus Torvalds 		clear_buffer_journal_new(cn->bh);
40971da177e4SLinus Torvalds 		/* copy all the real blocks into log area.  dirty log blocks */
40981da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
40991da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
41001da177e4SLinus Torvalds 			char *addr;
41011da177e4SLinus Torvalds 			struct page *page;
4102bd4c625cSLinus Torvalds 			tmp_bh =
4103a9dd3643SJeff Mahoney 			    journal_getblk(sb,
4104a9dd3643SJeff Mahoney 					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4105bd4c625cSLinus Torvalds 					   ((cur_write_start +
4106bd4c625cSLinus Torvalds 					     jindex) %
4107a9dd3643SJeff Mahoney 					    SB_ONDISK_JOURNAL_SIZE(sb)));
41081da177e4SLinus Torvalds 			set_buffer_uptodate(tmp_bh);
41091da177e4SLinus Torvalds 			page = cn->bh->b_page;
41101da177e4SLinus Torvalds 			addr = kmap(page);
4111bd4c625cSLinus Torvalds 			memcpy(tmp_bh->b_data,
4112bd4c625cSLinus Torvalds 			       addr + offset_in_page(cn->bh->b_data),
41131da177e4SLinus Torvalds 			       cn->bh->b_size);
41141da177e4SLinus Torvalds 			kunmap(page);
41151da177e4SLinus Torvalds 			mark_buffer_dirty(tmp_bh);
41161da177e4SLinus Torvalds 			jindex++;
41171da177e4SLinus Torvalds 			set_buffer_journal_dirty(cn->bh);
41181da177e4SLinus Torvalds 			clear_buffer_journaled(cn->bh);
41191da177e4SLinus Torvalds 		} else {
41201da177e4SLinus Torvalds 			/* JDirty cleared sometime during transaction.  don't log this one */
4121a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2048",
412245b03d5eSJeff Mahoney 					 "BAD, buffer in journal hash, "
412345b03d5eSJeff Mahoney 					 "but not JDirty!");
41241da177e4SLinus Torvalds 			brelse(cn->bh);
41251da177e4SLinus Torvalds 		}
41261da177e4SLinus Torvalds 		next = cn->next;
4127a9dd3643SJeff Mahoney 		free_cnode(sb, cn);
41281da177e4SLinus Torvalds 		cn = next;
4129e6950a4dSFrederic Weisbecker 		reiserfs_write_unlock(sb);
41301da177e4SLinus Torvalds 		cond_resched();
4131e6950a4dSFrederic Weisbecker 		reiserfs_write_lock(sb);
41321da177e4SLinus Torvalds 	}
41331da177e4SLinus Torvalds 
41341da177e4SLinus Torvalds 	/* we are done  with both the c_bh and d_bh, but
41351da177e4SLinus Torvalds 	 ** c_bh must be written after all other commit blocks,
41361da177e4SLinus Torvalds 	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
41371da177e4SLinus Torvalds 	 */
41381da177e4SLinus Torvalds 
4139a9dd3643SJeff Mahoney 	journal->j_current_jl = alloc_journal_list(sb);
41401da177e4SLinus Torvalds 
41411da177e4SLinus Torvalds 	/* now it is safe to insert this transaction on the main list */
41421da177e4SLinus Torvalds 	list_add_tail(&jl->j_list, &journal->j_journal_list);
41431da177e4SLinus Torvalds 	list_add_tail(&jl->j_working_list, &journal->j_working_list);
41441da177e4SLinus Torvalds 	journal->j_num_work_lists++;
41451da177e4SLinus Torvalds 
41461da177e4SLinus Torvalds 	/* reset journal values for the next transaction */
41471da177e4SLinus Torvalds 	old_start = journal->j_start;
4148bd4c625cSLinus Torvalds 	journal->j_start =
4149bd4c625cSLinus Torvalds 	    (journal->j_start + journal->j_len +
4150a9dd3643SJeff Mahoney 	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
41511da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
41521da177e4SLinus Torvalds 	journal->j_bcount = 0;
41531da177e4SLinus Torvalds 	journal->j_last = NULL;
41541da177e4SLinus Torvalds 	journal->j_first = NULL;
41551da177e4SLinus Torvalds 	journal->j_len = 0;
41561da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
4157a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
4158a44c94a7SAlexander Zarochentsev 	if (++journal->j_trans_id == 0)
4159a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
41601da177e4SLinus Torvalds 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
41611da177e4SLinus Torvalds 	journal->j_must_wait = 0;
41621da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
41631da177e4SLinus Torvalds 	journal->j_next_full_flush = 0;
41641da177e4SLinus Torvalds 	journal->j_next_async_flush = 0;
4165a9dd3643SJeff Mahoney 	init_journal_hash(sb);
41661da177e4SLinus Torvalds 
41671da177e4SLinus Torvalds 	// make sure reiserfs_add_jh sees the new current_jl before we
41681da177e4SLinus Torvalds 	// write out the tails
41691da177e4SLinus Torvalds 	smp_mb();
41701da177e4SLinus Torvalds 
41711da177e4SLinus Torvalds 	/* tail conversion targets have to hit the disk before we end the
41721da177e4SLinus Torvalds 	 * transaction.  Otherwise a later transaction might repack the tail
41731da177e4SLinus Torvalds 	 * before this transaction commits, leaving the data block unflushed and
41741da177e4SLinus Torvalds 	 * clean, if we crash before the later transaction commits, the data block
41751da177e4SLinus Torvalds 	 * is lost.
41761da177e4SLinus Torvalds 	 */
41771da177e4SLinus Torvalds 	if (!list_empty(&jl->j_tail_bh_list)) {
41788ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
41791da177e4SLinus Torvalds 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
41801da177e4SLinus Torvalds 				      journal, jl, &jl->j_tail_bh_list);
41818ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
41821da177e4SLinus Torvalds 	}
418314a61442SEric Sesterhenn 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
418490415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
41851da177e4SLinus Torvalds 
41861da177e4SLinus Torvalds 	/* honor the flush wishes from the caller, simple commits can
41871da177e4SLinus Torvalds 	 ** be done outside the journal lock, they are done below
41881da177e4SLinus Torvalds 	 **
41891da177e4SLinus Torvalds 	 ** if we don't flush the commit list right now, we put it into
41901da177e4SLinus Torvalds 	 ** the work queue so the people waiting on the async progress work
41911da177e4SLinus Torvalds 	 ** queue don't wait for this proc to flush journal lists and such.
41921da177e4SLinus Torvalds 	 */
41931da177e4SLinus Torvalds 	if (flush) {
4194a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
4195a9dd3643SJeff Mahoney 		flush_journal_list(sb, jl, 1);
41961da177e4SLinus Torvalds 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
41971da177e4SLinus Torvalds 		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
41981da177e4SLinus Torvalds 
41991da177e4SLinus Torvalds 	/* if the next transaction has any chance of wrapping, flush
42001da177e4SLinus Torvalds 	 ** transactions that might get overwritten.  If any journal lists are very
42011da177e4SLinus Torvalds 	 ** old flush them as well.
42021da177e4SLinus Torvalds 	 */
42031da177e4SLinus Torvalds       first_jl:
42041da177e4SLinus Torvalds 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
42051da177e4SLinus Torvalds 		temp_jl = JOURNAL_LIST_ENTRY(entry);
42061da177e4SLinus Torvalds 		if (journal->j_start <= temp_jl->j_start) {
42071da177e4SLinus Torvalds 			if ((journal->j_start + journal->j_trans_max + 1) >=
4208bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4209a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
42101da177e4SLinus Torvalds 				goto first_jl;
42111da177e4SLinus Torvalds 			} else if ((journal->j_start +
42121da177e4SLinus Torvalds 				    journal->j_trans_max + 1) <
4213a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_SIZE(sb)) {
42141da177e4SLinus Torvalds 				/* if we don't cross into the next transaction and we don't
42151da177e4SLinus Torvalds 				 * wrap, there is no way we can overlap any later transactions
42161da177e4SLinus Torvalds 				 * break now
42171da177e4SLinus Torvalds 				 */
42181da177e4SLinus Torvalds 				break;
42191da177e4SLinus Torvalds 			}
42201da177e4SLinus Torvalds 		} else if ((journal->j_start +
42211da177e4SLinus Torvalds 			    journal->j_trans_max + 1) >
4222a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_SIZE(sb)) {
42231da177e4SLinus Torvalds 			if (((journal->j_start + journal->j_trans_max + 1) %
4224a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb)) >=
4225bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4226a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
42271da177e4SLinus Torvalds 				goto first_jl;
42281da177e4SLinus Torvalds 			} else {
42291da177e4SLinus Torvalds 				/* we don't overlap anything from our start to the end of the
42301da177e4SLinus Torvalds 				 * log, and our wrapped portion doesn't overlap anything at
42311da177e4SLinus Torvalds 				 * the start of the log.  We can break
42321da177e4SLinus Torvalds 				 */
42331da177e4SLinus Torvalds 				break;
42341da177e4SLinus Torvalds 			}
42351da177e4SLinus Torvalds 		}
42361da177e4SLinus Torvalds 	}
4237a9dd3643SJeff Mahoney 	flush_old_journal_lists(sb);
42381da177e4SLinus Torvalds 
4239bd4c625cSLinus Torvalds 	journal->j_current_jl->j_list_bitmap =
4240a9dd3643SJeff Mahoney 	    get_list_bitmap(sb, journal->j_current_jl);
42411da177e4SLinus Torvalds 
42421da177e4SLinus Torvalds 	if (!(journal->j_current_jl->j_list_bitmap)) {
4243a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-1996",
4244c3a9c210SJeff Mahoney 			       "could not get a list bitmap");
42451da177e4SLinus Torvalds 	}
42461da177e4SLinus Torvalds 
42471da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
4248a9dd3643SJeff Mahoney 	unlock_journal(sb);
42491da177e4SLinus Torvalds 	/* wake up anybody waiting to join. */
42501da177e4SLinus Torvalds 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
42511da177e4SLinus Torvalds 	wake_up(&(journal->j_join_wait));
42521da177e4SLinus Torvalds 
42531da177e4SLinus Torvalds 	if (!flush && wait_on_commit &&
4254a9dd3643SJeff Mahoney 	    journal_list_still_alive(sb, commit_trans_id)) {
4255a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
42561da177e4SLinus Torvalds 	}
42571da177e4SLinus Torvalds       out:
4258a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end2");
42591da177e4SLinus Torvalds 
42601da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
42611da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
42621da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
42631da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
4264a9dd3643SJeff Mahoney 	th->t_super = sb;
42651da177e4SLinus Torvalds 
42661da177e4SLinus Torvalds 	return journal->j_errno;
42671da177e4SLinus Torvalds }
42681da177e4SLinus Torvalds 
426932e8b106SJeff Mahoney /* Send the file system read only and refuse new transactions */
427032e8b106SJeff Mahoney void reiserfs_abort_journal(struct super_block *sb, int errno)
42711da177e4SLinus Torvalds {
42721da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
42731da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
42741da177e4SLinus Torvalds 		return;
42751da177e4SLinus Torvalds 
427632e8b106SJeff Mahoney 	if (!journal->j_errno)
427732e8b106SJeff Mahoney 		journal->j_errno = errno;
42781da177e4SLinus Torvalds 
42791da177e4SLinus Torvalds 	sb->s_flags |= MS_RDONLY;
42801da177e4SLinus Torvalds 	set_bit(J_ABORTED, &journal->j_state);
42811da177e4SLinus Torvalds 
42821da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
42831da177e4SLinus Torvalds 	dump_stack();
42841da177e4SLinus Torvalds #endif
42851da177e4SLinus Torvalds }
4286