xref: /openbmc/linux/fs/reiserfs/journal.c (revision a412f9ef)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds ** Write ahead logging implementation copyright Chris Mason 2000
31da177e4SLinus Torvalds **
** The background commits make this code very interrelated, and
** overly complex.  I need to rethink things a bit....The major players:
61da177e4SLinus Torvalds **
71da177e4SLinus Torvalds ** journal_begin -- call with the number of blocks you expect to log.
81da177e4SLinus Torvalds **                  If the current transaction is too
91da177e4SLinus Torvalds ** 		    old, it will block until the current transaction is
101da177e4SLinus Torvalds ** 		    finished, and then start a new one.
111da177e4SLinus Torvalds **		    Usually, your transaction will get joined in with
121da177e4SLinus Torvalds **                  previous ones for speed.
131da177e4SLinus Torvalds **
141da177e4SLinus Torvalds ** journal_join  -- same as journal_begin, but won't block on the current
151da177e4SLinus Torvalds **                  transaction regardless of age.  Don't ever call
161da177e4SLinus Torvalds **                  this.  Ever.  There are only two places it should be
171da177e4SLinus Torvalds **                  called from, and they are both inside this file.
181da177e4SLinus Torvalds **
191da177e4SLinus Torvalds ** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
201da177e4SLinus Torvalds **                       that might make them get sent to disk
211da177e4SLinus Torvalds **                       and then marks them BH_JDirty.  Puts the buffer head
221da177e4SLinus Torvalds **                       into the current transaction hash.
231da177e4SLinus Torvalds **
241da177e4SLinus Torvalds ** journal_end -- if the current transaction is batchable, it does nothing
251da177e4SLinus Torvalds **                   otherwise, it could do an async/synchronous commit, or
261da177e4SLinus Torvalds **                   a full flush of all log and real blocks in the
271da177e4SLinus Torvalds **                   transaction.
281da177e4SLinus Torvalds **
291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and
301da177e4SLinus Torvalds **                      commit blocks are sent to disk.  Forces commit blocks
311da177e4SLinus Torvalds **                      to disk for all backgrounded commits that have been
321da177e4SLinus Torvalds **                      around too long.
**		     -- Note, if you call this as an immediate flush
**		        from within kupdate, it will ignore the immediate flag
351da177e4SLinus Torvalds */
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <linux/time.h>
386188e10dSMatthew Wilcox #include <linux/semaphore.h>
391da177e4SLinus Torvalds #include <linux/vmalloc.h>
401da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
411da177e4SLinus Torvalds #include <linux/kernel.h>
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/fcntl.h>
441da177e4SLinus Torvalds #include <linux/stat.h>
451da177e4SLinus Torvalds #include <linux/string.h>
461da177e4SLinus Torvalds #include <linux/smp_lock.h>
471da177e4SLinus Torvalds #include <linux/buffer_head.h>
481da177e4SLinus Torvalds #include <linux/workqueue.h>
491da177e4SLinus Torvalds #include <linux/writeback.h>
501da177e4SLinus Torvalds #include <linux/blkdev.h>
513fcfab16SAndrew Morton #include <linux/backing-dev.h>
5290415deaSJeff Mahoney #include <linux/uaccess.h>
5390415deaSJeff Mahoney 
5490415deaSJeff Mahoney #include <asm/system.h>
551da177e4SLinus Torvalds 
/*
 * Map a struct list_head pointer back to the struct reiserfs_journal_list
 * that embeds it.  JOURNAL_LIST_ENTRY takes a pointer to the j_list member,
 * JOURNAL_WORK_ENTRY a pointer to the j_working_list member.
 */
#define JOURNAL_LIST_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_list))
#define JOURNAL_WORK_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_working_list))
611da177e4SLinus Torvalds 
/*
 * Number of mounted filesystems.  Used to decide when to start and kill
 * the commit workqueue.
 */
static int reiserfs_mounted_fs_count;

/* presumably the commit workqueue mentioned above -- confirm at its users */
static struct workqueue_struct *commit_wq;
681da177e4SLinus Torvalds 
/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018
#define BUFNR 64		/* read ahead */

/* cnode stat bits.  Move these into reiserfs_fs.h */
#define BLOCK_FREED 2		/* this block was freed, and can't be written */
#define BLOCK_FREED_HOLDER 3	/* freed during this transaction, and can't be written */
#define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
#define BLOCK_DIRTIED 5

/* journal list state bits */
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
#define LIST_COMMIT_PENDING  4	/* someone will commit this list */

/* flags for do_journal_end */
#define FLUSH_ALL   1		/* flush commit and real blocks */
#define COMMIT_NOW  2		/* end and commit this transaction */
#define WAIT        4		/* wait for the log blocks to hit the disk */
901da177e4SLinus Torvalds 
/* Forward declarations of file-local (static) helpers. */
static int do_journal_end(struct reiserfs_transaction_handle *th,
			  struct super_block *sb,
			  unsigned long nblocks, int flags);
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl,
			      int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl,
			     int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
			struct super_block *sb,
			unsigned long nblocks);
static int release_journal_dev(struct super_block *super,
			       struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);
1071da177e4SLinus Torvalds 
/* Values accepted by the 'join' argument of do_journal_begin_r(). */
enum {
	/* regular journal begin */
	JBEGIN_REG = 0,
	/* join the running transaction if at all possible */
	JBEGIN_JOIN = 1,
	/* called from cleanup code, ignores aborted flag */
	JBEGIN_ABORT = 2,
};

static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *sb, unsigned long nblocks,
			      int join);
1181da177e4SLinus Torvalds 
119a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb)
120bd4c625cSLinus Torvalds {
121a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
122bd4c625cSLinus Torvalds 	memset(journal->j_hash_table, 0,
123bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
1241da177e4SLinus Torvalds }
1251da177e4SLinus Torvalds 
/*
 * Clear the dirty and journal_test bits on @bh.  A NULL @bh is accepted
 * and ignored.  Always returns 0.
 *
 * Historical rationale (from the original author): refile_buffer must not
 * be allowed to cause a schedule after a block has been freed.  See
 * remove_from_transaction and journal_mark_freed for more details.
 */
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}
1391da177e4SLinus Torvalds 
1401da177e4SLinus Torvalds static void disable_barrier(struct super_block *s)
1411da177e4SLinus Torvalds {
1421da177e4SLinus Torvalds 	REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
143bd4c625cSLinus Torvalds 	printk("reiserfs: disabling flush barriers on %s\n",
144bd4c625cSLinus Torvalds 	       reiserfs_bdevname(s));
1451da177e4SLinus Torvalds }
1461da177e4SLinus Torvalds 
147bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
148a9dd3643SJeff Mahoney 							 *sb)
149bd4c625cSLinus Torvalds {
1501da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
1511da177e4SLinus Torvalds 	static int id;
1521da177e4SLinus Torvalds 
153d739b42bSPekka Enberg 	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
1541da177e4SLinus Torvalds 	if (!bn) {
1551da177e4SLinus Torvalds 		return NULL;
1561da177e4SLinus Torvalds 	}
157a9dd3643SJeff Mahoney 	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
1581da177e4SLinus Torvalds 	if (!bn->data) {
159d739b42bSPekka Enberg 		kfree(bn);
1601da177e4SLinus Torvalds 		return NULL;
1611da177e4SLinus Torvalds 	}
1621da177e4SLinus Torvalds 	bn->id = id++;
1631da177e4SLinus Torvalds 	INIT_LIST_HEAD(&bn->list);
1641da177e4SLinus Torvalds 	return bn;
1651da177e4SLinus Torvalds }
1661da177e4SLinus Torvalds 
167a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
168bd4c625cSLinus Torvalds {
169a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1701da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
1711da177e4SLinus Torvalds 	struct list_head *entry = journal->j_bitmap_nodes.next;
1721da177e4SLinus Torvalds 
1731da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes++;
1741da177e4SLinus Torvalds       repeat:
1751da177e4SLinus Torvalds 
1761da177e4SLinus Torvalds 	if (entry != &journal->j_bitmap_nodes) {
1771da177e4SLinus Torvalds 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
1781da177e4SLinus Torvalds 		list_del(entry);
179a9dd3643SJeff Mahoney 		memset(bn->data, 0, sb->s_blocksize);
1801da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
1811da177e4SLinus Torvalds 		return bn;
1821da177e4SLinus Torvalds 	}
183a9dd3643SJeff Mahoney 	bn = allocate_bitmap_node(sb);
1841da177e4SLinus Torvalds 	if (!bn) {
1851da177e4SLinus Torvalds 		yield();
1861da177e4SLinus Torvalds 		goto repeat;
1871da177e4SLinus Torvalds 	}
1881da177e4SLinus Torvalds 	return bn;
1891da177e4SLinus Torvalds }
190a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb,
191bd4c625cSLinus Torvalds 				    struct reiserfs_bitmap_node *bn)
192bd4c625cSLinus Torvalds {
193a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1941da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes--;
1951da177e4SLinus Torvalds 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
196d739b42bSPekka Enberg 		kfree(bn->data);
197d739b42bSPekka Enberg 		kfree(bn);
1981da177e4SLinus Torvalds 	} else {
1991da177e4SLinus Torvalds 		list_add(&bn->list, &journal->j_bitmap_nodes);
2001da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes++;
2011da177e4SLinus Torvalds 	}
2021da177e4SLinus Torvalds }
2031da177e4SLinus Torvalds 
204a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb)
205bd4c625cSLinus Torvalds {
2061da177e4SLinus Torvalds 	int i;
207a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2081da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
2091da177e4SLinus Torvalds 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
210a9dd3643SJeff Mahoney 		bn = allocate_bitmap_node(sb);
2111da177e4SLinus Torvalds 		if (bn) {
2121da177e4SLinus Torvalds 			list_add(&bn->list, &journal->j_bitmap_nodes);
2131da177e4SLinus Torvalds 			journal->j_free_bitmap_nodes++;
2141da177e4SLinus Torvalds 		} else {
2150222e657SJeff Mahoney 			break;	/* this is ok, we'll try again when more are needed */
2161da177e4SLinus Torvalds 		}
2171da177e4SLinus Torvalds 	}
2181da177e4SLinus Torvalds }
2191da177e4SLinus Torvalds 
220a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb,
2213ee16670SJeff Mahoney 				  b_blocknr_t block,
222bd4c625cSLinus Torvalds 				  struct reiserfs_list_bitmap *jb)
223bd4c625cSLinus Torvalds {
224a9dd3643SJeff Mahoney 	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
225a9dd3643SJeff Mahoney 	unsigned int bit_nr = block % (sb->s_blocksize << 3);
2261da177e4SLinus Torvalds 
2271da177e4SLinus Torvalds 	if (!jb->bitmaps[bmap_nr]) {
228a9dd3643SJeff Mahoney 		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
2291da177e4SLinus Torvalds 	}
2301da177e4SLinus Torvalds 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
2311da177e4SLinus Torvalds 	return 0;
2321da177e4SLinus Torvalds }
2331da177e4SLinus Torvalds 
234a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb,
235bd4c625cSLinus Torvalds 				struct reiserfs_list_bitmap *jb)
236bd4c625cSLinus Torvalds {
2371da177e4SLinus Torvalds 	int i;
2381da177e4SLinus Torvalds 	if (jb->bitmaps == NULL)
2391da177e4SLinus Torvalds 		return;
2401da177e4SLinus Torvalds 
241a9dd3643SJeff Mahoney 	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
2421da177e4SLinus Torvalds 		if (jb->bitmaps[i]) {
243a9dd3643SJeff Mahoney 			free_bitmap_node(sb, jb->bitmaps[i]);
2441da177e4SLinus Torvalds 			jb->bitmaps[i] = NULL;
2451da177e4SLinus Torvalds 		}
2461da177e4SLinus Torvalds 	}
2471da177e4SLinus Torvalds }
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds /*
2501da177e4SLinus Torvalds ** only call this on FS unmount.
2511da177e4SLinus Torvalds */
252a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb,
253bd4c625cSLinus Torvalds 			     struct reiserfs_list_bitmap *jb_array)
254bd4c625cSLinus Torvalds {
2551da177e4SLinus Torvalds 	int i;
2561da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2571da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2581da177e4SLinus Torvalds 		jb = jb_array + i;
2591da177e4SLinus Torvalds 		jb->journal_list = NULL;
260a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
2611da177e4SLinus Torvalds 		vfree(jb->bitmaps);
2621da177e4SLinus Torvalds 		jb->bitmaps = NULL;
2631da177e4SLinus Torvalds 	}
2641da177e4SLinus Torvalds 	return 0;
2651da177e4SLinus Torvalds }
2661da177e4SLinus Torvalds 
267a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb)
268bd4c625cSLinus Torvalds {
269a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2701da177e4SLinus Torvalds 	struct list_head *next = journal->j_bitmap_nodes.next;
2711da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds 	while (next != &journal->j_bitmap_nodes) {
2741da177e4SLinus Torvalds 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
2751da177e4SLinus Torvalds 		list_del(next);
276d739b42bSPekka Enberg 		kfree(bn->data);
277d739b42bSPekka Enberg 		kfree(bn);
2781da177e4SLinus Torvalds 		next = journal->j_bitmap_nodes.next;
2791da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
2801da177e4SLinus Torvalds 	}
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds 	return 0;
2831da177e4SLinus Torvalds }
2841da177e4SLinus Torvalds 
2851da177e4SLinus Torvalds /*
2861da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
2871da177e4SLinus Torvalds ** jb_array is the array to be filled in.
2881da177e4SLinus Torvalds */
289a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb,
2901da177e4SLinus Torvalds 				   struct reiserfs_list_bitmap *jb_array,
2913ee16670SJeff Mahoney 				   unsigned int bmap_nr)
292bd4c625cSLinus Torvalds {
2931da177e4SLinus Torvalds 	int i;
2941da177e4SLinus Torvalds 	int failed = 0;
2951da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2961da177e4SLinus Torvalds 	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
2971da177e4SLinus Torvalds 
2981da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2991da177e4SLinus Torvalds 		jb = jb_array + i;
3001da177e4SLinus Torvalds 		jb->journal_list = NULL;
3011da177e4SLinus Torvalds 		jb->bitmaps = vmalloc(mem);
3021da177e4SLinus Torvalds 		if (!jb->bitmaps) {
303a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2000", "unable to "
30445b03d5eSJeff Mahoney 					 "allocate bitmaps for journal lists");
3051da177e4SLinus Torvalds 			failed = 1;
3061da177e4SLinus Torvalds 			break;
3071da177e4SLinus Torvalds 		}
3081da177e4SLinus Torvalds 		memset(jb->bitmaps, 0, mem);
3091da177e4SLinus Torvalds 	}
3101da177e4SLinus Torvalds 	if (failed) {
311a9dd3643SJeff Mahoney 		free_list_bitmaps(sb, jb_array);
3121da177e4SLinus Torvalds 		return -1;
3131da177e4SLinus Torvalds 	}
3141da177e4SLinus Torvalds 	return 0;
3151da177e4SLinus Torvalds }
3161da177e4SLinus Torvalds 
3171da177e4SLinus Torvalds /*
3181da177e4SLinus Torvalds ** find an available list bitmap.  If you can't find one, flush a commit list
3191da177e4SLinus Torvalds ** and try again
3201da177e4SLinus Torvalds */
321a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
322bd4c625cSLinus Torvalds 						    struct reiserfs_journal_list
323bd4c625cSLinus Torvalds 						    *jl)
324bd4c625cSLinus Torvalds {
3251da177e4SLinus Torvalds 	int i, j;
326a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3271da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
3281da177e4SLinus Torvalds 
3291da177e4SLinus Torvalds 	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
3301da177e4SLinus Torvalds 		i = journal->j_list_bitmap_index;
3311da177e4SLinus Torvalds 		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
3321da177e4SLinus Torvalds 		jb = journal->j_list_bitmap + i;
3331da177e4SLinus Torvalds 		if (journal->j_list_bitmap[i].journal_list) {
334a9dd3643SJeff Mahoney 			flush_commit_list(sb,
335bd4c625cSLinus Torvalds 					  journal->j_list_bitmap[i].
336bd4c625cSLinus Torvalds 					  journal_list, 1);
3371da177e4SLinus Torvalds 			if (!journal->j_list_bitmap[i].journal_list) {
3381da177e4SLinus Torvalds 				break;
3391da177e4SLinus Torvalds 			}
3401da177e4SLinus Torvalds 		} else {
3411da177e4SLinus Torvalds 			break;
3421da177e4SLinus Torvalds 		}
3431da177e4SLinus Torvalds 	}
3441da177e4SLinus Torvalds 	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
3451da177e4SLinus Torvalds 		return NULL;
3461da177e4SLinus Torvalds 	}
3471da177e4SLinus Torvalds 	jb->journal_list = jl;
3481da177e4SLinus Torvalds 	return jb;
3491da177e4SLinus Torvalds }
3501da177e4SLinus Torvalds 
3511da177e4SLinus Torvalds /*
3521da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list.
3531da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers
3541da177e4SLinus Torvalds ** returns NULL on failure
3551da177e4SLinus Torvalds */
356bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
357bd4c625cSLinus Torvalds {
3581da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *head;
3591da177e4SLinus Torvalds 	int i;
3601da177e4SLinus Torvalds 	if (num_cnodes <= 0) {
3611da177e4SLinus Torvalds 		return NULL;
3621da177e4SLinus Torvalds 	}
3631da177e4SLinus Torvalds 	head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
3641da177e4SLinus Torvalds 	if (!head) {
3651da177e4SLinus Torvalds 		return NULL;
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 	memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
3681da177e4SLinus Torvalds 	head[0].prev = NULL;
3691da177e4SLinus Torvalds 	head[0].next = head + 1;
3701da177e4SLinus Torvalds 	for (i = 1; i < num_cnodes; i++) {
3711da177e4SLinus Torvalds 		head[i].prev = head + (i - 1);
3721da177e4SLinus Torvalds 		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
3731da177e4SLinus Torvalds 	}
3741da177e4SLinus Torvalds 	head[num_cnodes - 1].next = NULL;
3751da177e4SLinus Torvalds 	return head;
3761da177e4SLinus Torvalds }
3771da177e4SLinus Torvalds 
3781da177e4SLinus Torvalds /*
3791da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure
3801da177e4SLinus Torvalds */
381a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
382bd4c625cSLinus Torvalds {
3831da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
384a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3851da177e4SLinus Torvalds 
386a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "get_cnode");
3871da177e4SLinus Torvalds 
3881da177e4SLinus Torvalds 	if (journal->j_cnode_free <= 0) {
3891da177e4SLinus Torvalds 		return NULL;
3901da177e4SLinus Torvalds 	}
3911da177e4SLinus Torvalds 	journal->j_cnode_used++;
3921da177e4SLinus Torvalds 	journal->j_cnode_free--;
3931da177e4SLinus Torvalds 	cn = journal->j_cnode_free_list;
3941da177e4SLinus Torvalds 	if (!cn) {
3951da177e4SLinus Torvalds 		return cn;
3961da177e4SLinus Torvalds 	}
3971da177e4SLinus Torvalds 	if (cn->next) {
3981da177e4SLinus Torvalds 		cn->next->prev = NULL;
3991da177e4SLinus Torvalds 	}
4001da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn->next;
4011da177e4SLinus Torvalds 	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
4021da177e4SLinus Torvalds 	return cn;
4031da177e4SLinus Torvalds }
4041da177e4SLinus Torvalds 
4051da177e4SLinus Torvalds /*
4061da177e4SLinus Torvalds ** returns a cnode to the free list
4071da177e4SLinus Torvalds */
408a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb,
409bd4c625cSLinus Torvalds 		       struct reiserfs_journal_cnode *cn)
410bd4c625cSLinus Torvalds {
411a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4121da177e4SLinus Torvalds 
413a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "free_cnode");
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds 	journal->j_cnode_used--;
4161da177e4SLinus Torvalds 	journal->j_cnode_free++;
4171da177e4SLinus Torvalds 	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
4181da177e4SLinus Torvalds 	cn->next = journal->j_cnode_free_list;
4191da177e4SLinus Torvalds 	if (journal->j_cnode_free_list) {
4201da177e4SLinus Torvalds 		journal->j_cnode_free_list->prev = cn;
4211da177e4SLinus Torvalds 	}
4221da177e4SLinus Torvalds 	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
4231da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn;
4241da177e4SLinus Torvalds }
4251da177e4SLinus Torvalds 
/* Clear both the journal_prepared and journal_restore_dirty bits on @bh. */
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);
	clear_buffer_journal_restore_dirty(bh);
}
4311da177e4SLinus Torvalds 
4321da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */
433bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
434bd4c625cSLinus Torvalds 								  super_block
435bd4c625cSLinus Torvalds 								  *sb,
436bd4c625cSLinus Torvalds 								  struct
437bd4c625cSLinus Torvalds 								  reiserfs_journal_cnode
438bd4c625cSLinus Torvalds 								  **table,
4391da177e4SLinus Torvalds 								  long bl)
4401da177e4SLinus Torvalds {
4411da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4421da177e4SLinus Torvalds 	cn = journal_hash(table, sb, bl);
4431da177e4SLinus Torvalds 	while (cn) {
4441da177e4SLinus Torvalds 		if (cn->blocknr == bl && cn->sb == sb)
4451da177e4SLinus Torvalds 			return cn;
4461da177e4SLinus Torvalds 		cn = cn->hnext;
4471da177e4SLinus Torvalds 	}
4481da177e4SLinus Torvalds 	return (struct reiserfs_journal_cnode *)0;
4491da177e4SLinus Torvalds }
4501da177e4SLinus Torvalds 
4511da177e4SLinus Torvalds /*
4521da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
4531da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
4541da177e4SLinus Torvalds ** being overwritten by a replay after crashing.
4551da177e4SLinus Torvalds **
4561da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
4571da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
4581da177e4SLinus Torvalds ** sure you never write the block without logging it.
4591da177e4SLinus Torvalds **
4601da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward.
4611da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search
4621da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl.  Then, we return that
4631da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try.
4641da177e4SLinus Torvalds **
4651da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't
4661da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal
4671da177e4SLinus Torvalds **
4681da177e4SLinus Torvalds */
469a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb,
4703ee16670SJeff Mahoney 			unsigned int bmap_nr, int bit_nr, int search_all,
471bd4c625cSLinus Torvalds 			b_blocknr_t * next_zero_bit)
472bd4c625cSLinus Torvalds {
473a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4741da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4751da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
4761da177e4SLinus Torvalds 	int i;
4771da177e4SLinus Torvalds 	unsigned long bl;
4781da177e4SLinus Torvalds 
4791da177e4SLinus Torvalds 	*next_zero_bit = 0;	/* always start this at zero. */
4801da177e4SLinus Torvalds 
481a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal);
4821da177e4SLinus Torvalds 	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
4831da177e4SLinus Torvalds 	 ** if we crash before the transaction that freed it commits,  this transaction won't
4841da177e4SLinus Torvalds 	 ** have committed either, and the block will never be written
4851da177e4SLinus Torvalds 	 */
4861da177e4SLinus Torvalds 	if (search_all) {
4871da177e4SLinus Torvalds 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
488a9dd3643SJeff Mahoney 			PROC_INFO_INC(sb, journal.in_journal_bitmap);
4891da177e4SLinus Torvalds 			jb = journal->j_list_bitmap + i;
4901da177e4SLinus Torvalds 			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
491bd4c625cSLinus Torvalds 			    test_bit(bit_nr,
492bd4c625cSLinus Torvalds 				     (unsigned long *)jb->bitmaps[bmap_nr]->
493bd4c625cSLinus Torvalds 				     data)) {
494bd4c625cSLinus Torvalds 				*next_zero_bit =
495bd4c625cSLinus Torvalds 				    find_next_zero_bit((unsigned long *)
496bd4c625cSLinus Torvalds 						       (jb->bitmaps[bmap_nr]->
497bd4c625cSLinus Torvalds 							data),
498a9dd3643SJeff Mahoney 						       sb->s_blocksize << 3,
499bd4c625cSLinus Torvalds 						       bit_nr + 1);
5001da177e4SLinus Torvalds 				return 1;
5011da177e4SLinus Torvalds 			}
5021da177e4SLinus Torvalds 		}
5031da177e4SLinus Torvalds 	}
5041da177e4SLinus Torvalds 
505a9dd3643SJeff Mahoney 	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
5061da177e4SLinus Torvalds 	/* is it in any old transactions? */
507bd4c625cSLinus Torvalds 	if (search_all
508bd4c625cSLinus Torvalds 	    && (cn =
509a9dd3643SJeff Mahoney 		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
5101da177e4SLinus Torvalds 		return 1;
5111da177e4SLinus Torvalds 	}
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds 	/* is it in the current transaction.  This should never happen */
514a9dd3643SJeff Mahoney 	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
5151da177e4SLinus Torvalds 		BUG();
5161da177e4SLinus Torvalds 		return 1;
5171da177e4SLinus Torvalds 	}
5181da177e4SLinus Torvalds 
519a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal_reusable);
5201da177e4SLinus Torvalds 	/* safe for reuse */
5211da177e4SLinus Torvalds 	return 0;
5221da177e4SLinus Torvalds }
5231da177e4SLinus Torvalds 
5241da177e4SLinus Torvalds /* insert cn into table
5251da177e4SLinus Torvalds */
526bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
527bd4c625cSLinus Torvalds 				       struct reiserfs_journal_cnode *cn)
528bd4c625cSLinus Torvalds {
5291da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn_orig;
5301da177e4SLinus Torvalds 
5311da177e4SLinus Torvalds 	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
5321da177e4SLinus Torvalds 	cn->hnext = cn_orig;
5331da177e4SLinus Torvalds 	cn->hprev = NULL;
5341da177e4SLinus Torvalds 	if (cn_orig) {
5351da177e4SLinus Torvalds 		cn_orig->hprev = cn;
5361da177e4SLinus Torvalds 	}
5371da177e4SLinus Torvalds 	journal_hash(table, cn->sb, cn->blocknr) = cn;
5381da177e4SLinus Torvalds }
5391da177e4SLinus Torvalds 
/*
 * Several mutexes depend on the write lock.
 * However, sometimes we want to relax the write lock while holding one of
 * these mutexes, mirroring the release/reacquire-on-schedule() behaviour
 * of the BKL that used to be here.  Reiserfs performance and locking were
 * designed around that scheme.
 * Now that the write lock is a mutex rather than the BKL, doing so may
 * deadlock:
 *
 * A acquires write_lock
 * A acquires j_commit_mutex
 * A releases write_lock and waits for something
 * B acquires write_lock
 * B can't acquire j_commit_mutex and sleeps
 * A can't reacquire write_lock anymore
 * deadlock
 *
 * We avoid this by playing the same game as the BKL did: if a mutex that
 * depends on the write lock cannot be taken, release the write lock, wait
 * a bit and then retry.
 *
 * The mutexes concerned by this hack are:
 * - The commit mutex of a journal list
 * - The flush mutex
 * - The journal lock
 */
static inline void reiserfs_mutex_lock_safe(struct mutex *m,
			       struct super_block *s)
{
	for (;;) {
		if (mutex_trylock(m))
			return;

		/* drop the write lock while someone else holds @m */
		reiserfs_write_unlock(s);
		schedule();
		reiserfs_write_lock(s);
	}
}
5758ebc4232SFrederic Weisbecker 
5761da177e4SLinus Torvalds /* lock the current transaction */
577a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb)
578bd4c625cSLinus Torvalds {
579a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.lock_journal);
5808ebc4232SFrederic Weisbecker 
5818ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
5821da177e4SLinus Torvalds }
5831da177e4SLinus Torvalds 
5841da177e4SLinus Torvalds /* unlock the current transaction */
585a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb)
586bd4c625cSLinus Torvalds {
587a9dd3643SJeff Mahoney 	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
5881da177e4SLinus Torvalds }
5891da177e4SLinus Torvalds 
5901da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl)
5911da177e4SLinus Torvalds {
5921da177e4SLinus Torvalds 	jl->j_refcount++;
5931da177e4SLinus Torvalds }
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s,
5961da177e4SLinus Torvalds 				    struct reiserfs_journal_list *jl)
5971da177e4SLinus Torvalds {
5981da177e4SLinus Torvalds 	if (jl->j_refcount < 1) {
599c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
600bd4c625cSLinus Torvalds 			       jl->j_trans_id, jl->j_refcount);
6011da177e4SLinus Torvalds 	}
6021da177e4SLinus Torvalds 	if (--jl->j_refcount == 0)
603d739b42bSPekka Enberg 		kfree(jl);
6041da177e4SLinus Torvalds }
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds /*
6071da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
6081da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
6091da177e4SLinus Torvalds ** transaction.
6101da177e4SLinus Torvalds */
611a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb,
612bd4c625cSLinus Torvalds 					   struct reiserfs_journal_list *jl)
613bd4c625cSLinus Torvalds {
6141da177e4SLinus Torvalds 
6151da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
6161da177e4SLinus Torvalds 	if (jb) {
617a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
6181da177e4SLinus Torvalds 	}
6191da177e4SLinus Torvalds 	jl->j_list_bitmap->journal_list = NULL;
6201da177e4SLinus Torvalds 	jl->j_list_bitmap = NULL;
6211da177e4SLinus Torvalds }
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s,
624600ed416SJeff Mahoney 				    unsigned int trans_id)
6251da177e4SLinus Torvalds {
6261da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
6271da177e4SLinus Torvalds 	struct list_head *entry = &journal->j_journal_list;
6281da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds 	if (!list_empty(entry)) {
6311da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry->next);
6321da177e4SLinus Torvalds 		if (jl->j_trans_id <= trans_id) {
6331da177e4SLinus Torvalds 			return 1;
6341da177e4SLinus Torvalds 		}
6351da177e4SLinus Torvalds 	}
6361da177e4SLinus Torvalds 	return 0;
6371da177e4SLinus Torvalds }
6381da177e4SLinus Torvalds 
639398c95bdSChris Mason /*
640398c95bdSChris Mason  * If page->mapping was null, we failed to truncate this page for
641398c95bdSChris Mason  * some reason.  Most likely because it was truncated after being
642398c95bdSChris Mason  * logged via data=journal.
643398c95bdSChris Mason  *
644398c95bdSChris Mason  * This does a check to see if the buffer belongs to one of these
645398c95bdSChris Mason  * lost pages before doing the final put_bh.  If page->mapping was
646398c95bdSChris Mason  * null, it tries to free buffers on the page, which should make the
647398c95bdSChris Mason  * final page_cache_release drop the page from the lru.
648398c95bdSChris Mason  */
649398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh)
650398c95bdSChris Mason {
651398c95bdSChris Mason 	struct page *page = bh->b_page;
652529ae9aaSNick Piggin 	if (!page->mapping && trylock_page(page)) {
653398c95bdSChris Mason 		page_cache_get(page);
654398c95bdSChris Mason 		put_bh(bh);
655398c95bdSChris Mason 		if (!page->mapping)
656398c95bdSChris Mason 			try_to_free_buffers(page);
657398c95bdSChris Mason 		unlock_page(page);
658398c95bdSChris Mason 		page_cache_release(page);
659398c95bdSChris Mason 	} else {
660398c95bdSChris Mason 		put_bh(bh);
661398c95bdSChris Mason 	}
662398c95bdSChris Mason }
663398c95bdSChris Mason 
664bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
665bd4c625cSLinus Torvalds {
6661da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
6671da177e4SLinus Torvalds 
6681da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
66945b03d5eSJeff Mahoney 		reiserfs_warning(NULL, "clm-2084",
67045b03d5eSJeff Mahoney 				 "pinned buffer %lu:%s sent to disk",
6711da177e4SLinus Torvalds 				 bh->b_blocknr, bdevname(bh->b_bdev, b));
6721da177e4SLinus Torvalds 	}
6731da177e4SLinus Torvalds 	if (uptodate)
6741da177e4SLinus Torvalds 		set_buffer_uptodate(bh);
6751da177e4SLinus Torvalds 	else
6761da177e4SLinus Torvalds 		clear_buffer_uptodate(bh);
677398c95bdSChris Mason 
6781da177e4SLinus Torvalds 	unlock_buffer(bh);
679398c95bdSChris Mason 	release_buffer_page(bh);
6801da177e4SLinus Torvalds }
6811da177e4SLinus Torvalds 
/*
 * I/O completion handler for ordered and barrier writes: record the I/O
 * result in the uptodate bit, unlock the buffer and drop the reference
 * taken at submit time.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
6911da177e4SLinus Torvalds 
692bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh)
693bd4c625cSLinus Torvalds {
6941da177e4SLinus Torvalds 	get_bh(bh);
6951da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_buffer_io_sync;
6961da177e4SLinus Torvalds 	clear_buffer_journal_new(bh);
6971da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6981da177e4SLinus Torvalds 	if (!test_clear_buffer_journal_test(bh))
6991da177e4SLinus Torvalds 		BUG();
7001da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
7011da177e4SLinus Torvalds 		BUG();
7021da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
7031da177e4SLinus Torvalds }
7041da177e4SLinus Torvalds 
705bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh)
706bd4c625cSLinus Torvalds {
7071da177e4SLinus Torvalds 	get_bh(bh);
7081da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
7091da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
7101da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
7111da177e4SLinus Torvalds 		BUG();
7121da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
7131da177e4SLinus Torvalds }
7141da177e4SLinus Torvalds 
715bd4c625cSLinus Torvalds static int submit_barrier_buffer(struct buffer_head *bh)
716bd4c625cSLinus Torvalds {
7171da177e4SLinus Torvalds 	get_bh(bh);
7181da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
7191da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
7201da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
7211da177e4SLinus Torvalds 		BUG();
7221da177e4SLinus Torvalds 	return submit_bh(WRITE_BARRIER, bh);
7231da177e4SLinus Torvalds }
7241da177e4SLinus Torvalds 
/*
 * If the buffer came back flagged "operation not supported", clear the
 * flag, turn barriers off for this filesystem and rewrite the buffer
 * synchronously.  The write lock is dropped around the synchronous
 * write.
 */
static void check_barrier_completion(struct super_block *s,
				     struct buffer_head *bh)
{
	if (!buffer_eopnotsupp(bh))
		return;

	clear_buffer_eopnotsupp(bh);
	disable_barrier(s);

	/* re-arm the buffer so sync_dirty_buffer() will write it again */
	set_buffer_uptodate(bh);
	set_buffer_dirty(bh);

	reiserfs_write_unlock(s);
	sync_dirty_buffer(bh);
	reiserfs_write_lock(s);
}
7381da177e4SLinus Torvalds 
/* Maximum number of buffer heads batched up before they are submitted. */
#define CHUNK_SIZE 32

/* A small batch of buffer heads collected by add_to_chunk(). */
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];	/* collected buffers */
	int nr;					/* slots of bh[] in use */
};
7441da177e4SLinus Torvalds 
745bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk)
746bd4c625cSLinus Torvalds {
7471da177e4SLinus Torvalds 	int i;
74822e2c507SJens Axboe 	get_fs_excl();
7491da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
7501da177e4SLinus Torvalds 		submit_logged_buffer(chunk->bh[i]);
7511da177e4SLinus Torvalds 	}
7521da177e4SLinus Torvalds 	chunk->nr = 0;
75322e2c507SJens Axboe 	put_fs_excl();
7541da177e4SLinus Torvalds }
7551da177e4SLinus Torvalds 
756bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk)
757bd4c625cSLinus Torvalds {
7581da177e4SLinus Torvalds 	int i;
75922e2c507SJens Axboe 	get_fs_excl();
7601da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
7611da177e4SLinus Torvalds 		submit_ordered_buffer(chunk->bh[i]);
7621da177e4SLinus Torvalds 	}
7631da177e4SLinus Torvalds 	chunk->nr = 0;
76422e2c507SJens Axboe 	put_fs_excl();
7651da177e4SLinus Torvalds }
7661da177e4SLinus Torvalds 
7671da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
768bd4c625cSLinus Torvalds 			spinlock_t * lock, void (fn) (struct buffer_chunk *))
7691da177e4SLinus Torvalds {
7701da177e4SLinus Torvalds 	int ret = 0;
77114a61442SEric Sesterhenn 	BUG_ON(chunk->nr >= CHUNK_SIZE);
7721da177e4SLinus Torvalds 	chunk->bh[chunk->nr++] = bh;
7731da177e4SLinus Torvalds 	if (chunk->nr >= CHUNK_SIZE) {
7741da177e4SLinus Torvalds 		ret = 1;
7751da177e4SLinus Torvalds 		if (lock)
7761da177e4SLinus Torvalds 			spin_unlock(lock);
7771da177e4SLinus Torvalds 		fn(chunk);
7781da177e4SLinus Torvalds 		if (lock)
7791da177e4SLinus Torvalds 			spin_lock(lock);
7801da177e4SLinus Torvalds 	}
7811da177e4SLinus Torvalds 	return ret;
7821da177e4SLinus Torvalds }
7831da177e4SLinus Torvalds 
7841da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
785bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void)
786bd4c625cSLinus Torvalds {
7871da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7881da177e4SLinus Torvalds 	while (1) {
7891da177e4SLinus Torvalds 		jh = kmalloc(sizeof(*jh), GFP_NOFS);
7901da177e4SLinus Torvalds 		if (jh) {
7911da177e4SLinus Torvalds 			atomic_inc(&nr_reiserfs_jh);
7921da177e4SLinus Torvalds 			return jh;
7931da177e4SLinus Torvalds 		}
7941da177e4SLinus Torvalds 		yield();
7951da177e4SLinus Torvalds 	}
7961da177e4SLinus Torvalds }
7971da177e4SLinus Torvalds 
7981da177e4SLinus Torvalds /*
7991da177e4SLinus Torvalds  * we want to free the jh when the buffer has been written
8001da177e4SLinus Torvalds  * and waited on
8011da177e4SLinus Torvalds  */
802bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh)
803bd4c625cSLinus Torvalds {
8041da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8051da177e4SLinus Torvalds 
8061da177e4SLinus Torvalds 	jh = bh->b_private;
8071da177e4SLinus Torvalds 	if (jh) {
8081da177e4SLinus Torvalds 		bh->b_private = NULL;
8091da177e4SLinus Torvalds 		jh->bh = NULL;
8101da177e4SLinus Torvalds 		list_del_init(&jh->list);
8111da177e4SLinus Torvalds 		kfree(jh);
8121da177e4SLinus Torvalds 		if (atomic_read(&nr_reiserfs_jh) <= 0)
8131da177e4SLinus Torvalds 			BUG();
8141da177e4SLinus Torvalds 		atomic_dec(&nr_reiserfs_jh);
8151da177e4SLinus Torvalds 		put_bh(bh);
8161da177e4SLinus Torvalds 	}
8171da177e4SLinus Torvalds }
8181da177e4SLinus Torvalds 
8191da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
8201da177e4SLinus Torvalds 			   int tail)
8211da177e4SLinus Torvalds {
8221da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8231da177e4SLinus Torvalds 
8241da177e4SLinus Torvalds 	if (bh->b_private) {
8251da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
8261da177e4SLinus Torvalds 		if (!bh->b_private) {
8271da177e4SLinus Torvalds 			spin_unlock(&j->j_dirty_buffers_lock);
8281da177e4SLinus Torvalds 			goto no_jh;
8291da177e4SLinus Torvalds 		}
8301da177e4SLinus Torvalds 		jh = bh->b_private;
8311da177e4SLinus Torvalds 		list_del_init(&jh->list);
8321da177e4SLinus Torvalds 	} else {
8331da177e4SLinus Torvalds 	      no_jh:
8341da177e4SLinus Torvalds 		get_bh(bh);
8351da177e4SLinus Torvalds 		jh = alloc_jh();
8361da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
8371da177e4SLinus Torvalds 		/* buffer must be locked for __add_jh, should be able to have
8381da177e4SLinus Torvalds 		 * two adds at the same time
8391da177e4SLinus Torvalds 		 */
84014a61442SEric Sesterhenn 		BUG_ON(bh->b_private);
8411da177e4SLinus Torvalds 		jh->bh = bh;
8421da177e4SLinus Torvalds 		bh->b_private = jh;
8431da177e4SLinus Torvalds 	}
8441da177e4SLinus Torvalds 	jh->jl = j->j_current_jl;
8451da177e4SLinus Torvalds 	if (tail)
8461da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
8471da177e4SLinus Torvalds 	else {
8481da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_bh_list);
8491da177e4SLinus Torvalds 	}
8501da177e4SLinus Torvalds 	spin_unlock(&j->j_dirty_buffers_lock);
8511da177e4SLinus Torvalds 	return 0;
8521da177e4SLinus Torvalds }
8531da177e4SLinus Torvalds 
854bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
855bd4c625cSLinus Torvalds {
8561da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
8571da177e4SLinus Torvalds }
858bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
859bd4c625cSLinus Torvalds {
8601da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
8611da177e4SLinus Torvalds }
8621da177e4SLinus Torvalds 
8631da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
8641da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock,
8651da177e4SLinus Torvalds 				 struct reiserfs_journal *j,
8661da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
8671da177e4SLinus Torvalds 				 struct list_head *list)
8681da177e4SLinus Torvalds {
8691da177e4SLinus Torvalds 	struct buffer_head *bh;
8701da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8711da177e4SLinus Torvalds 	int ret = j->j_errno;
8721da177e4SLinus Torvalds 	struct buffer_chunk chunk;
8731da177e4SLinus Torvalds 	struct list_head tmp;
8741da177e4SLinus Torvalds 	INIT_LIST_HEAD(&tmp);
8751da177e4SLinus Torvalds 
8761da177e4SLinus Torvalds 	chunk.nr = 0;
8771da177e4SLinus Torvalds 	spin_lock(lock);
8781da177e4SLinus Torvalds 	while (!list_empty(list)) {
8791da177e4SLinus Torvalds 		jh = JH_ENTRY(list->next);
8801da177e4SLinus Torvalds 		bh = jh->bh;
8811da177e4SLinus Torvalds 		get_bh(bh);
882ca5de404SNick Piggin 		if (!trylock_buffer(bh)) {
8831da177e4SLinus Torvalds 			if (!buffer_dirty(bh)) {
884f116629dSAkinobu Mita 				list_move(&jh->list, &tmp);
8851da177e4SLinus Torvalds 				goto loop_next;
8861da177e4SLinus Torvalds 			}
8871da177e4SLinus Torvalds 			spin_unlock(lock);
8881da177e4SLinus Torvalds 			if (chunk.nr)
8891da177e4SLinus Torvalds 				write_ordered_chunk(&chunk);
8901da177e4SLinus Torvalds 			wait_on_buffer(bh);
8911da177e4SLinus Torvalds 			cond_resched();
8921da177e4SLinus Torvalds 			spin_lock(lock);
8931da177e4SLinus Torvalds 			goto loop_next;
8941da177e4SLinus Torvalds 		}
8953d4492f8SChris Mason 		/* in theory, dirty non-uptodate buffers should never get here,
8963d4492f8SChris Mason 		 * but the upper layer io error paths still have a few quirks.
8973d4492f8SChris Mason 		 * Handle them here as gracefully as we can
8983d4492f8SChris Mason 		 */
8993d4492f8SChris Mason 		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
9003d4492f8SChris Mason 			clear_buffer_dirty(bh);
9013d4492f8SChris Mason 			ret = -EIO;
9023d4492f8SChris Mason 		}
9031da177e4SLinus Torvalds 		if (buffer_dirty(bh)) {
904f116629dSAkinobu Mita 			list_move(&jh->list, &tmp);
9051da177e4SLinus Torvalds 			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
9061da177e4SLinus Torvalds 		} else {
9071da177e4SLinus Torvalds 			reiserfs_free_jh(bh);
9081da177e4SLinus Torvalds 			unlock_buffer(bh);
9091da177e4SLinus Torvalds 		}
9101da177e4SLinus Torvalds 	      loop_next:
9111da177e4SLinus Torvalds 		put_bh(bh);
9121da177e4SLinus Torvalds 		cond_resched_lock(lock);
9131da177e4SLinus Torvalds 	}
9141da177e4SLinus Torvalds 	if (chunk.nr) {
9151da177e4SLinus Torvalds 		spin_unlock(lock);
9161da177e4SLinus Torvalds 		write_ordered_chunk(&chunk);
9171da177e4SLinus Torvalds 		spin_lock(lock);
9181da177e4SLinus Torvalds 	}
9191da177e4SLinus Torvalds 	while (!list_empty(&tmp)) {
9201da177e4SLinus Torvalds 		jh = JH_ENTRY(tmp.prev);
9211da177e4SLinus Torvalds 		bh = jh->bh;
9221da177e4SLinus Torvalds 		get_bh(bh);
9231da177e4SLinus Torvalds 		reiserfs_free_jh(bh);
9241da177e4SLinus Torvalds 
9251da177e4SLinus Torvalds 		if (buffer_locked(bh)) {
9261da177e4SLinus Torvalds 			spin_unlock(lock);
9271da177e4SLinus Torvalds 			wait_on_buffer(bh);
9281da177e4SLinus Torvalds 			spin_lock(lock);
9291da177e4SLinus Torvalds 		}
9301da177e4SLinus Torvalds 		if (!buffer_uptodate(bh)) {
9311da177e4SLinus Torvalds 			ret = -EIO;
9321da177e4SLinus Torvalds 		}
933d62b1b87SChris Mason 		/* ugly interaction with invalidatepage here.
934d62b1b87SChris Mason 		 * reiserfs_invalidate_page will pin any buffer that has a valid
935d62b1b87SChris Mason 		 * journal head from an older transaction.  If someone else sets
936d62b1b87SChris Mason 		 * our buffer dirty after we write it in the first loop, and
937d62b1b87SChris Mason 		 * then someone truncates the page away, nobody will ever write
938d62b1b87SChris Mason 		 * the buffer. We're safe if we write the page one last time
939d62b1b87SChris Mason 		 * after freeing the journal header.
940d62b1b87SChris Mason 		 */
941d62b1b87SChris Mason 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
942d62b1b87SChris Mason 			spin_unlock(lock);
943d62b1b87SChris Mason 			ll_rw_block(WRITE, 1, &bh);
944d62b1b87SChris Mason 			spin_lock(lock);
945d62b1b87SChris Mason 		}
9461da177e4SLinus Torvalds 		put_bh(bh);
9471da177e4SLinus Torvalds 		cond_resched_lock(lock);
9481da177e4SLinus Torvalds 	}
9491da177e4SLinus Torvalds 	spin_unlock(lock);
9501da177e4SLinus Torvalds 	return ret;
9511da177e4SLinus Torvalds }
9521da177e4SLinus Torvalds 
953bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s,
954bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
955bd4c625cSLinus Torvalds {
9561da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
9571da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
9581da177e4SLinus Torvalds 	struct reiserfs_journal_list *first_jl;
9591da177e4SLinus Torvalds 	struct list_head *entry;
960600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
961600ed416SJeff Mahoney 	unsigned int other_trans_id;
962600ed416SJeff Mahoney 	unsigned int first_trans_id;
9631da177e4SLinus Torvalds 
9641da177e4SLinus Torvalds       find_first:
9651da177e4SLinus Torvalds 	/*
9661da177e4SLinus Torvalds 	 * first we walk backwards to find the oldest uncommitted transation
9671da177e4SLinus Torvalds 	 */
9681da177e4SLinus Torvalds 	first_jl = jl;
9691da177e4SLinus Torvalds 	entry = jl->j_list.prev;
9701da177e4SLinus Torvalds 	while (1) {
9711da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9721da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list ||
9731da177e4SLinus Torvalds 		    atomic_read(&other_jl->j_older_commits_done))
9741da177e4SLinus Torvalds 			break;
9751da177e4SLinus Torvalds 
9761da177e4SLinus Torvalds 		first_jl = other_jl;
9771da177e4SLinus Torvalds 		entry = other_jl->j_list.prev;
9781da177e4SLinus Torvalds 	}
9791da177e4SLinus Torvalds 
9801da177e4SLinus Torvalds 	/* if we didn't find any older uncommitted transactions, return now */
9811da177e4SLinus Torvalds 	if (first_jl == jl) {
9821da177e4SLinus Torvalds 		return 0;
9831da177e4SLinus Torvalds 	}
9841da177e4SLinus Torvalds 
9851da177e4SLinus Torvalds 	first_trans_id = first_jl->j_trans_id;
9861da177e4SLinus Torvalds 
9871da177e4SLinus Torvalds 	entry = &first_jl->j_list;
9881da177e4SLinus Torvalds 	while (1) {
9891da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9901da177e4SLinus Torvalds 		other_trans_id = other_jl->j_trans_id;
9911da177e4SLinus Torvalds 
9921da177e4SLinus Torvalds 		if (other_trans_id < trans_id) {
9931da177e4SLinus Torvalds 			if (atomic_read(&other_jl->j_commit_left) != 0) {
9941da177e4SLinus Torvalds 				flush_commit_list(s, other_jl, 0);
9951da177e4SLinus Torvalds 
9961da177e4SLinus Torvalds 				/* list we were called with is gone, return */
9971da177e4SLinus Torvalds 				if (!journal_list_still_alive(s, trans_id))
9981da177e4SLinus Torvalds 					return 1;
9991da177e4SLinus Torvalds 
10001da177e4SLinus Torvalds 				/* the one we just flushed is gone, this means all
10011da177e4SLinus Torvalds 				 * older lists are also gone, so first_jl is no longer
10021da177e4SLinus Torvalds 				 * valid either.  Go back to the beginning.
10031da177e4SLinus Torvalds 				 */
1004bd4c625cSLinus Torvalds 				if (!journal_list_still_alive
1005bd4c625cSLinus Torvalds 				    (s, other_trans_id)) {
10061da177e4SLinus Torvalds 					goto find_first;
10071da177e4SLinus Torvalds 				}
10081da177e4SLinus Torvalds 			}
10091da177e4SLinus Torvalds 			entry = entry->next;
10101da177e4SLinus Torvalds 			if (entry == &journal->j_journal_list)
10111da177e4SLinus Torvalds 				return 0;
10121da177e4SLinus Torvalds 		} else {
10131da177e4SLinus Torvalds 			return 0;
10141da177e4SLinus Torvalds 		}
10151da177e4SLinus Torvalds 	}
10161da177e4SLinus Torvalds 	return 0;
10171da177e4SLinus Torvalds }
1018deba0f49SAdrian Bunk 
1019deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s)
1020bd4c625cSLinus Torvalds {
10211da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
10221da177e4SLinus Torvalds 	struct reiserfs_journal *j = SB_JOURNAL(s);
10238ebc4232SFrederic Weisbecker 
10248ebc4232SFrederic Weisbecker 	if (atomic_read(&j->j_async_throttle)) {
10258ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10268aa7e847SJens Axboe 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
10278ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10288ebc4232SFrederic Weisbecker 	}
10298ebc4232SFrederic Weisbecker 
10301da177e4SLinus Torvalds 	return 0;
10311da177e4SLinus Torvalds }
10321da177e4SLinus Torvalds 
/*
** if this journal list still has commit blocks unflushed, send them to disk.
**
** log areas must be flushed in order (transaction 2 can't commit before transaction 1).
** Before the commit block can be written, every other log block must be safely on disk.
**
*/
1040bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
1041bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall)
1042bd4c625cSLinus Torvalds {
10431da177e4SLinus Torvalds 	int i;
10443ee16670SJeff Mahoney 	b_blocknr_t bn;
10451da177e4SLinus Torvalds 	struct buffer_head *tbh = NULL;
1046600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
10471da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
10481da177e4SLinus Torvalds 	int barrier = 0;
10491da177e4SLinus Torvalds 	int retval = 0;
1050e0e851cfSChris Mason 	int write_len;
10511da177e4SLinus Torvalds 
10521da177e4SLinus Torvalds 	reiserfs_check_lock_depth(s, "flush_commit_list");
10531da177e4SLinus Torvalds 
10541da177e4SLinus Torvalds 	if (atomic_read(&jl->j_older_commits_done)) {
10551da177e4SLinus Torvalds 		return 0;
10561da177e4SLinus Torvalds 	}
10571da177e4SLinus Torvalds 
105822e2c507SJens Axboe 	get_fs_excl();
105922e2c507SJens Axboe 
10601da177e4SLinus Torvalds 	/* before we can put our commit blocks on disk, we have to make sure everyone older than
10611da177e4SLinus Torvalds 	 ** us is on disk too
10621da177e4SLinus Torvalds 	 */
10631da177e4SLinus Torvalds 	BUG_ON(jl->j_len <= 0);
10641da177e4SLinus Torvalds 	BUG_ON(trans_id == journal->j_trans_id);
10651da177e4SLinus Torvalds 
10661da177e4SLinus Torvalds 	get_journal_list(jl);
10671da177e4SLinus Torvalds 	if (flushall) {
10681da177e4SLinus Torvalds 		if (flush_older_commits(s, jl) == 1) {
10691da177e4SLinus Torvalds 			/* list disappeared during flush_older_commits.  return */
10701da177e4SLinus Torvalds 			goto put_jl;
10711da177e4SLinus Torvalds 		}
10721da177e4SLinus Torvalds 	}
10731da177e4SLinus Torvalds 
10741da177e4SLinus Torvalds 	/* make sure nobody is trying to flush this one at the same time */
10758ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
10768ebc4232SFrederic Weisbecker 
10771da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, trans_id)) {
107890415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10791da177e4SLinus Torvalds 		goto put_jl;
10801da177e4SLinus Torvalds 	}
10811da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
10821da177e4SLinus Torvalds 
10831da177e4SLinus Torvalds 	/* this commit is done, exit */
10841da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_commit_left)) <= 0) {
10851da177e4SLinus Torvalds 		if (flushall) {
10861da177e4SLinus Torvalds 			atomic_set(&(jl->j_older_commits_done), 1);
10871da177e4SLinus Torvalds 		}
108890415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10891da177e4SLinus Torvalds 		goto put_jl;
10901da177e4SLinus Torvalds 	}
10911da177e4SLinus Torvalds 
10921da177e4SLinus Torvalds 	if (!list_empty(&jl->j_bh_list)) {
10933d4492f8SChris Mason 		int ret;
10948ebc4232SFrederic Weisbecker 
10958ebc4232SFrederic Weisbecker 		/*
10968ebc4232SFrederic Weisbecker 		 * We might sleep in numerous places inside
10978ebc4232SFrederic Weisbecker 		 * write_ordered_buffers. Relax the write lock.
10988ebc4232SFrederic Weisbecker 		 */
10998ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
11003d4492f8SChris Mason 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
11011da177e4SLinus Torvalds 					    journal, jl, &jl->j_bh_list);
11023d4492f8SChris Mason 		if (ret < 0 && retval == 0)
11033d4492f8SChris Mason 			retval = ret;
11048ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11051da177e4SLinus Torvalds 	}
11061da177e4SLinus Torvalds 	BUG_ON(!list_empty(&jl->j_bh_list));
11071da177e4SLinus Torvalds 	/*
11081da177e4SLinus Torvalds 	 * for the description block and all the log blocks, submit any buffers
1109e0e851cfSChris Mason 	 * that haven't already reached the disk.  Try to write at least 256
1110e0e851cfSChris Mason 	 * log blocks. later on, we will only wait on blocks that correspond
1111e0e851cfSChris Mason 	 * to this transaction, but while we're unplugging we might as well
1112e0e851cfSChris Mason 	 * get a chunk of data on there.
11131da177e4SLinus Torvalds 	 */
11141da177e4SLinus Torvalds 	atomic_inc(&journal->j_async_throttle);
1115e0e851cfSChris Mason 	write_len = jl->j_len + 1;
1116e0e851cfSChris Mason 	if (write_len < 256)
1117e0e851cfSChris Mason 		write_len = 256;
1118e0e851cfSChris Mason 	for (i = 0 ; i < write_len ; i++) {
11191da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
11201da177e4SLinus Torvalds 		    SB_ONDISK_JOURNAL_SIZE(s);
11211da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
1122e0e851cfSChris Mason 		if (tbh) {
1123e0e851cfSChris Mason 			if (buffer_dirty(tbh))
1124e0e851cfSChris Mason 			    ll_rw_block(WRITE, 1, &tbh) ;
11251da177e4SLinus Torvalds 			put_bh(tbh) ;
11261da177e4SLinus Torvalds 		}
1127e0e851cfSChris Mason 	}
11281da177e4SLinus Torvalds 	atomic_dec(&journal->j_async_throttle);
11291da177e4SLinus Torvalds 
11305d5e8156SJeff Mahoney 	/* We're skipping the commit if there's an error */
11315d5e8156SJeff Mahoney 	if (retval || reiserfs_is_journal_aborted(journal))
11325d5e8156SJeff Mahoney 		barrier = 0;
11335d5e8156SJeff Mahoney 
11341da177e4SLinus Torvalds 	/* wait on everything written so far before writing the commit
11351da177e4SLinus Torvalds 	 * if we are in barrier mode, send the commit down now
11361da177e4SLinus Torvalds 	 */
11371da177e4SLinus Torvalds 	barrier = reiserfs_barrier_flush(s);
11381da177e4SLinus Torvalds 	if (barrier) {
11391da177e4SLinus Torvalds 		int ret;
11401da177e4SLinus Torvalds 		lock_buffer(jl->j_commit_bh);
11411da177e4SLinus Torvalds 		ret = submit_barrier_buffer(jl->j_commit_bh);
11421da177e4SLinus Torvalds 		if (ret == -EOPNOTSUPP) {
11431da177e4SLinus Torvalds 			set_buffer_uptodate(jl->j_commit_bh);
11441da177e4SLinus Torvalds 			disable_barrier(s);
11451da177e4SLinus Torvalds 			barrier = 0;
11461da177e4SLinus Torvalds 		}
11471da177e4SLinus Torvalds 	}
11481da177e4SLinus Torvalds 	for (i = 0; i < (jl->j_len + 1); i++) {
11491da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
11501da177e4SLinus Torvalds 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
11511da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
11528ebc4232SFrederic Weisbecker 
11538ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
11541da177e4SLinus Torvalds 		wait_on_buffer(tbh);
11558ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11561da177e4SLinus Torvalds 		// since we're using ll_rw_blk above, it might have skipped over
11571da177e4SLinus Torvalds 		// a locked buffer.  Double check here
11581da177e4SLinus Torvalds 		//
11598ebc4232SFrederic Weisbecker 		/* redundant, sync_dirty_buffer() checks */
11608ebc4232SFrederic Weisbecker 		if (buffer_dirty(tbh)) {
11618ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
11621da177e4SLinus Torvalds 			sync_dirty_buffer(tbh);
11638ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
11648ebc4232SFrederic Weisbecker 		}
11651da177e4SLinus Torvalds 		if (unlikely(!buffer_uptodate(tbh))) {
11661da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
116745b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-601",
116845b03d5eSJeff Mahoney 					 "buffer write failed");
11691da177e4SLinus Torvalds #endif
11701da177e4SLinus Torvalds 			retval = -EIO;
11711da177e4SLinus Torvalds 		}
11721da177e4SLinus Torvalds 		put_bh(tbh);	/* once for journal_find_get_block */
11731da177e4SLinus Torvalds 		put_bh(tbh);	/* once due to original getblk in do_journal_end */
11741da177e4SLinus Torvalds 		atomic_dec(&(jl->j_commit_left));
11751da177e4SLinus Torvalds 	}
11761da177e4SLinus Torvalds 
11771da177e4SLinus Torvalds 	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
11781da177e4SLinus Torvalds 
11791da177e4SLinus Torvalds 	if (!barrier) {
11805d5e8156SJeff Mahoney 		/* If there was a write error in the journal - we can't commit
11815d5e8156SJeff Mahoney 		 * this transaction - it will be invalid and, if successful,
1182beb7dd86SRobert P. J. Day 		 * will just end up propagating the write error out to
11835d5e8156SJeff Mahoney 		 * the file system. */
11845d5e8156SJeff Mahoney 		if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
11851da177e4SLinus Torvalds 			if (buffer_dirty(jl->j_commit_bh))
11861da177e4SLinus Torvalds 				BUG();
11871da177e4SLinus Torvalds 			mark_buffer_dirty(jl->j_commit_bh) ;
11888ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
11891da177e4SLinus Torvalds 			sync_dirty_buffer(jl->j_commit_bh) ;
11908ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
11915d5e8156SJeff Mahoney 		}
11928ebc4232SFrederic Weisbecker 	} else {
11938ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
11941da177e4SLinus Torvalds 		wait_on_buffer(jl->j_commit_bh);
11958ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11968ebc4232SFrederic Weisbecker 	}
11971da177e4SLinus Torvalds 
11981da177e4SLinus Torvalds 	check_barrier_completion(s, jl->j_commit_bh);
11991da177e4SLinus Torvalds 
12001da177e4SLinus Torvalds 	/* If there was a write error in the journal - we can't commit this
12011da177e4SLinus Torvalds 	 * transaction - it will be invalid and, if successful, will just end
1202beb7dd86SRobert P. J. Day 	 * up propagating the write error out to the filesystem. */
12031da177e4SLinus Torvalds 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
12041da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
120545b03d5eSJeff Mahoney 		reiserfs_warning(s, "journal-615", "buffer write failed");
12061da177e4SLinus Torvalds #endif
12071da177e4SLinus Torvalds 		retval = -EIO;
12081da177e4SLinus Torvalds 	}
12091da177e4SLinus Torvalds 	bforget(jl->j_commit_bh);
12101da177e4SLinus Torvalds 	if (journal->j_last_commit_id != 0 &&
12111da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
121245b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1213bd4c625cSLinus Torvalds 				 journal->j_last_commit_id, jl->j_trans_id);
12141da177e4SLinus Torvalds 	}
12151da177e4SLinus Torvalds 	journal->j_last_commit_id = jl->j_trans_id;
12161da177e4SLinus Torvalds 
12171da177e4SLinus Torvalds 	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
12181da177e4SLinus Torvalds 	cleanup_freed_for_journal_list(s, jl);
12191da177e4SLinus Torvalds 
12201da177e4SLinus Torvalds 	retval = retval ? retval : journal->j_errno;
12211da177e4SLinus Torvalds 
12221da177e4SLinus Torvalds 	/* mark the metadata dirty */
12231da177e4SLinus Torvalds 	if (!retval)
12241da177e4SLinus Torvalds 		dirty_one_transaction(s, jl);
12251da177e4SLinus Torvalds 	atomic_dec(&(jl->j_commit_left));
12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds 	if (flushall) {
12281da177e4SLinus Torvalds 		atomic_set(&(jl->j_older_commits_done), 1);
12291da177e4SLinus Torvalds 	}
123090415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
12311da177e4SLinus Torvalds       put_jl:
12321da177e4SLinus Torvalds 	put_journal_list(s, jl);
12331da177e4SLinus Torvalds 
12341da177e4SLinus Torvalds 	if (retval)
1235bd4c625cSLinus Torvalds 		reiserfs_abort(s, retval, "Journal write error in %s",
1236fbe5498bSHarvey Harrison 			       __func__);
123722e2c507SJens Axboe 	put_fs_excl();
12381da177e4SLinus Torvalds 	return retval;
12391da177e4SLinus Torvalds }
12401da177e4SLinus Torvalds 
12411da177e4SLinus Torvalds /*
12421da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
12431da177e4SLinus Torvalds ** returns NULL if it can't find anything
12441da177e4SLinus Torvalds */
1245bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1246bd4c625cSLinus Torvalds 							  reiserfs_journal_cnode
1247bd4c625cSLinus Torvalds 							  *cn)
1248bd4c625cSLinus Torvalds {
12491da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
12501da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
12511da177e4SLinus Torvalds 
12521da177e4SLinus Torvalds 	cn = cn->hprev;
12531da177e4SLinus Torvalds 	while (cn) {
12541da177e4SLinus Torvalds 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
12551da177e4SLinus Torvalds 			return cn->jlist;
12561da177e4SLinus Torvalds 		}
12571da177e4SLinus Torvalds 		cn = cn->hprev;
12581da177e4SLinus Torvalds 	}
12591da177e4SLinus Torvalds 	return NULL;
12601da177e4SLinus Torvalds }
12611da177e4SLinus Torvalds 
1262a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1263a3172027SChris Mason {
1264a3172027SChris Mason 	struct super_block *sb = cn->sb;
1265a3172027SChris Mason 	b_blocknr_t blocknr = cn->blocknr;
1266a3172027SChris Mason 
1267a3172027SChris Mason 	cn = cn->hprev;
1268a3172027SChris Mason 	while (cn) {
1269a3172027SChris Mason 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1270a3172027SChris Mason 		    atomic_read(&cn->jlist->j_commit_left) != 0)
1271a3172027SChris Mason 				    return 0;
1272a3172027SChris Mason 		cn = cn->hprev;
1273a3172027SChris Mason 	}
1274a3172027SChris Mason 	return 1;
1275a3172027SChris Mason }
1276a3172027SChris Mason 
1277bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *,
1278bd4c625cSLinus Torvalds 				struct reiserfs_journal_cnode **,
1279bd4c625cSLinus Torvalds 				struct reiserfs_journal_list *, unsigned long,
1280bd4c625cSLinus Torvalds 				int);
12811da177e4SLinus Torvalds 
12821da177e4SLinus Torvalds /*
12831da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the
12841da177e4SLinus Torvalds ** journal list for this transaction.  Aside from freeing the cnode, this also allows the
12851da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted.
12861da177e4SLinus Torvalds */
1287a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb,
1288bd4c625cSLinus Torvalds 					 struct reiserfs_journal_list *jl,
1289bd4c625cSLinus Torvalds 					 int debug)
1290bd4c625cSLinus Torvalds {
1291a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12921da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
12931da177e4SLinus Torvalds 	cn = jl->j_realblock;
12941da177e4SLinus Torvalds 
12951da177e4SLinus Torvalds 	/* which is better, to lock once around the whole loop, or
12961da177e4SLinus Torvalds 	 ** to lock for each call to remove_journal_hash?
12971da177e4SLinus Torvalds 	 */
12981da177e4SLinus Torvalds 	while (cn) {
12991da177e4SLinus Torvalds 		if (cn->blocknr != 0) {
13001da177e4SLinus Torvalds 			if (debug) {
1301a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2201",
1302bd4c625cSLinus Torvalds 						 "block %u, bh is %d, state %ld",
1303bd4c625cSLinus Torvalds 						 cn->blocknr, cn->bh ? 1 : 0,
1304bd4c625cSLinus Torvalds 						 cn->state);
13051da177e4SLinus Torvalds 			}
13061da177e4SLinus Torvalds 			cn->state = 0;
1307a9dd3643SJeff Mahoney 			remove_journal_hash(sb, journal->j_list_hash_table,
1308bd4c625cSLinus Torvalds 					    jl, cn->blocknr, 1);
13091da177e4SLinus Torvalds 		}
13101da177e4SLinus Torvalds 		last = cn;
13111da177e4SLinus Torvalds 		cn = cn->next;
1312a9dd3643SJeff Mahoney 		free_cnode(sb, last);
13131da177e4SLinus Torvalds 	}
13141da177e4SLinus Torvalds 	jl->j_realblock = NULL;
13151da177e4SLinus Torvalds }
13161da177e4SLinus Torvalds 
13171da177e4SLinus Torvalds /*
13181da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
13191da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
13201da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks.
13211da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list
13221da177e4SLinus Torvalds **
** sb:       super block whose journal header is updated
** offset:   stored as the first unflushed offset (in memory and in the header)
** trans_id: stored as the last flushed transaction id; the header is only
**           rewritten when trans_id >= journal->j_last_flush_trans_id
**
** Returns 0 on success or when nothing had to be written, -EIO when the
** journal is already aborted or the header buffer is not up to date after
** waiting on / writing it.  The reiserfs write lock is dropped around every
** blocking buffer wait or synchronous write and retaken afterwards.
13231da177e4SLinus Torvalds */
1324a9dd3643SJeff Mahoney static int _update_journal_header_block(struct super_block *sb,
1325bd4c625cSLinus Torvalds 					unsigned long offset,
1326600ed416SJeff Mahoney 					unsigned int trans_id)
1327bd4c625cSLinus Torvalds {
13281da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
1329a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
13301da177e4SLinus Torvalds 
13311da177e4SLinus Torvalds 	if (reiserfs_is_journal_aborted(journal))
13321da177e4SLinus Torvalds 		return -EIO;
13331da177e4SLinus Torvalds 
13341da177e4SLinus Torvalds 	if (trans_id >= journal->j_last_flush_trans_id) {
		/* a previous write of the header may still be in flight;
		 * wait for it and fail if it did not complete cleanly
		 */
13351da177e4SLinus Torvalds 		if (buffer_locked((journal->j_header_bh))) {
13368ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
13371da177e4SLinus Torvalds 			wait_on_buffer((journal->j_header_bh));
13388ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
13391da177e4SLinus Torvalds 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
13401da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1341a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "journal-699",
134245b03d5eSJeff Mahoney 						 "buffer write failed");
13431da177e4SLinus Torvalds #endif
13441da177e4SLinus Torvalds 				return -EIO;
13451da177e4SLinus Torvalds 			}
13461da177e4SLinus Torvalds 		}
		/* record the new values in memory first, then in the
		 * little-endian header image held in j_header_bh
		 */
13471da177e4SLinus Torvalds 		journal->j_last_flush_trans_id = trans_id;
13481da177e4SLinus Torvalds 		journal->j_first_unflushed_offset = offset;
1349bd4c625cSLinus Torvalds 		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1350bd4c625cSLinus Torvalds 							b_data);
13511da177e4SLinus Torvalds 		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
13521da177e4SLinus Torvalds 		jh->j_first_unflushed_offset = cpu_to_le32(offset);
13531da177e4SLinus Torvalds 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
13541da177e4SLinus Torvalds 
		/* with barrier flushing enabled, submit the header as a
		 * barrier write; if that comes back -EOPNOTSUPP, barriers
		 * are disabled and we jump into the plain synchronous
		 * write path below
		 */
1355a9dd3643SJeff Mahoney 		if (reiserfs_barrier_flush(sb)) {
13561da177e4SLinus Torvalds 			int ret;
13571da177e4SLinus Torvalds 			lock_buffer(journal->j_header_bh);
13581da177e4SLinus Torvalds 			ret = submit_barrier_buffer(journal->j_header_bh);
13591da177e4SLinus Torvalds 			if (ret == -EOPNOTSUPP) {
13601da177e4SLinus Torvalds 				set_buffer_uptodate(journal->j_header_bh);
1361a9dd3643SJeff Mahoney 				disable_barrier(sb);
13621da177e4SLinus Torvalds 				goto sync;
13631da177e4SLinus Torvalds 			}
13648ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
13651da177e4SLinus Torvalds 			wait_on_buffer(journal->j_header_bh);
13668ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
1367a9dd3643SJeff Mahoney 			check_barrier_completion(sb, journal->j_header_bh);
13681da177e4SLinus Torvalds 		} else {
			/* also the landing point for the -EOPNOTSUPP
			 * fallback above
			 */
13691da177e4SLinus Torvalds 		      sync:
13701da177e4SLinus Torvalds 			set_buffer_dirty(journal->j_header_bh);
13718ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
13721da177e4SLinus Torvalds 			sync_dirty_buffer(journal->j_header_bh);
13738ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
13741da177e4SLinus Torvalds 		}
		/* NOTE(review): the message below mentions journal replay,
		 * but this function is also reached from flush_journal_list
		 * via update_journal_header_block - confirm the wording
		 */
13751da177e4SLinus Torvalds 		if (!buffer_uptodate(journal->j_header_bh)) {
1376a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-837",
137745b03d5eSJeff Mahoney 					 "IO error during journal replay");
13781da177e4SLinus Torvalds 			return -EIO;
13791da177e4SLinus Torvalds 		}
13801da177e4SLinus Torvalds 	}
13811da177e4SLinus Torvalds 	return 0;
13821da177e4SLinus Torvalds }
13831da177e4SLinus Torvalds 
1384a9dd3643SJeff Mahoney static int update_journal_header_block(struct super_block *sb,
13851da177e4SLinus Torvalds 				       unsigned long offset,
1386600ed416SJeff Mahoney 				       unsigned int trans_id)
1387bd4c625cSLinus Torvalds {
1388a9dd3643SJeff Mahoney 	return _update_journal_header_block(sb, offset, trans_id);
13891da177e4SLinus Torvalds }
1390bd4c625cSLinus Torvalds 
13911da177e4SLinus Torvalds /*
13921da177e4SLinus Torvalds ** flush any and all journal lists older than you are
13931da177e4SLinus Torvalds ** can only be called from flush_journal_list
13941da177e4SLinus Torvalds */
1395a9dd3643SJeff Mahoney static int flush_older_journal_lists(struct super_block *sb,
13961da177e4SLinus Torvalds 				     struct reiserfs_journal_list *jl)
13971da177e4SLinus Torvalds {
13981da177e4SLinus Torvalds 	struct list_head *entry;
13991da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
1400a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1401600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
14021da177e4SLinus Torvalds 
14031da177e4SLinus Torvalds 	/* we know we are the only ones flushing things, no extra race
14041da177e4SLinus Torvalds 	 * protection is required.
14051da177e4SLinus Torvalds 	 */
14061da177e4SLinus Torvalds       restart:
14071da177e4SLinus Torvalds 	entry = journal->j_journal_list.next;
14081da177e4SLinus Torvalds 	/* Did we wrap? */
14091da177e4SLinus Torvalds 	if (entry == &journal->j_journal_list)
14101da177e4SLinus Torvalds 		return 0;
14111da177e4SLinus Torvalds 	other_jl = JOURNAL_LIST_ENTRY(entry);
14121da177e4SLinus Torvalds 	if (other_jl->j_trans_id < trans_id) {
14131da177e4SLinus Torvalds 		BUG_ON(other_jl->j_refcount <= 0);
14141da177e4SLinus Torvalds 		/* do not flush all */
1415a9dd3643SJeff Mahoney 		flush_journal_list(sb, other_jl, 0);
14161da177e4SLinus Torvalds 
14171da177e4SLinus Torvalds 		/* other_jl is now deleted from the list */
14181da177e4SLinus Torvalds 		goto restart;
14191da177e4SLinus Torvalds 	}
14201da177e4SLinus Torvalds 	return 0;
14211da177e4SLinus Torvalds }
14221da177e4SLinus Torvalds 
14231da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s,
1424bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
1425bd4c625cSLinus Torvalds {
14261da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
14271da177e4SLinus Torvalds 	if (!list_empty(&jl->j_working_list)) {
14281da177e4SLinus Torvalds 		list_del_init(&jl->j_working_list);
14291da177e4SLinus Torvalds 		journal->j_num_work_lists--;
14301da177e4SLinus Torvalds 	}
14311da177e4SLinus Torvalds }
14321da177e4SLinus Torvalds 
14331da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks
14341da177e4SLinus Torvalds **
14351da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside
14361da177e4SLinus Torvalds ** flush_journal_list
14371da177e4SLinus Torvalds **
14381da177e4SLinus Torvalds ** IMPORTANT.  This can only be called while there are no journal writers,
14391da177e4SLinus Torvalds ** and the journal is locked.  That means it can only be called from
14401da177e4SLinus Torvalds ** do_journal_end, or by journal_release
**
** Outline of the body:
**   1. take j_flush_mutex (flushall) or assert it is already held
**   2. flush_commit_list() for this list
**   3. first pass over jl->j_realblock: submit every buffer this list is
**      responsible for writing and tag its cnode BLOCK_NEEDS_FLUSH
**   4. second pass: wait on the tagged buffers, note write errors, drop refs
**   5. if flushall: flush older lists and update the journal header block
**   6. remove the cnodes, unlink the list, reset its fields, drop our ref
**
** Returns journal->j_errno as sampled after step 4, replaced by the result
** of update_journal_header_block() when that call is made.
14411da177e4SLinus Torvalds */
14421da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s,
1443bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall)
1444bd4c625cSLinus Torvalds {
14451da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
14461da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
14471da177e4SLinus Torvalds 	int count;
14481da177e4SLinus Torvalds 	int was_jwait = 0;
14491da177e4SLinus Torvalds 	int was_dirty = 0;
14501da177e4SLinus Torvalds 	struct buffer_head *saved_bh;
14511da177e4SLinus Torvalds 	unsigned long j_len_saved = jl->j_len;
14521da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
14531da177e4SLinus Torvalds 	int err = 0;
14541da177e4SLinus Torvalds 
14551da177e4SLinus Torvalds 	BUG_ON(j_len_saved <= 0);
14561da177e4SLinus Torvalds 
14571da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) != 0) {
145845b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2048", "called with wcount %d",
14591da177e4SLinus Torvalds 				 atomic_read(&journal->j_wcount));
14601da177e4SLinus Torvalds 	}
14611da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
14621da177e4SLinus Torvalds 
14631da177e4SLinus Torvalds 	/* if flushall == 0, the lock is already held */
	/* (so a successful trylock in that case means the caller broke
	 * the rule, hence the BUG)
	 */
14641da177e4SLinus Torvalds 	if (flushall) {
14658ebc4232SFrederic Weisbecker 		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1466afe70259SJeff Mahoney 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
14671da177e4SLinus Torvalds 		BUG();
14681da177e4SLinus Torvalds 	}
14691da177e4SLinus Torvalds 
14701da177e4SLinus Torvalds 	count = 0;
14711da177e4SLinus Torvalds 	if (j_len_saved > journal->j_trans_max) {
1472c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1473bd4c625cSLinus Torvalds 			       j_len_saved, jl->j_trans_id);
		/* NOTE(review): only reached if reiserfs_panic() returns;
		 * j_flush_mutex is not released on this path - confirm
		 * reiserfs_panic() never returns
		 */
14741da177e4SLinus Torvalds 		return 0;
14751da177e4SLinus Torvalds 	}
14761da177e4SLinus Torvalds 
147722e2c507SJens Axboe 	get_fs_excl();
147822e2c507SJens Axboe 
14791da177e4SLinus Torvalds 	/* if all the work is already done, get out of here */
14801da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
14811da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
14821da177e4SLinus Torvalds 		goto flush_older_and_return;
14831da177e4SLinus Torvalds 	}
14841da177e4SLinus Torvalds 
14851da177e4SLinus Torvalds 	/* start by putting the commit list on disk.  This will also flush
14861da177e4SLinus Torvalds 	 ** the commit lists of any older transactions
14871da177e4SLinus Torvalds 	 */
14881da177e4SLinus Torvalds 	flush_commit_list(s, jl, 1);
14891da177e4SLinus Torvalds 
	/* after the commit the list must be marked LIST_DIRTY, unless the
	 * journal has been aborted
	 */
1490bd4c625cSLinus Torvalds 	if (!(jl->j_state & LIST_DIRTY)
1491bd4c625cSLinus Torvalds 	    && !reiserfs_is_journal_aborted(journal))
14921da177e4SLinus Torvalds 		BUG();
14931da177e4SLinus Torvalds 
14941da177e4SLinus Torvalds 	/* are we done now? */
14951da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
14961da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
14971da177e4SLinus Torvalds 		goto flush_older_and_return;
14981da177e4SLinus Torvalds 	}
14991da177e4SLinus Torvalds 
15001da177e4SLinus Torvalds 	/* loop through each cnode, see if we need to write it,
15011da177e4SLinus Torvalds 	 ** or wait on a more recent transaction, or just ignore it
15021da177e4SLinus Torvalds 	 */
15031da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) != 0) {
1504c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-844", "journal list is flushing, "
1505c3a9c210SJeff Mahoney 			       "wcount is not 0");
15061da177e4SLinus Torvalds 	}
15071da177e4SLinus Torvalds 	cn = jl->j_realblock;
15081da177e4SLinus Torvalds 	while (cn) {
15091da177e4SLinus Torvalds 		was_jwait = 0;
15101da177e4SLinus Torvalds 		was_dirty = 0;
15111da177e4SLinus Torvalds 		saved_bh = NULL;
15121da177e4SLinus Torvalds 		/* blocknr of 0 is no longer in the hash, ignore it */
15131da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
15141da177e4SLinus Torvalds 			goto free_cnode;
15151da177e4SLinus Torvalds 		}
15161da177e4SLinus Torvalds 
15171da177e4SLinus Torvalds 		/* This transaction failed commit. Don't write out to the disk */
15181da177e4SLinus Torvalds 		if (!(jl->j_state & LIST_DIRTY))
15191da177e4SLinus Torvalds 			goto free_cnode;
15201da177e4SLinus Torvalds 
15211da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
15221da177e4SLinus Torvalds 		/* the order is important here.  We check pjl to make sure we
15231da177e4SLinus Torvalds 		 ** don't clear BH_JDirty_wait if we aren't the one writing this
15241da177e4SLinus Torvalds 		 ** block to disk
15251da177e4SLinus Torvalds 		 */
15261da177e4SLinus Torvalds 		if (!pjl && cn->bh) {
15271da177e4SLinus Torvalds 			saved_bh = cn->bh;
15281da177e4SLinus Torvalds 
15291da177e4SLinus Torvalds 			/* we do this to make sure nobody releases the buffer while
15301da177e4SLinus Torvalds 			 ** we are working with it
15311da177e4SLinus Torvalds 			 */
15321da177e4SLinus Torvalds 			get_bh(saved_bh);
15331da177e4SLinus Torvalds 
15341da177e4SLinus Torvalds 			if (buffer_journal_dirty(saved_bh)) {
15351da177e4SLinus Torvalds 				BUG_ON(!can_dirty(cn));
15361da177e4SLinus Torvalds 				was_jwait = 1;
15371da177e4SLinus Torvalds 				was_dirty = 1;
15381da177e4SLinus Torvalds 			} else if (can_dirty(cn)) {
15391da177e4SLinus Torvalds 				/* everything with !pjl && jwait should be writable */
15401da177e4SLinus Torvalds 				BUG();
15411da177e4SLinus Torvalds 			}
15421da177e4SLinus Torvalds 		}
15431da177e4SLinus Torvalds 
15441da177e4SLinus Torvalds 		/* if someone has this block in a newer transaction, just make
15450779bf2dSMatt LaPlante 		 ** sure they are committed, and don't try writing it to disk
15461da177e4SLinus Torvalds 		 */
15471da177e4SLinus Torvalds 		if (pjl) {
15481da177e4SLinus Torvalds 			if (atomic_read(&pjl->j_commit_left))
15491da177e4SLinus Torvalds 				flush_commit_list(s, pjl, 1);
15501da177e4SLinus Torvalds 			goto free_cnode;
15511da177e4SLinus Torvalds 		}
15521da177e4SLinus Torvalds 
15531da177e4SLinus Torvalds 		/* bh == NULL when the block got to disk on its own, OR,
15541da177e4SLinus Torvalds 		 ** the block got freed in a future transaction
15551da177e4SLinus Torvalds 		 */
15561da177e4SLinus Torvalds 		if (saved_bh == NULL) {
15571da177e4SLinus Torvalds 			goto free_cnode;
15581da177e4SLinus Torvalds 		}
15591da177e4SLinus Torvalds 
15601da177e4SLinus Torvalds 		/* this should never happen.  kupdate_one_transaction has this list
15611da177e4SLinus Torvalds 		 ** locked while it works, so we should never see a buffer here that
15621da177e4SLinus Torvalds 		 ** is not marked JDirty_wait
15631da177e4SLinus Torvalds 		 */
15641da177e4SLinus Torvalds 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
156545b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-813",
156645b03d5eSJeff Mahoney 					 "BAD! buffer %llu %cdirty %cjwait, "
15671da177e4SLinus Torvalds 					 "not in a newer tranasction",
1568bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1569bd4c625cSLinus Torvalds 					 b_blocknr, was_dirty ? ' ' : '!',
1570bd4c625cSLinus Torvalds 					 was_jwait ? ' ' : '!');
15711da177e4SLinus Torvalds 		}
15721da177e4SLinus Torvalds 		if (was_dirty) {
15731da177e4SLinus Torvalds 			/* we inc again because saved_bh gets decremented at free_cnode */
15741da177e4SLinus Torvalds 			get_bh(saved_bh);
			/* tag the cnode so the second pass below waits on it */
15751da177e4SLinus Torvalds 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
15761da177e4SLinus Torvalds 			lock_buffer(saved_bh);
15771da177e4SLinus Torvalds 			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
			/* the buffer may have been cleaned before we got the
			 * lock; only submit it if it is still dirty
			 */
15781da177e4SLinus Torvalds 			if (buffer_dirty(saved_bh))
15791da177e4SLinus Torvalds 				submit_logged_buffer(saved_bh);
15801da177e4SLinus Torvalds 			else
15811da177e4SLinus Torvalds 				unlock_buffer(saved_bh);
15821da177e4SLinus Torvalds 			count++;
15831da177e4SLinus Torvalds 		} else {
158445b03d5eSJeff Mahoney 			reiserfs_warning(s, "clm-2082",
158545b03d5eSJeff Mahoney 					 "Unable to flush buffer %llu in %s",
1586bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1587fbe5498bSHarvey Harrison 					 b_blocknr, __func__);
15881da177e4SLinus Torvalds 		}
		/* despite the label name nothing is freed here: we advance
		 * to the next cnode and drop the first get_bh() reference
		 */
15891da177e4SLinus Torvalds 	      free_cnode:
15901da177e4SLinus Torvalds 		last = cn;
15911da177e4SLinus Torvalds 		cn = cn->next;
15921da177e4SLinus Torvalds 		if (saved_bh) {
15931da177e4SLinus Torvalds 			/* we incremented this to keep others from taking the buffer head away */
15941da177e4SLinus Torvalds 			put_bh(saved_bh);
15951da177e4SLinus Torvalds 			if (atomic_read(&(saved_bh->b_count)) < 0) {
159645b03d5eSJeff Mahoney 				reiserfs_warning(s, "journal-945",
159745b03d5eSJeff Mahoney 						 "saved_bh->b_count < 0");
15981da177e4SLinus Torvalds 			}
15991da177e4SLinus Torvalds 		}
16001da177e4SLinus Torvalds 	}
	/* second pass: wait for every buffer tagged in the first pass */
16011da177e4SLinus Torvalds 	if (count > 0) {
16021da177e4SLinus Torvalds 		cn = jl->j_realblock;
16031da177e4SLinus Torvalds 		while (cn) {
16041da177e4SLinus Torvalds 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
16051da177e4SLinus Torvalds 				if (!cn->bh) {
1606c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1011",
1607c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
16081da177e4SLinus Torvalds 				}
16098ebc4232SFrederic Weisbecker 
				/* the write lock is dropped while we sleep on
				 * the buffer, so cn->bh is re-checked after
				 * the lock is retaken
				 */
16108ebc4232SFrederic Weisbecker 				reiserfs_write_unlock(s);
16111da177e4SLinus Torvalds 				wait_on_buffer(cn->bh);
16128ebc4232SFrederic Weisbecker 				reiserfs_write_lock(s);
16138ebc4232SFrederic Weisbecker 
16141da177e4SLinus Torvalds 				if (!cn->bh) {
1615c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1012",
1616c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
16171da177e4SLinus Torvalds 				}
16181da177e4SLinus Torvalds 				if (unlikely(!buffer_uptodate(cn->bh))) {
16191da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
162045b03d5eSJeff Mahoney 					reiserfs_warning(s, "journal-949",
162145b03d5eSJeff Mahoney 							 "buffer write failed");
16221da177e4SLinus Torvalds #endif
16231da177e4SLinus Torvalds 					err = -EIO;
16241da177e4SLinus Torvalds 				}
16251da177e4SLinus Torvalds 				/* note, we must clear the JDirty_wait bit after the up to date
16261da177e4SLinus Torvalds 				 ** check, otherwise we race against our flushpage routine
16271da177e4SLinus Torvalds 				 */
1628bd4c625cSLinus Torvalds 				BUG_ON(!test_clear_buffer_journal_dirty
1629bd4c625cSLinus Torvalds 				       (cn->bh));
16301da177e4SLinus Torvalds 
1631398c95bdSChris Mason 				/* drop one ref for us */
16321da177e4SLinus Torvalds 				put_bh(cn->bh);
1633398c95bdSChris Mason 				/* drop one ref for journal_mark_dirty */
1634398c95bdSChris Mason 				release_buffer_page(cn->bh);
16351da177e4SLinus Torvalds 			}
16361da177e4SLinus Torvalds 			cn = cn->next;
16371da177e4SLinus Torvalds 		}
16381da177e4SLinus Torvalds 	}
16391da177e4SLinus Torvalds 
16401da177e4SLinus Torvalds 	if (err)
1641bd4c625cSLinus Torvalds 		reiserfs_abort(s, -EIO,
1642bd4c625cSLinus Torvalds 			       "Write error while pushing transaction to disk in %s",
1643fbe5498bSHarvey Harrison 			       __func__);
16441da177e4SLinus Torvalds       flush_older_and_return:
16451da177e4SLinus Torvalds 
16461da177e4SLinus Torvalds 	/* before we can update the journal header block, we _must_ flush all
16471da177e4SLinus Torvalds 	 ** real blocks from all older transactions to disk.  This is because
16481da177e4SLinus Torvalds 	 ** once the header block is updated, this transaction will not be
16491da177e4SLinus Torvalds 	 ** replayed after a crash
16501da177e4SLinus Torvalds 	 */
16511da177e4SLinus Torvalds 	if (flushall) {
16521da177e4SLinus Torvalds 		flush_older_journal_lists(s, jl);
16531da177e4SLinus Torvalds 	}
16541da177e4SLinus Torvalds 
	/* the local err from the write loop is overwritten here; from this
	 * point on the result comes from journal->j_errno (presumably set by
	 * reiserfs_abort() above - confirm)
	 */
16551da177e4SLinus Torvalds 	err = journal->j_errno;
16561da177e4SLinus Torvalds 	/* before we can remove everything from the hash tables for this
16571da177e4SLinus Torvalds 	 ** transaction, we must make sure it can never be replayed
16581da177e4SLinus Torvalds 	 **
16591da177e4SLinus Torvalds 	 ** since we are only called from do_journal_end, we know for sure there
16601da177e4SLinus Torvalds 	 ** are no allocations going on while we are flushing journal lists.  So,
16611da177e4SLinus Torvalds 	 ** we only need to update the journal header block for the last list
16621da177e4SLinus Torvalds 	 ** being flushed
16631da177e4SLinus Torvalds 	 */
16641da177e4SLinus Torvalds 	if (!err && flushall) {
1665bd4c625cSLinus Torvalds 		err =
1666bd4c625cSLinus Torvalds 		    update_journal_header_block(s,
1667bd4c625cSLinus Torvalds 						(jl->j_start + jl->j_len +
1668bd4c625cSLinus Torvalds 						 2) % SB_ONDISK_JOURNAL_SIZE(s),
1669bd4c625cSLinus Torvalds 						jl->j_trans_id);
16701da177e4SLinus Torvalds 		if (err)
1671bd4c625cSLinus Torvalds 			reiserfs_abort(s, -EIO,
1672bd4c625cSLinus Torvalds 				       "Write error while updating journal header in %s",
1673fbe5498bSHarvey Harrison 				       __func__);
16741da177e4SLinus Torvalds 	}
16751da177e4SLinus Torvalds 	remove_all_from_journal_list(s, jl, 0);
16761da177e4SLinus Torvalds 	list_del_init(&jl->j_list);
16771da177e4SLinus Torvalds 	journal->j_num_lists--;
16781da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16791da177e4SLinus Torvalds 
	/* transaction ids are expected to be flushed in consecutive order;
	 * warn when this one does not directly follow the previous flush
	 */
16801da177e4SLinus Torvalds 	if (journal->j_last_flush_id != 0 &&
16811da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
168245b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1683bd4c625cSLinus Torvalds 				 journal->j_last_flush_id, jl->j_trans_id);
16841da177e4SLinus Torvalds 	}
16851da177e4SLinus Torvalds 	journal->j_last_flush_id = jl->j_trans_id;
16861da177e4SLinus Torvalds 
16871da177e4SLinus Torvalds 	/* not strictly required since we are freeing the list, but it should
16881da177e4SLinus Torvalds 	 * help find code using dead lists later on
16891da177e4SLinus Torvalds 	 */
16901da177e4SLinus Torvalds 	jl->j_len = 0;
16911da177e4SLinus Torvalds 	atomic_set(&(jl->j_nonzerolen), 0);
16921da177e4SLinus Torvalds 	jl->j_start = 0;
16931da177e4SLinus Torvalds 	jl->j_realblock = NULL;
16941da177e4SLinus Torvalds 	jl->j_commit_bh = NULL;
16951da177e4SLinus Torvalds 	jl->j_trans_id = 0;
16961da177e4SLinus Torvalds 	jl->j_state = 0;
16971da177e4SLinus Torvalds 	put_journal_list(s, jl);
	/* only the flushall caller took j_flush_mutex in this invocation */
16981da177e4SLinus Torvalds 	if (flushall)
1699afe70259SJeff Mahoney 		mutex_unlock(&journal->j_flush_mutex);
170022e2c507SJens Axboe 	put_fs_excl();
17011da177e4SLinus Torvalds 	return err;
17021da177e4SLinus Torvalds }
17031da177e4SLinus Torvalds 
1704a3172027SChris Mason static int test_transaction(struct super_block *s,
1705a3172027SChris Mason                             struct reiserfs_journal_list *jl)
1706a3172027SChris Mason {
1707a3172027SChris Mason 	struct reiserfs_journal_cnode *cn;
1708a3172027SChris Mason 
1709a3172027SChris Mason 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1710a3172027SChris Mason 		return 1;
1711a3172027SChris Mason 
1712a3172027SChris Mason 	cn = jl->j_realblock;
1713a3172027SChris Mason 	while (cn) {
1714a3172027SChris Mason 		/* if the blocknr == 0, this has been cleared from the hash,
1715a3172027SChris Mason 		 ** skip it
1716a3172027SChris Mason 		 */
1717a3172027SChris Mason 		if (cn->blocknr == 0) {
1718a3172027SChris Mason 			goto next;
1719a3172027SChris Mason 		}
1720a3172027SChris Mason 		if (cn->bh && !newer_jl_done(cn))
1721a3172027SChris Mason 			return 0;
1722a3172027SChris Mason 	      next:
1723a3172027SChris Mason 		cn = cn->next;
1724a3172027SChris Mason 		cond_resched();
1725a3172027SChris Mason 	}
1726a3172027SChris Mason 	return 0;
1727a3172027SChris Mason }
1728a3172027SChris Mason 
/*
 * Queue the dirty, writable buffers of one journal list onto @chunk.
 *
 * Marks @jl LIST_TOUCHED and takes it off the work list first.  Then, for
 * every cnode that is still hashed (blocknr != 0), has a buffer, passes
 * can_dirty() and whose buffer is dirty, the buffer is locked and handed to
 * add_to_chunk() with write_chunk as the callback.
 *
 * Returns the number of buffers handed to add_to_chunk(), or 0 when the list
 * is empty (j_len == 0 or j_nonzerolen == 0).
 */
17291da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s,
17301da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
17311da177e4SLinus Torvalds 				 struct buffer_chunk *chunk)
17321da177e4SLinus Torvalds {
17331da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
17341da177e4SLinus Torvalds 	int ret = 0;
17351da177e4SLinus Torvalds 
17361da177e4SLinus Torvalds 	jl->j_state |= LIST_TOUCHED;
17371da177e4SLinus Torvalds 	del_from_work_list(s, jl);
17381da177e4SLinus Torvalds 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
17391da177e4SLinus Torvalds 		return 0;
17401da177e4SLinus Torvalds 	}
17411da177e4SLinus Torvalds 
17421da177e4SLinus Torvalds 	cn = jl->j_realblock;
17431da177e4SLinus Torvalds 	while (cn) {
17441da177e4SLinus Torvalds 		/* if the blocknr == 0, this has been cleared from the hash,
17451da177e4SLinus Torvalds 		 ** skip it
17461da177e4SLinus Torvalds 		 */
17471da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
17481da177e4SLinus Torvalds 			goto next;
17491da177e4SLinus Torvalds 		}
17501da177e4SLinus Torvalds 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
17511da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
17521da177e4SLinus Torvalds 			/* we can race against journal_mark_freed when we try
17531da177e4SLinus Torvalds 			 * to lock_buffer(cn->bh), so we have to inc the buffer
17541da177e4SLinus Torvalds 			 * count, and recheck things after locking
17551da177e4SLinus Torvalds 			 */
17561da177e4SLinus Torvalds 			tmp_bh = cn->bh;
17571da177e4SLinus Torvalds 			get_bh(tmp_bh);
17581da177e4SLinus Torvalds 			lock_buffer(tmp_bh);
17591da177e4SLinus Torvalds 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
17601da177e4SLinus Torvalds 				if (!buffer_journal_dirty(tmp_bh) ||
17611da177e4SLinus Torvalds 				    buffer_journal_prepared(tmp_bh))
17621da177e4SLinus Torvalds 					BUG();
				/* the buffer stays locked on this path;
				 * presumably the chunk write path unlocks it
				 * - confirm against add_to_chunk/write_chunk
				 */
17631da177e4SLinus Torvalds 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
17641da177e4SLinus Torvalds 				ret++;
17651da177e4SLinus Torvalds 			} else {
17661da177e4SLinus Torvalds 				/* note, cn->bh might be null now */
17671da177e4SLinus Torvalds 				unlock_buffer(tmp_bh);
17681da177e4SLinus Torvalds 			}
			/* drop the reference taken before lock_buffer() */
17691da177e4SLinus Torvalds 			put_bh(tmp_bh);
17701da177e4SLinus Torvalds 		}
17711da177e4SLinus Torvalds 	      next:
17721da177e4SLinus Torvalds 		cn = cn->next;
17731da177e4SLinus Torvalds 		cond_resched();
17741da177e4SLinus Torvalds 	}
17751da177e4SLinus Torvalds 	return ret;
17761da177e4SLinus Torvalds }
17771da177e4SLinus Torvalds 
17781da177e4SLinus Torvalds /* used by flush_commit_list */
17791da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
17801da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl)
17811da177e4SLinus Torvalds {
17821da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
17831da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
17841da177e4SLinus Torvalds 	int ret = 0;
17851da177e4SLinus Torvalds 
17861da177e4SLinus Torvalds 	jl->j_state |= LIST_DIRTY;
17871da177e4SLinus Torvalds 	cn = jl->j_realblock;
17881da177e4SLinus Torvalds 	while (cn) {
17891da177e4SLinus Torvalds 		/* look for a more recent transaction that logged this
17901da177e4SLinus Torvalds 		 ** buffer.  Only the most recent transaction with a buffer in
17911da177e4SLinus Torvalds 		 ** it is allowed to send that buffer to disk
17921da177e4SLinus Torvalds 		 */
17931da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
1794bd4c625cSLinus Torvalds 		if (!pjl && cn->blocknr && cn->bh
1795bd4c625cSLinus Torvalds 		    && buffer_journal_dirty(cn->bh)) {
17961da177e4SLinus Torvalds 			BUG_ON(!can_dirty(cn));
17971da177e4SLinus Torvalds 			/* if the buffer is prepared, it will either be logged
17981da177e4SLinus Torvalds 			 * or restored.  If restored, we need to make sure
17991da177e4SLinus Torvalds 			 * it actually gets marked dirty
18001da177e4SLinus Torvalds 			 */
18011da177e4SLinus Torvalds 			clear_buffer_journal_new(cn->bh);
18021da177e4SLinus Torvalds 			if (buffer_journal_prepared(cn->bh)) {
18031da177e4SLinus Torvalds 				set_buffer_journal_restore_dirty(cn->bh);
18041da177e4SLinus Torvalds 			} else {
18051da177e4SLinus Torvalds 				set_buffer_journal_test(cn->bh);
18061da177e4SLinus Torvalds 				mark_buffer_dirty(cn->bh);
18071da177e4SLinus Torvalds 			}
18081da177e4SLinus Torvalds 		}
18091da177e4SLinus Torvalds 		cn = cn->next;
18101da177e4SLinus Torvalds 	}
18111da177e4SLinus Torvalds 	return ret;
18121da177e4SLinus Torvalds }
18131da177e4SLinus Torvalds 
18141da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s,
18151da177e4SLinus Torvalds 				struct reiserfs_journal_list *jl,
18161da177e4SLinus Torvalds 				struct reiserfs_journal_list **next_jl,
1817600ed416SJeff Mahoney 				unsigned int *next_trans_id,
1818bd4c625cSLinus Torvalds 				int num_blocks, int num_trans)
1819bd4c625cSLinus Torvalds {
18201da177e4SLinus Torvalds 	int ret = 0;
18211da177e4SLinus Torvalds 	int written = 0;
18221da177e4SLinus Torvalds 	int transactions_flushed = 0;
1823600ed416SJeff Mahoney 	unsigned int orig_trans_id = jl->j_trans_id;
18241da177e4SLinus Torvalds 	struct buffer_chunk chunk;
18251da177e4SLinus Torvalds 	struct list_head *entry;
18261da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
18271da177e4SLinus Torvalds 	chunk.nr = 0;
18281da177e4SLinus Torvalds 
1829a412f9efSFrederic Weisbecker 	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
18301da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, orig_trans_id)) {
18311da177e4SLinus Torvalds 		goto done;
18321da177e4SLinus Torvalds 	}
18331da177e4SLinus Torvalds 
1834afe70259SJeff Mahoney 	/* we've got j_flush_mutex held, nobody is going to delete any
18351da177e4SLinus Torvalds 	 * of these lists out from underneath us
18361da177e4SLinus Torvalds 	 */
18371da177e4SLinus Torvalds 	while ((num_trans && transactions_flushed < num_trans) ||
18381da177e4SLinus Torvalds 	       (!num_trans && written < num_blocks)) {
18391da177e4SLinus Torvalds 
18401da177e4SLinus Torvalds 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1841bd4c625cSLinus Torvalds 		    atomic_read(&jl->j_commit_left)
1842bd4c625cSLinus Torvalds 		    || !(jl->j_state & LIST_DIRTY)) {
18431da177e4SLinus Torvalds 			del_from_work_list(s, jl);
18441da177e4SLinus Torvalds 			break;
18451da177e4SLinus Torvalds 		}
18461da177e4SLinus Torvalds 		ret = write_one_transaction(s, jl, &chunk);
18471da177e4SLinus Torvalds 
18481da177e4SLinus Torvalds 		if (ret < 0)
18491da177e4SLinus Torvalds 			goto done;
18501da177e4SLinus Torvalds 		transactions_flushed++;
18511da177e4SLinus Torvalds 		written += ret;
18521da177e4SLinus Torvalds 		entry = jl->j_list.next;
18531da177e4SLinus Torvalds 
18541da177e4SLinus Torvalds 		/* did we wrap? */
18551da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list) {
18561da177e4SLinus Torvalds 			break;
18571da177e4SLinus Torvalds 		}
18581da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
18591da177e4SLinus Torvalds 
18601da177e4SLinus Torvalds 		/* don't bother with older transactions */
18611da177e4SLinus Torvalds 		if (jl->j_trans_id <= orig_trans_id)
18621da177e4SLinus Torvalds 			break;
18631da177e4SLinus Torvalds 	}
18641da177e4SLinus Torvalds 	if (chunk.nr) {
18651da177e4SLinus Torvalds 		write_chunk(&chunk);
18661da177e4SLinus Torvalds 	}
18671da177e4SLinus Torvalds 
18681da177e4SLinus Torvalds       done:
1869afe70259SJeff Mahoney 	mutex_unlock(&journal->j_flush_mutex);
18701da177e4SLinus Torvalds 	return ret;
18711da177e4SLinus Torvalds }
18721da177e4SLinus Torvalds 
18731da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use
18741da177e4SLinus Torvalds ** all the journa list slots with tiny transactions.  These
18751da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which
18761da177e4SLinus Torvalds ** adds seeks and slows things down.
18771da177e4SLinus Torvalds **
18781da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists
18791da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal
18801da177e4SLinus Torvalds ** list updates the header block
18811da177e4SLinus Torvalds */
18821da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s,
1883bd4c625cSLinus Torvalds 				    struct reiserfs_journal_list *jl)
1884bd4c625cSLinus Torvalds {
18851da177e4SLinus Torvalds 	unsigned long len = 0;
18861da177e4SLinus Torvalds 	unsigned long cur_len;
18871da177e4SLinus Torvalds 	int ret;
18881da177e4SLinus Torvalds 	int i;
18891da177e4SLinus Torvalds 	int limit = 256;
18901da177e4SLinus Torvalds 	struct reiserfs_journal_list *tjl;
18911da177e4SLinus Torvalds 	struct reiserfs_journal_list *flush_jl;
1892600ed416SJeff Mahoney 	unsigned int trans_id;
18931da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
18941da177e4SLinus Torvalds 
18951da177e4SLinus Torvalds 	flush_jl = tjl = jl;
18961da177e4SLinus Torvalds 
18971da177e4SLinus Torvalds 	/* in data logging mode, try harder to flush a lot of blocks */
18981da177e4SLinus Torvalds 	if (reiserfs_data_log(s))
18991da177e4SLinus Torvalds 		limit = 1024;
19001da177e4SLinus Torvalds 	/* flush for 256 transactions or limit blocks, whichever comes first */
19011da177e4SLinus Torvalds 	for (i = 0; i < 256 && len < limit; i++) {
19021da177e4SLinus Torvalds 		if (atomic_read(&tjl->j_commit_left) ||
19031da177e4SLinus Torvalds 		    tjl->j_trans_id < jl->j_trans_id) {
19041da177e4SLinus Torvalds 			break;
19051da177e4SLinus Torvalds 		}
19061da177e4SLinus Torvalds 		cur_len = atomic_read(&tjl->j_nonzerolen);
19071da177e4SLinus Torvalds 		if (cur_len > 0) {
19081da177e4SLinus Torvalds 			tjl->j_state &= ~LIST_TOUCHED;
19091da177e4SLinus Torvalds 		}
19101da177e4SLinus Torvalds 		len += cur_len;
19111da177e4SLinus Torvalds 		flush_jl = tjl;
19121da177e4SLinus Torvalds 		if (tjl->j_list.next == &journal->j_journal_list)
19131da177e4SLinus Torvalds 			break;
19141da177e4SLinus Torvalds 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
19151da177e4SLinus Torvalds 	}
19161da177e4SLinus Torvalds 	/* try to find a group of blocks we can flush across all the
19171da177e4SLinus Torvalds 	 ** transactions, but only bother if we've actually spanned
19181da177e4SLinus Torvalds 	 ** across multiple lists
19191da177e4SLinus Torvalds 	 */
19201da177e4SLinus Torvalds 	if (flush_jl != jl) {
19211da177e4SLinus Torvalds 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
19221da177e4SLinus Torvalds 	}
19231da177e4SLinus Torvalds 	flush_journal_list(s, flush_jl, 1);
19241da177e4SLinus Torvalds 	return 0;
19251da177e4SLinus Torvalds }
19261da177e4SLinus Torvalds 
19271da177e4SLinus Torvalds /*
19281da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh.
19291da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers.
19301da177e4SLinus Torvalds */
19311da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb,
19321da177e4SLinus Torvalds 			 struct reiserfs_journal_cnode **table,
19331da177e4SLinus Torvalds 			 struct reiserfs_journal_list *jl,
19341da177e4SLinus Torvalds 			 unsigned long block, int remove_freed)
19351da177e4SLinus Torvalds {
19361da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur;
19371da177e4SLinus Torvalds 	struct reiserfs_journal_cnode **head;
19381da177e4SLinus Torvalds 
19391da177e4SLinus Torvalds 	head = &(journal_hash(table, sb, block));
19401da177e4SLinus Torvalds 	if (!head) {
19411da177e4SLinus Torvalds 		return;
19421da177e4SLinus Torvalds 	}
19431da177e4SLinus Torvalds 	cur = *head;
19441da177e4SLinus Torvalds 	while (cur) {
1945bd4c625cSLinus Torvalds 		if (cur->blocknr == block && cur->sb == sb
1946bd4c625cSLinus Torvalds 		    && (jl == NULL || jl == cur->jlist)
1947bd4c625cSLinus Torvalds 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
19481da177e4SLinus Torvalds 			if (cur->hnext) {
19491da177e4SLinus Torvalds 				cur->hnext->hprev = cur->hprev;
19501da177e4SLinus Torvalds 			}
19511da177e4SLinus Torvalds 			if (cur->hprev) {
19521da177e4SLinus Torvalds 				cur->hprev->hnext = cur->hnext;
19531da177e4SLinus Torvalds 			} else {
19541da177e4SLinus Torvalds 				*head = cur->hnext;
19551da177e4SLinus Torvalds 			}
19561da177e4SLinus Torvalds 			cur->blocknr = 0;
19571da177e4SLinus Torvalds 			cur->sb = NULL;
19581da177e4SLinus Torvalds 			cur->state = 0;
19591da177e4SLinus Torvalds 			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
19601da177e4SLinus Torvalds 				atomic_dec(&(cur->jlist->j_nonzerolen));
19611da177e4SLinus Torvalds 			cur->bh = NULL;
19621da177e4SLinus Torvalds 			cur->jlist = NULL;
19631da177e4SLinus Torvalds 		}
19641da177e4SLinus Torvalds 		cur = cur->hnext;
19651da177e4SLinus Torvalds 	}
19661da177e4SLinus Torvalds }
19671da177e4SLinus Torvalds 
1968a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb)
1969bd4c625cSLinus Torvalds {
1970a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1971d739b42bSPekka Enberg 	kfree(journal->j_current_jl);
19721da177e4SLinus Torvalds 	journal->j_num_lists--;
19731da177e4SLinus Torvalds 
19741da177e4SLinus Torvalds 	vfree(journal->j_cnode_free_orig);
1975a9dd3643SJeff Mahoney 	free_list_bitmaps(sb, journal->j_list_bitmap);
1976a9dd3643SJeff Mahoney 	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */
19771da177e4SLinus Torvalds 	if (journal->j_header_bh) {
19781da177e4SLinus Torvalds 		brelse(journal->j_header_bh);
19791da177e4SLinus Torvalds 	}
19801da177e4SLinus Torvalds 	/* j_header_bh is on the journal dev, make sure not to release the journal
19811da177e4SLinus Torvalds 	 * dev until we brelse j_header_bh
19821da177e4SLinus Torvalds 	 */
1983a9dd3643SJeff Mahoney 	release_journal_dev(sb, journal);
19841da177e4SLinus Torvalds 	vfree(journal);
19851da177e4SLinus Torvalds }
19861da177e4SLinus Torvalds 
19871da177e4SLinus Torvalds /*
19881da177e4SLinus Torvalds ** call on unmount.  Only set error to 1 if you haven't made your way out
19891da177e4SLinus Torvalds ** of read_super() yet.  Any other caller must keep error at 0.
19901da177e4SLinus Torvalds */
1991bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th,
1992a9dd3643SJeff Mahoney 			      struct super_block *sb, int error)
1993bd4c625cSLinus Torvalds {
19941da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
19951da177e4SLinus Torvalds 	int flushed = 0;
1996a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
19971da177e4SLinus Torvalds 
19981da177e4SLinus Torvalds 	/* we only want to flush out transactions if we were called with error == 0
19991da177e4SLinus Torvalds 	 */
2000a9dd3643SJeff Mahoney 	if (!error && !(sb->s_flags & MS_RDONLY)) {
20011da177e4SLinus Torvalds 		/* end the current trans */
20021da177e4SLinus Torvalds 		BUG_ON(!th->t_trans_id);
2003a9dd3643SJeff Mahoney 		do_journal_end(th, sb, 10, FLUSH_ALL);
20041da177e4SLinus Torvalds 
20051da177e4SLinus Torvalds 		/* make sure something gets logged to force our way into the flush code */
2006a9dd3643SJeff Mahoney 		if (!journal_join(&myth, sb, 1)) {
2007a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
2008a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
2009bd4c625cSLinus Torvalds 						     1);
2010a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
2011a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
2012a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
20131da177e4SLinus Torvalds 			flushed = 1;
20141da177e4SLinus Torvalds 		}
20151da177e4SLinus Torvalds 	}
20161da177e4SLinus Torvalds 
20171da177e4SLinus Torvalds 	/* this also catches errors during the do_journal_end above */
20181da177e4SLinus Torvalds 	if (!error && reiserfs_is_journal_aborted(journal)) {
20191da177e4SLinus Torvalds 		memset(&myth, 0, sizeof(myth));
2020a9dd3643SJeff Mahoney 		if (!journal_join_abort(&myth, sb, 1)) {
2021a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
2022a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
2023bd4c625cSLinus Torvalds 						     1);
2024a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
2025a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
2026a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
20271da177e4SLinus Torvalds 		}
20281da177e4SLinus Torvalds 	}
20291da177e4SLinus Torvalds 
20301da177e4SLinus Torvalds 	reiserfs_mounted_fs_count--;
20311da177e4SLinus Torvalds 	/* wait for all commits to finish */
2032a9dd3643SJeff Mahoney 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
20338ebc4232SFrederic Weisbecker 
20348ebc4232SFrederic Weisbecker 	/*
20358ebc4232SFrederic Weisbecker 	 * We must release the write lock here because
20368ebc4232SFrederic Weisbecker 	 * the workqueue job (flush_async_commit) needs this lock
20378ebc4232SFrederic Weisbecker 	 */
20388ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
20391da177e4SLinus Torvalds 	flush_workqueue(commit_wq);
20408ebc4232SFrederic Weisbecker 
20411da177e4SLinus Torvalds 	if (!reiserfs_mounted_fs_count) {
20421da177e4SLinus Torvalds 		destroy_workqueue(commit_wq);
20431da177e4SLinus Torvalds 		commit_wq = NULL;
20441da177e4SLinus Torvalds 	}
20458ebc4232SFrederic Weisbecker 	reiserfs_write_lock(sb);
20461da177e4SLinus Torvalds 
2047a9dd3643SJeff Mahoney 	free_journal_ram(sb);
20481da177e4SLinus Torvalds 
20491da177e4SLinus Torvalds 	return 0;
20501da177e4SLinus Torvalds }
20511da177e4SLinus Torvalds 
/*
 * Unmount entry point: flush all journal transactions and release all
 * allocated journal memory.  Forwards to do_journal_release() with the
 * error flag cleared and returns its result.
 */
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *sb)
{
	const int error = 0;

	return do_journal_release(th, sb, error);
}
2060bd4c625cSLinus Torvalds 
/*
 * Only call from an error condition inside reiserfs_read_super!
 * Forwards to do_journal_release() with the error flag set and returns
 * its result.
 */
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *sb)
{
	const int error = 1;

	return do_journal_release(th, sb, error);
}
20691da177e4SLinus Torvalds 
20701da177e4SLinus Torvalds /* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
2071a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb,
2072bd4c625cSLinus Torvalds 				       struct reiserfs_journal_desc *desc,
2073bd4c625cSLinus Torvalds 				       struct reiserfs_journal_commit *commit)
2074bd4c625cSLinus Torvalds {
20751da177e4SLinus Torvalds 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
20761da177e4SLinus Torvalds 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
2077a9dd3643SJeff Mahoney 	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
2078bd4c625cSLinus Torvalds 	    get_commit_trans_len(commit) <= 0) {
20791da177e4SLinus Torvalds 		return 1;
20801da177e4SLinus Torvalds 	}
20811da177e4SLinus Torvalds 	return 0;
20821da177e4SLinus Torvalds }
2083bd4c625cSLinus Torvalds 
20841da177e4SLinus Torvalds /* returns 0 if it did not find a description block
20851da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block
20861da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid
20871da177e4SLinus Torvalds */
2088a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb,
2089bd4c625cSLinus Torvalds 					struct buffer_head *d_bh,
2090600ed416SJeff Mahoney 					unsigned int *oldest_invalid_trans_id,
2091bd4c625cSLinus Torvalds 					unsigned long *newest_mount_id)
2092bd4c625cSLinus Torvalds {
20931da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
20941da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
20951da177e4SLinus Torvalds 	struct buffer_head *c_bh;
20961da177e4SLinus Torvalds 	unsigned long offset;
20971da177e4SLinus Torvalds 
20981da177e4SLinus Torvalds 	if (!d_bh)
20991da177e4SLinus Torvalds 		return 0;
21001da177e4SLinus Torvalds 
21011da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2102bd4c625cSLinus Torvalds 	if (get_desc_trans_len(desc) > 0
2103bd4c625cSLinus Torvalds 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
2104bd4c625cSLinus Torvalds 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
2105bd4c625cSLinus Torvalds 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
2106a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2107bd4c625cSLinus Torvalds 				       "journal-986: transaction "
21081da177e4SLinus Torvalds 				       "is valid returning because trans_id %d is greater than "
2109bd4c625cSLinus Torvalds 				       "oldest_invalid %lu",
2110bd4c625cSLinus Torvalds 				       get_desc_trans_id(desc),
21111da177e4SLinus Torvalds 				       *oldest_invalid_trans_id);
21121da177e4SLinus Torvalds 			return 0;
21131da177e4SLinus Torvalds 		}
2114bd4c625cSLinus Torvalds 		if (newest_mount_id
2115bd4c625cSLinus Torvalds 		    && *newest_mount_id > get_desc_mount_id(desc)) {
2116a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2117bd4c625cSLinus Torvalds 				       "journal-1087: transaction "
21181da177e4SLinus Torvalds 				       "is valid returning because mount_id %d is less than "
2119bd4c625cSLinus Torvalds 				       "newest_mount_id %lu",
2120bd4c625cSLinus Torvalds 				       get_desc_mount_id(desc),
21211da177e4SLinus Torvalds 				       *newest_mount_id);
21221da177e4SLinus Torvalds 			return -1;
21231da177e4SLinus Torvalds 		}
2124a9dd3643SJeff Mahoney 		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2125a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2018",
212645b03d5eSJeff Mahoney 					 "Bad transaction length %d "
212745b03d5eSJeff Mahoney 					 "encountered, ignoring transaction",
2128bd4c625cSLinus Torvalds 					 get_desc_trans_len(desc));
21291da177e4SLinus Torvalds 			return -1;
21301da177e4SLinus Torvalds 		}
2131a9dd3643SJeff Mahoney 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
21321da177e4SLinus Torvalds 
21331da177e4SLinus Torvalds 		/* ok, we have a journal description block, lets see if the transaction was valid */
2134bd4c625cSLinus Torvalds 		c_bh =
2135a9dd3643SJeff Mahoney 		    journal_bread(sb,
2136a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2137bd4c625cSLinus Torvalds 				  ((offset + get_desc_trans_len(desc) +
2138a9dd3643SJeff Mahoney 				    1) % SB_ONDISK_JOURNAL_SIZE(sb)));
21391da177e4SLinus Torvalds 		if (!c_bh)
21401da177e4SLinus Torvalds 			return 0;
21411da177e4SLinus Torvalds 		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2142a9dd3643SJeff Mahoney 		if (journal_compare_desc_commit(sb, desc, commit)) {
2143a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
21441da177e4SLinus Torvalds 				       "journal_transaction_is_valid, commit offset %ld had bad "
21451da177e4SLinus Torvalds 				       "time %d or length %d",
2146bd4c625cSLinus Torvalds 				       c_bh->b_blocknr -
2147a9dd3643SJeff Mahoney 				       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
21481da177e4SLinus Torvalds 				       get_commit_trans_id(commit),
21491da177e4SLinus Torvalds 				       get_commit_trans_len(commit));
21501da177e4SLinus Torvalds 			brelse(c_bh);
21511da177e4SLinus Torvalds 			if (oldest_invalid_trans_id) {
2152bd4c625cSLinus Torvalds 				*oldest_invalid_trans_id =
2153bd4c625cSLinus Torvalds 				    get_desc_trans_id(desc);
2154a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2155bd4c625cSLinus Torvalds 					       "journal-1004: "
21561da177e4SLinus Torvalds 					       "transaction_is_valid setting oldest invalid trans_id "
2157bd4c625cSLinus Torvalds 					       "to %d",
2158bd4c625cSLinus Torvalds 					       get_desc_trans_id(desc));
21591da177e4SLinus Torvalds 			}
21601da177e4SLinus Torvalds 			return -1;
21611da177e4SLinus Torvalds 		}
21621da177e4SLinus Torvalds 		brelse(c_bh);
2163a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2164bd4c625cSLinus Torvalds 			       "journal-1006: found valid "
21651da177e4SLinus Torvalds 			       "transaction start offset %llu, len %d id %d",
2166bd4c625cSLinus Torvalds 			       d_bh->b_blocknr -
2167a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2168bd4c625cSLinus Torvalds 			       get_desc_trans_len(desc),
2169bd4c625cSLinus Torvalds 			       get_desc_trans_id(desc));
21701da177e4SLinus Torvalds 		return 1;
21711da177e4SLinus Torvalds 	} else {
21721da177e4SLinus Torvalds 		return 0;
21731da177e4SLinus Torvalds 	}
21741da177e4SLinus Torvalds }
21751da177e4SLinus Torvalds 
/* Call brelse() on each of the first @num entries of @heads, in order. */
static void brelse_array(struct buffer_head **heads, int num)
{
	int idx = 0;

	while (idx < num) {
		brelse(heads[idx]);
		idx++;
	}
}
21831da177e4SLinus Torvalds 
21841da177e4SLinus Torvalds /*
21851da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions,
21861da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction
21871da177e4SLinus Torvalds ** is invalid, or too old.
21881da177e4SLinus Torvalds */
2189a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb,
2190bd4c625cSLinus Torvalds 				    unsigned long cur_dblock,
2191bd4c625cSLinus Torvalds 				    unsigned long oldest_start,
2192600ed416SJeff Mahoney 				    unsigned int oldest_trans_id,
2193bd4c625cSLinus Torvalds 				    unsigned long newest_mount_id)
2194bd4c625cSLinus Torvalds {
2195a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
21961da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
21971da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
2198600ed416SJeff Mahoney 	unsigned int trans_id = 0;
21991da177e4SLinus Torvalds 	struct buffer_head *c_bh;
22001da177e4SLinus Torvalds 	struct buffer_head *d_bh;
22011da177e4SLinus Torvalds 	struct buffer_head **log_blocks = NULL;
22021da177e4SLinus Torvalds 	struct buffer_head **real_blocks = NULL;
2203600ed416SJeff Mahoney 	unsigned int trans_offset;
22041da177e4SLinus Torvalds 	int i;
22051da177e4SLinus Torvalds 	int trans_half;
22061da177e4SLinus Torvalds 
2207a9dd3643SJeff Mahoney 	d_bh = journal_bread(sb, cur_dblock);
22081da177e4SLinus Torvalds 	if (!d_bh)
22091da177e4SLinus Torvalds 		return 1;
22101da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2211a9dd3643SJeff Mahoney 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2212a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
22131da177e4SLinus Torvalds 		       "journal_read_transaction, offset %llu, len %d mount_id %d",
2214a9dd3643SJeff Mahoney 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
22151da177e4SLinus Torvalds 		       get_desc_trans_len(desc), get_desc_mount_id(desc));
22161da177e4SLinus Torvalds 	if (get_desc_trans_id(desc) < oldest_trans_id) {
2217a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
22181da177e4SLinus Torvalds 			       "journal_read_trans skipping because %lu is too old",
2219bd4c625cSLinus Torvalds 			       cur_dblock -
2220a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb));
22211da177e4SLinus Torvalds 		brelse(d_bh);
22221da177e4SLinus Torvalds 		return 1;
22231da177e4SLinus Torvalds 	}
22241da177e4SLinus Torvalds 	if (get_desc_mount_id(desc) != newest_mount_id) {
2225a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
22261da177e4SLinus Torvalds 			       "journal_read_trans skipping because %d is != "
22271da177e4SLinus Torvalds 			       "newest_mount_id %lu", get_desc_mount_id(desc),
22281da177e4SLinus Torvalds 			       newest_mount_id);
22291da177e4SLinus Torvalds 		brelse(d_bh);
22301da177e4SLinus Torvalds 		return 1;
22311da177e4SLinus Torvalds 	}
2232a9dd3643SJeff Mahoney 	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
22331da177e4SLinus Torvalds 			     ((trans_offset + get_desc_trans_len(desc) + 1) %
2234a9dd3643SJeff Mahoney 			      SB_ONDISK_JOURNAL_SIZE(sb)));
22351da177e4SLinus Torvalds 	if (!c_bh) {
22361da177e4SLinus Torvalds 		brelse(d_bh);
22371da177e4SLinus Torvalds 		return 1;
22381da177e4SLinus Torvalds 	}
22391da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2240a9dd3643SJeff Mahoney 	if (journal_compare_desc_commit(sb, desc, commit)) {
2241a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2242bd4c625cSLinus Torvalds 			       "journal_read_transaction, "
22431da177e4SLinus Torvalds 			       "commit offset %llu had bad time %d or length %d",
2244bd4c625cSLinus Torvalds 			       c_bh->b_blocknr -
2245a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2246bd4c625cSLinus Torvalds 			       get_commit_trans_id(commit),
2247bd4c625cSLinus Torvalds 			       get_commit_trans_len(commit));
22481da177e4SLinus Torvalds 		brelse(c_bh);
22491da177e4SLinus Torvalds 		brelse(d_bh);
22501da177e4SLinus Torvalds 		return 1;
22511da177e4SLinus Torvalds 	}
22521da177e4SLinus Torvalds 	trans_id = get_desc_trans_id(desc);
22531da177e4SLinus Torvalds 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
2254d739b42bSPekka Enberg 	log_blocks = kmalloc(get_desc_trans_len(desc) *
2255d739b42bSPekka Enberg 			     sizeof(struct buffer_head *), GFP_NOFS);
2256d739b42bSPekka Enberg 	real_blocks = kmalloc(get_desc_trans_len(desc) *
2257d739b42bSPekka Enberg 			      sizeof(struct buffer_head *), GFP_NOFS);
22581da177e4SLinus Torvalds 	if (!log_blocks || !real_blocks) {
22591da177e4SLinus Torvalds 		brelse(c_bh);
22601da177e4SLinus Torvalds 		brelse(d_bh);
2261d739b42bSPekka Enberg 		kfree(log_blocks);
2262d739b42bSPekka Enberg 		kfree(real_blocks);
2263a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1169",
226445b03d5eSJeff Mahoney 				 "kmalloc failed, unable to mount FS");
22651da177e4SLinus Torvalds 		return -1;
22661da177e4SLinus Torvalds 	}
22671da177e4SLinus Torvalds 	/* get all the buffer heads */
2268a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
22691da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
2270bd4c625cSLinus Torvalds 		log_blocks[i] =
2271a9dd3643SJeff Mahoney 		    journal_getblk(sb,
2272a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2273bd4c625cSLinus Torvalds 				   (trans_offset + 1 +
2274a9dd3643SJeff Mahoney 				    i) % SB_ONDISK_JOURNAL_SIZE(sb));
22751da177e4SLinus Torvalds 		if (i < trans_half) {
2276bd4c625cSLinus Torvalds 			real_blocks[i] =
2277a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2278bd4c625cSLinus Torvalds 				      le32_to_cpu(desc->j_realblock[i]));
22791da177e4SLinus Torvalds 		} else {
2280bd4c625cSLinus Torvalds 			real_blocks[i] =
2281a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2282bd4c625cSLinus Torvalds 				      le32_to_cpu(commit->
2283bd4c625cSLinus Torvalds 						  j_realblock[i - trans_half]));
22841da177e4SLinus Torvalds 		}
2285a9dd3643SJeff Mahoney 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2286a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1207",
228745b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
228845b03d5eSJeff Mahoney 					 "Block to replay is outside of "
228945b03d5eSJeff Mahoney 					 "filesystem");
22901da177e4SLinus Torvalds 			goto abort_replay;
22911da177e4SLinus Torvalds 		}
22921da177e4SLinus Torvalds 		/* make sure we don't try to replay onto log or reserved area */
2293bd4c625cSLinus Torvalds 		if (is_block_in_log_or_reserved_area
2294a9dd3643SJeff Mahoney 		    (sb, real_blocks[i]->b_blocknr)) {
2295a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1204",
229645b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
229745b03d5eSJeff Mahoney 					 "Trying to replay onto a log block");
22981da177e4SLinus Torvalds 		      abort_replay:
22991da177e4SLinus Torvalds 			brelse_array(log_blocks, i);
23001da177e4SLinus Torvalds 			brelse_array(real_blocks, i);
23011da177e4SLinus Torvalds 			brelse(c_bh);
23021da177e4SLinus Torvalds 			brelse(d_bh);
2303d739b42bSPekka Enberg 			kfree(log_blocks);
2304d739b42bSPekka Enberg 			kfree(real_blocks);
23051da177e4SLinus Torvalds 			return -1;
23061da177e4SLinus Torvalds 		}
23071da177e4SLinus Torvalds 	}
23081da177e4SLinus Torvalds 	/* read in the log blocks, memcpy to the corresponding real block */
23091da177e4SLinus Torvalds 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
23101da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
23118ebc4232SFrederic Weisbecker 
23128ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
23131da177e4SLinus Torvalds 		wait_on_buffer(log_blocks[i]);
23148ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
23158ebc4232SFrederic Weisbecker 
23161da177e4SLinus Torvalds 		if (!buffer_uptodate(log_blocks[i])) {
2317a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1212",
231845b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
231945b03d5eSJeff Mahoney 					 "buffer write failed");
2320bd4c625cSLinus Torvalds 			brelse_array(log_blocks + i,
2321bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
23221da177e4SLinus Torvalds 			brelse_array(real_blocks, get_desc_trans_len(desc));
23231da177e4SLinus Torvalds 			brelse(c_bh);
23241da177e4SLinus Torvalds 			brelse(d_bh);
2325d739b42bSPekka Enberg 			kfree(log_blocks);
2326d739b42bSPekka Enberg 			kfree(real_blocks);
23271da177e4SLinus Torvalds 			return -1;
23281da177e4SLinus Torvalds 		}
2329bd4c625cSLinus Torvalds 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2330bd4c625cSLinus Torvalds 		       real_blocks[i]->b_size);
23311da177e4SLinus Torvalds 		set_buffer_uptodate(real_blocks[i]);
23321da177e4SLinus Torvalds 		brelse(log_blocks[i]);
23331da177e4SLinus Torvalds 	}
23341da177e4SLinus Torvalds 	/* flush out the real blocks */
23351da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
23361da177e4SLinus Torvalds 		set_buffer_dirty(real_blocks[i]);
233753778ffdSJan Kara 		ll_rw_block(SWRITE, 1, real_blocks + i);
23381da177e4SLinus Torvalds 	}
23391da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
23401da177e4SLinus Torvalds 		wait_on_buffer(real_blocks[i]);
23411da177e4SLinus Torvalds 		if (!buffer_uptodate(real_blocks[i])) {
2342a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1226",
234345b03d5eSJeff Mahoney 					 "REPLAY FAILURE, fsck required! "
234445b03d5eSJeff Mahoney 					 "buffer write failed");
2345bd4c625cSLinus Torvalds 			brelse_array(real_blocks + i,
2346bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
23471da177e4SLinus Torvalds 			brelse(c_bh);
23481da177e4SLinus Torvalds 			brelse(d_bh);
2349d739b42bSPekka Enberg 			kfree(log_blocks);
2350d739b42bSPekka Enberg 			kfree(real_blocks);
23511da177e4SLinus Torvalds 			return -1;
23521da177e4SLinus Torvalds 		}
23531da177e4SLinus Torvalds 		brelse(real_blocks[i]);
23541da177e4SLinus Torvalds 	}
2355bd4c625cSLinus Torvalds 	cur_dblock =
2356a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2357bd4c625cSLinus Torvalds 	    ((trans_offset + get_desc_trans_len(desc) +
2358a9dd3643SJeff Mahoney 	      2) % SB_ONDISK_JOURNAL_SIZE(sb));
2359a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2360bd4c625cSLinus Torvalds 		       "journal-1095: setting journal " "start to offset %ld",
2361a9dd3643SJeff Mahoney 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
23621da177e4SLinus Torvalds 
23631da177e4SLinus Torvalds 	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2364a9dd3643SJeff Mahoney 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
23651da177e4SLinus Torvalds 	journal->j_last_flush_trans_id = trans_id;
23661da177e4SLinus Torvalds 	journal->j_trans_id = trans_id + 1;
2367a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
2368a44c94a7SAlexander Zarochentsev 	if (journal->j_trans_id == 0)
2369a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
23701da177e4SLinus Torvalds 	brelse(c_bh);
23711da177e4SLinus Torvalds 	brelse(d_bh);
2372d739b42bSPekka Enberg 	kfree(log_blocks);
2373d739b42bSPekka Enberg 	kfree(real_blocks);
23741da177e4SLinus Torvalds 	return 0;
23751da177e4SLinus Torvalds }
23761da177e4SLinus Torvalds 
23771da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize
23781da177e4SLinus Torvalds    size (but no more than BUFNR blocks at a time). This proved to improve
23791da177e4SLinus Torvalds    mounting speed on self-rebuilding raid5 arrays at least.
23801da177e4SLinus Torvalds    Right now it is only used from journal code. But later we might use it
23811da177e4SLinus Torvalds    from other places.
23821da177e4SLinus Torvalds    Note: Do not use journal_getblk/sb_getblk functions here! */
23833ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev,
23843ee16670SJeff Mahoney 					   b_blocknr_t block, int bufsize,
23853ee16670SJeff Mahoney 					   b_blocknr_t max_block)
23861da177e4SLinus Torvalds {
23871da177e4SLinus Torvalds 	struct buffer_head *bhlist[BUFNR];
23881da177e4SLinus Torvalds 	unsigned int blocks = BUFNR;
23891da177e4SLinus Torvalds 	struct buffer_head *bh;
23901da177e4SLinus Torvalds 	int i, j;
23911da177e4SLinus Torvalds 
23921da177e4SLinus Torvalds 	bh = __getblk(dev, block, bufsize);
23931da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23941da177e4SLinus Torvalds 		return (bh);
23951da177e4SLinus Torvalds 
23961da177e4SLinus Torvalds 	if (block + BUFNR > max_block) {
23971da177e4SLinus Torvalds 		blocks = max_block - block;
23981da177e4SLinus Torvalds 	}
23991da177e4SLinus Torvalds 	bhlist[0] = bh;
24001da177e4SLinus Torvalds 	j = 1;
24011da177e4SLinus Torvalds 	for (i = 1; i < blocks; i++) {
24021da177e4SLinus Torvalds 		bh = __getblk(dev, block + i, bufsize);
24031da177e4SLinus Torvalds 		if (buffer_uptodate(bh)) {
24041da177e4SLinus Torvalds 			brelse(bh);
24051da177e4SLinus Torvalds 			break;
2406bd4c625cSLinus Torvalds 		} else
2407bd4c625cSLinus Torvalds 			bhlist[j++] = bh;
24081da177e4SLinus Torvalds 	}
24091da177e4SLinus Torvalds 	ll_rw_block(READ, j, bhlist);
24101da177e4SLinus Torvalds 	for (i = 1; i < j; i++)
24111da177e4SLinus Torvalds 		brelse(bhlist[i]);
24121da177e4SLinus Torvalds 	bh = bhlist[0];
24131da177e4SLinus Torvalds 	wait_on_buffer(bh);
24141da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
24151da177e4SLinus Torvalds 		return bh;
24161da177e4SLinus Torvalds 	brelse(bh);
24171da177e4SLinus Torvalds 	return NULL;
24181da177e4SLinus Torvalds }
24191da177e4SLinus Torvalds 
24201da177e4SLinus Torvalds /*
24211da177e4SLinus Torvalds ** read and replay the log
24221da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
24231da177e4SLinus Torvalds ** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
24241da177e4SLinus Torvalds **
24251da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
24261da177e4SLinus Torvalds **
24271da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly.
24281da177e4SLinus Torvalds */
2429a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb)
2430bd4c625cSLinus Torvalds {
2431a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
24321da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
2433600ed416SJeff Mahoney 	unsigned int oldest_trans_id = 0;
2434600ed416SJeff Mahoney 	unsigned int oldest_invalid_trans_id = 0;
24351da177e4SLinus Torvalds 	time_t start;
24361da177e4SLinus Torvalds 	unsigned long oldest_start = 0;
24371da177e4SLinus Torvalds 	unsigned long cur_dblock = 0;
24381da177e4SLinus Torvalds 	unsigned long newest_mount_id = 9;
24391da177e4SLinus Torvalds 	struct buffer_head *d_bh;
24401da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
24411da177e4SLinus Torvalds 	int valid_journal_header = 0;
24421da177e4SLinus Torvalds 	int replay_count = 0;
24431da177e4SLinus Torvalds 	int continue_replay = 1;
24441da177e4SLinus Torvalds 	int ret;
24451da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
24461da177e4SLinus Torvalds 
2447a9dd3643SJeff Mahoney 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2448a9dd3643SJeff Mahoney 	reiserfs_info(sb, "checking transaction log (%s)\n",
24491da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b));
24501da177e4SLinus Torvalds 	start = get_seconds();
24511da177e4SLinus Torvalds 
24521da177e4SLinus Torvalds 	/* step 1, read in the journal header block.  Check the transaction it says
24531da177e4SLinus Torvalds 	 ** is the first unflushed, and if that transaction is not valid,
24541da177e4SLinus Torvalds 	 ** replay is done
24551da177e4SLinus Torvalds 	 */
2456a9dd3643SJeff Mahoney 	journal->j_header_bh = journal_bread(sb,
2457a9dd3643SJeff Mahoney 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2458a9dd3643SJeff Mahoney 					     + SB_ONDISK_JOURNAL_SIZE(sb));
24591da177e4SLinus Torvalds 	if (!journal->j_header_bh) {
24601da177e4SLinus Torvalds 		return 1;
24611da177e4SLinus Torvalds 	}
24621da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2463c499ec24SVladimir V. Saveliev 	if (le32_to_cpu(jh->j_first_unflushed_offset) <
2464a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_SIZE(sb)
2465bd4c625cSLinus Torvalds 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2466bd4c625cSLinus Torvalds 		oldest_start =
2467a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
24681da177e4SLinus Torvalds 		    le32_to_cpu(jh->j_first_unflushed_offset);
24691da177e4SLinus Torvalds 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
24701da177e4SLinus Torvalds 		newest_mount_id = le32_to_cpu(jh->j_mount_id);
2471a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2472bd4c625cSLinus Torvalds 			       "journal-1153: found in "
24731da177e4SLinus Torvalds 			       "header: first_unflushed_offset %d, last_flushed_trans_id "
24741da177e4SLinus Torvalds 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
24751da177e4SLinus Torvalds 			       le32_to_cpu(jh->j_last_flush_trans_id));
24761da177e4SLinus Torvalds 		valid_journal_header = 1;
24771da177e4SLinus Torvalds 
24781da177e4SLinus Torvalds 		/* now, we try to read the first unflushed offset.  If it is not valid,
24791da177e4SLinus Torvalds 		 ** there is nothing more we can do, and it makes no sense to read
24801da177e4SLinus Torvalds 		 ** through the whole log.
24811da177e4SLinus Torvalds 		 */
2482bd4c625cSLinus Torvalds 		d_bh =
2483a9dd3643SJeff Mahoney 		    journal_bread(sb,
2484a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2485bd4c625cSLinus Torvalds 				  le32_to_cpu(jh->j_first_unflushed_offset));
2486a9dd3643SJeff Mahoney 		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
24871da177e4SLinus Torvalds 		if (!ret) {
24881da177e4SLinus Torvalds 			continue_replay = 0;
24891da177e4SLinus Torvalds 		}
24901da177e4SLinus Torvalds 		brelse(d_bh);
24911da177e4SLinus Torvalds 		goto start_log_replay;
24921da177e4SLinus Torvalds 	}
24931da177e4SLinus Torvalds 
2494a9dd3643SJeff Mahoney 	if (continue_replay && bdev_read_only(sb->s_bdev)) {
2495a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "clm-2076",
249645b03d5eSJeff Mahoney 				 "device is readonly, unable to replay log");
24971da177e4SLinus Torvalds 		return -1;
24981da177e4SLinus Torvalds 	}
24991da177e4SLinus Torvalds 
25001da177e4SLinus Torvalds 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
25011da177e4SLinus Torvalds 	 ** all the valid transactions, and pick out the oldest.
25021da177e4SLinus Torvalds 	 */
2503bd4c625cSLinus Torvalds 	while (continue_replay
2504bd4c625cSLinus Torvalds 	       && cur_dblock <
2505a9dd3643SJeff Mahoney 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2506a9dd3643SJeff Mahoney 		SB_ONDISK_JOURNAL_SIZE(sb))) {
25071da177e4SLinus Torvalds 		/* Note that it is required for blocksize of primary fs device and journal
25081da177e4SLinus Torvalds 		   device to be the same */
2509bd4c625cSLinus Torvalds 		d_bh =
2510bd4c625cSLinus Torvalds 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
2511a9dd3643SJeff Mahoney 				    sb->s_blocksize,
2512a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2513a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_SIZE(sb));
2514bd4c625cSLinus Torvalds 		ret =
2515a9dd3643SJeff Mahoney 		    journal_transaction_is_valid(sb, d_bh,
2516bd4c625cSLinus Torvalds 						 &oldest_invalid_trans_id,
2517bd4c625cSLinus Torvalds 						 &newest_mount_id);
25181da177e4SLinus Torvalds 		if (ret == 1) {
25191da177e4SLinus Torvalds 			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
25201da177e4SLinus Torvalds 			if (oldest_start == 0) {	/* init all oldest_ values */
25211da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
25221da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
25231da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2524a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2525bd4c625cSLinus Torvalds 					       "journal-1179: Setting "
25261da177e4SLinus Torvalds 					       "oldest_start to offset %llu, trans_id %lu",
2527bd4c625cSLinus Torvalds 					       oldest_start -
2528bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2529a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
25301da177e4SLinus Torvalds 			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
25311da177e4SLinus Torvalds 				/* one we just read was older */
25321da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
25331da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
2534a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2535bd4c625cSLinus Torvalds 					       "journal-1180: Resetting "
25361da177e4SLinus Torvalds 					       "oldest_start to offset %lu, trans_id %lu",
2537bd4c625cSLinus Torvalds 					       oldest_start -
2538bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2539a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
25401da177e4SLinus Torvalds 			}
25411da177e4SLinus Torvalds 			if (newest_mount_id < get_desc_mount_id(desc)) {
25421da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2543a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2544bd4c625cSLinus Torvalds 					       "journal-1299: Setting "
2545bd4c625cSLinus Torvalds 					       "newest_mount_id to %d",
2546bd4c625cSLinus Torvalds 					       get_desc_mount_id(desc));
25471da177e4SLinus Torvalds 			}
25481da177e4SLinus Torvalds 			cur_dblock += get_desc_trans_len(desc) + 2;
25491da177e4SLinus Torvalds 		} else {
25501da177e4SLinus Torvalds 			cur_dblock++;
25511da177e4SLinus Torvalds 		}
25521da177e4SLinus Torvalds 		brelse(d_bh);
25531da177e4SLinus Torvalds 	}
25541da177e4SLinus Torvalds 
25551da177e4SLinus Torvalds       start_log_replay:
25561da177e4SLinus Torvalds 	cur_dblock = oldest_start;
25571da177e4SLinus Torvalds 	if (oldest_trans_id) {
2558a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2559bd4c625cSLinus Torvalds 			       "journal-1206: Starting replay "
25601da177e4SLinus Torvalds 			       "from offset %llu, trans_id %lu",
2561a9dd3643SJeff Mahoney 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
25621da177e4SLinus Torvalds 			       oldest_trans_id);
25631da177e4SLinus Torvalds 
25641da177e4SLinus Torvalds 	}
25651da177e4SLinus Torvalds 	replay_count = 0;
25661da177e4SLinus Torvalds 	while (continue_replay && oldest_trans_id > 0) {
2567bd4c625cSLinus Torvalds 		ret =
2568a9dd3643SJeff Mahoney 		    journal_read_transaction(sb, cur_dblock, oldest_start,
2569bd4c625cSLinus Torvalds 					     oldest_trans_id, newest_mount_id);
25701da177e4SLinus Torvalds 		if (ret < 0) {
25711da177e4SLinus Torvalds 			return ret;
25721da177e4SLinus Torvalds 		} else if (ret != 0) {
25731da177e4SLinus Torvalds 			break;
25741da177e4SLinus Torvalds 		}
2575bd4c625cSLinus Torvalds 		cur_dblock =
2576a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
25771da177e4SLinus Torvalds 		replay_count++;
25781da177e4SLinus Torvalds 		if (cur_dblock == oldest_start)
25791da177e4SLinus Torvalds 			break;
25801da177e4SLinus Torvalds 	}
25811da177e4SLinus Torvalds 
25821da177e4SLinus Torvalds 	if (oldest_trans_id == 0) {
2583a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2584bd4c625cSLinus Torvalds 			       "journal-1225: No valid " "transactions found");
25851da177e4SLinus Torvalds 	}
25861da177e4SLinus Torvalds 	/* j_start does not get set correctly if we don't replay any transactions.
25871da177e4SLinus Torvalds 	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
25881da177e4SLinus Torvalds 	 ** copy the trans_id from the header
25891da177e4SLinus Torvalds 	 */
25901da177e4SLinus Torvalds 	if (valid_journal_header && replay_count == 0) {
25911da177e4SLinus Torvalds 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2592bd4c625cSLinus Torvalds 		journal->j_trans_id =
2593bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2594a44c94a7SAlexander Zarochentsev 		/* check for trans_id overflow */
2595a44c94a7SAlexander Zarochentsev 		if (journal->j_trans_id == 0)
2596a44c94a7SAlexander Zarochentsev 			journal->j_trans_id = 10;
2597bd4c625cSLinus Torvalds 		journal->j_last_flush_trans_id =
2598bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id);
25991da177e4SLinus Torvalds 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
26001da177e4SLinus Torvalds 	} else {
26011da177e4SLinus Torvalds 		journal->j_mount_id = newest_mount_id + 1;
26021da177e4SLinus Torvalds 	}
2603a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
26041da177e4SLinus Torvalds 		       "newest_mount_id to %lu", journal->j_mount_id);
26051da177e4SLinus Torvalds 	journal->j_first_unflushed_offset = journal->j_start;
26061da177e4SLinus Torvalds 	if (replay_count > 0) {
2607a9dd3643SJeff Mahoney 		reiserfs_info(sb,
2608bd4c625cSLinus Torvalds 			      "replayed %d transactions in %lu seconds\n",
26091da177e4SLinus Torvalds 			      replay_count, get_seconds() - start);
26101da177e4SLinus Torvalds 	}
2611a9dd3643SJeff Mahoney 	if (!bdev_read_only(sb->s_bdev) &&
2612a9dd3643SJeff Mahoney 	    _update_journal_header_block(sb, journal->j_start,
2613bd4c625cSLinus Torvalds 					 journal->j_last_flush_trans_id)) {
26141da177e4SLinus Torvalds 		/* replay failed, caller must call free_journal_ram and abort
26151da177e4SLinus Torvalds 		 ** the mount
26161da177e4SLinus Torvalds 		 */
26171da177e4SLinus Torvalds 		return -1;
26181da177e4SLinus Torvalds 	}
26191da177e4SLinus Torvalds 	return 0;
26201da177e4SLinus Torvalds }
26211da177e4SLinus Torvalds 
26221da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
26231da177e4SLinus Torvalds {
26241da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
26258c777cc4SPekka Enberg 	jl = kzalloc(sizeof(struct reiserfs_journal_list),
26268c777cc4SPekka Enberg 		     GFP_NOFS | __GFP_NOFAIL);
26271da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_list);
26281da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_working_list);
26291da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
26301da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_bh_list);
263190415deaSJeff Mahoney 	mutex_init(&jl->j_commit_mutex);
26321da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_num_lists++;
26331da177e4SLinus Torvalds 	get_journal_list(jl);
26341da177e4SLinus Torvalds 	return jl;
26351da177e4SLinus Torvalds }
26361da177e4SLinus Torvalds 
2637a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb)
2638bd4c625cSLinus Torvalds {
2639a9dd3643SJeff Mahoney 	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
26401da177e4SLinus Torvalds }
26411da177e4SLinus Torvalds 
26421da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
26431da177e4SLinus Torvalds 			       struct reiserfs_journal *journal)
26441da177e4SLinus Torvalds {
26451da177e4SLinus Torvalds 	int result;
26461da177e4SLinus Torvalds 
26471da177e4SLinus Torvalds 	result = 0;
26481da177e4SLinus Torvalds 
264986098fa0SChristoph Hellwig 	if (journal->j_dev_bd != NULL) {
265086098fa0SChristoph Hellwig 		if (journal->j_dev_bd->bd_dev != super->s_dev)
265186098fa0SChristoph Hellwig 			bd_release(journal->j_dev_bd);
2652e5eb8caaSAl Viro 		result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
26531da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
26541da177e4SLinus Torvalds 	}
26551da177e4SLinus Torvalds 
26561da177e4SLinus Torvalds 	if (result != 0) {
265745b03d5eSJeff Mahoney 		reiserfs_warning(super, "sh-457",
265845b03d5eSJeff Mahoney 				 "Cannot release journal device: %i", result);
26591da177e4SLinus Torvalds 	}
26601da177e4SLinus Torvalds 	return result;
26611da177e4SLinus Torvalds }
26621da177e4SLinus Torvalds 
26631da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super,
26641da177e4SLinus Torvalds 			    struct reiserfs_journal *journal,
26651da177e4SLinus Torvalds 			    const char *jdev_name)
26661da177e4SLinus Torvalds {
26671da177e4SLinus Torvalds 	int result;
26681da177e4SLinus Torvalds 	dev_t jdev;
2669aeb5d727SAl Viro 	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
26701da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
26711da177e4SLinus Torvalds 
26721da177e4SLinus Torvalds 	result = 0;
26731da177e4SLinus Torvalds 
26741da177e4SLinus Torvalds 	journal->j_dev_bd = NULL;
26751da177e4SLinus Torvalds 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
26761da177e4SLinus Torvalds 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
26771da177e4SLinus Torvalds 
26781da177e4SLinus Torvalds 	if (bdev_read_only(super->s_bdev))
26791da177e4SLinus Torvalds 		blkdev_mode = FMODE_READ;
26801da177e4SLinus Torvalds 
26811da177e4SLinus Torvalds 	/* there is no "jdev" option and journal is on separate device */
26821da177e4SLinus Torvalds 	if ((!jdev_name || !jdev_name[0])) {
26831da177e4SLinus Torvalds 		journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2684e5eb8caaSAl Viro 		journal->j_dev_mode = blkdev_mode;
26851da177e4SLinus Torvalds 		if (IS_ERR(journal->j_dev_bd)) {
26861da177e4SLinus Torvalds 			result = PTR_ERR(journal->j_dev_bd);
26871da177e4SLinus Torvalds 			journal->j_dev_bd = NULL;
268845b03d5eSJeff Mahoney 			reiserfs_warning(super, "sh-458",
26891da177e4SLinus Torvalds 					 "cannot init journal device '%s': %i",
26901da177e4SLinus Torvalds 					 __bdevname(jdev, b), result);
26911da177e4SLinus Torvalds 			return result;
269286098fa0SChristoph Hellwig 		} else if (jdev != super->s_dev) {
269386098fa0SChristoph Hellwig 			result = bd_claim(journal->j_dev_bd, journal);
269486098fa0SChristoph Hellwig 			if (result) {
26959a1c3542SAl Viro 				blkdev_put(journal->j_dev_bd, blkdev_mode);
269686098fa0SChristoph Hellwig 				return result;
269786098fa0SChristoph Hellwig 			}
269886098fa0SChristoph Hellwig 
26991da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
270086098fa0SChristoph Hellwig 		}
270186098fa0SChristoph Hellwig 
27021da177e4SLinus Torvalds 		return 0;
27031da177e4SLinus Torvalds 	}
27041da177e4SLinus Torvalds 
2705e5eb8caaSAl Viro 	journal->j_dev_mode = blkdev_mode;
270630c40d2cSAl Viro 	journal->j_dev_bd = open_bdev_exclusive(jdev_name,
2707e5eb8caaSAl Viro 						blkdev_mode, journal);
270886098fa0SChristoph Hellwig 	if (IS_ERR(journal->j_dev_bd)) {
270986098fa0SChristoph Hellwig 		result = PTR_ERR(journal->j_dev_bd);
271086098fa0SChristoph Hellwig 		journal->j_dev_bd = NULL;
271186098fa0SChristoph Hellwig 		reiserfs_warning(super,
271286098fa0SChristoph Hellwig 				 "journal_init_dev: Cannot open '%s': %i",
271386098fa0SChristoph Hellwig 				 jdev_name, result);
271486098fa0SChristoph Hellwig 		return result;
271586098fa0SChristoph Hellwig 	}
271686098fa0SChristoph Hellwig 
27171da177e4SLinus Torvalds 	set_blocksize(journal->j_dev_bd, super->s_blocksize);
2718bd4c625cSLinus Torvalds 	reiserfs_info(super,
2719bd4c625cSLinus Torvalds 		      "journal_init_dev: journal device: %s\n",
272074f9f974SEdward Shishkin 		      bdevname(journal->j_dev_bd, b));
272186098fa0SChristoph Hellwig 	return 0;
27221da177e4SLinus Torvalds }
27231da177e4SLinus Torvalds 
2724cf3d0b81SEdward Shishkin /**
2725cf3d0b81SEdward Shishkin  * When creating/tuning a file system user can assign some
2726cf3d0b81SEdward Shishkin  * journal params within boundaries which depend on the ratio
2727cf3d0b81SEdward Shishkin  * blocksize/standard_blocksize.
2728cf3d0b81SEdward Shishkin  *
2729cf3d0b81SEdward Shishkin  * For blocks >= standard_blocksize transaction size should
2730cf3d0b81SEdward Shishkin  * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more
2731cf3d0b81SEdward Shishkin  * then JOURNAL_TRANS_MAX_DEFAULT.
2732cf3d0b81SEdward Shishkin  *
2733cf3d0b81SEdward Shishkin  * For blocks < standard_blocksize these boundaries should be
2734cf3d0b81SEdward Shishkin  * decreased proportionally.
2735cf3d0b81SEdward Shishkin  */
2736cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096)
2737cf3d0b81SEdward Shishkin 
2738a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb,
2739cf3d0b81SEdward Shishkin 				     struct reiserfs_journal *journal)
2740cf3d0b81SEdward Shishkin {
2741cf3d0b81SEdward Shishkin         if (journal->j_trans_max) {
2742cf3d0b81SEdward Shishkin 	        /* Non-default journal params.
2743cf3d0b81SEdward Shishkin 		   Do sanity check for them. */
2744cf3d0b81SEdward Shishkin 	        int ratio = 1;
2745a9dd3643SJeff Mahoney 		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2746a9dd3643SJeff Mahoney 		        ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2747cf3d0b81SEdward Shishkin 
2748cf3d0b81SEdward Shishkin 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2749cf3d0b81SEdward Shishkin 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2750a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2751cf3d0b81SEdward Shishkin 		    JOURNAL_MIN_RATIO) {
2752a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-462",
275345b03d5eSJeff Mahoney 					 "bad transaction max size (%u). "
275445b03d5eSJeff Mahoney 					 "FSCK?", journal->j_trans_max);
2755cf3d0b81SEdward Shishkin 			return 1;
2756cf3d0b81SEdward Shishkin 		}
2757cf3d0b81SEdward Shishkin 		if (journal->j_max_batch != (journal->j_trans_max) *
2758cf3d0b81SEdward Shishkin 		        JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2759a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-463",
276045b03d5eSJeff Mahoney 					 "bad transaction max batch (%u). "
276145b03d5eSJeff Mahoney 					 "FSCK?", journal->j_max_batch);
2762cf3d0b81SEdward Shishkin 			return 1;
2763cf3d0b81SEdward Shishkin 		}
2764cf3d0b81SEdward Shishkin 	} else {
2765cf3d0b81SEdward Shishkin 		/* Default journal params.
2766cf3d0b81SEdward Shishkin                    The file system was created by old version
2767cf3d0b81SEdward Shishkin 		   of mkreiserfs, so some fields contain zeros,
2768cf3d0b81SEdward Shishkin 		   and we need to advise proper values for them */
2769a9dd3643SJeff Mahoney 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2770a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2771a9dd3643SJeff Mahoney 					 sb->s_blocksize);
277245b03d5eSJeff Mahoney 			return 1;
277345b03d5eSJeff Mahoney 		}
2774cf3d0b81SEdward Shishkin 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2775cf3d0b81SEdward Shishkin 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2776cf3d0b81SEdward Shishkin 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2777cf3d0b81SEdward Shishkin 	}
2778cf3d0b81SEdward Shishkin 	return 0;
2779cf3d0b81SEdward Shishkin }
2780cf3d0b81SEdward Shishkin 
27811da177e4SLinus Torvalds /*
27821da177e4SLinus Torvalds ** must be called once on fs mount.  calls journal_read for you
27831da177e4SLinus Torvalds */
2784a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name,
2785bd4c625cSLinus Torvalds 		 int old_format, unsigned int commit_max_age)
2786bd4c625cSLinus Torvalds {
2787a9dd3643SJeff Mahoney 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
27881da177e4SLinus Torvalds 	struct buffer_head *bhjh;
27891da177e4SLinus Torvalds 	struct reiserfs_super_block *rs;
27901da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
27911da177e4SLinus Torvalds 	struct reiserfs_journal *journal;
27921da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
27931da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
27941da177e4SLinus Torvalds 
2795a9dd3643SJeff Mahoney 	journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
27961da177e4SLinus Torvalds 	if (!journal) {
2797a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1256",
279845b03d5eSJeff Mahoney 				 "unable to get memory for journal structure");
27991da177e4SLinus Torvalds 		return 1;
28001da177e4SLinus Torvalds 	}
28011da177e4SLinus Torvalds 	memset(journal, 0, sizeof(struct reiserfs_journal));
28021da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
28031da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_prealloc_list);
28041da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_working_list);
28051da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_journal_list);
28061da177e4SLinus Torvalds 	journal->j_persistent_trans = 0;
2807a9dd3643SJeff Mahoney 	if (reiserfs_allocate_list_bitmaps(sb,
28081da177e4SLinus Torvalds 					   journal->j_list_bitmap,
2809a9dd3643SJeff Mahoney 					   reiserfs_bmap_count(sb)))
28101da177e4SLinus Torvalds 		goto free_and_return;
2811a9dd3643SJeff Mahoney 	allocate_bitmap_nodes(sb);
28121da177e4SLinus Torvalds 
28131da177e4SLinus Torvalds 	/* reserved for journal area support */
2814a9dd3643SJeff Mahoney 	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2815bd4c625cSLinus Torvalds 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2816a9dd3643SJeff Mahoney 						 / sb->s_blocksize +
2817a9dd3643SJeff Mahoney 						 reiserfs_bmap_count(sb) +
2818bd4c625cSLinus Torvalds 						 1 :
2819bd4c625cSLinus Torvalds 						 REISERFS_DISK_OFFSET_IN_BYTES /
2820a9dd3643SJeff Mahoney 						 sb->s_blocksize + 2);
28211da177e4SLinus Torvalds 
28221da177e4SLinus Torvalds 	/* Sanity check to see is the standard journal fitting withing first bitmap
28231da177e4SLinus Torvalds 	   (actual for small blocksizes) */
2824a9dd3643SJeff Mahoney 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2825a9dd3643SJeff Mahoney 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2826a9dd3643SJeff Mahoney 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2827a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1393",
282845b03d5eSJeff Mahoney 				 "journal does not fit for area addressed "
282945b03d5eSJeff Mahoney 				 "by first of bitmap blocks. It starts at "
28301da177e4SLinus Torvalds 				 "%u and its size is %u. Block size %ld",
2831a9dd3643SJeff Mahoney 				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2832a9dd3643SJeff Mahoney 				 SB_ONDISK_JOURNAL_SIZE(sb),
2833a9dd3643SJeff Mahoney 				 sb->s_blocksize);
28341da177e4SLinus Torvalds 		goto free_and_return;
28351da177e4SLinus Torvalds 	}
28361da177e4SLinus Torvalds 
2837a9dd3643SJeff Mahoney 	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2838a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-462",
283945b03d5eSJeff Mahoney 				 "unable to initialize jornal device");
28401da177e4SLinus Torvalds 		goto free_and_return;
28411da177e4SLinus Torvalds 	}
28421da177e4SLinus Torvalds 
2843a9dd3643SJeff Mahoney 	rs = SB_DISK_SUPER_BLOCK(sb);
28441da177e4SLinus Torvalds 
28451da177e4SLinus Torvalds 	/* read journal header */
2846a9dd3643SJeff Mahoney 	bhjh = journal_bread(sb,
2847a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2848a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb));
28491da177e4SLinus Torvalds 	if (!bhjh) {
2850a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-459",
285145b03d5eSJeff Mahoney 				 "unable to read journal header");
28521da177e4SLinus Torvalds 		goto free_and_return;
28531da177e4SLinus Torvalds 	}
28541da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
28551da177e4SLinus Torvalds 
28561da177e4SLinus Torvalds 	/* make sure that journal matches to the super block */
2857bd4c625cSLinus Torvalds 	if (is_reiserfs_jr(rs)
2858bd4c625cSLinus Torvalds 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2859bd4c625cSLinus Torvalds 		sb_jp_journal_magic(rs))) {
2860a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-460",
286145b03d5eSJeff Mahoney 				 "journal header magic %x (device %s) does "
286245b03d5eSJeff Mahoney 				 "not match to magic found in super block %x",
286345b03d5eSJeff Mahoney 				 jh->jh_journal.jp_journal_magic,
28641da177e4SLinus Torvalds 				 bdevname(journal->j_dev_bd, b),
28651da177e4SLinus Torvalds 				 sb_jp_journal_magic(rs));
28661da177e4SLinus Torvalds 		brelse(bhjh);
28671da177e4SLinus Torvalds 		goto free_and_return;
28681da177e4SLinus Torvalds 	}
28691da177e4SLinus Torvalds 
28701da177e4SLinus Torvalds 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
28711da177e4SLinus Torvalds 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2872bd4c625cSLinus Torvalds 	journal->j_max_commit_age =
2873bd4c625cSLinus Torvalds 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
28741da177e4SLinus Torvalds 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
28751da177e4SLinus Torvalds 
2876a9dd3643SJeff Mahoney 	if (check_advise_trans_params(sb, journal) != 0)
2877cf3d0b81SEdward Shishkin 	        goto free_and_return;
28781da177e4SLinus Torvalds 	journal->j_default_max_commit_age = journal->j_max_commit_age;
28791da177e4SLinus Torvalds 
28801da177e4SLinus Torvalds 	if (commit_max_age != 0) {
28811da177e4SLinus Torvalds 		journal->j_max_commit_age = commit_max_age;
28821da177e4SLinus Torvalds 		journal->j_max_trans_age = commit_max_age;
28831da177e4SLinus Torvalds 	}
28841da177e4SLinus Torvalds 
2885a9dd3643SJeff Mahoney 	reiserfs_info(sb, "journal params: device %s, size %u, "
28861da177e4SLinus Torvalds 		      "journal first block %u, max trans len %u, max batch %u, "
28871da177e4SLinus Torvalds 		      "max commit age %u, max trans age %u\n",
28881da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b),
2889a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_SIZE(sb),
2890a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_1st_BLOCK(sb),
28911da177e4SLinus Torvalds 		      journal->j_trans_max,
28921da177e4SLinus Torvalds 		      journal->j_max_batch,
2893bd4c625cSLinus Torvalds 		      journal->j_max_commit_age, journal->j_max_trans_age);
28941da177e4SLinus Torvalds 
28951da177e4SLinus Torvalds 	brelse(bhjh);
28961da177e4SLinus Torvalds 
28971da177e4SLinus Torvalds 	journal->j_list_bitmap_index = 0;
2898a9dd3643SJeff Mahoney 	journal_list_init(sb);
28991da177e4SLinus Torvalds 
2900bd4c625cSLinus Torvalds 	memset(journal->j_list_hash_table, 0,
2901bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
29021da177e4SLinus Torvalds 
29031da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_dirty_buffers);
29041da177e4SLinus Torvalds 	spin_lock_init(&journal->j_dirty_buffers_lock);
29051da177e4SLinus Torvalds 
29061da177e4SLinus Torvalds 	journal->j_start = 0;
29071da177e4SLinus Torvalds 	journal->j_len = 0;
29081da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
29091da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
29101da177e4SLinus Torvalds 	atomic_set(&(journal->j_async_throttle), 0);
29111da177e4SLinus Torvalds 	journal->j_bcount = 0;
29121da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
29131da177e4SLinus Torvalds 	journal->j_last = NULL;
29141da177e4SLinus Torvalds 	journal->j_first = NULL;
29151da177e4SLinus Torvalds 	init_waitqueue_head(&(journal->j_join_wait));
2916f68215c4SJeff Mahoney 	mutex_init(&journal->j_mutex);
2917afe70259SJeff Mahoney 	mutex_init(&journal->j_flush_mutex);
29181da177e4SLinus Torvalds 
29191da177e4SLinus Torvalds 	journal->j_trans_id = 10;
29201da177e4SLinus Torvalds 	journal->j_mount_id = 10;
29211da177e4SLinus Torvalds 	journal->j_state = 0;
29221da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
29231da177e4SLinus Torvalds 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
29241da177e4SLinus Torvalds 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
29251da177e4SLinus Torvalds 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
29261da177e4SLinus Torvalds 	journal->j_cnode_used = 0;
29271da177e4SLinus Torvalds 	journal->j_must_wait = 0;
29281da177e4SLinus Torvalds 
2929576f6d79SJeff Mahoney 	if (journal->j_cnode_free == 0) {
2930a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2931576f6d79SJeff Mahoney 		                 "allocation failed (%ld bytes). Journal is "
2932576f6d79SJeff Mahoney 		                 "too large for available memory. Usually "
2933576f6d79SJeff Mahoney 		                 "this is due to a journal that is too large.",
2934576f6d79SJeff Mahoney 		                 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2935576f6d79SJeff Mahoney         	goto free_and_return;
2936576f6d79SJeff Mahoney 	}
2937576f6d79SJeff Mahoney 
2938a9dd3643SJeff Mahoney 	init_journal_hash(sb);
29391da177e4SLinus Torvalds 	jl = journal->j_current_jl;
2940a9dd3643SJeff Mahoney 	jl->j_list_bitmap = get_list_bitmap(sb, jl);
29411da177e4SLinus Torvalds 	if (!jl->j_list_bitmap) {
2942a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2005",
294345b03d5eSJeff Mahoney 				 "get_list_bitmap failed for journal list 0");
29441da177e4SLinus Torvalds 		goto free_and_return;
29451da177e4SLinus Torvalds 	}
2946a9dd3643SJeff Mahoney 	if (journal_read(sb) < 0) {
2947a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "reiserfs-2006",
294845b03d5eSJeff Mahoney 				 "Replay Failure, unable to mount");
29491da177e4SLinus Torvalds 		goto free_and_return;
29501da177e4SLinus Torvalds 	}
29511da177e4SLinus Torvalds 
29521da177e4SLinus Torvalds 	reiserfs_mounted_fs_count++;
29531da177e4SLinus Torvalds 	if (reiserfs_mounted_fs_count <= 1)
29541da177e4SLinus Torvalds 		commit_wq = create_workqueue("reiserfs");
29551da177e4SLinus Torvalds 
2956c4028958SDavid Howells 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2957a9dd3643SJeff Mahoney 	journal->j_work_sb = sb;
29581da177e4SLinus Torvalds 	return 0;
29591da177e4SLinus Torvalds       free_and_return:
2960a9dd3643SJeff Mahoney 	free_journal_ram(sb);
29611da177e4SLinus Torvalds 	return 1;
29621da177e4SLinus Torvalds }
29631da177e4SLinus Torvalds 
29641da177e4SLinus Torvalds /*
29651da177e4SLinus Torvalds ** test for a polite end of the current transaction.  Used by file_write, and should
29661da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single
29671da177e4SLinus Torvalds ** transaction
29681da177e4SLinus Torvalds */
2969bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2970bd4c625cSLinus Torvalds 				   int new_alloc)
2971bd4c625cSLinus Torvalds {
29721da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29731da177e4SLinus Torvalds 	time_t now = get_seconds();
29741da177e4SLinus Torvalds 	/* cannot restart while nested */
29751da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29761da177e4SLinus Torvalds 	if (th->t_refcount > 1)
29771da177e4SLinus Torvalds 		return 0;
29781da177e4SLinus Torvalds 	if (journal->j_must_wait > 0 ||
29791da177e4SLinus Torvalds 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
29801da177e4SLinus Torvalds 	    atomic_read(&(journal->j_jlock)) ||
29811da177e4SLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
29821da177e4SLinus Torvalds 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
29831da177e4SLinus Torvalds 		return 1;
29841da177e4SLinus Torvalds 	}
29856ae1ea44SChris Mason 	/* protected by the BKL here */
29866ae1ea44SChris Mason 	journal->j_len_alloc += new_alloc;
29876ae1ea44SChris Mason 	th->t_blocks_allocated += new_alloc ;
29881da177e4SLinus Torvalds 	return 0;
29891da177e4SLinus Torvalds }
29901da177e4SLinus Torvalds 
29911da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the
29921da177e4SLinus Torvalds ** kernel_lock to be held
29931da177e4SLinus Torvalds */
2994bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2995bd4c625cSLinus Torvalds {
29961da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29971da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29981da177e4SLinus Torvalds 	journal->j_must_wait = 1;
29991da177e4SLinus Torvalds 	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
30001da177e4SLinus Torvalds 	return;
30011da177e4SLinus Torvalds }
30021da177e4SLinus Torvalds 
30031da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
30041da177e4SLinus Torvalds ** require BKL
30051da177e4SLinus Torvalds */
3006bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s)
3007bd4c625cSLinus Torvalds {
30081da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30091da177e4SLinus Torvalds 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
30101da177e4SLinus Torvalds 	wake_up(&journal->j_join_wait);
30111da177e4SLinus Torvalds }
30121da177e4SLinus Torvalds 
30131da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
30141da177e4SLinus Torvalds ** require BKL
30151da177e4SLinus Torvalds */
3016bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s)
3017bd4c625cSLinus Torvalds {
30181da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30191da177e4SLinus Torvalds 	wait_event(journal->j_join_wait,
30201da177e4SLinus Torvalds 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
30211da177e4SLinus Torvalds }
30221da177e4SLinus Torvalds 
3023bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s)
3024bd4c625cSLinus Torvalds {
30251da177e4SLinus Torvalds 	wait_queue_t wait;
30261da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30271da177e4SLinus Torvalds 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
30281da177e4SLinus Torvalds 
30291da177e4SLinus Torvalds 	/*
30301da177e4SLinus Torvalds 	 * we don't want to use wait_event here because
30311da177e4SLinus Torvalds 	 * we only want to wait once.
30321da177e4SLinus Torvalds 	 */
30331da177e4SLinus Torvalds 	init_waitqueue_entry(&wait, current);
30341da177e4SLinus Torvalds 	add_wait_queue(&journal->j_join_wait, &wait);
30351da177e4SLinus Torvalds 	set_current_state(TASK_UNINTERRUPTIBLE);
30368ebc4232SFrederic Weisbecker 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
30378ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
30381da177e4SLinus Torvalds 		schedule();
30398ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
30408ebc4232SFrederic Weisbecker 	}
30415ab2f7e0SMilind Arun Choudhary 	__set_current_state(TASK_RUNNING);
30421da177e4SLinus Torvalds 	remove_wait_queue(&journal->j_join_wait, &wait);
30431da177e4SLinus Torvalds }
30441da177e4SLinus Torvalds 
3045bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s)
3046bd4c625cSLinus Torvalds {
30471da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30481da177e4SLinus Torvalds 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
30491da177e4SLinus Torvalds 		wake_up(&journal->j_join_wait);
30501da177e4SLinus Torvalds }
30511da177e4SLinus Torvalds 
3052600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
30531da177e4SLinus Torvalds {
30541da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
30551da177e4SLinus Torvalds 	unsigned long bcount = journal->j_bcount;
30561da177e4SLinus Torvalds 	while (1) {
30578ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3058041e0e3bSNishanth Aravamudan 		schedule_timeout_uninterruptible(1);
30598ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
30601da177e4SLinus Torvalds 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
30611da177e4SLinus Torvalds 		while ((atomic_read(&journal->j_wcount) > 0 ||
30621da177e4SLinus Torvalds 			atomic_read(&journal->j_jlock)) &&
30631da177e4SLinus Torvalds 		       journal->j_trans_id == trans_id) {
30641da177e4SLinus Torvalds 			queue_log_writer(sb);
30651da177e4SLinus Torvalds 		}
30661da177e4SLinus Torvalds 		if (journal->j_trans_id != trans_id)
30671da177e4SLinus Torvalds 			break;
30681da177e4SLinus Torvalds 		if (bcount == journal->j_bcount)
30691da177e4SLinus Torvalds 			break;
30701da177e4SLinus Torvalds 		bcount = journal->j_bcount;
30711da177e4SLinus Torvalds 	}
30721da177e4SLinus Torvalds }
30731da177e4SLinus Torvalds 
30741da177e4SLinus Torvalds /* join == true if you must join an existing transaction.
30751da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish
30761da177e4SLinus Torvalds **
30771da177e4SLinus Torvalds ** this will block until the transaction is joinable.  send the number of blocks you
30781da177e4SLinus Torvalds ** expect to use in nblocks.
30791da177e4SLinus Torvalds */
3080bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3081a9dd3643SJeff Mahoney 			      struct super_block *sb, unsigned long nblocks,
3082bd4c625cSLinus Torvalds 			      int join)
3083bd4c625cSLinus Torvalds {
30841da177e4SLinus Torvalds 	time_t now = get_seconds();
3085600ed416SJeff Mahoney 	unsigned int old_trans_id;
3086a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
30871da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
30881da177e4SLinus Torvalds 	int sched_count = 0;
30891da177e4SLinus Torvalds 	int retval;
30901da177e4SLinus Torvalds 
3091a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal_begin");
309214a61442SEric Sesterhenn 	BUG_ON(nblocks > journal->j_trans_max);
30931da177e4SLinus Torvalds 
3094a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.journal_being);
30951da177e4SLinus Torvalds 	/* set here for journal_join */
30961da177e4SLinus Torvalds 	th->t_refcount = 1;
3097a9dd3643SJeff Mahoney 	th->t_super = sb;
30981da177e4SLinus Torvalds 
30991da177e4SLinus Torvalds       relock:
3100a9dd3643SJeff Mahoney 	lock_journal(sb);
31011da177e4SLinus Torvalds 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3102a9dd3643SJeff Mahoney 		unlock_journal(sb);
31031da177e4SLinus Torvalds 		retval = journal->j_errno;
31041da177e4SLinus Torvalds 		goto out_fail;
31051da177e4SLinus Torvalds 	}
31061da177e4SLinus Torvalds 	journal->j_bcount++;
31071da177e4SLinus Torvalds 
31081da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3109a9dd3643SJeff Mahoney 		unlock_journal(sb);
31108ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3111a9dd3643SJeff Mahoney 		reiserfs_wait_on_write_block(sb);
31128ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
3113a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_writers);
31141da177e4SLinus Torvalds 		goto relock;
31151da177e4SLinus Torvalds 	}
31161da177e4SLinus Torvalds 	now = get_seconds();
31171da177e4SLinus Torvalds 
31181da177e4SLinus Torvalds 	/* if there is no room in the journal OR
31191da177e4SLinus Torvalds 	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
31201da177e4SLinus Torvalds 	 ** we don't sleep if there aren't other writers
31211da177e4SLinus Torvalds 	 */
31221da177e4SLinus Torvalds 
31231da177e4SLinus Torvalds 	if ((!join && journal->j_must_wait > 0) ||
3124bd4c625cSLinus Torvalds 	    (!join
3125bd4c625cSLinus Torvalds 	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
3126bd4c625cSLinus Torvalds 	    || (!join && atomic_read(&journal->j_wcount) > 0
3127bd4c625cSLinus Torvalds 		&& journal->j_trans_start_time > 0
3128bd4c625cSLinus Torvalds 		&& (now - journal->j_trans_start_time) >
3129bd4c625cSLinus Torvalds 		journal->j_max_trans_age) || (!join
3130bd4c625cSLinus Torvalds 					      && atomic_read(&journal->j_jlock))
3131bd4c625cSLinus Torvalds 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
31321da177e4SLinus Torvalds 
31331da177e4SLinus Torvalds 		old_trans_id = journal->j_trans_id;
3134a9dd3643SJeff Mahoney 		unlock_journal(sb);	/* allow others to finish this transaction */
31351da177e4SLinus Torvalds 
31361da177e4SLinus Torvalds 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
31371da177e4SLinus Torvalds 		    journal->j_max_batch &&
3138bd4c625cSLinus Torvalds 		    ((journal->j_len + nblocks + 2) * 100) <
3139bd4c625cSLinus Torvalds 		    (journal->j_len_alloc * 75)) {
31401da177e4SLinus Torvalds 			if (atomic_read(&journal->j_wcount) > 10) {
31411da177e4SLinus Torvalds 				sched_count++;
3142a9dd3643SJeff Mahoney 				queue_log_writer(sb);
31431da177e4SLinus Torvalds 				goto relock;
31441da177e4SLinus Torvalds 			}
31451da177e4SLinus Torvalds 		}
31461da177e4SLinus Torvalds 		/* don't mess with joining the transaction if all we have to do is
31471da177e4SLinus Torvalds 		 * wait for someone else to do a commit
31481da177e4SLinus Torvalds 		 */
31491da177e4SLinus Torvalds 		if (atomic_read(&journal->j_jlock)) {
31501da177e4SLinus Torvalds 			while (journal->j_trans_id == old_trans_id &&
31511da177e4SLinus Torvalds 			       atomic_read(&journal->j_jlock)) {
3152a9dd3643SJeff Mahoney 				queue_log_writer(sb);
31531da177e4SLinus Torvalds 			}
31541da177e4SLinus Torvalds 			goto relock;
31551da177e4SLinus Torvalds 		}
3156a9dd3643SJeff Mahoney 		retval = journal_join(&myth, sb, 1);
31571da177e4SLinus Torvalds 		if (retval)
31581da177e4SLinus Torvalds 			goto out_fail;
31591da177e4SLinus Torvalds 
31601da177e4SLinus Torvalds 		/* someone might have ended the transaction while we joined */
31611da177e4SLinus Torvalds 		if (old_trans_id != journal->j_trans_id) {
3162a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, 0);
31631da177e4SLinus Torvalds 		} else {
3164a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
31651da177e4SLinus Torvalds 		}
31661da177e4SLinus Torvalds 
31671da177e4SLinus Torvalds 		if (retval)
31681da177e4SLinus Torvalds 			goto out_fail;
31691da177e4SLinus Torvalds 
3170a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_wcount);
31711da177e4SLinus Torvalds 		goto relock;
31721da177e4SLinus Torvalds 	}
31731da177e4SLinus Torvalds 	/* we are the first writer, set trans_id */
31741da177e4SLinus Torvalds 	if (journal->j_trans_start_time == 0) {
31751da177e4SLinus Torvalds 		journal->j_trans_start_time = get_seconds();
31761da177e4SLinus Torvalds 	}
31771da177e4SLinus Torvalds 	atomic_inc(&(journal->j_wcount));
31781da177e4SLinus Torvalds 	journal->j_len_alloc += nblocks;
31791da177e4SLinus Torvalds 	th->t_blocks_logged = 0;
31801da177e4SLinus Torvalds 	th->t_blocks_allocated = nblocks;
31811da177e4SLinus Torvalds 	th->t_trans_id = journal->j_trans_id;
3182a9dd3643SJeff Mahoney 	unlock_journal(sb);
31831da177e4SLinus Torvalds 	INIT_LIST_HEAD(&th->t_list);
318422e2c507SJens Axboe 	get_fs_excl();
31851da177e4SLinus Torvalds 	return 0;
31861da177e4SLinus Torvalds 
31871da177e4SLinus Torvalds       out_fail:
31881da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
31891da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
31901da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
31911da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
3192a9dd3643SJeff Mahoney 	th->t_super = sb;
31931da177e4SLinus Torvalds 	return retval;
31941da177e4SLinus Torvalds }
31951da177e4SLinus Torvalds 
3196bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3197bd4c625cSLinus Torvalds 								    super_block
3198bd4c625cSLinus Torvalds 								    *s,
3199bd4c625cSLinus Torvalds 								    int nblocks)
3200bd4c625cSLinus Torvalds {
32011da177e4SLinus Torvalds 	int ret;
32021da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *th;
32031da177e4SLinus Torvalds 
32041da177e4SLinus Torvalds 	/* if we're nesting into an existing transaction.  It will be
32051da177e4SLinus Torvalds 	 ** persistent on its own
32061da177e4SLinus Torvalds 	 */
32071da177e4SLinus Torvalds 	if (reiserfs_transaction_running(s)) {
32081da177e4SLinus Torvalds 		th = current->journal_info;
32091da177e4SLinus Torvalds 		th->t_refcount++;
321014a61442SEric Sesterhenn 		BUG_ON(th->t_refcount < 2);
321114a61442SEric Sesterhenn 
32121da177e4SLinus Torvalds 		return th;
32131da177e4SLinus Torvalds 	}
3214d739b42bSPekka Enberg 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
32151da177e4SLinus Torvalds 	if (!th)
32161da177e4SLinus Torvalds 		return NULL;
32171da177e4SLinus Torvalds 	ret = journal_begin(th, s, nblocks);
32181da177e4SLinus Torvalds 	if (ret) {
3219d739b42bSPekka Enberg 		kfree(th);
32201da177e4SLinus Torvalds 		return NULL;
32211da177e4SLinus Torvalds 	}
32221da177e4SLinus Torvalds 
32231da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_persistent_trans++;
32241da177e4SLinus Torvalds 	return th;
32251da177e4SLinus Torvalds }
32261da177e4SLinus Torvalds 
3227bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3228bd4c625cSLinus Torvalds {
32291da177e4SLinus Torvalds 	struct super_block *s = th->t_super;
32301da177e4SLinus Torvalds 	int ret = 0;
32311da177e4SLinus Torvalds 	if (th->t_trans_id)
32321da177e4SLinus Torvalds 		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
32331da177e4SLinus Torvalds 	else
32341da177e4SLinus Torvalds 		ret = -EIO;
32351da177e4SLinus Torvalds 	if (th->t_refcount == 0) {
32361da177e4SLinus Torvalds 		SB_JOURNAL(s)->j_persistent_trans--;
3237d739b42bSPekka Enberg 		kfree(th);
32381da177e4SLinus Torvalds 	}
32391da177e4SLinus Torvalds 	return ret;
32401da177e4SLinus Torvalds }
32411da177e4SLinus Torvalds 
3242bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
3243a9dd3643SJeff Mahoney 			struct super_block *sb, unsigned long nblocks)
3244bd4c625cSLinus Torvalds {
32451da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32461da177e4SLinus Torvalds 
32471da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
32481da177e4SLinus Torvalds 	 ** pointer
32491da177e4SLinus Torvalds 	 */
32501da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
325114a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3252a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
32531da177e4SLinus Torvalds }
32541da177e4SLinus Torvalds 
3255bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th,
3256a9dd3643SJeff Mahoney 		       struct super_block *sb, unsigned long nblocks)
3257bd4c625cSLinus Torvalds {
32581da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32591da177e4SLinus Torvalds 
32601da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
32611da177e4SLinus Torvalds 	 ** pointer
32621da177e4SLinus Torvalds 	 */
32631da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
326414a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3265a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
32661da177e4SLinus Torvalds }
32671da177e4SLinus Torvalds 
3268bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th,
3269a9dd3643SJeff Mahoney 		  struct super_block *sb, unsigned long nblocks)
3270bd4c625cSLinus Torvalds {
32711da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32721da177e4SLinus Torvalds 	int ret;
32731da177e4SLinus Torvalds 
32741da177e4SLinus Torvalds 	th->t_handle_save = NULL;
32751da177e4SLinus Torvalds 	if (cur_th) {
32761da177e4SLinus Torvalds 		/* we are nesting into the current transaction */
3277a9dd3643SJeff Mahoney 		if (cur_th->t_super == sb) {
32781da177e4SLinus Torvalds 			BUG_ON(!cur_th->t_refcount);
32791da177e4SLinus Torvalds 			cur_th->t_refcount++;
32801da177e4SLinus Torvalds 			memcpy(th, cur_th, sizeof(*th));
32811da177e4SLinus Torvalds 			if (th->t_refcount <= 1)
3282a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2005",
328345b03d5eSJeff Mahoney 						 "BAD: refcount <= 1, but "
328445b03d5eSJeff Mahoney 						 "journal_info != 0");
32851da177e4SLinus Torvalds 			return 0;
32861da177e4SLinus Torvalds 		} else {
32871da177e4SLinus Torvalds 			/* we've ended up with a handle from a different filesystem.
32881da177e4SLinus Torvalds 			 ** save it and restore on journal_end.  This should never
32891da177e4SLinus Torvalds 			 ** really happen...
32901da177e4SLinus Torvalds 			 */
3291a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2100",
329245b03d5eSJeff Mahoney 					 "nesting info a different FS");
32931da177e4SLinus Torvalds 			th->t_handle_save = current->journal_info;
32941da177e4SLinus Torvalds 			current->journal_info = th;
32951da177e4SLinus Torvalds 		}
32961da177e4SLinus Torvalds 	} else {
32971da177e4SLinus Torvalds 		current->journal_info = th;
32981da177e4SLinus Torvalds 	}
3299a9dd3643SJeff Mahoney 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
330014a61442SEric Sesterhenn 	BUG_ON(current->journal_info != th);
33011da177e4SLinus Torvalds 
33021da177e4SLinus Torvalds 	/* I guess this boils down to being the reciprocal of clm-2100 above.
33031da177e4SLinus Torvalds 	 * If do_journal_begin_r fails, we need to put it back, since journal_end
33041da177e4SLinus Torvalds 	 * won't be called to do it. */
33051da177e4SLinus Torvalds 	if (ret)
33061da177e4SLinus Torvalds 		current->journal_info = th->t_handle_save;
33071da177e4SLinus Torvalds 	else
33081da177e4SLinus Torvalds 		BUG_ON(!th->t_refcount);
33091da177e4SLinus Torvalds 
33101da177e4SLinus Torvalds 	return ret;
33111da177e4SLinus Torvalds }
33121da177e4SLinus Torvalds 
33131da177e4SLinus Torvalds /*
33141da177e4SLinus Torvalds ** puts bh into the current transaction.  If it was already there, reorders removes the
33151da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
33161da177e4SLinus Torvalds **
33171da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
33181da177e4SLinus Torvalds ** transaction is committed.
33191da177e4SLinus Torvalds **
33201da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
33211da177e4SLinus Torvalds */
3322bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3323a9dd3643SJeff Mahoney 		       struct super_block *sb, struct buffer_head *bh)
3324bd4c625cSLinus Torvalds {
3325a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
33261da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
33271da177e4SLinus Torvalds 	int count_already_incd = 0;
33281da177e4SLinus Torvalds 	int prepared = 0;
33291da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
33301da177e4SLinus Torvalds 
3331a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.mark_dirty);
33321da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3333c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3334c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
33351da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
33361da177e4SLinus Torvalds 	}
33371da177e4SLinus Torvalds 
3338a9dd3643SJeff Mahoney 	sb->s_dirt = 1;
33391da177e4SLinus Torvalds 
33401da177e4SLinus Torvalds 	prepared = test_clear_buffer_journal_prepared(bh);
33411da177e4SLinus Torvalds 	clear_buffer_journal_restore_dirty(bh);
33421da177e4SLinus Torvalds 	/* already in this transaction, we are done */
33431da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
3344a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_already);
33451da177e4SLinus Torvalds 		return 0;
33461da177e4SLinus Torvalds 	}
33471da177e4SLinus Torvalds 
33481da177e4SLinus Torvalds 	/* this must be turned into a panic instead of a warning.  We can't allow
33491da177e4SLinus Torvalds 	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
33501da177e4SLinus Torvalds 	 ** could get to disk too early.  NOT GOOD.
33511da177e4SLinus Torvalds 	 */
33521da177e4SLinus Torvalds 	if (!prepared || buffer_dirty(bh)) {
3353a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1777",
335445b03d5eSJeff Mahoney 				 "buffer %llu bad state "
33551da177e4SLinus Torvalds 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3356bd4c625cSLinus Torvalds 				 (unsigned long long)bh->b_blocknr,
3357bd4c625cSLinus Torvalds 				 prepared ? ' ' : '!',
33581da177e4SLinus Torvalds 				 buffer_locked(bh) ? ' ' : '!',
33591da177e4SLinus Torvalds 				 buffer_dirty(bh) ? ' ' : '!',
33601da177e4SLinus Torvalds 				 buffer_journal_dirty(bh) ? ' ' : '!');
33611da177e4SLinus Torvalds 	}
33621da177e4SLinus Torvalds 
33631da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) <= 0) {
3364a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1409",
336545b03d5eSJeff Mahoney 				 "returning because j_wcount was %d",
3366bd4c625cSLinus Torvalds 				 atomic_read(&(journal->j_wcount)));
33671da177e4SLinus Torvalds 		return 1;
33681da177e4SLinus Torvalds 	}
33691da177e4SLinus Torvalds 	/* this error means I've screwed up, and we've overflowed the transaction.
33701da177e4SLinus Torvalds 	 ** Nothing can be done here, except make the FS readonly or panic.
33711da177e4SLinus Torvalds 	 */
33721da177e4SLinus Torvalds 	if (journal->j_len >= journal->j_trans_max) {
3373c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1413",
3374c3a9c210SJeff Mahoney 			       "j_len (%lu) is too big",
3375bd4c625cSLinus Torvalds 			       journal->j_len);
33761da177e4SLinus Torvalds 	}
33771da177e4SLinus Torvalds 
33781da177e4SLinus Torvalds 	if (buffer_journal_dirty(bh)) {
33791da177e4SLinus Torvalds 		count_already_incd = 1;
3380a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
33811da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
33821da177e4SLinus Torvalds 	}
33831da177e4SLinus Torvalds 
33841da177e4SLinus Torvalds 	if (journal->j_len > journal->j_len_alloc) {
33851da177e4SLinus Torvalds 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
33861da177e4SLinus Torvalds 	}
33871da177e4SLinus Torvalds 
33881da177e4SLinus Torvalds 	set_buffer_journaled(bh);
33891da177e4SLinus Torvalds 
33901da177e4SLinus Torvalds 	/* now put this guy on the end */
33911da177e4SLinus Torvalds 	if (!cn) {
3392a9dd3643SJeff Mahoney 		cn = get_cnode(sb);
33931da177e4SLinus Torvalds 		if (!cn) {
3394a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-4", "get_cnode failed!");
33951da177e4SLinus Torvalds 		}
33961da177e4SLinus Torvalds 
33971da177e4SLinus Torvalds 		if (th->t_blocks_logged == th->t_blocks_allocated) {
33981da177e4SLinus Torvalds 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
33991da177e4SLinus Torvalds 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
34001da177e4SLinus Torvalds 		}
34011da177e4SLinus Torvalds 		th->t_blocks_logged++;
34021da177e4SLinus Torvalds 		journal->j_len++;
34031da177e4SLinus Torvalds 
34041da177e4SLinus Torvalds 		cn->bh = bh;
34051da177e4SLinus Torvalds 		cn->blocknr = bh->b_blocknr;
3406a9dd3643SJeff Mahoney 		cn->sb = sb;
34071da177e4SLinus Torvalds 		cn->jlist = NULL;
34081da177e4SLinus Torvalds 		insert_journal_hash(journal->j_hash_table, cn);
34091da177e4SLinus Torvalds 		if (!count_already_incd) {
34101da177e4SLinus Torvalds 			get_bh(bh);
34111da177e4SLinus Torvalds 		}
34121da177e4SLinus Torvalds 	}
34131da177e4SLinus Torvalds 	cn->next = NULL;
34141da177e4SLinus Torvalds 	cn->prev = journal->j_last;
34151da177e4SLinus Torvalds 	cn->bh = bh;
34161da177e4SLinus Torvalds 	if (journal->j_last) {
34171da177e4SLinus Torvalds 		journal->j_last->next = cn;
34181da177e4SLinus Torvalds 		journal->j_last = cn;
34191da177e4SLinus Torvalds 	} else {
34201da177e4SLinus Torvalds 		journal->j_first = cn;
34211da177e4SLinus Torvalds 		journal->j_last = cn;
34221da177e4SLinus Torvalds 	}
34231da177e4SLinus Torvalds 	return 0;
34241da177e4SLinus Torvalds }
34251da177e4SLinus Torvalds 
3426bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th,
3427a9dd3643SJeff Mahoney 		struct super_block *sb, unsigned long nblocks)
3428bd4c625cSLinus Torvalds {
34291da177e4SLinus Torvalds 	if (!current->journal_info && th->t_refcount > 1)
3430a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "REISER-NESTING",
343145b03d5eSJeff Mahoney 				 "th NULL, refcount %d", th->t_refcount);
34321da177e4SLinus Torvalds 
34331da177e4SLinus Torvalds 	if (!th->t_trans_id) {
34341da177e4SLinus Torvalds 		WARN_ON(1);
34351da177e4SLinus Torvalds 		return -EIO;
34361da177e4SLinus Torvalds 	}
34371da177e4SLinus Torvalds 
34381da177e4SLinus Torvalds 	th->t_refcount--;
34391da177e4SLinus Torvalds 	if (th->t_refcount > 0) {
3440bd4c625cSLinus Torvalds 		struct reiserfs_transaction_handle *cur_th =
3441bd4c625cSLinus Torvalds 		    current->journal_info;
34421da177e4SLinus Torvalds 
34431da177e4SLinus Torvalds 		/* we aren't allowed to close a nested transaction on a different
34441da177e4SLinus Torvalds 		 ** filesystem from the one in the task struct
34451da177e4SLinus Torvalds 		 */
344614a61442SEric Sesterhenn 		BUG_ON(cur_th->t_super != th->t_super);
34471da177e4SLinus Torvalds 
34481da177e4SLinus Torvalds 		if (th != cur_th) {
34491da177e4SLinus Torvalds 			memcpy(current->journal_info, th, sizeof(*th));
34501da177e4SLinus Torvalds 			th->t_trans_id = 0;
34511da177e4SLinus Torvalds 		}
34521da177e4SLinus Torvalds 		return 0;
34531da177e4SLinus Torvalds 	} else {
3454a9dd3643SJeff Mahoney 		return do_journal_end(th, sb, nblocks, 0);
34551da177e4SLinus Torvalds 	}
34561da177e4SLinus Torvalds }
34571da177e4SLinus Torvalds 
34581da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters.
34591da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list
34601da177e4SLinus Torvalds **
34611da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted
34621da177e4SLinus Torvalds **
34631da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise
34641da177e4SLinus Torvalds */
3465a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb,
3466bd4c625cSLinus Torvalds 				   b_blocknr_t blocknr, int already_cleaned)
3467bd4c625cSLinus Torvalds {
34681da177e4SLinus Torvalds 	struct buffer_head *bh;
34691da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
3470a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
34711da177e4SLinus Torvalds 	int ret = 0;
34721da177e4SLinus Torvalds 
3473a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
34741da177e4SLinus Torvalds 	if (!cn || !cn->bh) {
34751da177e4SLinus Torvalds 		return ret;
34761da177e4SLinus Torvalds 	}
34771da177e4SLinus Torvalds 	bh = cn->bh;
34781da177e4SLinus Torvalds 	if (cn->prev) {
34791da177e4SLinus Torvalds 		cn->prev->next = cn->next;
34801da177e4SLinus Torvalds 	}
34811da177e4SLinus Torvalds 	if (cn->next) {
34821da177e4SLinus Torvalds 		cn->next->prev = cn->prev;
34831da177e4SLinus Torvalds 	}
34841da177e4SLinus Torvalds 	if (cn == journal->j_first) {
34851da177e4SLinus Torvalds 		journal->j_first = cn->next;
34861da177e4SLinus Torvalds 	}
34871da177e4SLinus Torvalds 	if (cn == journal->j_last) {
34881da177e4SLinus Torvalds 		journal->j_last = cn->prev;
34891da177e4SLinus Torvalds 	}
34901da177e4SLinus Torvalds 	if (bh)
3491a9dd3643SJeff Mahoney 		remove_journal_hash(sb, journal->j_hash_table, NULL,
3492bd4c625cSLinus Torvalds 				    bh->b_blocknr, 0);
34931da177e4SLinus Torvalds 	clear_buffer_journaled(bh);	/* don't log this one */
34941da177e4SLinus Torvalds 
34951da177e4SLinus Torvalds 	if (!already_cleaned) {
34961da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
34971da177e4SLinus Torvalds 		clear_buffer_dirty(bh);
34981da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
34991da177e4SLinus Torvalds 		put_bh(bh);
35001da177e4SLinus Torvalds 		if (atomic_read(&(bh->b_count)) < 0) {
3501a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1752",
350245b03d5eSJeff Mahoney 					 "b_count < 0");
35031da177e4SLinus Torvalds 		}
35041da177e4SLinus Torvalds 		ret = 1;
35051da177e4SLinus Torvalds 	}
35061da177e4SLinus Torvalds 	journal->j_len--;
35071da177e4SLinus Torvalds 	journal->j_len_alloc--;
3508a9dd3643SJeff Mahoney 	free_cnode(sb, cn);
35091da177e4SLinus Torvalds 	return ret;
35101da177e4SLinus Torvalds }
35111da177e4SLinus Torvalds 
35121da177e4SLinus Torvalds /*
35131da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the
35140779bf2dSMatt LaPlante ** transactions that include it are committed to disk.
35151da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty,
35161da177e4SLinus Torvalds ** and 0 if you aren't
35171da177e4SLinus Torvalds **
35181da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
35191da177e4SLinus Torvalds ** blocks for a given transaction on disk
35201da177e4SLinus Torvalds **
35211da177e4SLinus Torvalds */
3522bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn)
3523bd4c625cSLinus Torvalds {
35241da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
35251da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
35261da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur = cn->hprev;
35271da177e4SLinus Torvalds 	int can_dirty = 1;
35281da177e4SLinus Torvalds 
35291da177e4SLinus Torvalds 	/* first test hprev.  These are all newer than cn, so any node here
35301da177e4SLinus Torvalds 	 ** with the same block number and dev means this node can't be sent
35311da177e4SLinus Torvalds 	 ** to disk right now.
35321da177e4SLinus Torvalds 	 */
35331da177e4SLinus Torvalds 	while (cur && can_dirty) {
35341da177e4SLinus Torvalds 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
35351da177e4SLinus Torvalds 		    cur->blocknr == blocknr) {
35361da177e4SLinus Torvalds 			can_dirty = 0;
35371da177e4SLinus Torvalds 		}
35381da177e4SLinus Torvalds 		cur = cur->hprev;
35391da177e4SLinus Torvalds 	}
35401da177e4SLinus Torvalds 	/* then test hnext.  These are all older than cn.  As long as they
35411da177e4SLinus Torvalds 	 ** are committed to the log, it is safe to write cn to disk
35421da177e4SLinus Torvalds 	 */
35431da177e4SLinus Torvalds 	cur = cn->hnext;
35441da177e4SLinus Torvalds 	while (cur && can_dirty) {
35451da177e4SLinus Torvalds 		if (cur->jlist && cur->jlist->j_len > 0 &&
35461da177e4SLinus Torvalds 		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
35471da177e4SLinus Torvalds 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
35481da177e4SLinus Torvalds 			can_dirty = 0;
35491da177e4SLinus Torvalds 		}
35501da177e4SLinus Torvalds 		cur = cur->hnext;
35511da177e4SLinus Torvalds 	}
35521da177e4SLinus Torvalds 	return can_dirty;
35531da177e4SLinus Torvalds }
35541da177e4SLinus Torvalds 
35551da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk
35560779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning
35571da177e4SLinus Torvalds */
3558bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th,
3559a9dd3643SJeff Mahoney 		     struct super_block *sb, unsigned long nblocks)
3560bd4c625cSLinus Torvalds {
3561a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35621da177e4SLinus Torvalds 
35631da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
35641da177e4SLinus Torvalds 	/* you can sync while nested, very, very bad */
356514a61442SEric Sesterhenn 	BUG_ON(th->t_refcount > 1);
35661da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3567a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3568bd4c625cSLinus Torvalds 					     1);
3569a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
35701da177e4SLinus Torvalds 	}
3571a9dd3643SJeff Mahoney 	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
35721da177e4SLinus Torvalds }
35731da177e4SLinus Torvalds 
35741da177e4SLinus Torvalds /*
35751da177e4SLinus Torvalds ** writeback the pending async commits to disk
35761da177e4SLinus Torvalds */
3577c4028958SDavid Howells static void flush_async_commits(struct work_struct *work)
3578bd4c625cSLinus Torvalds {
3579c4028958SDavid Howells 	struct reiserfs_journal *journal =
3580c4028958SDavid Howells 		container_of(work, struct reiserfs_journal, j_work.work);
3581a9dd3643SJeff Mahoney 	struct super_block *sb = journal->j_work_sb;
35821da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
35831da177e4SLinus Torvalds 	struct list_head *entry;
35841da177e4SLinus Torvalds 
35858ebc4232SFrederic Weisbecker 	reiserfs_write_lock(sb);
35861da177e4SLinus Torvalds 	if (!list_empty(&journal->j_journal_list)) {
35871da177e4SLinus Torvalds 		/* last entry is the youngest, commit it and you get everything */
35881da177e4SLinus Torvalds 		entry = journal->j_journal_list.prev;
35891da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
3590a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
35911da177e4SLinus Torvalds 	}
35928ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
35931da177e4SLinus Torvalds }
35941da177e4SLinus Torvalds 
35951da177e4SLinus Torvalds /*
35961da177e4SLinus Torvalds ** flushes any old transactions to disk
35971da177e4SLinus Torvalds ** ends the current transaction if it is too old
35981da177e4SLinus Torvalds */
3599a9dd3643SJeff Mahoney int reiserfs_flush_old_commits(struct super_block *sb)
3600bd4c625cSLinus Torvalds {
36011da177e4SLinus Torvalds 	time_t now;
36021da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
3603a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
36041da177e4SLinus Torvalds 
36051da177e4SLinus Torvalds 	now = get_seconds();
36061da177e4SLinus Torvalds 	/* safety check so we don't flush while we are replaying the log during
36071da177e4SLinus Torvalds 	 * mount
36081da177e4SLinus Torvalds 	 */
36091da177e4SLinus Torvalds 	if (list_empty(&journal->j_journal_list)) {
36101da177e4SLinus Torvalds 		return 0;
36111da177e4SLinus Torvalds 	}
36121da177e4SLinus Torvalds 
36131da177e4SLinus Torvalds 	/* check the current transaction.  If there are no writers, and it is
36141da177e4SLinus Torvalds 	 * too old, finish it, and force the commit blocks to disk
36151da177e4SLinus Torvalds 	 */
36161da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) <= 0 &&
36171da177e4SLinus Torvalds 	    journal->j_trans_start_time > 0 &&
36181da177e4SLinus Torvalds 	    journal->j_len > 0 &&
3619bd4c625cSLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3620a9dd3643SJeff Mahoney 		if (!journal_join(&th, sb, 1)) {
3621a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
3622a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
3623bd4c625cSLinus Torvalds 						     1);
3624a9dd3643SJeff Mahoney 			journal_mark_dirty(&th, sb,
3625a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
36261da177e4SLinus Torvalds 
36271da177e4SLinus Torvalds 			/* we're only being called from kreiserfsd, it makes no sense to do
36281da177e4SLinus Torvalds 			 ** an async commit so that kreiserfsd can do it later
36291da177e4SLinus Torvalds 			 */
3630a9dd3643SJeff Mahoney 			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
36311da177e4SLinus Torvalds 		}
36321da177e4SLinus Torvalds 	}
3633a9dd3643SJeff Mahoney 	return sb->s_dirt;
36341da177e4SLinus Torvalds }
36351da177e4SLinus Torvalds 
36361da177e4SLinus Torvalds /*
36371da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
36381da177e4SLinus Torvalds **
36391da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
36401da177e4SLinus Torvalds ** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
36411da177e4SLinus Torvalds ** flushes the commit list and returns 0.
36421da177e4SLinus Torvalds **
36431da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
36441da177e4SLinus Torvalds **
36451da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
36461da177e4SLinus Torvalds */
3647bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th,
3648a9dd3643SJeff Mahoney 			     struct super_block *sb, unsigned long nblocks,
3649bd4c625cSLinus Torvalds 			     int flags)
3650bd4c625cSLinus Torvalds {
36511da177e4SLinus Torvalds 
36521da177e4SLinus Torvalds 	time_t now;
36531da177e4SLinus Torvalds 	int flush = flags & FLUSH_ALL;
36541da177e4SLinus Torvalds 	int commit_now = flags & COMMIT_NOW;
36551da177e4SLinus Torvalds 	int wait_on_commit = flags & WAIT;
36561da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
3657a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
36581da177e4SLinus Torvalds 
36591da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
36601da177e4SLinus Torvalds 
36611da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3662c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3663c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
36641da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
36651da177e4SLinus Torvalds 	}
36661da177e4SLinus Torvalds 
36671da177e4SLinus Torvalds 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
36681da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
36691da177e4SLinus Torvalds 		atomic_dec(&(journal->j_wcount));
36701da177e4SLinus Torvalds 	}
36711da177e4SLinus Torvalds 
36721da177e4SLinus Torvalds 	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
36731da177e4SLinus Torvalds 	 ** will be dealt with by next transaction that actually writes something, but should be taken
36741da177e4SLinus Torvalds 	 ** care of in this trans
36751da177e4SLinus Torvalds 	 */
367614a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
367714a61442SEric Sesterhenn 
36781da177e4SLinus Torvalds 	/* if wcount > 0, and we are called to with flush or commit_now,
36791da177e4SLinus Torvalds 	 ** we wait on j_join_wait.  We will wake up when the last writer has
36801da177e4SLinus Torvalds 	 ** finished the transaction, and started it on its way to the disk.
36811da177e4SLinus Torvalds 	 ** Then, we flush the commit or journal list, and just return 0
36821da177e4SLinus Torvalds 	 ** because the rest of journal end was already done for this transaction.
36831da177e4SLinus Torvalds 	 */
36841da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {
36851da177e4SLinus Torvalds 		if (flush || commit_now) {
36861da177e4SLinus Torvalds 			unsigned trans_id;
36871da177e4SLinus Torvalds 
36881da177e4SLinus Torvalds 			jl = journal->j_current_jl;
36891da177e4SLinus Torvalds 			trans_id = jl->j_trans_id;
36901da177e4SLinus Torvalds 			if (wait_on_commit)
36911da177e4SLinus Torvalds 				jl->j_state |= LIST_COMMIT_PENDING;
36921da177e4SLinus Torvalds 			atomic_set(&(journal->j_jlock), 1);
36931da177e4SLinus Torvalds 			if (flush) {
36941da177e4SLinus Torvalds 				journal->j_next_full_flush = 1;
36951da177e4SLinus Torvalds 			}
3696a9dd3643SJeff Mahoney 			unlock_journal(sb);
36971da177e4SLinus Torvalds 
36981da177e4SLinus Torvalds 			/* sleep while the current transaction is still j_jlocked */
36991da177e4SLinus Torvalds 			while (journal->j_trans_id == trans_id) {
37001da177e4SLinus Torvalds 				if (atomic_read(&journal->j_jlock)) {
3701a9dd3643SJeff Mahoney 					queue_log_writer(sb);
37021da177e4SLinus Torvalds 				} else {
3703a9dd3643SJeff Mahoney 					lock_journal(sb);
37041da177e4SLinus Torvalds 					if (journal->j_trans_id == trans_id) {
3705bd4c625cSLinus Torvalds 						atomic_set(&(journal->j_jlock),
3706bd4c625cSLinus Torvalds 							   1);
37071da177e4SLinus Torvalds 					}
3708a9dd3643SJeff Mahoney 					unlock_journal(sb);
37091da177e4SLinus Torvalds 				}
37101da177e4SLinus Torvalds 			}
371114a61442SEric Sesterhenn 			BUG_ON(journal->j_trans_id == trans_id);
371214a61442SEric Sesterhenn 
3713bd4c625cSLinus Torvalds 			if (commit_now
3714a9dd3643SJeff Mahoney 			    && journal_list_still_alive(sb, trans_id)
3715bd4c625cSLinus Torvalds 			    && wait_on_commit) {
3716a9dd3643SJeff Mahoney 				flush_commit_list(sb, jl, 1);
37171da177e4SLinus Torvalds 			}
37181da177e4SLinus Torvalds 			return 0;
37191da177e4SLinus Torvalds 		}
3720a9dd3643SJeff Mahoney 		unlock_journal(sb);
37211da177e4SLinus Torvalds 		return 0;
37221da177e4SLinus Torvalds 	}
37231da177e4SLinus Torvalds 
37241da177e4SLinus Torvalds 	/* deal with old transactions where we are the last writers */
37251da177e4SLinus Torvalds 	now = get_seconds();
37261da177e4SLinus Torvalds 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
37271da177e4SLinus Torvalds 		commit_now = 1;
37281da177e4SLinus Torvalds 		journal->j_next_async_flush = 1;
37291da177e4SLinus Torvalds 	}
37301da177e4SLinus Torvalds 	/* don't batch when someone is waiting on j_join_wait */
37311da177e4SLinus Torvalds 	/* don't batch when syncing the commit or flushing the whole trans */
3732bd4c625cSLinus Torvalds 	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3733bd4c625cSLinus Torvalds 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3734bd4c625cSLinus Torvalds 	    && journal->j_len_alloc < journal->j_max_batch
3735bd4c625cSLinus Torvalds 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
37361da177e4SLinus Torvalds 		journal->j_bcount++;
3737a9dd3643SJeff Mahoney 		unlock_journal(sb);
37381da177e4SLinus Torvalds 		return 0;
37391da177e4SLinus Torvalds 	}
37401da177e4SLinus Torvalds 
3741a9dd3643SJeff Mahoney 	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3742a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-003",
3743c3a9c210SJeff Mahoney 			       "j_start (%ld) is too high",
3744bd4c625cSLinus Torvalds 			       journal->j_start);
37451da177e4SLinus Torvalds 	}
37461da177e4SLinus Torvalds 	return 1;
37471da177e4SLinus Torvalds }
37481da177e4SLinus Torvalds 
37491da177e4SLinus Torvalds /*
37501da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe.
37511da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
37521da177e4SLinus Torvalds **
37531da177e4SLinus Torvalds ** otherwise:
37541da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
37551da177e4SLinus Torvalds ** before this transaction has finished.
37561da177e4SLinus Torvalds **
37571da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
37581da177e4SLinus Torvalds ** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
37591da177e4SLinus Torvalds ** the block can't be reallocated yet.
37601da177e4SLinus Torvalds **
37611da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
37621da177e4SLinus Torvalds */
3763bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th,
3764a9dd3643SJeff Mahoney 		       struct super_block *sb, b_blocknr_t blocknr)
3765bd4c625cSLinus Torvalds {
3766a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37671da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
37681da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
37691da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
37701da177e4SLinus Torvalds 	int cleaned = 0;
37711da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
37721da177e4SLinus Torvalds 
3773a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
37741da177e4SLinus Torvalds 	if (cn && cn->bh) {
37751da177e4SLinus Torvalds 		bh = cn->bh;
37761da177e4SLinus Torvalds 		get_bh(bh);
37771da177e4SLinus Torvalds 	}
37781da177e4SLinus Torvalds 	/* if it is journal new, we just remove it from this transaction */
37791da177e4SLinus Torvalds 	if (bh && buffer_journal_new(bh)) {
37801da177e4SLinus Torvalds 		clear_buffer_journal_new(bh);
37811da177e4SLinus Torvalds 		clear_prepared_bits(bh);
37821da177e4SLinus Torvalds 		reiserfs_clean_and_file_buffer(bh);
3783a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
37841da177e4SLinus Torvalds 	} else {
37851da177e4SLinus Torvalds 		/* set the bit for this block in the journal bitmap for this transaction */
37861da177e4SLinus Torvalds 		jb = journal->j_current_jl->j_list_bitmap;
37871da177e4SLinus Torvalds 		if (!jb) {
3788a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-1702",
3789c3a9c210SJeff Mahoney 				       "journal_list_bitmap is NULL");
37901da177e4SLinus Torvalds 		}
3791a9dd3643SJeff Mahoney 		set_bit_in_list_bitmap(sb, blocknr, jb);
37921da177e4SLinus Torvalds 
37931da177e4SLinus Torvalds 		/* Note, the entire while loop is not allowed to schedule.  */
37941da177e4SLinus Torvalds 
37951da177e4SLinus Torvalds 		if (bh) {
37961da177e4SLinus Torvalds 			clear_prepared_bits(bh);
37971da177e4SLinus Torvalds 			reiserfs_clean_and_file_buffer(bh);
37981da177e4SLinus Torvalds 		}
3799a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
38001da177e4SLinus Torvalds 
38011da177e4SLinus Torvalds 		/* find all older transactions with this block, make sure they don't try to write it out */
3802a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3803bd4c625cSLinus Torvalds 					  blocknr);
38041da177e4SLinus Torvalds 		while (cn) {
3805a9dd3643SJeff Mahoney 			if (sb == cn->sb && blocknr == cn->blocknr) {
38061da177e4SLinus Torvalds 				set_bit(BLOCK_FREED, &cn->state);
38071da177e4SLinus Torvalds 				if (cn->bh) {
38081da177e4SLinus Torvalds 					if (!cleaned) {
38091da177e4SLinus Torvalds 						/* remove_from_transaction will brelse the buffer if it was
38101da177e4SLinus Torvalds 						 ** in the current trans
38111da177e4SLinus Torvalds 						 */
3812bd4c625cSLinus Torvalds 						clear_buffer_journal_dirty(cn->
3813bd4c625cSLinus Torvalds 									   bh);
38141da177e4SLinus Torvalds 						clear_buffer_dirty(cn->bh);
3815bd4c625cSLinus Torvalds 						clear_buffer_journal_test(cn->
3816bd4c625cSLinus Torvalds 									  bh);
38171da177e4SLinus Torvalds 						cleaned = 1;
38181da177e4SLinus Torvalds 						put_bh(cn->bh);
3819bd4c625cSLinus Torvalds 						if (atomic_read
3820bd4c625cSLinus Torvalds 						    (&(cn->bh->b_count)) < 0) {
3821a9dd3643SJeff Mahoney 							reiserfs_warning(sb,
382245b03d5eSJeff Mahoney 								 "journal-2138",
382345b03d5eSJeff Mahoney 								 "cn->bh->b_count < 0");
38241da177e4SLinus Torvalds 						}
38251da177e4SLinus Torvalds 					}
38261da177e4SLinus Torvalds 					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
3827bd4c625cSLinus Torvalds 						atomic_dec(&
3828bd4c625cSLinus Torvalds 							   (cn->jlist->
3829bd4c625cSLinus Torvalds 							    j_nonzerolen));
38301da177e4SLinus Torvalds 					}
38311da177e4SLinus Torvalds 					cn->bh = NULL;
38321da177e4SLinus Torvalds 				}
38331da177e4SLinus Torvalds 			}
38341da177e4SLinus Torvalds 			cn = cn->hnext;
38351da177e4SLinus Torvalds 		}
38361da177e4SLinus Torvalds 	}
38371da177e4SLinus Torvalds 
3838398c95bdSChris Mason 	if (bh)
3839398c95bdSChris Mason 		release_buffer_page(bh); /* get_hash grabs the buffer */
38401da177e4SLinus Torvalds 	return 0;
38411da177e4SLinus Torvalds }
38421da177e4SLinus Torvalds 
3843bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode)
3844bd4c625cSLinus Torvalds {
38451da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
38461da177e4SLinus Torvalds 	REISERFS_I(inode)->i_jl = journal->j_current_jl;
38471da177e4SLinus Torvalds 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
38481da177e4SLinus Torvalds }
38491da177e4SLinus Torvalds 
38501da177e4SLinus Torvalds /*
38511da177e4SLinus Torvalds  * returns -1 on error, 0 if no commits/barriers were done and 1
38521da177e4SLinus Torvalds  * if a transaction was actually committed and the barrier was done
38531da177e4SLinus Torvalds  */
38541da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id,
38551da177e4SLinus Torvalds 			     struct reiserfs_journal_list *jl)
38561da177e4SLinus Torvalds {
38571da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
38581da177e4SLinus Torvalds 	struct super_block *sb = inode->i_sb;
38591da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
38601da177e4SLinus Torvalds 	int ret = 0;
38611da177e4SLinus Torvalds 
38621da177e4SLinus Torvalds 	/* is it from the current transaction, or from an unknown transaction? */
38631da177e4SLinus Torvalds 	if (id == journal->j_trans_id) {
38641da177e4SLinus Torvalds 		jl = journal->j_current_jl;
38651da177e4SLinus Torvalds 		/* try to let other writers come in and grow this transaction */
38661da177e4SLinus Torvalds 		let_transaction_grow(sb, id);
38671da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
38681da177e4SLinus Torvalds 			goto flush_commit_only;
38691da177e4SLinus Torvalds 		}
38701da177e4SLinus Torvalds 
38711da177e4SLinus Torvalds 		ret = journal_begin(&th, sb, 1);
38721da177e4SLinus Torvalds 		if (ret)
38731da177e4SLinus Torvalds 			return ret;
38741da177e4SLinus Torvalds 
38751da177e4SLinus Torvalds 		/* someone might have ended this transaction while we joined */
38761da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
3877bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3878bd4c625cSLinus Torvalds 						     1);
38791da177e4SLinus Torvalds 			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
38801da177e4SLinus Torvalds 			ret = journal_end(&th, sb, 1);
38811da177e4SLinus Torvalds 			goto flush_commit_only;
38821da177e4SLinus Torvalds 		}
38831da177e4SLinus Torvalds 
38841da177e4SLinus Torvalds 		ret = journal_end_sync(&th, sb, 1);
38851da177e4SLinus Torvalds 		if (!ret)
38861da177e4SLinus Torvalds 			ret = 1;
38871da177e4SLinus Torvalds 
38881da177e4SLinus Torvalds 	} else {
38891da177e4SLinus Torvalds 		/* this gets tricky, we have to make sure the journal list in
38901da177e4SLinus Torvalds 		 * the inode still exists.  We know the list is still around
38911da177e4SLinus Torvalds 		 * if we've got a larger transaction id than the oldest list
38921da177e4SLinus Torvalds 		 */
38931da177e4SLinus Torvalds 	      flush_commit_only:
38941da177e4SLinus Torvalds 		if (journal_list_still_alive(inode->i_sb, id)) {
38951da177e4SLinus Torvalds 			/*
38961da177e4SLinus Torvalds 			 * we only set ret to 1 when we know for sure
38971da177e4SLinus Torvalds 			 * the barrier hasn't been started yet on the commit
38981da177e4SLinus Torvalds 			 * block.
38991da177e4SLinus Torvalds 			 */
39001da177e4SLinus Torvalds 			if (atomic_read(&jl->j_commit_left) > 1)
39011da177e4SLinus Torvalds 				ret = 1;
39021da177e4SLinus Torvalds 			flush_commit_list(sb, jl, 1);
39031da177e4SLinus Torvalds 			if (journal->j_errno)
39041da177e4SLinus Torvalds 				ret = journal->j_errno;
39051da177e4SLinus Torvalds 		}
39061da177e4SLinus Torvalds 	}
39071da177e4SLinus Torvalds 	/* otherwise the list is gone, and long since committed */
39081da177e4SLinus Torvalds 	return ret;
39091da177e4SLinus Torvalds }
39101da177e4SLinus Torvalds 
3911bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode)
3912bd4c625cSLinus Torvalds {
3913600ed416SJeff Mahoney 	unsigned int id = REISERFS_I(inode)->i_trans_id;
39141da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
39151da177e4SLinus Torvalds 
39161da177e4SLinus Torvalds 	/* for the whole inode, assume unset id means it was
39171da177e4SLinus Torvalds 	 * changed in the current transaction.  More conservative
39181da177e4SLinus Torvalds 	 */
39191da177e4SLinus Torvalds 	if (!id || !jl) {
39201da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
39211da177e4SLinus Torvalds 		id = REISERFS_I(inode)->i_trans_id;
39221da177e4SLinus Torvalds 		/* jl will be updated in __commit_trans_jl */
39231da177e4SLinus Torvalds 	}
39241da177e4SLinus Torvalds 
39251da177e4SLinus Torvalds 	return __commit_trans_jl(inode, id, jl);
39261da177e4SLinus Torvalds }
39271da177e4SLinus Torvalds 
3928a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb,
3929bd4c625cSLinus Torvalds 				      struct buffer_head *bh)
3930bd4c625cSLinus Torvalds {
3931a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3932a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.restore_prepared);
39331da177e4SLinus Torvalds 	if (!bh) {
39341da177e4SLinus Torvalds 		return;
39351da177e4SLinus Torvalds 	}
39361da177e4SLinus Torvalds 	if (test_clear_buffer_journal_restore_dirty(bh) &&
39371da177e4SLinus Torvalds 	    buffer_journal_dirty(bh)) {
39381da177e4SLinus Torvalds 		struct reiserfs_journal_cnode *cn;
3939a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb,
39401da177e4SLinus Torvalds 					  journal->j_list_hash_table,
39411da177e4SLinus Torvalds 					  bh->b_blocknr);
39421da177e4SLinus Torvalds 		if (cn && can_dirty(cn)) {
39431da177e4SLinus Torvalds 			set_buffer_journal_test(bh);
39441da177e4SLinus Torvalds 			mark_buffer_dirty(bh);
39451da177e4SLinus Torvalds 		}
39461da177e4SLinus Torvalds 	}
39471da177e4SLinus Torvalds 	clear_buffer_journal_prepared(bh);
39481da177e4SLinus Torvalds }
39491da177e4SLinus Torvalds 
39501da177e4SLinus Torvalds extern struct tree_balance *cur_tb;
39511da177e4SLinus Torvalds /*
39521da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't
39531da177e4SLinus Torvalds ** be written to disk while we are altering it.  So, we must:
39541da177e4SLinus Torvalds ** clean it
39551da177e4SLinus Torvalds ** wait on it.
39561da177e4SLinus Torvalds **
39571da177e4SLinus Torvalds */
3958a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb,
3959bd4c625cSLinus Torvalds 				 struct buffer_head *bh, int wait)
3960bd4c625cSLinus Torvalds {
3961a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.prepare);
39621da177e4SLinus Torvalds 
3963ca5de404SNick Piggin 	if (!trylock_buffer(bh)) {
39641da177e4SLinus Torvalds 		if (!wait)
39651da177e4SLinus Torvalds 			return 0;
39661da177e4SLinus Torvalds 		lock_buffer(bh);
39671da177e4SLinus Torvalds 	}
39681da177e4SLinus Torvalds 	set_buffer_journal_prepared(bh);
39691da177e4SLinus Torvalds 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
39701da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
39711da177e4SLinus Torvalds 		set_buffer_journal_restore_dirty(bh);
39721da177e4SLinus Torvalds 	}
39731da177e4SLinus Torvalds 	unlock_buffer(bh);
39741da177e4SLinus Torvalds 	return 1;
39751da177e4SLinus Torvalds }
39761da177e4SLinus Torvalds 
3977bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s)
3978bd4c625cSLinus Torvalds {
39791da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
39801da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
39811da177e4SLinus Torvalds 	struct list_head *entry;
39821da177e4SLinus Torvalds 	time_t now = get_seconds();
39831da177e4SLinus Torvalds 
39841da177e4SLinus Torvalds 	while (!list_empty(&journal->j_journal_list)) {
39851da177e4SLinus Torvalds 		entry = journal->j_journal_list.next;
39861da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
39871da177e4SLinus Torvalds 		/* this check should always be run, to send old lists to disk */
3988a3172027SChris Mason 		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3989a3172027SChris Mason 		    atomic_read(&jl->j_commit_left) == 0 &&
3990a3172027SChris Mason 		    test_transaction(s, jl)) {
39911da177e4SLinus Torvalds 			flush_used_journal_lists(s, jl);
39921da177e4SLinus Torvalds 		} else {
39931da177e4SLinus Torvalds 			break;
39941da177e4SLinus Torvalds 		}
39951da177e4SLinus Torvalds 	}
39961da177e4SLinus Torvalds }
39971da177e4SLinus Torvalds 
39981da177e4SLinus Torvalds /*
39991da177e4SLinus Torvalds ** long and ugly.  If flush, will not return until all commit
40001da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk.
40011da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk.
40021da177e4SLinus Torvalds **
40031da177e4SLinus Torvalds ** keep reading, there are comments as you go along
40041da177e4SLinus Torvalds **
40051da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing
40061da177e4SLinus Torvalds ** journal lists, etc just won't happen.
40071da177e4SLinus Torvalds */
4008bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th,
4009a9dd3643SJeff Mahoney 			  struct super_block *sb, unsigned long nblocks,
4010bd4c625cSLinus Torvalds 			  int flags)
4011bd4c625cSLinus Torvalds {
4012a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
40131da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
40141da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *last_cn = NULL;
40151da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
40161da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
40171da177e4SLinus Torvalds 	struct buffer_head *c_bh;	/* commit bh */
40181da177e4SLinus Torvalds 	struct buffer_head *d_bh;	/* desc bh */
40191da177e4SLinus Torvalds 	int cur_write_start = 0;	/* start index of current log write */
40201da177e4SLinus Torvalds 	int old_start;
40211da177e4SLinus Torvalds 	int i;
4022a44c94a7SAlexander Zarochentsev 	int flush;
4023a44c94a7SAlexander Zarochentsev 	int wait_on_commit;
40241da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl, *temp_jl;
40251da177e4SLinus Torvalds 	struct list_head *entry, *safe;
40261da177e4SLinus Torvalds 	unsigned long jindex;
4027600ed416SJeff Mahoney 	unsigned int commit_trans_id;
40281da177e4SLinus Torvalds 	int trans_half;
40291da177e4SLinus Torvalds 
40301da177e4SLinus Torvalds 	BUG_ON(th->t_refcount > 1);
40311da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
40321da177e4SLinus Torvalds 
4033a44c94a7SAlexander Zarochentsev 	/* protect flush_older_commits from doing mistakes if the
4034a44c94a7SAlexander Zarochentsev            transaction ID counter gets overflowed.  */
4035600ed416SJeff Mahoney 	if (th->t_trans_id == ~0U)
4036a44c94a7SAlexander Zarochentsev 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
4037a44c94a7SAlexander Zarochentsev 	flush = flags & FLUSH_ALL;
4038a44c94a7SAlexander Zarochentsev 	wait_on_commit = flags & WAIT;
4039a44c94a7SAlexander Zarochentsev 
404022e2c507SJens Axboe 	put_fs_excl();
40411da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
4042a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end");
40431da177e4SLinus Torvalds 	if (journal->j_len == 0) {
4044a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
4045bd4c625cSLinus Torvalds 					     1);
4046a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
40471da177e4SLinus Torvalds 	}
40481da177e4SLinus Torvalds 
4049a9dd3643SJeff Mahoney 	lock_journal(sb);
40501da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
40511da177e4SLinus Torvalds 		flags |= FLUSH_ALL;
40521da177e4SLinus Torvalds 		flush = 1;
40531da177e4SLinus Torvalds 	}
40541da177e4SLinus Torvalds 	if (journal->j_next_async_flush) {
40551da177e4SLinus Torvalds 		flags |= COMMIT_NOW | WAIT;
40561da177e4SLinus Torvalds 		wait_on_commit = 1;
40571da177e4SLinus Torvalds 	}
40581da177e4SLinus Torvalds 
40591da177e4SLinus Torvalds 	/* check_journal_end locks the journal, and unlocks if it does not return 1
40601da177e4SLinus Torvalds 	 ** it tells us if we should continue with the journal_end, or just return
40611da177e4SLinus Torvalds 	 */
4062a9dd3643SJeff Mahoney 	if (!check_journal_end(th, sb, nblocks, flags)) {
4063a9dd3643SJeff Mahoney 		sb->s_dirt = 1;
4064a9dd3643SJeff Mahoney 		wake_queued_writers(sb);
4065a9dd3643SJeff Mahoney 		reiserfs_async_progress_wait(sb);
40661da177e4SLinus Torvalds 		goto out;
40671da177e4SLinus Torvalds 	}
40681da177e4SLinus Torvalds 
40691da177e4SLinus Torvalds 	/* check_journal_end might set these, check again */
40701da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
40711da177e4SLinus Torvalds 		flush = 1;
40721da177e4SLinus Torvalds 	}
40731da177e4SLinus Torvalds 
40741da177e4SLinus Torvalds 	/*
40751da177e4SLinus Torvalds 	 ** j must wait means we have to flush the log blocks, and the real blocks for
40761da177e4SLinus Torvalds 	 ** this transaction
40771da177e4SLinus Torvalds 	 */
40781da177e4SLinus Torvalds 	if (journal->j_must_wait > 0) {
40791da177e4SLinus Torvalds 		flush = 1;
40801da177e4SLinus Torvalds 	}
40811da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
4082ef43bc4fSJan Kara 	/* quota ops might need to nest, setup the journal_info pointer for them
4083ef43bc4fSJan Kara 	 * and raise the refcount so that it is > 0. */
40841da177e4SLinus Torvalds 	current->journal_info = th;
4085ef43bc4fSJan Kara 	th->t_refcount++;
40861da177e4SLinus Torvalds 	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
40871da177e4SLinus Torvalds 						 * the transaction */
4088ef43bc4fSJan Kara 	th->t_refcount--;
40891da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
40901da177e4SLinus Torvalds #endif
40911da177e4SLinus Torvalds 
40921da177e4SLinus Torvalds 	/* setup description block */
4093bd4c625cSLinus Torvalds 	d_bh =
4094a9dd3643SJeff Mahoney 	    journal_getblk(sb,
4095a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4096bd4c625cSLinus Torvalds 			   journal->j_start);
40971da177e4SLinus Torvalds 	set_buffer_uptodate(d_bh);
40981da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
40991da177e4SLinus Torvalds 	memset(d_bh->b_data, 0, d_bh->b_size);
41001da177e4SLinus Torvalds 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
41011da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
41021da177e4SLinus Torvalds 
41031da177e4SLinus Torvalds 	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
4104a9dd3643SJeff Mahoney 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4105bd4c625cSLinus Torvalds 			      ((journal->j_start + journal->j_len +
4106a9dd3643SJeff Mahoney 				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
41071da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
41081da177e4SLinus Torvalds 	memset(c_bh->b_data, 0, c_bh->b_size);
41091da177e4SLinus Torvalds 	set_commit_trans_id(commit, journal->j_trans_id);
41101da177e4SLinus Torvalds 	set_buffer_uptodate(c_bh);
41111da177e4SLinus Torvalds 
41121da177e4SLinus Torvalds 	/* init this journal list */
41131da177e4SLinus Torvalds 	jl = journal->j_current_jl;
41141da177e4SLinus Torvalds 
41151da177e4SLinus Torvalds 	/* we lock the commit before doing anything because
41161da177e4SLinus Torvalds 	 * we want to make sure nobody tries to run flush_commit_list until
41171da177e4SLinus Torvalds 	 * the new transaction is fully setup, and we've already flushed the
41181da177e4SLinus Torvalds 	 * ordered bh list
41191da177e4SLinus Torvalds 	 */
41208ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
41211da177e4SLinus Torvalds 
41221da177e4SLinus Torvalds 	/* save the transaction id in case we need to commit it later */
41231da177e4SLinus Torvalds 	commit_trans_id = jl->j_trans_id;
41241da177e4SLinus Torvalds 
41251da177e4SLinus Torvalds 	atomic_set(&jl->j_older_commits_done, 0);
41261da177e4SLinus Torvalds 	jl->j_trans_id = journal->j_trans_id;
41271da177e4SLinus Torvalds 	jl->j_timestamp = journal->j_trans_start_time;
41281da177e4SLinus Torvalds 	jl->j_commit_bh = c_bh;
41291da177e4SLinus Torvalds 	jl->j_start = journal->j_start;
41301da177e4SLinus Torvalds 	jl->j_len = journal->j_len;
41311da177e4SLinus Torvalds 	atomic_set(&jl->j_nonzerolen, journal->j_len);
41321da177e4SLinus Torvalds 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
41331da177e4SLinus Torvalds 	jl->j_realblock = NULL;
41341da177e4SLinus Torvalds 
41351da177e4SLinus Torvalds 	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
41361da177e4SLinus Torvalds 	 **  for each real block, add it to the journal list hash,
41371da177e4SLinus Torvalds 	 ** copy into real block index array in the commit or desc block
41381da177e4SLinus Torvalds 	 */
4139a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
41401da177e4SLinus Torvalds 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
41411da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
4142a9dd3643SJeff Mahoney 			jl_cn = get_cnode(sb);
41431da177e4SLinus Torvalds 			if (!jl_cn) {
4144a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-1676",
4145c3a9c210SJeff Mahoney 					       "get_cnode returned NULL");
41461da177e4SLinus Torvalds 			}
41471da177e4SLinus Torvalds 			if (i == 0) {
41481da177e4SLinus Torvalds 				jl->j_realblock = jl_cn;
41491da177e4SLinus Torvalds 			}
41501da177e4SLinus Torvalds 			jl_cn->prev = last_cn;
41511da177e4SLinus Torvalds 			jl_cn->next = NULL;
41521da177e4SLinus Torvalds 			if (last_cn) {
41531da177e4SLinus Torvalds 				last_cn->next = jl_cn;
41541da177e4SLinus Torvalds 			}
41551da177e4SLinus Torvalds 			last_cn = jl_cn;
41561da177e4SLinus Torvalds 			/* make sure the block we are trying to log is not a block
41571da177e4SLinus Torvalds 			   of journal or reserved area */
41581da177e4SLinus Torvalds 
4159bd4c625cSLinus Torvalds 			if (is_block_in_log_or_reserved_area
4160a9dd3643SJeff Mahoney 			    (sb, cn->bh->b_blocknr)) {
4161a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-2332",
4162c3a9c210SJeff Mahoney 					       "Trying to log block %lu, "
4163c3a9c210SJeff Mahoney 					       "which is a log block",
4164bd4c625cSLinus Torvalds 					       cn->bh->b_blocknr);
41651da177e4SLinus Torvalds 			}
41661da177e4SLinus Torvalds 			jl_cn->blocknr = cn->bh->b_blocknr;
41671da177e4SLinus Torvalds 			jl_cn->state = 0;
4168a9dd3643SJeff Mahoney 			jl_cn->sb = sb;
41691da177e4SLinus Torvalds 			jl_cn->bh = cn->bh;
41701da177e4SLinus Torvalds 			jl_cn->jlist = jl;
41711da177e4SLinus Torvalds 			insert_journal_hash(journal->j_list_hash_table, jl_cn);
41721da177e4SLinus Torvalds 			if (i < trans_half) {
4173bd4c625cSLinus Torvalds 				desc->j_realblock[i] =
4174bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
41751da177e4SLinus Torvalds 			} else {
4176bd4c625cSLinus Torvalds 				commit->j_realblock[i - trans_half] =
4177bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
41781da177e4SLinus Torvalds 			}
41791da177e4SLinus Torvalds 		} else {
41801da177e4SLinus Torvalds 			i--;
41811da177e4SLinus Torvalds 		}
41821da177e4SLinus Torvalds 	}
41831da177e4SLinus Torvalds 	set_desc_trans_len(desc, journal->j_len);
41841da177e4SLinus Torvalds 	set_desc_mount_id(desc, journal->j_mount_id);
41851da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
41861da177e4SLinus Torvalds 	set_commit_trans_len(commit, journal->j_len);
41871da177e4SLinus Torvalds 
41881da177e4SLinus Torvalds 	/* special check in case all buffers in the journal were marked for not logging */
418914a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
41901da177e4SLinus Torvalds 
41911da177e4SLinus Torvalds 	/* we're about to dirty all the log blocks, mark the description block
41921da177e4SLinus Torvalds 	 * dirty now too.  Don't mark the commit block dirty until all the
41931da177e4SLinus Torvalds 	 * others are on disk
41941da177e4SLinus Torvalds 	 */
41951da177e4SLinus Torvalds 	mark_buffer_dirty(d_bh);
41961da177e4SLinus Torvalds 
41971da177e4SLinus Torvalds 	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
41981da177e4SLinus Torvalds 	cur_write_start = journal->j_start;
41991da177e4SLinus Torvalds 	cn = journal->j_first;
42001da177e4SLinus Torvalds 	jindex = 1;		/* start at one so we don't get the desc again */
42011da177e4SLinus Torvalds 	while (cn) {
42021da177e4SLinus Torvalds 		clear_buffer_journal_new(cn->bh);
42031da177e4SLinus Torvalds 		/* copy all the real blocks into log area.  dirty log blocks */
42041da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
42051da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
42061da177e4SLinus Torvalds 			char *addr;
42071da177e4SLinus Torvalds 			struct page *page;
4208bd4c625cSLinus Torvalds 			tmp_bh =
4209a9dd3643SJeff Mahoney 			    journal_getblk(sb,
4210a9dd3643SJeff Mahoney 					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4211bd4c625cSLinus Torvalds 					   ((cur_write_start +
4212bd4c625cSLinus Torvalds 					     jindex) %
4213a9dd3643SJeff Mahoney 					    SB_ONDISK_JOURNAL_SIZE(sb)));
42141da177e4SLinus Torvalds 			set_buffer_uptodate(tmp_bh);
42151da177e4SLinus Torvalds 			page = cn->bh->b_page;
42161da177e4SLinus Torvalds 			addr = kmap(page);
4217bd4c625cSLinus Torvalds 			memcpy(tmp_bh->b_data,
4218bd4c625cSLinus Torvalds 			       addr + offset_in_page(cn->bh->b_data),
42191da177e4SLinus Torvalds 			       cn->bh->b_size);
42201da177e4SLinus Torvalds 			kunmap(page);
42211da177e4SLinus Torvalds 			mark_buffer_dirty(tmp_bh);
42221da177e4SLinus Torvalds 			jindex++;
42231da177e4SLinus Torvalds 			set_buffer_journal_dirty(cn->bh);
42241da177e4SLinus Torvalds 			clear_buffer_journaled(cn->bh);
42251da177e4SLinus Torvalds 		} else {
42261da177e4SLinus Torvalds 			/* JDirty cleared sometime during transaction.  don't log this one */
4227a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2048",
422845b03d5eSJeff Mahoney 					 "BAD, buffer in journal hash, "
422945b03d5eSJeff Mahoney 					 "but not JDirty!");
42301da177e4SLinus Torvalds 			brelse(cn->bh);
42311da177e4SLinus Torvalds 		}
42321da177e4SLinus Torvalds 		next = cn->next;
4233a9dd3643SJeff Mahoney 		free_cnode(sb, cn);
42341da177e4SLinus Torvalds 		cn = next;
42351da177e4SLinus Torvalds 		cond_resched();
42361da177e4SLinus Torvalds 	}
42371da177e4SLinus Torvalds 
42381da177e4SLinus Torvalds 	/* we are done  with both the c_bh and d_bh, but
42391da177e4SLinus Torvalds 	 ** c_bh must be written after all other commit blocks,
42401da177e4SLinus Torvalds 	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
42411da177e4SLinus Torvalds 	 */
42421da177e4SLinus Torvalds 
4243a9dd3643SJeff Mahoney 	journal->j_current_jl = alloc_journal_list(sb);
42441da177e4SLinus Torvalds 
42451da177e4SLinus Torvalds 	/* now it is safe to insert this transaction on the main list */
42461da177e4SLinus Torvalds 	list_add_tail(&jl->j_list, &journal->j_journal_list);
42471da177e4SLinus Torvalds 	list_add_tail(&jl->j_working_list, &journal->j_working_list);
42481da177e4SLinus Torvalds 	journal->j_num_work_lists++;
42491da177e4SLinus Torvalds 
42501da177e4SLinus Torvalds 	/* reset journal values for the next transaction */
42511da177e4SLinus Torvalds 	old_start = journal->j_start;
4252bd4c625cSLinus Torvalds 	journal->j_start =
4253bd4c625cSLinus Torvalds 	    (journal->j_start + journal->j_len +
4254a9dd3643SJeff Mahoney 	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
42551da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
42561da177e4SLinus Torvalds 	journal->j_bcount = 0;
42571da177e4SLinus Torvalds 	journal->j_last = NULL;
42581da177e4SLinus Torvalds 	journal->j_first = NULL;
42591da177e4SLinus Torvalds 	journal->j_len = 0;
42601da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
4261a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
4262a44c94a7SAlexander Zarochentsev 	if (++journal->j_trans_id == 0)
4263a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
42641da177e4SLinus Torvalds 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
42651da177e4SLinus Torvalds 	journal->j_must_wait = 0;
42661da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
42671da177e4SLinus Torvalds 	journal->j_next_full_flush = 0;
42681da177e4SLinus Torvalds 	journal->j_next_async_flush = 0;
4269a9dd3643SJeff Mahoney 	init_journal_hash(sb);
42701da177e4SLinus Torvalds 
42711da177e4SLinus Torvalds 	// make sure reiserfs_add_jh sees the new current_jl before we
42721da177e4SLinus Torvalds 	// write out the tails
42731da177e4SLinus Torvalds 	smp_mb();
42741da177e4SLinus Torvalds 
42751da177e4SLinus Torvalds 	/* tail conversion targets have to hit the disk before we end the
42761da177e4SLinus Torvalds 	 * transaction.  Otherwise a later transaction might repack the tail
42771da177e4SLinus Torvalds 	 * before this transaction commits, leaving the data block unflushed and
42781da177e4SLinus Torvalds 	 * clean, if we crash before the later transaction commits, the data block
42791da177e4SLinus Torvalds 	 * is lost.
42801da177e4SLinus Torvalds 	 */
42811da177e4SLinus Torvalds 	if (!list_empty(&jl->j_tail_bh_list)) {
42828ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
42831da177e4SLinus Torvalds 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
42841da177e4SLinus Torvalds 				      journal, jl, &jl->j_tail_bh_list);
42858ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
42861da177e4SLinus Torvalds 	}
428714a61442SEric Sesterhenn 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
428890415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
42891da177e4SLinus Torvalds 
42901da177e4SLinus Torvalds 	/* honor the flush wishes from the caller, simple commits can
42911da177e4SLinus Torvalds 	 ** be done outside the journal lock, they are done below
42921da177e4SLinus Torvalds 	 **
42931da177e4SLinus Torvalds 	 ** if we don't flush the commit list right now, we put it into
42941da177e4SLinus Torvalds 	 ** the work queue so the people waiting on the async progress work
42951da177e4SLinus Torvalds 	 ** queue don't wait for this proc to flush journal lists and such.
42961da177e4SLinus Torvalds 	 */
42971da177e4SLinus Torvalds 	if (flush) {
4298a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
4299a9dd3643SJeff Mahoney 		flush_journal_list(sb, jl, 1);
43001da177e4SLinus Torvalds 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
43011da177e4SLinus Torvalds 		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
43021da177e4SLinus Torvalds 
43031da177e4SLinus Torvalds 	/* if the next transaction has any chance of wrapping, flush
43041da177e4SLinus Torvalds 	 ** transactions that might get overwritten.  If any journal lists are very
43051da177e4SLinus Torvalds 	 ** old flush them as well.
43061da177e4SLinus Torvalds 	 */
43071da177e4SLinus Torvalds       first_jl:
43081da177e4SLinus Torvalds 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
43091da177e4SLinus Torvalds 		temp_jl = JOURNAL_LIST_ENTRY(entry);
43101da177e4SLinus Torvalds 		if (journal->j_start <= temp_jl->j_start) {
43111da177e4SLinus Torvalds 			if ((journal->j_start + journal->j_trans_max + 1) >=
4312bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4313a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
43141da177e4SLinus Torvalds 				goto first_jl;
43151da177e4SLinus Torvalds 			} else if ((journal->j_start +
43161da177e4SLinus Torvalds 				    journal->j_trans_max + 1) <
4317a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_SIZE(sb)) {
43181da177e4SLinus Torvalds 				/* if we don't cross into the next transaction and we don't
43191da177e4SLinus Torvalds 				 * wrap, there is no way we can overlap any later transactions
43201da177e4SLinus Torvalds 				 * break now
43211da177e4SLinus Torvalds 				 */
43221da177e4SLinus Torvalds 				break;
43231da177e4SLinus Torvalds 			}
43241da177e4SLinus Torvalds 		} else if ((journal->j_start +
43251da177e4SLinus Torvalds 			    journal->j_trans_max + 1) >
4326a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_SIZE(sb)) {
43271da177e4SLinus Torvalds 			if (((journal->j_start + journal->j_trans_max + 1) %
4328a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb)) >=
4329bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4330a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
43311da177e4SLinus Torvalds 				goto first_jl;
43321da177e4SLinus Torvalds 			} else {
43331da177e4SLinus Torvalds 				/* we don't overlap anything from out start to the end of the
43341da177e4SLinus Torvalds 				 * log, and our wrapped portion doesn't overlap anything at
43351da177e4SLinus Torvalds 				 * the start of the log.  We can break
43361da177e4SLinus Torvalds 				 */
43371da177e4SLinus Torvalds 				break;
43381da177e4SLinus Torvalds 			}
43391da177e4SLinus Torvalds 		}
43401da177e4SLinus Torvalds 	}
4341a9dd3643SJeff Mahoney 	flush_old_journal_lists(sb);
43421da177e4SLinus Torvalds 
4343bd4c625cSLinus Torvalds 	journal->j_current_jl->j_list_bitmap =
4344a9dd3643SJeff Mahoney 	    get_list_bitmap(sb, journal->j_current_jl);
43451da177e4SLinus Torvalds 
43461da177e4SLinus Torvalds 	if (!(journal->j_current_jl->j_list_bitmap)) {
4347a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-1996",
4348c3a9c210SJeff Mahoney 			       "could not get a list bitmap");
43491da177e4SLinus Torvalds 	}
43501da177e4SLinus Torvalds 
43511da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
4352a9dd3643SJeff Mahoney 	unlock_journal(sb);
43531da177e4SLinus Torvalds 	/* wake up any body waiting to join. */
43541da177e4SLinus Torvalds 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
43551da177e4SLinus Torvalds 	wake_up(&(journal->j_join_wait));
43561da177e4SLinus Torvalds 
43571da177e4SLinus Torvalds 	if (!flush && wait_on_commit &&
4358a9dd3643SJeff Mahoney 	    journal_list_still_alive(sb, commit_trans_id)) {
4359a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
43601da177e4SLinus Torvalds 	}
43611da177e4SLinus Torvalds       out:
4362a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end2");
43631da177e4SLinus Torvalds 
43641da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
43651da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
43661da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
43671da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
4368a9dd3643SJeff Mahoney 	th->t_super = sb;
43691da177e4SLinus Torvalds 
43701da177e4SLinus Torvalds 	return journal->j_errno;
43711da177e4SLinus Torvalds }
43721da177e4SLinus Torvalds 
437332e8b106SJeff Mahoney /* Send the file system read only and refuse new transactions */
437432e8b106SJeff Mahoney void reiserfs_abort_journal(struct super_block *sb, int errno)
43751da177e4SLinus Torvalds {
43761da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
43771da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
43781da177e4SLinus Torvalds 		return;
43791da177e4SLinus Torvalds 
438032e8b106SJeff Mahoney 	if (!journal->j_errno)
438132e8b106SJeff Mahoney 		journal->j_errno = errno;
43821da177e4SLinus Torvalds 
43831da177e4SLinus Torvalds 	sb->s_flags |= MS_RDONLY;
43841da177e4SLinus Torvalds 	set_bit(J_ABORTED, &journal->j_state);
43851da177e4SLinus Torvalds 
43861da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
43871da177e4SLinus Torvalds 	dump_stack();
43881da177e4SLinus Torvalds #endif
43891da177e4SLinus Torvalds }
43901da177e4SLinus Torvalds 
4391