xref: /openbmc/linux/fs/reiserfs/journal.c (revision 5a0e3ad6)
/*
** Write ahead logging implementation copyright Chris Mason 2000
**
** The background commits make this code very interrelated, and
** overly complex.  I need to rethink things a bit....The major players:
**
** journal_begin -- call with the number of blocks you expect to log.
**                  If the current transaction is too
**                  old, it will block until the current transaction is
**                  finished, and then start a new one.
**                  Usually, your transaction will get joined in with
**                  previous ones for speed.
**
** journal_join  -- same as journal_begin, but won't block on the current
**                  transaction regardless of age.  Don't ever call
**                  this.  Ever.  There are only two places it should be
**                  called from, and they are both inside this file.
**
** journal_mark_dirty -- adds blocks into this transaction.  Clears any flags
**                       that might make them get sent to disk
**                       and then marks them BH_JDirty.  Puts the buffer head
**                       into the current transaction hash.
**
** journal_end -- if the current transaction is batchable, it does nothing;
**                otherwise, it could do an async/synchronous commit, or
**                a full flush of all log and real blocks in the
**                transaction.
**
** flush_old_commits -- if the current transaction is too old, it is ended and
**                      commit blocks are sent to disk.  Forces commit blocks
**                      to disk for all backgrounded commits that have been
**                      around too long.
**                   -- Note, if you call this as an immediate flush
**                      from within kupdate, it will ignore the immediate flag
*/
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <linux/time.h>
386188e10dSMatthew Wilcox #include <linux/semaphore.h>
391da177e4SLinus Torvalds #include <linux/vmalloc.h>
401da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
411da177e4SLinus Torvalds #include <linux/kernel.h>
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/fcntl.h>
441da177e4SLinus Torvalds #include <linux/stat.h>
451da177e4SLinus Torvalds #include <linux/string.h>
461da177e4SLinus Torvalds #include <linux/smp_lock.h>
471da177e4SLinus Torvalds #include <linux/buffer_head.h>
481da177e4SLinus Torvalds #include <linux/workqueue.h>
491da177e4SLinus Torvalds #include <linux/writeback.h>
501da177e4SLinus Torvalds #include <linux/blkdev.h>
513fcfab16SAndrew Morton #include <linux/backing-dev.h>
5290415deaSJeff Mahoney #include <linux/uaccess.h>
535a0e3ad6STejun Heo #include <linux/slab.h>
5490415deaSJeff Mahoney 
5590415deaSJeff Mahoney #include <asm/system.h>
561da177e4SLinus Torvalds 
/*
** Gets a struct reiserfs_journal_list * from a list head.
** JOURNAL_LIST_ENTRY expects (h) to point at the j_list member,
** JOURNAL_WORK_ENTRY expects (h) to point at the j_working_list member.
*/
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                               j_list))
#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                               j_working_list))
621da177e4SLinus Torvalds 
/*
** The number of mounted filesystems.  This is used to decide when to
** start and kill the commit workqueue.
*/
static int reiserfs_mounted_fs_count;

/*
** The commit workqueue referred to above.
** NOTE(review): presumably shared by every mounted reiserfs filesystem,
** since it is a single file-scope pointer -- confirm against its users.
*/
static struct workqueue_struct *commit_wq;
691da177e4SLinus Torvalds 
#define JOURNAL_TRANS_HALF 1018	/* must be correct to keep the desc and commit
				   structs at 4k */
#define BUFNR 64		/* read ahead */

/* cnode state bits.  Move these into reiserfs_fs.h */

#define BLOCK_FREED 2		/* this block was freed, and can't be written.  */
#define BLOCK_FREED_HOLDER 3	/* this block was freed during this transaction, and can't be written */

#define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
#define BLOCK_DIRTIED 5		/* NOTE(review): no user visible in this part of the file -- meaning to be confirmed */

/* journal list state bits */
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
#define LIST_COMMIT_PENDING  4	/* someone will commit this list */

/* flags for do_journal_end */
#define FLUSH_ALL   1		/* flush commit and real blocks */
#define COMMIT_NOW  2		/* end and commit this transaction */
#define WAIT        4		/* wait for the log blocks to hit the disk */
911da177e4SLinus Torvalds 
/*
** Forward declarations of file-local helpers, so that the functions below
** can call them regardless of definition order.
*/
static int do_journal_end(struct reiserfs_transaction_handle *,
			  struct super_block *, unsigned long nblocks,
			  int flags);
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl, int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl, int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
			struct super_block *sb, unsigned long nblocks);
static int release_journal_dev(struct super_block *super,
			       struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);

/* values for the "join" argument of do_journal_begin_r */
enum {
	JBEGIN_REG = 0,		/* regular journal begin */
	JBEGIN_JOIN = 1,	/* join the running transaction if at all possible */
	JBEGIN_ABORT = 2,	/* called from cleanup code, ignores aborted flag */
};

static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *sb,
			      unsigned long nblocks, int join);
1191da177e4SLinus Torvalds 
120a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb)
121bd4c625cSLinus Torvalds {
122a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
123bd4c625cSLinus Torvalds 	memset(journal->j_hash_table, 0,
124bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
1251da177e4SLinus Torvalds }
1261da177e4SLinus Torvalds 
/*
** Clears the dirty flag and the journal_test flag on bh.  A NULL bh is
** accepted and ignored.  Always returns 0.
**
** Author's original rationale: refile_buffer can't be allowed to make
** schedule happen after a block has been freed.  Look at
** remove_from_transaction and journal_mark_freed for more details.
*/
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds static void disable_barrier(struct super_block *s)
1421da177e4SLinus Torvalds {
1431da177e4SLinus Torvalds 	REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
144bd4c625cSLinus Torvalds 	printk("reiserfs: disabling flush barriers on %s\n",
145bd4c625cSLinus Torvalds 	       reiserfs_bdevname(s));
1461da177e4SLinus Torvalds }
1471da177e4SLinus Torvalds 
148bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
149a9dd3643SJeff Mahoney 							 *sb)
150bd4c625cSLinus Torvalds {
1511da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
1521da177e4SLinus Torvalds 	static int id;
1531da177e4SLinus Torvalds 
154d739b42bSPekka Enberg 	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
1551da177e4SLinus Torvalds 	if (!bn) {
1561da177e4SLinus Torvalds 		return NULL;
1571da177e4SLinus Torvalds 	}
158a9dd3643SJeff Mahoney 	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
1591da177e4SLinus Torvalds 	if (!bn->data) {
160d739b42bSPekka Enberg 		kfree(bn);
1611da177e4SLinus Torvalds 		return NULL;
1621da177e4SLinus Torvalds 	}
1631da177e4SLinus Torvalds 	bn->id = id++;
1641da177e4SLinus Torvalds 	INIT_LIST_HEAD(&bn->list);
1651da177e4SLinus Torvalds 	return bn;
1661da177e4SLinus Torvalds }
1671da177e4SLinus Torvalds 
168a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
169bd4c625cSLinus Torvalds {
170a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1711da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
1721da177e4SLinus Torvalds 	struct list_head *entry = journal->j_bitmap_nodes.next;
1731da177e4SLinus Torvalds 
1741da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes++;
1751da177e4SLinus Torvalds       repeat:
1761da177e4SLinus Torvalds 
1771da177e4SLinus Torvalds 	if (entry != &journal->j_bitmap_nodes) {
1781da177e4SLinus Torvalds 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
1791da177e4SLinus Torvalds 		list_del(entry);
180a9dd3643SJeff Mahoney 		memset(bn->data, 0, sb->s_blocksize);
1811da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
1821da177e4SLinus Torvalds 		return bn;
1831da177e4SLinus Torvalds 	}
184a9dd3643SJeff Mahoney 	bn = allocate_bitmap_node(sb);
1851da177e4SLinus Torvalds 	if (!bn) {
1861da177e4SLinus Torvalds 		yield();
1871da177e4SLinus Torvalds 		goto repeat;
1881da177e4SLinus Torvalds 	}
1891da177e4SLinus Torvalds 	return bn;
1901da177e4SLinus Torvalds }
191a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb,
192bd4c625cSLinus Torvalds 				    struct reiserfs_bitmap_node *bn)
193bd4c625cSLinus Torvalds {
194a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1951da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes--;
1961da177e4SLinus Torvalds 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
197d739b42bSPekka Enberg 		kfree(bn->data);
198d739b42bSPekka Enberg 		kfree(bn);
1991da177e4SLinus Torvalds 	} else {
2001da177e4SLinus Torvalds 		list_add(&bn->list, &journal->j_bitmap_nodes);
2011da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes++;
2021da177e4SLinus Torvalds 	}
2031da177e4SLinus Torvalds }
2041da177e4SLinus Torvalds 
205a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb)
206bd4c625cSLinus Torvalds {
2071da177e4SLinus Torvalds 	int i;
208a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2091da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
2101da177e4SLinus Torvalds 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
211a9dd3643SJeff Mahoney 		bn = allocate_bitmap_node(sb);
2121da177e4SLinus Torvalds 		if (bn) {
2131da177e4SLinus Torvalds 			list_add(&bn->list, &journal->j_bitmap_nodes);
2141da177e4SLinus Torvalds 			journal->j_free_bitmap_nodes++;
2151da177e4SLinus Torvalds 		} else {
2160222e657SJeff Mahoney 			break;	/* this is ok, we'll try again when more are needed */
2171da177e4SLinus Torvalds 		}
2181da177e4SLinus Torvalds 	}
2191da177e4SLinus Torvalds }
2201da177e4SLinus Torvalds 
221a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb,
2223ee16670SJeff Mahoney 				  b_blocknr_t block,
223bd4c625cSLinus Torvalds 				  struct reiserfs_list_bitmap *jb)
224bd4c625cSLinus Torvalds {
225a9dd3643SJeff Mahoney 	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
226a9dd3643SJeff Mahoney 	unsigned int bit_nr = block % (sb->s_blocksize << 3);
2271da177e4SLinus Torvalds 
2281da177e4SLinus Torvalds 	if (!jb->bitmaps[bmap_nr]) {
229a9dd3643SJeff Mahoney 		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
2301da177e4SLinus Torvalds 	}
2311da177e4SLinus Torvalds 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
2321da177e4SLinus Torvalds 	return 0;
2331da177e4SLinus Torvalds }
2341da177e4SLinus Torvalds 
235a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb,
236bd4c625cSLinus Torvalds 				struct reiserfs_list_bitmap *jb)
237bd4c625cSLinus Torvalds {
2381da177e4SLinus Torvalds 	int i;
2391da177e4SLinus Torvalds 	if (jb->bitmaps == NULL)
2401da177e4SLinus Torvalds 		return;
2411da177e4SLinus Torvalds 
242a9dd3643SJeff Mahoney 	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
2431da177e4SLinus Torvalds 		if (jb->bitmaps[i]) {
244a9dd3643SJeff Mahoney 			free_bitmap_node(sb, jb->bitmaps[i]);
2451da177e4SLinus Torvalds 			jb->bitmaps[i] = NULL;
2461da177e4SLinus Torvalds 		}
2471da177e4SLinus Torvalds 	}
2481da177e4SLinus Torvalds }
2491da177e4SLinus Torvalds 
2501da177e4SLinus Torvalds /*
2511da177e4SLinus Torvalds ** only call this on FS unmount.
2521da177e4SLinus Torvalds */
253a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb,
254bd4c625cSLinus Torvalds 			     struct reiserfs_list_bitmap *jb_array)
255bd4c625cSLinus Torvalds {
2561da177e4SLinus Torvalds 	int i;
2571da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2581da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2591da177e4SLinus Torvalds 		jb = jb_array + i;
2601da177e4SLinus Torvalds 		jb->journal_list = NULL;
261a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
2621da177e4SLinus Torvalds 		vfree(jb->bitmaps);
2631da177e4SLinus Torvalds 		jb->bitmaps = NULL;
2641da177e4SLinus Torvalds 	}
2651da177e4SLinus Torvalds 	return 0;
2661da177e4SLinus Torvalds }
2671da177e4SLinus Torvalds 
268a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb)
269bd4c625cSLinus Torvalds {
270a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2711da177e4SLinus Torvalds 	struct list_head *next = journal->j_bitmap_nodes.next;
2721da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
2731da177e4SLinus Torvalds 
2741da177e4SLinus Torvalds 	while (next != &journal->j_bitmap_nodes) {
2751da177e4SLinus Torvalds 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
2761da177e4SLinus Torvalds 		list_del(next);
277d739b42bSPekka Enberg 		kfree(bn->data);
278d739b42bSPekka Enberg 		kfree(bn);
2791da177e4SLinus Torvalds 		next = journal->j_bitmap_nodes.next;
2801da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
2811da177e4SLinus Torvalds 	}
2821da177e4SLinus Torvalds 
2831da177e4SLinus Torvalds 	return 0;
2841da177e4SLinus Torvalds }
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds /*
2871da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
2881da177e4SLinus Torvalds ** jb_array is the array to be filled in.
2891da177e4SLinus Torvalds */
290a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb,
2911da177e4SLinus Torvalds 				   struct reiserfs_list_bitmap *jb_array,
2923ee16670SJeff Mahoney 				   unsigned int bmap_nr)
293bd4c625cSLinus Torvalds {
2941da177e4SLinus Torvalds 	int i;
2951da177e4SLinus Torvalds 	int failed = 0;
2961da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2971da177e4SLinus Torvalds 	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
3001da177e4SLinus Torvalds 		jb = jb_array + i;
3011da177e4SLinus Torvalds 		jb->journal_list = NULL;
3021da177e4SLinus Torvalds 		jb->bitmaps = vmalloc(mem);
3031da177e4SLinus Torvalds 		if (!jb->bitmaps) {
304a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2000", "unable to "
30545b03d5eSJeff Mahoney 					 "allocate bitmaps for journal lists");
3061da177e4SLinus Torvalds 			failed = 1;
3071da177e4SLinus Torvalds 			break;
3081da177e4SLinus Torvalds 		}
3091da177e4SLinus Torvalds 		memset(jb->bitmaps, 0, mem);
3101da177e4SLinus Torvalds 	}
3111da177e4SLinus Torvalds 	if (failed) {
312a9dd3643SJeff Mahoney 		free_list_bitmaps(sb, jb_array);
3131da177e4SLinus Torvalds 		return -1;
3141da177e4SLinus Torvalds 	}
3151da177e4SLinus Torvalds 	return 0;
3161da177e4SLinus Torvalds }
3171da177e4SLinus Torvalds 
3181da177e4SLinus Torvalds /*
3191da177e4SLinus Torvalds ** find an available list bitmap.  If you can't find one, flush a commit list
3201da177e4SLinus Torvalds ** and try again
3211da177e4SLinus Torvalds */
322a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
323bd4c625cSLinus Torvalds 						    struct reiserfs_journal_list
324bd4c625cSLinus Torvalds 						    *jl)
325bd4c625cSLinus Torvalds {
3261da177e4SLinus Torvalds 	int i, j;
327a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3281da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds 	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
3311da177e4SLinus Torvalds 		i = journal->j_list_bitmap_index;
3321da177e4SLinus Torvalds 		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
3331da177e4SLinus Torvalds 		jb = journal->j_list_bitmap + i;
3341da177e4SLinus Torvalds 		if (journal->j_list_bitmap[i].journal_list) {
335a9dd3643SJeff Mahoney 			flush_commit_list(sb,
336bd4c625cSLinus Torvalds 					  journal->j_list_bitmap[i].
337bd4c625cSLinus Torvalds 					  journal_list, 1);
3381da177e4SLinus Torvalds 			if (!journal->j_list_bitmap[i].journal_list) {
3391da177e4SLinus Torvalds 				break;
3401da177e4SLinus Torvalds 			}
3411da177e4SLinus Torvalds 		} else {
3421da177e4SLinus Torvalds 			break;
3431da177e4SLinus Torvalds 		}
3441da177e4SLinus Torvalds 	}
3451da177e4SLinus Torvalds 	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
3461da177e4SLinus Torvalds 		return NULL;
3471da177e4SLinus Torvalds 	}
3481da177e4SLinus Torvalds 	jb->journal_list = jl;
3491da177e4SLinus Torvalds 	return jb;
3501da177e4SLinus Torvalds }
3511da177e4SLinus Torvalds 
3521da177e4SLinus Torvalds /*
3531da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list.
3541da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers
3551da177e4SLinus Torvalds ** returns NULL on failure
3561da177e4SLinus Torvalds */
357bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
358bd4c625cSLinus Torvalds {
3591da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *head;
3601da177e4SLinus Torvalds 	int i;
3611da177e4SLinus Torvalds 	if (num_cnodes <= 0) {
3621da177e4SLinus Torvalds 		return NULL;
3631da177e4SLinus Torvalds 	}
3641da177e4SLinus Torvalds 	head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
3651da177e4SLinus Torvalds 	if (!head) {
3661da177e4SLinus Torvalds 		return NULL;
3671da177e4SLinus Torvalds 	}
3681da177e4SLinus Torvalds 	memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
3691da177e4SLinus Torvalds 	head[0].prev = NULL;
3701da177e4SLinus Torvalds 	head[0].next = head + 1;
3711da177e4SLinus Torvalds 	for (i = 1; i < num_cnodes; i++) {
3721da177e4SLinus Torvalds 		head[i].prev = head + (i - 1);
3731da177e4SLinus Torvalds 		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
3741da177e4SLinus Torvalds 	}
3751da177e4SLinus Torvalds 	head[num_cnodes - 1].next = NULL;
3761da177e4SLinus Torvalds 	return head;
3771da177e4SLinus Torvalds }
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds /*
3801da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure
3811da177e4SLinus Torvalds */
382a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
383bd4c625cSLinus Torvalds {
3841da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
385a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3861da177e4SLinus Torvalds 
387a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "get_cnode");
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	if (journal->j_cnode_free <= 0) {
3901da177e4SLinus Torvalds 		return NULL;
3911da177e4SLinus Torvalds 	}
3921da177e4SLinus Torvalds 	journal->j_cnode_used++;
3931da177e4SLinus Torvalds 	journal->j_cnode_free--;
3941da177e4SLinus Torvalds 	cn = journal->j_cnode_free_list;
3951da177e4SLinus Torvalds 	if (!cn) {
3961da177e4SLinus Torvalds 		return cn;
3971da177e4SLinus Torvalds 	}
3981da177e4SLinus Torvalds 	if (cn->next) {
3991da177e4SLinus Torvalds 		cn->next->prev = NULL;
4001da177e4SLinus Torvalds 	}
4011da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn->next;
4021da177e4SLinus Torvalds 	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
4031da177e4SLinus Torvalds 	return cn;
4041da177e4SLinus Torvalds }
4051da177e4SLinus Torvalds 
4061da177e4SLinus Torvalds /*
4071da177e4SLinus Torvalds ** returns a cnode to the free list
4081da177e4SLinus Torvalds */
409a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb,
410bd4c625cSLinus Torvalds 		       struct reiserfs_journal_cnode *cn)
411bd4c625cSLinus Torvalds {
412a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4131da177e4SLinus Torvalds 
414a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "free_cnode");
4151da177e4SLinus Torvalds 
4161da177e4SLinus Torvalds 	journal->j_cnode_used--;
4171da177e4SLinus Torvalds 	journal->j_cnode_free++;
4181da177e4SLinus Torvalds 	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
4191da177e4SLinus Torvalds 	cn->next = journal->j_cnode_free_list;
4201da177e4SLinus Torvalds 	if (journal->j_cnode_free_list) {
4211da177e4SLinus Torvalds 		journal->j_cnode_free_list->prev = cn;
4221da177e4SLinus Torvalds 	}
4231da177e4SLinus Torvalds 	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
4241da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn;
4251da177e4SLinus Torvalds }
4261da177e4SLinus Torvalds 
/* Clears both the journal_prepared and the journal_restore_dirty flag on bh. */
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);
	clear_buffer_journal_restore_dirty(bh);
}
4321da177e4SLinus Torvalds 
4331da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */
434bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
435bd4c625cSLinus Torvalds 								  super_block
436bd4c625cSLinus Torvalds 								  *sb,
437bd4c625cSLinus Torvalds 								  struct
438bd4c625cSLinus Torvalds 								  reiserfs_journal_cnode
439bd4c625cSLinus Torvalds 								  **table,
4401da177e4SLinus Torvalds 								  long bl)
4411da177e4SLinus Torvalds {
4421da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4431da177e4SLinus Torvalds 	cn = journal_hash(table, sb, bl);
4441da177e4SLinus Torvalds 	while (cn) {
4451da177e4SLinus Torvalds 		if (cn->blocknr == bl && cn->sb == sb)
4461da177e4SLinus Torvalds 			return cn;
4471da177e4SLinus Torvalds 		cn = cn->hnext;
4481da177e4SLinus Torvalds 	}
4491da177e4SLinus Torvalds 	return (struct reiserfs_journal_cnode *)0;
4501da177e4SLinus Torvalds }
4511da177e4SLinus Torvalds 
4521da177e4SLinus Torvalds /*
4531da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
4541da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
4551da177e4SLinus Torvalds ** being overwritten by a replay after crashing.
4561da177e4SLinus Torvalds **
4571da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
4581da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
4591da177e4SLinus Torvalds ** sure you never write the block without logging it.
4601da177e4SLinus Torvalds **
4611da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward.
4621da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search
4631da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl.  Then, we return that
4641da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try.
4651da177e4SLinus Torvalds **
4661da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't
4671da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal
4681da177e4SLinus Torvalds **
4691da177e4SLinus Torvalds */
470a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb,
4713ee16670SJeff Mahoney 			unsigned int bmap_nr, int bit_nr, int search_all,
472bd4c625cSLinus Torvalds 			b_blocknr_t * next_zero_bit)
473bd4c625cSLinus Torvalds {
474a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4751da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4761da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
4771da177e4SLinus Torvalds 	int i;
4781da177e4SLinus Torvalds 	unsigned long bl;
4791da177e4SLinus Torvalds 
4801da177e4SLinus Torvalds 	*next_zero_bit = 0;	/* always start this at zero. */
4811da177e4SLinus Torvalds 
482a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal);
4831da177e4SLinus Torvalds 	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
4841da177e4SLinus Torvalds 	 ** if we crash before the transaction that freed it commits,  this transaction won't
4851da177e4SLinus Torvalds 	 ** have committed either, and the block will never be written
4861da177e4SLinus Torvalds 	 */
4871da177e4SLinus Torvalds 	if (search_all) {
4881da177e4SLinus Torvalds 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
489a9dd3643SJeff Mahoney 			PROC_INFO_INC(sb, journal.in_journal_bitmap);
4901da177e4SLinus Torvalds 			jb = journal->j_list_bitmap + i;
4911da177e4SLinus Torvalds 			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
492bd4c625cSLinus Torvalds 			    test_bit(bit_nr,
493bd4c625cSLinus Torvalds 				     (unsigned long *)jb->bitmaps[bmap_nr]->
494bd4c625cSLinus Torvalds 				     data)) {
495bd4c625cSLinus Torvalds 				*next_zero_bit =
496bd4c625cSLinus Torvalds 				    find_next_zero_bit((unsigned long *)
497bd4c625cSLinus Torvalds 						       (jb->bitmaps[bmap_nr]->
498bd4c625cSLinus Torvalds 							data),
499a9dd3643SJeff Mahoney 						       sb->s_blocksize << 3,
500bd4c625cSLinus Torvalds 						       bit_nr + 1);
5011da177e4SLinus Torvalds 				return 1;
5021da177e4SLinus Torvalds 			}
5031da177e4SLinus Torvalds 		}
5041da177e4SLinus Torvalds 	}
5051da177e4SLinus Torvalds 
506a9dd3643SJeff Mahoney 	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
5071da177e4SLinus Torvalds 	/* is it in any old transactions? */
508bd4c625cSLinus Torvalds 	if (search_all
509bd4c625cSLinus Torvalds 	    && (cn =
510a9dd3643SJeff Mahoney 		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
5111da177e4SLinus Torvalds 		return 1;
5121da177e4SLinus Torvalds 	}
5131da177e4SLinus Torvalds 
5141da177e4SLinus Torvalds 	/* is it in the current transaction.  This should never happen */
515a9dd3643SJeff Mahoney 	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
5161da177e4SLinus Torvalds 		BUG();
5171da177e4SLinus Torvalds 		return 1;
5181da177e4SLinus Torvalds 	}
5191da177e4SLinus Torvalds 
520a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal_reusable);
5211da177e4SLinus Torvalds 	/* safe for reuse */
5221da177e4SLinus Torvalds 	return 0;
5231da177e4SLinus Torvalds }
5241da177e4SLinus Torvalds 
5251da177e4SLinus Torvalds /* insert cn into table
5261da177e4SLinus Torvalds */
527bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
528bd4c625cSLinus Torvalds 				       struct reiserfs_journal_cnode *cn)
529bd4c625cSLinus Torvalds {
5301da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn_orig;
5311da177e4SLinus Torvalds 
5321da177e4SLinus Torvalds 	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
5331da177e4SLinus Torvalds 	cn->hnext = cn_orig;
5341da177e4SLinus Torvalds 	cn->hprev = NULL;
5351da177e4SLinus Torvalds 	if (cn_orig) {
5361da177e4SLinus Torvalds 		cn_orig->hprev = cn;
5371da177e4SLinus Torvalds 	}
5381da177e4SLinus Torvalds 	journal_hash(table, cn->sb, cn->blocknr) = cn;
5391da177e4SLinus Torvalds }
5401da177e4SLinus Torvalds 
5411da177e4SLinus Torvalds /* lock the current transaction */
542a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb)
543bd4c625cSLinus Torvalds {
544a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.lock_journal);
5458ebc4232SFrederic Weisbecker 
5468ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
5471da177e4SLinus Torvalds }
5481da177e4SLinus Torvalds 
5491da177e4SLinus Torvalds /* unlock the current transaction */
550a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb)
551bd4c625cSLinus Torvalds {
552a9dd3643SJeff Mahoney 	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
5531da177e4SLinus Torvalds }
5541da177e4SLinus Torvalds 
5551da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl)
5561da177e4SLinus Torvalds {
5571da177e4SLinus Torvalds 	jl->j_refcount++;
5581da177e4SLinus Torvalds }
5591da177e4SLinus Torvalds 
5601da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s,
5611da177e4SLinus Torvalds 				    struct reiserfs_journal_list *jl)
5621da177e4SLinus Torvalds {
5631da177e4SLinus Torvalds 	if (jl->j_refcount < 1) {
564c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
565bd4c625cSLinus Torvalds 			       jl->j_trans_id, jl->j_refcount);
5661da177e4SLinus Torvalds 	}
5671da177e4SLinus Torvalds 	if (--jl->j_refcount == 0)
568d739b42bSPekka Enberg 		kfree(jl);
5691da177e4SLinus Torvalds }
5701da177e4SLinus Torvalds 
5711da177e4SLinus Torvalds /*
5721da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
5731da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
5741da177e4SLinus Torvalds ** transaction.
5751da177e4SLinus Torvalds */
576a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb,
577bd4c625cSLinus Torvalds 					   struct reiserfs_journal_list *jl)
578bd4c625cSLinus Torvalds {
5791da177e4SLinus Torvalds 
5801da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
5811da177e4SLinus Torvalds 	if (jb) {
582a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
5831da177e4SLinus Torvalds 	}
5841da177e4SLinus Torvalds 	jl->j_list_bitmap->journal_list = NULL;
5851da177e4SLinus Torvalds 	jl->j_list_bitmap = NULL;
5861da177e4SLinus Torvalds }
5871da177e4SLinus Torvalds 
5881da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s,
589600ed416SJeff Mahoney 				    unsigned int trans_id)
5901da177e4SLinus Torvalds {
5911da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
5921da177e4SLinus Torvalds 	struct list_head *entry = &journal->j_journal_list;
5931da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds 	if (!list_empty(entry)) {
5961da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry->next);
5971da177e4SLinus Torvalds 		if (jl->j_trans_id <= trans_id) {
5981da177e4SLinus Torvalds 			return 1;
5991da177e4SLinus Torvalds 		}
6001da177e4SLinus Torvalds 	}
6011da177e4SLinus Torvalds 	return 0;
6021da177e4SLinus Torvalds }
6031da177e4SLinus Torvalds 
604398c95bdSChris Mason /*
605398c95bdSChris Mason  * If page->mapping was null, we failed to truncate this page for
606398c95bdSChris Mason  * some reason.  Most likely because it was truncated after being
607398c95bdSChris Mason  * logged via data=journal.
608398c95bdSChris Mason  *
609398c95bdSChris Mason  * This does a check to see if the buffer belongs to one of these
610398c95bdSChris Mason  * lost pages before doing the final put_bh.  If page->mapping was
611398c95bdSChris Mason  * null, it tries to free buffers on the page, which should make the
612398c95bdSChris Mason  * final page_cache_release drop the page from the lru.
613398c95bdSChris Mason  */
614398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh)
615398c95bdSChris Mason {
616398c95bdSChris Mason 	struct page *page = bh->b_page;
617529ae9aaSNick Piggin 	if (!page->mapping && trylock_page(page)) {
618398c95bdSChris Mason 		page_cache_get(page);
619398c95bdSChris Mason 		put_bh(bh);
620398c95bdSChris Mason 		if (!page->mapping)
621398c95bdSChris Mason 			try_to_free_buffers(page);
622398c95bdSChris Mason 		unlock_page(page);
623398c95bdSChris Mason 		page_cache_release(page);
624398c95bdSChris Mason 	} else {
625398c95bdSChris Mason 		put_bh(bh);
626398c95bdSChris Mason 	}
627398c95bdSChris Mason }
628398c95bdSChris Mason 
629bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
630bd4c625cSLinus Torvalds {
6311da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
6321da177e4SLinus Torvalds 
6331da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
63445b03d5eSJeff Mahoney 		reiserfs_warning(NULL, "clm-2084",
63545b03d5eSJeff Mahoney 				 "pinned buffer %lu:%s sent to disk",
6361da177e4SLinus Torvalds 				 bh->b_blocknr, bdevname(bh->b_bdev, b));
6371da177e4SLinus Torvalds 	}
6381da177e4SLinus Torvalds 	if (uptodate)
6391da177e4SLinus Torvalds 		set_buffer_uptodate(bh);
6401da177e4SLinus Torvalds 	else
6411da177e4SLinus Torvalds 		clear_buffer_uptodate(bh);
642398c95bdSChris Mason 
6431da177e4SLinus Torvalds 	unlock_buffer(bh);
644398c95bdSChris Mason 	release_buffer_page(bh);
6451da177e4SLinus Torvalds }
6461da177e4SLinus Torvalds 
/*
 * b_end_io handler installed by submit_ordered_buffer() and
 * submit_barrier_buffer(): record the I/O result in the uptodate bit,
 * unlock the buffer and drop the reference taken at submit time.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
6561da177e4SLinus Torvalds 
657bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh)
658bd4c625cSLinus Torvalds {
6591da177e4SLinus Torvalds 	get_bh(bh);
6601da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_buffer_io_sync;
6611da177e4SLinus Torvalds 	clear_buffer_journal_new(bh);
6621da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6631da177e4SLinus Torvalds 	if (!test_clear_buffer_journal_test(bh))
6641da177e4SLinus Torvalds 		BUG();
6651da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6661da177e4SLinus Torvalds 		BUG();
6671da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6681da177e4SLinus Torvalds }
6691da177e4SLinus Torvalds 
670bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh)
671bd4c625cSLinus Torvalds {
6721da177e4SLinus Torvalds 	get_bh(bh);
6731da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6741da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6751da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6761da177e4SLinus Torvalds 		BUG();
6771da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6781da177e4SLinus Torvalds }
6791da177e4SLinus Torvalds 
680bd4c625cSLinus Torvalds static int submit_barrier_buffer(struct buffer_head *bh)
681bd4c625cSLinus Torvalds {
6821da177e4SLinus Torvalds 	get_bh(bh);
6831da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6841da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6851da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6861da177e4SLinus Torvalds 		BUG();
6871da177e4SLinus Torvalds 	return submit_bh(WRITE_BARRIER, bh);
6881da177e4SLinus Torvalds }
6891da177e4SLinus Torvalds 
/*
 * Handle a barrier write that completed with the "operation not
 * supported" flag set on @bh: clear the flag, disable barriers on @s,
 * and write the buffer again synchronously as an ordinary dirty buffer.
 * The reiserfs write lock is released around sync_dirty_buffer().
 * Nothing is done when the flag is not set.
 */
static void check_barrier_completion(struct super_block *s,
				     struct buffer_head *bh)
{
	if (!buffer_eopnotsupp(bh))
		return;

	clear_buffer_eopnotsupp(bh);
	disable_barrier(s);

	/* resubmit without the barrier */
	set_buffer_uptodate(bh);
	set_buffer_dirty(bh);
	reiserfs_write_unlock(s);
	sync_dirty_buffer(bh);
	reiserfs_write_lock(s);
}
7031da177e4SLinus Torvalds 
/*
 * Capacity of a buffer_chunk.  add_to_chunk() hands the chunk to its
 * flush callback as soon as this many buffer heads have been collected.
 */
#define CHUNK_SIZE 32
/* A fixed-size batch of buffer heads collected for submission together. */
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];	/* collected buffers; slots [0, nr) are in use */
	int nr;					/* number of valid entries in bh[] */
};
7091da177e4SLinus Torvalds 
710bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk)
711bd4c625cSLinus Torvalds {
7121da177e4SLinus Torvalds 	int i;
71322e2c507SJens Axboe 	get_fs_excl();
7141da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
7151da177e4SLinus Torvalds 		submit_logged_buffer(chunk->bh[i]);
7161da177e4SLinus Torvalds 	}
7171da177e4SLinus Torvalds 	chunk->nr = 0;
71822e2c507SJens Axboe 	put_fs_excl();
7191da177e4SLinus Torvalds }
7201da177e4SLinus Torvalds 
721bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk)
722bd4c625cSLinus Torvalds {
7231da177e4SLinus Torvalds 	int i;
72422e2c507SJens Axboe 	get_fs_excl();
7251da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
7261da177e4SLinus Torvalds 		submit_ordered_buffer(chunk->bh[i]);
7271da177e4SLinus Torvalds 	}
7281da177e4SLinus Torvalds 	chunk->nr = 0;
72922e2c507SJens Axboe 	put_fs_excl();
7301da177e4SLinus Torvalds }
7311da177e4SLinus Torvalds 
7321da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
733bd4c625cSLinus Torvalds 			spinlock_t * lock, void (fn) (struct buffer_chunk *))
7341da177e4SLinus Torvalds {
7351da177e4SLinus Torvalds 	int ret = 0;
73614a61442SEric Sesterhenn 	BUG_ON(chunk->nr >= CHUNK_SIZE);
7371da177e4SLinus Torvalds 	chunk->bh[chunk->nr++] = bh;
7381da177e4SLinus Torvalds 	if (chunk->nr >= CHUNK_SIZE) {
7391da177e4SLinus Torvalds 		ret = 1;
7401da177e4SLinus Torvalds 		if (lock)
7411da177e4SLinus Torvalds 			spin_unlock(lock);
7421da177e4SLinus Torvalds 		fn(chunk);
7431da177e4SLinus Torvalds 		if (lock)
7441da177e4SLinus Torvalds 			spin_lock(lock);
7451da177e4SLinus Torvalds 	}
7461da177e4SLinus Torvalds 	return ret;
7471da177e4SLinus Torvalds }
7481da177e4SLinus Torvalds 
7491da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
750bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void)
751bd4c625cSLinus Torvalds {
7521da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7531da177e4SLinus Torvalds 	while (1) {
7541da177e4SLinus Torvalds 		jh = kmalloc(sizeof(*jh), GFP_NOFS);
7551da177e4SLinus Torvalds 		if (jh) {
7561da177e4SLinus Torvalds 			atomic_inc(&nr_reiserfs_jh);
7571da177e4SLinus Torvalds 			return jh;
7581da177e4SLinus Torvalds 		}
7591da177e4SLinus Torvalds 		yield();
7601da177e4SLinus Torvalds 	}
7611da177e4SLinus Torvalds }
7621da177e4SLinus Torvalds 
7631da177e4SLinus Torvalds /*
7641da177e4SLinus Torvalds  * we want to free the jh when the buffer has been written
7651da177e4SLinus Torvalds  * and waited on
7661da177e4SLinus Torvalds  */
767bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh)
768bd4c625cSLinus Torvalds {
7691da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7701da177e4SLinus Torvalds 
7711da177e4SLinus Torvalds 	jh = bh->b_private;
7721da177e4SLinus Torvalds 	if (jh) {
7731da177e4SLinus Torvalds 		bh->b_private = NULL;
7741da177e4SLinus Torvalds 		jh->bh = NULL;
7751da177e4SLinus Torvalds 		list_del_init(&jh->list);
7761da177e4SLinus Torvalds 		kfree(jh);
7771da177e4SLinus Torvalds 		if (atomic_read(&nr_reiserfs_jh) <= 0)
7781da177e4SLinus Torvalds 			BUG();
7791da177e4SLinus Torvalds 		atomic_dec(&nr_reiserfs_jh);
7801da177e4SLinus Torvalds 		put_bh(bh);
7811da177e4SLinus Torvalds 	}
7821da177e4SLinus Torvalds }
7831da177e4SLinus Torvalds 
7841da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
7851da177e4SLinus Torvalds 			   int tail)
7861da177e4SLinus Torvalds {
7871da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7881da177e4SLinus Torvalds 
7891da177e4SLinus Torvalds 	if (bh->b_private) {
7901da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
7911da177e4SLinus Torvalds 		if (!bh->b_private) {
7921da177e4SLinus Torvalds 			spin_unlock(&j->j_dirty_buffers_lock);
7931da177e4SLinus Torvalds 			goto no_jh;
7941da177e4SLinus Torvalds 		}
7951da177e4SLinus Torvalds 		jh = bh->b_private;
7961da177e4SLinus Torvalds 		list_del_init(&jh->list);
7971da177e4SLinus Torvalds 	} else {
7981da177e4SLinus Torvalds 	      no_jh:
7991da177e4SLinus Torvalds 		get_bh(bh);
8001da177e4SLinus Torvalds 		jh = alloc_jh();
8011da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
8021da177e4SLinus Torvalds 		/* buffer must be locked for __add_jh, should be able to have
8031da177e4SLinus Torvalds 		 * two adds at the same time
8041da177e4SLinus Torvalds 		 */
80514a61442SEric Sesterhenn 		BUG_ON(bh->b_private);
8061da177e4SLinus Torvalds 		jh->bh = bh;
8071da177e4SLinus Torvalds 		bh->b_private = jh;
8081da177e4SLinus Torvalds 	}
8091da177e4SLinus Torvalds 	jh->jl = j->j_current_jl;
8101da177e4SLinus Torvalds 	if (tail)
8111da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
8121da177e4SLinus Torvalds 	else {
8131da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_bh_list);
8141da177e4SLinus Torvalds 	}
8151da177e4SLinus Torvalds 	spin_unlock(&j->j_dirty_buffers_lock);
8161da177e4SLinus Torvalds 	return 0;
8171da177e4SLinus Torvalds }
8181da177e4SLinus Torvalds 
819bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
820bd4c625cSLinus Torvalds {
8211da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
8221da177e4SLinus Torvalds }
823bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
824bd4c625cSLinus Torvalds {
8251da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
8261da177e4SLinus Torvalds }
8271da177e4SLinus Torvalds 
8281da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
8291da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock,
8301da177e4SLinus Torvalds 				 struct reiserfs_journal *j,
8311da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
8321da177e4SLinus Torvalds 				 struct list_head *list)
8331da177e4SLinus Torvalds {
8341da177e4SLinus Torvalds 	struct buffer_head *bh;
8351da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8361da177e4SLinus Torvalds 	int ret = j->j_errno;
8371da177e4SLinus Torvalds 	struct buffer_chunk chunk;
8381da177e4SLinus Torvalds 	struct list_head tmp;
8391da177e4SLinus Torvalds 	INIT_LIST_HEAD(&tmp);
8401da177e4SLinus Torvalds 
8411da177e4SLinus Torvalds 	chunk.nr = 0;
8421da177e4SLinus Torvalds 	spin_lock(lock);
8431da177e4SLinus Torvalds 	while (!list_empty(list)) {
8441da177e4SLinus Torvalds 		jh = JH_ENTRY(list->next);
8451da177e4SLinus Torvalds 		bh = jh->bh;
8461da177e4SLinus Torvalds 		get_bh(bh);
847ca5de404SNick Piggin 		if (!trylock_buffer(bh)) {
8481da177e4SLinus Torvalds 			if (!buffer_dirty(bh)) {
849f116629dSAkinobu Mita 				list_move(&jh->list, &tmp);
8501da177e4SLinus Torvalds 				goto loop_next;
8511da177e4SLinus Torvalds 			}
8521da177e4SLinus Torvalds 			spin_unlock(lock);
8531da177e4SLinus Torvalds 			if (chunk.nr)
8541da177e4SLinus Torvalds 				write_ordered_chunk(&chunk);
8551da177e4SLinus Torvalds 			wait_on_buffer(bh);
8561da177e4SLinus Torvalds 			cond_resched();
8571da177e4SLinus Torvalds 			spin_lock(lock);
8581da177e4SLinus Torvalds 			goto loop_next;
8591da177e4SLinus Torvalds 		}
8603d4492f8SChris Mason 		/* in theory, dirty non-uptodate buffers should never get here,
8613d4492f8SChris Mason 		 * but the upper layer io error paths still have a few quirks.
8623d4492f8SChris Mason 		 * Handle them here as gracefully as we can
8633d4492f8SChris Mason 		 */
8643d4492f8SChris Mason 		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
8653d4492f8SChris Mason 			clear_buffer_dirty(bh);
8663d4492f8SChris Mason 			ret = -EIO;
8673d4492f8SChris Mason 		}
8681da177e4SLinus Torvalds 		if (buffer_dirty(bh)) {
869f116629dSAkinobu Mita 			list_move(&jh->list, &tmp);
8701da177e4SLinus Torvalds 			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
8711da177e4SLinus Torvalds 		} else {
8721da177e4SLinus Torvalds 			reiserfs_free_jh(bh);
8731da177e4SLinus Torvalds 			unlock_buffer(bh);
8741da177e4SLinus Torvalds 		}
8751da177e4SLinus Torvalds 	      loop_next:
8761da177e4SLinus Torvalds 		put_bh(bh);
8771da177e4SLinus Torvalds 		cond_resched_lock(lock);
8781da177e4SLinus Torvalds 	}
8791da177e4SLinus Torvalds 	if (chunk.nr) {
8801da177e4SLinus Torvalds 		spin_unlock(lock);
8811da177e4SLinus Torvalds 		write_ordered_chunk(&chunk);
8821da177e4SLinus Torvalds 		spin_lock(lock);
8831da177e4SLinus Torvalds 	}
8841da177e4SLinus Torvalds 	while (!list_empty(&tmp)) {
8851da177e4SLinus Torvalds 		jh = JH_ENTRY(tmp.prev);
8861da177e4SLinus Torvalds 		bh = jh->bh;
8871da177e4SLinus Torvalds 		get_bh(bh);
8881da177e4SLinus Torvalds 		reiserfs_free_jh(bh);
8891da177e4SLinus Torvalds 
8901da177e4SLinus Torvalds 		if (buffer_locked(bh)) {
8911da177e4SLinus Torvalds 			spin_unlock(lock);
8921da177e4SLinus Torvalds 			wait_on_buffer(bh);
8931da177e4SLinus Torvalds 			spin_lock(lock);
8941da177e4SLinus Torvalds 		}
8951da177e4SLinus Torvalds 		if (!buffer_uptodate(bh)) {
8961da177e4SLinus Torvalds 			ret = -EIO;
8971da177e4SLinus Torvalds 		}
898d62b1b87SChris Mason 		/* ugly interaction with invalidatepage here.
899d62b1b87SChris Mason 		 * reiserfs_invalidate_page will pin any buffer that has a valid
900d62b1b87SChris Mason 		 * journal head from an older transaction.  If someone else sets
901d62b1b87SChris Mason 		 * our buffer dirty after we write it in the first loop, and
902d62b1b87SChris Mason 		 * then someone truncates the page away, nobody will ever write
903d62b1b87SChris Mason 		 * the buffer. We're safe if we write the page one last time
904d62b1b87SChris Mason 		 * after freeing the journal header.
905d62b1b87SChris Mason 		 */
906d62b1b87SChris Mason 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
907d62b1b87SChris Mason 			spin_unlock(lock);
908d62b1b87SChris Mason 			ll_rw_block(WRITE, 1, &bh);
909d62b1b87SChris Mason 			spin_lock(lock);
910d62b1b87SChris Mason 		}
9111da177e4SLinus Torvalds 		put_bh(bh);
9121da177e4SLinus Torvalds 		cond_resched_lock(lock);
9131da177e4SLinus Torvalds 	}
9141da177e4SLinus Torvalds 	spin_unlock(lock);
9151da177e4SLinus Torvalds 	return ret;
9161da177e4SLinus Torvalds }
9171da177e4SLinus Torvalds 
918bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s,
919bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
920bd4c625cSLinus Torvalds {
9211da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
9221da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
9231da177e4SLinus Torvalds 	struct reiserfs_journal_list *first_jl;
9241da177e4SLinus Torvalds 	struct list_head *entry;
925600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
926600ed416SJeff Mahoney 	unsigned int other_trans_id;
927600ed416SJeff Mahoney 	unsigned int first_trans_id;
9281da177e4SLinus Torvalds 
9291da177e4SLinus Torvalds       find_first:
9301da177e4SLinus Torvalds 	/*
9311da177e4SLinus Torvalds 	 * first we walk backwards to find the oldest uncommitted transation
9321da177e4SLinus Torvalds 	 */
9331da177e4SLinus Torvalds 	first_jl = jl;
9341da177e4SLinus Torvalds 	entry = jl->j_list.prev;
9351da177e4SLinus Torvalds 	while (1) {
9361da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9371da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list ||
9381da177e4SLinus Torvalds 		    atomic_read(&other_jl->j_older_commits_done))
9391da177e4SLinus Torvalds 			break;
9401da177e4SLinus Torvalds 
9411da177e4SLinus Torvalds 		first_jl = other_jl;
9421da177e4SLinus Torvalds 		entry = other_jl->j_list.prev;
9431da177e4SLinus Torvalds 	}
9441da177e4SLinus Torvalds 
9451da177e4SLinus Torvalds 	/* if we didn't find any older uncommitted transactions, return now */
9461da177e4SLinus Torvalds 	if (first_jl == jl) {
9471da177e4SLinus Torvalds 		return 0;
9481da177e4SLinus Torvalds 	}
9491da177e4SLinus Torvalds 
9501da177e4SLinus Torvalds 	first_trans_id = first_jl->j_trans_id;
9511da177e4SLinus Torvalds 
9521da177e4SLinus Torvalds 	entry = &first_jl->j_list;
9531da177e4SLinus Torvalds 	while (1) {
9541da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9551da177e4SLinus Torvalds 		other_trans_id = other_jl->j_trans_id;
9561da177e4SLinus Torvalds 
9571da177e4SLinus Torvalds 		if (other_trans_id < trans_id) {
9581da177e4SLinus Torvalds 			if (atomic_read(&other_jl->j_commit_left) != 0) {
9591da177e4SLinus Torvalds 				flush_commit_list(s, other_jl, 0);
9601da177e4SLinus Torvalds 
9611da177e4SLinus Torvalds 				/* list we were called with is gone, return */
9621da177e4SLinus Torvalds 				if (!journal_list_still_alive(s, trans_id))
9631da177e4SLinus Torvalds 					return 1;
9641da177e4SLinus Torvalds 
9651da177e4SLinus Torvalds 				/* the one we just flushed is gone, this means all
9661da177e4SLinus Torvalds 				 * older lists are also gone, so first_jl is no longer
9671da177e4SLinus Torvalds 				 * valid either.  Go back to the beginning.
9681da177e4SLinus Torvalds 				 */
969bd4c625cSLinus Torvalds 				if (!journal_list_still_alive
970bd4c625cSLinus Torvalds 				    (s, other_trans_id)) {
9711da177e4SLinus Torvalds 					goto find_first;
9721da177e4SLinus Torvalds 				}
9731da177e4SLinus Torvalds 			}
9741da177e4SLinus Torvalds 			entry = entry->next;
9751da177e4SLinus Torvalds 			if (entry == &journal->j_journal_list)
9761da177e4SLinus Torvalds 				return 0;
9771da177e4SLinus Torvalds 		} else {
9781da177e4SLinus Torvalds 			return 0;
9791da177e4SLinus Torvalds 		}
9801da177e4SLinus Torvalds 	}
9811da177e4SLinus Torvalds 	return 0;
9821da177e4SLinus Torvalds }
983deba0f49SAdrian Bunk 
984deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s)
985bd4c625cSLinus Torvalds {
9861da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
9871da177e4SLinus Torvalds 	struct reiserfs_journal *j = SB_JOURNAL(s);
9888ebc4232SFrederic Weisbecker 
9898ebc4232SFrederic Weisbecker 	if (atomic_read(&j->j_async_throttle)) {
9908ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
9918aa7e847SJens Axboe 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
9928ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
9938ebc4232SFrederic Weisbecker 	}
9948ebc4232SFrederic Weisbecker 
9951da177e4SLinus Torvalds 	return 0;
9961da177e4SLinus Torvalds }
9971da177e4SLinus Torvalds 
9981da177e4SLinus Torvalds /*
9991da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk.
10001da177e4SLinus Torvalds **
10011da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
10021da177e4SLinus Torvalds ** Before the commit block can by written, every other log block must be safely on disk
10031da177e4SLinus Torvalds **
10041da177e4SLinus Torvalds */
1005bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
1006bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall)
1007bd4c625cSLinus Torvalds {
10081da177e4SLinus Torvalds 	int i;
10093ee16670SJeff Mahoney 	b_blocknr_t bn;
10101da177e4SLinus Torvalds 	struct buffer_head *tbh = NULL;
1011600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
10121da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
10131da177e4SLinus Torvalds 	int barrier = 0;
10141da177e4SLinus Torvalds 	int retval = 0;
1015e0e851cfSChris Mason 	int write_len;
10161da177e4SLinus Torvalds 
10171da177e4SLinus Torvalds 	reiserfs_check_lock_depth(s, "flush_commit_list");
10181da177e4SLinus Torvalds 
10191da177e4SLinus Torvalds 	if (atomic_read(&jl->j_older_commits_done)) {
10201da177e4SLinus Torvalds 		return 0;
10211da177e4SLinus Torvalds 	}
10221da177e4SLinus Torvalds 
102322e2c507SJens Axboe 	get_fs_excl();
102422e2c507SJens Axboe 
10251da177e4SLinus Torvalds 	/* before we can put our commit blocks on disk, we have to make sure everyone older than
10261da177e4SLinus Torvalds 	 ** us is on disk too
10271da177e4SLinus Torvalds 	 */
10281da177e4SLinus Torvalds 	BUG_ON(jl->j_len <= 0);
10291da177e4SLinus Torvalds 	BUG_ON(trans_id == journal->j_trans_id);
10301da177e4SLinus Torvalds 
10311da177e4SLinus Torvalds 	get_journal_list(jl);
10321da177e4SLinus Torvalds 	if (flushall) {
10331da177e4SLinus Torvalds 		if (flush_older_commits(s, jl) == 1) {
10341da177e4SLinus Torvalds 			/* list disappeared during flush_older_commits.  return */
10351da177e4SLinus Torvalds 			goto put_jl;
10361da177e4SLinus Torvalds 		}
10371da177e4SLinus Torvalds 	}
10381da177e4SLinus Torvalds 
10391da177e4SLinus Torvalds 	/* make sure nobody is trying to flush this one at the same time */
10408ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
10418ebc4232SFrederic Weisbecker 
10421da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, trans_id)) {
104390415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10441da177e4SLinus Torvalds 		goto put_jl;
10451da177e4SLinus Torvalds 	}
10461da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
10471da177e4SLinus Torvalds 
10481da177e4SLinus Torvalds 	/* this commit is done, exit */
10491da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_commit_left)) <= 0) {
10501da177e4SLinus Torvalds 		if (flushall) {
10511da177e4SLinus Torvalds 			atomic_set(&(jl->j_older_commits_done), 1);
10521da177e4SLinus Torvalds 		}
105390415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10541da177e4SLinus Torvalds 		goto put_jl;
10551da177e4SLinus Torvalds 	}
10561da177e4SLinus Torvalds 
10571da177e4SLinus Torvalds 	if (!list_empty(&jl->j_bh_list)) {
10583d4492f8SChris Mason 		int ret;
10598ebc4232SFrederic Weisbecker 
10608ebc4232SFrederic Weisbecker 		/*
10618ebc4232SFrederic Weisbecker 		 * We might sleep in numerous places inside
10628ebc4232SFrederic Weisbecker 		 * write_ordered_buffers. Relax the write lock.
10638ebc4232SFrederic Weisbecker 		 */
10648ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10653d4492f8SChris Mason 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
10661da177e4SLinus Torvalds 					    journal, jl, &jl->j_bh_list);
10673d4492f8SChris Mason 		if (ret < 0 && retval == 0)
10683d4492f8SChris Mason 			retval = ret;
10698ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10701da177e4SLinus Torvalds 	}
10711da177e4SLinus Torvalds 	BUG_ON(!list_empty(&jl->j_bh_list));
10721da177e4SLinus Torvalds 	/*
10731da177e4SLinus Torvalds 	 * for the description block and all the log blocks, submit any buffers
1074e0e851cfSChris Mason 	 * that haven't already reached the disk.  Try to write at least 256
1075e0e851cfSChris Mason 	 * log blocks. later on, we will only wait on blocks that correspond
1076e0e851cfSChris Mason 	 * to this transaction, but while we're unplugging we might as well
1077e0e851cfSChris Mason 	 * get a chunk of data on there.
10781da177e4SLinus Torvalds 	 */
10791da177e4SLinus Torvalds 	atomic_inc(&journal->j_async_throttle);
1080e0e851cfSChris Mason 	write_len = jl->j_len + 1;
1081e0e851cfSChris Mason 	if (write_len < 256)
1082e0e851cfSChris Mason 		write_len = 256;
1083e0e851cfSChris Mason 	for (i = 0 ; i < write_len ; i++) {
10841da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
10851da177e4SLinus Torvalds 		    SB_ONDISK_JOURNAL_SIZE(s);
10861da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
1087e0e851cfSChris Mason 		if (tbh) {
10886e3647acSFrederic Weisbecker 			if (buffer_dirty(tbh)) {
10896e3647acSFrederic Weisbecker 		            reiserfs_write_unlock(s);
1090e0e851cfSChris Mason 			    ll_rw_block(WRITE, 1, &tbh);
10916e3647acSFrederic Weisbecker 			    reiserfs_write_lock(s);
10926e3647acSFrederic Weisbecker 			}
10931da177e4SLinus Torvalds 			put_bh(tbh) ;
10941da177e4SLinus Torvalds 		}
1095e0e851cfSChris Mason 	}
10961da177e4SLinus Torvalds 	atomic_dec(&journal->j_async_throttle);
10971da177e4SLinus Torvalds 
10985d5e8156SJeff Mahoney 	/* We're skipping the commit if there's an error */
10995d5e8156SJeff Mahoney 	if (retval || reiserfs_is_journal_aborted(journal))
11005d5e8156SJeff Mahoney 		barrier = 0;
11015d5e8156SJeff Mahoney 
11021da177e4SLinus Torvalds 	/* wait on everything written so far before writing the commit
11031da177e4SLinus Torvalds 	 * if we are in barrier mode, send the commit down now
11041da177e4SLinus Torvalds 	 */
11051da177e4SLinus Torvalds 	barrier = reiserfs_barrier_flush(s);
11061da177e4SLinus Torvalds 	if (barrier) {
11071da177e4SLinus Torvalds 		int ret;
11081da177e4SLinus Torvalds 		lock_buffer(jl->j_commit_bh);
11091da177e4SLinus Torvalds 		ret = submit_barrier_buffer(jl->j_commit_bh);
11101da177e4SLinus Torvalds 		if (ret == -EOPNOTSUPP) {
11111da177e4SLinus Torvalds 			set_buffer_uptodate(jl->j_commit_bh);
11121da177e4SLinus Torvalds 			disable_barrier(s);
11131da177e4SLinus Torvalds 			barrier = 0;
11141da177e4SLinus Torvalds 		}
11151da177e4SLinus Torvalds 	}
11161da177e4SLinus Torvalds 	for (i = 0; i < (jl->j_len + 1); i++) {
11171da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
11181da177e4SLinus Torvalds 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
11191da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
11208ebc4232SFrederic Weisbecker 
11218ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
11221da177e4SLinus Torvalds 		wait_on_buffer(tbh);
11238ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11241da177e4SLinus Torvalds 		// since we're using ll_rw_blk above, it might have skipped over
11251da177e4SLinus Torvalds 		// a locked buffer.  Double check here
11261da177e4SLinus Torvalds 		//
11278ebc4232SFrederic Weisbecker 		/* redundant, sync_dirty_buffer() checks */
11288ebc4232SFrederic Weisbecker 		if (buffer_dirty(tbh)) {
11298ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
11301da177e4SLinus Torvalds 			sync_dirty_buffer(tbh);
11318ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
11328ebc4232SFrederic Weisbecker 		}
11331da177e4SLinus Torvalds 		if (unlikely(!buffer_uptodate(tbh))) {
11341da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
113545b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-601",
113645b03d5eSJeff Mahoney 					 "buffer write failed");
11371da177e4SLinus Torvalds #endif
11381da177e4SLinus Torvalds 			retval = -EIO;
11391da177e4SLinus Torvalds 		}
11401da177e4SLinus Torvalds 		put_bh(tbh);	/* once for journal_find_get_block */
11411da177e4SLinus Torvalds 		put_bh(tbh);	/* once due to original getblk in do_journal_end */
11421da177e4SLinus Torvalds 		atomic_dec(&(jl->j_commit_left));
11431da177e4SLinus Torvalds 	}
11441da177e4SLinus Torvalds 
11451da177e4SLinus Torvalds 	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
11461da177e4SLinus Torvalds 
11471da177e4SLinus Torvalds 	if (!barrier) {
11485d5e8156SJeff Mahoney 		/* If there was a write error in the journal - we can't commit
11495d5e8156SJeff Mahoney 		 * this transaction - it will be invalid and, if successful,
1150beb7dd86SRobert P. J. Day 		 * will just end up propagating the write error out to
11515d5e8156SJeff Mahoney 		 * the file system. */
11525d5e8156SJeff Mahoney 		if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
11531da177e4SLinus Torvalds 			if (buffer_dirty(jl->j_commit_bh))
11541da177e4SLinus Torvalds 				BUG();
11551da177e4SLinus Torvalds 			mark_buffer_dirty(jl->j_commit_bh) ;
11568ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
11571da177e4SLinus Torvalds 			sync_dirty_buffer(jl->j_commit_bh) ;
11588ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
11595d5e8156SJeff Mahoney 		}
11608ebc4232SFrederic Weisbecker 	} else {
11618ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
11621da177e4SLinus Torvalds 		wait_on_buffer(jl->j_commit_bh);
11638ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11648ebc4232SFrederic Weisbecker 	}
11651da177e4SLinus Torvalds 
11661da177e4SLinus Torvalds 	check_barrier_completion(s, jl->j_commit_bh);
11671da177e4SLinus Torvalds 
11681da177e4SLinus Torvalds 	/* If there was a write error in the journal - we can't commit this
11691da177e4SLinus Torvalds 	 * transaction - it will be invalid and, if successful, will just end
1170beb7dd86SRobert P. J. Day 	 * up propagating the write error out to the filesystem. */
11711da177e4SLinus Torvalds 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
11721da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
117345b03d5eSJeff Mahoney 		reiserfs_warning(s, "journal-615", "buffer write failed");
11741da177e4SLinus Torvalds #endif
11751da177e4SLinus Torvalds 		retval = -EIO;
11761da177e4SLinus Torvalds 	}
11771da177e4SLinus Torvalds 	bforget(jl->j_commit_bh);
11781da177e4SLinus Torvalds 	if (journal->j_last_commit_id != 0 &&
11791da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
118045b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1181bd4c625cSLinus Torvalds 				 journal->j_last_commit_id, jl->j_trans_id);
11821da177e4SLinus Torvalds 	}
11831da177e4SLinus Torvalds 	journal->j_last_commit_id = jl->j_trans_id;
11841da177e4SLinus Torvalds 
11851da177e4SLinus Torvalds 	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
11861da177e4SLinus Torvalds 	cleanup_freed_for_journal_list(s, jl);
11871da177e4SLinus Torvalds 
11881da177e4SLinus Torvalds 	retval = retval ? retval : journal->j_errno;
11891da177e4SLinus Torvalds 
11901da177e4SLinus Torvalds 	/* mark the metadata dirty */
11911da177e4SLinus Torvalds 	if (!retval)
11921da177e4SLinus Torvalds 		dirty_one_transaction(s, jl);
11931da177e4SLinus Torvalds 	atomic_dec(&(jl->j_commit_left));
11941da177e4SLinus Torvalds 
11951da177e4SLinus Torvalds 	if (flushall) {
11961da177e4SLinus Torvalds 		atomic_set(&(jl->j_older_commits_done), 1);
11971da177e4SLinus Torvalds 	}
119890415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
11991da177e4SLinus Torvalds       put_jl:
12001da177e4SLinus Torvalds 	put_journal_list(s, jl);
12011da177e4SLinus Torvalds 
12021da177e4SLinus Torvalds 	if (retval)
1203bd4c625cSLinus Torvalds 		reiserfs_abort(s, retval, "Journal write error in %s",
1204fbe5498bSHarvey Harrison 			       __func__);
120522e2c507SJens Axboe 	put_fs_excl();
12061da177e4SLinus Torvalds 	return retval;
12071da177e4SLinus Torvalds }
12081da177e4SLinus Torvalds 
12091da177e4SLinus Torvalds /*
12101da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
12111da177e4SLinus Torvalds ** returns NULL if it can't find anything
12121da177e4SLinus Torvalds */
1213bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1214bd4c625cSLinus Torvalds 							  reiserfs_journal_cnode
1215bd4c625cSLinus Torvalds 							  *cn)
1216bd4c625cSLinus Torvalds {
12171da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
12181da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
12191da177e4SLinus Torvalds 
12201da177e4SLinus Torvalds 	cn = cn->hprev;
12211da177e4SLinus Torvalds 	while (cn) {
12221da177e4SLinus Torvalds 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
12231da177e4SLinus Torvalds 			return cn->jlist;
12241da177e4SLinus Torvalds 		}
12251da177e4SLinus Torvalds 		cn = cn->hprev;
12261da177e4SLinus Torvalds 	}
12271da177e4SLinus Torvalds 	return NULL;
12281da177e4SLinus Torvalds }
12291da177e4SLinus Torvalds 
1230a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1231a3172027SChris Mason {
1232a3172027SChris Mason 	struct super_block *sb = cn->sb;
1233a3172027SChris Mason 	b_blocknr_t blocknr = cn->blocknr;
1234a3172027SChris Mason 
1235a3172027SChris Mason 	cn = cn->hprev;
1236a3172027SChris Mason 	while (cn) {
1237a3172027SChris Mason 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1238a3172027SChris Mason 		    atomic_read(&cn->jlist->j_commit_left) != 0)
1239a3172027SChris Mason 				    return 0;
1240a3172027SChris Mason 		cn = cn->hprev;
1241a3172027SChris Mason 	}
1242a3172027SChris Mason 	return 1;
1243a3172027SChris Mason }
1244a3172027SChris Mason 
1245bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *,
1246bd4c625cSLinus Torvalds 				struct reiserfs_journal_cnode **,
1247bd4c625cSLinus Torvalds 				struct reiserfs_journal_list *, unsigned long,
1248bd4c625cSLinus Torvalds 				int);
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds /*
12511da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the
12521da177e4SLinus Torvalds ** journal list for this transaction.  Aside from freeing the cnode, this also allows the
12531da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted.
12541da177e4SLinus Torvalds */
1255a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb,
1256bd4c625cSLinus Torvalds 					 struct reiserfs_journal_list *jl,
1257bd4c625cSLinus Torvalds 					 int debug)
1258bd4c625cSLinus Torvalds {
1259a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12601da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
12611da177e4SLinus Torvalds 	cn = jl->j_realblock;
12621da177e4SLinus Torvalds 
12631da177e4SLinus Torvalds 	/* which is better, to lock once around the whole loop, or
12641da177e4SLinus Torvalds 	 ** to lock for each call to remove_journal_hash?
12651da177e4SLinus Torvalds 	 */
12661da177e4SLinus Torvalds 	while (cn) {
12671da177e4SLinus Torvalds 		if (cn->blocknr != 0) {
12681da177e4SLinus Torvalds 			if (debug) {
1269a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2201",
1270bd4c625cSLinus Torvalds 						 "block %u, bh is %d, state %ld",
1271bd4c625cSLinus Torvalds 						 cn->blocknr, cn->bh ? 1 : 0,
1272bd4c625cSLinus Torvalds 						 cn->state);
12731da177e4SLinus Torvalds 			}
12741da177e4SLinus Torvalds 			cn->state = 0;
1275a9dd3643SJeff Mahoney 			remove_journal_hash(sb, journal->j_list_hash_table,
1276bd4c625cSLinus Torvalds 					    jl, cn->blocknr, 1);
12771da177e4SLinus Torvalds 		}
12781da177e4SLinus Torvalds 		last = cn;
12791da177e4SLinus Torvalds 		cn = cn->next;
1280a9dd3643SJeff Mahoney 		free_cnode(sb, last);
12811da177e4SLinus Torvalds 	}
12821da177e4SLinus Torvalds 	jl->j_realblock = NULL;
12831da177e4SLinus Torvalds }
12841da177e4SLinus Torvalds 
12851da177e4SLinus Torvalds /*
12861da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
12871da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
12881da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks.
12891da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list
12901da177e4SLinus Torvalds **
12911da177e4SLinus Torvalds */
/*
** Record (offset, trans_id) in the journal header block and write it out.
**
** sb:       super block whose journal header is updated
** offset:   value stored as j_first_unflushed_offset
** trans_id: value stored as j_last_flush_trans_id
**
** Nothing is written unless trans_id >= journal->j_last_flush_trans_id.
** The in-memory journal fields are updated first, then the little-endian
** copies in the header buffer (including j_mount_id).  The buffer is then
** submitted either as a barrier write or, when barriers are off or the
** barrier submit returns -EOPNOTSUPP, through sync_dirty_buffer().  The
** reiserfs write lock is dropped around every wait on the buffer.
**
** Returns 0 on success (or when there was nothing to do), -EIO if the
** journal is already aborted or the header buffer is not uptodate after
** a wait/write.
*/
1292a9dd3643SJeff Mahoney static int _update_journal_header_block(struct super_block *sb,
1293bd4c625cSLinus Torvalds 					unsigned long offset,
1294600ed416SJeff Mahoney 					unsigned int trans_id)
1295bd4c625cSLinus Torvalds {
12961da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
1297a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12981da177e4SLinus Torvalds 
12991da177e4SLinus Torvalds 	if (reiserfs_is_journal_aborted(journal))
13001da177e4SLinus Torvalds 		return -EIO;
13011da177e4SLinus Torvalds 
13021da177e4SLinus Torvalds 	if (trans_id >= journal->j_last_flush_trans_id) {
	/* a previous write of the header may still be in flight: wait for
	 * it and fail if it did not complete successfully
	 */
13031da177e4SLinus Torvalds 		if (buffer_locked((journal->j_header_bh))) {
13048ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
13051da177e4SLinus Torvalds 			wait_on_buffer((journal->j_header_bh));
13068ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
13071da177e4SLinus Torvalds 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
13081da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1309a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "journal-699",
131045b03d5eSJeff Mahoney 						 "buffer write failed");
13111da177e4SLinus Torvalds #endif
13121da177e4SLinus Torvalds 				return -EIO;
13131da177e4SLinus Torvalds 			}
13141da177e4SLinus Torvalds 		}
	/* update the in-memory copy, then the on-disk (little-endian) one */
13151da177e4SLinus Torvalds 		journal->j_last_flush_trans_id = trans_id;
13161da177e4SLinus Torvalds 		journal->j_first_unflushed_offset = offset;
1317bd4c625cSLinus Torvalds 		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1318bd4c625cSLinus Torvalds 							b_data);
13191da177e4SLinus Torvalds 		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
13201da177e4SLinus Torvalds 		jh->j_first_unflushed_offset = cpu_to_le32(offset);
13211da177e4SLinus Torvalds 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
13221da177e4SLinus Torvalds 
1323a9dd3643SJeff Mahoney 		if (reiserfs_barrier_flush(sb)) {
13241da177e4SLinus Torvalds 			int ret;
13251da177e4SLinus Torvalds 			lock_buffer(journal->j_header_bh);
13261da177e4SLinus Torvalds 			ret = submit_barrier_buffer(journal->j_header_bh);
	/* barrier write not supported: turn barriers off and jump into
	 * the else-branch below to do a plain synchronous write instead
	 */
13271da177e4SLinus Torvalds 			if (ret == -EOPNOTSUPP) {
13281da177e4SLinus Torvalds 				set_buffer_uptodate(journal->j_header_bh);
1329a9dd3643SJeff Mahoney 				disable_barrier(sb);
13301da177e4SLinus Torvalds 				goto sync;
13311da177e4SLinus Torvalds 			}
13328ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
13331da177e4SLinus Torvalds 			wait_on_buffer(journal->j_header_bh);
13348ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
1335a9dd3643SJeff Mahoney 			check_barrier_completion(sb, journal->j_header_bh);
13361da177e4SLinus Torvalds 		} else {
13371da177e4SLinus Torvalds 		      sync:
13381da177e4SLinus Torvalds 			set_buffer_dirty(journal->j_header_bh);
13398ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
13401da177e4SLinus Torvalds 			sync_dirty_buffer(journal->j_header_bh);
13418ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
13421da177e4SLinus Torvalds 		}
	/* NOTE(review): the message below mentions journal replay, but this
	 * path is reached for any failed header write -- confirm wording
	 */
13431da177e4SLinus Torvalds 		if (!buffer_uptodate(journal->j_header_bh)) {
1344a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-837",
134545b03d5eSJeff Mahoney 					 "IO error during journal replay");
13461da177e4SLinus Torvalds 			return -EIO;
13471da177e4SLinus Torvalds 		}
13481da177e4SLinus Torvalds 	}
13491da177e4SLinus Torvalds 	return 0;
13501da177e4SLinus Torvalds }
13511da177e4SLinus Torvalds 
/*
** Thin wrapper: hands sb, offset and trans_id unchanged to
** _update_journal_header_block() and passes its return value back.
*/
static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	int status;

	status = _update_journal_header_block(sb, offset, trans_id);
	return status;
}
1358bd4c625cSLinus Torvalds 
13591da177e4SLinus Torvalds /*
13601da177e4SLinus Torvalds ** flush any and all journal lists older than you are
13611da177e4SLinus Torvalds ** can only be called from flush_journal_list
13621da177e4SLinus Torvalds */
1363a9dd3643SJeff Mahoney static int flush_older_journal_lists(struct super_block *sb,
13641da177e4SLinus Torvalds 				     struct reiserfs_journal_list *jl)
13651da177e4SLinus Torvalds {
13661da177e4SLinus Torvalds 	struct list_head *entry;
13671da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
1368a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1369600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
13701da177e4SLinus Torvalds 
13711da177e4SLinus Torvalds 	/* we know we are the only ones flushing things, no extra race
13721da177e4SLinus Torvalds 	 * protection is required.
13731da177e4SLinus Torvalds 	 */
13741da177e4SLinus Torvalds       restart:
13751da177e4SLinus Torvalds 	entry = journal->j_journal_list.next;
13761da177e4SLinus Torvalds 	/* Did we wrap? */
13771da177e4SLinus Torvalds 	if (entry == &journal->j_journal_list)
13781da177e4SLinus Torvalds 		return 0;
13791da177e4SLinus Torvalds 	other_jl = JOURNAL_LIST_ENTRY(entry);
13801da177e4SLinus Torvalds 	if (other_jl->j_trans_id < trans_id) {
13811da177e4SLinus Torvalds 		BUG_ON(other_jl->j_refcount <= 0);
13821da177e4SLinus Torvalds 		/* do not flush all */
1383a9dd3643SJeff Mahoney 		flush_journal_list(sb, other_jl, 0);
13841da177e4SLinus Torvalds 
13851da177e4SLinus Torvalds 		/* other_jl is now deleted from the list */
13861da177e4SLinus Torvalds 		goto restart;
13871da177e4SLinus Torvalds 	}
13881da177e4SLinus Torvalds 	return 0;
13891da177e4SLinus Torvalds }
13901da177e4SLinus Torvalds 
13911da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s,
1392bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
1393bd4c625cSLinus Torvalds {
13941da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13951da177e4SLinus Torvalds 	if (!list_empty(&jl->j_working_list)) {
13961da177e4SLinus Torvalds 		list_del_init(&jl->j_working_list);
13971da177e4SLinus Torvalds 		journal->j_num_work_lists--;
13981da177e4SLinus Torvalds 	}
13991da177e4SLinus Torvalds }
14001da177e4SLinus Torvalds 
14011da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks
14021da177e4SLinus Torvalds **
14031da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside
14041da177e4SLinus Torvalds ** flush_journal_list
14051da177e4SLinus Torvalds **
14061da177e4SLinus Torvalds ** IMPORTANT.  This can only be called while there are no journal writers,
14071da177e4SLinus Torvalds ** and the journal is locked.  That means it can only be called from
14081da177e4SLinus Torvalds ** do_journal_end, or by journal_release
14091da177e4SLinus Torvalds */
/*
** s:        super block owning the journal
** jl:       journal list to flush; it is unlinked from j_journal_list and
**           from the work list, its fields are reset, and one reference is
**           dropped with put_journal_list() before returning
** flushall: non-zero -> j_flush_mutex is taken here and released before
**           returning, older journal lists are flushed first, and the
**           journal header block is updated when journal->j_errno is 0.
**           zero -> the caller is expected to hold j_flush_mutex already
**           (a successful trylock triggers BUG()).
**
** Returns journal->j_errno as read after the optional flush of older lists,
** or, when that is 0 and flushall is set, the result of
** update_journal_header_block().
*/
14101da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s,
1411bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall)
1412bd4c625cSLinus Torvalds {
14131da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
14141da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
14151da177e4SLinus Torvalds 	int count;
14161da177e4SLinus Torvalds 	int was_jwait = 0;
14171da177e4SLinus Torvalds 	int was_dirty = 0;
14181da177e4SLinus Torvalds 	struct buffer_head *saved_bh;
14191da177e4SLinus Torvalds 	unsigned long j_len_saved = jl->j_len;
14201da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
14211da177e4SLinus Torvalds 	int err = 0;
14221da177e4SLinus Torvalds 
	/* j_len_saved is unsigned long, so this only fires for a zero length */
14231da177e4SLinus Torvalds 	BUG_ON(j_len_saved <= 0);
14241da177e4SLinus Torvalds 
14251da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) != 0) {
142645b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2048", "called with wcount %d",
14271da177e4SLinus Torvalds 				 atomic_read(&journal->j_wcount));
14281da177e4SLinus Torvalds 	}
14291da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
14301da177e4SLinus Torvalds 
14311da177e4SLinus Torvalds 	/* if flushall == 0, the lock is already held */
14321da177e4SLinus Torvalds 	if (flushall) {
14338ebc4232SFrederic Weisbecker 		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1434afe70259SJeff Mahoney 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
14351da177e4SLinus Torvalds 		BUG();
14361da177e4SLinus Torvalds 	}
14371da177e4SLinus Torvalds 
14381da177e4SLinus Torvalds 	count = 0;
	/* NOTE(review): the return below would leave j_flush_mutex held when
	 * flushall is set; presumably reiserfs_panic() never returns --
	 * confirm
	 */
14391da177e4SLinus Torvalds 	if (j_len_saved > journal->j_trans_max) {
1440c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1441bd4c625cSLinus Torvalds 			       j_len_saved, jl->j_trans_id);
14421da177e4SLinus Torvalds 		return 0;
14431da177e4SLinus Torvalds 	}
14441da177e4SLinus Torvalds 
144522e2c507SJens Axboe 	get_fs_excl();
144622e2c507SJens Axboe 
14471da177e4SLinus Torvalds 	/* if all the work is already done, get out of here */
14481da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
14491da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
14501da177e4SLinus Torvalds 		goto flush_older_and_return;
14511da177e4SLinus Torvalds 	}
14521da177e4SLinus Torvalds 
14531da177e4SLinus Torvalds 	/* start by putting the commit list on disk.  This will also flush
14541da177e4SLinus Torvalds 	 ** the commit lists of any older transactions
14551da177e4SLinus Torvalds 	 */
14561da177e4SLinus Torvalds 	flush_commit_list(s, jl, 1);
14571da177e4SLinus Torvalds 
1458bd4c625cSLinus Torvalds 	if (!(jl->j_state & LIST_DIRTY)
1459bd4c625cSLinus Torvalds 	    && !reiserfs_is_journal_aborted(journal))
14601da177e4SLinus Torvalds 		BUG();
14611da177e4SLinus Torvalds 
14621da177e4SLinus Torvalds 	/* are we done now? */
14631da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
14641da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
14651da177e4SLinus Torvalds 		goto flush_older_and_return;
14661da177e4SLinus Torvalds 	}
14671da177e4SLinus Torvalds 
14681da177e4SLinus Torvalds 	/* loop through each cnode, see if we need to write it,
14691da177e4SLinus Torvalds 	 ** or wait on a more recent transaction, or just ignore it
14701da177e4SLinus Torvalds 	 */
14711da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) != 0) {
1472c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-844", "journal list is flushing, "
1473c3a9c210SJeff Mahoney 			       "wcount is not 0");
14741da177e4SLinus Torvalds 	}
	/* pass 1: submit every buffer this list is responsible for writing;
	 * cnodes submitted here are tagged BLOCK_NEEDS_FLUSH for pass 2
	 */
14751da177e4SLinus Torvalds 	cn = jl->j_realblock;
14761da177e4SLinus Torvalds 	while (cn) {
14771da177e4SLinus Torvalds 		was_jwait = 0;
14781da177e4SLinus Torvalds 		was_dirty = 0;
14791da177e4SLinus Torvalds 		saved_bh = NULL;
14801da177e4SLinus Torvalds 		/* blocknr of 0 is no longer in the hash, ignore it */
14811da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
14821da177e4SLinus Torvalds 			goto free_cnode;
14831da177e4SLinus Torvalds 		}
14841da177e4SLinus Torvalds 
14851da177e4SLinus Torvalds 		/* This transaction failed commit. Don't write out to the disk */
14861da177e4SLinus Torvalds 		if (!(jl->j_state & LIST_DIRTY))
14871da177e4SLinus Torvalds 			goto free_cnode;
14881da177e4SLinus Torvalds 
14891da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
14901da177e4SLinus Torvalds 		/* the order is important here.  We check pjl to make sure we
14911da177e4SLinus Torvalds 		 ** don't clear BH_JDirty_wait if we aren't the one writing this
14921da177e4SLinus Torvalds 		 ** block to disk
14931da177e4SLinus Torvalds 		 */
14941da177e4SLinus Torvalds 		if (!pjl && cn->bh) {
14951da177e4SLinus Torvalds 			saved_bh = cn->bh;
14961da177e4SLinus Torvalds 
14971da177e4SLinus Torvalds 			/* we do this to make sure nobody releases the buffer while
14981da177e4SLinus Torvalds 			 ** we are working with it
14991da177e4SLinus Torvalds 			 */
15001da177e4SLinus Torvalds 			get_bh(saved_bh);
15011da177e4SLinus Torvalds 
15021da177e4SLinus Torvalds 			if (buffer_journal_dirty(saved_bh)) {
15031da177e4SLinus Torvalds 				BUG_ON(!can_dirty(cn));
15041da177e4SLinus Torvalds 				was_jwait = 1;
15051da177e4SLinus Torvalds 				was_dirty = 1;
15061da177e4SLinus Torvalds 			} else if (can_dirty(cn)) {
15071da177e4SLinus Torvalds 				/* everything with !pjl && jwait should be writable */
15081da177e4SLinus Torvalds 				BUG();
15091da177e4SLinus Torvalds 			}
15101da177e4SLinus Torvalds 		}
15111da177e4SLinus Torvalds 
15121da177e4SLinus Torvalds 		/* if someone has this block in a newer transaction, just make
15130779bf2dSMatt LaPlante 		 ** sure they are committed, and don't try writing it to disk
15141da177e4SLinus Torvalds 		 */
15151da177e4SLinus Torvalds 		if (pjl) {
15161da177e4SLinus Torvalds 			if (atomic_read(&pjl->j_commit_left))
15171da177e4SLinus Torvalds 				flush_commit_list(s, pjl, 1);
15181da177e4SLinus Torvalds 			goto free_cnode;
15191da177e4SLinus Torvalds 		}
15201da177e4SLinus Torvalds 
15211da177e4SLinus Torvalds 		/* bh == NULL when the block got to disk on its own, OR,
15221da177e4SLinus Torvalds 		 ** the block got freed in a future transaction
15231da177e4SLinus Torvalds 		 */
15241da177e4SLinus Torvalds 		if (saved_bh == NULL) {
15251da177e4SLinus Torvalds 			goto free_cnode;
15261da177e4SLinus Torvalds 		}
15271da177e4SLinus Torvalds 
15281da177e4SLinus Torvalds 		/* this should never happen.  kupdate_one_transaction has this list
15291da177e4SLinus Torvalds 		 ** locked while it works, so we should never see a buffer here that
15301da177e4SLinus Torvalds 		 ** is not marked JDirty_wait
15311da177e4SLinus Torvalds 		 */
15321da177e4SLinus Torvalds 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
153345b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-813",
153445b03d5eSJeff Mahoney 					 "BAD! buffer %llu %cdirty %cjwait, "
15351da177e4SLinus Torvalds 					 "not in a newer tranasction",
1536bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1537bd4c625cSLinus Torvalds 					 b_blocknr, was_dirty ? ' ' : '!',
1538bd4c625cSLinus Torvalds 					 was_jwait ? ' ' : '!');
15391da177e4SLinus Torvalds 		}
15401da177e4SLinus Torvalds 		if (was_dirty) {
15411da177e4SLinus Torvalds 			/* we inc again because saved_bh gets decremented at free_cnode */
15421da177e4SLinus Torvalds 			get_bh(saved_bh);
15431da177e4SLinus Torvalds 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
15441da177e4SLinus Torvalds 			lock_buffer(saved_bh);
15451da177e4SLinus Torvalds 			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
15461da177e4SLinus Torvalds 			if (buffer_dirty(saved_bh))
15471da177e4SLinus Torvalds 				submit_logged_buffer(saved_bh);
15481da177e4SLinus Torvalds 			else
15491da177e4SLinus Torvalds 				unlock_buffer(saved_bh);
15501da177e4SLinus Torvalds 			count++;
15511da177e4SLinus Torvalds 		} else {
155245b03d5eSJeff Mahoney 			reiserfs_warning(s, "clm-2082",
155345b03d5eSJeff Mahoney 					 "Unable to flush buffer %llu in %s",
1554bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1555fbe5498bSHarvey Harrison 					 b_blocknr, __func__);
15561da177e4SLinus Torvalds 		}
15571da177e4SLinus Torvalds 	      free_cnode:
15581da177e4SLinus Torvalds 		last = cn;
15591da177e4SLinus Torvalds 		cn = cn->next;
15601da177e4SLinus Torvalds 		if (saved_bh) {
15611da177e4SLinus Torvalds 			/* we incremented this to keep others from taking the buffer head away */
15621da177e4SLinus Torvalds 			put_bh(saved_bh);
15631da177e4SLinus Torvalds 			if (atomic_read(&(saved_bh->b_count)) < 0) {
156445b03d5eSJeff Mahoney 				reiserfs_warning(s, "journal-945",
156545b03d5eSJeff Mahoney 						 "saved_bh->b_count < 0");
15661da177e4SLinus Torvalds 			}
15671da177e4SLinus Torvalds 		}
15681da177e4SLinus Torvalds 	}
	/* pass 2: wait for every buffer tagged in pass 1, record write
	 * errors, and drop the references taken for it
	 */
15691da177e4SLinus Torvalds 	if (count > 0) {
15701da177e4SLinus Torvalds 		cn = jl->j_realblock;
15711da177e4SLinus Torvalds 		while (cn) {
15721da177e4SLinus Torvalds 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
15731da177e4SLinus Torvalds 				if (!cn->bh) {
1574c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1011",
1575c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
15761da177e4SLinus Torvalds 				}
15778ebc4232SFrederic Weisbecker 
15788ebc4232SFrederic Weisbecker 				reiserfs_write_unlock(s);
15791da177e4SLinus Torvalds 				wait_on_buffer(cn->bh);
15808ebc4232SFrederic Weisbecker 				reiserfs_write_lock(s);
15818ebc4232SFrederic Weisbecker 
	/* re-checked because the write lock was dropped during the wait */
15821da177e4SLinus Torvalds 				if (!cn->bh) {
1583c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1012",
1584c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
15851da177e4SLinus Torvalds 				}
15861da177e4SLinus Torvalds 				if (unlikely(!buffer_uptodate(cn->bh))) {
15871da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
158845b03d5eSJeff Mahoney 					reiserfs_warning(s, "journal-949",
158945b03d5eSJeff Mahoney 							 "buffer write failed");
15901da177e4SLinus Torvalds #endif
15911da177e4SLinus Torvalds 					err = -EIO;
15921da177e4SLinus Torvalds 				}
15931da177e4SLinus Torvalds 				/* note, we must clear the JDirty_wait bit after the up to date
15941da177e4SLinus Torvalds 				 ** check, otherwise we race against our flushpage routine
15951da177e4SLinus Torvalds 				 */
1596bd4c625cSLinus Torvalds 				BUG_ON(!test_clear_buffer_journal_dirty
1597bd4c625cSLinus Torvalds 				       (cn->bh));
15981da177e4SLinus Torvalds 
1599398c95bdSChris Mason 				/* drop one ref for us */
16001da177e4SLinus Torvalds 				put_bh(cn->bh);
1601398c95bdSChris Mason 				/* drop one ref for journal_mark_dirty */
1602398c95bdSChris Mason 				release_buffer_page(cn->bh);
16031da177e4SLinus Torvalds 			}
16041da177e4SLinus Torvalds 			cn = cn->next;
16051da177e4SLinus Torvalds 		}
16061da177e4SLinus Torvalds 	}
16071da177e4SLinus Torvalds 
16081da177e4SLinus Torvalds 	if (err)
1609bd4c625cSLinus Torvalds 		reiserfs_abort(s, -EIO,
1610bd4c625cSLinus Torvalds 			       "Write error while pushing transaction to disk in %s",
1611fbe5498bSHarvey Harrison 			       __func__);
16121da177e4SLinus Torvalds       flush_older_and_return:
16131da177e4SLinus Torvalds 
16141da177e4SLinus Torvalds 	/* before we can update the journal header block, we _must_ flush all
16151da177e4SLinus Torvalds 	 ** real blocks from all older transactions to disk.  This is because
16161da177e4SLinus Torvalds 	 ** once the header block is updated, this transaction will not be
16171da177e4SLinus Torvalds 	 ** replayed after a crash
16181da177e4SLinus Torvalds 	 */
16191da177e4SLinus Torvalds 	if (flushall) {
16201da177e4SLinus Torvalds 		flush_older_journal_lists(s, jl);
16211da177e4SLinus Torvalds 	}
16221da177e4SLinus Torvalds 
	/* the local err from pass 2 is replaced by the journal-wide error */
16231da177e4SLinus Torvalds 	err = journal->j_errno;
16241da177e4SLinus Torvalds 	/* before we can remove everything from the hash tables for this
16251da177e4SLinus Torvalds 	 ** transaction, we must make sure it can never be replayed
16261da177e4SLinus Torvalds 	 **
16271da177e4SLinus Torvalds 	 ** since we are only called from do_journal_end, we know for sure there
16281da177e4SLinus Torvalds 	 ** are no allocations going on while we are flushing journal lists.  So,
16291da177e4SLinus Torvalds 	 ** we only need to update the journal header block for the last list
16301da177e4SLinus Torvalds 	 ** being flushed
16311da177e4SLinus Torvalds 	 */
16321da177e4SLinus Torvalds 	if (!err && flushall) {
1633bd4c625cSLinus Torvalds 		err =
1634bd4c625cSLinus Torvalds 		    update_journal_header_block(s,
1635bd4c625cSLinus Torvalds 						(jl->j_start + jl->j_len +
1636bd4c625cSLinus Torvalds 						 2) % SB_ONDISK_JOURNAL_SIZE(s),
1637bd4c625cSLinus Torvalds 						jl->j_trans_id);
16381da177e4SLinus Torvalds 		if (err)
1639bd4c625cSLinus Torvalds 			reiserfs_abort(s, -EIO,
1640bd4c625cSLinus Torvalds 				       "Write error while updating journal header in %s",
1641fbe5498bSHarvey Harrison 				       __func__);
16421da177e4SLinus Torvalds 	}
16431da177e4SLinus Torvalds 	remove_all_from_journal_list(s, jl, 0);
16441da177e4SLinus Torvalds 	list_del_init(&jl->j_list);
16451da177e4SLinus Torvalds 	journal->j_num_lists--;
16461da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16471da177e4SLinus Torvalds 
16481da177e4SLinus Torvalds 	if (journal->j_last_flush_id != 0 &&
16491da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
165045b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1651bd4c625cSLinus Torvalds 				 journal->j_last_flush_id, jl->j_trans_id);
16521da177e4SLinus Torvalds 	}
16531da177e4SLinus Torvalds 	journal->j_last_flush_id = jl->j_trans_id;
16541da177e4SLinus Torvalds 
16551da177e4SLinus Torvalds 	/* not strictly required since we are freeing the list, but it should
16561da177e4SLinus Torvalds 	 * help find code using dead lists later on
16571da177e4SLinus Torvalds 	 */
16581da177e4SLinus Torvalds 	jl->j_len = 0;
16591da177e4SLinus Torvalds 	atomic_set(&(jl->j_nonzerolen), 0);
16601da177e4SLinus Torvalds 	jl->j_start = 0;
16611da177e4SLinus Torvalds 	jl->j_realblock = NULL;
16621da177e4SLinus Torvalds 	jl->j_commit_bh = NULL;
16631da177e4SLinus Torvalds 	jl->j_trans_id = 0;
16641da177e4SLinus Torvalds 	jl->j_state = 0;
16651da177e4SLinus Torvalds 	put_journal_list(s, jl);
16661da177e4SLinus Torvalds 	if (flushall)
1667afe70259SJeff Mahoney 		mutex_unlock(&journal->j_flush_mutex);
166822e2c507SJens Axboe 	put_fs_excl();
16691da177e4SLinus Torvalds 	return err;
16701da177e4SLinus Torvalds }
16711da177e4SLinus Torvalds 
1672a3172027SChris Mason static int test_transaction(struct super_block *s,
1673a3172027SChris Mason                             struct reiserfs_journal_list *jl)
1674a3172027SChris Mason {
1675a3172027SChris Mason 	struct reiserfs_journal_cnode *cn;
1676a3172027SChris Mason 
1677a3172027SChris Mason 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1678a3172027SChris Mason 		return 1;
1679a3172027SChris Mason 
1680a3172027SChris Mason 	cn = jl->j_realblock;
1681a3172027SChris Mason 	while (cn) {
1682a3172027SChris Mason 		/* if the blocknr == 0, this has been cleared from the hash,
1683a3172027SChris Mason 		 ** skip it
1684a3172027SChris Mason 		 */
1685a3172027SChris Mason 		if (cn->blocknr == 0) {
1686a3172027SChris Mason 			goto next;
1687a3172027SChris Mason 		}
1688a3172027SChris Mason 		if (cn->bh && !newer_jl_done(cn))
1689a3172027SChris Mason 			return 0;
1690a3172027SChris Mason 	      next:
1691a3172027SChris Mason 		cn = cn->next;
1692a3172027SChris Mason 		cond_resched();
1693a3172027SChris Mason 	}
1694a3172027SChris Mason 	return 0;
1695a3172027SChris Mason }
1696a3172027SChris Mason 
/*
** Queue the dirty, writable buffers of journal list jl onto chunk.
**
** s:     super block owning the journal
** jl:    journal list to scan; it is marked LIST_TOUCHED and taken off the
**        work list before anything else happens
** chunk: buffer chunk that receives the buffers via add_to_chunk(), with
**        write_chunk as the callback argument
**
** Returns the number of buffers handed to add_to_chunk(); 0 when jl has no
** length or its j_nonzerolen counter is zero.
*/
16971da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s,
16981da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
16991da177e4SLinus Torvalds 				 struct buffer_chunk *chunk)
17001da177e4SLinus Torvalds {
17011da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
17021da177e4SLinus Torvalds 	int ret = 0;
17031da177e4SLinus Torvalds 
17041da177e4SLinus Torvalds 	jl->j_state |= LIST_TOUCHED;
17051da177e4SLinus Torvalds 	del_from_work_list(s, jl);
17061da177e4SLinus Torvalds 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
17071da177e4SLinus Torvalds 		return 0;
17081da177e4SLinus Torvalds 	}
17091da177e4SLinus Torvalds 
17101da177e4SLinus Torvalds 	cn = jl->j_realblock;
17111da177e4SLinus Torvalds 	while (cn) {
17121da177e4SLinus Torvalds 		/* if the blocknr == 0, this has been cleared from the hash,
17131da177e4SLinus Torvalds 		 ** skip it
17141da177e4SLinus Torvalds 		 */
17151da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
17161da177e4SLinus Torvalds 			goto next;
17171da177e4SLinus Torvalds 		}
	/* unlocked pre-check; the same conditions are repeated below once
	 * the buffer is locked
	 */
17181da177e4SLinus Torvalds 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
17191da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
17201da177e4SLinus Torvalds 			/* we can race against journal_mark_freed when we try
17211da177e4SLinus Torvalds 			 * to lock_buffer(cn->bh), so we have to inc the buffer
17221da177e4SLinus Torvalds 			 * count, and recheck things after locking
17231da177e4SLinus Torvalds 			 */
17241da177e4SLinus Torvalds 			tmp_bh = cn->bh;
17251da177e4SLinus Torvalds 			get_bh(tmp_bh);
17261da177e4SLinus Torvalds 			lock_buffer(tmp_bh);
17271da177e4SLinus Torvalds 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
17281da177e4SLinus Torvalds 				if (!buffer_journal_dirty(tmp_bh) ||
17291da177e4SLinus Torvalds 				    buffer_journal_prepared(tmp_bh))
17301da177e4SLinus Torvalds 					BUG();
	/* the buffer is still locked when it is handed to the chunk */
17311da177e4SLinus Torvalds 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
17321da177e4SLinus Torvalds 				ret++;
17331da177e4SLinus Torvalds 			} else {
17341da177e4SLinus Torvalds 				/* note, cn->bh might be null now */
17351da177e4SLinus Torvalds 				unlock_buffer(tmp_bh);
17361da177e4SLinus Torvalds 			}
17371da177e4SLinus Torvalds 			put_bh(tmp_bh);
17381da177e4SLinus Torvalds 		}
17391da177e4SLinus Torvalds 	      next:
17401da177e4SLinus Torvalds 		cn = cn->next;
17411da177e4SLinus Torvalds 		cond_resched();
17421da177e4SLinus Torvalds 	}
17431da177e4SLinus Torvalds 	return ret;
17441da177e4SLinus Torvalds }
17451da177e4SLinus Torvalds 
17461da177e4SLinus Torvalds /* used by flush_commit_list */
17471da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
17481da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl)
17491da177e4SLinus Torvalds {
17501da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
17511da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
17521da177e4SLinus Torvalds 	int ret = 0;
17531da177e4SLinus Torvalds 
17541da177e4SLinus Torvalds 	jl->j_state |= LIST_DIRTY;
17551da177e4SLinus Torvalds 	cn = jl->j_realblock;
17561da177e4SLinus Torvalds 	while (cn) {
17571da177e4SLinus Torvalds 		/* look for a more recent transaction that logged this
17581da177e4SLinus Torvalds 		 ** buffer.  Only the most recent transaction with a buffer in
17591da177e4SLinus Torvalds 		 ** it is allowed to send that buffer to disk
17601da177e4SLinus Torvalds 		 */
17611da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
1762bd4c625cSLinus Torvalds 		if (!pjl && cn->blocknr && cn->bh
1763bd4c625cSLinus Torvalds 		    && buffer_journal_dirty(cn->bh)) {
17641da177e4SLinus Torvalds 			BUG_ON(!can_dirty(cn));
17651da177e4SLinus Torvalds 			/* if the buffer is prepared, it will either be logged
17661da177e4SLinus Torvalds 			 * or restored.  If restored, we need to make sure
17671da177e4SLinus Torvalds 			 * it actually gets marked dirty
17681da177e4SLinus Torvalds 			 */
17691da177e4SLinus Torvalds 			clear_buffer_journal_new(cn->bh);
17701da177e4SLinus Torvalds 			if (buffer_journal_prepared(cn->bh)) {
17711da177e4SLinus Torvalds 				set_buffer_journal_restore_dirty(cn->bh);
17721da177e4SLinus Torvalds 			} else {
17731da177e4SLinus Torvalds 				set_buffer_journal_test(cn->bh);
17741da177e4SLinus Torvalds 				mark_buffer_dirty(cn->bh);
17751da177e4SLinus Torvalds 			}
17761da177e4SLinus Torvalds 		}
17771da177e4SLinus Torvalds 		cn = cn->next;
17781da177e4SLinus Torvalds 	}
17791da177e4SLinus Torvalds 	return ret;
17801da177e4SLinus Torvalds }
17811da177e4SLinus Torvalds 
17821da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s,
17831da177e4SLinus Torvalds 				struct reiserfs_journal_list *jl,
17841da177e4SLinus Torvalds 				struct reiserfs_journal_list **next_jl,
1785600ed416SJeff Mahoney 				unsigned int *next_trans_id,
1786bd4c625cSLinus Torvalds 				int num_blocks, int num_trans)
1787bd4c625cSLinus Torvalds {
17881da177e4SLinus Torvalds 	int ret = 0;
17891da177e4SLinus Torvalds 	int written = 0;
17901da177e4SLinus Torvalds 	int transactions_flushed = 0;
1791600ed416SJeff Mahoney 	unsigned int orig_trans_id = jl->j_trans_id;
17921da177e4SLinus Torvalds 	struct buffer_chunk chunk;
17931da177e4SLinus Torvalds 	struct list_head *entry;
17941da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17951da177e4SLinus Torvalds 	chunk.nr = 0;
17961da177e4SLinus Torvalds 
1797a412f9efSFrederic Weisbecker 	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
17981da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, orig_trans_id)) {
17991da177e4SLinus Torvalds 		goto done;
18001da177e4SLinus Torvalds 	}
18011da177e4SLinus Torvalds 
1802afe70259SJeff Mahoney 	/* we've got j_flush_mutex held, nobody is going to delete any
18031da177e4SLinus Torvalds 	 * of these lists out from underneath us
18041da177e4SLinus Torvalds 	 */
18051da177e4SLinus Torvalds 	while ((num_trans && transactions_flushed < num_trans) ||
18061da177e4SLinus Torvalds 	       (!num_trans && written < num_blocks)) {
18071da177e4SLinus Torvalds 
18081da177e4SLinus Torvalds 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1809bd4c625cSLinus Torvalds 		    atomic_read(&jl->j_commit_left)
1810bd4c625cSLinus Torvalds 		    || !(jl->j_state & LIST_DIRTY)) {
18111da177e4SLinus Torvalds 			del_from_work_list(s, jl);
18121da177e4SLinus Torvalds 			break;
18131da177e4SLinus Torvalds 		}
18141da177e4SLinus Torvalds 		ret = write_one_transaction(s, jl, &chunk);
18151da177e4SLinus Torvalds 
18161da177e4SLinus Torvalds 		if (ret < 0)
18171da177e4SLinus Torvalds 			goto done;
18181da177e4SLinus Torvalds 		transactions_flushed++;
18191da177e4SLinus Torvalds 		written += ret;
18201da177e4SLinus Torvalds 		entry = jl->j_list.next;
18211da177e4SLinus Torvalds 
18221da177e4SLinus Torvalds 		/* did we wrap? */
18231da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list) {
18241da177e4SLinus Torvalds 			break;
18251da177e4SLinus Torvalds 		}
18261da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
18271da177e4SLinus Torvalds 
18281da177e4SLinus Torvalds 		/* don't bother with older transactions */
18291da177e4SLinus Torvalds 		if (jl->j_trans_id <= orig_trans_id)
18301da177e4SLinus Torvalds 			break;
18311da177e4SLinus Torvalds 	}
18321da177e4SLinus Torvalds 	if (chunk.nr) {
18331da177e4SLinus Torvalds 		write_chunk(&chunk);
18341da177e4SLinus Torvalds 	}
18351da177e4SLinus Torvalds 
18361da177e4SLinus Torvalds       done:
1837afe70259SJeff Mahoney 	mutex_unlock(&journal->j_flush_mutex);
18381da177e4SLinus Torvalds 	return ret;
18391da177e4SLinus Torvalds }
18401da177e4SLinus Torvalds 
18411da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use
18421da177e4SLinus Torvalds ** all the journa list slots with tiny transactions.  These
18431da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which
18441da177e4SLinus Torvalds ** adds seeks and slows things down.
18451da177e4SLinus Torvalds **
18461da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists
18471da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal
18481da177e4SLinus Torvalds ** list updates the header block
18491da177e4SLinus Torvalds */
18501da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s,
1851bd4c625cSLinus Torvalds 				    struct reiserfs_journal_list *jl)
1852bd4c625cSLinus Torvalds {
18531da177e4SLinus Torvalds 	unsigned long len = 0;
18541da177e4SLinus Torvalds 	unsigned long cur_len;
18551da177e4SLinus Torvalds 	int ret;
18561da177e4SLinus Torvalds 	int i;
18571da177e4SLinus Torvalds 	int limit = 256;
18581da177e4SLinus Torvalds 	struct reiserfs_journal_list *tjl;
18591da177e4SLinus Torvalds 	struct reiserfs_journal_list *flush_jl;
1860600ed416SJeff Mahoney 	unsigned int trans_id;
18611da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
18621da177e4SLinus Torvalds 
18631da177e4SLinus Torvalds 	flush_jl = tjl = jl;
18641da177e4SLinus Torvalds 
18651da177e4SLinus Torvalds 	/* in data logging mode, try harder to flush a lot of blocks */
18661da177e4SLinus Torvalds 	if (reiserfs_data_log(s))
18671da177e4SLinus Torvalds 		limit = 1024;
18681da177e4SLinus Torvalds 	/* flush for 256 transactions or limit blocks, whichever comes first */
18691da177e4SLinus Torvalds 	for (i = 0; i < 256 && len < limit; i++) {
18701da177e4SLinus Torvalds 		if (atomic_read(&tjl->j_commit_left) ||
18711da177e4SLinus Torvalds 		    tjl->j_trans_id < jl->j_trans_id) {
18721da177e4SLinus Torvalds 			break;
18731da177e4SLinus Torvalds 		}
18741da177e4SLinus Torvalds 		cur_len = atomic_read(&tjl->j_nonzerolen);
18751da177e4SLinus Torvalds 		if (cur_len > 0) {
18761da177e4SLinus Torvalds 			tjl->j_state &= ~LIST_TOUCHED;
18771da177e4SLinus Torvalds 		}
18781da177e4SLinus Torvalds 		len += cur_len;
18791da177e4SLinus Torvalds 		flush_jl = tjl;
18801da177e4SLinus Torvalds 		if (tjl->j_list.next == &journal->j_journal_list)
18811da177e4SLinus Torvalds 			break;
18821da177e4SLinus Torvalds 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
18831da177e4SLinus Torvalds 	}
18841da177e4SLinus Torvalds 	/* try to find a group of blocks we can flush across all the
18851da177e4SLinus Torvalds 	 ** transactions, but only bother if we've actually spanned
18861da177e4SLinus Torvalds 	 ** across multiple lists
18871da177e4SLinus Torvalds 	 */
18881da177e4SLinus Torvalds 	if (flush_jl != jl) {
18891da177e4SLinus Torvalds 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
18901da177e4SLinus Torvalds 	}
18911da177e4SLinus Torvalds 	flush_journal_list(s, flush_jl, 1);
18921da177e4SLinus Torvalds 	return 0;
18931da177e4SLinus Torvalds }
18941da177e4SLinus Torvalds 
18951da177e4SLinus Torvalds /*
18961da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh.
18971da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers.
18981da177e4SLinus Torvalds */
18991da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb,
19001da177e4SLinus Torvalds 			 struct reiserfs_journal_cnode **table,
19011da177e4SLinus Torvalds 			 struct reiserfs_journal_list *jl,
19021da177e4SLinus Torvalds 			 unsigned long block, int remove_freed)
19031da177e4SLinus Torvalds {
19041da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur;
19051da177e4SLinus Torvalds 	struct reiserfs_journal_cnode **head;
19061da177e4SLinus Torvalds 
19071da177e4SLinus Torvalds 	head = &(journal_hash(table, sb, block));
19081da177e4SLinus Torvalds 	if (!head) {
19091da177e4SLinus Torvalds 		return;
19101da177e4SLinus Torvalds 	}
19111da177e4SLinus Torvalds 	cur = *head;
19121da177e4SLinus Torvalds 	while (cur) {
1913bd4c625cSLinus Torvalds 		if (cur->blocknr == block && cur->sb == sb
1914bd4c625cSLinus Torvalds 		    && (jl == NULL || jl == cur->jlist)
1915bd4c625cSLinus Torvalds 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
19161da177e4SLinus Torvalds 			if (cur->hnext) {
19171da177e4SLinus Torvalds 				cur->hnext->hprev = cur->hprev;
19181da177e4SLinus Torvalds 			}
19191da177e4SLinus Torvalds 			if (cur->hprev) {
19201da177e4SLinus Torvalds 				cur->hprev->hnext = cur->hnext;
19211da177e4SLinus Torvalds 			} else {
19221da177e4SLinus Torvalds 				*head = cur->hnext;
19231da177e4SLinus Torvalds 			}
19241da177e4SLinus Torvalds 			cur->blocknr = 0;
19251da177e4SLinus Torvalds 			cur->sb = NULL;
19261da177e4SLinus Torvalds 			cur->state = 0;
19271da177e4SLinus Torvalds 			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
19281da177e4SLinus Torvalds 				atomic_dec(&(cur->jlist->j_nonzerolen));
19291da177e4SLinus Torvalds 			cur->bh = NULL;
19301da177e4SLinus Torvalds 			cur->jlist = NULL;
19311da177e4SLinus Torvalds 		}
19321da177e4SLinus Torvalds 		cur = cur->hnext;
19331da177e4SLinus Torvalds 	}
19341da177e4SLinus Torvalds }
19351da177e4SLinus Torvalds 
1936a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb)
1937bd4c625cSLinus Torvalds {
1938a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1939d739b42bSPekka Enberg 	kfree(journal->j_current_jl);
19401da177e4SLinus Torvalds 	journal->j_num_lists--;
19411da177e4SLinus Torvalds 
19421da177e4SLinus Torvalds 	vfree(journal->j_cnode_free_orig);
1943a9dd3643SJeff Mahoney 	free_list_bitmaps(sb, journal->j_list_bitmap);
1944a9dd3643SJeff Mahoney 	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */
19451da177e4SLinus Torvalds 	if (journal->j_header_bh) {
19461da177e4SLinus Torvalds 		brelse(journal->j_header_bh);
19471da177e4SLinus Torvalds 	}
19481da177e4SLinus Torvalds 	/* j_header_bh is on the journal dev, make sure not to release the journal
19491da177e4SLinus Torvalds 	 * dev until we brelse j_header_bh
19501da177e4SLinus Torvalds 	 */
1951a9dd3643SJeff Mahoney 	release_journal_dev(sb, journal);
19521da177e4SLinus Torvalds 	vfree(journal);
19531da177e4SLinus Torvalds }
19541da177e4SLinus Torvalds 
19551da177e4SLinus Torvalds /*
19561da177e4SLinus Torvalds ** call on unmount.  Only set error to 1 if you haven't made your way out
19571da177e4SLinus Torvalds ** of read_super() yet.  Any other caller must keep error at 0.
19581da177e4SLinus Torvalds */
1959bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th,
1960a9dd3643SJeff Mahoney 			      struct super_block *sb, int error)
1961bd4c625cSLinus Torvalds {
19621da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
19631da177e4SLinus Torvalds 	int flushed = 0;
1964a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
19651da177e4SLinus Torvalds 
19661da177e4SLinus Torvalds 	/* we only want to flush out transactions if we were called with error == 0
19671da177e4SLinus Torvalds 	 */
1968a9dd3643SJeff Mahoney 	if (!error && !(sb->s_flags & MS_RDONLY)) {
19691da177e4SLinus Torvalds 		/* end the current trans */
19701da177e4SLinus Torvalds 		BUG_ON(!th->t_trans_id);
1971a9dd3643SJeff Mahoney 		do_journal_end(th, sb, 10, FLUSH_ALL);
19721da177e4SLinus Torvalds 
19731da177e4SLinus Torvalds 		/* make sure something gets logged to force our way into the flush code */
1974a9dd3643SJeff Mahoney 		if (!journal_join(&myth, sb, 1)) {
1975a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1976a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1977bd4c625cSLinus Torvalds 						     1);
1978a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1979a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1980a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19811da177e4SLinus Torvalds 			flushed = 1;
19821da177e4SLinus Torvalds 		}
19831da177e4SLinus Torvalds 	}
19841da177e4SLinus Torvalds 
19851da177e4SLinus Torvalds 	/* this also catches errors during the do_journal_end above */
19861da177e4SLinus Torvalds 	if (!error && reiserfs_is_journal_aborted(journal)) {
19871da177e4SLinus Torvalds 		memset(&myth, 0, sizeof(myth));
1988a9dd3643SJeff Mahoney 		if (!journal_join_abort(&myth, sb, 1)) {
1989a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1990a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1991bd4c625cSLinus Torvalds 						     1);
1992a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1993a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1994a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19951da177e4SLinus Torvalds 		}
19961da177e4SLinus Torvalds 	}
19971da177e4SLinus Torvalds 
19981da177e4SLinus Torvalds 	reiserfs_mounted_fs_count--;
19991da177e4SLinus Torvalds 	/* wait for all commits to finish */
2000a9dd3643SJeff Mahoney 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
20018ebc4232SFrederic Weisbecker 
20028ebc4232SFrederic Weisbecker 	/*
20038ebc4232SFrederic Weisbecker 	 * We must release the write lock here because
20048ebc4232SFrederic Weisbecker 	 * the workqueue job (flush_async_commit) needs this lock
20058ebc4232SFrederic Weisbecker 	 */
20068ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
20071da177e4SLinus Torvalds 	flush_workqueue(commit_wq);
20088ebc4232SFrederic Weisbecker 
20091da177e4SLinus Torvalds 	if (!reiserfs_mounted_fs_count) {
20101da177e4SLinus Torvalds 		destroy_workqueue(commit_wq);
20111da177e4SLinus Torvalds 		commit_wq = NULL;
20121da177e4SLinus Torvalds 	}
20131da177e4SLinus Torvalds 
2014a9dd3643SJeff Mahoney 	free_journal_ram(sb);
20151da177e4SLinus Torvalds 
20160523676dSFrederic Weisbecker 	reiserfs_write_lock(sb);
20170523676dSFrederic Weisbecker 
20181da177e4SLinus Torvalds 	return 0;
20191da177e4SLinus Torvalds }
20201da177e4SLinus Torvalds 
/*
 * Call on unmount.  Flushes all journal transactions and releases all
 * allocated ram by running do_journal_release() with error == 0.
 */
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *sb)
{
	const int error = 0;

	return do_journal_release(th, sb, error);
}
2029bd4c625cSLinus Torvalds 
/*
 * Only call from an error condition inside reiserfs_read_super!
 * Runs do_journal_release() with error == 1, so no transactions are
 * flushed before the journal is torn down.
 */
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *sb)
{
	const int error = 1;

	return do_journal_release(th, sb, error);
}
20381da177e4SLinus Torvalds 
20391da177e4SLinus Torvalds /* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
2040a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb,
2041bd4c625cSLinus Torvalds 				       struct reiserfs_journal_desc *desc,
2042bd4c625cSLinus Torvalds 				       struct reiserfs_journal_commit *commit)
2043bd4c625cSLinus Torvalds {
20441da177e4SLinus Torvalds 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
20451da177e4SLinus Torvalds 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
2046a9dd3643SJeff Mahoney 	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
2047bd4c625cSLinus Torvalds 	    get_commit_trans_len(commit) <= 0) {
20481da177e4SLinus Torvalds 		return 1;
20491da177e4SLinus Torvalds 	}
20501da177e4SLinus Torvalds 	return 0;
20511da177e4SLinus Torvalds }
2052bd4c625cSLinus Torvalds 
20531da177e4SLinus Torvalds /* returns 0 if it did not find a description block
20541da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block
20551da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid
20561da177e4SLinus Torvalds */
2057a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb,
2058bd4c625cSLinus Torvalds 					struct buffer_head *d_bh,
2059600ed416SJeff Mahoney 					unsigned int *oldest_invalid_trans_id,
2060bd4c625cSLinus Torvalds 					unsigned long *newest_mount_id)
2061bd4c625cSLinus Torvalds {
20621da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
20631da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
20641da177e4SLinus Torvalds 	struct buffer_head *c_bh;
20651da177e4SLinus Torvalds 	unsigned long offset;
20661da177e4SLinus Torvalds 
20671da177e4SLinus Torvalds 	if (!d_bh)
20681da177e4SLinus Torvalds 		return 0;
20691da177e4SLinus Torvalds 
20701da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2071bd4c625cSLinus Torvalds 	if (get_desc_trans_len(desc) > 0
2072bd4c625cSLinus Torvalds 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
2073bd4c625cSLinus Torvalds 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
2074bd4c625cSLinus Torvalds 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
2075a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2076bd4c625cSLinus Torvalds 				       "journal-986: transaction "
20771da177e4SLinus Torvalds 				       "is valid returning because trans_id %d is greater than "
2078bd4c625cSLinus Torvalds 				       "oldest_invalid %lu",
2079bd4c625cSLinus Torvalds 				       get_desc_trans_id(desc),
20801da177e4SLinus Torvalds 				       *oldest_invalid_trans_id);
20811da177e4SLinus Torvalds 			return 0;
20821da177e4SLinus Torvalds 		}
2083bd4c625cSLinus Torvalds 		if (newest_mount_id
2084bd4c625cSLinus Torvalds 		    && *newest_mount_id > get_desc_mount_id(desc)) {
2085a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2086bd4c625cSLinus Torvalds 				       "journal-1087: transaction "
20871da177e4SLinus Torvalds 				       "is valid returning because mount_id %d is less than "
2088bd4c625cSLinus Torvalds 				       "newest_mount_id %lu",
2089bd4c625cSLinus Torvalds 				       get_desc_mount_id(desc),
20901da177e4SLinus Torvalds 				       *newest_mount_id);
20911da177e4SLinus Torvalds 			return -1;
20921da177e4SLinus Torvalds 		}
2093a9dd3643SJeff Mahoney 		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2094a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2018",
209545b03d5eSJeff Mahoney 					 "Bad transaction length %d "
209645b03d5eSJeff Mahoney 					 "encountered, ignoring transaction",
2097bd4c625cSLinus Torvalds 					 get_desc_trans_len(desc));
20981da177e4SLinus Torvalds 			return -1;
20991da177e4SLinus Torvalds 		}
2100a9dd3643SJeff Mahoney 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
21011da177e4SLinus Torvalds 
21021da177e4SLinus Torvalds 		/* ok, we have a journal description block, lets see if the transaction was valid */
2103bd4c625cSLinus Torvalds 		c_bh =
2104a9dd3643SJeff Mahoney 		    journal_bread(sb,
2105a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2106bd4c625cSLinus Torvalds 				  ((offset + get_desc_trans_len(desc) +
2107a9dd3643SJeff Mahoney 				    1) % SB_ONDISK_JOURNAL_SIZE(sb)));
21081da177e4SLinus Torvalds 		if (!c_bh)
21091da177e4SLinus Torvalds 			return 0;
21101da177e4SLinus Torvalds 		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2111a9dd3643SJeff Mahoney 		if (journal_compare_desc_commit(sb, desc, commit)) {
2112a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
21131da177e4SLinus Torvalds 				       "journal_transaction_is_valid, commit offset %ld had bad "
21141da177e4SLinus Torvalds 				       "time %d or length %d",
2115bd4c625cSLinus Torvalds 				       c_bh->b_blocknr -
2116a9dd3643SJeff Mahoney 				       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
21171da177e4SLinus Torvalds 				       get_commit_trans_id(commit),
21181da177e4SLinus Torvalds 				       get_commit_trans_len(commit));
21191da177e4SLinus Torvalds 			brelse(c_bh);
21201da177e4SLinus Torvalds 			if (oldest_invalid_trans_id) {
2121bd4c625cSLinus Torvalds 				*oldest_invalid_trans_id =
2122bd4c625cSLinus Torvalds 				    get_desc_trans_id(desc);
2123a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2124bd4c625cSLinus Torvalds 					       "journal-1004: "
21251da177e4SLinus Torvalds 					       "transaction_is_valid setting oldest invalid trans_id "
2126bd4c625cSLinus Torvalds 					       "to %d",
2127bd4c625cSLinus Torvalds 					       get_desc_trans_id(desc));
21281da177e4SLinus Torvalds 			}
21291da177e4SLinus Torvalds 			return -1;
21301da177e4SLinus Torvalds 		}
21311da177e4SLinus Torvalds 		brelse(c_bh);
2132a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2133bd4c625cSLinus Torvalds 			       "journal-1006: found valid "
21341da177e4SLinus Torvalds 			       "transaction start offset %llu, len %d id %d",
2135bd4c625cSLinus Torvalds 			       d_bh->b_blocknr -
2136a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2137bd4c625cSLinus Torvalds 			       get_desc_trans_len(desc),
2138bd4c625cSLinus Torvalds 			       get_desc_trans_id(desc));
21391da177e4SLinus Torvalds 		return 1;
21401da177e4SLinus Torvalds 	} else {
21411da177e4SLinus Torvalds 		return 0;
21421da177e4SLinus Torvalds 	}
21431da177e4SLinus Torvalds }
21441da177e4SLinus Torvalds 
/* Drop one reference on each of the first num buffer heads in heads. */
static void brelse_array(struct buffer_head **heads, int num)
{
	int idx = 0;

	while (idx < num) {
		brelse(heads[idx]);
		idx++;
	}
}
21521da177e4SLinus Torvalds 
21531da177e4SLinus Torvalds /*
21541da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions,
21551da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction
21561da177e4SLinus Torvalds ** is invalid, or too old.
21571da177e4SLinus Torvalds */
2158a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb,
2159bd4c625cSLinus Torvalds 				    unsigned long cur_dblock,
2160bd4c625cSLinus Torvalds 				    unsigned long oldest_start,
2161600ed416SJeff Mahoney 				    unsigned int oldest_trans_id,
2162bd4c625cSLinus Torvalds 				    unsigned long newest_mount_id)
2163bd4c625cSLinus Torvalds {
2164a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
21651da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
21661da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
2167600ed416SJeff Mahoney 	unsigned int trans_id = 0;
21681da177e4SLinus Torvalds 	struct buffer_head *c_bh;
21691da177e4SLinus Torvalds 	struct buffer_head *d_bh;
21701da177e4SLinus Torvalds 	struct buffer_head **log_blocks = NULL;
21711da177e4SLinus Torvalds 	struct buffer_head **real_blocks = NULL;
2172600ed416SJeff Mahoney 	unsigned int trans_offset;
21731da177e4SLinus Torvalds 	int i;
21741da177e4SLinus Torvalds 	int trans_half;
21751da177e4SLinus Torvalds 
2176a9dd3643SJeff Mahoney 	d_bh = journal_bread(sb, cur_dblock);
21771da177e4SLinus Torvalds 	if (!d_bh)
21781da177e4SLinus Torvalds 		return 1;
21791da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2180a9dd3643SJeff Mahoney 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2181a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
21821da177e4SLinus Torvalds 		       "journal_read_transaction, offset %llu, len %d mount_id %d",
2183a9dd3643SJeff Mahoney 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
21841da177e4SLinus Torvalds 		       get_desc_trans_len(desc), get_desc_mount_id(desc));
21851da177e4SLinus Torvalds 	if (get_desc_trans_id(desc) < oldest_trans_id) {
2186a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
21871da177e4SLinus Torvalds 			       "journal_read_trans skipping because %lu is too old",
2188bd4c625cSLinus Torvalds 			       cur_dblock -
2189a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb));
21901da177e4SLinus Torvalds 		brelse(d_bh);
21911da177e4SLinus Torvalds 		return 1;
21921da177e4SLinus Torvalds 	}
21931da177e4SLinus Torvalds 	if (get_desc_mount_id(desc) != newest_mount_id) {
2194a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
21951da177e4SLinus Torvalds 			       "journal_read_trans skipping because %d is != "
21961da177e4SLinus Torvalds 			       "newest_mount_id %lu", get_desc_mount_id(desc),
21971da177e4SLinus Torvalds 			       newest_mount_id);
21981da177e4SLinus Torvalds 		brelse(d_bh);
21991da177e4SLinus Torvalds 		return 1;
22001da177e4SLinus Torvalds 	}
2201a9dd3643SJeff Mahoney 	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
22021da177e4SLinus Torvalds 			     ((trans_offset + get_desc_trans_len(desc) + 1) %
2203a9dd3643SJeff Mahoney 			      SB_ONDISK_JOURNAL_SIZE(sb)));
22041da177e4SLinus Torvalds 	if (!c_bh) {
22051da177e4SLinus Torvalds 		brelse(d_bh);
22061da177e4SLinus Torvalds 		return 1;
22071da177e4SLinus Torvalds 	}
22081da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2209a9dd3643SJeff Mahoney 	if (journal_compare_desc_commit(sb, desc, commit)) {
2210a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2211bd4c625cSLinus Torvalds 			       "journal_read_transaction, "
22121da177e4SLinus Torvalds 			       "commit offset %llu had bad time %d or length %d",
2213bd4c625cSLinus Torvalds 			       c_bh->b_blocknr -
2214a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2215bd4c625cSLinus Torvalds 			       get_commit_trans_id(commit),
2216bd4c625cSLinus Torvalds 			       get_commit_trans_len(commit));
22171da177e4SLinus Torvalds 		brelse(c_bh);
22181da177e4SLinus Torvalds 		brelse(d_bh);
22191da177e4SLinus Torvalds 		return 1;
22201da177e4SLinus Torvalds 	}
22213f8b5ee3SJeff Mahoney 
22223f8b5ee3SJeff Mahoney 	if (bdev_read_only(sb->s_bdev)) {
22233f8b5ee3SJeff Mahoney 		reiserfs_warning(sb, "clm-2076",
22243f8b5ee3SJeff Mahoney 				 "device is readonly, unable to replay log");
22253f8b5ee3SJeff Mahoney 		brelse(c_bh);
22263f8b5ee3SJeff Mahoney 		brelse(d_bh);
22273f8b5ee3SJeff Mahoney 		return -EROFS;
22283f8b5ee3SJeff Mahoney 	}
22293f8b5ee3SJeff Mahoney 
22301da177e4SLinus Torvalds 	trans_id = get_desc_trans_id(desc);
22311da177e4SLinus Torvalds 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
2232d739b42bSPekka Enberg 	log_blocks = kmalloc(get_desc_trans_len(desc) *
2233d739b42bSPekka Enberg 			     sizeof(struct buffer_head *), GFP_NOFS);
2234d739b42bSPekka Enberg 	real_blocks = kmalloc(get_desc_trans_len(desc) *
2235d739b42bSPekka Enberg 			      sizeof(struct buffer_head *), GFP_NOFS);
22361da177e4SLinus Torvalds 	if (!log_blocks || !real_blocks) {
22371da177e4SLinus Torvalds 		brelse(c_bh);
22381da177e4SLinus Torvalds 		brelse(d_bh);
2239d739b42bSPekka Enberg 		kfree(log_blocks);
2240d739b42bSPekka Enberg 		kfree(real_blocks);
2241a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1169",
224245b03d5eSJeff Mahoney 				 "kmalloc failed, unable to mount FS");
22431da177e4SLinus Torvalds 		return -1;
22441da177e4SLinus Torvalds 	}
22451da177e4SLinus Torvalds 	/* get all the buffer heads */
2246a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
22471da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
2248bd4c625cSLinus Torvalds 		log_blocks[i] =
2249a9dd3643SJeff Mahoney 		    journal_getblk(sb,
2250a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2251bd4c625cSLinus Torvalds 				   (trans_offset + 1 +
2252a9dd3643SJeff Mahoney 				    i) % SB_ONDISK_JOURNAL_SIZE(sb));
22531da177e4SLinus Torvalds 		if (i < trans_half) {
2254bd4c625cSLinus Torvalds 			real_blocks[i] =
2255a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2256bd4c625cSLinus Torvalds 				      le32_to_cpu(desc->j_realblock[i]));
22571da177e4SLinus Torvalds 		} else {
2258bd4c625cSLinus Torvalds 			real_blocks[i] =
2259a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2260bd4c625cSLinus Torvalds 				      le32_to_cpu(commit->
2261bd4c625cSLinus Torvalds 						  j_realblock[i - trans_half]));
22621da177e4SLinus Torvalds 		}
2263a9dd3643SJeff Mahoney 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2264a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1207",
226545b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
226645b03d5eSJeff Mahoney 					 "Block to replay is outside of "
226745b03d5eSJeff Mahoney 					 "filesystem");
22681da177e4SLinus Torvalds 			goto abort_replay;
22691da177e4SLinus Torvalds 		}
22701da177e4SLinus Torvalds 		/* make sure we don't try to replay onto log or reserved area */
2271bd4c625cSLinus Torvalds 		if (is_block_in_log_or_reserved_area
2272a9dd3643SJeff Mahoney 		    (sb, real_blocks[i]->b_blocknr)) {
2273a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1204",
227445b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
227545b03d5eSJeff Mahoney 					 "Trying to replay onto a log block");
22761da177e4SLinus Torvalds 		      abort_replay:
22771da177e4SLinus Torvalds 			brelse_array(log_blocks, i);
22781da177e4SLinus Torvalds 			brelse_array(real_blocks, i);
22791da177e4SLinus Torvalds 			brelse(c_bh);
22801da177e4SLinus Torvalds 			brelse(d_bh);
2281d739b42bSPekka Enberg 			kfree(log_blocks);
2282d739b42bSPekka Enberg 			kfree(real_blocks);
22831da177e4SLinus Torvalds 			return -1;
22841da177e4SLinus Torvalds 		}
22851da177e4SLinus Torvalds 	}
22861da177e4SLinus Torvalds 	/* read in the log blocks, memcpy to the corresponding real block */
22871da177e4SLinus Torvalds 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
22881da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22898ebc4232SFrederic Weisbecker 
22908ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
22911da177e4SLinus Torvalds 		wait_on_buffer(log_blocks[i]);
22928ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
22938ebc4232SFrederic Weisbecker 
22941da177e4SLinus Torvalds 		if (!buffer_uptodate(log_blocks[i])) {
2295a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1212",
229645b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
229745b03d5eSJeff Mahoney 					 "buffer write failed");
2298bd4c625cSLinus Torvalds 			brelse_array(log_blocks + i,
2299bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
23001da177e4SLinus Torvalds 			brelse_array(real_blocks, get_desc_trans_len(desc));
23011da177e4SLinus Torvalds 			brelse(c_bh);
23021da177e4SLinus Torvalds 			brelse(d_bh);
2303d739b42bSPekka Enberg 			kfree(log_blocks);
2304d739b42bSPekka Enberg 			kfree(real_blocks);
23051da177e4SLinus Torvalds 			return -1;
23061da177e4SLinus Torvalds 		}
2307bd4c625cSLinus Torvalds 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2308bd4c625cSLinus Torvalds 		       real_blocks[i]->b_size);
23091da177e4SLinus Torvalds 		set_buffer_uptodate(real_blocks[i]);
23101da177e4SLinus Torvalds 		brelse(log_blocks[i]);
23111da177e4SLinus Torvalds 	}
23121da177e4SLinus Torvalds 	/* flush out the real blocks */
23131da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
23141da177e4SLinus Torvalds 		set_buffer_dirty(real_blocks[i]);
231553778ffdSJan Kara 		ll_rw_block(SWRITE, 1, real_blocks + i);
23161da177e4SLinus Torvalds 	}
23171da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
23181da177e4SLinus Torvalds 		wait_on_buffer(real_blocks[i]);
23191da177e4SLinus Torvalds 		if (!buffer_uptodate(real_blocks[i])) {
2320a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1226",
232145b03d5eSJeff Mahoney 					 "REPLAY FAILURE, fsck required! "
232245b03d5eSJeff Mahoney 					 "buffer write failed");
2323bd4c625cSLinus Torvalds 			brelse_array(real_blocks + i,
2324bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
23251da177e4SLinus Torvalds 			brelse(c_bh);
23261da177e4SLinus Torvalds 			brelse(d_bh);
2327d739b42bSPekka Enberg 			kfree(log_blocks);
2328d739b42bSPekka Enberg 			kfree(real_blocks);
23291da177e4SLinus Torvalds 			return -1;
23301da177e4SLinus Torvalds 		}
23311da177e4SLinus Torvalds 		brelse(real_blocks[i]);
23321da177e4SLinus Torvalds 	}
2333bd4c625cSLinus Torvalds 	cur_dblock =
2334a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2335bd4c625cSLinus Torvalds 	    ((trans_offset + get_desc_trans_len(desc) +
2336a9dd3643SJeff Mahoney 	      2) % SB_ONDISK_JOURNAL_SIZE(sb));
2337a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2338bd4c625cSLinus Torvalds 		       "journal-1095: setting journal " "start to offset %ld",
2339a9dd3643SJeff Mahoney 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
23401da177e4SLinus Torvalds 
23411da177e4SLinus Torvalds 	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2342a9dd3643SJeff Mahoney 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
23431da177e4SLinus Torvalds 	journal->j_last_flush_trans_id = trans_id;
23441da177e4SLinus Torvalds 	journal->j_trans_id = trans_id + 1;
2345a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
2346a44c94a7SAlexander Zarochentsev 	if (journal->j_trans_id == 0)
2347a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
23481da177e4SLinus Torvalds 	brelse(c_bh);
23491da177e4SLinus Torvalds 	brelse(d_bh);
2350d739b42bSPekka Enberg 	kfree(log_blocks);
2351d739b42bSPekka Enberg 	kfree(real_blocks);
23521da177e4SLinus Torvalds 	return 0;
23531da177e4SLinus Torvalds }
23541da177e4SLinus Torvalds 
23551da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize
23561da177e4SLinus Torvalds    size (but no more than BUFNR blocks at a time). This proved to improve
23571da177e4SLinus Torvalds    mounting speed on self-rebuilding raid5 arrays at least.
23581da177e4SLinus Torvalds    Right now it is only used from journal code. But later we might use it
23591da177e4SLinus Torvalds    from other places.
23601da177e4SLinus Torvalds    Note: Do not use journal_getblk/sb_getblk functions here! */
23613ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev,
23623ee16670SJeff Mahoney 					   b_blocknr_t block, int bufsize,
23633ee16670SJeff Mahoney 					   b_blocknr_t max_block)
23641da177e4SLinus Torvalds {
23651da177e4SLinus Torvalds 	struct buffer_head *bhlist[BUFNR];
23661da177e4SLinus Torvalds 	unsigned int blocks = BUFNR;
23671da177e4SLinus Torvalds 	struct buffer_head *bh;
23681da177e4SLinus Torvalds 	int i, j;
23691da177e4SLinus Torvalds 
23701da177e4SLinus Torvalds 	bh = __getblk(dev, block, bufsize);
23711da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23721da177e4SLinus Torvalds 		return (bh);
23731da177e4SLinus Torvalds 
23741da177e4SLinus Torvalds 	if (block + BUFNR > max_block) {
23751da177e4SLinus Torvalds 		blocks = max_block - block;
23761da177e4SLinus Torvalds 	}
23771da177e4SLinus Torvalds 	bhlist[0] = bh;
23781da177e4SLinus Torvalds 	j = 1;
23791da177e4SLinus Torvalds 	for (i = 1; i < blocks; i++) {
23801da177e4SLinus Torvalds 		bh = __getblk(dev, block + i, bufsize);
23811da177e4SLinus Torvalds 		if (buffer_uptodate(bh)) {
23821da177e4SLinus Torvalds 			brelse(bh);
23831da177e4SLinus Torvalds 			break;
2384bd4c625cSLinus Torvalds 		} else
2385bd4c625cSLinus Torvalds 			bhlist[j++] = bh;
23861da177e4SLinus Torvalds 	}
23871da177e4SLinus Torvalds 	ll_rw_block(READ, j, bhlist);
23881da177e4SLinus Torvalds 	for (i = 1; i < j; i++)
23891da177e4SLinus Torvalds 		brelse(bhlist[i]);
23901da177e4SLinus Torvalds 	bh = bhlist[0];
23911da177e4SLinus Torvalds 	wait_on_buffer(bh);
23921da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23931da177e4SLinus Torvalds 		return bh;
23941da177e4SLinus Torvalds 	brelse(bh);
23951da177e4SLinus Torvalds 	return NULL;
23961da177e4SLinus Torvalds }
23971da177e4SLinus Torvalds 
23981da177e4SLinus Torvalds /*
23991da177e4SLinus Torvalds ** read and replay the log
24001da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
24011da177e4SLinus Torvalds ** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
24021da177e4SLinus Torvalds **
24031da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
24041da177e4SLinus Torvalds **
24051da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly.
24061da177e4SLinus Torvalds */
2407a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb)
2408bd4c625cSLinus Torvalds {
2409a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
24101da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
2411600ed416SJeff Mahoney 	unsigned int oldest_trans_id = 0;
2412600ed416SJeff Mahoney 	unsigned int oldest_invalid_trans_id = 0;
24131da177e4SLinus Torvalds 	time_t start;
24141da177e4SLinus Torvalds 	unsigned long oldest_start = 0;
24151da177e4SLinus Torvalds 	unsigned long cur_dblock = 0;
24161da177e4SLinus Torvalds 	unsigned long newest_mount_id = 9;
24171da177e4SLinus Torvalds 	struct buffer_head *d_bh;
24181da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
24191da177e4SLinus Torvalds 	int valid_journal_header = 0;
24201da177e4SLinus Torvalds 	int replay_count = 0;
24211da177e4SLinus Torvalds 	int continue_replay = 1;
24221da177e4SLinus Torvalds 	int ret;
24231da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
24241da177e4SLinus Torvalds 
2425a9dd3643SJeff Mahoney 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2426a9dd3643SJeff Mahoney 	reiserfs_info(sb, "checking transaction log (%s)\n",
24271da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b));
24281da177e4SLinus Torvalds 	start = get_seconds();
24291da177e4SLinus Torvalds 
24301da177e4SLinus Torvalds 	/* step 1, read in the journal header block.  Check the transaction it says
24311da177e4SLinus Torvalds 	 ** is the first unflushed, and if that transaction is not valid,
24321da177e4SLinus Torvalds 	 ** replay is done
24331da177e4SLinus Torvalds 	 */
2434a9dd3643SJeff Mahoney 	journal->j_header_bh = journal_bread(sb,
2435a9dd3643SJeff Mahoney 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2436a9dd3643SJeff Mahoney 					     + SB_ONDISK_JOURNAL_SIZE(sb));
24371da177e4SLinus Torvalds 	if (!journal->j_header_bh) {
24381da177e4SLinus Torvalds 		return 1;
24391da177e4SLinus Torvalds 	}
24401da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2441c499ec24SVladimir V. Saveliev 	if (le32_to_cpu(jh->j_first_unflushed_offset) <
2442a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_SIZE(sb)
2443bd4c625cSLinus Torvalds 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2444bd4c625cSLinus Torvalds 		oldest_start =
2445a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
24461da177e4SLinus Torvalds 		    le32_to_cpu(jh->j_first_unflushed_offset);
24471da177e4SLinus Torvalds 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
24481da177e4SLinus Torvalds 		newest_mount_id = le32_to_cpu(jh->j_mount_id);
2449a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2450bd4c625cSLinus Torvalds 			       "journal-1153: found in "
24511da177e4SLinus Torvalds 			       "header: first_unflushed_offset %d, last_flushed_trans_id "
24521da177e4SLinus Torvalds 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
24531da177e4SLinus Torvalds 			       le32_to_cpu(jh->j_last_flush_trans_id));
24541da177e4SLinus Torvalds 		valid_journal_header = 1;
24551da177e4SLinus Torvalds 
24561da177e4SLinus Torvalds 		/* now, we try to read the first unflushed offset.  If it is not valid,
24571da177e4SLinus Torvalds 		 ** there is nothing more we can do, and it makes no sense to read
24581da177e4SLinus Torvalds 		 ** through the whole log.
24591da177e4SLinus Torvalds 		 */
2460bd4c625cSLinus Torvalds 		d_bh =
2461a9dd3643SJeff Mahoney 		    journal_bread(sb,
2462a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2463bd4c625cSLinus Torvalds 				  le32_to_cpu(jh->j_first_unflushed_offset));
2464a9dd3643SJeff Mahoney 		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
24651da177e4SLinus Torvalds 		if (!ret) {
24661da177e4SLinus Torvalds 			continue_replay = 0;
24671da177e4SLinus Torvalds 		}
24681da177e4SLinus Torvalds 		brelse(d_bh);
24691da177e4SLinus Torvalds 		goto start_log_replay;
24701da177e4SLinus Torvalds 	}
24711da177e4SLinus Torvalds 
24721da177e4SLinus Torvalds 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
24731da177e4SLinus Torvalds 	 ** all the valid transactions, and pick out the oldest.
24741da177e4SLinus Torvalds 	 */
2475bd4c625cSLinus Torvalds 	while (continue_replay
2476bd4c625cSLinus Torvalds 	       && cur_dblock <
2477a9dd3643SJeff Mahoney 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2478a9dd3643SJeff Mahoney 		SB_ONDISK_JOURNAL_SIZE(sb))) {
24791da177e4SLinus Torvalds 		/* Note that it is required for blocksize of primary fs device and journal
24801da177e4SLinus Torvalds 		   device to be the same */
2481bd4c625cSLinus Torvalds 		d_bh =
2482bd4c625cSLinus Torvalds 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
2483a9dd3643SJeff Mahoney 				    sb->s_blocksize,
2484a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2485a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_SIZE(sb));
2486bd4c625cSLinus Torvalds 		ret =
2487a9dd3643SJeff Mahoney 		    journal_transaction_is_valid(sb, d_bh,
2488bd4c625cSLinus Torvalds 						 &oldest_invalid_trans_id,
2489bd4c625cSLinus Torvalds 						 &newest_mount_id);
24901da177e4SLinus Torvalds 		if (ret == 1) {
24911da177e4SLinus Torvalds 			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
24921da177e4SLinus Torvalds 			if (oldest_start == 0) {	/* init all oldest_ values */
24931da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24941da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
24951da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2496a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2497bd4c625cSLinus Torvalds 					       "journal-1179: Setting "
24981da177e4SLinus Torvalds 					       "oldest_start to offset %llu, trans_id %lu",
2499bd4c625cSLinus Torvalds 					       oldest_start -
2500bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2501a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
25021da177e4SLinus Torvalds 			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
25031da177e4SLinus Torvalds 				/* one we just read was older */
25041da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
25051da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
2506a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2507bd4c625cSLinus Torvalds 					       "journal-1180: Resetting "
25081da177e4SLinus Torvalds 					       "oldest_start to offset %lu, trans_id %lu",
2509bd4c625cSLinus Torvalds 					       oldest_start -
2510bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2511a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
25121da177e4SLinus Torvalds 			}
25131da177e4SLinus Torvalds 			if (newest_mount_id < get_desc_mount_id(desc)) {
25141da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2515a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2516bd4c625cSLinus Torvalds 					       "journal-1299: Setting "
2517bd4c625cSLinus Torvalds 					       "newest_mount_id to %d",
2518bd4c625cSLinus Torvalds 					       get_desc_mount_id(desc));
25191da177e4SLinus Torvalds 			}
25201da177e4SLinus Torvalds 			cur_dblock += get_desc_trans_len(desc) + 2;
25211da177e4SLinus Torvalds 		} else {
25221da177e4SLinus Torvalds 			cur_dblock++;
25231da177e4SLinus Torvalds 		}
25241da177e4SLinus Torvalds 		brelse(d_bh);
25251da177e4SLinus Torvalds 	}
25261da177e4SLinus Torvalds 
25271da177e4SLinus Torvalds       start_log_replay:
25281da177e4SLinus Torvalds 	cur_dblock = oldest_start;
25291da177e4SLinus Torvalds 	if (oldest_trans_id) {
2530a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2531bd4c625cSLinus Torvalds 			       "journal-1206: Starting replay "
25321da177e4SLinus Torvalds 			       "from offset %llu, trans_id %lu",
2533a9dd3643SJeff Mahoney 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
25341da177e4SLinus Torvalds 			       oldest_trans_id);
25351da177e4SLinus Torvalds 
25361da177e4SLinus Torvalds 	}
25371da177e4SLinus Torvalds 	replay_count = 0;
25381da177e4SLinus Torvalds 	while (continue_replay && oldest_trans_id > 0) {
2539bd4c625cSLinus Torvalds 		ret =
2540a9dd3643SJeff Mahoney 		    journal_read_transaction(sb, cur_dblock, oldest_start,
2541bd4c625cSLinus Torvalds 					     oldest_trans_id, newest_mount_id);
25421da177e4SLinus Torvalds 		if (ret < 0) {
25431da177e4SLinus Torvalds 			return ret;
25441da177e4SLinus Torvalds 		} else if (ret != 0) {
25451da177e4SLinus Torvalds 			break;
25461da177e4SLinus Torvalds 		}
2547bd4c625cSLinus Torvalds 		cur_dblock =
2548a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
25491da177e4SLinus Torvalds 		replay_count++;
25501da177e4SLinus Torvalds 		if (cur_dblock == oldest_start)
25511da177e4SLinus Torvalds 			break;
25521da177e4SLinus Torvalds 	}
25531da177e4SLinus Torvalds 
25541da177e4SLinus Torvalds 	if (oldest_trans_id == 0) {
2555a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2556bd4c625cSLinus Torvalds 			       "journal-1225: No valid " "transactions found");
25571da177e4SLinus Torvalds 	}
25581da177e4SLinus Torvalds 	/* j_start does not get set correctly if we don't replay any transactions.
25591da177e4SLinus Torvalds 	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
25601da177e4SLinus Torvalds 	 ** copy the trans_id from the header
25611da177e4SLinus Torvalds 	 */
25621da177e4SLinus Torvalds 	if (valid_journal_header && replay_count == 0) {
25631da177e4SLinus Torvalds 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2564bd4c625cSLinus Torvalds 		journal->j_trans_id =
2565bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2566a44c94a7SAlexander Zarochentsev 		/* check for trans_id overflow */
2567a44c94a7SAlexander Zarochentsev 		if (journal->j_trans_id == 0)
2568a44c94a7SAlexander Zarochentsev 			journal->j_trans_id = 10;
2569bd4c625cSLinus Torvalds 		journal->j_last_flush_trans_id =
2570bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id);
25711da177e4SLinus Torvalds 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
25721da177e4SLinus Torvalds 	} else {
25731da177e4SLinus Torvalds 		journal->j_mount_id = newest_mount_id + 1;
25741da177e4SLinus Torvalds 	}
2575a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
25761da177e4SLinus Torvalds 		       "newest_mount_id to %lu", journal->j_mount_id);
25771da177e4SLinus Torvalds 	journal->j_first_unflushed_offset = journal->j_start;
25781da177e4SLinus Torvalds 	if (replay_count > 0) {
2579a9dd3643SJeff Mahoney 		reiserfs_info(sb,
2580bd4c625cSLinus Torvalds 			      "replayed %d transactions in %lu seconds\n",
25811da177e4SLinus Torvalds 			      replay_count, get_seconds() - start);
25821da177e4SLinus Torvalds 	}
2583a9dd3643SJeff Mahoney 	if (!bdev_read_only(sb->s_bdev) &&
2584a9dd3643SJeff Mahoney 	    _update_journal_header_block(sb, journal->j_start,
2585bd4c625cSLinus Torvalds 					 journal->j_last_flush_trans_id)) {
25861da177e4SLinus Torvalds 		/* replay failed, caller must call free_journal_ram and abort
25871da177e4SLinus Torvalds 		 ** the mount
25881da177e4SLinus Torvalds 		 */
25891da177e4SLinus Torvalds 		return -1;
25901da177e4SLinus Torvalds 	}
25911da177e4SLinus Torvalds 	return 0;
25921da177e4SLinus Torvalds }
25931da177e4SLinus Torvalds 
25941da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
25951da177e4SLinus Torvalds {
25961da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
25978c777cc4SPekka Enberg 	jl = kzalloc(sizeof(struct reiserfs_journal_list),
25988c777cc4SPekka Enberg 		     GFP_NOFS | __GFP_NOFAIL);
25991da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_list);
26001da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_working_list);
26011da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
26021da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_bh_list);
260390415deaSJeff Mahoney 	mutex_init(&jl->j_commit_mutex);
26041da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_num_lists++;
26051da177e4SLinus Torvalds 	get_journal_list(jl);
26061da177e4SLinus Torvalds 	return jl;
26071da177e4SLinus Torvalds }
26081da177e4SLinus Torvalds 
2609a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb)
2610bd4c625cSLinus Torvalds {
2611a9dd3643SJeff Mahoney 	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
26121da177e4SLinus Torvalds }
26131da177e4SLinus Torvalds 
26141da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
26151da177e4SLinus Torvalds 			       struct reiserfs_journal *journal)
26161da177e4SLinus Torvalds {
26171da177e4SLinus Torvalds 	int result;
26181da177e4SLinus Torvalds 
26191da177e4SLinus Torvalds 	result = 0;
26201da177e4SLinus Torvalds 
262186098fa0SChristoph Hellwig 	if (journal->j_dev_bd != NULL) {
262286098fa0SChristoph Hellwig 		if (journal->j_dev_bd->bd_dev != super->s_dev)
262386098fa0SChristoph Hellwig 			bd_release(journal->j_dev_bd);
2624e5eb8caaSAl Viro 		result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
26251da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
26261da177e4SLinus Torvalds 	}
26271da177e4SLinus Torvalds 
26281da177e4SLinus Torvalds 	if (result != 0) {
262945b03d5eSJeff Mahoney 		reiserfs_warning(super, "sh-457",
263045b03d5eSJeff Mahoney 				 "Cannot release journal device: %i", result);
26311da177e4SLinus Torvalds 	}
26321da177e4SLinus Torvalds 	return result;
26331da177e4SLinus Torvalds }
26341da177e4SLinus Torvalds 
26351da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super,
26361da177e4SLinus Torvalds 			    struct reiserfs_journal *journal,
26371da177e4SLinus Torvalds 			    const char *jdev_name)
26381da177e4SLinus Torvalds {
26391da177e4SLinus Torvalds 	int result;
26401da177e4SLinus Torvalds 	dev_t jdev;
2641aeb5d727SAl Viro 	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
26421da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
26431da177e4SLinus Torvalds 
26441da177e4SLinus Torvalds 	result = 0;
26451da177e4SLinus Torvalds 
26461da177e4SLinus Torvalds 	journal->j_dev_bd = NULL;
26471da177e4SLinus Torvalds 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
26481da177e4SLinus Torvalds 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
26491da177e4SLinus Torvalds 
26501da177e4SLinus Torvalds 	if (bdev_read_only(super->s_bdev))
26511da177e4SLinus Torvalds 		blkdev_mode = FMODE_READ;
26521da177e4SLinus Torvalds 
26531da177e4SLinus Torvalds 	/* there is no "jdev" option and journal is on separate device */
26541da177e4SLinus Torvalds 	if ((!jdev_name || !jdev_name[0])) {
26551da177e4SLinus Torvalds 		journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2656e5eb8caaSAl Viro 		journal->j_dev_mode = blkdev_mode;
26571da177e4SLinus Torvalds 		if (IS_ERR(journal->j_dev_bd)) {
26581da177e4SLinus Torvalds 			result = PTR_ERR(journal->j_dev_bd);
26591da177e4SLinus Torvalds 			journal->j_dev_bd = NULL;
266045b03d5eSJeff Mahoney 			reiserfs_warning(super, "sh-458",
26611da177e4SLinus Torvalds 					 "cannot init journal device '%s': %i",
26621da177e4SLinus Torvalds 					 __bdevname(jdev, b), result);
26631da177e4SLinus Torvalds 			return result;
266486098fa0SChristoph Hellwig 		} else if (jdev != super->s_dev) {
266586098fa0SChristoph Hellwig 			result = bd_claim(journal->j_dev_bd, journal);
266686098fa0SChristoph Hellwig 			if (result) {
26679a1c3542SAl Viro 				blkdev_put(journal->j_dev_bd, blkdev_mode);
266886098fa0SChristoph Hellwig 				return result;
266986098fa0SChristoph Hellwig 			}
267086098fa0SChristoph Hellwig 
26711da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
267286098fa0SChristoph Hellwig 		}
267386098fa0SChristoph Hellwig 
26741da177e4SLinus Torvalds 		return 0;
26751da177e4SLinus Torvalds 	}
26761da177e4SLinus Torvalds 
2677e5eb8caaSAl Viro 	journal->j_dev_mode = blkdev_mode;
267830c40d2cSAl Viro 	journal->j_dev_bd = open_bdev_exclusive(jdev_name,
2679e5eb8caaSAl Viro 						blkdev_mode, journal);
268086098fa0SChristoph Hellwig 	if (IS_ERR(journal->j_dev_bd)) {
268186098fa0SChristoph Hellwig 		result = PTR_ERR(journal->j_dev_bd);
268286098fa0SChristoph Hellwig 		journal->j_dev_bd = NULL;
268386098fa0SChristoph Hellwig 		reiserfs_warning(super,
268486098fa0SChristoph Hellwig 				 "journal_init_dev: Cannot open '%s': %i",
268586098fa0SChristoph Hellwig 				 jdev_name, result);
268686098fa0SChristoph Hellwig 		return result;
268786098fa0SChristoph Hellwig 	}
268886098fa0SChristoph Hellwig 
26891da177e4SLinus Torvalds 	set_blocksize(journal->j_dev_bd, super->s_blocksize);
2690bd4c625cSLinus Torvalds 	reiserfs_info(super,
2691bd4c625cSLinus Torvalds 		      "journal_init_dev: journal device: %s\n",
269274f9f974SEdward Shishkin 		      bdevname(journal->j_dev_bd, b));
269386098fa0SChristoph Hellwig 	return 0;
26941da177e4SLinus Torvalds }
26951da177e4SLinus Torvalds 
2696cf3d0b81SEdward Shishkin /**
2697cf3d0b81SEdward Shishkin  * When creating/tuning a file system user can assign some
2698cf3d0b81SEdward Shishkin  * journal params within boundaries which depend on the ratio
2699cf3d0b81SEdward Shishkin  * blocksize/standard_blocksize.
2700cf3d0b81SEdward Shishkin  *
2701cf3d0b81SEdward Shishkin  * For blocks >= standard_blocksize transaction size should
2702cf3d0b81SEdward Shishkin  * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more
2703cf3d0b81SEdward Shishkin  * then JOURNAL_TRANS_MAX_DEFAULT.
2704cf3d0b81SEdward Shishkin  *
2705cf3d0b81SEdward Shishkin  * For blocks < standard_blocksize these boundaries should be
2706cf3d0b81SEdward Shishkin  * decreased proportionally.
2707cf3d0b81SEdward Shishkin  */
2708cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096)
2709cf3d0b81SEdward Shishkin 
2710a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb,
2711cf3d0b81SEdward Shishkin 				     struct reiserfs_journal *journal)
2712cf3d0b81SEdward Shishkin {
2713cf3d0b81SEdward Shishkin         if (journal->j_trans_max) {
2714cf3d0b81SEdward Shishkin 	        /* Non-default journal params.
2715cf3d0b81SEdward Shishkin 		   Do sanity check for them. */
2716cf3d0b81SEdward Shishkin 	        int ratio = 1;
2717a9dd3643SJeff Mahoney 		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2718a9dd3643SJeff Mahoney 		        ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2719cf3d0b81SEdward Shishkin 
2720cf3d0b81SEdward Shishkin 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2721cf3d0b81SEdward Shishkin 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2722a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2723cf3d0b81SEdward Shishkin 		    JOURNAL_MIN_RATIO) {
2724a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-462",
272545b03d5eSJeff Mahoney 					 "bad transaction max size (%u). "
272645b03d5eSJeff Mahoney 					 "FSCK?", journal->j_trans_max);
2727cf3d0b81SEdward Shishkin 			return 1;
2728cf3d0b81SEdward Shishkin 		}
2729cf3d0b81SEdward Shishkin 		if (journal->j_max_batch != (journal->j_trans_max) *
2730cf3d0b81SEdward Shishkin 		        JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2731a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-463",
273245b03d5eSJeff Mahoney 					 "bad transaction max batch (%u). "
273345b03d5eSJeff Mahoney 					 "FSCK?", journal->j_max_batch);
2734cf3d0b81SEdward Shishkin 			return 1;
2735cf3d0b81SEdward Shishkin 		}
2736cf3d0b81SEdward Shishkin 	} else {
2737cf3d0b81SEdward Shishkin 		/* Default journal params.
2738cf3d0b81SEdward Shishkin                    The file system was created by old version
2739cf3d0b81SEdward Shishkin 		   of mkreiserfs, so some fields contain zeros,
2740cf3d0b81SEdward Shishkin 		   and we need to advise proper values for them */
2741a9dd3643SJeff Mahoney 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2742a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2743a9dd3643SJeff Mahoney 					 sb->s_blocksize);
274445b03d5eSJeff Mahoney 			return 1;
274545b03d5eSJeff Mahoney 		}
2746cf3d0b81SEdward Shishkin 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2747cf3d0b81SEdward Shishkin 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2748cf3d0b81SEdward Shishkin 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2749cf3d0b81SEdward Shishkin 	}
2750cf3d0b81SEdward Shishkin 	return 0;
2751cf3d0b81SEdward Shishkin }
2752cf3d0b81SEdward Shishkin 
27531da177e4SLinus Torvalds /*
27541da177e4SLinus Torvalds ** must be called once on fs mount.  calls journal_read for you
27551da177e4SLinus Torvalds */
2756a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name,
2757bd4c625cSLinus Torvalds 		 int old_format, unsigned int commit_max_age)
2758bd4c625cSLinus Torvalds {
2759a9dd3643SJeff Mahoney 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
27601da177e4SLinus Torvalds 	struct buffer_head *bhjh;
27611da177e4SLinus Torvalds 	struct reiserfs_super_block *rs;
27621da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
27631da177e4SLinus Torvalds 	struct reiserfs_journal *journal;
27641da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
27651da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
276698ea3f50SFrederic Weisbecker 	int ret;
27671da177e4SLinus Torvalds 
276898ea3f50SFrederic Weisbecker 	/*
276998ea3f50SFrederic Weisbecker 	 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
277098ea3f50SFrederic Weisbecker 	 * dependency inversion warnings.
277198ea3f50SFrederic Weisbecker 	 */
277298ea3f50SFrederic Weisbecker 	reiserfs_write_unlock(sb);
2773a9dd3643SJeff Mahoney 	journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
27741da177e4SLinus Torvalds 	if (!journal) {
2775a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1256",
277645b03d5eSJeff Mahoney 				 "unable to get memory for journal structure");
277798ea3f50SFrederic Weisbecker 		reiserfs_write_lock(sb);
27781da177e4SLinus Torvalds 		return 1;
27791da177e4SLinus Torvalds 	}
27801da177e4SLinus Torvalds 	memset(journal, 0, sizeof(struct reiserfs_journal));
27811da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
27821da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_prealloc_list);
27831da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_working_list);
27841da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_journal_list);
27851da177e4SLinus Torvalds 	journal->j_persistent_trans = 0;
278698ea3f50SFrederic Weisbecker 	ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
278798ea3f50SFrederic Weisbecker 					   reiserfs_bmap_count(sb));
278898ea3f50SFrederic Weisbecker 	reiserfs_write_lock(sb);
278998ea3f50SFrederic Weisbecker 	if (ret)
27901da177e4SLinus Torvalds 		goto free_and_return;
279198ea3f50SFrederic Weisbecker 
2792a9dd3643SJeff Mahoney 	allocate_bitmap_nodes(sb);
27931da177e4SLinus Torvalds 
27941da177e4SLinus Torvalds 	/* reserved for journal area support */
2795a9dd3643SJeff Mahoney 	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2796bd4c625cSLinus Torvalds 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2797a9dd3643SJeff Mahoney 						 / sb->s_blocksize +
2798a9dd3643SJeff Mahoney 						 reiserfs_bmap_count(sb) +
2799bd4c625cSLinus Torvalds 						 1 :
2800bd4c625cSLinus Torvalds 						 REISERFS_DISK_OFFSET_IN_BYTES /
2801a9dd3643SJeff Mahoney 						 sb->s_blocksize + 2);
28021da177e4SLinus Torvalds 
28031da177e4SLinus Torvalds 	/* Sanity check to see is the standard journal fitting withing first bitmap
28041da177e4SLinus Torvalds 	   (actual for small blocksizes) */
2805a9dd3643SJeff Mahoney 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2806a9dd3643SJeff Mahoney 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2807a9dd3643SJeff Mahoney 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2808a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1393",
280945b03d5eSJeff Mahoney 				 "journal does not fit for area addressed "
281045b03d5eSJeff Mahoney 				 "by first of bitmap blocks. It starts at "
28111da177e4SLinus Torvalds 				 "%u and its size is %u. Block size %ld",
2812a9dd3643SJeff Mahoney 				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2813a9dd3643SJeff Mahoney 				 SB_ONDISK_JOURNAL_SIZE(sb),
2814a9dd3643SJeff Mahoney 				 sb->s_blocksize);
28151da177e4SLinus Torvalds 		goto free_and_return;
28161da177e4SLinus Torvalds 	}
28171da177e4SLinus Torvalds 
2818193be0eeSFrederic Weisbecker 	/*
2819193be0eeSFrederic Weisbecker 	 * We need to unlock here to avoid creating the following
2820193be0eeSFrederic Weisbecker 	 * dependency:
2821193be0eeSFrederic Weisbecker 	 * reiserfs_lock -> sysfs_mutex
2822193be0eeSFrederic Weisbecker 	 * Because the reiserfs mmap path creates the following dependency:
2823193be0eeSFrederic Weisbecker 	 * mm->mmap -> reiserfs_lock, hence we have
2824193be0eeSFrederic Weisbecker 	 * mm->mmap -> reiserfs_lock ->sysfs_mutex
2825193be0eeSFrederic Weisbecker 	 * This would ends up in a circular dependency with sysfs readdir path
2826193be0eeSFrederic Weisbecker 	 * which does sysfs_mutex -> mm->mmap_sem
2827193be0eeSFrederic Weisbecker 	 * This is fine because the reiserfs lock is useless in mount path,
2828193be0eeSFrederic Weisbecker 	 * at least until we call journal_begin. We keep it for paranoid
2829193be0eeSFrederic Weisbecker 	 * reasons.
2830193be0eeSFrederic Weisbecker 	 */
2831193be0eeSFrederic Weisbecker 	reiserfs_write_unlock(sb);
2832a9dd3643SJeff Mahoney 	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2833193be0eeSFrederic Weisbecker 		reiserfs_write_lock(sb);
2834a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-462",
283545b03d5eSJeff Mahoney 				 "unable to initialize jornal device");
28361da177e4SLinus Torvalds 		goto free_and_return;
28371da177e4SLinus Torvalds 	}
2838193be0eeSFrederic Weisbecker 	reiserfs_write_lock(sb);
28391da177e4SLinus Torvalds 
2840a9dd3643SJeff Mahoney 	rs = SB_DISK_SUPER_BLOCK(sb);
28411da177e4SLinus Torvalds 
28421da177e4SLinus Torvalds 	/* read journal header */
2843a9dd3643SJeff Mahoney 	bhjh = journal_bread(sb,
2844a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2845a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb));
28461da177e4SLinus Torvalds 	if (!bhjh) {
2847a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-459",
284845b03d5eSJeff Mahoney 				 "unable to read journal header");
28491da177e4SLinus Torvalds 		goto free_and_return;
28501da177e4SLinus Torvalds 	}
28511da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
28521da177e4SLinus Torvalds 
28531da177e4SLinus Torvalds 	/* make sure that journal matches to the super block */
2854bd4c625cSLinus Torvalds 	if (is_reiserfs_jr(rs)
2855bd4c625cSLinus Torvalds 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2856bd4c625cSLinus Torvalds 		sb_jp_journal_magic(rs))) {
2857a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-460",
285845b03d5eSJeff Mahoney 				 "journal header magic %x (device %s) does "
285945b03d5eSJeff Mahoney 				 "not match to magic found in super block %x",
286045b03d5eSJeff Mahoney 				 jh->jh_journal.jp_journal_magic,
28611da177e4SLinus Torvalds 				 bdevname(journal->j_dev_bd, b),
28621da177e4SLinus Torvalds 				 sb_jp_journal_magic(rs));
28631da177e4SLinus Torvalds 		brelse(bhjh);
28641da177e4SLinus Torvalds 		goto free_and_return;
28651da177e4SLinus Torvalds 	}
28661da177e4SLinus Torvalds 
28671da177e4SLinus Torvalds 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
28681da177e4SLinus Torvalds 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2869bd4c625cSLinus Torvalds 	journal->j_max_commit_age =
2870bd4c625cSLinus Torvalds 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
28711da177e4SLinus Torvalds 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
28721da177e4SLinus Torvalds 
2873a9dd3643SJeff Mahoney 	if (check_advise_trans_params(sb, journal) != 0)
2874cf3d0b81SEdward Shishkin 	        goto free_and_return;
28751da177e4SLinus Torvalds 	journal->j_default_max_commit_age = journal->j_max_commit_age;
28761da177e4SLinus Torvalds 
28771da177e4SLinus Torvalds 	if (commit_max_age != 0) {
28781da177e4SLinus Torvalds 		journal->j_max_commit_age = commit_max_age;
28791da177e4SLinus Torvalds 		journal->j_max_trans_age = commit_max_age;
28801da177e4SLinus Torvalds 	}
28811da177e4SLinus Torvalds 
2882a9dd3643SJeff Mahoney 	reiserfs_info(sb, "journal params: device %s, size %u, "
28831da177e4SLinus Torvalds 		      "journal first block %u, max trans len %u, max batch %u, "
28841da177e4SLinus Torvalds 		      "max commit age %u, max trans age %u\n",
28851da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b),
2886a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_SIZE(sb),
2887a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_1st_BLOCK(sb),
28881da177e4SLinus Torvalds 		      journal->j_trans_max,
28891da177e4SLinus Torvalds 		      journal->j_max_batch,
2890bd4c625cSLinus Torvalds 		      journal->j_max_commit_age, journal->j_max_trans_age);
28911da177e4SLinus Torvalds 
28921da177e4SLinus Torvalds 	brelse(bhjh);
28931da177e4SLinus Torvalds 
28941da177e4SLinus Torvalds 	journal->j_list_bitmap_index = 0;
2895a9dd3643SJeff Mahoney 	journal_list_init(sb);
28961da177e4SLinus Torvalds 
2897bd4c625cSLinus Torvalds 	memset(journal->j_list_hash_table, 0,
2898bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
28991da177e4SLinus Torvalds 
29001da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_dirty_buffers);
29011da177e4SLinus Torvalds 	spin_lock_init(&journal->j_dirty_buffers_lock);
29021da177e4SLinus Torvalds 
29031da177e4SLinus Torvalds 	journal->j_start = 0;
29041da177e4SLinus Torvalds 	journal->j_len = 0;
29051da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
29061da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
29071da177e4SLinus Torvalds 	atomic_set(&(journal->j_async_throttle), 0);
29081da177e4SLinus Torvalds 	journal->j_bcount = 0;
29091da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
29101da177e4SLinus Torvalds 	journal->j_last = NULL;
29111da177e4SLinus Torvalds 	journal->j_first = NULL;
29121da177e4SLinus Torvalds 	init_waitqueue_head(&(journal->j_join_wait));
2913f68215c4SJeff Mahoney 	mutex_init(&journal->j_mutex);
2914afe70259SJeff Mahoney 	mutex_init(&journal->j_flush_mutex);
29151da177e4SLinus Torvalds 
29161da177e4SLinus Torvalds 	journal->j_trans_id = 10;
29171da177e4SLinus Torvalds 	journal->j_mount_id = 10;
29181da177e4SLinus Torvalds 	journal->j_state = 0;
29191da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
2920bbec9191SFrederic Weisbecker 	reiserfs_write_unlock(sb);
29211da177e4SLinus Torvalds 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2922bbec9191SFrederic Weisbecker 	reiserfs_write_lock(sb);
29231da177e4SLinus Torvalds 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
29241da177e4SLinus Torvalds 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
29251da177e4SLinus Torvalds 	journal->j_cnode_used = 0;
29261da177e4SLinus Torvalds 	journal->j_must_wait = 0;
29271da177e4SLinus Torvalds 
2928576f6d79SJeff Mahoney 	if (journal->j_cnode_free == 0) {
2929a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2930576f6d79SJeff Mahoney 		                 "allocation failed (%ld bytes). Journal is "
2931576f6d79SJeff Mahoney 		                 "too large for available memory. Usually "
2932576f6d79SJeff Mahoney 		                 "this is due to a journal that is too large.",
2933576f6d79SJeff Mahoney 		                 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2934576f6d79SJeff Mahoney         	goto free_and_return;
2935576f6d79SJeff Mahoney 	}
2936576f6d79SJeff Mahoney 
2937a9dd3643SJeff Mahoney 	init_journal_hash(sb);
29381da177e4SLinus Torvalds 	jl = journal->j_current_jl;
2939a9dd3643SJeff Mahoney 	jl->j_list_bitmap = get_list_bitmap(sb, jl);
29401da177e4SLinus Torvalds 	if (!jl->j_list_bitmap) {
2941a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2005",
294245b03d5eSJeff Mahoney 				 "get_list_bitmap failed for journal list 0");
29431da177e4SLinus Torvalds 		goto free_and_return;
29441da177e4SLinus Torvalds 	}
2945a9dd3643SJeff Mahoney 	if (journal_read(sb) < 0) {
2946a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "reiserfs-2006",
294745b03d5eSJeff Mahoney 				 "Replay Failure, unable to mount");
29481da177e4SLinus Torvalds 		goto free_and_return;
29491da177e4SLinus Torvalds 	}
29501da177e4SLinus Torvalds 
29511da177e4SLinus Torvalds 	reiserfs_mounted_fs_count++;
295248f6ba5eSFrederic Weisbecker 	if (reiserfs_mounted_fs_count <= 1) {
295348f6ba5eSFrederic Weisbecker 		reiserfs_write_unlock(sb);
29541da177e4SLinus Torvalds 		commit_wq = create_workqueue("reiserfs");
295548f6ba5eSFrederic Weisbecker 		reiserfs_write_lock(sb);
295648f6ba5eSFrederic Weisbecker 	}
29571da177e4SLinus Torvalds 
2958c4028958SDavid Howells 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2959a9dd3643SJeff Mahoney 	journal->j_work_sb = sb;
29601da177e4SLinus Torvalds 	return 0;
29611da177e4SLinus Torvalds       free_and_return:
2962a9dd3643SJeff Mahoney 	free_journal_ram(sb);
29631da177e4SLinus Torvalds 	return 1;
29641da177e4SLinus Torvalds }
29651da177e4SLinus Torvalds 
29661da177e4SLinus Torvalds /*
29671da177e4SLinus Torvalds ** test for a polite end of the current transaction.  Used by file_write, and should
29681da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single
29691da177e4SLinus Torvalds ** transaction
29701da177e4SLinus Torvalds */
2971bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2972bd4c625cSLinus Torvalds 				   int new_alloc)
2973bd4c625cSLinus Torvalds {
29741da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29751da177e4SLinus Torvalds 	time_t now = get_seconds();
29761da177e4SLinus Torvalds 	/* cannot restart while nested */
29771da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29781da177e4SLinus Torvalds 	if (th->t_refcount > 1)
29791da177e4SLinus Torvalds 		return 0;
29801da177e4SLinus Torvalds 	if (journal->j_must_wait > 0 ||
29811da177e4SLinus Torvalds 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
29821da177e4SLinus Torvalds 	    atomic_read(&(journal->j_jlock)) ||
29831da177e4SLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
29841da177e4SLinus Torvalds 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
29851da177e4SLinus Torvalds 		return 1;
29861da177e4SLinus Torvalds 	}
29876ae1ea44SChris Mason 	/* protected by the BKL here */
29886ae1ea44SChris Mason 	journal->j_len_alloc += new_alloc;
29896ae1ea44SChris Mason 	th->t_blocks_allocated += new_alloc ;
29901da177e4SLinus Torvalds 	return 0;
29911da177e4SLinus Torvalds }
29921da177e4SLinus Torvalds 
29931da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the
29941da177e4SLinus Torvalds ** kernel_lock to be held
29951da177e4SLinus Torvalds */
2996bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2997bd4c625cSLinus Torvalds {
29981da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29991da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
30001da177e4SLinus Torvalds 	journal->j_must_wait = 1;
30011da177e4SLinus Torvalds 	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
30021da177e4SLinus Torvalds 	return;
30031da177e4SLinus Torvalds }
30041da177e4SLinus Torvalds 
30051da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
30061da177e4SLinus Torvalds ** require BKL
30071da177e4SLinus Torvalds */
3008bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s)
3009bd4c625cSLinus Torvalds {
30101da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30111da177e4SLinus Torvalds 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
30121da177e4SLinus Torvalds 	wake_up(&journal->j_join_wait);
30131da177e4SLinus Torvalds }
30141da177e4SLinus Torvalds 
30151da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
30161da177e4SLinus Torvalds ** require BKL
30171da177e4SLinus Torvalds */
3018bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s)
3019bd4c625cSLinus Torvalds {
30201da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30211da177e4SLinus Torvalds 	wait_event(journal->j_join_wait,
30221da177e4SLinus Torvalds 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
30231da177e4SLinus Torvalds }
30241da177e4SLinus Torvalds 
3025bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s)
3026bd4c625cSLinus Torvalds {
30271da177e4SLinus Torvalds 	wait_queue_t wait;
30281da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30291da177e4SLinus Torvalds 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
30301da177e4SLinus Torvalds 
30311da177e4SLinus Torvalds 	/*
30321da177e4SLinus Torvalds 	 * we don't want to use wait_event here because
30331da177e4SLinus Torvalds 	 * we only want to wait once.
30341da177e4SLinus Torvalds 	 */
30351da177e4SLinus Torvalds 	init_waitqueue_entry(&wait, current);
30361da177e4SLinus Torvalds 	add_wait_queue(&journal->j_join_wait, &wait);
30371da177e4SLinus Torvalds 	set_current_state(TASK_UNINTERRUPTIBLE);
30388ebc4232SFrederic Weisbecker 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
30398ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
30401da177e4SLinus Torvalds 		schedule();
30418ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
30428ebc4232SFrederic Weisbecker 	}
30435ab2f7e0SMilind Arun Choudhary 	__set_current_state(TASK_RUNNING);
30441da177e4SLinus Torvalds 	remove_wait_queue(&journal->j_join_wait, &wait);
30451da177e4SLinus Torvalds }
30461da177e4SLinus Torvalds 
3047bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s)
3048bd4c625cSLinus Torvalds {
30491da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
30501da177e4SLinus Torvalds 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
30511da177e4SLinus Torvalds 		wake_up(&journal->j_join_wait);
30521da177e4SLinus Torvalds }
30531da177e4SLinus Torvalds 
3054600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
30551da177e4SLinus Torvalds {
30561da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
30571da177e4SLinus Torvalds 	unsigned long bcount = journal->j_bcount;
30581da177e4SLinus Torvalds 	while (1) {
30598ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3060041e0e3bSNishanth Aravamudan 		schedule_timeout_uninterruptible(1);
30618ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
30621da177e4SLinus Torvalds 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
30631da177e4SLinus Torvalds 		while ((atomic_read(&journal->j_wcount) > 0 ||
30641da177e4SLinus Torvalds 			atomic_read(&journal->j_jlock)) &&
30651da177e4SLinus Torvalds 		       journal->j_trans_id == trans_id) {
30661da177e4SLinus Torvalds 			queue_log_writer(sb);
30671da177e4SLinus Torvalds 		}
30681da177e4SLinus Torvalds 		if (journal->j_trans_id != trans_id)
30691da177e4SLinus Torvalds 			break;
30701da177e4SLinus Torvalds 		if (bcount == journal->j_bcount)
30711da177e4SLinus Torvalds 			break;
30721da177e4SLinus Torvalds 		bcount = journal->j_bcount;
30731da177e4SLinus Torvalds 	}
30741da177e4SLinus Torvalds }
30751da177e4SLinus Torvalds 
30761da177e4SLinus Torvalds /* join == true if you must join an existing transaction.
30771da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish
30781da177e4SLinus Torvalds **
30791da177e4SLinus Torvalds ** this will block until the transaction is joinable.  send the number of blocks you
30801da177e4SLinus Torvalds ** expect to use in nblocks.
30811da177e4SLinus Torvalds */
3082bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3083a9dd3643SJeff Mahoney 			      struct super_block *sb, unsigned long nblocks,
3084bd4c625cSLinus Torvalds 			      int join)
3085bd4c625cSLinus Torvalds {
30861da177e4SLinus Torvalds 	time_t now = get_seconds();
3087600ed416SJeff Mahoney 	unsigned int old_trans_id;
3088a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
30891da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
30901da177e4SLinus Torvalds 	int sched_count = 0;
30911da177e4SLinus Torvalds 	int retval;
30921da177e4SLinus Torvalds 
3093a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal_begin");
309414a61442SEric Sesterhenn 	BUG_ON(nblocks > journal->j_trans_max);
30951da177e4SLinus Torvalds 
3096a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.journal_being);
30971da177e4SLinus Torvalds 	/* set here for journal_join */
30981da177e4SLinus Torvalds 	th->t_refcount = 1;
3099a9dd3643SJeff Mahoney 	th->t_super = sb;
31001da177e4SLinus Torvalds 
31011da177e4SLinus Torvalds       relock:
3102a9dd3643SJeff Mahoney 	lock_journal(sb);
31031da177e4SLinus Torvalds 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3104a9dd3643SJeff Mahoney 		unlock_journal(sb);
31051da177e4SLinus Torvalds 		retval = journal->j_errno;
31061da177e4SLinus Torvalds 		goto out_fail;
31071da177e4SLinus Torvalds 	}
31081da177e4SLinus Torvalds 	journal->j_bcount++;
31091da177e4SLinus Torvalds 
31101da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3111a9dd3643SJeff Mahoney 		unlock_journal(sb);
31128ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3113a9dd3643SJeff Mahoney 		reiserfs_wait_on_write_block(sb);
31148ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
3115a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_writers);
31161da177e4SLinus Torvalds 		goto relock;
31171da177e4SLinus Torvalds 	}
31181da177e4SLinus Torvalds 	now = get_seconds();
31191da177e4SLinus Torvalds 
31201da177e4SLinus Torvalds 	/* if there is no room in the journal OR
31211da177e4SLinus Torvalds 	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
31221da177e4SLinus Torvalds 	 ** we don't sleep if there aren't other writers
31231da177e4SLinus Torvalds 	 */
31241da177e4SLinus Torvalds 
31251da177e4SLinus Torvalds 	if ((!join && journal->j_must_wait > 0) ||
3126bd4c625cSLinus Torvalds 	    (!join
3127bd4c625cSLinus Torvalds 	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
3128bd4c625cSLinus Torvalds 	    || (!join && atomic_read(&journal->j_wcount) > 0
3129bd4c625cSLinus Torvalds 		&& journal->j_trans_start_time > 0
3130bd4c625cSLinus Torvalds 		&& (now - journal->j_trans_start_time) >
3131bd4c625cSLinus Torvalds 		journal->j_max_trans_age) || (!join
3132bd4c625cSLinus Torvalds 					      && atomic_read(&journal->j_jlock))
3133bd4c625cSLinus Torvalds 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
31341da177e4SLinus Torvalds 
31351da177e4SLinus Torvalds 		old_trans_id = journal->j_trans_id;
3136a9dd3643SJeff Mahoney 		unlock_journal(sb);	/* allow others to finish this transaction */
31371da177e4SLinus Torvalds 
31381da177e4SLinus Torvalds 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
31391da177e4SLinus Torvalds 		    journal->j_max_batch &&
3140bd4c625cSLinus Torvalds 		    ((journal->j_len + nblocks + 2) * 100) <
3141bd4c625cSLinus Torvalds 		    (journal->j_len_alloc * 75)) {
31421da177e4SLinus Torvalds 			if (atomic_read(&journal->j_wcount) > 10) {
31431da177e4SLinus Torvalds 				sched_count++;
3144a9dd3643SJeff Mahoney 				queue_log_writer(sb);
31451da177e4SLinus Torvalds 				goto relock;
31461da177e4SLinus Torvalds 			}
31471da177e4SLinus Torvalds 		}
31481da177e4SLinus Torvalds 		/* don't mess with joining the transaction if all we have to do is
31491da177e4SLinus Torvalds 		 * wait for someone else to do a commit
31501da177e4SLinus Torvalds 		 */
31511da177e4SLinus Torvalds 		if (atomic_read(&journal->j_jlock)) {
31521da177e4SLinus Torvalds 			while (journal->j_trans_id == old_trans_id &&
31531da177e4SLinus Torvalds 			       atomic_read(&journal->j_jlock)) {
3154a9dd3643SJeff Mahoney 				queue_log_writer(sb);
31551da177e4SLinus Torvalds 			}
31561da177e4SLinus Torvalds 			goto relock;
31571da177e4SLinus Torvalds 		}
3158a9dd3643SJeff Mahoney 		retval = journal_join(&myth, sb, 1);
31591da177e4SLinus Torvalds 		if (retval)
31601da177e4SLinus Torvalds 			goto out_fail;
31611da177e4SLinus Torvalds 
31621da177e4SLinus Torvalds 		/* someone might have ended the transaction while we joined */
31631da177e4SLinus Torvalds 		if (old_trans_id != journal->j_trans_id) {
3164a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, 0);
31651da177e4SLinus Torvalds 		} else {
3166a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
31671da177e4SLinus Torvalds 		}
31681da177e4SLinus Torvalds 
31691da177e4SLinus Torvalds 		if (retval)
31701da177e4SLinus Torvalds 			goto out_fail;
31711da177e4SLinus Torvalds 
3172a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_wcount);
31731da177e4SLinus Torvalds 		goto relock;
31741da177e4SLinus Torvalds 	}
31751da177e4SLinus Torvalds 	/* we are the first writer, set trans_id */
31761da177e4SLinus Torvalds 	if (journal->j_trans_start_time == 0) {
31771da177e4SLinus Torvalds 		journal->j_trans_start_time = get_seconds();
31781da177e4SLinus Torvalds 	}
31791da177e4SLinus Torvalds 	atomic_inc(&(journal->j_wcount));
31801da177e4SLinus Torvalds 	journal->j_len_alloc += nblocks;
31811da177e4SLinus Torvalds 	th->t_blocks_logged = 0;
31821da177e4SLinus Torvalds 	th->t_blocks_allocated = nblocks;
31831da177e4SLinus Torvalds 	th->t_trans_id = journal->j_trans_id;
3184a9dd3643SJeff Mahoney 	unlock_journal(sb);
31851da177e4SLinus Torvalds 	INIT_LIST_HEAD(&th->t_list);
318622e2c507SJens Axboe 	get_fs_excl();
31871da177e4SLinus Torvalds 	return 0;
31881da177e4SLinus Torvalds 
31891da177e4SLinus Torvalds       out_fail:
31901da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
31911da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
31921da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
31931da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
3194a9dd3643SJeff Mahoney 	th->t_super = sb;
31951da177e4SLinus Torvalds 	return retval;
31961da177e4SLinus Torvalds }
31971da177e4SLinus Torvalds 
3198bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3199bd4c625cSLinus Torvalds 								    super_block
3200bd4c625cSLinus Torvalds 								    *s,
3201bd4c625cSLinus Torvalds 								    int nblocks)
3202bd4c625cSLinus Torvalds {
32031da177e4SLinus Torvalds 	int ret;
32041da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *th;
32051da177e4SLinus Torvalds 
32061da177e4SLinus Torvalds 	/* if we're nesting into an existing transaction.  It will be
32071da177e4SLinus Torvalds 	 ** persistent on its own
32081da177e4SLinus Torvalds 	 */
32091da177e4SLinus Torvalds 	if (reiserfs_transaction_running(s)) {
32101da177e4SLinus Torvalds 		th = current->journal_info;
32111da177e4SLinus Torvalds 		th->t_refcount++;
321214a61442SEric Sesterhenn 		BUG_ON(th->t_refcount < 2);
321314a61442SEric Sesterhenn 
32141da177e4SLinus Torvalds 		return th;
32151da177e4SLinus Torvalds 	}
3216d739b42bSPekka Enberg 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
32171da177e4SLinus Torvalds 	if (!th)
32181da177e4SLinus Torvalds 		return NULL;
32191da177e4SLinus Torvalds 	ret = journal_begin(th, s, nblocks);
32201da177e4SLinus Torvalds 	if (ret) {
3221d739b42bSPekka Enberg 		kfree(th);
32221da177e4SLinus Torvalds 		return NULL;
32231da177e4SLinus Torvalds 	}
32241da177e4SLinus Torvalds 
32251da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_persistent_trans++;
32261da177e4SLinus Torvalds 	return th;
32271da177e4SLinus Torvalds }
32281da177e4SLinus Torvalds 
3229bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3230bd4c625cSLinus Torvalds {
32311da177e4SLinus Torvalds 	struct super_block *s = th->t_super;
32321da177e4SLinus Torvalds 	int ret = 0;
32331da177e4SLinus Torvalds 	if (th->t_trans_id)
32341da177e4SLinus Torvalds 		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
32351da177e4SLinus Torvalds 	else
32361da177e4SLinus Torvalds 		ret = -EIO;
32371da177e4SLinus Torvalds 	if (th->t_refcount == 0) {
32381da177e4SLinus Torvalds 		SB_JOURNAL(s)->j_persistent_trans--;
3239d739b42bSPekka Enberg 		kfree(th);
32401da177e4SLinus Torvalds 	}
32411da177e4SLinus Torvalds 	return ret;
32421da177e4SLinus Torvalds }
32431da177e4SLinus Torvalds 
3244bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
3245a9dd3643SJeff Mahoney 			struct super_block *sb, unsigned long nblocks)
3246bd4c625cSLinus Torvalds {
32471da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32481da177e4SLinus Torvalds 
32491da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
32501da177e4SLinus Torvalds 	 ** pointer
32511da177e4SLinus Torvalds 	 */
32521da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
325314a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3254a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
32551da177e4SLinus Torvalds }
32561da177e4SLinus Torvalds 
3257bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th,
3258a9dd3643SJeff Mahoney 		       struct super_block *sb, unsigned long nblocks)
3259bd4c625cSLinus Torvalds {
32601da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32611da177e4SLinus Torvalds 
32621da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
32631da177e4SLinus Torvalds 	 ** pointer
32641da177e4SLinus Torvalds 	 */
32651da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
326614a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3267a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
32681da177e4SLinus Torvalds }
32691da177e4SLinus Torvalds 
3270bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th,
3271a9dd3643SJeff Mahoney 		  struct super_block *sb, unsigned long nblocks)
3272bd4c625cSLinus Torvalds {
32731da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32741da177e4SLinus Torvalds 	int ret;
32751da177e4SLinus Torvalds 
32761da177e4SLinus Torvalds 	th->t_handle_save = NULL;
32771da177e4SLinus Torvalds 	if (cur_th) {
32781da177e4SLinus Torvalds 		/* we are nesting into the current transaction */
3279a9dd3643SJeff Mahoney 		if (cur_th->t_super == sb) {
32801da177e4SLinus Torvalds 			BUG_ON(!cur_th->t_refcount);
32811da177e4SLinus Torvalds 			cur_th->t_refcount++;
32821da177e4SLinus Torvalds 			memcpy(th, cur_th, sizeof(*th));
32831da177e4SLinus Torvalds 			if (th->t_refcount <= 1)
3284a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2005",
328545b03d5eSJeff Mahoney 						 "BAD: refcount <= 1, but "
328645b03d5eSJeff Mahoney 						 "journal_info != 0");
32871da177e4SLinus Torvalds 			return 0;
32881da177e4SLinus Torvalds 		} else {
32891da177e4SLinus Torvalds 			/* we've ended up with a handle from a different filesystem.
32901da177e4SLinus Torvalds 			 ** save it and restore on journal_end.  This should never
32911da177e4SLinus Torvalds 			 ** really happen...
32921da177e4SLinus Torvalds 			 */
3293a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2100",
329445b03d5eSJeff Mahoney 					 "nesting info a different FS");
32951da177e4SLinus Torvalds 			th->t_handle_save = current->journal_info;
32961da177e4SLinus Torvalds 			current->journal_info = th;
32971da177e4SLinus Torvalds 		}
32981da177e4SLinus Torvalds 	} else {
32991da177e4SLinus Torvalds 		current->journal_info = th;
33001da177e4SLinus Torvalds 	}
3301a9dd3643SJeff Mahoney 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
330214a61442SEric Sesterhenn 	BUG_ON(current->journal_info != th);
33031da177e4SLinus Torvalds 
33041da177e4SLinus Torvalds 	/* I guess this boils down to being the reciprocal of clm-2100 above.
33051da177e4SLinus Torvalds 	 * If do_journal_begin_r fails, we need to put it back, since journal_end
33061da177e4SLinus Torvalds 	 * won't be called to do it. */
33071da177e4SLinus Torvalds 	if (ret)
33081da177e4SLinus Torvalds 		current->journal_info = th->t_handle_save;
33091da177e4SLinus Torvalds 	else
33101da177e4SLinus Torvalds 		BUG_ON(!th->t_refcount);
33111da177e4SLinus Torvalds 
33121da177e4SLinus Torvalds 	return ret;
33131da177e4SLinus Torvalds }
33141da177e4SLinus Torvalds 
33151da177e4SLinus Torvalds /*
33161da177e4SLinus Torvalds ** puts bh into the current transaction.  If it was already there, reorders removes the
33171da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
33181da177e4SLinus Torvalds **
33191da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
33201da177e4SLinus Torvalds ** transaction is committed.
33211da177e4SLinus Torvalds **
33221da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
33231da177e4SLinus Torvalds */
3324bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3325a9dd3643SJeff Mahoney 		       struct super_block *sb, struct buffer_head *bh)
3326bd4c625cSLinus Torvalds {
3327a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
33281da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
33291da177e4SLinus Torvalds 	int count_already_incd = 0;
33301da177e4SLinus Torvalds 	int prepared = 0;
33311da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
33321da177e4SLinus Torvalds 
3333a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.mark_dirty);
33341da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3335c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3336c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
33371da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
33381da177e4SLinus Torvalds 	}
33391da177e4SLinus Torvalds 
3340a9dd3643SJeff Mahoney 	sb->s_dirt = 1;
33411da177e4SLinus Torvalds 
33421da177e4SLinus Torvalds 	prepared = test_clear_buffer_journal_prepared(bh);
33431da177e4SLinus Torvalds 	clear_buffer_journal_restore_dirty(bh);
33441da177e4SLinus Torvalds 	/* already in this transaction, we are done */
33451da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
3346a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_already);
33471da177e4SLinus Torvalds 		return 0;
33481da177e4SLinus Torvalds 	}
33491da177e4SLinus Torvalds 
33501da177e4SLinus Torvalds 	/* this must be turned into a panic instead of a warning.  We can't allow
33511da177e4SLinus Torvalds 	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
33521da177e4SLinus Torvalds 	 ** could get to disk too early.  NOT GOOD.
33531da177e4SLinus Torvalds 	 */
33541da177e4SLinus Torvalds 	if (!prepared || buffer_dirty(bh)) {
3355a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1777",
335645b03d5eSJeff Mahoney 				 "buffer %llu bad state "
33571da177e4SLinus Torvalds 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3358bd4c625cSLinus Torvalds 				 (unsigned long long)bh->b_blocknr,
3359bd4c625cSLinus Torvalds 				 prepared ? ' ' : '!',
33601da177e4SLinus Torvalds 				 buffer_locked(bh) ? ' ' : '!',
33611da177e4SLinus Torvalds 				 buffer_dirty(bh) ? ' ' : '!',
33621da177e4SLinus Torvalds 				 buffer_journal_dirty(bh) ? ' ' : '!');
33631da177e4SLinus Torvalds 	}
33641da177e4SLinus Torvalds 
33651da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) <= 0) {
3366a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1409",
336745b03d5eSJeff Mahoney 				 "returning because j_wcount was %d",
3368bd4c625cSLinus Torvalds 				 atomic_read(&(journal->j_wcount)));
33691da177e4SLinus Torvalds 		return 1;
33701da177e4SLinus Torvalds 	}
33711da177e4SLinus Torvalds 	/* this error means I've screwed up, and we've overflowed the transaction.
33721da177e4SLinus Torvalds 	 ** Nothing can be done here, except make the FS readonly or panic.
33731da177e4SLinus Torvalds 	 */
33741da177e4SLinus Torvalds 	if (journal->j_len >= journal->j_trans_max) {
3375c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1413",
3376c3a9c210SJeff Mahoney 			       "j_len (%lu) is too big",
3377bd4c625cSLinus Torvalds 			       journal->j_len);
33781da177e4SLinus Torvalds 	}
33791da177e4SLinus Torvalds 
33801da177e4SLinus Torvalds 	if (buffer_journal_dirty(bh)) {
33811da177e4SLinus Torvalds 		count_already_incd = 1;
3382a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
33831da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
33841da177e4SLinus Torvalds 	}
33851da177e4SLinus Torvalds 
33861da177e4SLinus Torvalds 	if (journal->j_len > journal->j_len_alloc) {
33871da177e4SLinus Torvalds 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
33881da177e4SLinus Torvalds 	}
33891da177e4SLinus Torvalds 
33901da177e4SLinus Torvalds 	set_buffer_journaled(bh);
33911da177e4SLinus Torvalds 
33921da177e4SLinus Torvalds 	/* now put this guy on the end */
33931da177e4SLinus Torvalds 	if (!cn) {
3394a9dd3643SJeff Mahoney 		cn = get_cnode(sb);
33951da177e4SLinus Torvalds 		if (!cn) {
3396a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-4", "get_cnode failed!");
33971da177e4SLinus Torvalds 		}
33981da177e4SLinus Torvalds 
33991da177e4SLinus Torvalds 		if (th->t_blocks_logged == th->t_blocks_allocated) {
34001da177e4SLinus Torvalds 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
34011da177e4SLinus Torvalds 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
34021da177e4SLinus Torvalds 		}
34031da177e4SLinus Torvalds 		th->t_blocks_logged++;
34041da177e4SLinus Torvalds 		journal->j_len++;
34051da177e4SLinus Torvalds 
34061da177e4SLinus Torvalds 		cn->bh = bh;
34071da177e4SLinus Torvalds 		cn->blocknr = bh->b_blocknr;
3408a9dd3643SJeff Mahoney 		cn->sb = sb;
34091da177e4SLinus Torvalds 		cn->jlist = NULL;
34101da177e4SLinus Torvalds 		insert_journal_hash(journal->j_hash_table, cn);
34111da177e4SLinus Torvalds 		if (!count_already_incd) {
34121da177e4SLinus Torvalds 			get_bh(bh);
34131da177e4SLinus Torvalds 		}
34141da177e4SLinus Torvalds 	}
34151da177e4SLinus Torvalds 	cn->next = NULL;
34161da177e4SLinus Torvalds 	cn->prev = journal->j_last;
34171da177e4SLinus Torvalds 	cn->bh = bh;
34181da177e4SLinus Torvalds 	if (journal->j_last) {
34191da177e4SLinus Torvalds 		journal->j_last->next = cn;
34201da177e4SLinus Torvalds 		journal->j_last = cn;
34211da177e4SLinus Torvalds 	} else {
34221da177e4SLinus Torvalds 		journal->j_first = cn;
34231da177e4SLinus Torvalds 		journal->j_last = cn;
34241da177e4SLinus Torvalds 	}
34251da177e4SLinus Torvalds 	return 0;
34261da177e4SLinus Torvalds }
34271da177e4SLinus Torvalds 
3428bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th,
3429a9dd3643SJeff Mahoney 		struct super_block *sb, unsigned long nblocks)
3430bd4c625cSLinus Torvalds {
34311da177e4SLinus Torvalds 	if (!current->journal_info && th->t_refcount > 1)
3432a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "REISER-NESTING",
343345b03d5eSJeff Mahoney 				 "th NULL, refcount %d", th->t_refcount);
34341da177e4SLinus Torvalds 
34351da177e4SLinus Torvalds 	if (!th->t_trans_id) {
34361da177e4SLinus Torvalds 		WARN_ON(1);
34371da177e4SLinus Torvalds 		return -EIO;
34381da177e4SLinus Torvalds 	}
34391da177e4SLinus Torvalds 
34401da177e4SLinus Torvalds 	th->t_refcount--;
34411da177e4SLinus Torvalds 	if (th->t_refcount > 0) {
3442bd4c625cSLinus Torvalds 		struct reiserfs_transaction_handle *cur_th =
3443bd4c625cSLinus Torvalds 		    current->journal_info;
34441da177e4SLinus Torvalds 
34451da177e4SLinus Torvalds 		/* we aren't allowed to close a nested transaction on a different
34461da177e4SLinus Torvalds 		 ** filesystem from the one in the task struct
34471da177e4SLinus Torvalds 		 */
344814a61442SEric Sesterhenn 		BUG_ON(cur_th->t_super != th->t_super);
34491da177e4SLinus Torvalds 
34501da177e4SLinus Torvalds 		if (th != cur_th) {
34511da177e4SLinus Torvalds 			memcpy(current->journal_info, th, sizeof(*th));
34521da177e4SLinus Torvalds 			th->t_trans_id = 0;
34531da177e4SLinus Torvalds 		}
34541da177e4SLinus Torvalds 		return 0;
34551da177e4SLinus Torvalds 	} else {
3456a9dd3643SJeff Mahoney 		return do_journal_end(th, sb, nblocks, 0);
34571da177e4SLinus Torvalds 	}
34581da177e4SLinus Torvalds }
34591da177e4SLinus Torvalds 
34601da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters.
34611da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list
34621da177e4SLinus Torvalds **
34631da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted
34641da177e4SLinus Torvalds **
34651da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise
34661da177e4SLinus Torvalds */
3467a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb,
3468bd4c625cSLinus Torvalds 				   b_blocknr_t blocknr, int already_cleaned)
3469bd4c625cSLinus Torvalds {
34701da177e4SLinus Torvalds 	struct buffer_head *bh;
34711da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
3472a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
34731da177e4SLinus Torvalds 	int ret = 0;
34741da177e4SLinus Torvalds 
3475a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
34761da177e4SLinus Torvalds 	if (!cn || !cn->bh) {
34771da177e4SLinus Torvalds 		return ret;
34781da177e4SLinus Torvalds 	}
34791da177e4SLinus Torvalds 	bh = cn->bh;
34801da177e4SLinus Torvalds 	if (cn->prev) {
34811da177e4SLinus Torvalds 		cn->prev->next = cn->next;
34821da177e4SLinus Torvalds 	}
34831da177e4SLinus Torvalds 	if (cn->next) {
34841da177e4SLinus Torvalds 		cn->next->prev = cn->prev;
34851da177e4SLinus Torvalds 	}
34861da177e4SLinus Torvalds 	if (cn == journal->j_first) {
34871da177e4SLinus Torvalds 		journal->j_first = cn->next;
34881da177e4SLinus Torvalds 	}
34891da177e4SLinus Torvalds 	if (cn == journal->j_last) {
34901da177e4SLinus Torvalds 		journal->j_last = cn->prev;
34911da177e4SLinus Torvalds 	}
34921da177e4SLinus Torvalds 	if (bh)
3493a9dd3643SJeff Mahoney 		remove_journal_hash(sb, journal->j_hash_table, NULL,
3494bd4c625cSLinus Torvalds 				    bh->b_blocknr, 0);
34951da177e4SLinus Torvalds 	clear_buffer_journaled(bh);	/* don't log this one */
34961da177e4SLinus Torvalds 
34971da177e4SLinus Torvalds 	if (!already_cleaned) {
34981da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
34991da177e4SLinus Torvalds 		clear_buffer_dirty(bh);
35001da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
35011da177e4SLinus Torvalds 		put_bh(bh);
35021da177e4SLinus Torvalds 		if (atomic_read(&(bh->b_count)) < 0) {
3503a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1752",
350445b03d5eSJeff Mahoney 					 "b_count < 0");
35051da177e4SLinus Torvalds 		}
35061da177e4SLinus Torvalds 		ret = 1;
35071da177e4SLinus Torvalds 	}
35081da177e4SLinus Torvalds 	journal->j_len--;
35091da177e4SLinus Torvalds 	journal->j_len_alloc--;
3510a9dd3643SJeff Mahoney 	free_cnode(sb, cn);
35111da177e4SLinus Torvalds 	return ret;
35121da177e4SLinus Torvalds }
35131da177e4SLinus Torvalds 
35141da177e4SLinus Torvalds /*
35151da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the
35160779bf2dSMatt LaPlante ** transactions that include it are committed to disk.
35171da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty,
35181da177e4SLinus Torvalds ** and 0 if you aren't
35191da177e4SLinus Torvalds **
35201da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
35211da177e4SLinus Torvalds ** blocks for a given transaction on disk
35221da177e4SLinus Torvalds **
35231da177e4SLinus Torvalds */
3524bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn)
3525bd4c625cSLinus Torvalds {
35261da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
35271da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
35281da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur = cn->hprev;
35291da177e4SLinus Torvalds 	int can_dirty = 1;
35301da177e4SLinus Torvalds 
35311da177e4SLinus Torvalds 	/* first test hprev.  These are all newer than cn, so any node here
35321da177e4SLinus Torvalds 	 ** with the same block number and dev means this node can't be sent
35331da177e4SLinus Torvalds 	 ** to disk right now.
35341da177e4SLinus Torvalds 	 */
35351da177e4SLinus Torvalds 	while (cur && can_dirty) {
35361da177e4SLinus Torvalds 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
35371da177e4SLinus Torvalds 		    cur->blocknr == blocknr) {
35381da177e4SLinus Torvalds 			can_dirty = 0;
35391da177e4SLinus Torvalds 		}
35401da177e4SLinus Torvalds 		cur = cur->hprev;
35411da177e4SLinus Torvalds 	}
35421da177e4SLinus Torvalds 	/* then test hnext.  These are all older than cn.  As long as they
35431da177e4SLinus Torvalds 	 ** are committed to the log, it is safe to write cn to disk
35441da177e4SLinus Torvalds 	 */
35451da177e4SLinus Torvalds 	cur = cn->hnext;
35461da177e4SLinus Torvalds 	while (cur && can_dirty) {
35471da177e4SLinus Torvalds 		if (cur->jlist && cur->jlist->j_len > 0 &&
35481da177e4SLinus Torvalds 		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
35491da177e4SLinus Torvalds 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
35501da177e4SLinus Torvalds 			can_dirty = 0;
35511da177e4SLinus Torvalds 		}
35521da177e4SLinus Torvalds 		cur = cur->hnext;
35531da177e4SLinus Torvalds 	}
35541da177e4SLinus Torvalds 	return can_dirty;
35551da177e4SLinus Torvalds }
35561da177e4SLinus Torvalds 
35571da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk
35580779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning
35591da177e4SLinus Torvalds */
3560bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th,
3561a9dd3643SJeff Mahoney 		     struct super_block *sb, unsigned long nblocks)
3562bd4c625cSLinus Torvalds {
3563a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35641da177e4SLinus Torvalds 
35651da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
35661da177e4SLinus Torvalds 	/* you can sync while nested, very, very bad */
356714a61442SEric Sesterhenn 	BUG_ON(th->t_refcount > 1);
35681da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3569a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3570bd4c625cSLinus Torvalds 					     1);
3571a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
35721da177e4SLinus Torvalds 	}
3573a9dd3643SJeff Mahoney 	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
35741da177e4SLinus Torvalds }
35751da177e4SLinus Torvalds 
35761da177e4SLinus Torvalds /*
35771da177e4SLinus Torvalds ** writeback the pending async commits to disk
35781da177e4SLinus Torvalds */
3579c4028958SDavid Howells static void flush_async_commits(struct work_struct *work)
3580bd4c625cSLinus Torvalds {
3581c4028958SDavid Howells 	struct reiserfs_journal *journal =
3582c4028958SDavid Howells 		container_of(work, struct reiserfs_journal, j_work.work);
3583a9dd3643SJeff Mahoney 	struct super_block *sb = journal->j_work_sb;
35841da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
35851da177e4SLinus Torvalds 	struct list_head *entry;
35861da177e4SLinus Torvalds 
35878ebc4232SFrederic Weisbecker 	reiserfs_write_lock(sb);
35881da177e4SLinus Torvalds 	if (!list_empty(&journal->j_journal_list)) {
35891da177e4SLinus Torvalds 		/* last entry is the youngest, commit it and you get everything */
35901da177e4SLinus Torvalds 		entry = journal->j_journal_list.prev;
35911da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
3592a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
35931da177e4SLinus Torvalds 	}
35948ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
35951da177e4SLinus Torvalds }
35961da177e4SLinus Torvalds 
35971da177e4SLinus Torvalds /*
35981da177e4SLinus Torvalds ** flushes any old transactions to disk
35991da177e4SLinus Torvalds ** ends the current transaction if it is too old
36001da177e4SLinus Torvalds */
3601a9dd3643SJeff Mahoney int reiserfs_flush_old_commits(struct super_block *sb)
3602bd4c625cSLinus Torvalds {
36031da177e4SLinus Torvalds 	time_t now;
36041da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
3605a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
36061da177e4SLinus Torvalds 
36071da177e4SLinus Torvalds 	now = get_seconds();
36081da177e4SLinus Torvalds 	/* safety check so we don't flush while we are replaying the log during
36091da177e4SLinus Torvalds 	 * mount
36101da177e4SLinus Torvalds 	 */
36111da177e4SLinus Torvalds 	if (list_empty(&journal->j_journal_list)) {
36121da177e4SLinus Torvalds 		return 0;
36131da177e4SLinus Torvalds 	}
36141da177e4SLinus Torvalds 
36151da177e4SLinus Torvalds 	/* check the current transaction.  If there are no writers, and it is
36161da177e4SLinus Torvalds 	 * too old, finish it, and force the commit blocks to disk
36171da177e4SLinus Torvalds 	 */
36181da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) <= 0 &&
36191da177e4SLinus Torvalds 	    journal->j_trans_start_time > 0 &&
36201da177e4SLinus Torvalds 	    journal->j_len > 0 &&
3621bd4c625cSLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3622a9dd3643SJeff Mahoney 		if (!journal_join(&th, sb, 1)) {
3623a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
3624a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
3625bd4c625cSLinus Torvalds 						     1);
3626a9dd3643SJeff Mahoney 			journal_mark_dirty(&th, sb,
3627a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
36281da177e4SLinus Torvalds 
36291da177e4SLinus Torvalds 			/* we're only being called from kreiserfsd, it makes no sense to do
36301da177e4SLinus Torvalds 			 ** an async commit so that kreiserfsd can do it later
36311da177e4SLinus Torvalds 			 */
3632a9dd3643SJeff Mahoney 			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
36331da177e4SLinus Torvalds 		}
36341da177e4SLinus Torvalds 	}
3635a9dd3643SJeff Mahoney 	return sb->s_dirt;
36361da177e4SLinus Torvalds }
36371da177e4SLinus Torvalds 
36381da177e4SLinus Torvalds /*
36391da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
36401da177e4SLinus Torvalds **
36411da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
36421da177e4SLinus Torvalds ** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
36431da177e4SLinus Torvalds ** flushes the commit list and returns 0.
36441da177e4SLinus Torvalds **
36451da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
36461da177e4SLinus Torvalds **
36471da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
36481da177e4SLinus Torvalds */
/*
 * Parameters, as used by the body below:
 *   th      - handle being ended; its t_trans_id must be non-zero and equal
 *             to the journal's current j_trans_id
 *   sb      - super block whose journal is examined
 *   nblocks - not referenced in this function
 *   flags   - tested for the FLUSH_ALL, COMMIT_NOW and WAIT bits
 *
 * Locking as visible here: every path that returns 0 has called
 * unlock_journal(sb) before returning; the path that returns 1 has not.
 */
3649bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th,
3650a9dd3643SJeff Mahoney 			     struct super_block *sb, unsigned long nblocks,
3651bd4c625cSLinus Torvalds 			     int flags)
3652bd4c625cSLinus Torvalds {
36531da177e4SLinus Torvalds 
36541da177e4SLinus Torvalds 	time_t now;
36551da177e4SLinus Torvalds 	int flush = flags & FLUSH_ALL;
36561da177e4SLinus Torvalds 	int commit_now = flags & COMMIT_NOW;
36571da177e4SLinus Torvalds 	int wait_on_commit = flags & WAIT;
36581da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
3659a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
36601da177e4SLinus Torvalds 
36611da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
36621da177e4SLinus Torvalds 
36631da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3664c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3665c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
36661da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
36671da177e4SLinus Torvalds 	}
36681da177e4SLinus Torvalds 
	/* give back the part of this handle's reservation that was never logged */
36691da177e4SLinus Torvalds 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
36701da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
36711da177e4SLinus Torvalds 		atomic_dec(&(journal->j_wcount));
36721da177e4SLinus Torvalds 	}
36731da177e4SLinus Torvalds 
36741da177e4SLinus Torvalds 	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
36751da177e4SLinus Torvalds 	 ** will be dealt with by next transaction that actually writes something, but should be taken
36761da177e4SLinus Torvalds 	 ** care of in this trans
36771da177e4SLinus Torvalds 	 */
367814a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
367914a61442SEric Sesterhenn 
36801da177e4SLinus Torvalds 	/* if wcount > 0, and we are called to with flush or commit_now,
36811da177e4SLinus Torvalds 	 ** we wait on j_join_wait.  We will wake up when the last writer has
36821da177e4SLinus Torvalds 	 ** finished the transaction, and started it on its way to the disk.
36831da177e4SLinus Torvalds 	 ** Then, we flush the commit or journal list, and just return 0
36841da177e4SLinus Torvalds 	 ** because the rest of journal end was already done for this transaction.
36851da177e4SLinus Torvalds 	 */
36861da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {
36871da177e4SLinus Torvalds 		if (flush || commit_now) {
36881da177e4SLinus Torvalds 			unsigned trans_id;
36891da177e4SLinus Torvalds 
			/* remember the list and its id now; both are used again after the wait loop */
36901da177e4SLinus Torvalds 			jl = journal->j_current_jl;
36911da177e4SLinus Torvalds 			trans_id = jl->j_trans_id;
36921da177e4SLinus Torvalds 			if (wait_on_commit)
36931da177e4SLinus Torvalds 				jl->j_state |= LIST_COMMIT_PENDING;
36941da177e4SLinus Torvalds 			atomic_set(&(journal->j_jlock), 1);
36951da177e4SLinus Torvalds 			if (flush) {
36961da177e4SLinus Torvalds 				journal->j_next_full_flush = 1;
36971da177e4SLinus Torvalds 			}
3698a9dd3643SJeff Mahoney 			unlock_journal(sb);
36991da177e4SLinus Torvalds 
37001da177e4SLinus Torvalds 			/* sleep while the current transaction is still j_jlocked */
37011da177e4SLinus Torvalds 			while (journal->j_trans_id == trans_id) {
37021da177e4SLinus Torvalds 				if (atomic_read(&journal->j_jlock)) {
3703a9dd3643SJeff Mahoney 					queue_log_writer(sb);
37041da177e4SLinus Torvalds 				} else {
					/* j_jlock was cleared while the transaction is still current: set it again under the journal lock */
3705a9dd3643SJeff Mahoney 					lock_journal(sb);
37061da177e4SLinus Torvalds 					if (journal->j_trans_id == trans_id) {
3707bd4c625cSLinus Torvalds 						atomic_set(&(journal->j_jlock),
3708bd4c625cSLinus Torvalds 							   1);
37091da177e4SLinus Torvalds 					}
3710a9dd3643SJeff Mahoney 					unlock_journal(sb);
37111da177e4SLinus Torvalds 				}
37121da177e4SLinus Torvalds 			}
371314a61442SEric Sesterhenn 			BUG_ON(journal->j_trans_id == trans_id);
371414a61442SEric Sesterhenn 
			/* only flush the commit list when the caller asked to wait and the list still exists */
3715bd4c625cSLinus Torvalds 			if (commit_now
3716a9dd3643SJeff Mahoney 			    && journal_list_still_alive(sb, trans_id)
3717bd4c625cSLinus Torvalds 			    && wait_on_commit) {
3718a9dd3643SJeff Mahoney 				flush_commit_list(sb, jl, 1);
37191da177e4SLinus Torvalds 			}
37201da177e4SLinus Torvalds 			return 0;
37211da177e4SLinus Torvalds 		}
		/* writers remain and neither flush nor commit_now was requested: nothing more to do here */
3722a9dd3643SJeff Mahoney 		unlock_journal(sb);
37231da177e4SLinus Torvalds 		return 0;
37241da177e4SLinus Torvalds 	}
37251da177e4SLinus Torvalds 
37261da177e4SLinus Torvalds 	/* deal with old transactions where we are the last writers */
37271da177e4SLinus Torvalds 	now = get_seconds();
37281da177e4SLinus Torvalds 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
37291da177e4SLinus Torvalds 		commit_now = 1;
37301da177e4SLinus Torvalds 		journal->j_next_async_flush = 1;
37311da177e4SLinus Torvalds 	}
37321da177e4SLinus Torvalds 	/* don't batch when someone is waiting on j_join_wait */
37331da177e4SLinus Torvalds 	/* don't batch when syncing the commit or flushing the whole trans */
3734bd4c625cSLinus Torvalds 	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3735bd4c625cSLinus Torvalds 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3736bd4c625cSLinus Torvalds 	    && journal->j_len_alloc < journal->j_max_batch
3737bd4c625cSLinus Torvalds 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
		/* batching: count it in j_bcount and leave the transaction open */
37381da177e4SLinus Torvalds 		journal->j_bcount++;
3739a9dd3643SJeff Mahoney 		unlock_journal(sb);
37401da177e4SLinus Torvalds 		return 0;
37411da177e4SLinus Torvalds 	}
37421da177e4SLinus Torvalds 
3743a9dd3643SJeff Mahoney 	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3744a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-003",
3745c3a9c210SJeff Mahoney 			       "j_start (%ld) is too high",
3746bd4c625cSLinus Torvalds 			       journal->j_start);
37471da177e4SLinus Torvalds 	}
	/* unlike the return-0 paths above, unlock_journal() has not been called on this path */
37481da177e4SLinus Torvalds 	return 1;
37491da177e4SLinus Torvalds }
37501da177e4SLinus Torvalds 
37511da177e4SLinus Torvalds /*
37521da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe.
37531da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
37541da177e4SLinus Torvalds **
37551da177e4SLinus Torvalds ** otherwise:
37561da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
37571da177e4SLinus Torvalds ** before this transaction has finished.
37581da177e4SLinus Torvalds **
37591da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
37601da177e4SLinus Torvalds ** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
37611da177e4SLinus Torvalds ** the block can't be reallocated yet.
37621da177e4SLinus Torvalds **
37631da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
37641da177e4SLinus Torvalds */
3765bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th,
3766a9dd3643SJeff Mahoney 		       struct super_block *sb, b_blocknr_t blocknr)
3767bd4c625cSLinus Torvalds {
3768a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37691da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
37701da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
37711da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
37721da177e4SLinus Torvalds 	int cleaned = 0;
37731da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
37741da177e4SLinus Torvalds 
3775a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
37761da177e4SLinus Torvalds 	if (cn && cn->bh) {
37771da177e4SLinus Torvalds 		bh = cn->bh;
37781da177e4SLinus Torvalds 		get_bh(bh);
37791da177e4SLinus Torvalds 	}
37801da177e4SLinus Torvalds 	/* if it is journal new, we just remove it from this transaction */
37811da177e4SLinus Torvalds 	if (bh && buffer_journal_new(bh)) {
37821da177e4SLinus Torvalds 		clear_buffer_journal_new(bh);
37831da177e4SLinus Torvalds 		clear_prepared_bits(bh);
37841da177e4SLinus Torvalds 		reiserfs_clean_and_file_buffer(bh);
3785a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
37861da177e4SLinus Torvalds 	} else {
37871da177e4SLinus Torvalds 		/* set the bit for this block in the journal bitmap for this transaction */
37881da177e4SLinus Torvalds 		jb = journal->j_current_jl->j_list_bitmap;
37891da177e4SLinus Torvalds 		if (!jb) {
3790a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-1702",
3791c3a9c210SJeff Mahoney 				       "journal_list_bitmap is NULL");
37921da177e4SLinus Torvalds 		}
3793a9dd3643SJeff Mahoney 		set_bit_in_list_bitmap(sb, blocknr, jb);
37941da177e4SLinus Torvalds 
37951da177e4SLinus Torvalds 		/* Note, the entire while loop is not allowed to schedule.  */
37961da177e4SLinus Torvalds 
37971da177e4SLinus Torvalds 		if (bh) {
37981da177e4SLinus Torvalds 			clear_prepared_bits(bh);
37991da177e4SLinus Torvalds 			reiserfs_clean_and_file_buffer(bh);
38001da177e4SLinus Torvalds 		}
3801a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
38021da177e4SLinus Torvalds 
38031da177e4SLinus Torvalds 		/* find all older transactions with this block, make sure they don't try to write it out */
3804a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3805bd4c625cSLinus Torvalds 					  blocknr);
38061da177e4SLinus Torvalds 		while (cn) {
3807a9dd3643SJeff Mahoney 			if (sb == cn->sb && blocknr == cn->blocknr) {
38081da177e4SLinus Torvalds 				set_bit(BLOCK_FREED, &cn->state);
38091da177e4SLinus Torvalds 				if (cn->bh) {
38101da177e4SLinus Torvalds 					if (!cleaned) {
38111da177e4SLinus Torvalds 						/* remove_from_transaction will brelse the buffer if it was
38121da177e4SLinus Torvalds 						 ** in the current trans
38131da177e4SLinus Torvalds 						 */
3814bd4c625cSLinus Torvalds 						clear_buffer_journal_dirty(cn->
3815bd4c625cSLinus Torvalds 									   bh);
38161da177e4SLinus Torvalds 						clear_buffer_dirty(cn->bh);
3817bd4c625cSLinus Torvalds 						clear_buffer_journal_test(cn->
3818bd4c625cSLinus Torvalds 									  bh);
38191da177e4SLinus Torvalds 						cleaned = 1;
38201da177e4SLinus Torvalds 						put_bh(cn->bh);
3821bd4c625cSLinus Torvalds 						if (atomic_read
3822bd4c625cSLinus Torvalds 						    (&(cn->bh->b_count)) < 0) {
3823a9dd3643SJeff Mahoney 							reiserfs_warning(sb,
382445b03d5eSJeff Mahoney 								 "journal-2138",
382545b03d5eSJeff Mahoney 								 "cn->bh->b_count < 0");
38261da177e4SLinus Torvalds 						}
38271da177e4SLinus Torvalds 					}
38281da177e4SLinus Torvalds 					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
3829bd4c625cSLinus Torvalds 						atomic_dec(&
3830bd4c625cSLinus Torvalds 							   (cn->jlist->
3831bd4c625cSLinus Torvalds 							    j_nonzerolen));
38321da177e4SLinus Torvalds 					}
38331da177e4SLinus Torvalds 					cn->bh = NULL;
38341da177e4SLinus Torvalds 				}
38351da177e4SLinus Torvalds 			}
38361da177e4SLinus Torvalds 			cn = cn->hnext;
38371da177e4SLinus Torvalds 		}
38381da177e4SLinus Torvalds 	}
38391da177e4SLinus Torvalds 
3840398c95bdSChris Mason 	if (bh)
3841398c95bdSChris Mason 		release_buffer_page(bh); /* get_hash grabs the buffer */
38421da177e4SLinus Torvalds 	return 0;
38431da177e4SLinus Torvalds }
38441da177e4SLinus Torvalds 
3845bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode)
3846bd4c625cSLinus Torvalds {
38471da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
38481da177e4SLinus Torvalds 	REISERFS_I(inode)->i_jl = journal->j_current_jl;
38491da177e4SLinus Torvalds 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
38501da177e4SLinus Torvalds }
38511da177e4SLinus Torvalds 
38521da177e4SLinus Torvalds /*
38531da177e4SLinus Torvalds  * returns -1 on error, 0 if no commits/barriers were done and 1
38541da177e4SLinus Torvalds  * if a transaction was actually committed and the barrier was done
38551da177e4SLinus Torvalds  */
38561da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id,
38571da177e4SLinus Torvalds 			     struct reiserfs_journal_list *jl)
38581da177e4SLinus Torvalds {
38591da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
38601da177e4SLinus Torvalds 	struct super_block *sb = inode->i_sb;
38611da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
38621da177e4SLinus Torvalds 	int ret = 0;
38631da177e4SLinus Torvalds 
38641da177e4SLinus Torvalds 	/* is it from the current transaction, or from an unknown transaction? */
38651da177e4SLinus Torvalds 	if (id == journal->j_trans_id) {
38661da177e4SLinus Torvalds 		jl = journal->j_current_jl;
38671da177e4SLinus Torvalds 		/* try to let other writers come in and grow this transaction */
38681da177e4SLinus Torvalds 		let_transaction_grow(sb, id);
38691da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
38701da177e4SLinus Torvalds 			goto flush_commit_only;
38711da177e4SLinus Torvalds 		}
38721da177e4SLinus Torvalds 
38731da177e4SLinus Torvalds 		ret = journal_begin(&th, sb, 1);
38741da177e4SLinus Torvalds 		if (ret)
38751da177e4SLinus Torvalds 			return ret;
38761da177e4SLinus Torvalds 
38771da177e4SLinus Torvalds 		/* someone might have ended this transaction while we joined */
38781da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
3879bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3880bd4c625cSLinus Torvalds 						     1);
38811da177e4SLinus Torvalds 			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
38821da177e4SLinus Torvalds 			ret = journal_end(&th, sb, 1);
38831da177e4SLinus Torvalds 			goto flush_commit_only;
38841da177e4SLinus Torvalds 		}
38851da177e4SLinus Torvalds 
38861da177e4SLinus Torvalds 		ret = journal_end_sync(&th, sb, 1);
38871da177e4SLinus Torvalds 		if (!ret)
38881da177e4SLinus Torvalds 			ret = 1;
38891da177e4SLinus Torvalds 
38901da177e4SLinus Torvalds 	} else {
38911da177e4SLinus Torvalds 		/* this gets tricky, we have to make sure the journal list in
38921da177e4SLinus Torvalds 		 * the inode still exists.  We know the list is still around
38931da177e4SLinus Torvalds 		 * if we've got a larger transaction id than the oldest list
38941da177e4SLinus Torvalds 		 */
38951da177e4SLinus Torvalds 	      flush_commit_only:
38961da177e4SLinus Torvalds 		if (journal_list_still_alive(inode->i_sb, id)) {
38971da177e4SLinus Torvalds 			/*
38981da177e4SLinus Torvalds 			 * we only set ret to 1 when we know for sure
38991da177e4SLinus Torvalds 			 * the barrier hasn't been started yet on the commit
39001da177e4SLinus Torvalds 			 * block.
39011da177e4SLinus Torvalds 			 */
39021da177e4SLinus Torvalds 			if (atomic_read(&jl->j_commit_left) > 1)
39031da177e4SLinus Torvalds 				ret = 1;
39041da177e4SLinus Torvalds 			flush_commit_list(sb, jl, 1);
39051da177e4SLinus Torvalds 			if (journal->j_errno)
39061da177e4SLinus Torvalds 				ret = journal->j_errno;
39071da177e4SLinus Torvalds 		}
39081da177e4SLinus Torvalds 	}
39091da177e4SLinus Torvalds 	/* otherwise the list is gone, and long since committed */
39101da177e4SLinus Torvalds 	return ret;
39111da177e4SLinus Torvalds }
39121da177e4SLinus Torvalds 
3913bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode)
3914bd4c625cSLinus Torvalds {
3915600ed416SJeff Mahoney 	unsigned int id = REISERFS_I(inode)->i_trans_id;
39161da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
39171da177e4SLinus Torvalds 
39181da177e4SLinus Torvalds 	/* for the whole inode, assume unset id means it was
39191da177e4SLinus Torvalds 	 * changed in the current transaction.  More conservative
39201da177e4SLinus Torvalds 	 */
39211da177e4SLinus Torvalds 	if (!id || !jl) {
39221da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
39231da177e4SLinus Torvalds 		id = REISERFS_I(inode)->i_trans_id;
39241da177e4SLinus Torvalds 		/* jl will be updated in __commit_trans_jl */
39251da177e4SLinus Torvalds 	}
39261da177e4SLinus Torvalds 
39271da177e4SLinus Torvalds 	return __commit_trans_jl(inode, id, jl);
39281da177e4SLinus Torvalds }
39291da177e4SLinus Torvalds 
3930a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb,
3931bd4c625cSLinus Torvalds 				      struct buffer_head *bh)
3932bd4c625cSLinus Torvalds {
3933a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3934a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.restore_prepared);
39351da177e4SLinus Torvalds 	if (!bh) {
39361da177e4SLinus Torvalds 		return;
39371da177e4SLinus Torvalds 	}
39381da177e4SLinus Torvalds 	if (test_clear_buffer_journal_restore_dirty(bh) &&
39391da177e4SLinus Torvalds 	    buffer_journal_dirty(bh)) {
39401da177e4SLinus Torvalds 		struct reiserfs_journal_cnode *cn;
3941a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb,
39421da177e4SLinus Torvalds 					  journal->j_list_hash_table,
39431da177e4SLinus Torvalds 					  bh->b_blocknr);
39441da177e4SLinus Torvalds 		if (cn && can_dirty(cn)) {
39451da177e4SLinus Torvalds 			set_buffer_journal_test(bh);
39461da177e4SLinus Torvalds 			mark_buffer_dirty(bh);
39471da177e4SLinus Torvalds 		}
39481da177e4SLinus Torvalds 	}
39491da177e4SLinus Torvalds 	clear_buffer_journal_prepared(bh);
39501da177e4SLinus Torvalds }
39511da177e4SLinus Torvalds 
39521da177e4SLinus Torvalds extern struct tree_balance *cur_tb;
39531da177e4SLinus Torvalds /*
39541da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't
39551da177e4SLinus Torvalds ** be written to disk while we are altering it.  So, we must:
39561da177e4SLinus Torvalds ** clean it
39571da177e4SLinus Torvalds ** wait on it.
39581da177e4SLinus Torvalds **
39591da177e4SLinus Torvalds */
3960a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb,
3961bd4c625cSLinus Torvalds 				 struct buffer_head *bh, int wait)
3962bd4c625cSLinus Torvalds {
3963a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.prepare);
39641da177e4SLinus Torvalds 
3965ca5de404SNick Piggin 	if (!trylock_buffer(bh)) {
39661da177e4SLinus Torvalds 		if (!wait)
39671da177e4SLinus Torvalds 			return 0;
39681da177e4SLinus Torvalds 		lock_buffer(bh);
39691da177e4SLinus Torvalds 	}
39701da177e4SLinus Torvalds 	set_buffer_journal_prepared(bh);
39711da177e4SLinus Torvalds 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
39721da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
39731da177e4SLinus Torvalds 		set_buffer_journal_restore_dirty(bh);
39741da177e4SLinus Torvalds 	}
39751da177e4SLinus Torvalds 	unlock_buffer(bh);
39761da177e4SLinus Torvalds 	return 1;
39771da177e4SLinus Torvalds }
39781da177e4SLinus Torvalds 
3979bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s)
3980bd4c625cSLinus Torvalds {
39811da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
39821da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
39831da177e4SLinus Torvalds 	struct list_head *entry;
39841da177e4SLinus Torvalds 	time_t now = get_seconds();
39851da177e4SLinus Torvalds 
39861da177e4SLinus Torvalds 	while (!list_empty(&journal->j_journal_list)) {
39871da177e4SLinus Torvalds 		entry = journal->j_journal_list.next;
39881da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
39891da177e4SLinus Torvalds 		/* this check should always be run, to send old lists to disk */
3990a3172027SChris Mason 		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3991a3172027SChris Mason 		    atomic_read(&jl->j_commit_left) == 0 &&
3992a3172027SChris Mason 		    test_transaction(s, jl)) {
39931da177e4SLinus Torvalds 			flush_used_journal_lists(s, jl);
39941da177e4SLinus Torvalds 		} else {
39951da177e4SLinus Torvalds 			break;
39961da177e4SLinus Torvalds 		}
39971da177e4SLinus Torvalds 	}
39981da177e4SLinus Torvalds }
39991da177e4SLinus Torvalds 
40001da177e4SLinus Torvalds /*
40011da177e4SLinus Torvalds ** long and ugly.  If flush, will not return until all commit
40021da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk.
40031da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk.
40041da177e4SLinus Torvalds **
40051da177e4SLinus Torvalds ** keep reading, there are comments as you go along
40061da177e4SLinus Torvalds **
40071da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing
40081da177e4SLinus Torvalds ** journal lists, etc just won't happen.
**
** th      - handle of the transaction being ended.  It is zeroed before
**           returning; only th->t_super is set again (to sb).
** sb      - super block whose journal (SB_JOURNAL(sb)) is operated on.
** nblocks - block count, only passed through to check_journal_end().
** flags   - FLUSH_ALL / COMMIT_NOW / WAIT bits.  FLUSH_ALL selects the
**           "flush" path and WAIT the "wait_on_commit" path below; the
**           journal's j_next_full_flush / j_next_async_flush and a
**           transaction id of ~0U can force these bits on.
**
** Returns journal->j_errno.
40091da177e4SLinus Torvalds */
4010bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th,
4011a9dd3643SJeff Mahoney 			  struct super_block *sb, unsigned long nblocks,
4012bd4c625cSLinus Torvalds 			  int flags)
4013bd4c625cSLinus Torvalds {
4014a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
40151da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
40161da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *last_cn = NULL;
40171da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
40181da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
40191da177e4SLinus Torvalds 	struct buffer_head *c_bh;	/* commit bh */
40201da177e4SLinus Torvalds 	struct buffer_head *d_bh;	/* desc bh */
40211da177e4SLinus Torvalds 	int cur_write_start = 0;	/* start index of current log write */
40221da177e4SLinus Torvalds 	int old_start;
40231da177e4SLinus Torvalds 	int i;
4024a44c94a7SAlexander Zarochentsev 	int flush;
4025a44c94a7SAlexander Zarochentsev 	int wait_on_commit;
40261da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl, *temp_jl;
40271da177e4SLinus Torvalds 	struct list_head *entry, *safe;
40281da177e4SLinus Torvalds 	unsigned long jindex;
4029600ed416SJeff Mahoney 	unsigned int commit_trans_id;
40301da177e4SLinus Torvalds 	int trans_half;
40311da177e4SLinus Torvalds 
	/* the handle must have t_refcount <= 1 and a non-zero t_trans_id
	 * by the time it gets here; anything else is a BUG */
40321da177e4SLinus Torvalds 	BUG_ON(th->t_refcount > 1);
40331da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
40341da177e4SLinus Torvalds 
4035a44c94a7SAlexander Zarochentsev 	/* protect flush_older_commits from doing mistakes if the
4036a44c94a7SAlexander Zarochentsev            transaction ID counter gets overflowed.  */
4037600ed416SJeff Mahoney 	if (th->t_trans_id == ~0U)
4038a44c94a7SAlexander Zarochentsev 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
4039a44c94a7SAlexander Zarochentsev 	flush = flags & FLUSH_ALL;
4040a44c94a7SAlexander Zarochentsev 	wait_on_commit = flags & WAIT;
4041a44c94a7SAlexander Zarochentsev 
404222e2c507SJens Axboe 	put_fs_excl();
	/* point current->journal_info back at the value kept in the handle */
40431da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
4044a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end");
	/* nothing logged so far: prepare and log the buffer returned by
	 * SB_BUFFER_WITH_SB(sb).  NOTE(review): presumably so the
	 * transaction is never empty (see the BUG_ON on j_len below) --
	 * confirm */
40451da177e4SLinus Torvalds 	if (journal->j_len == 0) {
4046a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
4047bd4c625cSLinus Torvalds 					     1);
4048a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
40491da177e4SLinus Torvalds 	}
40501da177e4SLinus Torvalds 
4051a9dd3643SJeff Mahoney 	lock_journal(sb);
	/* flush requests recorded on the journal itself upgrade the
	 * caller's flags */
40521da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
40531da177e4SLinus Torvalds 		flags |= FLUSH_ALL;
40541da177e4SLinus Torvalds 		flush = 1;
40551da177e4SLinus Torvalds 	}
40561da177e4SLinus Torvalds 	if (journal->j_next_async_flush) {
40571da177e4SLinus Torvalds 		flags |= COMMIT_NOW | WAIT;
40581da177e4SLinus Torvalds 		wait_on_commit = 1;
40591da177e4SLinus Torvalds 	}
40601da177e4SLinus Torvalds 
40611da177e4SLinus Torvalds 	/* check_journal_end locks the journal, and unlocks if it does not return 1
40621da177e4SLinus Torvalds 	 ** it tells us if we should continue with the journal_end, or just return
40631da177e4SLinus Torvalds 	 */
4064a9dd3643SJeff Mahoney 	if (!check_journal_end(th, sb, nblocks, flags)) {
		/* not continuing: mark the super block dirty, wake queued
		 * writers, wait for async progress, then go to the common
		 * cleanup at "out" */
4065a9dd3643SJeff Mahoney 		sb->s_dirt = 1;
4066a9dd3643SJeff Mahoney 		wake_queued_writers(sb);
4067a9dd3643SJeff Mahoney 		reiserfs_async_progress_wait(sb);
40681da177e4SLinus Torvalds 		goto out;
40691da177e4SLinus Torvalds 	}
40701da177e4SLinus Torvalds 
40711da177e4SLinus Torvalds 	/* check_journal_end might set these, check again */
40721da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
40731da177e4SLinus Torvalds 		flush = 1;
40741da177e4SLinus Torvalds 	}
40751da177e4SLinus Torvalds 
40761da177e4SLinus Torvalds 	/*
40771da177e4SLinus Torvalds 	 ** j must wait means we have to flush the log blocks, and the real blocks for
40781da177e4SLinus Torvalds 	 ** this transaction
40791da177e4SLinus Torvalds 	 */
40801da177e4SLinus Torvalds 	if (journal->j_must_wait > 0) {
40811da177e4SLinus Torvalds 		flush = 1;
40821da177e4SLinus Torvalds 	}
40831da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
4084ef43bc4fSJan Kara 	/* quota ops might need to nest, setup the journal_info pointer for them
4085ef43bc4fSJan Kara 	 * and raise the refcount so that it is > 0. */
40861da177e4SLinus Torvalds 	current->journal_info = th;
4087ef43bc4fSJan Kara 	th->t_refcount++;
40881da177e4SLinus Torvalds 	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
40891da177e4SLinus Torvalds 						 * the transaction */
4090ef43bc4fSJan Kara 	th->t_refcount--;
40911da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
40921da177e4SLinus Torvalds #endif
40931da177e4SLinus Torvalds 
40941da177e4SLinus Torvalds 	/* setup description block */
	/* it is the journal block at offset j_start; it is zeroed, stamped
	 * with JOURNAL_DESC_MAGIC and given the current transaction id */
4095bd4c625cSLinus Torvalds 	d_bh =
4096a9dd3643SJeff Mahoney 	    journal_getblk(sb,
4097a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4098bd4c625cSLinus Torvalds 			   journal->j_start);
40991da177e4SLinus Torvalds 	set_buffer_uptodate(d_bh);
41001da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
41011da177e4SLinus Torvalds 	memset(d_bh->b_data, 0, d_bh->b_size);
41021da177e4SLinus Torvalds 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
41031da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
41041da177e4SLinus Torvalds 
41051da177e4SLinus Torvalds 	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
	/* the commit block sits j_len + 1 blocks after the desc block,
	 * wrapping at the on-disk journal size */
4106a9dd3643SJeff Mahoney 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4107bd4c625cSLinus Torvalds 			      ((journal->j_start + journal->j_len +
4108a9dd3643SJeff Mahoney 				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
41091da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
41101da177e4SLinus Torvalds 	memset(c_bh->b_data, 0, c_bh->b_size);
41111da177e4SLinus Torvalds 	set_commit_trans_id(commit, journal->j_trans_id);
41121da177e4SLinus Torvalds 	set_buffer_uptodate(c_bh);
41131da177e4SLinus Torvalds 
41141da177e4SLinus Torvalds 	/* init this journal list */
41151da177e4SLinus Torvalds 	jl = journal->j_current_jl;
41161da177e4SLinus Torvalds 
41171da177e4SLinus Torvalds 	/* we lock the commit before doing anything because
41181da177e4SLinus Torvalds 	 * we want to make sure nobody tries to run flush_commit_list until
41191da177e4SLinus Torvalds 	 * the new transaction is fully setup, and we've already flushed the
41201da177e4SLinus Torvalds 	 * ordered bh list
41211da177e4SLinus Torvalds 	 */
41228ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
41231da177e4SLinus Torvalds 
41241da177e4SLinus Torvalds 	/* save the transaction id in case we need to commit it later */
41251da177e4SLinus Torvalds 	commit_trans_id = jl->j_trans_id;
41261da177e4SLinus Torvalds 
	/* copy the running transaction's id, start time, position and
	 * length from the journal into this journal list */
41271da177e4SLinus Torvalds 	atomic_set(&jl->j_older_commits_done, 0);
41281da177e4SLinus Torvalds 	jl->j_trans_id = journal->j_trans_id;
41291da177e4SLinus Torvalds 	jl->j_timestamp = journal->j_trans_start_time;
41301da177e4SLinus Torvalds 	jl->j_commit_bh = c_bh;
41311da177e4SLinus Torvalds 	jl->j_start = journal->j_start;
41321da177e4SLinus Torvalds 	jl->j_len = journal->j_len;
41331da177e4SLinus Torvalds 	atomic_set(&jl->j_nonzerolen, journal->j_len);
	/* NOTE(review): the "+ 2" presumably accounts for the desc and
	 * commit blocks on top of the j_len log blocks -- confirm */
41341da177e4SLinus Torvalds 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
41351da177e4SLinus Torvalds 	jl->j_realblock = NULL;
41361da177e4SLinus Torvalds 
41371da177e4SLinus Torvalds 	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
41381da177e4SLinus Torvalds 	 **  for each real block, add it to the journal list hash,
41391da177e4SLinus Torvalds 	 ** copy into real block index array in the commit or desc block
41401da177e4SLinus Torvalds 	 */
4141a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
41421da177e4SLinus Torvalds 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
41431da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
4144a9dd3643SJeff Mahoney 			jl_cn = get_cnode(sb);
41451da177e4SLinus Torvalds 			if (!jl_cn) {
4146a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-1676",
4147c3a9c210SJeff Mahoney 					       "get_cnode returned NULL");
41481da177e4SLinus Torvalds 			}
			/* the first logged block heads the list's cnode chain */
41491da177e4SLinus Torvalds 			if (i == 0) {
41501da177e4SLinus Torvalds 				jl->j_realblock = jl_cn;
41511da177e4SLinus Torvalds 			}
			/* append jl_cn to the doubly linked chain ending at last_cn */
41521da177e4SLinus Torvalds 			jl_cn->prev = last_cn;
41531da177e4SLinus Torvalds 			jl_cn->next = NULL;
41541da177e4SLinus Torvalds 			if (last_cn) {
41551da177e4SLinus Torvalds 				last_cn->next = jl_cn;
41561da177e4SLinus Torvalds 			}
41571da177e4SLinus Torvalds 			last_cn = jl_cn;
41581da177e4SLinus Torvalds 			/* make sure the block we are trying to log is not a block
41591da177e4SLinus Torvalds 			   of journal or reserved area */
41601da177e4SLinus Torvalds 
4161bd4c625cSLinus Torvalds 			if (is_block_in_log_or_reserved_area
4162a9dd3643SJeff Mahoney 			    (sb, cn->bh->b_blocknr)) {
4163a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-2332",
4164c3a9c210SJeff Mahoney 					       "Trying to log block %lu, "
4165c3a9c210SJeff Mahoney 					       "which is a log block",
4166bd4c625cSLinus Torvalds 					       cn->bh->b_blocknr);
41671da177e4SLinus Torvalds 			}
41681da177e4SLinus Torvalds 			jl_cn->blocknr = cn->bh->b_blocknr;
41691da177e4SLinus Torvalds 			jl_cn->state = 0;
4170a9dd3643SJeff Mahoney 			jl_cn->sb = sb;
41711da177e4SLinus Torvalds 			jl_cn->bh = cn->bh;
41721da177e4SLinus Torvalds 			jl_cn->jlist = jl;
41731da177e4SLinus Torvalds 			insert_journal_hash(journal->j_list_hash_table, jl_cn);
			/* the first trans_half real block numbers are stored in
			 * the desc block, the remainder in the commit block */
41741da177e4SLinus Torvalds 			if (i < trans_half) {
4175bd4c625cSLinus Torvalds 				desc->j_realblock[i] =
4176bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
41771da177e4SLinus Torvalds 			} else {
4178bd4c625cSLinus Torvalds 				commit->j_realblock[i - trans_half] =
4179bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
41801da177e4SLinus Torvalds 			}
41811da177e4SLinus Torvalds 		} else {
			/* not logged: cancel the i++ in the loop header so that
			 * i only counts logged blocks */
41821da177e4SLinus Torvalds 			i--;
41831da177e4SLinus Torvalds 		}
41841da177e4SLinus Torvalds 	}
41851da177e4SLinus Torvalds 	set_desc_trans_len(desc, journal->j_len);
41861da177e4SLinus Torvalds 	set_desc_mount_id(desc, journal->j_mount_id);
41871da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
41881da177e4SLinus Torvalds 	set_commit_trans_len(commit, journal->j_len);
41891da177e4SLinus Torvalds 
41901da177e4SLinus Torvalds 	/* special check in case all buffers in the journal were marked for not logging */
419114a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
41921da177e4SLinus Torvalds 
41931da177e4SLinus Torvalds 	/* we're about to dirty all the log blocks, mark the description block
41941da177e4SLinus Torvalds 	 * dirty now too.  Don't mark the commit block dirty until all the
41951da177e4SLinus Torvalds 	 * others are on disk
41961da177e4SLinus Torvalds 	 */
41971da177e4SLinus Torvalds 	mark_buffer_dirty(d_bh);
41981da177e4SLinus Torvalds 
41991da177e4SLinus Torvalds 	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
42001da177e4SLinus Torvalds 	cur_write_start = journal->j_start;
42011da177e4SLinus Torvalds 	cn = journal->j_first;
42021da177e4SLinus Torvalds 	jindex = 1;		/* start at one so we don't get the desc again */
42031da177e4SLinus Torvalds 	while (cn) {
42041da177e4SLinus Torvalds 		clear_buffer_journal_new(cn->bh);
42051da177e4SLinus Torvalds 		/* copy all the real blocks into log area.  dirty log blocks */
42061da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
42071da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
42081da177e4SLinus Torvalds 			char *addr;
42091da177e4SLinus Torvalds 			struct page *page;
4210bd4c625cSLinus Torvalds 			tmp_bh =
4211a9dd3643SJeff Mahoney 			    journal_getblk(sb,
4212a9dd3643SJeff Mahoney 					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4213bd4c625cSLinus Torvalds 					   ((cur_write_start +
4214bd4c625cSLinus Torvalds 					     jindex) %
4215a9dd3643SJeff Mahoney 					    SB_ONDISK_JOURNAL_SIZE(sb)));
42161da177e4SLinus Torvalds 			set_buffer_uptodate(tmp_bh);
			/* map the real buffer's page and copy b_size bytes of
			 * its contents into the log buffer */
42171da177e4SLinus Torvalds 			page = cn->bh->b_page;
42181da177e4SLinus Torvalds 			addr = kmap(page);
4219bd4c625cSLinus Torvalds 			memcpy(tmp_bh->b_data,
4220bd4c625cSLinus Torvalds 			       addr + offset_in_page(cn->bh->b_data),
42211da177e4SLinus Torvalds 			       cn->bh->b_size);
42221da177e4SLinus Torvalds 			kunmap(page);
42231da177e4SLinus Torvalds 			mark_buffer_dirty(tmp_bh);
42241da177e4SLinus Torvalds 			jindex++;
			/* the real buffer goes from "journaled" to
			 * "journal_dirty" */
42251da177e4SLinus Torvalds 			set_buffer_journal_dirty(cn->bh);
42261da177e4SLinus Torvalds 			clear_buffer_journaled(cn->bh);
42271da177e4SLinus Torvalds 		} else {
42281da177e4SLinus Torvalds 			/* JDirty cleared sometime during transaction.  don't log this one */
4229a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2048",
423045b03d5eSJeff Mahoney 					 "BAD, buffer in journal hash, "
423145b03d5eSJeff Mahoney 					 "but not JDirty!");
42321da177e4SLinus Torvalds 			brelse(cn->bh);
42331da177e4SLinus Torvalds 		}
		/* fetch the successor before the cnode is freed */
42341da177e4SLinus Torvalds 		next = cn->next;
4235a9dd3643SJeff Mahoney 		free_cnode(sb, cn);
42361da177e4SLinus Torvalds 		cn = next;
		/* the write lock is released around cond_resched() and
		 * taken again afterwards */
4237e6950a4dSFrederic Weisbecker 		reiserfs_write_unlock(sb);
42381da177e4SLinus Torvalds 		cond_resched();
4239e6950a4dSFrederic Weisbecker 		reiserfs_write_lock(sb);
42401da177e4SLinus Torvalds 	}
42411da177e4SLinus Torvalds 
42421da177e4SLinus Torvalds 	/* we are done  with both the c_bh and d_bh, but
42431da177e4SLinus Torvalds 	 ** c_bh must be written after all other commit blocks,
42441da177e4SLinus Torvalds 	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
42451da177e4SLinus Torvalds 	 */
42461da177e4SLinus Torvalds 
	/* the next transaction gets a newly allocated journal list; jl
	 * still refers to the list that was just filled in */
4247a9dd3643SJeff Mahoney 	journal->j_current_jl = alloc_journal_list(sb);
42481da177e4SLinus Torvalds 
42491da177e4SLinus Torvalds 	/* now it is safe to insert this transaction on the main list */
42501da177e4SLinus Torvalds 	list_add_tail(&jl->j_list, &journal->j_journal_list);
42511da177e4SLinus Torvalds 	list_add_tail(&jl->j_working_list, &journal->j_working_list);
42521da177e4SLinus Torvalds 	journal->j_num_work_lists++;
42531da177e4SLinus Torvalds 
42541da177e4SLinus Torvalds 	/* reset journal values for the next transaction */
42551da177e4SLinus Torvalds 	old_start = journal->j_start;
	/* the new start is j_len + 2 blocks past the old one, wrapping at
	 * the on-disk journal size */
4256bd4c625cSLinus Torvalds 	journal->j_start =
4257bd4c625cSLinus Torvalds 	    (journal->j_start + journal->j_len +
4258a9dd3643SJeff Mahoney 	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
42591da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
42601da177e4SLinus Torvalds 	journal->j_bcount = 0;
42611da177e4SLinus Torvalds 	journal->j_last = NULL;
42621da177e4SLinus Torvalds 	journal->j_first = NULL;
42631da177e4SLinus Torvalds 	journal->j_len = 0;
42641da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
4265a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
	/* when the increment wraps to 0 the id restarts at 10 */
4266a44c94a7SAlexander Zarochentsev 	if (++journal->j_trans_id == 0)
4267a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
42681da177e4SLinus Torvalds 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
42691da177e4SLinus Torvalds 	journal->j_must_wait = 0;
42701da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
42711da177e4SLinus Torvalds 	journal->j_next_full_flush = 0;
42721da177e4SLinus Torvalds 	journal->j_next_async_flush = 0;
4273a9dd3643SJeff Mahoney 	init_journal_hash(sb);
42741da177e4SLinus Torvalds 
42751da177e4SLinus Torvalds 	// make sure reiserfs_add_jh sees the new current_jl before we
42761da177e4SLinus Torvalds 	// write out the tails
42771da177e4SLinus Torvalds 	smp_mb();
42781da177e4SLinus Torvalds 
42791da177e4SLinus Torvalds 	/* tail conversion targets have to hit the disk before we end the
42801da177e4SLinus Torvalds 	 * transaction.  Otherwise a later transaction might repack the tail
42811da177e4SLinus Torvalds 	 * before this transaction commits, leaving the data block unflushed and
42821da177e4SLinus Torvalds 	 * clean, if we crash before the later transaction commits, the data block
42831da177e4SLinus Torvalds 	 * is lost.
42841da177e4SLinus Torvalds 	 */
42851da177e4SLinus Torvalds 	if (!list_empty(&jl->j_tail_bh_list)) {
		/* the write lock is dropped while the ordered buffers are
		 * written out and retaken afterwards */
42868ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
42871da177e4SLinus Torvalds 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
42881da177e4SLinus Torvalds 				      journal, jl, &jl->j_tail_bh_list);
42898ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
42901da177e4SLinus Torvalds 	}
429114a61442SEric Sesterhenn 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
429290415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
42931da177e4SLinus Torvalds 
42941da177e4SLinus Torvalds 	/* honor the flush wishes from the caller, simple commits can
42951da177e4SLinus Torvalds 	 ** be done outside the journal lock, they are done below
42961da177e4SLinus Torvalds 	 **
42971da177e4SLinus Torvalds 	 ** if we don't flush the commit list right now, we put it into
42981da177e4SLinus Torvalds 	 ** the work queue so the people waiting on the async progress work
42991da177e4SLinus Torvalds 	 ** queue don't wait for this proc to flush journal lists and such.
43001da177e4SLinus Torvalds 	 */
43011da177e4SLinus Torvalds 	if (flush) {
4302a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
4303a9dd3643SJeff Mahoney 		flush_journal_list(sb, jl, 1);
43041da177e4SLinus Torvalds 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
43051da177e4SLinus Torvalds 		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
43061da177e4SLinus Torvalds 
43071da177e4SLinus Torvalds 	/* if the next transaction has any chance of wrapping, flush
43081da177e4SLinus Torvalds 	 ** transactions that might get overwritten.  If any journal lists are very
43091da177e4SLinus Torvalds 	 ** old flush them as well.
43101da177e4SLinus Torvalds 	 */
	/* the scan is restarted from the list head (goto first_jl) after
	 * every flush.  NOTE(review): presumably because
	 * flush_used_journal_lists() can change the list -- confirm */
43111da177e4SLinus Torvalds       first_jl:
43121da177e4SLinus Torvalds 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
43131da177e4SLinus Torvalds 		temp_jl = JOURNAL_LIST_ENTRY(entry);
43141da177e4SLinus Torvalds 		if (journal->j_start <= temp_jl->j_start) {
43151da177e4SLinus Torvalds 			if ((journal->j_start + journal->j_trans_max + 1) >=
4316bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4317a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
43181da177e4SLinus Torvalds 				goto first_jl;
43191da177e4SLinus Torvalds 			} else if ((journal->j_start +
43201da177e4SLinus Torvalds 				    journal->j_trans_max + 1) <
4321a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_SIZE(sb)) {
43221da177e4SLinus Torvalds 				/* if we don't cross into the next transaction and we don't
43231da177e4SLinus Torvalds 				 * wrap, there is no way we can overlap any later transactions
43241da177e4SLinus Torvalds 				 * break now
43251da177e4SLinus Torvalds 				 */
43261da177e4SLinus Torvalds 				break;
43271da177e4SLinus Torvalds 			}
43281da177e4SLinus Torvalds 		} else if ((journal->j_start +
43291da177e4SLinus Torvalds 			    journal->j_trans_max + 1) >
4330a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_SIZE(sb)) {
43311da177e4SLinus Torvalds 			if (((journal->j_start + journal->j_trans_max + 1) %
4332a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb)) >=
4333bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4334a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
43351da177e4SLinus Torvalds 				goto first_jl;
43361da177e4SLinus Torvalds 			} else {
43371da177e4SLinus Torvalds 				/* we don't overlap anything from out start to the end of the
43381da177e4SLinus Torvalds 				 * log, and our wrapped portion doesn't overlap anything at
43391da177e4SLinus Torvalds 				 * the start of the log.  We can break
43401da177e4SLinus Torvalds 				 */
43411da177e4SLinus Torvalds 				break;
43421da177e4SLinus Torvalds 			}
43431da177e4SLinus Torvalds 		}
43441da177e4SLinus Torvalds 	}
4345a9dd3643SJeff Mahoney 	flush_old_journal_lists(sb);
43461da177e4SLinus Torvalds 
	/* the new current list needs a list bitmap; failing to get one
	 * is treated as fatal */
4347bd4c625cSLinus Torvalds 	journal->j_current_jl->j_list_bitmap =
4348a9dd3643SJeff Mahoney 	    get_list_bitmap(sb, journal->j_current_jl);
43491da177e4SLinus Torvalds 
43501da177e4SLinus Torvalds 	if (!(journal->j_current_jl->j_list_bitmap)) {
4351a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-1996",
4352c3a9c210SJeff Mahoney 			       "could not get a list bitmap");
43531da177e4SLinus Torvalds 	}
43541da177e4SLinus Torvalds 
43551da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
4356a9dd3643SJeff Mahoney 	unlock_journal(sb);
43571da177e4SLinus Torvalds 	/* wake up any body waiting to join. */
43581da177e4SLinus Torvalds 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
43591da177e4SLinus Torvalds 	wake_up(&(journal->j_join_wait));
43601da177e4SLinus Torvalds 
	/* no full flush was done above but the commit must be waited for:
	 * flush the commit list now, after the journal has been unlocked,
	 * provided the saved transaction id is still alive */
43611da177e4SLinus Torvalds 	if (!flush && wait_on_commit &&
4362a9dd3643SJeff Mahoney 	    journal_list_still_alive(sb, commit_trans_id)) {
4363a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
43641da177e4SLinus Torvalds 	}
43651da177e4SLinus Torvalds       out:
4366a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end2");
43671da177e4SLinus Torvalds 
43681da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
43691da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
43701da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
43711da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
4372a9dd3643SJeff Mahoney 	th->t_super = sb;
43731da177e4SLinus Torvalds 
43741da177e4SLinus Torvalds 	return journal->j_errno;
43751da177e4SLinus Torvalds }
43761da177e4SLinus Torvalds 
437732e8b106SJeff Mahoney /* Send the file system read only and refuse new transactions */
437832e8b106SJeff Mahoney void reiserfs_abort_journal(struct super_block *sb, int errno)
43791da177e4SLinus Torvalds {
43801da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
43811da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
43821da177e4SLinus Torvalds 		return;
43831da177e4SLinus Torvalds 
438432e8b106SJeff Mahoney 	if (!journal->j_errno)
438532e8b106SJeff Mahoney 		journal->j_errno = errno;
43861da177e4SLinus Torvalds 
43871da177e4SLinus Torvalds 	sb->s_flags |= MS_RDONLY;
43881da177e4SLinus Torvalds 	set_bit(J_ABORTED, &journal->j_state);
43891da177e4SLinus Torvalds 
43901da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
43911da177e4SLinus Torvalds 	dump_stack();
43921da177e4SLinus Torvalds #endif
43931da177e4SLinus Torvalds }
43941da177e4SLinus Torvalds 
4395