xref: /openbmc/linux/fs/reiserfs/journal.c (revision 7cd33ad2)
/*
** Write ahead logging implementation copyright Chris Mason 2000
**
** The background commits make this code very interrelated, and
** overly complex.  I need to rethink things a bit....The major players:
**
** journal_begin -- call with the number of blocks you expect to log.
**                  If the current transaction is too
**                  old, it will block until the current transaction is
**                  finished, and then start a new one.
**                  Usually, your transaction will get joined in with
**                  previous ones for speed.
**
** journal_join  -- same as journal_begin, but won't block on the current
**                  transaction regardless of age.  Don't ever call
**                  this.  Ever.  There are only two places it should be
**                  called from, and they are both inside this file.
**
** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
**                       that might make them get sent to disk
**                       and then marks them BH_JDirty.  Puts the buffer head
**                       into the current transaction hash.
**
** journal_end -- if the current transaction is batchable, it does nothing
**                   otherwise, it could do an async/synchronous commit, or
**                   a full flush of all log and real blocks in the
**                   transaction.
**
** flush_old_commits -- if the current transaction is too old, it is ended and
**                      commit blocks are sent to disk.  Forces commit blocks
**                      to disk for all backgrounded commits that have been
**                      around too long.
**                   -- Note, if you call this as an immediate flush
**                      from within kupdate, it will ignore the immediate flag
*/
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <linux/time.h>
386188e10dSMatthew Wilcox #include <linux/semaphore.h>
391da177e4SLinus Torvalds #include <linux/vmalloc.h>
401da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
411da177e4SLinus Torvalds #include <linux/kernel.h>
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/fcntl.h>
441da177e4SLinus Torvalds #include <linux/stat.h>
451da177e4SLinus Torvalds #include <linux/string.h>
461da177e4SLinus Torvalds #include <linux/smp_lock.h>
471da177e4SLinus Torvalds #include <linux/buffer_head.h>
481da177e4SLinus Torvalds #include <linux/workqueue.h>
491da177e4SLinus Torvalds #include <linux/writeback.h>
501da177e4SLinus Torvalds #include <linux/blkdev.h>
513fcfab16SAndrew Morton #include <linux/backing-dev.h>
5290415deaSJeff Mahoney #include <linux/uaccess.h>
535a0e3ad6STejun Heo #include <linux/slab.h>
5490415deaSJeff Mahoney 
5590415deaSJeff Mahoney #include <asm/system.h>
561da177e4SLinus Torvalds 
/*
** Turn a struct list_head pointer back into the struct reiserfs_journal_list
** that embeds it.  JOURNAL_LIST_ENTRY is for heads linked through j_list,
** JOURNAL_WORK_ENTRY for heads linked through j_working_list.
*/
#define JOURNAL_LIST_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_list))
#define JOURNAL_WORK_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_working_list))
621da177e4SLinus Torvalds 
/*
** How many filesystems are currently mounted.  The count decides when the
** commit workqueue has to be started and when it can be killed.
*/
static int reiserfs_mounted_fs_count;

/* the commit workqueue itself */
static struct workqueue_struct *commit_wq;
691da177e4SLinus Torvalds 
/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018

/* read ahead */
#define BUFNR 64

/*
** cnode stat bits.  Move these into reiserfs_fs.h
*/

/* this block was freed, and can't be written. */
#define BLOCK_FREED 2
/* this block was freed during this transaction, and can't be written */
#define BLOCK_FREED_HOLDER 3

/* used in flush_journal_list */
#define BLOCK_NEEDS_FLUSH 4
#define BLOCK_DIRTIED 5

/*
** journal list state bits
*/
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
/* someone will commit this list */
#define LIST_COMMIT_PENDING  4

/*
** flags for do_journal_end
*/
/* flush commit and real blocks */
#define FLUSH_ALL   1
/* end and commit this transaction */
#define COMMIT_NOW  2
/* wait for the log blocks to hit the disk */
#define WAIT        4
/*
** Forward declarations for helpers that are used in this file before
** their definitions appear.
*/
static int do_journal_end(struct reiserfs_transaction_handle *th,
			  struct super_block *sb,
			  unsigned long nblocks, int flags);
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl,
			      int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl,
			     int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
			struct super_block *sb,
			unsigned long nblocks);
static int release_journal_dev(struct super_block *super,
			       struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);
/*
** values accepted for the 'join' argument of do_journal_begin_r
*/
enum {
	/* regular journal begin */
	JBEGIN_REG = 0,
	/* join the running transaction if at all possible */
	JBEGIN_JOIN = 1,
	/* called from cleanup code, ignores aborted flag */
	JBEGIN_ABORT = 2,
};

static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *sb, unsigned long nblocks,
			      int join);
120a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb)
121bd4c625cSLinus Torvalds {
122a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
123bd4c625cSLinus Torvalds 	memset(journal->j_hash_table, 0,
124bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
1251da177e4SLinus Torvalds }
1261da177e4SLinus Torvalds 
/*
** Clears the dirty bit and the journal_test bit on bh.  A NULL bh is
** accepted and ignored.  Always returns 0.
**
** This exists because refile_buffer must not be allowed to make schedule
** happen after a block has been freed.  Look at remove_from_transaction
** and journal_mark_freed for more details.
*/
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}
1401da177e4SLinus Torvalds 
141bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
142a9dd3643SJeff Mahoney 							 *sb)
143bd4c625cSLinus Torvalds {
1441da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
1451da177e4SLinus Torvalds 	static int id;
1461da177e4SLinus Torvalds 
147d739b42bSPekka Enberg 	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
1481da177e4SLinus Torvalds 	if (!bn) {
1491da177e4SLinus Torvalds 		return NULL;
1501da177e4SLinus Torvalds 	}
151a9dd3643SJeff Mahoney 	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
1521da177e4SLinus Torvalds 	if (!bn->data) {
153d739b42bSPekka Enberg 		kfree(bn);
1541da177e4SLinus Torvalds 		return NULL;
1551da177e4SLinus Torvalds 	}
1561da177e4SLinus Torvalds 	bn->id = id++;
1571da177e4SLinus Torvalds 	INIT_LIST_HEAD(&bn->list);
1581da177e4SLinus Torvalds 	return bn;
1591da177e4SLinus Torvalds }
1601da177e4SLinus Torvalds 
161a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
162bd4c625cSLinus Torvalds {
163a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1641da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
1651da177e4SLinus Torvalds 	struct list_head *entry = journal->j_bitmap_nodes.next;
1661da177e4SLinus Torvalds 
1671da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes++;
1681da177e4SLinus Torvalds       repeat:
1691da177e4SLinus Torvalds 
1701da177e4SLinus Torvalds 	if (entry != &journal->j_bitmap_nodes) {
1711da177e4SLinus Torvalds 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
1721da177e4SLinus Torvalds 		list_del(entry);
173a9dd3643SJeff Mahoney 		memset(bn->data, 0, sb->s_blocksize);
1741da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
1751da177e4SLinus Torvalds 		return bn;
1761da177e4SLinus Torvalds 	}
177a9dd3643SJeff Mahoney 	bn = allocate_bitmap_node(sb);
1781da177e4SLinus Torvalds 	if (!bn) {
1791da177e4SLinus Torvalds 		yield();
1801da177e4SLinus Torvalds 		goto repeat;
1811da177e4SLinus Torvalds 	}
1821da177e4SLinus Torvalds 	return bn;
1831da177e4SLinus Torvalds }
184a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb,
185bd4c625cSLinus Torvalds 				    struct reiserfs_bitmap_node *bn)
186bd4c625cSLinus Torvalds {
187a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1881da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes--;
1891da177e4SLinus Torvalds 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
190d739b42bSPekka Enberg 		kfree(bn->data);
191d739b42bSPekka Enberg 		kfree(bn);
1921da177e4SLinus Torvalds 	} else {
1931da177e4SLinus Torvalds 		list_add(&bn->list, &journal->j_bitmap_nodes);
1941da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes++;
1951da177e4SLinus Torvalds 	}
1961da177e4SLinus Torvalds }
1971da177e4SLinus Torvalds 
198a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb)
199bd4c625cSLinus Torvalds {
2001da177e4SLinus Torvalds 	int i;
201a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2021da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
2031da177e4SLinus Torvalds 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
204a9dd3643SJeff Mahoney 		bn = allocate_bitmap_node(sb);
2051da177e4SLinus Torvalds 		if (bn) {
2061da177e4SLinus Torvalds 			list_add(&bn->list, &journal->j_bitmap_nodes);
2071da177e4SLinus Torvalds 			journal->j_free_bitmap_nodes++;
2081da177e4SLinus Torvalds 		} else {
2090222e657SJeff Mahoney 			break;	/* this is ok, we'll try again when more are needed */
2101da177e4SLinus Torvalds 		}
2111da177e4SLinus Torvalds 	}
2121da177e4SLinus Torvalds }
2131da177e4SLinus Torvalds 
214a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb,
2153ee16670SJeff Mahoney 				  b_blocknr_t block,
216bd4c625cSLinus Torvalds 				  struct reiserfs_list_bitmap *jb)
217bd4c625cSLinus Torvalds {
218a9dd3643SJeff Mahoney 	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
219a9dd3643SJeff Mahoney 	unsigned int bit_nr = block % (sb->s_blocksize << 3);
2201da177e4SLinus Torvalds 
2211da177e4SLinus Torvalds 	if (!jb->bitmaps[bmap_nr]) {
222a9dd3643SJeff Mahoney 		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
2231da177e4SLinus Torvalds 	}
2241da177e4SLinus Torvalds 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
2251da177e4SLinus Torvalds 	return 0;
2261da177e4SLinus Torvalds }
2271da177e4SLinus Torvalds 
228a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb,
229bd4c625cSLinus Torvalds 				struct reiserfs_list_bitmap *jb)
230bd4c625cSLinus Torvalds {
2311da177e4SLinus Torvalds 	int i;
2321da177e4SLinus Torvalds 	if (jb->bitmaps == NULL)
2331da177e4SLinus Torvalds 		return;
2341da177e4SLinus Torvalds 
235a9dd3643SJeff Mahoney 	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
2361da177e4SLinus Torvalds 		if (jb->bitmaps[i]) {
237a9dd3643SJeff Mahoney 			free_bitmap_node(sb, jb->bitmaps[i]);
2381da177e4SLinus Torvalds 			jb->bitmaps[i] = NULL;
2391da177e4SLinus Torvalds 		}
2401da177e4SLinus Torvalds 	}
2411da177e4SLinus Torvalds }
2421da177e4SLinus Torvalds 
2431da177e4SLinus Torvalds /*
2441da177e4SLinus Torvalds ** only call this on FS unmount.
2451da177e4SLinus Torvalds */
246a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb,
247bd4c625cSLinus Torvalds 			     struct reiserfs_list_bitmap *jb_array)
248bd4c625cSLinus Torvalds {
2491da177e4SLinus Torvalds 	int i;
2501da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2511da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2521da177e4SLinus Torvalds 		jb = jb_array + i;
2531da177e4SLinus Torvalds 		jb->journal_list = NULL;
254a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
2551da177e4SLinus Torvalds 		vfree(jb->bitmaps);
2561da177e4SLinus Torvalds 		jb->bitmaps = NULL;
2571da177e4SLinus Torvalds 	}
2581da177e4SLinus Torvalds 	return 0;
2591da177e4SLinus Torvalds }
2601da177e4SLinus Torvalds 
261a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb)
262bd4c625cSLinus Torvalds {
263a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2641da177e4SLinus Torvalds 	struct list_head *next = journal->j_bitmap_nodes.next;
2651da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
2661da177e4SLinus Torvalds 
2671da177e4SLinus Torvalds 	while (next != &journal->j_bitmap_nodes) {
2681da177e4SLinus Torvalds 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
2691da177e4SLinus Torvalds 		list_del(next);
270d739b42bSPekka Enberg 		kfree(bn->data);
271d739b42bSPekka Enberg 		kfree(bn);
2721da177e4SLinus Torvalds 		next = journal->j_bitmap_nodes.next;
2731da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
2741da177e4SLinus Torvalds 	}
2751da177e4SLinus Torvalds 
2761da177e4SLinus Torvalds 	return 0;
2771da177e4SLinus Torvalds }
2781da177e4SLinus Torvalds 
2791da177e4SLinus Torvalds /*
2801da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
2811da177e4SLinus Torvalds ** jb_array is the array to be filled in.
2821da177e4SLinus Torvalds */
283a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb,
2841da177e4SLinus Torvalds 				   struct reiserfs_list_bitmap *jb_array,
2853ee16670SJeff Mahoney 				   unsigned int bmap_nr)
286bd4c625cSLinus Torvalds {
2871da177e4SLinus Torvalds 	int i;
2881da177e4SLinus Torvalds 	int failed = 0;
2891da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2901da177e4SLinus Torvalds 	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2931da177e4SLinus Torvalds 		jb = jb_array + i;
2941da177e4SLinus Torvalds 		jb->journal_list = NULL;
2951da177e4SLinus Torvalds 		jb->bitmaps = vmalloc(mem);
2961da177e4SLinus Torvalds 		if (!jb->bitmaps) {
297a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2000", "unable to "
29845b03d5eSJeff Mahoney 					 "allocate bitmaps for journal lists");
2991da177e4SLinus Torvalds 			failed = 1;
3001da177e4SLinus Torvalds 			break;
3011da177e4SLinus Torvalds 		}
3021da177e4SLinus Torvalds 		memset(jb->bitmaps, 0, mem);
3031da177e4SLinus Torvalds 	}
3041da177e4SLinus Torvalds 	if (failed) {
305a9dd3643SJeff Mahoney 		free_list_bitmaps(sb, jb_array);
3061da177e4SLinus Torvalds 		return -1;
3071da177e4SLinus Torvalds 	}
3081da177e4SLinus Torvalds 	return 0;
3091da177e4SLinus Torvalds }
3101da177e4SLinus Torvalds 
3111da177e4SLinus Torvalds /*
3121da177e4SLinus Torvalds ** find an available list bitmap.  If you can't find one, flush a commit list
3131da177e4SLinus Torvalds ** and try again
3141da177e4SLinus Torvalds */
315a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
316bd4c625cSLinus Torvalds 						    struct reiserfs_journal_list
317bd4c625cSLinus Torvalds 						    *jl)
318bd4c625cSLinus Torvalds {
3191da177e4SLinus Torvalds 	int i, j;
320a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3211da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
3221da177e4SLinus Torvalds 
3231da177e4SLinus Torvalds 	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
3241da177e4SLinus Torvalds 		i = journal->j_list_bitmap_index;
3251da177e4SLinus Torvalds 		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
3261da177e4SLinus Torvalds 		jb = journal->j_list_bitmap + i;
3271da177e4SLinus Torvalds 		if (journal->j_list_bitmap[i].journal_list) {
328a9dd3643SJeff Mahoney 			flush_commit_list(sb,
329bd4c625cSLinus Torvalds 					  journal->j_list_bitmap[i].
330bd4c625cSLinus Torvalds 					  journal_list, 1);
3311da177e4SLinus Torvalds 			if (!journal->j_list_bitmap[i].journal_list) {
3321da177e4SLinus Torvalds 				break;
3331da177e4SLinus Torvalds 			}
3341da177e4SLinus Torvalds 		} else {
3351da177e4SLinus Torvalds 			break;
3361da177e4SLinus Torvalds 		}
3371da177e4SLinus Torvalds 	}
3381da177e4SLinus Torvalds 	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
3391da177e4SLinus Torvalds 		return NULL;
3401da177e4SLinus Torvalds 	}
3411da177e4SLinus Torvalds 	jb->journal_list = jl;
3421da177e4SLinus Torvalds 	return jb;
3431da177e4SLinus Torvalds }
3441da177e4SLinus Torvalds 
3451da177e4SLinus Torvalds /*
3461da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list.
3471da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers
3481da177e4SLinus Torvalds ** returns NULL on failure
3491da177e4SLinus Torvalds */
350bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
351bd4c625cSLinus Torvalds {
3521da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *head;
3531da177e4SLinus Torvalds 	int i;
3541da177e4SLinus Torvalds 	if (num_cnodes <= 0) {
3551da177e4SLinus Torvalds 		return NULL;
3561da177e4SLinus Torvalds 	}
3571da177e4SLinus Torvalds 	head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
3581da177e4SLinus Torvalds 	if (!head) {
3591da177e4SLinus Torvalds 		return NULL;
3601da177e4SLinus Torvalds 	}
3611da177e4SLinus Torvalds 	memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
3621da177e4SLinus Torvalds 	head[0].prev = NULL;
3631da177e4SLinus Torvalds 	head[0].next = head + 1;
3641da177e4SLinus Torvalds 	for (i = 1; i < num_cnodes; i++) {
3651da177e4SLinus Torvalds 		head[i].prev = head + (i - 1);
3661da177e4SLinus Torvalds 		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
3671da177e4SLinus Torvalds 	}
3681da177e4SLinus Torvalds 	head[num_cnodes - 1].next = NULL;
3691da177e4SLinus Torvalds 	return head;
3701da177e4SLinus Torvalds }
3711da177e4SLinus Torvalds 
3721da177e4SLinus Torvalds /*
3731da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure
3741da177e4SLinus Torvalds */
375a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
376bd4c625cSLinus Torvalds {
3771da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
378a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3791da177e4SLinus Torvalds 
380a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "get_cnode");
3811da177e4SLinus Torvalds 
3821da177e4SLinus Torvalds 	if (journal->j_cnode_free <= 0) {
3831da177e4SLinus Torvalds 		return NULL;
3841da177e4SLinus Torvalds 	}
3851da177e4SLinus Torvalds 	journal->j_cnode_used++;
3861da177e4SLinus Torvalds 	journal->j_cnode_free--;
3871da177e4SLinus Torvalds 	cn = journal->j_cnode_free_list;
3881da177e4SLinus Torvalds 	if (!cn) {
3891da177e4SLinus Torvalds 		return cn;
3901da177e4SLinus Torvalds 	}
3911da177e4SLinus Torvalds 	if (cn->next) {
3921da177e4SLinus Torvalds 		cn->next->prev = NULL;
3931da177e4SLinus Torvalds 	}
3941da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn->next;
3951da177e4SLinus Torvalds 	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
3961da177e4SLinus Torvalds 	return cn;
3971da177e4SLinus Torvalds }
3981da177e4SLinus Torvalds 
3991da177e4SLinus Torvalds /*
4001da177e4SLinus Torvalds ** returns a cnode to the free list
4011da177e4SLinus Torvalds */
402a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb,
403bd4c625cSLinus Torvalds 		       struct reiserfs_journal_cnode *cn)
404bd4c625cSLinus Torvalds {
405a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4061da177e4SLinus Torvalds 
407a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "free_cnode");
4081da177e4SLinus Torvalds 
4091da177e4SLinus Torvalds 	journal->j_cnode_used--;
4101da177e4SLinus Torvalds 	journal->j_cnode_free++;
4111da177e4SLinus Torvalds 	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
4121da177e4SLinus Torvalds 	cn->next = journal->j_cnode_free_list;
4131da177e4SLinus Torvalds 	if (journal->j_cnode_free_list) {
4141da177e4SLinus Torvalds 		journal->j_cnode_free_list->prev = cn;
4151da177e4SLinus Torvalds 	}
4161da177e4SLinus Torvalds 	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
4171da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn;
4181da177e4SLinus Torvalds }
4191da177e4SLinus Torvalds 
/* clear the journal_prepared and journal_restore_dirty bits on bh */
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_restore_dirty(bh);
	clear_buffer_journal_prepared(bh);
}
4251da177e4SLinus Torvalds 
4261da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */
427bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
428bd4c625cSLinus Torvalds 								  super_block
429bd4c625cSLinus Torvalds 								  *sb,
430bd4c625cSLinus Torvalds 								  struct
431bd4c625cSLinus Torvalds 								  reiserfs_journal_cnode
432bd4c625cSLinus Torvalds 								  **table,
4331da177e4SLinus Torvalds 								  long bl)
4341da177e4SLinus Torvalds {
4351da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4361da177e4SLinus Torvalds 	cn = journal_hash(table, sb, bl);
4371da177e4SLinus Torvalds 	while (cn) {
4381da177e4SLinus Torvalds 		if (cn->blocknr == bl && cn->sb == sb)
4391da177e4SLinus Torvalds 			return cn;
4401da177e4SLinus Torvalds 		cn = cn->hnext;
4411da177e4SLinus Torvalds 	}
4421da177e4SLinus Torvalds 	return (struct reiserfs_journal_cnode *)0;
4431da177e4SLinus Torvalds }
4441da177e4SLinus Torvalds 
4451da177e4SLinus Torvalds /*
4461da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
4471da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
4481da177e4SLinus Torvalds ** being overwritten by a replay after crashing.
4491da177e4SLinus Torvalds **
4501da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
4511da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
4521da177e4SLinus Torvalds ** sure you never write the block without logging it.
4531da177e4SLinus Torvalds **
4541da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward.
4551da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search
4561da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl.  Then, we return that
4571da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try.
4581da177e4SLinus Torvalds **
4591da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't
4601da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal
4611da177e4SLinus Torvalds **
4621da177e4SLinus Torvalds */
463a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb,
4643ee16670SJeff Mahoney 			unsigned int bmap_nr, int bit_nr, int search_all,
465bd4c625cSLinus Torvalds 			b_blocknr_t * next_zero_bit)
466bd4c625cSLinus Torvalds {
467a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4681da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4691da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
4701da177e4SLinus Torvalds 	int i;
4711da177e4SLinus Torvalds 	unsigned long bl;
4721da177e4SLinus Torvalds 
4731da177e4SLinus Torvalds 	*next_zero_bit = 0;	/* always start this at zero. */
4741da177e4SLinus Torvalds 
475a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal);
4761da177e4SLinus Torvalds 	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
4771da177e4SLinus Torvalds 	 ** if we crash before the transaction that freed it commits,  this transaction won't
4781da177e4SLinus Torvalds 	 ** have committed either, and the block will never be written
4791da177e4SLinus Torvalds 	 */
4801da177e4SLinus Torvalds 	if (search_all) {
4811da177e4SLinus Torvalds 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
482a9dd3643SJeff Mahoney 			PROC_INFO_INC(sb, journal.in_journal_bitmap);
4831da177e4SLinus Torvalds 			jb = journal->j_list_bitmap + i;
4841da177e4SLinus Torvalds 			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
485bd4c625cSLinus Torvalds 			    test_bit(bit_nr,
486bd4c625cSLinus Torvalds 				     (unsigned long *)jb->bitmaps[bmap_nr]->
487bd4c625cSLinus Torvalds 				     data)) {
488bd4c625cSLinus Torvalds 				*next_zero_bit =
489bd4c625cSLinus Torvalds 				    find_next_zero_bit((unsigned long *)
490bd4c625cSLinus Torvalds 						       (jb->bitmaps[bmap_nr]->
491bd4c625cSLinus Torvalds 							data),
492a9dd3643SJeff Mahoney 						       sb->s_blocksize << 3,
493bd4c625cSLinus Torvalds 						       bit_nr + 1);
4941da177e4SLinus Torvalds 				return 1;
4951da177e4SLinus Torvalds 			}
4961da177e4SLinus Torvalds 		}
4971da177e4SLinus Torvalds 	}
4981da177e4SLinus Torvalds 
499a9dd3643SJeff Mahoney 	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
5001da177e4SLinus Torvalds 	/* is it in any old transactions? */
501bd4c625cSLinus Torvalds 	if (search_all
502bd4c625cSLinus Torvalds 	    && (cn =
503a9dd3643SJeff Mahoney 		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
5041da177e4SLinus Torvalds 		return 1;
5051da177e4SLinus Torvalds 	}
5061da177e4SLinus Torvalds 
5071da177e4SLinus Torvalds 	/* is it in the current transaction.  This should never happen */
508a9dd3643SJeff Mahoney 	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
5091da177e4SLinus Torvalds 		BUG();
5101da177e4SLinus Torvalds 		return 1;
5111da177e4SLinus Torvalds 	}
5121da177e4SLinus Torvalds 
513a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal_reusable);
5141da177e4SLinus Torvalds 	/* safe for reuse */
5151da177e4SLinus Torvalds 	return 0;
5161da177e4SLinus Torvalds }
5171da177e4SLinus Torvalds 
5181da177e4SLinus Torvalds /* insert cn into table
5191da177e4SLinus Torvalds */
520bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
521bd4c625cSLinus Torvalds 				       struct reiserfs_journal_cnode *cn)
522bd4c625cSLinus Torvalds {
5231da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn_orig;
5241da177e4SLinus Torvalds 
5251da177e4SLinus Torvalds 	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
5261da177e4SLinus Torvalds 	cn->hnext = cn_orig;
5271da177e4SLinus Torvalds 	cn->hprev = NULL;
5281da177e4SLinus Torvalds 	if (cn_orig) {
5291da177e4SLinus Torvalds 		cn_orig->hprev = cn;
5301da177e4SLinus Torvalds 	}
5311da177e4SLinus Torvalds 	journal_hash(table, cn->sb, cn->blocknr) = cn;
5321da177e4SLinus Torvalds }
5331da177e4SLinus Torvalds 
5341da177e4SLinus Torvalds /* lock the current transaction */
535a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb)
536bd4c625cSLinus Torvalds {
537a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.lock_journal);
5388ebc4232SFrederic Weisbecker 
5398ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
5401da177e4SLinus Torvalds }
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds /* unlock the current transaction */
543a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb)
544bd4c625cSLinus Torvalds {
545a9dd3643SJeff Mahoney 	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
5461da177e4SLinus Torvalds }
5471da177e4SLinus Torvalds 
5481da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl)
5491da177e4SLinus Torvalds {
5501da177e4SLinus Torvalds 	jl->j_refcount++;
5511da177e4SLinus Torvalds }
5521da177e4SLinus Torvalds 
5531da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s,
5541da177e4SLinus Torvalds 				    struct reiserfs_journal_list *jl)
5551da177e4SLinus Torvalds {
5561da177e4SLinus Torvalds 	if (jl->j_refcount < 1) {
557c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
558bd4c625cSLinus Torvalds 			       jl->j_trans_id, jl->j_refcount);
5591da177e4SLinus Torvalds 	}
5601da177e4SLinus Torvalds 	if (--jl->j_refcount == 0)
561d739b42bSPekka Enberg 		kfree(jl);
5621da177e4SLinus Torvalds }
5631da177e4SLinus Torvalds 
5641da177e4SLinus Torvalds /*
5651da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
5661da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
5671da177e4SLinus Torvalds ** transaction.
5681da177e4SLinus Torvalds */
569a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb,
570bd4c625cSLinus Torvalds 					   struct reiserfs_journal_list *jl)
571bd4c625cSLinus Torvalds {
5721da177e4SLinus Torvalds 
5731da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
5741da177e4SLinus Torvalds 	if (jb) {
575a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
5761da177e4SLinus Torvalds 	}
5771da177e4SLinus Torvalds 	jl->j_list_bitmap->journal_list = NULL;
5781da177e4SLinus Torvalds 	jl->j_list_bitmap = NULL;
5791da177e4SLinus Torvalds }
5801da177e4SLinus Torvalds 
5811da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s,
582600ed416SJeff Mahoney 				    unsigned int trans_id)
5831da177e4SLinus Torvalds {
5841da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
5851da177e4SLinus Torvalds 	struct list_head *entry = &journal->j_journal_list;
5861da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
5871da177e4SLinus Torvalds 
5881da177e4SLinus Torvalds 	if (!list_empty(entry)) {
5891da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry->next);
5901da177e4SLinus Torvalds 		if (jl->j_trans_id <= trans_id) {
5911da177e4SLinus Torvalds 			return 1;
5921da177e4SLinus Torvalds 		}
5931da177e4SLinus Torvalds 	}
5941da177e4SLinus Torvalds 	return 0;
5951da177e4SLinus Torvalds }
5961da177e4SLinus Torvalds 
597398c95bdSChris Mason /*
598398c95bdSChris Mason  * If page->mapping was null, we failed to truncate this page for
599398c95bdSChris Mason  * some reason.  Most likely because it was truncated after being
600398c95bdSChris Mason  * logged via data=journal.
601398c95bdSChris Mason  *
602398c95bdSChris Mason  * This does a check to see if the buffer belongs to one of these
603398c95bdSChris Mason  * lost pages before doing the final put_bh.  If page->mapping was
604398c95bdSChris Mason  * null, it tries to free buffers on the page, which should make the
605398c95bdSChris Mason  * final page_cache_release drop the page from the lru.
606398c95bdSChris Mason  */
607398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh)
608398c95bdSChris Mason {
609398c95bdSChris Mason 	struct page *page = bh->b_page;
610529ae9aaSNick Piggin 	if (!page->mapping && trylock_page(page)) {
611398c95bdSChris Mason 		page_cache_get(page);
612398c95bdSChris Mason 		put_bh(bh);
613398c95bdSChris Mason 		if (!page->mapping)
614398c95bdSChris Mason 			try_to_free_buffers(page);
615398c95bdSChris Mason 		unlock_page(page);
616398c95bdSChris Mason 		page_cache_release(page);
617398c95bdSChris Mason 	} else {
618398c95bdSChris Mason 		put_bh(bh);
619398c95bdSChris Mason 	}
620398c95bdSChris Mason }
621398c95bdSChris Mason 
622bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
623bd4c625cSLinus Torvalds {
6241da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
6251da177e4SLinus Torvalds 
6261da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
62745b03d5eSJeff Mahoney 		reiserfs_warning(NULL, "clm-2084",
62845b03d5eSJeff Mahoney 				 "pinned buffer %lu:%s sent to disk",
6291da177e4SLinus Torvalds 				 bh->b_blocknr, bdevname(bh->b_bdev, b));
6301da177e4SLinus Torvalds 	}
6311da177e4SLinus Torvalds 	if (uptodate)
6321da177e4SLinus Torvalds 		set_buffer_uptodate(bh);
6331da177e4SLinus Torvalds 	else
6341da177e4SLinus Torvalds 		clear_buffer_uptodate(bh);
635398c95bdSChris Mason 
6361da177e4SLinus Torvalds 	unlock_buffer(bh);
637398c95bdSChris Mason 	release_buffer_page(bh);
6381da177e4SLinus Torvalds }
6391da177e4SLinus Torvalds 
/*
 * I/O completion handler installed by submit_ordered_buffer(): record the
 * I/O result in the uptodate bit, unlock the buffer and drop the reference
 * that was taken at submission time.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
6491da177e4SLinus Torvalds 
650bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh)
651bd4c625cSLinus Torvalds {
6521da177e4SLinus Torvalds 	get_bh(bh);
6531da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_buffer_io_sync;
6541da177e4SLinus Torvalds 	clear_buffer_journal_new(bh);
6551da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6561da177e4SLinus Torvalds 	if (!test_clear_buffer_journal_test(bh))
6571da177e4SLinus Torvalds 		BUG();
6581da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6591da177e4SLinus Torvalds 		BUG();
6601da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6611da177e4SLinus Torvalds }
6621da177e4SLinus Torvalds 
663bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh)
664bd4c625cSLinus Torvalds {
6651da177e4SLinus Torvalds 	get_bh(bh);
6661da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6671da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6681da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6691da177e4SLinus Torvalds 		BUG();
6701da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6711da177e4SLinus Torvalds }
6721da177e4SLinus Torvalds 
/* number of buffer heads batched up before a chunk is written out */
#define CHUNK_SIZE 32

/*
 * A small batch of buffers waiting to be submitted together.  add_to_chunk()
 * fills it; write_chunk() / write_ordered_chunk() submit it and reset nr.
 */
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];	/* the batched buffers */
	int nr;					/* how many slots of bh[] are in use */
};
6781da177e4SLinus Torvalds 
679bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk)
680bd4c625cSLinus Torvalds {
6811da177e4SLinus Torvalds 	int i;
68222e2c507SJens Axboe 	get_fs_excl();
6831da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
6841da177e4SLinus Torvalds 		submit_logged_buffer(chunk->bh[i]);
6851da177e4SLinus Torvalds 	}
6861da177e4SLinus Torvalds 	chunk->nr = 0;
68722e2c507SJens Axboe 	put_fs_excl();
6881da177e4SLinus Torvalds }
6891da177e4SLinus Torvalds 
690bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk)
691bd4c625cSLinus Torvalds {
6921da177e4SLinus Torvalds 	int i;
69322e2c507SJens Axboe 	get_fs_excl();
6941da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
6951da177e4SLinus Torvalds 		submit_ordered_buffer(chunk->bh[i]);
6961da177e4SLinus Torvalds 	}
6971da177e4SLinus Torvalds 	chunk->nr = 0;
69822e2c507SJens Axboe 	put_fs_excl();
6991da177e4SLinus Torvalds }
7001da177e4SLinus Torvalds 
7011da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
702bd4c625cSLinus Torvalds 			spinlock_t * lock, void (fn) (struct buffer_chunk *))
7031da177e4SLinus Torvalds {
7041da177e4SLinus Torvalds 	int ret = 0;
70514a61442SEric Sesterhenn 	BUG_ON(chunk->nr >= CHUNK_SIZE);
7061da177e4SLinus Torvalds 	chunk->bh[chunk->nr++] = bh;
7071da177e4SLinus Torvalds 	if (chunk->nr >= CHUNK_SIZE) {
7081da177e4SLinus Torvalds 		ret = 1;
7091da177e4SLinus Torvalds 		if (lock)
7101da177e4SLinus Torvalds 			spin_unlock(lock);
7111da177e4SLinus Torvalds 		fn(chunk);
7121da177e4SLinus Torvalds 		if (lock)
7131da177e4SLinus Torvalds 			spin_lock(lock);
7141da177e4SLinus Torvalds 	}
7151da177e4SLinus Torvalds 	return ret;
7161da177e4SLinus Torvalds }
7171da177e4SLinus Torvalds 
7181da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
719bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void)
720bd4c625cSLinus Torvalds {
7211da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7221da177e4SLinus Torvalds 	while (1) {
7231da177e4SLinus Torvalds 		jh = kmalloc(sizeof(*jh), GFP_NOFS);
7241da177e4SLinus Torvalds 		if (jh) {
7251da177e4SLinus Torvalds 			atomic_inc(&nr_reiserfs_jh);
7261da177e4SLinus Torvalds 			return jh;
7271da177e4SLinus Torvalds 		}
7281da177e4SLinus Torvalds 		yield();
7291da177e4SLinus Torvalds 	}
7301da177e4SLinus Torvalds }
7311da177e4SLinus Torvalds 
7321da177e4SLinus Torvalds /*
7331da177e4SLinus Torvalds  * we want to free the jh when the buffer has been written
7341da177e4SLinus Torvalds  * and waited on
7351da177e4SLinus Torvalds  */
736bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh)
737bd4c625cSLinus Torvalds {
7381da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7391da177e4SLinus Torvalds 
7401da177e4SLinus Torvalds 	jh = bh->b_private;
7411da177e4SLinus Torvalds 	if (jh) {
7421da177e4SLinus Torvalds 		bh->b_private = NULL;
7431da177e4SLinus Torvalds 		jh->bh = NULL;
7441da177e4SLinus Torvalds 		list_del_init(&jh->list);
7451da177e4SLinus Torvalds 		kfree(jh);
7461da177e4SLinus Torvalds 		if (atomic_read(&nr_reiserfs_jh) <= 0)
7471da177e4SLinus Torvalds 			BUG();
7481da177e4SLinus Torvalds 		atomic_dec(&nr_reiserfs_jh);
7491da177e4SLinus Torvalds 		put_bh(bh);
7501da177e4SLinus Torvalds 	}
7511da177e4SLinus Torvalds }
7521da177e4SLinus Torvalds 
7531da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
7541da177e4SLinus Torvalds 			   int tail)
7551da177e4SLinus Torvalds {
7561da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7571da177e4SLinus Torvalds 
7581da177e4SLinus Torvalds 	if (bh->b_private) {
7591da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
7601da177e4SLinus Torvalds 		if (!bh->b_private) {
7611da177e4SLinus Torvalds 			spin_unlock(&j->j_dirty_buffers_lock);
7621da177e4SLinus Torvalds 			goto no_jh;
7631da177e4SLinus Torvalds 		}
7641da177e4SLinus Torvalds 		jh = bh->b_private;
7651da177e4SLinus Torvalds 		list_del_init(&jh->list);
7661da177e4SLinus Torvalds 	} else {
7671da177e4SLinus Torvalds 	      no_jh:
7681da177e4SLinus Torvalds 		get_bh(bh);
7691da177e4SLinus Torvalds 		jh = alloc_jh();
7701da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
7711da177e4SLinus Torvalds 		/* buffer must be locked for __add_jh, should be able to have
7721da177e4SLinus Torvalds 		 * two adds at the same time
7731da177e4SLinus Torvalds 		 */
77414a61442SEric Sesterhenn 		BUG_ON(bh->b_private);
7751da177e4SLinus Torvalds 		jh->bh = bh;
7761da177e4SLinus Torvalds 		bh->b_private = jh;
7771da177e4SLinus Torvalds 	}
7781da177e4SLinus Torvalds 	jh->jl = j->j_current_jl;
7791da177e4SLinus Torvalds 	if (tail)
7801da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
7811da177e4SLinus Torvalds 	else {
7821da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_bh_list);
7831da177e4SLinus Torvalds 	}
7841da177e4SLinus Torvalds 	spin_unlock(&j->j_dirty_buffers_lock);
7851da177e4SLinus Torvalds 	return 0;
7861da177e4SLinus Torvalds }
7871da177e4SLinus Torvalds 
788bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
789bd4c625cSLinus Torvalds {
7901da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
7911da177e4SLinus Torvalds }
792bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
793bd4c625cSLinus Torvalds {
7941da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
7951da177e4SLinus Torvalds }
7961da177e4SLinus Torvalds 
7971da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
7981da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock,
7991da177e4SLinus Torvalds 				 struct reiserfs_journal *j,
8001da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
8011da177e4SLinus Torvalds 				 struct list_head *list)
8021da177e4SLinus Torvalds {
8031da177e4SLinus Torvalds 	struct buffer_head *bh;
8041da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8051da177e4SLinus Torvalds 	int ret = j->j_errno;
8061da177e4SLinus Torvalds 	struct buffer_chunk chunk;
8071da177e4SLinus Torvalds 	struct list_head tmp;
8081da177e4SLinus Torvalds 	INIT_LIST_HEAD(&tmp);
8091da177e4SLinus Torvalds 
8101da177e4SLinus Torvalds 	chunk.nr = 0;
8111da177e4SLinus Torvalds 	spin_lock(lock);
8121da177e4SLinus Torvalds 	while (!list_empty(list)) {
8131da177e4SLinus Torvalds 		jh = JH_ENTRY(list->next);
8141da177e4SLinus Torvalds 		bh = jh->bh;
8151da177e4SLinus Torvalds 		get_bh(bh);
816ca5de404SNick Piggin 		if (!trylock_buffer(bh)) {
8171da177e4SLinus Torvalds 			if (!buffer_dirty(bh)) {
818f116629dSAkinobu Mita 				list_move(&jh->list, &tmp);
8191da177e4SLinus Torvalds 				goto loop_next;
8201da177e4SLinus Torvalds 			}
8211da177e4SLinus Torvalds 			spin_unlock(lock);
8221da177e4SLinus Torvalds 			if (chunk.nr)
8231da177e4SLinus Torvalds 				write_ordered_chunk(&chunk);
8241da177e4SLinus Torvalds 			wait_on_buffer(bh);
8251da177e4SLinus Torvalds 			cond_resched();
8261da177e4SLinus Torvalds 			spin_lock(lock);
8271da177e4SLinus Torvalds 			goto loop_next;
8281da177e4SLinus Torvalds 		}
8293d4492f8SChris Mason 		/* in theory, dirty non-uptodate buffers should never get here,
8303d4492f8SChris Mason 		 * but the upper layer io error paths still have a few quirks.
8313d4492f8SChris Mason 		 * Handle them here as gracefully as we can
8323d4492f8SChris Mason 		 */
8333d4492f8SChris Mason 		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
8343d4492f8SChris Mason 			clear_buffer_dirty(bh);
8353d4492f8SChris Mason 			ret = -EIO;
8363d4492f8SChris Mason 		}
8371da177e4SLinus Torvalds 		if (buffer_dirty(bh)) {
838f116629dSAkinobu Mita 			list_move(&jh->list, &tmp);
8391da177e4SLinus Torvalds 			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
8401da177e4SLinus Torvalds 		} else {
8411da177e4SLinus Torvalds 			reiserfs_free_jh(bh);
8421da177e4SLinus Torvalds 			unlock_buffer(bh);
8431da177e4SLinus Torvalds 		}
8441da177e4SLinus Torvalds 	      loop_next:
8451da177e4SLinus Torvalds 		put_bh(bh);
8461da177e4SLinus Torvalds 		cond_resched_lock(lock);
8471da177e4SLinus Torvalds 	}
8481da177e4SLinus Torvalds 	if (chunk.nr) {
8491da177e4SLinus Torvalds 		spin_unlock(lock);
8501da177e4SLinus Torvalds 		write_ordered_chunk(&chunk);
8511da177e4SLinus Torvalds 		spin_lock(lock);
8521da177e4SLinus Torvalds 	}
8531da177e4SLinus Torvalds 	while (!list_empty(&tmp)) {
8541da177e4SLinus Torvalds 		jh = JH_ENTRY(tmp.prev);
8551da177e4SLinus Torvalds 		bh = jh->bh;
8561da177e4SLinus Torvalds 		get_bh(bh);
8571da177e4SLinus Torvalds 		reiserfs_free_jh(bh);
8581da177e4SLinus Torvalds 
8591da177e4SLinus Torvalds 		if (buffer_locked(bh)) {
8601da177e4SLinus Torvalds 			spin_unlock(lock);
8611da177e4SLinus Torvalds 			wait_on_buffer(bh);
8621da177e4SLinus Torvalds 			spin_lock(lock);
8631da177e4SLinus Torvalds 		}
8641da177e4SLinus Torvalds 		if (!buffer_uptodate(bh)) {
8651da177e4SLinus Torvalds 			ret = -EIO;
8661da177e4SLinus Torvalds 		}
867d62b1b87SChris Mason 		/* ugly interaction with invalidatepage here.
868d62b1b87SChris Mason 		 * reiserfs_invalidate_page will pin any buffer that has a valid
869d62b1b87SChris Mason 		 * journal head from an older transaction.  If someone else sets
870d62b1b87SChris Mason 		 * our buffer dirty after we write it in the first loop, and
871d62b1b87SChris Mason 		 * then someone truncates the page away, nobody will ever write
872d62b1b87SChris Mason 		 * the buffer. We're safe if we write the page one last time
873d62b1b87SChris Mason 		 * after freeing the journal header.
874d62b1b87SChris Mason 		 */
875d62b1b87SChris Mason 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
876d62b1b87SChris Mason 			spin_unlock(lock);
877d62b1b87SChris Mason 			ll_rw_block(WRITE, 1, &bh);
878d62b1b87SChris Mason 			spin_lock(lock);
879d62b1b87SChris Mason 		}
8801da177e4SLinus Torvalds 		put_bh(bh);
8811da177e4SLinus Torvalds 		cond_resched_lock(lock);
8821da177e4SLinus Torvalds 	}
8831da177e4SLinus Torvalds 	spin_unlock(lock);
8841da177e4SLinus Torvalds 	return ret;
8851da177e4SLinus Torvalds }
8861da177e4SLinus Torvalds 
887bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s,
888bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
889bd4c625cSLinus Torvalds {
8901da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
8911da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
8921da177e4SLinus Torvalds 	struct reiserfs_journal_list *first_jl;
8931da177e4SLinus Torvalds 	struct list_head *entry;
894600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
895600ed416SJeff Mahoney 	unsigned int other_trans_id;
896600ed416SJeff Mahoney 	unsigned int first_trans_id;
8971da177e4SLinus Torvalds 
8981da177e4SLinus Torvalds       find_first:
8991da177e4SLinus Torvalds 	/*
9001da177e4SLinus Torvalds 	 * first we walk backwards to find the oldest uncommitted transation
9011da177e4SLinus Torvalds 	 */
9021da177e4SLinus Torvalds 	first_jl = jl;
9031da177e4SLinus Torvalds 	entry = jl->j_list.prev;
9041da177e4SLinus Torvalds 	while (1) {
9051da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9061da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list ||
9071da177e4SLinus Torvalds 		    atomic_read(&other_jl->j_older_commits_done))
9081da177e4SLinus Torvalds 			break;
9091da177e4SLinus Torvalds 
9101da177e4SLinus Torvalds 		first_jl = other_jl;
9111da177e4SLinus Torvalds 		entry = other_jl->j_list.prev;
9121da177e4SLinus Torvalds 	}
9131da177e4SLinus Torvalds 
9141da177e4SLinus Torvalds 	/* if we didn't find any older uncommitted transactions, return now */
9151da177e4SLinus Torvalds 	if (first_jl == jl) {
9161da177e4SLinus Torvalds 		return 0;
9171da177e4SLinus Torvalds 	}
9181da177e4SLinus Torvalds 
9191da177e4SLinus Torvalds 	first_trans_id = first_jl->j_trans_id;
9201da177e4SLinus Torvalds 
9211da177e4SLinus Torvalds 	entry = &first_jl->j_list;
9221da177e4SLinus Torvalds 	while (1) {
9231da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9241da177e4SLinus Torvalds 		other_trans_id = other_jl->j_trans_id;
9251da177e4SLinus Torvalds 
9261da177e4SLinus Torvalds 		if (other_trans_id < trans_id) {
9271da177e4SLinus Torvalds 			if (atomic_read(&other_jl->j_commit_left) != 0) {
9281da177e4SLinus Torvalds 				flush_commit_list(s, other_jl, 0);
9291da177e4SLinus Torvalds 
9301da177e4SLinus Torvalds 				/* list we were called with is gone, return */
9311da177e4SLinus Torvalds 				if (!journal_list_still_alive(s, trans_id))
9321da177e4SLinus Torvalds 					return 1;
9331da177e4SLinus Torvalds 
9341da177e4SLinus Torvalds 				/* the one we just flushed is gone, this means all
9351da177e4SLinus Torvalds 				 * older lists are also gone, so first_jl is no longer
9361da177e4SLinus Torvalds 				 * valid either.  Go back to the beginning.
9371da177e4SLinus Torvalds 				 */
938bd4c625cSLinus Torvalds 				if (!journal_list_still_alive
939bd4c625cSLinus Torvalds 				    (s, other_trans_id)) {
9401da177e4SLinus Torvalds 					goto find_first;
9411da177e4SLinus Torvalds 				}
9421da177e4SLinus Torvalds 			}
9431da177e4SLinus Torvalds 			entry = entry->next;
9441da177e4SLinus Torvalds 			if (entry == &journal->j_journal_list)
9451da177e4SLinus Torvalds 				return 0;
9461da177e4SLinus Torvalds 		} else {
9471da177e4SLinus Torvalds 			return 0;
9481da177e4SLinus Torvalds 		}
9491da177e4SLinus Torvalds 	}
9501da177e4SLinus Torvalds 	return 0;
9511da177e4SLinus Torvalds }
952deba0f49SAdrian Bunk 
953deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s)
954bd4c625cSLinus Torvalds {
9551da177e4SLinus Torvalds 	struct reiserfs_journal *j = SB_JOURNAL(s);
9568ebc4232SFrederic Weisbecker 
9578ebc4232SFrederic Weisbecker 	if (atomic_read(&j->j_async_throttle)) {
9588ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
9598aa7e847SJens Axboe 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
9608ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
9618ebc4232SFrederic Weisbecker 	}
9628ebc4232SFrederic Weisbecker 
9631da177e4SLinus Torvalds 	return 0;
9641da177e4SLinus Torvalds }
9651da177e4SLinus Torvalds 
9661da177e4SLinus Torvalds /*
9671da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk.
9681da177e4SLinus Torvalds **
9691da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
9701da177e4SLinus Torvalds ** Before the commit block can by written, every other log block must be safely on disk
9711da177e4SLinus Torvalds **
9721da177e4SLinus Torvalds */
973bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
974bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall)
975bd4c625cSLinus Torvalds {
9761da177e4SLinus Torvalds 	int i;
9773ee16670SJeff Mahoney 	b_blocknr_t bn;
9781da177e4SLinus Torvalds 	struct buffer_head *tbh = NULL;
979600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
9801da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
9811da177e4SLinus Torvalds 	int retval = 0;
982e0e851cfSChris Mason 	int write_len;
9831da177e4SLinus Torvalds 
9841da177e4SLinus Torvalds 	reiserfs_check_lock_depth(s, "flush_commit_list");
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 	if (atomic_read(&jl->j_older_commits_done)) {
9871da177e4SLinus Torvalds 		return 0;
9881da177e4SLinus Torvalds 	}
9891da177e4SLinus Torvalds 
99022e2c507SJens Axboe 	get_fs_excl();
99122e2c507SJens Axboe 
9921da177e4SLinus Torvalds 	/* before we can put our commit blocks on disk, we have to make sure everyone older than
9931da177e4SLinus Torvalds 	 ** us is on disk too
9941da177e4SLinus Torvalds 	 */
9951da177e4SLinus Torvalds 	BUG_ON(jl->j_len <= 0);
9961da177e4SLinus Torvalds 	BUG_ON(trans_id == journal->j_trans_id);
9971da177e4SLinus Torvalds 
9981da177e4SLinus Torvalds 	get_journal_list(jl);
9991da177e4SLinus Torvalds 	if (flushall) {
10001da177e4SLinus Torvalds 		if (flush_older_commits(s, jl) == 1) {
10011da177e4SLinus Torvalds 			/* list disappeared during flush_older_commits.  return */
10021da177e4SLinus Torvalds 			goto put_jl;
10031da177e4SLinus Torvalds 		}
10041da177e4SLinus Torvalds 	}
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds 	/* make sure nobody is trying to flush this one at the same time */
10078ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
10088ebc4232SFrederic Weisbecker 
10091da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, trans_id)) {
101090415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10111da177e4SLinus Torvalds 		goto put_jl;
10121da177e4SLinus Torvalds 	}
10131da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
10141da177e4SLinus Torvalds 
10151da177e4SLinus Torvalds 	/* this commit is done, exit */
10161da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_commit_left)) <= 0) {
10171da177e4SLinus Torvalds 		if (flushall) {
10181da177e4SLinus Torvalds 			atomic_set(&(jl->j_older_commits_done), 1);
10191da177e4SLinus Torvalds 		}
102090415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10211da177e4SLinus Torvalds 		goto put_jl;
10221da177e4SLinus Torvalds 	}
10231da177e4SLinus Torvalds 
10241da177e4SLinus Torvalds 	if (!list_empty(&jl->j_bh_list)) {
10253d4492f8SChris Mason 		int ret;
10268ebc4232SFrederic Weisbecker 
10278ebc4232SFrederic Weisbecker 		/*
10288ebc4232SFrederic Weisbecker 		 * We might sleep in numerous places inside
10298ebc4232SFrederic Weisbecker 		 * write_ordered_buffers. Relax the write lock.
10308ebc4232SFrederic Weisbecker 		 */
10318ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10323d4492f8SChris Mason 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
10331da177e4SLinus Torvalds 					    journal, jl, &jl->j_bh_list);
10343d4492f8SChris Mason 		if (ret < 0 && retval == 0)
10353d4492f8SChris Mason 			retval = ret;
10368ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10371da177e4SLinus Torvalds 	}
10381da177e4SLinus Torvalds 	BUG_ON(!list_empty(&jl->j_bh_list));
10391da177e4SLinus Torvalds 	/*
10401da177e4SLinus Torvalds 	 * for the description block and all the log blocks, submit any buffers
1041e0e851cfSChris Mason 	 * that haven't already reached the disk.  Try to write at least 256
1042e0e851cfSChris Mason 	 * log blocks. later on, we will only wait on blocks that correspond
1043e0e851cfSChris Mason 	 * to this transaction, but while we're unplugging we might as well
1044e0e851cfSChris Mason 	 * get a chunk of data on there.
10451da177e4SLinus Torvalds 	 */
10461da177e4SLinus Torvalds 	atomic_inc(&journal->j_async_throttle);
1047e0e851cfSChris Mason 	write_len = jl->j_len + 1;
1048e0e851cfSChris Mason 	if (write_len < 256)
1049e0e851cfSChris Mason 		write_len = 256;
1050e0e851cfSChris Mason 	for (i = 0 ; i < write_len ; i++) {
10511da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
10521da177e4SLinus Torvalds 		    SB_ONDISK_JOURNAL_SIZE(s);
10531da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
1054e0e851cfSChris Mason 		if (tbh) {
10556e3647acSFrederic Weisbecker 			if (buffer_dirty(tbh)) {
10566e3647acSFrederic Weisbecker 		            reiserfs_write_unlock(s);
1057e0e851cfSChris Mason 			    ll_rw_block(WRITE, 1, &tbh);
10586e3647acSFrederic Weisbecker 			    reiserfs_write_lock(s);
10596e3647acSFrederic Weisbecker 			}
10601da177e4SLinus Torvalds 			put_bh(tbh) ;
10611da177e4SLinus Torvalds 		}
1062e0e851cfSChris Mason 	}
10631da177e4SLinus Torvalds 	atomic_dec(&journal->j_async_throttle);
10641da177e4SLinus Torvalds 
10651da177e4SLinus Torvalds 	for (i = 0; i < (jl->j_len + 1); i++) {
10661da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
10671da177e4SLinus Torvalds 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
10681da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
10698ebc4232SFrederic Weisbecker 
10708ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10711da177e4SLinus Torvalds 		wait_on_buffer(tbh);
10728ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10731da177e4SLinus Torvalds 		// since we're using ll_rw_blk above, it might have skipped over
10741da177e4SLinus Torvalds 		// a locked buffer.  Double check here
10751da177e4SLinus Torvalds 		//
10768ebc4232SFrederic Weisbecker 		/* redundant, sync_dirty_buffer() checks */
10778ebc4232SFrederic Weisbecker 		if (buffer_dirty(tbh)) {
10788ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
10791da177e4SLinus Torvalds 			sync_dirty_buffer(tbh);
10808ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
10818ebc4232SFrederic Weisbecker 		}
10821da177e4SLinus Torvalds 		if (unlikely(!buffer_uptodate(tbh))) {
10831da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
108445b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-601",
108545b03d5eSJeff Mahoney 					 "buffer write failed");
10861da177e4SLinus Torvalds #endif
10871da177e4SLinus Torvalds 			retval = -EIO;
10881da177e4SLinus Torvalds 		}
10891da177e4SLinus Torvalds 		put_bh(tbh);	/* once for journal_find_get_block */
10901da177e4SLinus Torvalds 		put_bh(tbh);	/* once due to original getblk in do_journal_end */
10911da177e4SLinus Torvalds 		atomic_dec(&(jl->j_commit_left));
10921da177e4SLinus Torvalds 	}
10931da177e4SLinus Torvalds 
10941da177e4SLinus Torvalds 	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
10951da177e4SLinus Torvalds 
10965d5e8156SJeff Mahoney 	/* If there was a write error in the journal - we can't commit
10975d5e8156SJeff Mahoney 	 * this transaction - it will be invalid and, if successful,
1098beb7dd86SRobert P. J. Day 	 * will just end up propagating the write error out to
10995d5e8156SJeff Mahoney 	 * the file system. */
11005d5e8156SJeff Mahoney 	if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
11011da177e4SLinus Torvalds 		if (buffer_dirty(jl->j_commit_bh))
11021da177e4SLinus Torvalds 			BUG();
11031da177e4SLinus Torvalds 		mark_buffer_dirty(jl->j_commit_bh) ;
11048ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
11057cd33ad2SChristoph Hellwig 		if (reiserfs_barrier_flush(s))
11067cd33ad2SChristoph Hellwig 			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
11077cd33ad2SChristoph Hellwig 		else
11081da177e4SLinus Torvalds 			sync_dirty_buffer(jl->j_commit_bh);
11098ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11105d5e8156SJeff Mahoney 	}
11111da177e4SLinus Torvalds 
11121da177e4SLinus Torvalds 	/* If there was a write error in the journal - we can't commit this
11131da177e4SLinus Torvalds 	 * transaction - it will be invalid and, if successful, will just end
1114beb7dd86SRobert P. J. Day 	 * up propagating the write error out to the filesystem. */
11151da177e4SLinus Torvalds 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
11161da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
111745b03d5eSJeff Mahoney 		reiserfs_warning(s, "journal-615", "buffer write failed");
11181da177e4SLinus Torvalds #endif
11191da177e4SLinus Torvalds 		retval = -EIO;
11201da177e4SLinus Torvalds 	}
11211da177e4SLinus Torvalds 	bforget(jl->j_commit_bh);
11221da177e4SLinus Torvalds 	if (journal->j_last_commit_id != 0 &&
11231da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
112445b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1125bd4c625cSLinus Torvalds 				 journal->j_last_commit_id, jl->j_trans_id);
11261da177e4SLinus Torvalds 	}
11271da177e4SLinus Torvalds 	journal->j_last_commit_id = jl->j_trans_id;
11281da177e4SLinus Torvalds 
11291da177e4SLinus Torvalds 	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
11301da177e4SLinus Torvalds 	cleanup_freed_for_journal_list(s, jl);
11311da177e4SLinus Torvalds 
11321da177e4SLinus Torvalds 	retval = retval ? retval : journal->j_errno;
11331da177e4SLinus Torvalds 
11341da177e4SLinus Torvalds 	/* mark the metadata dirty */
11351da177e4SLinus Torvalds 	if (!retval)
11361da177e4SLinus Torvalds 		dirty_one_transaction(s, jl);
11371da177e4SLinus Torvalds 	atomic_dec(&(jl->j_commit_left));
11381da177e4SLinus Torvalds 
11391da177e4SLinus Torvalds 	if (flushall) {
11401da177e4SLinus Torvalds 		atomic_set(&(jl->j_older_commits_done), 1);
11411da177e4SLinus Torvalds 	}
114290415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
11431da177e4SLinus Torvalds       put_jl:
11441da177e4SLinus Torvalds 	put_journal_list(s, jl);
11451da177e4SLinus Torvalds 
11461da177e4SLinus Torvalds 	if (retval)
1147bd4c625cSLinus Torvalds 		reiserfs_abort(s, retval, "Journal write error in %s",
1148fbe5498bSHarvey Harrison 			       __func__);
114922e2c507SJens Axboe 	put_fs_excl();
11501da177e4SLinus Torvalds 	return retval;
11511da177e4SLinus Torvalds }
11521da177e4SLinus Torvalds 
11531da177e4SLinus Torvalds /*
11541da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
11551da177e4SLinus Torvalds ** returns NULL if it can't find anything
11561da177e4SLinus Torvalds */
1157bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1158bd4c625cSLinus Torvalds 							  reiserfs_journal_cnode
1159bd4c625cSLinus Torvalds 							  *cn)
1160bd4c625cSLinus Torvalds {
11611da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
11621da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
11631da177e4SLinus Torvalds 
11641da177e4SLinus Torvalds 	cn = cn->hprev;
11651da177e4SLinus Torvalds 	while (cn) {
11661da177e4SLinus Torvalds 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
11671da177e4SLinus Torvalds 			return cn->jlist;
11681da177e4SLinus Torvalds 		}
11691da177e4SLinus Torvalds 		cn = cn->hprev;
11701da177e4SLinus Torvalds 	}
11711da177e4SLinus Torvalds 	return NULL;
11721da177e4SLinus Torvalds }
11731da177e4SLinus Torvalds 
1174a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1175a3172027SChris Mason {
1176a3172027SChris Mason 	struct super_block *sb = cn->sb;
1177a3172027SChris Mason 	b_blocknr_t blocknr = cn->blocknr;
1178a3172027SChris Mason 
1179a3172027SChris Mason 	cn = cn->hprev;
1180a3172027SChris Mason 	while (cn) {
1181a3172027SChris Mason 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1182a3172027SChris Mason 		    atomic_read(&cn->jlist->j_commit_left) != 0)
1183a3172027SChris Mason 				    return 0;
1184a3172027SChris Mason 		cn = cn->hprev;
1185a3172027SChris Mason 	}
1186a3172027SChris Mason 	return 1;
1187a3172027SChris Mason }
1188a3172027SChris Mason 
/*
** Forward declaration: remove_all_from_journal_list() below calls
** remove_journal_hash(), whose definition is not part of this section of
** the file.
*/
1189bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *,
1190bd4c625cSLinus Torvalds 				struct reiserfs_journal_cnode **,
1191bd4c625cSLinus Torvalds 				struct reiserfs_journal_list *, unsigned long,
1192bd4c625cSLinus Torvalds 				int);
11931da177e4SLinus Torvalds 
11941da177e4SLinus Torvalds /*
11951da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the
11961da177e4SLinus Torvalds ** journal list for this transaction.  Aside from freeing the cnode, this also allows the
11971da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted.
11981da177e4SLinus Torvalds */
1199a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb,
1200bd4c625cSLinus Torvalds 					 struct reiserfs_journal_list *jl,
1201bd4c625cSLinus Torvalds 					 int debug)
1202bd4c625cSLinus Torvalds {
1203a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12041da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
12051da177e4SLinus Torvalds 	cn = jl->j_realblock;
12061da177e4SLinus Torvalds 
12071da177e4SLinus Torvalds 	/* which is better, to lock once around the whole loop, or
12081da177e4SLinus Torvalds 	 ** to lock for each call to remove_journal_hash?
12091da177e4SLinus Torvalds 	 */
12101da177e4SLinus Torvalds 	while (cn) {
12111da177e4SLinus Torvalds 		if (cn->blocknr != 0) {
12121da177e4SLinus Torvalds 			if (debug) {
1213a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2201",
1214bd4c625cSLinus Torvalds 						 "block %u, bh is %d, state %ld",
1215bd4c625cSLinus Torvalds 						 cn->blocknr, cn->bh ? 1 : 0,
1216bd4c625cSLinus Torvalds 						 cn->state);
12171da177e4SLinus Torvalds 			}
12181da177e4SLinus Torvalds 			cn->state = 0;
1219a9dd3643SJeff Mahoney 			remove_journal_hash(sb, journal->j_list_hash_table,
1220bd4c625cSLinus Torvalds 					    jl, cn->blocknr, 1);
12211da177e4SLinus Torvalds 		}
12221da177e4SLinus Torvalds 		last = cn;
12231da177e4SLinus Torvalds 		cn = cn->next;
1224a9dd3643SJeff Mahoney 		free_cnode(sb, last);
12251da177e4SLinus Torvalds 	}
12261da177e4SLinus Torvalds 	jl->j_realblock = NULL;
12271da177e4SLinus Torvalds }
12281da177e4SLinus Torvalds 
12291da177e4SLinus Torvalds /*
12301da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
12311da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
12321da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks.
12331da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list
12341da177e4SLinus Torvalds **
12351da177e4SLinus Torvalds */
1236a9dd3643SJeff Mahoney static int _update_journal_header_block(struct super_block *sb,
1237bd4c625cSLinus Torvalds 					unsigned long offset,
1238600ed416SJeff Mahoney 					unsigned int trans_id)
1239bd4c625cSLinus Torvalds {
12401da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
1241a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12421da177e4SLinus Torvalds 
12431da177e4SLinus Torvalds 	if (reiserfs_is_journal_aborted(journal))
12441da177e4SLinus Torvalds 		return -EIO;
12451da177e4SLinus Torvalds 
12461da177e4SLinus Torvalds 	if (trans_id >= journal->j_last_flush_trans_id) {
12471da177e4SLinus Torvalds 		if (buffer_locked((journal->j_header_bh))) {
12488ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
12491da177e4SLinus Torvalds 			wait_on_buffer((journal->j_header_bh));
12508ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
12511da177e4SLinus Torvalds 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
12521da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1253a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "journal-699",
125445b03d5eSJeff Mahoney 						 "buffer write failed");
12551da177e4SLinus Torvalds #endif
12561da177e4SLinus Torvalds 				return -EIO;
12571da177e4SLinus Torvalds 			}
12581da177e4SLinus Torvalds 		}
12591da177e4SLinus Torvalds 		journal->j_last_flush_trans_id = trans_id;
12601da177e4SLinus Torvalds 		journal->j_first_unflushed_offset = offset;
1261bd4c625cSLinus Torvalds 		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1262bd4c625cSLinus Torvalds 							b_data);
12631da177e4SLinus Torvalds 		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
12641da177e4SLinus Torvalds 		jh->j_first_unflushed_offset = cpu_to_le32(offset);
12651da177e4SLinus Torvalds 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
12661da177e4SLinus Torvalds 
12671da177e4SLinus Torvalds 		set_buffer_dirty(journal->j_header_bh);
12688ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
12697cd33ad2SChristoph Hellwig 
12707cd33ad2SChristoph Hellwig 		if (reiserfs_barrier_flush(sb))
12717cd33ad2SChristoph Hellwig 			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
12727cd33ad2SChristoph Hellwig 		else
12731da177e4SLinus Torvalds 			sync_dirty_buffer(journal->j_header_bh);
12747cd33ad2SChristoph Hellwig 
12758ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
12761da177e4SLinus Torvalds 		if (!buffer_uptodate(journal->j_header_bh)) {
1277a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-837",
127845b03d5eSJeff Mahoney 					 "IO error during journal replay");
12791da177e4SLinus Torvalds 			return -EIO;
12801da177e4SLinus Torvalds 		}
12811da177e4SLinus Torvalds 	}
12821da177e4SLinus Torvalds 	return 0;
12831da177e4SLinus Torvalds }
12841da177e4SLinus Torvalds 
/*
** Thin wrapper: forwards its arguments unchanged to
** _update_journal_header_block() and returns that function's result.
*/
static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	int ret;

	ret = _update_journal_header_block(sb, offset, trans_id);
	return ret;
}
1291bd4c625cSLinus Torvalds 
12921da177e4SLinus Torvalds /*
12931da177e4SLinus Torvalds ** flush any and all journal lists older than you are
12941da177e4SLinus Torvalds ** can only be called from flush_journal_list
12951da177e4SLinus Torvalds */
1296a9dd3643SJeff Mahoney static int flush_older_journal_lists(struct super_block *sb,
12971da177e4SLinus Torvalds 				     struct reiserfs_journal_list *jl)
12981da177e4SLinus Torvalds {
12991da177e4SLinus Torvalds 	struct list_head *entry;
13001da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
1301a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1302600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
13031da177e4SLinus Torvalds 
13041da177e4SLinus Torvalds 	/* we know we are the only ones flushing things, no extra race
13051da177e4SLinus Torvalds 	 * protection is required.
13061da177e4SLinus Torvalds 	 */
13071da177e4SLinus Torvalds       restart:
13081da177e4SLinus Torvalds 	entry = journal->j_journal_list.next;
13091da177e4SLinus Torvalds 	/* Did we wrap? */
13101da177e4SLinus Torvalds 	if (entry == &journal->j_journal_list)
13111da177e4SLinus Torvalds 		return 0;
13121da177e4SLinus Torvalds 	other_jl = JOURNAL_LIST_ENTRY(entry);
13131da177e4SLinus Torvalds 	if (other_jl->j_trans_id < trans_id) {
13141da177e4SLinus Torvalds 		BUG_ON(other_jl->j_refcount <= 0);
13151da177e4SLinus Torvalds 		/* do not flush all */
1316a9dd3643SJeff Mahoney 		flush_journal_list(sb, other_jl, 0);
13171da177e4SLinus Torvalds 
13181da177e4SLinus Torvalds 		/* other_jl is now deleted from the list */
13191da177e4SLinus Torvalds 		goto restart;
13201da177e4SLinus Torvalds 	}
13211da177e4SLinus Torvalds 	return 0;
13221da177e4SLinus Torvalds }
13231da177e4SLinus Torvalds 
13241da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s,
1325bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
1326bd4c625cSLinus Torvalds {
13271da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13281da177e4SLinus Torvalds 	if (!list_empty(&jl->j_working_list)) {
13291da177e4SLinus Torvalds 		list_del_init(&jl->j_working_list);
13301da177e4SLinus Torvalds 		journal->j_num_work_lists--;
13311da177e4SLinus Torvalds 	}
13321da177e4SLinus Torvalds }
13331da177e4SLinus Torvalds 
13341da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks
13351da177e4SLinus Torvalds **
13361da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside
13371da177e4SLinus Torvalds ** flush_journal_list
13381da177e4SLinus Torvalds **
13391da177e4SLinus Torvalds ** IMPORTANT.  This can only be called while there are no journal writers,
13401da177e4SLinus Torvalds ** and the journal is locked.  That means it can only be called from
13411da177e4SLinus Torvalds ** do_journal_end, or by journal_release
13421da177e4SLinus Torvalds */
/*
 * flush_journal_list - write out one journal list and retire it.
 *
 * s:        super block owning the journal
 * jl:       journal list to flush.  j_len and j_trans_id must be non-zero
 *           (BUG_ON otherwise).  Before returning, the list is unlinked from
 *           j_list and the work list, its fields are reset, and
 *           put_journal_list() is called on it.
 * flushall: non-zero - take j_flush_mutex here, flush all older lists via
 *           flush_older_journal_lists(), and update the journal header block.
 *           zero - the caller must already hold j_flush_mutex (BUG() if the
 *           trylock succeeds); older lists and the header block are left
 *           alone.
 *
 * Returns journal->j_errno if it is set; otherwise the result of
 * update_journal_header_block() when flushall is non-zero, else 0.
 */
13431da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s,
1344bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall)
1345bd4c625cSLinus Torvalds {
13461da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
13471da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
13481da177e4SLinus Torvalds 	int count;
13491da177e4SLinus Torvalds 	int was_jwait = 0;
13501da177e4SLinus Torvalds 	int was_dirty = 0;
13511da177e4SLinus Torvalds 	struct buffer_head *saved_bh;
13521da177e4SLinus Torvalds 	unsigned long j_len_saved = jl->j_len;
13531da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13541da177e4SLinus Torvalds 	int err = 0;
13551da177e4SLinus Torvalds 
13561da177e4SLinus Torvalds 	BUG_ON(j_len_saved <= 0);
13571da177e4SLinus Torvalds 
13581da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) != 0) {
135945b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2048", "called with wcount %d",
13601da177e4SLinus Torvalds 				 atomic_read(&journal->j_wcount));
13611da177e4SLinus Torvalds 	}
13621da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
13631da177e4SLinus Torvalds 
13641da177e4SLinus Torvalds 	/* if flushall == 0, the lock is already held */
13651da177e4SLinus Torvalds 	if (flushall) {
13668ebc4232SFrederic Weisbecker 		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1367afe70259SJeff Mahoney 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
13681da177e4SLinus Torvalds 		BUG();
13691da177e4SLinus Torvalds 	}
13701da177e4SLinus Torvalds 
13711da177e4SLinus Torvalds 	count = 0;
13721da177e4SLinus Torvalds 	if (j_len_saved > journal->j_trans_max) {
1373c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1374bd4c625cSLinus Torvalds 			       j_len_saved, jl->j_trans_id);
	/* NOTE(review): if reiserfs_panic() could ever return, this exit would
	 * leave j_flush_mutex held when flushall is set - presumably it never
	 * returns; confirm against its declaration.
	 */
13751da177e4SLinus Torvalds 		return 0;
13761da177e4SLinus Torvalds 	}
13771da177e4SLinus Torvalds 
137822e2c507SJens Axboe 	get_fs_excl();
137922e2c507SJens Axboe 
13801da177e4SLinus Torvalds 	/* if all the work is already done, get out of here */
13811da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
13821da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
13831da177e4SLinus Torvalds 		goto flush_older_and_return;
13841da177e4SLinus Torvalds 	}
13851da177e4SLinus Torvalds 
13861da177e4SLinus Torvalds 	/* start by putting the commit list on disk.  This will also flush
13871da177e4SLinus Torvalds 	 ** the commit lists of any older transactions
13881da177e4SLinus Torvalds 	 */
13891da177e4SLinus Torvalds 	flush_commit_list(s, jl, 1);
13901da177e4SLinus Torvalds 
1391bd4c625cSLinus Torvalds 	if (!(jl->j_state & LIST_DIRTY)
1392bd4c625cSLinus Torvalds 	    && !reiserfs_is_journal_aborted(journal))
13931da177e4SLinus Torvalds 		BUG();
13941da177e4SLinus Torvalds 
13951da177e4SLinus Torvalds 	/* are we done now? */
13961da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
13971da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
13981da177e4SLinus Torvalds 		goto flush_older_and_return;
13991da177e4SLinus Torvalds 	}
14001da177e4SLinus Torvalds 
14011da177e4SLinus Torvalds 	/* loop through each cnode, see if we need to write it,
14021da177e4SLinus Torvalds 	 ** or wait on a more recent transaction, or just ignore it
14031da177e4SLinus Torvalds 	 */
14041da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) != 0) {
1405c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-844", "journal list is flushing, "
1406c3a9c210SJeff Mahoney 			       "wcount is not 0");
14071da177e4SLinus Torvalds 	}
	/* first pass: submit every buffer this list is responsible for
	 * writing, tagging its cnode with BLOCK_NEEDS_FLUSH and counting it
	 */
14081da177e4SLinus Torvalds 	cn = jl->j_realblock;
14091da177e4SLinus Torvalds 	while (cn) {
14101da177e4SLinus Torvalds 		was_jwait = 0;
14111da177e4SLinus Torvalds 		was_dirty = 0;
14121da177e4SLinus Torvalds 		saved_bh = NULL;
14131da177e4SLinus Torvalds 		/* blocknr of 0 is no longer in the hash, ignore it */
14141da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
14151da177e4SLinus Torvalds 			goto free_cnode;
14161da177e4SLinus Torvalds 		}
14171da177e4SLinus Torvalds 
14181da177e4SLinus Torvalds 		/* This transaction failed commit. Don't write out to the disk */
14191da177e4SLinus Torvalds 		if (!(jl->j_state & LIST_DIRTY))
14201da177e4SLinus Torvalds 			goto free_cnode;
14211da177e4SLinus Torvalds 
14221da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
14231da177e4SLinus Torvalds 		/* the order is important here.  We check pjl to make sure we
14241da177e4SLinus Torvalds 		 ** don't clear BH_JDirty_wait if we aren't the one writing this
14251da177e4SLinus Torvalds 		 ** block to disk
14261da177e4SLinus Torvalds 		 */
14271da177e4SLinus Torvalds 		if (!pjl && cn->bh) {
14281da177e4SLinus Torvalds 			saved_bh = cn->bh;
14291da177e4SLinus Torvalds 
14301da177e4SLinus Torvalds 			/* we do this to make sure nobody releases the buffer while
14311da177e4SLinus Torvalds 			 ** we are working with it
14321da177e4SLinus Torvalds 			 */
14331da177e4SLinus Torvalds 			get_bh(saved_bh);
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds 			if (buffer_journal_dirty(saved_bh)) {
14361da177e4SLinus Torvalds 				BUG_ON(!can_dirty(cn));
14371da177e4SLinus Torvalds 				was_jwait = 1;
14381da177e4SLinus Torvalds 				was_dirty = 1;
14391da177e4SLinus Torvalds 			} else if (can_dirty(cn)) {
14401da177e4SLinus Torvalds 				/* everything with !pjl && jwait should be writable */
14411da177e4SLinus Torvalds 				BUG();
14421da177e4SLinus Torvalds 			}
14431da177e4SLinus Torvalds 		}
14441da177e4SLinus Torvalds 
14451da177e4SLinus Torvalds 		/* if someone has this block in a newer transaction, just make
14460779bf2dSMatt LaPlante 		 ** sure they are committed, and don't try writing it to disk
14471da177e4SLinus Torvalds 		 */
14481da177e4SLinus Torvalds 		if (pjl) {
14491da177e4SLinus Torvalds 			if (atomic_read(&pjl->j_commit_left))
14501da177e4SLinus Torvalds 				flush_commit_list(s, pjl, 1);
14511da177e4SLinus Torvalds 			goto free_cnode;
14521da177e4SLinus Torvalds 		}
14531da177e4SLinus Torvalds 
14541da177e4SLinus Torvalds 		/* bh == NULL when the block got to disk on its own, OR,
14551da177e4SLinus Torvalds 		 ** the block got freed in a future transaction
14561da177e4SLinus Torvalds 		 */
14571da177e4SLinus Torvalds 		if (saved_bh == NULL) {
14581da177e4SLinus Torvalds 			goto free_cnode;
14591da177e4SLinus Torvalds 		}
14601da177e4SLinus Torvalds 
14611da177e4SLinus Torvalds 		/* this should never happen.  kupdate_one_transaction has this list
14621da177e4SLinus Torvalds 		 ** locked while it works, so we should never see a buffer here that
14631da177e4SLinus Torvalds 		 ** is not marked JDirty_wait
14641da177e4SLinus Torvalds 		 */
14651da177e4SLinus Torvalds 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
146645b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-813",
146745b03d5eSJeff Mahoney 					 "BAD! buffer %llu %cdirty %cjwait, "
14681da177e4SLinus Torvalds 					 "not in a newer tranasction",
1469bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1470bd4c625cSLinus Torvalds 					 b_blocknr, was_dirty ? ' ' : '!',
1471bd4c625cSLinus Torvalds 					 was_jwait ? ' ' : '!');
14721da177e4SLinus Torvalds 		}
14731da177e4SLinus Torvalds 		if (was_dirty) {
14741da177e4SLinus Torvalds 			/* we inc again because saved_bh gets decremented at free_cnode */
14751da177e4SLinus Torvalds 			get_bh(saved_bh);
14761da177e4SLinus Torvalds 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
14771da177e4SLinus Torvalds 			lock_buffer(saved_bh);
14781da177e4SLinus Torvalds 			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
14791da177e4SLinus Torvalds 			if (buffer_dirty(saved_bh))
14801da177e4SLinus Torvalds 				submit_logged_buffer(saved_bh);
14811da177e4SLinus Torvalds 			else
14821da177e4SLinus Torvalds 				unlock_buffer(saved_bh);
14831da177e4SLinus Torvalds 			count++;
14841da177e4SLinus Torvalds 		} else {
148545b03d5eSJeff Mahoney 			reiserfs_warning(s, "clm-2082",
148645b03d5eSJeff Mahoney 					 "Unable to flush buffer %llu in %s",
1487bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1488fbe5498bSHarvey Harrison 					 b_blocknr, __func__);
14891da177e4SLinus Torvalds 		}
14901da177e4SLinus Torvalds 	      free_cnode:
14911da177e4SLinus Torvalds 		last = cn;
14921da177e4SLinus Torvalds 		cn = cn->next;
14931da177e4SLinus Torvalds 		if (saved_bh) {
14941da177e4SLinus Torvalds 			/* we incremented this to keep others from taking the buffer head away */
14951da177e4SLinus Torvalds 			put_bh(saved_bh);
14961da177e4SLinus Torvalds 			if (atomic_read(&(saved_bh->b_count)) < 0) {
149745b03d5eSJeff Mahoney 				reiserfs_warning(s, "journal-945",
149845b03d5eSJeff Mahoney 						 "saved_bh->b_count < 0");
14991da177e4SLinus Torvalds 			}
15001da177e4SLinus Torvalds 		}
15011da177e4SLinus Torvalds 	}
	/* second pass: only needed if the first pass tagged something.  Wait
	 * (with the write lock dropped) for each BLOCK_NEEDS_FLUSH buffer,
	 * record -EIO if it did not end up uptodate, then clear its
	 * journal-dirty bit and drop the references.
	 */
15021da177e4SLinus Torvalds 	if (count > 0) {
15031da177e4SLinus Torvalds 		cn = jl->j_realblock;
15041da177e4SLinus Torvalds 		while (cn) {
15051da177e4SLinus Torvalds 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
15061da177e4SLinus Torvalds 				if (!cn->bh) {
1507c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1011",
1508c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
15091da177e4SLinus Torvalds 				}
15108ebc4232SFrederic Weisbecker 
15118ebc4232SFrederic Weisbecker 				reiserfs_write_unlock(s);
15121da177e4SLinus Torvalds 				wait_on_buffer(cn->bh);
15138ebc4232SFrederic Weisbecker 				reiserfs_write_lock(s);
15148ebc4232SFrederic Weisbecker 
15151da177e4SLinus Torvalds 				if (!cn->bh) {
1516c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1012",
1517c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
15181da177e4SLinus Torvalds 				}
15191da177e4SLinus Torvalds 				if (unlikely(!buffer_uptodate(cn->bh))) {
15201da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
152145b03d5eSJeff Mahoney 					reiserfs_warning(s, "journal-949",
152245b03d5eSJeff Mahoney 							 "buffer write failed");
15231da177e4SLinus Torvalds #endif
15241da177e4SLinus Torvalds 					err = -EIO;
15251da177e4SLinus Torvalds 				}
15261da177e4SLinus Torvalds 				/* note, we must clear the JDirty_wait bit after the up to date
15271da177e4SLinus Torvalds 				 ** check, otherwise we race against our flushpage routine
15281da177e4SLinus Torvalds 				 */
1529bd4c625cSLinus Torvalds 				BUG_ON(!test_clear_buffer_journal_dirty
1530bd4c625cSLinus Torvalds 				       (cn->bh));
15311da177e4SLinus Torvalds 
1532398c95bdSChris Mason 				/* drop one ref for us */
15331da177e4SLinus Torvalds 				put_bh(cn->bh);
1534398c95bdSChris Mason 				/* drop one ref for journal_mark_dirty */
1535398c95bdSChris Mason 				release_buffer_page(cn->bh);
15361da177e4SLinus Torvalds 			}
15371da177e4SLinus Torvalds 			cn = cn->next;
15381da177e4SLinus Torvalds 		}
15391da177e4SLinus Torvalds 	}
15401da177e4SLinus Torvalds 
15411da177e4SLinus Torvalds 	if (err)
1542bd4c625cSLinus Torvalds 		reiserfs_abort(s, -EIO,
1543bd4c625cSLinus Torvalds 			       "Write error while pushing transaction to disk in %s",
1544fbe5498bSHarvey Harrison 			       __func__);
15451da177e4SLinus Torvalds       flush_older_and_return:
15461da177e4SLinus Torvalds 
15471da177e4SLinus Torvalds 	/* before we can update the journal header block, we _must_ flush all
15481da177e4SLinus Torvalds 	 ** real blocks from all older transactions to disk.  This is because
15491da177e4SLinus Torvalds 	 ** once the header block is updated, this transaction will not be
15501da177e4SLinus Torvalds 	 ** replayed after a crash
15511da177e4SLinus Torvalds 	 */
15521da177e4SLinus Torvalds 	if (flushall) {
15531da177e4SLinus Torvalds 		flush_older_journal_lists(s, jl);
15541da177e4SLinus Torvalds 	}
15551da177e4SLinus Torvalds 
	/* from here on err tracks journal->j_errno; the -EIO set by the write
	 * loop above is overwritten (presumably reiserfs_abort() has already
	 * recorded it in j_errno - confirm)
	 */
15561da177e4SLinus Torvalds 	err = journal->j_errno;
15571da177e4SLinus Torvalds 	/* before we can remove everything from the hash tables for this
15581da177e4SLinus Torvalds 	 ** transaction, we must make sure it can never be replayed
15591da177e4SLinus Torvalds 	 **
15601da177e4SLinus Torvalds 	 ** since we are only called from do_journal_end, we know for sure there
15611da177e4SLinus Torvalds 	 ** are no allocations going on while we are flushing journal lists.  So,
15621da177e4SLinus Torvalds 	 ** we only need to update the journal header block for the last list
15631da177e4SLinus Torvalds 	 ** being flushed
15641da177e4SLinus Torvalds 	 */
15651da177e4SLinus Torvalds 	if (!err && flushall) {
1566bd4c625cSLinus Torvalds 		err =
1567bd4c625cSLinus Torvalds 		    update_journal_header_block(s,
1568bd4c625cSLinus Torvalds 						(jl->j_start + jl->j_len +
1569bd4c625cSLinus Torvalds 						 2) % SB_ONDISK_JOURNAL_SIZE(s),
1570bd4c625cSLinus Torvalds 						jl->j_trans_id);
15711da177e4SLinus Torvalds 		if (err)
1572bd4c625cSLinus Torvalds 			reiserfs_abort(s, -EIO,
1573bd4c625cSLinus Torvalds 				       "Write error while updating journal header in %s",
1574fbe5498bSHarvey Harrison 				       __func__);
15751da177e4SLinus Torvalds 	}
15761da177e4SLinus Torvalds 	remove_all_from_journal_list(s, jl, 0);
15771da177e4SLinus Torvalds 	list_del_init(&jl->j_list);
15781da177e4SLinus Torvalds 	journal->j_num_lists--;
15791da177e4SLinus Torvalds 	del_from_work_list(s, jl);
15801da177e4SLinus Torvalds 
15811da177e4SLinus Torvalds 	if (journal->j_last_flush_id != 0 &&
15821da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
158345b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1584bd4c625cSLinus Torvalds 				 journal->j_last_flush_id, jl->j_trans_id);
15851da177e4SLinus Torvalds 	}
15861da177e4SLinus Torvalds 	journal->j_last_flush_id = jl->j_trans_id;
15871da177e4SLinus Torvalds 
15881da177e4SLinus Torvalds 	/* not strictly required since we are freeing the list, but it should
15891da177e4SLinus Torvalds 	 * help find code using dead lists later on
15901da177e4SLinus Torvalds 	 */
15911da177e4SLinus Torvalds 	jl->j_len = 0;
15921da177e4SLinus Torvalds 	atomic_set(&(jl->j_nonzerolen), 0);
15931da177e4SLinus Torvalds 	jl->j_start = 0;
15941da177e4SLinus Torvalds 	jl->j_realblock = NULL;
15951da177e4SLinus Torvalds 	jl->j_commit_bh = NULL;
15961da177e4SLinus Torvalds 	jl->j_trans_id = 0;
15971da177e4SLinus Torvalds 	jl->j_state = 0;
15981da177e4SLinus Torvalds 	put_journal_list(s, jl);
	/* only release j_flush_mutex if this call was the one that took it */
15991da177e4SLinus Torvalds 	if (flushall)
1600afe70259SJeff Mahoney 		mutex_unlock(&journal->j_flush_mutex);
160122e2c507SJens Axboe 	put_fs_excl();
16021da177e4SLinus Torvalds 	return err;
16031da177e4SLinus Torvalds }
16041da177e4SLinus Torvalds 
1605a3172027SChris Mason static int test_transaction(struct super_block *s,
1606a3172027SChris Mason                             struct reiserfs_journal_list *jl)
1607a3172027SChris Mason {
1608a3172027SChris Mason 	struct reiserfs_journal_cnode *cn;
1609a3172027SChris Mason 
1610a3172027SChris Mason 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1611a3172027SChris Mason 		return 1;
1612a3172027SChris Mason 
1613a3172027SChris Mason 	cn = jl->j_realblock;
1614a3172027SChris Mason 	while (cn) {
1615a3172027SChris Mason 		/* if the blocknr == 0, this has been cleared from the hash,
1616a3172027SChris Mason 		 ** skip it
1617a3172027SChris Mason 		 */
1618a3172027SChris Mason 		if (cn->blocknr == 0) {
1619a3172027SChris Mason 			goto next;
1620a3172027SChris Mason 		}
1621a3172027SChris Mason 		if (cn->bh && !newer_jl_done(cn))
1622a3172027SChris Mason 			return 0;
1623a3172027SChris Mason 	      next:
1624a3172027SChris Mason 		cn = cn->next;
1625a3172027SChris Mason 		cond_resched();
1626a3172027SChris Mason 	}
1627a3172027SChris Mason 	return 0;
1628a3172027SChris Mason }
1629a3172027SChris Mason 
16301da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s,
16311da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
16321da177e4SLinus Torvalds 				 struct buffer_chunk *chunk)
16331da177e4SLinus Torvalds {
16341da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16351da177e4SLinus Torvalds 	int ret = 0;
16361da177e4SLinus Torvalds 
16371da177e4SLinus Torvalds 	jl->j_state |= LIST_TOUCHED;
16381da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16391da177e4SLinus Torvalds 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
16401da177e4SLinus Torvalds 		return 0;
16411da177e4SLinus Torvalds 	}
16421da177e4SLinus Torvalds 
16431da177e4SLinus Torvalds 	cn = jl->j_realblock;
16441da177e4SLinus Torvalds 	while (cn) {
16451da177e4SLinus Torvalds 		/* if the blocknr == 0, this has been cleared from the hash,
16461da177e4SLinus Torvalds 		 ** skip it
16471da177e4SLinus Torvalds 		 */
16481da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
16491da177e4SLinus Torvalds 			goto next;
16501da177e4SLinus Torvalds 		}
16511da177e4SLinus Torvalds 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
16521da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
16531da177e4SLinus Torvalds 			/* we can race against journal_mark_freed when we try
16541da177e4SLinus Torvalds 			 * to lock_buffer(cn->bh), so we have to inc the buffer
16551da177e4SLinus Torvalds 			 * count, and recheck things after locking
16561da177e4SLinus Torvalds 			 */
16571da177e4SLinus Torvalds 			tmp_bh = cn->bh;
16581da177e4SLinus Torvalds 			get_bh(tmp_bh);
16591da177e4SLinus Torvalds 			lock_buffer(tmp_bh);
16601da177e4SLinus Torvalds 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
16611da177e4SLinus Torvalds 				if (!buffer_journal_dirty(tmp_bh) ||
16621da177e4SLinus Torvalds 				    buffer_journal_prepared(tmp_bh))
16631da177e4SLinus Torvalds 					BUG();
16641da177e4SLinus Torvalds 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
16651da177e4SLinus Torvalds 				ret++;
16661da177e4SLinus Torvalds 			} else {
16671da177e4SLinus Torvalds 				/* note, cn->bh might be null now */
16681da177e4SLinus Torvalds 				unlock_buffer(tmp_bh);
16691da177e4SLinus Torvalds 			}
16701da177e4SLinus Torvalds 			put_bh(tmp_bh);
16711da177e4SLinus Torvalds 		}
16721da177e4SLinus Torvalds 	      next:
16731da177e4SLinus Torvalds 		cn = cn->next;
16741da177e4SLinus Torvalds 		cond_resched();
16751da177e4SLinus Torvalds 	}
16761da177e4SLinus Torvalds 	return ret;
16771da177e4SLinus Torvalds }
16781da177e4SLinus Torvalds 
16791da177e4SLinus Torvalds /* used by flush_commit_list */
16801da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
16811da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl)
16821da177e4SLinus Torvalds {
16831da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16841da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
16851da177e4SLinus Torvalds 	int ret = 0;
16861da177e4SLinus Torvalds 
16871da177e4SLinus Torvalds 	jl->j_state |= LIST_DIRTY;
16881da177e4SLinus Torvalds 	cn = jl->j_realblock;
16891da177e4SLinus Torvalds 	while (cn) {
16901da177e4SLinus Torvalds 		/* look for a more recent transaction that logged this
16911da177e4SLinus Torvalds 		 ** buffer.  Only the most recent transaction with a buffer in
16921da177e4SLinus Torvalds 		 ** it is allowed to send that buffer to disk
16931da177e4SLinus Torvalds 		 */
16941da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
1695bd4c625cSLinus Torvalds 		if (!pjl && cn->blocknr && cn->bh
1696bd4c625cSLinus Torvalds 		    && buffer_journal_dirty(cn->bh)) {
16971da177e4SLinus Torvalds 			BUG_ON(!can_dirty(cn));
16981da177e4SLinus Torvalds 			/* if the buffer is prepared, it will either be logged
16991da177e4SLinus Torvalds 			 * or restored.  If restored, we need to make sure
17001da177e4SLinus Torvalds 			 * it actually gets marked dirty
17011da177e4SLinus Torvalds 			 */
17021da177e4SLinus Torvalds 			clear_buffer_journal_new(cn->bh);
17031da177e4SLinus Torvalds 			if (buffer_journal_prepared(cn->bh)) {
17041da177e4SLinus Torvalds 				set_buffer_journal_restore_dirty(cn->bh);
17051da177e4SLinus Torvalds 			} else {
17061da177e4SLinus Torvalds 				set_buffer_journal_test(cn->bh);
17071da177e4SLinus Torvalds 				mark_buffer_dirty(cn->bh);
17081da177e4SLinus Torvalds 			}
17091da177e4SLinus Torvalds 		}
17101da177e4SLinus Torvalds 		cn = cn->next;
17111da177e4SLinus Torvalds 	}
17121da177e4SLinus Torvalds 	return ret;
17131da177e4SLinus Torvalds }
17141da177e4SLinus Torvalds 
17151da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s,
17161da177e4SLinus Torvalds 				struct reiserfs_journal_list *jl,
17171da177e4SLinus Torvalds 				struct reiserfs_journal_list **next_jl,
1718600ed416SJeff Mahoney 				unsigned int *next_trans_id,
1719bd4c625cSLinus Torvalds 				int num_blocks, int num_trans)
1720bd4c625cSLinus Torvalds {
17211da177e4SLinus Torvalds 	int ret = 0;
17221da177e4SLinus Torvalds 	int written = 0;
17231da177e4SLinus Torvalds 	int transactions_flushed = 0;
1724600ed416SJeff Mahoney 	unsigned int orig_trans_id = jl->j_trans_id;
17251da177e4SLinus Torvalds 	struct buffer_chunk chunk;
17261da177e4SLinus Torvalds 	struct list_head *entry;
17271da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17281da177e4SLinus Torvalds 	chunk.nr = 0;
17291da177e4SLinus Torvalds 
1730a412f9efSFrederic Weisbecker 	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
17311da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, orig_trans_id)) {
17321da177e4SLinus Torvalds 		goto done;
17331da177e4SLinus Torvalds 	}
17341da177e4SLinus Torvalds 
1735afe70259SJeff Mahoney 	/* we've got j_flush_mutex held, nobody is going to delete any
17361da177e4SLinus Torvalds 	 * of these lists out from underneath us
17371da177e4SLinus Torvalds 	 */
17381da177e4SLinus Torvalds 	while ((num_trans && transactions_flushed < num_trans) ||
17391da177e4SLinus Torvalds 	       (!num_trans && written < num_blocks)) {
17401da177e4SLinus Torvalds 
17411da177e4SLinus Torvalds 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1742bd4c625cSLinus Torvalds 		    atomic_read(&jl->j_commit_left)
1743bd4c625cSLinus Torvalds 		    || !(jl->j_state & LIST_DIRTY)) {
17441da177e4SLinus Torvalds 			del_from_work_list(s, jl);
17451da177e4SLinus Torvalds 			break;
17461da177e4SLinus Torvalds 		}
17471da177e4SLinus Torvalds 		ret = write_one_transaction(s, jl, &chunk);
17481da177e4SLinus Torvalds 
17491da177e4SLinus Torvalds 		if (ret < 0)
17501da177e4SLinus Torvalds 			goto done;
17511da177e4SLinus Torvalds 		transactions_flushed++;
17521da177e4SLinus Torvalds 		written += ret;
17531da177e4SLinus Torvalds 		entry = jl->j_list.next;
17541da177e4SLinus Torvalds 
17551da177e4SLinus Torvalds 		/* did we wrap? */
17561da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list) {
17571da177e4SLinus Torvalds 			break;
17581da177e4SLinus Torvalds 		}
17591da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
17601da177e4SLinus Torvalds 
17611da177e4SLinus Torvalds 		/* don't bother with older transactions */
17621da177e4SLinus Torvalds 		if (jl->j_trans_id <= orig_trans_id)
17631da177e4SLinus Torvalds 			break;
17641da177e4SLinus Torvalds 	}
17651da177e4SLinus Torvalds 	if (chunk.nr) {
17661da177e4SLinus Torvalds 		write_chunk(&chunk);
17671da177e4SLinus Torvalds 	}
17681da177e4SLinus Torvalds 
17691da177e4SLinus Torvalds       done:
1770afe70259SJeff Mahoney 	mutex_unlock(&journal->j_flush_mutex);
17711da177e4SLinus Torvalds 	return ret;
17721da177e4SLinus Torvalds }
17731da177e4SLinus Torvalds 
17741da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use
17751da177e4SLinus Torvalds ** all the journa list slots with tiny transactions.  These
17761da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which
17771da177e4SLinus Torvalds ** adds seeks and slows things down.
17781da177e4SLinus Torvalds **
17791da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists
17801da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal
17811da177e4SLinus Torvalds ** list updates the header block
17821da177e4SLinus Torvalds */
17831da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s,
1784bd4c625cSLinus Torvalds 				    struct reiserfs_journal_list *jl)
1785bd4c625cSLinus Torvalds {
17861da177e4SLinus Torvalds 	unsigned long len = 0;
17871da177e4SLinus Torvalds 	unsigned long cur_len;
17881da177e4SLinus Torvalds 	int ret;
17891da177e4SLinus Torvalds 	int i;
17901da177e4SLinus Torvalds 	int limit = 256;
17911da177e4SLinus Torvalds 	struct reiserfs_journal_list *tjl;
17921da177e4SLinus Torvalds 	struct reiserfs_journal_list *flush_jl;
1793600ed416SJeff Mahoney 	unsigned int trans_id;
17941da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17951da177e4SLinus Torvalds 
17961da177e4SLinus Torvalds 	flush_jl = tjl = jl;
17971da177e4SLinus Torvalds 
17981da177e4SLinus Torvalds 	/* in data logging mode, try harder to flush a lot of blocks */
17991da177e4SLinus Torvalds 	if (reiserfs_data_log(s))
18001da177e4SLinus Torvalds 		limit = 1024;
18011da177e4SLinus Torvalds 	/* flush for 256 transactions or limit blocks, whichever comes first */
18021da177e4SLinus Torvalds 	for (i = 0; i < 256 && len < limit; i++) {
18031da177e4SLinus Torvalds 		if (atomic_read(&tjl->j_commit_left) ||
18041da177e4SLinus Torvalds 		    tjl->j_trans_id < jl->j_trans_id) {
18051da177e4SLinus Torvalds 			break;
18061da177e4SLinus Torvalds 		}
18071da177e4SLinus Torvalds 		cur_len = atomic_read(&tjl->j_nonzerolen);
18081da177e4SLinus Torvalds 		if (cur_len > 0) {
18091da177e4SLinus Torvalds 			tjl->j_state &= ~LIST_TOUCHED;
18101da177e4SLinus Torvalds 		}
18111da177e4SLinus Torvalds 		len += cur_len;
18121da177e4SLinus Torvalds 		flush_jl = tjl;
18131da177e4SLinus Torvalds 		if (tjl->j_list.next == &journal->j_journal_list)
18141da177e4SLinus Torvalds 			break;
18151da177e4SLinus Torvalds 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
18161da177e4SLinus Torvalds 	}
18171da177e4SLinus Torvalds 	/* try to find a group of blocks we can flush across all the
18181da177e4SLinus Torvalds 	 ** transactions, but only bother if we've actually spanned
18191da177e4SLinus Torvalds 	 ** across multiple lists
18201da177e4SLinus Torvalds 	 */
18211da177e4SLinus Torvalds 	if (flush_jl != jl) {
18221da177e4SLinus Torvalds 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
18231da177e4SLinus Torvalds 	}
18241da177e4SLinus Torvalds 	flush_journal_list(s, flush_jl, 1);
18251da177e4SLinus Torvalds 	return 0;
18261da177e4SLinus Torvalds }
18271da177e4SLinus Torvalds 
18281da177e4SLinus Torvalds /*
18291da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh.
18301da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers.
18311da177e4SLinus Torvalds */
18321da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb,
18331da177e4SLinus Torvalds 			 struct reiserfs_journal_cnode **table,
18341da177e4SLinus Torvalds 			 struct reiserfs_journal_list *jl,
18351da177e4SLinus Torvalds 			 unsigned long block, int remove_freed)
18361da177e4SLinus Torvalds {
18371da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur;
18381da177e4SLinus Torvalds 	struct reiserfs_journal_cnode **head;
18391da177e4SLinus Torvalds 
18401da177e4SLinus Torvalds 	head = &(journal_hash(table, sb, block));
18411da177e4SLinus Torvalds 	if (!head) {
18421da177e4SLinus Torvalds 		return;
18431da177e4SLinus Torvalds 	}
18441da177e4SLinus Torvalds 	cur = *head;
18451da177e4SLinus Torvalds 	while (cur) {
1846bd4c625cSLinus Torvalds 		if (cur->blocknr == block && cur->sb == sb
1847bd4c625cSLinus Torvalds 		    && (jl == NULL || jl == cur->jlist)
1848bd4c625cSLinus Torvalds 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
18491da177e4SLinus Torvalds 			if (cur->hnext) {
18501da177e4SLinus Torvalds 				cur->hnext->hprev = cur->hprev;
18511da177e4SLinus Torvalds 			}
18521da177e4SLinus Torvalds 			if (cur->hprev) {
18531da177e4SLinus Torvalds 				cur->hprev->hnext = cur->hnext;
18541da177e4SLinus Torvalds 			} else {
18551da177e4SLinus Torvalds 				*head = cur->hnext;
18561da177e4SLinus Torvalds 			}
18571da177e4SLinus Torvalds 			cur->blocknr = 0;
18581da177e4SLinus Torvalds 			cur->sb = NULL;
18591da177e4SLinus Torvalds 			cur->state = 0;
18601da177e4SLinus Torvalds 			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
18611da177e4SLinus Torvalds 				atomic_dec(&(cur->jlist->j_nonzerolen));
18621da177e4SLinus Torvalds 			cur->bh = NULL;
18631da177e4SLinus Torvalds 			cur->jlist = NULL;
18641da177e4SLinus Torvalds 		}
18651da177e4SLinus Torvalds 		cur = cur->hnext;
18661da177e4SLinus Torvalds 	}
18671da177e4SLinus Torvalds }
18681da177e4SLinus Torvalds 
1869a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb)
1870bd4c625cSLinus Torvalds {
1871a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1872d739b42bSPekka Enberg 	kfree(journal->j_current_jl);
18731da177e4SLinus Torvalds 	journal->j_num_lists--;
18741da177e4SLinus Torvalds 
18751da177e4SLinus Torvalds 	vfree(journal->j_cnode_free_orig);
1876a9dd3643SJeff Mahoney 	free_list_bitmaps(sb, journal->j_list_bitmap);
1877a9dd3643SJeff Mahoney 	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */
18781da177e4SLinus Torvalds 	if (journal->j_header_bh) {
18791da177e4SLinus Torvalds 		brelse(journal->j_header_bh);
18801da177e4SLinus Torvalds 	}
18811da177e4SLinus Torvalds 	/* j_header_bh is on the journal dev, make sure not to release the journal
18821da177e4SLinus Torvalds 	 * dev until we brelse j_header_bh
18831da177e4SLinus Torvalds 	 */
1884a9dd3643SJeff Mahoney 	release_journal_dev(sb, journal);
18851da177e4SLinus Torvalds 	vfree(journal);
18861da177e4SLinus Torvalds }
18871da177e4SLinus Torvalds 
18881da177e4SLinus Torvalds /*
18891da177e4SLinus Torvalds ** call on unmount.  Only set error to 1 if you haven't made your way out
18901da177e4SLinus Torvalds ** of read_super() yet.  Any other caller must keep error at 0.
18911da177e4SLinus Torvalds */
1892bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th,
1893a9dd3643SJeff Mahoney 			      struct super_block *sb, int error)
1894bd4c625cSLinus Torvalds {
18951da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
18961da177e4SLinus Torvalds 	int flushed = 0;
1897a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
18981da177e4SLinus Torvalds 
18991da177e4SLinus Torvalds 	/* we only want to flush out transactions if we were called with error == 0
19001da177e4SLinus Torvalds 	 */
1901a9dd3643SJeff Mahoney 	if (!error && !(sb->s_flags & MS_RDONLY)) {
19021da177e4SLinus Torvalds 		/* end the current trans */
19031da177e4SLinus Torvalds 		BUG_ON(!th->t_trans_id);
1904a9dd3643SJeff Mahoney 		do_journal_end(th, sb, 10, FLUSH_ALL);
19051da177e4SLinus Torvalds 
19061da177e4SLinus Torvalds 		/* make sure something gets logged to force our way into the flush code */
1907a9dd3643SJeff Mahoney 		if (!journal_join(&myth, sb, 1)) {
1908a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1909a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1910bd4c625cSLinus Torvalds 						     1);
1911a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1912a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1913a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19141da177e4SLinus Torvalds 			flushed = 1;
19151da177e4SLinus Torvalds 		}
19161da177e4SLinus Torvalds 	}
19171da177e4SLinus Torvalds 
19181da177e4SLinus Torvalds 	/* this also catches errors during the do_journal_end above */
19191da177e4SLinus Torvalds 	if (!error && reiserfs_is_journal_aborted(journal)) {
19201da177e4SLinus Torvalds 		memset(&myth, 0, sizeof(myth));
1921a9dd3643SJeff Mahoney 		if (!journal_join_abort(&myth, sb, 1)) {
1922a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1923a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1924bd4c625cSLinus Torvalds 						     1);
1925a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1926a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1927a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19281da177e4SLinus Torvalds 		}
19291da177e4SLinus Torvalds 	}
19301da177e4SLinus Torvalds 
19311da177e4SLinus Torvalds 	reiserfs_mounted_fs_count--;
19321da177e4SLinus Torvalds 	/* wait for all commits to finish */
1933a9dd3643SJeff Mahoney 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
19348ebc4232SFrederic Weisbecker 
19358ebc4232SFrederic Weisbecker 	/*
19368ebc4232SFrederic Weisbecker 	 * We must release the write lock here because
19378ebc4232SFrederic Weisbecker 	 * the workqueue job (flush_async_commit) needs this lock
19388ebc4232SFrederic Weisbecker 	 */
19398ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
19401da177e4SLinus Torvalds 	flush_workqueue(commit_wq);
19418ebc4232SFrederic Weisbecker 
19421da177e4SLinus Torvalds 	if (!reiserfs_mounted_fs_count) {
19431da177e4SLinus Torvalds 		destroy_workqueue(commit_wq);
19441da177e4SLinus Torvalds 		commit_wq = NULL;
19451da177e4SLinus Torvalds 	}
19461da177e4SLinus Torvalds 
1947a9dd3643SJeff Mahoney 	free_journal_ram(sb);
19481da177e4SLinus Torvalds 
19490523676dSFrederic Weisbecker 	reiserfs_write_lock(sb);
19500523676dSFrederic Weisbecker 
19511da177e4SLinus Torvalds 	return 0;
19521da177e4SLinus Torvalds }
19531da177e4SLinus Torvalds 
/*
 * Unmount-time entry point: flush all journal transactions and release
 * all memory allocated for the journal.  Returns whatever
 * do_journal_release() returns.
 */
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *sb)
{
	/* regular unmount: not an error teardown */
	const int error = 0;

	return do_journal_release(th, sb, error);
}
1962bd4c625cSLinus Torvalds 
/*
 * Error-path teardown of the journal.  Only call this from an error
 * condition inside reiserfs_read_super!  Returns whatever
 * do_journal_release() returns.
 */
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *sb)
{
	/* tell do_journal_release() that we are on an error path */
	const int error = 1;

	return do_journal_release(th, sb, error);
}
19711da177e4SLinus Torvalds 
19721da177e4SLinus Torvalds /* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
1973a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb,
1974bd4c625cSLinus Torvalds 				       struct reiserfs_journal_desc *desc,
1975bd4c625cSLinus Torvalds 				       struct reiserfs_journal_commit *commit)
1976bd4c625cSLinus Torvalds {
19771da177e4SLinus Torvalds 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
19781da177e4SLinus Torvalds 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
1979a9dd3643SJeff Mahoney 	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
1980bd4c625cSLinus Torvalds 	    get_commit_trans_len(commit) <= 0) {
19811da177e4SLinus Torvalds 		return 1;
19821da177e4SLinus Torvalds 	}
19831da177e4SLinus Torvalds 	return 0;
19841da177e4SLinus Torvalds }
1985bd4c625cSLinus Torvalds 
19861da177e4SLinus Torvalds /* returns 0 if it did not find a description block
19871da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block
19881da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid
19891da177e4SLinus Torvalds */
1990a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb,
1991bd4c625cSLinus Torvalds 					struct buffer_head *d_bh,
1992600ed416SJeff Mahoney 					unsigned int *oldest_invalid_trans_id,
1993bd4c625cSLinus Torvalds 					unsigned long *newest_mount_id)
1994bd4c625cSLinus Torvalds {
19951da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
19961da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
19971da177e4SLinus Torvalds 	struct buffer_head *c_bh;
19981da177e4SLinus Torvalds 	unsigned long offset;
19991da177e4SLinus Torvalds 
20001da177e4SLinus Torvalds 	if (!d_bh)
20011da177e4SLinus Torvalds 		return 0;
20021da177e4SLinus Torvalds 
20031da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2004bd4c625cSLinus Torvalds 	if (get_desc_trans_len(desc) > 0
2005bd4c625cSLinus Torvalds 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
2006bd4c625cSLinus Torvalds 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
2007bd4c625cSLinus Torvalds 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
2008a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2009bd4c625cSLinus Torvalds 				       "journal-986: transaction "
20101da177e4SLinus Torvalds 				       "is valid returning because trans_id %d is greater than "
2011bd4c625cSLinus Torvalds 				       "oldest_invalid %lu",
2012bd4c625cSLinus Torvalds 				       get_desc_trans_id(desc),
20131da177e4SLinus Torvalds 				       *oldest_invalid_trans_id);
20141da177e4SLinus Torvalds 			return 0;
20151da177e4SLinus Torvalds 		}
2016bd4c625cSLinus Torvalds 		if (newest_mount_id
2017bd4c625cSLinus Torvalds 		    && *newest_mount_id > get_desc_mount_id(desc)) {
2018a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2019bd4c625cSLinus Torvalds 				       "journal-1087: transaction "
20201da177e4SLinus Torvalds 				       "is valid returning because mount_id %d is less than "
2021bd4c625cSLinus Torvalds 				       "newest_mount_id %lu",
2022bd4c625cSLinus Torvalds 				       get_desc_mount_id(desc),
20231da177e4SLinus Torvalds 				       *newest_mount_id);
20241da177e4SLinus Torvalds 			return -1;
20251da177e4SLinus Torvalds 		}
2026a9dd3643SJeff Mahoney 		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2027a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2018",
202845b03d5eSJeff Mahoney 					 "Bad transaction length %d "
202945b03d5eSJeff Mahoney 					 "encountered, ignoring transaction",
2030bd4c625cSLinus Torvalds 					 get_desc_trans_len(desc));
20311da177e4SLinus Torvalds 			return -1;
20321da177e4SLinus Torvalds 		}
2033a9dd3643SJeff Mahoney 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
20341da177e4SLinus Torvalds 
20351da177e4SLinus Torvalds 		/* ok, we have a journal description block, lets see if the transaction was valid */
2036bd4c625cSLinus Torvalds 		c_bh =
2037a9dd3643SJeff Mahoney 		    journal_bread(sb,
2038a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2039bd4c625cSLinus Torvalds 				  ((offset + get_desc_trans_len(desc) +
2040a9dd3643SJeff Mahoney 				    1) % SB_ONDISK_JOURNAL_SIZE(sb)));
20411da177e4SLinus Torvalds 		if (!c_bh)
20421da177e4SLinus Torvalds 			return 0;
20431da177e4SLinus Torvalds 		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2044a9dd3643SJeff Mahoney 		if (journal_compare_desc_commit(sb, desc, commit)) {
2045a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
20461da177e4SLinus Torvalds 				       "journal_transaction_is_valid, commit offset %ld had bad "
20471da177e4SLinus Torvalds 				       "time %d or length %d",
2048bd4c625cSLinus Torvalds 				       c_bh->b_blocknr -
2049a9dd3643SJeff Mahoney 				       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
20501da177e4SLinus Torvalds 				       get_commit_trans_id(commit),
20511da177e4SLinus Torvalds 				       get_commit_trans_len(commit));
20521da177e4SLinus Torvalds 			brelse(c_bh);
20531da177e4SLinus Torvalds 			if (oldest_invalid_trans_id) {
2054bd4c625cSLinus Torvalds 				*oldest_invalid_trans_id =
2055bd4c625cSLinus Torvalds 				    get_desc_trans_id(desc);
2056a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2057bd4c625cSLinus Torvalds 					       "journal-1004: "
20581da177e4SLinus Torvalds 					       "transaction_is_valid setting oldest invalid trans_id "
2059bd4c625cSLinus Torvalds 					       "to %d",
2060bd4c625cSLinus Torvalds 					       get_desc_trans_id(desc));
20611da177e4SLinus Torvalds 			}
20621da177e4SLinus Torvalds 			return -1;
20631da177e4SLinus Torvalds 		}
20641da177e4SLinus Torvalds 		brelse(c_bh);
2065a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2066bd4c625cSLinus Torvalds 			       "journal-1006: found valid "
20671da177e4SLinus Torvalds 			       "transaction start offset %llu, len %d id %d",
2068bd4c625cSLinus Torvalds 			       d_bh->b_blocknr -
2069a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2070bd4c625cSLinus Torvalds 			       get_desc_trans_len(desc),
2071bd4c625cSLinus Torvalds 			       get_desc_trans_id(desc));
20721da177e4SLinus Torvalds 		return 1;
20731da177e4SLinus Torvalds 	} else {
20741da177e4SLinus Torvalds 		return 0;
20751da177e4SLinus Torvalds 	}
20761da177e4SLinus Torvalds }
20771da177e4SLinus Torvalds 
/*
 * Call brelse() on the first @num buffer heads of @heads, in ascending
 * order.  Does nothing when @num is not positive.
 */
static void brelse_array(struct buffer_head **heads, int num)
{
	while (num-- > 0)
		brelse(*heads++);
}
20851da177e4SLinus Torvalds 
20861da177e4SLinus Torvalds /*
20871da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions,
20881da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction
20891da177e4SLinus Torvalds ** is invalid, or too old.
20901da177e4SLinus Torvalds */
2091a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb,
2092bd4c625cSLinus Torvalds 				    unsigned long cur_dblock,
2093bd4c625cSLinus Torvalds 				    unsigned long oldest_start,
2094600ed416SJeff Mahoney 				    unsigned int oldest_trans_id,
2095bd4c625cSLinus Torvalds 				    unsigned long newest_mount_id)
2096bd4c625cSLinus Torvalds {
2097a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
20981da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
20991da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
2100600ed416SJeff Mahoney 	unsigned int trans_id = 0;
21011da177e4SLinus Torvalds 	struct buffer_head *c_bh;
21021da177e4SLinus Torvalds 	struct buffer_head *d_bh;
21031da177e4SLinus Torvalds 	struct buffer_head **log_blocks = NULL;
21041da177e4SLinus Torvalds 	struct buffer_head **real_blocks = NULL;
2105600ed416SJeff Mahoney 	unsigned int trans_offset;
21061da177e4SLinus Torvalds 	int i;
21071da177e4SLinus Torvalds 	int trans_half;
21081da177e4SLinus Torvalds 
2109a9dd3643SJeff Mahoney 	d_bh = journal_bread(sb, cur_dblock);
21101da177e4SLinus Torvalds 	if (!d_bh)
21111da177e4SLinus Torvalds 		return 1;
21121da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2113a9dd3643SJeff Mahoney 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2114a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
21151da177e4SLinus Torvalds 		       "journal_read_transaction, offset %llu, len %d mount_id %d",
2116a9dd3643SJeff Mahoney 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
21171da177e4SLinus Torvalds 		       get_desc_trans_len(desc), get_desc_mount_id(desc));
21181da177e4SLinus Torvalds 	if (get_desc_trans_id(desc) < oldest_trans_id) {
2119a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
21201da177e4SLinus Torvalds 			       "journal_read_trans skipping because %lu is too old",
2121bd4c625cSLinus Torvalds 			       cur_dblock -
2122a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb));
21231da177e4SLinus Torvalds 		brelse(d_bh);
21241da177e4SLinus Torvalds 		return 1;
21251da177e4SLinus Torvalds 	}
21261da177e4SLinus Torvalds 	if (get_desc_mount_id(desc) != newest_mount_id) {
2127a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
21281da177e4SLinus Torvalds 			       "journal_read_trans skipping because %d is != "
21291da177e4SLinus Torvalds 			       "newest_mount_id %lu", get_desc_mount_id(desc),
21301da177e4SLinus Torvalds 			       newest_mount_id);
21311da177e4SLinus Torvalds 		brelse(d_bh);
21321da177e4SLinus Torvalds 		return 1;
21331da177e4SLinus Torvalds 	}
2134a9dd3643SJeff Mahoney 	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
21351da177e4SLinus Torvalds 			     ((trans_offset + get_desc_trans_len(desc) + 1) %
2136a9dd3643SJeff Mahoney 			      SB_ONDISK_JOURNAL_SIZE(sb)));
21371da177e4SLinus Torvalds 	if (!c_bh) {
21381da177e4SLinus Torvalds 		brelse(d_bh);
21391da177e4SLinus Torvalds 		return 1;
21401da177e4SLinus Torvalds 	}
21411da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2142a9dd3643SJeff Mahoney 	if (journal_compare_desc_commit(sb, desc, commit)) {
2143a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2144bd4c625cSLinus Torvalds 			       "journal_read_transaction, "
21451da177e4SLinus Torvalds 			       "commit offset %llu had bad time %d or length %d",
2146bd4c625cSLinus Torvalds 			       c_bh->b_blocknr -
2147a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2148bd4c625cSLinus Torvalds 			       get_commit_trans_id(commit),
2149bd4c625cSLinus Torvalds 			       get_commit_trans_len(commit));
21501da177e4SLinus Torvalds 		brelse(c_bh);
21511da177e4SLinus Torvalds 		brelse(d_bh);
21521da177e4SLinus Torvalds 		return 1;
21531da177e4SLinus Torvalds 	}
21543f8b5ee3SJeff Mahoney 
21553f8b5ee3SJeff Mahoney 	if (bdev_read_only(sb->s_bdev)) {
21563f8b5ee3SJeff Mahoney 		reiserfs_warning(sb, "clm-2076",
21573f8b5ee3SJeff Mahoney 				 "device is readonly, unable to replay log");
21583f8b5ee3SJeff Mahoney 		brelse(c_bh);
21593f8b5ee3SJeff Mahoney 		brelse(d_bh);
21603f8b5ee3SJeff Mahoney 		return -EROFS;
21613f8b5ee3SJeff Mahoney 	}
21623f8b5ee3SJeff Mahoney 
21631da177e4SLinus Torvalds 	trans_id = get_desc_trans_id(desc);
21641da177e4SLinus Torvalds 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
2165d739b42bSPekka Enberg 	log_blocks = kmalloc(get_desc_trans_len(desc) *
2166d739b42bSPekka Enberg 			     sizeof(struct buffer_head *), GFP_NOFS);
2167d739b42bSPekka Enberg 	real_blocks = kmalloc(get_desc_trans_len(desc) *
2168d739b42bSPekka Enberg 			      sizeof(struct buffer_head *), GFP_NOFS);
21691da177e4SLinus Torvalds 	if (!log_blocks || !real_blocks) {
21701da177e4SLinus Torvalds 		brelse(c_bh);
21711da177e4SLinus Torvalds 		brelse(d_bh);
2172d739b42bSPekka Enberg 		kfree(log_blocks);
2173d739b42bSPekka Enberg 		kfree(real_blocks);
2174a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1169",
217545b03d5eSJeff Mahoney 				 "kmalloc failed, unable to mount FS");
21761da177e4SLinus Torvalds 		return -1;
21771da177e4SLinus Torvalds 	}
21781da177e4SLinus Torvalds 	/* get all the buffer heads */
2179a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
21801da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
2181bd4c625cSLinus Torvalds 		log_blocks[i] =
2182a9dd3643SJeff Mahoney 		    journal_getblk(sb,
2183a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2184bd4c625cSLinus Torvalds 				   (trans_offset + 1 +
2185a9dd3643SJeff Mahoney 				    i) % SB_ONDISK_JOURNAL_SIZE(sb));
21861da177e4SLinus Torvalds 		if (i < trans_half) {
2187bd4c625cSLinus Torvalds 			real_blocks[i] =
2188a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2189bd4c625cSLinus Torvalds 				      le32_to_cpu(desc->j_realblock[i]));
21901da177e4SLinus Torvalds 		} else {
2191bd4c625cSLinus Torvalds 			real_blocks[i] =
2192a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2193bd4c625cSLinus Torvalds 				      le32_to_cpu(commit->
2194bd4c625cSLinus Torvalds 						  j_realblock[i - trans_half]));
21951da177e4SLinus Torvalds 		}
2196a9dd3643SJeff Mahoney 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2197a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1207",
219845b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
219945b03d5eSJeff Mahoney 					 "Block to replay is outside of "
220045b03d5eSJeff Mahoney 					 "filesystem");
22011da177e4SLinus Torvalds 			goto abort_replay;
22021da177e4SLinus Torvalds 		}
22031da177e4SLinus Torvalds 		/* make sure we don't try to replay onto log or reserved area */
2204bd4c625cSLinus Torvalds 		if (is_block_in_log_or_reserved_area
2205a9dd3643SJeff Mahoney 		    (sb, real_blocks[i]->b_blocknr)) {
2206a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1204",
220745b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
220845b03d5eSJeff Mahoney 					 "Trying to replay onto a log block");
22091da177e4SLinus Torvalds 		      abort_replay:
22101da177e4SLinus Torvalds 			brelse_array(log_blocks, i);
22111da177e4SLinus Torvalds 			brelse_array(real_blocks, i);
22121da177e4SLinus Torvalds 			brelse(c_bh);
22131da177e4SLinus Torvalds 			brelse(d_bh);
2214d739b42bSPekka Enberg 			kfree(log_blocks);
2215d739b42bSPekka Enberg 			kfree(real_blocks);
22161da177e4SLinus Torvalds 			return -1;
22171da177e4SLinus Torvalds 		}
22181da177e4SLinus Torvalds 	}
22191da177e4SLinus Torvalds 	/* read in the log blocks, memcpy to the corresponding real block */
22201da177e4SLinus Torvalds 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
22211da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22228ebc4232SFrederic Weisbecker 
22238ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
22241da177e4SLinus Torvalds 		wait_on_buffer(log_blocks[i]);
22258ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
22268ebc4232SFrederic Weisbecker 
22271da177e4SLinus Torvalds 		if (!buffer_uptodate(log_blocks[i])) {
2228a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1212",
222945b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
223045b03d5eSJeff Mahoney 					 "buffer write failed");
2231bd4c625cSLinus Torvalds 			brelse_array(log_blocks + i,
2232bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22331da177e4SLinus Torvalds 			brelse_array(real_blocks, get_desc_trans_len(desc));
22341da177e4SLinus Torvalds 			brelse(c_bh);
22351da177e4SLinus Torvalds 			brelse(d_bh);
2236d739b42bSPekka Enberg 			kfree(log_blocks);
2237d739b42bSPekka Enberg 			kfree(real_blocks);
22381da177e4SLinus Torvalds 			return -1;
22391da177e4SLinus Torvalds 		}
2240bd4c625cSLinus Torvalds 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2241bd4c625cSLinus Torvalds 		       real_blocks[i]->b_size);
22421da177e4SLinus Torvalds 		set_buffer_uptodate(real_blocks[i]);
22431da177e4SLinus Torvalds 		brelse(log_blocks[i]);
22441da177e4SLinus Torvalds 	}
22451da177e4SLinus Torvalds 	/* flush out the real blocks */
22461da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22471da177e4SLinus Torvalds 		set_buffer_dirty(real_blocks[i]);
22489cb569d6SChristoph Hellwig 		write_dirty_buffer(real_blocks[i], WRITE);
22491da177e4SLinus Torvalds 	}
22501da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22511da177e4SLinus Torvalds 		wait_on_buffer(real_blocks[i]);
22521da177e4SLinus Torvalds 		if (!buffer_uptodate(real_blocks[i])) {
2253a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1226",
225445b03d5eSJeff Mahoney 					 "REPLAY FAILURE, fsck required! "
225545b03d5eSJeff Mahoney 					 "buffer write failed");
2256bd4c625cSLinus Torvalds 			brelse_array(real_blocks + i,
2257bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22581da177e4SLinus Torvalds 			brelse(c_bh);
22591da177e4SLinus Torvalds 			brelse(d_bh);
2260d739b42bSPekka Enberg 			kfree(log_blocks);
2261d739b42bSPekka Enberg 			kfree(real_blocks);
22621da177e4SLinus Torvalds 			return -1;
22631da177e4SLinus Torvalds 		}
22641da177e4SLinus Torvalds 		brelse(real_blocks[i]);
22651da177e4SLinus Torvalds 	}
2266bd4c625cSLinus Torvalds 	cur_dblock =
2267a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2268bd4c625cSLinus Torvalds 	    ((trans_offset + get_desc_trans_len(desc) +
2269a9dd3643SJeff Mahoney 	      2) % SB_ONDISK_JOURNAL_SIZE(sb));
2270a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2271bd4c625cSLinus Torvalds 		       "journal-1095: setting journal " "start to offset %ld",
2272a9dd3643SJeff Mahoney 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
22731da177e4SLinus Torvalds 
22741da177e4SLinus Torvalds 	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2275a9dd3643SJeff Mahoney 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
22761da177e4SLinus Torvalds 	journal->j_last_flush_trans_id = trans_id;
22771da177e4SLinus Torvalds 	journal->j_trans_id = trans_id + 1;
2278a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
2279a44c94a7SAlexander Zarochentsev 	if (journal->j_trans_id == 0)
2280a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
22811da177e4SLinus Torvalds 	brelse(c_bh);
22821da177e4SLinus Torvalds 	brelse(d_bh);
2283d739b42bSPekka Enberg 	kfree(log_blocks);
2284d739b42bSPekka Enberg 	kfree(real_blocks);
22851da177e4SLinus Torvalds 	return 0;
22861da177e4SLinus Torvalds }
22871da177e4SLinus Torvalds 
22881da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize
22891da177e4SLinus Torvalds    size (but no more than BUFNR blocks at a time). This proved to improve
22901da177e4SLinus Torvalds    mounting speed on self-rebuilding raid5 arrays at least.
22911da177e4SLinus Torvalds    Right now it is only used from journal code. But later we might use it
22921da177e4SLinus Torvalds    from other places.
22931da177e4SLinus Torvalds    Note: Do not use journal_getblk/sb_getblk functions here! */
22943ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev,
22953ee16670SJeff Mahoney 					   b_blocknr_t block, int bufsize,
22963ee16670SJeff Mahoney 					   b_blocknr_t max_block)
22971da177e4SLinus Torvalds {
22981da177e4SLinus Torvalds 	struct buffer_head *bhlist[BUFNR];
22991da177e4SLinus Torvalds 	unsigned int blocks = BUFNR;
23001da177e4SLinus Torvalds 	struct buffer_head *bh;
23011da177e4SLinus Torvalds 	int i, j;
23021da177e4SLinus Torvalds 
23031da177e4SLinus Torvalds 	bh = __getblk(dev, block, bufsize);
23041da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23051da177e4SLinus Torvalds 		return (bh);
23061da177e4SLinus Torvalds 
23071da177e4SLinus Torvalds 	if (block + BUFNR > max_block) {
23081da177e4SLinus Torvalds 		blocks = max_block - block;
23091da177e4SLinus Torvalds 	}
23101da177e4SLinus Torvalds 	bhlist[0] = bh;
23111da177e4SLinus Torvalds 	j = 1;
23121da177e4SLinus Torvalds 	for (i = 1; i < blocks; i++) {
23131da177e4SLinus Torvalds 		bh = __getblk(dev, block + i, bufsize);
23141da177e4SLinus Torvalds 		if (buffer_uptodate(bh)) {
23151da177e4SLinus Torvalds 			brelse(bh);
23161da177e4SLinus Torvalds 			break;
2317bd4c625cSLinus Torvalds 		} else
2318bd4c625cSLinus Torvalds 			bhlist[j++] = bh;
23191da177e4SLinus Torvalds 	}
23201da177e4SLinus Torvalds 	ll_rw_block(READ, j, bhlist);
23211da177e4SLinus Torvalds 	for (i = 1; i < j; i++)
23221da177e4SLinus Torvalds 		brelse(bhlist[i]);
23231da177e4SLinus Torvalds 	bh = bhlist[0];
23241da177e4SLinus Torvalds 	wait_on_buffer(bh);
23251da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23261da177e4SLinus Torvalds 		return bh;
23271da177e4SLinus Torvalds 	brelse(bh);
23281da177e4SLinus Torvalds 	return NULL;
23291da177e4SLinus Torvalds }
23301da177e4SLinus Torvalds 
23311da177e4SLinus Torvalds /*
23321da177e4SLinus Torvalds ** read and replay the log
23331da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
23341da177e4SLinus Torvalds ** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
23351da177e4SLinus Torvalds **
23361da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
23371da177e4SLinus Torvalds **
23381da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly.
23391da177e4SLinus Torvalds */
2340a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb)
2341bd4c625cSLinus Torvalds {
2342a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
23431da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
2344600ed416SJeff Mahoney 	unsigned int oldest_trans_id = 0;
2345600ed416SJeff Mahoney 	unsigned int oldest_invalid_trans_id = 0;
23461da177e4SLinus Torvalds 	time_t start;
23471da177e4SLinus Torvalds 	unsigned long oldest_start = 0;
23481da177e4SLinus Torvalds 	unsigned long cur_dblock = 0;
23491da177e4SLinus Torvalds 	unsigned long newest_mount_id = 9;
23501da177e4SLinus Torvalds 	struct buffer_head *d_bh;
23511da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
23521da177e4SLinus Torvalds 	int valid_journal_header = 0;
23531da177e4SLinus Torvalds 	int replay_count = 0;
23541da177e4SLinus Torvalds 	int continue_replay = 1;
23551da177e4SLinus Torvalds 	int ret;
23561da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
23571da177e4SLinus Torvalds 
2358a9dd3643SJeff Mahoney 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2359a9dd3643SJeff Mahoney 	reiserfs_info(sb, "checking transaction log (%s)\n",
23601da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b));
23611da177e4SLinus Torvalds 	start = get_seconds();
23621da177e4SLinus Torvalds 
23631da177e4SLinus Torvalds 	/* step 1, read in the journal header block.  Check the transaction it says
23641da177e4SLinus Torvalds 	 ** is the first unflushed, and if that transaction is not valid,
23651da177e4SLinus Torvalds 	 ** replay is done
23661da177e4SLinus Torvalds 	 */
2367a9dd3643SJeff Mahoney 	journal->j_header_bh = journal_bread(sb,
2368a9dd3643SJeff Mahoney 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2369a9dd3643SJeff Mahoney 					     + SB_ONDISK_JOURNAL_SIZE(sb));
23701da177e4SLinus Torvalds 	if (!journal->j_header_bh) {
23711da177e4SLinus Torvalds 		return 1;
23721da177e4SLinus Torvalds 	}
23731da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2374c499ec24SVladimir V. Saveliev 	if (le32_to_cpu(jh->j_first_unflushed_offset) <
2375a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_SIZE(sb)
2376bd4c625cSLinus Torvalds 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2377bd4c625cSLinus Torvalds 		oldest_start =
2378a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
23791da177e4SLinus Torvalds 		    le32_to_cpu(jh->j_first_unflushed_offset);
23801da177e4SLinus Torvalds 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
23811da177e4SLinus Torvalds 		newest_mount_id = le32_to_cpu(jh->j_mount_id);
2382a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2383bd4c625cSLinus Torvalds 			       "journal-1153: found in "
23841da177e4SLinus Torvalds 			       "header: first_unflushed_offset %d, last_flushed_trans_id "
23851da177e4SLinus Torvalds 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
23861da177e4SLinus Torvalds 			       le32_to_cpu(jh->j_last_flush_trans_id));
23871da177e4SLinus Torvalds 		valid_journal_header = 1;
23881da177e4SLinus Torvalds 
23891da177e4SLinus Torvalds 		/* now, we try to read the first unflushed offset.  If it is not valid,
23901da177e4SLinus Torvalds 		 ** there is nothing more we can do, and it makes no sense to read
23911da177e4SLinus Torvalds 		 ** through the whole log.
23921da177e4SLinus Torvalds 		 */
2393bd4c625cSLinus Torvalds 		d_bh =
2394a9dd3643SJeff Mahoney 		    journal_bread(sb,
2395a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2396bd4c625cSLinus Torvalds 				  le32_to_cpu(jh->j_first_unflushed_offset));
2397a9dd3643SJeff Mahoney 		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
23981da177e4SLinus Torvalds 		if (!ret) {
23991da177e4SLinus Torvalds 			continue_replay = 0;
24001da177e4SLinus Torvalds 		}
24011da177e4SLinus Torvalds 		brelse(d_bh);
24021da177e4SLinus Torvalds 		goto start_log_replay;
24031da177e4SLinus Torvalds 	}
24041da177e4SLinus Torvalds 
24051da177e4SLinus Torvalds 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
24061da177e4SLinus Torvalds 	 ** all the valid transactions, and pick out the oldest.
24071da177e4SLinus Torvalds 	 */
2408bd4c625cSLinus Torvalds 	while (continue_replay
2409bd4c625cSLinus Torvalds 	       && cur_dblock <
2410a9dd3643SJeff Mahoney 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2411a9dd3643SJeff Mahoney 		SB_ONDISK_JOURNAL_SIZE(sb))) {
24121da177e4SLinus Torvalds 		/* Note that it is required for blocksize of primary fs device and journal
24131da177e4SLinus Torvalds 		   device to be the same */
2414bd4c625cSLinus Torvalds 		d_bh =
2415bd4c625cSLinus Torvalds 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
2416a9dd3643SJeff Mahoney 				    sb->s_blocksize,
2417a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2418a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_SIZE(sb));
2419bd4c625cSLinus Torvalds 		ret =
2420a9dd3643SJeff Mahoney 		    journal_transaction_is_valid(sb, d_bh,
2421bd4c625cSLinus Torvalds 						 &oldest_invalid_trans_id,
2422bd4c625cSLinus Torvalds 						 &newest_mount_id);
24231da177e4SLinus Torvalds 		if (ret == 1) {
24241da177e4SLinus Torvalds 			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
24251da177e4SLinus Torvalds 			if (oldest_start == 0) {	/* init all oldest_ values */
24261da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24271da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
24281da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2429a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2430bd4c625cSLinus Torvalds 					       "journal-1179: Setting "
24311da177e4SLinus Torvalds 					       "oldest_start to offset %llu, trans_id %lu",
2432bd4c625cSLinus Torvalds 					       oldest_start -
2433bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2434a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
24351da177e4SLinus Torvalds 			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
24361da177e4SLinus Torvalds 				/* one we just read was older */
24371da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24381da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
2439a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2440bd4c625cSLinus Torvalds 					       "journal-1180: Resetting "
24411da177e4SLinus Torvalds 					       "oldest_start to offset %lu, trans_id %lu",
2442bd4c625cSLinus Torvalds 					       oldest_start -
2443bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2444a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
24451da177e4SLinus Torvalds 			}
24461da177e4SLinus Torvalds 			if (newest_mount_id < get_desc_mount_id(desc)) {
24471da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2448a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2449bd4c625cSLinus Torvalds 					       "journal-1299: Setting "
2450bd4c625cSLinus Torvalds 					       "newest_mount_id to %d",
2451bd4c625cSLinus Torvalds 					       get_desc_mount_id(desc));
24521da177e4SLinus Torvalds 			}
24531da177e4SLinus Torvalds 			cur_dblock += get_desc_trans_len(desc) + 2;
24541da177e4SLinus Torvalds 		} else {
24551da177e4SLinus Torvalds 			cur_dblock++;
24561da177e4SLinus Torvalds 		}
24571da177e4SLinus Torvalds 		brelse(d_bh);
24581da177e4SLinus Torvalds 	}
24591da177e4SLinus Torvalds 
24601da177e4SLinus Torvalds       start_log_replay:
24611da177e4SLinus Torvalds 	cur_dblock = oldest_start;
24621da177e4SLinus Torvalds 	if (oldest_trans_id) {
2463a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2464bd4c625cSLinus Torvalds 			       "journal-1206: Starting replay "
24651da177e4SLinus Torvalds 			       "from offset %llu, trans_id %lu",
2466a9dd3643SJeff Mahoney 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
24671da177e4SLinus Torvalds 			       oldest_trans_id);
24681da177e4SLinus Torvalds 
24691da177e4SLinus Torvalds 	}
24701da177e4SLinus Torvalds 	replay_count = 0;
24711da177e4SLinus Torvalds 	while (continue_replay && oldest_trans_id > 0) {
2472bd4c625cSLinus Torvalds 		ret =
2473a9dd3643SJeff Mahoney 		    journal_read_transaction(sb, cur_dblock, oldest_start,
2474bd4c625cSLinus Torvalds 					     oldest_trans_id, newest_mount_id);
24751da177e4SLinus Torvalds 		if (ret < 0) {
24761da177e4SLinus Torvalds 			return ret;
24771da177e4SLinus Torvalds 		} else if (ret != 0) {
24781da177e4SLinus Torvalds 			break;
24791da177e4SLinus Torvalds 		}
2480bd4c625cSLinus Torvalds 		cur_dblock =
2481a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
24821da177e4SLinus Torvalds 		replay_count++;
24831da177e4SLinus Torvalds 		if (cur_dblock == oldest_start)
24841da177e4SLinus Torvalds 			break;
24851da177e4SLinus Torvalds 	}
24861da177e4SLinus Torvalds 
24871da177e4SLinus Torvalds 	if (oldest_trans_id == 0) {
2488a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2489bd4c625cSLinus Torvalds 			       "journal-1225: No valid " "transactions found");
24901da177e4SLinus Torvalds 	}
24911da177e4SLinus Torvalds 	/* j_start does not get set correctly if we don't replay any transactions.
24921da177e4SLinus Torvalds 	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
24931da177e4SLinus Torvalds 	 ** copy the trans_id from the header
24941da177e4SLinus Torvalds 	 */
24951da177e4SLinus Torvalds 	if (valid_journal_header && replay_count == 0) {
24961da177e4SLinus Torvalds 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2497bd4c625cSLinus Torvalds 		journal->j_trans_id =
2498bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2499a44c94a7SAlexander Zarochentsev 		/* check for trans_id overflow */
2500a44c94a7SAlexander Zarochentsev 		if (journal->j_trans_id == 0)
2501a44c94a7SAlexander Zarochentsev 			journal->j_trans_id = 10;
2502bd4c625cSLinus Torvalds 		journal->j_last_flush_trans_id =
2503bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id);
25041da177e4SLinus Torvalds 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
25051da177e4SLinus Torvalds 	} else {
25061da177e4SLinus Torvalds 		journal->j_mount_id = newest_mount_id + 1;
25071da177e4SLinus Torvalds 	}
2508a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
25091da177e4SLinus Torvalds 		       "newest_mount_id to %lu", journal->j_mount_id);
25101da177e4SLinus Torvalds 	journal->j_first_unflushed_offset = journal->j_start;
25111da177e4SLinus Torvalds 	if (replay_count > 0) {
2512a9dd3643SJeff Mahoney 		reiserfs_info(sb,
2513bd4c625cSLinus Torvalds 			      "replayed %d transactions in %lu seconds\n",
25141da177e4SLinus Torvalds 			      replay_count, get_seconds() - start);
25151da177e4SLinus Torvalds 	}
2516a9dd3643SJeff Mahoney 	if (!bdev_read_only(sb->s_bdev) &&
2517a9dd3643SJeff Mahoney 	    _update_journal_header_block(sb, journal->j_start,
2518bd4c625cSLinus Torvalds 					 journal->j_last_flush_trans_id)) {
25191da177e4SLinus Torvalds 		/* replay failed, caller must call free_journal_ram and abort
25201da177e4SLinus Torvalds 		 ** the mount
25211da177e4SLinus Torvalds 		 */
25221da177e4SLinus Torvalds 		return -1;
25231da177e4SLinus Torvalds 	}
25241da177e4SLinus Torvalds 	return 0;
25251da177e4SLinus Torvalds }
25261da177e4SLinus Torvalds 
25271da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
25281da177e4SLinus Torvalds {
25291da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
25308c777cc4SPekka Enberg 	jl = kzalloc(sizeof(struct reiserfs_journal_list),
25318c777cc4SPekka Enberg 		     GFP_NOFS | __GFP_NOFAIL);
25321da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_list);
25331da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_working_list);
25341da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
25351da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_bh_list);
253690415deaSJeff Mahoney 	mutex_init(&jl->j_commit_mutex);
25371da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_num_lists++;
25381da177e4SLinus Torvalds 	get_journal_list(jl);
25391da177e4SLinus Torvalds 	return jl;
25401da177e4SLinus Torvalds }
25411da177e4SLinus Torvalds 
2542a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb)
2543bd4c625cSLinus Torvalds {
2544a9dd3643SJeff Mahoney 	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
25451da177e4SLinus Torvalds }
25461da177e4SLinus Torvalds 
25471da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
25481da177e4SLinus Torvalds 			       struct reiserfs_journal *journal)
25491da177e4SLinus Torvalds {
25501da177e4SLinus Torvalds 	int result;
25511da177e4SLinus Torvalds 
25521da177e4SLinus Torvalds 	result = 0;
25531da177e4SLinus Torvalds 
255486098fa0SChristoph Hellwig 	if (journal->j_dev_bd != NULL) {
255586098fa0SChristoph Hellwig 		if (journal->j_dev_bd->bd_dev != super->s_dev)
255686098fa0SChristoph Hellwig 			bd_release(journal->j_dev_bd);
2557e5eb8caaSAl Viro 		result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
25581da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
25591da177e4SLinus Torvalds 	}
25601da177e4SLinus Torvalds 
25611da177e4SLinus Torvalds 	if (result != 0) {
256245b03d5eSJeff Mahoney 		reiserfs_warning(super, "sh-457",
256345b03d5eSJeff Mahoney 				 "Cannot release journal device: %i", result);
25641da177e4SLinus Torvalds 	}
25651da177e4SLinus Torvalds 	return result;
25661da177e4SLinus Torvalds }
25671da177e4SLinus Torvalds 
25681da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super,
25691da177e4SLinus Torvalds 			    struct reiserfs_journal *journal,
25701da177e4SLinus Torvalds 			    const char *jdev_name)
25711da177e4SLinus Torvalds {
25721da177e4SLinus Torvalds 	int result;
25731da177e4SLinus Torvalds 	dev_t jdev;
2574aeb5d727SAl Viro 	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
25751da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
25761da177e4SLinus Torvalds 
25771da177e4SLinus Torvalds 	result = 0;
25781da177e4SLinus Torvalds 
25791da177e4SLinus Torvalds 	journal->j_dev_bd = NULL;
25801da177e4SLinus Torvalds 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
25811da177e4SLinus Torvalds 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
25821da177e4SLinus Torvalds 
25831da177e4SLinus Torvalds 	if (bdev_read_only(super->s_bdev))
25841da177e4SLinus Torvalds 		blkdev_mode = FMODE_READ;
25851da177e4SLinus Torvalds 
25861da177e4SLinus Torvalds 	/* there is no "jdev" option and journal is on separate device */
25871da177e4SLinus Torvalds 	if ((!jdev_name || !jdev_name[0])) {
25881da177e4SLinus Torvalds 		journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2589e5eb8caaSAl Viro 		journal->j_dev_mode = blkdev_mode;
25901da177e4SLinus Torvalds 		if (IS_ERR(journal->j_dev_bd)) {
25911da177e4SLinus Torvalds 			result = PTR_ERR(journal->j_dev_bd);
25921da177e4SLinus Torvalds 			journal->j_dev_bd = NULL;
259345b03d5eSJeff Mahoney 			reiserfs_warning(super, "sh-458",
25941da177e4SLinus Torvalds 					 "cannot init journal device '%s': %i",
25951da177e4SLinus Torvalds 					 __bdevname(jdev, b), result);
25961da177e4SLinus Torvalds 			return result;
259786098fa0SChristoph Hellwig 		} else if (jdev != super->s_dev) {
259886098fa0SChristoph Hellwig 			result = bd_claim(journal->j_dev_bd, journal);
259986098fa0SChristoph Hellwig 			if (result) {
26009a1c3542SAl Viro 				blkdev_put(journal->j_dev_bd, blkdev_mode);
260186098fa0SChristoph Hellwig 				return result;
260286098fa0SChristoph Hellwig 			}
260386098fa0SChristoph Hellwig 
26041da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
260586098fa0SChristoph Hellwig 		}
260686098fa0SChristoph Hellwig 
26071da177e4SLinus Torvalds 		return 0;
26081da177e4SLinus Torvalds 	}
26091da177e4SLinus Torvalds 
2610e5eb8caaSAl Viro 	journal->j_dev_mode = blkdev_mode;
261130c40d2cSAl Viro 	journal->j_dev_bd = open_bdev_exclusive(jdev_name,
2612e5eb8caaSAl Viro 						blkdev_mode, journal);
261386098fa0SChristoph Hellwig 	if (IS_ERR(journal->j_dev_bd)) {
261486098fa0SChristoph Hellwig 		result = PTR_ERR(journal->j_dev_bd);
261586098fa0SChristoph Hellwig 		journal->j_dev_bd = NULL;
261686098fa0SChristoph Hellwig 		reiserfs_warning(super,
261786098fa0SChristoph Hellwig 				 "journal_init_dev: Cannot open '%s': %i",
261886098fa0SChristoph Hellwig 				 jdev_name, result);
261986098fa0SChristoph Hellwig 		return result;
262086098fa0SChristoph Hellwig 	}
262186098fa0SChristoph Hellwig 
26221da177e4SLinus Torvalds 	set_blocksize(journal->j_dev_bd, super->s_blocksize);
2623bd4c625cSLinus Torvalds 	reiserfs_info(super,
2624bd4c625cSLinus Torvalds 		      "journal_init_dev: journal device: %s\n",
262574f9f974SEdward Shishkin 		      bdevname(journal->j_dev_bd, b));
262686098fa0SChristoph Hellwig 	return 0;
26271da177e4SLinus Torvalds }
26281da177e4SLinus Torvalds 
2629cf3d0b81SEdward Shishkin /**
2630cf3d0b81SEdward Shishkin  * When creating/tuning a file system user can assign some
2631cf3d0b81SEdward Shishkin  * journal params within boundaries which depend on the ratio
2632cf3d0b81SEdward Shishkin  * blocksize/standard_blocksize.
2633cf3d0b81SEdward Shishkin  *
2634cf3d0b81SEdward Shishkin  * For blocks >= standard_blocksize transaction size should
2635cf3d0b81SEdward Shishkin  * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more
2636cf3d0b81SEdward Shishkin  * then JOURNAL_TRANS_MAX_DEFAULT.
2637cf3d0b81SEdward Shishkin  *
2638cf3d0b81SEdward Shishkin  * For blocks < standard_blocksize these boundaries should be
2639cf3d0b81SEdward Shishkin  * decreased proportionally.
2640cf3d0b81SEdward Shishkin  */
2641cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096)
2642cf3d0b81SEdward Shishkin 
2643a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb,
2644cf3d0b81SEdward Shishkin 				     struct reiserfs_journal *journal)
2645cf3d0b81SEdward Shishkin {
2646cf3d0b81SEdward Shishkin         if (journal->j_trans_max) {
2647cf3d0b81SEdward Shishkin 	        /* Non-default journal params.
2648cf3d0b81SEdward Shishkin 		   Do sanity check for them. */
2649cf3d0b81SEdward Shishkin 	        int ratio = 1;
2650a9dd3643SJeff Mahoney 		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2651a9dd3643SJeff Mahoney 		        ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2652cf3d0b81SEdward Shishkin 
2653cf3d0b81SEdward Shishkin 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2654cf3d0b81SEdward Shishkin 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2655a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2656cf3d0b81SEdward Shishkin 		    JOURNAL_MIN_RATIO) {
2657a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-462",
265845b03d5eSJeff Mahoney 					 "bad transaction max size (%u). "
265945b03d5eSJeff Mahoney 					 "FSCK?", journal->j_trans_max);
2660cf3d0b81SEdward Shishkin 			return 1;
2661cf3d0b81SEdward Shishkin 		}
2662cf3d0b81SEdward Shishkin 		if (journal->j_max_batch != (journal->j_trans_max) *
2663cf3d0b81SEdward Shishkin 		        JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2664a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-463",
266545b03d5eSJeff Mahoney 					 "bad transaction max batch (%u). "
266645b03d5eSJeff Mahoney 					 "FSCK?", journal->j_max_batch);
2667cf3d0b81SEdward Shishkin 			return 1;
2668cf3d0b81SEdward Shishkin 		}
2669cf3d0b81SEdward Shishkin 	} else {
2670cf3d0b81SEdward Shishkin 		/* Default journal params.
2671cf3d0b81SEdward Shishkin                    The file system was created by old version
2672cf3d0b81SEdward Shishkin 		   of mkreiserfs, so some fields contain zeros,
2673cf3d0b81SEdward Shishkin 		   and we need to advise proper values for them */
2674a9dd3643SJeff Mahoney 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2675a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2676a9dd3643SJeff Mahoney 					 sb->s_blocksize);
267745b03d5eSJeff Mahoney 			return 1;
267845b03d5eSJeff Mahoney 		}
2679cf3d0b81SEdward Shishkin 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2680cf3d0b81SEdward Shishkin 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2681cf3d0b81SEdward Shishkin 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2682cf3d0b81SEdward Shishkin 	}
2683cf3d0b81SEdward Shishkin 	return 0;
2684cf3d0b81SEdward Shishkin }
2685cf3d0b81SEdward Shishkin 
26861da177e4SLinus Torvalds /*
26871da177e4SLinus Torvalds ** must be called once on fs mount.  calls journal_read for you
26881da177e4SLinus Torvalds */
2689a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name,
2690bd4c625cSLinus Torvalds 		 int old_format, unsigned int commit_max_age)
2691bd4c625cSLinus Torvalds {
2692a9dd3643SJeff Mahoney 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
26931da177e4SLinus Torvalds 	struct buffer_head *bhjh;
26941da177e4SLinus Torvalds 	struct reiserfs_super_block *rs;
26951da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
26961da177e4SLinus Torvalds 	struct reiserfs_journal *journal;
26971da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
26981da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
269998ea3f50SFrederic Weisbecker 	int ret;
27001da177e4SLinus Torvalds 
270198ea3f50SFrederic Weisbecker 	/*
270298ea3f50SFrederic Weisbecker 	 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
270398ea3f50SFrederic Weisbecker 	 * dependency inversion warnings.
270498ea3f50SFrederic Weisbecker 	 */
270598ea3f50SFrederic Weisbecker 	reiserfs_write_unlock(sb);
2706a9dd3643SJeff Mahoney 	journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
27071da177e4SLinus Torvalds 	if (!journal) {
2708a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1256",
270945b03d5eSJeff Mahoney 				 "unable to get memory for journal structure");
271098ea3f50SFrederic Weisbecker 		reiserfs_write_lock(sb);
27111da177e4SLinus Torvalds 		return 1;
27121da177e4SLinus Torvalds 	}
27131da177e4SLinus Torvalds 	memset(journal, 0, sizeof(struct reiserfs_journal));
27141da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
27151da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_prealloc_list);
27161da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_working_list);
27171da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_journal_list);
27181da177e4SLinus Torvalds 	journal->j_persistent_trans = 0;
271998ea3f50SFrederic Weisbecker 	ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
272098ea3f50SFrederic Weisbecker 					   reiserfs_bmap_count(sb));
272198ea3f50SFrederic Weisbecker 	reiserfs_write_lock(sb);
272298ea3f50SFrederic Weisbecker 	if (ret)
27231da177e4SLinus Torvalds 		goto free_and_return;
272498ea3f50SFrederic Weisbecker 
2725a9dd3643SJeff Mahoney 	allocate_bitmap_nodes(sb);
27261da177e4SLinus Torvalds 
27271da177e4SLinus Torvalds 	/* reserved for journal area support */
2728a9dd3643SJeff Mahoney 	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2729bd4c625cSLinus Torvalds 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2730a9dd3643SJeff Mahoney 						 / sb->s_blocksize +
2731a9dd3643SJeff Mahoney 						 reiserfs_bmap_count(sb) +
2732bd4c625cSLinus Torvalds 						 1 :
2733bd4c625cSLinus Torvalds 						 REISERFS_DISK_OFFSET_IN_BYTES /
2734a9dd3643SJeff Mahoney 						 sb->s_blocksize + 2);
27351da177e4SLinus Torvalds 
27361da177e4SLinus Torvalds 	/* Sanity check to see is the standard journal fitting withing first bitmap
27371da177e4SLinus Torvalds 	   (actual for small blocksizes) */
2738a9dd3643SJeff Mahoney 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2739a9dd3643SJeff Mahoney 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2740a9dd3643SJeff Mahoney 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2741a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1393",
274245b03d5eSJeff Mahoney 				 "journal does not fit for area addressed "
274345b03d5eSJeff Mahoney 				 "by first of bitmap blocks. It starts at "
27441da177e4SLinus Torvalds 				 "%u and its size is %u. Block size %ld",
2745a9dd3643SJeff Mahoney 				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2746a9dd3643SJeff Mahoney 				 SB_ONDISK_JOURNAL_SIZE(sb),
2747a9dd3643SJeff Mahoney 				 sb->s_blocksize);
27481da177e4SLinus Torvalds 		goto free_and_return;
27491da177e4SLinus Torvalds 	}
27501da177e4SLinus Torvalds 
2751193be0eeSFrederic Weisbecker 	/*
2752193be0eeSFrederic Weisbecker 	 * We need to unlock here to avoid creating the following
2753193be0eeSFrederic Weisbecker 	 * dependency:
2754193be0eeSFrederic Weisbecker 	 * reiserfs_lock -> sysfs_mutex
2755193be0eeSFrederic Weisbecker 	 * Because the reiserfs mmap path creates the following dependency:
2756193be0eeSFrederic Weisbecker 	 * mm->mmap -> reiserfs_lock, hence we have
2757193be0eeSFrederic Weisbecker 	 * mm->mmap -> reiserfs_lock ->sysfs_mutex
2758193be0eeSFrederic Weisbecker 	 * This would ends up in a circular dependency with sysfs readdir path
2759193be0eeSFrederic Weisbecker 	 * which does sysfs_mutex -> mm->mmap_sem
2760193be0eeSFrederic Weisbecker 	 * This is fine because the reiserfs lock is useless in mount path,
2761193be0eeSFrederic Weisbecker 	 * at least until we call journal_begin. We keep it for paranoid
2762193be0eeSFrederic Weisbecker 	 * reasons.
2763193be0eeSFrederic Weisbecker 	 */
2764193be0eeSFrederic Weisbecker 	reiserfs_write_unlock(sb);
2765a9dd3643SJeff Mahoney 	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2766193be0eeSFrederic Weisbecker 		reiserfs_write_lock(sb);
2767a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-462",
276845b03d5eSJeff Mahoney 				 "unable to initialize jornal device");
27691da177e4SLinus Torvalds 		goto free_and_return;
27701da177e4SLinus Torvalds 	}
2771193be0eeSFrederic Weisbecker 	reiserfs_write_lock(sb);
27721da177e4SLinus Torvalds 
2773a9dd3643SJeff Mahoney 	rs = SB_DISK_SUPER_BLOCK(sb);
27741da177e4SLinus Torvalds 
27751da177e4SLinus Torvalds 	/* read journal header */
2776a9dd3643SJeff Mahoney 	bhjh = journal_bread(sb,
2777a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2778a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb));
27791da177e4SLinus Torvalds 	if (!bhjh) {
2780a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-459",
278145b03d5eSJeff Mahoney 				 "unable to read journal header");
27821da177e4SLinus Torvalds 		goto free_and_return;
27831da177e4SLinus Torvalds 	}
27841da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
27851da177e4SLinus Torvalds 
27861da177e4SLinus Torvalds 	/* make sure that journal matches to the super block */
2787bd4c625cSLinus Torvalds 	if (is_reiserfs_jr(rs)
2788bd4c625cSLinus Torvalds 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2789bd4c625cSLinus Torvalds 		sb_jp_journal_magic(rs))) {
2790a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-460",
279145b03d5eSJeff Mahoney 				 "journal header magic %x (device %s) does "
279245b03d5eSJeff Mahoney 				 "not match to magic found in super block %x",
279345b03d5eSJeff Mahoney 				 jh->jh_journal.jp_journal_magic,
27941da177e4SLinus Torvalds 				 bdevname(journal->j_dev_bd, b),
27951da177e4SLinus Torvalds 				 sb_jp_journal_magic(rs));
27961da177e4SLinus Torvalds 		brelse(bhjh);
27971da177e4SLinus Torvalds 		goto free_and_return;
27981da177e4SLinus Torvalds 	}
27991da177e4SLinus Torvalds 
28001da177e4SLinus Torvalds 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
28011da177e4SLinus Torvalds 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2802bd4c625cSLinus Torvalds 	journal->j_max_commit_age =
2803bd4c625cSLinus Torvalds 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
28041da177e4SLinus Torvalds 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
28051da177e4SLinus Torvalds 
2806a9dd3643SJeff Mahoney 	if (check_advise_trans_params(sb, journal) != 0)
2807cf3d0b81SEdward Shishkin 	        goto free_and_return;
28081da177e4SLinus Torvalds 	journal->j_default_max_commit_age = journal->j_max_commit_age;
28091da177e4SLinus Torvalds 
28101da177e4SLinus Torvalds 	if (commit_max_age != 0) {
28111da177e4SLinus Torvalds 		journal->j_max_commit_age = commit_max_age;
28121da177e4SLinus Torvalds 		journal->j_max_trans_age = commit_max_age;
28131da177e4SLinus Torvalds 	}
28141da177e4SLinus Torvalds 
2815a9dd3643SJeff Mahoney 	reiserfs_info(sb, "journal params: device %s, size %u, "
28161da177e4SLinus Torvalds 		      "journal first block %u, max trans len %u, max batch %u, "
28171da177e4SLinus Torvalds 		      "max commit age %u, max trans age %u\n",
28181da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b),
2819a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_SIZE(sb),
2820a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_1st_BLOCK(sb),
28211da177e4SLinus Torvalds 		      journal->j_trans_max,
28221da177e4SLinus Torvalds 		      journal->j_max_batch,
2823bd4c625cSLinus Torvalds 		      journal->j_max_commit_age, journal->j_max_trans_age);
28241da177e4SLinus Torvalds 
28251da177e4SLinus Torvalds 	brelse(bhjh);
28261da177e4SLinus Torvalds 
28271da177e4SLinus Torvalds 	journal->j_list_bitmap_index = 0;
2828a9dd3643SJeff Mahoney 	journal_list_init(sb);
28291da177e4SLinus Torvalds 
2830bd4c625cSLinus Torvalds 	memset(journal->j_list_hash_table, 0,
2831bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
28321da177e4SLinus Torvalds 
28331da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_dirty_buffers);
28341da177e4SLinus Torvalds 	spin_lock_init(&journal->j_dirty_buffers_lock);
28351da177e4SLinus Torvalds 
28361da177e4SLinus Torvalds 	journal->j_start = 0;
28371da177e4SLinus Torvalds 	journal->j_len = 0;
28381da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
28391da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
28401da177e4SLinus Torvalds 	atomic_set(&(journal->j_async_throttle), 0);
28411da177e4SLinus Torvalds 	journal->j_bcount = 0;
28421da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
28431da177e4SLinus Torvalds 	journal->j_last = NULL;
28441da177e4SLinus Torvalds 	journal->j_first = NULL;
28451da177e4SLinus Torvalds 	init_waitqueue_head(&(journal->j_join_wait));
2846f68215c4SJeff Mahoney 	mutex_init(&journal->j_mutex);
2847afe70259SJeff Mahoney 	mutex_init(&journal->j_flush_mutex);
28481da177e4SLinus Torvalds 
28491da177e4SLinus Torvalds 	journal->j_trans_id = 10;
28501da177e4SLinus Torvalds 	journal->j_mount_id = 10;
28511da177e4SLinus Torvalds 	journal->j_state = 0;
28521da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
2853bbec9191SFrederic Weisbecker 	reiserfs_write_unlock(sb);
28541da177e4SLinus Torvalds 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2855bbec9191SFrederic Weisbecker 	reiserfs_write_lock(sb);
28561da177e4SLinus Torvalds 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
28571da177e4SLinus Torvalds 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
28581da177e4SLinus Torvalds 	journal->j_cnode_used = 0;
28591da177e4SLinus Torvalds 	journal->j_must_wait = 0;
28601da177e4SLinus Torvalds 
2861576f6d79SJeff Mahoney 	if (journal->j_cnode_free == 0) {
2862a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2863576f6d79SJeff Mahoney 		                 "allocation failed (%ld bytes). Journal is "
2864576f6d79SJeff Mahoney 		                 "too large for available memory. Usually "
2865576f6d79SJeff Mahoney 		                 "this is due to a journal that is too large.",
2866576f6d79SJeff Mahoney 		                 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2867576f6d79SJeff Mahoney         	goto free_and_return;
2868576f6d79SJeff Mahoney 	}
2869576f6d79SJeff Mahoney 
2870a9dd3643SJeff Mahoney 	init_journal_hash(sb);
28711da177e4SLinus Torvalds 	jl = journal->j_current_jl;
2872a9dd3643SJeff Mahoney 	jl->j_list_bitmap = get_list_bitmap(sb, jl);
28731da177e4SLinus Torvalds 	if (!jl->j_list_bitmap) {
2874a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2005",
287545b03d5eSJeff Mahoney 				 "get_list_bitmap failed for journal list 0");
28761da177e4SLinus Torvalds 		goto free_and_return;
28771da177e4SLinus Torvalds 	}
2878a9dd3643SJeff Mahoney 	if (journal_read(sb) < 0) {
2879a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "reiserfs-2006",
288045b03d5eSJeff Mahoney 				 "Replay Failure, unable to mount");
28811da177e4SLinus Torvalds 		goto free_and_return;
28821da177e4SLinus Torvalds 	}
28831da177e4SLinus Torvalds 
28841da177e4SLinus Torvalds 	reiserfs_mounted_fs_count++;
288548f6ba5eSFrederic Weisbecker 	if (reiserfs_mounted_fs_count <= 1) {
288648f6ba5eSFrederic Weisbecker 		reiserfs_write_unlock(sb);
28871da177e4SLinus Torvalds 		commit_wq = create_workqueue("reiserfs");
288848f6ba5eSFrederic Weisbecker 		reiserfs_write_lock(sb);
288948f6ba5eSFrederic Weisbecker 	}
28901da177e4SLinus Torvalds 
2891c4028958SDavid Howells 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2892a9dd3643SJeff Mahoney 	journal->j_work_sb = sb;
28931da177e4SLinus Torvalds 	return 0;
28941da177e4SLinus Torvalds       free_and_return:
2895a9dd3643SJeff Mahoney 	free_journal_ram(sb);
28961da177e4SLinus Torvalds 	return 1;
28971da177e4SLinus Torvalds }
28981da177e4SLinus Torvalds 
28991da177e4SLinus Torvalds /*
29001da177e4SLinus Torvalds ** test for a polite end of the current transaction.  Used by file_write, and should
29011da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single
29021da177e4SLinus Torvalds ** transaction
29031da177e4SLinus Torvalds */
2904bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2905bd4c625cSLinus Torvalds 				   int new_alloc)
2906bd4c625cSLinus Torvalds {
29071da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29081da177e4SLinus Torvalds 	time_t now = get_seconds();
29091da177e4SLinus Torvalds 	/* cannot restart while nested */
29101da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29111da177e4SLinus Torvalds 	if (th->t_refcount > 1)
29121da177e4SLinus Torvalds 		return 0;
29131da177e4SLinus Torvalds 	if (journal->j_must_wait > 0 ||
29141da177e4SLinus Torvalds 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
29151da177e4SLinus Torvalds 	    atomic_read(&(journal->j_jlock)) ||
29161da177e4SLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
29171da177e4SLinus Torvalds 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
29181da177e4SLinus Torvalds 		return 1;
29191da177e4SLinus Torvalds 	}
29206ae1ea44SChris Mason 	/* protected by the BKL here */
29216ae1ea44SChris Mason 	journal->j_len_alloc += new_alloc;
29226ae1ea44SChris Mason 	th->t_blocks_allocated += new_alloc ;
29231da177e4SLinus Torvalds 	return 0;
29241da177e4SLinus Torvalds }
29251da177e4SLinus Torvalds 
29261da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the
29271da177e4SLinus Torvalds ** kernel_lock to be held
29281da177e4SLinus Torvalds */
2929bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2930bd4c625cSLinus Torvalds {
29311da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29321da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29331da177e4SLinus Torvalds 	journal->j_must_wait = 1;
29341da177e4SLinus Torvalds 	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
29351da177e4SLinus Torvalds 	return;
29361da177e4SLinus Torvalds }
29371da177e4SLinus Torvalds 
29381da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
29391da177e4SLinus Torvalds ** require BKL
29401da177e4SLinus Torvalds */
2941bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s)
2942bd4c625cSLinus Torvalds {
29431da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29441da177e4SLinus Torvalds 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
29451da177e4SLinus Torvalds 	wake_up(&journal->j_join_wait);
29461da177e4SLinus Torvalds }
29471da177e4SLinus Torvalds 
29481da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
29491da177e4SLinus Torvalds ** require BKL
29501da177e4SLinus Torvalds */
2951bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s)
2952bd4c625cSLinus Torvalds {
29531da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29541da177e4SLinus Torvalds 	wait_event(journal->j_join_wait,
29551da177e4SLinus Torvalds 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
29561da177e4SLinus Torvalds }
29571da177e4SLinus Torvalds 
2958bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s)
2959bd4c625cSLinus Torvalds {
29601da177e4SLinus Torvalds 	wait_queue_t wait;
29611da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29621da177e4SLinus Torvalds 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
29631da177e4SLinus Torvalds 
29641da177e4SLinus Torvalds 	/*
29651da177e4SLinus Torvalds 	 * we don't want to use wait_event here because
29661da177e4SLinus Torvalds 	 * we only want to wait once.
29671da177e4SLinus Torvalds 	 */
29681da177e4SLinus Torvalds 	init_waitqueue_entry(&wait, current);
29691da177e4SLinus Torvalds 	add_wait_queue(&journal->j_join_wait, &wait);
29701da177e4SLinus Torvalds 	set_current_state(TASK_UNINTERRUPTIBLE);
29718ebc4232SFrederic Weisbecker 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
29728ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
29731da177e4SLinus Torvalds 		schedule();
29748ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
29758ebc4232SFrederic Weisbecker 	}
29765ab2f7e0SMilind Arun Choudhary 	__set_current_state(TASK_RUNNING);
29771da177e4SLinus Torvalds 	remove_wait_queue(&journal->j_join_wait, &wait);
29781da177e4SLinus Torvalds }
29791da177e4SLinus Torvalds 
2980bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s)
2981bd4c625cSLinus Torvalds {
29821da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29831da177e4SLinus Torvalds 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
29841da177e4SLinus Torvalds 		wake_up(&journal->j_join_wait);
29851da177e4SLinus Torvalds }
29861da177e4SLinus Torvalds 
2987600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
29881da177e4SLinus Torvalds {
29891da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
29901da177e4SLinus Torvalds 	unsigned long bcount = journal->j_bcount;
29911da177e4SLinus Torvalds 	while (1) {
29928ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
2993041e0e3bSNishanth Aravamudan 		schedule_timeout_uninterruptible(1);
29948ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
29951da177e4SLinus Torvalds 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
29961da177e4SLinus Torvalds 		while ((atomic_read(&journal->j_wcount) > 0 ||
29971da177e4SLinus Torvalds 			atomic_read(&journal->j_jlock)) &&
29981da177e4SLinus Torvalds 		       journal->j_trans_id == trans_id) {
29991da177e4SLinus Torvalds 			queue_log_writer(sb);
30001da177e4SLinus Torvalds 		}
30011da177e4SLinus Torvalds 		if (journal->j_trans_id != trans_id)
30021da177e4SLinus Torvalds 			break;
30031da177e4SLinus Torvalds 		if (bcount == journal->j_bcount)
30041da177e4SLinus Torvalds 			break;
30051da177e4SLinus Torvalds 		bcount = journal->j_bcount;
30061da177e4SLinus Torvalds 	}
30071da177e4SLinus Torvalds }
30081da177e4SLinus Torvalds 
30091da177e4SLinus Torvalds /* join == true if you must join an existing transaction.
30101da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish
30111da177e4SLinus Torvalds **
30121da177e4SLinus Torvalds ** this will block until the transaction is joinable.  send the number of blocks you
30131da177e4SLinus Torvalds ** expect to use in nblocks.
30141da177e4SLinus Torvalds */
3015bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3016a9dd3643SJeff Mahoney 			      struct super_block *sb, unsigned long nblocks,
3017bd4c625cSLinus Torvalds 			      int join)
3018bd4c625cSLinus Torvalds {
30191da177e4SLinus Torvalds 	time_t now = get_seconds();
3020600ed416SJeff Mahoney 	unsigned int old_trans_id;
3021a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
30221da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
30231da177e4SLinus Torvalds 	int sched_count = 0;
30241da177e4SLinus Torvalds 	int retval;
30251da177e4SLinus Torvalds 
3026a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal_begin");
302714a61442SEric Sesterhenn 	BUG_ON(nblocks > journal->j_trans_max);
30281da177e4SLinus Torvalds 
3029a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.journal_being);
30301da177e4SLinus Torvalds 	/* set here for journal_join */
30311da177e4SLinus Torvalds 	th->t_refcount = 1;
3032a9dd3643SJeff Mahoney 	th->t_super = sb;
30331da177e4SLinus Torvalds 
30341da177e4SLinus Torvalds       relock:
3035a9dd3643SJeff Mahoney 	lock_journal(sb);
30361da177e4SLinus Torvalds 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3037a9dd3643SJeff Mahoney 		unlock_journal(sb);
30381da177e4SLinus Torvalds 		retval = journal->j_errno;
30391da177e4SLinus Torvalds 		goto out_fail;
30401da177e4SLinus Torvalds 	}
30411da177e4SLinus Torvalds 	journal->j_bcount++;
30421da177e4SLinus Torvalds 
30431da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3044a9dd3643SJeff Mahoney 		unlock_journal(sb);
30458ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3046a9dd3643SJeff Mahoney 		reiserfs_wait_on_write_block(sb);
30478ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
3048a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_writers);
30491da177e4SLinus Torvalds 		goto relock;
30501da177e4SLinus Torvalds 	}
30511da177e4SLinus Torvalds 	now = get_seconds();
30521da177e4SLinus Torvalds 
30531da177e4SLinus Torvalds 	/* if there is no room in the journal OR
30541da177e4SLinus Torvalds 	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
30551da177e4SLinus Torvalds 	 ** we don't sleep if there aren't other writers
30561da177e4SLinus Torvalds 	 */
30571da177e4SLinus Torvalds 
30581da177e4SLinus Torvalds 	if ((!join && journal->j_must_wait > 0) ||
3059bd4c625cSLinus Torvalds 	    (!join
3060bd4c625cSLinus Torvalds 	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
3061bd4c625cSLinus Torvalds 	    || (!join && atomic_read(&journal->j_wcount) > 0
3062bd4c625cSLinus Torvalds 		&& journal->j_trans_start_time > 0
3063bd4c625cSLinus Torvalds 		&& (now - journal->j_trans_start_time) >
3064bd4c625cSLinus Torvalds 		journal->j_max_trans_age) || (!join
3065bd4c625cSLinus Torvalds 					      && atomic_read(&journal->j_jlock))
3066bd4c625cSLinus Torvalds 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
30671da177e4SLinus Torvalds 
30681da177e4SLinus Torvalds 		old_trans_id = journal->j_trans_id;
3069a9dd3643SJeff Mahoney 		unlock_journal(sb);	/* allow others to finish this transaction */
30701da177e4SLinus Torvalds 
30711da177e4SLinus Torvalds 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
30721da177e4SLinus Torvalds 		    journal->j_max_batch &&
3073bd4c625cSLinus Torvalds 		    ((journal->j_len + nblocks + 2) * 100) <
3074bd4c625cSLinus Torvalds 		    (journal->j_len_alloc * 75)) {
30751da177e4SLinus Torvalds 			if (atomic_read(&journal->j_wcount) > 10) {
30761da177e4SLinus Torvalds 				sched_count++;
3077a9dd3643SJeff Mahoney 				queue_log_writer(sb);
30781da177e4SLinus Torvalds 				goto relock;
30791da177e4SLinus Torvalds 			}
30801da177e4SLinus Torvalds 		}
30811da177e4SLinus Torvalds 		/* don't mess with joining the transaction if all we have to do is
30821da177e4SLinus Torvalds 		 * wait for someone else to do a commit
30831da177e4SLinus Torvalds 		 */
30841da177e4SLinus Torvalds 		if (atomic_read(&journal->j_jlock)) {
30851da177e4SLinus Torvalds 			while (journal->j_trans_id == old_trans_id &&
30861da177e4SLinus Torvalds 			       atomic_read(&journal->j_jlock)) {
3087a9dd3643SJeff Mahoney 				queue_log_writer(sb);
30881da177e4SLinus Torvalds 			}
30891da177e4SLinus Torvalds 			goto relock;
30901da177e4SLinus Torvalds 		}
3091a9dd3643SJeff Mahoney 		retval = journal_join(&myth, sb, 1);
30921da177e4SLinus Torvalds 		if (retval)
30931da177e4SLinus Torvalds 			goto out_fail;
30941da177e4SLinus Torvalds 
30951da177e4SLinus Torvalds 		/* someone might have ended the transaction while we joined */
30961da177e4SLinus Torvalds 		if (old_trans_id != journal->j_trans_id) {
3097a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, 0);
30981da177e4SLinus Torvalds 		} else {
3099a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
31001da177e4SLinus Torvalds 		}
31011da177e4SLinus Torvalds 
31021da177e4SLinus Torvalds 		if (retval)
31031da177e4SLinus Torvalds 			goto out_fail;
31041da177e4SLinus Torvalds 
3105a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_wcount);
31061da177e4SLinus Torvalds 		goto relock;
31071da177e4SLinus Torvalds 	}
31081da177e4SLinus Torvalds 	/* we are the first writer, set trans_id */
31091da177e4SLinus Torvalds 	if (journal->j_trans_start_time == 0) {
31101da177e4SLinus Torvalds 		journal->j_trans_start_time = get_seconds();
31111da177e4SLinus Torvalds 	}
31121da177e4SLinus Torvalds 	atomic_inc(&(journal->j_wcount));
31131da177e4SLinus Torvalds 	journal->j_len_alloc += nblocks;
31141da177e4SLinus Torvalds 	th->t_blocks_logged = 0;
31151da177e4SLinus Torvalds 	th->t_blocks_allocated = nblocks;
31161da177e4SLinus Torvalds 	th->t_trans_id = journal->j_trans_id;
3117a9dd3643SJeff Mahoney 	unlock_journal(sb);
31181da177e4SLinus Torvalds 	INIT_LIST_HEAD(&th->t_list);
311922e2c507SJens Axboe 	get_fs_excl();
31201da177e4SLinus Torvalds 	return 0;
31211da177e4SLinus Torvalds 
31221da177e4SLinus Torvalds       out_fail:
31231da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
31241da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
31251da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
31261da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
3127a9dd3643SJeff Mahoney 	th->t_super = sb;
31281da177e4SLinus Torvalds 	return retval;
31291da177e4SLinus Torvalds }
31301da177e4SLinus Torvalds 
3131bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3132bd4c625cSLinus Torvalds 								    super_block
3133bd4c625cSLinus Torvalds 								    *s,
3134bd4c625cSLinus Torvalds 								    int nblocks)
3135bd4c625cSLinus Torvalds {
31361da177e4SLinus Torvalds 	int ret;
31371da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *th;
31381da177e4SLinus Torvalds 
31391da177e4SLinus Torvalds 	/* if we're nesting into an existing transaction.  It will be
31401da177e4SLinus Torvalds 	 ** persistent on its own
31411da177e4SLinus Torvalds 	 */
31421da177e4SLinus Torvalds 	if (reiserfs_transaction_running(s)) {
31431da177e4SLinus Torvalds 		th = current->journal_info;
31441da177e4SLinus Torvalds 		th->t_refcount++;
314514a61442SEric Sesterhenn 		BUG_ON(th->t_refcount < 2);
314614a61442SEric Sesterhenn 
31471da177e4SLinus Torvalds 		return th;
31481da177e4SLinus Torvalds 	}
3149d739b42bSPekka Enberg 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
31501da177e4SLinus Torvalds 	if (!th)
31511da177e4SLinus Torvalds 		return NULL;
31521da177e4SLinus Torvalds 	ret = journal_begin(th, s, nblocks);
31531da177e4SLinus Torvalds 	if (ret) {
3154d739b42bSPekka Enberg 		kfree(th);
31551da177e4SLinus Torvalds 		return NULL;
31561da177e4SLinus Torvalds 	}
31571da177e4SLinus Torvalds 
31581da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_persistent_trans++;
31591da177e4SLinus Torvalds 	return th;
31601da177e4SLinus Torvalds }
31611da177e4SLinus Torvalds 
3162bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3163bd4c625cSLinus Torvalds {
31641da177e4SLinus Torvalds 	struct super_block *s = th->t_super;
31651da177e4SLinus Torvalds 	int ret = 0;
31661da177e4SLinus Torvalds 	if (th->t_trans_id)
31671da177e4SLinus Torvalds 		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
31681da177e4SLinus Torvalds 	else
31691da177e4SLinus Torvalds 		ret = -EIO;
31701da177e4SLinus Torvalds 	if (th->t_refcount == 0) {
31711da177e4SLinus Torvalds 		SB_JOURNAL(s)->j_persistent_trans--;
3172d739b42bSPekka Enberg 		kfree(th);
31731da177e4SLinus Torvalds 	}
31741da177e4SLinus Torvalds 	return ret;
31751da177e4SLinus Torvalds }
31761da177e4SLinus Torvalds 
3177bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
3178a9dd3643SJeff Mahoney 			struct super_block *sb, unsigned long nblocks)
3179bd4c625cSLinus Torvalds {
31801da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31811da177e4SLinus Torvalds 
31821da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31831da177e4SLinus Torvalds 	 ** pointer
31841da177e4SLinus Torvalds 	 */
31851da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
318614a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3187a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
31881da177e4SLinus Torvalds }
31891da177e4SLinus Torvalds 
3190bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th,
3191a9dd3643SJeff Mahoney 		       struct super_block *sb, unsigned long nblocks)
3192bd4c625cSLinus Torvalds {
31931da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31941da177e4SLinus Torvalds 
31951da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31961da177e4SLinus Torvalds 	 ** pointer
31971da177e4SLinus Torvalds 	 */
31981da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
319914a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3200a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
32011da177e4SLinus Torvalds }
32021da177e4SLinus Torvalds 
3203bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th,
3204a9dd3643SJeff Mahoney 		  struct super_block *sb, unsigned long nblocks)
3205bd4c625cSLinus Torvalds {
32061da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
32071da177e4SLinus Torvalds 	int ret;
32081da177e4SLinus Torvalds 
32091da177e4SLinus Torvalds 	th->t_handle_save = NULL;
32101da177e4SLinus Torvalds 	if (cur_th) {
32111da177e4SLinus Torvalds 		/* we are nesting into the current transaction */
3212a9dd3643SJeff Mahoney 		if (cur_th->t_super == sb) {
32131da177e4SLinus Torvalds 			BUG_ON(!cur_th->t_refcount);
32141da177e4SLinus Torvalds 			cur_th->t_refcount++;
32151da177e4SLinus Torvalds 			memcpy(th, cur_th, sizeof(*th));
32161da177e4SLinus Torvalds 			if (th->t_refcount <= 1)
3217a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2005",
321845b03d5eSJeff Mahoney 						 "BAD: refcount <= 1, but "
321945b03d5eSJeff Mahoney 						 "journal_info != 0");
32201da177e4SLinus Torvalds 			return 0;
32211da177e4SLinus Torvalds 		} else {
32221da177e4SLinus Torvalds 			/* we've ended up with a handle from a different filesystem.
32231da177e4SLinus Torvalds 			 ** save it and restore on journal_end.  This should never
32241da177e4SLinus Torvalds 			 ** really happen...
32251da177e4SLinus Torvalds 			 */
3226a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2100",
322745b03d5eSJeff Mahoney 					 "nesting info a different FS");
32281da177e4SLinus Torvalds 			th->t_handle_save = current->journal_info;
32291da177e4SLinus Torvalds 			current->journal_info = th;
32301da177e4SLinus Torvalds 		}
32311da177e4SLinus Torvalds 	} else {
32321da177e4SLinus Torvalds 		current->journal_info = th;
32331da177e4SLinus Torvalds 	}
3234a9dd3643SJeff Mahoney 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
323514a61442SEric Sesterhenn 	BUG_ON(current->journal_info != th);
32361da177e4SLinus Torvalds 
32371da177e4SLinus Torvalds 	/* I guess this boils down to being the reciprocal of clm-2100 above.
32381da177e4SLinus Torvalds 	 * If do_journal_begin_r fails, we need to put it back, since journal_end
32391da177e4SLinus Torvalds 	 * won't be called to do it. */
32401da177e4SLinus Torvalds 	if (ret)
32411da177e4SLinus Torvalds 		current->journal_info = th->t_handle_save;
32421da177e4SLinus Torvalds 	else
32431da177e4SLinus Torvalds 		BUG_ON(!th->t_refcount);
32441da177e4SLinus Torvalds 
32451da177e4SLinus Torvalds 	return ret;
32461da177e4SLinus Torvalds }
32471da177e4SLinus Torvalds 
32481da177e4SLinus Torvalds /*
32491da177e4SLinus Torvalds ** puts bh into the current transaction.  If it was already there, reorders removes the
32501da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
32511da177e4SLinus Torvalds **
32521da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
32531da177e4SLinus Torvalds ** transaction is committed.
32541da177e4SLinus Torvalds **
32551da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
32561da177e4SLinus Torvalds */
3257bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3258a9dd3643SJeff Mahoney 		       struct super_block *sb, struct buffer_head *bh)
3259bd4c625cSLinus Torvalds {
3260a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
32611da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
32621da177e4SLinus Torvalds 	int count_already_incd = 0;
32631da177e4SLinus Torvalds 	int prepared = 0;
32641da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
32651da177e4SLinus Torvalds 
3266a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.mark_dirty);
32671da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3268c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3269c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
32701da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
32711da177e4SLinus Torvalds 	}
32721da177e4SLinus Torvalds 
3273a9dd3643SJeff Mahoney 	sb->s_dirt = 1;
32741da177e4SLinus Torvalds 
32751da177e4SLinus Torvalds 	prepared = test_clear_buffer_journal_prepared(bh);
32761da177e4SLinus Torvalds 	clear_buffer_journal_restore_dirty(bh);
32771da177e4SLinus Torvalds 	/* already in this transaction, we are done */
32781da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
3279a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_already);
32801da177e4SLinus Torvalds 		return 0;
32811da177e4SLinus Torvalds 	}
32821da177e4SLinus Torvalds 
32831da177e4SLinus Torvalds 	/* this must be turned into a panic instead of a warning.  We can't allow
32841da177e4SLinus Torvalds 	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
32851da177e4SLinus Torvalds 	 ** could get to disk too early.  NOT GOOD.
32861da177e4SLinus Torvalds 	 */
32871da177e4SLinus Torvalds 	if (!prepared || buffer_dirty(bh)) {
3288a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1777",
328945b03d5eSJeff Mahoney 				 "buffer %llu bad state "
32901da177e4SLinus Torvalds 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3291bd4c625cSLinus Torvalds 				 (unsigned long long)bh->b_blocknr,
3292bd4c625cSLinus Torvalds 				 prepared ? ' ' : '!',
32931da177e4SLinus Torvalds 				 buffer_locked(bh) ? ' ' : '!',
32941da177e4SLinus Torvalds 				 buffer_dirty(bh) ? ' ' : '!',
32951da177e4SLinus Torvalds 				 buffer_journal_dirty(bh) ? ' ' : '!');
32961da177e4SLinus Torvalds 	}
32971da177e4SLinus Torvalds 
32981da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) <= 0) {
3299a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1409",
330045b03d5eSJeff Mahoney 				 "returning because j_wcount was %d",
3301bd4c625cSLinus Torvalds 				 atomic_read(&(journal->j_wcount)));
33021da177e4SLinus Torvalds 		return 1;
33031da177e4SLinus Torvalds 	}
33041da177e4SLinus Torvalds 	/* this error means I've screwed up, and we've overflowed the transaction.
33051da177e4SLinus Torvalds 	 ** Nothing can be done here, except make the FS readonly or panic.
33061da177e4SLinus Torvalds 	 */
33071da177e4SLinus Torvalds 	if (journal->j_len >= journal->j_trans_max) {
3308c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1413",
3309c3a9c210SJeff Mahoney 			       "j_len (%lu) is too big",
3310bd4c625cSLinus Torvalds 			       journal->j_len);
33111da177e4SLinus Torvalds 	}
33121da177e4SLinus Torvalds 
33131da177e4SLinus Torvalds 	if (buffer_journal_dirty(bh)) {
33141da177e4SLinus Torvalds 		count_already_incd = 1;
3315a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
33161da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
33171da177e4SLinus Torvalds 	}
33181da177e4SLinus Torvalds 
33191da177e4SLinus Torvalds 	if (journal->j_len > journal->j_len_alloc) {
33201da177e4SLinus Torvalds 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
33211da177e4SLinus Torvalds 	}
33221da177e4SLinus Torvalds 
33231da177e4SLinus Torvalds 	set_buffer_journaled(bh);
33241da177e4SLinus Torvalds 
33251da177e4SLinus Torvalds 	/* now put this guy on the end */
33261da177e4SLinus Torvalds 	if (!cn) {
3327a9dd3643SJeff Mahoney 		cn = get_cnode(sb);
33281da177e4SLinus Torvalds 		if (!cn) {
3329a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-4", "get_cnode failed!");
33301da177e4SLinus Torvalds 		}
33311da177e4SLinus Torvalds 
33321da177e4SLinus Torvalds 		if (th->t_blocks_logged == th->t_blocks_allocated) {
33331da177e4SLinus Torvalds 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
33341da177e4SLinus Torvalds 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
33351da177e4SLinus Torvalds 		}
33361da177e4SLinus Torvalds 		th->t_blocks_logged++;
33371da177e4SLinus Torvalds 		journal->j_len++;
33381da177e4SLinus Torvalds 
33391da177e4SLinus Torvalds 		cn->bh = bh;
33401da177e4SLinus Torvalds 		cn->blocknr = bh->b_blocknr;
3341a9dd3643SJeff Mahoney 		cn->sb = sb;
33421da177e4SLinus Torvalds 		cn->jlist = NULL;
33431da177e4SLinus Torvalds 		insert_journal_hash(journal->j_hash_table, cn);
33441da177e4SLinus Torvalds 		if (!count_already_incd) {
33451da177e4SLinus Torvalds 			get_bh(bh);
33461da177e4SLinus Torvalds 		}
33471da177e4SLinus Torvalds 	}
33481da177e4SLinus Torvalds 	cn->next = NULL;
33491da177e4SLinus Torvalds 	cn->prev = journal->j_last;
33501da177e4SLinus Torvalds 	cn->bh = bh;
33511da177e4SLinus Torvalds 	if (journal->j_last) {
33521da177e4SLinus Torvalds 		journal->j_last->next = cn;
33531da177e4SLinus Torvalds 		journal->j_last = cn;
33541da177e4SLinus Torvalds 	} else {
33551da177e4SLinus Torvalds 		journal->j_first = cn;
33561da177e4SLinus Torvalds 		journal->j_last = cn;
33571da177e4SLinus Torvalds 	}
33581da177e4SLinus Torvalds 	return 0;
33591da177e4SLinus Torvalds }
33601da177e4SLinus Torvalds 
3361bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th,
3362a9dd3643SJeff Mahoney 		struct super_block *sb, unsigned long nblocks)
3363bd4c625cSLinus Torvalds {
33641da177e4SLinus Torvalds 	if (!current->journal_info && th->t_refcount > 1)
3365a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "REISER-NESTING",
336645b03d5eSJeff Mahoney 				 "th NULL, refcount %d", th->t_refcount);
33671da177e4SLinus Torvalds 
33681da177e4SLinus Torvalds 	if (!th->t_trans_id) {
33691da177e4SLinus Torvalds 		WARN_ON(1);
33701da177e4SLinus Torvalds 		return -EIO;
33711da177e4SLinus Torvalds 	}
33721da177e4SLinus Torvalds 
33731da177e4SLinus Torvalds 	th->t_refcount--;
33741da177e4SLinus Torvalds 	if (th->t_refcount > 0) {
3375bd4c625cSLinus Torvalds 		struct reiserfs_transaction_handle *cur_th =
3376bd4c625cSLinus Torvalds 		    current->journal_info;
33771da177e4SLinus Torvalds 
33781da177e4SLinus Torvalds 		/* we aren't allowed to close a nested transaction on a different
33791da177e4SLinus Torvalds 		 ** filesystem from the one in the task struct
33801da177e4SLinus Torvalds 		 */
338114a61442SEric Sesterhenn 		BUG_ON(cur_th->t_super != th->t_super);
33821da177e4SLinus Torvalds 
33831da177e4SLinus Torvalds 		if (th != cur_th) {
33841da177e4SLinus Torvalds 			memcpy(current->journal_info, th, sizeof(*th));
33851da177e4SLinus Torvalds 			th->t_trans_id = 0;
33861da177e4SLinus Torvalds 		}
33871da177e4SLinus Torvalds 		return 0;
33881da177e4SLinus Torvalds 	} else {
3389a9dd3643SJeff Mahoney 		return do_journal_end(th, sb, nblocks, 0);
33901da177e4SLinus Torvalds 	}
33911da177e4SLinus Torvalds }
33921da177e4SLinus Torvalds 
33931da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters.
33941da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list
33951da177e4SLinus Torvalds **
33961da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted
33971da177e4SLinus Torvalds **
33981da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise
33991da177e4SLinus Torvalds */
3400a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb,
3401bd4c625cSLinus Torvalds 				   b_blocknr_t blocknr, int already_cleaned)
3402bd4c625cSLinus Torvalds {
34031da177e4SLinus Torvalds 	struct buffer_head *bh;
34041da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
3405a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
34061da177e4SLinus Torvalds 	int ret = 0;
34071da177e4SLinus Torvalds 
3408a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
34091da177e4SLinus Torvalds 	if (!cn || !cn->bh) {
34101da177e4SLinus Torvalds 		return ret;
34111da177e4SLinus Torvalds 	}
34121da177e4SLinus Torvalds 	bh = cn->bh;
34131da177e4SLinus Torvalds 	if (cn->prev) {
34141da177e4SLinus Torvalds 		cn->prev->next = cn->next;
34151da177e4SLinus Torvalds 	}
34161da177e4SLinus Torvalds 	if (cn->next) {
34171da177e4SLinus Torvalds 		cn->next->prev = cn->prev;
34181da177e4SLinus Torvalds 	}
34191da177e4SLinus Torvalds 	if (cn == journal->j_first) {
34201da177e4SLinus Torvalds 		journal->j_first = cn->next;
34211da177e4SLinus Torvalds 	}
34221da177e4SLinus Torvalds 	if (cn == journal->j_last) {
34231da177e4SLinus Torvalds 		journal->j_last = cn->prev;
34241da177e4SLinus Torvalds 	}
34251da177e4SLinus Torvalds 	if (bh)
3426a9dd3643SJeff Mahoney 		remove_journal_hash(sb, journal->j_hash_table, NULL,
3427bd4c625cSLinus Torvalds 				    bh->b_blocknr, 0);
34281da177e4SLinus Torvalds 	clear_buffer_journaled(bh);	/* don't log this one */
34291da177e4SLinus Torvalds 
34301da177e4SLinus Torvalds 	if (!already_cleaned) {
34311da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
34321da177e4SLinus Torvalds 		clear_buffer_dirty(bh);
34331da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
34341da177e4SLinus Torvalds 		put_bh(bh);
34351da177e4SLinus Torvalds 		if (atomic_read(&(bh->b_count)) < 0) {
3436a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1752",
343745b03d5eSJeff Mahoney 					 "b_count < 0");
34381da177e4SLinus Torvalds 		}
34391da177e4SLinus Torvalds 		ret = 1;
34401da177e4SLinus Torvalds 	}
34411da177e4SLinus Torvalds 	journal->j_len--;
34421da177e4SLinus Torvalds 	journal->j_len_alloc--;
3443a9dd3643SJeff Mahoney 	free_cnode(sb, cn);
34441da177e4SLinus Torvalds 	return ret;
34451da177e4SLinus Torvalds }
34461da177e4SLinus Torvalds 
34471da177e4SLinus Torvalds /*
34481da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the
34490779bf2dSMatt LaPlante ** transactions that include it are committed to disk.
34501da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty,
34511da177e4SLinus Torvalds ** and 0 if you aren't
34521da177e4SLinus Torvalds **
34531da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
34541da177e4SLinus Torvalds ** blocks for a given transaction on disk
34551da177e4SLinus Torvalds **
34561da177e4SLinus Torvalds */
3457bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn)
3458bd4c625cSLinus Torvalds {
34591da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
34601da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
34611da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur = cn->hprev;
34621da177e4SLinus Torvalds 	int can_dirty = 1;
34631da177e4SLinus Torvalds 
34641da177e4SLinus Torvalds 	/* first test hprev.  These are all newer than cn, so any node here
34651da177e4SLinus Torvalds 	 ** with the same block number and dev means this node can't be sent
34661da177e4SLinus Torvalds 	 ** to disk right now.
34671da177e4SLinus Torvalds 	 */
34681da177e4SLinus Torvalds 	while (cur && can_dirty) {
34691da177e4SLinus Torvalds 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
34701da177e4SLinus Torvalds 		    cur->blocknr == blocknr) {
34711da177e4SLinus Torvalds 			can_dirty = 0;
34721da177e4SLinus Torvalds 		}
34731da177e4SLinus Torvalds 		cur = cur->hprev;
34741da177e4SLinus Torvalds 	}
34751da177e4SLinus Torvalds 	/* then test hnext.  These are all older than cn.  As long as they
34761da177e4SLinus Torvalds 	 ** are committed to the log, it is safe to write cn to disk
34771da177e4SLinus Torvalds 	 */
34781da177e4SLinus Torvalds 	cur = cn->hnext;
34791da177e4SLinus Torvalds 	while (cur && can_dirty) {
34801da177e4SLinus Torvalds 		if (cur->jlist && cur->jlist->j_len > 0 &&
34811da177e4SLinus Torvalds 		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
34821da177e4SLinus Torvalds 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
34831da177e4SLinus Torvalds 			can_dirty = 0;
34841da177e4SLinus Torvalds 		}
34851da177e4SLinus Torvalds 		cur = cur->hnext;
34861da177e4SLinus Torvalds 	}
34871da177e4SLinus Torvalds 	return can_dirty;
34881da177e4SLinus Torvalds }
34891da177e4SLinus Torvalds 
34901da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk
34910779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning
34921da177e4SLinus Torvalds */
3493bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th,
3494a9dd3643SJeff Mahoney 		     struct super_block *sb, unsigned long nblocks)
3495bd4c625cSLinus Torvalds {
3496a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
34971da177e4SLinus Torvalds 
34981da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
34991da177e4SLinus Torvalds 	/* you can sync while nested, very, very bad */
350014a61442SEric Sesterhenn 	BUG_ON(th->t_refcount > 1);
35011da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3502a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3503bd4c625cSLinus Torvalds 					     1);
3504a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
35051da177e4SLinus Torvalds 	}
3506a9dd3643SJeff Mahoney 	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
35071da177e4SLinus Torvalds }
35081da177e4SLinus Torvalds 
35091da177e4SLinus Torvalds /*
35101da177e4SLinus Torvalds ** writeback the pending async commits to disk
35111da177e4SLinus Torvalds */
3512c4028958SDavid Howells static void flush_async_commits(struct work_struct *work)
3513bd4c625cSLinus Torvalds {
3514c4028958SDavid Howells 	struct reiserfs_journal *journal =
3515c4028958SDavid Howells 		container_of(work, struct reiserfs_journal, j_work.work);
3516a9dd3643SJeff Mahoney 	struct super_block *sb = journal->j_work_sb;
35171da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
35181da177e4SLinus Torvalds 	struct list_head *entry;
35191da177e4SLinus Torvalds 
35208ebc4232SFrederic Weisbecker 	reiserfs_write_lock(sb);
35211da177e4SLinus Torvalds 	if (!list_empty(&journal->j_journal_list)) {
35221da177e4SLinus Torvalds 		/* last entry is the youngest, commit it and you get everything */
35231da177e4SLinus Torvalds 		entry = journal->j_journal_list.prev;
35241da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
3525a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
35261da177e4SLinus Torvalds 	}
35278ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
35281da177e4SLinus Torvalds }
35291da177e4SLinus Torvalds 
35301da177e4SLinus Torvalds /*
35311da177e4SLinus Torvalds ** flushes any old transactions to disk
35321da177e4SLinus Torvalds ** ends the current transaction if it is too old
35331da177e4SLinus Torvalds */
3534a9dd3643SJeff Mahoney int reiserfs_flush_old_commits(struct super_block *sb)
3535bd4c625cSLinus Torvalds {
35361da177e4SLinus Torvalds 	time_t now;
35371da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
3538a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35391da177e4SLinus Torvalds 
35401da177e4SLinus Torvalds 	now = get_seconds();
35411da177e4SLinus Torvalds 	/* safety check so we don't flush while we are replaying the log during
35421da177e4SLinus Torvalds 	 * mount
35431da177e4SLinus Torvalds 	 */
35441da177e4SLinus Torvalds 	if (list_empty(&journal->j_journal_list)) {
35451da177e4SLinus Torvalds 		return 0;
35461da177e4SLinus Torvalds 	}
35471da177e4SLinus Torvalds 
35481da177e4SLinus Torvalds 	/* check the current transaction.  If there are no writers, and it is
35491da177e4SLinus Torvalds 	 * too old, finish it, and force the commit blocks to disk
35501da177e4SLinus Torvalds 	 */
35511da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) <= 0 &&
35521da177e4SLinus Torvalds 	    journal->j_trans_start_time > 0 &&
35531da177e4SLinus Torvalds 	    journal->j_len > 0 &&
3554bd4c625cSLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3555a9dd3643SJeff Mahoney 		if (!journal_join(&th, sb, 1)) {
3556a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
3557a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
3558bd4c625cSLinus Torvalds 						     1);
3559a9dd3643SJeff Mahoney 			journal_mark_dirty(&th, sb,
3560a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
35611da177e4SLinus Torvalds 
35621da177e4SLinus Torvalds 			/* we're only being called from kreiserfsd, it makes no sense to do
35631da177e4SLinus Torvalds 			 ** an async commit so that kreiserfsd can do it later
35641da177e4SLinus Torvalds 			 */
3565a9dd3643SJeff Mahoney 			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
35661da177e4SLinus Torvalds 		}
35671da177e4SLinus Torvalds 	}
3568a9dd3643SJeff Mahoney 	return sb->s_dirt;
35691da177e4SLinus Torvalds }
35701da177e4SLinus Torvalds 
35711da177e4SLinus Torvalds /*
35721da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
35731da177e4SLinus Torvalds **
35741da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
35751da177e4SLinus Torvalds ** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
35761da177e4SLinus Torvalds ** flushes the commit list and returns 0.
35771da177e4SLinus Torvalds **
35781da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
35791da177e4SLinus Torvalds **
35801da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
35811da177e4SLinus Torvalds */
3582bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th,
3583a9dd3643SJeff Mahoney 			     struct super_block *sb, unsigned long nblocks,
3584bd4c625cSLinus Torvalds 			     int flags)
3585bd4c625cSLinus Torvalds {
35861da177e4SLinus Torvalds 
35871da177e4SLinus Torvalds 	time_t now;
35881da177e4SLinus Torvalds 	int flush = flags & FLUSH_ALL;
35891da177e4SLinus Torvalds 	int commit_now = flags & COMMIT_NOW;
35901da177e4SLinus Torvalds 	int wait_on_commit = flags & WAIT;
35911da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
3592a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35931da177e4SLinus Torvalds 
35941da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
35951da177e4SLinus Torvalds 
35961da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3597c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3598c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
35991da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
36001da177e4SLinus Torvalds 	}
36011da177e4SLinus Torvalds 
36021da177e4SLinus Torvalds 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
36031da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
36041da177e4SLinus Torvalds 		atomic_dec(&(journal->j_wcount));
36051da177e4SLinus Torvalds 	}
36061da177e4SLinus Torvalds 
36071da177e4SLinus Torvalds 	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
36081da177e4SLinus Torvalds 	 ** will be dealt with by next transaction that actually writes something, but should be taken
36091da177e4SLinus Torvalds 	 ** care of in this trans
36101da177e4SLinus Torvalds 	 */
361114a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
361214a61442SEric Sesterhenn 
36131da177e4SLinus Torvalds 	/* if wcount > 0, and we are called to with flush or commit_now,
36141da177e4SLinus Torvalds 	 ** we wait on j_join_wait.  We will wake up when the last writer has
36151da177e4SLinus Torvalds 	 ** finished the transaction, and started it on its way to the disk.
36161da177e4SLinus Torvalds 	 ** Then, we flush the commit or journal list, and just return 0
36171da177e4SLinus Torvalds 	 ** because the rest of journal end was already done for this transaction.
36181da177e4SLinus Torvalds 	 */
36191da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {
36201da177e4SLinus Torvalds 		if (flush || commit_now) {
36211da177e4SLinus Torvalds 			unsigned trans_id;
36221da177e4SLinus Torvalds 
36231da177e4SLinus Torvalds 			jl = journal->j_current_jl;
36241da177e4SLinus Torvalds 			trans_id = jl->j_trans_id;
36251da177e4SLinus Torvalds 			if (wait_on_commit)
36261da177e4SLinus Torvalds 				jl->j_state |= LIST_COMMIT_PENDING;
36271da177e4SLinus Torvalds 			atomic_set(&(journal->j_jlock), 1);
36281da177e4SLinus Torvalds 			if (flush) {
36291da177e4SLinus Torvalds 				journal->j_next_full_flush = 1;
36301da177e4SLinus Torvalds 			}
3631a9dd3643SJeff Mahoney 			unlock_journal(sb);
36321da177e4SLinus Torvalds 
36331da177e4SLinus Torvalds 			/* sleep while the current transaction is still j_jlocked */
36341da177e4SLinus Torvalds 			while (journal->j_trans_id == trans_id) {
36351da177e4SLinus Torvalds 				if (atomic_read(&journal->j_jlock)) {
3636a9dd3643SJeff Mahoney 					queue_log_writer(sb);
36371da177e4SLinus Torvalds 				} else {
3638a9dd3643SJeff Mahoney 					lock_journal(sb);
36391da177e4SLinus Torvalds 					if (journal->j_trans_id == trans_id) {
3640bd4c625cSLinus Torvalds 						atomic_set(&(journal->j_jlock),
3641bd4c625cSLinus Torvalds 							   1);
36421da177e4SLinus Torvalds 					}
3643a9dd3643SJeff Mahoney 					unlock_journal(sb);
36441da177e4SLinus Torvalds 				}
36451da177e4SLinus Torvalds 			}
364614a61442SEric Sesterhenn 			BUG_ON(journal->j_trans_id == trans_id);
364714a61442SEric Sesterhenn 
3648bd4c625cSLinus Torvalds 			if (commit_now
3649a9dd3643SJeff Mahoney 			    && journal_list_still_alive(sb, trans_id)
3650bd4c625cSLinus Torvalds 			    && wait_on_commit) {
3651a9dd3643SJeff Mahoney 				flush_commit_list(sb, jl, 1);
36521da177e4SLinus Torvalds 			}
36531da177e4SLinus Torvalds 			return 0;
36541da177e4SLinus Torvalds 		}
3655a9dd3643SJeff Mahoney 		unlock_journal(sb);
36561da177e4SLinus Torvalds 		return 0;
36571da177e4SLinus Torvalds 	}
36581da177e4SLinus Torvalds 
36591da177e4SLinus Torvalds 	/* deal with old transactions where we are the last writers */
36601da177e4SLinus Torvalds 	now = get_seconds();
36611da177e4SLinus Torvalds 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
36621da177e4SLinus Torvalds 		commit_now = 1;
36631da177e4SLinus Torvalds 		journal->j_next_async_flush = 1;
36641da177e4SLinus Torvalds 	}
36651da177e4SLinus Torvalds 	/* don't batch when someone is waiting on j_join_wait */
36661da177e4SLinus Torvalds 	/* don't batch when syncing the commit or flushing the whole trans */
3667bd4c625cSLinus Torvalds 	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3668bd4c625cSLinus Torvalds 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3669bd4c625cSLinus Torvalds 	    && journal->j_len_alloc < journal->j_max_batch
3670bd4c625cSLinus Torvalds 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
36711da177e4SLinus Torvalds 		journal->j_bcount++;
3672a9dd3643SJeff Mahoney 		unlock_journal(sb);
36731da177e4SLinus Torvalds 		return 0;
36741da177e4SLinus Torvalds 	}
36751da177e4SLinus Torvalds 
3676a9dd3643SJeff Mahoney 	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3677a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-003",
3678c3a9c210SJeff Mahoney 			       "j_start (%ld) is too high",
3679bd4c625cSLinus Torvalds 			       journal->j_start);
36801da177e4SLinus Torvalds 	}
36811da177e4SLinus Torvalds 	return 1;
36821da177e4SLinus Torvalds }
36831da177e4SLinus Torvalds 
36841da177e4SLinus Torvalds /*
36851da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe.
36861da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
36871da177e4SLinus Torvalds **
36881da177e4SLinus Torvalds ** otherwise:
36891da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
36901da177e4SLinus Torvalds ** before this transaction has finished.
36911da177e4SLinus Torvalds **
36921da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
36931da177e4SLinus Torvalds ** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
36941da177e4SLinus Torvalds ** the block can't be reallocated yet.
36951da177e4SLinus Torvalds **
36961da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
36971da177e4SLinus Torvalds */
3698bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th,
3699a9dd3643SJeff Mahoney 		       struct super_block *sb, b_blocknr_t blocknr)
3700bd4c625cSLinus Torvalds {
3701a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37021da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
37031da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
37041da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
37051da177e4SLinus Torvalds 	int cleaned = 0;
37061da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
37071da177e4SLinus Torvalds 
3708a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
37091da177e4SLinus Torvalds 	if (cn && cn->bh) {
37101da177e4SLinus Torvalds 		bh = cn->bh;
37111da177e4SLinus Torvalds 		get_bh(bh);
37121da177e4SLinus Torvalds 	}
37131da177e4SLinus Torvalds 	/* if it is journal new, we just remove it from this transaction */
37141da177e4SLinus Torvalds 	if (bh && buffer_journal_new(bh)) {
37151da177e4SLinus Torvalds 		clear_buffer_journal_new(bh);
37161da177e4SLinus Torvalds 		clear_prepared_bits(bh);
37171da177e4SLinus Torvalds 		reiserfs_clean_and_file_buffer(bh);
3718a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
37191da177e4SLinus Torvalds 	} else {
37201da177e4SLinus Torvalds 		/* set the bit for this block in the journal bitmap for this transaction */
37211da177e4SLinus Torvalds 		jb = journal->j_current_jl->j_list_bitmap;
37221da177e4SLinus Torvalds 		if (!jb) {
3723a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-1702",
3724c3a9c210SJeff Mahoney 				       "journal_list_bitmap is NULL");
37251da177e4SLinus Torvalds 		}
3726a9dd3643SJeff Mahoney 		set_bit_in_list_bitmap(sb, blocknr, jb);
37271da177e4SLinus Torvalds 
37281da177e4SLinus Torvalds 		/* Note, the entire while loop is not allowed to schedule.  */
37291da177e4SLinus Torvalds 
37301da177e4SLinus Torvalds 		if (bh) {
37311da177e4SLinus Torvalds 			clear_prepared_bits(bh);
37321da177e4SLinus Torvalds 			reiserfs_clean_and_file_buffer(bh);
37331da177e4SLinus Torvalds 		}
3734a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
37351da177e4SLinus Torvalds 
37361da177e4SLinus Torvalds 		/* find all older transactions with this block, make sure they don't try to write it out */
3737a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3738bd4c625cSLinus Torvalds 					  blocknr);
37391da177e4SLinus Torvalds 		while (cn) {
3740a9dd3643SJeff Mahoney 			if (sb == cn->sb && blocknr == cn->blocknr) {
37411da177e4SLinus Torvalds 				set_bit(BLOCK_FREED, &cn->state);
37421da177e4SLinus Torvalds 				if (cn->bh) {
37431da177e4SLinus Torvalds 					if (!cleaned) {
37441da177e4SLinus Torvalds 						/* remove_from_transaction will brelse the buffer if it was
37451da177e4SLinus Torvalds 						 ** in the current trans
37461da177e4SLinus Torvalds 						 */
3747bd4c625cSLinus Torvalds 						clear_buffer_journal_dirty(cn->
3748bd4c625cSLinus Torvalds 									   bh);
37491da177e4SLinus Torvalds 						clear_buffer_dirty(cn->bh);
3750bd4c625cSLinus Torvalds 						clear_buffer_journal_test(cn->
3751bd4c625cSLinus Torvalds 									  bh);
37521da177e4SLinus Torvalds 						cleaned = 1;
37531da177e4SLinus Torvalds 						put_bh(cn->bh);
3754bd4c625cSLinus Torvalds 						if (atomic_read
3755bd4c625cSLinus Torvalds 						    (&(cn->bh->b_count)) < 0) {
3756a9dd3643SJeff Mahoney 							reiserfs_warning(sb,
375745b03d5eSJeff Mahoney 								 "journal-2138",
375845b03d5eSJeff Mahoney 								 "cn->bh->b_count < 0");
37591da177e4SLinus Torvalds 						}
37601da177e4SLinus Torvalds 					}
37611da177e4SLinus Torvalds 					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
3762bd4c625cSLinus Torvalds 						atomic_dec(&
3763bd4c625cSLinus Torvalds 							   (cn->jlist->
3764bd4c625cSLinus Torvalds 							    j_nonzerolen));
37651da177e4SLinus Torvalds 					}
37661da177e4SLinus Torvalds 					cn->bh = NULL;
37671da177e4SLinus Torvalds 				}
37681da177e4SLinus Torvalds 			}
37691da177e4SLinus Torvalds 			cn = cn->hnext;
37701da177e4SLinus Torvalds 		}
37711da177e4SLinus Torvalds 	}
37721da177e4SLinus Torvalds 
3773398c95bdSChris Mason 	if (bh)
3774398c95bdSChris Mason 		release_buffer_page(bh); /* get_hash grabs the buffer */
37751da177e4SLinus Torvalds 	return 0;
37761da177e4SLinus Torvalds }
37771da177e4SLinus Torvalds 
3778bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode)
3779bd4c625cSLinus Torvalds {
37801da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
37811da177e4SLinus Torvalds 	REISERFS_I(inode)->i_jl = journal->j_current_jl;
37821da177e4SLinus Torvalds 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
37831da177e4SLinus Torvalds }
37841da177e4SLinus Torvalds 
37851da177e4SLinus Torvalds /*
37861da177e4SLinus Torvalds  * returns -1 on error, 0 if no commits/barriers were done and 1
37871da177e4SLinus Torvalds  * if a transaction was actually committed and the barrier was done
37881da177e4SLinus Torvalds  */
37891da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id,
37901da177e4SLinus Torvalds 			     struct reiserfs_journal_list *jl)
37911da177e4SLinus Torvalds {
37921da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
37931da177e4SLinus Torvalds 	struct super_block *sb = inode->i_sb;
37941da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37951da177e4SLinus Torvalds 	int ret = 0;
37961da177e4SLinus Torvalds 
37971da177e4SLinus Torvalds 	/* is it from the current transaction, or from an unknown transaction? */
37981da177e4SLinus Torvalds 	if (id == journal->j_trans_id) {
37991da177e4SLinus Torvalds 		jl = journal->j_current_jl;
38001da177e4SLinus Torvalds 		/* try to let other writers come in and grow this transaction */
38011da177e4SLinus Torvalds 		let_transaction_grow(sb, id);
38021da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
38031da177e4SLinus Torvalds 			goto flush_commit_only;
38041da177e4SLinus Torvalds 		}
38051da177e4SLinus Torvalds 
38061da177e4SLinus Torvalds 		ret = journal_begin(&th, sb, 1);
38071da177e4SLinus Torvalds 		if (ret)
38081da177e4SLinus Torvalds 			return ret;
38091da177e4SLinus Torvalds 
38101da177e4SLinus Torvalds 		/* someone might have ended this transaction while we joined */
38111da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
3812bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3813bd4c625cSLinus Torvalds 						     1);
38141da177e4SLinus Torvalds 			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
38151da177e4SLinus Torvalds 			ret = journal_end(&th, sb, 1);
38161da177e4SLinus Torvalds 			goto flush_commit_only;
38171da177e4SLinus Torvalds 		}
38181da177e4SLinus Torvalds 
38191da177e4SLinus Torvalds 		ret = journal_end_sync(&th, sb, 1);
38201da177e4SLinus Torvalds 		if (!ret)
38211da177e4SLinus Torvalds 			ret = 1;
38221da177e4SLinus Torvalds 
38231da177e4SLinus Torvalds 	} else {
38241da177e4SLinus Torvalds 		/* this gets tricky, we have to make sure the journal list in
38251da177e4SLinus Torvalds 		 * the inode still exists.  We know the list is still around
38261da177e4SLinus Torvalds 		 * if we've got a larger transaction id than the oldest list
38271da177e4SLinus Torvalds 		 */
38281da177e4SLinus Torvalds 	      flush_commit_only:
38291da177e4SLinus Torvalds 		if (journal_list_still_alive(inode->i_sb, id)) {
38301da177e4SLinus Torvalds 			/*
38311da177e4SLinus Torvalds 			 * we only set ret to 1 when we know for sure
38321da177e4SLinus Torvalds 			 * the barrier hasn't been started yet on the commit
38331da177e4SLinus Torvalds 			 * block.
38341da177e4SLinus Torvalds 			 */
38351da177e4SLinus Torvalds 			if (atomic_read(&jl->j_commit_left) > 1)
38361da177e4SLinus Torvalds 				ret = 1;
38371da177e4SLinus Torvalds 			flush_commit_list(sb, jl, 1);
38381da177e4SLinus Torvalds 			if (journal->j_errno)
38391da177e4SLinus Torvalds 				ret = journal->j_errno;
38401da177e4SLinus Torvalds 		}
38411da177e4SLinus Torvalds 	}
38421da177e4SLinus Torvalds 	/* otherwise the list is gone, and long since committed */
38431da177e4SLinus Torvalds 	return ret;
38441da177e4SLinus Torvalds }
38451da177e4SLinus Torvalds 
3846bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode)
3847bd4c625cSLinus Torvalds {
3848600ed416SJeff Mahoney 	unsigned int id = REISERFS_I(inode)->i_trans_id;
38491da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
38501da177e4SLinus Torvalds 
38511da177e4SLinus Torvalds 	/* for the whole inode, assume unset id means it was
38521da177e4SLinus Torvalds 	 * changed in the current transaction.  More conservative
38531da177e4SLinus Torvalds 	 */
38541da177e4SLinus Torvalds 	if (!id || !jl) {
38551da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
38561da177e4SLinus Torvalds 		id = REISERFS_I(inode)->i_trans_id;
38571da177e4SLinus Torvalds 		/* jl will be updated in __commit_trans_jl */
38581da177e4SLinus Torvalds 	}
38591da177e4SLinus Torvalds 
38601da177e4SLinus Torvalds 	return __commit_trans_jl(inode, id, jl);
38611da177e4SLinus Torvalds }
38621da177e4SLinus Torvalds 
3863a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb,
3864bd4c625cSLinus Torvalds 				      struct buffer_head *bh)
3865bd4c625cSLinus Torvalds {
3866a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3867a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.restore_prepared);
38681da177e4SLinus Torvalds 	if (!bh) {
38691da177e4SLinus Torvalds 		return;
38701da177e4SLinus Torvalds 	}
38711da177e4SLinus Torvalds 	if (test_clear_buffer_journal_restore_dirty(bh) &&
38721da177e4SLinus Torvalds 	    buffer_journal_dirty(bh)) {
38731da177e4SLinus Torvalds 		struct reiserfs_journal_cnode *cn;
3874a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb,
38751da177e4SLinus Torvalds 					  journal->j_list_hash_table,
38761da177e4SLinus Torvalds 					  bh->b_blocknr);
38771da177e4SLinus Torvalds 		if (cn && can_dirty(cn)) {
38781da177e4SLinus Torvalds 			set_buffer_journal_test(bh);
38791da177e4SLinus Torvalds 			mark_buffer_dirty(bh);
38801da177e4SLinus Torvalds 		}
38811da177e4SLinus Torvalds 	}
38821da177e4SLinus Torvalds 	clear_buffer_journal_prepared(bh);
38831da177e4SLinus Torvalds }
38841da177e4SLinus Torvalds 
38851da177e4SLinus Torvalds extern struct tree_balance *cur_tb;
38861da177e4SLinus Torvalds /*
38871da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't
38881da177e4SLinus Torvalds ** be written to disk while we are altering it.  So, we must:
38891da177e4SLinus Torvalds ** clean it
38901da177e4SLinus Torvalds ** wait on it.
38911da177e4SLinus Torvalds **
38921da177e4SLinus Torvalds */
3893a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb,
3894bd4c625cSLinus Torvalds 				 struct buffer_head *bh, int wait)
3895bd4c625cSLinus Torvalds {
3896a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.prepare);
38971da177e4SLinus Torvalds 
3898ca5de404SNick Piggin 	if (!trylock_buffer(bh)) {
38991da177e4SLinus Torvalds 		if (!wait)
39001da177e4SLinus Torvalds 			return 0;
39011da177e4SLinus Torvalds 		lock_buffer(bh);
39021da177e4SLinus Torvalds 	}
39031da177e4SLinus Torvalds 	set_buffer_journal_prepared(bh);
39041da177e4SLinus Torvalds 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
39051da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
39061da177e4SLinus Torvalds 		set_buffer_journal_restore_dirty(bh);
39071da177e4SLinus Torvalds 	}
39081da177e4SLinus Torvalds 	unlock_buffer(bh);
39091da177e4SLinus Torvalds 	return 1;
39101da177e4SLinus Torvalds }
39111da177e4SLinus Torvalds 
3912bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s)
3913bd4c625cSLinus Torvalds {
39141da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
39151da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
39161da177e4SLinus Torvalds 	struct list_head *entry;
39171da177e4SLinus Torvalds 	time_t now = get_seconds();
39181da177e4SLinus Torvalds 
39191da177e4SLinus Torvalds 	while (!list_empty(&journal->j_journal_list)) {
39201da177e4SLinus Torvalds 		entry = journal->j_journal_list.next;
39211da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
39221da177e4SLinus Torvalds 		/* this check should always be run, to send old lists to disk */
3923a3172027SChris Mason 		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3924a3172027SChris Mason 		    atomic_read(&jl->j_commit_left) == 0 &&
3925a3172027SChris Mason 		    test_transaction(s, jl)) {
39261da177e4SLinus Torvalds 			flush_used_journal_lists(s, jl);
39271da177e4SLinus Torvalds 		} else {
39281da177e4SLinus Torvalds 			break;
39291da177e4SLinus Torvalds 		}
39301da177e4SLinus Torvalds 	}
39311da177e4SLinus Torvalds }
39321da177e4SLinus Torvalds 
39331da177e4SLinus Torvalds /*
39341da177e4SLinus Torvalds ** long and ugly.  If flush, will not return until all commit
39351da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk.
39361da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk.
39371da177e4SLinus Torvalds **
39381da177e4SLinus Torvalds ** keep reading, there are comments as you go along
39391da177e4SLinus Torvalds **
39401da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing
39411da177e4SLinus Torvalds ** journal lists, etc just won't happen.
39421da177e4SLinus Torvalds */
3943bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th,
3944a9dd3643SJeff Mahoney 			  struct super_block *sb, unsigned long nblocks,
3945bd4c625cSLinus Torvalds 			  int flags)
3946bd4c625cSLinus Torvalds {
3947a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
39481da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
39491da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *last_cn = NULL;
39501da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
39511da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
39521da177e4SLinus Torvalds 	struct buffer_head *c_bh;	/* commit bh */
39531da177e4SLinus Torvalds 	struct buffer_head *d_bh;	/* desc bh */
39541da177e4SLinus Torvalds 	int cur_write_start = 0;	/* start index of current log write */
39551da177e4SLinus Torvalds 	int old_start;
39561da177e4SLinus Torvalds 	int i;
3957a44c94a7SAlexander Zarochentsev 	int flush;
3958a44c94a7SAlexander Zarochentsev 	int wait_on_commit;
39591da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl, *temp_jl;
39601da177e4SLinus Torvalds 	struct list_head *entry, *safe;
39611da177e4SLinus Torvalds 	unsigned long jindex;
3962600ed416SJeff Mahoney 	unsigned int commit_trans_id;
39631da177e4SLinus Torvalds 	int trans_half;
39641da177e4SLinus Torvalds 
39651da177e4SLinus Torvalds 	BUG_ON(th->t_refcount > 1);
39661da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
39671da177e4SLinus Torvalds 
3968a44c94a7SAlexander Zarochentsev 	/* protect flush_older_commits from doing mistakes if the
3969a44c94a7SAlexander Zarochentsev            transaction ID counter gets overflowed.  */
3970600ed416SJeff Mahoney 	if (th->t_trans_id == ~0U)
3971a44c94a7SAlexander Zarochentsev 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
3972a44c94a7SAlexander Zarochentsev 	flush = flags & FLUSH_ALL;
3973a44c94a7SAlexander Zarochentsev 	wait_on_commit = flags & WAIT;
3974a44c94a7SAlexander Zarochentsev 
397522e2c507SJens Axboe 	put_fs_excl();
39761da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
3977a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end");
39781da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3979a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3980bd4c625cSLinus Torvalds 					     1);
3981a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
39821da177e4SLinus Torvalds 	}
39831da177e4SLinus Torvalds 
3984a9dd3643SJeff Mahoney 	lock_journal(sb);
39851da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39861da177e4SLinus Torvalds 		flags |= FLUSH_ALL;
39871da177e4SLinus Torvalds 		flush = 1;
39881da177e4SLinus Torvalds 	}
39891da177e4SLinus Torvalds 	if (journal->j_next_async_flush) {
39901da177e4SLinus Torvalds 		flags |= COMMIT_NOW | WAIT;
39911da177e4SLinus Torvalds 		wait_on_commit = 1;
39921da177e4SLinus Torvalds 	}
39931da177e4SLinus Torvalds 
39941da177e4SLinus Torvalds 	/* check_journal_end locks the journal, and unlocks if it does not return 1
39951da177e4SLinus Torvalds 	 ** it tells us if we should continue with the journal_end, or just return
39961da177e4SLinus Torvalds 	 */
3997a9dd3643SJeff Mahoney 	if (!check_journal_end(th, sb, nblocks, flags)) {
3998a9dd3643SJeff Mahoney 		sb->s_dirt = 1;
3999a9dd3643SJeff Mahoney 		wake_queued_writers(sb);
4000a9dd3643SJeff Mahoney 		reiserfs_async_progress_wait(sb);
40011da177e4SLinus Torvalds 		goto out;
40021da177e4SLinus Torvalds 	}
40031da177e4SLinus Torvalds 
40041da177e4SLinus Torvalds 	/* check_journal_end might set these, check again */
40051da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
40061da177e4SLinus Torvalds 		flush = 1;
40071da177e4SLinus Torvalds 	}
40081da177e4SLinus Torvalds 
40091da177e4SLinus Torvalds 	/*
40101da177e4SLinus Torvalds 	 ** j must wait means we have to flush the log blocks, and the real blocks for
40111da177e4SLinus Torvalds 	 ** this transaction
40121da177e4SLinus Torvalds 	 */
40131da177e4SLinus Torvalds 	if (journal->j_must_wait > 0) {
40141da177e4SLinus Torvalds 		flush = 1;
40151da177e4SLinus Torvalds 	}
40161da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
4017ef43bc4fSJan Kara 	/* quota ops might need to nest, setup the journal_info pointer for them
4018ef43bc4fSJan Kara 	 * and raise the refcount so that it is > 0. */
40191da177e4SLinus Torvalds 	current->journal_info = th;
4020ef43bc4fSJan Kara 	th->t_refcount++;
40211da177e4SLinus Torvalds 	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
40221da177e4SLinus Torvalds 						 * the transaction */
4023ef43bc4fSJan Kara 	th->t_refcount--;
40241da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
40251da177e4SLinus Torvalds #endif
40261da177e4SLinus Torvalds 
40271da177e4SLinus Torvalds 	/* setup description block */
4028bd4c625cSLinus Torvalds 	d_bh =
4029a9dd3643SJeff Mahoney 	    journal_getblk(sb,
4030a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4031bd4c625cSLinus Torvalds 			   journal->j_start);
40321da177e4SLinus Torvalds 	set_buffer_uptodate(d_bh);
40331da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
40341da177e4SLinus Torvalds 	memset(d_bh->b_data, 0, d_bh->b_size);
40351da177e4SLinus Torvalds 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
40361da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
40371da177e4SLinus Torvalds 
40381da177e4SLinus Torvalds 	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
4039a9dd3643SJeff Mahoney 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4040bd4c625cSLinus Torvalds 			      ((journal->j_start + journal->j_len +
4041a9dd3643SJeff Mahoney 				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
40421da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
40431da177e4SLinus Torvalds 	memset(c_bh->b_data, 0, c_bh->b_size);
40441da177e4SLinus Torvalds 	set_commit_trans_id(commit, journal->j_trans_id);
40451da177e4SLinus Torvalds 	set_buffer_uptodate(c_bh);
40461da177e4SLinus Torvalds 
40471da177e4SLinus Torvalds 	/* init this journal list */
40481da177e4SLinus Torvalds 	jl = journal->j_current_jl;
40491da177e4SLinus Torvalds 
40501da177e4SLinus Torvalds 	/* we lock the commit before doing anything because
40511da177e4SLinus Torvalds 	 * we want to make sure nobody tries to run flush_commit_list until
40521da177e4SLinus Torvalds 	 * the new transaction is fully setup, and we've already flushed the
40531da177e4SLinus Torvalds 	 * ordered bh list
40541da177e4SLinus Torvalds 	 */
40558ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
40561da177e4SLinus Torvalds 
40571da177e4SLinus Torvalds 	/* save the transaction id in case we need to commit it later */
40581da177e4SLinus Torvalds 	commit_trans_id = jl->j_trans_id;
40591da177e4SLinus Torvalds 
40601da177e4SLinus Torvalds 	atomic_set(&jl->j_older_commits_done, 0);
40611da177e4SLinus Torvalds 	jl->j_trans_id = journal->j_trans_id;
40621da177e4SLinus Torvalds 	jl->j_timestamp = journal->j_trans_start_time;
40631da177e4SLinus Torvalds 	jl->j_commit_bh = c_bh;
40641da177e4SLinus Torvalds 	jl->j_start = journal->j_start;
40651da177e4SLinus Torvalds 	jl->j_len = journal->j_len;
40661da177e4SLinus Torvalds 	atomic_set(&jl->j_nonzerolen, journal->j_len);
40671da177e4SLinus Torvalds 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
40681da177e4SLinus Torvalds 	jl->j_realblock = NULL;
40691da177e4SLinus Torvalds 
40701da177e4SLinus Torvalds 	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
40711da177e4SLinus Torvalds 	 **  for each real block, add it to the journal list hash,
40721da177e4SLinus Torvalds 	 ** copy into real block index array in the commit or desc block
40731da177e4SLinus Torvalds 	 */
4074a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
40751da177e4SLinus Torvalds 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
40761da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
4077a9dd3643SJeff Mahoney 			jl_cn = get_cnode(sb);
40781da177e4SLinus Torvalds 			if (!jl_cn) {
4079a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-1676",
4080c3a9c210SJeff Mahoney 					       "get_cnode returned NULL");
40811da177e4SLinus Torvalds 			}
40821da177e4SLinus Torvalds 			if (i == 0) {
40831da177e4SLinus Torvalds 				jl->j_realblock = jl_cn;
40841da177e4SLinus Torvalds 			}
40851da177e4SLinus Torvalds 			jl_cn->prev = last_cn;
40861da177e4SLinus Torvalds 			jl_cn->next = NULL;
40871da177e4SLinus Torvalds 			if (last_cn) {
40881da177e4SLinus Torvalds 				last_cn->next = jl_cn;
40891da177e4SLinus Torvalds 			}
40901da177e4SLinus Torvalds 			last_cn = jl_cn;
40911da177e4SLinus Torvalds 			/* make sure the block we are trying to log is not a block
40921da177e4SLinus Torvalds 			   of journal or reserved area */
40931da177e4SLinus Torvalds 
4094bd4c625cSLinus Torvalds 			if (is_block_in_log_or_reserved_area
4095a9dd3643SJeff Mahoney 			    (sb, cn->bh->b_blocknr)) {
4096a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-2332",
4097c3a9c210SJeff Mahoney 					       "Trying to log block %lu, "
4098c3a9c210SJeff Mahoney 					       "which is a log block",
4099bd4c625cSLinus Torvalds 					       cn->bh->b_blocknr);
41001da177e4SLinus Torvalds 			}
41011da177e4SLinus Torvalds 			jl_cn->blocknr = cn->bh->b_blocknr;
41021da177e4SLinus Torvalds 			jl_cn->state = 0;
4103a9dd3643SJeff Mahoney 			jl_cn->sb = sb;
41041da177e4SLinus Torvalds 			jl_cn->bh = cn->bh;
41051da177e4SLinus Torvalds 			jl_cn->jlist = jl;
41061da177e4SLinus Torvalds 			insert_journal_hash(journal->j_list_hash_table, jl_cn);
41071da177e4SLinus Torvalds 			if (i < trans_half) {
4108bd4c625cSLinus Torvalds 				desc->j_realblock[i] =
4109bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
41101da177e4SLinus Torvalds 			} else {
4111bd4c625cSLinus Torvalds 				commit->j_realblock[i - trans_half] =
4112bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
41131da177e4SLinus Torvalds 			}
41141da177e4SLinus Torvalds 		} else {
41151da177e4SLinus Torvalds 			i--;
41161da177e4SLinus Torvalds 		}
41171da177e4SLinus Torvalds 	}
41181da177e4SLinus Torvalds 	set_desc_trans_len(desc, journal->j_len);
41191da177e4SLinus Torvalds 	set_desc_mount_id(desc, journal->j_mount_id);
41201da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
41211da177e4SLinus Torvalds 	set_commit_trans_len(commit, journal->j_len);
41221da177e4SLinus Torvalds 
41231da177e4SLinus Torvalds 	/* special check in case all buffers in the journal were marked for not logging */
412414a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
41251da177e4SLinus Torvalds 
41261da177e4SLinus Torvalds 	/* we're about to dirty all the log blocks, mark the description block
41271da177e4SLinus Torvalds 	 * dirty now too.  Don't mark the commit block dirty until all the
41281da177e4SLinus Torvalds 	 * others are on disk
41291da177e4SLinus Torvalds 	 */
41301da177e4SLinus Torvalds 	mark_buffer_dirty(d_bh);
41311da177e4SLinus Torvalds 
41321da177e4SLinus Torvalds 	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
41331da177e4SLinus Torvalds 	cur_write_start = journal->j_start;
41341da177e4SLinus Torvalds 	cn = journal->j_first;
41351da177e4SLinus Torvalds 	jindex = 1;		/* start at one so we don't get the desc again */
41361da177e4SLinus Torvalds 	while (cn) {
41371da177e4SLinus Torvalds 		clear_buffer_journal_new(cn->bh);
41381da177e4SLinus Torvalds 		/* copy all the real blocks into log area.  dirty log blocks */
41391da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
41401da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
41411da177e4SLinus Torvalds 			char *addr;
41421da177e4SLinus Torvalds 			struct page *page;
4143bd4c625cSLinus Torvalds 			tmp_bh =
4144a9dd3643SJeff Mahoney 			    journal_getblk(sb,
4145a9dd3643SJeff Mahoney 					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4146bd4c625cSLinus Torvalds 					   ((cur_write_start +
4147bd4c625cSLinus Torvalds 					     jindex) %
4148a9dd3643SJeff Mahoney 					    SB_ONDISK_JOURNAL_SIZE(sb)));
41491da177e4SLinus Torvalds 			set_buffer_uptodate(tmp_bh);
41501da177e4SLinus Torvalds 			page = cn->bh->b_page;
41511da177e4SLinus Torvalds 			addr = kmap(page);
4152bd4c625cSLinus Torvalds 			memcpy(tmp_bh->b_data,
4153bd4c625cSLinus Torvalds 			       addr + offset_in_page(cn->bh->b_data),
41541da177e4SLinus Torvalds 			       cn->bh->b_size);
41551da177e4SLinus Torvalds 			kunmap(page);
41561da177e4SLinus Torvalds 			mark_buffer_dirty(tmp_bh);
41571da177e4SLinus Torvalds 			jindex++;
41581da177e4SLinus Torvalds 			set_buffer_journal_dirty(cn->bh);
41591da177e4SLinus Torvalds 			clear_buffer_journaled(cn->bh);
41601da177e4SLinus Torvalds 		} else {
41611da177e4SLinus Torvalds 			/* JDirty cleared sometime during transaction.  don't log this one */
4162a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2048",
416345b03d5eSJeff Mahoney 					 "BAD, buffer in journal hash, "
416445b03d5eSJeff Mahoney 					 "but not JDirty!");
41651da177e4SLinus Torvalds 			brelse(cn->bh);
41661da177e4SLinus Torvalds 		}
41671da177e4SLinus Torvalds 		next = cn->next;
4168a9dd3643SJeff Mahoney 		free_cnode(sb, cn);
41691da177e4SLinus Torvalds 		cn = next;
4170e6950a4dSFrederic Weisbecker 		reiserfs_write_unlock(sb);
41711da177e4SLinus Torvalds 		cond_resched();
4172e6950a4dSFrederic Weisbecker 		reiserfs_write_lock(sb);
41731da177e4SLinus Torvalds 	}
41741da177e4SLinus Torvalds 
41751da177e4SLinus Torvalds 	/* we are done  with both the c_bh and d_bh, but
41761da177e4SLinus Torvalds 	 ** c_bh must be written after all other commit blocks,
41771da177e4SLinus Torvalds 	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
41781da177e4SLinus Torvalds 	 */
41791da177e4SLinus Torvalds 
4180a9dd3643SJeff Mahoney 	journal->j_current_jl = alloc_journal_list(sb);
41811da177e4SLinus Torvalds 
41821da177e4SLinus Torvalds 	/* now it is safe to insert this transaction on the main list */
41831da177e4SLinus Torvalds 	list_add_tail(&jl->j_list, &journal->j_journal_list);
41841da177e4SLinus Torvalds 	list_add_tail(&jl->j_working_list, &journal->j_working_list);
41851da177e4SLinus Torvalds 	journal->j_num_work_lists++;
41861da177e4SLinus Torvalds 
41871da177e4SLinus Torvalds 	/* reset journal values for the next transaction */
41881da177e4SLinus Torvalds 	old_start = journal->j_start;
4189bd4c625cSLinus Torvalds 	journal->j_start =
4190bd4c625cSLinus Torvalds 	    (journal->j_start + journal->j_len +
4191a9dd3643SJeff Mahoney 	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
41921da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
41931da177e4SLinus Torvalds 	journal->j_bcount = 0;
41941da177e4SLinus Torvalds 	journal->j_last = NULL;
41951da177e4SLinus Torvalds 	journal->j_first = NULL;
41961da177e4SLinus Torvalds 	journal->j_len = 0;
41971da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
4198a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
4199a44c94a7SAlexander Zarochentsev 	if (++journal->j_trans_id == 0)
4200a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
42011da177e4SLinus Torvalds 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
42021da177e4SLinus Torvalds 	journal->j_must_wait = 0;
42031da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
42041da177e4SLinus Torvalds 	journal->j_next_full_flush = 0;
42051da177e4SLinus Torvalds 	journal->j_next_async_flush = 0;
4206a9dd3643SJeff Mahoney 	init_journal_hash(sb);
42071da177e4SLinus Torvalds 
42081da177e4SLinus Torvalds 	// make sure reiserfs_add_jh sees the new current_jl before we
42091da177e4SLinus Torvalds 	// write out the tails
42101da177e4SLinus Torvalds 	smp_mb();
42111da177e4SLinus Torvalds 
42121da177e4SLinus Torvalds 	/* tail conversion targets have to hit the disk before we end the
42131da177e4SLinus Torvalds 	 * transaction.  Otherwise a later transaction might repack the tail
42141da177e4SLinus Torvalds 	 * before this transaction commits, leaving the data block unflushed and
42151da177e4SLinus Torvalds 	 * clean, if we crash before the later transaction commits, the data block
42161da177e4SLinus Torvalds 	 * is lost.
42171da177e4SLinus Torvalds 	 */
42181da177e4SLinus Torvalds 	if (!list_empty(&jl->j_tail_bh_list)) {
42198ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
42201da177e4SLinus Torvalds 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
42211da177e4SLinus Torvalds 				      journal, jl, &jl->j_tail_bh_list);
42228ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
42231da177e4SLinus Torvalds 	}
422414a61442SEric Sesterhenn 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
422590415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
42261da177e4SLinus Torvalds 
42271da177e4SLinus Torvalds 	/* honor the flush wishes from the caller, simple commits can
42281da177e4SLinus Torvalds 	 ** be done outside the journal lock, they are done below
42291da177e4SLinus Torvalds 	 **
42301da177e4SLinus Torvalds 	 ** if we don't flush the commit list right now, we put it into
42311da177e4SLinus Torvalds 	 ** the work queue so the people waiting on the async progress work
42321da177e4SLinus Torvalds 	 ** queue don't wait for this proc to flush journal lists and such.
42331da177e4SLinus Torvalds 	 */
42341da177e4SLinus Torvalds 	if (flush) {
4235a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
4236a9dd3643SJeff Mahoney 		flush_journal_list(sb, jl, 1);
42371da177e4SLinus Torvalds 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
42381da177e4SLinus Torvalds 		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
42391da177e4SLinus Torvalds 
42401da177e4SLinus Torvalds 	/* if the next transaction has any chance of wrapping, flush
42411da177e4SLinus Torvalds 	 ** transactions that might get overwritten.  If any journal lists are very
42421da177e4SLinus Torvalds 	 ** old flush them as well.
42431da177e4SLinus Torvalds 	 */
42441da177e4SLinus Torvalds       first_jl:
42451da177e4SLinus Torvalds 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
42461da177e4SLinus Torvalds 		temp_jl = JOURNAL_LIST_ENTRY(entry);
42471da177e4SLinus Torvalds 		if (journal->j_start <= temp_jl->j_start) {
42481da177e4SLinus Torvalds 			if ((journal->j_start + journal->j_trans_max + 1) >=
4249bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4250a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
42511da177e4SLinus Torvalds 				goto first_jl;
42521da177e4SLinus Torvalds 			} else if ((journal->j_start +
42531da177e4SLinus Torvalds 				    journal->j_trans_max + 1) <
4254a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_SIZE(sb)) {
42551da177e4SLinus Torvalds 				/* if we don't cross into the next transaction and we don't
42561da177e4SLinus Torvalds 				 * wrap, there is no way we can overlap any later transactions
42571da177e4SLinus Torvalds 				 * break now
42581da177e4SLinus Torvalds 				 */
42591da177e4SLinus Torvalds 				break;
42601da177e4SLinus Torvalds 			}
42611da177e4SLinus Torvalds 		} else if ((journal->j_start +
42621da177e4SLinus Torvalds 			    journal->j_trans_max + 1) >
4263a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_SIZE(sb)) {
42641da177e4SLinus Torvalds 			if (((journal->j_start + journal->j_trans_max + 1) %
4265a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb)) >=
4266bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4267a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
42681da177e4SLinus Torvalds 				goto first_jl;
42691da177e4SLinus Torvalds 			} else {
42701da177e4SLinus Torvalds 				/* we don't overlap anything from out start to the end of the
42711da177e4SLinus Torvalds 				 * log, and our wrapped portion doesn't overlap anything at
42721da177e4SLinus Torvalds 				 * the start of the log.  We can break
42731da177e4SLinus Torvalds 				 */
42741da177e4SLinus Torvalds 				break;
42751da177e4SLinus Torvalds 			}
42761da177e4SLinus Torvalds 		}
42771da177e4SLinus Torvalds 	}
4278a9dd3643SJeff Mahoney 	flush_old_journal_lists(sb);
42791da177e4SLinus Torvalds 
4280bd4c625cSLinus Torvalds 	journal->j_current_jl->j_list_bitmap =
4281a9dd3643SJeff Mahoney 	    get_list_bitmap(sb, journal->j_current_jl);
42821da177e4SLinus Torvalds 
42831da177e4SLinus Torvalds 	if (!(journal->j_current_jl->j_list_bitmap)) {
4284a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-1996",
4285c3a9c210SJeff Mahoney 			       "could not get a list bitmap");
42861da177e4SLinus Torvalds 	}
42871da177e4SLinus Torvalds 
42881da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
4289a9dd3643SJeff Mahoney 	unlock_journal(sb);
42901da177e4SLinus Torvalds 	/* wake up any body waiting to join. */
42911da177e4SLinus Torvalds 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
42921da177e4SLinus Torvalds 	wake_up(&(journal->j_join_wait));
42931da177e4SLinus Torvalds 
42941da177e4SLinus Torvalds 	if (!flush && wait_on_commit &&
4295a9dd3643SJeff Mahoney 	    journal_list_still_alive(sb, commit_trans_id)) {
4296a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
42971da177e4SLinus Torvalds 	}
42981da177e4SLinus Torvalds       out:
4299a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end2");
43001da177e4SLinus Torvalds 
43011da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
43021da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
43031da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
43041da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
4305a9dd3643SJeff Mahoney 	th->t_super = sb;
43061da177e4SLinus Torvalds 
43071da177e4SLinus Torvalds 	return journal->j_errno;
43081da177e4SLinus Torvalds }
43091da177e4SLinus Torvalds 
431032e8b106SJeff Mahoney /* Send the file system read only and refuse new transactions */
431132e8b106SJeff Mahoney void reiserfs_abort_journal(struct super_block *sb, int errno)
43121da177e4SLinus Torvalds {
43131da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
43141da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
43151da177e4SLinus Torvalds 		return;
43161da177e4SLinus Torvalds 
431732e8b106SJeff Mahoney 	if (!journal->j_errno)
431832e8b106SJeff Mahoney 		journal->j_errno = errno;
43191da177e4SLinus Torvalds 
43201da177e4SLinus Torvalds 	sb->s_flags |= MS_RDONLY;
43211da177e4SLinus Torvalds 	set_bit(J_ABORTED, &journal->j_state);
43221da177e4SLinus Torvalds 
43231da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
43241da177e4SLinus Torvalds 	dump_stack();
43251da177e4SLinus Torvalds #endif
43261da177e4SLinus Torvalds }
43271da177e4SLinus Torvalds 
4328