xref: /openbmc/linux/fs/reiserfs/journal.c (revision 558feb08)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds ** Write ahead logging implementation copyright Chris Mason 2000
31da177e4SLinus Torvalds **
425985edcSLucas De Marchi ** The background commits make this code very interrelated, and
51da177e4SLinus Torvalds ** overly complex.  I need to rethink things a bit....The major players:
61da177e4SLinus Torvalds **
71da177e4SLinus Torvalds ** journal_begin -- call with the number of blocks you expect to log.
81da177e4SLinus Torvalds **                  If the current transaction is too
91da177e4SLinus Torvalds ** 		    old, it will block until the current transaction is
101da177e4SLinus Torvalds ** 		    finished, and then start a new one.
111da177e4SLinus Torvalds **		    Usually, your transaction will get joined in with
121da177e4SLinus Torvalds **                  previous ones for speed.
131da177e4SLinus Torvalds **
141da177e4SLinus Torvalds ** journal_join  -- same as journal_begin, but won't block on the current
151da177e4SLinus Torvalds **                  transaction regardless of age.  Don't ever call
161da177e4SLinus Torvalds **                  this.  Ever.  There are only two places it should be
171da177e4SLinus Torvalds **                  called from, and they are both inside this file.
181da177e4SLinus Torvalds **
191da177e4SLinus Torvalds ** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
201da177e4SLinus Torvalds **                       that might make them get sent to disk
211da177e4SLinus Torvalds **                       and then marks them BH_JDirty.  Puts the buffer head
221da177e4SLinus Torvalds **                       into the current transaction hash.
231da177e4SLinus Torvalds **
241da177e4SLinus Torvalds ** journal_end -- if the current transaction is batchable, it does nothing
251da177e4SLinus Torvalds **                   otherwise, it could do an async/synchronous commit, or
261da177e4SLinus Torvalds **                   a full flush of all log and real blocks in the
271da177e4SLinus Torvalds **                   transaction.
281da177e4SLinus Torvalds **
291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and
301da177e4SLinus Torvalds **                      commit blocks are sent to disk.  Forces commit blocks
311da177e4SLinus Torvalds **                      to disk for all backgrounded commits that have been
321da177e4SLinus Torvalds **                      around too long.
**		     -- Note, if you call this as an immediate flush from
**		        within kupdate, it will ignore the immediate flag
351da177e4SLinus Torvalds */
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <linux/time.h>
386188e10dSMatthew Wilcox #include <linux/semaphore.h>
391da177e4SLinus Torvalds #include <linux/vmalloc.h>
401da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
411da177e4SLinus Torvalds #include <linux/kernel.h>
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/fcntl.h>
441da177e4SLinus Torvalds #include <linux/stat.h>
451da177e4SLinus Torvalds #include <linux/string.h>
461da177e4SLinus Torvalds #include <linux/buffer_head.h>
471da177e4SLinus Torvalds #include <linux/workqueue.h>
481da177e4SLinus Torvalds #include <linux/writeback.h>
491da177e4SLinus Torvalds #include <linux/blkdev.h>
503fcfab16SAndrew Morton #include <linux/backing-dev.h>
5190415deaSJeff Mahoney #include <linux/uaccess.h>
525a0e3ad6STejun Heo #include <linux/slab.h>
5390415deaSJeff Mahoney 
5490415deaSJeff Mahoney #include <asm/system.h>
551da177e4SLinus Torvalds 
/*
** Both macros turn a list head embedded in a struct reiserfs_journal_list
** back into a pointer to that journal list: JOURNAL_LIST_ENTRY for the
** j_list link, JOURNAL_WORK_ENTRY for the j_working_list link.
*/
#define JOURNAL_LIST_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_list))
#define JOURNAL_WORK_ENTRY(h) \
	(list_entry((h), struct reiserfs_journal_list, j_working_list))
611da177e4SLinus Torvalds 
/*
** Number of mounted filesystems.  It is consulted to decide when the
** commit workqueue has to be started and when it has to be killed.
*/
static int reiserfs_mounted_fs_count;

/* the commit workqueue referred to above */
static struct workqueue_struct *commit_wq;
681da177e4SLinus Torvalds 
/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018
/* read ahead */
#define BUFNR 64

/* cnode stat bits.  Move these into reiserfs_fs.h */

/* this block was freed, and can't be written. */
#define BLOCK_FREED 2
/* this block was freed during this transaction, and can't be written */
#define BLOCK_FREED_HOLDER 3

/* used in flush_journal_list */
#define BLOCK_NEEDS_FLUSH 4
#define BLOCK_DIRTIED 5

/* journal list state bits */
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
/* someone will commit this list */
#define LIST_COMMIT_PENDING  4

/* flags for do_journal_end */
/* flush commit and real blocks */
#define FLUSH_ALL   1
/* end and commit this transaction */
#define COMMIT_NOW  2
/* wait for the log blocks to hit the disk */
#define WAIT        4
/* forward declarations of file-local (static) helpers */
static int do_journal_end(struct reiserfs_transaction_handle *,
			  struct super_block *,
			  unsigned long nblocks, int flags);
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl,
			      int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl,
			     int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
			struct super_block *sb,
			unsigned long nblocks);
static int release_journal_dev(struct super_block *super,
			       struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl);
static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);
/* values accepted for the 'join' argument of do_journal_begin_r */
enum {
	/* regular journal begin */
	JBEGIN_REG = 0,
	/* join the running transaction if at all possible */
	JBEGIN_JOIN = 1,
	/* called from cleanup code, ignores aborted flag */
	JBEGIN_ABORT = 2,
};
1141da177e4SLinus Torvalds 
/* forward declaration; 'join' takes one of the JBEGIN_* values */
static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
			      struct super_block *sb, unsigned long nblocks,
			      int join);
1181da177e4SLinus Torvalds 
119a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb)
120bd4c625cSLinus Torvalds {
121a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
122bd4c625cSLinus Torvalds 	memset(journal->j_hash_table, 0,
123bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
1241da177e4SLinus Torvalds }
1251da177e4SLinus Torvalds 
/*
** Clears the dirty bit and the journal_test bit of bh.  A NULL bh is
** accepted and ignored.  Always returns 0.
**
** Original author's rationale: this is called because refile_buffer cannot
** be allowed to make schedule happen after a block has been freed.  Look at
** remove_from_transaction and journal_mark_freed for more details.
*/
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}
1391da177e4SLinus Torvalds 
140bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
141a9dd3643SJeff Mahoney 							 *sb)
142bd4c625cSLinus Torvalds {
1431da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
1441da177e4SLinus Torvalds 	static int id;
1451da177e4SLinus Torvalds 
146d739b42bSPekka Enberg 	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
1471da177e4SLinus Torvalds 	if (!bn) {
1481da177e4SLinus Torvalds 		return NULL;
1491da177e4SLinus Torvalds 	}
150a9dd3643SJeff Mahoney 	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
1511da177e4SLinus Torvalds 	if (!bn->data) {
152d739b42bSPekka Enberg 		kfree(bn);
1531da177e4SLinus Torvalds 		return NULL;
1541da177e4SLinus Torvalds 	}
1551da177e4SLinus Torvalds 	bn->id = id++;
1561da177e4SLinus Torvalds 	INIT_LIST_HEAD(&bn->list);
1571da177e4SLinus Torvalds 	return bn;
1581da177e4SLinus Torvalds }
1591da177e4SLinus Torvalds 
160a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
161bd4c625cSLinus Torvalds {
162a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1631da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
1641da177e4SLinus Torvalds 	struct list_head *entry = journal->j_bitmap_nodes.next;
1651da177e4SLinus Torvalds 
1661da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes++;
1671da177e4SLinus Torvalds       repeat:
1681da177e4SLinus Torvalds 
1691da177e4SLinus Torvalds 	if (entry != &journal->j_bitmap_nodes) {
1701da177e4SLinus Torvalds 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
1711da177e4SLinus Torvalds 		list_del(entry);
172a9dd3643SJeff Mahoney 		memset(bn->data, 0, sb->s_blocksize);
1731da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
1741da177e4SLinus Torvalds 		return bn;
1751da177e4SLinus Torvalds 	}
176a9dd3643SJeff Mahoney 	bn = allocate_bitmap_node(sb);
1771da177e4SLinus Torvalds 	if (!bn) {
1781da177e4SLinus Torvalds 		yield();
1791da177e4SLinus Torvalds 		goto repeat;
1801da177e4SLinus Torvalds 	}
1811da177e4SLinus Torvalds 	return bn;
1821da177e4SLinus Torvalds }
183a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb,
184bd4c625cSLinus Torvalds 				    struct reiserfs_bitmap_node *bn)
185bd4c625cSLinus Torvalds {
186a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1871da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes--;
1881da177e4SLinus Torvalds 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
189d739b42bSPekka Enberg 		kfree(bn->data);
190d739b42bSPekka Enberg 		kfree(bn);
1911da177e4SLinus Torvalds 	} else {
1921da177e4SLinus Torvalds 		list_add(&bn->list, &journal->j_bitmap_nodes);
1931da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes++;
1941da177e4SLinus Torvalds 	}
1951da177e4SLinus Torvalds }
1961da177e4SLinus Torvalds 
197a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb)
198bd4c625cSLinus Torvalds {
1991da177e4SLinus Torvalds 	int i;
200a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2011da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
2021da177e4SLinus Torvalds 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
203a9dd3643SJeff Mahoney 		bn = allocate_bitmap_node(sb);
2041da177e4SLinus Torvalds 		if (bn) {
2051da177e4SLinus Torvalds 			list_add(&bn->list, &journal->j_bitmap_nodes);
2061da177e4SLinus Torvalds 			journal->j_free_bitmap_nodes++;
2071da177e4SLinus Torvalds 		} else {
2080222e657SJeff Mahoney 			break;	/* this is ok, we'll try again when more are needed */
2091da177e4SLinus Torvalds 		}
2101da177e4SLinus Torvalds 	}
2111da177e4SLinus Torvalds }
2121da177e4SLinus Torvalds 
213a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb,
2143ee16670SJeff Mahoney 				  b_blocknr_t block,
215bd4c625cSLinus Torvalds 				  struct reiserfs_list_bitmap *jb)
216bd4c625cSLinus Torvalds {
217a9dd3643SJeff Mahoney 	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
218a9dd3643SJeff Mahoney 	unsigned int bit_nr = block % (sb->s_blocksize << 3);
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds 	if (!jb->bitmaps[bmap_nr]) {
221a9dd3643SJeff Mahoney 		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
2221da177e4SLinus Torvalds 	}
2231da177e4SLinus Torvalds 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
2241da177e4SLinus Torvalds 	return 0;
2251da177e4SLinus Torvalds }
2261da177e4SLinus Torvalds 
227a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb,
228bd4c625cSLinus Torvalds 				struct reiserfs_list_bitmap *jb)
229bd4c625cSLinus Torvalds {
2301da177e4SLinus Torvalds 	int i;
2311da177e4SLinus Torvalds 	if (jb->bitmaps == NULL)
2321da177e4SLinus Torvalds 		return;
2331da177e4SLinus Torvalds 
234a9dd3643SJeff Mahoney 	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
2351da177e4SLinus Torvalds 		if (jb->bitmaps[i]) {
236a9dd3643SJeff Mahoney 			free_bitmap_node(sb, jb->bitmaps[i]);
2371da177e4SLinus Torvalds 			jb->bitmaps[i] = NULL;
2381da177e4SLinus Torvalds 		}
2391da177e4SLinus Torvalds 	}
2401da177e4SLinus Torvalds }
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds /*
2431da177e4SLinus Torvalds ** only call this on FS unmount.
2441da177e4SLinus Torvalds */
245a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb,
246bd4c625cSLinus Torvalds 			     struct reiserfs_list_bitmap *jb_array)
247bd4c625cSLinus Torvalds {
2481da177e4SLinus Torvalds 	int i;
2491da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2501da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2511da177e4SLinus Torvalds 		jb = jb_array + i;
2521da177e4SLinus Torvalds 		jb->journal_list = NULL;
253a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
2541da177e4SLinus Torvalds 		vfree(jb->bitmaps);
2551da177e4SLinus Torvalds 		jb->bitmaps = NULL;
2561da177e4SLinus Torvalds 	}
2571da177e4SLinus Torvalds 	return 0;
2581da177e4SLinus Torvalds }
2591da177e4SLinus Torvalds 
260a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb)
261bd4c625cSLinus Torvalds {
262a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
2631da177e4SLinus Torvalds 	struct list_head *next = journal->j_bitmap_nodes.next;
2641da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
2651da177e4SLinus Torvalds 
2661da177e4SLinus Torvalds 	while (next != &journal->j_bitmap_nodes) {
2671da177e4SLinus Torvalds 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
2681da177e4SLinus Torvalds 		list_del(next);
269d739b42bSPekka Enberg 		kfree(bn->data);
270d739b42bSPekka Enberg 		kfree(bn);
2711da177e4SLinus Torvalds 		next = journal->j_bitmap_nodes.next;
2721da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
2731da177e4SLinus Torvalds 	}
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds 	return 0;
2761da177e4SLinus Torvalds }
2771da177e4SLinus Torvalds 
2781da177e4SLinus Torvalds /*
2791da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
2801da177e4SLinus Torvalds ** jb_array is the array to be filled in.
2811da177e4SLinus Torvalds */
282a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb,
2831da177e4SLinus Torvalds 				   struct reiserfs_list_bitmap *jb_array,
2843ee16670SJeff Mahoney 				   unsigned int bmap_nr)
285bd4c625cSLinus Torvalds {
2861da177e4SLinus Torvalds 	int i;
2871da177e4SLinus Torvalds 	int failed = 0;
2881da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2891da177e4SLinus Torvalds 	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
2901da177e4SLinus Torvalds 
2911da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2921da177e4SLinus Torvalds 		jb = jb_array + i;
2931da177e4SLinus Torvalds 		jb->journal_list = NULL;
294558feb08SJoe Perches 		jb->bitmaps = vzalloc(mem);
2951da177e4SLinus Torvalds 		if (!jb->bitmaps) {
296a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2000", "unable to "
29745b03d5eSJeff Mahoney 					 "allocate bitmaps for journal lists");
2981da177e4SLinus Torvalds 			failed = 1;
2991da177e4SLinus Torvalds 			break;
3001da177e4SLinus Torvalds 		}
3011da177e4SLinus Torvalds 	}
3021da177e4SLinus Torvalds 	if (failed) {
303a9dd3643SJeff Mahoney 		free_list_bitmaps(sb, jb_array);
3041da177e4SLinus Torvalds 		return -1;
3051da177e4SLinus Torvalds 	}
3061da177e4SLinus Torvalds 	return 0;
3071da177e4SLinus Torvalds }
3081da177e4SLinus Torvalds 
3091da177e4SLinus Torvalds /*
3101da177e4SLinus Torvalds ** find an available list bitmap.  If you can't find one, flush a commit list
3111da177e4SLinus Torvalds ** and try again
3121da177e4SLinus Torvalds */
313a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
314bd4c625cSLinus Torvalds 						    struct reiserfs_journal_list
315bd4c625cSLinus Torvalds 						    *jl)
316bd4c625cSLinus Torvalds {
3171da177e4SLinus Torvalds 	int i, j;
318a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3191da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds 	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
3221da177e4SLinus Torvalds 		i = journal->j_list_bitmap_index;
3231da177e4SLinus Torvalds 		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
3241da177e4SLinus Torvalds 		jb = journal->j_list_bitmap + i;
3251da177e4SLinus Torvalds 		if (journal->j_list_bitmap[i].journal_list) {
326a9dd3643SJeff Mahoney 			flush_commit_list(sb,
327bd4c625cSLinus Torvalds 					  journal->j_list_bitmap[i].
328bd4c625cSLinus Torvalds 					  journal_list, 1);
3291da177e4SLinus Torvalds 			if (!journal->j_list_bitmap[i].journal_list) {
3301da177e4SLinus Torvalds 				break;
3311da177e4SLinus Torvalds 			}
3321da177e4SLinus Torvalds 		} else {
3331da177e4SLinus Torvalds 			break;
3341da177e4SLinus Torvalds 		}
3351da177e4SLinus Torvalds 	}
3361da177e4SLinus Torvalds 	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
3371da177e4SLinus Torvalds 		return NULL;
3381da177e4SLinus Torvalds 	}
3391da177e4SLinus Torvalds 	jb->journal_list = jl;
3401da177e4SLinus Torvalds 	return jb;
3411da177e4SLinus Torvalds }
3421da177e4SLinus Torvalds 
3431da177e4SLinus Torvalds /*
3441da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list.
3451da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers
3461da177e4SLinus Torvalds ** returns NULL on failure
3471da177e4SLinus Torvalds */
348bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
349bd4c625cSLinus Torvalds {
3501da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *head;
3511da177e4SLinus Torvalds 	int i;
3521da177e4SLinus Torvalds 	if (num_cnodes <= 0) {
3531da177e4SLinus Torvalds 		return NULL;
3541da177e4SLinus Torvalds 	}
355558feb08SJoe Perches 	head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
3561da177e4SLinus Torvalds 	if (!head) {
3571da177e4SLinus Torvalds 		return NULL;
3581da177e4SLinus Torvalds 	}
3591da177e4SLinus Torvalds 	head[0].prev = NULL;
3601da177e4SLinus Torvalds 	head[0].next = head + 1;
3611da177e4SLinus Torvalds 	for (i = 1; i < num_cnodes; i++) {
3621da177e4SLinus Torvalds 		head[i].prev = head + (i - 1);
3631da177e4SLinus Torvalds 		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
3641da177e4SLinus Torvalds 	}
3651da177e4SLinus Torvalds 	head[num_cnodes - 1].next = NULL;
3661da177e4SLinus Torvalds 	return head;
3671da177e4SLinus Torvalds }
3681da177e4SLinus Torvalds 
3691da177e4SLinus Torvalds /*
3701da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure
3711da177e4SLinus Torvalds */
372a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
373bd4c625cSLinus Torvalds {
3741da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
375a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3761da177e4SLinus Torvalds 
377a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "get_cnode");
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds 	if (journal->j_cnode_free <= 0) {
3801da177e4SLinus Torvalds 		return NULL;
3811da177e4SLinus Torvalds 	}
3821da177e4SLinus Torvalds 	journal->j_cnode_used++;
3831da177e4SLinus Torvalds 	journal->j_cnode_free--;
3841da177e4SLinus Torvalds 	cn = journal->j_cnode_free_list;
3851da177e4SLinus Torvalds 	if (!cn) {
3861da177e4SLinus Torvalds 		return cn;
3871da177e4SLinus Torvalds 	}
3881da177e4SLinus Torvalds 	if (cn->next) {
3891da177e4SLinus Torvalds 		cn->next->prev = NULL;
3901da177e4SLinus Torvalds 	}
3911da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn->next;
3921da177e4SLinus Torvalds 	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
3931da177e4SLinus Torvalds 	return cn;
3941da177e4SLinus Torvalds }
3951da177e4SLinus Torvalds 
3961da177e4SLinus Torvalds /*
3971da177e4SLinus Torvalds ** returns a cnode to the free list
3981da177e4SLinus Torvalds */
399a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb,
400bd4c625cSLinus Torvalds 		       struct reiserfs_journal_cnode *cn)
401bd4c625cSLinus Torvalds {
402a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4031da177e4SLinus Torvalds 
404a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "free_cnode");
4051da177e4SLinus Torvalds 
4061da177e4SLinus Torvalds 	journal->j_cnode_used--;
4071da177e4SLinus Torvalds 	journal->j_cnode_free++;
4081da177e4SLinus Torvalds 	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
4091da177e4SLinus Torvalds 	cn->next = journal->j_cnode_free_list;
4101da177e4SLinus Torvalds 	if (journal->j_cnode_free_list) {
4111da177e4SLinus Torvalds 		journal->j_cnode_free_list->prev = cn;
4121da177e4SLinus Torvalds 	}
4131da177e4SLinus Torvalds 	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
4141da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn;
4151da177e4SLinus Torvalds }
4161da177e4SLinus Torvalds 
/*
** Drop the journal_prepared and journal_restore_dirty bits from bh.
*/
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);

	clear_buffer_journal_restore_dirty(bh);
}
4221da177e4SLinus Torvalds 
4231da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */
424bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
425bd4c625cSLinus Torvalds 								  super_block
426bd4c625cSLinus Torvalds 								  *sb,
427bd4c625cSLinus Torvalds 								  struct
428bd4c625cSLinus Torvalds 								  reiserfs_journal_cnode
429bd4c625cSLinus Torvalds 								  **table,
4301da177e4SLinus Torvalds 								  long bl)
4311da177e4SLinus Torvalds {
4321da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4331da177e4SLinus Torvalds 	cn = journal_hash(table, sb, bl);
4341da177e4SLinus Torvalds 	while (cn) {
4351da177e4SLinus Torvalds 		if (cn->blocknr == bl && cn->sb == sb)
4361da177e4SLinus Torvalds 			return cn;
4371da177e4SLinus Torvalds 		cn = cn->hnext;
4381da177e4SLinus Torvalds 	}
4391da177e4SLinus Torvalds 	return (struct reiserfs_journal_cnode *)0;
4401da177e4SLinus Torvalds }
4411da177e4SLinus Torvalds 
4421da177e4SLinus Torvalds /*
4431da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
4441da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
4451da177e4SLinus Torvalds ** being overwritten by a replay after crashing.
4461da177e4SLinus Torvalds **
4471da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
4481da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
4491da177e4SLinus Torvalds ** sure you never write the block without logging it.
4501da177e4SLinus Torvalds **
4511da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward.
4521da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search
4531da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl.  Then, we return that
4541da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try.
4551da177e4SLinus Torvalds **
4561da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't
4571da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal
4581da177e4SLinus Torvalds **
4591da177e4SLinus Torvalds */
460a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb,
4613ee16670SJeff Mahoney 			unsigned int bmap_nr, int bit_nr, int search_all,
462bd4c625cSLinus Torvalds 			b_blocknr_t * next_zero_bit)
463bd4c625cSLinus Torvalds {
464a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
4651da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4661da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
4671da177e4SLinus Torvalds 	int i;
4681da177e4SLinus Torvalds 	unsigned long bl;
4691da177e4SLinus Torvalds 
4701da177e4SLinus Torvalds 	*next_zero_bit = 0;	/* always start this at zero. */
4711da177e4SLinus Torvalds 
472a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal);
4731da177e4SLinus Torvalds 	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
4741da177e4SLinus Torvalds 	 ** if we crash before the transaction that freed it commits,  this transaction won't
4751da177e4SLinus Torvalds 	 ** have committed either, and the block will never be written
4761da177e4SLinus Torvalds 	 */
4771da177e4SLinus Torvalds 	if (search_all) {
4781da177e4SLinus Torvalds 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
479a9dd3643SJeff Mahoney 			PROC_INFO_INC(sb, journal.in_journal_bitmap);
4801da177e4SLinus Torvalds 			jb = journal->j_list_bitmap + i;
4811da177e4SLinus Torvalds 			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
482bd4c625cSLinus Torvalds 			    test_bit(bit_nr,
483bd4c625cSLinus Torvalds 				     (unsigned long *)jb->bitmaps[bmap_nr]->
484bd4c625cSLinus Torvalds 				     data)) {
485bd4c625cSLinus Torvalds 				*next_zero_bit =
486bd4c625cSLinus Torvalds 				    find_next_zero_bit((unsigned long *)
487bd4c625cSLinus Torvalds 						       (jb->bitmaps[bmap_nr]->
488bd4c625cSLinus Torvalds 							data),
489a9dd3643SJeff Mahoney 						       sb->s_blocksize << 3,
490bd4c625cSLinus Torvalds 						       bit_nr + 1);
4911da177e4SLinus Torvalds 				return 1;
4921da177e4SLinus Torvalds 			}
4931da177e4SLinus Torvalds 		}
4941da177e4SLinus Torvalds 	}
4951da177e4SLinus Torvalds 
496a9dd3643SJeff Mahoney 	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
4971da177e4SLinus Torvalds 	/* is it in any old transactions? */
498bd4c625cSLinus Torvalds 	if (search_all
499bd4c625cSLinus Torvalds 	    && (cn =
500a9dd3643SJeff Mahoney 		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
5011da177e4SLinus Torvalds 		return 1;
5021da177e4SLinus Torvalds 	}
5031da177e4SLinus Torvalds 
5041da177e4SLinus Torvalds 	/* is it in the current transaction.  This should never happen */
505a9dd3643SJeff Mahoney 	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
5061da177e4SLinus Torvalds 		BUG();
5071da177e4SLinus Torvalds 		return 1;
5081da177e4SLinus Torvalds 	}
5091da177e4SLinus Torvalds 
510a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.in_journal_reusable);
5111da177e4SLinus Torvalds 	/* safe for reuse */
5121da177e4SLinus Torvalds 	return 0;
5131da177e4SLinus Torvalds }
5141da177e4SLinus Torvalds 
5151da177e4SLinus Torvalds /* insert cn into table
5161da177e4SLinus Torvalds */
517bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
518bd4c625cSLinus Torvalds 				       struct reiserfs_journal_cnode *cn)
519bd4c625cSLinus Torvalds {
5201da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn_orig;
5211da177e4SLinus Torvalds 
5221da177e4SLinus Torvalds 	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
5231da177e4SLinus Torvalds 	cn->hnext = cn_orig;
5241da177e4SLinus Torvalds 	cn->hprev = NULL;
5251da177e4SLinus Torvalds 	if (cn_orig) {
5261da177e4SLinus Torvalds 		cn_orig->hprev = cn;
5271da177e4SLinus Torvalds 	}
5281da177e4SLinus Torvalds 	journal_hash(table, cn->sb, cn->blocknr) = cn;
5291da177e4SLinus Torvalds }
5301da177e4SLinus Torvalds 
5311da177e4SLinus Torvalds /* lock the current transaction */
532a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb)
533bd4c625cSLinus Torvalds {
534a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.lock_journal);
5358ebc4232SFrederic Weisbecker 
5368ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
5371da177e4SLinus Torvalds }
5381da177e4SLinus Torvalds 
5391da177e4SLinus Torvalds /* unlock the current transaction */
540a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb)
541bd4c625cSLinus Torvalds {
542a9dd3643SJeff Mahoney 	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
5431da177e4SLinus Torvalds }
5441da177e4SLinus Torvalds 
5451da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl)
5461da177e4SLinus Torvalds {
5471da177e4SLinus Torvalds 	jl->j_refcount++;
5481da177e4SLinus Torvalds }
5491da177e4SLinus Torvalds 
5501da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s,
5511da177e4SLinus Torvalds 				    struct reiserfs_journal_list *jl)
5521da177e4SLinus Torvalds {
5531da177e4SLinus Torvalds 	if (jl->j_refcount < 1) {
554c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
555bd4c625cSLinus Torvalds 			       jl->j_trans_id, jl->j_refcount);
5561da177e4SLinus Torvalds 	}
5571da177e4SLinus Torvalds 	if (--jl->j_refcount == 0)
558d739b42bSPekka Enberg 		kfree(jl);
5591da177e4SLinus Torvalds }
5601da177e4SLinus Torvalds 
5611da177e4SLinus Torvalds /*
5621da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
5631da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
5641da177e4SLinus Torvalds ** transaction.
5651da177e4SLinus Torvalds */
566a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb,
567bd4c625cSLinus Torvalds 					   struct reiserfs_journal_list *jl)
568bd4c625cSLinus Torvalds {
5691da177e4SLinus Torvalds 
5701da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
5711da177e4SLinus Torvalds 	if (jb) {
572a9dd3643SJeff Mahoney 		cleanup_bitmap_list(sb, jb);
5731da177e4SLinus Torvalds 	}
5741da177e4SLinus Torvalds 	jl->j_list_bitmap->journal_list = NULL;
5751da177e4SLinus Torvalds 	jl->j_list_bitmap = NULL;
5761da177e4SLinus Torvalds }
5771da177e4SLinus Torvalds 
5781da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s,
579600ed416SJeff Mahoney 				    unsigned int trans_id)
5801da177e4SLinus Torvalds {
5811da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
5821da177e4SLinus Torvalds 	struct list_head *entry = &journal->j_journal_list;
5831da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
5841da177e4SLinus Torvalds 
5851da177e4SLinus Torvalds 	if (!list_empty(entry)) {
5861da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry->next);
5871da177e4SLinus Torvalds 		if (jl->j_trans_id <= trans_id) {
5881da177e4SLinus Torvalds 			return 1;
5891da177e4SLinus Torvalds 		}
5901da177e4SLinus Torvalds 	}
5911da177e4SLinus Torvalds 	return 0;
5921da177e4SLinus Torvalds }
5931da177e4SLinus Torvalds 
594398c95bdSChris Mason /*
595398c95bdSChris Mason  * If page->mapping was null, we failed to truncate this page for
596398c95bdSChris Mason  * some reason.  Most likely because it was truncated after being
597398c95bdSChris Mason  * logged via data=journal.
598398c95bdSChris Mason  *
599398c95bdSChris Mason  * This does a check to see if the buffer belongs to one of these
600398c95bdSChris Mason  * lost pages before doing the final put_bh.  If page->mapping was
601398c95bdSChris Mason  * null, it tries to free buffers on the page, which should make the
602398c95bdSChris Mason  * final page_cache_release drop the page from the lru.
603398c95bdSChris Mason  */
604398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh)
605398c95bdSChris Mason {
606398c95bdSChris Mason 	struct page *page = bh->b_page;
607529ae9aaSNick Piggin 	if (!page->mapping && trylock_page(page)) {
608398c95bdSChris Mason 		page_cache_get(page);
609398c95bdSChris Mason 		put_bh(bh);
610398c95bdSChris Mason 		if (!page->mapping)
611398c95bdSChris Mason 			try_to_free_buffers(page);
612398c95bdSChris Mason 		unlock_page(page);
613398c95bdSChris Mason 		page_cache_release(page);
614398c95bdSChris Mason 	} else {
615398c95bdSChris Mason 		put_bh(bh);
616398c95bdSChris Mason 	}
617398c95bdSChris Mason }
618398c95bdSChris Mason 
619bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
620bd4c625cSLinus Torvalds {
6211da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
62445b03d5eSJeff Mahoney 		reiserfs_warning(NULL, "clm-2084",
62545b03d5eSJeff Mahoney 				 "pinned buffer %lu:%s sent to disk",
6261da177e4SLinus Torvalds 				 bh->b_blocknr, bdevname(bh->b_bdev, b));
6271da177e4SLinus Torvalds 	}
6281da177e4SLinus Torvalds 	if (uptodate)
6291da177e4SLinus Torvalds 		set_buffer_uptodate(bh);
6301da177e4SLinus Torvalds 	else
6311da177e4SLinus Torvalds 		clear_buffer_uptodate(bh);
632398c95bdSChris Mason 
6331da177e4SLinus Torvalds 	unlock_buffer(bh);
634398c95bdSChris Mason 	release_buffer_page(bh);
6351da177e4SLinus Torvalds }
6361da177e4SLinus Torvalds 
/*
 * b_end_io handler installed by submit_ordered_buffer(): record the I/O
 * result in the uptodate bit, unlock the buffer and drop the reference
 * taken at submission time.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
6461da177e4SLinus Torvalds 
647bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh)
648bd4c625cSLinus Torvalds {
6491da177e4SLinus Torvalds 	get_bh(bh);
6501da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_buffer_io_sync;
6511da177e4SLinus Torvalds 	clear_buffer_journal_new(bh);
6521da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6531da177e4SLinus Torvalds 	if (!test_clear_buffer_journal_test(bh))
6541da177e4SLinus Torvalds 		BUG();
6551da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6561da177e4SLinus Torvalds 		BUG();
6571da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6581da177e4SLinus Torvalds }
6591da177e4SLinus Torvalds 
660bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh)
661bd4c625cSLinus Torvalds {
6621da177e4SLinus Torvalds 	get_bh(bh);
6631da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6641da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6651da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6661da177e4SLinus Torvalds 		BUG();
6671da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6681da177e4SLinus Torvalds }
6691da177e4SLinus Torvalds 
/* capacity of the bh[] array in struct buffer_chunk */
6701da177e4SLinus Torvalds #define CHUNK_SIZE 32
/*
 * A batch of buffer heads collected by add_to_chunk() and submitted in one
 * go by write_chunk() or write_ordered_chunk().
 */
6711da177e4SLinus Torvalds struct buffer_chunk {
	/* collected buffers; only the first nr slots are in use */
6721da177e4SLinus Torvalds 	struct buffer_head *bh[CHUNK_SIZE];
	/* number of buffers currently stored in bh[] */
6731da177e4SLinus Torvalds 	int nr;
6741da177e4SLinus Torvalds };
6751da177e4SLinus Torvalds 
676bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk)
677bd4c625cSLinus Torvalds {
6781da177e4SLinus Torvalds 	int i;
6791da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
6801da177e4SLinus Torvalds 		submit_logged_buffer(chunk->bh[i]);
6811da177e4SLinus Torvalds 	}
6821da177e4SLinus Torvalds 	chunk->nr = 0;
6831da177e4SLinus Torvalds }
6841da177e4SLinus Torvalds 
685bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk)
686bd4c625cSLinus Torvalds {
6871da177e4SLinus Torvalds 	int i;
6881da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
6891da177e4SLinus Torvalds 		submit_ordered_buffer(chunk->bh[i]);
6901da177e4SLinus Torvalds 	}
6911da177e4SLinus Torvalds 	chunk->nr = 0;
6921da177e4SLinus Torvalds }
6931da177e4SLinus Torvalds 
6941da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
695bd4c625cSLinus Torvalds 			spinlock_t * lock, void (fn) (struct buffer_chunk *))
6961da177e4SLinus Torvalds {
6971da177e4SLinus Torvalds 	int ret = 0;
69814a61442SEric Sesterhenn 	BUG_ON(chunk->nr >= CHUNK_SIZE);
6991da177e4SLinus Torvalds 	chunk->bh[chunk->nr++] = bh;
7001da177e4SLinus Torvalds 	if (chunk->nr >= CHUNK_SIZE) {
7011da177e4SLinus Torvalds 		ret = 1;
7021da177e4SLinus Torvalds 		if (lock)
7031da177e4SLinus Torvalds 			spin_unlock(lock);
7041da177e4SLinus Torvalds 		fn(chunk);
7051da177e4SLinus Torvalds 		if (lock)
7061da177e4SLinus Torvalds 			spin_lock(lock);
7071da177e4SLinus Torvalds 	}
7081da177e4SLinus Torvalds 	return ret;
7091da177e4SLinus Torvalds }
7101da177e4SLinus Torvalds 
7111da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
712bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void)
713bd4c625cSLinus Torvalds {
7141da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7151da177e4SLinus Torvalds 	while (1) {
7161da177e4SLinus Torvalds 		jh = kmalloc(sizeof(*jh), GFP_NOFS);
7171da177e4SLinus Torvalds 		if (jh) {
7181da177e4SLinus Torvalds 			atomic_inc(&nr_reiserfs_jh);
7191da177e4SLinus Torvalds 			return jh;
7201da177e4SLinus Torvalds 		}
7211da177e4SLinus Torvalds 		yield();
7221da177e4SLinus Torvalds 	}
7231da177e4SLinus Torvalds }
7241da177e4SLinus Torvalds 
7251da177e4SLinus Torvalds /*
7261da177e4SLinus Torvalds  * we want to free the jh when the buffer has been written
7271da177e4SLinus Torvalds  * and waited on
7281da177e4SLinus Torvalds  */
729bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh)
730bd4c625cSLinus Torvalds {
7311da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7321da177e4SLinus Torvalds 
7331da177e4SLinus Torvalds 	jh = bh->b_private;
7341da177e4SLinus Torvalds 	if (jh) {
7351da177e4SLinus Torvalds 		bh->b_private = NULL;
7361da177e4SLinus Torvalds 		jh->bh = NULL;
7371da177e4SLinus Torvalds 		list_del_init(&jh->list);
7381da177e4SLinus Torvalds 		kfree(jh);
7391da177e4SLinus Torvalds 		if (atomic_read(&nr_reiserfs_jh) <= 0)
7401da177e4SLinus Torvalds 			BUG();
7411da177e4SLinus Torvalds 		atomic_dec(&nr_reiserfs_jh);
7421da177e4SLinus Torvalds 		put_bh(bh);
7431da177e4SLinus Torvalds 	}
7441da177e4SLinus Torvalds }
7451da177e4SLinus Torvalds 
/*
 * Queue @bh on the journal list that is current for @j (j->j_current_jl).
 *
 * @j:    journal the buffer is being added to
 * @bh:   buffer to queue
 * @tail: non-zero to queue on j_tail_bh_list, zero to queue on j_bh_list
 *
 * If the buffer already has a jh in b_private, that jh is taken off its
 * present list and reused.  Otherwise a reference on @bh is taken, a new jh
 * is allocated with alloc_jh() and linked to the buffer.  The list
 * manipulation is done under j->j_dirty_buffers_lock.
 *
 * Always returns 0.
 */
7461da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
7471da177e4SLinus Torvalds 			   int tail)
7481da177e4SLinus Torvalds {
7491da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7501da177e4SLinus Torvalds 
	/*
	 * The first test of b_private is done without the lock, so it is
	 * repeated once the lock is held.  If the jh went away in between,
	 * drop the lock and fall into the allocation path below.
	 */
7511da177e4SLinus Torvalds 	if (bh->b_private) {
7521da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
7531da177e4SLinus Torvalds 		if (!bh->b_private) {
7541da177e4SLinus Torvalds 			spin_unlock(&j->j_dirty_buffers_lock);
7551da177e4SLinus Torvalds 			goto no_jh;
7561da177e4SLinus Torvalds 		}
7571da177e4SLinus Torvalds 		jh = bh->b_private;
7581da177e4SLinus Torvalds 		list_del_init(&jh->list);
7591da177e4SLinus Torvalds 	} else {
7601da177e4SLinus Torvalds 	      no_jh:
		/* the allocation is done before the spinlock is taken */
7611da177e4SLinus Torvalds 		get_bh(bh);
7621da177e4SLinus Torvalds 		jh = alloc_jh();
7631da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
		/*
		 * NOTE(review): the comment below presumably means that two
		 * adds should NOT be able to run at the same time, which is
		 * what the BUG_ON enforces - confirm.
		 */
7641da177e4SLinus Torvalds 		/* buffer must be locked for __add_jh, should be able to have
7651da177e4SLinus Torvalds 		 * two adds at the same time
7661da177e4SLinus Torvalds 		 */
76714a61442SEric Sesterhenn 		BUG_ON(bh->b_private);
7681da177e4SLinus Torvalds 		jh->bh = bh;
7691da177e4SLinus Torvalds 		bh->b_private = jh;
7701da177e4SLinus Torvalds 	}
	/* both paths arrive here with j_dirty_buffers_lock held */
7711da177e4SLinus Torvalds 	jh->jl = j->j_current_jl;
7721da177e4SLinus Torvalds 	if (tail)
7731da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
7741da177e4SLinus Torvalds 	else {
7751da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_bh_list);
7761da177e4SLinus Torvalds 	}
7771da177e4SLinus Torvalds 	spin_unlock(&j->j_dirty_buffers_lock);
7781da177e4SLinus Torvalds 	return 0;
7791da177e4SLinus Torvalds }
7801da177e4SLinus Torvalds 
781bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
782bd4c625cSLinus Torvalds {
7831da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
7841da177e4SLinus Torvalds }
785bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
786bd4c625cSLinus Torvalds {
7871da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
7881da177e4SLinus Torvalds }
7891da177e4SLinus Torvalds 
/* map a list_head embedded in a reiserfs_jh back to the jh */
7901da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
/*
 * Write out and wait for the buffers whose journal heads are queued on
 * @list.
 *
 * @lock: spinlock protecting @list; taken here and dropped around every
 *        operation that may sleep
 * @j:    journal; its j_errno is the initial return value
 * @jl:   not referenced in the body
 * @list: list of reiserfs_jh entries to process; empty on return
 *
 * The first loop empties @list: dirty buffers are batched into a chunk and
 * submitted with write_ordered_chunk() while their jh is parked on a
 * private list, clean buffers that we could lock have their jh freed right
 * away.  The second loop walks the private list, frees each jh and waits
 * for the buffer.
 *
 * Returns the value j->j_errno had on entry, or -EIO if a buffer turned
 * out not to be uptodate.
 */
7911da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock,
7921da177e4SLinus Torvalds 				 struct reiserfs_journal *j,
7931da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
7941da177e4SLinus Torvalds 				 struct list_head *list)
7951da177e4SLinus Torvalds {
7961da177e4SLinus Torvalds 	struct buffer_head *bh;
7971da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7981da177e4SLinus Torvalds 	int ret = j->j_errno;
7991da177e4SLinus Torvalds 	struct buffer_chunk chunk;
8001da177e4SLinus Torvalds 	struct list_head tmp;
8011da177e4SLinus Torvalds 	INIT_LIST_HEAD(&tmp);
8021da177e4SLinus Torvalds 
8031da177e4SLinus Torvalds 	chunk.nr = 0;
8041da177e4SLinus Torvalds 	spin_lock(lock);
8051da177e4SLinus Torvalds 	while (!list_empty(list)) {
8061da177e4SLinus Torvalds 		jh = JH_ENTRY(list->next);
8071da177e4SLinus Torvalds 		bh = jh->bh;
		/* hold the buffer for this iteration; dropped at loop_next */
8081da177e4SLinus Torvalds 		get_bh(bh);
809ca5de404SNick Piggin 		if (!trylock_buffer(bh)) {
			/*
			 * Somebody else holds the buffer lock.  A clean buffer
			 * is only parked on tmp so the second loop waits for
			 * it.  A dirty one is left on the list: submit what we
			 * have batched so far, wait for the buffer with the
			 * spinlock dropped, and look at the list again.
			 */
8101da177e4SLinus Torvalds 			if (!buffer_dirty(bh)) {
811f116629dSAkinobu Mita 				list_move(&jh->list, &tmp);
8121da177e4SLinus Torvalds 				goto loop_next;
8131da177e4SLinus Torvalds 			}
8141da177e4SLinus Torvalds 			spin_unlock(lock);
8151da177e4SLinus Torvalds 			if (chunk.nr)
8161da177e4SLinus Torvalds 				write_ordered_chunk(&chunk);
8171da177e4SLinus Torvalds 			wait_on_buffer(bh);
8181da177e4SLinus Torvalds 			cond_resched();
8191da177e4SLinus Torvalds 			spin_lock(lock);
8201da177e4SLinus Torvalds 			goto loop_next;
8211da177e4SLinus Torvalds 		}
8223d4492f8SChris Mason 		/* in theory, dirty non-uptodate buffers should never get here,
8233d4492f8SChris Mason 		 * but the upper layer io error paths still have a few quirks.
8243d4492f8SChris Mason 		 * Handle them here as gracefully as we can
8253d4492f8SChris Mason 		 */
8263d4492f8SChris Mason 		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
8273d4492f8SChris Mason 			clear_buffer_dirty(bh);
8283d4492f8SChris Mason 			ret = -EIO;
8293d4492f8SChris Mason 		}
		/*
		 * We hold the buffer lock here.  A dirty buffer goes into the
		 * chunk still locked; add_to_chunk() drops and retakes the
		 * spinlock when it has to flush a full chunk.  A clean buffer
		 * is done: free its jh and unlock it.
		 */
8301da177e4SLinus Torvalds 		if (buffer_dirty(bh)) {
831f116629dSAkinobu Mita 			list_move(&jh->list, &tmp);
8321da177e4SLinus Torvalds 			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
8331da177e4SLinus Torvalds 		} else {
8341da177e4SLinus Torvalds 			reiserfs_free_jh(bh);
8351da177e4SLinus Torvalds 			unlock_buffer(bh);
8361da177e4SLinus Torvalds 		}
8371da177e4SLinus Torvalds 	      loop_next:
8381da177e4SLinus Torvalds 		put_bh(bh);
8391da177e4SLinus Torvalds 		cond_resched_lock(lock);
8401da177e4SLinus Torvalds 	}
	/* submit whatever is left in a partially filled chunk */
8411da177e4SLinus Torvalds 	if (chunk.nr) {
8421da177e4SLinus Torvalds 		spin_unlock(lock);
8431da177e4SLinus Torvalds 		write_ordered_chunk(&chunk);
8441da177e4SLinus Torvalds 		spin_lock(lock);
8451da177e4SLinus Torvalds 	}
	/* second pass: take entries from the tail of tmp and wait for them */
8461da177e4SLinus Torvalds 	while (!list_empty(&tmp)) {
8471da177e4SLinus Torvalds 		jh = JH_ENTRY(tmp.prev);
8481da177e4SLinus Torvalds 		bh = jh->bh;
		/*
		 * reiserfs_free_jh() drops a buffer reference, so take our
		 * own first to keep bh usable below.
		 */
8491da177e4SLinus Torvalds 		get_bh(bh);
8501da177e4SLinus Torvalds 		reiserfs_free_jh(bh);
8511da177e4SLinus Torvalds 
8521da177e4SLinus Torvalds 		if (buffer_locked(bh)) {
8531da177e4SLinus Torvalds 			spin_unlock(lock);
8541da177e4SLinus Torvalds 			wait_on_buffer(bh);
8551da177e4SLinus Torvalds 			spin_lock(lock);
8561da177e4SLinus Torvalds 		}
8571da177e4SLinus Torvalds 		if (!buffer_uptodate(bh)) {
8581da177e4SLinus Torvalds 			ret = -EIO;
8591da177e4SLinus Torvalds 		}
860d62b1b87SChris Mason 		/* ugly interaction with invalidatepage here.
861d62b1b87SChris Mason 		 * reiserfs_invalidate_page will pin any buffer that has a valid
862d62b1b87SChris Mason 		 * journal head from an older transaction.  If someone else sets
863d62b1b87SChris Mason 		 * our buffer dirty after we write it in the first loop, and
864d62b1b87SChris Mason 		 * then someone truncates the page away, nobody will ever write
865d62b1b87SChris Mason 		 * the buffer. We're safe if we write the page one last time
866d62b1b87SChris Mason 		 * after freeing the journal header.
867d62b1b87SChris Mason 		 */
868d62b1b87SChris Mason 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
869d62b1b87SChris Mason 			spin_unlock(lock);
870d62b1b87SChris Mason 			ll_rw_block(WRITE, 1, &bh);
871d62b1b87SChris Mason 			spin_lock(lock);
872d62b1b87SChris Mason 		}
8731da177e4SLinus Torvalds 		put_bh(bh);
8741da177e4SLinus Torvalds 		cond_resched_lock(lock);
8751da177e4SLinus Torvalds 	}
8761da177e4SLinus Torvalds 	spin_unlock(lock);
8771da177e4SLinus Torvalds 	return ret;
8781da177e4SLinus Torvalds }
8791da177e4SLinus Torvalds 
/*
 * Flush the commit blocks of the journal lists that precede @jl on the
 * journal's list and still have commit work left.
 *
 * @s:  super block
 * @jl: journal list whose predecessors are flushed
 *
 * Returns 1 if, after flushing one of the older lists,
 * journal_list_still_alive() reports that @jl's transaction id is gone;
 * returns 0 otherwise.
 */
880bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s,
881bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
882bd4c625cSLinus Torvalds {
8831da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
8841da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
8851da177e4SLinus Torvalds 	struct reiserfs_journal_list *first_jl;
8861da177e4SLinus Torvalds 	struct list_head *entry;
	/* ids are copied so they can be compared after a list may be gone */
887600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
888600ed416SJeff Mahoney 	unsigned int other_trans_id;
889600ed416SJeff Mahoney 	unsigned int first_trans_id;
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds       find_first:
8921da177e4SLinus Torvalds 	/*
8931da177e4SLinus Torvalds 	 * first we walk backwards to find the oldest uncommitted transaction
8941da177e4SLinus Torvalds 	 */
8951da177e4SLinus Torvalds 	first_jl = jl;
8961da177e4SLinus Torvalds 	entry = jl->j_list.prev;
8971da177e4SLinus Torvalds 	while (1) {
8981da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
		/* stop at the list head or at a list marked older_commits_done */
8991da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list ||
9001da177e4SLinus Torvalds 		    atomic_read(&other_jl->j_older_commits_done))
9011da177e4SLinus Torvalds 			break;
9021da177e4SLinus Torvalds 
9031da177e4SLinus Torvalds 		first_jl = other_jl;
9041da177e4SLinus Torvalds 		entry = other_jl->j_list.prev;
9051da177e4SLinus Torvalds 	}
9061da177e4SLinus Torvalds 
9071da177e4SLinus Torvalds 	/* if we didn't find any older uncommitted transactions, return now */
9081da177e4SLinus Torvalds 	if (first_jl == jl) {
9091da177e4SLinus Torvalds 		return 0;
9101da177e4SLinus Torvalds 	}
9111da177e4SLinus Torvalds 
	/* first_trans_id is assigned here but not read again in this function */
9121da177e4SLinus Torvalds 	first_trans_id = first_jl->j_trans_id;
9131da177e4SLinus Torvalds 
	/* now walk forward from first_jl, flushing every list older than jl */
9141da177e4SLinus Torvalds 	entry = &first_jl->j_list;
9151da177e4SLinus Torvalds 	while (1) {
9161da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9171da177e4SLinus Torvalds 		other_trans_id = other_jl->j_trans_id;
9181da177e4SLinus Torvalds 
9191da177e4SLinus Torvalds 		if (other_trans_id < trans_id) {
9201da177e4SLinus Torvalds 			if (atomic_read(&other_jl->j_commit_left) != 0) {
				/* flushall == 0: flush only this one list */
9211da177e4SLinus Torvalds 				flush_commit_list(s, other_jl, 0);
9221da177e4SLinus Torvalds 
9231da177e4SLinus Torvalds 				/* list we were called with is gone, return */
9241da177e4SLinus Torvalds 				if (!journal_list_still_alive(s, trans_id))
9251da177e4SLinus Torvalds 					return 1;
9261da177e4SLinus Torvalds 
9271da177e4SLinus Torvalds 				/* the one we just flushed is gone, this means all
9281da177e4SLinus Torvalds 				 * older lists are also gone, so first_jl is no longer
9291da177e4SLinus Torvalds 				 * valid either.  Go back to the beginning.
9301da177e4SLinus Torvalds 				 */
931bd4c625cSLinus Torvalds 				if (!journal_list_still_alive
932bd4c625cSLinus Torvalds 				    (s, other_trans_id)) {
9331da177e4SLinus Torvalds 					goto find_first;
9341da177e4SLinus Torvalds 				}
9351da177e4SLinus Torvalds 			}
9361da177e4SLinus Torvalds 			entry = entry->next;
9371da177e4SLinus Torvalds 			if (entry == &journal->j_journal_list)
9381da177e4SLinus Torvalds 				return 0;
9391da177e4SLinus Torvalds 		} else {
			/* reached a list that is not older than jl: done */
9401da177e4SLinus Torvalds 			return 0;
9411da177e4SLinus Torvalds 		}
9421da177e4SLinus Torvalds 	}
	/* not reached: the loop above only exits through return or goto */
9431da177e4SLinus Torvalds 	return 0;
9441da177e4SLinus Torvalds }
945deba0f49SAdrian Bunk 
946deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s)
947bd4c625cSLinus Torvalds {
9481da177e4SLinus Torvalds 	struct reiserfs_journal *j = SB_JOURNAL(s);
9498ebc4232SFrederic Weisbecker 
9508ebc4232SFrederic Weisbecker 	if (atomic_read(&j->j_async_throttle)) {
9518ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
9528aa7e847SJens Axboe 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
9538ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
9548ebc4232SFrederic Weisbecker 	}
9558ebc4232SFrederic Weisbecker 
9561da177e4SLinus Torvalds 	return 0;
9571da177e4SLinus Torvalds }
9581da177e4SLinus Torvalds 
9591da177e4SLinus Torvalds /*
9601da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk.
9611da177e4SLinus Torvalds **
9621da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
9631da177e4SLinus Torvalds ** Before the commit block can be written, every other log block must be safely on disk
9641da177e4SLinus Torvalds **
9651da177e4SLinus Torvalds */
/*
 * @s:        super block
 * @jl:       journal list to commit; a reference is held on it for the
 *            duration of the call
 * @flushall: when non-zero, older journal lists are flushed first through
 *            flush_older_commits() and @jl is marked j_older_commits_done
 *            when we finish
 *
 * Returns 0 when the list is already done, disappeared while we were
 * waiting, or was committed successfully.  On a write error returns a
 * negative errno (-EIO or journal->j_errno) after calling reiserfs_abort().
 */
966bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
967bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall)
968bd4c625cSLinus Torvalds {
9691da177e4SLinus Torvalds 	int i;
9703ee16670SJeff Mahoney 	b_blocknr_t bn;
9711da177e4SLinus Torvalds 	struct buffer_head *tbh = NULL;
972600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
9731da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
9741da177e4SLinus Torvalds 	int retval = 0;
975e0e851cfSChris Mason 	int write_len;
9761da177e4SLinus Torvalds 
9771da177e4SLinus Torvalds 	reiserfs_check_lock_depth(s, "flush_commit_list");
9781da177e4SLinus Torvalds 
9791da177e4SLinus Torvalds 	if (atomic_read(&jl->j_older_commits_done)) {
9801da177e4SLinus Torvalds 		return 0;
9811da177e4SLinus Torvalds 	}
9821da177e4SLinus Torvalds 
9831da177e4SLinus Torvalds 	/* before we can put our commit blocks on disk, we have to make sure everyone older than
9841da177e4SLinus Torvalds 	 ** us is on disk too
9851da177e4SLinus Torvalds 	 */
9861da177e4SLinus Torvalds 	BUG_ON(jl->j_len <= 0);
9871da177e4SLinus Torvalds 	BUG_ON(trans_id == journal->j_trans_id);
9881da177e4SLinus Torvalds 
	/* reference dropped at put_jl */
9891da177e4SLinus Torvalds 	get_journal_list(jl);
9901da177e4SLinus Torvalds 	if (flushall) {
9911da177e4SLinus Torvalds 		if (flush_older_commits(s, jl) == 1) {
9921da177e4SLinus Torvalds 			/* list disappeared during flush_older_commits.  return */
9931da177e4SLinus Torvalds 			goto put_jl;
9941da177e4SLinus Torvalds 		}
9951da177e4SLinus Torvalds 	}
9961da177e4SLinus Torvalds 
9971da177e4SLinus Torvalds 	/* make sure nobody is trying to flush this one at the same time */
9988ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
9998ebc4232SFrederic Weisbecker 
	/* the list may have gone away while we waited for the mutex */
10001da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, trans_id)) {
100190415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10021da177e4SLinus Torvalds 		goto put_jl;
10031da177e4SLinus Torvalds 	}
10041da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds 	/* this commit is done, exit */
10071da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_commit_left)) <= 0) {
10081da177e4SLinus Torvalds 		if (flushall) {
10091da177e4SLinus Torvalds 			atomic_set(&(jl->j_older_commits_done), 1);
10101da177e4SLinus Torvalds 		}
101190415deaSJeff Mahoney 		mutex_unlock(&jl->j_commit_mutex);
10121da177e4SLinus Torvalds 		goto put_jl;
10131da177e4SLinus Torvalds 	}
10141da177e4SLinus Torvalds 
	/* write out the buffers queued on this list's j_bh_list first */
10151da177e4SLinus Torvalds 	if (!list_empty(&jl->j_bh_list)) {
10163d4492f8SChris Mason 		int ret;
10178ebc4232SFrederic Weisbecker 
10188ebc4232SFrederic Weisbecker 		/*
10198ebc4232SFrederic Weisbecker 		 * We might sleep in numerous places inside
10208ebc4232SFrederic Weisbecker 		 * write_ordered_buffers. Relax the write lock.
10218ebc4232SFrederic Weisbecker 		 */
10228ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10233d4492f8SChris Mason 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
10241da177e4SLinus Torvalds 					    journal, jl, &jl->j_bh_list);
10253d4492f8SChris Mason 		if (ret < 0 && retval == 0)
10263d4492f8SChris Mason 			retval = ret;
10278ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10281da177e4SLinus Torvalds 	}
10291da177e4SLinus Torvalds 	BUG_ON(!list_empty(&jl->j_bh_list));
10301da177e4SLinus Torvalds 	/*
10311da177e4SLinus Torvalds 	 * for the description block and all the log blocks, submit any buffers
1032e0e851cfSChris Mason 	 * that haven't already reached the disk.  Try to write at least 256
1033e0e851cfSChris Mason 	 * log blocks. later on, we will only wait on blocks that correspond
1034e0e851cfSChris Mason 	 * to this transaction, but while we're unplugging we might as well
1035e0e851cfSChris Mason 	 * get a chunk of data on there.
10361da177e4SLinus Torvalds 	 */
	/* j_async_throttle stays raised while the log writes are submitted */
10371da177e4SLinus Torvalds 	atomic_inc(&journal->j_async_throttle);
1038e0e851cfSChris Mason 	write_len = jl->j_len + 1;
1039e0e851cfSChris Mason 	if (write_len < 256)
1040e0e851cfSChris Mason 		write_len = 256;
1041e0e851cfSChris Mason 	for (i = 0 ; i < write_len ; i++) {
		/* block numbers wrap around inside the on-disk journal area */
10421da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
10431da177e4SLinus Torvalds 		    SB_ONDISK_JOURNAL_SIZE(s);
10441da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
1045e0e851cfSChris Mason 		if (tbh) {
10466e3647acSFrederic Weisbecker 			if (buffer_dirty(tbh)) {
10476e3647acSFrederic Weisbecker 		            reiserfs_write_unlock(s);
1048e0e851cfSChris Mason 			    ll_rw_block(WRITE, 1, &tbh);
10496e3647acSFrederic Weisbecker 			    reiserfs_write_lock(s);
10506e3647acSFrederic Weisbecker 			}
10511da177e4SLinus Torvalds 			put_bh(tbh) ;
10521da177e4SLinus Torvalds 		}
1053e0e851cfSChris Mason 	}
10541da177e4SLinus Torvalds 	atomic_dec(&journal->j_async_throttle);
10551da177e4SLinus Torvalds 
	/* wait for the j_len + 1 blocks that belong to this transaction */
10561da177e4SLinus Torvalds 	for (i = 0; i < (jl->j_len + 1); i++) {
10571da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
10581da177e4SLinus Torvalds 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
		/*
		 * NOTE(review): unlike the loop above, tbh is used without a
		 * NULL check here; presumably the reference taken in
		 * do_journal_end guarantees the lookup succeeds - confirm.
		 */
10591da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
10608ebc4232SFrederic Weisbecker 
10618ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
10621da177e4SLinus Torvalds 		wait_on_buffer(tbh);
10638ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
10641da177e4SLinus Torvalds 		// since we're using ll_rw_blk above, it might have skipped over
10651da177e4SLinus Torvalds 		// a locked buffer.  Double check here
10661da177e4SLinus Torvalds 		//
10678ebc4232SFrederic Weisbecker 		/* redundant, sync_dirty_buffer() checks */
10688ebc4232SFrederic Weisbecker 		if (buffer_dirty(tbh)) {
10698ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(s);
10701da177e4SLinus Torvalds 			sync_dirty_buffer(tbh);
10718ebc4232SFrederic Weisbecker 			reiserfs_write_lock(s);
10728ebc4232SFrederic Weisbecker 		}
10731da177e4SLinus Torvalds 		if (unlikely(!buffer_uptodate(tbh))) {
10741da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
107545b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-601",
107645b03d5eSJeff Mahoney 					 "buffer write failed");
10771da177e4SLinus Torvalds #endif
10781da177e4SLinus Torvalds 			retval = -EIO;
10791da177e4SLinus Torvalds 		}
10801da177e4SLinus Torvalds 		put_bh(tbh);	/* once for journal_find_get_block */
10811da177e4SLinus Torvalds 		put_bh(tbh);	/* once due to original getblk in do_journal_end */
10821da177e4SLinus Torvalds 		atomic_dec(&(jl->j_commit_left));
10831da177e4SLinus Torvalds 	}
10841da177e4SLinus Torvalds 
	/* only the commit block itself may be left at this point */
10851da177e4SLinus Torvalds 	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
10861da177e4SLinus Torvalds 
10875d5e8156SJeff Mahoney 	/* If there was a write error in the journal - we can't commit
10885d5e8156SJeff Mahoney 	 * this transaction - it will be invalid and, if successful,
1089beb7dd86SRobert P. J. Day 	 * will just end up propagating the write error out to
10905d5e8156SJeff Mahoney 	 * the file system. */
10915d5e8156SJeff Mahoney 	if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
10921da177e4SLinus Torvalds 		if (buffer_dirty(jl->j_commit_bh))
10931da177e4SLinus Torvalds 			BUG();
10941da177e4SLinus Torvalds 		mark_buffer_dirty(jl->j_commit_bh) ;
10958ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
		/* write the commit block synchronously, as FLUSH|FUA if enabled */
10967cd33ad2SChristoph Hellwig 		if (reiserfs_barrier_flush(s))
10977cd33ad2SChristoph Hellwig 			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
10987cd33ad2SChristoph Hellwig 		else
10991da177e4SLinus Torvalds 			sync_dirty_buffer(jl->j_commit_bh);
11008ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
11015d5e8156SJeff Mahoney 	}
11021da177e4SLinus Torvalds 
11031da177e4SLinus Torvalds 	/* If there was a write error in the journal - we can't commit this
11041da177e4SLinus Torvalds 	 * transaction - it will be invalid and, if successful, will just end
1105beb7dd86SRobert P. J. Day 	 * up propagating the write error out to the filesystem. */
11061da177e4SLinus Torvalds 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
11071da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
110845b03d5eSJeff Mahoney 		reiserfs_warning(s, "journal-615", "buffer write failed");
11091da177e4SLinus Torvalds #endif
11101da177e4SLinus Torvalds 		retval = -EIO;
11111da177e4SLinus Torvalds 	}
11121da177e4SLinus Torvalds 	bforget(jl->j_commit_bh);
	/*
	 * NOTE(review): jl->j_trans_id is printed with %u in put_journal_list
	 * and copied into an unsigned int above, so the %lu conversions in
	 * the warning below look mismatched - confirm the field types.
	 */
11131da177e4SLinus Torvalds 	if (journal->j_last_commit_id != 0 &&
11141da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
111545b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1116bd4c625cSLinus Torvalds 				 journal->j_last_commit_id, jl->j_trans_id);
11171da177e4SLinus Torvalds 	}
11181da177e4SLinus Torvalds 	journal->j_last_commit_id = jl->j_trans_id;
11191da177e4SLinus Torvalds 
11201da177e4SLinus Torvalds 	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
11211da177e4SLinus Torvalds 	cleanup_freed_for_journal_list(s, jl);
11221da177e4SLinus Torvalds 
	/* a journal-wide error counts as a failure of this commit too */
11231da177e4SLinus Torvalds 	retval = retval ? retval : journal->j_errno;
11241da177e4SLinus Torvalds 
11251da177e4SLinus Torvalds 	/* mark the metadata dirty */
11261da177e4SLinus Torvalds 	if (!retval)
11271da177e4SLinus Torvalds 		dirty_one_transaction(s, jl);
	/* drops the count that was left for the commit block */
11281da177e4SLinus Torvalds 	atomic_dec(&(jl->j_commit_left));
11291da177e4SLinus Torvalds 
11301da177e4SLinus Torvalds 	if (flushall) {
11311da177e4SLinus Torvalds 		atomic_set(&(jl->j_older_commits_done), 1);
11321da177e4SLinus Torvalds 	}
113390415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
11341da177e4SLinus Torvalds       put_jl:
	/* jl may be freed by this put; it is not touched afterwards */
11351da177e4SLinus Torvalds 	put_journal_list(s, jl);
11361da177e4SLinus Torvalds 
11371da177e4SLinus Torvalds 	if (retval)
1138bd4c625cSLinus Torvalds 		reiserfs_abort(s, retval, "Journal write error in %s",
1139fbe5498bSHarvey Harrison 			       __func__);
11401da177e4SLinus Torvalds 	return retval;
11411da177e4SLinus Torvalds }
11421da177e4SLinus Torvalds 
11431da177e4SLinus Torvalds /*
11441da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
11451da177e4SLinus Torvalds ** returns NULL if it can't find anything
11461da177e4SLinus Torvalds */
1147bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1148bd4c625cSLinus Torvalds 							  reiserfs_journal_cnode
1149bd4c625cSLinus Torvalds 							  *cn)
1150bd4c625cSLinus Torvalds {
11511da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
11521da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
11531da177e4SLinus Torvalds 
11541da177e4SLinus Torvalds 	cn = cn->hprev;
11551da177e4SLinus Torvalds 	while (cn) {
11561da177e4SLinus Torvalds 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
11571da177e4SLinus Torvalds 			return cn->jlist;
11581da177e4SLinus Torvalds 		}
11591da177e4SLinus Torvalds 		cn = cn->hprev;
11601da177e4SLinus Torvalds 	}
11611da177e4SLinus Torvalds 	return NULL;
11621da177e4SLinus Torvalds }
11631da177e4SLinus Torvalds 
1164a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1165a3172027SChris Mason {
1166a3172027SChris Mason 	struct super_block *sb = cn->sb;
1167a3172027SChris Mason 	b_blocknr_t blocknr = cn->blocknr;
1168a3172027SChris Mason 
1169a3172027SChris Mason 	cn = cn->hprev;
1170a3172027SChris Mason 	while (cn) {
1171a3172027SChris Mason 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1172a3172027SChris Mason 		    atomic_read(&cn->jlist->j_commit_left) != 0)
1173a3172027SChris Mason 				    return 0;
1174a3172027SChris Mason 		cn = cn->hprev;
1175a3172027SChris Mason 	}
1176a3172027SChris Mason 	return 1;
1177a3172027SChris Mason }
1178a3172027SChris Mason 
1179bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *,
1180bd4c625cSLinus Torvalds 				struct reiserfs_journal_cnode **,
1181bd4c625cSLinus Torvalds 				struct reiserfs_journal_list *, unsigned long,
1182bd4c625cSLinus Torvalds 				int);
11831da177e4SLinus Torvalds 
11841da177e4SLinus Torvalds /*
11851da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the
11861da177e4SLinus Torvalds ** journal list for this transaction.  Aside from freeing the cnode, this also allows the
11871da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted.
11881da177e4SLinus Torvalds */
1189a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb,
1190bd4c625cSLinus Torvalds 					 struct reiserfs_journal_list *jl,
1191bd4c625cSLinus Torvalds 					 int debug)
1192bd4c625cSLinus Torvalds {
1193a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
11941da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
11951da177e4SLinus Torvalds 	cn = jl->j_realblock;
11961da177e4SLinus Torvalds 
11971da177e4SLinus Torvalds 	/* which is better, to lock once around the whole loop, or
11981da177e4SLinus Torvalds 	 ** to lock for each call to remove_journal_hash?
11991da177e4SLinus Torvalds 	 */
12001da177e4SLinus Torvalds 	while (cn) {
12011da177e4SLinus Torvalds 		if (cn->blocknr != 0) {
12021da177e4SLinus Torvalds 			if (debug) {
1203a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2201",
1204bd4c625cSLinus Torvalds 						 "block %u, bh is %d, state %ld",
1205bd4c625cSLinus Torvalds 						 cn->blocknr, cn->bh ? 1 : 0,
1206bd4c625cSLinus Torvalds 						 cn->state);
12071da177e4SLinus Torvalds 			}
12081da177e4SLinus Torvalds 			cn->state = 0;
1209a9dd3643SJeff Mahoney 			remove_journal_hash(sb, journal->j_list_hash_table,
1210bd4c625cSLinus Torvalds 					    jl, cn->blocknr, 1);
12111da177e4SLinus Torvalds 		}
12121da177e4SLinus Torvalds 		last = cn;
12131da177e4SLinus Torvalds 		cn = cn->next;
1214a9dd3643SJeff Mahoney 		free_cnode(sb, last);
12151da177e4SLinus Torvalds 	}
12161da177e4SLinus Torvalds 	jl->j_realblock = NULL;
12171da177e4SLinus Torvalds }
12181da177e4SLinus Torvalds 
12191da177e4SLinus Torvalds /*
12201da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
12211da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
12221da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks.
12231da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list
12241da177e4SLinus Torvalds **
12251da177e4SLinus Torvalds */
1226a9dd3643SJeff Mahoney static int _update_journal_header_block(struct super_block *sb,
1227bd4c625cSLinus Torvalds 					unsigned long offset,
1228600ed416SJeff Mahoney 					unsigned int trans_id)
1229bd4c625cSLinus Torvalds {
12301da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
1231a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
12321da177e4SLinus Torvalds 
12331da177e4SLinus Torvalds 	if (reiserfs_is_journal_aborted(journal))
12341da177e4SLinus Torvalds 		return -EIO;
12351da177e4SLinus Torvalds 
12361da177e4SLinus Torvalds 	if (trans_id >= journal->j_last_flush_trans_id) {
12371da177e4SLinus Torvalds 		if (buffer_locked((journal->j_header_bh))) {
12388ebc4232SFrederic Weisbecker 			reiserfs_write_unlock(sb);
12391da177e4SLinus Torvalds 			wait_on_buffer((journal->j_header_bh));
12408ebc4232SFrederic Weisbecker 			reiserfs_write_lock(sb);
12411da177e4SLinus Torvalds 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
12421da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1243a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "journal-699",
124445b03d5eSJeff Mahoney 						 "buffer write failed");
12451da177e4SLinus Torvalds #endif
12461da177e4SLinus Torvalds 				return -EIO;
12471da177e4SLinus Torvalds 			}
12481da177e4SLinus Torvalds 		}
12491da177e4SLinus Torvalds 		journal->j_last_flush_trans_id = trans_id;
12501da177e4SLinus Torvalds 		journal->j_first_unflushed_offset = offset;
1251bd4c625cSLinus Torvalds 		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1252bd4c625cSLinus Torvalds 							b_data);
12531da177e4SLinus Torvalds 		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
12541da177e4SLinus Torvalds 		jh->j_first_unflushed_offset = cpu_to_le32(offset);
12551da177e4SLinus Torvalds 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
12561da177e4SLinus Torvalds 
12571da177e4SLinus Torvalds 		set_buffer_dirty(journal->j_header_bh);
12588ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
12597cd33ad2SChristoph Hellwig 
12607cd33ad2SChristoph Hellwig 		if (reiserfs_barrier_flush(sb))
12617cd33ad2SChristoph Hellwig 			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
12627cd33ad2SChristoph Hellwig 		else
12631da177e4SLinus Torvalds 			sync_dirty_buffer(journal->j_header_bh);
12647cd33ad2SChristoph Hellwig 
12658ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
12661da177e4SLinus Torvalds 		if (!buffer_uptodate(journal->j_header_bh)) {
1267a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-837",
126845b03d5eSJeff Mahoney 					 "IO error during journal replay");
12691da177e4SLinus Torvalds 			return -EIO;
12701da177e4SLinus Torvalds 		}
12711da177e4SLinus Torvalds 	}
12721da177e4SLinus Torvalds 	return 0;
12731da177e4SLinus Torvalds }
12741da177e4SLinus Torvalds 
/*
** Thin wrapper: hands sb, offset and trans_id unchanged to
** _update_journal_header_block() and returns whatever it returns.
*/
static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	int err = _update_journal_header_block(sb, offset, trans_id);

	return err;
}
1281bd4c625cSLinus Torvalds 
12821da177e4SLinus Torvalds /*
12831da177e4SLinus Torvalds ** flush any and all journal lists older than you are
12841da177e4SLinus Torvalds ** can only be called from flush_journal_list
12851da177e4SLinus Torvalds */
1286a9dd3643SJeff Mahoney static int flush_older_journal_lists(struct super_block *sb,
12871da177e4SLinus Torvalds 				     struct reiserfs_journal_list *jl)
12881da177e4SLinus Torvalds {
12891da177e4SLinus Torvalds 	struct list_head *entry;
12901da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
1291a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1292600ed416SJeff Mahoney 	unsigned int trans_id = jl->j_trans_id;
12931da177e4SLinus Torvalds 
12941da177e4SLinus Torvalds 	/* we know we are the only ones flushing things, no extra race
12951da177e4SLinus Torvalds 	 * protection is required.
12961da177e4SLinus Torvalds 	 */
12971da177e4SLinus Torvalds       restart:
12981da177e4SLinus Torvalds 	entry = journal->j_journal_list.next;
12991da177e4SLinus Torvalds 	/* Did we wrap? */
13001da177e4SLinus Torvalds 	if (entry == &journal->j_journal_list)
13011da177e4SLinus Torvalds 		return 0;
13021da177e4SLinus Torvalds 	other_jl = JOURNAL_LIST_ENTRY(entry);
13031da177e4SLinus Torvalds 	if (other_jl->j_trans_id < trans_id) {
13041da177e4SLinus Torvalds 		BUG_ON(other_jl->j_refcount <= 0);
13051da177e4SLinus Torvalds 		/* do not flush all */
1306a9dd3643SJeff Mahoney 		flush_journal_list(sb, other_jl, 0);
13071da177e4SLinus Torvalds 
13081da177e4SLinus Torvalds 		/* other_jl is now deleted from the list */
13091da177e4SLinus Torvalds 		goto restart;
13101da177e4SLinus Torvalds 	}
13111da177e4SLinus Torvalds 	return 0;
13121da177e4SLinus Torvalds }
13131da177e4SLinus Torvalds 
13141da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s,
1315bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
1316bd4c625cSLinus Torvalds {
13171da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13181da177e4SLinus Torvalds 	if (!list_empty(&jl->j_working_list)) {
13191da177e4SLinus Torvalds 		list_del_init(&jl->j_working_list);
13201da177e4SLinus Torvalds 		journal->j_num_work_lists--;
13211da177e4SLinus Torvalds 	}
13221da177e4SLinus Torvalds }
13231da177e4SLinus Torvalds 
13241da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks
13251da177e4SLinus Torvalds **
13261da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside
13271da177e4SLinus Torvalds ** flush_journal_list
13281da177e4SLinus Torvalds **
13291da177e4SLinus Torvalds ** IMPORTANT.  This can only be called while there are no journal writers,
13301da177e4SLinus Torvalds ** and the journal is locked.  That means it can only be called from
13311da177e4SLinus Torvalds ** do_journal_end, or by journal_release
13321da177e4SLinus Torvalds */
13331da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s,
1334bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall)
1335bd4c625cSLinus Torvalds {
13361da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
13371da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
13381da177e4SLinus Torvalds 	int count;
13391da177e4SLinus Torvalds 	int was_jwait = 0;
13401da177e4SLinus Torvalds 	int was_dirty = 0;
13411da177e4SLinus Torvalds 	struct buffer_head *saved_bh;
13421da177e4SLinus Torvalds 	unsigned long j_len_saved = jl->j_len;
13431da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13441da177e4SLinus Torvalds 	int err = 0;
13451da177e4SLinus Torvalds 
13461da177e4SLinus Torvalds 	BUG_ON(j_len_saved <= 0);
13471da177e4SLinus Torvalds 
13481da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) != 0) {
134945b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2048", "called with wcount %d",
13501da177e4SLinus Torvalds 				 atomic_read(&journal->j_wcount));
13511da177e4SLinus Torvalds 	}
13521da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
13531da177e4SLinus Torvalds 
13541da177e4SLinus Torvalds 	/* if flushall == 0, the lock is already held */
13551da177e4SLinus Torvalds 	if (flushall) {
13568ebc4232SFrederic Weisbecker 		reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1357afe70259SJeff Mahoney 	} else if (mutex_trylock(&journal->j_flush_mutex)) {
13581da177e4SLinus Torvalds 		BUG();
13591da177e4SLinus Torvalds 	}
13601da177e4SLinus Torvalds 
13611da177e4SLinus Torvalds 	count = 0;
13621da177e4SLinus Torvalds 	if (j_len_saved > journal->j_trans_max) {
1363c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1364bd4c625cSLinus Torvalds 			       j_len_saved, jl->j_trans_id);
13651da177e4SLinus Torvalds 		return 0;
13661da177e4SLinus Torvalds 	}
13671da177e4SLinus Torvalds 
13681da177e4SLinus Torvalds 	/* if all the work is already done, get out of here */
13691da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
13701da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
13711da177e4SLinus Torvalds 		goto flush_older_and_return;
13721da177e4SLinus Torvalds 	}
13731da177e4SLinus Torvalds 
13741da177e4SLinus Torvalds 	/* start by putting the commit list on disk.  This will also flush
13751da177e4SLinus Torvalds 	 ** the commit lists of any olders transactions
13761da177e4SLinus Torvalds 	 */
13771da177e4SLinus Torvalds 	flush_commit_list(s, jl, 1);
13781da177e4SLinus Torvalds 
1379bd4c625cSLinus Torvalds 	if (!(jl->j_state & LIST_DIRTY)
1380bd4c625cSLinus Torvalds 	    && !reiserfs_is_journal_aborted(journal))
13811da177e4SLinus Torvalds 		BUG();
13821da177e4SLinus Torvalds 
13831da177e4SLinus Torvalds 	/* are we done now? */
13841da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
13851da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
13861da177e4SLinus Torvalds 		goto flush_older_and_return;
13871da177e4SLinus Torvalds 	}
13881da177e4SLinus Torvalds 
13891da177e4SLinus Torvalds 	/* loop through each cnode, see if we need to write it,
13901da177e4SLinus Torvalds 	 ** or wait on a more recent transaction, or just ignore it
13911da177e4SLinus Torvalds 	 */
13921da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) != 0) {
1393c3a9c210SJeff Mahoney 		reiserfs_panic(s, "journal-844", "journal list is flushing, "
1394c3a9c210SJeff Mahoney 			       "wcount is not 0");
13951da177e4SLinus Torvalds 	}
13961da177e4SLinus Torvalds 	cn = jl->j_realblock;
13971da177e4SLinus Torvalds 	while (cn) {
13981da177e4SLinus Torvalds 		was_jwait = 0;
13991da177e4SLinus Torvalds 		was_dirty = 0;
14001da177e4SLinus Torvalds 		saved_bh = NULL;
14011da177e4SLinus Torvalds 		/* blocknr of 0 is no longer in the hash, ignore it */
14021da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
14031da177e4SLinus Torvalds 			goto free_cnode;
14041da177e4SLinus Torvalds 		}
14051da177e4SLinus Torvalds 
14061da177e4SLinus Torvalds 		/* This transaction failed commit. Don't write out to the disk */
14071da177e4SLinus Torvalds 		if (!(jl->j_state & LIST_DIRTY))
14081da177e4SLinus Torvalds 			goto free_cnode;
14091da177e4SLinus Torvalds 
14101da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
14111da177e4SLinus Torvalds 		/* the order is important here.  We check pjl to make sure we
14121da177e4SLinus Torvalds 		 ** don't clear BH_JDirty_wait if we aren't the one writing this
14131da177e4SLinus Torvalds 		 ** block to disk
14141da177e4SLinus Torvalds 		 */
14151da177e4SLinus Torvalds 		if (!pjl && cn->bh) {
14161da177e4SLinus Torvalds 			saved_bh = cn->bh;
14171da177e4SLinus Torvalds 
14181da177e4SLinus Torvalds 			/* we do this to make sure nobody releases the buffer while
14191da177e4SLinus Torvalds 			 ** we are working with it
14201da177e4SLinus Torvalds 			 */
14211da177e4SLinus Torvalds 			get_bh(saved_bh);
14221da177e4SLinus Torvalds 
14231da177e4SLinus Torvalds 			if (buffer_journal_dirty(saved_bh)) {
14241da177e4SLinus Torvalds 				BUG_ON(!can_dirty(cn));
14251da177e4SLinus Torvalds 				was_jwait = 1;
14261da177e4SLinus Torvalds 				was_dirty = 1;
14271da177e4SLinus Torvalds 			} else if (can_dirty(cn)) {
14281da177e4SLinus Torvalds 				/* everything with !pjl && jwait should be writable */
14291da177e4SLinus Torvalds 				BUG();
14301da177e4SLinus Torvalds 			}
14311da177e4SLinus Torvalds 		}
14321da177e4SLinus Torvalds 
14331da177e4SLinus Torvalds 		/* if someone has this block in a newer transaction, just make
14340779bf2dSMatt LaPlante 		 ** sure they are committed, and don't try writing it to disk
14351da177e4SLinus Torvalds 		 */
14361da177e4SLinus Torvalds 		if (pjl) {
14371da177e4SLinus Torvalds 			if (atomic_read(&pjl->j_commit_left))
14381da177e4SLinus Torvalds 				flush_commit_list(s, pjl, 1);
14391da177e4SLinus Torvalds 			goto free_cnode;
14401da177e4SLinus Torvalds 		}
14411da177e4SLinus Torvalds 
14421da177e4SLinus Torvalds 		/* bh == NULL when the block got to disk on its own, OR,
14431da177e4SLinus Torvalds 		 ** the block got freed in a future transaction
14441da177e4SLinus Torvalds 		 */
14451da177e4SLinus Torvalds 		if (saved_bh == NULL) {
14461da177e4SLinus Torvalds 			goto free_cnode;
14471da177e4SLinus Torvalds 		}
14481da177e4SLinus Torvalds 
14491da177e4SLinus Torvalds 		/* this should never happen.  kupdate_one_transaction has this list
14501da177e4SLinus Torvalds 		 ** locked while it works, so we should never see a buffer here that
14511da177e4SLinus Torvalds 		 ** is not marked JDirty_wait
14521da177e4SLinus Torvalds 		 */
14531da177e4SLinus Torvalds 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
145445b03d5eSJeff Mahoney 			reiserfs_warning(s, "journal-813",
145545b03d5eSJeff Mahoney 					 "BAD! buffer %llu %cdirty %cjwait, "
14561da177e4SLinus Torvalds 					 "not in a newer tranasction",
1457bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1458bd4c625cSLinus Torvalds 					 b_blocknr, was_dirty ? ' ' : '!',
1459bd4c625cSLinus Torvalds 					 was_jwait ? ' ' : '!');
14601da177e4SLinus Torvalds 		}
14611da177e4SLinus Torvalds 		if (was_dirty) {
14621da177e4SLinus Torvalds 			/* we inc again because saved_bh gets decremented at free_cnode */
14631da177e4SLinus Torvalds 			get_bh(saved_bh);
14641da177e4SLinus Torvalds 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
14651da177e4SLinus Torvalds 			lock_buffer(saved_bh);
14661da177e4SLinus Torvalds 			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
14671da177e4SLinus Torvalds 			if (buffer_dirty(saved_bh))
14681da177e4SLinus Torvalds 				submit_logged_buffer(saved_bh);
14691da177e4SLinus Torvalds 			else
14701da177e4SLinus Torvalds 				unlock_buffer(saved_bh);
14711da177e4SLinus Torvalds 			count++;
14721da177e4SLinus Torvalds 		} else {
147345b03d5eSJeff Mahoney 			reiserfs_warning(s, "clm-2082",
147445b03d5eSJeff Mahoney 					 "Unable to flush buffer %llu in %s",
1475bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1476fbe5498bSHarvey Harrison 					 b_blocknr, __func__);
14771da177e4SLinus Torvalds 		}
14781da177e4SLinus Torvalds 	      free_cnode:
14791da177e4SLinus Torvalds 		last = cn;
14801da177e4SLinus Torvalds 		cn = cn->next;
14811da177e4SLinus Torvalds 		if (saved_bh) {
14821da177e4SLinus Torvalds 			/* we incremented this to keep others from taking the buffer head away */
14831da177e4SLinus Torvalds 			put_bh(saved_bh);
14841da177e4SLinus Torvalds 			if (atomic_read(&(saved_bh->b_count)) < 0) {
148545b03d5eSJeff Mahoney 				reiserfs_warning(s, "journal-945",
148645b03d5eSJeff Mahoney 						 "saved_bh->b_count < 0");
14871da177e4SLinus Torvalds 			}
14881da177e4SLinus Torvalds 		}
14891da177e4SLinus Torvalds 	}
14901da177e4SLinus Torvalds 	if (count > 0) {
14911da177e4SLinus Torvalds 		cn = jl->j_realblock;
14921da177e4SLinus Torvalds 		while (cn) {
14931da177e4SLinus Torvalds 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
14941da177e4SLinus Torvalds 				if (!cn->bh) {
1495c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1011",
1496c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
14971da177e4SLinus Torvalds 				}
14988ebc4232SFrederic Weisbecker 
14998ebc4232SFrederic Weisbecker 				reiserfs_write_unlock(s);
15001da177e4SLinus Torvalds 				wait_on_buffer(cn->bh);
15018ebc4232SFrederic Weisbecker 				reiserfs_write_lock(s);
15028ebc4232SFrederic Weisbecker 
15031da177e4SLinus Torvalds 				if (!cn->bh) {
1504c3a9c210SJeff Mahoney 					reiserfs_panic(s, "journal-1012",
1505c3a9c210SJeff Mahoney 						       "cn->bh is NULL");
15061da177e4SLinus Torvalds 				}
15071da177e4SLinus Torvalds 				if (unlikely(!buffer_uptodate(cn->bh))) {
15081da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
150945b03d5eSJeff Mahoney 					reiserfs_warning(s, "journal-949",
151045b03d5eSJeff Mahoney 							 "buffer write failed");
15111da177e4SLinus Torvalds #endif
15121da177e4SLinus Torvalds 					err = -EIO;
15131da177e4SLinus Torvalds 				}
15141da177e4SLinus Torvalds 				/* note, we must clear the JDirty_wait bit after the up to date
15151da177e4SLinus Torvalds 				 ** check, otherwise we race against our flushpage routine
15161da177e4SLinus Torvalds 				 */
1517bd4c625cSLinus Torvalds 				BUG_ON(!test_clear_buffer_journal_dirty
1518bd4c625cSLinus Torvalds 				       (cn->bh));
15191da177e4SLinus Torvalds 
1520398c95bdSChris Mason 				/* drop one ref for us */
15211da177e4SLinus Torvalds 				put_bh(cn->bh);
1522398c95bdSChris Mason 				/* drop one ref for journal_mark_dirty */
1523398c95bdSChris Mason 				release_buffer_page(cn->bh);
15241da177e4SLinus Torvalds 			}
15251da177e4SLinus Torvalds 			cn = cn->next;
15261da177e4SLinus Torvalds 		}
15271da177e4SLinus Torvalds 	}
15281da177e4SLinus Torvalds 
15291da177e4SLinus Torvalds 	if (err)
1530bd4c625cSLinus Torvalds 		reiserfs_abort(s, -EIO,
1531bd4c625cSLinus Torvalds 			       "Write error while pushing transaction to disk in %s",
1532fbe5498bSHarvey Harrison 			       __func__);
15331da177e4SLinus Torvalds       flush_older_and_return:
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds 	/* before we can update the journal header block, we _must_ flush all
15361da177e4SLinus Torvalds 	 ** real blocks from all older transactions to disk.  This is because
15371da177e4SLinus Torvalds 	 ** once the header block is updated, this transaction will not be
15381da177e4SLinus Torvalds 	 ** replayed after a crash
15391da177e4SLinus Torvalds 	 */
15401da177e4SLinus Torvalds 	if (flushall) {
15411da177e4SLinus Torvalds 		flush_older_journal_lists(s, jl);
15421da177e4SLinus Torvalds 	}
15431da177e4SLinus Torvalds 
15441da177e4SLinus Torvalds 	err = journal->j_errno;
15451da177e4SLinus Torvalds 	/* before we can remove everything from the hash tables for this
15461da177e4SLinus Torvalds 	 ** transaction, we must make sure it can never be replayed
15471da177e4SLinus Torvalds 	 **
15481da177e4SLinus Torvalds 	 ** since we are only called from do_journal_end, we know for sure there
15491da177e4SLinus Torvalds 	 ** are no allocations going on while we are flushing journal lists.  So,
15501da177e4SLinus Torvalds 	 ** we only need to update the journal header block for the last list
15511da177e4SLinus Torvalds 	 ** being flushed
15521da177e4SLinus Torvalds 	 */
15531da177e4SLinus Torvalds 	if (!err && flushall) {
1554bd4c625cSLinus Torvalds 		err =
1555bd4c625cSLinus Torvalds 		    update_journal_header_block(s,
1556bd4c625cSLinus Torvalds 						(jl->j_start + jl->j_len +
1557bd4c625cSLinus Torvalds 						 2) % SB_ONDISK_JOURNAL_SIZE(s),
1558bd4c625cSLinus Torvalds 						jl->j_trans_id);
15591da177e4SLinus Torvalds 		if (err)
1560bd4c625cSLinus Torvalds 			reiserfs_abort(s, -EIO,
1561bd4c625cSLinus Torvalds 				       "Write error while updating journal header in %s",
1562fbe5498bSHarvey Harrison 				       __func__);
15631da177e4SLinus Torvalds 	}
15641da177e4SLinus Torvalds 	remove_all_from_journal_list(s, jl, 0);
15651da177e4SLinus Torvalds 	list_del_init(&jl->j_list);
15661da177e4SLinus Torvalds 	journal->j_num_lists--;
15671da177e4SLinus Torvalds 	del_from_work_list(s, jl);
15681da177e4SLinus Torvalds 
15691da177e4SLinus Torvalds 	if (journal->j_last_flush_id != 0 &&
15701da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
157145b03d5eSJeff Mahoney 		reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1572bd4c625cSLinus Torvalds 				 journal->j_last_flush_id, jl->j_trans_id);
15731da177e4SLinus Torvalds 	}
15741da177e4SLinus Torvalds 	journal->j_last_flush_id = jl->j_trans_id;
15751da177e4SLinus Torvalds 
15761da177e4SLinus Torvalds 	/* not strictly required since we are freeing the list, but it should
15771da177e4SLinus Torvalds 	 * help find code using dead lists later on
15781da177e4SLinus Torvalds 	 */
15791da177e4SLinus Torvalds 	jl->j_len = 0;
15801da177e4SLinus Torvalds 	atomic_set(&(jl->j_nonzerolen), 0);
15811da177e4SLinus Torvalds 	jl->j_start = 0;
15821da177e4SLinus Torvalds 	jl->j_realblock = NULL;
15831da177e4SLinus Torvalds 	jl->j_commit_bh = NULL;
15841da177e4SLinus Torvalds 	jl->j_trans_id = 0;
15851da177e4SLinus Torvalds 	jl->j_state = 0;
15861da177e4SLinus Torvalds 	put_journal_list(s, jl);
15871da177e4SLinus Torvalds 	if (flushall)
1588afe70259SJeff Mahoney 		mutex_unlock(&journal->j_flush_mutex);
15891da177e4SLinus Torvalds 	return err;
15901da177e4SLinus Torvalds }
15911da177e4SLinus Torvalds 
1592a3172027SChris Mason static int test_transaction(struct super_block *s,
1593a3172027SChris Mason                             struct reiserfs_journal_list *jl)
1594a3172027SChris Mason {
1595a3172027SChris Mason 	struct reiserfs_journal_cnode *cn;
1596a3172027SChris Mason 
1597a3172027SChris Mason 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1598a3172027SChris Mason 		return 1;
1599a3172027SChris Mason 
1600a3172027SChris Mason 	cn = jl->j_realblock;
1601a3172027SChris Mason 	while (cn) {
1602a3172027SChris Mason 		/* if the blocknr == 0, this has been cleared from the hash,
1603a3172027SChris Mason 		 ** skip it
1604a3172027SChris Mason 		 */
1605a3172027SChris Mason 		if (cn->blocknr == 0) {
1606a3172027SChris Mason 			goto next;
1607a3172027SChris Mason 		}
1608a3172027SChris Mason 		if (cn->bh && !newer_jl_done(cn))
1609a3172027SChris Mason 			return 0;
1610a3172027SChris Mason 	      next:
1611a3172027SChris Mason 		cn = cn->next;
1612a3172027SChris Mason 		cond_resched();
1613a3172027SChris Mason 	}
1614a3172027SChris Mason 	return 0;
1615a3172027SChris Mason }
1616a3172027SChris Mason 
16171da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s,
16181da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
16191da177e4SLinus Torvalds 				 struct buffer_chunk *chunk)
16201da177e4SLinus Torvalds {
16211da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16221da177e4SLinus Torvalds 	int ret = 0;
16231da177e4SLinus Torvalds 
16241da177e4SLinus Torvalds 	jl->j_state |= LIST_TOUCHED;
16251da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16261da177e4SLinus Torvalds 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
16271da177e4SLinus Torvalds 		return 0;
16281da177e4SLinus Torvalds 	}
16291da177e4SLinus Torvalds 
16301da177e4SLinus Torvalds 	cn = jl->j_realblock;
16311da177e4SLinus Torvalds 	while (cn) {
16321da177e4SLinus Torvalds 		/* if the blocknr == 0, this has been cleared from the hash,
16331da177e4SLinus Torvalds 		 ** skip it
16341da177e4SLinus Torvalds 		 */
16351da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
16361da177e4SLinus Torvalds 			goto next;
16371da177e4SLinus Torvalds 		}
16381da177e4SLinus Torvalds 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
16391da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
16401da177e4SLinus Torvalds 			/* we can race against journal_mark_freed when we try
16411da177e4SLinus Torvalds 			 * to lock_buffer(cn->bh), so we have to inc the buffer
16421da177e4SLinus Torvalds 			 * count, and recheck things after locking
16431da177e4SLinus Torvalds 			 */
16441da177e4SLinus Torvalds 			tmp_bh = cn->bh;
16451da177e4SLinus Torvalds 			get_bh(tmp_bh);
16461da177e4SLinus Torvalds 			lock_buffer(tmp_bh);
16471da177e4SLinus Torvalds 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
16481da177e4SLinus Torvalds 				if (!buffer_journal_dirty(tmp_bh) ||
16491da177e4SLinus Torvalds 				    buffer_journal_prepared(tmp_bh))
16501da177e4SLinus Torvalds 					BUG();
16511da177e4SLinus Torvalds 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
16521da177e4SLinus Torvalds 				ret++;
16531da177e4SLinus Torvalds 			} else {
16541da177e4SLinus Torvalds 				/* note, cn->bh might be null now */
16551da177e4SLinus Torvalds 				unlock_buffer(tmp_bh);
16561da177e4SLinus Torvalds 			}
16571da177e4SLinus Torvalds 			put_bh(tmp_bh);
16581da177e4SLinus Torvalds 		}
16591da177e4SLinus Torvalds 	      next:
16601da177e4SLinus Torvalds 		cn = cn->next;
16611da177e4SLinus Torvalds 		cond_resched();
16621da177e4SLinus Torvalds 	}
16631da177e4SLinus Torvalds 	return ret;
16641da177e4SLinus Torvalds }
16651da177e4SLinus Torvalds 
16661da177e4SLinus Torvalds /* used by flush_commit_list */
16671da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
16681da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl)
16691da177e4SLinus Torvalds {
16701da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16711da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
16721da177e4SLinus Torvalds 	int ret = 0;
16731da177e4SLinus Torvalds 
16741da177e4SLinus Torvalds 	jl->j_state |= LIST_DIRTY;
16751da177e4SLinus Torvalds 	cn = jl->j_realblock;
16761da177e4SLinus Torvalds 	while (cn) {
16771da177e4SLinus Torvalds 		/* look for a more recent transaction that logged this
16781da177e4SLinus Torvalds 		 ** buffer.  Only the most recent transaction with a buffer in
16791da177e4SLinus Torvalds 		 ** it is allowed to send that buffer to disk
16801da177e4SLinus Torvalds 		 */
16811da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
1682bd4c625cSLinus Torvalds 		if (!pjl && cn->blocknr && cn->bh
1683bd4c625cSLinus Torvalds 		    && buffer_journal_dirty(cn->bh)) {
16841da177e4SLinus Torvalds 			BUG_ON(!can_dirty(cn));
16851da177e4SLinus Torvalds 			/* if the buffer is prepared, it will either be logged
16861da177e4SLinus Torvalds 			 * or restored.  If restored, we need to make sure
16871da177e4SLinus Torvalds 			 * it actually gets marked dirty
16881da177e4SLinus Torvalds 			 */
16891da177e4SLinus Torvalds 			clear_buffer_journal_new(cn->bh);
16901da177e4SLinus Torvalds 			if (buffer_journal_prepared(cn->bh)) {
16911da177e4SLinus Torvalds 				set_buffer_journal_restore_dirty(cn->bh);
16921da177e4SLinus Torvalds 			} else {
16931da177e4SLinus Torvalds 				set_buffer_journal_test(cn->bh);
16941da177e4SLinus Torvalds 				mark_buffer_dirty(cn->bh);
16951da177e4SLinus Torvalds 			}
16961da177e4SLinus Torvalds 		}
16971da177e4SLinus Torvalds 		cn = cn->next;
16981da177e4SLinus Torvalds 	}
16991da177e4SLinus Torvalds 	return ret;
17001da177e4SLinus Torvalds }
17011da177e4SLinus Torvalds 
17021da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s,
17031da177e4SLinus Torvalds 				struct reiserfs_journal_list *jl,
17041da177e4SLinus Torvalds 				struct reiserfs_journal_list **next_jl,
1705600ed416SJeff Mahoney 				unsigned int *next_trans_id,
1706bd4c625cSLinus Torvalds 				int num_blocks, int num_trans)
1707bd4c625cSLinus Torvalds {
17081da177e4SLinus Torvalds 	int ret = 0;
17091da177e4SLinus Torvalds 	int written = 0;
17101da177e4SLinus Torvalds 	int transactions_flushed = 0;
1711600ed416SJeff Mahoney 	unsigned int orig_trans_id = jl->j_trans_id;
17121da177e4SLinus Torvalds 	struct buffer_chunk chunk;
17131da177e4SLinus Torvalds 	struct list_head *entry;
17141da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17151da177e4SLinus Torvalds 	chunk.nr = 0;
17161da177e4SLinus Torvalds 
1717a412f9efSFrederic Weisbecker 	reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
17181da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, orig_trans_id)) {
17191da177e4SLinus Torvalds 		goto done;
17201da177e4SLinus Torvalds 	}
17211da177e4SLinus Torvalds 
1722afe70259SJeff Mahoney 	/* we've got j_flush_mutex held, nobody is going to delete any
17231da177e4SLinus Torvalds 	 * of these lists out from underneath us
17241da177e4SLinus Torvalds 	 */
17251da177e4SLinus Torvalds 	while ((num_trans && transactions_flushed < num_trans) ||
17261da177e4SLinus Torvalds 	       (!num_trans && written < num_blocks)) {
17271da177e4SLinus Torvalds 
17281da177e4SLinus Torvalds 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1729bd4c625cSLinus Torvalds 		    atomic_read(&jl->j_commit_left)
1730bd4c625cSLinus Torvalds 		    || !(jl->j_state & LIST_DIRTY)) {
17311da177e4SLinus Torvalds 			del_from_work_list(s, jl);
17321da177e4SLinus Torvalds 			break;
17331da177e4SLinus Torvalds 		}
17341da177e4SLinus Torvalds 		ret = write_one_transaction(s, jl, &chunk);
17351da177e4SLinus Torvalds 
17361da177e4SLinus Torvalds 		if (ret < 0)
17371da177e4SLinus Torvalds 			goto done;
17381da177e4SLinus Torvalds 		transactions_flushed++;
17391da177e4SLinus Torvalds 		written += ret;
17401da177e4SLinus Torvalds 		entry = jl->j_list.next;
17411da177e4SLinus Torvalds 
17421da177e4SLinus Torvalds 		/* did we wrap? */
17431da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list) {
17441da177e4SLinus Torvalds 			break;
17451da177e4SLinus Torvalds 		}
17461da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
17471da177e4SLinus Torvalds 
17481da177e4SLinus Torvalds 		/* don't bother with older transactions */
17491da177e4SLinus Torvalds 		if (jl->j_trans_id <= orig_trans_id)
17501da177e4SLinus Torvalds 			break;
17511da177e4SLinus Torvalds 	}
17521da177e4SLinus Torvalds 	if (chunk.nr) {
17531da177e4SLinus Torvalds 		write_chunk(&chunk);
17541da177e4SLinus Torvalds 	}
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds       done:
1757afe70259SJeff Mahoney 	mutex_unlock(&journal->j_flush_mutex);
17581da177e4SLinus Torvalds 	return ret;
17591da177e4SLinus Torvalds }
17601da177e4SLinus Torvalds 
17611da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use
17621da177e4SLinus Torvalds ** all the journal list slots with tiny transactions.  These
17631da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which
17641da177e4SLinus Torvalds ** adds seeks and slows things down.
17651da177e4SLinus Torvalds **
17661da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists
17671da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal
17681da177e4SLinus Torvalds ** list updates the header block
17691da177e4SLinus Torvalds */
17701da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s,
1771bd4c625cSLinus Torvalds 				    struct reiserfs_journal_list *jl)
1772bd4c625cSLinus Torvalds {
17731da177e4SLinus Torvalds 	unsigned long len = 0;
17741da177e4SLinus Torvalds 	unsigned long cur_len;
17751da177e4SLinus Torvalds 	int ret;
17761da177e4SLinus Torvalds 	int i;
17771da177e4SLinus Torvalds 	int limit = 256;
17781da177e4SLinus Torvalds 	struct reiserfs_journal_list *tjl;
17791da177e4SLinus Torvalds 	struct reiserfs_journal_list *flush_jl;
1780600ed416SJeff Mahoney 	unsigned int trans_id;
17811da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17821da177e4SLinus Torvalds 
17831da177e4SLinus Torvalds 	flush_jl = tjl = jl;
17841da177e4SLinus Torvalds 
17851da177e4SLinus Torvalds 	/* in data logging mode, try harder to flush a lot of blocks */
17861da177e4SLinus Torvalds 	if (reiserfs_data_log(s))
17871da177e4SLinus Torvalds 		limit = 1024;
17881da177e4SLinus Torvalds 	/* flush for 256 transactions or limit blocks, whichever comes first */
17891da177e4SLinus Torvalds 	for (i = 0; i < 256 && len < limit; i++) {
17901da177e4SLinus Torvalds 		if (atomic_read(&tjl->j_commit_left) ||
17911da177e4SLinus Torvalds 		    tjl->j_trans_id < jl->j_trans_id) {
17921da177e4SLinus Torvalds 			break;
17931da177e4SLinus Torvalds 		}
17941da177e4SLinus Torvalds 		cur_len = atomic_read(&tjl->j_nonzerolen);
17951da177e4SLinus Torvalds 		if (cur_len > 0) {
17961da177e4SLinus Torvalds 			tjl->j_state &= ~LIST_TOUCHED;
17971da177e4SLinus Torvalds 		}
17981da177e4SLinus Torvalds 		len += cur_len;
17991da177e4SLinus Torvalds 		flush_jl = tjl;
18001da177e4SLinus Torvalds 		if (tjl->j_list.next == &journal->j_journal_list)
18011da177e4SLinus Torvalds 			break;
18021da177e4SLinus Torvalds 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
18031da177e4SLinus Torvalds 	}
18041da177e4SLinus Torvalds 	/* try to find a group of blocks we can flush across all the
18051da177e4SLinus Torvalds 	 ** transactions, but only bother if we've actually spanned
18061da177e4SLinus Torvalds 	 ** across multiple lists
18071da177e4SLinus Torvalds 	 */
18081da177e4SLinus Torvalds 	if (flush_jl != jl) {
18091da177e4SLinus Torvalds 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
18101da177e4SLinus Torvalds 	}
18111da177e4SLinus Torvalds 	flush_journal_list(s, flush_jl, 1);
18121da177e4SLinus Torvalds 	return 0;
18131da177e4SLinus Torvalds }
18141da177e4SLinus Torvalds 
18151da177e4SLinus Torvalds /*
18161da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh.
18171da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers.
18181da177e4SLinus Torvalds */
18191da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb,
18201da177e4SLinus Torvalds 			 struct reiserfs_journal_cnode **table,
18211da177e4SLinus Torvalds 			 struct reiserfs_journal_list *jl,
18221da177e4SLinus Torvalds 			 unsigned long block, int remove_freed)
18231da177e4SLinus Torvalds {
18241da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur;
18251da177e4SLinus Torvalds 	struct reiserfs_journal_cnode **head;
18261da177e4SLinus Torvalds 
18271da177e4SLinus Torvalds 	head = &(journal_hash(table, sb, block));
18281da177e4SLinus Torvalds 	if (!head) {
18291da177e4SLinus Torvalds 		return;
18301da177e4SLinus Torvalds 	}
18311da177e4SLinus Torvalds 	cur = *head;
18321da177e4SLinus Torvalds 	while (cur) {
1833bd4c625cSLinus Torvalds 		if (cur->blocknr == block && cur->sb == sb
1834bd4c625cSLinus Torvalds 		    && (jl == NULL || jl == cur->jlist)
1835bd4c625cSLinus Torvalds 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
18361da177e4SLinus Torvalds 			if (cur->hnext) {
18371da177e4SLinus Torvalds 				cur->hnext->hprev = cur->hprev;
18381da177e4SLinus Torvalds 			}
18391da177e4SLinus Torvalds 			if (cur->hprev) {
18401da177e4SLinus Torvalds 				cur->hprev->hnext = cur->hnext;
18411da177e4SLinus Torvalds 			} else {
18421da177e4SLinus Torvalds 				*head = cur->hnext;
18431da177e4SLinus Torvalds 			}
18441da177e4SLinus Torvalds 			cur->blocknr = 0;
18451da177e4SLinus Torvalds 			cur->sb = NULL;
18461da177e4SLinus Torvalds 			cur->state = 0;
18471da177e4SLinus Torvalds 			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
18481da177e4SLinus Torvalds 				atomic_dec(&(cur->jlist->j_nonzerolen));
18491da177e4SLinus Torvalds 			cur->bh = NULL;
18501da177e4SLinus Torvalds 			cur->jlist = NULL;
18511da177e4SLinus Torvalds 		}
18521da177e4SLinus Torvalds 		cur = cur->hnext;
18531da177e4SLinus Torvalds 	}
18541da177e4SLinus Torvalds }
18551da177e4SLinus Torvalds 
1856a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb)
1857bd4c625cSLinus Torvalds {
1858a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1859d739b42bSPekka Enberg 	kfree(journal->j_current_jl);
18601da177e4SLinus Torvalds 	journal->j_num_lists--;
18611da177e4SLinus Torvalds 
18621da177e4SLinus Torvalds 	vfree(journal->j_cnode_free_orig);
1863a9dd3643SJeff Mahoney 	free_list_bitmaps(sb, journal->j_list_bitmap);
1864a9dd3643SJeff Mahoney 	free_bitmap_nodes(sb);	/* must be after free_list_bitmaps */
18651da177e4SLinus Torvalds 	if (journal->j_header_bh) {
18661da177e4SLinus Torvalds 		brelse(journal->j_header_bh);
18671da177e4SLinus Torvalds 	}
18681da177e4SLinus Torvalds 	/* j_header_bh is on the journal dev, make sure not to release the journal
18691da177e4SLinus Torvalds 	 * dev until we brelse j_header_bh
18701da177e4SLinus Torvalds 	 */
1871a9dd3643SJeff Mahoney 	release_journal_dev(sb, journal);
18721da177e4SLinus Torvalds 	vfree(journal);
18731da177e4SLinus Torvalds }
18741da177e4SLinus Torvalds 
18751da177e4SLinus Torvalds /*
18761da177e4SLinus Torvalds ** call on unmount.  Only set error to 1 if you haven't made your way out
18771da177e4SLinus Torvalds ** of read_super() yet.  Any other caller must keep error at 0.
18781da177e4SLinus Torvalds */
1879bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th,
1880a9dd3643SJeff Mahoney 			      struct super_block *sb, int error)
1881bd4c625cSLinus Torvalds {
18821da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
18831da177e4SLinus Torvalds 	int flushed = 0;
1884a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
18851da177e4SLinus Torvalds 
18861da177e4SLinus Torvalds 	/* we only want to flush out transactions if we were called with error == 0
18871da177e4SLinus Torvalds 	 */
1888a9dd3643SJeff Mahoney 	if (!error && !(sb->s_flags & MS_RDONLY)) {
18891da177e4SLinus Torvalds 		/* end the current trans */
18901da177e4SLinus Torvalds 		BUG_ON(!th->t_trans_id);
1891a9dd3643SJeff Mahoney 		do_journal_end(th, sb, 10, FLUSH_ALL);
18921da177e4SLinus Torvalds 
18931da177e4SLinus Torvalds 		/* make sure something gets logged to force our way into the flush code */
1894a9dd3643SJeff Mahoney 		if (!journal_join(&myth, sb, 1)) {
1895a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1896a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1897bd4c625cSLinus Torvalds 						     1);
1898a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1899a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1900a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19011da177e4SLinus Torvalds 			flushed = 1;
19021da177e4SLinus Torvalds 		}
19031da177e4SLinus Torvalds 	}
19041da177e4SLinus Torvalds 
19051da177e4SLinus Torvalds 	/* this also catches errors during the do_journal_end above */
19061da177e4SLinus Torvalds 	if (!error && reiserfs_is_journal_aborted(journal)) {
19071da177e4SLinus Torvalds 		memset(&myth, 0, sizeof(myth));
1908a9dd3643SJeff Mahoney 		if (!journal_join_abort(&myth, sb, 1)) {
1909a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
1910a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
1911bd4c625cSLinus Torvalds 						     1);
1912a9dd3643SJeff Mahoney 			journal_mark_dirty(&myth, sb,
1913a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
1914a9dd3643SJeff Mahoney 			do_journal_end(&myth, sb, 1, FLUSH_ALL);
19151da177e4SLinus Torvalds 		}
19161da177e4SLinus Torvalds 	}
19171da177e4SLinus Torvalds 
19181da177e4SLinus Torvalds 	reiserfs_mounted_fs_count--;
19191da177e4SLinus Torvalds 	/* wait for all commits to finish */
1920a9dd3643SJeff Mahoney 	cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
19218ebc4232SFrederic Weisbecker 
19228ebc4232SFrederic Weisbecker 	/*
19238ebc4232SFrederic Weisbecker 	 * We must release the write lock here because
19248ebc4232SFrederic Weisbecker 	 * the workqueue job (flush_async_commit) needs this lock
19258ebc4232SFrederic Weisbecker 	 */
19268ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
19271da177e4SLinus Torvalds 	flush_workqueue(commit_wq);
19288ebc4232SFrederic Weisbecker 
19291da177e4SLinus Torvalds 	if (!reiserfs_mounted_fs_count) {
19301da177e4SLinus Torvalds 		destroy_workqueue(commit_wq);
19311da177e4SLinus Torvalds 		commit_wq = NULL;
19321da177e4SLinus Torvalds 	}
19331da177e4SLinus Torvalds 
1934a9dd3643SJeff Mahoney 	free_journal_ram(sb);
19351da177e4SLinus Torvalds 
19360523676dSFrederic Weisbecker 	reiserfs_write_lock(sb);
19370523676dSFrederic Weisbecker 
19381da177e4SLinus Torvalds 	return 0;
19391da177e4SLinus Torvalds }
19401da177e4SLinus Torvalds 
/*
 * Unmount entry point: flush all journal transactions and release all
 * allocated journal memory.  Forwards to do_journal_release() with the
 * error flag cleared and returns its result.
 */
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *sb)
{
	const int error = 0;

	return do_journal_release(th, sb, error);
}
1949bd4c625cSLinus Torvalds 
/*
 * Error-path variant of journal_release(): only call this from an error
 * condition inside reiserfs_read_super!  Forwards to do_journal_release()
 * with the error flag set and returns its result.
 */
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *sb)
{
	const int error = 1;

	return do_journal_release(th, sb, error);
}
19581da177e4SLinus Torvalds 
19591da177e4SLinus Torvalds /* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
1960a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb,
1961bd4c625cSLinus Torvalds 				       struct reiserfs_journal_desc *desc,
1962bd4c625cSLinus Torvalds 				       struct reiserfs_journal_commit *commit)
1963bd4c625cSLinus Torvalds {
19641da177e4SLinus Torvalds 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
19651da177e4SLinus Torvalds 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
1966a9dd3643SJeff Mahoney 	    get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
1967bd4c625cSLinus Torvalds 	    get_commit_trans_len(commit) <= 0) {
19681da177e4SLinus Torvalds 		return 1;
19691da177e4SLinus Torvalds 	}
19701da177e4SLinus Torvalds 	return 0;
19711da177e4SLinus Torvalds }
1972bd4c625cSLinus Torvalds 
19731da177e4SLinus Torvalds /* returns 0 if it did not find a description block
19741da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block
19751da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid
19761da177e4SLinus Torvalds */
1977a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb,
1978bd4c625cSLinus Torvalds 					struct buffer_head *d_bh,
1979600ed416SJeff Mahoney 					unsigned int *oldest_invalid_trans_id,
1980bd4c625cSLinus Torvalds 					unsigned long *newest_mount_id)
1981bd4c625cSLinus Torvalds {
19821da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
19831da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
19841da177e4SLinus Torvalds 	struct buffer_head *c_bh;
19851da177e4SLinus Torvalds 	unsigned long offset;
19861da177e4SLinus Torvalds 
19871da177e4SLinus Torvalds 	if (!d_bh)
19881da177e4SLinus Torvalds 		return 0;
19891da177e4SLinus Torvalds 
19901da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1991bd4c625cSLinus Torvalds 	if (get_desc_trans_len(desc) > 0
1992bd4c625cSLinus Torvalds 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
1993bd4c625cSLinus Torvalds 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
1994bd4c625cSLinus Torvalds 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
1995a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1996bd4c625cSLinus Torvalds 				       "journal-986: transaction "
19971da177e4SLinus Torvalds 				       "is valid returning because trans_id %d is greater than "
1998bd4c625cSLinus Torvalds 				       "oldest_invalid %lu",
1999bd4c625cSLinus Torvalds 				       get_desc_trans_id(desc),
20001da177e4SLinus Torvalds 				       *oldest_invalid_trans_id);
20011da177e4SLinus Torvalds 			return 0;
20021da177e4SLinus Torvalds 		}
2003bd4c625cSLinus Torvalds 		if (newest_mount_id
2004bd4c625cSLinus Torvalds 		    && *newest_mount_id > get_desc_mount_id(desc)) {
2005a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2006bd4c625cSLinus Torvalds 				       "journal-1087: transaction "
20071da177e4SLinus Torvalds 				       "is valid returning because mount_id %d is less than "
2008bd4c625cSLinus Torvalds 				       "newest_mount_id %lu",
2009bd4c625cSLinus Torvalds 				       get_desc_mount_id(desc),
20101da177e4SLinus Torvalds 				       *newest_mount_id);
20111da177e4SLinus Torvalds 			return -1;
20121da177e4SLinus Torvalds 		}
2013a9dd3643SJeff Mahoney 		if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2014a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2018",
201545b03d5eSJeff Mahoney 					 "Bad transaction length %d "
201645b03d5eSJeff Mahoney 					 "encountered, ignoring transaction",
2017bd4c625cSLinus Torvalds 					 get_desc_trans_len(desc));
20181da177e4SLinus Torvalds 			return -1;
20191da177e4SLinus Torvalds 		}
2020a9dd3643SJeff Mahoney 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
20211da177e4SLinus Torvalds 
20221da177e4SLinus Torvalds 		/* ok, we have a journal description block, lets see if the transaction was valid */
2023bd4c625cSLinus Torvalds 		c_bh =
2024a9dd3643SJeff Mahoney 		    journal_bread(sb,
2025a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2026bd4c625cSLinus Torvalds 				  ((offset + get_desc_trans_len(desc) +
2027a9dd3643SJeff Mahoney 				    1) % SB_ONDISK_JOURNAL_SIZE(sb)));
20281da177e4SLinus Torvalds 		if (!c_bh)
20291da177e4SLinus Torvalds 			return 0;
20301da177e4SLinus Torvalds 		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2031a9dd3643SJeff Mahoney 		if (journal_compare_desc_commit(sb, desc, commit)) {
2032a9dd3643SJeff Mahoney 			reiserfs_debug(sb, REISERFS_DEBUG_CODE,
20331da177e4SLinus Torvalds 				       "journal_transaction_is_valid, commit offset %ld had bad "
20341da177e4SLinus Torvalds 				       "time %d or length %d",
2035bd4c625cSLinus Torvalds 				       c_bh->b_blocknr -
2036a9dd3643SJeff Mahoney 				       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
20371da177e4SLinus Torvalds 				       get_commit_trans_id(commit),
20381da177e4SLinus Torvalds 				       get_commit_trans_len(commit));
20391da177e4SLinus Torvalds 			brelse(c_bh);
20401da177e4SLinus Torvalds 			if (oldest_invalid_trans_id) {
2041bd4c625cSLinus Torvalds 				*oldest_invalid_trans_id =
2042bd4c625cSLinus Torvalds 				    get_desc_trans_id(desc);
2043a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2044bd4c625cSLinus Torvalds 					       "journal-1004: "
20451da177e4SLinus Torvalds 					       "transaction_is_valid setting oldest invalid trans_id "
2046bd4c625cSLinus Torvalds 					       "to %d",
2047bd4c625cSLinus Torvalds 					       get_desc_trans_id(desc));
20481da177e4SLinus Torvalds 			}
20491da177e4SLinus Torvalds 			return -1;
20501da177e4SLinus Torvalds 		}
20511da177e4SLinus Torvalds 		brelse(c_bh);
2052a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2053bd4c625cSLinus Torvalds 			       "journal-1006: found valid "
20541da177e4SLinus Torvalds 			       "transaction start offset %llu, len %d id %d",
2055bd4c625cSLinus Torvalds 			       d_bh->b_blocknr -
2056a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2057bd4c625cSLinus Torvalds 			       get_desc_trans_len(desc),
2058bd4c625cSLinus Torvalds 			       get_desc_trans_id(desc));
20591da177e4SLinus Torvalds 		return 1;
20601da177e4SLinus Torvalds 	} else {
20611da177e4SLinus Torvalds 		return 0;
20621da177e4SLinus Torvalds 	}
20631da177e4SLinus Torvalds }
20641da177e4SLinus Torvalds 
/*
 * Call brelse() on the first @num entries of @heads, in array order.
 * Does nothing when @num is zero or negative.
 */
static void brelse_array(struct buffer_head **heads, int num)
{
	int remaining = num;

	while (remaining-- > 0)
		brelse(*heads++);
}
20721da177e4SLinus Torvalds 
20731da177e4SLinus Torvalds /*
20741da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions,
20751da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction
20761da177e4SLinus Torvalds ** is invalid, or too old.
20771da177e4SLinus Torvalds */
2078a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb,
2079bd4c625cSLinus Torvalds 				    unsigned long cur_dblock,
2080bd4c625cSLinus Torvalds 				    unsigned long oldest_start,
2081600ed416SJeff Mahoney 				    unsigned int oldest_trans_id,
2082bd4c625cSLinus Torvalds 				    unsigned long newest_mount_id)
2083bd4c625cSLinus Torvalds {
2084a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
20851da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
20861da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
2087600ed416SJeff Mahoney 	unsigned int trans_id = 0;
20881da177e4SLinus Torvalds 	struct buffer_head *c_bh;
20891da177e4SLinus Torvalds 	struct buffer_head *d_bh;
20901da177e4SLinus Torvalds 	struct buffer_head **log_blocks = NULL;
20911da177e4SLinus Torvalds 	struct buffer_head **real_blocks = NULL;
2092600ed416SJeff Mahoney 	unsigned int trans_offset;
20931da177e4SLinus Torvalds 	int i;
20941da177e4SLinus Torvalds 	int trans_half;
20951da177e4SLinus Torvalds 
2096a9dd3643SJeff Mahoney 	d_bh = journal_bread(sb, cur_dblock);
20971da177e4SLinus Torvalds 	if (!d_bh)
20981da177e4SLinus Torvalds 		return 1;
20991da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2100a9dd3643SJeff Mahoney 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2101a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
21021da177e4SLinus Torvalds 		       "journal_read_transaction, offset %llu, len %d mount_id %d",
2103a9dd3643SJeff Mahoney 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
21041da177e4SLinus Torvalds 		       get_desc_trans_len(desc), get_desc_mount_id(desc));
21051da177e4SLinus Torvalds 	if (get_desc_trans_id(desc) < oldest_trans_id) {
2106a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
21071da177e4SLinus Torvalds 			       "journal_read_trans skipping because %lu is too old",
2108bd4c625cSLinus Torvalds 			       cur_dblock -
2109a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb));
21101da177e4SLinus Torvalds 		brelse(d_bh);
21111da177e4SLinus Torvalds 		return 1;
21121da177e4SLinus Torvalds 	}
21131da177e4SLinus Torvalds 	if (get_desc_mount_id(desc) != newest_mount_id) {
2114a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
21151da177e4SLinus Torvalds 			       "journal_read_trans skipping because %d is != "
21161da177e4SLinus Torvalds 			       "newest_mount_id %lu", get_desc_mount_id(desc),
21171da177e4SLinus Torvalds 			       newest_mount_id);
21181da177e4SLinus Torvalds 		brelse(d_bh);
21191da177e4SLinus Torvalds 		return 1;
21201da177e4SLinus Torvalds 	}
2121a9dd3643SJeff Mahoney 	c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
21221da177e4SLinus Torvalds 			     ((trans_offset + get_desc_trans_len(desc) + 1) %
2123a9dd3643SJeff Mahoney 			      SB_ONDISK_JOURNAL_SIZE(sb)));
21241da177e4SLinus Torvalds 	if (!c_bh) {
21251da177e4SLinus Torvalds 		brelse(d_bh);
21261da177e4SLinus Torvalds 		return 1;
21271da177e4SLinus Torvalds 	}
21281da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2129a9dd3643SJeff Mahoney 	if (journal_compare_desc_commit(sb, desc, commit)) {
2130a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2131bd4c625cSLinus Torvalds 			       "journal_read_transaction, "
21321da177e4SLinus Torvalds 			       "commit offset %llu had bad time %d or length %d",
2133bd4c625cSLinus Torvalds 			       c_bh->b_blocknr -
2134a9dd3643SJeff Mahoney 			       SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2135bd4c625cSLinus Torvalds 			       get_commit_trans_id(commit),
2136bd4c625cSLinus Torvalds 			       get_commit_trans_len(commit));
21371da177e4SLinus Torvalds 		brelse(c_bh);
21381da177e4SLinus Torvalds 		brelse(d_bh);
21391da177e4SLinus Torvalds 		return 1;
21401da177e4SLinus Torvalds 	}
21413f8b5ee3SJeff Mahoney 
21423f8b5ee3SJeff Mahoney 	if (bdev_read_only(sb->s_bdev)) {
21433f8b5ee3SJeff Mahoney 		reiserfs_warning(sb, "clm-2076",
21443f8b5ee3SJeff Mahoney 				 "device is readonly, unable to replay log");
21453f8b5ee3SJeff Mahoney 		brelse(c_bh);
21463f8b5ee3SJeff Mahoney 		brelse(d_bh);
21473f8b5ee3SJeff Mahoney 		return -EROFS;
21483f8b5ee3SJeff Mahoney 	}
21493f8b5ee3SJeff Mahoney 
21501da177e4SLinus Torvalds 	trans_id = get_desc_trans_id(desc);
21511da177e4SLinus Torvalds 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
2152d739b42bSPekka Enberg 	log_blocks = kmalloc(get_desc_trans_len(desc) *
2153d739b42bSPekka Enberg 			     sizeof(struct buffer_head *), GFP_NOFS);
2154d739b42bSPekka Enberg 	real_blocks = kmalloc(get_desc_trans_len(desc) *
2155d739b42bSPekka Enberg 			      sizeof(struct buffer_head *), GFP_NOFS);
21561da177e4SLinus Torvalds 	if (!log_blocks || !real_blocks) {
21571da177e4SLinus Torvalds 		brelse(c_bh);
21581da177e4SLinus Torvalds 		brelse(d_bh);
2159d739b42bSPekka Enberg 		kfree(log_blocks);
2160d739b42bSPekka Enberg 		kfree(real_blocks);
2161a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1169",
216245b03d5eSJeff Mahoney 				 "kmalloc failed, unable to mount FS");
21631da177e4SLinus Torvalds 		return -1;
21641da177e4SLinus Torvalds 	}
21651da177e4SLinus Torvalds 	/* get all the buffer heads */
2166a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
21671da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
2168bd4c625cSLinus Torvalds 		log_blocks[i] =
2169a9dd3643SJeff Mahoney 		    journal_getblk(sb,
2170a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2171bd4c625cSLinus Torvalds 				   (trans_offset + 1 +
2172a9dd3643SJeff Mahoney 				    i) % SB_ONDISK_JOURNAL_SIZE(sb));
21731da177e4SLinus Torvalds 		if (i < trans_half) {
2174bd4c625cSLinus Torvalds 			real_blocks[i] =
2175a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2176bd4c625cSLinus Torvalds 				      le32_to_cpu(desc->j_realblock[i]));
21771da177e4SLinus Torvalds 		} else {
2178bd4c625cSLinus Torvalds 			real_blocks[i] =
2179a9dd3643SJeff Mahoney 			    sb_getblk(sb,
2180bd4c625cSLinus Torvalds 				      le32_to_cpu(commit->
2181bd4c625cSLinus Torvalds 						  j_realblock[i - trans_half]));
21821da177e4SLinus Torvalds 		}
2183a9dd3643SJeff Mahoney 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2184a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1207",
218545b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
218645b03d5eSJeff Mahoney 					 "Block to replay is outside of "
218745b03d5eSJeff Mahoney 					 "filesystem");
21881da177e4SLinus Torvalds 			goto abort_replay;
21891da177e4SLinus Torvalds 		}
21901da177e4SLinus Torvalds 		/* make sure we don't try to replay onto log or reserved area */
2191bd4c625cSLinus Torvalds 		if (is_block_in_log_or_reserved_area
2192a9dd3643SJeff Mahoney 		    (sb, real_blocks[i]->b_blocknr)) {
2193a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1204",
219445b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
219545b03d5eSJeff Mahoney 					 "Trying to replay onto a log block");
21961da177e4SLinus Torvalds 		      abort_replay:
21971da177e4SLinus Torvalds 			brelse_array(log_blocks, i);
21981da177e4SLinus Torvalds 			brelse_array(real_blocks, i);
21991da177e4SLinus Torvalds 			brelse(c_bh);
22001da177e4SLinus Torvalds 			brelse(d_bh);
2201d739b42bSPekka Enberg 			kfree(log_blocks);
2202d739b42bSPekka Enberg 			kfree(real_blocks);
22031da177e4SLinus Torvalds 			return -1;
22041da177e4SLinus Torvalds 		}
22051da177e4SLinus Torvalds 	}
22061da177e4SLinus Torvalds 	/* read in the log blocks, memcpy to the corresponding real block */
22071da177e4SLinus Torvalds 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
22081da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22098ebc4232SFrederic Weisbecker 
22108ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
22111da177e4SLinus Torvalds 		wait_on_buffer(log_blocks[i]);
22128ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
22138ebc4232SFrederic Weisbecker 
22141da177e4SLinus Torvalds 		if (!buffer_uptodate(log_blocks[i])) {
2215a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1212",
221645b03d5eSJeff Mahoney 					 "REPLAY FAILURE fsck required! "
221745b03d5eSJeff Mahoney 					 "buffer write failed");
2218bd4c625cSLinus Torvalds 			brelse_array(log_blocks + i,
2219bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22201da177e4SLinus Torvalds 			brelse_array(real_blocks, get_desc_trans_len(desc));
22211da177e4SLinus Torvalds 			brelse(c_bh);
22221da177e4SLinus Torvalds 			brelse(d_bh);
2223d739b42bSPekka Enberg 			kfree(log_blocks);
2224d739b42bSPekka Enberg 			kfree(real_blocks);
22251da177e4SLinus Torvalds 			return -1;
22261da177e4SLinus Torvalds 		}
2227bd4c625cSLinus Torvalds 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2228bd4c625cSLinus Torvalds 		       real_blocks[i]->b_size);
22291da177e4SLinus Torvalds 		set_buffer_uptodate(real_blocks[i]);
22301da177e4SLinus Torvalds 		brelse(log_blocks[i]);
22311da177e4SLinus Torvalds 	}
22321da177e4SLinus Torvalds 	/* flush out the real blocks */
22331da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22341da177e4SLinus Torvalds 		set_buffer_dirty(real_blocks[i]);
22359cb569d6SChristoph Hellwig 		write_dirty_buffer(real_blocks[i], WRITE);
22361da177e4SLinus Torvalds 	}
22371da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22381da177e4SLinus Torvalds 		wait_on_buffer(real_blocks[i]);
22391da177e4SLinus Torvalds 		if (!buffer_uptodate(real_blocks[i])) {
2240a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1226",
224145b03d5eSJeff Mahoney 					 "REPLAY FAILURE, fsck required! "
224245b03d5eSJeff Mahoney 					 "buffer write failed");
2243bd4c625cSLinus Torvalds 			brelse_array(real_blocks + i,
2244bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22451da177e4SLinus Torvalds 			brelse(c_bh);
22461da177e4SLinus Torvalds 			brelse(d_bh);
2247d739b42bSPekka Enberg 			kfree(log_blocks);
2248d739b42bSPekka Enberg 			kfree(real_blocks);
22491da177e4SLinus Torvalds 			return -1;
22501da177e4SLinus Torvalds 		}
22511da177e4SLinus Torvalds 		brelse(real_blocks[i]);
22521da177e4SLinus Torvalds 	}
2253bd4c625cSLinus Torvalds 	cur_dblock =
2254a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2255bd4c625cSLinus Torvalds 	    ((trans_offset + get_desc_trans_len(desc) +
2256a9dd3643SJeff Mahoney 	      2) % SB_ONDISK_JOURNAL_SIZE(sb));
2257a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2258bd4c625cSLinus Torvalds 		       "journal-1095: setting journal " "start to offset %ld",
2259a9dd3643SJeff Mahoney 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
22601da177e4SLinus Torvalds 
22611da177e4SLinus Torvalds 	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2262a9dd3643SJeff Mahoney 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
22631da177e4SLinus Torvalds 	journal->j_last_flush_trans_id = trans_id;
22641da177e4SLinus Torvalds 	journal->j_trans_id = trans_id + 1;
2265a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
2266a44c94a7SAlexander Zarochentsev 	if (journal->j_trans_id == 0)
2267a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
22681da177e4SLinus Torvalds 	brelse(c_bh);
22691da177e4SLinus Torvalds 	brelse(d_bh);
2270d739b42bSPekka Enberg 	kfree(log_blocks);
2271d739b42bSPekka Enberg 	kfree(real_blocks);
22721da177e4SLinus Torvalds 	return 0;
22731da177e4SLinus Torvalds }
22741da177e4SLinus Torvalds 
22751da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize
22761da177e4SLinus Torvalds    size (but no more than BUFNR blocks at a time). This proved to improve
22771da177e4SLinus Torvalds    mounting speed on self-rebuilding raid5 arrays at least.
22781da177e4SLinus Torvalds    Right now it is only used from journal code. But later we might use it
22791da177e4SLinus Torvalds    from other places.
22801da177e4SLinus Torvalds    Note: Do not use journal_getblk/sb_getblk functions here! */
22813ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev,
22823ee16670SJeff Mahoney 					   b_blocknr_t block, int bufsize,
22833ee16670SJeff Mahoney 					   b_blocknr_t max_block)
22841da177e4SLinus Torvalds {
22851da177e4SLinus Torvalds 	struct buffer_head *bhlist[BUFNR];
22861da177e4SLinus Torvalds 	unsigned int blocks = BUFNR;
22871da177e4SLinus Torvalds 	struct buffer_head *bh;
22881da177e4SLinus Torvalds 	int i, j;
22891da177e4SLinus Torvalds 
22901da177e4SLinus Torvalds 	bh = __getblk(dev, block, bufsize);
22911da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
22921da177e4SLinus Torvalds 		return (bh);
22931da177e4SLinus Torvalds 
22941da177e4SLinus Torvalds 	if (block + BUFNR > max_block) {
22951da177e4SLinus Torvalds 		blocks = max_block - block;
22961da177e4SLinus Torvalds 	}
22971da177e4SLinus Torvalds 	bhlist[0] = bh;
22981da177e4SLinus Torvalds 	j = 1;
22991da177e4SLinus Torvalds 	for (i = 1; i < blocks; i++) {
23001da177e4SLinus Torvalds 		bh = __getblk(dev, block + i, bufsize);
23011da177e4SLinus Torvalds 		if (buffer_uptodate(bh)) {
23021da177e4SLinus Torvalds 			brelse(bh);
23031da177e4SLinus Torvalds 			break;
2304bd4c625cSLinus Torvalds 		} else
2305bd4c625cSLinus Torvalds 			bhlist[j++] = bh;
23061da177e4SLinus Torvalds 	}
23071da177e4SLinus Torvalds 	ll_rw_block(READ, j, bhlist);
23081da177e4SLinus Torvalds 	for (i = 1; i < j; i++)
23091da177e4SLinus Torvalds 		brelse(bhlist[i]);
23101da177e4SLinus Torvalds 	bh = bhlist[0];
23111da177e4SLinus Torvalds 	wait_on_buffer(bh);
23121da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23131da177e4SLinus Torvalds 		return bh;
23141da177e4SLinus Torvalds 	brelse(bh);
23151da177e4SLinus Torvalds 	return NULL;
23161da177e4SLinus Torvalds }
23171da177e4SLinus Torvalds 
23181da177e4SLinus Torvalds /*
23191da177e4SLinus Torvalds ** read and replay the log
23201da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
23211da177e4SLinus Torvalds ** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
23221da177e4SLinus Torvalds **
23231da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
23241da177e4SLinus Torvalds **
23251da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly.
23261da177e4SLinus Torvalds */
2327a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb)
2328bd4c625cSLinus Torvalds {
2329a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
23301da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
2331600ed416SJeff Mahoney 	unsigned int oldest_trans_id = 0;
2332600ed416SJeff Mahoney 	unsigned int oldest_invalid_trans_id = 0;
23331da177e4SLinus Torvalds 	time_t start;
23341da177e4SLinus Torvalds 	unsigned long oldest_start = 0;
23351da177e4SLinus Torvalds 	unsigned long cur_dblock = 0;
23361da177e4SLinus Torvalds 	unsigned long newest_mount_id = 9;
23371da177e4SLinus Torvalds 	struct buffer_head *d_bh;
23381da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
23391da177e4SLinus Torvalds 	int valid_journal_header = 0;
23401da177e4SLinus Torvalds 	int replay_count = 0;
23411da177e4SLinus Torvalds 	int continue_replay = 1;
23421da177e4SLinus Torvalds 	int ret;
23431da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
23441da177e4SLinus Torvalds 
2345a9dd3643SJeff Mahoney 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2346a9dd3643SJeff Mahoney 	reiserfs_info(sb, "checking transaction log (%s)\n",
23471da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b));
23481da177e4SLinus Torvalds 	start = get_seconds();
23491da177e4SLinus Torvalds 
23501da177e4SLinus Torvalds 	/* step 1, read in the journal header block.  Check the transaction it says
23511da177e4SLinus Torvalds 	 ** is the first unflushed, and if that transaction is not valid,
23521da177e4SLinus Torvalds 	 ** replay is done
23531da177e4SLinus Torvalds 	 */
2354a9dd3643SJeff Mahoney 	journal->j_header_bh = journal_bread(sb,
2355a9dd3643SJeff Mahoney 					     SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2356a9dd3643SJeff Mahoney 					     + SB_ONDISK_JOURNAL_SIZE(sb));
23571da177e4SLinus Torvalds 	if (!journal->j_header_bh) {
23581da177e4SLinus Torvalds 		return 1;
23591da177e4SLinus Torvalds 	}
23601da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2361c499ec24SVladimir V. Saveliev 	if (le32_to_cpu(jh->j_first_unflushed_offset) <
2362a9dd3643SJeff Mahoney 	    SB_ONDISK_JOURNAL_SIZE(sb)
2363bd4c625cSLinus Torvalds 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2364bd4c625cSLinus Torvalds 		oldest_start =
2365a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
23661da177e4SLinus Torvalds 		    le32_to_cpu(jh->j_first_unflushed_offset);
23671da177e4SLinus Torvalds 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
23681da177e4SLinus Torvalds 		newest_mount_id = le32_to_cpu(jh->j_mount_id);
2369a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2370bd4c625cSLinus Torvalds 			       "journal-1153: found in "
23711da177e4SLinus Torvalds 			       "header: first_unflushed_offset %d, last_flushed_trans_id "
23721da177e4SLinus Torvalds 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
23731da177e4SLinus Torvalds 			       le32_to_cpu(jh->j_last_flush_trans_id));
23741da177e4SLinus Torvalds 		valid_journal_header = 1;
23751da177e4SLinus Torvalds 
23761da177e4SLinus Torvalds 		/* now, we try to read the first unflushed offset.  If it is not valid,
23771da177e4SLinus Torvalds 		 ** there is nothing more we can do, and it makes no sense to read
23781da177e4SLinus Torvalds 		 ** through the whole log.
23791da177e4SLinus Torvalds 		 */
2380bd4c625cSLinus Torvalds 		d_bh =
2381a9dd3643SJeff Mahoney 		    journal_bread(sb,
2382a9dd3643SJeff Mahoney 				  SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2383bd4c625cSLinus Torvalds 				  le32_to_cpu(jh->j_first_unflushed_offset));
2384a9dd3643SJeff Mahoney 		ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
23851da177e4SLinus Torvalds 		if (!ret) {
23861da177e4SLinus Torvalds 			continue_replay = 0;
23871da177e4SLinus Torvalds 		}
23881da177e4SLinus Torvalds 		brelse(d_bh);
23891da177e4SLinus Torvalds 		goto start_log_replay;
23901da177e4SLinus Torvalds 	}
23911da177e4SLinus Torvalds 
23921da177e4SLinus Torvalds 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
23931da177e4SLinus Torvalds 	 ** all the valid transactions, and pick out the oldest.
23941da177e4SLinus Torvalds 	 */
2395bd4c625cSLinus Torvalds 	while (continue_replay
2396bd4c625cSLinus Torvalds 	       && cur_dblock <
2397a9dd3643SJeff Mahoney 	       (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2398a9dd3643SJeff Mahoney 		SB_ONDISK_JOURNAL_SIZE(sb))) {
23991da177e4SLinus Torvalds 		/* Note that it is required for blocksize of primary fs device and journal
24001da177e4SLinus Torvalds 		   device to be the same */
2401bd4c625cSLinus Torvalds 		d_bh =
2402bd4c625cSLinus Torvalds 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
2403a9dd3643SJeff Mahoney 				    sb->s_blocksize,
2404a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2405a9dd3643SJeff Mahoney 				    SB_ONDISK_JOURNAL_SIZE(sb));
2406bd4c625cSLinus Torvalds 		ret =
2407a9dd3643SJeff Mahoney 		    journal_transaction_is_valid(sb, d_bh,
2408bd4c625cSLinus Torvalds 						 &oldest_invalid_trans_id,
2409bd4c625cSLinus Torvalds 						 &newest_mount_id);
24101da177e4SLinus Torvalds 		if (ret == 1) {
24111da177e4SLinus Torvalds 			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
24121da177e4SLinus Torvalds 			if (oldest_start == 0) {	/* init all oldest_ values */
24131da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24141da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
24151da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2416a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2417bd4c625cSLinus Torvalds 					       "journal-1179: Setting "
24181da177e4SLinus Torvalds 					       "oldest_start to offset %llu, trans_id %lu",
2419bd4c625cSLinus Torvalds 					       oldest_start -
2420bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2421a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
24221da177e4SLinus Torvalds 			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
24231da177e4SLinus Torvalds 				/* one we just read was older */
24241da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24251da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
2426a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2427bd4c625cSLinus Torvalds 					       "journal-1180: Resetting "
24281da177e4SLinus Torvalds 					       "oldest_start to offset %lu, trans_id %lu",
2429bd4c625cSLinus Torvalds 					       oldest_start -
2430bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2431a9dd3643SJeff Mahoney 					       (sb), oldest_trans_id);
24321da177e4SLinus Torvalds 			}
24331da177e4SLinus Torvalds 			if (newest_mount_id < get_desc_mount_id(desc)) {
24341da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2435a9dd3643SJeff Mahoney 				reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2436bd4c625cSLinus Torvalds 					       "journal-1299: Setting "
2437bd4c625cSLinus Torvalds 					       "newest_mount_id to %d",
2438bd4c625cSLinus Torvalds 					       get_desc_mount_id(desc));
24391da177e4SLinus Torvalds 			}
24401da177e4SLinus Torvalds 			cur_dblock += get_desc_trans_len(desc) + 2;
24411da177e4SLinus Torvalds 		} else {
24421da177e4SLinus Torvalds 			cur_dblock++;
24431da177e4SLinus Torvalds 		}
24441da177e4SLinus Torvalds 		brelse(d_bh);
24451da177e4SLinus Torvalds 	}
24461da177e4SLinus Torvalds 
24471da177e4SLinus Torvalds       start_log_replay:
24481da177e4SLinus Torvalds 	cur_dblock = oldest_start;
24491da177e4SLinus Torvalds 	if (oldest_trans_id) {
2450a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2451bd4c625cSLinus Torvalds 			       "journal-1206: Starting replay "
24521da177e4SLinus Torvalds 			       "from offset %llu, trans_id %lu",
2453a9dd3643SJeff Mahoney 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
24541da177e4SLinus Torvalds 			       oldest_trans_id);
24551da177e4SLinus Torvalds 
24561da177e4SLinus Torvalds 	}
24571da177e4SLinus Torvalds 	replay_count = 0;
24581da177e4SLinus Torvalds 	while (continue_replay && oldest_trans_id > 0) {
2459bd4c625cSLinus Torvalds 		ret =
2460a9dd3643SJeff Mahoney 		    journal_read_transaction(sb, cur_dblock, oldest_start,
2461bd4c625cSLinus Torvalds 					     oldest_trans_id, newest_mount_id);
24621da177e4SLinus Torvalds 		if (ret < 0) {
24631da177e4SLinus Torvalds 			return ret;
24641da177e4SLinus Torvalds 		} else if (ret != 0) {
24651da177e4SLinus Torvalds 			break;
24661da177e4SLinus Torvalds 		}
2467bd4c625cSLinus Torvalds 		cur_dblock =
2468a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
24691da177e4SLinus Torvalds 		replay_count++;
24701da177e4SLinus Torvalds 		if (cur_dblock == oldest_start)
24711da177e4SLinus Torvalds 			break;
24721da177e4SLinus Torvalds 	}
24731da177e4SLinus Torvalds 
24741da177e4SLinus Torvalds 	if (oldest_trans_id == 0) {
2475a9dd3643SJeff Mahoney 		reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2476bd4c625cSLinus Torvalds 			       "journal-1225: No valid " "transactions found");
24771da177e4SLinus Torvalds 	}
24781da177e4SLinus Torvalds 	/* j_start does not get set correctly if we don't replay any transactions.
24791da177e4SLinus Torvalds 	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
24801da177e4SLinus Torvalds 	 ** copy the trans_id from the header
24811da177e4SLinus Torvalds 	 */
24821da177e4SLinus Torvalds 	if (valid_journal_header && replay_count == 0) {
24831da177e4SLinus Torvalds 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2484bd4c625cSLinus Torvalds 		journal->j_trans_id =
2485bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2486a44c94a7SAlexander Zarochentsev 		/* check for trans_id overflow */
2487a44c94a7SAlexander Zarochentsev 		if (journal->j_trans_id == 0)
2488a44c94a7SAlexander Zarochentsev 			journal->j_trans_id = 10;
2489bd4c625cSLinus Torvalds 		journal->j_last_flush_trans_id =
2490bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id);
24911da177e4SLinus Torvalds 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
24921da177e4SLinus Torvalds 	} else {
24931da177e4SLinus Torvalds 		journal->j_mount_id = newest_mount_id + 1;
24941da177e4SLinus Torvalds 	}
2495a9dd3643SJeff Mahoney 	reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
24961da177e4SLinus Torvalds 		       "newest_mount_id to %lu", journal->j_mount_id);
24971da177e4SLinus Torvalds 	journal->j_first_unflushed_offset = journal->j_start;
24981da177e4SLinus Torvalds 	if (replay_count > 0) {
2499a9dd3643SJeff Mahoney 		reiserfs_info(sb,
2500bd4c625cSLinus Torvalds 			      "replayed %d transactions in %lu seconds\n",
25011da177e4SLinus Torvalds 			      replay_count, get_seconds() - start);
25021da177e4SLinus Torvalds 	}
2503a9dd3643SJeff Mahoney 	if (!bdev_read_only(sb->s_bdev) &&
2504a9dd3643SJeff Mahoney 	    _update_journal_header_block(sb, journal->j_start,
2505bd4c625cSLinus Torvalds 					 journal->j_last_flush_trans_id)) {
25061da177e4SLinus Torvalds 		/* replay failed, caller must call free_journal_ram and abort
25071da177e4SLinus Torvalds 		 ** the mount
25081da177e4SLinus Torvalds 		 */
25091da177e4SLinus Torvalds 		return -1;
25101da177e4SLinus Torvalds 	}
25111da177e4SLinus Torvalds 	return 0;
25121da177e4SLinus Torvalds }
25131da177e4SLinus Torvalds 
25141da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
25151da177e4SLinus Torvalds {
25161da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
25178c777cc4SPekka Enberg 	jl = kzalloc(sizeof(struct reiserfs_journal_list),
25188c777cc4SPekka Enberg 		     GFP_NOFS | __GFP_NOFAIL);
25191da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_list);
25201da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_working_list);
25211da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
25221da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_bh_list);
252390415deaSJeff Mahoney 	mutex_init(&jl->j_commit_mutex);
25241da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_num_lists++;
25251da177e4SLinus Torvalds 	get_journal_list(jl);
25261da177e4SLinus Torvalds 	return jl;
25271da177e4SLinus Torvalds }
25281da177e4SLinus Torvalds 
2529a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb)
2530bd4c625cSLinus Torvalds {
2531a9dd3643SJeff Mahoney 	SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
25321da177e4SLinus Torvalds }
25331da177e4SLinus Torvalds 
25341da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
25351da177e4SLinus Torvalds 			       struct reiserfs_journal *journal)
25361da177e4SLinus Torvalds {
25371da177e4SLinus Torvalds 	int result;
25381da177e4SLinus Torvalds 
25391da177e4SLinus Torvalds 	result = 0;
25401da177e4SLinus Torvalds 
254186098fa0SChristoph Hellwig 	if (journal->j_dev_bd != NULL) {
2542e5eb8caaSAl Viro 		result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
25431da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
25441da177e4SLinus Torvalds 	}
25451da177e4SLinus Torvalds 
25461da177e4SLinus Torvalds 	if (result != 0) {
254745b03d5eSJeff Mahoney 		reiserfs_warning(super, "sh-457",
254845b03d5eSJeff Mahoney 				 "Cannot release journal device: %i", result);
25491da177e4SLinus Torvalds 	}
25501da177e4SLinus Torvalds 	return result;
25511da177e4SLinus Torvalds }
25521da177e4SLinus Torvalds 
25531da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super,
25541da177e4SLinus Torvalds 			    struct reiserfs_journal *journal,
25551da177e4SLinus Torvalds 			    const char *jdev_name)
25561da177e4SLinus Torvalds {
25571da177e4SLinus Torvalds 	int result;
25581da177e4SLinus Torvalds 	dev_t jdev;
2559e525fd89STejun Heo 	fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
25601da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
25611da177e4SLinus Torvalds 
25621da177e4SLinus Torvalds 	result = 0;
25631da177e4SLinus Torvalds 
25641da177e4SLinus Torvalds 	journal->j_dev_bd = NULL;
25651da177e4SLinus Torvalds 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
25661da177e4SLinus Torvalds 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
25671da177e4SLinus Torvalds 
25681da177e4SLinus Torvalds 	if (bdev_read_only(super->s_bdev))
25691da177e4SLinus Torvalds 		blkdev_mode = FMODE_READ;
25701da177e4SLinus Torvalds 
25711da177e4SLinus Torvalds 	/* there is no "jdev" option and journal is on separate device */
25721da177e4SLinus Torvalds 	if ((!jdev_name || !jdev_name[0])) {
2573e525fd89STejun Heo 		if (jdev == super->s_dev)
2574e525fd89STejun Heo 			blkdev_mode &= ~FMODE_EXCL;
2575d4d77629STejun Heo 		journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
2576d4d77629STejun Heo 						      journal);
2577e5eb8caaSAl Viro 		journal->j_dev_mode = blkdev_mode;
25781da177e4SLinus Torvalds 		if (IS_ERR(journal->j_dev_bd)) {
25791da177e4SLinus Torvalds 			result = PTR_ERR(journal->j_dev_bd);
25801da177e4SLinus Torvalds 			journal->j_dev_bd = NULL;
258145b03d5eSJeff Mahoney 			reiserfs_warning(super, "sh-458",
25821da177e4SLinus Torvalds 					 "cannot init journal device '%s': %i",
25831da177e4SLinus Torvalds 					 __bdevname(jdev, b), result);
25841da177e4SLinus Torvalds 			return result;
2585e525fd89STejun Heo 		} else if (jdev != super->s_dev)
25861da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
258786098fa0SChristoph Hellwig 
25881da177e4SLinus Torvalds 		return 0;
25891da177e4SLinus Torvalds 	}
25901da177e4SLinus Torvalds 
2591e5eb8caaSAl Viro 	journal->j_dev_mode = blkdev_mode;
2592d4d77629STejun Heo 	journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
259386098fa0SChristoph Hellwig 	if (IS_ERR(journal->j_dev_bd)) {
259486098fa0SChristoph Hellwig 		result = PTR_ERR(journal->j_dev_bd);
259586098fa0SChristoph Hellwig 		journal->j_dev_bd = NULL;
259686098fa0SChristoph Hellwig 		reiserfs_warning(super,
259786098fa0SChristoph Hellwig 				 "journal_init_dev: Cannot open '%s': %i",
259886098fa0SChristoph Hellwig 				 jdev_name, result);
259986098fa0SChristoph Hellwig 		return result;
260086098fa0SChristoph Hellwig 	}
260186098fa0SChristoph Hellwig 
26021da177e4SLinus Torvalds 	set_blocksize(journal->j_dev_bd, super->s_blocksize);
2603bd4c625cSLinus Torvalds 	reiserfs_info(super,
2604bd4c625cSLinus Torvalds 		      "journal_init_dev: journal device: %s\n",
260574f9f974SEdward Shishkin 		      bdevname(journal->j_dev_bd, b));
260686098fa0SChristoph Hellwig 	return 0;
26071da177e4SLinus Torvalds }
26081da177e4SLinus Torvalds 
2609cf3d0b81SEdward Shishkin /**
2610cf3d0b81SEdward Shishkin  * When creating/tuning a file system user can assign some
2611cf3d0b81SEdward Shishkin  * journal params within boundaries which depend on the ratio
2612cf3d0b81SEdward Shishkin  * blocksize/standard_blocksize.
2613cf3d0b81SEdward Shishkin  *
2614cf3d0b81SEdward Shishkin  * For blocks >= standard_blocksize transaction size should
2615cf3d0b81SEdward Shishkin  * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more
2616cf3d0b81SEdward Shishkin  * then JOURNAL_TRANS_MAX_DEFAULT.
2617cf3d0b81SEdward Shishkin  *
2618cf3d0b81SEdward Shishkin  * For blocks < standard_blocksize these boundaries should be
2619cf3d0b81SEdward Shishkin  * decreased proportionally.
2620cf3d0b81SEdward Shishkin  */
2621cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096)
2622cf3d0b81SEdward Shishkin 
2623a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb,
2624cf3d0b81SEdward Shishkin 				     struct reiserfs_journal *journal)
2625cf3d0b81SEdward Shishkin {
2626cf3d0b81SEdward Shishkin         if (journal->j_trans_max) {
2627cf3d0b81SEdward Shishkin 	        /* Non-default journal params.
2628cf3d0b81SEdward Shishkin 		   Do sanity check for them. */
2629cf3d0b81SEdward Shishkin 	        int ratio = 1;
2630a9dd3643SJeff Mahoney 		if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2631a9dd3643SJeff Mahoney 		        ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2632cf3d0b81SEdward Shishkin 
2633cf3d0b81SEdward Shishkin 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2634cf3d0b81SEdward Shishkin 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2635a9dd3643SJeff Mahoney 		    SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2636cf3d0b81SEdward Shishkin 		    JOURNAL_MIN_RATIO) {
2637a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-462",
263845b03d5eSJeff Mahoney 					 "bad transaction max size (%u). "
263945b03d5eSJeff Mahoney 					 "FSCK?", journal->j_trans_max);
2640cf3d0b81SEdward Shishkin 			return 1;
2641cf3d0b81SEdward Shishkin 		}
2642cf3d0b81SEdward Shishkin 		if (journal->j_max_batch != (journal->j_trans_max) *
2643cf3d0b81SEdward Shishkin 		        JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2644a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-463",
264545b03d5eSJeff Mahoney 					 "bad transaction max batch (%u). "
264645b03d5eSJeff Mahoney 					 "FSCK?", journal->j_max_batch);
2647cf3d0b81SEdward Shishkin 			return 1;
2648cf3d0b81SEdward Shishkin 		}
2649cf3d0b81SEdward Shishkin 	} else {
2650cf3d0b81SEdward Shishkin 		/* Default journal params.
2651cf3d0b81SEdward Shishkin                    The file system was created by old version
2652cf3d0b81SEdward Shishkin 		   of mkreiserfs, so some fields contain zeros,
2653cf3d0b81SEdward Shishkin 		   and we need to advise proper values for them */
2654a9dd3643SJeff Mahoney 		if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2655a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2656a9dd3643SJeff Mahoney 					 sb->s_blocksize);
265745b03d5eSJeff Mahoney 			return 1;
265845b03d5eSJeff Mahoney 		}
2659cf3d0b81SEdward Shishkin 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2660cf3d0b81SEdward Shishkin 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2661cf3d0b81SEdward Shishkin 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2662cf3d0b81SEdward Shishkin 	}
2663cf3d0b81SEdward Shishkin 	return 0;
2664cf3d0b81SEdward Shishkin }
2665cf3d0b81SEdward Shishkin 
26661da177e4SLinus Torvalds /*
26671da177e4SLinus Torvalds ** must be called once on fs mount.  calls journal_read for you
26681da177e4SLinus Torvalds */
2669a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name,
2670bd4c625cSLinus Torvalds 		 int old_format, unsigned int commit_max_age)
2671bd4c625cSLinus Torvalds {
2672a9dd3643SJeff Mahoney 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
26731da177e4SLinus Torvalds 	struct buffer_head *bhjh;
26741da177e4SLinus Torvalds 	struct reiserfs_super_block *rs;
26751da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
26761da177e4SLinus Torvalds 	struct reiserfs_journal *journal;
26771da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
26781da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
267998ea3f50SFrederic Weisbecker 	int ret;
26801da177e4SLinus Torvalds 
268198ea3f50SFrederic Weisbecker 	/*
268298ea3f50SFrederic Weisbecker 	 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
268398ea3f50SFrederic Weisbecker 	 * dependency inversion warnings.
268498ea3f50SFrederic Weisbecker 	 */
268598ea3f50SFrederic Weisbecker 	reiserfs_write_unlock(sb);
2686558feb08SJoe Perches 	journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
26871da177e4SLinus Torvalds 	if (!journal) {
2688a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1256",
268945b03d5eSJeff Mahoney 				 "unable to get memory for journal structure");
269098ea3f50SFrederic Weisbecker 		reiserfs_write_lock(sb);
26911da177e4SLinus Torvalds 		return 1;
26921da177e4SLinus Torvalds 	}
26931da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
26941da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_prealloc_list);
26951da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_working_list);
26961da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_journal_list);
26971da177e4SLinus Torvalds 	journal->j_persistent_trans = 0;
269898ea3f50SFrederic Weisbecker 	ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
269998ea3f50SFrederic Weisbecker 					   reiserfs_bmap_count(sb));
270098ea3f50SFrederic Weisbecker 	reiserfs_write_lock(sb);
270198ea3f50SFrederic Weisbecker 	if (ret)
27021da177e4SLinus Torvalds 		goto free_and_return;
270398ea3f50SFrederic Weisbecker 
2704a9dd3643SJeff Mahoney 	allocate_bitmap_nodes(sb);
27051da177e4SLinus Torvalds 
27061da177e4SLinus Torvalds 	/* reserved for journal area support */
2707a9dd3643SJeff Mahoney 	SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2708bd4c625cSLinus Torvalds 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2709a9dd3643SJeff Mahoney 						 / sb->s_blocksize +
2710a9dd3643SJeff Mahoney 						 reiserfs_bmap_count(sb) +
2711bd4c625cSLinus Torvalds 						 1 :
2712bd4c625cSLinus Torvalds 						 REISERFS_DISK_OFFSET_IN_BYTES /
2713a9dd3643SJeff Mahoney 						 sb->s_blocksize + 2);
27141da177e4SLinus Torvalds 
271525985edcSLucas De Marchi 	/* Sanity check to see is the standard journal fitting within first bitmap
27161da177e4SLinus Torvalds 	   (actual for small blocksizes) */
2717a9dd3643SJeff Mahoney 	if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2718a9dd3643SJeff Mahoney 	    (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2719a9dd3643SJeff Mahoney 	     SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2720a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1393",
272145b03d5eSJeff Mahoney 				 "journal does not fit for area addressed "
272245b03d5eSJeff Mahoney 				 "by first of bitmap blocks. It starts at "
27231da177e4SLinus Torvalds 				 "%u and its size is %u. Block size %ld",
2724a9dd3643SJeff Mahoney 				 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2725a9dd3643SJeff Mahoney 				 SB_ONDISK_JOURNAL_SIZE(sb),
2726a9dd3643SJeff Mahoney 				 sb->s_blocksize);
27271da177e4SLinus Torvalds 		goto free_and_return;
27281da177e4SLinus Torvalds 	}
27291da177e4SLinus Torvalds 
2730193be0eeSFrederic Weisbecker 	/*
2731193be0eeSFrederic Weisbecker 	 * We need to unlock here to avoid creating the following
2732193be0eeSFrederic Weisbecker 	 * dependency:
2733193be0eeSFrederic Weisbecker 	 * reiserfs_lock -> sysfs_mutex
2734193be0eeSFrederic Weisbecker 	 * Because the reiserfs mmap path creates the following dependency:
2735193be0eeSFrederic Weisbecker 	 * mm->mmap -> reiserfs_lock, hence we have
2736193be0eeSFrederic Weisbecker 	 * mm->mmap -> reiserfs_lock ->sysfs_mutex
2737193be0eeSFrederic Weisbecker 	 * This would ends up in a circular dependency with sysfs readdir path
2738193be0eeSFrederic Weisbecker 	 * which does sysfs_mutex -> mm->mmap_sem
2739193be0eeSFrederic Weisbecker 	 * This is fine because the reiserfs lock is useless in mount path,
2740193be0eeSFrederic Weisbecker 	 * at least until we call journal_begin. We keep it for paranoid
2741193be0eeSFrederic Weisbecker 	 * reasons.
2742193be0eeSFrederic Weisbecker 	 */
2743193be0eeSFrederic Weisbecker 	reiserfs_write_unlock(sb);
2744a9dd3643SJeff Mahoney 	if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2745193be0eeSFrederic Weisbecker 		reiserfs_write_lock(sb);
2746a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-462",
274745b03d5eSJeff Mahoney 				 "unable to initialize jornal device");
27481da177e4SLinus Torvalds 		goto free_and_return;
27491da177e4SLinus Torvalds 	}
2750193be0eeSFrederic Weisbecker 	reiserfs_write_lock(sb);
27511da177e4SLinus Torvalds 
2752a9dd3643SJeff Mahoney 	rs = SB_DISK_SUPER_BLOCK(sb);
27531da177e4SLinus Torvalds 
27541da177e4SLinus Torvalds 	/* read journal header */
2755a9dd3643SJeff Mahoney 	bhjh = journal_bread(sb,
2756a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2757a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb));
27581da177e4SLinus Torvalds 	if (!bhjh) {
2759a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-459",
276045b03d5eSJeff Mahoney 				 "unable to read journal header");
27611da177e4SLinus Torvalds 		goto free_and_return;
27621da177e4SLinus Torvalds 	}
27631da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
27641da177e4SLinus Torvalds 
27651da177e4SLinus Torvalds 	/* make sure that journal matches to the super block */
2766bd4c625cSLinus Torvalds 	if (is_reiserfs_jr(rs)
2767bd4c625cSLinus Torvalds 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2768bd4c625cSLinus Torvalds 		sb_jp_journal_magic(rs))) {
2769a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "sh-460",
277045b03d5eSJeff Mahoney 				 "journal header magic %x (device %s) does "
277145b03d5eSJeff Mahoney 				 "not match to magic found in super block %x",
277245b03d5eSJeff Mahoney 				 jh->jh_journal.jp_journal_magic,
27731da177e4SLinus Torvalds 				 bdevname(journal->j_dev_bd, b),
27741da177e4SLinus Torvalds 				 sb_jp_journal_magic(rs));
27751da177e4SLinus Torvalds 		brelse(bhjh);
27761da177e4SLinus Torvalds 		goto free_and_return;
27771da177e4SLinus Torvalds 	}
27781da177e4SLinus Torvalds 
27791da177e4SLinus Torvalds 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
27801da177e4SLinus Torvalds 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2781bd4c625cSLinus Torvalds 	journal->j_max_commit_age =
2782bd4c625cSLinus Torvalds 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
27831da177e4SLinus Torvalds 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
27841da177e4SLinus Torvalds 
2785a9dd3643SJeff Mahoney 	if (check_advise_trans_params(sb, journal) != 0)
2786cf3d0b81SEdward Shishkin 	        goto free_and_return;
27871da177e4SLinus Torvalds 	journal->j_default_max_commit_age = journal->j_max_commit_age;
27881da177e4SLinus Torvalds 
27891da177e4SLinus Torvalds 	if (commit_max_age != 0) {
27901da177e4SLinus Torvalds 		journal->j_max_commit_age = commit_max_age;
27911da177e4SLinus Torvalds 		journal->j_max_trans_age = commit_max_age;
27921da177e4SLinus Torvalds 	}
27931da177e4SLinus Torvalds 
2794a9dd3643SJeff Mahoney 	reiserfs_info(sb, "journal params: device %s, size %u, "
27951da177e4SLinus Torvalds 		      "journal first block %u, max trans len %u, max batch %u, "
27961da177e4SLinus Torvalds 		      "max commit age %u, max trans age %u\n",
27971da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b),
2798a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_SIZE(sb),
2799a9dd3643SJeff Mahoney 		      SB_ONDISK_JOURNAL_1st_BLOCK(sb),
28001da177e4SLinus Torvalds 		      journal->j_trans_max,
28011da177e4SLinus Torvalds 		      journal->j_max_batch,
2802bd4c625cSLinus Torvalds 		      journal->j_max_commit_age, journal->j_max_trans_age);
28031da177e4SLinus Torvalds 
28041da177e4SLinus Torvalds 	brelse(bhjh);
28051da177e4SLinus Torvalds 
28061da177e4SLinus Torvalds 	journal->j_list_bitmap_index = 0;
2807a9dd3643SJeff Mahoney 	journal_list_init(sb);
28081da177e4SLinus Torvalds 
2809bd4c625cSLinus Torvalds 	memset(journal->j_list_hash_table, 0,
2810bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
28111da177e4SLinus Torvalds 
28121da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_dirty_buffers);
28131da177e4SLinus Torvalds 	spin_lock_init(&journal->j_dirty_buffers_lock);
28141da177e4SLinus Torvalds 
28151da177e4SLinus Torvalds 	journal->j_start = 0;
28161da177e4SLinus Torvalds 	journal->j_len = 0;
28171da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
28181da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
28191da177e4SLinus Torvalds 	atomic_set(&(journal->j_async_throttle), 0);
28201da177e4SLinus Torvalds 	journal->j_bcount = 0;
28211da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
28221da177e4SLinus Torvalds 	journal->j_last = NULL;
28231da177e4SLinus Torvalds 	journal->j_first = NULL;
28241da177e4SLinus Torvalds 	init_waitqueue_head(&(journal->j_join_wait));
2825f68215c4SJeff Mahoney 	mutex_init(&journal->j_mutex);
2826afe70259SJeff Mahoney 	mutex_init(&journal->j_flush_mutex);
28271da177e4SLinus Torvalds 
28281da177e4SLinus Torvalds 	journal->j_trans_id = 10;
28291da177e4SLinus Torvalds 	journal->j_mount_id = 10;
28301da177e4SLinus Torvalds 	journal->j_state = 0;
28311da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
2832bbec9191SFrederic Weisbecker 	reiserfs_write_unlock(sb);
28331da177e4SLinus Torvalds 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2834bbec9191SFrederic Weisbecker 	reiserfs_write_lock(sb);
28351da177e4SLinus Torvalds 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
28361da177e4SLinus Torvalds 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
28371da177e4SLinus Torvalds 	journal->j_cnode_used = 0;
28381da177e4SLinus Torvalds 	journal->j_must_wait = 0;
28391da177e4SLinus Torvalds 
2840576f6d79SJeff Mahoney 	if (journal->j_cnode_free == 0) {
2841a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2842576f6d79SJeff Mahoney 		                 "allocation failed (%ld bytes). Journal is "
2843576f6d79SJeff Mahoney 		                 "too large for available memory. Usually "
2844576f6d79SJeff Mahoney 		                 "this is due to a journal that is too large.",
2845576f6d79SJeff Mahoney 		                 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2846576f6d79SJeff Mahoney         	goto free_and_return;
2847576f6d79SJeff Mahoney 	}
2848576f6d79SJeff Mahoney 
2849a9dd3643SJeff Mahoney 	init_journal_hash(sb);
28501da177e4SLinus Torvalds 	jl = journal->j_current_jl;
2851a9dd3643SJeff Mahoney 	jl->j_list_bitmap = get_list_bitmap(sb, jl);
28521da177e4SLinus Torvalds 	if (!jl->j_list_bitmap) {
2853a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-2005",
285445b03d5eSJeff Mahoney 				 "get_list_bitmap failed for journal list 0");
28551da177e4SLinus Torvalds 		goto free_and_return;
28561da177e4SLinus Torvalds 	}
2857a9dd3643SJeff Mahoney 	if (journal_read(sb) < 0) {
2858a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "reiserfs-2006",
285945b03d5eSJeff Mahoney 				 "Replay Failure, unable to mount");
28601da177e4SLinus Torvalds 		goto free_and_return;
28611da177e4SLinus Torvalds 	}
28621da177e4SLinus Torvalds 
28631da177e4SLinus Torvalds 	reiserfs_mounted_fs_count++;
286448f6ba5eSFrederic Weisbecker 	if (reiserfs_mounted_fs_count <= 1) {
286548f6ba5eSFrederic Weisbecker 		reiserfs_write_unlock(sb);
286628aadf51STejun Heo 		commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
286748f6ba5eSFrederic Weisbecker 		reiserfs_write_lock(sb);
286848f6ba5eSFrederic Weisbecker 	}
28691da177e4SLinus Torvalds 
2870c4028958SDavid Howells 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2871a9dd3643SJeff Mahoney 	journal->j_work_sb = sb;
28721da177e4SLinus Torvalds 	return 0;
28731da177e4SLinus Torvalds       free_and_return:
2874a9dd3643SJeff Mahoney 	free_journal_ram(sb);
28751da177e4SLinus Torvalds 	return 1;
28761da177e4SLinus Torvalds }
28771da177e4SLinus Torvalds 
28781da177e4SLinus Torvalds /*
28791da177e4SLinus Torvalds ** test for a polite end of the current transaction.  Used by file_write, and should
28801da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single
28811da177e4SLinus Torvalds ** transaction
28821da177e4SLinus Torvalds */
2883bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2884bd4c625cSLinus Torvalds 				   int new_alloc)
2885bd4c625cSLinus Torvalds {
28861da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
28871da177e4SLinus Torvalds 	time_t now = get_seconds();
28881da177e4SLinus Torvalds 	/* cannot restart while nested */
28891da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
28901da177e4SLinus Torvalds 	if (th->t_refcount > 1)
28911da177e4SLinus Torvalds 		return 0;
28921da177e4SLinus Torvalds 	if (journal->j_must_wait > 0 ||
28931da177e4SLinus Torvalds 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
28941da177e4SLinus Torvalds 	    atomic_read(&(journal->j_jlock)) ||
28951da177e4SLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
28961da177e4SLinus Torvalds 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
28971da177e4SLinus Torvalds 		return 1;
28981da177e4SLinus Torvalds 	}
28996ae1ea44SChris Mason 	/* protected by the BKL here */
29006ae1ea44SChris Mason 	journal->j_len_alloc += new_alloc;
29016ae1ea44SChris Mason 	th->t_blocks_allocated += new_alloc ;
29021da177e4SLinus Torvalds 	return 0;
29031da177e4SLinus Torvalds }
29041da177e4SLinus Torvalds 
29051da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the
29061da177e4SLinus Torvalds ** kernel_lock to be held
29071da177e4SLinus Torvalds */
2908bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2909bd4c625cSLinus Torvalds {
29101da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29111da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29121da177e4SLinus Torvalds 	journal->j_must_wait = 1;
29131da177e4SLinus Torvalds 	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
29141da177e4SLinus Torvalds 	return;
29151da177e4SLinus Torvalds }
29161da177e4SLinus Torvalds 
29171da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
29181da177e4SLinus Torvalds ** require BKL
29191da177e4SLinus Torvalds */
2920bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s)
2921bd4c625cSLinus Torvalds {
29221da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29231da177e4SLinus Torvalds 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
29241da177e4SLinus Torvalds 	wake_up(&journal->j_join_wait);
29251da177e4SLinus Torvalds }
29261da177e4SLinus Torvalds 
29271da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
29281da177e4SLinus Torvalds ** require BKL
29291da177e4SLinus Torvalds */
2930bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s)
2931bd4c625cSLinus Torvalds {
29321da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29331da177e4SLinus Torvalds 	wait_event(journal->j_join_wait,
29341da177e4SLinus Torvalds 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
29351da177e4SLinus Torvalds }
29361da177e4SLinus Torvalds 
2937bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s)
2938bd4c625cSLinus Torvalds {
29391da177e4SLinus Torvalds 	wait_queue_t wait;
29401da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29411da177e4SLinus Torvalds 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
29421da177e4SLinus Torvalds 
29431da177e4SLinus Torvalds 	/*
29441da177e4SLinus Torvalds 	 * we don't want to use wait_event here because
29451da177e4SLinus Torvalds 	 * we only want to wait once.
29461da177e4SLinus Torvalds 	 */
29471da177e4SLinus Torvalds 	init_waitqueue_entry(&wait, current);
29481da177e4SLinus Torvalds 	add_wait_queue(&journal->j_join_wait, &wait);
29491da177e4SLinus Torvalds 	set_current_state(TASK_UNINTERRUPTIBLE);
29508ebc4232SFrederic Weisbecker 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
29518ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(s);
29521da177e4SLinus Torvalds 		schedule();
29538ebc4232SFrederic Weisbecker 		reiserfs_write_lock(s);
29548ebc4232SFrederic Weisbecker 	}
29555ab2f7e0SMilind Arun Choudhary 	__set_current_state(TASK_RUNNING);
29561da177e4SLinus Torvalds 	remove_wait_queue(&journal->j_join_wait, &wait);
29571da177e4SLinus Torvalds }
29581da177e4SLinus Torvalds 
2959bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s)
2960bd4c625cSLinus Torvalds {
29611da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29621da177e4SLinus Torvalds 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
29631da177e4SLinus Torvalds 		wake_up(&journal->j_join_wait);
29641da177e4SLinus Torvalds }
29651da177e4SLinus Torvalds 
2966600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
29671da177e4SLinus Torvalds {
29681da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
29691da177e4SLinus Torvalds 	unsigned long bcount = journal->j_bcount;
29701da177e4SLinus Torvalds 	while (1) {
29718ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
2972041e0e3bSNishanth Aravamudan 		schedule_timeout_uninterruptible(1);
29738ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
29741da177e4SLinus Torvalds 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
29751da177e4SLinus Torvalds 		while ((atomic_read(&journal->j_wcount) > 0 ||
29761da177e4SLinus Torvalds 			atomic_read(&journal->j_jlock)) &&
29771da177e4SLinus Torvalds 		       journal->j_trans_id == trans_id) {
29781da177e4SLinus Torvalds 			queue_log_writer(sb);
29791da177e4SLinus Torvalds 		}
29801da177e4SLinus Torvalds 		if (journal->j_trans_id != trans_id)
29811da177e4SLinus Torvalds 			break;
29821da177e4SLinus Torvalds 		if (bcount == journal->j_bcount)
29831da177e4SLinus Torvalds 			break;
29841da177e4SLinus Torvalds 		bcount = journal->j_bcount;
29851da177e4SLinus Torvalds 	}
29861da177e4SLinus Torvalds }
29871da177e4SLinus Torvalds 
29881da177e4SLinus Torvalds /* join == true if you must join an existing transaction.
29891da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish
29901da177e4SLinus Torvalds **
29911da177e4SLinus Torvalds ** this will block until the transaction is joinable.  send the number of blocks you
29921da177e4SLinus Torvalds ** expect to use in nblocks.
29931da177e4SLinus Torvalds */
2994bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2995a9dd3643SJeff Mahoney 			      struct super_block *sb, unsigned long nblocks,
2996bd4c625cSLinus Torvalds 			      int join)
2997bd4c625cSLinus Torvalds {
29981da177e4SLinus Torvalds 	time_t now = get_seconds();
2999600ed416SJeff Mahoney 	unsigned int old_trans_id;
3000a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
30011da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
30021da177e4SLinus Torvalds 	int sched_count = 0;
30031da177e4SLinus Torvalds 	int retval;
30041da177e4SLinus Torvalds 
3005a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal_begin");
300614a61442SEric Sesterhenn 	BUG_ON(nblocks > journal->j_trans_max);
30071da177e4SLinus Torvalds 
3008a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.journal_being);
30091da177e4SLinus Torvalds 	/* set here for journal_join */
30101da177e4SLinus Torvalds 	th->t_refcount = 1;
3011a9dd3643SJeff Mahoney 	th->t_super = sb;
30121da177e4SLinus Torvalds 
30131da177e4SLinus Torvalds       relock:
3014a9dd3643SJeff Mahoney 	lock_journal(sb);
30151da177e4SLinus Torvalds 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3016a9dd3643SJeff Mahoney 		unlock_journal(sb);
30171da177e4SLinus Torvalds 		retval = journal->j_errno;
30181da177e4SLinus Torvalds 		goto out_fail;
30191da177e4SLinus Torvalds 	}
30201da177e4SLinus Torvalds 	journal->j_bcount++;
30211da177e4SLinus Torvalds 
30221da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3023a9dd3643SJeff Mahoney 		unlock_journal(sb);
30248ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
3025a9dd3643SJeff Mahoney 		reiserfs_wait_on_write_block(sb);
30268ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
3027a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_writers);
30281da177e4SLinus Torvalds 		goto relock;
30291da177e4SLinus Torvalds 	}
30301da177e4SLinus Torvalds 	now = get_seconds();
30311da177e4SLinus Torvalds 
30321da177e4SLinus Torvalds 	/* if there is no room in the journal OR
30331da177e4SLinus Torvalds 	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
30341da177e4SLinus Torvalds 	 ** we don't sleep if there aren't other writers
30351da177e4SLinus Torvalds 	 */
30361da177e4SLinus Torvalds 
30371da177e4SLinus Torvalds 	if ((!join && journal->j_must_wait > 0) ||
3038bd4c625cSLinus Torvalds 	    (!join
3039bd4c625cSLinus Torvalds 	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
3040bd4c625cSLinus Torvalds 	    || (!join && atomic_read(&journal->j_wcount) > 0
3041bd4c625cSLinus Torvalds 		&& journal->j_trans_start_time > 0
3042bd4c625cSLinus Torvalds 		&& (now - journal->j_trans_start_time) >
3043bd4c625cSLinus Torvalds 		journal->j_max_trans_age) || (!join
3044bd4c625cSLinus Torvalds 					      && atomic_read(&journal->j_jlock))
3045bd4c625cSLinus Torvalds 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
30461da177e4SLinus Torvalds 
30471da177e4SLinus Torvalds 		old_trans_id = journal->j_trans_id;
3048a9dd3643SJeff Mahoney 		unlock_journal(sb);	/* allow others to finish this transaction */
30491da177e4SLinus Torvalds 
30501da177e4SLinus Torvalds 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
30511da177e4SLinus Torvalds 		    journal->j_max_batch &&
3052bd4c625cSLinus Torvalds 		    ((journal->j_len + nblocks + 2) * 100) <
3053bd4c625cSLinus Torvalds 		    (journal->j_len_alloc * 75)) {
30541da177e4SLinus Torvalds 			if (atomic_read(&journal->j_wcount) > 10) {
30551da177e4SLinus Torvalds 				sched_count++;
3056a9dd3643SJeff Mahoney 				queue_log_writer(sb);
30571da177e4SLinus Torvalds 				goto relock;
30581da177e4SLinus Torvalds 			}
30591da177e4SLinus Torvalds 		}
30601da177e4SLinus Torvalds 		/* don't mess with joining the transaction if all we have to do is
30611da177e4SLinus Torvalds 		 * wait for someone else to do a commit
30621da177e4SLinus Torvalds 		 */
30631da177e4SLinus Torvalds 		if (atomic_read(&journal->j_jlock)) {
30641da177e4SLinus Torvalds 			while (journal->j_trans_id == old_trans_id &&
30651da177e4SLinus Torvalds 			       atomic_read(&journal->j_jlock)) {
3066a9dd3643SJeff Mahoney 				queue_log_writer(sb);
30671da177e4SLinus Torvalds 			}
30681da177e4SLinus Torvalds 			goto relock;
30691da177e4SLinus Torvalds 		}
3070a9dd3643SJeff Mahoney 		retval = journal_join(&myth, sb, 1);
30711da177e4SLinus Torvalds 		if (retval)
30721da177e4SLinus Torvalds 			goto out_fail;
30731da177e4SLinus Torvalds 
30741da177e4SLinus Torvalds 		/* someone might have ended the transaction while we joined */
30751da177e4SLinus Torvalds 		if (old_trans_id != journal->j_trans_id) {
3076a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, 0);
30771da177e4SLinus Torvalds 		} else {
3078a9dd3643SJeff Mahoney 			retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
30791da177e4SLinus Torvalds 		}
30801da177e4SLinus Torvalds 
30811da177e4SLinus Torvalds 		if (retval)
30821da177e4SLinus Torvalds 			goto out_fail;
30831da177e4SLinus Torvalds 
3084a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.journal_relock_wcount);
30851da177e4SLinus Torvalds 		goto relock;
30861da177e4SLinus Torvalds 	}
30871da177e4SLinus Torvalds 	/* we are the first writer, set trans_id */
30881da177e4SLinus Torvalds 	if (journal->j_trans_start_time == 0) {
30891da177e4SLinus Torvalds 		journal->j_trans_start_time = get_seconds();
30901da177e4SLinus Torvalds 	}
30911da177e4SLinus Torvalds 	atomic_inc(&(journal->j_wcount));
30921da177e4SLinus Torvalds 	journal->j_len_alloc += nblocks;
30931da177e4SLinus Torvalds 	th->t_blocks_logged = 0;
30941da177e4SLinus Torvalds 	th->t_blocks_allocated = nblocks;
30951da177e4SLinus Torvalds 	th->t_trans_id = journal->j_trans_id;
3096a9dd3643SJeff Mahoney 	unlock_journal(sb);
30971da177e4SLinus Torvalds 	INIT_LIST_HEAD(&th->t_list);
30981da177e4SLinus Torvalds 	return 0;
30991da177e4SLinus Torvalds 
31001da177e4SLinus Torvalds       out_fail:
31011da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
31021da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
31031da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
31041da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
3105a9dd3643SJeff Mahoney 	th->t_super = sb;
31061da177e4SLinus Torvalds 	return retval;
31071da177e4SLinus Torvalds }
31081da177e4SLinus Torvalds 
3109bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3110bd4c625cSLinus Torvalds 								    super_block
3111bd4c625cSLinus Torvalds 								    *s,
3112bd4c625cSLinus Torvalds 								    int nblocks)
3113bd4c625cSLinus Torvalds {
31141da177e4SLinus Torvalds 	int ret;
31151da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *th;
31161da177e4SLinus Torvalds 
31171da177e4SLinus Torvalds 	/* if we're nesting into an existing transaction.  It will be
31181da177e4SLinus Torvalds 	 ** persistent on its own
31191da177e4SLinus Torvalds 	 */
31201da177e4SLinus Torvalds 	if (reiserfs_transaction_running(s)) {
31211da177e4SLinus Torvalds 		th = current->journal_info;
31221da177e4SLinus Torvalds 		th->t_refcount++;
312314a61442SEric Sesterhenn 		BUG_ON(th->t_refcount < 2);
312414a61442SEric Sesterhenn 
31251da177e4SLinus Torvalds 		return th;
31261da177e4SLinus Torvalds 	}
3127d739b42bSPekka Enberg 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
31281da177e4SLinus Torvalds 	if (!th)
31291da177e4SLinus Torvalds 		return NULL;
31301da177e4SLinus Torvalds 	ret = journal_begin(th, s, nblocks);
31311da177e4SLinus Torvalds 	if (ret) {
3132d739b42bSPekka Enberg 		kfree(th);
31331da177e4SLinus Torvalds 		return NULL;
31341da177e4SLinus Torvalds 	}
31351da177e4SLinus Torvalds 
31361da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_persistent_trans++;
31371da177e4SLinus Torvalds 	return th;
31381da177e4SLinus Torvalds }
31391da177e4SLinus Torvalds 
3140bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3141bd4c625cSLinus Torvalds {
31421da177e4SLinus Torvalds 	struct super_block *s = th->t_super;
31431da177e4SLinus Torvalds 	int ret = 0;
31441da177e4SLinus Torvalds 	if (th->t_trans_id)
31451da177e4SLinus Torvalds 		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
31461da177e4SLinus Torvalds 	else
31471da177e4SLinus Torvalds 		ret = -EIO;
31481da177e4SLinus Torvalds 	if (th->t_refcount == 0) {
31491da177e4SLinus Torvalds 		SB_JOURNAL(s)->j_persistent_trans--;
3150d739b42bSPekka Enberg 		kfree(th);
31511da177e4SLinus Torvalds 	}
31521da177e4SLinus Torvalds 	return ret;
31531da177e4SLinus Torvalds }
31541da177e4SLinus Torvalds 
3155bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
3156a9dd3643SJeff Mahoney 			struct super_block *sb, unsigned long nblocks)
3157bd4c625cSLinus Torvalds {
31581da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31591da177e4SLinus Torvalds 
31601da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31611da177e4SLinus Torvalds 	 ** pointer
31621da177e4SLinus Torvalds 	 */
31631da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
316414a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3165a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
31661da177e4SLinus Torvalds }
31671da177e4SLinus Torvalds 
3168bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th,
3169a9dd3643SJeff Mahoney 		       struct super_block *sb, unsigned long nblocks)
3170bd4c625cSLinus Torvalds {
31711da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31721da177e4SLinus Torvalds 
31731da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31741da177e4SLinus Torvalds 	 ** pointer
31751da177e4SLinus Torvalds 	 */
31761da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
317714a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
3178a9dd3643SJeff Mahoney 	return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
31791da177e4SLinus Torvalds }
31801da177e4SLinus Torvalds 
3181bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th,
3182a9dd3643SJeff Mahoney 		  struct super_block *sb, unsigned long nblocks)
3183bd4c625cSLinus Torvalds {
31841da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31851da177e4SLinus Torvalds 	int ret;
31861da177e4SLinus Torvalds 
31871da177e4SLinus Torvalds 	th->t_handle_save = NULL;
31881da177e4SLinus Torvalds 	if (cur_th) {
31891da177e4SLinus Torvalds 		/* we are nesting into the current transaction */
3190a9dd3643SJeff Mahoney 		if (cur_th->t_super == sb) {
31911da177e4SLinus Torvalds 			BUG_ON(!cur_th->t_refcount);
31921da177e4SLinus Torvalds 			cur_th->t_refcount++;
31931da177e4SLinus Torvalds 			memcpy(th, cur_th, sizeof(*th));
31941da177e4SLinus Torvalds 			if (th->t_refcount <= 1)
3195a9dd3643SJeff Mahoney 				reiserfs_warning(sb, "reiserfs-2005",
319645b03d5eSJeff Mahoney 						 "BAD: refcount <= 1, but "
319745b03d5eSJeff Mahoney 						 "journal_info != 0");
31981da177e4SLinus Torvalds 			return 0;
31991da177e4SLinus Torvalds 		} else {
32001da177e4SLinus Torvalds 			/* we've ended up with a handle from a different filesystem.
32011da177e4SLinus Torvalds 			 ** save it and restore on journal_end.  This should never
32021da177e4SLinus Torvalds 			 ** really happen...
32031da177e4SLinus Torvalds 			 */
3204a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "clm-2100",
320545b03d5eSJeff Mahoney 					 "nesting info a different FS");
32061da177e4SLinus Torvalds 			th->t_handle_save = current->journal_info;
32071da177e4SLinus Torvalds 			current->journal_info = th;
32081da177e4SLinus Torvalds 		}
32091da177e4SLinus Torvalds 	} else {
32101da177e4SLinus Torvalds 		current->journal_info = th;
32111da177e4SLinus Torvalds 	}
3212a9dd3643SJeff Mahoney 	ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
321314a61442SEric Sesterhenn 	BUG_ON(current->journal_info != th);
32141da177e4SLinus Torvalds 
32151da177e4SLinus Torvalds 	/* I guess this boils down to being the reciprocal of clm-2100 above.
32161da177e4SLinus Torvalds 	 * If do_journal_begin_r fails, we need to put it back, since journal_end
32171da177e4SLinus Torvalds 	 * won't be called to do it. */
32181da177e4SLinus Torvalds 	if (ret)
32191da177e4SLinus Torvalds 		current->journal_info = th->t_handle_save;
32201da177e4SLinus Torvalds 	else
32211da177e4SLinus Torvalds 		BUG_ON(!th->t_refcount);
32221da177e4SLinus Torvalds 
32231da177e4SLinus Torvalds 	return ret;
32241da177e4SLinus Torvalds }
32251da177e4SLinus Torvalds 
32261da177e4SLinus Torvalds /*
32271da177e4SLinus Torvalds ** puts bh into the current transaction.  If it was already there, reorders removes the
32281da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
32291da177e4SLinus Torvalds **
32301da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
32311da177e4SLinus Torvalds ** transaction is committed.
32321da177e4SLinus Torvalds **
32331da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
32341da177e4SLinus Torvalds */
3235bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3236a9dd3643SJeff Mahoney 		       struct super_block *sb, struct buffer_head *bh)
3237bd4c625cSLinus Torvalds {
3238a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
32391da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
32401da177e4SLinus Torvalds 	int count_already_incd = 0;
32411da177e4SLinus Torvalds 	int prepared = 0;
32421da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
32431da177e4SLinus Torvalds 
3244a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.mark_dirty);
32451da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3246c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3247c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
32481da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
32491da177e4SLinus Torvalds 	}
32501da177e4SLinus Torvalds 
3251a9dd3643SJeff Mahoney 	sb->s_dirt = 1;
32521da177e4SLinus Torvalds 
32531da177e4SLinus Torvalds 	prepared = test_clear_buffer_journal_prepared(bh);
32541da177e4SLinus Torvalds 	clear_buffer_journal_restore_dirty(bh);
32551da177e4SLinus Torvalds 	/* already in this transaction, we are done */
32561da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
3257a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_already);
32581da177e4SLinus Torvalds 		return 0;
32591da177e4SLinus Torvalds 	}
32601da177e4SLinus Torvalds 
32611da177e4SLinus Torvalds 	/* this must be turned into a panic instead of a warning.  We can't allow
32621da177e4SLinus Torvalds 	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
32631da177e4SLinus Torvalds 	 ** could get to disk too early.  NOT GOOD.
32641da177e4SLinus Torvalds 	 */
32651da177e4SLinus Torvalds 	if (!prepared || buffer_dirty(bh)) {
3266a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1777",
326745b03d5eSJeff Mahoney 				 "buffer %llu bad state "
32681da177e4SLinus Torvalds 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3269bd4c625cSLinus Torvalds 				 (unsigned long long)bh->b_blocknr,
3270bd4c625cSLinus Torvalds 				 prepared ? ' ' : '!',
32711da177e4SLinus Torvalds 				 buffer_locked(bh) ? ' ' : '!',
32721da177e4SLinus Torvalds 				 buffer_dirty(bh) ? ' ' : '!',
32731da177e4SLinus Torvalds 				 buffer_journal_dirty(bh) ? ' ' : '!');
32741da177e4SLinus Torvalds 	}
32751da177e4SLinus Torvalds 
32761da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) <= 0) {
3277a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "journal-1409",
327845b03d5eSJeff Mahoney 				 "returning because j_wcount was %d",
3279bd4c625cSLinus Torvalds 				 atomic_read(&(journal->j_wcount)));
32801da177e4SLinus Torvalds 		return 1;
32811da177e4SLinus Torvalds 	}
32821da177e4SLinus Torvalds 	/* this error means I've screwed up, and we've overflowed the transaction.
32831da177e4SLinus Torvalds 	 ** Nothing can be done here, except make the FS readonly or panic.
32841da177e4SLinus Torvalds 	 */
32851da177e4SLinus Torvalds 	if (journal->j_len >= journal->j_trans_max) {
3286c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1413",
3287c3a9c210SJeff Mahoney 			       "j_len (%lu) is too big",
3288bd4c625cSLinus Torvalds 			       journal->j_len);
32891da177e4SLinus Torvalds 	}
32901da177e4SLinus Torvalds 
32911da177e4SLinus Torvalds 	if (buffer_journal_dirty(bh)) {
32921da177e4SLinus Torvalds 		count_already_incd = 1;
3293a9dd3643SJeff Mahoney 		PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
32941da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
32951da177e4SLinus Torvalds 	}
32961da177e4SLinus Torvalds 
32971da177e4SLinus Torvalds 	if (journal->j_len > journal->j_len_alloc) {
32981da177e4SLinus Torvalds 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
32991da177e4SLinus Torvalds 	}
33001da177e4SLinus Torvalds 
33011da177e4SLinus Torvalds 	set_buffer_journaled(bh);
33021da177e4SLinus Torvalds 
33031da177e4SLinus Torvalds 	/* now put this guy on the end */
33041da177e4SLinus Torvalds 	if (!cn) {
3305a9dd3643SJeff Mahoney 		cn = get_cnode(sb);
33061da177e4SLinus Torvalds 		if (!cn) {
3307a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-4", "get_cnode failed!");
33081da177e4SLinus Torvalds 		}
33091da177e4SLinus Torvalds 
33101da177e4SLinus Torvalds 		if (th->t_blocks_logged == th->t_blocks_allocated) {
33111da177e4SLinus Torvalds 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
33121da177e4SLinus Torvalds 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
33131da177e4SLinus Torvalds 		}
33141da177e4SLinus Torvalds 		th->t_blocks_logged++;
33151da177e4SLinus Torvalds 		journal->j_len++;
33161da177e4SLinus Torvalds 
33171da177e4SLinus Torvalds 		cn->bh = bh;
33181da177e4SLinus Torvalds 		cn->blocknr = bh->b_blocknr;
3319a9dd3643SJeff Mahoney 		cn->sb = sb;
33201da177e4SLinus Torvalds 		cn->jlist = NULL;
33211da177e4SLinus Torvalds 		insert_journal_hash(journal->j_hash_table, cn);
33221da177e4SLinus Torvalds 		if (!count_already_incd) {
33231da177e4SLinus Torvalds 			get_bh(bh);
33241da177e4SLinus Torvalds 		}
33251da177e4SLinus Torvalds 	}
33261da177e4SLinus Torvalds 	cn->next = NULL;
33271da177e4SLinus Torvalds 	cn->prev = journal->j_last;
33281da177e4SLinus Torvalds 	cn->bh = bh;
33291da177e4SLinus Torvalds 	if (journal->j_last) {
33301da177e4SLinus Torvalds 		journal->j_last->next = cn;
33311da177e4SLinus Torvalds 		journal->j_last = cn;
33321da177e4SLinus Torvalds 	} else {
33331da177e4SLinus Torvalds 		journal->j_first = cn;
33341da177e4SLinus Torvalds 		journal->j_last = cn;
33351da177e4SLinus Torvalds 	}
33361da177e4SLinus Torvalds 	return 0;
33371da177e4SLinus Torvalds }
33381da177e4SLinus Torvalds 
3339bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th,
3340a9dd3643SJeff Mahoney 		struct super_block *sb, unsigned long nblocks)
3341bd4c625cSLinus Torvalds {
33421da177e4SLinus Torvalds 	if (!current->journal_info && th->t_refcount > 1)
3343a9dd3643SJeff Mahoney 		reiserfs_warning(sb, "REISER-NESTING",
334445b03d5eSJeff Mahoney 				 "th NULL, refcount %d", th->t_refcount);
33451da177e4SLinus Torvalds 
33461da177e4SLinus Torvalds 	if (!th->t_trans_id) {
33471da177e4SLinus Torvalds 		WARN_ON(1);
33481da177e4SLinus Torvalds 		return -EIO;
33491da177e4SLinus Torvalds 	}
33501da177e4SLinus Torvalds 
33511da177e4SLinus Torvalds 	th->t_refcount--;
33521da177e4SLinus Torvalds 	if (th->t_refcount > 0) {
3353bd4c625cSLinus Torvalds 		struct reiserfs_transaction_handle *cur_th =
3354bd4c625cSLinus Torvalds 		    current->journal_info;
33551da177e4SLinus Torvalds 
33561da177e4SLinus Torvalds 		/* we aren't allowed to close a nested transaction on a different
33571da177e4SLinus Torvalds 		 ** filesystem from the one in the task struct
33581da177e4SLinus Torvalds 		 */
335914a61442SEric Sesterhenn 		BUG_ON(cur_th->t_super != th->t_super);
33601da177e4SLinus Torvalds 
33611da177e4SLinus Torvalds 		if (th != cur_th) {
33621da177e4SLinus Torvalds 			memcpy(current->journal_info, th, sizeof(*th));
33631da177e4SLinus Torvalds 			th->t_trans_id = 0;
33641da177e4SLinus Torvalds 		}
33651da177e4SLinus Torvalds 		return 0;
33661da177e4SLinus Torvalds 	} else {
3367a9dd3643SJeff Mahoney 		return do_journal_end(th, sb, nblocks, 0);
33681da177e4SLinus Torvalds 	}
33691da177e4SLinus Torvalds }
33701da177e4SLinus Torvalds 
33711da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters.
33721da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list
33731da177e4SLinus Torvalds **
33741da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted
33751da177e4SLinus Torvalds **
33761da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise
33771da177e4SLinus Torvalds */
3378a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb,
3379bd4c625cSLinus Torvalds 				   b_blocknr_t blocknr, int already_cleaned)
3380bd4c625cSLinus Torvalds {
33811da177e4SLinus Torvalds 	struct buffer_head *bh;
33821da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
3383a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
33841da177e4SLinus Torvalds 	int ret = 0;
33851da177e4SLinus Torvalds 
3386a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
33871da177e4SLinus Torvalds 	if (!cn || !cn->bh) {
33881da177e4SLinus Torvalds 		return ret;
33891da177e4SLinus Torvalds 	}
33901da177e4SLinus Torvalds 	bh = cn->bh;
33911da177e4SLinus Torvalds 	if (cn->prev) {
33921da177e4SLinus Torvalds 		cn->prev->next = cn->next;
33931da177e4SLinus Torvalds 	}
33941da177e4SLinus Torvalds 	if (cn->next) {
33951da177e4SLinus Torvalds 		cn->next->prev = cn->prev;
33961da177e4SLinus Torvalds 	}
33971da177e4SLinus Torvalds 	if (cn == journal->j_first) {
33981da177e4SLinus Torvalds 		journal->j_first = cn->next;
33991da177e4SLinus Torvalds 	}
34001da177e4SLinus Torvalds 	if (cn == journal->j_last) {
34011da177e4SLinus Torvalds 		journal->j_last = cn->prev;
34021da177e4SLinus Torvalds 	}
34031da177e4SLinus Torvalds 	if (bh)
3404a9dd3643SJeff Mahoney 		remove_journal_hash(sb, journal->j_hash_table, NULL,
3405bd4c625cSLinus Torvalds 				    bh->b_blocknr, 0);
34061da177e4SLinus Torvalds 	clear_buffer_journaled(bh);	/* don't log this one */
34071da177e4SLinus Torvalds 
34081da177e4SLinus Torvalds 	if (!already_cleaned) {
34091da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
34101da177e4SLinus Torvalds 		clear_buffer_dirty(bh);
34111da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
34121da177e4SLinus Torvalds 		put_bh(bh);
34131da177e4SLinus Torvalds 		if (atomic_read(&(bh->b_count)) < 0) {
3414a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-1752",
341545b03d5eSJeff Mahoney 					 "b_count < 0");
34161da177e4SLinus Torvalds 		}
34171da177e4SLinus Torvalds 		ret = 1;
34181da177e4SLinus Torvalds 	}
34191da177e4SLinus Torvalds 	journal->j_len--;
34201da177e4SLinus Torvalds 	journal->j_len_alloc--;
3421a9dd3643SJeff Mahoney 	free_cnode(sb, cn);
34221da177e4SLinus Torvalds 	return ret;
34231da177e4SLinus Torvalds }
34241da177e4SLinus Torvalds 
34251da177e4SLinus Torvalds /*
** for any cnode in a journal list, it can only be dirtied if all the
** transactions that include it are committed to disk.
34281da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty,
34291da177e4SLinus Torvalds ** and 0 if you aren't
34301da177e4SLinus Torvalds **
34311da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
34321da177e4SLinus Torvalds ** blocks for a given transaction on disk
34331da177e4SLinus Torvalds **
34341da177e4SLinus Torvalds */
3435bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn)
3436bd4c625cSLinus Torvalds {
34371da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
34381da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
34391da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur = cn->hprev;
34401da177e4SLinus Torvalds 	int can_dirty = 1;
34411da177e4SLinus Torvalds 
34421da177e4SLinus Torvalds 	/* first test hprev.  These are all newer than cn, so any node here
34431da177e4SLinus Torvalds 	 ** with the same block number and dev means this node can't be sent
34441da177e4SLinus Torvalds 	 ** to disk right now.
34451da177e4SLinus Torvalds 	 */
34461da177e4SLinus Torvalds 	while (cur && can_dirty) {
34471da177e4SLinus Torvalds 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
34481da177e4SLinus Torvalds 		    cur->blocknr == blocknr) {
34491da177e4SLinus Torvalds 			can_dirty = 0;
34501da177e4SLinus Torvalds 		}
34511da177e4SLinus Torvalds 		cur = cur->hprev;
34521da177e4SLinus Torvalds 	}
34531da177e4SLinus Torvalds 	/* then test hnext.  These are all older than cn.  As long as they
34541da177e4SLinus Torvalds 	 ** are committed to the log, it is safe to write cn to disk
34551da177e4SLinus Torvalds 	 */
34561da177e4SLinus Torvalds 	cur = cn->hnext;
34571da177e4SLinus Torvalds 	while (cur && can_dirty) {
34581da177e4SLinus Torvalds 		if (cur->jlist && cur->jlist->j_len > 0 &&
34591da177e4SLinus Torvalds 		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
34601da177e4SLinus Torvalds 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
34611da177e4SLinus Torvalds 			can_dirty = 0;
34621da177e4SLinus Torvalds 		}
34631da177e4SLinus Torvalds 		cur = cur->hnext;
34641da177e4SLinus Torvalds 	}
34651da177e4SLinus Torvalds 	return can_dirty;
34661da177e4SLinus Torvalds }
34671da177e4SLinus Torvalds 
34681da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk
34690779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning
34701da177e4SLinus Torvalds */
3471bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th,
3472a9dd3643SJeff Mahoney 		     struct super_block *sb, unsigned long nblocks)
3473bd4c625cSLinus Torvalds {
3474a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
34751da177e4SLinus Torvalds 
34761da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
34771da177e4SLinus Torvalds 	/* you can sync while nested, very, very bad */
347814a61442SEric Sesterhenn 	BUG_ON(th->t_refcount > 1);
34791da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3480a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3481bd4c625cSLinus Torvalds 					     1);
3482a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
34831da177e4SLinus Torvalds 	}
3484a9dd3643SJeff Mahoney 	return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
34851da177e4SLinus Torvalds }
34861da177e4SLinus Torvalds 
34871da177e4SLinus Torvalds /*
34881da177e4SLinus Torvalds ** writeback the pending async commits to disk
34891da177e4SLinus Torvalds */
3490c4028958SDavid Howells static void flush_async_commits(struct work_struct *work)
3491bd4c625cSLinus Torvalds {
3492c4028958SDavid Howells 	struct reiserfs_journal *journal =
3493c4028958SDavid Howells 		container_of(work, struct reiserfs_journal, j_work.work);
3494a9dd3643SJeff Mahoney 	struct super_block *sb = journal->j_work_sb;
34951da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
34961da177e4SLinus Torvalds 	struct list_head *entry;
34971da177e4SLinus Torvalds 
34988ebc4232SFrederic Weisbecker 	reiserfs_write_lock(sb);
34991da177e4SLinus Torvalds 	if (!list_empty(&journal->j_journal_list)) {
35001da177e4SLinus Torvalds 		/* last entry is the youngest, commit it and you get everything */
35011da177e4SLinus Torvalds 		entry = journal->j_journal_list.prev;
35021da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
3503a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
35041da177e4SLinus Torvalds 	}
35058ebc4232SFrederic Weisbecker 	reiserfs_write_unlock(sb);
35061da177e4SLinus Torvalds }
35071da177e4SLinus Torvalds 
35081da177e4SLinus Torvalds /*
35091da177e4SLinus Torvalds ** flushes any old transactions to disk
35101da177e4SLinus Torvalds ** ends the current transaction if it is too old
35111da177e4SLinus Torvalds */
3512a9dd3643SJeff Mahoney int reiserfs_flush_old_commits(struct super_block *sb)
3513bd4c625cSLinus Torvalds {
35141da177e4SLinus Torvalds 	time_t now;
35151da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
3516a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35171da177e4SLinus Torvalds 
35181da177e4SLinus Torvalds 	now = get_seconds();
35191da177e4SLinus Torvalds 	/* safety check so we don't flush while we are replaying the log during
35201da177e4SLinus Torvalds 	 * mount
35211da177e4SLinus Torvalds 	 */
35221da177e4SLinus Torvalds 	if (list_empty(&journal->j_journal_list)) {
35231da177e4SLinus Torvalds 		return 0;
35241da177e4SLinus Torvalds 	}
35251da177e4SLinus Torvalds 
35261da177e4SLinus Torvalds 	/* check the current transaction.  If there are no writers, and it is
35271da177e4SLinus Torvalds 	 * too old, finish it, and force the commit blocks to disk
35281da177e4SLinus Torvalds 	 */
35291da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) <= 0 &&
35301da177e4SLinus Torvalds 	    journal->j_trans_start_time > 0 &&
35311da177e4SLinus Torvalds 	    journal->j_len > 0 &&
3532bd4c625cSLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3533a9dd3643SJeff Mahoney 		if (!journal_join(&th, sb, 1)) {
3534a9dd3643SJeff Mahoney 			reiserfs_prepare_for_journal(sb,
3535a9dd3643SJeff Mahoney 						     SB_BUFFER_WITH_SB(sb),
3536bd4c625cSLinus Torvalds 						     1);
3537a9dd3643SJeff Mahoney 			journal_mark_dirty(&th, sb,
3538a9dd3643SJeff Mahoney 					   SB_BUFFER_WITH_SB(sb));
35391da177e4SLinus Torvalds 
35401da177e4SLinus Torvalds 			/* we're only being called from kreiserfsd, it makes no sense to do
35411da177e4SLinus Torvalds 			 ** an async commit so that kreiserfsd can do it later
35421da177e4SLinus Torvalds 			 */
3543a9dd3643SJeff Mahoney 			do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
35441da177e4SLinus Torvalds 		}
35451da177e4SLinus Torvalds 	}
3546a9dd3643SJeff Mahoney 	return sb->s_dirt;
35471da177e4SLinus Torvalds }
35481da177e4SLinus Torvalds 
35491da177e4SLinus Torvalds /*
35501da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
35511da177e4SLinus Torvalds **
35521da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
35531da177e4SLinus Torvalds ** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
35541da177e4SLinus Torvalds ** flushes the commit list and returns 0.
35551da177e4SLinus Torvalds **
35561da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
35571da177e4SLinus Torvalds **
35581da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
35591da177e4SLinus Torvalds */
3560bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th,
3561a9dd3643SJeff Mahoney 			     struct super_block *sb, unsigned long nblocks,
3562bd4c625cSLinus Torvalds 			     int flags)
3563bd4c625cSLinus Torvalds {
35641da177e4SLinus Torvalds 
35651da177e4SLinus Torvalds 	time_t now;
35661da177e4SLinus Torvalds 	int flush = flags & FLUSH_ALL;
35671da177e4SLinus Torvalds 	int commit_now = flags & COMMIT_NOW;
35681da177e4SLinus Torvalds 	int wait_on_commit = flags & WAIT;
35691da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
3570a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
35711da177e4SLinus Torvalds 
35721da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
35731da177e4SLinus Torvalds 
35741da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3575c3a9c210SJeff Mahoney 		reiserfs_panic(th->t_super, "journal-1577",
3576c3a9c210SJeff Mahoney 			       "handle trans id %ld != current trans id %ld",
35771da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
35781da177e4SLinus Torvalds 	}
35791da177e4SLinus Torvalds 
35801da177e4SLinus Torvalds 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
35811da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
35821da177e4SLinus Torvalds 		atomic_dec(&(journal->j_wcount));
35831da177e4SLinus Torvalds 	}
35841da177e4SLinus Torvalds 
35851da177e4SLinus Torvalds 	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
35861da177e4SLinus Torvalds 	 ** will be dealt with by next transaction that actually writes something, but should be taken
35871da177e4SLinus Torvalds 	 ** care of in this trans
35881da177e4SLinus Torvalds 	 */
358914a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
359014a61442SEric Sesterhenn 
35911da177e4SLinus Torvalds 	/* if wcount > 0, and we are called to with flush or commit_now,
35921da177e4SLinus Torvalds 	 ** we wait on j_join_wait.  We will wake up when the last writer has
35931da177e4SLinus Torvalds 	 ** finished the transaction, and started it on its way to the disk.
35941da177e4SLinus Torvalds 	 ** Then, we flush the commit or journal list, and just return 0
35951da177e4SLinus Torvalds 	 ** because the rest of journal end was already done for this transaction.
35961da177e4SLinus Torvalds 	 */
35971da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {
35981da177e4SLinus Torvalds 		if (flush || commit_now) {
35991da177e4SLinus Torvalds 			unsigned trans_id;
36001da177e4SLinus Torvalds 
36011da177e4SLinus Torvalds 			jl = journal->j_current_jl;
36021da177e4SLinus Torvalds 			trans_id = jl->j_trans_id;
36031da177e4SLinus Torvalds 			if (wait_on_commit)
36041da177e4SLinus Torvalds 				jl->j_state |= LIST_COMMIT_PENDING;
36051da177e4SLinus Torvalds 			atomic_set(&(journal->j_jlock), 1);
36061da177e4SLinus Torvalds 			if (flush) {
36071da177e4SLinus Torvalds 				journal->j_next_full_flush = 1;
36081da177e4SLinus Torvalds 			}
3609a9dd3643SJeff Mahoney 			unlock_journal(sb);
36101da177e4SLinus Torvalds 
36111da177e4SLinus Torvalds 			/* sleep while the current transaction is still j_jlocked */
36121da177e4SLinus Torvalds 			while (journal->j_trans_id == trans_id) {
36131da177e4SLinus Torvalds 				if (atomic_read(&journal->j_jlock)) {
3614a9dd3643SJeff Mahoney 					queue_log_writer(sb);
36151da177e4SLinus Torvalds 				} else {
3616a9dd3643SJeff Mahoney 					lock_journal(sb);
36171da177e4SLinus Torvalds 					if (journal->j_trans_id == trans_id) {
3618bd4c625cSLinus Torvalds 						atomic_set(&(journal->j_jlock),
3619bd4c625cSLinus Torvalds 							   1);
36201da177e4SLinus Torvalds 					}
3621a9dd3643SJeff Mahoney 					unlock_journal(sb);
36221da177e4SLinus Torvalds 				}
36231da177e4SLinus Torvalds 			}
362414a61442SEric Sesterhenn 			BUG_ON(journal->j_trans_id == trans_id);
362514a61442SEric Sesterhenn 
3626bd4c625cSLinus Torvalds 			if (commit_now
3627a9dd3643SJeff Mahoney 			    && journal_list_still_alive(sb, trans_id)
3628bd4c625cSLinus Torvalds 			    && wait_on_commit) {
3629a9dd3643SJeff Mahoney 				flush_commit_list(sb, jl, 1);
36301da177e4SLinus Torvalds 			}
36311da177e4SLinus Torvalds 			return 0;
36321da177e4SLinus Torvalds 		}
3633a9dd3643SJeff Mahoney 		unlock_journal(sb);
36341da177e4SLinus Torvalds 		return 0;
36351da177e4SLinus Torvalds 	}
36361da177e4SLinus Torvalds 
36371da177e4SLinus Torvalds 	/* deal with old transactions where we are the last writers */
36381da177e4SLinus Torvalds 	now = get_seconds();
36391da177e4SLinus Torvalds 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
36401da177e4SLinus Torvalds 		commit_now = 1;
36411da177e4SLinus Torvalds 		journal->j_next_async_flush = 1;
36421da177e4SLinus Torvalds 	}
36431da177e4SLinus Torvalds 	/* don't batch when someone is waiting on j_join_wait */
36441da177e4SLinus Torvalds 	/* don't batch when syncing the commit or flushing the whole trans */
3645bd4c625cSLinus Torvalds 	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3646bd4c625cSLinus Torvalds 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3647bd4c625cSLinus Torvalds 	    && journal->j_len_alloc < journal->j_max_batch
3648bd4c625cSLinus Torvalds 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
36491da177e4SLinus Torvalds 		journal->j_bcount++;
3650a9dd3643SJeff Mahoney 		unlock_journal(sb);
36511da177e4SLinus Torvalds 		return 0;
36521da177e4SLinus Torvalds 	}
36531da177e4SLinus Torvalds 
3654a9dd3643SJeff Mahoney 	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3655a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-003",
3656c3a9c210SJeff Mahoney 			       "j_start (%ld) is too high",
3657bd4c625cSLinus Torvalds 			       journal->j_start);
36581da177e4SLinus Torvalds 	}
36591da177e4SLinus Torvalds 	return 1;
36601da177e4SLinus Torvalds }
36611da177e4SLinus Torvalds 
36621da177e4SLinus Torvalds /*
36631da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe.
36641da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
36651da177e4SLinus Torvalds **
36661da177e4SLinus Torvalds ** otherwise:
36671da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
36681da177e4SLinus Torvalds ** before this transaction has finished.
36691da177e4SLinus Torvalds **
36701da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
36711da177e4SLinus Torvalds ** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
36721da177e4SLinus Torvalds ** the block can't be reallocated yet.
36731da177e4SLinus Torvalds **
36741da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
36751da177e4SLinus Torvalds */
3676bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th,
3677a9dd3643SJeff Mahoney 		       struct super_block *sb, b_blocknr_t blocknr)
3678bd4c625cSLinus Torvalds {
3679a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
36801da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
36811da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
36821da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
36831da177e4SLinus Torvalds 	int cleaned = 0;
36841da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
36851da177e4SLinus Torvalds 
3686a9dd3643SJeff Mahoney 	cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
36871da177e4SLinus Torvalds 	if (cn && cn->bh) {
36881da177e4SLinus Torvalds 		bh = cn->bh;
36891da177e4SLinus Torvalds 		get_bh(bh);
36901da177e4SLinus Torvalds 	}
36911da177e4SLinus Torvalds 	/* if it is journal new, we just remove it from this transaction */
36921da177e4SLinus Torvalds 	if (bh && buffer_journal_new(bh)) {
36931da177e4SLinus Torvalds 		clear_buffer_journal_new(bh);
36941da177e4SLinus Torvalds 		clear_prepared_bits(bh);
36951da177e4SLinus Torvalds 		reiserfs_clean_and_file_buffer(bh);
3696a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
36971da177e4SLinus Torvalds 	} else {
36981da177e4SLinus Torvalds 		/* set the bit for this block in the journal bitmap for this transaction */
36991da177e4SLinus Torvalds 		jb = journal->j_current_jl->j_list_bitmap;
37001da177e4SLinus Torvalds 		if (!jb) {
3701a9dd3643SJeff Mahoney 			reiserfs_panic(sb, "journal-1702",
3702c3a9c210SJeff Mahoney 				       "journal_list_bitmap is NULL");
37031da177e4SLinus Torvalds 		}
3704a9dd3643SJeff Mahoney 		set_bit_in_list_bitmap(sb, blocknr, jb);
37051da177e4SLinus Torvalds 
37061da177e4SLinus Torvalds 		/* Note, the entire while loop is not allowed to schedule.  */
37071da177e4SLinus Torvalds 
37081da177e4SLinus Torvalds 		if (bh) {
37091da177e4SLinus Torvalds 			clear_prepared_bits(bh);
37101da177e4SLinus Torvalds 			reiserfs_clean_and_file_buffer(bh);
37111da177e4SLinus Torvalds 		}
3712a9dd3643SJeff Mahoney 		cleaned = remove_from_transaction(sb, blocknr, cleaned);
37131da177e4SLinus Torvalds 
37141da177e4SLinus Torvalds 		/* find all older transactions with this block, make sure they don't try to write it out */
3715a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3716bd4c625cSLinus Torvalds 					  blocknr);
37171da177e4SLinus Torvalds 		while (cn) {
3718a9dd3643SJeff Mahoney 			if (sb == cn->sb && blocknr == cn->blocknr) {
37191da177e4SLinus Torvalds 				set_bit(BLOCK_FREED, &cn->state);
37201da177e4SLinus Torvalds 				if (cn->bh) {
37211da177e4SLinus Torvalds 					if (!cleaned) {
37221da177e4SLinus Torvalds 						/* remove_from_transaction will brelse the buffer if it was
37231da177e4SLinus Torvalds 						 ** in the current trans
37241da177e4SLinus Torvalds 						 */
3725bd4c625cSLinus Torvalds 						clear_buffer_journal_dirty(cn->
3726bd4c625cSLinus Torvalds 									   bh);
37271da177e4SLinus Torvalds 						clear_buffer_dirty(cn->bh);
3728bd4c625cSLinus Torvalds 						clear_buffer_journal_test(cn->
3729bd4c625cSLinus Torvalds 									  bh);
37301da177e4SLinus Torvalds 						cleaned = 1;
37311da177e4SLinus Torvalds 						put_bh(cn->bh);
3732bd4c625cSLinus Torvalds 						if (atomic_read
3733bd4c625cSLinus Torvalds 						    (&(cn->bh->b_count)) < 0) {
3734a9dd3643SJeff Mahoney 							reiserfs_warning(sb,
373545b03d5eSJeff Mahoney 								 "journal-2138",
373645b03d5eSJeff Mahoney 								 "cn->bh->b_count < 0");
37371da177e4SLinus Torvalds 						}
37381da177e4SLinus Torvalds 					}
37391da177e4SLinus Torvalds 					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
3740bd4c625cSLinus Torvalds 						atomic_dec(&
3741bd4c625cSLinus Torvalds 							   (cn->jlist->
3742bd4c625cSLinus Torvalds 							    j_nonzerolen));
37431da177e4SLinus Torvalds 					}
37441da177e4SLinus Torvalds 					cn->bh = NULL;
37451da177e4SLinus Torvalds 				}
37461da177e4SLinus Torvalds 			}
37471da177e4SLinus Torvalds 			cn = cn->hnext;
37481da177e4SLinus Torvalds 		}
37491da177e4SLinus Torvalds 	}
37501da177e4SLinus Torvalds 
3751398c95bdSChris Mason 	if (bh)
3752398c95bdSChris Mason 		release_buffer_page(bh); /* get_hash grabs the buffer */
37531da177e4SLinus Torvalds 	return 0;
37541da177e4SLinus Torvalds }
37551da177e4SLinus Torvalds 
3756bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode)
3757bd4c625cSLinus Torvalds {
37581da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
37591da177e4SLinus Torvalds 	REISERFS_I(inode)->i_jl = journal->j_current_jl;
37601da177e4SLinus Torvalds 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
37611da177e4SLinus Torvalds }
37621da177e4SLinus Torvalds 
37631da177e4SLinus Torvalds /*
37641da177e4SLinus Torvalds  * returns -1 on error, 0 if no commits/barriers were done and 1
37651da177e4SLinus Torvalds  * if a transaction was actually committed and the barrier was done
37661da177e4SLinus Torvalds  */
37671da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id,
37681da177e4SLinus Torvalds 			     struct reiserfs_journal_list *jl)
37691da177e4SLinus Torvalds {
37701da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
37711da177e4SLinus Torvalds 	struct super_block *sb = inode->i_sb;
37721da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37731da177e4SLinus Torvalds 	int ret = 0;
37741da177e4SLinus Torvalds 
37751da177e4SLinus Torvalds 	/* is it from the current transaction, or from an unknown transaction? */
37761da177e4SLinus Torvalds 	if (id == journal->j_trans_id) {
37771da177e4SLinus Torvalds 		jl = journal->j_current_jl;
37781da177e4SLinus Torvalds 		/* try to let other writers come in and grow this transaction */
37791da177e4SLinus Torvalds 		let_transaction_grow(sb, id);
37801da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
37811da177e4SLinus Torvalds 			goto flush_commit_only;
37821da177e4SLinus Torvalds 		}
37831da177e4SLinus Torvalds 
37841da177e4SLinus Torvalds 		ret = journal_begin(&th, sb, 1);
37851da177e4SLinus Torvalds 		if (ret)
37861da177e4SLinus Torvalds 			return ret;
37871da177e4SLinus Torvalds 
37881da177e4SLinus Torvalds 		/* someone might have ended this transaction while we joined */
37891da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
3790bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3791bd4c625cSLinus Torvalds 						     1);
37921da177e4SLinus Torvalds 			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
37931da177e4SLinus Torvalds 			ret = journal_end(&th, sb, 1);
37941da177e4SLinus Torvalds 			goto flush_commit_only;
37951da177e4SLinus Torvalds 		}
37961da177e4SLinus Torvalds 
37971da177e4SLinus Torvalds 		ret = journal_end_sync(&th, sb, 1);
37981da177e4SLinus Torvalds 		if (!ret)
37991da177e4SLinus Torvalds 			ret = 1;
38001da177e4SLinus Torvalds 
38011da177e4SLinus Torvalds 	} else {
38021da177e4SLinus Torvalds 		/* this gets tricky, we have to make sure the journal list in
38031da177e4SLinus Torvalds 		 * the inode still exists.  We know the list is still around
38041da177e4SLinus Torvalds 		 * if we've got a larger transaction id than the oldest list
38051da177e4SLinus Torvalds 		 */
38061da177e4SLinus Torvalds 	      flush_commit_only:
38071da177e4SLinus Torvalds 		if (journal_list_still_alive(inode->i_sb, id)) {
38081da177e4SLinus Torvalds 			/*
38091da177e4SLinus Torvalds 			 * we only set ret to 1 when we know for sure
38101da177e4SLinus Torvalds 			 * the barrier hasn't been started yet on the commit
38111da177e4SLinus Torvalds 			 * block.
38121da177e4SLinus Torvalds 			 */
38131da177e4SLinus Torvalds 			if (atomic_read(&jl->j_commit_left) > 1)
38141da177e4SLinus Torvalds 				ret = 1;
38151da177e4SLinus Torvalds 			flush_commit_list(sb, jl, 1);
38161da177e4SLinus Torvalds 			if (journal->j_errno)
38171da177e4SLinus Torvalds 				ret = journal->j_errno;
38181da177e4SLinus Torvalds 		}
38191da177e4SLinus Torvalds 	}
38201da177e4SLinus Torvalds 	/* otherwise the list is gone, and long since committed */
38211da177e4SLinus Torvalds 	return ret;
38221da177e4SLinus Torvalds }
38231da177e4SLinus Torvalds 
3824bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode)
3825bd4c625cSLinus Torvalds {
3826600ed416SJeff Mahoney 	unsigned int id = REISERFS_I(inode)->i_trans_id;
38271da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
38281da177e4SLinus Torvalds 
38291da177e4SLinus Torvalds 	/* for the whole inode, assume unset id means it was
38301da177e4SLinus Torvalds 	 * changed in the current transaction.  More conservative
38311da177e4SLinus Torvalds 	 */
38321da177e4SLinus Torvalds 	if (!id || !jl) {
38331da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
38341da177e4SLinus Torvalds 		id = REISERFS_I(inode)->i_trans_id;
38351da177e4SLinus Torvalds 		/* jl will be updated in __commit_trans_jl */
38361da177e4SLinus Torvalds 	}
38371da177e4SLinus Torvalds 
38381da177e4SLinus Torvalds 	return __commit_trans_jl(inode, id, jl);
38391da177e4SLinus Torvalds }
38401da177e4SLinus Torvalds 
3841a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb,
3842bd4c625cSLinus Torvalds 				      struct buffer_head *bh)
3843bd4c625cSLinus Torvalds {
3844a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
3845a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.restore_prepared);
38461da177e4SLinus Torvalds 	if (!bh) {
38471da177e4SLinus Torvalds 		return;
38481da177e4SLinus Torvalds 	}
38491da177e4SLinus Torvalds 	if (test_clear_buffer_journal_restore_dirty(bh) &&
38501da177e4SLinus Torvalds 	    buffer_journal_dirty(bh)) {
38511da177e4SLinus Torvalds 		struct reiserfs_journal_cnode *cn;
3852a9dd3643SJeff Mahoney 		cn = get_journal_hash_dev(sb,
38531da177e4SLinus Torvalds 					  journal->j_list_hash_table,
38541da177e4SLinus Torvalds 					  bh->b_blocknr);
38551da177e4SLinus Torvalds 		if (cn && can_dirty(cn)) {
38561da177e4SLinus Torvalds 			set_buffer_journal_test(bh);
38571da177e4SLinus Torvalds 			mark_buffer_dirty(bh);
38581da177e4SLinus Torvalds 		}
38591da177e4SLinus Torvalds 	}
38601da177e4SLinus Torvalds 	clear_buffer_journal_prepared(bh);
38611da177e4SLinus Torvalds }
38621da177e4SLinus Torvalds 
38631da177e4SLinus Torvalds extern struct tree_balance *cur_tb;
38641da177e4SLinus Torvalds /*
38651da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't
38661da177e4SLinus Torvalds ** be written to disk while we are altering it.  So, we must:
38671da177e4SLinus Torvalds ** clean it
38681da177e4SLinus Torvalds ** wait on it.
38691da177e4SLinus Torvalds **
38701da177e4SLinus Torvalds */
3871a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb,
3872bd4c625cSLinus Torvalds 				 struct buffer_head *bh, int wait)
3873bd4c625cSLinus Torvalds {
3874a9dd3643SJeff Mahoney 	PROC_INFO_INC(sb, journal.prepare);
38751da177e4SLinus Torvalds 
3876ca5de404SNick Piggin 	if (!trylock_buffer(bh)) {
38771da177e4SLinus Torvalds 		if (!wait)
38781da177e4SLinus Torvalds 			return 0;
38791da177e4SLinus Torvalds 		lock_buffer(bh);
38801da177e4SLinus Torvalds 	}
38811da177e4SLinus Torvalds 	set_buffer_journal_prepared(bh);
38821da177e4SLinus Torvalds 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
38831da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
38841da177e4SLinus Torvalds 		set_buffer_journal_restore_dirty(bh);
38851da177e4SLinus Torvalds 	}
38861da177e4SLinus Torvalds 	unlock_buffer(bh);
38871da177e4SLinus Torvalds 	return 1;
38881da177e4SLinus Torvalds }
38891da177e4SLinus Torvalds 
3890bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s)
3891bd4c625cSLinus Torvalds {
38921da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
38931da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
38941da177e4SLinus Torvalds 	struct list_head *entry;
38951da177e4SLinus Torvalds 	time_t now = get_seconds();
38961da177e4SLinus Torvalds 
38971da177e4SLinus Torvalds 	while (!list_empty(&journal->j_journal_list)) {
38981da177e4SLinus Torvalds 		entry = journal->j_journal_list.next;
38991da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
39001da177e4SLinus Torvalds 		/* this check should always be run, to send old lists to disk */
3901a3172027SChris Mason 		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3902a3172027SChris Mason 		    atomic_read(&jl->j_commit_left) == 0 &&
3903a3172027SChris Mason 		    test_transaction(s, jl)) {
39041da177e4SLinus Torvalds 			flush_used_journal_lists(s, jl);
39051da177e4SLinus Torvalds 		} else {
39061da177e4SLinus Torvalds 			break;
39071da177e4SLinus Torvalds 		}
39081da177e4SLinus Torvalds 	}
39091da177e4SLinus Torvalds }
39101da177e4SLinus Torvalds 
39111da177e4SLinus Torvalds /*
39121da177e4SLinus Torvalds ** long and ugly.  If flush, will not return until all commit
39131da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk.
39141da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk.
39151da177e4SLinus Torvalds **
39161da177e4SLinus Torvalds ** keep reading, there are comments as you go along
39171da177e4SLinus Torvalds **
39181da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing
39191da177e4SLinus Torvalds ** journal lists, etc just won't happen.
39201da177e4SLinus Torvalds */
3921bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th,
3922a9dd3643SJeff Mahoney 			  struct super_block *sb, unsigned long nblocks,
3923bd4c625cSLinus Torvalds 			  int flags)
3924bd4c625cSLinus Torvalds {
3925a9dd3643SJeff Mahoney 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
39261da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
39271da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *last_cn = NULL;
39281da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
39291da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
39301da177e4SLinus Torvalds 	struct buffer_head *c_bh;	/* commit bh */
39311da177e4SLinus Torvalds 	struct buffer_head *d_bh;	/* desc bh */
39321da177e4SLinus Torvalds 	int cur_write_start = 0;	/* start index of current log write */
39331da177e4SLinus Torvalds 	int old_start;
39341da177e4SLinus Torvalds 	int i;
3935a44c94a7SAlexander Zarochentsev 	int flush;
3936a44c94a7SAlexander Zarochentsev 	int wait_on_commit;
39371da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl, *temp_jl;
39381da177e4SLinus Torvalds 	struct list_head *entry, *safe;
39391da177e4SLinus Torvalds 	unsigned long jindex;
3940600ed416SJeff Mahoney 	unsigned int commit_trans_id;
39411da177e4SLinus Torvalds 	int trans_half;
39421da177e4SLinus Torvalds 
39431da177e4SLinus Torvalds 	BUG_ON(th->t_refcount > 1);
39441da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
39451da177e4SLinus Torvalds 
3946a44c94a7SAlexander Zarochentsev 	/* protect flush_older_commits from doing mistakes if the
3947a44c94a7SAlexander Zarochentsev            transaction ID counter gets overflowed.  */
3948600ed416SJeff Mahoney 	if (th->t_trans_id == ~0U)
3949a44c94a7SAlexander Zarochentsev 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
3950a44c94a7SAlexander Zarochentsev 	flush = flags & FLUSH_ALL;
3951a44c94a7SAlexander Zarochentsev 	wait_on_commit = flags & WAIT;
3952a44c94a7SAlexander Zarochentsev 
39531da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
3954a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end");
39551da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3956a9dd3643SJeff Mahoney 		reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3957bd4c625cSLinus Torvalds 					     1);
3958a9dd3643SJeff Mahoney 		journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
39591da177e4SLinus Torvalds 	}
39601da177e4SLinus Torvalds 
3961a9dd3643SJeff Mahoney 	lock_journal(sb);
39621da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39631da177e4SLinus Torvalds 		flags |= FLUSH_ALL;
39641da177e4SLinus Torvalds 		flush = 1;
39651da177e4SLinus Torvalds 	}
39661da177e4SLinus Torvalds 	if (journal->j_next_async_flush) {
39671da177e4SLinus Torvalds 		flags |= COMMIT_NOW | WAIT;
39681da177e4SLinus Torvalds 		wait_on_commit = 1;
39691da177e4SLinus Torvalds 	}
39701da177e4SLinus Torvalds 
39711da177e4SLinus Torvalds 	/* check_journal_end locks the journal, and unlocks if it does not return 1
39721da177e4SLinus Torvalds 	 ** it tells us if we should continue with the journal_end, or just return
39731da177e4SLinus Torvalds 	 */
3974a9dd3643SJeff Mahoney 	if (!check_journal_end(th, sb, nblocks, flags)) {
3975a9dd3643SJeff Mahoney 		sb->s_dirt = 1;
3976a9dd3643SJeff Mahoney 		wake_queued_writers(sb);
3977a9dd3643SJeff Mahoney 		reiserfs_async_progress_wait(sb);
39781da177e4SLinus Torvalds 		goto out;
39791da177e4SLinus Torvalds 	}
39801da177e4SLinus Torvalds 
39811da177e4SLinus Torvalds 	/* check_journal_end might set these, check again */
39821da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39831da177e4SLinus Torvalds 		flush = 1;
39841da177e4SLinus Torvalds 	}
39851da177e4SLinus Torvalds 
39861da177e4SLinus Torvalds 	/*
39871da177e4SLinus Torvalds 	 ** j must wait means we have to flush the log blocks, and the real blocks for
39881da177e4SLinus Torvalds 	 ** this transaction
39891da177e4SLinus Torvalds 	 */
39901da177e4SLinus Torvalds 	if (journal->j_must_wait > 0) {
39911da177e4SLinus Torvalds 		flush = 1;
39921da177e4SLinus Torvalds 	}
39931da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
3994ef43bc4fSJan Kara 	/* quota ops might need to nest, setup the journal_info pointer for them
3995ef43bc4fSJan Kara 	 * and raise the refcount so that it is > 0. */
39961da177e4SLinus Torvalds 	current->journal_info = th;
3997ef43bc4fSJan Kara 	th->t_refcount++;
39981da177e4SLinus Torvalds 	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
39991da177e4SLinus Torvalds 						 * the transaction */
4000ef43bc4fSJan Kara 	th->t_refcount--;
40011da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
40021da177e4SLinus Torvalds #endif
40031da177e4SLinus Torvalds 
40041da177e4SLinus Torvalds 	/* setup description block */
4005bd4c625cSLinus Torvalds 	d_bh =
4006a9dd3643SJeff Mahoney 	    journal_getblk(sb,
4007a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4008bd4c625cSLinus Torvalds 			   journal->j_start);
40091da177e4SLinus Torvalds 	set_buffer_uptodate(d_bh);
40101da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
40111da177e4SLinus Torvalds 	memset(d_bh->b_data, 0, d_bh->b_size);
40121da177e4SLinus Torvalds 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
40131da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
40141da177e4SLinus Torvalds 
40151da177e4SLinus Torvalds 	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
4016a9dd3643SJeff Mahoney 	c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4017bd4c625cSLinus Torvalds 			      ((journal->j_start + journal->j_len +
4018a9dd3643SJeff Mahoney 				1) % SB_ONDISK_JOURNAL_SIZE(sb)));
40191da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
40201da177e4SLinus Torvalds 	memset(c_bh->b_data, 0, c_bh->b_size);
40211da177e4SLinus Torvalds 	set_commit_trans_id(commit, journal->j_trans_id);
40221da177e4SLinus Torvalds 	set_buffer_uptodate(c_bh);
40231da177e4SLinus Torvalds 
40241da177e4SLinus Torvalds 	/* init this journal list */
40251da177e4SLinus Torvalds 	jl = journal->j_current_jl;
40261da177e4SLinus Torvalds 
40271da177e4SLinus Torvalds 	/* we lock the commit before doing anything because
40281da177e4SLinus Torvalds 	 * we want to make sure nobody tries to run flush_commit_list until
40291da177e4SLinus Torvalds 	 * the new transaction is fully setup, and we've already flushed the
40301da177e4SLinus Torvalds 	 * ordered bh list
40311da177e4SLinus Torvalds 	 */
40328ebc4232SFrederic Weisbecker 	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
40331da177e4SLinus Torvalds 
40341da177e4SLinus Torvalds 	/* save the transaction id in case we need to commit it later */
40351da177e4SLinus Torvalds 	commit_trans_id = jl->j_trans_id;
40361da177e4SLinus Torvalds 
40371da177e4SLinus Torvalds 	atomic_set(&jl->j_older_commits_done, 0);
40381da177e4SLinus Torvalds 	jl->j_trans_id = journal->j_trans_id;
40391da177e4SLinus Torvalds 	jl->j_timestamp = journal->j_trans_start_time;
40401da177e4SLinus Torvalds 	jl->j_commit_bh = c_bh;
40411da177e4SLinus Torvalds 	jl->j_start = journal->j_start;
40421da177e4SLinus Torvalds 	jl->j_len = journal->j_len;
40431da177e4SLinus Torvalds 	atomic_set(&jl->j_nonzerolen, journal->j_len);
40441da177e4SLinus Torvalds 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
40451da177e4SLinus Torvalds 	jl->j_realblock = NULL;
40461da177e4SLinus Torvalds 
40471da177e4SLinus Torvalds 	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
40481da177e4SLinus Torvalds 	 **  for each real block, add it to the journal list hash,
40491da177e4SLinus Torvalds 	 ** copy into real block index array in the commit or desc block
40501da177e4SLinus Torvalds 	 */
4051a9dd3643SJeff Mahoney 	trans_half = journal_trans_half(sb->s_blocksize);
40521da177e4SLinus Torvalds 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
40531da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
4054a9dd3643SJeff Mahoney 			jl_cn = get_cnode(sb);
40551da177e4SLinus Torvalds 			if (!jl_cn) {
4056a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-1676",
4057c3a9c210SJeff Mahoney 					       "get_cnode returned NULL");
40581da177e4SLinus Torvalds 			}
40591da177e4SLinus Torvalds 			if (i == 0) {
40601da177e4SLinus Torvalds 				jl->j_realblock = jl_cn;
40611da177e4SLinus Torvalds 			}
40621da177e4SLinus Torvalds 			jl_cn->prev = last_cn;
40631da177e4SLinus Torvalds 			jl_cn->next = NULL;
40641da177e4SLinus Torvalds 			if (last_cn) {
40651da177e4SLinus Torvalds 				last_cn->next = jl_cn;
40661da177e4SLinus Torvalds 			}
40671da177e4SLinus Torvalds 			last_cn = jl_cn;
40681da177e4SLinus Torvalds 			/* make sure the block we are trying to log is not a block
40691da177e4SLinus Torvalds 			   of journal or reserved area */
40701da177e4SLinus Torvalds 
4071bd4c625cSLinus Torvalds 			if (is_block_in_log_or_reserved_area
4072a9dd3643SJeff Mahoney 			    (sb, cn->bh->b_blocknr)) {
4073a9dd3643SJeff Mahoney 				reiserfs_panic(sb, "journal-2332",
4074c3a9c210SJeff Mahoney 					       "Trying to log block %lu, "
4075c3a9c210SJeff Mahoney 					       "which is a log block",
4076bd4c625cSLinus Torvalds 					       cn->bh->b_blocknr);
40771da177e4SLinus Torvalds 			}
40781da177e4SLinus Torvalds 			jl_cn->blocknr = cn->bh->b_blocknr;
40791da177e4SLinus Torvalds 			jl_cn->state = 0;
4080a9dd3643SJeff Mahoney 			jl_cn->sb = sb;
40811da177e4SLinus Torvalds 			jl_cn->bh = cn->bh;
40821da177e4SLinus Torvalds 			jl_cn->jlist = jl;
40831da177e4SLinus Torvalds 			insert_journal_hash(journal->j_list_hash_table, jl_cn);
40841da177e4SLinus Torvalds 			if (i < trans_half) {
4085bd4c625cSLinus Torvalds 				desc->j_realblock[i] =
4086bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
40871da177e4SLinus Torvalds 			} else {
4088bd4c625cSLinus Torvalds 				commit->j_realblock[i - trans_half] =
4089bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
40901da177e4SLinus Torvalds 			}
40911da177e4SLinus Torvalds 		} else {
40921da177e4SLinus Torvalds 			i--;
40931da177e4SLinus Torvalds 		}
40941da177e4SLinus Torvalds 	}
40951da177e4SLinus Torvalds 	set_desc_trans_len(desc, journal->j_len);
40961da177e4SLinus Torvalds 	set_desc_mount_id(desc, journal->j_mount_id);
40971da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
40981da177e4SLinus Torvalds 	set_commit_trans_len(commit, journal->j_len);
40991da177e4SLinus Torvalds 
41001da177e4SLinus Torvalds 	/* special check in case all buffers in the journal were marked for not logging */
410114a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
41021da177e4SLinus Torvalds 
41031da177e4SLinus Torvalds 	/* we're about to dirty all the log blocks, mark the description block
41041da177e4SLinus Torvalds 	 * dirty now too.  Don't mark the commit block dirty until all the
41051da177e4SLinus Torvalds 	 * others are on disk
41061da177e4SLinus Torvalds 	 */
41071da177e4SLinus Torvalds 	mark_buffer_dirty(d_bh);
41081da177e4SLinus Torvalds 
41091da177e4SLinus Torvalds 	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
41101da177e4SLinus Torvalds 	cur_write_start = journal->j_start;
41111da177e4SLinus Torvalds 	cn = journal->j_first;
41121da177e4SLinus Torvalds 	jindex = 1;		/* start at one so we don't get the desc again */
41131da177e4SLinus Torvalds 	while (cn) {
41141da177e4SLinus Torvalds 		clear_buffer_journal_new(cn->bh);
41151da177e4SLinus Torvalds 		/* copy all the real blocks into log area.  dirty log blocks */
41161da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
41171da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
41181da177e4SLinus Torvalds 			char *addr;
41191da177e4SLinus Torvalds 			struct page *page;
4120bd4c625cSLinus Torvalds 			tmp_bh =
4121a9dd3643SJeff Mahoney 			    journal_getblk(sb,
4122a9dd3643SJeff Mahoney 					   SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
4123bd4c625cSLinus Torvalds 					   ((cur_write_start +
4124bd4c625cSLinus Torvalds 					     jindex) %
4125a9dd3643SJeff Mahoney 					    SB_ONDISK_JOURNAL_SIZE(sb)));
41261da177e4SLinus Torvalds 			set_buffer_uptodate(tmp_bh);
41271da177e4SLinus Torvalds 			page = cn->bh->b_page;
41281da177e4SLinus Torvalds 			addr = kmap(page);
4129bd4c625cSLinus Torvalds 			memcpy(tmp_bh->b_data,
4130bd4c625cSLinus Torvalds 			       addr + offset_in_page(cn->bh->b_data),
41311da177e4SLinus Torvalds 			       cn->bh->b_size);
41321da177e4SLinus Torvalds 			kunmap(page);
41331da177e4SLinus Torvalds 			mark_buffer_dirty(tmp_bh);
41341da177e4SLinus Torvalds 			jindex++;
41351da177e4SLinus Torvalds 			set_buffer_journal_dirty(cn->bh);
41361da177e4SLinus Torvalds 			clear_buffer_journaled(cn->bh);
41371da177e4SLinus Torvalds 		} else {
41381da177e4SLinus Torvalds 			/* JDirty cleared sometime during transaction.  don't log this one */
4139a9dd3643SJeff Mahoney 			reiserfs_warning(sb, "journal-2048",
414045b03d5eSJeff Mahoney 					 "BAD, buffer in journal hash, "
414145b03d5eSJeff Mahoney 					 "but not JDirty!");
41421da177e4SLinus Torvalds 			brelse(cn->bh);
41431da177e4SLinus Torvalds 		}
41441da177e4SLinus Torvalds 		next = cn->next;
4145a9dd3643SJeff Mahoney 		free_cnode(sb, cn);
41461da177e4SLinus Torvalds 		cn = next;
4147e6950a4dSFrederic Weisbecker 		reiserfs_write_unlock(sb);
41481da177e4SLinus Torvalds 		cond_resched();
4149e6950a4dSFrederic Weisbecker 		reiserfs_write_lock(sb);
41501da177e4SLinus Torvalds 	}
41511da177e4SLinus Torvalds 
41521da177e4SLinus Torvalds 	/* we are done  with both the c_bh and d_bh, but
41531da177e4SLinus Torvalds 	 ** c_bh must be written after all other commit blocks,
41541da177e4SLinus Torvalds 	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
41551da177e4SLinus Torvalds 	 */
41561da177e4SLinus Torvalds 
4157a9dd3643SJeff Mahoney 	journal->j_current_jl = alloc_journal_list(sb);
41581da177e4SLinus Torvalds 
41591da177e4SLinus Torvalds 	/* now it is safe to insert this transaction on the main list */
41601da177e4SLinus Torvalds 	list_add_tail(&jl->j_list, &journal->j_journal_list);
41611da177e4SLinus Torvalds 	list_add_tail(&jl->j_working_list, &journal->j_working_list);
41621da177e4SLinus Torvalds 	journal->j_num_work_lists++;
41631da177e4SLinus Torvalds 
41641da177e4SLinus Torvalds 	/* reset journal values for the next transaction */
41651da177e4SLinus Torvalds 	old_start = journal->j_start;
4166bd4c625cSLinus Torvalds 	journal->j_start =
4167bd4c625cSLinus Torvalds 	    (journal->j_start + journal->j_len +
4168a9dd3643SJeff Mahoney 	     2) % SB_ONDISK_JOURNAL_SIZE(sb);
41691da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
41701da177e4SLinus Torvalds 	journal->j_bcount = 0;
41711da177e4SLinus Torvalds 	journal->j_last = NULL;
41721da177e4SLinus Torvalds 	journal->j_first = NULL;
41731da177e4SLinus Torvalds 	journal->j_len = 0;
41741da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
4175a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
4176a44c94a7SAlexander Zarochentsev 	if (++journal->j_trans_id == 0)
4177a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
41781da177e4SLinus Torvalds 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
41791da177e4SLinus Torvalds 	journal->j_must_wait = 0;
41801da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
41811da177e4SLinus Torvalds 	journal->j_next_full_flush = 0;
41821da177e4SLinus Torvalds 	journal->j_next_async_flush = 0;
4183a9dd3643SJeff Mahoney 	init_journal_hash(sb);
41841da177e4SLinus Torvalds 
41851da177e4SLinus Torvalds 	// make sure reiserfs_add_jh sees the new current_jl before we
41861da177e4SLinus Torvalds 	// write out the tails
41871da177e4SLinus Torvalds 	smp_mb();
41881da177e4SLinus Torvalds 
41891da177e4SLinus Torvalds 	/* tail conversion targets have to hit the disk before we end the
41901da177e4SLinus Torvalds 	 * transaction.  Otherwise a later transaction might repack the tail
41911da177e4SLinus Torvalds 	 * before this transaction commits, leaving the data block unflushed and
41921da177e4SLinus Torvalds 	 * clean, if we crash before the later transaction commits, the data block
41931da177e4SLinus Torvalds 	 * is lost.
41941da177e4SLinus Torvalds 	 */
41951da177e4SLinus Torvalds 	if (!list_empty(&jl->j_tail_bh_list)) {
41968ebc4232SFrederic Weisbecker 		reiserfs_write_unlock(sb);
41971da177e4SLinus Torvalds 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
41981da177e4SLinus Torvalds 				      journal, jl, &jl->j_tail_bh_list);
41998ebc4232SFrederic Weisbecker 		reiserfs_write_lock(sb);
42001da177e4SLinus Torvalds 	}
420114a61442SEric Sesterhenn 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
420290415deaSJeff Mahoney 	mutex_unlock(&jl->j_commit_mutex);
42031da177e4SLinus Torvalds 
42041da177e4SLinus Torvalds 	/* honor the flush wishes from the caller, simple commits can
42051da177e4SLinus Torvalds 	 ** be done outside the journal lock, they are done below
42061da177e4SLinus Torvalds 	 **
42071da177e4SLinus Torvalds 	 ** if we don't flush the commit list right now, we put it into
42081da177e4SLinus Torvalds 	 ** the work queue so the people waiting on the async progress work
42091da177e4SLinus Torvalds 	 ** queue don't wait for this proc to flush journal lists and such.
42101da177e4SLinus Torvalds 	 */
42111da177e4SLinus Torvalds 	if (flush) {
4212a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
4213a9dd3643SJeff Mahoney 		flush_journal_list(sb, jl, 1);
42141da177e4SLinus Torvalds 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
42151da177e4SLinus Torvalds 		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
42161da177e4SLinus Torvalds 
42171da177e4SLinus Torvalds 	/* if the next transaction has any chance of wrapping, flush
42181da177e4SLinus Torvalds 	 ** transactions that might get overwritten.  If any journal lists are very
42191da177e4SLinus Torvalds 	 ** old flush them as well.
42201da177e4SLinus Torvalds 	 */
42211da177e4SLinus Torvalds       first_jl:
42221da177e4SLinus Torvalds 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
42231da177e4SLinus Torvalds 		temp_jl = JOURNAL_LIST_ENTRY(entry);
42241da177e4SLinus Torvalds 		if (journal->j_start <= temp_jl->j_start) {
42251da177e4SLinus Torvalds 			if ((journal->j_start + journal->j_trans_max + 1) >=
4226bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4227a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
42281da177e4SLinus Torvalds 				goto first_jl;
42291da177e4SLinus Torvalds 			} else if ((journal->j_start +
42301da177e4SLinus Torvalds 				    journal->j_trans_max + 1) <
4231a9dd3643SJeff Mahoney 				   SB_ONDISK_JOURNAL_SIZE(sb)) {
42321da177e4SLinus Torvalds 				/* if we don't cross into the next transaction and we don't
42331da177e4SLinus Torvalds 				 * wrap, there is no way we can overlap any later transactions
42341da177e4SLinus Torvalds 				 * break now
42351da177e4SLinus Torvalds 				 */
42361da177e4SLinus Torvalds 				break;
42371da177e4SLinus Torvalds 			}
42381da177e4SLinus Torvalds 		} else if ((journal->j_start +
42391da177e4SLinus Torvalds 			    journal->j_trans_max + 1) >
4240a9dd3643SJeff Mahoney 			   SB_ONDISK_JOURNAL_SIZE(sb)) {
42411da177e4SLinus Torvalds 			if (((journal->j_start + journal->j_trans_max + 1) %
4242a9dd3643SJeff Mahoney 			     SB_ONDISK_JOURNAL_SIZE(sb)) >=
4243bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
4244a9dd3643SJeff Mahoney 				flush_used_journal_lists(sb, temp_jl);
42451da177e4SLinus Torvalds 				goto first_jl;
42461da177e4SLinus Torvalds 			} else {
42471da177e4SLinus Torvalds 				/* we don't overlap anything from our start to the end of the
42481da177e4SLinus Torvalds 				 * log, and our wrapped portion doesn't overlap anything at
42491da177e4SLinus Torvalds 				 * the start of the log.  We can break
42501da177e4SLinus Torvalds 				 */
42511da177e4SLinus Torvalds 				break;
42521da177e4SLinus Torvalds 			}
42531da177e4SLinus Torvalds 		}
42541da177e4SLinus Torvalds 	}
4255a9dd3643SJeff Mahoney 	flush_old_journal_lists(sb);
42561da177e4SLinus Torvalds 
4257bd4c625cSLinus Torvalds 	journal->j_current_jl->j_list_bitmap =
4258a9dd3643SJeff Mahoney 	    get_list_bitmap(sb, journal->j_current_jl);
42591da177e4SLinus Torvalds 
42601da177e4SLinus Torvalds 	if (!(journal->j_current_jl->j_list_bitmap)) {
4261a9dd3643SJeff Mahoney 		reiserfs_panic(sb, "journal-1996",
4262c3a9c210SJeff Mahoney 			       "could not get a list bitmap");
42631da177e4SLinus Torvalds 	}
42641da177e4SLinus Torvalds 
42651da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
4266a9dd3643SJeff Mahoney 	unlock_journal(sb);
42671da177e4SLinus Torvalds 	/* wake up anybody waiting to join. */
42681da177e4SLinus Torvalds 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
42691da177e4SLinus Torvalds 	wake_up(&(journal->j_join_wait));
42701da177e4SLinus Torvalds 
42711da177e4SLinus Torvalds 	if (!flush && wait_on_commit &&
4272a9dd3643SJeff Mahoney 	    journal_list_still_alive(sb, commit_trans_id)) {
4273a9dd3643SJeff Mahoney 		flush_commit_list(sb, jl, 1);
42741da177e4SLinus Torvalds 	}
42751da177e4SLinus Torvalds       out:
4276a9dd3643SJeff Mahoney 	reiserfs_check_lock_depth(sb, "journal end2");
42771da177e4SLinus Torvalds 
42781da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
42791da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
42801da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
42811da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
4282a9dd3643SJeff Mahoney 	th->t_super = sb;
42831da177e4SLinus Torvalds 
42841da177e4SLinus Torvalds 	return journal->j_errno;
42851da177e4SLinus Torvalds }
42861da177e4SLinus Torvalds 
428732e8b106SJeff Mahoney /* Send the file system read only and refuse new transactions */
428832e8b106SJeff Mahoney void reiserfs_abort_journal(struct super_block *sb, int errno)
42891da177e4SLinus Torvalds {
42901da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
42911da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
42921da177e4SLinus Torvalds 		return;
42931da177e4SLinus Torvalds 
429432e8b106SJeff Mahoney 	if (!journal->j_errno)
429532e8b106SJeff Mahoney 		journal->j_errno = errno;
42961da177e4SLinus Torvalds 
42971da177e4SLinus Torvalds 	sb->s_flags |= MS_RDONLY;
42981da177e4SLinus Torvalds 	set_bit(J_ABORTED, &journal->j_state);
42991da177e4SLinus Torvalds 
43001da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
43011da177e4SLinus Torvalds 	dump_stack();
43021da177e4SLinus Torvalds #endif
43031da177e4SLinus Torvalds }
4304