xref: /openbmc/linux/fs/reiserfs/journal.c (revision fbe5498b)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds ** Write ahead logging implementation copyright Chris Mason 2000
31da177e4SLinus Torvalds **
41da177e4SLinus Torvalds ** The background commits make this code very interrelated, and
51da177e4SLinus Torvalds ** overly complex.  I need to rethink things a bit....The major players:
61da177e4SLinus Torvalds **
71da177e4SLinus Torvalds ** journal_begin -- call with the number of blocks you expect to log.
81da177e4SLinus Torvalds **                  If the current transaction is too
91da177e4SLinus Torvalds ** 		    old, it will block until the current transaction is
101da177e4SLinus Torvalds ** 		    finished, and then start a new one.
111da177e4SLinus Torvalds **		    Usually, your transaction will get joined in with
121da177e4SLinus Torvalds **                  previous ones for speed.
131da177e4SLinus Torvalds **
141da177e4SLinus Torvalds ** journal_join  -- same as journal_begin, but won't block on the current
151da177e4SLinus Torvalds **                  transaction regardless of age.  Don't ever call
161da177e4SLinus Torvalds **                  this.  Ever.  There are only two places it should be
171da177e4SLinus Torvalds **                  called from, and they are both inside this file.
181da177e4SLinus Torvalds **
191da177e4SLinus Torvalds ** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
201da177e4SLinus Torvalds **                       that might make them get sent to disk
211da177e4SLinus Torvalds **                       and then marks them BH_JDirty.  Puts the buffer head
221da177e4SLinus Torvalds **                       into the current transaction hash.
231da177e4SLinus Torvalds **
241da177e4SLinus Torvalds ** journal_end -- if the current transaction is batchable, it does nothing
251da177e4SLinus Torvalds **                   otherwise, it could do an async/synchronous commit, or
261da177e4SLinus Torvalds **                   a full flush of all log and real blocks in the
271da177e4SLinus Torvalds **                   transaction.
281da177e4SLinus Torvalds **
291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and
301da177e4SLinus Torvalds **                      commit blocks are sent to disk.  Forces commit blocks
311da177e4SLinus Torvalds **                      to disk for all backgrounded commits that have been
321da177e4SLinus Torvalds **                      around too long.
331da177e4SLinus Torvalds **		     -- Note, if you call this as an immediate flush from
341da177e4SLinus Torvalds **		        within kupdate, it will ignore the immediate flag
351da177e4SLinus Torvalds */
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include <asm/uaccess.h>
381da177e4SLinus Torvalds #include <asm/system.h>
391da177e4SLinus Torvalds 
401da177e4SLinus Torvalds #include <linux/time.h>
416188e10dSMatthew Wilcox #include <linux/semaphore.h>
421da177e4SLinus Torvalds 
431da177e4SLinus Torvalds #include <linux/vmalloc.h>
441da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
451da177e4SLinus Torvalds 
461da177e4SLinus Torvalds #include <linux/kernel.h>
471da177e4SLinus Torvalds #include <linux/errno.h>
481da177e4SLinus Torvalds #include <linux/fcntl.h>
491da177e4SLinus Torvalds #include <linux/stat.h>
501da177e4SLinus Torvalds #include <linux/string.h>
511da177e4SLinus Torvalds #include <linux/smp_lock.h>
521da177e4SLinus Torvalds #include <linux/buffer_head.h>
531da177e4SLinus Torvalds #include <linux/workqueue.h>
541da177e4SLinus Torvalds #include <linux/writeback.h>
551da177e4SLinus Torvalds #include <linux/blkdev.h>
563fcfab16SAndrew Morton #include <linux/backing-dev.h>
571da177e4SLinus Torvalds 
581da177e4SLinus Torvalds /* gets a struct reiserfs_journal_list * from a list head */
591da177e4SLinus Torvalds #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
601da177e4SLinus Torvalds                                j_list))
611da177e4SLinus Torvalds #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
621da177e4SLinus Torvalds                                j_working_list))
631da177e4SLinus Torvalds 
641da177e4SLinus Torvalds /* the number of mounted filesystems.  This is used to decide when to
651da177e4SLinus Torvalds ** start and kill the commit workqueue
661da177e4SLinus Torvalds */
671da177e4SLinus Torvalds static int reiserfs_mounted_fs_count;
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds static struct workqueue_struct *commit_wq;
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds #define JOURNAL_TRANS_HALF 1018	/* must be correct to keep the desc and commit
721da177e4SLinus Torvalds 				   structs at 4k */
731da177e4SLinus Torvalds #define BUFNR 64		/*read ahead */
741da177e4SLinus Torvalds 
751da177e4SLinus Torvalds /* cnode stat bits.  Move these into reiserfs_fs.h */
761da177e4SLinus Torvalds 
771da177e4SLinus Torvalds #define BLOCK_FREED 2		/* this block was freed, and can't be written.  */
781da177e4SLinus Torvalds #define BLOCK_FREED_HOLDER 3	/* this block was freed during this transaction, and can't be written */
791da177e4SLinus Torvalds 
801da177e4SLinus Torvalds #define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
811da177e4SLinus Torvalds #define BLOCK_DIRTIED 5
821da177e4SLinus Torvalds 
831da177e4SLinus Torvalds /* journal list state bits */
841da177e4SLinus Torvalds #define LIST_TOUCHED 1
851da177e4SLinus Torvalds #define LIST_DIRTY   2
861da177e4SLinus Torvalds #define LIST_COMMIT_PENDING  4	/* someone will commit this list */
871da177e4SLinus Torvalds 
881da177e4SLinus Torvalds /* flags for do_journal_end */
891da177e4SLinus Torvalds #define FLUSH_ALL   1		/* flush commit and real blocks */
901da177e4SLinus Torvalds #define COMMIT_NOW  2		/* end and commit this transaction */
911da177e4SLinus Torvalds #define WAIT        4		/* wait for the log blocks to hit the disk */
921da177e4SLinus Torvalds 
93bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *,
94bd4c625cSLinus Torvalds 			  struct super_block *, unsigned long nblocks,
95bd4c625cSLinus Torvalds 			  int flags);
96bd4c625cSLinus Torvalds static int flush_journal_list(struct super_block *s,
97bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall);
98bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
99bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall);
1001da177e4SLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn);
101bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
102bd4c625cSLinus Torvalds 			struct super_block *p_s_sb, unsigned long nblocks);
1031da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
1041da177e4SLinus Torvalds 			       struct reiserfs_journal *journal);
1051da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
1061da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl);
107c4028958SDavid Howells static void flush_async_commits(struct work_struct *work);
1081da177e4SLinus Torvalds static void queue_log_writer(struct super_block *s);
1091da177e4SLinus Torvalds 
1101da177e4SLinus Torvalds /* values for join in do_journal_begin_r */
1111da177e4SLinus Torvalds enum {
1121da177e4SLinus Torvalds 	JBEGIN_REG = 0,		/* regular journal begin */
1131da177e4SLinus Torvalds 	JBEGIN_JOIN = 1,	/* join the running transaction if at all possible */
1141da177e4SLinus Torvalds 	JBEGIN_ABORT = 2,	/* called from cleanup code, ignores aborted flag */
1151da177e4SLinus Torvalds };
1161da177e4SLinus Torvalds 
1171da177e4SLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
1181da177e4SLinus Torvalds 			      struct super_block *p_s_sb,
1191da177e4SLinus Torvalds 			      unsigned long nblocks, int join);
1201da177e4SLinus Torvalds 
121bd4c625cSLinus Torvalds static void init_journal_hash(struct super_block *p_s_sb)
122bd4c625cSLinus Torvalds {
1231da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
124bd4c625cSLinus Torvalds 	memset(journal->j_hash_table, 0,
125bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
1261da177e4SLinus Torvalds }
1271da177e4SLinus Torvalds 
/*
** drops the dirty bit and the journal_test bit on bh.  A NULL bh is
** tolerated and ignored.  Always returns 0.
**
** Historical note from the original author: this exists because
** refile_buffer must not be allowed to schedule after a block has been
** freed; see remove_from_transaction and journal_mark_freed.
*/
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}
1411da177e4SLinus Torvalds 
1421da177e4SLinus Torvalds static void disable_barrier(struct super_block *s)
1431da177e4SLinus Torvalds {
1441da177e4SLinus Torvalds 	REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
145bd4c625cSLinus Torvalds 	printk("reiserfs: disabling flush barriers on %s\n",
146bd4c625cSLinus Torvalds 	       reiserfs_bdevname(s));
1471da177e4SLinus Torvalds }
1481da177e4SLinus Torvalds 
149bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
150bd4c625cSLinus Torvalds 							 *p_s_sb)
151bd4c625cSLinus Torvalds {
1521da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
1531da177e4SLinus Torvalds 	static int id;
1541da177e4SLinus Torvalds 
155d739b42bSPekka Enberg 	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
1561da177e4SLinus Torvalds 	if (!bn) {
1571da177e4SLinus Torvalds 		return NULL;
1581da177e4SLinus Torvalds 	}
159d739b42bSPekka Enberg 	bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
1601da177e4SLinus Torvalds 	if (!bn->data) {
161d739b42bSPekka Enberg 		kfree(bn);
1621da177e4SLinus Torvalds 		return NULL;
1631da177e4SLinus Torvalds 	}
1641da177e4SLinus Torvalds 	bn->id = id++;
1651da177e4SLinus Torvalds 	INIT_LIST_HEAD(&bn->list);
1661da177e4SLinus Torvalds 	return bn;
1671da177e4SLinus Torvalds }
1681da177e4SLinus Torvalds 
169bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
170bd4c625cSLinus Torvalds {
1711da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1721da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
1731da177e4SLinus Torvalds 	struct list_head *entry = journal->j_bitmap_nodes.next;
1741da177e4SLinus Torvalds 
1751da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes++;
1761da177e4SLinus Torvalds       repeat:
1771da177e4SLinus Torvalds 
1781da177e4SLinus Torvalds 	if (entry != &journal->j_bitmap_nodes) {
1791da177e4SLinus Torvalds 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
1801da177e4SLinus Torvalds 		list_del(entry);
1811da177e4SLinus Torvalds 		memset(bn->data, 0, p_s_sb->s_blocksize);
1821da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
1831da177e4SLinus Torvalds 		return bn;
1841da177e4SLinus Torvalds 	}
1851da177e4SLinus Torvalds 	bn = allocate_bitmap_node(p_s_sb);
1861da177e4SLinus Torvalds 	if (!bn) {
1871da177e4SLinus Torvalds 		yield();
1881da177e4SLinus Torvalds 		goto repeat;
1891da177e4SLinus Torvalds 	}
1901da177e4SLinus Torvalds 	return bn;
1911da177e4SLinus Torvalds }
1921da177e4SLinus Torvalds static inline void free_bitmap_node(struct super_block *p_s_sb,
193bd4c625cSLinus Torvalds 				    struct reiserfs_bitmap_node *bn)
194bd4c625cSLinus Torvalds {
1951da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1961da177e4SLinus Torvalds 	journal->j_used_bitmap_nodes--;
1971da177e4SLinus Torvalds 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
198d739b42bSPekka Enberg 		kfree(bn->data);
199d739b42bSPekka Enberg 		kfree(bn);
2001da177e4SLinus Torvalds 	} else {
2011da177e4SLinus Torvalds 		list_add(&bn->list, &journal->j_bitmap_nodes);
2021da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes++;
2031da177e4SLinus Torvalds 	}
2041da177e4SLinus Torvalds }
2051da177e4SLinus Torvalds 
206bd4c625cSLinus Torvalds static void allocate_bitmap_nodes(struct super_block *p_s_sb)
207bd4c625cSLinus Torvalds {
2081da177e4SLinus Torvalds 	int i;
2091da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2101da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn = NULL;
2111da177e4SLinus Torvalds 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
2121da177e4SLinus Torvalds 		bn = allocate_bitmap_node(p_s_sb);
2131da177e4SLinus Torvalds 		if (bn) {
2141da177e4SLinus Torvalds 			list_add(&bn->list, &journal->j_bitmap_nodes);
2151da177e4SLinus Torvalds 			journal->j_free_bitmap_nodes++;
2161da177e4SLinus Torvalds 		} else {
2171da177e4SLinus Torvalds 			break;	// this is ok, we'll try again when more are needed
2181da177e4SLinus Torvalds 		}
2191da177e4SLinus Torvalds 	}
2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
2223ee16670SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
2233ee16670SJeff Mahoney 				  b_blocknr_t block,
224bd4c625cSLinus Torvalds 				  struct reiserfs_list_bitmap *jb)
225bd4c625cSLinus Torvalds {
2263ee16670SJeff Mahoney 	unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
2273ee16670SJeff Mahoney 	unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
2281da177e4SLinus Torvalds 
2291da177e4SLinus Torvalds 	if (!jb->bitmaps[bmap_nr]) {
2301da177e4SLinus Torvalds 		jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
2311da177e4SLinus Torvalds 	}
2321da177e4SLinus Torvalds 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
2331da177e4SLinus Torvalds 	return 0;
2341da177e4SLinus Torvalds }
2351da177e4SLinus Torvalds 
2361da177e4SLinus Torvalds static void cleanup_bitmap_list(struct super_block *p_s_sb,
237bd4c625cSLinus Torvalds 				struct reiserfs_list_bitmap *jb)
238bd4c625cSLinus Torvalds {
2391da177e4SLinus Torvalds 	int i;
2401da177e4SLinus Torvalds 	if (jb->bitmaps == NULL)
2411da177e4SLinus Torvalds 		return;
2421da177e4SLinus Torvalds 
243cb680c1bSJeff Mahoney 	for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
2441da177e4SLinus Torvalds 		if (jb->bitmaps[i]) {
2451da177e4SLinus Torvalds 			free_bitmap_node(p_s_sb, jb->bitmaps[i]);
2461da177e4SLinus Torvalds 			jb->bitmaps[i] = NULL;
2471da177e4SLinus Torvalds 		}
2481da177e4SLinus Torvalds 	}
2491da177e4SLinus Torvalds }
2501da177e4SLinus Torvalds 
2511da177e4SLinus Torvalds /*
2521da177e4SLinus Torvalds ** only call this on FS unmount.
2531da177e4SLinus Torvalds */
2541da177e4SLinus Torvalds static int free_list_bitmaps(struct super_block *p_s_sb,
255bd4c625cSLinus Torvalds 			     struct reiserfs_list_bitmap *jb_array)
256bd4c625cSLinus Torvalds {
2571da177e4SLinus Torvalds 	int i;
2581da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2591da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
2601da177e4SLinus Torvalds 		jb = jb_array + i;
2611da177e4SLinus Torvalds 		jb->journal_list = NULL;
2621da177e4SLinus Torvalds 		cleanup_bitmap_list(p_s_sb, jb);
2631da177e4SLinus Torvalds 		vfree(jb->bitmaps);
2641da177e4SLinus Torvalds 		jb->bitmaps = NULL;
2651da177e4SLinus Torvalds 	}
2661da177e4SLinus Torvalds 	return 0;
2671da177e4SLinus Torvalds }
2681da177e4SLinus Torvalds 
269bd4c625cSLinus Torvalds static int free_bitmap_nodes(struct super_block *p_s_sb)
270bd4c625cSLinus Torvalds {
2711da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2721da177e4SLinus Torvalds 	struct list_head *next = journal->j_bitmap_nodes.next;
2731da177e4SLinus Torvalds 	struct reiserfs_bitmap_node *bn;
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds 	while (next != &journal->j_bitmap_nodes) {
2761da177e4SLinus Torvalds 		bn = list_entry(next, struct reiserfs_bitmap_node, list);
2771da177e4SLinus Torvalds 		list_del(next);
278d739b42bSPekka Enberg 		kfree(bn->data);
279d739b42bSPekka Enberg 		kfree(bn);
2801da177e4SLinus Torvalds 		next = journal->j_bitmap_nodes.next;
2811da177e4SLinus Torvalds 		journal->j_free_bitmap_nodes--;
2821da177e4SLinus Torvalds 	}
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds 	return 0;
2851da177e4SLinus Torvalds }
2861da177e4SLinus Torvalds 
2871da177e4SLinus Torvalds /*
2881da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
2891da177e4SLinus Torvalds ** jb_array is the array to be filled in.
2901da177e4SLinus Torvalds */
2911da177e4SLinus Torvalds int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
2921da177e4SLinus Torvalds 				   struct reiserfs_list_bitmap *jb_array,
2933ee16670SJeff Mahoney 				   unsigned int bmap_nr)
294bd4c625cSLinus Torvalds {
2951da177e4SLinus Torvalds 	int i;
2961da177e4SLinus Torvalds 	int failed = 0;
2971da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
2981da177e4SLinus Torvalds 	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
2991da177e4SLinus Torvalds 
3001da177e4SLinus Torvalds 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
3011da177e4SLinus Torvalds 		jb = jb_array + i;
3021da177e4SLinus Torvalds 		jb->journal_list = NULL;
3031da177e4SLinus Torvalds 		jb->bitmaps = vmalloc(mem);
3041da177e4SLinus Torvalds 		if (!jb->bitmaps) {
305bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
306bd4c625cSLinus Torvalds 					 "clm-2000, unable to allocate bitmaps for journal lists");
3071da177e4SLinus Torvalds 			failed = 1;
3081da177e4SLinus Torvalds 			break;
3091da177e4SLinus Torvalds 		}
3101da177e4SLinus Torvalds 		memset(jb->bitmaps, 0, mem);
3111da177e4SLinus Torvalds 	}
3121da177e4SLinus Torvalds 	if (failed) {
3131da177e4SLinus Torvalds 		free_list_bitmaps(p_s_sb, jb_array);
3141da177e4SLinus Torvalds 		return -1;
3151da177e4SLinus Torvalds 	}
3161da177e4SLinus Torvalds 	return 0;
3171da177e4SLinus Torvalds }
3181da177e4SLinus Torvalds 
3191da177e4SLinus Torvalds /*
3201da177e4SLinus Torvalds ** find an available list bitmap.  If you can't find one, flush a commit list
3211da177e4SLinus Torvalds ** and try again
3221da177e4SLinus Torvalds */
323bd4c625cSLinus Torvalds static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
324bd4c625cSLinus Torvalds 						    struct reiserfs_journal_list
325bd4c625cSLinus Torvalds 						    *jl)
326bd4c625cSLinus Torvalds {
3271da177e4SLinus Torvalds 	int i, j;
3281da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3291da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
3301da177e4SLinus Torvalds 
3311da177e4SLinus Torvalds 	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
3321da177e4SLinus Torvalds 		i = journal->j_list_bitmap_index;
3331da177e4SLinus Torvalds 		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
3341da177e4SLinus Torvalds 		jb = journal->j_list_bitmap + i;
3351da177e4SLinus Torvalds 		if (journal->j_list_bitmap[i].journal_list) {
336bd4c625cSLinus Torvalds 			flush_commit_list(p_s_sb,
337bd4c625cSLinus Torvalds 					  journal->j_list_bitmap[i].
338bd4c625cSLinus Torvalds 					  journal_list, 1);
3391da177e4SLinus Torvalds 			if (!journal->j_list_bitmap[i].journal_list) {
3401da177e4SLinus Torvalds 				break;
3411da177e4SLinus Torvalds 			}
3421da177e4SLinus Torvalds 		} else {
3431da177e4SLinus Torvalds 			break;
3441da177e4SLinus Torvalds 		}
3451da177e4SLinus Torvalds 	}
3461da177e4SLinus Torvalds 	if (jb->journal_list) {	/* double check to make sure if flushed correctly */
3471da177e4SLinus Torvalds 		return NULL;
3481da177e4SLinus Torvalds 	}
3491da177e4SLinus Torvalds 	jb->journal_list = jl;
3501da177e4SLinus Torvalds 	return jb;
3511da177e4SLinus Torvalds }
3521da177e4SLinus Torvalds 
3531da177e4SLinus Torvalds /*
3541da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list.
3551da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers
3561da177e4SLinus Torvalds ** returns NULL on failure
3571da177e4SLinus Torvalds */
358bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
359bd4c625cSLinus Torvalds {
3601da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *head;
3611da177e4SLinus Torvalds 	int i;
3621da177e4SLinus Torvalds 	if (num_cnodes <= 0) {
3631da177e4SLinus Torvalds 		return NULL;
3641da177e4SLinus Torvalds 	}
3651da177e4SLinus Torvalds 	head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
3661da177e4SLinus Torvalds 	if (!head) {
3671da177e4SLinus Torvalds 		return NULL;
3681da177e4SLinus Torvalds 	}
3691da177e4SLinus Torvalds 	memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
3701da177e4SLinus Torvalds 	head[0].prev = NULL;
3711da177e4SLinus Torvalds 	head[0].next = head + 1;
3721da177e4SLinus Torvalds 	for (i = 1; i < num_cnodes; i++) {
3731da177e4SLinus Torvalds 		head[i].prev = head + (i - 1);
3741da177e4SLinus Torvalds 		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
3751da177e4SLinus Torvalds 	}
3761da177e4SLinus Torvalds 	head[num_cnodes - 1].next = NULL;
3771da177e4SLinus Torvalds 	return head;
3781da177e4SLinus Torvalds }
3791da177e4SLinus Torvalds 
3801da177e4SLinus Torvalds /*
3811da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure
3821da177e4SLinus Torvalds */
383bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb)
384bd4c625cSLinus Torvalds {
3851da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
3861da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3871da177e4SLinus Torvalds 
3881da177e4SLinus Torvalds 	reiserfs_check_lock_depth(p_s_sb, "get_cnode");
3891da177e4SLinus Torvalds 
3901da177e4SLinus Torvalds 	if (journal->j_cnode_free <= 0) {
3911da177e4SLinus Torvalds 		return NULL;
3921da177e4SLinus Torvalds 	}
3931da177e4SLinus Torvalds 	journal->j_cnode_used++;
3941da177e4SLinus Torvalds 	journal->j_cnode_free--;
3951da177e4SLinus Torvalds 	cn = journal->j_cnode_free_list;
3961da177e4SLinus Torvalds 	if (!cn) {
3971da177e4SLinus Torvalds 		return cn;
3981da177e4SLinus Torvalds 	}
3991da177e4SLinus Torvalds 	if (cn->next) {
4001da177e4SLinus Torvalds 		cn->next->prev = NULL;
4011da177e4SLinus Torvalds 	}
4021da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn->next;
4031da177e4SLinus Torvalds 	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
4041da177e4SLinus Torvalds 	return cn;
4051da177e4SLinus Torvalds }
4061da177e4SLinus Torvalds 
4071da177e4SLinus Torvalds /*
4081da177e4SLinus Torvalds ** returns a cnode to the free list
4091da177e4SLinus Torvalds */
410bd4c625cSLinus Torvalds static void free_cnode(struct super_block *p_s_sb,
411bd4c625cSLinus Torvalds 		       struct reiserfs_journal_cnode *cn)
412bd4c625cSLinus Torvalds {
4131da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds 	reiserfs_check_lock_depth(p_s_sb, "free_cnode");
4161da177e4SLinus Torvalds 
4171da177e4SLinus Torvalds 	journal->j_cnode_used--;
4181da177e4SLinus Torvalds 	journal->j_cnode_free++;
4191da177e4SLinus Torvalds 	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
4201da177e4SLinus Torvalds 	cn->next = journal->j_cnode_free_list;
4211da177e4SLinus Torvalds 	if (journal->j_cnode_free_list) {
4221da177e4SLinus Torvalds 		journal->j_cnode_free_list->prev = cn;
4231da177e4SLinus Torvalds 	}
4241da177e4SLinus Torvalds 	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
4251da177e4SLinus Torvalds 	journal->j_cnode_free_list = cn;
4261da177e4SLinus Torvalds }
4271da177e4SLinus Torvalds 
/*
** drop both the journal_prepared and the journal_restore_dirty flags
** on bh.
*/
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);

	clear_buffer_journal_restore_dirty(bh);
}
4331da177e4SLinus Torvalds 
/*
** sanity check: on CONFIG_SMP builds, panic via reiserfs_panic when the
** current task's lock_depth is negative, i.e. the big kernel lock is not
** held.  caller is the name printed in the panic message.  Without
** CONFIG_SMP this is a no-op.
*/
void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
{
#ifdef CONFIG_SMP
	if (current->lock_depth >= 0)
		return;

	reiserfs_panic(sb, "%s called without kernel lock held", caller);
#endif
}
4481da177e4SLinus Torvalds 
4491da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */
450bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
451bd4c625cSLinus Torvalds 								  super_block
452bd4c625cSLinus Torvalds 								  *sb,
453bd4c625cSLinus Torvalds 								  struct
454bd4c625cSLinus Torvalds 								  reiserfs_journal_cnode
455bd4c625cSLinus Torvalds 								  **table,
4561da177e4SLinus Torvalds 								  long bl)
4571da177e4SLinus Torvalds {
4581da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4591da177e4SLinus Torvalds 	cn = journal_hash(table, sb, bl);
4601da177e4SLinus Torvalds 	while (cn) {
4611da177e4SLinus Torvalds 		if (cn->blocknr == bl && cn->sb == sb)
4621da177e4SLinus Torvalds 			return cn;
4631da177e4SLinus Torvalds 		cn = cn->hnext;
4641da177e4SLinus Torvalds 	}
4651da177e4SLinus Torvalds 	return (struct reiserfs_journal_cnode *)0;
4661da177e4SLinus Torvalds }
4671da177e4SLinus Torvalds 
4681da177e4SLinus Torvalds /*
4691da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
4701da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
4711da177e4SLinus Torvalds ** being overwritten by a replay after crashing.
4721da177e4SLinus Torvalds **
4731da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
4741da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
4751da177e4SLinus Torvalds ** sure you never write the block without logging it.
4761da177e4SLinus Torvalds **
4771da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward.
4781da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search
4791da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl.  Then, we return that
4801da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try.
4811da177e4SLinus Torvalds **
4821da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't
4831da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal
4841da177e4SLinus Torvalds **
4851da177e4SLinus Torvalds */
4861da177e4SLinus Torvalds int reiserfs_in_journal(struct super_block *p_s_sb,
4873ee16670SJeff Mahoney 			unsigned int bmap_nr, int bit_nr, int search_all,
488bd4c625cSLinus Torvalds 			b_blocknr_t * next_zero_bit)
489bd4c625cSLinus Torvalds {
4901da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
4911da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
4921da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb;
4931da177e4SLinus Torvalds 	int i;
4941da177e4SLinus Torvalds 	unsigned long bl;
4951da177e4SLinus Torvalds 
4961da177e4SLinus Torvalds 	*next_zero_bit = 0;	/* always start this at zero. */
4971da177e4SLinus Torvalds 
4981da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.in_journal);
4991da177e4SLinus Torvalds 	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
5001da177e4SLinus Torvalds 	 ** if we crash before the transaction that freed it commits,  this transaction won't
5011da177e4SLinus Torvalds 	 ** have committed either, and the block will never be written
5021da177e4SLinus Torvalds 	 */
5031da177e4SLinus Torvalds 	if (search_all) {
5041da177e4SLinus Torvalds 		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
5051da177e4SLinus Torvalds 			PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap);
5061da177e4SLinus Torvalds 			jb = journal->j_list_bitmap + i;
5071da177e4SLinus Torvalds 			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
508bd4c625cSLinus Torvalds 			    test_bit(bit_nr,
509bd4c625cSLinus Torvalds 				     (unsigned long *)jb->bitmaps[bmap_nr]->
510bd4c625cSLinus Torvalds 				     data)) {
511bd4c625cSLinus Torvalds 				*next_zero_bit =
512bd4c625cSLinus Torvalds 				    find_next_zero_bit((unsigned long *)
513bd4c625cSLinus Torvalds 						       (jb->bitmaps[bmap_nr]->
514bd4c625cSLinus Torvalds 							data),
515bd4c625cSLinus Torvalds 						       p_s_sb->s_blocksize << 3,
516bd4c625cSLinus Torvalds 						       bit_nr + 1);
5171da177e4SLinus Torvalds 				return 1;
5181da177e4SLinus Torvalds 			}
5191da177e4SLinus Torvalds 		}
5201da177e4SLinus Torvalds 	}
5211da177e4SLinus Torvalds 
5221da177e4SLinus Torvalds 	bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
5231da177e4SLinus Torvalds 	/* is it in any old transactions? */
524bd4c625cSLinus Torvalds 	if (search_all
525bd4c625cSLinus Torvalds 	    && (cn =
526bd4c625cSLinus Torvalds 		get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
5271da177e4SLinus Torvalds 		return 1;
5281da177e4SLinus Torvalds 	}
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds 	/* is it in the current transaction.  This should never happen */
5311da177e4SLinus Torvalds 	if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
5321da177e4SLinus Torvalds 		BUG();
5331da177e4SLinus Torvalds 		return 1;
5341da177e4SLinus Torvalds 	}
5351da177e4SLinus Torvalds 
5361da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.in_journal_reusable);
5371da177e4SLinus Torvalds 	/* safe for reuse */
5381da177e4SLinus Torvalds 	return 0;
5391da177e4SLinus Torvalds }
5401da177e4SLinus Torvalds 
5411da177e4SLinus Torvalds /* insert cn into table
5421da177e4SLinus Torvalds */
543bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
544bd4c625cSLinus Torvalds 				       struct reiserfs_journal_cnode *cn)
545bd4c625cSLinus Torvalds {
5461da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn_orig;
5471da177e4SLinus Torvalds 
5481da177e4SLinus Torvalds 	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
5491da177e4SLinus Torvalds 	cn->hnext = cn_orig;
5501da177e4SLinus Torvalds 	cn->hprev = NULL;
5511da177e4SLinus Torvalds 	if (cn_orig) {
5521da177e4SLinus Torvalds 		cn_orig->hprev = cn;
5531da177e4SLinus Torvalds 	}
5541da177e4SLinus Torvalds 	journal_hash(table, cn->sb, cn->blocknr) = cn;
5551da177e4SLinus Torvalds }
5561da177e4SLinus Torvalds 
5571da177e4SLinus Torvalds /* lock the current transaction */
55877933d72SJesper Juhl static inline void lock_journal(struct super_block *p_s_sb)
559bd4c625cSLinus Torvalds {
5601da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.lock_journal);
5611da177e4SLinus Torvalds 	down(&SB_JOURNAL(p_s_sb)->j_lock);
5621da177e4SLinus Torvalds }
5631da177e4SLinus Torvalds 
5641da177e4SLinus Torvalds /* unlock the current transaction */
56577933d72SJesper Juhl static inline void unlock_journal(struct super_block *p_s_sb)
566bd4c625cSLinus Torvalds {
5671da177e4SLinus Torvalds 	up(&SB_JOURNAL(p_s_sb)->j_lock);
5681da177e4SLinus Torvalds }
5691da177e4SLinus Torvalds 
5701da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl)
5711da177e4SLinus Torvalds {
5721da177e4SLinus Torvalds 	jl->j_refcount++;
5731da177e4SLinus Torvalds }
5741da177e4SLinus Torvalds 
5751da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s,
5761da177e4SLinus Torvalds 				    struct reiserfs_journal_list *jl)
5771da177e4SLinus Torvalds {
5781da177e4SLinus Torvalds 	if (jl->j_refcount < 1) {
579bd4c625cSLinus Torvalds 		reiserfs_panic(s, "trans id %lu, refcount at %d",
580bd4c625cSLinus Torvalds 			       jl->j_trans_id, jl->j_refcount);
5811da177e4SLinus Torvalds 	}
5821da177e4SLinus Torvalds 	if (--jl->j_refcount == 0)
583d739b42bSPekka Enberg 		kfree(jl);
5841da177e4SLinus Torvalds }
5851da177e4SLinus Torvalds 
5861da177e4SLinus Torvalds /*
5871da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again.
5881da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
5891da177e4SLinus Torvalds ** transaction.
5901da177e4SLinus Torvalds */
591bd4c625cSLinus Torvalds static void cleanup_freed_for_journal_list(struct super_block *p_s_sb,
592bd4c625cSLinus Torvalds 					   struct reiserfs_journal_list *jl)
593bd4c625cSLinus Torvalds {
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
5961da177e4SLinus Torvalds 	if (jb) {
5971da177e4SLinus Torvalds 		cleanup_bitmap_list(p_s_sb, jb);
5981da177e4SLinus Torvalds 	}
5991da177e4SLinus Torvalds 	jl->j_list_bitmap->journal_list = NULL;
6001da177e4SLinus Torvalds 	jl->j_list_bitmap = NULL;
6011da177e4SLinus Torvalds }
6021da177e4SLinus Torvalds 
6031da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s,
6041da177e4SLinus Torvalds 				    unsigned long trans_id)
6051da177e4SLinus Torvalds {
6061da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
6071da177e4SLinus Torvalds 	struct list_head *entry = &journal->j_journal_list;
6081da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
6091da177e4SLinus Torvalds 
6101da177e4SLinus Torvalds 	if (!list_empty(entry)) {
6111da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry->next);
6121da177e4SLinus Torvalds 		if (jl->j_trans_id <= trans_id) {
6131da177e4SLinus Torvalds 			return 1;
6141da177e4SLinus Torvalds 		}
6151da177e4SLinus Torvalds 	}
6161da177e4SLinus Torvalds 	return 0;
6171da177e4SLinus Torvalds }
6181da177e4SLinus Torvalds 
619398c95bdSChris Mason /*
620398c95bdSChris Mason  * If page->mapping was null, we failed to truncate this page for
621398c95bdSChris Mason  * some reason.  Most likely because it was truncated after being
622398c95bdSChris Mason  * logged via data=journal.
623398c95bdSChris Mason  *
624398c95bdSChris Mason  * This does a check to see if the buffer belongs to one of these
625398c95bdSChris Mason  * lost pages before doing the final put_bh.  If page->mapping was
626398c95bdSChris Mason  * null, it tries to free buffers on the page, which should make the
627398c95bdSChris Mason  * final page_cache_release drop the page from the lru.
628398c95bdSChris Mason  */
629398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh)
630398c95bdSChris Mason {
631398c95bdSChris Mason 	struct page *page = bh->b_page;
632398c95bdSChris Mason 	if (!page->mapping && !TestSetPageLocked(page)) {
633398c95bdSChris Mason 		page_cache_get(page);
634398c95bdSChris Mason 		put_bh(bh);
635398c95bdSChris Mason 		if (!page->mapping)
636398c95bdSChris Mason 			try_to_free_buffers(page);
637398c95bdSChris Mason 		unlock_page(page);
638398c95bdSChris Mason 		page_cache_release(page);
639398c95bdSChris Mason 	} else {
640398c95bdSChris Mason 		put_bh(bh);
641398c95bdSChris Mason 	}
642398c95bdSChris Mason }
643398c95bdSChris Mason 
644bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
645bd4c625cSLinus Torvalds {
6461da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
649bd4c625cSLinus Torvalds 		reiserfs_warning(NULL,
650bd4c625cSLinus Torvalds 				 "clm-2084: pinned buffer %lu:%s sent to disk",
6511da177e4SLinus Torvalds 				 bh->b_blocknr, bdevname(bh->b_bdev, b));
6521da177e4SLinus Torvalds 	}
6531da177e4SLinus Torvalds 	if (uptodate)
6541da177e4SLinus Torvalds 		set_buffer_uptodate(bh);
6551da177e4SLinus Torvalds 	else
6561da177e4SLinus Torvalds 		clear_buffer_uptodate(bh);
657398c95bdSChris Mason 
6581da177e4SLinus Torvalds 	unlock_buffer(bh);
659398c95bdSChris Mason 	release_buffer_page(bh);
6601da177e4SLinus Torvalds }
6611da177e4SLinus Torvalds 
/*
 * b_end_io handler installed by submit_ordered_buffer() and
 * submit_barrier_buffer(): records the I/O result in the uptodate bit,
 * unlocks the buffer and drops one reference.
 */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
	put_bh(bh);
}
6711da177e4SLinus Torvalds 
672bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh)
673bd4c625cSLinus Torvalds {
6741da177e4SLinus Torvalds 	get_bh(bh);
6751da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_buffer_io_sync;
6761da177e4SLinus Torvalds 	clear_buffer_journal_new(bh);
6771da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6781da177e4SLinus Torvalds 	if (!test_clear_buffer_journal_test(bh))
6791da177e4SLinus Torvalds 		BUG();
6801da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6811da177e4SLinus Torvalds 		BUG();
6821da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6831da177e4SLinus Torvalds }
6841da177e4SLinus Torvalds 
685bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh)
686bd4c625cSLinus Torvalds {
6871da177e4SLinus Torvalds 	get_bh(bh);
6881da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6891da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
6901da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
6911da177e4SLinus Torvalds 		BUG();
6921da177e4SLinus Torvalds 	submit_bh(WRITE, bh);
6931da177e4SLinus Torvalds }
6941da177e4SLinus Torvalds 
695bd4c625cSLinus Torvalds static int submit_barrier_buffer(struct buffer_head *bh)
696bd4c625cSLinus Torvalds {
6971da177e4SLinus Torvalds 	get_bh(bh);
6981da177e4SLinus Torvalds 	bh->b_end_io = reiserfs_end_ordered_io;
6991da177e4SLinus Torvalds 	clear_buffer_dirty(bh);
7001da177e4SLinus Torvalds 	if (!buffer_uptodate(bh))
7011da177e4SLinus Torvalds 		BUG();
7021da177e4SLinus Torvalds 	return submit_bh(WRITE_BARRIER, bh);
7031da177e4SLinus Torvalds }
7041da177e4SLinus Torvalds 
/*
 * If bh carries the eopnotsupp flag, clear the flag, turn barriers off
 * for s, and write the buffer again synchronously (after marking it
 * uptodate and dirty).  Does nothing when the flag is not set.
 */
static void check_barrier_completion(struct super_block *s,
				     struct buffer_head *bh)
{
	if (!buffer_eopnotsupp(bh))
		return;

	clear_buffer_eopnotsupp(bh);
	disable_barrier(s);
	set_buffer_uptodate(bh);
	set_buffer_dirty(bh);
	sync_dirty_buffer(bh);
}
7161da177e4SLinus Torvalds 
/* capacity of one buffer_chunk */
#define CHUNK_SIZE 32

/*
 * A small batch of buffer heads collected by add_to_chunk() and submitted
 * together by write_chunk() / write_ordered_chunk().
 */
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];	/* collected buffers */
	int nr;					/* number of slots of bh[] in use */
};
7221da177e4SLinus Torvalds 
723bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk)
724bd4c625cSLinus Torvalds {
7251da177e4SLinus Torvalds 	int i;
72622e2c507SJens Axboe 	get_fs_excl();
7271da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
7281da177e4SLinus Torvalds 		submit_logged_buffer(chunk->bh[i]);
7291da177e4SLinus Torvalds 	}
7301da177e4SLinus Torvalds 	chunk->nr = 0;
73122e2c507SJens Axboe 	put_fs_excl();
7321da177e4SLinus Torvalds }
7331da177e4SLinus Torvalds 
734bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk)
735bd4c625cSLinus Torvalds {
7361da177e4SLinus Torvalds 	int i;
73722e2c507SJens Axboe 	get_fs_excl();
7381da177e4SLinus Torvalds 	for (i = 0; i < chunk->nr; i++) {
7391da177e4SLinus Torvalds 		submit_ordered_buffer(chunk->bh[i]);
7401da177e4SLinus Torvalds 	}
7411da177e4SLinus Torvalds 	chunk->nr = 0;
74222e2c507SJens Axboe 	put_fs_excl();
7431da177e4SLinus Torvalds }
7441da177e4SLinus Torvalds 
7451da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
746bd4c625cSLinus Torvalds 			spinlock_t * lock, void (fn) (struct buffer_chunk *))
7471da177e4SLinus Torvalds {
7481da177e4SLinus Torvalds 	int ret = 0;
74914a61442SEric Sesterhenn 	BUG_ON(chunk->nr >= CHUNK_SIZE);
7501da177e4SLinus Torvalds 	chunk->bh[chunk->nr++] = bh;
7511da177e4SLinus Torvalds 	if (chunk->nr >= CHUNK_SIZE) {
7521da177e4SLinus Torvalds 		ret = 1;
7531da177e4SLinus Torvalds 		if (lock)
7541da177e4SLinus Torvalds 			spin_unlock(lock);
7551da177e4SLinus Torvalds 		fn(chunk);
7561da177e4SLinus Torvalds 		if (lock)
7571da177e4SLinus Torvalds 			spin_lock(lock);
7581da177e4SLinus Torvalds 	}
7591da177e4SLinus Torvalds 	return ret;
7601da177e4SLinus Torvalds }
7611da177e4SLinus Torvalds 
7621da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
763bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void)
764bd4c625cSLinus Torvalds {
7651da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7661da177e4SLinus Torvalds 	while (1) {
7671da177e4SLinus Torvalds 		jh = kmalloc(sizeof(*jh), GFP_NOFS);
7681da177e4SLinus Torvalds 		if (jh) {
7691da177e4SLinus Torvalds 			atomic_inc(&nr_reiserfs_jh);
7701da177e4SLinus Torvalds 			return jh;
7711da177e4SLinus Torvalds 		}
7721da177e4SLinus Torvalds 		yield();
7731da177e4SLinus Torvalds 	}
7741da177e4SLinus Torvalds }
7751da177e4SLinus Torvalds 
7761da177e4SLinus Torvalds /*
7771da177e4SLinus Torvalds  * we want to free the jh when the buffer has been written
7781da177e4SLinus Torvalds  * and waited on
7791da177e4SLinus Torvalds  */
780bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh)
781bd4c625cSLinus Torvalds {
7821da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
7831da177e4SLinus Torvalds 
7841da177e4SLinus Torvalds 	jh = bh->b_private;
7851da177e4SLinus Torvalds 	if (jh) {
7861da177e4SLinus Torvalds 		bh->b_private = NULL;
7871da177e4SLinus Torvalds 		jh->bh = NULL;
7881da177e4SLinus Torvalds 		list_del_init(&jh->list);
7891da177e4SLinus Torvalds 		kfree(jh);
7901da177e4SLinus Torvalds 		if (atomic_read(&nr_reiserfs_jh) <= 0)
7911da177e4SLinus Torvalds 			BUG();
7921da177e4SLinus Torvalds 		atomic_dec(&nr_reiserfs_jh);
7931da177e4SLinus Torvalds 		put_bh(bh);
7941da177e4SLinus Torvalds 	}
7951da177e4SLinus Torvalds }
7961da177e4SLinus Torvalds 
7971da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
7981da177e4SLinus Torvalds 			   int tail)
7991da177e4SLinus Torvalds {
8001da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8011da177e4SLinus Torvalds 
8021da177e4SLinus Torvalds 	if (bh->b_private) {
8031da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
8041da177e4SLinus Torvalds 		if (!bh->b_private) {
8051da177e4SLinus Torvalds 			spin_unlock(&j->j_dirty_buffers_lock);
8061da177e4SLinus Torvalds 			goto no_jh;
8071da177e4SLinus Torvalds 		}
8081da177e4SLinus Torvalds 		jh = bh->b_private;
8091da177e4SLinus Torvalds 		list_del_init(&jh->list);
8101da177e4SLinus Torvalds 	} else {
8111da177e4SLinus Torvalds 	      no_jh:
8121da177e4SLinus Torvalds 		get_bh(bh);
8131da177e4SLinus Torvalds 		jh = alloc_jh();
8141da177e4SLinus Torvalds 		spin_lock(&j->j_dirty_buffers_lock);
8151da177e4SLinus Torvalds 		/* buffer must be locked for __add_jh, should be able to have
8161da177e4SLinus Torvalds 		 * two adds at the same time
8171da177e4SLinus Torvalds 		 */
81814a61442SEric Sesterhenn 		BUG_ON(bh->b_private);
8191da177e4SLinus Torvalds 		jh->bh = bh;
8201da177e4SLinus Torvalds 		bh->b_private = jh;
8211da177e4SLinus Torvalds 	}
8221da177e4SLinus Torvalds 	jh->jl = j->j_current_jl;
8231da177e4SLinus Torvalds 	if (tail)
8241da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
8251da177e4SLinus Torvalds 	else {
8261da177e4SLinus Torvalds 		list_add_tail(&jh->list, &jh->jl->j_bh_list);
8271da177e4SLinus Torvalds 	}
8281da177e4SLinus Torvalds 	spin_unlock(&j->j_dirty_buffers_lock);
8291da177e4SLinus Torvalds 	return 0;
8301da177e4SLinus Torvalds }
8311da177e4SLinus Torvalds 
832bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
833bd4c625cSLinus Torvalds {
8341da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
8351da177e4SLinus Torvalds }
836bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
837bd4c625cSLinus Torvalds {
8381da177e4SLinus Torvalds 	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
8391da177e4SLinus Torvalds }
8401da177e4SLinus Torvalds 
8411da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
8421da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock,
8431da177e4SLinus Torvalds 				 struct reiserfs_journal *j,
8441da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
8451da177e4SLinus Torvalds 				 struct list_head *list)
8461da177e4SLinus Torvalds {
8471da177e4SLinus Torvalds 	struct buffer_head *bh;
8481da177e4SLinus Torvalds 	struct reiserfs_jh *jh;
8491da177e4SLinus Torvalds 	int ret = j->j_errno;
8501da177e4SLinus Torvalds 	struct buffer_chunk chunk;
8511da177e4SLinus Torvalds 	struct list_head tmp;
8521da177e4SLinus Torvalds 	INIT_LIST_HEAD(&tmp);
8531da177e4SLinus Torvalds 
8541da177e4SLinus Torvalds 	chunk.nr = 0;
8551da177e4SLinus Torvalds 	spin_lock(lock);
8561da177e4SLinus Torvalds 	while (!list_empty(list)) {
8571da177e4SLinus Torvalds 		jh = JH_ENTRY(list->next);
8581da177e4SLinus Torvalds 		bh = jh->bh;
8591da177e4SLinus Torvalds 		get_bh(bh);
8601da177e4SLinus Torvalds 		if (test_set_buffer_locked(bh)) {
8611da177e4SLinus Torvalds 			if (!buffer_dirty(bh)) {
862f116629dSAkinobu Mita 				list_move(&jh->list, &tmp);
8631da177e4SLinus Torvalds 				goto loop_next;
8641da177e4SLinus Torvalds 			}
8651da177e4SLinus Torvalds 			spin_unlock(lock);
8661da177e4SLinus Torvalds 			if (chunk.nr)
8671da177e4SLinus Torvalds 				write_ordered_chunk(&chunk);
8681da177e4SLinus Torvalds 			wait_on_buffer(bh);
8691da177e4SLinus Torvalds 			cond_resched();
8701da177e4SLinus Torvalds 			spin_lock(lock);
8711da177e4SLinus Torvalds 			goto loop_next;
8721da177e4SLinus Torvalds 		}
8733d4492f8SChris Mason 		/* in theory, dirty non-uptodate buffers should never get here,
8743d4492f8SChris Mason 		 * but the upper layer io error paths still have a few quirks.
8753d4492f8SChris Mason 		 * Handle them here as gracefully as we can
8763d4492f8SChris Mason 		 */
8773d4492f8SChris Mason 		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
8783d4492f8SChris Mason 			clear_buffer_dirty(bh);
8793d4492f8SChris Mason 			ret = -EIO;
8803d4492f8SChris Mason 		}
8811da177e4SLinus Torvalds 		if (buffer_dirty(bh)) {
882f116629dSAkinobu Mita 			list_move(&jh->list, &tmp);
8831da177e4SLinus Torvalds 			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
8841da177e4SLinus Torvalds 		} else {
8851da177e4SLinus Torvalds 			reiserfs_free_jh(bh);
8861da177e4SLinus Torvalds 			unlock_buffer(bh);
8871da177e4SLinus Torvalds 		}
8881da177e4SLinus Torvalds 	      loop_next:
8891da177e4SLinus Torvalds 		put_bh(bh);
8901da177e4SLinus Torvalds 		cond_resched_lock(lock);
8911da177e4SLinus Torvalds 	}
8921da177e4SLinus Torvalds 	if (chunk.nr) {
8931da177e4SLinus Torvalds 		spin_unlock(lock);
8941da177e4SLinus Torvalds 		write_ordered_chunk(&chunk);
8951da177e4SLinus Torvalds 		spin_lock(lock);
8961da177e4SLinus Torvalds 	}
8971da177e4SLinus Torvalds 	while (!list_empty(&tmp)) {
8981da177e4SLinus Torvalds 		jh = JH_ENTRY(tmp.prev);
8991da177e4SLinus Torvalds 		bh = jh->bh;
9001da177e4SLinus Torvalds 		get_bh(bh);
9011da177e4SLinus Torvalds 		reiserfs_free_jh(bh);
9021da177e4SLinus Torvalds 
9031da177e4SLinus Torvalds 		if (buffer_locked(bh)) {
9041da177e4SLinus Torvalds 			spin_unlock(lock);
9051da177e4SLinus Torvalds 			wait_on_buffer(bh);
9061da177e4SLinus Torvalds 			spin_lock(lock);
9071da177e4SLinus Torvalds 		}
9081da177e4SLinus Torvalds 		if (!buffer_uptodate(bh)) {
9091da177e4SLinus Torvalds 			ret = -EIO;
9101da177e4SLinus Torvalds 		}
911d62b1b87SChris Mason 		/* ugly interaction with invalidatepage here.
912d62b1b87SChris Mason 		 * reiserfs_invalidate_page will pin any buffer that has a valid
913d62b1b87SChris Mason 		 * journal head from an older transaction.  If someone else sets
914d62b1b87SChris Mason 		 * our buffer dirty after we write it in the first loop, and
915d62b1b87SChris Mason 		 * then someone truncates the page away, nobody will ever write
916d62b1b87SChris Mason 		 * the buffer. We're safe if we write the page one last time
917d62b1b87SChris Mason 		 * after freeing the journal header.
918d62b1b87SChris Mason 		 */
919d62b1b87SChris Mason 		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
920d62b1b87SChris Mason 			spin_unlock(lock);
921d62b1b87SChris Mason 			ll_rw_block(WRITE, 1, &bh);
922d62b1b87SChris Mason 			spin_lock(lock);
923d62b1b87SChris Mason 		}
9241da177e4SLinus Torvalds 		put_bh(bh);
9251da177e4SLinus Torvalds 		cond_resched_lock(lock);
9261da177e4SLinus Torvalds 	}
9271da177e4SLinus Torvalds 	spin_unlock(lock);
9281da177e4SLinus Torvalds 	return ret;
9291da177e4SLinus Torvalds }
9301da177e4SLinus Torvalds 
931bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s,
932bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
933bd4c625cSLinus Torvalds {
9341da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
9351da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
9361da177e4SLinus Torvalds 	struct reiserfs_journal_list *first_jl;
9371da177e4SLinus Torvalds 	struct list_head *entry;
9381da177e4SLinus Torvalds 	unsigned long trans_id = jl->j_trans_id;
9391da177e4SLinus Torvalds 	unsigned long other_trans_id;
9401da177e4SLinus Torvalds 	unsigned long first_trans_id;
9411da177e4SLinus Torvalds 
9421da177e4SLinus Torvalds       find_first:
9431da177e4SLinus Torvalds 	/*
9441da177e4SLinus Torvalds 	 * first we walk backwards to find the oldest uncommitted transation
9451da177e4SLinus Torvalds 	 */
9461da177e4SLinus Torvalds 	first_jl = jl;
9471da177e4SLinus Torvalds 	entry = jl->j_list.prev;
9481da177e4SLinus Torvalds 	while (1) {
9491da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9501da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list ||
9511da177e4SLinus Torvalds 		    atomic_read(&other_jl->j_older_commits_done))
9521da177e4SLinus Torvalds 			break;
9531da177e4SLinus Torvalds 
9541da177e4SLinus Torvalds 		first_jl = other_jl;
9551da177e4SLinus Torvalds 		entry = other_jl->j_list.prev;
9561da177e4SLinus Torvalds 	}
9571da177e4SLinus Torvalds 
9581da177e4SLinus Torvalds 	/* if we didn't find any older uncommitted transactions, return now */
9591da177e4SLinus Torvalds 	if (first_jl == jl) {
9601da177e4SLinus Torvalds 		return 0;
9611da177e4SLinus Torvalds 	}
9621da177e4SLinus Torvalds 
9631da177e4SLinus Torvalds 	first_trans_id = first_jl->j_trans_id;
9641da177e4SLinus Torvalds 
9651da177e4SLinus Torvalds 	entry = &first_jl->j_list;
9661da177e4SLinus Torvalds 	while (1) {
9671da177e4SLinus Torvalds 		other_jl = JOURNAL_LIST_ENTRY(entry);
9681da177e4SLinus Torvalds 		other_trans_id = other_jl->j_trans_id;
9691da177e4SLinus Torvalds 
9701da177e4SLinus Torvalds 		if (other_trans_id < trans_id) {
9711da177e4SLinus Torvalds 			if (atomic_read(&other_jl->j_commit_left) != 0) {
9721da177e4SLinus Torvalds 				flush_commit_list(s, other_jl, 0);
9731da177e4SLinus Torvalds 
9741da177e4SLinus Torvalds 				/* list we were called with is gone, return */
9751da177e4SLinus Torvalds 				if (!journal_list_still_alive(s, trans_id))
9761da177e4SLinus Torvalds 					return 1;
9771da177e4SLinus Torvalds 
9781da177e4SLinus Torvalds 				/* the one we just flushed is gone, this means all
9791da177e4SLinus Torvalds 				 * older lists are also gone, so first_jl is no longer
9801da177e4SLinus Torvalds 				 * valid either.  Go back to the beginning.
9811da177e4SLinus Torvalds 				 */
982bd4c625cSLinus Torvalds 				if (!journal_list_still_alive
983bd4c625cSLinus Torvalds 				    (s, other_trans_id)) {
9841da177e4SLinus Torvalds 					goto find_first;
9851da177e4SLinus Torvalds 				}
9861da177e4SLinus Torvalds 			}
9871da177e4SLinus Torvalds 			entry = entry->next;
9881da177e4SLinus Torvalds 			if (entry == &journal->j_journal_list)
9891da177e4SLinus Torvalds 				return 0;
9901da177e4SLinus Torvalds 		} else {
9911da177e4SLinus Torvalds 			return 0;
9921da177e4SLinus Torvalds 		}
9931da177e4SLinus Torvalds 	}
9941da177e4SLinus Torvalds 	return 0;
9951da177e4SLinus Torvalds }
996deba0f49SAdrian Bunk 
997deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s)
998bd4c625cSLinus Torvalds {
9991da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
10001da177e4SLinus Torvalds 	struct reiserfs_journal *j = SB_JOURNAL(s);
10011da177e4SLinus Torvalds 	if (atomic_read(&j->j_async_throttle))
10023fcfab16SAndrew Morton 		congestion_wait(WRITE, HZ / 10);
10031da177e4SLinus Torvalds 	return 0;
10041da177e4SLinus Torvalds }
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds /*
10071da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk.
10081da177e4SLinus Torvalds **
10091da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
10101da177e4SLinus Torvalds ** Before the commit block can by written, every other log block must be safely on disk
10111da177e4SLinus Torvalds **
10121da177e4SLinus Torvalds */
1013bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s,
1014bd4c625cSLinus Torvalds 			     struct reiserfs_journal_list *jl, int flushall)
1015bd4c625cSLinus Torvalds {
10161da177e4SLinus Torvalds 	int i;
10173ee16670SJeff Mahoney 	b_blocknr_t bn;
10181da177e4SLinus Torvalds 	struct buffer_head *tbh = NULL;
10191da177e4SLinus Torvalds 	unsigned long trans_id = jl->j_trans_id;
10201da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
10211da177e4SLinus Torvalds 	int barrier = 0;
10221da177e4SLinus Torvalds 	int retval = 0;
1023e0e851cfSChris Mason 	int write_len;
10241da177e4SLinus Torvalds 
10251da177e4SLinus Torvalds 	reiserfs_check_lock_depth(s, "flush_commit_list");
10261da177e4SLinus Torvalds 
10271da177e4SLinus Torvalds 	if (atomic_read(&jl->j_older_commits_done)) {
10281da177e4SLinus Torvalds 		return 0;
10291da177e4SLinus Torvalds 	}
10301da177e4SLinus Torvalds 
103122e2c507SJens Axboe 	get_fs_excl();
103222e2c507SJens Axboe 
10331da177e4SLinus Torvalds 	/* before we can put our commit blocks on disk, we have to make sure everyone older than
10341da177e4SLinus Torvalds 	 ** us is on disk too
10351da177e4SLinus Torvalds 	 */
10361da177e4SLinus Torvalds 	BUG_ON(jl->j_len <= 0);
10371da177e4SLinus Torvalds 	BUG_ON(trans_id == journal->j_trans_id);
10381da177e4SLinus Torvalds 
10391da177e4SLinus Torvalds 	get_journal_list(jl);
10401da177e4SLinus Torvalds 	if (flushall) {
10411da177e4SLinus Torvalds 		if (flush_older_commits(s, jl) == 1) {
10421da177e4SLinus Torvalds 			/* list disappeared during flush_older_commits.  return */
10431da177e4SLinus Torvalds 			goto put_jl;
10441da177e4SLinus Torvalds 		}
10451da177e4SLinus Torvalds 	}
10461da177e4SLinus Torvalds 
10471da177e4SLinus Torvalds 	/* make sure nobody is trying to flush this one at the same time */
10481da177e4SLinus Torvalds 	down(&jl->j_commit_lock);
10491da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, trans_id)) {
10501da177e4SLinus Torvalds 		up(&jl->j_commit_lock);
10511da177e4SLinus Torvalds 		goto put_jl;
10521da177e4SLinus Torvalds 	}
10531da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
10541da177e4SLinus Torvalds 
10551da177e4SLinus Torvalds 	/* this commit is done, exit */
10561da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_commit_left)) <= 0) {
10571da177e4SLinus Torvalds 		if (flushall) {
10581da177e4SLinus Torvalds 			atomic_set(&(jl->j_older_commits_done), 1);
10591da177e4SLinus Torvalds 		}
10601da177e4SLinus Torvalds 		up(&jl->j_commit_lock);
10611da177e4SLinus Torvalds 		goto put_jl;
10621da177e4SLinus Torvalds 	}
10631da177e4SLinus Torvalds 
10641da177e4SLinus Torvalds 	if (!list_empty(&jl->j_bh_list)) {
10653d4492f8SChris Mason 		int ret;
10661da177e4SLinus Torvalds 		unlock_kernel();
10673d4492f8SChris Mason 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
10681da177e4SLinus Torvalds 					    journal, jl, &jl->j_bh_list);
10693d4492f8SChris Mason 		if (ret < 0 && retval == 0)
10703d4492f8SChris Mason 			retval = ret;
10711da177e4SLinus Torvalds 		lock_kernel();
10721da177e4SLinus Torvalds 	}
10731da177e4SLinus Torvalds 	BUG_ON(!list_empty(&jl->j_bh_list));
10741da177e4SLinus Torvalds 	/*
10751da177e4SLinus Torvalds 	 * for the description block and all the log blocks, submit any buffers
1076e0e851cfSChris Mason 	 * that haven't already reached the disk.  Try to write at least 256
1077e0e851cfSChris Mason 	 * log blocks. later on, we will only wait on blocks that correspond
1078e0e851cfSChris Mason 	 * to this transaction, but while we're unplugging we might as well
1079e0e851cfSChris Mason 	 * get a chunk of data on there.
10801da177e4SLinus Torvalds 	 */
10811da177e4SLinus Torvalds 	atomic_inc(&journal->j_async_throttle);
1082e0e851cfSChris Mason 	write_len = jl->j_len + 1;
1083e0e851cfSChris Mason 	if (write_len < 256)
1084e0e851cfSChris Mason 		write_len = 256;
1085e0e851cfSChris Mason 	for (i = 0 ; i < write_len ; i++) {
10861da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
10871da177e4SLinus Torvalds 		    SB_ONDISK_JOURNAL_SIZE(s);
10881da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
1089e0e851cfSChris Mason 		if (tbh) {
1090e0e851cfSChris Mason 			if (buffer_dirty(tbh))
1091e0e851cfSChris Mason 			    ll_rw_block(WRITE, 1, &tbh) ;
10921da177e4SLinus Torvalds 			put_bh(tbh) ;
10931da177e4SLinus Torvalds 		}
1094e0e851cfSChris Mason 	}
10951da177e4SLinus Torvalds 	atomic_dec(&journal->j_async_throttle);
10961da177e4SLinus Torvalds 
10975d5e8156SJeff Mahoney 	/* We're skipping the commit if there's an error */
10985d5e8156SJeff Mahoney 	if (retval || reiserfs_is_journal_aborted(journal))
10995d5e8156SJeff Mahoney 		barrier = 0;
11005d5e8156SJeff Mahoney 
11011da177e4SLinus Torvalds 	/* wait on everything written so far before writing the commit
11021da177e4SLinus Torvalds 	 * if we are in barrier mode, send the commit down now
11031da177e4SLinus Torvalds 	 */
11041da177e4SLinus Torvalds 	barrier = reiserfs_barrier_flush(s);
11051da177e4SLinus Torvalds 	if (barrier) {
11061da177e4SLinus Torvalds 		int ret;
11071da177e4SLinus Torvalds 		lock_buffer(jl->j_commit_bh);
11081da177e4SLinus Torvalds 		ret = submit_barrier_buffer(jl->j_commit_bh);
11091da177e4SLinus Torvalds 		if (ret == -EOPNOTSUPP) {
11101da177e4SLinus Torvalds 			set_buffer_uptodate(jl->j_commit_bh);
11111da177e4SLinus Torvalds 			disable_barrier(s);
11121da177e4SLinus Torvalds 			barrier = 0;
11131da177e4SLinus Torvalds 		}
11141da177e4SLinus Torvalds 	}
11151da177e4SLinus Torvalds 	for (i = 0; i < (jl->j_len + 1); i++) {
11161da177e4SLinus Torvalds 		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
11171da177e4SLinus Torvalds 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
11181da177e4SLinus Torvalds 		tbh = journal_find_get_block(s, bn);
11191da177e4SLinus Torvalds 		wait_on_buffer(tbh);
11201da177e4SLinus Torvalds 		// since we're using ll_rw_blk above, it might have skipped over
11211da177e4SLinus Torvalds 		// a locked buffer.  Double check here
11221da177e4SLinus Torvalds 		//
11231da177e4SLinus Torvalds 		if (buffer_dirty(tbh))	/* redundant, sync_dirty_buffer() checks */
11241da177e4SLinus Torvalds 			sync_dirty_buffer(tbh);
11251da177e4SLinus Torvalds 		if (unlikely(!buffer_uptodate(tbh))) {
11261da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
11271da177e4SLinus Torvalds 			reiserfs_warning(s, "journal-601, buffer write failed");
11281da177e4SLinus Torvalds #endif
11291da177e4SLinus Torvalds 			retval = -EIO;
11301da177e4SLinus Torvalds 		}
11311da177e4SLinus Torvalds 		put_bh(tbh);	/* once for journal_find_get_block */
11321da177e4SLinus Torvalds 		put_bh(tbh);	/* once due to original getblk in do_journal_end */
11331da177e4SLinus Torvalds 		atomic_dec(&(jl->j_commit_left));
11341da177e4SLinus Torvalds 	}
11351da177e4SLinus Torvalds 
11361da177e4SLinus Torvalds 	BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
11371da177e4SLinus Torvalds 
11381da177e4SLinus Torvalds 	if (!barrier) {
11395d5e8156SJeff Mahoney 		/* If there was a write error in the journal - we can't commit
11405d5e8156SJeff Mahoney 		 * this transaction - it will be invalid and, if successful,
1141beb7dd86SRobert P. J. Day 		 * will just end up propagating the write error out to
11425d5e8156SJeff Mahoney 		 * the file system. */
11435d5e8156SJeff Mahoney 		if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
11441da177e4SLinus Torvalds 			if (buffer_dirty(jl->j_commit_bh))
11451da177e4SLinus Torvalds 				BUG();
11461da177e4SLinus Torvalds 			mark_buffer_dirty(jl->j_commit_bh) ;
11471da177e4SLinus Torvalds 			sync_dirty_buffer(jl->j_commit_bh) ;
11485d5e8156SJeff Mahoney 		}
11491da177e4SLinus Torvalds 	} else
11501da177e4SLinus Torvalds 		wait_on_buffer(jl->j_commit_bh);
11511da177e4SLinus Torvalds 
11521da177e4SLinus Torvalds 	check_barrier_completion(s, jl->j_commit_bh);
11531da177e4SLinus Torvalds 
11541da177e4SLinus Torvalds 	/* If there was a write error in the journal - we can't commit this
11551da177e4SLinus Torvalds 	 * transaction - it will be invalid and, if successful, will just end
1156beb7dd86SRobert P. J. Day 	 * up propagating the write error out to the filesystem. */
11571da177e4SLinus Torvalds 	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
11581da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
11591da177e4SLinus Torvalds 		reiserfs_warning(s, "journal-615: buffer write failed");
11601da177e4SLinus Torvalds #endif
11611da177e4SLinus Torvalds 		retval = -EIO;
11621da177e4SLinus Torvalds 	}
11631da177e4SLinus Torvalds 	bforget(jl->j_commit_bh);
11641da177e4SLinus Torvalds 	if (journal->j_last_commit_id != 0 &&
11651da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
11661da177e4SLinus Torvalds 		reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
1167bd4c625cSLinus Torvalds 				 journal->j_last_commit_id, jl->j_trans_id);
11681da177e4SLinus Torvalds 	}
11691da177e4SLinus Torvalds 	journal->j_last_commit_id = jl->j_trans_id;
11701da177e4SLinus Torvalds 
11711da177e4SLinus Torvalds 	/* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
11721da177e4SLinus Torvalds 	cleanup_freed_for_journal_list(s, jl);
11731da177e4SLinus Torvalds 
11741da177e4SLinus Torvalds 	retval = retval ? retval : journal->j_errno;
11751da177e4SLinus Torvalds 
11761da177e4SLinus Torvalds 	/* mark the metadata dirty */
11771da177e4SLinus Torvalds 	if (!retval)
11781da177e4SLinus Torvalds 		dirty_one_transaction(s, jl);
11791da177e4SLinus Torvalds 	atomic_dec(&(jl->j_commit_left));
11801da177e4SLinus Torvalds 
11811da177e4SLinus Torvalds 	if (flushall) {
11821da177e4SLinus Torvalds 		atomic_set(&(jl->j_older_commits_done), 1);
11831da177e4SLinus Torvalds 	}
11841da177e4SLinus Torvalds 	up(&jl->j_commit_lock);
11851da177e4SLinus Torvalds       put_jl:
11861da177e4SLinus Torvalds 	put_journal_list(s, jl);
11871da177e4SLinus Torvalds 
11881da177e4SLinus Torvalds 	if (retval)
1189bd4c625cSLinus Torvalds 		reiserfs_abort(s, retval, "Journal write error in %s",
1190fbe5498bSHarvey Harrison 			       __func__);
119122e2c507SJens Axboe 	put_fs_excl();
11921da177e4SLinus Torvalds 	return retval;
11931da177e4SLinus Torvalds }
11941da177e4SLinus Torvalds 
11951da177e4SLinus Torvalds /*
11961da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or
11971da177e4SLinus Torvalds ** returns NULL if it can't find anything
11981da177e4SLinus Torvalds */
1199bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1200bd4c625cSLinus Torvalds 							  reiserfs_journal_cnode
1201bd4c625cSLinus Torvalds 							  *cn)
1202bd4c625cSLinus Torvalds {
12031da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
12041da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
12051da177e4SLinus Torvalds 
12061da177e4SLinus Torvalds 	cn = cn->hprev;
12071da177e4SLinus Torvalds 	while (cn) {
12081da177e4SLinus Torvalds 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
12091da177e4SLinus Torvalds 			return cn->jlist;
12101da177e4SLinus Torvalds 		}
12111da177e4SLinus Torvalds 		cn = cn->hprev;
12121da177e4SLinus Torvalds 	}
12131da177e4SLinus Torvalds 	return NULL;
12141da177e4SLinus Torvalds }
12151da177e4SLinus Torvalds 
1216a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn)
1217a3172027SChris Mason {
1218a3172027SChris Mason 	struct super_block *sb = cn->sb;
1219a3172027SChris Mason 	b_blocknr_t blocknr = cn->blocknr;
1220a3172027SChris Mason 
1221a3172027SChris Mason 	cn = cn->hprev;
1222a3172027SChris Mason 	while (cn) {
1223a3172027SChris Mason 		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
1224a3172027SChris Mason 		    atomic_read(&cn->jlist->j_commit_left) != 0)
1225a3172027SChris Mason 				    return 0;
1226a3172027SChris Mason 		cn = cn->hprev;
1227a3172027SChris Mason 	}
1228a3172027SChris Mason 	return 1;
1229a3172027SChris Mason }
1230a3172027SChris Mason 
1231bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *,
1232bd4c625cSLinus Torvalds 				struct reiserfs_journal_cnode **,
1233bd4c625cSLinus Torvalds 				struct reiserfs_journal_list *, unsigned long,
1234bd4c625cSLinus Torvalds 				int);
12351da177e4SLinus Torvalds 
12361da177e4SLinus Torvalds /*
12371da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the
12381da177e4SLinus Torvalds ** journal list for this transaction.  Aside from freeing the cnode, this also allows the
12391da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted.
12401da177e4SLinus Torvalds */
1241bd4c625cSLinus Torvalds static void remove_all_from_journal_list(struct super_block *p_s_sb,
1242bd4c625cSLinus Torvalds 					 struct reiserfs_journal_list *jl,
1243bd4c625cSLinus Torvalds 					 int debug)
1244bd4c625cSLinus Torvalds {
12451da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
12461da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
12471da177e4SLinus Torvalds 	cn = jl->j_realblock;
12481da177e4SLinus Torvalds 
12491da177e4SLinus Torvalds 	/* which is better, to lock once around the whole loop, or
12501da177e4SLinus Torvalds 	 ** to lock for each call to remove_journal_hash?
12511da177e4SLinus Torvalds 	 */
12521da177e4SLinus Torvalds 	while (cn) {
12531da177e4SLinus Torvalds 		if (cn->blocknr != 0) {
12541da177e4SLinus Torvalds 			if (debug) {
1255bd4c625cSLinus Torvalds 				reiserfs_warning(p_s_sb,
1256bd4c625cSLinus Torvalds 						 "block %u, bh is %d, state %ld",
1257bd4c625cSLinus Torvalds 						 cn->blocknr, cn->bh ? 1 : 0,
1258bd4c625cSLinus Torvalds 						 cn->state);
12591da177e4SLinus Torvalds 			}
12601da177e4SLinus Torvalds 			cn->state = 0;
1261bd4c625cSLinus Torvalds 			remove_journal_hash(p_s_sb, journal->j_list_hash_table,
1262bd4c625cSLinus Torvalds 					    jl, cn->blocknr, 1);
12631da177e4SLinus Torvalds 		}
12641da177e4SLinus Torvalds 		last = cn;
12651da177e4SLinus Torvalds 		cn = cn->next;
12661da177e4SLinus Torvalds 		free_cnode(p_s_sb, last);
12671da177e4SLinus Torvalds 	}
12681da177e4SLinus Torvalds 	jl->j_realblock = NULL;
12691da177e4SLinus Torvalds }
12701da177e4SLinus Torvalds 
12711da177e4SLinus Torvalds /*
12721da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
12731da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
12741da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks.
12751da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list
12761da177e4SLinus Torvalds **
12771da177e4SLinus Torvalds */
1278bd4c625cSLinus Torvalds static int _update_journal_header_block(struct super_block *p_s_sb,
1279bd4c625cSLinus Torvalds 					unsigned long offset,
1280bd4c625cSLinus Torvalds 					unsigned long trans_id)
1281bd4c625cSLinus Torvalds {
12821da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
12831da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
12841da177e4SLinus Torvalds 
12851da177e4SLinus Torvalds 	if (reiserfs_is_journal_aborted(journal))
12861da177e4SLinus Torvalds 		return -EIO;
12871da177e4SLinus Torvalds 
12881da177e4SLinus Torvalds 	if (trans_id >= journal->j_last_flush_trans_id) {
12891da177e4SLinus Torvalds 		if (buffer_locked((journal->j_header_bh))) {
12901da177e4SLinus Torvalds 			wait_on_buffer((journal->j_header_bh));
12911da177e4SLinus Torvalds 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
12921da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1293bd4c625cSLinus Torvalds 				reiserfs_warning(p_s_sb,
1294bd4c625cSLinus Torvalds 						 "journal-699: buffer write failed");
12951da177e4SLinus Torvalds #endif
12961da177e4SLinus Torvalds 				return -EIO;
12971da177e4SLinus Torvalds 			}
12981da177e4SLinus Torvalds 		}
12991da177e4SLinus Torvalds 		journal->j_last_flush_trans_id = trans_id;
13001da177e4SLinus Torvalds 		journal->j_first_unflushed_offset = offset;
1301bd4c625cSLinus Torvalds 		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1302bd4c625cSLinus Torvalds 							b_data);
13031da177e4SLinus Torvalds 		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
13041da177e4SLinus Torvalds 		jh->j_first_unflushed_offset = cpu_to_le32(offset);
13051da177e4SLinus Torvalds 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
13061da177e4SLinus Torvalds 
13071da177e4SLinus Torvalds 		if (reiserfs_barrier_flush(p_s_sb)) {
13081da177e4SLinus Torvalds 			int ret;
13091da177e4SLinus Torvalds 			lock_buffer(journal->j_header_bh);
13101da177e4SLinus Torvalds 			ret = submit_barrier_buffer(journal->j_header_bh);
13111da177e4SLinus Torvalds 			if (ret == -EOPNOTSUPP) {
13121da177e4SLinus Torvalds 				set_buffer_uptodate(journal->j_header_bh);
13131da177e4SLinus Torvalds 				disable_barrier(p_s_sb);
13141da177e4SLinus Torvalds 				goto sync;
13151da177e4SLinus Torvalds 			}
13161da177e4SLinus Torvalds 			wait_on_buffer(journal->j_header_bh);
13171da177e4SLinus Torvalds 			check_barrier_completion(p_s_sb, journal->j_header_bh);
13181da177e4SLinus Torvalds 		} else {
13191da177e4SLinus Torvalds 		      sync:
13201da177e4SLinus Torvalds 			set_buffer_dirty(journal->j_header_bh);
13211da177e4SLinus Torvalds 			sync_dirty_buffer(journal->j_header_bh);
13221da177e4SLinus Torvalds 		}
13231da177e4SLinus Torvalds 		if (!buffer_uptodate(journal->j_header_bh)) {
1324bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
1325bd4c625cSLinus Torvalds 					 "journal-837: IO error during journal replay");
13261da177e4SLinus Torvalds 			return -EIO;
13271da177e4SLinus Torvalds 		}
13281da177e4SLinus Torvalds 	}
13291da177e4SLinus Torvalds 	return 0;
13301da177e4SLinus Torvalds }
13311da177e4SLinus Torvalds 
/*
 * Thin wrapper: forwards its arguments unchanged to
 * _update_journal_header_block() and returns that function's result.
 */
static int update_journal_header_block(struct super_block *p_s_sb,
				       unsigned long offset,
				       unsigned long trans_id)
{
	int rc;

	rc = _update_journal_header_block(p_s_sb, offset, trans_id);
	return rc;
}
1338bd4c625cSLinus Torvalds 
13391da177e4SLinus Torvalds /*
13401da177e4SLinus Torvalds ** flush any and all journal lists older than you are
13411da177e4SLinus Torvalds ** can only be called from flush_journal_list
13421da177e4SLinus Torvalds */
13431da177e4SLinus Torvalds static int flush_older_journal_lists(struct super_block *p_s_sb,
13441da177e4SLinus Torvalds 				     struct reiserfs_journal_list *jl)
13451da177e4SLinus Torvalds {
13461da177e4SLinus Torvalds 	struct list_head *entry;
13471da177e4SLinus Torvalds 	struct reiserfs_journal_list *other_jl;
13481da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
13491da177e4SLinus Torvalds 	unsigned long trans_id = jl->j_trans_id;
13501da177e4SLinus Torvalds 
13511da177e4SLinus Torvalds 	/* we know we are the only ones flushing things, no extra race
13521da177e4SLinus Torvalds 	 * protection is required.
13531da177e4SLinus Torvalds 	 */
13541da177e4SLinus Torvalds       restart:
13551da177e4SLinus Torvalds 	entry = journal->j_journal_list.next;
13561da177e4SLinus Torvalds 	/* Did we wrap? */
13571da177e4SLinus Torvalds 	if (entry == &journal->j_journal_list)
13581da177e4SLinus Torvalds 		return 0;
13591da177e4SLinus Torvalds 	other_jl = JOURNAL_LIST_ENTRY(entry);
13601da177e4SLinus Torvalds 	if (other_jl->j_trans_id < trans_id) {
13611da177e4SLinus Torvalds 		BUG_ON(other_jl->j_refcount <= 0);
13621da177e4SLinus Torvalds 		/* do not flush all */
13631da177e4SLinus Torvalds 		flush_journal_list(p_s_sb, other_jl, 0);
13641da177e4SLinus Torvalds 
13651da177e4SLinus Torvalds 		/* other_jl is now deleted from the list */
13661da177e4SLinus Torvalds 		goto restart;
13671da177e4SLinus Torvalds 	}
13681da177e4SLinus Torvalds 	return 0;
13691da177e4SLinus Torvalds }
13701da177e4SLinus Torvalds 
13711da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s,
1372bd4c625cSLinus Torvalds 			       struct reiserfs_journal_list *jl)
1373bd4c625cSLinus Torvalds {
13741da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
13751da177e4SLinus Torvalds 	if (!list_empty(&jl->j_working_list)) {
13761da177e4SLinus Torvalds 		list_del_init(&jl->j_working_list);
13771da177e4SLinus Torvalds 		journal->j_num_work_lists--;
13781da177e4SLinus Torvalds 	}
13791da177e4SLinus Torvalds }
13801da177e4SLinus Torvalds 
13811da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks
13821da177e4SLinus Torvalds **
13831da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside
13841da177e4SLinus Torvalds ** flush_journal_list
13851da177e4SLinus Torvalds **
13861da177e4SLinus Torvalds ** IMPORTANT.  This can only be called while there are no journal writers,
13871da177e4SLinus Torvalds ** and the journal is locked.  That means it can only be called from
13881da177e4SLinus Torvalds ** do_journal_end, or by journal_release
13891da177e4SLinus Torvalds */
13901da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s,
1391bd4c625cSLinus Torvalds 			      struct reiserfs_journal_list *jl, int flushall)
1392bd4c625cSLinus Torvalds {
13931da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
13941da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *last;
13951da177e4SLinus Torvalds 	int count;
13961da177e4SLinus Torvalds 	int was_jwait = 0;
13971da177e4SLinus Torvalds 	int was_dirty = 0;
13981da177e4SLinus Torvalds 	struct buffer_head *saved_bh;
13991da177e4SLinus Torvalds 	unsigned long j_len_saved = jl->j_len;
14001da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
14011da177e4SLinus Torvalds 	int err = 0;
14021da177e4SLinus Torvalds 
14031da177e4SLinus Torvalds 	BUG_ON(j_len_saved <= 0);
14041da177e4SLinus Torvalds 
14051da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) != 0) {
1406bd4c625cSLinus Torvalds 		reiserfs_warning(s,
1407bd4c625cSLinus Torvalds 				 "clm-2048: flush_journal_list called with wcount %d",
14081da177e4SLinus Torvalds 				 atomic_read(&journal->j_wcount));
14091da177e4SLinus Torvalds 	}
14101da177e4SLinus Torvalds 	BUG_ON(jl->j_trans_id == 0);
14111da177e4SLinus Torvalds 
14121da177e4SLinus Torvalds 	/* if flushall == 0, the lock is already held */
14131da177e4SLinus Torvalds 	if (flushall) {
14141da177e4SLinus Torvalds 		down(&journal->j_flush_sem);
14151da177e4SLinus Torvalds 	} else if (!down_trylock(&journal->j_flush_sem)) {
14161da177e4SLinus Torvalds 		BUG();
14171da177e4SLinus Torvalds 	}
14181da177e4SLinus Torvalds 
14191da177e4SLinus Torvalds 	count = 0;
14201da177e4SLinus Torvalds 	if (j_len_saved > journal->j_trans_max) {
1421bd4c625cSLinus Torvalds 		reiserfs_panic(s,
1422bd4c625cSLinus Torvalds 			       "journal-715: flush_journal_list, length is %lu, trans id %lu\n",
1423bd4c625cSLinus Torvalds 			       j_len_saved, jl->j_trans_id);
14241da177e4SLinus Torvalds 		return 0;
14251da177e4SLinus Torvalds 	}
14261da177e4SLinus Torvalds 
142722e2c507SJens Axboe 	get_fs_excl();
142822e2c507SJens Axboe 
14291da177e4SLinus Torvalds 	/* if all the work is already done, get out of here */
14301da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
14311da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
14321da177e4SLinus Torvalds 		goto flush_older_and_return;
14331da177e4SLinus Torvalds 	}
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds 	/* start by putting the commit list on disk.  This will also flush
14361da177e4SLinus Torvalds 	 ** the commit lists of any olders transactions
14371da177e4SLinus Torvalds 	 */
14381da177e4SLinus Torvalds 	flush_commit_list(s, jl, 1);
14391da177e4SLinus Torvalds 
1440bd4c625cSLinus Torvalds 	if (!(jl->j_state & LIST_DIRTY)
1441bd4c625cSLinus Torvalds 	    && !reiserfs_is_journal_aborted(journal))
14421da177e4SLinus Torvalds 		BUG();
14431da177e4SLinus Torvalds 
14441da177e4SLinus Torvalds 	/* are we done now? */
14451da177e4SLinus Torvalds 	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
14461da177e4SLinus Torvalds 	    atomic_read(&(jl->j_commit_left)) <= 0) {
14471da177e4SLinus Torvalds 		goto flush_older_and_return;
14481da177e4SLinus Torvalds 	}
14491da177e4SLinus Torvalds 
14501da177e4SLinus Torvalds 	/* loop through each cnode, see if we need to write it,
14511da177e4SLinus Torvalds 	 ** or wait on a more recent transaction, or just ignore it
14521da177e4SLinus Torvalds 	 */
14531da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) != 0) {
1454bd4c625cSLinus Torvalds 		reiserfs_panic(s,
1455bd4c625cSLinus Torvalds 			       "journal-844: panic journal list is flushing, wcount is not 0\n");
14561da177e4SLinus Torvalds 	}
14571da177e4SLinus Torvalds 	cn = jl->j_realblock;
14581da177e4SLinus Torvalds 	while (cn) {
14591da177e4SLinus Torvalds 		was_jwait = 0;
14601da177e4SLinus Torvalds 		was_dirty = 0;
14611da177e4SLinus Torvalds 		saved_bh = NULL;
14621da177e4SLinus Torvalds 		/* blocknr of 0 is no longer in the hash, ignore it */
14631da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
14641da177e4SLinus Torvalds 			goto free_cnode;
14651da177e4SLinus Torvalds 		}
14661da177e4SLinus Torvalds 
14671da177e4SLinus Torvalds 		/* This transaction failed commit. Don't write out to the disk */
14681da177e4SLinus Torvalds 		if (!(jl->j_state & LIST_DIRTY))
14691da177e4SLinus Torvalds 			goto free_cnode;
14701da177e4SLinus Torvalds 
14711da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
14721da177e4SLinus Torvalds 		/* the order is important here.  We check pjl to make sure we
14731da177e4SLinus Torvalds 		 ** don't clear BH_JDirty_wait if we aren't the one writing this
14741da177e4SLinus Torvalds 		 ** block to disk
14751da177e4SLinus Torvalds 		 */
14761da177e4SLinus Torvalds 		if (!pjl && cn->bh) {
14771da177e4SLinus Torvalds 			saved_bh = cn->bh;
14781da177e4SLinus Torvalds 
14791da177e4SLinus Torvalds 			/* we do this to make sure nobody releases the buffer while
14801da177e4SLinus Torvalds 			 ** we are working with it
14811da177e4SLinus Torvalds 			 */
14821da177e4SLinus Torvalds 			get_bh(saved_bh);
14831da177e4SLinus Torvalds 
14841da177e4SLinus Torvalds 			if (buffer_journal_dirty(saved_bh)) {
14851da177e4SLinus Torvalds 				BUG_ON(!can_dirty(cn));
14861da177e4SLinus Torvalds 				was_jwait = 1;
14871da177e4SLinus Torvalds 				was_dirty = 1;
14881da177e4SLinus Torvalds 			} else if (can_dirty(cn)) {
14891da177e4SLinus Torvalds 				/* everything with !pjl && jwait should be writable */
14901da177e4SLinus Torvalds 				BUG();
14911da177e4SLinus Torvalds 			}
14921da177e4SLinus Torvalds 		}
14931da177e4SLinus Torvalds 
14941da177e4SLinus Torvalds 		/* if someone has this block in a newer transaction, just make
14950779bf2dSMatt LaPlante 		 ** sure they are committed, and don't try writing it to disk
14961da177e4SLinus Torvalds 		 */
14971da177e4SLinus Torvalds 		if (pjl) {
14981da177e4SLinus Torvalds 			if (atomic_read(&pjl->j_commit_left))
14991da177e4SLinus Torvalds 				flush_commit_list(s, pjl, 1);
15001da177e4SLinus Torvalds 			goto free_cnode;
15011da177e4SLinus Torvalds 		}
15021da177e4SLinus Torvalds 
15031da177e4SLinus Torvalds 		/* bh == NULL when the block got to disk on its own, OR,
15041da177e4SLinus Torvalds 		 ** the block got freed in a future transaction
15051da177e4SLinus Torvalds 		 */
15061da177e4SLinus Torvalds 		if (saved_bh == NULL) {
15071da177e4SLinus Torvalds 			goto free_cnode;
15081da177e4SLinus Torvalds 		}
15091da177e4SLinus Torvalds 
15101da177e4SLinus Torvalds 		/* this should never happen.  kupdate_one_transaction has this list
15111da177e4SLinus Torvalds 		 ** locked while it works, so we should never see a buffer here that
15121da177e4SLinus Torvalds 		 ** is not marked JDirty_wait
15131da177e4SLinus Torvalds 		 */
15141da177e4SLinus Torvalds 		if ((!was_jwait) && !buffer_locked(saved_bh)) {
1515bd4c625cSLinus Torvalds 			reiserfs_warning(s,
1516bd4c625cSLinus Torvalds 					 "journal-813: BAD! buffer %llu %cdirty %cjwait, "
15171da177e4SLinus Torvalds 					 "not in a newer tranasction",
1518bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1519bd4c625cSLinus Torvalds 					 b_blocknr, was_dirty ? ' ' : '!',
1520bd4c625cSLinus Torvalds 					 was_jwait ? ' ' : '!');
15211da177e4SLinus Torvalds 		}
15221da177e4SLinus Torvalds 		if (was_dirty) {
15231da177e4SLinus Torvalds 			/* we inc again because saved_bh gets decremented at free_cnode */
15241da177e4SLinus Torvalds 			get_bh(saved_bh);
15251da177e4SLinus Torvalds 			set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
15261da177e4SLinus Torvalds 			lock_buffer(saved_bh);
15271da177e4SLinus Torvalds 			BUG_ON(cn->blocknr != saved_bh->b_blocknr);
15281da177e4SLinus Torvalds 			if (buffer_dirty(saved_bh))
15291da177e4SLinus Torvalds 				submit_logged_buffer(saved_bh);
15301da177e4SLinus Torvalds 			else
15311da177e4SLinus Torvalds 				unlock_buffer(saved_bh);
15321da177e4SLinus Torvalds 			count++;
15331da177e4SLinus Torvalds 		} else {
1534bd4c625cSLinus Torvalds 			reiserfs_warning(s,
1535bd4c625cSLinus Torvalds 					 "clm-2082: Unable to flush buffer %llu in %s",
1536bd4c625cSLinus Torvalds 					 (unsigned long long)saved_bh->
1537fbe5498bSHarvey Harrison 					 b_blocknr, __func__);
15381da177e4SLinus Torvalds 		}
15391da177e4SLinus Torvalds 	      free_cnode:
15401da177e4SLinus Torvalds 		last = cn;
15411da177e4SLinus Torvalds 		cn = cn->next;
15421da177e4SLinus Torvalds 		if (saved_bh) {
15431da177e4SLinus Torvalds 			/* we incremented this to keep others from taking the buffer head away */
15441da177e4SLinus Torvalds 			put_bh(saved_bh);
15451da177e4SLinus Torvalds 			if (atomic_read(&(saved_bh->b_count)) < 0) {
1546bd4c625cSLinus Torvalds 				reiserfs_warning(s,
1547bd4c625cSLinus Torvalds 						 "journal-945: saved_bh->b_count < 0");
15481da177e4SLinus Torvalds 			}
15491da177e4SLinus Torvalds 		}
15501da177e4SLinus Torvalds 	}
15511da177e4SLinus Torvalds 	if (count > 0) {
15521da177e4SLinus Torvalds 		cn = jl->j_realblock;
15531da177e4SLinus Torvalds 		while (cn) {
15541da177e4SLinus Torvalds 			if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
15551da177e4SLinus Torvalds 				if (!cn->bh) {
1556bd4c625cSLinus Torvalds 					reiserfs_panic(s,
1557bd4c625cSLinus Torvalds 						       "journal-1011: cn->bh is NULL\n");
15581da177e4SLinus Torvalds 				}
15591da177e4SLinus Torvalds 				wait_on_buffer(cn->bh);
15601da177e4SLinus Torvalds 				if (!cn->bh) {
1561bd4c625cSLinus Torvalds 					reiserfs_panic(s,
1562bd4c625cSLinus Torvalds 						       "journal-1012: cn->bh is NULL\n");
15631da177e4SLinus Torvalds 				}
15641da177e4SLinus Torvalds 				if (unlikely(!buffer_uptodate(cn->bh))) {
15651da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
1566bd4c625cSLinus Torvalds 					reiserfs_warning(s,
1567bd4c625cSLinus Torvalds 							 "journal-949: buffer write failed\n");
15681da177e4SLinus Torvalds #endif
15691da177e4SLinus Torvalds 					err = -EIO;
15701da177e4SLinus Torvalds 				}
15711da177e4SLinus Torvalds 				/* note, we must clear the JDirty_wait bit after the up to date
15721da177e4SLinus Torvalds 				 ** check, otherwise we race against our flushpage routine
15731da177e4SLinus Torvalds 				 */
1574bd4c625cSLinus Torvalds 				BUG_ON(!test_clear_buffer_journal_dirty
1575bd4c625cSLinus Torvalds 				       (cn->bh));
15761da177e4SLinus Torvalds 
1577398c95bdSChris Mason 				/* drop one ref for us */
15781da177e4SLinus Torvalds 				put_bh(cn->bh);
1579398c95bdSChris Mason 				/* drop one ref for journal_mark_dirty */
1580398c95bdSChris Mason 				release_buffer_page(cn->bh);
15811da177e4SLinus Torvalds 			}
15821da177e4SLinus Torvalds 			cn = cn->next;
15831da177e4SLinus Torvalds 		}
15841da177e4SLinus Torvalds 	}
15851da177e4SLinus Torvalds 
15861da177e4SLinus Torvalds 	if (err)
1587bd4c625cSLinus Torvalds 		reiserfs_abort(s, -EIO,
1588bd4c625cSLinus Torvalds 			       "Write error while pushing transaction to disk in %s",
1589fbe5498bSHarvey Harrison 			       __func__);
15901da177e4SLinus Torvalds       flush_older_and_return:
15911da177e4SLinus Torvalds 
15921da177e4SLinus Torvalds 	/* before we can update the journal header block, we _must_ flush all
15931da177e4SLinus Torvalds 	 ** real blocks from all older transactions to disk.  This is because
15941da177e4SLinus Torvalds 	 ** once the header block is updated, this transaction will not be
15951da177e4SLinus Torvalds 	 ** replayed after a crash
15961da177e4SLinus Torvalds 	 */
15971da177e4SLinus Torvalds 	if (flushall) {
15981da177e4SLinus Torvalds 		flush_older_journal_lists(s, jl);
15991da177e4SLinus Torvalds 	}
16001da177e4SLinus Torvalds 
16011da177e4SLinus Torvalds 	err = journal->j_errno;
16021da177e4SLinus Torvalds 	/* before we can remove everything from the hash tables for this
16031da177e4SLinus Torvalds 	 ** transaction, we must make sure it can never be replayed
16041da177e4SLinus Torvalds 	 **
16051da177e4SLinus Torvalds 	 ** since we are only called from do_journal_end, we know for sure there
16061da177e4SLinus Torvalds 	 ** are no allocations going on while we are flushing journal lists.  So,
16071da177e4SLinus Torvalds 	 ** we only need to update the journal header block for the last list
16081da177e4SLinus Torvalds 	 ** being flushed
16091da177e4SLinus Torvalds 	 */
16101da177e4SLinus Torvalds 	if (!err && flushall) {
1611bd4c625cSLinus Torvalds 		err =
1612bd4c625cSLinus Torvalds 		    update_journal_header_block(s,
1613bd4c625cSLinus Torvalds 						(jl->j_start + jl->j_len +
1614bd4c625cSLinus Torvalds 						 2) % SB_ONDISK_JOURNAL_SIZE(s),
1615bd4c625cSLinus Torvalds 						jl->j_trans_id);
16161da177e4SLinus Torvalds 		if (err)
1617bd4c625cSLinus Torvalds 			reiserfs_abort(s, -EIO,
1618bd4c625cSLinus Torvalds 				       "Write error while updating journal header in %s",
1619fbe5498bSHarvey Harrison 				       __func__);
16201da177e4SLinus Torvalds 	}
16211da177e4SLinus Torvalds 	remove_all_from_journal_list(s, jl, 0);
16221da177e4SLinus Torvalds 	list_del_init(&jl->j_list);
16231da177e4SLinus Torvalds 	journal->j_num_lists--;
16241da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16251da177e4SLinus Torvalds 
16261da177e4SLinus Torvalds 	if (journal->j_last_flush_id != 0 &&
16271da177e4SLinus Torvalds 	    (jl->j_trans_id - journal->j_last_flush_id) != 1) {
16281da177e4SLinus Torvalds 		reiserfs_warning(s, "clm-2201: last flush %lu, current %lu",
1629bd4c625cSLinus Torvalds 				 journal->j_last_flush_id, jl->j_trans_id);
16301da177e4SLinus Torvalds 	}
16311da177e4SLinus Torvalds 	journal->j_last_flush_id = jl->j_trans_id;
16321da177e4SLinus Torvalds 
16331da177e4SLinus Torvalds 	/* not strictly required since we are freeing the list, but it should
16341da177e4SLinus Torvalds 	 * help find code using dead lists later on
16351da177e4SLinus Torvalds 	 */
16361da177e4SLinus Torvalds 	jl->j_len = 0;
16371da177e4SLinus Torvalds 	atomic_set(&(jl->j_nonzerolen), 0);
16381da177e4SLinus Torvalds 	jl->j_start = 0;
16391da177e4SLinus Torvalds 	jl->j_realblock = NULL;
16401da177e4SLinus Torvalds 	jl->j_commit_bh = NULL;
16411da177e4SLinus Torvalds 	jl->j_trans_id = 0;
16421da177e4SLinus Torvalds 	jl->j_state = 0;
16431da177e4SLinus Torvalds 	put_journal_list(s, jl);
16441da177e4SLinus Torvalds 	if (flushall)
16451da177e4SLinus Torvalds 		up(&journal->j_flush_sem);
164622e2c507SJens Axboe 	put_fs_excl();
16471da177e4SLinus Torvalds 	return err;
16481da177e4SLinus Torvalds }
16491da177e4SLinus Torvalds 
1650a3172027SChris Mason static int test_transaction(struct super_block *s,
1651a3172027SChris Mason                             struct reiserfs_journal_list *jl)
1652a3172027SChris Mason {
1653a3172027SChris Mason 	struct reiserfs_journal_cnode *cn;
1654a3172027SChris Mason 
1655a3172027SChris Mason 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
1656a3172027SChris Mason 		return 1;
1657a3172027SChris Mason 
1658a3172027SChris Mason 	cn = jl->j_realblock;
1659a3172027SChris Mason 	while (cn) {
1660a3172027SChris Mason 		/* if the blocknr == 0, this has been cleared from the hash,
1661a3172027SChris Mason 		 ** skip it
1662a3172027SChris Mason 		 */
1663a3172027SChris Mason 		if (cn->blocknr == 0) {
1664a3172027SChris Mason 			goto next;
1665a3172027SChris Mason 		}
1666a3172027SChris Mason 		if (cn->bh && !newer_jl_done(cn))
1667a3172027SChris Mason 			return 0;
1668a3172027SChris Mason 	      next:
1669a3172027SChris Mason 		cn = cn->next;
1670a3172027SChris Mason 		cond_resched();
1671a3172027SChris Mason 	}
1672a3172027SChris Mason 	return 0;
1673a3172027SChris Mason }
1674a3172027SChris Mason 
16751da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s,
16761da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl,
16771da177e4SLinus Torvalds 				 struct buffer_chunk *chunk)
16781da177e4SLinus Torvalds {
16791da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
16801da177e4SLinus Torvalds 	int ret = 0;
16811da177e4SLinus Torvalds 
16821da177e4SLinus Torvalds 	jl->j_state |= LIST_TOUCHED;
16831da177e4SLinus Torvalds 	del_from_work_list(s, jl);
16841da177e4SLinus Torvalds 	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
16851da177e4SLinus Torvalds 		return 0;
16861da177e4SLinus Torvalds 	}
16871da177e4SLinus Torvalds 
16881da177e4SLinus Torvalds 	cn = jl->j_realblock;
16891da177e4SLinus Torvalds 	while (cn) {
16901da177e4SLinus Torvalds 		/* if the blocknr == 0, this has been cleared from the hash,
16911da177e4SLinus Torvalds 		 ** skip it
16921da177e4SLinus Torvalds 		 */
16931da177e4SLinus Torvalds 		if (cn->blocknr == 0) {
16941da177e4SLinus Torvalds 			goto next;
16951da177e4SLinus Torvalds 		}
16961da177e4SLinus Torvalds 		if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
16971da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
16981da177e4SLinus Torvalds 			/* we can race against journal_mark_freed when we try
16991da177e4SLinus Torvalds 			 * to lock_buffer(cn->bh), so we have to inc the buffer
17001da177e4SLinus Torvalds 			 * count, and recheck things after locking
17011da177e4SLinus Torvalds 			 */
17021da177e4SLinus Torvalds 			tmp_bh = cn->bh;
17031da177e4SLinus Torvalds 			get_bh(tmp_bh);
17041da177e4SLinus Torvalds 			lock_buffer(tmp_bh);
17051da177e4SLinus Torvalds 			if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
17061da177e4SLinus Torvalds 				if (!buffer_journal_dirty(tmp_bh) ||
17071da177e4SLinus Torvalds 				    buffer_journal_prepared(tmp_bh))
17081da177e4SLinus Torvalds 					BUG();
17091da177e4SLinus Torvalds 				add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
17101da177e4SLinus Torvalds 				ret++;
17111da177e4SLinus Torvalds 			} else {
17121da177e4SLinus Torvalds 				/* note, cn->bh might be null now */
17131da177e4SLinus Torvalds 				unlock_buffer(tmp_bh);
17141da177e4SLinus Torvalds 			}
17151da177e4SLinus Torvalds 			put_bh(tmp_bh);
17161da177e4SLinus Torvalds 		}
17171da177e4SLinus Torvalds 	      next:
17181da177e4SLinus Torvalds 		cn = cn->next;
17191da177e4SLinus Torvalds 		cond_resched();
17201da177e4SLinus Torvalds 	}
17211da177e4SLinus Torvalds 	return ret;
17221da177e4SLinus Torvalds }
17231da177e4SLinus Torvalds 
17241da177e4SLinus Torvalds /* used by flush_commit_list */
17251da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s,
17261da177e4SLinus Torvalds 				 struct reiserfs_journal_list *jl)
17271da177e4SLinus Torvalds {
17281da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
17291da177e4SLinus Torvalds 	struct reiserfs_journal_list *pjl;
17301da177e4SLinus Torvalds 	int ret = 0;
17311da177e4SLinus Torvalds 
17321da177e4SLinus Torvalds 	jl->j_state |= LIST_DIRTY;
17331da177e4SLinus Torvalds 	cn = jl->j_realblock;
17341da177e4SLinus Torvalds 	while (cn) {
17351da177e4SLinus Torvalds 		/* look for a more recent transaction that logged this
17361da177e4SLinus Torvalds 		 ** buffer.  Only the most recent transaction with a buffer in
17371da177e4SLinus Torvalds 		 ** it is allowed to send that buffer to disk
17381da177e4SLinus Torvalds 		 */
17391da177e4SLinus Torvalds 		pjl = find_newer_jl_for_cn(cn);
1740bd4c625cSLinus Torvalds 		if (!pjl && cn->blocknr && cn->bh
1741bd4c625cSLinus Torvalds 		    && buffer_journal_dirty(cn->bh)) {
17421da177e4SLinus Torvalds 			BUG_ON(!can_dirty(cn));
17431da177e4SLinus Torvalds 			/* if the buffer is prepared, it will either be logged
17441da177e4SLinus Torvalds 			 * or restored.  If restored, we need to make sure
17451da177e4SLinus Torvalds 			 * it actually gets marked dirty
17461da177e4SLinus Torvalds 			 */
17471da177e4SLinus Torvalds 			clear_buffer_journal_new(cn->bh);
17481da177e4SLinus Torvalds 			if (buffer_journal_prepared(cn->bh)) {
17491da177e4SLinus Torvalds 				set_buffer_journal_restore_dirty(cn->bh);
17501da177e4SLinus Torvalds 			} else {
17511da177e4SLinus Torvalds 				set_buffer_journal_test(cn->bh);
17521da177e4SLinus Torvalds 				mark_buffer_dirty(cn->bh);
17531da177e4SLinus Torvalds 			}
17541da177e4SLinus Torvalds 		}
17551da177e4SLinus Torvalds 		cn = cn->next;
17561da177e4SLinus Torvalds 	}
17571da177e4SLinus Torvalds 	return ret;
17581da177e4SLinus Torvalds }
17591da177e4SLinus Torvalds 
17601da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s,
17611da177e4SLinus Torvalds 				struct reiserfs_journal_list *jl,
17621da177e4SLinus Torvalds 				struct reiserfs_journal_list **next_jl,
17631da177e4SLinus Torvalds 				unsigned long *next_trans_id,
1764bd4c625cSLinus Torvalds 				int num_blocks, int num_trans)
1765bd4c625cSLinus Torvalds {
17661da177e4SLinus Torvalds 	int ret = 0;
17671da177e4SLinus Torvalds 	int written = 0;
17681da177e4SLinus Torvalds 	int transactions_flushed = 0;
17691da177e4SLinus Torvalds 	unsigned long orig_trans_id = jl->j_trans_id;
17701da177e4SLinus Torvalds 	struct buffer_chunk chunk;
17711da177e4SLinus Torvalds 	struct list_head *entry;
17721da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
17731da177e4SLinus Torvalds 	chunk.nr = 0;
17741da177e4SLinus Torvalds 
17751da177e4SLinus Torvalds 	down(&journal->j_flush_sem);
17761da177e4SLinus Torvalds 	if (!journal_list_still_alive(s, orig_trans_id)) {
17771da177e4SLinus Torvalds 		goto done;
17781da177e4SLinus Torvalds 	}
17791da177e4SLinus Torvalds 
17801da177e4SLinus Torvalds 	/* we've got j_flush_sem held, nobody is going to delete any
17811da177e4SLinus Torvalds 	 * of these lists out from underneath us
17821da177e4SLinus Torvalds 	 */
17831da177e4SLinus Torvalds 	while ((num_trans && transactions_flushed < num_trans) ||
17841da177e4SLinus Torvalds 	       (!num_trans && written < num_blocks)) {
17851da177e4SLinus Torvalds 
17861da177e4SLinus Torvalds 		if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1787bd4c625cSLinus Torvalds 		    atomic_read(&jl->j_commit_left)
1788bd4c625cSLinus Torvalds 		    || !(jl->j_state & LIST_DIRTY)) {
17891da177e4SLinus Torvalds 			del_from_work_list(s, jl);
17901da177e4SLinus Torvalds 			break;
17911da177e4SLinus Torvalds 		}
17921da177e4SLinus Torvalds 		ret = write_one_transaction(s, jl, &chunk);
17931da177e4SLinus Torvalds 
17941da177e4SLinus Torvalds 		if (ret < 0)
17951da177e4SLinus Torvalds 			goto done;
17961da177e4SLinus Torvalds 		transactions_flushed++;
17971da177e4SLinus Torvalds 		written += ret;
17981da177e4SLinus Torvalds 		entry = jl->j_list.next;
17991da177e4SLinus Torvalds 
18001da177e4SLinus Torvalds 		/* did we wrap? */
18011da177e4SLinus Torvalds 		if (entry == &journal->j_journal_list) {
18021da177e4SLinus Torvalds 			break;
18031da177e4SLinus Torvalds 		}
18041da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
18051da177e4SLinus Torvalds 
18061da177e4SLinus Torvalds 		/* don't bother with older transactions */
18071da177e4SLinus Torvalds 		if (jl->j_trans_id <= orig_trans_id)
18081da177e4SLinus Torvalds 			break;
18091da177e4SLinus Torvalds 	}
18101da177e4SLinus Torvalds 	if (chunk.nr) {
18111da177e4SLinus Torvalds 		write_chunk(&chunk);
18121da177e4SLinus Torvalds 	}
18131da177e4SLinus Torvalds 
18141da177e4SLinus Torvalds       done:
18151da177e4SLinus Torvalds 	up(&journal->j_flush_sem);
18161da177e4SLinus Torvalds 	return ret;
18171da177e4SLinus Torvalds }
18181da177e4SLinus Torvalds 
18191da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use
18201da177e4SLinus Torvalds ** all the journa list slots with tiny transactions.  These
18211da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which
18221da177e4SLinus Torvalds ** adds seeks and slows things down.
18231da177e4SLinus Torvalds **
18241da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists
18251da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal
18261da177e4SLinus Torvalds ** list updates the header block
18271da177e4SLinus Torvalds */
18281da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s,
1829bd4c625cSLinus Torvalds 				    struct reiserfs_journal_list *jl)
1830bd4c625cSLinus Torvalds {
18311da177e4SLinus Torvalds 	unsigned long len = 0;
18321da177e4SLinus Torvalds 	unsigned long cur_len;
18331da177e4SLinus Torvalds 	int ret;
18341da177e4SLinus Torvalds 	int i;
18351da177e4SLinus Torvalds 	int limit = 256;
18361da177e4SLinus Torvalds 	struct reiserfs_journal_list *tjl;
18371da177e4SLinus Torvalds 	struct reiserfs_journal_list *flush_jl;
18381da177e4SLinus Torvalds 	unsigned long trans_id;
18391da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
18401da177e4SLinus Torvalds 
18411da177e4SLinus Torvalds 	flush_jl = tjl = jl;
18421da177e4SLinus Torvalds 
18431da177e4SLinus Torvalds 	/* in data logging mode, try harder to flush a lot of blocks */
18441da177e4SLinus Torvalds 	if (reiserfs_data_log(s))
18451da177e4SLinus Torvalds 		limit = 1024;
18461da177e4SLinus Torvalds 	/* flush for 256 transactions or limit blocks, whichever comes first */
18471da177e4SLinus Torvalds 	for (i = 0; i < 256 && len < limit; i++) {
18481da177e4SLinus Torvalds 		if (atomic_read(&tjl->j_commit_left) ||
18491da177e4SLinus Torvalds 		    tjl->j_trans_id < jl->j_trans_id) {
18501da177e4SLinus Torvalds 			break;
18511da177e4SLinus Torvalds 		}
18521da177e4SLinus Torvalds 		cur_len = atomic_read(&tjl->j_nonzerolen);
18531da177e4SLinus Torvalds 		if (cur_len > 0) {
18541da177e4SLinus Torvalds 			tjl->j_state &= ~LIST_TOUCHED;
18551da177e4SLinus Torvalds 		}
18561da177e4SLinus Torvalds 		len += cur_len;
18571da177e4SLinus Torvalds 		flush_jl = tjl;
18581da177e4SLinus Torvalds 		if (tjl->j_list.next == &journal->j_journal_list)
18591da177e4SLinus Torvalds 			break;
18601da177e4SLinus Torvalds 		tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
18611da177e4SLinus Torvalds 	}
18621da177e4SLinus Torvalds 	/* try to find a group of blocks we can flush across all the
18631da177e4SLinus Torvalds 	 ** transactions, but only bother if we've actually spanned
18641da177e4SLinus Torvalds 	 ** across multiple lists
18651da177e4SLinus Torvalds 	 */
18661da177e4SLinus Torvalds 	if (flush_jl != jl) {
18671da177e4SLinus Torvalds 		ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
18681da177e4SLinus Torvalds 	}
18691da177e4SLinus Torvalds 	flush_journal_list(s, flush_jl, 1);
18701da177e4SLinus Torvalds 	return 0;
18711da177e4SLinus Torvalds }
18721da177e4SLinus Torvalds 
18731da177e4SLinus Torvalds /*
18741da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh.
18751da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers.
18761da177e4SLinus Torvalds */
18771da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb,
18781da177e4SLinus Torvalds 			 struct reiserfs_journal_cnode **table,
18791da177e4SLinus Torvalds 			 struct reiserfs_journal_list *jl,
18801da177e4SLinus Torvalds 			 unsigned long block, int remove_freed)
18811da177e4SLinus Torvalds {
18821da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur;
18831da177e4SLinus Torvalds 	struct reiserfs_journal_cnode **head;
18841da177e4SLinus Torvalds 
18851da177e4SLinus Torvalds 	head = &(journal_hash(table, sb, block));
18861da177e4SLinus Torvalds 	if (!head) {
18871da177e4SLinus Torvalds 		return;
18881da177e4SLinus Torvalds 	}
18891da177e4SLinus Torvalds 	cur = *head;
18901da177e4SLinus Torvalds 	while (cur) {
1891bd4c625cSLinus Torvalds 		if (cur->blocknr == block && cur->sb == sb
1892bd4c625cSLinus Torvalds 		    && (jl == NULL || jl == cur->jlist)
1893bd4c625cSLinus Torvalds 		    && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
18941da177e4SLinus Torvalds 			if (cur->hnext) {
18951da177e4SLinus Torvalds 				cur->hnext->hprev = cur->hprev;
18961da177e4SLinus Torvalds 			}
18971da177e4SLinus Torvalds 			if (cur->hprev) {
18981da177e4SLinus Torvalds 				cur->hprev->hnext = cur->hnext;
18991da177e4SLinus Torvalds 			} else {
19001da177e4SLinus Torvalds 				*head = cur->hnext;
19011da177e4SLinus Torvalds 			}
19021da177e4SLinus Torvalds 			cur->blocknr = 0;
19031da177e4SLinus Torvalds 			cur->sb = NULL;
19041da177e4SLinus Torvalds 			cur->state = 0;
19051da177e4SLinus Torvalds 			if (cur->bh && cur->jlist)	/* anybody who clears the cur->bh will also dec the nonzerolen */
19061da177e4SLinus Torvalds 				atomic_dec(&(cur->jlist->j_nonzerolen));
19071da177e4SLinus Torvalds 			cur->bh = NULL;
19081da177e4SLinus Torvalds 			cur->jlist = NULL;
19091da177e4SLinus Torvalds 		}
19101da177e4SLinus Torvalds 		cur = cur->hnext;
19111da177e4SLinus Torvalds 	}
19121da177e4SLinus Torvalds }
19131da177e4SLinus Torvalds 
1914bd4c625cSLinus Torvalds static void free_journal_ram(struct super_block *p_s_sb)
1915bd4c625cSLinus Torvalds {
19161da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1917d739b42bSPekka Enberg 	kfree(journal->j_current_jl);
19181da177e4SLinus Torvalds 	journal->j_num_lists--;
19191da177e4SLinus Torvalds 
19201da177e4SLinus Torvalds 	vfree(journal->j_cnode_free_orig);
19211da177e4SLinus Torvalds 	free_list_bitmaps(p_s_sb, journal->j_list_bitmap);
19221da177e4SLinus Torvalds 	free_bitmap_nodes(p_s_sb);	/* must be after free_list_bitmaps */
19231da177e4SLinus Torvalds 	if (journal->j_header_bh) {
19241da177e4SLinus Torvalds 		brelse(journal->j_header_bh);
19251da177e4SLinus Torvalds 	}
19261da177e4SLinus Torvalds 	/* j_header_bh is on the journal dev, make sure not to release the journal
19271da177e4SLinus Torvalds 	 * dev until we brelse j_header_bh
19281da177e4SLinus Torvalds 	 */
19291da177e4SLinus Torvalds 	release_journal_dev(p_s_sb, journal);
19301da177e4SLinus Torvalds 	vfree(journal);
19311da177e4SLinus Torvalds }
19321da177e4SLinus Torvalds 
19331da177e4SLinus Torvalds /*
19341da177e4SLinus Torvalds ** call on unmount.  Only set error to 1 if you haven't made your way out
19351da177e4SLinus Torvalds ** of read_super() yet.  Any other caller must keep error at 0.
19361da177e4SLinus Torvalds */
1937bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th,
1938bd4c625cSLinus Torvalds 			      struct super_block *p_s_sb, int error)
1939bd4c625cSLinus Torvalds {
19401da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
19411da177e4SLinus Torvalds 	int flushed = 0;
19421da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
19431da177e4SLinus Torvalds 
19441da177e4SLinus Torvalds 	/* we only want to flush out transactions if we were called with error == 0
19451da177e4SLinus Torvalds 	 */
19461da177e4SLinus Torvalds 	if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
19471da177e4SLinus Torvalds 		/* end the current trans */
19481da177e4SLinus Torvalds 		BUG_ON(!th->t_trans_id);
19491da177e4SLinus Torvalds 		do_journal_end(th, p_s_sb, 10, FLUSH_ALL);
19501da177e4SLinus Torvalds 
19511da177e4SLinus Torvalds 		/* make sure something gets logged to force our way into the flush code */
19521da177e4SLinus Torvalds 		if (!journal_join(&myth, p_s_sb, 1)) {
1953bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(p_s_sb,
1954bd4c625cSLinus Torvalds 						     SB_BUFFER_WITH_SB(p_s_sb),
1955bd4c625cSLinus Torvalds 						     1);
1956bd4c625cSLinus Torvalds 			journal_mark_dirty(&myth, p_s_sb,
1957bd4c625cSLinus Torvalds 					   SB_BUFFER_WITH_SB(p_s_sb));
19581da177e4SLinus Torvalds 			do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
19591da177e4SLinus Torvalds 			flushed = 1;
19601da177e4SLinus Torvalds 		}
19611da177e4SLinus Torvalds 	}
19621da177e4SLinus Torvalds 
19631da177e4SLinus Torvalds 	/* this also catches errors during the do_journal_end above */
19641da177e4SLinus Torvalds 	if (!error && reiserfs_is_journal_aborted(journal)) {
19651da177e4SLinus Torvalds 		memset(&myth, 0, sizeof(myth));
19661da177e4SLinus Torvalds 		if (!journal_join_abort(&myth, p_s_sb, 1)) {
1967bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(p_s_sb,
1968bd4c625cSLinus Torvalds 						     SB_BUFFER_WITH_SB(p_s_sb),
1969bd4c625cSLinus Torvalds 						     1);
1970bd4c625cSLinus Torvalds 			journal_mark_dirty(&myth, p_s_sb,
1971bd4c625cSLinus Torvalds 					   SB_BUFFER_WITH_SB(p_s_sb));
19721da177e4SLinus Torvalds 			do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
19731da177e4SLinus Torvalds 		}
19741da177e4SLinus Torvalds 	}
19751da177e4SLinus Torvalds 
19761da177e4SLinus Torvalds 	reiserfs_mounted_fs_count--;
19771da177e4SLinus Torvalds 	/* wait for all commits to finish */
19781da177e4SLinus Torvalds 	cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work);
19791da177e4SLinus Torvalds 	flush_workqueue(commit_wq);
19801da177e4SLinus Torvalds 	if (!reiserfs_mounted_fs_count) {
19811da177e4SLinus Torvalds 		destroy_workqueue(commit_wq);
19821da177e4SLinus Torvalds 		commit_wq = NULL;
19831da177e4SLinus Torvalds 	}
19841da177e4SLinus Torvalds 
19851da177e4SLinus Torvalds 	free_journal_ram(p_s_sb);
19861da177e4SLinus Torvalds 
19871da177e4SLinus Torvalds 	return 0;
19881da177e4SLinus Torvalds }
19891da177e4SLinus Torvalds 
/*
 * Call on unmount: flush all journal transactions and release all
 * allocated ram.  Thin wrapper around do_journal_release() with the
 * error flag cleared.
 */
int journal_release(struct reiserfs_transaction_handle *th,
		    struct super_block *p_s_sb)
{
	const int error = 0;

	return do_journal_release(th, p_s_sb, error);
}

1998bd4c625cSLinus Torvalds 
/*
 * Only call from an error condition inside reiserfs_read_super!
 * Thin wrapper around do_journal_release() with the error flag set.
 */
int journal_release_error(struct reiserfs_transaction_handle *th,
			  struct super_block *p_s_sb)
{
	const int error = 1;

	return do_journal_release(th, p_s_sb, error);
}
20071da177e4SLinus Torvalds 
20081da177e4SLinus Torvalds /* compares description block with commit block.  returns 1 if they differ, 0 if they are the same */
2009bd4c625cSLinus Torvalds static int journal_compare_desc_commit(struct super_block *p_s_sb,
2010bd4c625cSLinus Torvalds 				       struct reiserfs_journal_desc *desc,
2011bd4c625cSLinus Torvalds 				       struct reiserfs_journal_commit *commit)
2012bd4c625cSLinus Torvalds {
20131da177e4SLinus Torvalds 	if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
20141da177e4SLinus Torvalds 	    get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
20151da177e4SLinus Torvalds 	    get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max ||
2016bd4c625cSLinus Torvalds 	    get_commit_trans_len(commit) <= 0) {
20171da177e4SLinus Torvalds 		return 1;
20181da177e4SLinus Torvalds 	}
20191da177e4SLinus Torvalds 	return 0;
20201da177e4SLinus Torvalds }
2021bd4c625cSLinus Torvalds 
20221da177e4SLinus Torvalds /* returns 0 if it did not find a description block
20231da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block
20241da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid
20251da177e4SLinus Torvalds */
2026bd4c625cSLinus Torvalds static int journal_transaction_is_valid(struct super_block *p_s_sb,
2027bd4c625cSLinus Torvalds 					struct buffer_head *d_bh,
2028bd4c625cSLinus Torvalds 					unsigned long *oldest_invalid_trans_id,
2029bd4c625cSLinus Torvalds 					unsigned long *newest_mount_id)
2030bd4c625cSLinus Torvalds {
20311da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
20321da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
20331da177e4SLinus Torvalds 	struct buffer_head *c_bh;
20341da177e4SLinus Torvalds 	unsigned long offset;
20351da177e4SLinus Torvalds 
20361da177e4SLinus Torvalds 	if (!d_bh)
20371da177e4SLinus Torvalds 		return 0;
20381da177e4SLinus Torvalds 
20391da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2040bd4c625cSLinus Torvalds 	if (get_desc_trans_len(desc) > 0
2041bd4c625cSLinus Torvalds 	    && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
2042bd4c625cSLinus Torvalds 		if (oldest_invalid_trans_id && *oldest_invalid_trans_id
2043bd4c625cSLinus Torvalds 		    && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
2044bd4c625cSLinus Torvalds 			reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2045bd4c625cSLinus Torvalds 				       "journal-986: transaction "
20461da177e4SLinus Torvalds 				       "is valid returning because trans_id %d is greater than "
2047bd4c625cSLinus Torvalds 				       "oldest_invalid %lu",
2048bd4c625cSLinus Torvalds 				       get_desc_trans_id(desc),
20491da177e4SLinus Torvalds 				       *oldest_invalid_trans_id);
20501da177e4SLinus Torvalds 			return 0;
20511da177e4SLinus Torvalds 		}
2052bd4c625cSLinus Torvalds 		if (newest_mount_id
2053bd4c625cSLinus Torvalds 		    && *newest_mount_id > get_desc_mount_id(desc)) {
2054bd4c625cSLinus Torvalds 			reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2055bd4c625cSLinus Torvalds 				       "journal-1087: transaction "
20561da177e4SLinus Torvalds 				       "is valid returning because mount_id %d is less than "
2057bd4c625cSLinus Torvalds 				       "newest_mount_id %lu",
2058bd4c625cSLinus Torvalds 				       get_desc_mount_id(desc),
20591da177e4SLinus Torvalds 				       *newest_mount_id);
20601da177e4SLinus Torvalds 			return -1;
20611da177e4SLinus Torvalds 		}
20621da177e4SLinus Torvalds 		if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) {
2063bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
2064bd4c625cSLinus Torvalds 					 "journal-2018: Bad transaction length %d encountered, ignoring transaction",
2065bd4c625cSLinus Torvalds 					 get_desc_trans_len(desc));
20661da177e4SLinus Torvalds 			return -1;
20671da177e4SLinus Torvalds 		}
20681da177e4SLinus Torvalds 		offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
20691da177e4SLinus Torvalds 
20701da177e4SLinus Torvalds 		/* ok, we have a journal description block, lets see if the transaction was valid */
2071bd4c625cSLinus Torvalds 		c_bh =
2072bd4c625cSLinus Torvalds 		    journal_bread(p_s_sb,
2073bd4c625cSLinus Torvalds 				  SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2074bd4c625cSLinus Torvalds 				  ((offset + get_desc_trans_len(desc) +
2075bd4c625cSLinus Torvalds 				    1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
20761da177e4SLinus Torvalds 		if (!c_bh)
20771da177e4SLinus Torvalds 			return 0;
20781da177e4SLinus Torvalds 		commit = (struct reiserfs_journal_commit *)c_bh->b_data;
20791da177e4SLinus Torvalds 		if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
20801da177e4SLinus Torvalds 			reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
20811da177e4SLinus Torvalds 				       "journal_transaction_is_valid, commit offset %ld had bad "
20821da177e4SLinus Torvalds 				       "time %d or length %d",
2083bd4c625cSLinus Torvalds 				       c_bh->b_blocknr -
2084bd4c625cSLinus Torvalds 				       SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
20851da177e4SLinus Torvalds 				       get_commit_trans_id(commit),
20861da177e4SLinus Torvalds 				       get_commit_trans_len(commit));
20871da177e4SLinus Torvalds 			brelse(c_bh);
20881da177e4SLinus Torvalds 			if (oldest_invalid_trans_id) {
2089bd4c625cSLinus Torvalds 				*oldest_invalid_trans_id =
2090bd4c625cSLinus Torvalds 				    get_desc_trans_id(desc);
2091bd4c625cSLinus Torvalds 				reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2092bd4c625cSLinus Torvalds 					       "journal-1004: "
20931da177e4SLinus Torvalds 					       "transaction_is_valid setting oldest invalid trans_id "
2094bd4c625cSLinus Torvalds 					       "to %d",
2095bd4c625cSLinus Torvalds 					       get_desc_trans_id(desc));
20961da177e4SLinus Torvalds 			}
20971da177e4SLinus Torvalds 			return -1;
20981da177e4SLinus Torvalds 		}
20991da177e4SLinus Torvalds 		brelse(c_bh);
2100bd4c625cSLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2101bd4c625cSLinus Torvalds 			       "journal-1006: found valid "
21021da177e4SLinus Torvalds 			       "transaction start offset %llu, len %d id %d",
2103bd4c625cSLinus Torvalds 			       d_bh->b_blocknr -
2104bd4c625cSLinus Torvalds 			       SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2105bd4c625cSLinus Torvalds 			       get_desc_trans_len(desc),
2106bd4c625cSLinus Torvalds 			       get_desc_trans_id(desc));
21071da177e4SLinus Torvalds 		return 1;
21081da177e4SLinus Torvalds 	} else {
21091da177e4SLinus Torvalds 		return 0;
21101da177e4SLinus Torvalds 	}
21111da177e4SLinus Torvalds }
21121da177e4SLinus Torvalds 
/* brelse() the first @num buffer heads of @heads, in order. */
static void brelse_array(struct buffer_head **heads, int num)
{
	int idx = 0;

	while (idx < num) {
		brelse(heads[idx]);
		idx++;
	}
}
21201da177e4SLinus Torvalds 
21211da177e4SLinus Torvalds /*
21221da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions,
21231da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction
21241da177e4SLinus Torvalds ** is invalid, or too old.
21251da177e4SLinus Torvalds */
2126bd4c625cSLinus Torvalds static int journal_read_transaction(struct super_block *p_s_sb,
2127bd4c625cSLinus Torvalds 				    unsigned long cur_dblock,
2128bd4c625cSLinus Torvalds 				    unsigned long oldest_start,
2129bd4c625cSLinus Torvalds 				    unsigned long oldest_trans_id,
2130bd4c625cSLinus Torvalds 				    unsigned long newest_mount_id)
2131bd4c625cSLinus Torvalds {
21321da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
21331da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
21341da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
21351da177e4SLinus Torvalds 	unsigned long trans_id = 0;
21361da177e4SLinus Torvalds 	struct buffer_head *c_bh;
21371da177e4SLinus Torvalds 	struct buffer_head *d_bh;
21381da177e4SLinus Torvalds 	struct buffer_head **log_blocks = NULL;
21391da177e4SLinus Torvalds 	struct buffer_head **real_blocks = NULL;
21401da177e4SLinus Torvalds 	unsigned long trans_offset;
21411da177e4SLinus Torvalds 	int i;
21421da177e4SLinus Torvalds 	int trans_half;
21431da177e4SLinus Torvalds 
21441da177e4SLinus Torvalds 	d_bh = journal_bread(p_s_sb, cur_dblock);
21451da177e4SLinus Torvalds 	if (!d_bh)
21461da177e4SLinus Torvalds 		return 1;
21471da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)d_bh->b_data;
21481da177e4SLinus Torvalds 	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
21491da177e4SLinus Torvalds 	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: "
21501da177e4SLinus Torvalds 		       "journal_read_transaction, offset %llu, len %d mount_id %d",
21511da177e4SLinus Torvalds 		       d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
21521da177e4SLinus Torvalds 		       get_desc_trans_len(desc), get_desc_mount_id(desc));
21531da177e4SLinus Torvalds 	if (get_desc_trans_id(desc) < oldest_trans_id) {
21541da177e4SLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: "
21551da177e4SLinus Torvalds 			       "journal_read_trans skipping because %lu is too old",
2156bd4c625cSLinus Torvalds 			       cur_dblock -
2157bd4c625cSLinus Torvalds 			       SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
21581da177e4SLinus Torvalds 		brelse(d_bh);
21591da177e4SLinus Torvalds 		return 1;
21601da177e4SLinus Torvalds 	}
21611da177e4SLinus Torvalds 	if (get_desc_mount_id(desc) != newest_mount_id) {
21621da177e4SLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: "
21631da177e4SLinus Torvalds 			       "journal_read_trans skipping because %d is != "
21641da177e4SLinus Torvalds 			       "newest_mount_id %lu", get_desc_mount_id(desc),
21651da177e4SLinus Torvalds 			       newest_mount_id);
21661da177e4SLinus Torvalds 		brelse(d_bh);
21671da177e4SLinus Torvalds 		return 1;
21681da177e4SLinus Torvalds 	}
21691da177e4SLinus Torvalds 	c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
21701da177e4SLinus Torvalds 			     ((trans_offset + get_desc_trans_len(desc) + 1) %
21711da177e4SLinus Torvalds 			      SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
21721da177e4SLinus Torvalds 	if (!c_bh) {
21731da177e4SLinus Torvalds 		brelse(d_bh);
21741da177e4SLinus Torvalds 		return 1;
21751da177e4SLinus Torvalds 	}
21761da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
21771da177e4SLinus Torvalds 	if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
2178bd4c625cSLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2179bd4c625cSLinus Torvalds 			       "journal_read_transaction, "
21801da177e4SLinus Torvalds 			       "commit offset %llu had bad time %d or length %d",
2181bd4c625cSLinus Torvalds 			       c_bh->b_blocknr -
2182bd4c625cSLinus Torvalds 			       SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2183bd4c625cSLinus Torvalds 			       get_commit_trans_id(commit),
2184bd4c625cSLinus Torvalds 			       get_commit_trans_len(commit));
21851da177e4SLinus Torvalds 		brelse(c_bh);
21861da177e4SLinus Torvalds 		brelse(d_bh);
21871da177e4SLinus Torvalds 		return 1;
21881da177e4SLinus Torvalds 	}
21891da177e4SLinus Torvalds 	trans_id = get_desc_trans_id(desc);
21901da177e4SLinus Torvalds 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
2191d739b42bSPekka Enberg 	log_blocks = kmalloc(get_desc_trans_len(desc) *
2192d739b42bSPekka Enberg 			     sizeof(struct buffer_head *), GFP_NOFS);
2193d739b42bSPekka Enberg 	real_blocks = kmalloc(get_desc_trans_len(desc) *
2194d739b42bSPekka Enberg 			      sizeof(struct buffer_head *), GFP_NOFS);
21951da177e4SLinus Torvalds 	if (!log_blocks || !real_blocks) {
21961da177e4SLinus Torvalds 		brelse(c_bh);
21971da177e4SLinus Torvalds 		brelse(d_bh);
2198d739b42bSPekka Enberg 		kfree(log_blocks);
2199d739b42bSPekka Enberg 		kfree(real_blocks);
2200bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2201bd4c625cSLinus Torvalds 				 "journal-1169: kmalloc failed, unable to mount FS");
22021da177e4SLinus Torvalds 		return -1;
22031da177e4SLinus Torvalds 	}
22041da177e4SLinus Torvalds 	/* get all the buffer heads */
22051da177e4SLinus Torvalds 	trans_half = journal_trans_half(p_s_sb->s_blocksize);
22061da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
2207bd4c625cSLinus Torvalds 		log_blocks[i] =
2208bd4c625cSLinus Torvalds 		    journal_getblk(p_s_sb,
2209bd4c625cSLinus Torvalds 				   SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2210bd4c625cSLinus Torvalds 				   (trans_offset + 1 +
2211bd4c625cSLinus Torvalds 				    i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
22121da177e4SLinus Torvalds 		if (i < trans_half) {
2213bd4c625cSLinus Torvalds 			real_blocks[i] =
2214bd4c625cSLinus Torvalds 			    sb_getblk(p_s_sb,
2215bd4c625cSLinus Torvalds 				      le32_to_cpu(desc->j_realblock[i]));
22161da177e4SLinus Torvalds 		} else {
2217bd4c625cSLinus Torvalds 			real_blocks[i] =
2218bd4c625cSLinus Torvalds 			    sb_getblk(p_s_sb,
2219bd4c625cSLinus Torvalds 				      le32_to_cpu(commit->
2220bd4c625cSLinus Torvalds 						  j_realblock[i - trans_half]));
22211da177e4SLinus Torvalds 		}
22221da177e4SLinus Torvalds 		if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2223bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
2224bd4c625cSLinus Torvalds 					 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
22251da177e4SLinus Torvalds 			goto abort_replay;
22261da177e4SLinus Torvalds 		}
22271da177e4SLinus Torvalds 		/* make sure we don't try to replay onto log or reserved area */
2228bd4c625cSLinus Torvalds 		if (is_block_in_log_or_reserved_area
2229bd4c625cSLinus Torvalds 		    (p_s_sb, real_blocks[i]->b_blocknr)) {
2230bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
2231bd4c625cSLinus Torvalds 					 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block");
22321da177e4SLinus Torvalds 		      abort_replay:
22331da177e4SLinus Torvalds 			brelse_array(log_blocks, i);
22341da177e4SLinus Torvalds 			brelse_array(real_blocks, i);
22351da177e4SLinus Torvalds 			brelse(c_bh);
22361da177e4SLinus Torvalds 			brelse(d_bh);
2237d739b42bSPekka Enberg 			kfree(log_blocks);
2238d739b42bSPekka Enberg 			kfree(real_blocks);
22391da177e4SLinus Torvalds 			return -1;
22401da177e4SLinus Torvalds 		}
22411da177e4SLinus Torvalds 	}
22421da177e4SLinus Torvalds 	/* read in the log blocks, memcpy to the corresponding real block */
22431da177e4SLinus Torvalds 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
22441da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22451da177e4SLinus Torvalds 		wait_on_buffer(log_blocks[i]);
22461da177e4SLinus Torvalds 		if (!buffer_uptodate(log_blocks[i])) {
2247bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
2248bd4c625cSLinus Torvalds 					 "journal-1212: REPLAY FAILURE fsck required! buffer write failed");
2249bd4c625cSLinus Torvalds 			brelse_array(log_blocks + i,
2250bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22511da177e4SLinus Torvalds 			brelse_array(real_blocks, get_desc_trans_len(desc));
22521da177e4SLinus Torvalds 			brelse(c_bh);
22531da177e4SLinus Torvalds 			brelse(d_bh);
2254d739b42bSPekka Enberg 			kfree(log_blocks);
2255d739b42bSPekka Enberg 			kfree(real_blocks);
22561da177e4SLinus Torvalds 			return -1;
22571da177e4SLinus Torvalds 		}
2258bd4c625cSLinus Torvalds 		memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2259bd4c625cSLinus Torvalds 		       real_blocks[i]->b_size);
22601da177e4SLinus Torvalds 		set_buffer_uptodate(real_blocks[i]);
22611da177e4SLinus Torvalds 		brelse(log_blocks[i]);
22621da177e4SLinus Torvalds 	}
22631da177e4SLinus Torvalds 	/* flush out the real blocks */
22641da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22651da177e4SLinus Torvalds 		set_buffer_dirty(real_blocks[i]);
226653778ffdSJan Kara 		ll_rw_block(SWRITE, 1, real_blocks + i);
22671da177e4SLinus Torvalds 	}
22681da177e4SLinus Torvalds 	for (i = 0; i < get_desc_trans_len(desc); i++) {
22691da177e4SLinus Torvalds 		wait_on_buffer(real_blocks[i]);
22701da177e4SLinus Torvalds 		if (!buffer_uptodate(real_blocks[i])) {
2271bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
2272bd4c625cSLinus Torvalds 					 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed");
2273bd4c625cSLinus Torvalds 			brelse_array(real_blocks + i,
2274bd4c625cSLinus Torvalds 				     get_desc_trans_len(desc) - i);
22751da177e4SLinus Torvalds 			brelse(c_bh);
22761da177e4SLinus Torvalds 			brelse(d_bh);
2277d739b42bSPekka Enberg 			kfree(log_blocks);
2278d739b42bSPekka Enberg 			kfree(real_blocks);
22791da177e4SLinus Torvalds 			return -1;
22801da177e4SLinus Torvalds 		}
22811da177e4SLinus Torvalds 		brelse(real_blocks[i]);
22821da177e4SLinus Torvalds 	}
2283bd4c625cSLinus Torvalds 	cur_dblock =
2284bd4c625cSLinus Torvalds 	    SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2285bd4c625cSLinus Torvalds 	    ((trans_offset + get_desc_trans_len(desc) +
2286bd4c625cSLinus Torvalds 	      2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2287bd4c625cSLinus Torvalds 	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2288bd4c625cSLinus Torvalds 		       "journal-1095: setting journal " "start to offset %ld",
22891da177e4SLinus Torvalds 		       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
22901da177e4SLinus Torvalds 
22911da177e4SLinus Torvalds 	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
22921da177e4SLinus Torvalds 	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
22931da177e4SLinus Torvalds 	journal->j_last_flush_trans_id = trans_id;
22941da177e4SLinus Torvalds 	journal->j_trans_id = trans_id + 1;
2295a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
2296a44c94a7SAlexander Zarochentsev 	if (journal->j_trans_id == 0)
2297a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
22981da177e4SLinus Torvalds 	brelse(c_bh);
22991da177e4SLinus Torvalds 	brelse(d_bh);
2300d739b42bSPekka Enberg 	kfree(log_blocks);
2301d739b42bSPekka Enberg 	kfree(real_blocks);
23021da177e4SLinus Torvalds 	return 0;
23031da177e4SLinus Torvalds }
23041da177e4SLinus Torvalds 
23051da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize
23061da177e4SLinus Torvalds    size (but no more than BUFNR blocks at a time). This proved to improve
23071da177e4SLinus Torvalds    mounting speed on self-rebuilding raid5 arrays at least.
23081da177e4SLinus Torvalds    Right now it is only used from journal code. But later we might use it
23091da177e4SLinus Torvalds    from other places.
23101da177e4SLinus Torvalds    Note: Do not use journal_getblk/sb_getblk functions here! */
23113ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev,
23123ee16670SJeff Mahoney 					   b_blocknr_t block, int bufsize,
23133ee16670SJeff Mahoney 					   b_blocknr_t max_block)
23141da177e4SLinus Torvalds {
23151da177e4SLinus Torvalds 	struct buffer_head *bhlist[BUFNR];
23161da177e4SLinus Torvalds 	unsigned int blocks = BUFNR;
23171da177e4SLinus Torvalds 	struct buffer_head *bh;
23181da177e4SLinus Torvalds 	int i, j;
23191da177e4SLinus Torvalds 
23201da177e4SLinus Torvalds 	bh = __getblk(dev, block, bufsize);
23211da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23221da177e4SLinus Torvalds 		return (bh);
23231da177e4SLinus Torvalds 
23241da177e4SLinus Torvalds 	if (block + BUFNR > max_block) {
23251da177e4SLinus Torvalds 		blocks = max_block - block;
23261da177e4SLinus Torvalds 	}
23271da177e4SLinus Torvalds 	bhlist[0] = bh;
23281da177e4SLinus Torvalds 	j = 1;
23291da177e4SLinus Torvalds 	for (i = 1; i < blocks; i++) {
23301da177e4SLinus Torvalds 		bh = __getblk(dev, block + i, bufsize);
23311da177e4SLinus Torvalds 		if (buffer_uptodate(bh)) {
23321da177e4SLinus Torvalds 			brelse(bh);
23331da177e4SLinus Torvalds 			break;
2334bd4c625cSLinus Torvalds 		} else
2335bd4c625cSLinus Torvalds 			bhlist[j++] = bh;
23361da177e4SLinus Torvalds 	}
23371da177e4SLinus Torvalds 	ll_rw_block(READ, j, bhlist);
23381da177e4SLinus Torvalds 	for (i = 1; i < j; i++)
23391da177e4SLinus Torvalds 		brelse(bhlist[i]);
23401da177e4SLinus Torvalds 	bh = bhlist[0];
23411da177e4SLinus Torvalds 	wait_on_buffer(bh);
23421da177e4SLinus Torvalds 	if (buffer_uptodate(bh))
23431da177e4SLinus Torvalds 		return bh;
23441da177e4SLinus Torvalds 	brelse(bh);
23451da177e4SLinus Torvalds 	return NULL;
23461da177e4SLinus Torvalds }
23471da177e4SLinus Torvalds 
23481da177e4SLinus Torvalds /*
23491da177e4SLinus Torvalds ** read and replay the log
23501da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
23511da177e4SLinus Torvalds ** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
23521da177e4SLinus Torvalds **
23531da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
23541da177e4SLinus Torvalds **
23551da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly.
23561da177e4SLinus Torvalds */
2357bd4c625cSLinus Torvalds static int journal_read(struct super_block *p_s_sb)
2358bd4c625cSLinus Torvalds {
23591da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
23601da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
23611da177e4SLinus Torvalds 	unsigned long oldest_trans_id = 0;
23621da177e4SLinus Torvalds 	unsigned long oldest_invalid_trans_id = 0;
23631da177e4SLinus Torvalds 	time_t start;
23641da177e4SLinus Torvalds 	unsigned long oldest_start = 0;
23651da177e4SLinus Torvalds 	unsigned long cur_dblock = 0;
23661da177e4SLinus Torvalds 	unsigned long newest_mount_id = 9;
23671da177e4SLinus Torvalds 	struct buffer_head *d_bh;
23681da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
23691da177e4SLinus Torvalds 	int valid_journal_header = 0;
23701da177e4SLinus Torvalds 	int replay_count = 0;
23711da177e4SLinus Torvalds 	int continue_replay = 1;
23721da177e4SLinus Torvalds 	int ret;
23731da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
23741da177e4SLinus Torvalds 
23751da177e4SLinus Torvalds 	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
23761da177e4SLinus Torvalds 	reiserfs_info(p_s_sb, "checking transaction log (%s)\n",
23771da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b));
23781da177e4SLinus Torvalds 	start = get_seconds();
23791da177e4SLinus Torvalds 
23801da177e4SLinus Torvalds 	/* step 1, read in the journal header block.  Check the transaction it says
23811da177e4SLinus Torvalds 	 ** is the first unflushed, and if that transaction is not valid,
23821da177e4SLinus Torvalds 	 ** replay is done
23831da177e4SLinus Torvalds 	 */
23841da177e4SLinus Torvalds 	journal->j_header_bh = journal_bread(p_s_sb,
2385bd4c625cSLinus Torvalds 					     SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)
2386bd4c625cSLinus Torvalds 					     + SB_ONDISK_JOURNAL_SIZE(p_s_sb));
23871da177e4SLinus Torvalds 	if (!journal->j_header_bh) {
23881da177e4SLinus Torvalds 		return 1;
23891da177e4SLinus Torvalds 	}
23901da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2391c499ec24SVladimir V. Saveliev 	if (le32_to_cpu(jh->j_first_unflushed_offset) <
2392bd4c625cSLinus Torvalds 	    SB_ONDISK_JOURNAL_SIZE(p_s_sb)
2393bd4c625cSLinus Torvalds 	    && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2394bd4c625cSLinus Torvalds 		oldest_start =
2395bd4c625cSLinus Torvalds 		    SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
23961da177e4SLinus Torvalds 		    le32_to_cpu(jh->j_first_unflushed_offset);
23971da177e4SLinus Torvalds 		oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
23981da177e4SLinus Torvalds 		newest_mount_id = le32_to_cpu(jh->j_mount_id);
2399bd4c625cSLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2400bd4c625cSLinus Torvalds 			       "journal-1153: found in "
24011da177e4SLinus Torvalds 			       "header: first_unflushed_offset %d, last_flushed_trans_id "
24021da177e4SLinus Torvalds 			       "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
24031da177e4SLinus Torvalds 			       le32_to_cpu(jh->j_last_flush_trans_id));
24041da177e4SLinus Torvalds 		valid_journal_header = 1;
24051da177e4SLinus Torvalds 
24061da177e4SLinus Torvalds 		/* now, we try to read the first unflushed offset.  If it is not valid,
24071da177e4SLinus Torvalds 		 ** there is nothing more we can do, and it makes no sense to read
24081da177e4SLinus Torvalds 		 ** through the whole log.
24091da177e4SLinus Torvalds 		 */
2410bd4c625cSLinus Torvalds 		d_bh =
2411bd4c625cSLinus Torvalds 		    journal_bread(p_s_sb,
2412bd4c625cSLinus Torvalds 				  SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2413bd4c625cSLinus Torvalds 				  le32_to_cpu(jh->j_first_unflushed_offset));
24141da177e4SLinus Torvalds 		ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL);
24151da177e4SLinus Torvalds 		if (!ret) {
24161da177e4SLinus Torvalds 			continue_replay = 0;
24171da177e4SLinus Torvalds 		}
24181da177e4SLinus Torvalds 		brelse(d_bh);
24191da177e4SLinus Torvalds 		goto start_log_replay;
24201da177e4SLinus Torvalds 	}
24211da177e4SLinus Torvalds 
24221da177e4SLinus Torvalds 	if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) {
24231da177e4SLinus Torvalds 		reiserfs_warning(p_s_sb,
24241da177e4SLinus Torvalds 				 "clm-2076: device is readonly, unable to replay log");
24251da177e4SLinus Torvalds 		return -1;
24261da177e4SLinus Torvalds 	}
24271da177e4SLinus Torvalds 
24281da177e4SLinus Torvalds 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
24291da177e4SLinus Torvalds 	 ** all the valid transactions, and pick out the oldest.
24301da177e4SLinus Torvalds 	 */
2431bd4c625cSLinus Torvalds 	while (continue_replay
2432bd4c625cSLinus Torvalds 	       && cur_dblock <
2433bd4c625cSLinus Torvalds 	       (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2434bd4c625cSLinus Torvalds 		SB_ONDISK_JOURNAL_SIZE(p_s_sb))) {
24351da177e4SLinus Torvalds 		/* Note that it is required for blocksize of primary fs device and journal
24361da177e4SLinus Torvalds 		   device to be the same */
2437bd4c625cSLinus Torvalds 		d_bh =
2438bd4c625cSLinus Torvalds 		    reiserfs_breada(journal->j_dev_bd, cur_dblock,
2439bd4c625cSLinus Torvalds 				    p_s_sb->s_blocksize,
2440bd4c625cSLinus Torvalds 				    SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2441bd4c625cSLinus Torvalds 				    SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2442bd4c625cSLinus Torvalds 		ret =
2443bd4c625cSLinus Torvalds 		    journal_transaction_is_valid(p_s_sb, d_bh,
2444bd4c625cSLinus Torvalds 						 &oldest_invalid_trans_id,
2445bd4c625cSLinus Torvalds 						 &newest_mount_id);
24461da177e4SLinus Torvalds 		if (ret == 1) {
24471da177e4SLinus Torvalds 			desc = (struct reiserfs_journal_desc *)d_bh->b_data;
24481da177e4SLinus Torvalds 			if (oldest_start == 0) {	/* init all oldest_ values */
24491da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24501da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
24511da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2452bd4c625cSLinus Torvalds 				reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2453bd4c625cSLinus Torvalds 					       "journal-1179: Setting "
24541da177e4SLinus Torvalds 					       "oldest_start to offset %llu, trans_id %lu",
2455bd4c625cSLinus Torvalds 					       oldest_start -
2456bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2457bd4c625cSLinus Torvalds 					       (p_s_sb), oldest_trans_id);
24581da177e4SLinus Torvalds 			} else if (oldest_trans_id > get_desc_trans_id(desc)) {
24591da177e4SLinus Torvalds 				/* one we just read was older */
24601da177e4SLinus Torvalds 				oldest_trans_id = get_desc_trans_id(desc);
24611da177e4SLinus Torvalds 				oldest_start = d_bh->b_blocknr;
2462bd4c625cSLinus Torvalds 				reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2463bd4c625cSLinus Torvalds 					       "journal-1180: Resetting "
24641da177e4SLinus Torvalds 					       "oldest_start to offset %lu, trans_id %lu",
2465bd4c625cSLinus Torvalds 					       oldest_start -
2466bd4c625cSLinus Torvalds 					       SB_ONDISK_JOURNAL_1st_BLOCK
2467bd4c625cSLinus Torvalds 					       (p_s_sb), oldest_trans_id);
24681da177e4SLinus Torvalds 			}
24691da177e4SLinus Torvalds 			if (newest_mount_id < get_desc_mount_id(desc)) {
24701da177e4SLinus Torvalds 				newest_mount_id = get_desc_mount_id(desc);
2471bd4c625cSLinus Torvalds 				reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2472bd4c625cSLinus Torvalds 					       "journal-1299: Setting "
2473bd4c625cSLinus Torvalds 					       "newest_mount_id to %d",
2474bd4c625cSLinus Torvalds 					       get_desc_mount_id(desc));
24751da177e4SLinus Torvalds 			}
24761da177e4SLinus Torvalds 			cur_dblock += get_desc_trans_len(desc) + 2;
24771da177e4SLinus Torvalds 		} else {
24781da177e4SLinus Torvalds 			cur_dblock++;
24791da177e4SLinus Torvalds 		}
24801da177e4SLinus Torvalds 		brelse(d_bh);
24811da177e4SLinus Torvalds 	}
24821da177e4SLinus Torvalds 
24831da177e4SLinus Torvalds       start_log_replay:
24841da177e4SLinus Torvalds 	cur_dblock = oldest_start;
24851da177e4SLinus Torvalds 	if (oldest_trans_id) {
2486bd4c625cSLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2487bd4c625cSLinus Torvalds 			       "journal-1206: Starting replay "
24881da177e4SLinus Torvalds 			       "from offset %llu, trans_id %lu",
24891da177e4SLinus Torvalds 			       cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
24901da177e4SLinus Torvalds 			       oldest_trans_id);
24911da177e4SLinus Torvalds 
24921da177e4SLinus Torvalds 	}
24931da177e4SLinus Torvalds 	replay_count = 0;
24941da177e4SLinus Torvalds 	while (continue_replay && oldest_trans_id > 0) {
2495bd4c625cSLinus Torvalds 		ret =
2496bd4c625cSLinus Torvalds 		    journal_read_transaction(p_s_sb, cur_dblock, oldest_start,
2497bd4c625cSLinus Torvalds 					     oldest_trans_id, newest_mount_id);
24981da177e4SLinus Torvalds 		if (ret < 0) {
24991da177e4SLinus Torvalds 			return ret;
25001da177e4SLinus Torvalds 		} else if (ret != 0) {
25011da177e4SLinus Torvalds 			break;
25021da177e4SLinus Torvalds 		}
2503bd4c625cSLinus Torvalds 		cur_dblock =
2504bd4c625cSLinus Torvalds 		    SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start;
25051da177e4SLinus Torvalds 		replay_count++;
25061da177e4SLinus Torvalds 		if (cur_dblock == oldest_start)
25071da177e4SLinus Torvalds 			break;
25081da177e4SLinus Torvalds 	}
25091da177e4SLinus Torvalds 
25101da177e4SLinus Torvalds 	if (oldest_trans_id == 0) {
2511bd4c625cSLinus Torvalds 		reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2512bd4c625cSLinus Torvalds 			       "journal-1225: No valid " "transactions found");
25131da177e4SLinus Torvalds 	}
25141da177e4SLinus Torvalds 	/* j_start does not get set correctly if we don't replay any transactions.
25151da177e4SLinus Torvalds 	 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
25161da177e4SLinus Torvalds 	 ** copy the trans_id from the header
25171da177e4SLinus Torvalds 	 */
25181da177e4SLinus Torvalds 	if (valid_journal_header && replay_count == 0) {
25191da177e4SLinus Torvalds 		journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2520bd4c625cSLinus Torvalds 		journal->j_trans_id =
2521bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2522a44c94a7SAlexander Zarochentsev 		/* check for trans_id overflow */
2523a44c94a7SAlexander Zarochentsev 		if (journal->j_trans_id == 0)
2524a44c94a7SAlexander Zarochentsev 			journal->j_trans_id = 10;
2525bd4c625cSLinus Torvalds 		journal->j_last_flush_trans_id =
2526bd4c625cSLinus Torvalds 		    le32_to_cpu(jh->j_last_flush_trans_id);
25271da177e4SLinus Torvalds 		journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
25281da177e4SLinus Torvalds 	} else {
25291da177e4SLinus Torvalds 		journal->j_mount_id = newest_mount_id + 1;
25301da177e4SLinus Torvalds 	}
25311da177e4SLinus Torvalds 	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
25321da177e4SLinus Torvalds 		       "newest_mount_id to %lu", journal->j_mount_id);
25331da177e4SLinus Torvalds 	journal->j_first_unflushed_offset = journal->j_start;
25341da177e4SLinus Torvalds 	if (replay_count > 0) {
2535bd4c625cSLinus Torvalds 		reiserfs_info(p_s_sb,
2536bd4c625cSLinus Torvalds 			      "replayed %d transactions in %lu seconds\n",
25371da177e4SLinus Torvalds 			      replay_count, get_seconds() - start);
25381da177e4SLinus Torvalds 	}
25391da177e4SLinus Torvalds 	if (!bdev_read_only(p_s_sb->s_bdev) &&
25401da177e4SLinus Torvalds 	    _update_journal_header_block(p_s_sb, journal->j_start,
2541bd4c625cSLinus Torvalds 					 journal->j_last_flush_trans_id)) {
25421da177e4SLinus Torvalds 		/* replay failed, caller must call free_journal_ram and abort
25431da177e4SLinus Torvalds 		 ** the mount
25441da177e4SLinus Torvalds 		 */
25451da177e4SLinus Torvalds 		return -1;
25461da177e4SLinus Torvalds 	}
25471da177e4SLinus Torvalds 	return 0;
25481da177e4SLinus Torvalds }
25491da177e4SLinus Torvalds 
25501da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
25511da177e4SLinus Torvalds {
25521da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
25538c777cc4SPekka Enberg 	jl = kzalloc(sizeof(struct reiserfs_journal_list),
25548c777cc4SPekka Enberg 		     GFP_NOFS | __GFP_NOFAIL);
25551da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_list);
25561da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_working_list);
25571da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_tail_bh_list);
25581da177e4SLinus Torvalds 	INIT_LIST_HEAD(&jl->j_bh_list);
25591da177e4SLinus Torvalds 	sema_init(&jl->j_commit_lock, 1);
25601da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_num_lists++;
25611da177e4SLinus Torvalds 	get_journal_list(jl);
25621da177e4SLinus Torvalds 	return jl;
25631da177e4SLinus Torvalds }
25641da177e4SLinus Torvalds 
2565bd4c625cSLinus Torvalds static void journal_list_init(struct super_block *p_s_sb)
2566bd4c625cSLinus Torvalds {
25671da177e4SLinus Torvalds 	SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
25681da177e4SLinus Torvalds }
25691da177e4SLinus Torvalds 
25701da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super,
25711da177e4SLinus Torvalds 			       struct reiserfs_journal *journal)
25721da177e4SLinus Torvalds {
25731da177e4SLinus Torvalds 	int result;
25741da177e4SLinus Torvalds 
25751da177e4SLinus Torvalds 	result = 0;
25761da177e4SLinus Torvalds 
25771da177e4SLinus Torvalds 	if (journal->j_dev_file != NULL) {
25781da177e4SLinus Torvalds 		result = filp_close(journal->j_dev_file, NULL);
25791da177e4SLinus Torvalds 		journal->j_dev_file = NULL;
25801da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
25811da177e4SLinus Torvalds 	} else if (journal->j_dev_bd != NULL) {
25821da177e4SLinus Torvalds 		result = blkdev_put(journal->j_dev_bd);
25831da177e4SLinus Torvalds 		journal->j_dev_bd = NULL;
25841da177e4SLinus Torvalds 	}
25851da177e4SLinus Torvalds 
25861da177e4SLinus Torvalds 	if (result != 0) {
2587bd4c625cSLinus Torvalds 		reiserfs_warning(super,
2588bd4c625cSLinus Torvalds 				 "sh-457: release_journal_dev: Cannot release journal device: %i",
2589bd4c625cSLinus Torvalds 				 result);
25901da177e4SLinus Torvalds 	}
25911da177e4SLinus Torvalds 	return result;
25921da177e4SLinus Torvalds }
25931da177e4SLinus Torvalds 
25941da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super,
25951da177e4SLinus Torvalds 			    struct reiserfs_journal *journal,
25961da177e4SLinus Torvalds 			    const char *jdev_name)
25971da177e4SLinus Torvalds {
25981da177e4SLinus Torvalds 	int result;
25991da177e4SLinus Torvalds 	dev_t jdev;
26001da177e4SLinus Torvalds 	int blkdev_mode = FMODE_READ | FMODE_WRITE;
26011da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
26021da177e4SLinus Torvalds 
26031da177e4SLinus Torvalds 	result = 0;
26041da177e4SLinus Torvalds 
26051da177e4SLinus Torvalds 	journal->j_dev_bd = NULL;
26061da177e4SLinus Torvalds 	journal->j_dev_file = NULL;
26071da177e4SLinus Torvalds 	jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
26081da177e4SLinus Torvalds 	    new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
26091da177e4SLinus Torvalds 
26101da177e4SLinus Torvalds 	if (bdev_read_only(super->s_bdev))
26111da177e4SLinus Torvalds 		blkdev_mode = FMODE_READ;
26121da177e4SLinus Torvalds 
26131da177e4SLinus Torvalds 	/* there is no "jdev" option and journal is on separate device */
26141da177e4SLinus Torvalds 	if ((!jdev_name || !jdev_name[0])) {
26151da177e4SLinus Torvalds 		journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
26161da177e4SLinus Torvalds 		if (IS_ERR(journal->j_dev_bd)) {
26171da177e4SLinus Torvalds 			result = PTR_ERR(journal->j_dev_bd);
26181da177e4SLinus Torvalds 			journal->j_dev_bd = NULL;
26191da177e4SLinus Torvalds 			reiserfs_warning(super, "sh-458: journal_init_dev: "
26201da177e4SLinus Torvalds 					 "cannot init journal device '%s': %i",
26211da177e4SLinus Torvalds 					 __bdevname(jdev, b), result);
26221da177e4SLinus Torvalds 			return result;
26231da177e4SLinus Torvalds 		} else if (jdev != super->s_dev)
26241da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
26251da177e4SLinus Torvalds 		return 0;
26261da177e4SLinus Torvalds 	}
26271da177e4SLinus Torvalds 
26281da177e4SLinus Torvalds 	journal->j_dev_file = filp_open(jdev_name, 0, 0);
26291da177e4SLinus Torvalds 	if (!IS_ERR(journal->j_dev_file)) {
26301da177e4SLinus Torvalds 		struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
26311da177e4SLinus Torvalds 		if (!S_ISBLK(jdev_inode->i_mode)) {
26321da177e4SLinus Torvalds 			reiserfs_warning(super, "journal_init_dev: '%s' is "
26331da177e4SLinus Torvalds 					 "not a block device", jdev_name);
26341da177e4SLinus Torvalds 			result = -ENOTBLK;
263574f9f974SEdward Shishkin 			release_journal_dev(super, journal);
26361da177e4SLinus Torvalds 		} else {
26371da177e4SLinus Torvalds 			/* ok */
26381da177e4SLinus Torvalds 			journal->j_dev_bd = I_BDEV(jdev_inode);
26391da177e4SLinus Torvalds 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
2640bd4c625cSLinus Torvalds 			reiserfs_info(super,
2641bd4c625cSLinus Torvalds 				      "journal_init_dev: journal device: %s\n",
264274f9f974SEdward Shishkin 				      bdevname(journal->j_dev_bd, b));
26431da177e4SLinus Torvalds 		}
26441da177e4SLinus Torvalds 	} else {
26451da177e4SLinus Torvalds 		result = PTR_ERR(journal->j_dev_file);
26461da177e4SLinus Torvalds 		journal->j_dev_file = NULL;
26471da177e4SLinus Torvalds 		reiserfs_warning(super,
26481da177e4SLinus Torvalds 				 "journal_init_dev: Cannot open '%s': %i",
26491da177e4SLinus Torvalds 				 jdev_name, result);
26501da177e4SLinus Torvalds 	}
26511da177e4SLinus Torvalds 	return result;
26521da177e4SLinus Torvalds }
26531da177e4SLinus Torvalds 
/**
 * When creating/tuning a file system, the user can assign some
 * journal params within boundaries which depend on the ratio
 * blocksize/standard_blocksize.
 *
 * For blocks >= standard_blocksize the transaction size should
 * be not less than JOURNAL_TRANS_MIN_DEFAULT, and not more
 * than JOURNAL_TRANS_MAX_DEFAULT.
 *
 * For blocks < standard_blocksize these boundaries should be
 * decreased proportionally.
 */
2666cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096)
2667cf3d0b81SEdward Shishkin 
2668cf3d0b81SEdward Shishkin static int check_advise_trans_params(struct super_block *p_s_sb,
2669cf3d0b81SEdward Shishkin 				     struct reiserfs_journal *journal)
2670cf3d0b81SEdward Shishkin {
2671cf3d0b81SEdward Shishkin         if (journal->j_trans_max) {
2672cf3d0b81SEdward Shishkin 	        /* Non-default journal params.
2673cf3d0b81SEdward Shishkin 		   Do sanity check for them. */
2674cf3d0b81SEdward Shishkin 	        int ratio = 1;
2675cf3d0b81SEdward Shishkin 		if (p_s_sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2676cf3d0b81SEdward Shishkin 		        ratio = REISERFS_STANDARD_BLKSIZE / p_s_sb->s_blocksize;
2677cf3d0b81SEdward Shishkin 
2678cf3d0b81SEdward Shishkin 		if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2679cf3d0b81SEdward Shishkin 		    journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2680cf3d0b81SEdward Shishkin 		    SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max <
2681cf3d0b81SEdward Shishkin 		    JOURNAL_MIN_RATIO) {
2682cf3d0b81SEdward Shishkin 		        reiserfs_warning(p_s_sb,
2683cf3d0b81SEdward Shishkin 				 "sh-462: bad transaction max size (%u). FSCK?",
2684cf3d0b81SEdward Shishkin 				 journal->j_trans_max);
2685cf3d0b81SEdward Shishkin 			return 1;
2686cf3d0b81SEdward Shishkin 		}
2687cf3d0b81SEdward Shishkin 		if (journal->j_max_batch != (journal->j_trans_max) *
2688cf3d0b81SEdward Shishkin 		        JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2689cf3d0b81SEdward Shishkin 		        reiserfs_warning(p_s_sb,
2690cf3d0b81SEdward Shishkin 				"sh-463: bad transaction max batch (%u). FSCK?",
2691cf3d0b81SEdward Shishkin 				journal->j_max_batch);
2692cf3d0b81SEdward Shishkin 			return 1;
2693cf3d0b81SEdward Shishkin 		}
2694cf3d0b81SEdward Shishkin 	} else {
2695cf3d0b81SEdward Shishkin 		/* Default journal params.
2696cf3d0b81SEdward Shishkin                    The file system was created by old version
2697cf3d0b81SEdward Shishkin 		   of mkreiserfs, so some fields contain zeros,
2698cf3d0b81SEdward Shishkin 		   and we need to advise proper values for them */
2699cf3d0b81SEdward Shishkin 	        if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE)
2700cf3d0b81SEdward Shishkin 	                reiserfs_panic(p_s_sb, "sh-464: bad blocksize (%u)",
2701cf3d0b81SEdward Shishkin 				       p_s_sb->s_blocksize);
2702cf3d0b81SEdward Shishkin 		journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2703cf3d0b81SEdward Shishkin 		journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2704cf3d0b81SEdward Shishkin 		journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2705cf3d0b81SEdward Shishkin 	}
2706cf3d0b81SEdward Shishkin 	return 0;
2707cf3d0b81SEdward Shishkin }
2708cf3d0b81SEdward Shishkin 
27091da177e4SLinus Torvalds /*
27101da177e4SLinus Torvalds ** must be called once on fs mount.  calls journal_read for you
27111da177e4SLinus Torvalds */
2712bd4c625cSLinus Torvalds int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2713bd4c625cSLinus Torvalds 		 int old_format, unsigned int commit_max_age)
2714bd4c625cSLinus Torvalds {
27151da177e4SLinus Torvalds 	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2;
27161da177e4SLinus Torvalds 	struct buffer_head *bhjh;
27171da177e4SLinus Torvalds 	struct reiserfs_super_block *rs;
27181da177e4SLinus Torvalds 	struct reiserfs_journal_header *jh;
27191da177e4SLinus Torvalds 	struct reiserfs_journal *journal;
27201da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
27211da177e4SLinus Torvalds 	char b[BDEVNAME_SIZE];
27221da177e4SLinus Torvalds 
27231da177e4SLinus Torvalds 	journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal));
27241da177e4SLinus Torvalds 	if (!journal) {
2725bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2726bd4c625cSLinus Torvalds 				 "journal-1256: unable to get memory for journal structure");
27271da177e4SLinus Torvalds 		return 1;
27281da177e4SLinus Torvalds 	}
27291da177e4SLinus Torvalds 	memset(journal, 0, sizeof(struct reiserfs_journal));
27301da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_bitmap_nodes);
27311da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_prealloc_list);
27321da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_working_list);
27331da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_journal_list);
27341da177e4SLinus Torvalds 	journal->j_persistent_trans = 0;
27351da177e4SLinus Torvalds 	if (reiserfs_allocate_list_bitmaps(p_s_sb,
27361da177e4SLinus Torvalds 					   journal->j_list_bitmap,
2737cb680c1bSJeff Mahoney 					   reiserfs_bmap_count(p_s_sb)))
27381da177e4SLinus Torvalds 		goto free_and_return;
27391da177e4SLinus Torvalds 	allocate_bitmap_nodes(p_s_sb);
27401da177e4SLinus Torvalds 
27411da177e4SLinus Torvalds 	/* reserved for journal area support */
27421da177e4SLinus Torvalds 	SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
2743bd4c625cSLinus Torvalds 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2744bd4c625cSLinus Torvalds 						 / p_s_sb->s_blocksize +
2745cb680c1bSJeff Mahoney 						 reiserfs_bmap_count(p_s_sb) +
2746bd4c625cSLinus Torvalds 						 1 :
2747bd4c625cSLinus Torvalds 						 REISERFS_DISK_OFFSET_IN_BYTES /
2748bd4c625cSLinus Torvalds 						 p_s_sb->s_blocksize + 2);
27491da177e4SLinus Torvalds 
27501da177e4SLinus Torvalds 	/* Sanity check to see is the standard journal fitting withing first bitmap
27511da177e4SLinus Torvalds 	   (actual for small blocksizes) */
27521da177e4SLinus Torvalds 	if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) &&
2753bd4c625cSLinus Torvalds 	    (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) +
2754bd4c625cSLinus Torvalds 	     SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) {
2755bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2756bd4c625cSLinus Torvalds 				 "journal-1393: journal does not fit for area "
27571da177e4SLinus Torvalds 				 "addressed by first of bitmap blocks. It starts at "
27581da177e4SLinus Torvalds 				 "%u and its size is %u. Block size %ld",
27591da177e4SLinus Torvalds 				 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb),
2760bd4c625cSLinus Torvalds 				 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2761bd4c625cSLinus Torvalds 				 p_s_sb->s_blocksize);
27621da177e4SLinus Torvalds 		goto free_and_return;
27631da177e4SLinus Torvalds 	}
27641da177e4SLinus Torvalds 
27651da177e4SLinus Torvalds 	if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) {
2766bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2767bd4c625cSLinus Torvalds 				 "sh-462: unable to initialize jornal device");
27681da177e4SLinus Torvalds 		goto free_and_return;
27691da177e4SLinus Torvalds 	}
27701da177e4SLinus Torvalds 
27711da177e4SLinus Torvalds 	rs = SB_DISK_SUPER_BLOCK(p_s_sb);
27721da177e4SLinus Torvalds 
27731da177e4SLinus Torvalds 	/* read journal header */
27741da177e4SLinus Torvalds 	bhjh = journal_bread(p_s_sb,
2775bd4c625cSLinus Torvalds 			     SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2776bd4c625cSLinus Torvalds 			     SB_ONDISK_JOURNAL_SIZE(p_s_sb));
27771da177e4SLinus Torvalds 	if (!bhjh) {
2778bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2779bd4c625cSLinus Torvalds 				 "sh-459: unable to read journal header");
27801da177e4SLinus Torvalds 		goto free_and_return;
27811da177e4SLinus Torvalds 	}
27821da177e4SLinus Torvalds 	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
27831da177e4SLinus Torvalds 
27841da177e4SLinus Torvalds 	/* make sure that journal matches to the super block */
2785bd4c625cSLinus Torvalds 	if (is_reiserfs_jr(rs)
2786bd4c625cSLinus Torvalds 	    && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2787bd4c625cSLinus Torvalds 		sb_jp_journal_magic(rs))) {
2788bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2789bd4c625cSLinus Torvalds 				 "sh-460: journal header magic %x "
27901da177e4SLinus Torvalds 				 "(device %s) does not match to magic found in super "
2791bd4c625cSLinus Torvalds 				 "block %x", jh->jh_journal.jp_journal_magic,
27921da177e4SLinus Torvalds 				 bdevname(journal->j_dev_bd, b),
27931da177e4SLinus Torvalds 				 sb_jp_journal_magic(rs));
27941da177e4SLinus Torvalds 		brelse(bhjh);
27951da177e4SLinus Torvalds 		goto free_and_return;
27961da177e4SLinus Torvalds 	}
27971da177e4SLinus Torvalds 
27981da177e4SLinus Torvalds 	journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
27991da177e4SLinus Torvalds 	journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2800bd4c625cSLinus Torvalds 	journal->j_max_commit_age =
2801bd4c625cSLinus Torvalds 	    le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
28021da177e4SLinus Torvalds 	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
28031da177e4SLinus Torvalds 
2804cf3d0b81SEdward Shishkin 	if (check_advise_trans_params(p_s_sb, journal) != 0)
2805cf3d0b81SEdward Shishkin 	        goto free_and_return;
28061da177e4SLinus Torvalds 	journal->j_default_max_commit_age = journal->j_max_commit_age;
28071da177e4SLinus Torvalds 
28081da177e4SLinus Torvalds 	if (commit_max_age != 0) {
28091da177e4SLinus Torvalds 		journal->j_max_commit_age = commit_max_age;
28101da177e4SLinus Torvalds 		journal->j_max_trans_age = commit_max_age;
28111da177e4SLinus Torvalds 	}
28121da177e4SLinus Torvalds 
28131da177e4SLinus Torvalds 	reiserfs_info(p_s_sb, "journal params: device %s, size %u, "
28141da177e4SLinus Torvalds 		      "journal first block %u, max trans len %u, max batch %u, "
28151da177e4SLinus Torvalds 		      "max commit age %u, max trans age %u\n",
28161da177e4SLinus Torvalds 		      bdevname(journal->j_dev_bd, b),
28171da177e4SLinus Torvalds 		      SB_ONDISK_JOURNAL_SIZE(p_s_sb),
28181da177e4SLinus Torvalds 		      SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
28191da177e4SLinus Torvalds 		      journal->j_trans_max,
28201da177e4SLinus Torvalds 		      journal->j_max_batch,
2821bd4c625cSLinus Torvalds 		      journal->j_max_commit_age, journal->j_max_trans_age);
28221da177e4SLinus Torvalds 
28231da177e4SLinus Torvalds 	brelse(bhjh);
28241da177e4SLinus Torvalds 
28251da177e4SLinus Torvalds 	journal->j_list_bitmap_index = 0;
28261da177e4SLinus Torvalds 	journal_list_init(p_s_sb);
28271da177e4SLinus Torvalds 
2828bd4c625cSLinus Torvalds 	memset(journal->j_list_hash_table, 0,
2829bd4c625cSLinus Torvalds 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
28301da177e4SLinus Torvalds 
28311da177e4SLinus Torvalds 	INIT_LIST_HEAD(&journal->j_dirty_buffers);
28321da177e4SLinus Torvalds 	spin_lock_init(&journal->j_dirty_buffers_lock);
28331da177e4SLinus Torvalds 
28341da177e4SLinus Torvalds 	journal->j_start = 0;
28351da177e4SLinus Torvalds 	journal->j_len = 0;
28361da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
28371da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
28381da177e4SLinus Torvalds 	atomic_set(&(journal->j_async_throttle), 0);
28391da177e4SLinus Torvalds 	journal->j_bcount = 0;
28401da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
28411da177e4SLinus Torvalds 	journal->j_last = NULL;
28421da177e4SLinus Torvalds 	journal->j_first = NULL;
28431da177e4SLinus Torvalds 	init_waitqueue_head(&(journal->j_join_wait));
28441da177e4SLinus Torvalds 	sema_init(&journal->j_lock, 1);
28451da177e4SLinus Torvalds 	sema_init(&journal->j_flush_sem, 1);
28461da177e4SLinus Torvalds 
28471da177e4SLinus Torvalds 	journal->j_trans_id = 10;
28481da177e4SLinus Torvalds 	journal->j_mount_id = 10;
28491da177e4SLinus Torvalds 	journal->j_state = 0;
28501da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
28511da177e4SLinus Torvalds 	journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
28521da177e4SLinus Torvalds 	journal->j_cnode_free_orig = journal->j_cnode_free_list;
28531da177e4SLinus Torvalds 	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
28541da177e4SLinus Torvalds 	journal->j_cnode_used = 0;
28551da177e4SLinus Torvalds 	journal->j_must_wait = 0;
28561da177e4SLinus Torvalds 
2857576f6d79SJeff Mahoney 	if (journal->j_cnode_free == 0) {
2858576f6d79SJeff Mahoney         	reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory "
2859576f6d79SJeff Mahoney 		                 "allocation failed (%ld bytes). Journal is "
2860576f6d79SJeff Mahoney 		                 "too large for available memory. Usually "
2861576f6d79SJeff Mahoney 		                 "this is due to a journal that is too large.",
2862576f6d79SJeff Mahoney 		                 sizeof (struct reiserfs_journal_cnode) * num_cnodes);
2863576f6d79SJeff Mahoney         	goto free_and_return;
2864576f6d79SJeff Mahoney 	}
2865576f6d79SJeff Mahoney 
28661da177e4SLinus Torvalds 	init_journal_hash(p_s_sb);
28671da177e4SLinus Torvalds 	jl = journal->j_current_jl;
28681da177e4SLinus Torvalds 	jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl);
28691da177e4SLinus Torvalds 	if (!jl->j_list_bitmap) {
2870bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
2871bd4c625cSLinus Torvalds 				 "journal-2005, get_list_bitmap failed for journal list 0");
28721da177e4SLinus Torvalds 		goto free_and_return;
28731da177e4SLinus Torvalds 	}
28741da177e4SLinus Torvalds 	if (journal_read(p_s_sb) < 0) {
28751da177e4SLinus Torvalds 		reiserfs_warning(p_s_sb, "Replay Failure, unable to mount");
28761da177e4SLinus Torvalds 		goto free_and_return;
28771da177e4SLinus Torvalds 	}
28781da177e4SLinus Torvalds 
28791da177e4SLinus Torvalds 	reiserfs_mounted_fs_count++;
28801da177e4SLinus Torvalds 	if (reiserfs_mounted_fs_count <= 1)
28811da177e4SLinus Torvalds 		commit_wq = create_workqueue("reiserfs");
28821da177e4SLinus Torvalds 
2883c4028958SDavid Howells 	INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2884c4028958SDavid Howells 	journal->j_work_sb = p_s_sb;
28851da177e4SLinus Torvalds 	return 0;
28861da177e4SLinus Torvalds       free_and_return:
28871da177e4SLinus Torvalds 	free_journal_ram(p_s_sb);
28881da177e4SLinus Torvalds 	return 1;
28891da177e4SLinus Torvalds }
28901da177e4SLinus Torvalds 
28911da177e4SLinus Torvalds /*
28921da177e4SLinus Torvalds ** test for a polite end of the current transaction.  Used by file_write, and should
28931da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single
28941da177e4SLinus Torvalds ** transaction
28951da177e4SLinus Torvalds */
2896bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2897bd4c625cSLinus Torvalds 				   int new_alloc)
2898bd4c625cSLinus Torvalds {
28991da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29001da177e4SLinus Torvalds 	time_t now = get_seconds();
29011da177e4SLinus Torvalds 	/* cannot restart while nested */
29021da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29031da177e4SLinus Torvalds 	if (th->t_refcount > 1)
29041da177e4SLinus Torvalds 		return 0;
29051da177e4SLinus Torvalds 	if (journal->j_must_wait > 0 ||
29061da177e4SLinus Torvalds 	    (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
29071da177e4SLinus Torvalds 	    atomic_read(&(journal->j_jlock)) ||
29081da177e4SLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
29091da177e4SLinus Torvalds 	    journal->j_cnode_free < (journal->j_trans_max * 3)) {
29101da177e4SLinus Torvalds 		return 1;
29111da177e4SLinus Torvalds 	}
29126ae1ea44SChris Mason 	/* protected by the BKL here */
29136ae1ea44SChris Mason 	journal->j_len_alloc += new_alloc;
29146ae1ea44SChris Mason 	th->t_blocks_allocated += new_alloc ;
29151da177e4SLinus Torvalds 	return 0;
29161da177e4SLinus Torvalds }
29171da177e4SLinus Torvalds 
29181da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the
29191da177e4SLinus Torvalds ** kernel_lock to be held
29201da177e4SLinus Torvalds */
2921bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2922bd4c625cSLinus Torvalds {
29231da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
29241da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
29251da177e4SLinus Torvalds 	journal->j_must_wait = 1;
29261da177e4SLinus Torvalds 	set_bit(J_WRITERS_BLOCKED, &journal->j_state);
29271da177e4SLinus Torvalds 	return;
29281da177e4SLinus Torvalds }
29291da177e4SLinus Torvalds 
29301da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
29311da177e4SLinus Torvalds ** require BKL
29321da177e4SLinus Torvalds */
2933bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s)
2934bd4c625cSLinus Torvalds {
29351da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29361da177e4SLinus Torvalds 	clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
29371da177e4SLinus Torvalds 	wake_up(&journal->j_join_wait);
29381da177e4SLinus Torvalds }
29391da177e4SLinus Torvalds 
29401da177e4SLinus Torvalds /* this must be called without a transaction started, and does not
29411da177e4SLinus Torvalds ** require BKL
29421da177e4SLinus Torvalds */
2943bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s)
2944bd4c625cSLinus Torvalds {
29451da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29461da177e4SLinus Torvalds 	wait_event(journal->j_join_wait,
29471da177e4SLinus Torvalds 		   !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
29481da177e4SLinus Torvalds }
29491da177e4SLinus Torvalds 
2950bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s)
2951bd4c625cSLinus Torvalds {
29521da177e4SLinus Torvalds 	wait_queue_t wait;
29531da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29541da177e4SLinus Torvalds 	set_bit(J_WRITERS_QUEUED, &journal->j_state);
29551da177e4SLinus Torvalds 
29561da177e4SLinus Torvalds 	/*
29571da177e4SLinus Torvalds 	 * we don't want to use wait_event here because
29581da177e4SLinus Torvalds 	 * we only want to wait once.
29591da177e4SLinus Torvalds 	 */
29601da177e4SLinus Torvalds 	init_waitqueue_entry(&wait, current);
29611da177e4SLinus Torvalds 	add_wait_queue(&journal->j_join_wait, &wait);
29621da177e4SLinus Torvalds 	set_current_state(TASK_UNINTERRUPTIBLE);
29631da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
29641da177e4SLinus Torvalds 		schedule();
29655ab2f7e0SMilind Arun Choudhary 	__set_current_state(TASK_RUNNING);
29661da177e4SLinus Torvalds 	remove_wait_queue(&journal->j_join_wait, &wait);
29671da177e4SLinus Torvalds }
29681da177e4SLinus Torvalds 
2969bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s)
2970bd4c625cSLinus Torvalds {
29711da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
29721da177e4SLinus Torvalds 	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
29731da177e4SLinus Torvalds 		wake_up(&journal->j_join_wait);
29741da177e4SLinus Torvalds }
29751da177e4SLinus Torvalds 
2976bd4c625cSLinus Torvalds static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
29771da177e4SLinus Torvalds {
29781da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
29791da177e4SLinus Torvalds 	unsigned long bcount = journal->j_bcount;
29801da177e4SLinus Torvalds 	while (1) {
2981041e0e3bSNishanth Aravamudan 		schedule_timeout_uninterruptible(1);
29821da177e4SLinus Torvalds 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
29831da177e4SLinus Torvalds 		while ((atomic_read(&journal->j_wcount) > 0 ||
29841da177e4SLinus Torvalds 			atomic_read(&journal->j_jlock)) &&
29851da177e4SLinus Torvalds 		       journal->j_trans_id == trans_id) {
29861da177e4SLinus Torvalds 			queue_log_writer(sb);
29871da177e4SLinus Torvalds 		}
29881da177e4SLinus Torvalds 		if (journal->j_trans_id != trans_id)
29891da177e4SLinus Torvalds 			break;
29901da177e4SLinus Torvalds 		if (bcount == journal->j_bcount)
29911da177e4SLinus Torvalds 			break;
29921da177e4SLinus Torvalds 		bcount = journal->j_bcount;
29931da177e4SLinus Torvalds 	}
29941da177e4SLinus Torvalds }
29951da177e4SLinus Torvalds 
29961da177e4SLinus Torvalds /* join == true if you must join an existing transaction.
29971da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish
29981da177e4SLinus Torvalds **
29991da177e4SLinus Torvalds ** this will block until the transaction is joinable.  send the number of blocks you
30001da177e4SLinus Torvalds ** expect to use in nblocks.
30011da177e4SLinus Torvalds */
3002bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3003bd4c625cSLinus Torvalds 			      struct super_block *p_s_sb, unsigned long nblocks,
3004bd4c625cSLinus Torvalds 			      int join)
3005bd4c625cSLinus Torvalds {
30061da177e4SLinus Torvalds 	time_t now = get_seconds();
30071da177e4SLinus Torvalds 	int old_trans_id;
30081da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
30091da177e4SLinus Torvalds 	struct reiserfs_transaction_handle myth;
30101da177e4SLinus Torvalds 	int sched_count = 0;
30111da177e4SLinus Torvalds 	int retval;
30121da177e4SLinus Torvalds 
30131da177e4SLinus Torvalds 	reiserfs_check_lock_depth(p_s_sb, "journal_begin");
301414a61442SEric Sesterhenn 	BUG_ON(nblocks > journal->j_trans_max);
30151da177e4SLinus Torvalds 
30161da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.journal_being);
30171da177e4SLinus Torvalds 	/* set here for journal_join */
30181da177e4SLinus Torvalds 	th->t_refcount = 1;
30191da177e4SLinus Torvalds 	th->t_super = p_s_sb;
30201da177e4SLinus Torvalds 
30211da177e4SLinus Torvalds       relock:
30221da177e4SLinus Torvalds 	lock_journal(p_s_sb);
30231da177e4SLinus Torvalds 	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
30241da177e4SLinus Torvalds 		unlock_journal(p_s_sb);
30251da177e4SLinus Torvalds 		retval = journal->j_errno;
30261da177e4SLinus Torvalds 		goto out_fail;
30271da177e4SLinus Torvalds 	}
30281da177e4SLinus Torvalds 	journal->j_bcount++;
30291da177e4SLinus Torvalds 
30301da177e4SLinus Torvalds 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
30311da177e4SLinus Torvalds 		unlock_journal(p_s_sb);
30321da177e4SLinus Torvalds 		reiserfs_wait_on_write_block(p_s_sb);
30331da177e4SLinus Torvalds 		PROC_INFO_INC(p_s_sb, journal.journal_relock_writers);
30341da177e4SLinus Torvalds 		goto relock;
30351da177e4SLinus Torvalds 	}
30361da177e4SLinus Torvalds 	now = get_seconds();
30371da177e4SLinus Torvalds 
30381da177e4SLinus Torvalds 	/* if there is no room in the journal OR
30391da177e4SLinus Torvalds 	 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
30401da177e4SLinus Torvalds 	 ** we don't sleep if there aren't other writers
30411da177e4SLinus Torvalds 	 */
30421da177e4SLinus Torvalds 
30431da177e4SLinus Torvalds 	if ((!join && journal->j_must_wait > 0) ||
3044bd4c625cSLinus Torvalds 	    (!join
3045bd4c625cSLinus Torvalds 	     && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
3046bd4c625cSLinus Torvalds 	    || (!join && atomic_read(&journal->j_wcount) > 0
3047bd4c625cSLinus Torvalds 		&& journal->j_trans_start_time > 0
3048bd4c625cSLinus Torvalds 		&& (now - journal->j_trans_start_time) >
3049bd4c625cSLinus Torvalds 		journal->j_max_trans_age) || (!join
3050bd4c625cSLinus Torvalds 					      && atomic_read(&journal->j_jlock))
3051bd4c625cSLinus Torvalds 	    || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
30521da177e4SLinus Torvalds 
30531da177e4SLinus Torvalds 		old_trans_id = journal->j_trans_id;
30541da177e4SLinus Torvalds 		unlock_journal(p_s_sb);	/* allow others to finish this transaction */
30551da177e4SLinus Torvalds 
30561da177e4SLinus Torvalds 		if (!join && (journal->j_len_alloc + nblocks + 2) >=
30571da177e4SLinus Torvalds 		    journal->j_max_batch &&
3058bd4c625cSLinus Torvalds 		    ((journal->j_len + nblocks + 2) * 100) <
3059bd4c625cSLinus Torvalds 		    (journal->j_len_alloc * 75)) {
30601da177e4SLinus Torvalds 			if (atomic_read(&journal->j_wcount) > 10) {
30611da177e4SLinus Torvalds 				sched_count++;
30621da177e4SLinus Torvalds 				queue_log_writer(p_s_sb);
30631da177e4SLinus Torvalds 				goto relock;
30641da177e4SLinus Torvalds 			}
30651da177e4SLinus Torvalds 		}
30661da177e4SLinus Torvalds 		/* don't mess with joining the transaction if all we have to do is
30671da177e4SLinus Torvalds 		 * wait for someone else to do a commit
30681da177e4SLinus Torvalds 		 */
30691da177e4SLinus Torvalds 		if (atomic_read(&journal->j_jlock)) {
30701da177e4SLinus Torvalds 			while (journal->j_trans_id == old_trans_id &&
30711da177e4SLinus Torvalds 			       atomic_read(&journal->j_jlock)) {
30721da177e4SLinus Torvalds 				queue_log_writer(p_s_sb);
30731da177e4SLinus Torvalds 			}
30741da177e4SLinus Torvalds 			goto relock;
30751da177e4SLinus Torvalds 		}
30761da177e4SLinus Torvalds 		retval = journal_join(&myth, p_s_sb, 1);
30771da177e4SLinus Torvalds 		if (retval)
30781da177e4SLinus Torvalds 			goto out_fail;
30791da177e4SLinus Torvalds 
30801da177e4SLinus Torvalds 		/* someone might have ended the transaction while we joined */
30811da177e4SLinus Torvalds 		if (old_trans_id != journal->j_trans_id) {
30821da177e4SLinus Torvalds 			retval = do_journal_end(&myth, p_s_sb, 1, 0);
30831da177e4SLinus Torvalds 		} else {
30841da177e4SLinus Torvalds 			retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW);
30851da177e4SLinus Torvalds 		}
30861da177e4SLinus Torvalds 
30871da177e4SLinus Torvalds 		if (retval)
30881da177e4SLinus Torvalds 			goto out_fail;
30891da177e4SLinus Torvalds 
30901da177e4SLinus Torvalds 		PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount);
30911da177e4SLinus Torvalds 		goto relock;
30921da177e4SLinus Torvalds 	}
30931da177e4SLinus Torvalds 	/* we are the first writer, set trans_id */
30941da177e4SLinus Torvalds 	if (journal->j_trans_start_time == 0) {
30951da177e4SLinus Torvalds 		journal->j_trans_start_time = get_seconds();
30961da177e4SLinus Torvalds 	}
30971da177e4SLinus Torvalds 	atomic_inc(&(journal->j_wcount));
30981da177e4SLinus Torvalds 	journal->j_len_alloc += nblocks;
30991da177e4SLinus Torvalds 	th->t_blocks_logged = 0;
31001da177e4SLinus Torvalds 	th->t_blocks_allocated = nblocks;
31011da177e4SLinus Torvalds 	th->t_trans_id = journal->j_trans_id;
31021da177e4SLinus Torvalds 	unlock_journal(p_s_sb);
31031da177e4SLinus Torvalds 	INIT_LIST_HEAD(&th->t_list);
310422e2c507SJens Axboe 	get_fs_excl();
31051da177e4SLinus Torvalds 	return 0;
31061da177e4SLinus Torvalds 
31071da177e4SLinus Torvalds       out_fail:
31081da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
31091da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
31101da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
31111da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
31121da177e4SLinus Torvalds 	th->t_super = p_s_sb;
31131da177e4SLinus Torvalds 	return retval;
31141da177e4SLinus Torvalds }
31151da177e4SLinus Torvalds 
3116bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3117bd4c625cSLinus Torvalds 								    super_block
3118bd4c625cSLinus Torvalds 								    *s,
3119bd4c625cSLinus Torvalds 								    int nblocks)
3120bd4c625cSLinus Torvalds {
31211da177e4SLinus Torvalds 	int ret;
31221da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *th;
31231da177e4SLinus Torvalds 
31241da177e4SLinus Torvalds 	/* if we're nesting into an existing transaction.  It will be
31251da177e4SLinus Torvalds 	 ** persistent on its own
31261da177e4SLinus Torvalds 	 */
31271da177e4SLinus Torvalds 	if (reiserfs_transaction_running(s)) {
31281da177e4SLinus Torvalds 		th = current->journal_info;
31291da177e4SLinus Torvalds 		th->t_refcount++;
313014a61442SEric Sesterhenn 		BUG_ON(th->t_refcount < 2);
313114a61442SEric Sesterhenn 
31321da177e4SLinus Torvalds 		return th;
31331da177e4SLinus Torvalds 	}
3134d739b42bSPekka Enberg 	th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
31351da177e4SLinus Torvalds 	if (!th)
31361da177e4SLinus Torvalds 		return NULL;
31371da177e4SLinus Torvalds 	ret = journal_begin(th, s, nblocks);
31381da177e4SLinus Torvalds 	if (ret) {
3139d739b42bSPekka Enberg 		kfree(th);
31401da177e4SLinus Torvalds 		return NULL;
31411da177e4SLinus Torvalds 	}
31421da177e4SLinus Torvalds 
31431da177e4SLinus Torvalds 	SB_JOURNAL(s)->j_persistent_trans++;
31441da177e4SLinus Torvalds 	return th;
31451da177e4SLinus Torvalds }
31461da177e4SLinus Torvalds 
3147bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3148bd4c625cSLinus Torvalds {
31491da177e4SLinus Torvalds 	struct super_block *s = th->t_super;
31501da177e4SLinus Torvalds 	int ret = 0;
31511da177e4SLinus Torvalds 	if (th->t_trans_id)
31521da177e4SLinus Torvalds 		ret = journal_end(th, th->t_super, th->t_blocks_allocated);
31531da177e4SLinus Torvalds 	else
31541da177e4SLinus Torvalds 		ret = -EIO;
31551da177e4SLinus Torvalds 	if (th->t_refcount == 0) {
31561da177e4SLinus Torvalds 		SB_JOURNAL(s)->j_persistent_trans--;
3157d739b42bSPekka Enberg 		kfree(th);
31581da177e4SLinus Torvalds 	}
31591da177e4SLinus Torvalds 	return ret;
31601da177e4SLinus Torvalds }
31611da177e4SLinus Torvalds 
3162bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th,
3163bd4c625cSLinus Torvalds 			struct super_block *p_s_sb, unsigned long nblocks)
3164bd4c625cSLinus Torvalds {
31651da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31661da177e4SLinus Torvalds 
31671da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31681da177e4SLinus Torvalds 	 ** pointer
31691da177e4SLinus Torvalds 	 */
31701da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
317114a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
31721da177e4SLinus Torvalds 	return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
31731da177e4SLinus Torvalds }
31741da177e4SLinus Torvalds 
3175bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th,
3176bd4c625cSLinus Torvalds 		       struct super_block *p_s_sb, unsigned long nblocks)
3177bd4c625cSLinus Torvalds {
31781da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31791da177e4SLinus Torvalds 
31801da177e4SLinus Torvalds 	/* this keeps do_journal_end from NULLing out the current->journal_info
31811da177e4SLinus Torvalds 	 ** pointer
31821da177e4SLinus Torvalds 	 */
31831da177e4SLinus Torvalds 	th->t_handle_save = cur_th;
318414a61442SEric Sesterhenn 	BUG_ON(cur_th && cur_th->t_refcount > 1);
31851da177e4SLinus Torvalds 	return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
31861da177e4SLinus Torvalds }
31871da177e4SLinus Torvalds 
3188bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th,
3189bd4c625cSLinus Torvalds 		  struct super_block *p_s_sb, unsigned long nblocks)
3190bd4c625cSLinus Torvalds {
31911da177e4SLinus Torvalds 	struct reiserfs_transaction_handle *cur_th = current->journal_info;
31921da177e4SLinus Torvalds 	int ret;
31931da177e4SLinus Torvalds 
31941da177e4SLinus Torvalds 	th->t_handle_save = NULL;
31951da177e4SLinus Torvalds 	if (cur_th) {
31961da177e4SLinus Torvalds 		/* we are nesting into the current transaction */
31971da177e4SLinus Torvalds 		if (cur_th->t_super == p_s_sb) {
31981da177e4SLinus Torvalds 			BUG_ON(!cur_th->t_refcount);
31991da177e4SLinus Torvalds 			cur_th->t_refcount++;
32001da177e4SLinus Torvalds 			memcpy(th, cur_th, sizeof(*th));
32011da177e4SLinus Torvalds 			if (th->t_refcount <= 1)
3202bd4c625cSLinus Torvalds 				reiserfs_warning(p_s_sb,
3203bd4c625cSLinus Torvalds 						 "BAD: refcount <= 1, but journal_info != 0");
32041da177e4SLinus Torvalds 			return 0;
32051da177e4SLinus Torvalds 		} else {
32061da177e4SLinus Torvalds 			/* we've ended up with a handle from a different filesystem.
32071da177e4SLinus Torvalds 			 ** save it and restore on journal_end.  This should never
32081da177e4SLinus Torvalds 			 ** really happen...
32091da177e4SLinus Torvalds 			 */
3210bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
3211bd4c625cSLinus Torvalds 					 "clm-2100: nesting info a different FS");
32121da177e4SLinus Torvalds 			th->t_handle_save = current->journal_info;
32131da177e4SLinus Torvalds 			current->journal_info = th;
32141da177e4SLinus Torvalds 		}
32151da177e4SLinus Torvalds 	} else {
32161da177e4SLinus Torvalds 		current->journal_info = th;
32171da177e4SLinus Torvalds 	}
32181da177e4SLinus Torvalds 	ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
321914a61442SEric Sesterhenn 	BUG_ON(current->journal_info != th);
32201da177e4SLinus Torvalds 
32211da177e4SLinus Torvalds 	/* I guess this boils down to being the reciprocal of clm-2100 above.
32221da177e4SLinus Torvalds 	 * If do_journal_begin_r fails, we need to put it back, since journal_end
32231da177e4SLinus Torvalds 	 * won't be called to do it. */
32241da177e4SLinus Torvalds 	if (ret)
32251da177e4SLinus Torvalds 		current->journal_info = th->t_handle_save;
32261da177e4SLinus Torvalds 	else
32271da177e4SLinus Torvalds 		BUG_ON(!th->t_refcount);
32281da177e4SLinus Torvalds 
32291da177e4SLinus Torvalds 	return ret;
32301da177e4SLinus Torvalds }
32311da177e4SLinus Torvalds 
32321da177e4SLinus Torvalds /*
32331da177e4SLinus Torvalds ** puts bh into the current transaction.  If it was already there, reorders removes the
32341da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
32351da177e4SLinus Torvalds **
32361da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list.  I can't let it be dirty again until the
32371da177e4SLinus Torvalds ** transaction is committed.
32381da177e4SLinus Torvalds **
32391da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
32401da177e4SLinus Torvalds */
3241bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3242bd4c625cSLinus Torvalds 		       struct super_block *p_s_sb, struct buffer_head *bh)
3243bd4c625cSLinus Torvalds {
32441da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
32451da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
32461da177e4SLinus Torvalds 	int count_already_incd = 0;
32471da177e4SLinus Torvalds 	int prepared = 0;
32481da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
32491da177e4SLinus Torvalds 
32501da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.mark_dirty);
32511da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3252bd4c625cSLinus Torvalds 		reiserfs_panic(th->t_super,
3253bd4c625cSLinus Torvalds 			       "journal-1577: handle trans id %ld != current trans id %ld\n",
32541da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
32551da177e4SLinus Torvalds 	}
32561da177e4SLinus Torvalds 
32571da177e4SLinus Torvalds 	p_s_sb->s_dirt = 1;
32581da177e4SLinus Torvalds 
32591da177e4SLinus Torvalds 	prepared = test_clear_buffer_journal_prepared(bh);
32601da177e4SLinus Torvalds 	clear_buffer_journal_restore_dirty(bh);
32611da177e4SLinus Torvalds 	/* already in this transaction, we are done */
32621da177e4SLinus Torvalds 	if (buffer_journaled(bh)) {
32631da177e4SLinus Torvalds 		PROC_INFO_INC(p_s_sb, journal.mark_dirty_already);
32641da177e4SLinus Torvalds 		return 0;
32651da177e4SLinus Torvalds 	}
32661da177e4SLinus Torvalds 
32671da177e4SLinus Torvalds 	/* this must be turned into a panic instead of a warning.  We can't allow
32681da177e4SLinus Torvalds 	 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
32691da177e4SLinus Torvalds 	 ** could get to disk too early.  NOT GOOD.
32701da177e4SLinus Torvalds 	 */
32711da177e4SLinus Torvalds 	if (!prepared || buffer_dirty(bh)) {
32721da177e4SLinus Torvalds 		reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state "
32731da177e4SLinus Torvalds 				 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3274bd4c625cSLinus Torvalds 				 (unsigned long long)bh->b_blocknr,
3275bd4c625cSLinus Torvalds 				 prepared ? ' ' : '!',
32761da177e4SLinus Torvalds 				 buffer_locked(bh) ? ' ' : '!',
32771da177e4SLinus Torvalds 				 buffer_dirty(bh) ? ' ' : '!',
32781da177e4SLinus Torvalds 				 buffer_journal_dirty(bh) ? ' ' : '!');
32791da177e4SLinus Torvalds 	}
32801da177e4SLinus Torvalds 
32811da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) <= 0) {
3282bd4c625cSLinus Torvalds 		reiserfs_warning(p_s_sb,
3283bd4c625cSLinus Torvalds 				 "journal-1409: journal_mark_dirty returning because j_wcount was %d",
3284bd4c625cSLinus Torvalds 				 atomic_read(&(journal->j_wcount)));
32851da177e4SLinus Torvalds 		return 1;
32861da177e4SLinus Torvalds 	}
32871da177e4SLinus Torvalds 	/* this error means I've screwed up, and we've overflowed the transaction.
32881da177e4SLinus Torvalds 	 ** Nothing can be done here, except make the FS readonly or panic.
32891da177e4SLinus Torvalds 	 */
32901da177e4SLinus Torvalds 	if (journal->j_len >= journal->j_trans_max) {
3291bd4c625cSLinus Torvalds 		reiserfs_panic(th->t_super,
3292bd4c625cSLinus Torvalds 			       "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n",
3293bd4c625cSLinus Torvalds 			       journal->j_len);
32941da177e4SLinus Torvalds 	}
32951da177e4SLinus Torvalds 
32961da177e4SLinus Torvalds 	if (buffer_journal_dirty(bh)) {
32971da177e4SLinus Torvalds 		count_already_incd = 1;
32981da177e4SLinus Torvalds 		PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal);
32991da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
33001da177e4SLinus Torvalds 	}
33011da177e4SLinus Torvalds 
33021da177e4SLinus Torvalds 	if (journal->j_len > journal->j_len_alloc) {
33031da177e4SLinus Torvalds 		journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
33041da177e4SLinus Torvalds 	}
33051da177e4SLinus Torvalds 
33061da177e4SLinus Torvalds 	set_buffer_journaled(bh);
33071da177e4SLinus Torvalds 
33081da177e4SLinus Torvalds 	/* now put this guy on the end */
33091da177e4SLinus Torvalds 	if (!cn) {
33101da177e4SLinus Torvalds 		cn = get_cnode(p_s_sb);
33111da177e4SLinus Torvalds 		if (!cn) {
33121da177e4SLinus Torvalds 			reiserfs_panic(p_s_sb, "get_cnode failed!\n");
33131da177e4SLinus Torvalds 		}
33141da177e4SLinus Torvalds 
33151da177e4SLinus Torvalds 		if (th->t_blocks_logged == th->t_blocks_allocated) {
33161da177e4SLinus Torvalds 			th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
33171da177e4SLinus Torvalds 			journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
33181da177e4SLinus Torvalds 		}
33191da177e4SLinus Torvalds 		th->t_blocks_logged++;
33201da177e4SLinus Torvalds 		journal->j_len++;
33211da177e4SLinus Torvalds 
33221da177e4SLinus Torvalds 		cn->bh = bh;
33231da177e4SLinus Torvalds 		cn->blocknr = bh->b_blocknr;
33241da177e4SLinus Torvalds 		cn->sb = p_s_sb;
33251da177e4SLinus Torvalds 		cn->jlist = NULL;
33261da177e4SLinus Torvalds 		insert_journal_hash(journal->j_hash_table, cn);
33271da177e4SLinus Torvalds 		if (!count_already_incd) {
33281da177e4SLinus Torvalds 			get_bh(bh);
33291da177e4SLinus Torvalds 		}
33301da177e4SLinus Torvalds 	}
33311da177e4SLinus Torvalds 	cn->next = NULL;
33321da177e4SLinus Torvalds 	cn->prev = journal->j_last;
33331da177e4SLinus Torvalds 	cn->bh = bh;
33341da177e4SLinus Torvalds 	if (journal->j_last) {
33351da177e4SLinus Torvalds 		journal->j_last->next = cn;
33361da177e4SLinus Torvalds 		journal->j_last = cn;
33371da177e4SLinus Torvalds 	} else {
33381da177e4SLinus Torvalds 		journal->j_first = cn;
33391da177e4SLinus Torvalds 		journal->j_last = cn;
33401da177e4SLinus Torvalds 	}
33411da177e4SLinus Torvalds 	return 0;
33421da177e4SLinus Torvalds }
33431da177e4SLinus Torvalds 
3344bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th,
3345bd4c625cSLinus Torvalds 		struct super_block *p_s_sb, unsigned long nblocks)
3346bd4c625cSLinus Torvalds {
33471da177e4SLinus Torvalds 	if (!current->journal_info && th->t_refcount > 1)
33481da177e4SLinus Torvalds 		reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d",
33491da177e4SLinus Torvalds 				 th->t_refcount);
33501da177e4SLinus Torvalds 
33511da177e4SLinus Torvalds 	if (!th->t_trans_id) {
33521da177e4SLinus Torvalds 		WARN_ON(1);
33531da177e4SLinus Torvalds 		return -EIO;
33541da177e4SLinus Torvalds 	}
33551da177e4SLinus Torvalds 
33561da177e4SLinus Torvalds 	th->t_refcount--;
33571da177e4SLinus Torvalds 	if (th->t_refcount > 0) {
3358bd4c625cSLinus Torvalds 		struct reiserfs_transaction_handle *cur_th =
3359bd4c625cSLinus Torvalds 		    current->journal_info;
33601da177e4SLinus Torvalds 
33611da177e4SLinus Torvalds 		/* we aren't allowed to close a nested transaction on a different
33621da177e4SLinus Torvalds 		 ** filesystem from the one in the task struct
33631da177e4SLinus Torvalds 		 */
336414a61442SEric Sesterhenn 		BUG_ON(cur_th->t_super != th->t_super);
33651da177e4SLinus Torvalds 
33661da177e4SLinus Torvalds 		if (th != cur_th) {
33671da177e4SLinus Torvalds 			memcpy(current->journal_info, th, sizeof(*th));
33681da177e4SLinus Torvalds 			th->t_trans_id = 0;
33691da177e4SLinus Torvalds 		}
33701da177e4SLinus Torvalds 		return 0;
33711da177e4SLinus Torvalds 	} else {
33721da177e4SLinus Torvalds 		return do_journal_end(th, p_s_sb, nblocks, 0);
33731da177e4SLinus Torvalds 	}
33741da177e4SLinus Torvalds }
33751da177e4SLinus Torvalds 
33761da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters.
33771da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list
33781da177e4SLinus Torvalds **
33791da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted
33801da177e4SLinus Torvalds **
33811da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise
33821da177e4SLinus Torvalds */
3383bd4c625cSLinus Torvalds static int remove_from_transaction(struct super_block *p_s_sb,
3384bd4c625cSLinus Torvalds 				   b_blocknr_t blocknr, int already_cleaned)
3385bd4c625cSLinus Torvalds {
33861da177e4SLinus Torvalds 	struct buffer_head *bh;
33871da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn;
33881da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
33891da177e4SLinus Torvalds 	int ret = 0;
33901da177e4SLinus Torvalds 
33911da177e4SLinus Torvalds 	cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
33921da177e4SLinus Torvalds 	if (!cn || !cn->bh) {
33931da177e4SLinus Torvalds 		return ret;
33941da177e4SLinus Torvalds 	}
33951da177e4SLinus Torvalds 	bh = cn->bh;
33961da177e4SLinus Torvalds 	if (cn->prev) {
33971da177e4SLinus Torvalds 		cn->prev->next = cn->next;
33981da177e4SLinus Torvalds 	}
33991da177e4SLinus Torvalds 	if (cn->next) {
34001da177e4SLinus Torvalds 		cn->next->prev = cn->prev;
34011da177e4SLinus Torvalds 	}
34021da177e4SLinus Torvalds 	if (cn == journal->j_first) {
34031da177e4SLinus Torvalds 		journal->j_first = cn->next;
34041da177e4SLinus Torvalds 	}
34051da177e4SLinus Torvalds 	if (cn == journal->j_last) {
34061da177e4SLinus Torvalds 		journal->j_last = cn->prev;
34071da177e4SLinus Torvalds 	}
34081da177e4SLinus Torvalds 	if (bh)
3409bd4c625cSLinus Torvalds 		remove_journal_hash(p_s_sb, journal->j_hash_table, NULL,
3410bd4c625cSLinus Torvalds 				    bh->b_blocknr, 0);
34111da177e4SLinus Torvalds 	clear_buffer_journaled(bh);	/* don't log this one */
34121da177e4SLinus Torvalds 
34131da177e4SLinus Torvalds 	if (!already_cleaned) {
34141da177e4SLinus Torvalds 		clear_buffer_journal_dirty(bh);
34151da177e4SLinus Torvalds 		clear_buffer_dirty(bh);
34161da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
34171da177e4SLinus Torvalds 		put_bh(bh);
34181da177e4SLinus Torvalds 		if (atomic_read(&(bh->b_count)) < 0) {
3419bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
3420bd4c625cSLinus Torvalds 					 "journal-1752: remove from trans, b_count < 0");
34211da177e4SLinus Torvalds 		}
34221da177e4SLinus Torvalds 		ret = 1;
34231da177e4SLinus Torvalds 	}
34241da177e4SLinus Torvalds 	journal->j_len--;
34251da177e4SLinus Torvalds 	journal->j_len_alloc--;
34261da177e4SLinus Torvalds 	free_cnode(p_s_sb, cn);
34271da177e4SLinus Torvalds 	return ret;
34281da177e4SLinus Torvalds }
34291da177e4SLinus Torvalds 
34301da177e4SLinus Torvalds /*
34311da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the
34320779bf2dSMatt LaPlante ** transactions that include it are committed to disk.
34331da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty,
34341da177e4SLinus Torvalds ** and 0 if you aren't
34351da177e4SLinus Torvalds **
34361da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
34371da177e4SLinus Torvalds ** blocks for a given transaction on disk
34381da177e4SLinus Torvalds **
34391da177e4SLinus Torvalds */
3440bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn)
3441bd4c625cSLinus Torvalds {
34421da177e4SLinus Torvalds 	struct super_block *sb = cn->sb;
34431da177e4SLinus Torvalds 	b_blocknr_t blocknr = cn->blocknr;
34441da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cur = cn->hprev;
34451da177e4SLinus Torvalds 	int can_dirty = 1;
34461da177e4SLinus Torvalds 
34471da177e4SLinus Torvalds 	/* first test hprev.  These are all newer than cn, so any node here
34481da177e4SLinus Torvalds 	 ** with the same block number and dev means this node can't be sent
34491da177e4SLinus Torvalds 	 ** to disk right now.
34501da177e4SLinus Torvalds 	 */
34511da177e4SLinus Torvalds 	while (cur && can_dirty) {
34521da177e4SLinus Torvalds 		if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
34531da177e4SLinus Torvalds 		    cur->blocknr == blocknr) {
34541da177e4SLinus Torvalds 			can_dirty = 0;
34551da177e4SLinus Torvalds 		}
34561da177e4SLinus Torvalds 		cur = cur->hprev;
34571da177e4SLinus Torvalds 	}
34581da177e4SLinus Torvalds 	/* then test hnext.  These are all older than cn.  As long as they
34591da177e4SLinus Torvalds 	 ** are committed to the log, it is safe to write cn to disk
34601da177e4SLinus Torvalds 	 */
34611da177e4SLinus Torvalds 	cur = cn->hnext;
34621da177e4SLinus Torvalds 	while (cur && can_dirty) {
34631da177e4SLinus Torvalds 		if (cur->jlist && cur->jlist->j_len > 0 &&
34641da177e4SLinus Torvalds 		    atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
34651da177e4SLinus Torvalds 		    cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
34661da177e4SLinus Torvalds 			can_dirty = 0;
34671da177e4SLinus Torvalds 		}
34681da177e4SLinus Torvalds 		cur = cur->hnext;
34691da177e4SLinus Torvalds 	}
34701da177e4SLinus Torvalds 	return can_dirty;
34711da177e4SLinus Torvalds }
34721da177e4SLinus Torvalds 
34731da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk
34740779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning
34751da177e4SLinus Torvalds */
3476bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th,
3477bd4c625cSLinus Torvalds 		     struct super_block *p_s_sb, unsigned long nblocks)
3478bd4c625cSLinus Torvalds {
34791da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
34801da177e4SLinus Torvalds 
34811da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
34821da177e4SLinus Torvalds 	/* you can sync while nested, very, very bad */
348314a61442SEric Sesterhenn 	BUG_ON(th->t_refcount > 1);
34841da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3485bd4c625cSLinus Torvalds 		reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3486bd4c625cSLinus Torvalds 					     1);
34871da177e4SLinus Torvalds 		journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
34881da177e4SLinus Torvalds 	}
34891da177e4SLinus Torvalds 	return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT);
34901da177e4SLinus Torvalds }
34911da177e4SLinus Torvalds 
34921da177e4SLinus Torvalds /*
34931da177e4SLinus Torvalds ** writeback the pending async commits to disk
34941da177e4SLinus Torvalds */
3495c4028958SDavid Howells static void flush_async_commits(struct work_struct *work)
3496bd4c625cSLinus Torvalds {
3497c4028958SDavid Howells 	struct reiserfs_journal *journal =
3498c4028958SDavid Howells 		container_of(work, struct reiserfs_journal, j_work.work);
3499c4028958SDavid Howells 	struct super_block *p_s_sb = journal->j_work_sb;
35001da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
35011da177e4SLinus Torvalds 	struct list_head *entry;
35021da177e4SLinus Torvalds 
35031da177e4SLinus Torvalds 	lock_kernel();
35041da177e4SLinus Torvalds 	if (!list_empty(&journal->j_journal_list)) {
35051da177e4SLinus Torvalds 		/* last entry is the youngest, commit it and you get everything */
35061da177e4SLinus Torvalds 		entry = journal->j_journal_list.prev;
35071da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
35081da177e4SLinus Torvalds 		flush_commit_list(p_s_sb, jl, 1);
35091da177e4SLinus Torvalds 	}
35101da177e4SLinus Torvalds 	unlock_kernel();
35111da177e4SLinus Torvalds }
35121da177e4SLinus Torvalds 
35131da177e4SLinus Torvalds /*
35141da177e4SLinus Torvalds ** flushes any old transactions to disk
35151da177e4SLinus Torvalds ** ends the current transaction if it is too old
35161da177e4SLinus Torvalds */
3517bd4c625cSLinus Torvalds int reiserfs_flush_old_commits(struct super_block *p_s_sb)
3518bd4c625cSLinus Torvalds {
35191da177e4SLinus Torvalds 	time_t now;
35201da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
35211da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
35221da177e4SLinus Torvalds 
35231da177e4SLinus Torvalds 	now = get_seconds();
35241da177e4SLinus Torvalds 	/* safety check so we don't flush while we are replaying the log during
35251da177e4SLinus Torvalds 	 * mount
35261da177e4SLinus Torvalds 	 */
35271da177e4SLinus Torvalds 	if (list_empty(&journal->j_journal_list)) {
35281da177e4SLinus Torvalds 		return 0;
35291da177e4SLinus Torvalds 	}
35301da177e4SLinus Torvalds 
35311da177e4SLinus Torvalds 	/* check the current transaction.  If there are no writers, and it is
35321da177e4SLinus Torvalds 	 * too old, finish it, and force the commit blocks to disk
35331da177e4SLinus Torvalds 	 */
35341da177e4SLinus Torvalds 	if (atomic_read(&journal->j_wcount) <= 0 &&
35351da177e4SLinus Torvalds 	    journal->j_trans_start_time > 0 &&
35361da177e4SLinus Torvalds 	    journal->j_len > 0 &&
3537bd4c625cSLinus Torvalds 	    (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
35381da177e4SLinus Torvalds 		if (!journal_join(&th, p_s_sb, 1)) {
3539bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(p_s_sb,
3540bd4c625cSLinus Torvalds 						     SB_BUFFER_WITH_SB(p_s_sb),
3541bd4c625cSLinus Torvalds 						     1);
3542bd4c625cSLinus Torvalds 			journal_mark_dirty(&th, p_s_sb,
3543bd4c625cSLinus Torvalds 					   SB_BUFFER_WITH_SB(p_s_sb));
35441da177e4SLinus Torvalds 
35451da177e4SLinus Torvalds 			/* we're only being called from kreiserfsd, it makes no sense to do
35461da177e4SLinus Torvalds 			 ** an async commit so that kreiserfsd can do it later
35471da177e4SLinus Torvalds 			 */
35481da177e4SLinus Torvalds 			do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT);
35491da177e4SLinus Torvalds 		}
35501da177e4SLinus Torvalds 	}
35511da177e4SLinus Torvalds 	return p_s_sb->s_dirt;
35521da177e4SLinus Torvalds }
35531da177e4SLinus Torvalds 
35541da177e4SLinus Torvalds /*
35551da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
35561da177e4SLinus Torvalds **
35571da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
35581da177e4SLinus Torvalds ** the writers are done.  By the time it wakes up, the transaction it was called has already ended, so it just
35591da177e4SLinus Torvalds ** flushes the commit list and returns 0.
35601da177e4SLinus Torvalds **
35611da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set.  Also won't batch when others are waiting on j_join_wait.
35621da177e4SLinus Torvalds **
35631da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log.
35641da177e4SLinus Torvalds */
3565bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th,
3566bd4c625cSLinus Torvalds 			     struct super_block *p_s_sb, unsigned long nblocks,
3567bd4c625cSLinus Torvalds 			     int flags)
3568bd4c625cSLinus Torvalds {
35691da177e4SLinus Torvalds 
35701da177e4SLinus Torvalds 	time_t now;
35711da177e4SLinus Torvalds 	int flush = flags & FLUSH_ALL;
35721da177e4SLinus Torvalds 	int commit_now = flags & COMMIT_NOW;
35731da177e4SLinus Torvalds 	int wait_on_commit = flags & WAIT;
35741da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
35751da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
35761da177e4SLinus Torvalds 
35771da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
35781da177e4SLinus Torvalds 
35791da177e4SLinus Torvalds 	if (th->t_trans_id != journal->j_trans_id) {
3580bd4c625cSLinus Torvalds 		reiserfs_panic(th->t_super,
3581bd4c625cSLinus Torvalds 			       "journal-1577: handle trans id %ld != current trans id %ld\n",
35821da177e4SLinus Torvalds 			       th->t_trans_id, journal->j_trans_id);
35831da177e4SLinus Torvalds 	}
35841da177e4SLinus Torvalds 
35851da177e4SLinus Torvalds 	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
35861da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
35871da177e4SLinus Torvalds 		atomic_dec(&(journal->j_wcount));
35881da177e4SLinus Torvalds 	}
35891da177e4SLinus Torvalds 
35901da177e4SLinus Torvalds 	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
35911da177e4SLinus Torvalds 	 ** will be dealt with by next transaction that actually writes something, but should be taken
35921da177e4SLinus Torvalds 	 ** care of in this trans
35931da177e4SLinus Torvalds 	 */
359414a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
359514a61442SEric Sesterhenn 
35961da177e4SLinus Torvalds 	/* if wcount > 0, and we are called to with flush or commit_now,
35971da177e4SLinus Torvalds 	 ** we wait on j_join_wait.  We will wake up when the last writer has
35981da177e4SLinus Torvalds 	 ** finished the transaction, and started it on its way to the disk.
35991da177e4SLinus Torvalds 	 ** Then, we flush the commit or journal list, and just return 0
36001da177e4SLinus Torvalds 	 ** because the rest of journal end was already done for this transaction.
36011da177e4SLinus Torvalds 	 */
36021da177e4SLinus Torvalds 	if (atomic_read(&(journal->j_wcount)) > 0) {
36031da177e4SLinus Torvalds 		if (flush || commit_now) {
36041da177e4SLinus Torvalds 			unsigned trans_id;
36051da177e4SLinus Torvalds 
36061da177e4SLinus Torvalds 			jl = journal->j_current_jl;
36071da177e4SLinus Torvalds 			trans_id = jl->j_trans_id;
36081da177e4SLinus Torvalds 			if (wait_on_commit)
36091da177e4SLinus Torvalds 				jl->j_state |= LIST_COMMIT_PENDING;
36101da177e4SLinus Torvalds 			atomic_set(&(journal->j_jlock), 1);
36111da177e4SLinus Torvalds 			if (flush) {
36121da177e4SLinus Torvalds 				journal->j_next_full_flush = 1;
36131da177e4SLinus Torvalds 			}
36141da177e4SLinus Torvalds 			unlock_journal(p_s_sb);
36151da177e4SLinus Torvalds 
36161da177e4SLinus Torvalds 			/* sleep while the current transaction is still j_jlocked */
36171da177e4SLinus Torvalds 			while (journal->j_trans_id == trans_id) {
36181da177e4SLinus Torvalds 				if (atomic_read(&journal->j_jlock)) {
36191da177e4SLinus Torvalds 					queue_log_writer(p_s_sb);
36201da177e4SLinus Torvalds 				} else {
36211da177e4SLinus Torvalds 					lock_journal(p_s_sb);
36221da177e4SLinus Torvalds 					if (journal->j_trans_id == trans_id) {
3623bd4c625cSLinus Torvalds 						atomic_set(&(journal->j_jlock),
3624bd4c625cSLinus Torvalds 							   1);
36251da177e4SLinus Torvalds 					}
36261da177e4SLinus Torvalds 					unlock_journal(p_s_sb);
36271da177e4SLinus Torvalds 				}
36281da177e4SLinus Torvalds 			}
362914a61442SEric Sesterhenn 			BUG_ON(journal->j_trans_id == trans_id);
363014a61442SEric Sesterhenn 
3631bd4c625cSLinus Torvalds 			if (commit_now
3632bd4c625cSLinus Torvalds 			    && journal_list_still_alive(p_s_sb, trans_id)
3633bd4c625cSLinus Torvalds 			    && wait_on_commit) {
36341da177e4SLinus Torvalds 				flush_commit_list(p_s_sb, jl, 1);
36351da177e4SLinus Torvalds 			}
36361da177e4SLinus Torvalds 			return 0;
36371da177e4SLinus Torvalds 		}
36381da177e4SLinus Torvalds 		unlock_journal(p_s_sb);
36391da177e4SLinus Torvalds 		return 0;
36401da177e4SLinus Torvalds 	}
36411da177e4SLinus Torvalds 
36421da177e4SLinus Torvalds 	/* deal with old transactions where we are the last writers */
36431da177e4SLinus Torvalds 	now = get_seconds();
36441da177e4SLinus Torvalds 	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
36451da177e4SLinus Torvalds 		commit_now = 1;
36461da177e4SLinus Torvalds 		journal->j_next_async_flush = 1;
36471da177e4SLinus Torvalds 	}
36481da177e4SLinus Torvalds 	/* don't batch when someone is waiting on j_join_wait */
36491da177e4SLinus Torvalds 	/* don't batch when syncing the commit or flushing the whole trans */
3650bd4c625cSLinus Torvalds 	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3651bd4c625cSLinus Torvalds 	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3652bd4c625cSLinus Torvalds 	    && journal->j_len_alloc < journal->j_max_batch
3653bd4c625cSLinus Torvalds 	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
36541da177e4SLinus Torvalds 		journal->j_bcount++;
36551da177e4SLinus Torvalds 		unlock_journal(p_s_sb);
36561da177e4SLinus Torvalds 		return 0;
36571da177e4SLinus Torvalds 	}
36581da177e4SLinus Torvalds 
36591da177e4SLinus Torvalds 	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3660bd4c625cSLinus Torvalds 		reiserfs_panic(p_s_sb,
3661bd4c625cSLinus Torvalds 			       "journal-003: journal_end: j_start (%ld) is too high\n",
3662bd4c625cSLinus Torvalds 			       journal->j_start);
36631da177e4SLinus Torvalds 	}
36641da177e4SLinus Torvalds 	return 1;
36651da177e4SLinus Torvalds }
36661da177e4SLinus Torvalds 
36671da177e4SLinus Torvalds /*
36681da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe.
36691da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
36701da177e4SLinus Torvalds **
36711da177e4SLinus Torvalds ** otherwise:
36721da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap.  That will prevent it from being allocated for unformatted nodes
36731da177e4SLinus Torvalds ** before this transaction has finished.
36741da177e4SLinus Torvalds **
36751da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.  That will prevent any old transactions with
36761da177e4SLinus Torvalds ** this block from trying to flush to the real location.  Since we aren't removing the cnode from the journal_list_hash,
36771da177e4SLinus Torvalds ** the block can't be reallocated yet.
36781da177e4SLinus Torvalds **
36791da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
36801da177e4SLinus Torvalds */
3681bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th,
3682bd4c625cSLinus Torvalds 		       struct super_block *p_s_sb, b_blocknr_t blocknr)
3683bd4c625cSLinus Torvalds {
36841da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
36851da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn = NULL;
36861da177e4SLinus Torvalds 	struct buffer_head *bh = NULL;
36871da177e4SLinus Torvalds 	struct reiserfs_list_bitmap *jb = NULL;
36881da177e4SLinus Torvalds 	int cleaned = 0;
36891da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
36901da177e4SLinus Torvalds 
36911da177e4SLinus Torvalds 	cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
36921da177e4SLinus Torvalds 	if (cn && cn->bh) {
36931da177e4SLinus Torvalds 		bh = cn->bh;
36941da177e4SLinus Torvalds 		get_bh(bh);
36951da177e4SLinus Torvalds 	}
36961da177e4SLinus Torvalds 	/* if it is journal new, we just remove it from this transaction */
36971da177e4SLinus Torvalds 	if (bh && buffer_journal_new(bh)) {
36981da177e4SLinus Torvalds 		clear_buffer_journal_new(bh);
36991da177e4SLinus Torvalds 		clear_prepared_bits(bh);
37001da177e4SLinus Torvalds 		reiserfs_clean_and_file_buffer(bh);
37011da177e4SLinus Torvalds 		cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
37021da177e4SLinus Torvalds 	} else {
37031da177e4SLinus Torvalds 		/* set the bit for this block in the journal bitmap for this transaction */
37041da177e4SLinus Torvalds 		jb = journal->j_current_jl->j_list_bitmap;
37051da177e4SLinus Torvalds 		if (!jb) {
3706bd4c625cSLinus Torvalds 			reiserfs_panic(p_s_sb,
3707bd4c625cSLinus Torvalds 				       "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n");
37081da177e4SLinus Torvalds 		}
37091da177e4SLinus Torvalds 		set_bit_in_list_bitmap(p_s_sb, blocknr, jb);
37101da177e4SLinus Torvalds 
37111da177e4SLinus Torvalds 		/* Note, the entire while loop is not allowed to schedule.  */
37121da177e4SLinus Torvalds 
37131da177e4SLinus Torvalds 		if (bh) {
37141da177e4SLinus Torvalds 			clear_prepared_bits(bh);
37151da177e4SLinus Torvalds 			reiserfs_clean_and_file_buffer(bh);
37161da177e4SLinus Torvalds 		}
37171da177e4SLinus Torvalds 		cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
37181da177e4SLinus Torvalds 
37191da177e4SLinus Torvalds 		/* find all older transactions with this block, make sure they don't try to write it out */
3720bd4c625cSLinus Torvalds 		cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
3721bd4c625cSLinus Torvalds 					  blocknr);
37221da177e4SLinus Torvalds 		while (cn) {
37231da177e4SLinus Torvalds 			if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
37241da177e4SLinus Torvalds 				set_bit(BLOCK_FREED, &cn->state);
37251da177e4SLinus Torvalds 				if (cn->bh) {
37261da177e4SLinus Torvalds 					if (!cleaned) {
37271da177e4SLinus Torvalds 						/* remove_from_transaction will brelse the buffer if it was
37281da177e4SLinus Torvalds 						 ** in the current trans
37291da177e4SLinus Torvalds 						 */
3730bd4c625cSLinus Torvalds 						clear_buffer_journal_dirty(cn->
3731bd4c625cSLinus Torvalds 									   bh);
37321da177e4SLinus Torvalds 						clear_buffer_dirty(cn->bh);
3733bd4c625cSLinus Torvalds 						clear_buffer_journal_test(cn->
3734bd4c625cSLinus Torvalds 									  bh);
37351da177e4SLinus Torvalds 						cleaned = 1;
37361da177e4SLinus Torvalds 						put_bh(cn->bh);
3737bd4c625cSLinus Torvalds 						if (atomic_read
3738bd4c625cSLinus Torvalds 						    (&(cn->bh->b_count)) < 0) {
3739bd4c625cSLinus Torvalds 							reiserfs_warning(p_s_sb,
3740bd4c625cSLinus Torvalds 									 "journal-2138: cn->bh->b_count < 0");
37411da177e4SLinus Torvalds 						}
37421da177e4SLinus Torvalds 					}
37431da177e4SLinus Torvalds 					if (cn->jlist) {	/* since we are clearing the bh, we MUST dec nonzerolen */
3744bd4c625cSLinus Torvalds 						atomic_dec(&
3745bd4c625cSLinus Torvalds 							   (cn->jlist->
3746bd4c625cSLinus Torvalds 							    j_nonzerolen));
37471da177e4SLinus Torvalds 					}
37481da177e4SLinus Torvalds 					cn->bh = NULL;
37491da177e4SLinus Torvalds 				}
37501da177e4SLinus Torvalds 			}
37511da177e4SLinus Torvalds 			cn = cn->hnext;
37521da177e4SLinus Torvalds 		}
37531da177e4SLinus Torvalds 	}
37541da177e4SLinus Torvalds 
3755398c95bdSChris Mason 	if (bh)
3756398c95bdSChris Mason 		release_buffer_page(bh); /* get_hash grabs the buffer */
37571da177e4SLinus Torvalds 	return 0;
37581da177e4SLinus Torvalds }
37591da177e4SLinus Torvalds 
3760bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode)
3761bd4c625cSLinus Torvalds {
37621da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
37631da177e4SLinus Torvalds 	REISERFS_I(inode)->i_jl = journal->j_current_jl;
37641da177e4SLinus Torvalds 	REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
37651da177e4SLinus Torvalds }
37661da177e4SLinus Torvalds 
37671da177e4SLinus Torvalds /*
37681da177e4SLinus Torvalds  * returns -1 on error, 0 if no commits/barriers were done and 1
37691da177e4SLinus Torvalds  * if a transaction was actually committed and the barrier was done
37701da177e4SLinus Torvalds  */
37711da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id,
37721da177e4SLinus Torvalds 			     struct reiserfs_journal_list *jl)
37731da177e4SLinus Torvalds {
37741da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
37751da177e4SLinus Torvalds 	struct super_block *sb = inode->i_sb;
37761da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
37771da177e4SLinus Torvalds 	int ret = 0;
37781da177e4SLinus Torvalds 
37791da177e4SLinus Torvalds 	/* is it from the current transaction, or from an unknown transaction? */
37801da177e4SLinus Torvalds 	if (id == journal->j_trans_id) {
37811da177e4SLinus Torvalds 		jl = journal->j_current_jl;
37821da177e4SLinus Torvalds 		/* try to let other writers come in and grow this transaction */
37831da177e4SLinus Torvalds 		let_transaction_grow(sb, id);
37841da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
37851da177e4SLinus Torvalds 			goto flush_commit_only;
37861da177e4SLinus Torvalds 		}
37871da177e4SLinus Torvalds 
37881da177e4SLinus Torvalds 		ret = journal_begin(&th, sb, 1);
37891da177e4SLinus Torvalds 		if (ret)
37901da177e4SLinus Torvalds 			return ret;
37911da177e4SLinus Torvalds 
37921da177e4SLinus Torvalds 		/* someone might have ended this transaction while we joined */
37931da177e4SLinus Torvalds 		if (journal->j_trans_id != id) {
3794bd4c625cSLinus Torvalds 			reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3795bd4c625cSLinus Torvalds 						     1);
37961da177e4SLinus Torvalds 			journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
37971da177e4SLinus Torvalds 			ret = journal_end(&th, sb, 1);
37981da177e4SLinus Torvalds 			goto flush_commit_only;
37991da177e4SLinus Torvalds 		}
38001da177e4SLinus Torvalds 
38011da177e4SLinus Torvalds 		ret = journal_end_sync(&th, sb, 1);
38021da177e4SLinus Torvalds 		if (!ret)
38031da177e4SLinus Torvalds 			ret = 1;
38041da177e4SLinus Torvalds 
38051da177e4SLinus Torvalds 	} else {
38061da177e4SLinus Torvalds 		/* this gets tricky, we have to make sure the journal list in
38071da177e4SLinus Torvalds 		 * the inode still exists.  We know the list is still around
38081da177e4SLinus Torvalds 		 * if we've got a larger transaction id than the oldest list
38091da177e4SLinus Torvalds 		 */
38101da177e4SLinus Torvalds 	      flush_commit_only:
38111da177e4SLinus Torvalds 		if (journal_list_still_alive(inode->i_sb, id)) {
38121da177e4SLinus Torvalds 			/*
38131da177e4SLinus Torvalds 			 * we only set ret to 1 when we know for sure
38141da177e4SLinus Torvalds 			 * the barrier hasn't been started yet on the commit
38151da177e4SLinus Torvalds 			 * block.
38161da177e4SLinus Torvalds 			 */
38171da177e4SLinus Torvalds 			if (atomic_read(&jl->j_commit_left) > 1)
38181da177e4SLinus Torvalds 				ret = 1;
38191da177e4SLinus Torvalds 			flush_commit_list(sb, jl, 1);
38201da177e4SLinus Torvalds 			if (journal->j_errno)
38211da177e4SLinus Torvalds 				ret = journal->j_errno;
38221da177e4SLinus Torvalds 		}
38231da177e4SLinus Torvalds 	}
38241da177e4SLinus Torvalds 	/* otherwise the list is gone, and long since committed */
38251da177e4SLinus Torvalds 	return ret;
38261da177e4SLinus Torvalds }
38271da177e4SLinus Torvalds 
3828bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode)
3829bd4c625cSLinus Torvalds {
38301da177e4SLinus Torvalds 	unsigned long id = REISERFS_I(inode)->i_trans_id;
38311da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
38321da177e4SLinus Torvalds 
38331da177e4SLinus Torvalds 	/* for the whole inode, assume unset id means it was
38341da177e4SLinus Torvalds 	 * changed in the current transaction.  More conservative
38351da177e4SLinus Torvalds 	 */
38361da177e4SLinus Torvalds 	if (!id || !jl) {
38371da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
38381da177e4SLinus Torvalds 		id = REISERFS_I(inode)->i_trans_id;
38391da177e4SLinus Torvalds 		/* jl will be updated in __commit_trans_jl */
38401da177e4SLinus Torvalds 	}
38411da177e4SLinus Torvalds 
38421da177e4SLinus Torvalds 	return __commit_trans_jl(inode, id, jl);
38431da177e4SLinus Torvalds }
38441da177e4SLinus Torvalds 
38451da177e4SLinus Torvalds void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3846bd4c625cSLinus Torvalds 				      struct buffer_head *bh)
3847bd4c625cSLinus Torvalds {
38481da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
38491da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.restore_prepared);
38501da177e4SLinus Torvalds 	if (!bh) {
38511da177e4SLinus Torvalds 		return;
38521da177e4SLinus Torvalds 	}
38531da177e4SLinus Torvalds 	if (test_clear_buffer_journal_restore_dirty(bh) &&
38541da177e4SLinus Torvalds 	    buffer_journal_dirty(bh)) {
38551da177e4SLinus Torvalds 		struct reiserfs_journal_cnode *cn;
38561da177e4SLinus Torvalds 		cn = get_journal_hash_dev(p_s_sb,
38571da177e4SLinus Torvalds 					  journal->j_list_hash_table,
38581da177e4SLinus Torvalds 					  bh->b_blocknr);
38591da177e4SLinus Torvalds 		if (cn && can_dirty(cn)) {
38601da177e4SLinus Torvalds 			set_buffer_journal_test(bh);
38611da177e4SLinus Torvalds 			mark_buffer_dirty(bh);
38621da177e4SLinus Torvalds 		}
38631da177e4SLinus Torvalds 	}
38641da177e4SLinus Torvalds 	clear_buffer_journal_prepared(bh);
38651da177e4SLinus Torvalds }
38661da177e4SLinus Torvalds 
38671da177e4SLinus Torvalds extern struct tree_balance *cur_tb;
38681da177e4SLinus Torvalds /*
38691da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't
38701da177e4SLinus Torvalds ** be written to disk while we are altering it.  So, we must:
38711da177e4SLinus Torvalds ** clean it
38721da177e4SLinus Torvalds ** wait on it.
38731da177e4SLinus Torvalds **
38741da177e4SLinus Torvalds */
38751da177e4SLinus Torvalds int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3876bd4c625cSLinus Torvalds 				 struct buffer_head *bh, int wait)
3877bd4c625cSLinus Torvalds {
38781da177e4SLinus Torvalds 	PROC_INFO_INC(p_s_sb, journal.prepare);
38791da177e4SLinus Torvalds 
38801da177e4SLinus Torvalds 	if (test_set_buffer_locked(bh)) {
38811da177e4SLinus Torvalds 		if (!wait)
38821da177e4SLinus Torvalds 			return 0;
38831da177e4SLinus Torvalds 		lock_buffer(bh);
38841da177e4SLinus Torvalds 	}
38851da177e4SLinus Torvalds 	set_buffer_journal_prepared(bh);
38861da177e4SLinus Torvalds 	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
38871da177e4SLinus Torvalds 		clear_buffer_journal_test(bh);
38881da177e4SLinus Torvalds 		set_buffer_journal_restore_dirty(bh);
38891da177e4SLinus Torvalds 	}
38901da177e4SLinus Torvalds 	unlock_buffer(bh);
38911da177e4SLinus Torvalds 	return 1;
38921da177e4SLinus Torvalds }
38931da177e4SLinus Torvalds 
3894bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s)
3895bd4c625cSLinus Torvalds {
38961da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(s);
38971da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl;
38981da177e4SLinus Torvalds 	struct list_head *entry;
38991da177e4SLinus Torvalds 	time_t now = get_seconds();
39001da177e4SLinus Torvalds 
39011da177e4SLinus Torvalds 	while (!list_empty(&journal->j_journal_list)) {
39021da177e4SLinus Torvalds 		entry = journal->j_journal_list.next;
39031da177e4SLinus Torvalds 		jl = JOURNAL_LIST_ENTRY(entry);
39041da177e4SLinus Torvalds 		/* this check should always be run, to send old lists to disk */
3905a3172027SChris Mason 		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
3906a3172027SChris Mason 		    atomic_read(&jl->j_commit_left) == 0 &&
3907a3172027SChris Mason 		    test_transaction(s, jl)) {
39081da177e4SLinus Torvalds 			flush_used_journal_lists(s, jl);
39091da177e4SLinus Torvalds 		} else {
39101da177e4SLinus Torvalds 			break;
39111da177e4SLinus Torvalds 		}
39121da177e4SLinus Torvalds 	}
39131da177e4SLinus Torvalds }
39141da177e4SLinus Torvalds 
39151da177e4SLinus Torvalds /*
39161da177e4SLinus Torvalds ** long and ugly.  If flush, will not return until all commit
39171da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk.
39181da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk.
39191da177e4SLinus Torvalds **
39201da177e4SLinus Torvalds ** keep reading, there are comments as you go along
39211da177e4SLinus Torvalds **
39221da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing
39231da177e4SLinus Torvalds ** journal lists, etc just won't happen.
**
** Ends the running transaction for handle th on the journal of p_s_sb:
** builds the description and commit blocks, copies every journaled
** buffer into the log area, queues the finished journal list and resets
** the journal fields for the next transaction.
**
** th:      transaction handle; BUG_ON fires if t_refcount > 1 or if
**          t_trans_id is 0.  The handle is zeroed before returning, except
**          t_super which is set back to p_s_sb.
** p_s_sb:  super block whose journal (SB_JOURNAL) is operated on.
** nblocks: only handed on to check_journal_end().
** flags:   bitmask; FLUSH_ALL, COMMIT_NOW and WAIT are the bits used here.
**          NOTE(review): "flush" / "no_async" above look like they refer
**          to FLUSH_ALL / WAIT -- confirm.
**
** Returns journal->j_errno.
39241da177e4SLinus Torvalds */
3925bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th,
3926bd4c625cSLinus Torvalds 			  struct super_block *p_s_sb, unsigned long nblocks,
3927bd4c625cSLinus Torvalds 			  int flags)
3928bd4c625cSLinus Torvalds {
39291da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
39301da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *cn, *next, *jl_cn;
39311da177e4SLinus Torvalds 	struct reiserfs_journal_cnode *last_cn = NULL;
39321da177e4SLinus Torvalds 	struct reiserfs_journal_desc *desc;
39331da177e4SLinus Torvalds 	struct reiserfs_journal_commit *commit;
39341da177e4SLinus Torvalds 	struct buffer_head *c_bh;	/* commit bh */
39351da177e4SLinus Torvalds 	struct buffer_head *d_bh;	/* desc bh */
39361da177e4SLinus Torvalds 	int cur_write_start = 0;	/* start index of current log write */
39371da177e4SLinus Torvalds 	int old_start;
39381da177e4SLinus Torvalds 	int i;
3939a44c94a7SAlexander Zarochentsev 	int flush;
3940a44c94a7SAlexander Zarochentsev 	int wait_on_commit;
39411da177e4SLinus Torvalds 	struct reiserfs_journal_list *jl, *temp_jl;
39421da177e4SLinus Torvalds 	struct list_head *entry, *safe;
39431da177e4SLinus Torvalds 	unsigned long jindex;
39441da177e4SLinus Torvalds 	unsigned long commit_trans_id;
39451da177e4SLinus Torvalds 	int trans_half;
39461da177e4SLinus Torvalds 
39471da177e4SLinus Torvalds 	BUG_ON(th->t_refcount > 1);
39481da177e4SLinus Torvalds 	BUG_ON(!th->t_trans_id);
39491da177e4SLinus Torvalds 
3950a44c94a7SAlexander Zarochentsev 	/* protect flush_older_commits from doing mistakes if the
3951a44c94a7SAlexander Zarochentsev            transaction ID counter gets overflowed.  */
3952a44c94a7SAlexander Zarochentsev 	if (th->t_trans_id == ~0UL)
3953a44c94a7SAlexander Zarochentsev 		flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
	/* cache the two caller wishes; both may still be forced on below */
3954a44c94a7SAlexander Zarochentsev 	flush = flags & FLUSH_ALL;
3955a44c94a7SAlexander Zarochentsev 	wait_on_commit = flags & WAIT;
3956a44c94a7SAlexander Zarochentsev 
395722e2c507SJens Axboe 	put_fs_excl();
	/* put back the journal_info value that was saved in the handle */
39581da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
39591da177e4SLinus Torvalds 	reiserfs_check_lock_depth(p_s_sb, "journal end");
	/*
	 * Nothing logged yet: log the super block buffer.  Presumably this
	 * makes j_len non-zero so the BUG_ON(journal->j_len == 0) further
	 * down cannot trigger -- confirm against journal_mark_dirty().
	 */
39601da177e4SLinus Torvalds 	if (journal->j_len == 0) {
3961bd4c625cSLinus Torvalds 		reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3962bd4c625cSLinus Torvalds 					     1);
39631da177e4SLinus Torvalds 		journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
39641da177e4SLinus Torvalds 	}
39651da177e4SLinus Torvalds 
39661da177e4SLinus Torvalds 	lock_journal(p_s_sb);
	/*
	 * Requests recorded on the journal itself (j_next_full_flush,
	 * j_next_async_flush) are folded into this call's flags.
	 */
39671da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39681da177e4SLinus Torvalds 		flags |= FLUSH_ALL;
39691da177e4SLinus Torvalds 		flush = 1;
39701da177e4SLinus Torvalds 	}
39711da177e4SLinus Torvalds 	if (journal->j_next_async_flush) {
39721da177e4SLinus Torvalds 		flags |= COMMIT_NOW | WAIT;
39731da177e4SLinus Torvalds 		wait_on_commit = 1;
39741da177e4SLinus Torvalds 	}
39751da177e4SLinus Torvalds 
39761da177e4SLinus Torvalds 	/* check_journal_end locks the journal, and unlocks if it does not return 1
39771da177e4SLinus Torvalds 	 ** it tells us if we should continue with the journal_end, or just return
39781da177e4SLinus Torvalds 	 */
	/*
	 * A zero return means no commit is done by this call: mark the
	 * super block dirty, wake queued writers and go straight to the
	 * handle cleanup at "out".
	 */
39791da177e4SLinus Torvalds 	if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
39801da177e4SLinus Torvalds 		p_s_sb->s_dirt = 1;
39811da177e4SLinus Torvalds 		wake_queued_writers(p_s_sb);
39821da177e4SLinus Torvalds 		reiserfs_async_progress_wait(p_s_sb);
39831da177e4SLinus Torvalds 		goto out;
39841da177e4SLinus Torvalds 	}
39851da177e4SLinus Torvalds 
39861da177e4SLinus Torvalds 	/* check_journal_end might set these, check again */
39871da177e4SLinus Torvalds 	if (journal->j_next_full_flush) {
39881da177e4SLinus Torvalds 		flush = 1;
39891da177e4SLinus Torvalds 	}
39901da177e4SLinus Torvalds 
39911da177e4SLinus Torvalds 	/*
39921da177e4SLinus Torvalds 	 ** j must wait means we have to flush the log blocks, and the real blocks for
39931da177e4SLinus Torvalds 	 ** this transaction
39941da177e4SLinus Torvalds 	 */
39951da177e4SLinus Torvalds 	if (journal->j_must_wait > 0) {
39961da177e4SLinus Torvalds 		flush = 1;
39971da177e4SLinus Torvalds 	}
39981da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
3999ef43bc4fSJan Kara 	/* quota ops might need to nest, setup the journal_info pointer for them
4000ef43bc4fSJan Kara 	 * and raise the refcount so that it is > 0. */
40011da177e4SLinus Torvalds 	current->journal_info = th;
4002ef43bc4fSJan Kara 	th->t_refcount++;
40031da177e4SLinus Torvalds 	reiserfs_discard_all_prealloc(th);	/* it should not involve new blocks into
40041da177e4SLinus Torvalds 						 * the transaction */
4005ef43bc4fSJan Kara 	th->t_refcount--;
40061da177e4SLinus Torvalds 	current->journal_info = th->t_handle_save;
40071da177e4SLinus Torvalds #endif
40081da177e4SLinus Torvalds 
40091da177e4SLinus Torvalds 	/* setup description block */
	/*
	 * The description block sits at offset j_start from the first
	 * on-disk journal block; it is zeroed, then stamped with the magic
	 * string and the current transaction id.
	 */
4010bd4c625cSLinus Torvalds 	d_bh =
4011bd4c625cSLinus Torvalds 	    journal_getblk(p_s_sb,
4012bd4c625cSLinus Torvalds 			   SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
4013bd4c625cSLinus Torvalds 			   journal->j_start);
40141da177e4SLinus Torvalds 	set_buffer_uptodate(d_bh);
40151da177e4SLinus Torvalds 	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
40161da177e4SLinus Torvalds 	memset(d_bh->b_data, 0, d_bh->b_size);
40171da177e4SLinus Torvalds 	memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
40181da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
40191da177e4SLinus Torvalds 
40201da177e4SLinus Torvalds 	/* setup commit block.  Don't write (keep it clean too) this one until after everyone else is written */
	/*
	 * The commit block index is j_start + j_len + 1, wrapped modulo the
	 * on-disk journal size.
	 */
40211da177e4SLinus Torvalds 	c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
4022bd4c625cSLinus Torvalds 			      ((journal->j_start + journal->j_len +
4023bd4c625cSLinus Torvalds 				1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
40241da177e4SLinus Torvalds 	commit = (struct reiserfs_journal_commit *)c_bh->b_data;
40251da177e4SLinus Torvalds 	memset(c_bh->b_data, 0, c_bh->b_size);
40261da177e4SLinus Torvalds 	set_commit_trans_id(commit, journal->j_trans_id);
40271da177e4SLinus Torvalds 	set_buffer_uptodate(c_bh);
40281da177e4SLinus Torvalds 
40291da177e4SLinus Torvalds 	/* init this journal list */
40301da177e4SLinus Torvalds 	jl = journal->j_current_jl;
40311da177e4SLinus Torvalds 
40321da177e4SLinus Torvalds 	/* we lock the commit before doing anything because
40331da177e4SLinus Torvalds 	 * we want to make sure nobody tries to run flush_commit_list until
40341da177e4SLinus Torvalds 	 * the new transaction is fully setup, and we've already flushed the
40351da177e4SLinus Torvalds 	 * ordered bh list
40361da177e4SLinus Torvalds 	 */
40371da177e4SLinus Torvalds 	down(&jl->j_commit_lock);
40381da177e4SLinus Torvalds 
40391da177e4SLinus Torvalds 	/* save the transaction id in case we need to commit it later */
40401da177e4SLinus Torvalds 	commit_trans_id = jl->j_trans_id;
40411da177e4SLinus Torvalds 
	/*
	 * Snapshot the running transaction's id, start time, start offset
	 * and length into the journal list.
	 */
40421da177e4SLinus Torvalds 	atomic_set(&jl->j_older_commits_done, 0);
40431da177e4SLinus Torvalds 	jl->j_trans_id = journal->j_trans_id;
40441da177e4SLinus Torvalds 	jl->j_timestamp = journal->j_trans_start_time;
40451da177e4SLinus Torvalds 	jl->j_commit_bh = c_bh;
40461da177e4SLinus Torvalds 	jl->j_start = journal->j_start;
40471da177e4SLinus Torvalds 	jl->j_len = journal->j_len;
40481da177e4SLinus Torvalds 	atomic_set(&jl->j_nonzerolen, journal->j_len);
	/* NOTE(review): the "+ 2" presumably counts the desc and commit blocks -- confirm */
40491da177e4SLinus Torvalds 	atomic_set(&jl->j_commit_left, journal->j_len + 2);
40501da177e4SLinus Torvalds 	jl->j_realblock = NULL;
40511da177e4SLinus Torvalds 
40521da177e4SLinus Torvalds 	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
40531da177e4SLinus Torvalds 	 **  for each real block, add it to the journal list hash,
40541da177e4SLinus Torvalds 	 ** copy into real block index array in the commit or desc block
40551da177e4SLinus Torvalds 	 */
	/*
	 * i counts journaled buffers only: the i-- in the else branch
	 * cancels the loop's i++ for a skipped cnode.  The first trans_half
	 * block numbers are stored in the desc block, the rest in the
	 * commit block.
	 */
40561da177e4SLinus Torvalds 	trans_half = journal_trans_half(p_s_sb->s_blocksize);
40571da177e4SLinus Torvalds 	for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
40581da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
40591da177e4SLinus Torvalds 			jl_cn = get_cnode(p_s_sb);
40601da177e4SLinus Torvalds 			if (!jl_cn) {
4061bd4c625cSLinus Torvalds 				reiserfs_panic(p_s_sb,
4062bd4c625cSLinus Torvalds 					       "journal-1676, get_cnode returned NULL\n");
40631da177e4SLinus Torvalds 			}
			/* the first journaled cnode becomes the head of jl's list */
40641da177e4SLinus Torvalds 			if (i == 0) {
40651da177e4SLinus Torvalds 				jl->j_realblock = jl_cn;
40661da177e4SLinus Torvalds 			}
			/* append jl_cn to the doubly linked chain built so far */
40671da177e4SLinus Torvalds 			jl_cn->prev = last_cn;
40681da177e4SLinus Torvalds 			jl_cn->next = NULL;
40691da177e4SLinus Torvalds 			if (last_cn) {
40701da177e4SLinus Torvalds 				last_cn->next = jl_cn;
40711da177e4SLinus Torvalds 			}
40721da177e4SLinus Torvalds 			last_cn = jl_cn;
40731da177e4SLinus Torvalds 			/* make sure the block we are trying to log is not a block
40741da177e4SLinus Torvalds 			   of journal or reserved area */
40751da177e4SLinus Torvalds 
4076bd4c625cSLinus Torvalds 			if (is_block_in_log_or_reserved_area
4077bd4c625cSLinus Torvalds 			    (p_s_sb, cn->bh->b_blocknr)) {
4078bd4c625cSLinus Torvalds 				reiserfs_panic(p_s_sb,
4079bd4c625cSLinus Torvalds 					       "journal-2332: Trying to log block %lu, which is a log block\n",
4080bd4c625cSLinus Torvalds 					       cn->bh->b_blocknr);
40811da177e4SLinus Torvalds 			}
40821da177e4SLinus Torvalds 			jl_cn->blocknr = cn->bh->b_blocknr;
40831da177e4SLinus Torvalds 			jl_cn->state = 0;
40841da177e4SLinus Torvalds 			jl_cn->sb = p_s_sb;
40851da177e4SLinus Torvalds 			jl_cn->bh = cn->bh;
40861da177e4SLinus Torvalds 			jl_cn->jlist = jl;
40871da177e4SLinus Torvalds 			insert_journal_hash(journal->j_list_hash_table, jl_cn);
40881da177e4SLinus Torvalds 			if (i < trans_half) {
4089bd4c625cSLinus Torvalds 				desc->j_realblock[i] =
4090bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
40911da177e4SLinus Torvalds 			} else {
4092bd4c625cSLinus Torvalds 				commit->j_realblock[i - trans_half] =
4093bd4c625cSLinus Torvalds 				    cpu_to_le32(cn->bh->b_blocknr);
40941da177e4SLinus Torvalds 			}
40951da177e4SLinus Torvalds 		} else {
40961da177e4SLinus Torvalds 			i--;
40971da177e4SLinus Torvalds 		}
40981da177e4SLinus Torvalds 	}
40991da177e4SLinus Torvalds 	set_desc_trans_len(desc, journal->j_len);
41001da177e4SLinus Torvalds 	set_desc_mount_id(desc, journal->j_mount_id);
41011da177e4SLinus Torvalds 	set_desc_trans_id(desc, journal->j_trans_id);
41021da177e4SLinus Torvalds 	set_commit_trans_len(commit, journal->j_len);
41031da177e4SLinus Torvalds 
41041da177e4SLinus Torvalds 	/* special check in case all buffers in the journal were marked for not logging */
410514a61442SEric Sesterhenn 	BUG_ON(journal->j_len == 0);
41061da177e4SLinus Torvalds 
41071da177e4SLinus Torvalds 	/* we're about to dirty all the log blocks, mark the description block
41081da177e4SLinus Torvalds 	 * dirty now too.  Don't mark the commit block dirty until all the
41091da177e4SLinus Torvalds 	 * others are on disk
41101da177e4SLinus Torvalds 	 */
41111da177e4SLinus Torvalds 	mark_buffer_dirty(d_bh);
41121da177e4SLinus Torvalds 
41131da177e4SLinus Torvalds 	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
41141da177e4SLinus Torvalds 	cur_write_start = journal->j_start;
41151da177e4SLinus Torvalds 	cn = journal->j_first;
41161da177e4SLinus Torvalds 	jindex = 1;		/* start at one so we don't get the desc again */
	/*
	 * Second pass over the running transaction's cnodes: each journaled
	 * buffer is copied into the log block at
	 * (cur_write_start + jindex) % journal size, which is then dirtied;
	 * the source buffer goes from "journaled" to "journal_dirty".
	 * Every cnode of the running transaction is freed here, logged or
	 * not.
	 */
41171da177e4SLinus Torvalds 	while (cn) {
41181da177e4SLinus Torvalds 		clear_buffer_journal_new(cn->bh);
41191da177e4SLinus Torvalds 		/* copy all the real blocks into log area.  dirty log blocks */
41201da177e4SLinus Torvalds 		if (buffer_journaled(cn->bh)) {
41211da177e4SLinus Torvalds 			struct buffer_head *tmp_bh;
41221da177e4SLinus Torvalds 			char *addr;
41231da177e4SLinus Torvalds 			struct page *page;
4124bd4c625cSLinus Torvalds 			tmp_bh =
4125bd4c625cSLinus Torvalds 			    journal_getblk(p_s_sb,
4126bd4c625cSLinus Torvalds 					   SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
4127bd4c625cSLinus Torvalds 					   ((cur_write_start +
4128bd4c625cSLinus Torvalds 					     jindex) %
4129bd4c625cSLinus Torvalds 					    SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
41301da177e4SLinus Torvalds 			set_buffer_uptodate(tmp_bh);
			/* map the page backing the real buffer and copy from the buffer's offset within it */
41311da177e4SLinus Torvalds 			page = cn->bh->b_page;
41321da177e4SLinus Torvalds 			addr = kmap(page);
4133bd4c625cSLinus Torvalds 			memcpy(tmp_bh->b_data,
4134bd4c625cSLinus Torvalds 			       addr + offset_in_page(cn->bh->b_data),
41351da177e4SLinus Torvalds 			       cn->bh->b_size);
41361da177e4SLinus Torvalds 			kunmap(page);
41371da177e4SLinus Torvalds 			mark_buffer_dirty(tmp_bh);
41381da177e4SLinus Torvalds 			jindex++;
41391da177e4SLinus Torvalds 			set_buffer_journal_dirty(cn->bh);
41401da177e4SLinus Torvalds 			clear_buffer_journaled(cn->bh);
41411da177e4SLinus Torvalds 		} else {
41421da177e4SLinus Torvalds 			/* JDirty cleared sometime during transaction.  don't log this one */
4143bd4c625cSLinus Torvalds 			reiserfs_warning(p_s_sb,
4144bd4c625cSLinus Torvalds 					 "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!");
41451da177e4SLinus Torvalds 			brelse(cn->bh);
41461da177e4SLinus Torvalds 		}
		/* fetch the successor before the cnode is freed */
41471da177e4SLinus Torvalds 		next = cn->next;
41481da177e4SLinus Torvalds 		free_cnode(p_s_sb, cn);
41491da177e4SLinus Torvalds 		cn = next;
41501da177e4SLinus Torvalds 		cond_resched();
41511da177e4SLinus Torvalds 	}
41521da177e4SLinus Torvalds 
41531da177e4SLinus Torvalds 	/* we are done  with both the c_bh and d_bh, but
41541da177e4SLinus Torvalds 	 ** c_bh must be written after all other commit blocks,
41551da177e4SLinus Torvalds 	 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
41561da177e4SLinus Torvalds 	 */
41571da177e4SLinus Torvalds 
	/* a fresh journal list becomes the current one; jl is the finished list from here on */
41581da177e4SLinus Torvalds 	journal->j_current_jl = alloc_journal_list(p_s_sb);
41591da177e4SLinus Torvalds 
41601da177e4SLinus Torvalds 	/* now it is safe to insert this transaction on the main list */
41611da177e4SLinus Torvalds 	list_add_tail(&jl->j_list, &journal->j_journal_list);
41621da177e4SLinus Torvalds 	list_add_tail(&jl->j_working_list, &journal->j_working_list);
41631da177e4SLinus Torvalds 	journal->j_num_work_lists++;
41641da177e4SLinus Torvalds 
41651da177e4SLinus Torvalds 	/* reset journal values for the next transaction */
	/*
	 * j_start advances by j_len + 2, wrapped modulo the on-disk journal
	 * size.  old_start is assigned here but not read again in this
	 * function.
	 */
41661da177e4SLinus Torvalds 	old_start = journal->j_start;
4167bd4c625cSLinus Torvalds 	journal->j_start =
4168bd4c625cSLinus Torvalds 	    (journal->j_start + journal->j_len +
4169bd4c625cSLinus Torvalds 	     2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
41701da177e4SLinus Torvalds 	atomic_set(&(journal->j_wcount), 0);
41711da177e4SLinus Torvalds 	journal->j_bcount = 0;
41721da177e4SLinus Torvalds 	journal->j_last = NULL;
41731da177e4SLinus Torvalds 	journal->j_first = NULL;
41741da177e4SLinus Torvalds 	journal->j_len = 0;
41751da177e4SLinus Torvalds 	journal->j_trans_start_time = 0;
4176a44c94a7SAlexander Zarochentsev 	/* check for trans_id overflow */
	/* when the increment wraps to 0 the counter restarts at 10, so 0 is never used as an id */
4177a44c94a7SAlexander Zarochentsev 	if (++journal->j_trans_id == 0)
4178a44c94a7SAlexander Zarochentsev 		journal->j_trans_id = 10;
41791da177e4SLinus Torvalds 	journal->j_current_jl->j_trans_id = journal->j_trans_id;
41801da177e4SLinus Torvalds 	journal->j_must_wait = 0;
41811da177e4SLinus Torvalds 	journal->j_len_alloc = 0;
41821da177e4SLinus Torvalds 	journal->j_next_full_flush = 0;
41831da177e4SLinus Torvalds 	journal->j_next_async_flush = 0;
41841da177e4SLinus Torvalds 	init_journal_hash(p_s_sb);
41851da177e4SLinus Torvalds 
41861da177e4SLinus Torvalds 	// make sure reiserfs_add_jh sees the new current_jl before we
41871da177e4SLinus Torvalds 	// write out the tails
41881da177e4SLinus Torvalds 	smp_mb();
41891da177e4SLinus Torvalds 
41901da177e4SLinus Torvalds 	/* tail conversion targets have to hit the disk before we end the
41911da177e4SLinus Torvalds 	 * transaction.  Otherwise a later transaction might repack the tail
41921da177e4SLinus Torvalds 	 * before this transaction commits, leaving the data block unflushed and
41931da177e4SLinus Torvalds 	 * clean, if we crash before the later transaction commits, the data block
41941da177e4SLinus Torvalds 	 * is lost.
41951da177e4SLinus Torvalds 	 */
	/* the kernel lock is dropped around the ordered write and taken again afterwards */
41961da177e4SLinus Torvalds 	if (!list_empty(&jl->j_tail_bh_list)) {
41971da177e4SLinus Torvalds 		unlock_kernel();
41981da177e4SLinus Torvalds 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
41991da177e4SLinus Torvalds 				      journal, jl, &jl->j_tail_bh_list);
42001da177e4SLinus Torvalds 		lock_kernel();
42011da177e4SLinus Torvalds 	}
420214a61442SEric Sesterhenn 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
42031da177e4SLinus Torvalds 	up(&jl->j_commit_lock);
42041da177e4SLinus Torvalds 
42051da177e4SLinus Torvalds 	/* honor the flush wishes from the caller, simple commits can
42061da177e4SLinus Torvalds 	 ** be done outside the journal lock, they are done below
42071da177e4SLinus Torvalds 	 **
42081da177e4SLinus Torvalds 	 ** if we don't flush the commit list right now, we put it into
42091da177e4SLinus Torvalds 	 ** the work queue so the people waiting on the async progress work
42101da177e4SLinus Torvalds 	 ** queue don't wait for this proc to flush journal lists and such.
42111da177e4SLinus Torvalds 	 */
42121da177e4SLinus Torvalds 	if (flush) {
42131da177e4SLinus Torvalds 		flush_commit_list(p_s_sb, jl, 1);
42141da177e4SLinus Torvalds 		flush_journal_list(p_s_sb, jl, 1);
42151da177e4SLinus Torvalds 	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
42161da177e4SLinus Torvalds 		queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
42171da177e4SLinus Torvalds 
42181da177e4SLinus Torvalds 	/* if the next transaction has any chance of wrapping, flush
42191da177e4SLinus Torvalds 	 ** transactions that might get overwritten.  If any journal lists are very
42201da177e4SLinus Torvalds 	 ** old flush them as well.
42211da177e4SLinus Torvalds 	 */
	/*
	 * The walk restarts from the list head (goto first_jl) after every
	 * flush_used_journal_lists() call -- presumably because flushing
	 * can remove entries from j_journal_list; confirm.
	 */
42221da177e4SLinus Torvalds       first_jl:
42231da177e4SLinus Torvalds 	list_for_each_safe(entry, safe, &journal->j_journal_list) {
42241da177e4SLinus Torvalds 		temp_jl = JOURNAL_LIST_ENTRY(entry);
42251da177e4SLinus Torvalds 		if (journal->j_start <= temp_jl->j_start) {
42261da177e4SLinus Torvalds 			if ((journal->j_start + journal->j_trans_max + 1) >=
4227bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
42281da177e4SLinus Torvalds 				flush_used_journal_lists(p_s_sb, temp_jl);
42291da177e4SLinus Torvalds 				goto first_jl;
42301da177e4SLinus Torvalds 			} else if ((journal->j_start +
42311da177e4SLinus Torvalds 				    journal->j_trans_max + 1) <
4232bd4c625cSLinus Torvalds 				   SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
42331da177e4SLinus Torvalds 				/* if we don't cross into the next transaction and we don't
42341da177e4SLinus Torvalds 				 * wrap, there is no way we can overlap any later transactions
42351da177e4SLinus Torvalds 				 * break now
42361da177e4SLinus Torvalds 				 */
42371da177e4SLinus Torvalds 				break;
42381da177e4SLinus Torvalds 			}
42391da177e4SLinus Torvalds 		} else if ((journal->j_start +
42401da177e4SLinus Torvalds 			    journal->j_trans_max + 1) >
4241bd4c625cSLinus Torvalds 			   SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
42421da177e4SLinus Torvalds 			if (((journal->j_start + journal->j_trans_max + 1) %
4243bd4c625cSLinus Torvalds 			     SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
4244bd4c625cSLinus Torvalds 			    temp_jl->j_start) {
42451da177e4SLinus Torvalds 				flush_used_journal_lists(p_s_sb, temp_jl);
42461da177e4SLinus Torvalds 				goto first_jl;
42471da177e4SLinus Torvalds 			} else {
42481da177e4SLinus Torvalds 				/* we don't overlap anything from out start to the end of the
42491da177e4SLinus Torvalds 				 * log, and our wrapped portion doesn't overlap anything at
42501da177e4SLinus Torvalds 				 * the start of the log.  We can break
42511da177e4SLinus Torvalds 				 */
42521da177e4SLinus Torvalds 				break;
42531da177e4SLinus Torvalds 			}
42541da177e4SLinus Torvalds 		}
42551da177e4SLinus Torvalds 	}
42561da177e4SLinus Torvalds 	flush_old_journal_lists(p_s_sb);
42571da177e4SLinus Torvalds 
	/* the new current list needs a list bitmap; failing to get one is fatal */
4258bd4c625cSLinus Torvalds 	journal->j_current_jl->j_list_bitmap =
4259bd4c625cSLinus Torvalds 	    get_list_bitmap(p_s_sb, journal->j_current_jl);
42601da177e4SLinus Torvalds 
42611da177e4SLinus Torvalds 	if (!(journal->j_current_jl->j_list_bitmap)) {
4262bd4c625cSLinus Torvalds 		reiserfs_panic(p_s_sb,
4263bd4c625cSLinus Torvalds 			       "journal-1996: do_journal_end, could not get a list bitmap\n");
42641da177e4SLinus Torvalds 	}
42651da177e4SLinus Torvalds 
42661da177e4SLinus Torvalds 	atomic_set(&(journal->j_jlock), 0);
42671da177e4SLinus Torvalds 	unlock_journal(p_s_sb);
42681da177e4SLinus Torvalds 	/* wake up any body waiting to join. */
42691da177e4SLinus Torvalds 	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
42701da177e4SLinus Torvalds 	wake_up(&(journal->j_join_wait));
42711da177e4SLinus Torvalds 
	/*
	 * No full flush was done above but the caller asked to wait: commit
	 * jl now, outside the journal lock, provided the transaction id
	 * saved earlier is still alive.
	 */
42721da177e4SLinus Torvalds 	if (!flush && wait_on_commit &&
42731da177e4SLinus Torvalds 	    journal_list_still_alive(p_s_sb, commit_trans_id)) {
42741da177e4SLinus Torvalds 		flush_commit_list(p_s_sb, jl, 1);
42751da177e4SLinus Torvalds 	}
42761da177e4SLinus Torvalds       out:
42771da177e4SLinus Torvalds 	reiserfs_check_lock_depth(p_s_sb, "journal end2");
42781da177e4SLinus Torvalds 
42791da177e4SLinus Torvalds 	memset(th, 0, sizeof(*th));
42801da177e4SLinus Torvalds 	/* Re-set th->t_super, so we can properly keep track of how many
42811da177e4SLinus Torvalds 	 * persistent transactions there are. We need to do this so if this
42821da177e4SLinus Torvalds 	 * call is part of a failed restart_transaction, we can free it later */
42831da177e4SLinus Torvalds 	th->t_super = p_s_sb;
42841da177e4SLinus Torvalds 
42851da177e4SLinus Torvalds 	return journal->j_errno;
42861da177e4SLinus Torvalds }
42871da177e4SLinus Torvalds 
4288bd4c625cSLinus Torvalds static void __reiserfs_journal_abort_hard(struct super_block *sb)
42891da177e4SLinus Torvalds {
42901da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
42911da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
42921da177e4SLinus Torvalds 		return;
42931da177e4SLinus Torvalds 
42941da177e4SLinus Torvalds 	printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
42951da177e4SLinus Torvalds 	       reiserfs_bdevname(sb));
42961da177e4SLinus Torvalds 
42971da177e4SLinus Torvalds 	sb->s_flags |= MS_RDONLY;
42981da177e4SLinus Torvalds 	set_bit(J_ABORTED, &journal->j_state);
42991da177e4SLinus Torvalds 
43001da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK
43011da177e4SLinus Torvalds 	dump_stack();
43021da177e4SLinus Torvalds #endif
43031da177e4SLinus Torvalds }
43041da177e4SLinus Torvalds 
4305bd4c625cSLinus Torvalds static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
43061da177e4SLinus Torvalds {
43071da177e4SLinus Torvalds 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
43081da177e4SLinus Torvalds 	if (test_bit(J_ABORTED, &journal->j_state))
43091da177e4SLinus Torvalds 		return;
43101da177e4SLinus Torvalds 
43111da177e4SLinus Torvalds 	if (!journal->j_errno)
43121da177e4SLinus Torvalds 		journal->j_errno = errno;
43131da177e4SLinus Torvalds 
43141da177e4SLinus Torvalds 	__reiserfs_journal_abort_hard(sb);
43151da177e4SLinus Torvalds }
43161da177e4SLinus Torvalds 
/*
 * Externally visible entry point for aborting the journal of sb with the
 * error code errno; all work is done by __reiserfs_journal_abort_soft().
 */
void reiserfs_journal_abort(struct super_block *sb, int errno)
{
	__reiserfs_journal_abort_soft(sb, errno);
}
43201da177e4SLinus Torvalds }
4321