/*
** Write ahead logging implementation copyright Chris Mason 2000
**
** The background commits make this code very interrelated, and
** overly complex.  I need to rethink things a bit....The major players:
**
** journal_begin -- call with the number of blocks you expect to log.
**                  If the current transaction is too
**                  old, it will block until the current transaction is
**                  finished, and then start a new one.
**                  Usually, your transaction will get joined in with
**                  previous ones for speed.
**
** journal_join  -- same as journal_begin, but won't block on the current
**                  transaction regardless of age.  Don't ever call
**                  this.  Ever.  There are only two places it should be
**                  called from, and they are both inside this file.
**
** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
**                       that might make them get sent to disk
**                       and then marks them BH_JDirty.  Puts the buffer head
**                       into the current transaction hash.
**
** journal_end -- if the current transaction is batchable, it does nothing
**                otherwise, it could do an async/synchronous commit, or
**                a full flush of all log and real blocks in the
**                transaction.
281da177e4SLinus Torvalds ** 291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and 301da177e4SLinus Torvalds ** commit blocks are sent to disk. Forces commit blocks 311da177e4SLinus Torvalds ** to disk for all backgrounded commits that have been 321da177e4SLinus Torvalds ** around too long. 331da177e4SLinus Torvalds ** -- Note, if you call this as an immediate flush from 341da177e4SLinus Torvalds ** from within kupdate, it will ignore the immediate flag 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds #include <linux/time.h> 386188e10dSMatthew Wilcox #include <linux/semaphore.h> 391da177e4SLinus Torvalds #include <linux/vmalloc.h> 401da177e4SLinus Torvalds #include <linux/reiserfs_fs.h> 411da177e4SLinus Torvalds #include <linux/kernel.h> 421da177e4SLinus Torvalds #include <linux/errno.h> 431da177e4SLinus Torvalds #include <linux/fcntl.h> 441da177e4SLinus Torvalds #include <linux/stat.h> 451da177e4SLinus Torvalds #include <linux/string.h> 461da177e4SLinus Torvalds #include <linux/smp_lock.h> 471da177e4SLinus Torvalds #include <linux/buffer_head.h> 481da177e4SLinus Torvalds #include <linux/workqueue.h> 491da177e4SLinus Torvalds #include <linux/writeback.h> 501da177e4SLinus Torvalds #include <linux/blkdev.h> 513fcfab16SAndrew Morton #include <linux/backing-dev.h> 5290415deaSJeff Mahoney #include <linux/uaccess.h> 5390415deaSJeff Mahoney 5490415deaSJeff Mahoney #include <asm/system.h> 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds /* gets a struct reiserfs_journal_list * from a list head */ 571da177e4SLinus Torvalds #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 581da177e4SLinus Torvalds j_list)) 591da177e4SLinus Torvalds #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 601da177e4SLinus Torvalds j_working_list)) 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds /* the number of mounted filesystems. 
This is used to decide when to 631da177e4SLinus Torvalds ** start and kill the commit workqueue 641da177e4SLinus Torvalds */ 651da177e4SLinus Torvalds static int reiserfs_mounted_fs_count; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds static struct workqueue_struct *commit_wq; 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 701da177e4SLinus Torvalds structs at 4k */ 711da177e4SLinus Torvalds #define BUFNR 64 /*read ahead */ 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds /* cnode stat bits. Move these into reiserfs_fs.h */ 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 761da177e4SLinus Torvalds #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 791da177e4SLinus Torvalds #define BLOCK_DIRTIED 5 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds /* journal list state bits */ 821da177e4SLinus Torvalds #define LIST_TOUCHED 1 831da177e4SLinus Torvalds #define LIST_DIRTY 2 841da177e4SLinus Torvalds #define LIST_COMMIT_PENDING 4 /* someone will commit this list */ 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds /* flags for do_journal_end */ 871da177e4SLinus Torvalds #define FLUSH_ALL 1 /* flush commit and real blocks */ 881da177e4SLinus Torvalds #define COMMIT_NOW 2 /* end and commit this transaction */ 891da177e4SLinus Torvalds #define WAIT 4 /* wait for the log blocks to hit the disk */ 901da177e4SLinus Torvalds 91bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *, 92bd4c625cSLinus Torvalds struct super_block *, unsigned long nblocks, 93bd4c625cSLinus Torvalds int flags); 94bd4c625cSLinus Torvalds static int flush_journal_list(struct super_block *s, 95bd4c625cSLinus Torvalds struct 
reiserfs_journal_list *jl, int flushall); 96bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s, 97bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall); 981da177e4SLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn); 99bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th, 100a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks); 1011da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super, 1021da177e4SLinus Torvalds struct reiserfs_journal *journal); 1031da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s, 1041da177e4SLinus Torvalds struct reiserfs_journal_list *jl); 105c4028958SDavid Howells static void flush_async_commits(struct work_struct *work); 1061da177e4SLinus Torvalds static void queue_log_writer(struct super_block *s); 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds /* values for join in do_journal_begin_r */ 1091da177e4SLinus Torvalds enum { 1101da177e4SLinus Torvalds JBEGIN_REG = 0, /* regular journal begin */ 1111da177e4SLinus Torvalds JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 1121da177e4SLinus Torvalds JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 1131da177e4SLinus Torvalds }; 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 116a9dd3643SJeff Mahoney struct super_block *sb, 1171da177e4SLinus Torvalds unsigned long nblocks, int join); 1181da177e4SLinus Torvalds 119a9dd3643SJeff Mahoney static void init_journal_hash(struct super_block *sb) 120bd4c625cSLinus Torvalds { 121a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 122bd4c625cSLinus Torvalds memset(journal->j_hash_table, 0, 123bd4c625cSLinus Torvalds JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 1241da177e4SLinus Torvalds } 1251da177e4SLinus Torvalds 1261da177e4SLinus 
Torvalds /* 1271da177e4SLinus Torvalds ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to 1281da177e4SLinus Torvalds ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 1291da177e4SLinus Torvalds ** more details. 1301da177e4SLinus Torvalds */ 131bd4c625cSLinus Torvalds static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) 132bd4c625cSLinus Torvalds { 1331da177e4SLinus Torvalds if (bh) { 1341da177e4SLinus Torvalds clear_buffer_dirty(bh); 1351da177e4SLinus Torvalds clear_buffer_journal_test(bh); 1361da177e4SLinus Torvalds } 1371da177e4SLinus Torvalds return 0; 1381da177e4SLinus Torvalds } 1391da177e4SLinus Torvalds 1401da177e4SLinus Torvalds static void disable_barrier(struct super_block *s) 1411da177e4SLinus Torvalds { 1421da177e4SLinus Torvalds REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); 143bd4c625cSLinus Torvalds printk("reiserfs: disabling flush barriers on %s\n", 144bd4c625cSLinus Torvalds reiserfs_bdevname(s)); 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds 147bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block 148a9dd3643SJeff Mahoney *sb) 149bd4c625cSLinus Torvalds { 1501da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn; 1511da177e4SLinus Torvalds static int id; 1521da177e4SLinus Torvalds 153d739b42bSPekka Enberg bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); 1541da177e4SLinus Torvalds if (!bn) { 1551da177e4SLinus Torvalds return NULL; 1561da177e4SLinus Torvalds } 157a9dd3643SJeff Mahoney bn->data = kzalloc(sb->s_blocksize, GFP_NOFS); 1581da177e4SLinus Torvalds if (!bn->data) { 159d739b42bSPekka Enberg kfree(bn); 1601da177e4SLinus Torvalds return NULL; 1611da177e4SLinus Torvalds } 1621da177e4SLinus Torvalds bn->id = id++; 1631da177e4SLinus Torvalds INIT_LIST_HEAD(&bn->list); 1641da177e4SLinus Torvalds return bn; 1651da177e4SLinus Torvalds } 
1661da177e4SLinus Torvalds 167a9dd3643SJeff Mahoney static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb) 168bd4c625cSLinus Torvalds { 169a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1701da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn = NULL; 1711da177e4SLinus Torvalds struct list_head *entry = journal->j_bitmap_nodes.next; 1721da177e4SLinus Torvalds 1731da177e4SLinus Torvalds journal->j_used_bitmap_nodes++; 1741da177e4SLinus Torvalds repeat: 1751da177e4SLinus Torvalds 1761da177e4SLinus Torvalds if (entry != &journal->j_bitmap_nodes) { 1771da177e4SLinus Torvalds bn = list_entry(entry, struct reiserfs_bitmap_node, list); 1781da177e4SLinus Torvalds list_del(entry); 179a9dd3643SJeff Mahoney memset(bn->data, 0, sb->s_blocksize); 1801da177e4SLinus Torvalds journal->j_free_bitmap_nodes--; 1811da177e4SLinus Torvalds return bn; 1821da177e4SLinus Torvalds } 183a9dd3643SJeff Mahoney bn = allocate_bitmap_node(sb); 1841da177e4SLinus Torvalds if (!bn) { 1851da177e4SLinus Torvalds yield(); 1861da177e4SLinus Torvalds goto repeat; 1871da177e4SLinus Torvalds } 1881da177e4SLinus Torvalds return bn; 1891da177e4SLinus Torvalds } 190a9dd3643SJeff Mahoney static inline void free_bitmap_node(struct super_block *sb, 191bd4c625cSLinus Torvalds struct reiserfs_bitmap_node *bn) 192bd4c625cSLinus Torvalds { 193a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1941da177e4SLinus Torvalds journal->j_used_bitmap_nodes--; 1951da177e4SLinus Torvalds if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 196d739b42bSPekka Enberg kfree(bn->data); 197d739b42bSPekka Enberg kfree(bn); 1981da177e4SLinus Torvalds } else { 1991da177e4SLinus Torvalds list_add(&bn->list, &journal->j_bitmap_nodes); 2001da177e4SLinus Torvalds journal->j_free_bitmap_nodes++; 2011da177e4SLinus Torvalds } 2021da177e4SLinus Torvalds } 2031da177e4SLinus Torvalds 204a9dd3643SJeff Mahoney static void allocate_bitmap_nodes(struct super_block *sb) 
205bd4c625cSLinus Torvalds { 2061da177e4SLinus Torvalds int i; 207a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 2081da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn = NULL; 2091da177e4SLinus Torvalds for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { 210a9dd3643SJeff Mahoney bn = allocate_bitmap_node(sb); 2111da177e4SLinus Torvalds if (bn) { 2121da177e4SLinus Torvalds list_add(&bn->list, &journal->j_bitmap_nodes); 2131da177e4SLinus Torvalds journal->j_free_bitmap_nodes++; 2141da177e4SLinus Torvalds } else { 2150222e657SJeff Mahoney break; /* this is ok, we'll try again when more are needed */ 2161da177e4SLinus Torvalds } 2171da177e4SLinus Torvalds } 2181da177e4SLinus Torvalds } 2191da177e4SLinus Torvalds 220a9dd3643SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *sb, 2213ee16670SJeff Mahoney b_blocknr_t block, 222bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb) 223bd4c625cSLinus Torvalds { 224a9dd3643SJeff Mahoney unsigned int bmap_nr = block / (sb->s_blocksize << 3); 225a9dd3643SJeff Mahoney unsigned int bit_nr = block % (sb->s_blocksize << 3); 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds if (!jb->bitmaps[bmap_nr]) { 228a9dd3643SJeff Mahoney jb->bitmaps[bmap_nr] = get_bitmap_node(sb); 2291da177e4SLinus Torvalds } 2301da177e4SLinus Torvalds set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); 2311da177e4SLinus Torvalds return 0; 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds 234a9dd3643SJeff Mahoney static void cleanup_bitmap_list(struct super_block *sb, 235bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb) 236bd4c625cSLinus Torvalds { 2371da177e4SLinus Torvalds int i; 2381da177e4SLinus Torvalds if (jb->bitmaps == NULL) 2391da177e4SLinus Torvalds return; 2401da177e4SLinus Torvalds 241a9dd3643SJeff Mahoney for (i = 0; i < reiserfs_bmap_count(sb); i++) { 2421da177e4SLinus Torvalds if (jb->bitmaps[i]) { 243a9dd3643SJeff Mahoney free_bitmap_node(sb, jb->bitmaps[i]); 
2441da177e4SLinus Torvalds jb->bitmaps[i] = NULL; 2451da177e4SLinus Torvalds } 2461da177e4SLinus Torvalds } 2471da177e4SLinus Torvalds } 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds /* 2501da177e4SLinus Torvalds ** only call this on FS unmount. 2511da177e4SLinus Torvalds */ 252a9dd3643SJeff Mahoney static int free_list_bitmaps(struct super_block *sb, 253bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb_array) 254bd4c625cSLinus Torvalds { 2551da177e4SLinus Torvalds int i; 2561da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 2571da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 2581da177e4SLinus Torvalds jb = jb_array + i; 2591da177e4SLinus Torvalds jb->journal_list = NULL; 260a9dd3643SJeff Mahoney cleanup_bitmap_list(sb, jb); 2611da177e4SLinus Torvalds vfree(jb->bitmaps); 2621da177e4SLinus Torvalds jb->bitmaps = NULL; 2631da177e4SLinus Torvalds } 2641da177e4SLinus Torvalds return 0; 2651da177e4SLinus Torvalds } 2661da177e4SLinus Torvalds 267a9dd3643SJeff Mahoney static int free_bitmap_nodes(struct super_block *sb) 268bd4c625cSLinus Torvalds { 269a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 2701da177e4SLinus Torvalds struct list_head *next = journal->j_bitmap_nodes.next; 2711da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn; 2721da177e4SLinus Torvalds 2731da177e4SLinus Torvalds while (next != &journal->j_bitmap_nodes) { 2741da177e4SLinus Torvalds bn = list_entry(next, struct reiserfs_bitmap_node, list); 2751da177e4SLinus Torvalds list_del(next); 276d739b42bSPekka Enberg kfree(bn->data); 277d739b42bSPekka Enberg kfree(bn); 2781da177e4SLinus Torvalds next = journal->j_bitmap_nodes.next; 2791da177e4SLinus Torvalds journal->j_free_bitmap_nodes--; 2801da177e4SLinus Torvalds } 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds return 0; 2831da177e4SLinus Torvalds } 2841da177e4SLinus Torvalds 2851da177e4SLinus Torvalds /* 2861da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of 
bitmaps. 2871da177e4SLinus Torvalds ** jb_array is the array to be filled in. 2881da177e4SLinus Torvalds */ 289a9dd3643SJeff Mahoney int reiserfs_allocate_list_bitmaps(struct super_block *sb, 2901da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb_array, 2913ee16670SJeff Mahoney unsigned int bmap_nr) 292bd4c625cSLinus Torvalds { 2931da177e4SLinus Torvalds int i; 2941da177e4SLinus Torvalds int failed = 0; 2951da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 2961da177e4SLinus Torvalds int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 2991da177e4SLinus Torvalds jb = jb_array + i; 3001da177e4SLinus Torvalds jb->journal_list = NULL; 3011da177e4SLinus Torvalds jb->bitmaps = vmalloc(mem); 3021da177e4SLinus Torvalds if (!jb->bitmaps) { 303a9dd3643SJeff Mahoney reiserfs_warning(sb, "clm-2000", "unable to " 30445b03d5eSJeff Mahoney "allocate bitmaps for journal lists"); 3051da177e4SLinus Torvalds failed = 1; 3061da177e4SLinus Torvalds break; 3071da177e4SLinus Torvalds } 3081da177e4SLinus Torvalds memset(jb->bitmaps, 0, mem); 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds if (failed) { 311a9dd3643SJeff Mahoney free_list_bitmaps(sb, jb_array); 3121da177e4SLinus Torvalds return -1; 3131da177e4SLinus Torvalds } 3141da177e4SLinus Torvalds return 0; 3151da177e4SLinus Torvalds } 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds /* 3181da177e4SLinus Torvalds ** find an available list bitmap. 
If you can't find one, flush a commit list 3191da177e4SLinus Torvalds ** and try again 3201da177e4SLinus Torvalds */ 321a9dd3643SJeff Mahoney static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, 322bd4c625cSLinus Torvalds struct reiserfs_journal_list 323bd4c625cSLinus Torvalds *jl) 324bd4c625cSLinus Torvalds { 3251da177e4SLinus Torvalds int i, j; 326a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 3271da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = NULL; 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { 3301da177e4SLinus Torvalds i = journal->j_list_bitmap_index; 3311da177e4SLinus Torvalds journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; 3321da177e4SLinus Torvalds jb = journal->j_list_bitmap + i; 3331da177e4SLinus Torvalds if (journal->j_list_bitmap[i].journal_list) { 334a9dd3643SJeff Mahoney flush_commit_list(sb, 335bd4c625cSLinus Torvalds journal->j_list_bitmap[i]. 336bd4c625cSLinus Torvalds journal_list, 1); 3371da177e4SLinus Torvalds if (!journal->j_list_bitmap[i].journal_list) { 3381da177e4SLinus Torvalds break; 3391da177e4SLinus Torvalds } 3401da177e4SLinus Torvalds } else { 3411da177e4SLinus Torvalds break; 3421da177e4SLinus Torvalds } 3431da177e4SLinus Torvalds } 3441da177e4SLinus Torvalds if (jb->journal_list) { /* double check to make sure if flushed correctly */ 3451da177e4SLinus Torvalds return NULL; 3461da177e4SLinus Torvalds } 3471da177e4SLinus Torvalds jb->journal_list = jl; 3481da177e4SLinus Torvalds return jb; 3491da177e4SLinus Torvalds } 3501da177e4SLinus Torvalds 3511da177e4SLinus Torvalds /* 3521da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list. 
3531da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers 3541da177e4SLinus Torvalds ** returns NULL on failure 3551da177e4SLinus Torvalds */ 356bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) 357bd4c625cSLinus Torvalds { 3581da177e4SLinus Torvalds struct reiserfs_journal_cnode *head; 3591da177e4SLinus Torvalds int i; 3601da177e4SLinus Torvalds if (num_cnodes <= 0) { 3611da177e4SLinus Torvalds return NULL; 3621da177e4SLinus Torvalds } 3631da177e4SLinus Torvalds head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); 3641da177e4SLinus Torvalds if (!head) { 3651da177e4SLinus Torvalds return NULL; 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); 3681da177e4SLinus Torvalds head[0].prev = NULL; 3691da177e4SLinus Torvalds head[0].next = head + 1; 3701da177e4SLinus Torvalds for (i = 1; i < num_cnodes; i++) { 3711da177e4SLinus Torvalds head[i].prev = head + (i - 1); 3721da177e4SLinus Torvalds head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ 3731da177e4SLinus Torvalds } 3741da177e4SLinus Torvalds head[num_cnodes - 1].next = NULL; 3751da177e4SLinus Torvalds return head; 3761da177e4SLinus Torvalds } 3771da177e4SLinus Torvalds 3781da177e4SLinus Torvalds /* 3791da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure 3801da177e4SLinus Torvalds */ 381a9dd3643SJeff Mahoney static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) 382bd4c625cSLinus Torvalds { 3831da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 384a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 3851da177e4SLinus Torvalds 386a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "get_cnode"); 3871da177e4SLinus Torvalds 3881da177e4SLinus Torvalds if (journal->j_cnode_free <= 0) { 3891da177e4SLinus Torvalds return NULL; 3901da177e4SLinus Torvalds } 3911da177e4SLinus 
Torvalds journal->j_cnode_used++; 3921da177e4SLinus Torvalds journal->j_cnode_free--; 3931da177e4SLinus Torvalds cn = journal->j_cnode_free_list; 3941da177e4SLinus Torvalds if (!cn) { 3951da177e4SLinus Torvalds return cn; 3961da177e4SLinus Torvalds } 3971da177e4SLinus Torvalds if (cn->next) { 3981da177e4SLinus Torvalds cn->next->prev = NULL; 3991da177e4SLinus Torvalds } 4001da177e4SLinus Torvalds journal->j_cnode_free_list = cn->next; 4011da177e4SLinus Torvalds memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); 4021da177e4SLinus Torvalds return cn; 4031da177e4SLinus Torvalds } 4041da177e4SLinus Torvalds 4051da177e4SLinus Torvalds /* 4061da177e4SLinus Torvalds ** returns a cnode to the free list 4071da177e4SLinus Torvalds */ 408a9dd3643SJeff Mahoney static void free_cnode(struct super_block *sb, 409bd4c625cSLinus Torvalds struct reiserfs_journal_cnode *cn) 410bd4c625cSLinus Torvalds { 411a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 4121da177e4SLinus Torvalds 413a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "free_cnode"); 4141da177e4SLinus Torvalds 4151da177e4SLinus Torvalds journal->j_cnode_used--; 4161da177e4SLinus Torvalds journal->j_cnode_free++; 4171da177e4SLinus Torvalds /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ 4181da177e4SLinus Torvalds cn->next = journal->j_cnode_free_list; 4191da177e4SLinus Torvalds if (journal->j_cnode_free_list) { 4201da177e4SLinus Torvalds journal->j_cnode_free_list->prev = cn; 4211da177e4SLinus Torvalds } 4221da177e4SLinus Torvalds cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ 4231da177e4SLinus Torvalds journal->j_cnode_free_list = cn; 4241da177e4SLinus Torvalds } 4251da177e4SLinus Torvalds 426bd4c625cSLinus Torvalds static void clear_prepared_bits(struct buffer_head *bh) 427bd4c625cSLinus Torvalds { 4281da177e4SLinus Torvalds clear_buffer_journal_prepared(bh); 4291da177e4SLinus Torvalds clear_buffer_journal_restore_dirty(bh); 
4301da177e4SLinus Torvalds } 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */ 433bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 434bd4c625cSLinus Torvalds super_block 435bd4c625cSLinus Torvalds *sb, 436bd4c625cSLinus Torvalds struct 437bd4c625cSLinus Torvalds reiserfs_journal_cnode 438bd4c625cSLinus Torvalds **table, 4391da177e4SLinus Torvalds long bl) 4401da177e4SLinus Torvalds { 4411da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 4421da177e4SLinus Torvalds cn = journal_hash(table, sb, bl); 4431da177e4SLinus Torvalds while (cn) { 4441da177e4SLinus Torvalds if (cn->blocknr == bl && cn->sb == sb) 4451da177e4SLinus Torvalds return cn; 4461da177e4SLinus Torvalds cn = cn->hnext; 4471da177e4SLinus Torvalds } 4481da177e4SLinus Torvalds return (struct reiserfs_journal_cnode *)0; 4491da177e4SLinus Torvalds } 4501da177e4SLinus Torvalds 4511da177e4SLinus Torvalds /* 4521da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated 4531da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever 4541da177e4SLinus Torvalds ** being overwritten by a replay after crashing. 4551da177e4SLinus Torvalds ** 4561da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting 4571da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make 4581da177e4SLinus Torvalds ** sure you never write the block without logging it. 4591da177e4SLinus Torvalds ** 4601da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward. 
4611da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search 4621da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl. Then, we return that 4631da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try. 4641da177e4SLinus Torvalds ** 4651da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't 4661da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal 4671da177e4SLinus Torvalds ** 4681da177e4SLinus Torvalds */ 469a9dd3643SJeff Mahoney int reiserfs_in_journal(struct super_block *sb, 4703ee16670SJeff Mahoney unsigned int bmap_nr, int bit_nr, int search_all, 471bd4c625cSLinus Torvalds b_blocknr_t * next_zero_bit) 472bd4c625cSLinus Torvalds { 473a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 4741da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 4751da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 4761da177e4SLinus Torvalds int i; 4771da177e4SLinus Torvalds unsigned long bl; 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds *next_zero_bit = 0; /* always start this at zero. */ 4801da177e4SLinus Torvalds 481a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.in_journal); 4821da177e4SLinus Torvalds /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 
4831da177e4SLinus Torvalds ** if we crash before the transaction that freed it commits, this transaction won't 4841da177e4SLinus Torvalds ** have committed either, and the block will never be written 4851da177e4SLinus Torvalds */ 4861da177e4SLinus Torvalds if (search_all) { 4871da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 488a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.in_journal_bitmap); 4891da177e4SLinus Torvalds jb = journal->j_list_bitmap + i; 4901da177e4SLinus Torvalds if (jb->journal_list && jb->bitmaps[bmap_nr] && 491bd4c625cSLinus Torvalds test_bit(bit_nr, 492bd4c625cSLinus Torvalds (unsigned long *)jb->bitmaps[bmap_nr]-> 493bd4c625cSLinus Torvalds data)) { 494bd4c625cSLinus Torvalds *next_zero_bit = 495bd4c625cSLinus Torvalds find_next_zero_bit((unsigned long *) 496bd4c625cSLinus Torvalds (jb->bitmaps[bmap_nr]-> 497bd4c625cSLinus Torvalds data), 498a9dd3643SJeff Mahoney sb->s_blocksize << 3, 499bd4c625cSLinus Torvalds bit_nr + 1); 5001da177e4SLinus Torvalds return 1; 5011da177e4SLinus Torvalds } 5021da177e4SLinus Torvalds } 5031da177e4SLinus Torvalds } 5041da177e4SLinus Torvalds 505a9dd3643SJeff Mahoney bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr; 5061da177e4SLinus Torvalds /* is it in any old transactions? */ 507bd4c625cSLinus Torvalds if (search_all 508bd4c625cSLinus Torvalds && (cn = 509a9dd3643SJeff Mahoney get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) { 5101da177e4SLinus Torvalds return 1; 5111da177e4SLinus Torvalds } 5121da177e4SLinus Torvalds 5131da177e4SLinus Torvalds /* is it in the current transaction. 
This should never happen */ 514a9dd3643SJeff Mahoney if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) { 5151da177e4SLinus Torvalds BUG(); 5161da177e4SLinus Torvalds return 1; 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds 519a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.in_journal_reusable); 5201da177e4SLinus Torvalds /* safe for reuse */ 5211da177e4SLinus Torvalds return 0; 5221da177e4SLinus Torvalds } 5231da177e4SLinus Torvalds 5241da177e4SLinus Torvalds /* insert cn into table 5251da177e4SLinus Torvalds */ 526bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, 527bd4c625cSLinus Torvalds struct reiserfs_journal_cnode *cn) 528bd4c625cSLinus Torvalds { 5291da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn_orig; 5301da177e4SLinus Torvalds 5311da177e4SLinus Torvalds cn_orig = journal_hash(table, cn->sb, cn->blocknr); 5321da177e4SLinus Torvalds cn->hnext = cn_orig; 5331da177e4SLinus Torvalds cn->hprev = NULL; 5341da177e4SLinus Torvalds if (cn_orig) { 5351da177e4SLinus Torvalds cn_orig->hprev = cn; 5361da177e4SLinus Torvalds } 5371da177e4SLinus Torvalds journal_hash(table, cn->sb, cn->blocknr) = cn; 5381da177e4SLinus Torvalds } 5391da177e4SLinus Torvalds 5408ebc4232SFrederic Weisbecker /* 5418ebc4232SFrederic Weisbecker * Several mutexes depend on the write lock. 5428ebc4232SFrederic Weisbecker * However sometimes we want to relax the write lock while we hold 5438ebc4232SFrederic Weisbecker * these mutexes, according to the release/reacquire on schedule() 5448ebc4232SFrederic Weisbecker * properties of the Bkl that were used. 5458ebc4232SFrederic Weisbecker * Reiserfs performances and locking were based on this scheme. 
5468ebc4232SFrederic Weisbecker * Now that the write lock is a mutex and not the bkl anymore, doing so 5478ebc4232SFrederic Weisbecker * may result in a deadlock: 5488ebc4232SFrederic Weisbecker * 5498ebc4232SFrederic Weisbecker * A acquire write_lock 5508ebc4232SFrederic Weisbecker * A acquire j_commit_mutex 5518ebc4232SFrederic Weisbecker * A release write_lock and wait for something 5528ebc4232SFrederic Weisbecker * B acquire write_lock 5538ebc4232SFrederic Weisbecker * B can't acquire j_commit_mutex and sleep 5548ebc4232SFrederic Weisbecker * A can't acquire write lock anymore 5558ebc4232SFrederic Weisbecker * deadlock 5568ebc4232SFrederic Weisbecker * 5578ebc4232SFrederic Weisbecker * What we do here is avoiding such deadlock by playing the same game 5588ebc4232SFrederic Weisbecker * than the Bkl: if we can't acquire a mutex that depends on the write lock, 5598ebc4232SFrederic Weisbecker * we release the write lock, wait a bit and then retry. 5608ebc4232SFrederic Weisbecker * 5618ebc4232SFrederic Weisbecker * The mutexes concerned by this hack are: 5628ebc4232SFrederic Weisbecker * - The commit mutex of a journal list 5638ebc4232SFrederic Weisbecker * - The flush mutex 5648ebc4232SFrederic Weisbecker * - The journal lock 5658ebc4232SFrederic Weisbecker */ 5668ebc4232SFrederic Weisbecker static inline void reiserfs_mutex_lock_safe(struct mutex *m, 5678ebc4232SFrederic Weisbecker struct super_block *s) 5688ebc4232SFrederic Weisbecker { 5698ebc4232SFrederic Weisbecker while (!mutex_trylock(m)) { 5708ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 5718ebc4232SFrederic Weisbecker schedule(); 5728ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 5738ebc4232SFrederic Weisbecker } 5748ebc4232SFrederic Weisbecker } 5758ebc4232SFrederic Weisbecker 5761da177e4SLinus Torvalds /* lock the current transaction */ 577a9dd3643SJeff Mahoney static inline void lock_journal(struct super_block *sb) 578bd4c625cSLinus Torvalds { 579a9dd3643SJeff Mahoney PROC_INFO_INC(sb, 
journal.lock_journal); 5808ebc4232SFrederic Weisbecker 5818ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb); 5821da177e4SLinus Torvalds } 5831da177e4SLinus Torvalds 5841da177e4SLinus Torvalds /* unlock the current transaction */ 585a9dd3643SJeff Mahoney static inline void unlock_journal(struct super_block *sb) 586bd4c625cSLinus Torvalds { 587a9dd3643SJeff Mahoney mutex_unlock(&SB_JOURNAL(sb)->j_mutex); 5881da177e4SLinus Torvalds } 5891da177e4SLinus Torvalds 5901da177e4SLinus Torvalds static inline void get_journal_list(struct reiserfs_journal_list *jl) 5911da177e4SLinus Torvalds { 5921da177e4SLinus Torvalds jl->j_refcount++; 5931da177e4SLinus Torvalds } 5941da177e4SLinus Torvalds 5951da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s, 5961da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 5971da177e4SLinus Torvalds { 5981da177e4SLinus Torvalds if (jl->j_refcount < 1) { 599c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d", 600bd4c625cSLinus Torvalds jl->j_trans_id, jl->j_refcount); 6011da177e4SLinus Torvalds } 6021da177e4SLinus Torvalds if (--jl->j_refcount == 0) 603d739b42bSPekka Enberg kfree(jl); 6041da177e4SLinus Torvalds } 6051da177e4SLinus Torvalds 6061da177e4SLinus Torvalds /* 6071da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again. 6081da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 6091da177e4SLinus Torvalds ** transaction. 
6101da177e4SLinus Torvalds */ 611a9dd3643SJeff Mahoney static void cleanup_freed_for_journal_list(struct super_block *sb, 612bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 613bd4c625cSLinus Torvalds { 6141da177e4SLinus Torvalds 6151da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; 6161da177e4SLinus Torvalds if (jb) { 617a9dd3643SJeff Mahoney cleanup_bitmap_list(sb, jb); 6181da177e4SLinus Torvalds } 6191da177e4SLinus Torvalds jl->j_list_bitmap->journal_list = NULL; 6201da177e4SLinus Torvalds jl->j_list_bitmap = NULL; 6211da177e4SLinus Torvalds } 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s, 624600ed416SJeff Mahoney unsigned int trans_id) 6251da177e4SLinus Torvalds { 6261da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 6271da177e4SLinus Torvalds struct list_head *entry = &journal->j_journal_list; 6281da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 6291da177e4SLinus Torvalds 6301da177e4SLinus Torvalds if (!list_empty(entry)) { 6311da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry->next); 6321da177e4SLinus Torvalds if (jl->j_trans_id <= trans_id) { 6331da177e4SLinus Torvalds return 1; 6341da177e4SLinus Torvalds } 6351da177e4SLinus Torvalds } 6361da177e4SLinus Torvalds return 0; 6371da177e4SLinus Torvalds } 6381da177e4SLinus Torvalds 639398c95bdSChris Mason /* 640398c95bdSChris Mason * If page->mapping was null, we failed to truncate this page for 641398c95bdSChris Mason * some reason. Most likely because it was truncated after being 642398c95bdSChris Mason * logged via data=journal. 643398c95bdSChris Mason * 644398c95bdSChris Mason * This does a check to see if the buffer belongs to one of these 645398c95bdSChris Mason * lost pages before doing the final put_bh. 
If page->mapping was 646398c95bdSChris Mason * null, it tries to free buffers on the page, which should make the 647398c95bdSChris Mason * final page_cache_release drop the page from the lru. 648398c95bdSChris Mason */ 649398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh) 650398c95bdSChris Mason { 651398c95bdSChris Mason struct page *page = bh->b_page; 652529ae9aaSNick Piggin if (!page->mapping && trylock_page(page)) { 653398c95bdSChris Mason page_cache_get(page); 654398c95bdSChris Mason put_bh(bh); 655398c95bdSChris Mason if (!page->mapping) 656398c95bdSChris Mason try_to_free_buffers(page); 657398c95bdSChris Mason unlock_page(page); 658398c95bdSChris Mason page_cache_release(page); 659398c95bdSChris Mason } else { 660398c95bdSChris Mason put_bh(bh); 661398c95bdSChris Mason } 662398c95bdSChris Mason } 663398c95bdSChris Mason 664bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 665bd4c625cSLinus Torvalds { 6661da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 6671da177e4SLinus Torvalds 6681da177e4SLinus Torvalds if (buffer_journaled(bh)) { 66945b03d5eSJeff Mahoney reiserfs_warning(NULL, "clm-2084", 67045b03d5eSJeff Mahoney "pinned buffer %lu:%s sent to disk", 6711da177e4SLinus Torvalds bh->b_blocknr, bdevname(bh->b_bdev, b)); 6721da177e4SLinus Torvalds } 6731da177e4SLinus Torvalds if (uptodate) 6741da177e4SLinus Torvalds set_buffer_uptodate(bh); 6751da177e4SLinus Torvalds else 6761da177e4SLinus Torvalds clear_buffer_uptodate(bh); 677398c95bdSChris Mason 6781da177e4SLinus Torvalds unlock_buffer(bh); 679398c95bdSChris Mason release_buffer_page(bh); 6801da177e4SLinus Torvalds } 6811da177e4SLinus Torvalds 682bd4c625cSLinus Torvalds static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) 683bd4c625cSLinus Torvalds { 6841da177e4SLinus Torvalds if (uptodate) 6851da177e4SLinus Torvalds set_buffer_uptodate(bh); 6861da177e4SLinus Torvalds else 6871da177e4SLinus Torvalds 
clear_buffer_uptodate(bh); 6881da177e4SLinus Torvalds unlock_buffer(bh); 6891da177e4SLinus Torvalds put_bh(bh); 6901da177e4SLinus Torvalds } 6911da177e4SLinus Torvalds 692bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh) 693bd4c625cSLinus Torvalds { 6941da177e4SLinus Torvalds get_bh(bh); 6951da177e4SLinus Torvalds bh->b_end_io = reiserfs_end_buffer_io_sync; 6961da177e4SLinus Torvalds clear_buffer_journal_new(bh); 6971da177e4SLinus Torvalds clear_buffer_dirty(bh); 6981da177e4SLinus Torvalds if (!test_clear_buffer_journal_test(bh)) 6991da177e4SLinus Torvalds BUG(); 7001da177e4SLinus Torvalds if (!buffer_uptodate(bh)) 7011da177e4SLinus Torvalds BUG(); 7021da177e4SLinus Torvalds submit_bh(WRITE, bh); 7031da177e4SLinus Torvalds } 7041da177e4SLinus Torvalds 705bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh) 706bd4c625cSLinus Torvalds { 7071da177e4SLinus Torvalds get_bh(bh); 7081da177e4SLinus Torvalds bh->b_end_io = reiserfs_end_ordered_io; 7091da177e4SLinus Torvalds clear_buffer_dirty(bh); 7101da177e4SLinus Torvalds if (!buffer_uptodate(bh)) 7111da177e4SLinus Torvalds BUG(); 7121da177e4SLinus Torvalds submit_bh(WRITE, bh); 7131da177e4SLinus Torvalds } 7141da177e4SLinus Torvalds 715bd4c625cSLinus Torvalds static int submit_barrier_buffer(struct buffer_head *bh) 716bd4c625cSLinus Torvalds { 7171da177e4SLinus Torvalds get_bh(bh); 7181da177e4SLinus Torvalds bh->b_end_io = reiserfs_end_ordered_io; 7191da177e4SLinus Torvalds clear_buffer_dirty(bh); 7201da177e4SLinus Torvalds if (!buffer_uptodate(bh)) 7211da177e4SLinus Torvalds BUG(); 7221da177e4SLinus Torvalds return submit_bh(WRITE_BARRIER, bh); 7231da177e4SLinus Torvalds } 7241da177e4SLinus Torvalds 7251da177e4SLinus Torvalds static void check_barrier_completion(struct super_block *s, 726bd4c625cSLinus Torvalds struct buffer_head *bh) 727bd4c625cSLinus Torvalds { 7281da177e4SLinus Torvalds if (buffer_eopnotsupp(bh)) { 7291da177e4SLinus Torvalds 
clear_buffer_eopnotsupp(bh); 7301da177e4SLinus Torvalds disable_barrier(s); 7311da177e4SLinus Torvalds set_buffer_uptodate(bh); 7321da177e4SLinus Torvalds set_buffer_dirty(bh); 7338ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 7341da177e4SLinus Torvalds sync_dirty_buffer(bh); 7358ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 7361da177e4SLinus Torvalds } 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds 7391da177e4SLinus Torvalds #define CHUNK_SIZE 32 7401da177e4SLinus Torvalds struct buffer_chunk { 7411da177e4SLinus Torvalds struct buffer_head *bh[CHUNK_SIZE]; 7421da177e4SLinus Torvalds int nr; 7431da177e4SLinus Torvalds }; 7441da177e4SLinus Torvalds 745bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk) 746bd4c625cSLinus Torvalds { 7471da177e4SLinus Torvalds int i; 74822e2c507SJens Axboe get_fs_excl(); 7491da177e4SLinus Torvalds for (i = 0; i < chunk->nr; i++) { 7501da177e4SLinus Torvalds submit_logged_buffer(chunk->bh[i]); 7511da177e4SLinus Torvalds } 7521da177e4SLinus Torvalds chunk->nr = 0; 75322e2c507SJens Axboe put_fs_excl(); 7541da177e4SLinus Torvalds } 7551da177e4SLinus Torvalds 756bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk) 757bd4c625cSLinus Torvalds { 7581da177e4SLinus Torvalds int i; 75922e2c507SJens Axboe get_fs_excl(); 7601da177e4SLinus Torvalds for (i = 0; i < chunk->nr; i++) { 7611da177e4SLinus Torvalds submit_ordered_buffer(chunk->bh[i]); 7621da177e4SLinus Torvalds } 7631da177e4SLinus Torvalds chunk->nr = 0; 76422e2c507SJens Axboe put_fs_excl(); 7651da177e4SLinus Torvalds } 7661da177e4SLinus Torvalds 7671da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 768bd4c625cSLinus Torvalds spinlock_t * lock, void (fn) (struct buffer_chunk *)) 7691da177e4SLinus Torvalds { 7701da177e4SLinus Torvalds int ret = 0; 77114a61442SEric Sesterhenn BUG_ON(chunk->nr >= CHUNK_SIZE); 7721da177e4SLinus Torvalds chunk->bh[chunk->nr++] = bh; 
7731da177e4SLinus Torvalds if (chunk->nr >= CHUNK_SIZE) { 7741da177e4SLinus Torvalds ret = 1; 7751da177e4SLinus Torvalds if (lock) 7761da177e4SLinus Torvalds spin_unlock(lock); 7771da177e4SLinus Torvalds fn(chunk); 7781da177e4SLinus Torvalds if (lock) 7791da177e4SLinus Torvalds spin_lock(lock); 7801da177e4SLinus Torvalds } 7811da177e4SLinus Torvalds return ret; 7821da177e4SLinus Torvalds } 7831da177e4SLinus Torvalds 7841da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); 785bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void) 786bd4c625cSLinus Torvalds { 7871da177e4SLinus Torvalds struct reiserfs_jh *jh; 7881da177e4SLinus Torvalds while (1) { 7891da177e4SLinus Torvalds jh = kmalloc(sizeof(*jh), GFP_NOFS); 7901da177e4SLinus Torvalds if (jh) { 7911da177e4SLinus Torvalds atomic_inc(&nr_reiserfs_jh); 7921da177e4SLinus Torvalds return jh; 7931da177e4SLinus Torvalds } 7941da177e4SLinus Torvalds yield(); 7951da177e4SLinus Torvalds } 7961da177e4SLinus Torvalds } 7971da177e4SLinus Torvalds 7981da177e4SLinus Torvalds /* 7991da177e4SLinus Torvalds * we want to free the jh when the buffer has been written 8001da177e4SLinus Torvalds * and waited on 8011da177e4SLinus Torvalds */ 802bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh) 803bd4c625cSLinus Torvalds { 8041da177e4SLinus Torvalds struct reiserfs_jh *jh; 8051da177e4SLinus Torvalds 8061da177e4SLinus Torvalds jh = bh->b_private; 8071da177e4SLinus Torvalds if (jh) { 8081da177e4SLinus Torvalds bh->b_private = NULL; 8091da177e4SLinus Torvalds jh->bh = NULL; 8101da177e4SLinus Torvalds list_del_init(&jh->list); 8111da177e4SLinus Torvalds kfree(jh); 8121da177e4SLinus Torvalds if (atomic_read(&nr_reiserfs_jh) <= 0) 8131da177e4SLinus Torvalds BUG(); 8141da177e4SLinus Torvalds atomic_dec(&nr_reiserfs_jh); 8151da177e4SLinus Torvalds put_bh(bh); 8161da177e4SLinus Torvalds } 8171da177e4SLinus Torvalds } 8181da177e4SLinus Torvalds 8191da177e4SLinus Torvalds static inline int 
__add_jh(struct reiserfs_journal *j, struct buffer_head *bh, 8201da177e4SLinus Torvalds int tail) 8211da177e4SLinus Torvalds { 8221da177e4SLinus Torvalds struct reiserfs_jh *jh; 8231da177e4SLinus Torvalds 8241da177e4SLinus Torvalds if (bh->b_private) { 8251da177e4SLinus Torvalds spin_lock(&j->j_dirty_buffers_lock); 8261da177e4SLinus Torvalds if (!bh->b_private) { 8271da177e4SLinus Torvalds spin_unlock(&j->j_dirty_buffers_lock); 8281da177e4SLinus Torvalds goto no_jh; 8291da177e4SLinus Torvalds } 8301da177e4SLinus Torvalds jh = bh->b_private; 8311da177e4SLinus Torvalds list_del_init(&jh->list); 8321da177e4SLinus Torvalds } else { 8331da177e4SLinus Torvalds no_jh: 8341da177e4SLinus Torvalds get_bh(bh); 8351da177e4SLinus Torvalds jh = alloc_jh(); 8361da177e4SLinus Torvalds spin_lock(&j->j_dirty_buffers_lock); 8371da177e4SLinus Torvalds /* buffer must be locked for __add_jh, should be able to have 8381da177e4SLinus Torvalds * two adds at the same time 8391da177e4SLinus Torvalds */ 84014a61442SEric Sesterhenn BUG_ON(bh->b_private); 8411da177e4SLinus Torvalds jh->bh = bh; 8421da177e4SLinus Torvalds bh->b_private = jh; 8431da177e4SLinus Torvalds } 8441da177e4SLinus Torvalds jh->jl = j->j_current_jl; 8451da177e4SLinus Torvalds if (tail) 8461da177e4SLinus Torvalds list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); 8471da177e4SLinus Torvalds else { 8481da177e4SLinus Torvalds list_add_tail(&jh->list, &jh->jl->j_bh_list); 8491da177e4SLinus Torvalds } 8501da177e4SLinus Torvalds spin_unlock(&j->j_dirty_buffers_lock); 8511da177e4SLinus Torvalds return 0; 8521da177e4SLinus Torvalds } 8531da177e4SLinus Torvalds 854bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) 855bd4c625cSLinus Torvalds { 8561da177e4SLinus Torvalds return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); 8571da177e4SLinus Torvalds } 858bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) 859bd4c625cSLinus Torvalds { 
8601da177e4SLinus Torvalds return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); 8611da177e4SLinus Torvalds } 8621da177e4SLinus Torvalds 8631da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) 8641da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock, 8651da177e4SLinus Torvalds struct reiserfs_journal *j, 8661da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 8671da177e4SLinus Torvalds struct list_head *list) 8681da177e4SLinus Torvalds { 8691da177e4SLinus Torvalds struct buffer_head *bh; 8701da177e4SLinus Torvalds struct reiserfs_jh *jh; 8711da177e4SLinus Torvalds int ret = j->j_errno; 8721da177e4SLinus Torvalds struct buffer_chunk chunk; 8731da177e4SLinus Torvalds struct list_head tmp; 8741da177e4SLinus Torvalds INIT_LIST_HEAD(&tmp); 8751da177e4SLinus Torvalds 8761da177e4SLinus Torvalds chunk.nr = 0; 8771da177e4SLinus Torvalds spin_lock(lock); 8781da177e4SLinus Torvalds while (!list_empty(list)) { 8791da177e4SLinus Torvalds jh = JH_ENTRY(list->next); 8801da177e4SLinus Torvalds bh = jh->bh; 8811da177e4SLinus Torvalds get_bh(bh); 882ca5de404SNick Piggin if (!trylock_buffer(bh)) { 8831da177e4SLinus Torvalds if (!buffer_dirty(bh)) { 884f116629dSAkinobu Mita list_move(&jh->list, &tmp); 8851da177e4SLinus Torvalds goto loop_next; 8861da177e4SLinus Torvalds } 8871da177e4SLinus Torvalds spin_unlock(lock); 8881da177e4SLinus Torvalds if (chunk.nr) 8891da177e4SLinus Torvalds write_ordered_chunk(&chunk); 8901da177e4SLinus Torvalds wait_on_buffer(bh); 8911da177e4SLinus Torvalds cond_resched(); 8921da177e4SLinus Torvalds spin_lock(lock); 8931da177e4SLinus Torvalds goto loop_next; 8941da177e4SLinus Torvalds } 8953d4492f8SChris Mason /* in theory, dirty non-uptodate buffers should never get here, 8963d4492f8SChris Mason * but the upper layer io error paths still have a few quirks. 
8973d4492f8SChris Mason * Handle them here as gracefully as we can 8983d4492f8SChris Mason */ 8993d4492f8SChris Mason if (!buffer_uptodate(bh) && buffer_dirty(bh)) { 9003d4492f8SChris Mason clear_buffer_dirty(bh); 9013d4492f8SChris Mason ret = -EIO; 9023d4492f8SChris Mason } 9031da177e4SLinus Torvalds if (buffer_dirty(bh)) { 904f116629dSAkinobu Mita list_move(&jh->list, &tmp); 9051da177e4SLinus Torvalds add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 9061da177e4SLinus Torvalds } else { 9071da177e4SLinus Torvalds reiserfs_free_jh(bh); 9081da177e4SLinus Torvalds unlock_buffer(bh); 9091da177e4SLinus Torvalds } 9101da177e4SLinus Torvalds loop_next: 9111da177e4SLinus Torvalds put_bh(bh); 9121da177e4SLinus Torvalds cond_resched_lock(lock); 9131da177e4SLinus Torvalds } 9141da177e4SLinus Torvalds if (chunk.nr) { 9151da177e4SLinus Torvalds spin_unlock(lock); 9161da177e4SLinus Torvalds write_ordered_chunk(&chunk); 9171da177e4SLinus Torvalds spin_lock(lock); 9181da177e4SLinus Torvalds } 9191da177e4SLinus Torvalds while (!list_empty(&tmp)) { 9201da177e4SLinus Torvalds jh = JH_ENTRY(tmp.prev); 9211da177e4SLinus Torvalds bh = jh->bh; 9221da177e4SLinus Torvalds get_bh(bh); 9231da177e4SLinus Torvalds reiserfs_free_jh(bh); 9241da177e4SLinus Torvalds 9251da177e4SLinus Torvalds if (buffer_locked(bh)) { 9261da177e4SLinus Torvalds spin_unlock(lock); 9271da177e4SLinus Torvalds wait_on_buffer(bh); 9281da177e4SLinus Torvalds spin_lock(lock); 9291da177e4SLinus Torvalds } 9301da177e4SLinus Torvalds if (!buffer_uptodate(bh)) { 9311da177e4SLinus Torvalds ret = -EIO; 9321da177e4SLinus Torvalds } 933d62b1b87SChris Mason /* ugly interaction with invalidatepage here. 934d62b1b87SChris Mason * reiserfs_invalidate_page will pin any buffer that has a valid 935d62b1b87SChris Mason * journal head from an older transaction. 
If someone else sets 936d62b1b87SChris Mason * our buffer dirty after we write it in the first loop, and 937d62b1b87SChris Mason * then someone truncates the page away, nobody will ever write 938d62b1b87SChris Mason * the buffer. We're safe if we write the page one last time 939d62b1b87SChris Mason * after freeing the journal header. 940d62b1b87SChris Mason */ 941d62b1b87SChris Mason if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { 942d62b1b87SChris Mason spin_unlock(lock); 943d62b1b87SChris Mason ll_rw_block(WRITE, 1, &bh); 944d62b1b87SChris Mason spin_lock(lock); 945d62b1b87SChris Mason } 9461da177e4SLinus Torvalds put_bh(bh); 9471da177e4SLinus Torvalds cond_resched_lock(lock); 9481da177e4SLinus Torvalds } 9491da177e4SLinus Torvalds spin_unlock(lock); 9501da177e4SLinus Torvalds return ret; 9511da177e4SLinus Torvalds } 9521da177e4SLinus Torvalds 953bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s, 954bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 955bd4c625cSLinus Torvalds { 9561da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 9571da177e4SLinus Torvalds struct reiserfs_journal_list *other_jl; 9581da177e4SLinus Torvalds struct reiserfs_journal_list *first_jl; 9591da177e4SLinus Torvalds struct list_head *entry; 960600ed416SJeff Mahoney unsigned int trans_id = jl->j_trans_id; 961600ed416SJeff Mahoney unsigned int other_trans_id; 962600ed416SJeff Mahoney unsigned int first_trans_id; 9631da177e4SLinus Torvalds 9641da177e4SLinus Torvalds find_first: 9651da177e4SLinus Torvalds /* 9661da177e4SLinus Torvalds * first we walk backwards to find the oldest uncommitted transation 9671da177e4SLinus Torvalds */ 9681da177e4SLinus Torvalds first_jl = jl; 9691da177e4SLinus Torvalds entry = jl->j_list.prev; 9701da177e4SLinus Torvalds while (1) { 9711da177e4SLinus Torvalds other_jl = JOURNAL_LIST_ENTRY(entry); 9721da177e4SLinus Torvalds if (entry == &journal->j_journal_list || 9731da177e4SLinus Torvalds 
atomic_read(&other_jl->j_older_commits_done)) 9741da177e4SLinus Torvalds break; 9751da177e4SLinus Torvalds 9761da177e4SLinus Torvalds first_jl = other_jl; 9771da177e4SLinus Torvalds entry = other_jl->j_list.prev; 9781da177e4SLinus Torvalds } 9791da177e4SLinus Torvalds 9801da177e4SLinus Torvalds /* if we didn't find any older uncommitted transactions, return now */ 9811da177e4SLinus Torvalds if (first_jl == jl) { 9821da177e4SLinus Torvalds return 0; 9831da177e4SLinus Torvalds } 9841da177e4SLinus Torvalds 9851da177e4SLinus Torvalds first_trans_id = first_jl->j_trans_id; 9861da177e4SLinus Torvalds 9871da177e4SLinus Torvalds entry = &first_jl->j_list; 9881da177e4SLinus Torvalds while (1) { 9891da177e4SLinus Torvalds other_jl = JOURNAL_LIST_ENTRY(entry); 9901da177e4SLinus Torvalds other_trans_id = other_jl->j_trans_id; 9911da177e4SLinus Torvalds 9921da177e4SLinus Torvalds if (other_trans_id < trans_id) { 9931da177e4SLinus Torvalds if (atomic_read(&other_jl->j_commit_left) != 0) { 9941da177e4SLinus Torvalds flush_commit_list(s, other_jl, 0); 9951da177e4SLinus Torvalds 9961da177e4SLinus Torvalds /* list we were called with is gone, return */ 9971da177e4SLinus Torvalds if (!journal_list_still_alive(s, trans_id)) 9981da177e4SLinus Torvalds return 1; 9991da177e4SLinus Torvalds 10001da177e4SLinus Torvalds /* the one we just flushed is gone, this means all 10011da177e4SLinus Torvalds * older lists are also gone, so first_jl is no longer 10021da177e4SLinus Torvalds * valid either. Go back to the beginning. 
10031da177e4SLinus Torvalds */ 1004bd4c625cSLinus Torvalds if (!journal_list_still_alive 1005bd4c625cSLinus Torvalds (s, other_trans_id)) { 10061da177e4SLinus Torvalds goto find_first; 10071da177e4SLinus Torvalds } 10081da177e4SLinus Torvalds } 10091da177e4SLinus Torvalds entry = entry->next; 10101da177e4SLinus Torvalds if (entry == &journal->j_journal_list) 10111da177e4SLinus Torvalds return 0; 10121da177e4SLinus Torvalds } else { 10131da177e4SLinus Torvalds return 0; 10141da177e4SLinus Torvalds } 10151da177e4SLinus Torvalds } 10161da177e4SLinus Torvalds return 0; 10171da177e4SLinus Torvalds } 1018deba0f49SAdrian Bunk 1019deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s) 1020bd4c625cSLinus Torvalds { 10211da177e4SLinus Torvalds DEFINE_WAIT(wait); 10221da177e4SLinus Torvalds struct reiserfs_journal *j = SB_JOURNAL(s); 10238ebc4232SFrederic Weisbecker 10248ebc4232SFrederic Weisbecker if (atomic_read(&j->j_async_throttle)) { 10258ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 10268aa7e847SJens Axboe congestion_wait(BLK_RW_ASYNC, HZ / 10); 10278ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 10288ebc4232SFrederic Weisbecker } 10298ebc4232SFrederic Weisbecker 10301da177e4SLinus Torvalds return 0; 10311da177e4SLinus Torvalds } 10321da177e4SLinus Torvalds 10331da177e4SLinus Torvalds /* 10341da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk. 
10351da177e4SLinus Torvalds ** 10361da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) 10371da177e4SLinus Torvalds ** Before the commit block can by written, every other log block must be safely on disk 10381da177e4SLinus Torvalds ** 10391da177e4SLinus Torvalds */ 1040bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s, 1041bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall) 1042bd4c625cSLinus Torvalds { 10431da177e4SLinus Torvalds int i; 10443ee16670SJeff Mahoney b_blocknr_t bn; 10451da177e4SLinus Torvalds struct buffer_head *tbh = NULL; 1046600ed416SJeff Mahoney unsigned int trans_id = jl->j_trans_id; 10471da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 10481da177e4SLinus Torvalds int barrier = 0; 10491da177e4SLinus Torvalds int retval = 0; 1050e0e851cfSChris Mason int write_len; 10511da177e4SLinus Torvalds 10521da177e4SLinus Torvalds reiserfs_check_lock_depth(s, "flush_commit_list"); 10531da177e4SLinus Torvalds 10541da177e4SLinus Torvalds if (atomic_read(&jl->j_older_commits_done)) { 10551da177e4SLinus Torvalds return 0; 10561da177e4SLinus Torvalds } 10571da177e4SLinus Torvalds 105822e2c507SJens Axboe get_fs_excl(); 105922e2c507SJens Axboe 10601da177e4SLinus Torvalds /* before we can put our commit blocks on disk, we have to make sure everyone older than 10611da177e4SLinus Torvalds ** us is on disk too 10621da177e4SLinus Torvalds */ 10631da177e4SLinus Torvalds BUG_ON(jl->j_len <= 0); 10641da177e4SLinus Torvalds BUG_ON(trans_id == journal->j_trans_id); 10651da177e4SLinus Torvalds 10661da177e4SLinus Torvalds get_journal_list(jl); 10671da177e4SLinus Torvalds if (flushall) { 10681da177e4SLinus Torvalds if (flush_older_commits(s, jl) == 1) { 10691da177e4SLinus Torvalds /* list disappeared during flush_older_commits. 
return */ 10701da177e4SLinus Torvalds goto put_jl; 10711da177e4SLinus Torvalds } 10721da177e4SLinus Torvalds } 10731da177e4SLinus Torvalds 10741da177e4SLinus Torvalds /* make sure nobody is trying to flush this one at the same time */ 10758ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s); 10768ebc4232SFrederic Weisbecker 10771da177e4SLinus Torvalds if (!journal_list_still_alive(s, trans_id)) { 107890415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 10791da177e4SLinus Torvalds goto put_jl; 10801da177e4SLinus Torvalds } 10811da177e4SLinus Torvalds BUG_ON(jl->j_trans_id == 0); 10821da177e4SLinus Torvalds 10831da177e4SLinus Torvalds /* this commit is done, exit */ 10841da177e4SLinus Torvalds if (atomic_read(&(jl->j_commit_left)) <= 0) { 10851da177e4SLinus Torvalds if (flushall) { 10861da177e4SLinus Torvalds atomic_set(&(jl->j_older_commits_done), 1); 10871da177e4SLinus Torvalds } 108890415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 10891da177e4SLinus Torvalds goto put_jl; 10901da177e4SLinus Torvalds } 10911da177e4SLinus Torvalds 10921da177e4SLinus Torvalds if (!list_empty(&jl->j_bh_list)) { 10933d4492f8SChris Mason int ret; 10948ebc4232SFrederic Weisbecker 10958ebc4232SFrederic Weisbecker /* 10968ebc4232SFrederic Weisbecker * We might sleep in numerous places inside 10978ebc4232SFrederic Weisbecker * write_ordered_buffers. Relax the write lock. 
10988ebc4232SFrederic Weisbecker */ 10998ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 11003d4492f8SChris Mason ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, 11011da177e4SLinus Torvalds journal, jl, &jl->j_bh_list); 11023d4492f8SChris Mason if (ret < 0 && retval == 0) 11033d4492f8SChris Mason retval = ret; 11048ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 11051da177e4SLinus Torvalds } 11061da177e4SLinus Torvalds BUG_ON(!list_empty(&jl->j_bh_list)); 11071da177e4SLinus Torvalds /* 11081da177e4SLinus Torvalds * for the description block and all the log blocks, submit any buffers 1109e0e851cfSChris Mason * that haven't already reached the disk. Try to write at least 256 1110e0e851cfSChris Mason * log blocks. later on, we will only wait on blocks that correspond 1111e0e851cfSChris Mason * to this transaction, but while we're unplugging we might as well 1112e0e851cfSChris Mason * get a chunk of data on there. 11131da177e4SLinus Torvalds */ 11141da177e4SLinus Torvalds atomic_inc(&journal->j_async_throttle); 1115e0e851cfSChris Mason write_len = jl->j_len + 1; 1116e0e851cfSChris Mason if (write_len < 256) 1117e0e851cfSChris Mason write_len = 256; 1118e0e851cfSChris Mason for (i = 0 ; i < write_len ; i++) { 11191da177e4SLinus Torvalds bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % 11201da177e4SLinus Torvalds SB_ONDISK_JOURNAL_SIZE(s); 11211da177e4SLinus Torvalds tbh = journal_find_get_block(s, bn); 1122e0e851cfSChris Mason if (tbh) { 11236e3647acSFrederic Weisbecker if (buffer_dirty(tbh)) { 11246e3647acSFrederic Weisbecker reiserfs_write_unlock(s); 1125e0e851cfSChris Mason ll_rw_block(WRITE, 1, &tbh); 11266e3647acSFrederic Weisbecker reiserfs_write_lock(s); 11276e3647acSFrederic Weisbecker } 11281da177e4SLinus Torvalds put_bh(tbh) ; 11291da177e4SLinus Torvalds } 1130e0e851cfSChris Mason } 11311da177e4SLinus Torvalds atomic_dec(&journal->j_async_throttle); 11321da177e4SLinus Torvalds 11335d5e8156SJeff Mahoney /* We're skipping the 
commit if there's an error */ 11345d5e8156SJeff Mahoney if (retval || reiserfs_is_journal_aborted(journal)) 11355d5e8156SJeff Mahoney barrier = 0; 11365d5e8156SJeff Mahoney 11371da177e4SLinus Torvalds /* wait on everything written so far before writing the commit 11381da177e4SLinus Torvalds * if we are in barrier mode, send the commit down now 11391da177e4SLinus Torvalds */ 11401da177e4SLinus Torvalds barrier = reiserfs_barrier_flush(s); 11411da177e4SLinus Torvalds if (barrier) { 11421da177e4SLinus Torvalds int ret; 11431da177e4SLinus Torvalds lock_buffer(jl->j_commit_bh); 11441da177e4SLinus Torvalds ret = submit_barrier_buffer(jl->j_commit_bh); 11451da177e4SLinus Torvalds if (ret == -EOPNOTSUPP) { 11461da177e4SLinus Torvalds set_buffer_uptodate(jl->j_commit_bh); 11471da177e4SLinus Torvalds disable_barrier(s); 11481da177e4SLinus Torvalds barrier = 0; 11491da177e4SLinus Torvalds } 11501da177e4SLinus Torvalds } 11511da177e4SLinus Torvalds for (i = 0; i < (jl->j_len + 1); i++) { 11521da177e4SLinus Torvalds bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 11531da177e4SLinus Torvalds (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); 11541da177e4SLinus Torvalds tbh = journal_find_get_block(s, bn); 11558ebc4232SFrederic Weisbecker 11568ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 11571da177e4SLinus Torvalds wait_on_buffer(tbh); 11588ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 11591da177e4SLinus Torvalds // since we're using ll_rw_blk above, it might have skipped over 11601da177e4SLinus Torvalds // a locked buffer. 
Double check here 11611da177e4SLinus Torvalds // 11628ebc4232SFrederic Weisbecker /* redundant, sync_dirty_buffer() checks */ 11638ebc4232SFrederic Weisbecker if (buffer_dirty(tbh)) { 11648ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 11651da177e4SLinus Torvalds sync_dirty_buffer(tbh); 11668ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 11678ebc4232SFrederic Weisbecker } 11681da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(tbh))) { 11691da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 117045b03d5eSJeff Mahoney reiserfs_warning(s, "journal-601", 117145b03d5eSJeff Mahoney "buffer write failed"); 11721da177e4SLinus Torvalds #endif 11731da177e4SLinus Torvalds retval = -EIO; 11741da177e4SLinus Torvalds } 11751da177e4SLinus Torvalds put_bh(tbh); /* once for journal_find_get_block */ 11761da177e4SLinus Torvalds put_bh(tbh); /* once due to original getblk in do_journal_end */ 11771da177e4SLinus Torvalds atomic_dec(&(jl->j_commit_left)); 11781da177e4SLinus Torvalds } 11791da177e4SLinus Torvalds 11801da177e4SLinus Torvalds BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); 11811da177e4SLinus Torvalds 11821da177e4SLinus Torvalds if (!barrier) { 11835d5e8156SJeff Mahoney /* If there was a write error in the journal - we can't commit 11845d5e8156SJeff Mahoney * this transaction - it will be invalid and, if successful, 1185beb7dd86SRobert P. J. Day * will just end up propagating the write error out to 11865d5e8156SJeff Mahoney * the file system. 
*/ 11875d5e8156SJeff Mahoney if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { 11881da177e4SLinus Torvalds if (buffer_dirty(jl->j_commit_bh)) 11891da177e4SLinus Torvalds BUG(); 11901da177e4SLinus Torvalds mark_buffer_dirty(jl->j_commit_bh) ; 11918ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 11921da177e4SLinus Torvalds sync_dirty_buffer(jl->j_commit_bh) ; 11938ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 11945d5e8156SJeff Mahoney } 11958ebc4232SFrederic Weisbecker } else { 11968ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 11971da177e4SLinus Torvalds wait_on_buffer(jl->j_commit_bh); 11988ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 11998ebc4232SFrederic Weisbecker } 12001da177e4SLinus Torvalds 12011da177e4SLinus Torvalds check_barrier_completion(s, jl->j_commit_bh); 12021da177e4SLinus Torvalds 12031da177e4SLinus Torvalds /* If there was a write error in the journal - we can't commit this 12041da177e4SLinus Torvalds * transaction - it will be invalid and, if successful, will just end 1205beb7dd86SRobert P. J. Day * up propagating the write error out to the filesystem. 
*/ 12061da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 12071da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 120845b03d5eSJeff Mahoney reiserfs_warning(s, "journal-615", "buffer write failed"); 12091da177e4SLinus Torvalds #endif 12101da177e4SLinus Torvalds retval = -EIO; 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds bforget(jl->j_commit_bh); 12131da177e4SLinus Torvalds if (journal->j_last_commit_id != 0 && 12141da177e4SLinus Torvalds (jl->j_trans_id - journal->j_last_commit_id) != 1) { 121545b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu", 1216bd4c625cSLinus Torvalds journal->j_last_commit_id, jl->j_trans_id); 12171da177e4SLinus Torvalds } 12181da177e4SLinus Torvalds journal->j_last_commit_id = jl->j_trans_id; 12191da177e4SLinus Torvalds 12201da177e4SLinus Torvalds /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 12211da177e4SLinus Torvalds cleanup_freed_for_journal_list(s, jl); 12221da177e4SLinus Torvalds 12231da177e4SLinus Torvalds retval = retval ? 
retval : journal->j_errno; 12241da177e4SLinus Torvalds 12251da177e4SLinus Torvalds /* mark the metadata dirty */ 12261da177e4SLinus Torvalds if (!retval) 12271da177e4SLinus Torvalds dirty_one_transaction(s, jl); 12281da177e4SLinus Torvalds atomic_dec(&(jl->j_commit_left)); 12291da177e4SLinus Torvalds 12301da177e4SLinus Torvalds if (flushall) { 12311da177e4SLinus Torvalds atomic_set(&(jl->j_older_commits_done), 1); 12321da177e4SLinus Torvalds } 123390415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 12341da177e4SLinus Torvalds put_jl: 12351da177e4SLinus Torvalds put_journal_list(s, jl); 12361da177e4SLinus Torvalds 12371da177e4SLinus Torvalds if (retval) 1238bd4c625cSLinus Torvalds reiserfs_abort(s, retval, "Journal write error in %s", 1239fbe5498bSHarvey Harrison __func__); 124022e2c507SJens Axboe put_fs_excl(); 12411da177e4SLinus Torvalds return retval; 12421da177e4SLinus Torvalds } 12431da177e4SLinus Torvalds 12441da177e4SLinus Torvalds /* 12451da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block. 
This does that, or 12461da177e4SLinus Torvalds ** returns NULL if it can't find anything 12471da177e4SLinus Torvalds */ 1248bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1249bd4c625cSLinus Torvalds reiserfs_journal_cnode 1250bd4c625cSLinus Torvalds *cn) 1251bd4c625cSLinus Torvalds { 12521da177e4SLinus Torvalds struct super_block *sb = cn->sb; 12531da177e4SLinus Torvalds b_blocknr_t blocknr = cn->blocknr; 12541da177e4SLinus Torvalds 12551da177e4SLinus Torvalds cn = cn->hprev; 12561da177e4SLinus Torvalds while (cn) { 12571da177e4SLinus Torvalds if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { 12581da177e4SLinus Torvalds return cn->jlist; 12591da177e4SLinus Torvalds } 12601da177e4SLinus Torvalds cn = cn->hprev; 12611da177e4SLinus Torvalds } 12621da177e4SLinus Torvalds return NULL; 12631da177e4SLinus Torvalds } 12641da177e4SLinus Torvalds 1265a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn) 1266a3172027SChris Mason { 1267a3172027SChris Mason struct super_block *sb = cn->sb; 1268a3172027SChris Mason b_blocknr_t blocknr = cn->blocknr; 1269a3172027SChris Mason 1270a3172027SChris Mason cn = cn->hprev; 1271a3172027SChris Mason while (cn) { 1272a3172027SChris Mason if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && 1273a3172027SChris Mason atomic_read(&cn->jlist->j_commit_left) != 0) 1274a3172027SChris Mason return 0; 1275a3172027SChris Mason cn = cn->hprev; 1276a3172027SChris Mason } 1277a3172027SChris Mason return 1; 1278a3172027SChris Mason } 1279a3172027SChris Mason 1280bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *, 1281bd4c625cSLinus Torvalds struct reiserfs_journal_cnode **, 1282bd4c625cSLinus Torvalds struct reiserfs_journal_list *, unsigned long, 1283bd4c625cSLinus Torvalds int); 12841da177e4SLinus Torvalds 12851da177e4SLinus Torvalds /* 12861da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the 
12871da177e4SLinus Torvalds ** journal list for this transaction. Aside from freeing the cnode, this also allows the 12881da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted. 12891da177e4SLinus Torvalds */ 1290a9dd3643SJeff Mahoney static void remove_all_from_journal_list(struct super_block *sb, 1291bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, 1292bd4c625cSLinus Torvalds int debug) 1293bd4c625cSLinus Torvalds { 1294a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 12951da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *last; 12961da177e4SLinus Torvalds cn = jl->j_realblock; 12971da177e4SLinus Torvalds 12981da177e4SLinus Torvalds /* which is better, to lock once around the whole loop, or 12991da177e4SLinus Torvalds ** to lock for each call to remove_journal_hash? 13001da177e4SLinus Torvalds */ 13011da177e4SLinus Torvalds while (cn) { 13021da177e4SLinus Torvalds if (cn->blocknr != 0) { 13031da177e4SLinus Torvalds if (debug) { 1304a9dd3643SJeff Mahoney reiserfs_warning(sb, "reiserfs-2201", 1305bd4c625cSLinus Torvalds "block %u, bh is %d, state %ld", 1306bd4c625cSLinus Torvalds cn->blocknr, cn->bh ? 1 : 0, 1307bd4c625cSLinus Torvalds cn->state); 13081da177e4SLinus Torvalds } 13091da177e4SLinus Torvalds cn->state = 0; 1310a9dd3643SJeff Mahoney remove_journal_hash(sb, journal->j_list_hash_table, 1311bd4c625cSLinus Torvalds jl, cn->blocknr, 1); 13121da177e4SLinus Torvalds } 13131da177e4SLinus Torvalds last = cn; 13141da177e4SLinus Torvalds cn = cn->next; 1315a9dd3643SJeff Mahoney free_cnode(sb, last); 13161da177e4SLinus Torvalds } 13171da177e4SLinus Torvalds jl->j_realblock = NULL; 13181da177e4SLinus Torvalds } 13191da177e4SLinus Torvalds 13201da177e4SLinus Torvalds /* 13211da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 
13221da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start 13231da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks. 13241da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list 13251da177e4SLinus Torvalds ** 13261da177e4SLinus Torvalds */ 1327a9dd3643SJeff Mahoney static int _update_journal_header_block(struct super_block *sb, 1328bd4c625cSLinus Torvalds unsigned long offset, 1329600ed416SJeff Mahoney unsigned int trans_id) 1330bd4c625cSLinus Torvalds { 13311da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 1332a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 13331da177e4SLinus Torvalds 13341da177e4SLinus Torvalds if (reiserfs_is_journal_aborted(journal)) 13351da177e4SLinus Torvalds return -EIO; 13361da177e4SLinus Torvalds 13371da177e4SLinus Torvalds if (trans_id >= journal->j_last_flush_trans_id) { 13381da177e4SLinus Torvalds if (buffer_locked((journal->j_header_bh))) { 13398ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 13401da177e4SLinus Torvalds wait_on_buffer((journal->j_header_bh)); 13418ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 13421da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 13431da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 1344a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-699", 134545b03d5eSJeff Mahoney "buffer write failed"); 13461da177e4SLinus Torvalds #endif 13471da177e4SLinus Torvalds return -EIO; 13481da177e4SLinus Torvalds } 13491da177e4SLinus Torvalds } 13501da177e4SLinus Torvalds journal->j_last_flush_trans_id = trans_id; 13511da177e4SLinus Torvalds journal->j_first_unflushed_offset = offset; 1352bd4c625cSLinus Torvalds jh = (struct reiserfs_journal_header *)(journal->j_header_bh-> 1353bd4c625cSLinus Torvalds b_data); 13541da177e4SLinus Torvalds jh->j_last_flush_trans_id = 
cpu_to_le32(trans_id); 13551da177e4SLinus Torvalds jh->j_first_unflushed_offset = cpu_to_le32(offset); 13561da177e4SLinus Torvalds jh->j_mount_id = cpu_to_le32(journal->j_mount_id); 13571da177e4SLinus Torvalds 1358a9dd3643SJeff Mahoney if (reiserfs_barrier_flush(sb)) { 13591da177e4SLinus Torvalds int ret; 13601da177e4SLinus Torvalds lock_buffer(journal->j_header_bh); 13611da177e4SLinus Torvalds ret = submit_barrier_buffer(journal->j_header_bh); 13621da177e4SLinus Torvalds if (ret == -EOPNOTSUPP) { 13631da177e4SLinus Torvalds set_buffer_uptodate(journal->j_header_bh); 1364a9dd3643SJeff Mahoney disable_barrier(sb); 13651da177e4SLinus Torvalds goto sync; 13661da177e4SLinus Torvalds } 13678ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 13681da177e4SLinus Torvalds wait_on_buffer(journal->j_header_bh); 13698ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 1370a9dd3643SJeff Mahoney check_barrier_completion(sb, journal->j_header_bh); 13711da177e4SLinus Torvalds } else { 13721da177e4SLinus Torvalds sync: 13731da177e4SLinus Torvalds set_buffer_dirty(journal->j_header_bh); 13748ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 13751da177e4SLinus Torvalds sync_dirty_buffer(journal->j_header_bh); 13768ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 13771da177e4SLinus Torvalds } 13781da177e4SLinus Torvalds if (!buffer_uptodate(journal->j_header_bh)) { 1379a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-837", 138045b03d5eSJeff Mahoney "IO error during journal replay"); 13811da177e4SLinus Torvalds return -EIO; 13821da177e4SLinus Torvalds } 13831da177e4SLinus Torvalds } 13841da177e4SLinus Torvalds return 0; 13851da177e4SLinus Torvalds } 13861da177e4SLinus Torvalds 1387a9dd3643SJeff Mahoney static int update_journal_header_block(struct super_block *sb, 13881da177e4SLinus Torvalds unsigned long offset, 1389600ed416SJeff Mahoney unsigned int trans_id) 1390bd4c625cSLinus Torvalds { 1391a9dd3643SJeff Mahoney return _update_journal_header_block(sb, offset, 
trans_id); 13921da177e4SLinus Torvalds } 1393bd4c625cSLinus Torvalds 13941da177e4SLinus Torvalds /* 13951da177e4SLinus Torvalds ** flush any and all journal lists older than you are 13961da177e4SLinus Torvalds ** can only be called from flush_journal_list 13971da177e4SLinus Torvalds */ 1398a9dd3643SJeff Mahoney static int flush_older_journal_lists(struct super_block *sb, 13991da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 14001da177e4SLinus Torvalds { 14011da177e4SLinus Torvalds struct list_head *entry; 14021da177e4SLinus Torvalds struct reiserfs_journal_list *other_jl; 1403a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1404600ed416SJeff Mahoney unsigned int trans_id = jl->j_trans_id; 14051da177e4SLinus Torvalds 14061da177e4SLinus Torvalds /* we know we are the only ones flushing things, no extra race 14071da177e4SLinus Torvalds * protection is required. 14081da177e4SLinus Torvalds */ 14091da177e4SLinus Torvalds restart: 14101da177e4SLinus Torvalds entry = journal->j_journal_list.next; 14111da177e4SLinus Torvalds /* Did we wrap? 
*/ 14121da177e4SLinus Torvalds if (entry == &journal->j_journal_list) 14131da177e4SLinus Torvalds return 0; 14141da177e4SLinus Torvalds other_jl = JOURNAL_LIST_ENTRY(entry); 14151da177e4SLinus Torvalds if (other_jl->j_trans_id < trans_id) { 14161da177e4SLinus Torvalds BUG_ON(other_jl->j_refcount <= 0); 14171da177e4SLinus Torvalds /* do not flush all */ 1418a9dd3643SJeff Mahoney flush_journal_list(sb, other_jl, 0); 14191da177e4SLinus Torvalds 14201da177e4SLinus Torvalds /* other_jl is now deleted from the list */ 14211da177e4SLinus Torvalds goto restart; 14221da177e4SLinus Torvalds } 14231da177e4SLinus Torvalds return 0; 14241da177e4SLinus Torvalds } 14251da177e4SLinus Torvalds 14261da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s, 1427bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 1428bd4c625cSLinus Torvalds { 14291da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 14301da177e4SLinus Torvalds if (!list_empty(&jl->j_working_list)) { 14311da177e4SLinus Torvalds list_del_init(&jl->j_working_list); 14321da177e4SLinus Torvalds journal->j_num_work_lists--; 14331da177e4SLinus Torvalds } 14341da177e4SLinus Torvalds } 14351da177e4SLinus Torvalds 14361da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks 14371da177e4SLinus Torvalds ** 14381da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside 14391da177e4SLinus Torvalds ** flush_journal_list 14401da177e4SLinus Torvalds ** 14411da177e4SLinus Torvalds ** IMPORTANT. This can only be called while there are no journal writers, 14421da177e4SLinus Torvalds ** and the journal is locked. 
That means it can only be called from 14431da177e4SLinus Torvalds ** do_journal_end, or by journal_release 14441da177e4SLinus Torvalds */ 14451da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s, 1446bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall) 1447bd4c625cSLinus Torvalds { 14481da177e4SLinus Torvalds struct reiserfs_journal_list *pjl; 14491da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *last; 14501da177e4SLinus Torvalds int count; 14511da177e4SLinus Torvalds int was_jwait = 0; 14521da177e4SLinus Torvalds int was_dirty = 0; 14531da177e4SLinus Torvalds struct buffer_head *saved_bh; 14541da177e4SLinus Torvalds unsigned long j_len_saved = jl->j_len; 14551da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 14561da177e4SLinus Torvalds int err = 0; 14571da177e4SLinus Torvalds 14581da177e4SLinus Torvalds BUG_ON(j_len_saved <= 0); 14591da177e4SLinus Torvalds 14601da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) != 0) { 146145b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2048", "called with wcount %d", 14621da177e4SLinus Torvalds atomic_read(&journal->j_wcount)); 14631da177e4SLinus Torvalds } 14641da177e4SLinus Torvalds BUG_ON(jl->j_trans_id == 0); 14651da177e4SLinus Torvalds 14661da177e4SLinus Torvalds /* if flushall == 0, the lock is already held */ 14671da177e4SLinus Torvalds if (flushall) { 14688ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); 1469afe70259SJeff Mahoney } else if (mutex_trylock(&journal->j_flush_mutex)) { 14701da177e4SLinus Torvalds BUG(); 14711da177e4SLinus Torvalds } 14721da177e4SLinus Torvalds 14731da177e4SLinus Torvalds count = 0; 14741da177e4SLinus Torvalds if (j_len_saved > journal->j_trans_max) { 1475c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu", 1476bd4c625cSLinus Torvalds j_len_saved, jl->j_trans_id); 14771da177e4SLinus Torvalds return 0; 14781da177e4SLinus Torvalds } 
14791da177e4SLinus Torvalds 148022e2c507SJens Axboe get_fs_excl(); 148122e2c507SJens Axboe 14821da177e4SLinus Torvalds /* if all the work is already done, get out of here */ 14831da177e4SLinus Torvalds if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 14841da177e4SLinus Torvalds atomic_read(&(jl->j_commit_left)) <= 0) { 14851da177e4SLinus Torvalds goto flush_older_and_return; 14861da177e4SLinus Torvalds } 14871da177e4SLinus Torvalds 14881da177e4SLinus Torvalds /* start by putting the commit list on disk. This will also flush 14891da177e4SLinus Torvalds ** the commit lists of any olders transactions 14901da177e4SLinus Torvalds */ 14911da177e4SLinus Torvalds flush_commit_list(s, jl, 1); 14921da177e4SLinus Torvalds 1493bd4c625cSLinus Torvalds if (!(jl->j_state & LIST_DIRTY) 1494bd4c625cSLinus Torvalds && !reiserfs_is_journal_aborted(journal)) 14951da177e4SLinus Torvalds BUG(); 14961da177e4SLinus Torvalds 14971da177e4SLinus Torvalds /* are we done now? */ 14981da177e4SLinus Torvalds if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 14991da177e4SLinus Torvalds atomic_read(&(jl->j_commit_left)) <= 0) { 15001da177e4SLinus Torvalds goto flush_older_and_return; 15011da177e4SLinus Torvalds } 15021da177e4SLinus Torvalds 15031da177e4SLinus Torvalds /* loop through each cnode, see if we need to write it, 15041da177e4SLinus Torvalds ** or wait on a more recent transaction, or just ignore it 15051da177e4SLinus Torvalds */ 15061da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) != 0) { 1507c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-844", "journal list is flushing, " 1508c3a9c210SJeff Mahoney "wcount is not 0"); 15091da177e4SLinus Torvalds } 15101da177e4SLinus Torvalds cn = jl->j_realblock; 15111da177e4SLinus Torvalds while (cn) { 15121da177e4SLinus Torvalds was_jwait = 0; 15131da177e4SLinus Torvalds was_dirty = 0; 15141da177e4SLinus Torvalds saved_bh = NULL; 15151da177e4SLinus Torvalds /* blocknr of 0 is no longer in the hash, ignore it */ 15161da177e4SLinus Torvalds if 
(cn->blocknr == 0) { 15171da177e4SLinus Torvalds goto free_cnode; 15181da177e4SLinus Torvalds } 15191da177e4SLinus Torvalds 15201da177e4SLinus Torvalds /* This transaction failed commit. Don't write out to the disk */ 15211da177e4SLinus Torvalds if (!(jl->j_state & LIST_DIRTY)) 15221da177e4SLinus Torvalds goto free_cnode; 15231da177e4SLinus Torvalds 15241da177e4SLinus Torvalds pjl = find_newer_jl_for_cn(cn); 15251da177e4SLinus Torvalds /* the order is important here. We check pjl to make sure we 15261da177e4SLinus Torvalds ** don't clear BH_JDirty_wait if we aren't the one writing this 15271da177e4SLinus Torvalds ** block to disk 15281da177e4SLinus Torvalds */ 15291da177e4SLinus Torvalds if (!pjl && cn->bh) { 15301da177e4SLinus Torvalds saved_bh = cn->bh; 15311da177e4SLinus Torvalds 15321da177e4SLinus Torvalds /* we do this to make sure nobody releases the buffer while 15331da177e4SLinus Torvalds ** we are working with it 15341da177e4SLinus Torvalds */ 15351da177e4SLinus Torvalds get_bh(saved_bh); 15361da177e4SLinus Torvalds 15371da177e4SLinus Torvalds if (buffer_journal_dirty(saved_bh)) { 15381da177e4SLinus Torvalds BUG_ON(!can_dirty(cn)); 15391da177e4SLinus Torvalds was_jwait = 1; 15401da177e4SLinus Torvalds was_dirty = 1; 15411da177e4SLinus Torvalds } else if (can_dirty(cn)) { 15421da177e4SLinus Torvalds /* everything with !pjl && jwait should be writable */ 15431da177e4SLinus Torvalds BUG(); 15441da177e4SLinus Torvalds } 15451da177e4SLinus Torvalds } 15461da177e4SLinus Torvalds 15471da177e4SLinus Torvalds /* if someone has this block in a newer transaction, just make 15480779bf2dSMatt LaPlante ** sure they are committed, and don't try writing it to disk 15491da177e4SLinus Torvalds */ 15501da177e4SLinus Torvalds if (pjl) { 15511da177e4SLinus Torvalds if (atomic_read(&pjl->j_commit_left)) 15521da177e4SLinus Torvalds flush_commit_list(s, pjl, 1); 15531da177e4SLinus Torvalds goto free_cnode; 15541da177e4SLinus Torvalds } 15551da177e4SLinus Torvalds 
15561da177e4SLinus Torvalds /* bh == NULL when the block got to disk on its own, OR, 15571da177e4SLinus Torvalds ** the block got freed in a future transaction 15581da177e4SLinus Torvalds */ 15591da177e4SLinus Torvalds if (saved_bh == NULL) { 15601da177e4SLinus Torvalds goto free_cnode; 15611da177e4SLinus Torvalds } 15621da177e4SLinus Torvalds 15631da177e4SLinus Torvalds /* this should never happen. kupdate_one_transaction has this list 15641da177e4SLinus Torvalds ** locked while it works, so we should never see a buffer here that 15651da177e4SLinus Torvalds ** is not marked JDirty_wait 15661da177e4SLinus Torvalds */ 15671da177e4SLinus Torvalds if ((!was_jwait) && !buffer_locked(saved_bh)) { 156845b03d5eSJeff Mahoney reiserfs_warning(s, "journal-813", 156945b03d5eSJeff Mahoney "BAD! buffer %llu %cdirty %cjwait, " 15701da177e4SLinus Torvalds "not in a newer tranasction", 1571bd4c625cSLinus Torvalds (unsigned long long)saved_bh-> 1572bd4c625cSLinus Torvalds b_blocknr, was_dirty ? ' ' : '!', 1573bd4c625cSLinus Torvalds was_jwait ? 
' ' : '!'); 15741da177e4SLinus Torvalds } 15751da177e4SLinus Torvalds if (was_dirty) { 15761da177e4SLinus Torvalds /* we inc again because saved_bh gets decremented at free_cnode */ 15771da177e4SLinus Torvalds get_bh(saved_bh); 15781da177e4SLinus Torvalds set_bit(BLOCK_NEEDS_FLUSH, &cn->state); 15791da177e4SLinus Torvalds lock_buffer(saved_bh); 15801da177e4SLinus Torvalds BUG_ON(cn->blocknr != saved_bh->b_blocknr); 15811da177e4SLinus Torvalds if (buffer_dirty(saved_bh)) 15821da177e4SLinus Torvalds submit_logged_buffer(saved_bh); 15831da177e4SLinus Torvalds else 15841da177e4SLinus Torvalds unlock_buffer(saved_bh); 15851da177e4SLinus Torvalds count++; 15861da177e4SLinus Torvalds } else { 158745b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2082", 158845b03d5eSJeff Mahoney "Unable to flush buffer %llu in %s", 1589bd4c625cSLinus Torvalds (unsigned long long)saved_bh-> 1590fbe5498bSHarvey Harrison b_blocknr, __func__); 15911da177e4SLinus Torvalds } 15921da177e4SLinus Torvalds free_cnode: 15931da177e4SLinus Torvalds last = cn; 15941da177e4SLinus Torvalds cn = cn->next; 15951da177e4SLinus Torvalds if (saved_bh) { 15961da177e4SLinus Torvalds /* we incremented this to keep others from taking the buffer head away */ 15971da177e4SLinus Torvalds put_bh(saved_bh); 15981da177e4SLinus Torvalds if (atomic_read(&(saved_bh->b_count)) < 0) { 159945b03d5eSJeff Mahoney reiserfs_warning(s, "journal-945", 160045b03d5eSJeff Mahoney "saved_bh->b_count < 0"); 16011da177e4SLinus Torvalds } 16021da177e4SLinus Torvalds } 16031da177e4SLinus Torvalds } 16041da177e4SLinus Torvalds if (count > 0) { 16051da177e4SLinus Torvalds cn = jl->j_realblock; 16061da177e4SLinus Torvalds while (cn) { 16071da177e4SLinus Torvalds if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 16081da177e4SLinus Torvalds if (!cn->bh) { 1609c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-1011", 1610c3a9c210SJeff Mahoney "cn->bh is NULL"); 16111da177e4SLinus Torvalds } 16128ebc4232SFrederic Weisbecker 16138ebc4232SFrederic 
Weisbecker reiserfs_write_unlock(s); 16141da177e4SLinus Torvalds wait_on_buffer(cn->bh); 16158ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 16168ebc4232SFrederic Weisbecker 16171da177e4SLinus Torvalds if (!cn->bh) { 1618c3a9c210SJeff Mahoney reiserfs_panic(s, "journal-1012", 1619c3a9c210SJeff Mahoney "cn->bh is NULL"); 16201da177e4SLinus Torvalds } 16211da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(cn->bh))) { 16221da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 162345b03d5eSJeff Mahoney reiserfs_warning(s, "journal-949", 162445b03d5eSJeff Mahoney "buffer write failed"); 16251da177e4SLinus Torvalds #endif 16261da177e4SLinus Torvalds err = -EIO; 16271da177e4SLinus Torvalds } 16281da177e4SLinus Torvalds /* note, we must clear the JDirty_wait bit after the up to date 16291da177e4SLinus Torvalds ** check, otherwise we race against our flushpage routine 16301da177e4SLinus Torvalds */ 1631bd4c625cSLinus Torvalds BUG_ON(!test_clear_buffer_journal_dirty 1632bd4c625cSLinus Torvalds (cn->bh)); 16331da177e4SLinus Torvalds 1634398c95bdSChris Mason /* drop one ref for us */ 16351da177e4SLinus Torvalds put_bh(cn->bh); 1636398c95bdSChris Mason /* drop one ref for journal_mark_dirty */ 1637398c95bdSChris Mason release_buffer_page(cn->bh); 16381da177e4SLinus Torvalds } 16391da177e4SLinus Torvalds cn = cn->next; 16401da177e4SLinus Torvalds } 16411da177e4SLinus Torvalds } 16421da177e4SLinus Torvalds 16431da177e4SLinus Torvalds if (err) 1644bd4c625cSLinus Torvalds reiserfs_abort(s, -EIO, 1645bd4c625cSLinus Torvalds "Write error while pushing transaction to disk in %s", 1646fbe5498bSHarvey Harrison __func__); 16471da177e4SLinus Torvalds flush_older_and_return: 16481da177e4SLinus Torvalds 16491da177e4SLinus Torvalds /* before we can update the journal header block, we _must_ flush all 16501da177e4SLinus Torvalds ** real blocks from all older transactions to disk. 
This is because 16511da177e4SLinus Torvalds ** once the header block is updated, this transaction will not be 16521da177e4SLinus Torvalds ** replayed after a crash 16531da177e4SLinus Torvalds */ 16541da177e4SLinus Torvalds if (flushall) { 16551da177e4SLinus Torvalds flush_older_journal_lists(s, jl); 16561da177e4SLinus Torvalds } 16571da177e4SLinus Torvalds 16581da177e4SLinus Torvalds err = journal->j_errno; 16591da177e4SLinus Torvalds /* before we can remove everything from the hash tables for this 16601da177e4SLinus Torvalds ** transaction, we must make sure it can never be replayed 16611da177e4SLinus Torvalds ** 16621da177e4SLinus Torvalds ** since we are only called from do_journal_end, we know for sure there 16631da177e4SLinus Torvalds ** are no allocations going on while we are flushing journal lists. So, 16641da177e4SLinus Torvalds ** we only need to update the journal header block for the last list 16651da177e4SLinus Torvalds ** being flushed 16661da177e4SLinus Torvalds */ 16671da177e4SLinus Torvalds if (!err && flushall) { 1668bd4c625cSLinus Torvalds err = 1669bd4c625cSLinus Torvalds update_journal_header_block(s, 1670bd4c625cSLinus Torvalds (jl->j_start + jl->j_len + 1671bd4c625cSLinus Torvalds 2) % SB_ONDISK_JOURNAL_SIZE(s), 1672bd4c625cSLinus Torvalds jl->j_trans_id); 16731da177e4SLinus Torvalds if (err) 1674bd4c625cSLinus Torvalds reiserfs_abort(s, -EIO, 1675bd4c625cSLinus Torvalds "Write error while updating journal header in %s", 1676fbe5498bSHarvey Harrison __func__); 16771da177e4SLinus Torvalds } 16781da177e4SLinus Torvalds remove_all_from_journal_list(s, jl, 0); 16791da177e4SLinus Torvalds list_del_init(&jl->j_list); 16801da177e4SLinus Torvalds journal->j_num_lists--; 16811da177e4SLinus Torvalds del_from_work_list(s, jl); 16821da177e4SLinus Torvalds 16831da177e4SLinus Torvalds if (journal->j_last_flush_id != 0 && 16841da177e4SLinus Torvalds (jl->j_trans_id - journal->j_last_flush_id) != 1) { 168545b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2201", 
"last flush %lu, current %lu", 1686bd4c625cSLinus Torvalds journal->j_last_flush_id, jl->j_trans_id); 16871da177e4SLinus Torvalds } 16881da177e4SLinus Torvalds journal->j_last_flush_id = jl->j_trans_id; 16891da177e4SLinus Torvalds 16901da177e4SLinus Torvalds /* not strictly required since we are freeing the list, but it should 16911da177e4SLinus Torvalds * help find code using dead lists later on 16921da177e4SLinus Torvalds */ 16931da177e4SLinus Torvalds jl->j_len = 0; 16941da177e4SLinus Torvalds atomic_set(&(jl->j_nonzerolen), 0); 16951da177e4SLinus Torvalds jl->j_start = 0; 16961da177e4SLinus Torvalds jl->j_realblock = NULL; 16971da177e4SLinus Torvalds jl->j_commit_bh = NULL; 16981da177e4SLinus Torvalds jl->j_trans_id = 0; 16991da177e4SLinus Torvalds jl->j_state = 0; 17001da177e4SLinus Torvalds put_journal_list(s, jl); 17011da177e4SLinus Torvalds if (flushall) 1702afe70259SJeff Mahoney mutex_unlock(&journal->j_flush_mutex); 170322e2c507SJens Axboe put_fs_excl(); 17041da177e4SLinus Torvalds return err; 17051da177e4SLinus Torvalds } 17061da177e4SLinus Torvalds 1707a3172027SChris Mason static int test_transaction(struct super_block *s, 1708a3172027SChris Mason struct reiserfs_journal_list *jl) 1709a3172027SChris Mason { 1710a3172027SChris Mason struct reiserfs_journal_cnode *cn; 1711a3172027SChris Mason 1712a3172027SChris Mason if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) 1713a3172027SChris Mason return 1; 1714a3172027SChris Mason 1715a3172027SChris Mason cn = jl->j_realblock; 1716a3172027SChris Mason while (cn) { 1717a3172027SChris Mason /* if the blocknr == 0, this has been cleared from the hash, 1718a3172027SChris Mason ** skip it 1719a3172027SChris Mason */ 1720a3172027SChris Mason if (cn->blocknr == 0) { 1721a3172027SChris Mason goto next; 1722a3172027SChris Mason } 1723a3172027SChris Mason if (cn->bh && !newer_jl_done(cn)) 1724a3172027SChris Mason return 0; 1725a3172027SChris Mason next: 1726a3172027SChris Mason cn = cn->next; 1727a3172027SChris 
Mason cond_resched(); 1728a3172027SChris Mason } 1729a3172027SChris Mason return 0; 1730a3172027SChris Mason } 1731a3172027SChris Mason 17321da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s, 17331da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 17341da177e4SLinus Torvalds struct buffer_chunk *chunk) 17351da177e4SLinus Torvalds { 17361da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 17371da177e4SLinus Torvalds int ret = 0; 17381da177e4SLinus Torvalds 17391da177e4SLinus Torvalds jl->j_state |= LIST_TOUCHED; 17401da177e4SLinus Torvalds del_from_work_list(s, jl); 17411da177e4SLinus Torvalds if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { 17421da177e4SLinus Torvalds return 0; 17431da177e4SLinus Torvalds } 17441da177e4SLinus Torvalds 17451da177e4SLinus Torvalds cn = jl->j_realblock; 17461da177e4SLinus Torvalds while (cn) { 17471da177e4SLinus Torvalds /* if the blocknr == 0, this has been cleared from the hash, 17481da177e4SLinus Torvalds ** skip it 17491da177e4SLinus Torvalds */ 17501da177e4SLinus Torvalds if (cn->blocknr == 0) { 17511da177e4SLinus Torvalds goto next; 17521da177e4SLinus Torvalds } 17531da177e4SLinus Torvalds if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 17541da177e4SLinus Torvalds struct buffer_head *tmp_bh; 17551da177e4SLinus Torvalds /* we can race against journal_mark_freed when we try 17561da177e4SLinus Torvalds * to lock_buffer(cn->bh), so we have to inc the buffer 17571da177e4SLinus Torvalds * count, and recheck things after locking 17581da177e4SLinus Torvalds */ 17591da177e4SLinus Torvalds tmp_bh = cn->bh; 17601da177e4SLinus Torvalds get_bh(tmp_bh); 17611da177e4SLinus Torvalds lock_buffer(tmp_bh); 17621da177e4SLinus Torvalds if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { 17631da177e4SLinus Torvalds if (!buffer_journal_dirty(tmp_bh) || 17641da177e4SLinus Torvalds buffer_journal_prepared(tmp_bh)) 17651da177e4SLinus Torvalds BUG(); 17661da177e4SLinus Torvalds 
add_to_chunk(chunk, tmp_bh, NULL, write_chunk); 17671da177e4SLinus Torvalds ret++; 17681da177e4SLinus Torvalds } else { 17691da177e4SLinus Torvalds /* note, cn->bh might be null now */ 17701da177e4SLinus Torvalds unlock_buffer(tmp_bh); 17711da177e4SLinus Torvalds } 17721da177e4SLinus Torvalds put_bh(tmp_bh); 17731da177e4SLinus Torvalds } 17741da177e4SLinus Torvalds next: 17751da177e4SLinus Torvalds cn = cn->next; 17761da177e4SLinus Torvalds cond_resched(); 17771da177e4SLinus Torvalds } 17781da177e4SLinus Torvalds return ret; 17791da177e4SLinus Torvalds } 17801da177e4SLinus Torvalds 17811da177e4SLinus Torvalds /* used by flush_commit_list */ 17821da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s, 17831da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 17841da177e4SLinus Torvalds { 17851da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 17861da177e4SLinus Torvalds struct reiserfs_journal_list *pjl; 17871da177e4SLinus Torvalds int ret = 0; 17881da177e4SLinus Torvalds 17891da177e4SLinus Torvalds jl->j_state |= LIST_DIRTY; 17901da177e4SLinus Torvalds cn = jl->j_realblock; 17911da177e4SLinus Torvalds while (cn) { 17921da177e4SLinus Torvalds /* look for a more recent transaction that logged this 17931da177e4SLinus Torvalds ** buffer. Only the most recent transaction with a buffer in 17941da177e4SLinus Torvalds ** it is allowed to send that buffer to disk 17951da177e4SLinus Torvalds */ 17961da177e4SLinus Torvalds pjl = find_newer_jl_for_cn(cn); 1797bd4c625cSLinus Torvalds if (!pjl && cn->blocknr && cn->bh 1798bd4c625cSLinus Torvalds && buffer_journal_dirty(cn->bh)) { 17991da177e4SLinus Torvalds BUG_ON(!can_dirty(cn)); 18001da177e4SLinus Torvalds /* if the buffer is prepared, it will either be logged 18011da177e4SLinus Torvalds * or restored. 
If restored, we need to make sure 18021da177e4SLinus Torvalds * it actually gets marked dirty 18031da177e4SLinus Torvalds */ 18041da177e4SLinus Torvalds clear_buffer_journal_new(cn->bh); 18051da177e4SLinus Torvalds if (buffer_journal_prepared(cn->bh)) { 18061da177e4SLinus Torvalds set_buffer_journal_restore_dirty(cn->bh); 18071da177e4SLinus Torvalds } else { 18081da177e4SLinus Torvalds set_buffer_journal_test(cn->bh); 18091da177e4SLinus Torvalds mark_buffer_dirty(cn->bh); 18101da177e4SLinus Torvalds } 18111da177e4SLinus Torvalds } 18121da177e4SLinus Torvalds cn = cn->next; 18131da177e4SLinus Torvalds } 18141da177e4SLinus Torvalds return ret; 18151da177e4SLinus Torvalds } 18161da177e4SLinus Torvalds 18171da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s, 18181da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 18191da177e4SLinus Torvalds struct reiserfs_journal_list **next_jl, 1820600ed416SJeff Mahoney unsigned int *next_trans_id, 1821bd4c625cSLinus Torvalds int num_blocks, int num_trans) 1822bd4c625cSLinus Torvalds { 18231da177e4SLinus Torvalds int ret = 0; 18241da177e4SLinus Torvalds int written = 0; 18251da177e4SLinus Torvalds int transactions_flushed = 0; 1826600ed416SJeff Mahoney unsigned int orig_trans_id = jl->j_trans_id; 18271da177e4SLinus Torvalds struct buffer_chunk chunk; 18281da177e4SLinus Torvalds struct list_head *entry; 18291da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 18301da177e4SLinus Torvalds chunk.nr = 0; 18311da177e4SLinus Torvalds 1832a412f9efSFrederic Weisbecker reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s); 18331da177e4SLinus Torvalds if (!journal_list_still_alive(s, orig_trans_id)) { 18341da177e4SLinus Torvalds goto done; 18351da177e4SLinus Torvalds } 18361da177e4SLinus Torvalds 1837afe70259SJeff Mahoney /* we've got j_flush_mutex held, nobody is going to delete any 18381da177e4SLinus Torvalds * of these lists out from underneath us 18391da177e4SLinus Torvalds */ 
18401da177e4SLinus Torvalds while ((num_trans && transactions_flushed < num_trans) || 18411da177e4SLinus Torvalds (!num_trans && written < num_blocks)) { 18421da177e4SLinus Torvalds 18431da177e4SLinus Torvalds if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || 1844bd4c625cSLinus Torvalds atomic_read(&jl->j_commit_left) 1845bd4c625cSLinus Torvalds || !(jl->j_state & LIST_DIRTY)) { 18461da177e4SLinus Torvalds del_from_work_list(s, jl); 18471da177e4SLinus Torvalds break; 18481da177e4SLinus Torvalds } 18491da177e4SLinus Torvalds ret = write_one_transaction(s, jl, &chunk); 18501da177e4SLinus Torvalds 18511da177e4SLinus Torvalds if (ret < 0) 18521da177e4SLinus Torvalds goto done; 18531da177e4SLinus Torvalds transactions_flushed++; 18541da177e4SLinus Torvalds written += ret; 18551da177e4SLinus Torvalds entry = jl->j_list.next; 18561da177e4SLinus Torvalds 18571da177e4SLinus Torvalds /* did we wrap? */ 18581da177e4SLinus Torvalds if (entry == &journal->j_journal_list) { 18591da177e4SLinus Torvalds break; 18601da177e4SLinus Torvalds } 18611da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 18621da177e4SLinus Torvalds 18631da177e4SLinus Torvalds /* don't bother with older transactions */ 18641da177e4SLinus Torvalds if (jl->j_trans_id <= orig_trans_id) 18651da177e4SLinus Torvalds break; 18661da177e4SLinus Torvalds } 18671da177e4SLinus Torvalds if (chunk.nr) { 18681da177e4SLinus Torvalds write_chunk(&chunk); 18691da177e4SLinus Torvalds } 18701da177e4SLinus Torvalds 18711da177e4SLinus Torvalds done: 1872afe70259SJeff Mahoney mutex_unlock(&journal->j_flush_mutex); 18731da177e4SLinus Torvalds return ret; 18741da177e4SLinus Torvalds } 18751da177e4SLinus Torvalds 18761da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use 18771da177e4SLinus Torvalds ** all the journa list slots with tiny transactions. 
These 18781da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which 18791da177e4SLinus Torvalds ** adds seeks and slows things down. 18801da177e4SLinus Torvalds ** 18811da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists 18821da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal 18831da177e4SLinus Torvalds ** list updates the header block 18841da177e4SLinus Torvalds */ 18851da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s, 1886bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 1887bd4c625cSLinus Torvalds { 18881da177e4SLinus Torvalds unsigned long len = 0; 18891da177e4SLinus Torvalds unsigned long cur_len; 18901da177e4SLinus Torvalds int ret; 18911da177e4SLinus Torvalds int i; 18921da177e4SLinus Torvalds int limit = 256; 18931da177e4SLinus Torvalds struct reiserfs_journal_list *tjl; 18941da177e4SLinus Torvalds struct reiserfs_journal_list *flush_jl; 1895600ed416SJeff Mahoney unsigned int trans_id; 18961da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 18971da177e4SLinus Torvalds 18981da177e4SLinus Torvalds flush_jl = tjl = jl; 18991da177e4SLinus Torvalds 19001da177e4SLinus Torvalds /* in data logging mode, try harder to flush a lot of blocks */ 19011da177e4SLinus Torvalds if (reiserfs_data_log(s)) 19021da177e4SLinus Torvalds limit = 1024; 19031da177e4SLinus Torvalds /* flush for 256 transactions or limit blocks, whichever comes first */ 19041da177e4SLinus Torvalds for (i = 0; i < 256 && len < limit; i++) { 19051da177e4SLinus Torvalds if (atomic_read(&tjl->j_commit_left) || 19061da177e4SLinus Torvalds tjl->j_trans_id < jl->j_trans_id) { 19071da177e4SLinus Torvalds break; 19081da177e4SLinus Torvalds } 19091da177e4SLinus Torvalds cur_len = atomic_read(&tjl->j_nonzerolen); 19101da177e4SLinus Torvalds if (cur_len > 0) { 19111da177e4SLinus Torvalds tjl->j_state &= ~LIST_TOUCHED; 
19121da177e4SLinus Torvalds } 19131da177e4SLinus Torvalds len += cur_len; 19141da177e4SLinus Torvalds flush_jl = tjl; 19151da177e4SLinus Torvalds if (tjl->j_list.next == &journal->j_journal_list) 19161da177e4SLinus Torvalds break; 19171da177e4SLinus Torvalds tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 19181da177e4SLinus Torvalds } 19191da177e4SLinus Torvalds /* try to find a group of blocks we can flush across all the 19201da177e4SLinus Torvalds ** transactions, but only bother if we've actually spanned 19211da177e4SLinus Torvalds ** across multiple lists 19221da177e4SLinus Torvalds */ 19231da177e4SLinus Torvalds if (flush_jl != jl) { 19241da177e4SLinus Torvalds ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 19251da177e4SLinus Torvalds } 19261da177e4SLinus Torvalds flush_journal_list(s, flush_jl, 1); 19271da177e4SLinus Torvalds return 0; 19281da177e4SLinus Torvalds } 19291da177e4SLinus Torvalds 19301da177e4SLinus Torvalds /* 19311da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh. 19321da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers. 
19331da177e4SLinus Torvalds */ 19341da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb, 19351da177e4SLinus Torvalds struct reiserfs_journal_cnode **table, 19361da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 19371da177e4SLinus Torvalds unsigned long block, int remove_freed) 19381da177e4SLinus Torvalds { 19391da177e4SLinus Torvalds struct reiserfs_journal_cnode *cur; 19401da177e4SLinus Torvalds struct reiserfs_journal_cnode **head; 19411da177e4SLinus Torvalds 19421da177e4SLinus Torvalds head = &(journal_hash(table, sb, block)); 19431da177e4SLinus Torvalds if (!head) { 19441da177e4SLinus Torvalds return; 19451da177e4SLinus Torvalds } 19461da177e4SLinus Torvalds cur = *head; 19471da177e4SLinus Torvalds while (cur) { 1948bd4c625cSLinus Torvalds if (cur->blocknr == block && cur->sb == sb 1949bd4c625cSLinus Torvalds && (jl == NULL || jl == cur->jlist) 1950bd4c625cSLinus Torvalds && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { 19511da177e4SLinus Torvalds if (cur->hnext) { 19521da177e4SLinus Torvalds cur->hnext->hprev = cur->hprev; 19531da177e4SLinus Torvalds } 19541da177e4SLinus Torvalds if (cur->hprev) { 19551da177e4SLinus Torvalds cur->hprev->hnext = cur->hnext; 19561da177e4SLinus Torvalds } else { 19571da177e4SLinus Torvalds *head = cur->hnext; 19581da177e4SLinus Torvalds } 19591da177e4SLinus Torvalds cur->blocknr = 0; 19601da177e4SLinus Torvalds cur->sb = NULL; 19611da177e4SLinus Torvalds cur->state = 0; 19621da177e4SLinus Torvalds if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 19631da177e4SLinus Torvalds atomic_dec(&(cur->jlist->j_nonzerolen)); 19641da177e4SLinus Torvalds cur->bh = NULL; 19651da177e4SLinus Torvalds cur->jlist = NULL; 19661da177e4SLinus Torvalds } 19671da177e4SLinus Torvalds cur = cur->hnext; 19681da177e4SLinus Torvalds } 19691da177e4SLinus Torvalds } 19701da177e4SLinus Torvalds 1971a9dd3643SJeff Mahoney static void free_journal_ram(struct super_block *sb) 
1972bd4c625cSLinus Torvalds { 1973a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 1974d739b42bSPekka Enberg kfree(journal->j_current_jl); 19751da177e4SLinus Torvalds journal->j_num_lists--; 19761da177e4SLinus Torvalds 19771da177e4SLinus Torvalds vfree(journal->j_cnode_free_orig); 1978a9dd3643SJeff Mahoney free_list_bitmaps(sb, journal->j_list_bitmap); 1979a9dd3643SJeff Mahoney free_bitmap_nodes(sb); /* must be after free_list_bitmaps */ 19801da177e4SLinus Torvalds if (journal->j_header_bh) { 19811da177e4SLinus Torvalds brelse(journal->j_header_bh); 19821da177e4SLinus Torvalds } 19831da177e4SLinus Torvalds /* j_header_bh is on the journal dev, make sure not to release the journal 19841da177e4SLinus Torvalds * dev until we brelse j_header_bh 19851da177e4SLinus Torvalds */ 1986a9dd3643SJeff Mahoney release_journal_dev(sb, journal); 19871da177e4SLinus Torvalds vfree(journal); 19881da177e4SLinus Torvalds } 19891da177e4SLinus Torvalds 19901da177e4SLinus Torvalds /* 19911da177e4SLinus Torvalds ** call on unmount. Only set error to 1 if you haven't made your way out 19921da177e4SLinus Torvalds ** of read_super() yet. Any other caller must keep error at 0. 
19931da177e4SLinus Torvalds */ 1994bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th, 1995a9dd3643SJeff Mahoney struct super_block *sb, int error) 1996bd4c625cSLinus Torvalds { 19971da177e4SLinus Torvalds struct reiserfs_transaction_handle myth; 19981da177e4SLinus Torvalds int flushed = 0; 1999a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 20001da177e4SLinus Torvalds 20011da177e4SLinus Torvalds /* we only want to flush out transactions if we were called with error == 0 20021da177e4SLinus Torvalds */ 2003a9dd3643SJeff Mahoney if (!error && !(sb->s_flags & MS_RDONLY)) { 20041da177e4SLinus Torvalds /* end the current trans */ 20051da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 2006a9dd3643SJeff Mahoney do_journal_end(th, sb, 10, FLUSH_ALL); 20071da177e4SLinus Torvalds 20081da177e4SLinus Torvalds /* make sure something gets logged to force our way into the flush code */ 2009a9dd3643SJeff Mahoney if (!journal_join(&myth, sb, 1)) { 2010a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, 2011a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb), 2012bd4c625cSLinus Torvalds 1); 2013a9dd3643SJeff Mahoney journal_mark_dirty(&myth, sb, 2014a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb)); 2015a9dd3643SJeff Mahoney do_journal_end(&myth, sb, 1, FLUSH_ALL); 20161da177e4SLinus Torvalds flushed = 1; 20171da177e4SLinus Torvalds } 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds 20201da177e4SLinus Torvalds /* this also catches errors during the do_journal_end above */ 20211da177e4SLinus Torvalds if (!error && reiserfs_is_journal_aborted(journal)) { 20221da177e4SLinus Torvalds memset(&myth, 0, sizeof(myth)); 2023a9dd3643SJeff Mahoney if (!journal_join_abort(&myth, sb, 1)) { 2024a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, 2025a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb), 2026bd4c625cSLinus Torvalds 1); 2027a9dd3643SJeff Mahoney journal_mark_dirty(&myth, sb, 2028a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb)); 
2029a9dd3643SJeff Mahoney do_journal_end(&myth, sb, 1, FLUSH_ALL); 20301da177e4SLinus Torvalds } 20311da177e4SLinus Torvalds } 20321da177e4SLinus Torvalds 20331da177e4SLinus Torvalds reiserfs_mounted_fs_count--; 20341da177e4SLinus Torvalds /* wait for all commits to finish */ 2035a9dd3643SJeff Mahoney cancel_delayed_work(&SB_JOURNAL(sb)->j_work); 20368ebc4232SFrederic Weisbecker 20378ebc4232SFrederic Weisbecker /* 20388ebc4232SFrederic Weisbecker * We must release the write lock here because 20398ebc4232SFrederic Weisbecker * the workqueue job (flush_async_commit) needs this lock 20408ebc4232SFrederic Weisbecker */ 20418ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 20421da177e4SLinus Torvalds flush_workqueue(commit_wq); 20438ebc4232SFrederic Weisbecker 20441da177e4SLinus Torvalds if (!reiserfs_mounted_fs_count) { 20451da177e4SLinus Torvalds destroy_workqueue(commit_wq); 20461da177e4SLinus Torvalds commit_wq = NULL; 20471da177e4SLinus Torvalds } 20488ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 20491da177e4SLinus Torvalds 2050a9dd3643SJeff Mahoney free_journal_ram(sb); 20511da177e4SLinus Torvalds 20521da177e4SLinus Torvalds return 0; 20531da177e4SLinus Torvalds } 20541da177e4SLinus Torvalds 20551da177e4SLinus Torvalds /* 20561da177e4SLinus Torvalds ** call on unmount. flush all journal trans, release all alloc'd ram 20571da177e4SLinus Torvalds */ 2058bd4c625cSLinus Torvalds int journal_release(struct reiserfs_transaction_handle *th, 2059a9dd3643SJeff Mahoney struct super_block *sb) 2060bd4c625cSLinus Torvalds { 2061a9dd3643SJeff Mahoney return do_journal_release(th, sb, 0); 20621da177e4SLinus Torvalds } 2063bd4c625cSLinus Torvalds 20641da177e4SLinus Torvalds /* 20651da177e4SLinus Torvalds ** only call from an error condition inside reiserfs_read_super! 
20661da177e4SLinus Torvalds */ 2067bd4c625cSLinus Torvalds int journal_release_error(struct reiserfs_transaction_handle *th, 2068a9dd3643SJeff Mahoney struct super_block *sb) 2069bd4c625cSLinus Torvalds { 2070a9dd3643SJeff Mahoney return do_journal_release(th, sb, 1); 20711da177e4SLinus Torvalds } 20721da177e4SLinus Torvalds 20731da177e4SLinus Torvalds /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 2074a9dd3643SJeff Mahoney static int journal_compare_desc_commit(struct super_block *sb, 2075bd4c625cSLinus Torvalds struct reiserfs_journal_desc *desc, 2076bd4c625cSLinus Torvalds struct reiserfs_journal_commit *commit) 2077bd4c625cSLinus Torvalds { 20781da177e4SLinus Torvalds if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || 20791da177e4SLinus Torvalds get_commit_trans_len(commit) != get_desc_trans_len(desc) || 2080a9dd3643SJeff Mahoney get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max || 2081bd4c625cSLinus Torvalds get_commit_trans_len(commit) <= 0) { 20821da177e4SLinus Torvalds return 1; 20831da177e4SLinus Torvalds } 20841da177e4SLinus Torvalds return 0; 20851da177e4SLinus Torvalds } 2086bd4c625cSLinus Torvalds 20871da177e4SLinus Torvalds /* returns 0 if it did not find a description block 20881da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block 20891da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid 20901da177e4SLinus Torvalds */ 2091a9dd3643SJeff Mahoney static int journal_transaction_is_valid(struct super_block *sb, 2092bd4c625cSLinus Torvalds struct buffer_head *d_bh, 2093600ed416SJeff Mahoney unsigned int *oldest_invalid_trans_id, 2094bd4c625cSLinus Torvalds unsigned long *newest_mount_id) 2095bd4c625cSLinus Torvalds { 20961da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 20971da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 20981da177e4SLinus Torvalds struct buffer_head *c_bh; 20991da177e4SLinus Torvalds unsigned long 
offset; 21001da177e4SLinus Torvalds 21011da177e4SLinus Torvalds if (!d_bh) 21021da177e4SLinus Torvalds return 0; 21031da177e4SLinus Torvalds 21041da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2105bd4c625cSLinus Torvalds if (get_desc_trans_len(desc) > 0 2106bd4c625cSLinus Torvalds && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { 2107bd4c625cSLinus Torvalds if (oldest_invalid_trans_id && *oldest_invalid_trans_id 2108bd4c625cSLinus Torvalds && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 2109a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2110bd4c625cSLinus Torvalds "journal-986: transaction " 21111da177e4SLinus Torvalds "is valid returning because trans_id %d is greater than " 2112bd4c625cSLinus Torvalds "oldest_invalid %lu", 2113bd4c625cSLinus Torvalds get_desc_trans_id(desc), 21141da177e4SLinus Torvalds *oldest_invalid_trans_id); 21151da177e4SLinus Torvalds return 0; 21161da177e4SLinus Torvalds } 2117bd4c625cSLinus Torvalds if (newest_mount_id 2118bd4c625cSLinus Torvalds && *newest_mount_id > get_desc_mount_id(desc)) { 2119a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2120bd4c625cSLinus Torvalds "journal-1087: transaction " 21211da177e4SLinus Torvalds "is valid returning because mount_id %d is less than " 2122bd4c625cSLinus Torvalds "newest_mount_id %lu", 2123bd4c625cSLinus Torvalds get_desc_mount_id(desc), 21241da177e4SLinus Torvalds *newest_mount_id); 21251da177e4SLinus Torvalds return -1; 21261da177e4SLinus Torvalds } 2127a9dd3643SJeff Mahoney if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) { 2128a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2018", 212945b03d5eSJeff Mahoney "Bad transaction length %d " 213045b03d5eSJeff Mahoney "encountered, ignoring transaction", 2131bd4c625cSLinus Torvalds get_desc_trans_len(desc)); 21321da177e4SLinus Torvalds return -1; 21331da177e4SLinus Torvalds } 2134a9dd3643SJeff Mahoney offset = d_bh->b_blocknr - 
SB_ONDISK_JOURNAL_1st_BLOCK(sb); 21351da177e4SLinus Torvalds 21361da177e4SLinus Torvalds /* ok, we have a journal description block, lets see if the transaction was valid */ 2137bd4c625cSLinus Torvalds c_bh = 2138a9dd3643SJeff Mahoney journal_bread(sb, 2139a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2140bd4c625cSLinus Torvalds ((offset + get_desc_trans_len(desc) + 2141a9dd3643SJeff Mahoney 1) % SB_ONDISK_JOURNAL_SIZE(sb))); 21421da177e4SLinus Torvalds if (!c_bh) 21431da177e4SLinus Torvalds return 0; 21441da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2145a9dd3643SJeff Mahoney if (journal_compare_desc_commit(sb, desc, commit)) { 2146a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 21471da177e4SLinus Torvalds "journal_transaction_is_valid, commit offset %ld had bad " 21481da177e4SLinus Torvalds "time %d or length %d", 2149bd4c625cSLinus Torvalds c_bh->b_blocknr - 2150a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 21511da177e4SLinus Torvalds get_commit_trans_id(commit), 21521da177e4SLinus Torvalds get_commit_trans_len(commit)); 21531da177e4SLinus Torvalds brelse(c_bh); 21541da177e4SLinus Torvalds if (oldest_invalid_trans_id) { 2155bd4c625cSLinus Torvalds *oldest_invalid_trans_id = 2156bd4c625cSLinus Torvalds get_desc_trans_id(desc); 2157a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2158bd4c625cSLinus Torvalds "journal-1004: " 21591da177e4SLinus Torvalds "transaction_is_valid setting oldest invalid trans_id " 2160bd4c625cSLinus Torvalds "to %d", 2161bd4c625cSLinus Torvalds get_desc_trans_id(desc)); 21621da177e4SLinus Torvalds } 21631da177e4SLinus Torvalds return -1; 21641da177e4SLinus Torvalds } 21651da177e4SLinus Torvalds brelse(c_bh); 2166a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2167bd4c625cSLinus Torvalds "journal-1006: found valid " 21681da177e4SLinus Torvalds "transaction start offset %llu, len %d id %d", 2169bd4c625cSLinus Torvalds d_bh->b_blocknr - 
2170a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 2171bd4c625cSLinus Torvalds get_desc_trans_len(desc), 2172bd4c625cSLinus Torvalds get_desc_trans_id(desc)); 21731da177e4SLinus Torvalds return 1; 21741da177e4SLinus Torvalds } else { 21751da177e4SLinus Torvalds return 0; 21761da177e4SLinus Torvalds } 21771da177e4SLinus Torvalds } 21781da177e4SLinus Torvalds 2179bd4c625cSLinus Torvalds static void brelse_array(struct buffer_head **heads, int num) 2180bd4c625cSLinus Torvalds { 21811da177e4SLinus Torvalds int i; 21821da177e4SLinus Torvalds for (i = 0; i < num; i++) { 21831da177e4SLinus Torvalds brelse(heads[i]); 21841da177e4SLinus Torvalds } 21851da177e4SLinus Torvalds } 21861da177e4SLinus Torvalds 21871da177e4SLinus Torvalds /* 21881da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions, 21891da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction 21901da177e4SLinus Torvalds ** is invalid, or too old. 
21911da177e4SLinus Torvalds */ 2192a9dd3643SJeff Mahoney static int journal_read_transaction(struct super_block *sb, 2193bd4c625cSLinus Torvalds unsigned long cur_dblock, 2194bd4c625cSLinus Torvalds unsigned long oldest_start, 2195600ed416SJeff Mahoney unsigned int oldest_trans_id, 2196bd4c625cSLinus Torvalds unsigned long newest_mount_id) 2197bd4c625cSLinus Torvalds { 2198a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 21991da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 22001da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 2201600ed416SJeff Mahoney unsigned int trans_id = 0; 22021da177e4SLinus Torvalds struct buffer_head *c_bh; 22031da177e4SLinus Torvalds struct buffer_head *d_bh; 22041da177e4SLinus Torvalds struct buffer_head **log_blocks = NULL; 22051da177e4SLinus Torvalds struct buffer_head **real_blocks = NULL; 2206600ed416SJeff Mahoney unsigned int trans_offset; 22071da177e4SLinus Torvalds int i; 22081da177e4SLinus Torvalds int trans_half; 22091da177e4SLinus Torvalds 2210a9dd3643SJeff Mahoney d_bh = journal_bread(sb, cur_dblock); 22111da177e4SLinus Torvalds if (!d_bh) 22121da177e4SLinus Torvalds return 1; 22131da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2214a9dd3643SJeff Mahoney trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2215a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: " 22161da177e4SLinus Torvalds "journal_read_transaction, offset %llu, len %d mount_id %d", 2217a9dd3643SJeff Mahoney d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb), 22181da177e4SLinus Torvalds get_desc_trans_len(desc), get_desc_mount_id(desc)); 22191da177e4SLinus Torvalds if (get_desc_trans_id(desc) < oldest_trans_id) { 2220a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: " 22211da177e4SLinus Torvalds "journal_read_trans skipping because %lu is too old", 2222bd4c625cSLinus Torvalds cur_dblock - 2223a9dd3643SJeff Mahoney 
SB_ONDISK_JOURNAL_1st_BLOCK(sb)); 22241da177e4SLinus Torvalds brelse(d_bh); 22251da177e4SLinus Torvalds return 1; 22261da177e4SLinus Torvalds } 22271da177e4SLinus Torvalds if (get_desc_mount_id(desc) != newest_mount_id) { 2228a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: " 22291da177e4SLinus Torvalds "journal_read_trans skipping because %d is != " 22301da177e4SLinus Torvalds "newest_mount_id %lu", get_desc_mount_id(desc), 22311da177e4SLinus Torvalds newest_mount_id); 22321da177e4SLinus Torvalds brelse(d_bh); 22331da177e4SLinus Torvalds return 1; 22341da177e4SLinus Torvalds } 2235a9dd3643SJeff Mahoney c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 22361da177e4SLinus Torvalds ((trans_offset + get_desc_trans_len(desc) + 1) % 2237a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb))); 22381da177e4SLinus Torvalds if (!c_bh) { 22391da177e4SLinus Torvalds brelse(d_bh); 22401da177e4SLinus Torvalds return 1; 22411da177e4SLinus Torvalds } 22421da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2243a9dd3643SJeff Mahoney if (journal_compare_desc_commit(sb, desc, commit)) { 2244a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2245bd4c625cSLinus Torvalds "journal_read_transaction, " 22461da177e4SLinus Torvalds "commit offset %llu had bad time %d or length %d", 2247bd4c625cSLinus Torvalds c_bh->b_blocknr - 2248a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 2249bd4c625cSLinus Torvalds get_commit_trans_id(commit), 2250bd4c625cSLinus Torvalds get_commit_trans_len(commit)); 22511da177e4SLinus Torvalds brelse(c_bh); 22521da177e4SLinus Torvalds brelse(d_bh); 22531da177e4SLinus Torvalds return 1; 22541da177e4SLinus Torvalds } 22551da177e4SLinus Torvalds trans_id = get_desc_trans_id(desc); 22561da177e4SLinus Torvalds /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2257d739b42bSPekka Enberg log_blocks = kmalloc(get_desc_trans_len(desc) * 2258d739b42bSPekka 
Enberg sizeof(struct buffer_head *), GFP_NOFS); 2259d739b42bSPekka Enberg real_blocks = kmalloc(get_desc_trans_len(desc) * 2260d739b42bSPekka Enberg sizeof(struct buffer_head *), GFP_NOFS); 22611da177e4SLinus Torvalds if (!log_blocks || !real_blocks) { 22621da177e4SLinus Torvalds brelse(c_bh); 22631da177e4SLinus Torvalds brelse(d_bh); 2264d739b42bSPekka Enberg kfree(log_blocks); 2265d739b42bSPekka Enberg kfree(real_blocks); 2266a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1169", 226745b03d5eSJeff Mahoney "kmalloc failed, unable to mount FS"); 22681da177e4SLinus Torvalds return -1; 22691da177e4SLinus Torvalds } 22701da177e4SLinus Torvalds /* get all the buffer heads */ 2271a9dd3643SJeff Mahoney trans_half = journal_trans_half(sb->s_blocksize); 22721da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 2273bd4c625cSLinus Torvalds log_blocks[i] = 2274a9dd3643SJeff Mahoney journal_getblk(sb, 2275a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2276bd4c625cSLinus Torvalds (trans_offset + 1 + 2277a9dd3643SJeff Mahoney i) % SB_ONDISK_JOURNAL_SIZE(sb)); 22781da177e4SLinus Torvalds if (i < trans_half) { 2279bd4c625cSLinus Torvalds real_blocks[i] = 2280a9dd3643SJeff Mahoney sb_getblk(sb, 2281bd4c625cSLinus Torvalds le32_to_cpu(desc->j_realblock[i])); 22821da177e4SLinus Torvalds } else { 2283bd4c625cSLinus Torvalds real_blocks[i] = 2284a9dd3643SJeff Mahoney sb_getblk(sb, 2285bd4c625cSLinus Torvalds le32_to_cpu(commit-> 2286bd4c625cSLinus Torvalds j_realblock[i - trans_half])); 22871da177e4SLinus Torvalds } 2288a9dd3643SJeff Mahoney if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) { 2289a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1207", 229045b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! 
" 229145b03d5eSJeff Mahoney "Block to replay is outside of " 229245b03d5eSJeff Mahoney "filesystem"); 22931da177e4SLinus Torvalds goto abort_replay; 22941da177e4SLinus Torvalds } 22951da177e4SLinus Torvalds /* make sure we don't try to replay onto log or reserved area */ 2296bd4c625cSLinus Torvalds if (is_block_in_log_or_reserved_area 2297a9dd3643SJeff Mahoney (sb, real_blocks[i]->b_blocknr)) { 2298a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1204", 229945b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! " 230045b03d5eSJeff Mahoney "Trying to replay onto a log block"); 23011da177e4SLinus Torvalds abort_replay: 23021da177e4SLinus Torvalds brelse_array(log_blocks, i); 23031da177e4SLinus Torvalds brelse_array(real_blocks, i); 23041da177e4SLinus Torvalds brelse(c_bh); 23051da177e4SLinus Torvalds brelse(d_bh); 2306d739b42bSPekka Enberg kfree(log_blocks); 2307d739b42bSPekka Enberg kfree(real_blocks); 23081da177e4SLinus Torvalds return -1; 23091da177e4SLinus Torvalds } 23101da177e4SLinus Torvalds } 23111da177e4SLinus Torvalds /* read in the log blocks, memcpy to the corresponding real block */ 23121da177e4SLinus Torvalds ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 23131da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 23148ebc4232SFrederic Weisbecker 23158ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 23161da177e4SLinus Torvalds wait_on_buffer(log_blocks[i]); 23178ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 23188ebc4232SFrederic Weisbecker 23191da177e4SLinus Torvalds if (!buffer_uptodate(log_blocks[i])) { 2320a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1212", 232145b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! 
" 232245b03d5eSJeff Mahoney "buffer write failed"); 2323bd4c625cSLinus Torvalds brelse_array(log_blocks + i, 2324bd4c625cSLinus Torvalds get_desc_trans_len(desc) - i); 23251da177e4SLinus Torvalds brelse_array(real_blocks, get_desc_trans_len(desc)); 23261da177e4SLinus Torvalds brelse(c_bh); 23271da177e4SLinus Torvalds brelse(d_bh); 2328d739b42bSPekka Enberg kfree(log_blocks); 2329d739b42bSPekka Enberg kfree(real_blocks); 23301da177e4SLinus Torvalds return -1; 23311da177e4SLinus Torvalds } 2332bd4c625cSLinus Torvalds memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, 2333bd4c625cSLinus Torvalds real_blocks[i]->b_size); 23341da177e4SLinus Torvalds set_buffer_uptodate(real_blocks[i]); 23351da177e4SLinus Torvalds brelse(log_blocks[i]); 23361da177e4SLinus Torvalds } 23371da177e4SLinus Torvalds /* flush out the real blocks */ 23381da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 23391da177e4SLinus Torvalds set_buffer_dirty(real_blocks[i]); 234053778ffdSJan Kara ll_rw_block(SWRITE, 1, real_blocks + i); 23411da177e4SLinus Torvalds } 23421da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 23431da177e4SLinus Torvalds wait_on_buffer(real_blocks[i]); 23441da177e4SLinus Torvalds if (!buffer_uptodate(real_blocks[i])) { 2345a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1226", 234645b03d5eSJeff Mahoney "REPLAY FAILURE, fsck required! 
" 234745b03d5eSJeff Mahoney "buffer write failed"); 2348bd4c625cSLinus Torvalds brelse_array(real_blocks + i, 2349bd4c625cSLinus Torvalds get_desc_trans_len(desc) - i); 23501da177e4SLinus Torvalds brelse(c_bh); 23511da177e4SLinus Torvalds brelse(d_bh); 2352d739b42bSPekka Enberg kfree(log_blocks); 2353d739b42bSPekka Enberg kfree(real_blocks); 23541da177e4SLinus Torvalds return -1; 23551da177e4SLinus Torvalds } 23561da177e4SLinus Torvalds brelse(real_blocks[i]); 23571da177e4SLinus Torvalds } 2358bd4c625cSLinus Torvalds cur_dblock = 2359a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2360bd4c625cSLinus Torvalds ((trans_offset + get_desc_trans_len(desc) + 2361a9dd3643SJeff Mahoney 2) % SB_ONDISK_JOURNAL_SIZE(sb)); 2362a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2363bd4c625cSLinus Torvalds "journal-1095: setting journal " "start to offset %ld", 2364a9dd3643SJeff Mahoney cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); 23651da177e4SLinus Torvalds 23661da177e4SLinus Torvalds /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2367a9dd3643SJeff Mahoney journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 23681da177e4SLinus Torvalds journal->j_last_flush_trans_id = trans_id; 23691da177e4SLinus Torvalds journal->j_trans_id = trans_id + 1; 2370a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 2371a44c94a7SAlexander Zarochentsev if (journal->j_trans_id == 0) 2372a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 23731da177e4SLinus Torvalds brelse(c_bh); 23741da177e4SLinus Torvalds brelse(d_bh); 2375d739b42bSPekka Enberg kfree(log_blocks); 2376d739b42bSPekka Enberg kfree(real_blocks); 23771da177e4SLinus Torvalds return 0; 23781da177e4SLinus Torvalds } 23791da177e4SLinus Torvalds 23801da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize 23811da177e4SLinus Torvalds size (but no more than BUFNR blocks at a time). 
This proved to improve 23821da177e4SLinus Torvalds mounting speed on self-rebuilding raid5 arrays at least. 23831da177e4SLinus Torvalds Right now it is only used from journal code. But later we might use it 23841da177e4SLinus Torvalds from other places. 23851da177e4SLinus Torvalds Note: Do not use journal_getblk/sb_getblk functions here! */ 23863ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev, 23873ee16670SJeff Mahoney b_blocknr_t block, int bufsize, 23883ee16670SJeff Mahoney b_blocknr_t max_block) 23891da177e4SLinus Torvalds { 23901da177e4SLinus Torvalds struct buffer_head *bhlist[BUFNR]; 23911da177e4SLinus Torvalds unsigned int blocks = BUFNR; 23921da177e4SLinus Torvalds struct buffer_head *bh; 23931da177e4SLinus Torvalds int i, j; 23941da177e4SLinus Torvalds 23951da177e4SLinus Torvalds bh = __getblk(dev, block, bufsize); 23961da177e4SLinus Torvalds if (buffer_uptodate(bh)) 23971da177e4SLinus Torvalds return (bh); 23981da177e4SLinus Torvalds 23991da177e4SLinus Torvalds if (block + BUFNR > max_block) { 24001da177e4SLinus Torvalds blocks = max_block - block; 24011da177e4SLinus Torvalds } 24021da177e4SLinus Torvalds bhlist[0] = bh; 24031da177e4SLinus Torvalds j = 1; 24041da177e4SLinus Torvalds for (i = 1; i < blocks; i++) { 24051da177e4SLinus Torvalds bh = __getblk(dev, block + i, bufsize); 24061da177e4SLinus Torvalds if (buffer_uptodate(bh)) { 24071da177e4SLinus Torvalds brelse(bh); 24081da177e4SLinus Torvalds break; 2409bd4c625cSLinus Torvalds } else 2410bd4c625cSLinus Torvalds bhlist[j++] = bh; 24111da177e4SLinus Torvalds } 24121da177e4SLinus Torvalds ll_rw_block(READ, j, bhlist); 24131da177e4SLinus Torvalds for (i = 1; i < j; i++) 24141da177e4SLinus Torvalds brelse(bhlist[i]); 24151da177e4SLinus Torvalds bh = bhlist[0]; 24161da177e4SLinus Torvalds wait_on_buffer(bh); 24171da177e4SLinus Torvalds if (buffer_uptodate(bh)) 24181da177e4SLinus Torvalds return bh; 24191da177e4SLinus Torvalds brelse(bh); 24201da177e4SLinus 
Torvalds return NULL; 24211da177e4SLinus Torvalds } 24221da177e4SLinus Torvalds 24231da177e4SLinus Torvalds /* 24241da177e4SLinus Torvalds ** read and replay the log 24251da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid 24261da177e4SLinus Torvalds ** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast. 24271da177e4SLinus Torvalds ** 24281da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. 24291da177e4SLinus Torvalds ** 24301da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly. 24311da177e4SLinus Torvalds */ 2432a9dd3643SJeff Mahoney static int journal_read(struct super_block *sb) 2433bd4c625cSLinus Torvalds { 2434a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 24351da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 2436600ed416SJeff Mahoney unsigned int oldest_trans_id = 0; 2437600ed416SJeff Mahoney unsigned int oldest_invalid_trans_id = 0; 24381da177e4SLinus Torvalds time_t start; 24391da177e4SLinus Torvalds unsigned long oldest_start = 0; 24401da177e4SLinus Torvalds unsigned long cur_dblock = 0; 24411da177e4SLinus Torvalds unsigned long newest_mount_id = 9; 24421da177e4SLinus Torvalds struct buffer_head *d_bh; 24431da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 24441da177e4SLinus Torvalds int valid_journal_header = 0; 24451da177e4SLinus Torvalds int replay_count = 0; 24461da177e4SLinus Torvalds int continue_replay = 1; 24471da177e4SLinus Torvalds int ret; 24481da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 24491da177e4SLinus Torvalds 2450a9dd3643SJeff Mahoney cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2451a9dd3643SJeff Mahoney reiserfs_info(sb, "checking transaction log (%s)\n", 24521da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b)); 24531da177e4SLinus 
Torvalds start = get_seconds(); 24541da177e4SLinus Torvalds 24551da177e4SLinus Torvalds /* step 1, read in the journal header block. Check the transaction it says 24561da177e4SLinus Torvalds ** is the first unflushed, and if that transaction is not valid, 24571da177e4SLinus Torvalds ** replay is done 24581da177e4SLinus Torvalds */ 2459a9dd3643SJeff Mahoney journal->j_header_bh = journal_bread(sb, 2460a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) 2461a9dd3643SJeff Mahoney + SB_ONDISK_JOURNAL_SIZE(sb)); 24621da177e4SLinus Torvalds if (!journal->j_header_bh) { 24631da177e4SLinus Torvalds return 1; 24641da177e4SLinus Torvalds } 24651da177e4SLinus Torvalds jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); 2466c499ec24SVladimir V. Saveliev if (le32_to_cpu(jh->j_first_unflushed_offset) < 2467a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb) 2468bd4c625cSLinus Torvalds && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2469bd4c625cSLinus Torvalds oldest_start = 2470a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 24711da177e4SLinus Torvalds le32_to_cpu(jh->j_first_unflushed_offset); 24721da177e4SLinus Torvalds oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 24731da177e4SLinus Torvalds newest_mount_id = le32_to_cpu(jh->j_mount_id); 2474a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2475bd4c625cSLinus Torvalds "journal-1153: found in " 24761da177e4SLinus Torvalds "header: first_unflushed_offset %d, last_flushed_trans_id " 24771da177e4SLinus Torvalds "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 24781da177e4SLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id)); 24791da177e4SLinus Torvalds valid_journal_header = 1; 24801da177e4SLinus Torvalds 24811da177e4SLinus Torvalds /* now, we try to read the first unflushed offset. If it is not valid, 24821da177e4SLinus Torvalds ** there is nothing more we can do, and it makes no sense to read 24831da177e4SLinus Torvalds ** through the whole log. 
24841da177e4SLinus Torvalds */ 2485bd4c625cSLinus Torvalds d_bh = 2486a9dd3643SJeff Mahoney journal_bread(sb, 2487a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2488bd4c625cSLinus Torvalds le32_to_cpu(jh->j_first_unflushed_offset)); 2489a9dd3643SJeff Mahoney ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL); 24901da177e4SLinus Torvalds if (!ret) { 24911da177e4SLinus Torvalds continue_replay = 0; 24921da177e4SLinus Torvalds } 24931da177e4SLinus Torvalds brelse(d_bh); 24941da177e4SLinus Torvalds goto start_log_replay; 24951da177e4SLinus Torvalds } 24961da177e4SLinus Torvalds 2497a9dd3643SJeff Mahoney if (continue_replay && bdev_read_only(sb->s_bdev)) { 2498a9dd3643SJeff Mahoney reiserfs_warning(sb, "clm-2076", 249945b03d5eSJeff Mahoney "device is readonly, unable to replay log"); 25001da177e4SLinus Torvalds return -1; 25011da177e4SLinus Torvalds } 25021da177e4SLinus Torvalds 25031da177e4SLinus Torvalds /* ok, there are transactions that need to be replayed. start with the first log block, find 25041da177e4SLinus Torvalds ** all the valid transactions, and pick out the oldest. 
25051da177e4SLinus Torvalds */ 2506bd4c625cSLinus Torvalds while (continue_replay 2507bd4c625cSLinus Torvalds && cur_dblock < 2508a9dd3643SJeff Mahoney (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2509a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb))) { 25101da177e4SLinus Torvalds /* Note that it is required for blocksize of primary fs device and journal 25111da177e4SLinus Torvalds device to be the same */ 2512bd4c625cSLinus Torvalds d_bh = 2513bd4c625cSLinus Torvalds reiserfs_breada(journal->j_dev_bd, cur_dblock, 2514a9dd3643SJeff Mahoney sb->s_blocksize, 2515a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2516a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)); 2517bd4c625cSLinus Torvalds ret = 2518a9dd3643SJeff Mahoney journal_transaction_is_valid(sb, d_bh, 2519bd4c625cSLinus Torvalds &oldest_invalid_trans_id, 2520bd4c625cSLinus Torvalds &newest_mount_id); 25211da177e4SLinus Torvalds if (ret == 1) { 25221da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 25231da177e4SLinus Torvalds if (oldest_start == 0) { /* init all oldest_ values */ 25241da177e4SLinus Torvalds oldest_trans_id = get_desc_trans_id(desc); 25251da177e4SLinus Torvalds oldest_start = d_bh->b_blocknr; 25261da177e4SLinus Torvalds newest_mount_id = get_desc_mount_id(desc); 2527a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2528bd4c625cSLinus Torvalds "journal-1179: Setting " 25291da177e4SLinus Torvalds "oldest_start to offset %llu, trans_id %lu", 2530bd4c625cSLinus Torvalds oldest_start - 2531bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK 2532a9dd3643SJeff Mahoney (sb), oldest_trans_id); 25331da177e4SLinus Torvalds } else if (oldest_trans_id > get_desc_trans_id(desc)) { 25341da177e4SLinus Torvalds /* one we just read was older */ 25351da177e4SLinus Torvalds oldest_trans_id = get_desc_trans_id(desc); 25361da177e4SLinus Torvalds oldest_start = d_bh->b_blocknr; 2537a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2538bd4c625cSLinus Torvalds 
"journal-1180: Resetting " 25391da177e4SLinus Torvalds "oldest_start to offset %lu, trans_id %lu", 2540bd4c625cSLinus Torvalds oldest_start - 2541bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK 2542a9dd3643SJeff Mahoney (sb), oldest_trans_id); 25431da177e4SLinus Torvalds } 25441da177e4SLinus Torvalds if (newest_mount_id < get_desc_mount_id(desc)) { 25451da177e4SLinus Torvalds newest_mount_id = get_desc_mount_id(desc); 2546a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2547bd4c625cSLinus Torvalds "journal-1299: Setting " 2548bd4c625cSLinus Torvalds "newest_mount_id to %d", 2549bd4c625cSLinus Torvalds get_desc_mount_id(desc)); 25501da177e4SLinus Torvalds } 25511da177e4SLinus Torvalds cur_dblock += get_desc_trans_len(desc) + 2; 25521da177e4SLinus Torvalds } else { 25531da177e4SLinus Torvalds cur_dblock++; 25541da177e4SLinus Torvalds } 25551da177e4SLinus Torvalds brelse(d_bh); 25561da177e4SLinus Torvalds } 25571da177e4SLinus Torvalds 25581da177e4SLinus Torvalds start_log_replay: 25591da177e4SLinus Torvalds cur_dblock = oldest_start; 25601da177e4SLinus Torvalds if (oldest_trans_id) { 2561a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2562bd4c625cSLinus Torvalds "journal-1206: Starting replay " 25631da177e4SLinus Torvalds "from offset %llu, trans_id %lu", 2564a9dd3643SJeff Mahoney cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb), 25651da177e4SLinus Torvalds oldest_trans_id); 25661da177e4SLinus Torvalds 25671da177e4SLinus Torvalds } 25681da177e4SLinus Torvalds replay_count = 0; 25691da177e4SLinus Torvalds while (continue_replay && oldest_trans_id > 0) { 2570bd4c625cSLinus Torvalds ret = 2571a9dd3643SJeff Mahoney journal_read_transaction(sb, cur_dblock, oldest_start, 2572bd4c625cSLinus Torvalds oldest_trans_id, newest_mount_id); 25731da177e4SLinus Torvalds if (ret < 0) { 25741da177e4SLinus Torvalds return ret; 25751da177e4SLinus Torvalds } else if (ret != 0) { 25761da177e4SLinus Torvalds break; 25771da177e4SLinus Torvalds } 2578bd4c625cSLinus 
Torvalds cur_dblock = 2579a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start; 25801da177e4SLinus Torvalds replay_count++; 25811da177e4SLinus Torvalds if (cur_dblock == oldest_start) 25821da177e4SLinus Torvalds break; 25831da177e4SLinus Torvalds } 25841da177e4SLinus Torvalds 25851da177e4SLinus Torvalds if (oldest_trans_id == 0) { 2586a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2587bd4c625cSLinus Torvalds "journal-1225: No valid " "transactions found"); 25881da177e4SLinus Torvalds } 25891da177e4SLinus Torvalds /* j_start does not get set correctly if we don't replay any transactions. 25901da177e4SLinus Torvalds ** if we had a valid journal_header, set j_start to the first unflushed transaction value, 25911da177e4SLinus Torvalds ** copy the trans_id from the header 25921da177e4SLinus Torvalds */ 25931da177e4SLinus Torvalds if (valid_journal_header && replay_count == 0) { 25941da177e4SLinus Torvalds journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); 2595bd4c625cSLinus Torvalds journal->j_trans_id = 2596bd4c625cSLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2597a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 2598a44c94a7SAlexander Zarochentsev if (journal->j_trans_id == 0) 2599a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 2600bd4c625cSLinus Torvalds journal->j_last_flush_trans_id = 2601bd4c625cSLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id); 26021da177e4SLinus Torvalds journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; 26031da177e4SLinus Torvalds } else { 26041da177e4SLinus Torvalds journal->j_mount_id = newest_mount_id + 1; 26051da177e4SLinus Torvalds } 2606a9dd3643SJeff Mahoney reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 26071da177e4SLinus Torvalds "newest_mount_id to %lu", journal->j_mount_id); 26081da177e4SLinus Torvalds journal->j_first_unflushed_offset = journal->j_start; 26091da177e4SLinus Torvalds if (replay_count > 0) { 2610a9dd3643SJeff 
Mahoney reiserfs_info(sb, 2611bd4c625cSLinus Torvalds "replayed %d transactions in %lu seconds\n", 26121da177e4SLinus Torvalds replay_count, get_seconds() - start); 26131da177e4SLinus Torvalds } 2614a9dd3643SJeff Mahoney if (!bdev_read_only(sb->s_bdev) && 2615a9dd3643SJeff Mahoney _update_journal_header_block(sb, journal->j_start, 2616bd4c625cSLinus Torvalds journal->j_last_flush_trans_id)) { 26171da177e4SLinus Torvalds /* replay failed, caller must call free_journal_ram and abort 26181da177e4SLinus Torvalds ** the mount 26191da177e4SLinus Torvalds */ 26201da177e4SLinus Torvalds return -1; 26211da177e4SLinus Torvalds } 26221da177e4SLinus Torvalds return 0; 26231da177e4SLinus Torvalds } 26241da177e4SLinus Torvalds 26251da177e4SLinus Torvalds static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) 26261da177e4SLinus Torvalds { 26271da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 26288c777cc4SPekka Enberg jl = kzalloc(sizeof(struct reiserfs_journal_list), 26298c777cc4SPekka Enberg GFP_NOFS | __GFP_NOFAIL); 26301da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_list); 26311da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_working_list); 26321da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_tail_bh_list); 26331da177e4SLinus Torvalds INIT_LIST_HEAD(&jl->j_bh_list); 263490415deaSJeff Mahoney mutex_init(&jl->j_commit_mutex); 26351da177e4SLinus Torvalds SB_JOURNAL(s)->j_num_lists++; 26361da177e4SLinus Torvalds get_journal_list(jl); 26371da177e4SLinus Torvalds return jl; 26381da177e4SLinus Torvalds } 26391da177e4SLinus Torvalds 2640a9dd3643SJeff Mahoney static void journal_list_init(struct super_block *sb) 2641bd4c625cSLinus Torvalds { 2642a9dd3643SJeff Mahoney SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb); 26431da177e4SLinus Torvalds } 26441da177e4SLinus Torvalds 26451da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super, 26461da177e4SLinus Torvalds struct reiserfs_journal *journal) 26471da177e4SLinus Torvalds { 
26481da177e4SLinus Torvalds int result; 26491da177e4SLinus Torvalds 26501da177e4SLinus Torvalds result = 0; 26511da177e4SLinus Torvalds 265286098fa0SChristoph Hellwig if (journal->j_dev_bd != NULL) { 265386098fa0SChristoph Hellwig if (journal->j_dev_bd->bd_dev != super->s_dev) 265486098fa0SChristoph Hellwig bd_release(journal->j_dev_bd); 2655e5eb8caaSAl Viro result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); 26561da177e4SLinus Torvalds journal->j_dev_bd = NULL; 26571da177e4SLinus Torvalds } 26581da177e4SLinus Torvalds 26591da177e4SLinus Torvalds if (result != 0) { 266045b03d5eSJeff Mahoney reiserfs_warning(super, "sh-457", 266145b03d5eSJeff Mahoney "Cannot release journal device: %i", result); 26621da177e4SLinus Torvalds } 26631da177e4SLinus Torvalds return result; 26641da177e4SLinus Torvalds } 26651da177e4SLinus Torvalds 26661da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super, 26671da177e4SLinus Torvalds struct reiserfs_journal *journal, 26681da177e4SLinus Torvalds const char *jdev_name) 26691da177e4SLinus Torvalds { 26701da177e4SLinus Torvalds int result; 26711da177e4SLinus Torvalds dev_t jdev; 2672aeb5d727SAl Viro fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; 26731da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 26741da177e4SLinus Torvalds 26751da177e4SLinus Torvalds result = 0; 26761da177e4SLinus Torvalds 26771da177e4SLinus Torvalds journal->j_dev_bd = NULL; 26781da177e4SLinus Torvalds jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 
26791da177e4SLinus Torvalds new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 26801da177e4SLinus Torvalds 26811da177e4SLinus Torvalds if (bdev_read_only(super->s_bdev)) 26821da177e4SLinus Torvalds blkdev_mode = FMODE_READ; 26831da177e4SLinus Torvalds 26841da177e4SLinus Torvalds /* there is no "jdev" option and journal is on separate device */ 26851da177e4SLinus Torvalds if ((!jdev_name || !jdev_name[0])) { 26861da177e4SLinus Torvalds journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2687e5eb8caaSAl Viro journal->j_dev_mode = blkdev_mode; 26881da177e4SLinus Torvalds if (IS_ERR(journal->j_dev_bd)) { 26891da177e4SLinus Torvalds result = PTR_ERR(journal->j_dev_bd); 26901da177e4SLinus Torvalds journal->j_dev_bd = NULL; 269145b03d5eSJeff Mahoney reiserfs_warning(super, "sh-458", 26921da177e4SLinus Torvalds "cannot init journal device '%s': %i", 26931da177e4SLinus Torvalds __bdevname(jdev, b), result); 26941da177e4SLinus Torvalds return result; 269586098fa0SChristoph Hellwig } else if (jdev != super->s_dev) { 269686098fa0SChristoph Hellwig result = bd_claim(journal->j_dev_bd, journal); 269786098fa0SChristoph Hellwig if (result) { 26989a1c3542SAl Viro blkdev_put(journal->j_dev_bd, blkdev_mode); 269986098fa0SChristoph Hellwig return result; 270086098fa0SChristoph Hellwig } 270186098fa0SChristoph Hellwig 27021da177e4SLinus Torvalds set_blocksize(journal->j_dev_bd, super->s_blocksize); 270386098fa0SChristoph Hellwig } 270486098fa0SChristoph Hellwig 27051da177e4SLinus Torvalds return 0; 27061da177e4SLinus Torvalds } 27071da177e4SLinus Torvalds 2708e5eb8caaSAl Viro journal->j_dev_mode = blkdev_mode; 270930c40d2cSAl Viro journal->j_dev_bd = open_bdev_exclusive(jdev_name, 2710e5eb8caaSAl Viro blkdev_mode, journal); 271186098fa0SChristoph Hellwig if (IS_ERR(journal->j_dev_bd)) { 271286098fa0SChristoph Hellwig result = PTR_ERR(journal->j_dev_bd); 271386098fa0SChristoph Hellwig journal->j_dev_bd = NULL; 271486098fa0SChristoph Hellwig reiserfs_warning(super, 
271586098fa0SChristoph Hellwig "journal_init_dev: Cannot open '%s': %i", 271686098fa0SChristoph Hellwig jdev_name, result); 271786098fa0SChristoph Hellwig return result; 271886098fa0SChristoph Hellwig } 271986098fa0SChristoph Hellwig 27201da177e4SLinus Torvalds set_blocksize(journal->j_dev_bd, super->s_blocksize); 2721bd4c625cSLinus Torvalds reiserfs_info(super, 2722bd4c625cSLinus Torvalds "journal_init_dev: journal device: %s\n", 272374f9f974SEdward Shishkin bdevname(journal->j_dev_bd, b)); 272486098fa0SChristoph Hellwig return 0; 27251da177e4SLinus Torvalds } 27261da177e4SLinus Torvalds 2727cf3d0b81SEdward Shishkin /** 2728cf3d0b81SEdward Shishkin * When creating/tuning a file system user can assign some 2729cf3d0b81SEdward Shishkin * journal params within boundaries which depend on the ratio 2730cf3d0b81SEdward Shishkin * blocksize/standard_blocksize. 2731cf3d0b81SEdward Shishkin * 2732cf3d0b81SEdward Shishkin * For blocks >= standard_blocksize transaction size should 2733cf3d0b81SEdward Shishkin * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more 2734cf3d0b81SEdward Shishkin * then JOURNAL_TRANS_MAX_DEFAULT. 2735cf3d0b81SEdward Shishkin * 2736cf3d0b81SEdward Shishkin * For blocks < standard_blocksize these boundaries should be 2737cf3d0b81SEdward Shishkin * decreased proportionally. 2738cf3d0b81SEdward Shishkin */ 2739cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096) 2740cf3d0b81SEdward Shishkin 2741a9dd3643SJeff Mahoney static int check_advise_trans_params(struct super_block *sb, 2742cf3d0b81SEdward Shishkin struct reiserfs_journal *journal) 2743cf3d0b81SEdward Shishkin { 2744cf3d0b81SEdward Shishkin if (journal->j_trans_max) { 2745cf3d0b81SEdward Shishkin /* Non-default journal params. 2746cf3d0b81SEdward Shishkin Do sanity check for them. 
*/ 2747cf3d0b81SEdward Shishkin int ratio = 1; 2748a9dd3643SJeff Mahoney if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2749a9dd3643SJeff Mahoney ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; 2750cf3d0b81SEdward Shishkin 2751cf3d0b81SEdward Shishkin if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || 2752cf3d0b81SEdward Shishkin journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || 2753a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max < 2754cf3d0b81SEdward Shishkin JOURNAL_MIN_RATIO) { 2755a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-462", 275645b03d5eSJeff Mahoney "bad transaction max size (%u). " 275745b03d5eSJeff Mahoney "FSCK?", journal->j_trans_max); 2758cf3d0b81SEdward Shishkin return 1; 2759cf3d0b81SEdward Shishkin } 2760cf3d0b81SEdward Shishkin if (journal->j_max_batch != (journal->j_trans_max) * 2761cf3d0b81SEdward Shishkin JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { 2762a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-463", 276345b03d5eSJeff Mahoney "bad transaction max batch (%u). " 276445b03d5eSJeff Mahoney "FSCK?", journal->j_max_batch); 2765cf3d0b81SEdward Shishkin return 1; 2766cf3d0b81SEdward Shishkin } 2767cf3d0b81SEdward Shishkin } else { 2768cf3d0b81SEdward Shishkin /* Default journal params. 
2769cf3d0b81SEdward Shishkin The file system was created by old version 2770cf3d0b81SEdward Shishkin of mkreiserfs, so some fields contain zeros, 2771cf3d0b81SEdward Shishkin and we need to advise proper values for them */ 2772a9dd3643SJeff Mahoney if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { 2773a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", 2774a9dd3643SJeff Mahoney sb->s_blocksize); 277545b03d5eSJeff Mahoney return 1; 277645b03d5eSJeff Mahoney } 2777cf3d0b81SEdward Shishkin journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; 2778cf3d0b81SEdward Shishkin journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; 2779cf3d0b81SEdward Shishkin journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; 2780cf3d0b81SEdward Shishkin } 2781cf3d0b81SEdward Shishkin return 0; 2782cf3d0b81SEdward Shishkin } 2783cf3d0b81SEdward Shishkin 27841da177e4SLinus Torvalds /* 27851da177e4SLinus Torvalds ** must be called once on fs mount. calls journal_read for you 27861da177e4SLinus Torvalds */ 2787a9dd3643SJeff Mahoney int journal_init(struct super_block *sb, const char *j_dev_name, 2788bd4c625cSLinus Torvalds int old_format, unsigned int commit_max_age) 2789bd4c625cSLinus Torvalds { 2790a9dd3643SJeff Mahoney int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2; 27911da177e4SLinus Torvalds struct buffer_head *bhjh; 27921da177e4SLinus Torvalds struct reiserfs_super_block *rs; 27931da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 27941da177e4SLinus Torvalds struct reiserfs_journal *journal; 27951da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 27961da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 27971da177e4SLinus Torvalds 2798a9dd3643SJeff Mahoney journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 27991da177e4SLinus Torvalds if (!journal) { 2800a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1256", 280145b03d5eSJeff Mahoney "unable to get memory for journal structure"); 28021da177e4SLinus Torvalds return 1; 28031da177e4SLinus 
Torvalds } 28041da177e4SLinus Torvalds memset(journal, 0, sizeof(struct reiserfs_journal)); 28051da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_bitmap_nodes); 28061da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_prealloc_list); 28071da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_working_list); 28081da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_journal_list); 28091da177e4SLinus Torvalds journal->j_persistent_trans = 0; 2810a9dd3643SJeff Mahoney if (reiserfs_allocate_list_bitmaps(sb, 28111da177e4SLinus Torvalds journal->j_list_bitmap, 2812a9dd3643SJeff Mahoney reiserfs_bmap_count(sb))) 28131da177e4SLinus Torvalds goto free_and_return; 2814a9dd3643SJeff Mahoney allocate_bitmap_nodes(sb); 28151da177e4SLinus Torvalds 28161da177e4SLinus Torvalds /* reserved for journal area support */ 2817a9dd3643SJeff Mahoney SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ? 2818bd4c625cSLinus Torvalds REISERFS_OLD_DISK_OFFSET_IN_BYTES 2819a9dd3643SJeff Mahoney / sb->s_blocksize + 2820a9dd3643SJeff Mahoney reiserfs_bmap_count(sb) + 2821bd4c625cSLinus Torvalds 1 : 2822bd4c625cSLinus Torvalds REISERFS_DISK_OFFSET_IN_BYTES / 2823a9dd3643SJeff Mahoney sb->s_blocksize + 2); 28241da177e4SLinus Torvalds 28251da177e4SLinus Torvalds /* Sanity check to see is the standard journal fitting withing first bitmap 28261da177e4SLinus Torvalds (actual for small blocksizes) */ 2827a9dd3643SJeff Mahoney if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2828a9dd3643SJeff Mahoney (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2829a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { 2830a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1393", 283145b03d5eSJeff Mahoney "journal does not fit for area addressed " 283245b03d5eSJeff Mahoney "by first of bitmap blocks. It starts at " 28331da177e4SLinus Torvalds "%u and its size is %u. 
Block size %ld", 2834a9dd3643SJeff Mahoney SB_JOURNAL_1st_RESERVED_BLOCK(sb), 2835a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb), 2836a9dd3643SJeff Mahoney sb->s_blocksize); 28371da177e4SLinus Torvalds goto free_and_return; 28381da177e4SLinus Torvalds } 28391da177e4SLinus Torvalds 2840a9dd3643SJeff Mahoney if (journal_init_dev(sb, journal, j_dev_name) != 0) { 2841a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-462", 284245b03d5eSJeff Mahoney "unable to initialize jornal device"); 28431da177e4SLinus Torvalds goto free_and_return; 28441da177e4SLinus Torvalds } 28451da177e4SLinus Torvalds 2846a9dd3643SJeff Mahoney rs = SB_DISK_SUPER_BLOCK(sb); 28471da177e4SLinus Torvalds 28481da177e4SLinus Torvalds /* read journal header */ 2849a9dd3643SJeff Mahoney bhjh = journal_bread(sb, 2850a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2851a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)); 28521da177e4SLinus Torvalds if (!bhjh) { 2853a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-459", 285445b03d5eSJeff Mahoney "unable to read journal header"); 28551da177e4SLinus Torvalds goto free_and_return; 28561da177e4SLinus Torvalds } 28571da177e4SLinus Torvalds jh = (struct reiserfs_journal_header *)(bhjh->b_data); 28581da177e4SLinus Torvalds 28591da177e4SLinus Torvalds /* make sure that journal matches to the super block */ 2860bd4c625cSLinus Torvalds if (is_reiserfs_jr(rs) 2861bd4c625cSLinus Torvalds && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != 2862bd4c625cSLinus Torvalds sb_jp_journal_magic(rs))) { 2863a9dd3643SJeff Mahoney reiserfs_warning(sb, "sh-460", 286445b03d5eSJeff Mahoney "journal header magic %x (device %s) does " 286545b03d5eSJeff Mahoney "not match to magic found in super block %x", 286645b03d5eSJeff Mahoney jh->jh_journal.jp_journal_magic, 28671da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b), 28681da177e4SLinus Torvalds sb_jp_journal_magic(rs)); 28691da177e4SLinus Torvalds brelse(bhjh); 28701da177e4SLinus Torvalds goto free_and_return; 
28711da177e4SLinus Torvalds } 28721da177e4SLinus Torvalds 28731da177e4SLinus Torvalds journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); 28741da177e4SLinus Torvalds journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); 2875bd4c625cSLinus Torvalds journal->j_max_commit_age = 2876bd4c625cSLinus Torvalds le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); 28771da177e4SLinus Torvalds journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 28781da177e4SLinus Torvalds 2879a9dd3643SJeff Mahoney if (check_advise_trans_params(sb, journal) != 0) 2880cf3d0b81SEdward Shishkin goto free_and_return; 28811da177e4SLinus Torvalds journal->j_default_max_commit_age = journal->j_max_commit_age; 28821da177e4SLinus Torvalds 28831da177e4SLinus Torvalds if (commit_max_age != 0) { 28841da177e4SLinus Torvalds journal->j_max_commit_age = commit_max_age; 28851da177e4SLinus Torvalds journal->j_max_trans_age = commit_max_age; 28861da177e4SLinus Torvalds } 28871da177e4SLinus Torvalds 2888a9dd3643SJeff Mahoney reiserfs_info(sb, "journal params: device %s, size %u, " 28891da177e4SLinus Torvalds "journal first block %u, max trans len %u, max batch %u, " 28901da177e4SLinus Torvalds "max commit age %u, max trans age %u\n", 28911da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b), 2892a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb), 2893a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb), 28941da177e4SLinus Torvalds journal->j_trans_max, 28951da177e4SLinus Torvalds journal->j_max_batch, 2896bd4c625cSLinus Torvalds journal->j_max_commit_age, journal->j_max_trans_age); 28971da177e4SLinus Torvalds 28981da177e4SLinus Torvalds brelse(bhjh); 28991da177e4SLinus Torvalds 29001da177e4SLinus Torvalds journal->j_list_bitmap_index = 0; 2901a9dd3643SJeff Mahoney journal_list_init(sb); 29021da177e4SLinus Torvalds 2903bd4c625cSLinus Torvalds memset(journal->j_list_hash_table, 0, 2904bd4c625cSLinus Torvalds JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 
29051da177e4SLinus Torvalds 29061da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_dirty_buffers); 29071da177e4SLinus Torvalds spin_lock_init(&journal->j_dirty_buffers_lock); 29081da177e4SLinus Torvalds 29091da177e4SLinus Torvalds journal->j_start = 0; 29101da177e4SLinus Torvalds journal->j_len = 0; 29111da177e4SLinus Torvalds journal->j_len_alloc = 0; 29121da177e4SLinus Torvalds atomic_set(&(journal->j_wcount), 0); 29131da177e4SLinus Torvalds atomic_set(&(journal->j_async_throttle), 0); 29141da177e4SLinus Torvalds journal->j_bcount = 0; 29151da177e4SLinus Torvalds journal->j_trans_start_time = 0; 29161da177e4SLinus Torvalds journal->j_last = NULL; 29171da177e4SLinus Torvalds journal->j_first = NULL; 29181da177e4SLinus Torvalds init_waitqueue_head(&(journal->j_join_wait)); 2919f68215c4SJeff Mahoney mutex_init(&journal->j_mutex); 2920afe70259SJeff Mahoney mutex_init(&journal->j_flush_mutex); 29211da177e4SLinus Torvalds 29221da177e4SLinus Torvalds journal->j_trans_id = 10; 29231da177e4SLinus Torvalds journal->j_mount_id = 10; 29241da177e4SLinus Torvalds journal->j_state = 0; 29251da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 0); 29261da177e4SLinus Torvalds journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 29271da177e4SLinus Torvalds journal->j_cnode_free_orig = journal->j_cnode_free_list; 29281da177e4SLinus Torvalds journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 29291da177e4SLinus Torvalds journal->j_cnode_used = 0; 29301da177e4SLinus Torvalds journal->j_must_wait = 0; 29311da177e4SLinus Torvalds 2932576f6d79SJeff Mahoney if (journal->j_cnode_free == 0) { 2933a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2004", "Journal cnode memory " 2934576f6d79SJeff Mahoney "allocation failed (%ld bytes). Journal is " 2935576f6d79SJeff Mahoney "too large for available memory. 
Usually " 2936576f6d79SJeff Mahoney "this is due to a journal that is too large.", 2937576f6d79SJeff Mahoney sizeof (struct reiserfs_journal_cnode) * num_cnodes); 2938576f6d79SJeff Mahoney goto free_and_return; 2939576f6d79SJeff Mahoney } 2940576f6d79SJeff Mahoney 2941a9dd3643SJeff Mahoney init_journal_hash(sb); 29421da177e4SLinus Torvalds jl = journal->j_current_jl; 2943a9dd3643SJeff Mahoney jl->j_list_bitmap = get_list_bitmap(sb, jl); 29441da177e4SLinus Torvalds if (!jl->j_list_bitmap) { 2945a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2005", 294645b03d5eSJeff Mahoney "get_list_bitmap failed for journal list 0"); 29471da177e4SLinus Torvalds goto free_and_return; 29481da177e4SLinus Torvalds } 2949a9dd3643SJeff Mahoney if (journal_read(sb) < 0) { 2950a9dd3643SJeff Mahoney reiserfs_warning(sb, "reiserfs-2006", 295145b03d5eSJeff Mahoney "Replay Failure, unable to mount"); 29521da177e4SLinus Torvalds goto free_and_return; 29531da177e4SLinus Torvalds } 29541da177e4SLinus Torvalds 29551da177e4SLinus Torvalds reiserfs_mounted_fs_count++; 29561da177e4SLinus Torvalds if (reiserfs_mounted_fs_count <= 1) 29571da177e4SLinus Torvalds commit_wq = create_workqueue("reiserfs"); 29581da177e4SLinus Torvalds 2959c4028958SDavid Howells INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2960a9dd3643SJeff Mahoney journal->j_work_sb = sb; 29611da177e4SLinus Torvalds return 0; 29621da177e4SLinus Torvalds free_and_return: 2963a9dd3643SJeff Mahoney free_journal_ram(sb); 29641da177e4SLinus Torvalds return 1; 29651da177e4SLinus Torvalds } 29661da177e4SLinus Torvalds 29671da177e4SLinus Torvalds /* 29681da177e4SLinus Torvalds ** test for a polite end of the current transaction. 
Used by file_write, and should 29691da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single 29701da177e4SLinus Torvalds ** transaction 29711da177e4SLinus Torvalds */ 2972bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th, 2973bd4c625cSLinus Torvalds int new_alloc) 2974bd4c625cSLinus Torvalds { 29751da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 29761da177e4SLinus Torvalds time_t now = get_seconds(); 29771da177e4SLinus Torvalds /* cannot restart while nested */ 29781da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 29791da177e4SLinus Torvalds if (th->t_refcount > 1) 29801da177e4SLinus Torvalds return 0; 29811da177e4SLinus Torvalds if (journal->j_must_wait > 0 || 29821da177e4SLinus Torvalds (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 29831da177e4SLinus Torvalds atomic_read(&(journal->j_jlock)) || 29841da177e4SLinus Torvalds (now - journal->j_trans_start_time) > journal->j_max_trans_age || 29851da177e4SLinus Torvalds journal->j_cnode_free < (journal->j_trans_max * 3)) { 29861da177e4SLinus Torvalds return 1; 29871da177e4SLinus Torvalds } 29886ae1ea44SChris Mason /* protected by the BKL here */ 29896ae1ea44SChris Mason journal->j_len_alloc += new_alloc; 29906ae1ea44SChris Mason th->t_blocks_allocated += new_alloc ; 29911da177e4SLinus Torvalds return 0; 29921da177e4SLinus Torvalds } 29931da177e4SLinus Torvalds 29941da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the 29951da177e4SLinus Torvalds ** kernel_lock to be held 29961da177e4SLinus Torvalds */ 2997bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2998bd4c625cSLinus Torvalds { 29991da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 30001da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 30011da177e4SLinus Torvalds journal->j_must_wait = 1; 30021da177e4SLinus Torvalds 
set_bit(J_WRITERS_BLOCKED, &journal->j_state); 30031da177e4SLinus Torvalds return; 30041da177e4SLinus Torvalds } 30051da177e4SLinus Torvalds 30061da177e4SLinus Torvalds /* this must be called without a transaction started, and does not 30071da177e4SLinus Torvalds ** require BKL 30081da177e4SLinus Torvalds */ 3009bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s) 3010bd4c625cSLinus Torvalds { 30111da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 30121da177e4SLinus Torvalds clear_bit(J_WRITERS_BLOCKED, &journal->j_state); 30131da177e4SLinus Torvalds wake_up(&journal->j_join_wait); 30141da177e4SLinus Torvalds } 30151da177e4SLinus Torvalds 30161da177e4SLinus Torvalds /* this must be called without a transaction started, and does not 30171da177e4SLinus Torvalds ** require BKL 30181da177e4SLinus Torvalds */ 3019bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s) 3020bd4c625cSLinus Torvalds { 30211da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 30221da177e4SLinus Torvalds wait_event(journal->j_join_wait, 30231da177e4SLinus Torvalds !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); 30241da177e4SLinus Torvalds } 30251da177e4SLinus Torvalds 3026bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s) 3027bd4c625cSLinus Torvalds { 30281da177e4SLinus Torvalds wait_queue_t wait; 30291da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 30301da177e4SLinus Torvalds set_bit(J_WRITERS_QUEUED, &journal->j_state); 30311da177e4SLinus Torvalds 30321da177e4SLinus Torvalds /* 30331da177e4SLinus Torvalds * we don't want to use wait_event here because 30341da177e4SLinus Torvalds * we only want to wait once. 
30351da177e4SLinus Torvalds */ 30361da177e4SLinus Torvalds init_waitqueue_entry(&wait, current); 30371da177e4SLinus Torvalds add_wait_queue(&journal->j_join_wait, &wait); 30381da177e4SLinus Torvalds set_current_state(TASK_UNINTERRUPTIBLE); 30398ebc4232SFrederic Weisbecker if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { 30408ebc4232SFrederic Weisbecker reiserfs_write_unlock(s); 30411da177e4SLinus Torvalds schedule(); 30428ebc4232SFrederic Weisbecker reiserfs_write_lock(s); 30438ebc4232SFrederic Weisbecker } 30445ab2f7e0SMilind Arun Choudhary __set_current_state(TASK_RUNNING); 30451da177e4SLinus Torvalds remove_wait_queue(&journal->j_join_wait, &wait); 30461da177e4SLinus Torvalds } 30471da177e4SLinus Torvalds 3048bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s) 3049bd4c625cSLinus Torvalds { 30501da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 30511da177e4SLinus Torvalds if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) 30521da177e4SLinus Torvalds wake_up(&journal->j_join_wait); 30531da177e4SLinus Torvalds } 30541da177e4SLinus Torvalds 3055600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) 30561da177e4SLinus Torvalds { 30571da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 30581da177e4SLinus Torvalds unsigned long bcount = journal->j_bcount; 30591da177e4SLinus Torvalds while (1) { 30608ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 3061041e0e3bSNishanth Aravamudan schedule_timeout_uninterruptible(1); 30628ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 30631da177e4SLinus Torvalds journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 30641da177e4SLinus Torvalds while ((atomic_read(&journal->j_wcount) > 0 || 30651da177e4SLinus Torvalds atomic_read(&journal->j_jlock)) && 30661da177e4SLinus Torvalds journal->j_trans_id == trans_id) { 30671da177e4SLinus Torvalds queue_log_writer(sb); 30681da177e4SLinus Torvalds } 
30691da177e4SLinus Torvalds if (journal->j_trans_id != trans_id) 30701da177e4SLinus Torvalds break; 30711da177e4SLinus Torvalds if (bcount == journal->j_bcount) 30721da177e4SLinus Torvalds break; 30731da177e4SLinus Torvalds bcount = journal->j_bcount; 30741da177e4SLinus Torvalds } 30751da177e4SLinus Torvalds } 30761da177e4SLinus Torvalds 30771da177e4SLinus Torvalds /* join == true if you must join an existing transaction. 30781da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish 30791da177e4SLinus Torvalds ** 30801da177e4SLinus Torvalds ** this will block until the transaction is joinable. send the number of blocks you 30811da177e4SLinus Torvalds ** expect to use in nblocks. 30821da177e4SLinus Torvalds */ 3083bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 3084a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks, 3085bd4c625cSLinus Torvalds int join) 3086bd4c625cSLinus Torvalds { 30871da177e4SLinus Torvalds time_t now = get_seconds(); 3088600ed416SJeff Mahoney unsigned int old_trans_id; 3089a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 30901da177e4SLinus Torvalds struct reiserfs_transaction_handle myth; 30911da177e4SLinus Torvalds int sched_count = 0; 30921da177e4SLinus Torvalds int retval; 30931da177e4SLinus Torvalds 3094a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "journal_begin"); 309514a61442SEric Sesterhenn BUG_ON(nblocks > journal->j_trans_max); 30961da177e4SLinus Torvalds 3097a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.journal_being); 30981da177e4SLinus Torvalds /* set here for journal_join */ 30991da177e4SLinus Torvalds th->t_refcount = 1; 3100a9dd3643SJeff Mahoney th->t_super = sb; 31011da177e4SLinus Torvalds 31021da177e4SLinus Torvalds relock: 3103a9dd3643SJeff Mahoney lock_journal(sb); 31041da177e4SLinus Torvalds if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 3105a9dd3643SJeff Mahoney 
unlock_journal(sb); 31061da177e4SLinus Torvalds retval = journal->j_errno; 31071da177e4SLinus Torvalds goto out_fail; 31081da177e4SLinus Torvalds } 31091da177e4SLinus Torvalds journal->j_bcount++; 31101da177e4SLinus Torvalds 31111da177e4SLinus Torvalds if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 3112a9dd3643SJeff Mahoney unlock_journal(sb); 31138ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 3114a9dd3643SJeff Mahoney reiserfs_wait_on_write_block(sb); 31158ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 3116a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.journal_relock_writers); 31171da177e4SLinus Torvalds goto relock; 31181da177e4SLinus Torvalds } 31191da177e4SLinus Torvalds now = get_seconds(); 31201da177e4SLinus Torvalds 31211da177e4SLinus Torvalds /* if there is no room in the journal OR 31221da177e4SLinus Torvalds ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 31231da177e4SLinus Torvalds ** we don't sleep if there aren't other writers 31241da177e4SLinus Torvalds */ 31251da177e4SLinus Torvalds 31261da177e4SLinus Torvalds if ((!join && journal->j_must_wait > 0) || 3127bd4c625cSLinus Torvalds (!join 3128bd4c625cSLinus Torvalds && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) 3129bd4c625cSLinus Torvalds || (!join && atomic_read(&journal->j_wcount) > 0 3130bd4c625cSLinus Torvalds && journal->j_trans_start_time > 0 3131bd4c625cSLinus Torvalds && (now - journal->j_trans_start_time) > 3132bd4c625cSLinus Torvalds journal->j_max_trans_age) || (!join 3133bd4c625cSLinus Torvalds && atomic_read(&journal->j_jlock)) 3134bd4c625cSLinus Torvalds || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 31351da177e4SLinus Torvalds 31361da177e4SLinus Torvalds old_trans_id = journal->j_trans_id; 3137a9dd3643SJeff Mahoney unlock_journal(sb); /* allow others to finish this transaction */ 31381da177e4SLinus Torvalds 31391da177e4SLinus Torvalds if (!join && (journal->j_len_alloc 
+ nblocks + 2) >= 31401da177e4SLinus Torvalds journal->j_max_batch && 3141bd4c625cSLinus Torvalds ((journal->j_len + nblocks + 2) * 100) < 3142bd4c625cSLinus Torvalds (journal->j_len_alloc * 75)) { 31431da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) > 10) { 31441da177e4SLinus Torvalds sched_count++; 3145a9dd3643SJeff Mahoney queue_log_writer(sb); 31461da177e4SLinus Torvalds goto relock; 31471da177e4SLinus Torvalds } 31481da177e4SLinus Torvalds } 31491da177e4SLinus Torvalds /* don't mess with joining the transaction if all we have to do is 31501da177e4SLinus Torvalds * wait for someone else to do a commit 31511da177e4SLinus Torvalds */ 31521da177e4SLinus Torvalds if (atomic_read(&journal->j_jlock)) { 31531da177e4SLinus Torvalds while (journal->j_trans_id == old_trans_id && 31541da177e4SLinus Torvalds atomic_read(&journal->j_jlock)) { 3155a9dd3643SJeff Mahoney queue_log_writer(sb); 31561da177e4SLinus Torvalds } 31571da177e4SLinus Torvalds goto relock; 31581da177e4SLinus Torvalds } 3159a9dd3643SJeff Mahoney retval = journal_join(&myth, sb, 1); 31601da177e4SLinus Torvalds if (retval) 31611da177e4SLinus Torvalds goto out_fail; 31621da177e4SLinus Torvalds 31631da177e4SLinus Torvalds /* someone might have ended the transaction while we joined */ 31641da177e4SLinus Torvalds if (old_trans_id != journal->j_trans_id) { 3165a9dd3643SJeff Mahoney retval = do_journal_end(&myth, sb, 1, 0); 31661da177e4SLinus Torvalds } else { 3167a9dd3643SJeff Mahoney retval = do_journal_end(&myth, sb, 1, COMMIT_NOW); 31681da177e4SLinus Torvalds } 31691da177e4SLinus Torvalds 31701da177e4SLinus Torvalds if (retval) 31711da177e4SLinus Torvalds goto out_fail; 31721da177e4SLinus Torvalds 3173a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.journal_relock_wcount); 31741da177e4SLinus Torvalds goto relock; 31751da177e4SLinus Torvalds } 31761da177e4SLinus Torvalds /* we are the first writer, set trans_id */ 31771da177e4SLinus Torvalds if (journal->j_trans_start_time == 0) { 31781da177e4SLinus 
Torvalds journal->j_trans_start_time = get_seconds(); 31791da177e4SLinus Torvalds } 31801da177e4SLinus Torvalds atomic_inc(&(journal->j_wcount)); 31811da177e4SLinus Torvalds journal->j_len_alloc += nblocks; 31821da177e4SLinus Torvalds th->t_blocks_logged = 0; 31831da177e4SLinus Torvalds th->t_blocks_allocated = nblocks; 31841da177e4SLinus Torvalds th->t_trans_id = journal->j_trans_id; 3185a9dd3643SJeff Mahoney unlock_journal(sb); 31861da177e4SLinus Torvalds INIT_LIST_HEAD(&th->t_list); 318722e2c507SJens Axboe get_fs_excl(); 31881da177e4SLinus Torvalds return 0; 31891da177e4SLinus Torvalds 31901da177e4SLinus Torvalds out_fail: 31911da177e4SLinus Torvalds memset(th, 0, sizeof(*th)); 31921da177e4SLinus Torvalds /* Re-set th->t_super, so we can properly keep track of how many 31931da177e4SLinus Torvalds * persistent transactions there are. We need to do this so if this 31941da177e4SLinus Torvalds * call is part of a failed restart_transaction, we can free it later */ 3195a9dd3643SJeff Mahoney th->t_super = sb; 31961da177e4SLinus Torvalds return retval; 31971da177e4SLinus Torvalds } 31981da177e4SLinus Torvalds 3199bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct 3200bd4c625cSLinus Torvalds super_block 3201bd4c625cSLinus Torvalds *s, 3202bd4c625cSLinus Torvalds int nblocks) 3203bd4c625cSLinus Torvalds { 32041da177e4SLinus Torvalds int ret; 32051da177e4SLinus Torvalds struct reiserfs_transaction_handle *th; 32061da177e4SLinus Torvalds 32071da177e4SLinus Torvalds /* if we're nesting into an existing transaction. 
It will be 32081da177e4SLinus Torvalds ** persistent on its own 32091da177e4SLinus Torvalds */ 32101da177e4SLinus Torvalds if (reiserfs_transaction_running(s)) { 32111da177e4SLinus Torvalds th = current->journal_info; 32121da177e4SLinus Torvalds th->t_refcount++; 321314a61442SEric Sesterhenn BUG_ON(th->t_refcount < 2); 321414a61442SEric Sesterhenn 32151da177e4SLinus Torvalds return th; 32161da177e4SLinus Torvalds } 3217d739b42bSPekka Enberg th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 32181da177e4SLinus Torvalds if (!th) 32191da177e4SLinus Torvalds return NULL; 32201da177e4SLinus Torvalds ret = journal_begin(th, s, nblocks); 32211da177e4SLinus Torvalds if (ret) { 3222d739b42bSPekka Enberg kfree(th); 32231da177e4SLinus Torvalds return NULL; 32241da177e4SLinus Torvalds } 32251da177e4SLinus Torvalds 32261da177e4SLinus Torvalds SB_JOURNAL(s)->j_persistent_trans++; 32271da177e4SLinus Torvalds return th; 32281da177e4SLinus Torvalds } 32291da177e4SLinus Torvalds 3230bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) 3231bd4c625cSLinus Torvalds { 32321da177e4SLinus Torvalds struct super_block *s = th->t_super; 32331da177e4SLinus Torvalds int ret = 0; 32341da177e4SLinus Torvalds if (th->t_trans_id) 32351da177e4SLinus Torvalds ret = journal_end(th, th->t_super, th->t_blocks_allocated); 32361da177e4SLinus Torvalds else 32371da177e4SLinus Torvalds ret = -EIO; 32381da177e4SLinus Torvalds if (th->t_refcount == 0) { 32391da177e4SLinus Torvalds SB_JOURNAL(s)->j_persistent_trans--; 3240d739b42bSPekka Enberg kfree(th); 32411da177e4SLinus Torvalds } 32421da177e4SLinus Torvalds return ret; 32431da177e4SLinus Torvalds } 32441da177e4SLinus Torvalds 3245bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th, 3246a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3247bd4c625cSLinus Torvalds { 32481da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = 
current->journal_info; 32491da177e4SLinus Torvalds 32501da177e4SLinus Torvalds /* this keeps do_journal_end from NULLing out the current->journal_info 32511da177e4SLinus Torvalds ** pointer 32521da177e4SLinus Torvalds */ 32531da177e4SLinus Torvalds th->t_handle_save = cur_th; 325414a61442SEric Sesterhenn BUG_ON(cur_th && cur_th->t_refcount > 1); 3255a9dd3643SJeff Mahoney return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN); 32561da177e4SLinus Torvalds } 32571da177e4SLinus Torvalds 3258bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th, 3259a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3260bd4c625cSLinus Torvalds { 32611da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = current->journal_info; 32621da177e4SLinus Torvalds 32631da177e4SLinus Torvalds /* this keeps do_journal_end from NULLing out the current->journal_info 32641da177e4SLinus Torvalds ** pointer 32651da177e4SLinus Torvalds */ 32661da177e4SLinus Torvalds th->t_handle_save = cur_th; 326714a61442SEric Sesterhenn BUG_ON(cur_th && cur_th->t_refcount > 1); 3268a9dd3643SJeff Mahoney return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT); 32691da177e4SLinus Torvalds } 32701da177e4SLinus Torvalds 3271bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th, 3272a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3273bd4c625cSLinus Torvalds { 32741da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = current->journal_info; 32751da177e4SLinus Torvalds int ret; 32761da177e4SLinus Torvalds 32771da177e4SLinus Torvalds th->t_handle_save = NULL; 32781da177e4SLinus Torvalds if (cur_th) { 32791da177e4SLinus Torvalds /* we are nesting into the current transaction */ 3280a9dd3643SJeff Mahoney if (cur_th->t_super == sb) { 32811da177e4SLinus Torvalds BUG_ON(!cur_th->t_refcount); 32821da177e4SLinus Torvalds cur_th->t_refcount++; 32831da177e4SLinus Torvalds memcpy(th, cur_th, sizeof(*th)); 
32841da177e4SLinus Torvalds if (th->t_refcount <= 1) 3285a9dd3643SJeff Mahoney reiserfs_warning(sb, "reiserfs-2005", 328645b03d5eSJeff Mahoney "BAD: refcount <= 1, but " 328745b03d5eSJeff Mahoney "journal_info != 0"); 32881da177e4SLinus Torvalds return 0; 32891da177e4SLinus Torvalds } else { 32901da177e4SLinus Torvalds /* we've ended up with a handle from a different filesystem. 32911da177e4SLinus Torvalds ** save it and restore on journal_end. This should never 32921da177e4SLinus Torvalds ** really happen... 32931da177e4SLinus Torvalds */ 3294a9dd3643SJeff Mahoney reiserfs_warning(sb, "clm-2100", 329545b03d5eSJeff Mahoney "nesting info a different FS"); 32961da177e4SLinus Torvalds th->t_handle_save = current->journal_info; 32971da177e4SLinus Torvalds current->journal_info = th; 32981da177e4SLinus Torvalds } 32991da177e4SLinus Torvalds } else { 33001da177e4SLinus Torvalds current->journal_info = th; 33011da177e4SLinus Torvalds } 3302a9dd3643SJeff Mahoney ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); 330314a61442SEric Sesterhenn BUG_ON(current->journal_info != th); 33041da177e4SLinus Torvalds 33051da177e4SLinus Torvalds /* I guess this boils down to being the reciprocal of clm-2100 above. 33061da177e4SLinus Torvalds * If do_journal_begin_r fails, we need to put it back, since journal_end 33071da177e4SLinus Torvalds * won't be called to do it. */ 33081da177e4SLinus Torvalds if (ret) 33091da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 33101da177e4SLinus Torvalds else 33111da177e4SLinus Torvalds BUG_ON(!th->t_refcount); 33121da177e4SLinus Torvalds 33131da177e4SLinus Torvalds return ret; 33141da177e4SLinus Torvalds } 33151da177e4SLinus Torvalds 33161da177e4SLinus Torvalds /* 33171da177e4SLinus Torvalds ** puts bh into the current transaction. If it was already there, reorders removes the 33181da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). 
33191da177e4SLinus Torvalds ** 33201da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 33211da177e4SLinus Torvalds ** transaction is committed. 33221da177e4SLinus Torvalds ** 33231da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 33241da177e4SLinus Torvalds */ 3325bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3326a9dd3643SJeff Mahoney struct super_block *sb, struct buffer_head *bh) 3327bd4c625cSLinus Torvalds { 3328a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 33291da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn = NULL; 33301da177e4SLinus Torvalds int count_already_incd = 0; 33311da177e4SLinus Torvalds int prepared = 0; 33321da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 33331da177e4SLinus Torvalds 3334a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.mark_dirty); 33351da177e4SLinus Torvalds if (th->t_trans_id != journal->j_trans_id) { 3336c3a9c210SJeff Mahoney reiserfs_panic(th->t_super, "journal-1577", 3337c3a9c210SJeff Mahoney "handle trans id %ld != current trans id %ld", 33381da177e4SLinus Torvalds th->t_trans_id, journal->j_trans_id); 33391da177e4SLinus Torvalds } 33401da177e4SLinus Torvalds 3341a9dd3643SJeff Mahoney sb->s_dirt = 1; 33421da177e4SLinus Torvalds 33431da177e4SLinus Torvalds prepared = test_clear_buffer_journal_prepared(bh); 33441da177e4SLinus Torvalds clear_buffer_journal_restore_dirty(bh); 33451da177e4SLinus Torvalds /* already in this transaction, we are done */ 33461da177e4SLinus Torvalds if (buffer_journaled(bh)) { 3347a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.mark_dirty_already); 33481da177e4SLinus Torvalds return 0; 33491da177e4SLinus Torvalds } 33501da177e4SLinus Torvalds 33511da177e4SLinus Torvalds /* this must be turned into a panic instead of a warning. 
We can't allow 33521da177e4SLinus Torvalds ** a dirty or journal_dirty or locked buffer to be logged, as some changes 33531da177e4SLinus Torvalds ** could get to disk too early. NOT GOOD. 33541da177e4SLinus Torvalds */ 33551da177e4SLinus Torvalds if (!prepared || buffer_dirty(bh)) { 3356a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1777", 335745b03d5eSJeff Mahoney "buffer %llu bad state " 33581da177e4SLinus Torvalds "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3359bd4c625cSLinus Torvalds (unsigned long long)bh->b_blocknr, 3360bd4c625cSLinus Torvalds prepared ? ' ' : '!', 33611da177e4SLinus Torvalds buffer_locked(bh) ? ' ' : '!', 33621da177e4SLinus Torvalds buffer_dirty(bh) ? ' ' : '!', 33631da177e4SLinus Torvalds buffer_journal_dirty(bh) ? ' ' : '!'); 33641da177e4SLinus Torvalds } 33651da177e4SLinus Torvalds 33661da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) <= 0) { 3367a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1409", 336845b03d5eSJeff Mahoney "returning because j_wcount was %d", 3369bd4c625cSLinus Torvalds atomic_read(&(journal->j_wcount))); 33701da177e4SLinus Torvalds return 1; 33711da177e4SLinus Torvalds } 33721da177e4SLinus Torvalds /* this error means I've screwed up, and we've overflowed the transaction. 33731da177e4SLinus Torvalds ** Nothing can be done here, except make the FS readonly or panic. 
33741da177e4SLinus Torvalds */ 33751da177e4SLinus Torvalds if (journal->j_len >= journal->j_trans_max) { 3376c3a9c210SJeff Mahoney reiserfs_panic(th->t_super, "journal-1413", 3377c3a9c210SJeff Mahoney "j_len (%lu) is too big", 3378bd4c625cSLinus Torvalds journal->j_len); 33791da177e4SLinus Torvalds } 33801da177e4SLinus Torvalds 33811da177e4SLinus Torvalds if (buffer_journal_dirty(bh)) { 33821da177e4SLinus Torvalds count_already_incd = 1; 3383a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.mark_dirty_notjournal); 33841da177e4SLinus Torvalds clear_buffer_journal_dirty(bh); 33851da177e4SLinus Torvalds } 33861da177e4SLinus Torvalds 33871da177e4SLinus Torvalds if (journal->j_len > journal->j_len_alloc) { 33881da177e4SLinus Torvalds journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; 33891da177e4SLinus Torvalds } 33901da177e4SLinus Torvalds 33911da177e4SLinus Torvalds set_buffer_journaled(bh); 33921da177e4SLinus Torvalds 33931da177e4SLinus Torvalds /* now put this guy on the end */ 33941da177e4SLinus Torvalds if (!cn) { 3395a9dd3643SJeff Mahoney cn = get_cnode(sb); 33961da177e4SLinus Torvalds if (!cn) { 3397a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-4", "get_cnode failed!"); 33981da177e4SLinus Torvalds } 33991da177e4SLinus Torvalds 34001da177e4SLinus Torvalds if (th->t_blocks_logged == th->t_blocks_allocated) { 34011da177e4SLinus Torvalds th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; 34021da177e4SLinus Torvalds journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; 34031da177e4SLinus Torvalds } 34041da177e4SLinus Torvalds th->t_blocks_logged++; 34051da177e4SLinus Torvalds journal->j_len++; 34061da177e4SLinus Torvalds 34071da177e4SLinus Torvalds cn->bh = bh; 34081da177e4SLinus Torvalds cn->blocknr = bh->b_blocknr; 3409a9dd3643SJeff Mahoney cn->sb = sb; 34101da177e4SLinus Torvalds cn->jlist = NULL; 34111da177e4SLinus Torvalds insert_journal_hash(journal->j_hash_table, cn); 34121da177e4SLinus Torvalds if (!count_already_incd) { 34131da177e4SLinus Torvalds 
get_bh(bh); 34141da177e4SLinus Torvalds } 34151da177e4SLinus Torvalds } 34161da177e4SLinus Torvalds cn->next = NULL; 34171da177e4SLinus Torvalds cn->prev = journal->j_last; 34181da177e4SLinus Torvalds cn->bh = bh; 34191da177e4SLinus Torvalds if (journal->j_last) { 34201da177e4SLinus Torvalds journal->j_last->next = cn; 34211da177e4SLinus Torvalds journal->j_last = cn; 34221da177e4SLinus Torvalds } else { 34231da177e4SLinus Torvalds journal->j_first = cn; 34241da177e4SLinus Torvalds journal->j_last = cn; 34251da177e4SLinus Torvalds } 34261da177e4SLinus Torvalds return 0; 34271da177e4SLinus Torvalds } 34281da177e4SLinus Torvalds 3429bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th, 3430a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3431bd4c625cSLinus Torvalds { 34321da177e4SLinus Torvalds if (!current->journal_info && th->t_refcount > 1) 3433a9dd3643SJeff Mahoney reiserfs_warning(sb, "REISER-NESTING", 343445b03d5eSJeff Mahoney "th NULL, refcount %d", th->t_refcount); 34351da177e4SLinus Torvalds 34361da177e4SLinus Torvalds if (!th->t_trans_id) { 34371da177e4SLinus Torvalds WARN_ON(1); 34381da177e4SLinus Torvalds return -EIO; 34391da177e4SLinus Torvalds } 34401da177e4SLinus Torvalds 34411da177e4SLinus Torvalds th->t_refcount--; 34421da177e4SLinus Torvalds if (th->t_refcount > 0) { 3443bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *cur_th = 3444bd4c625cSLinus Torvalds current->journal_info; 34451da177e4SLinus Torvalds 34461da177e4SLinus Torvalds /* we aren't allowed to close a nested transaction on a different 34471da177e4SLinus Torvalds ** filesystem from the one in the task struct 34481da177e4SLinus Torvalds */ 344914a61442SEric Sesterhenn BUG_ON(cur_th->t_super != th->t_super); 34501da177e4SLinus Torvalds 34511da177e4SLinus Torvalds if (th != cur_th) { 34521da177e4SLinus Torvalds memcpy(current->journal_info, th, sizeof(*th)); 34531da177e4SLinus Torvalds th->t_trans_id = 0; 34541da177e4SLinus Torvalds } 
34551da177e4SLinus Torvalds return 0; 34561da177e4SLinus Torvalds } else { 3457a9dd3643SJeff Mahoney return do_journal_end(th, sb, nblocks, 0); 34581da177e4SLinus Torvalds } 34591da177e4SLinus Torvalds } 34601da177e4SLinus Torvalds 34611da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters. 34621da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list 34631da177e4SLinus Torvalds ** 34641da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted 34651da177e4SLinus Torvalds ** 34661da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise 34671da177e4SLinus Torvalds */ 3468a9dd3643SJeff Mahoney static int remove_from_transaction(struct super_block *sb, 3469bd4c625cSLinus Torvalds b_blocknr_t blocknr, int already_cleaned) 3470bd4c625cSLinus Torvalds { 34711da177e4SLinus Torvalds struct buffer_head *bh; 34721da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 3473a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 34741da177e4SLinus Torvalds int ret = 0; 34751da177e4SLinus Torvalds 3476a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); 34771da177e4SLinus Torvalds if (!cn || !cn->bh) { 34781da177e4SLinus Torvalds return ret; 34791da177e4SLinus Torvalds } 34801da177e4SLinus Torvalds bh = cn->bh; 34811da177e4SLinus Torvalds if (cn->prev) { 34821da177e4SLinus Torvalds cn->prev->next = cn->next; 34831da177e4SLinus Torvalds } 34841da177e4SLinus Torvalds if (cn->next) { 34851da177e4SLinus Torvalds cn->next->prev = cn->prev; 34861da177e4SLinus Torvalds } 34871da177e4SLinus Torvalds if (cn == journal->j_first) { 34881da177e4SLinus Torvalds journal->j_first = cn->next; 34891da177e4SLinus Torvalds } 34901da177e4SLinus Torvalds if (cn == journal->j_last) { 34911da177e4SLinus Torvalds journal->j_last = cn->prev; 34921da177e4SLinus Torvalds } 34931da177e4SLinus Torvalds if (bh) 3494a9dd3643SJeff 
Mahoney remove_journal_hash(sb, journal->j_hash_table, NULL, 3495bd4c625cSLinus Torvalds bh->b_blocknr, 0); 34961da177e4SLinus Torvalds clear_buffer_journaled(bh); /* don't log this one */ 34971da177e4SLinus Torvalds 34981da177e4SLinus Torvalds if (!already_cleaned) { 34991da177e4SLinus Torvalds clear_buffer_journal_dirty(bh); 35001da177e4SLinus Torvalds clear_buffer_dirty(bh); 35011da177e4SLinus Torvalds clear_buffer_journal_test(bh); 35021da177e4SLinus Torvalds put_bh(bh); 35031da177e4SLinus Torvalds if (atomic_read(&(bh->b_count)) < 0) { 3504a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-1752", 350545b03d5eSJeff Mahoney "b_count < 0"); 35061da177e4SLinus Torvalds } 35071da177e4SLinus Torvalds ret = 1; 35081da177e4SLinus Torvalds } 35091da177e4SLinus Torvalds journal->j_len--; 35101da177e4SLinus Torvalds journal->j_len_alloc--; 3511a9dd3643SJeff Mahoney free_cnode(sb, cn); 35121da177e4SLinus Torvalds return ret; 35131da177e4SLinus Torvalds } 35141da177e4SLinus Torvalds 35151da177e4SLinus Torvalds /* 35161da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the 35170779bf2dSMatt LaPlante ** transactions that include it are committed to disk. 
35181da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty, 35191da177e4SLinus Torvalds ** and 0 if you aren't 35201da177e4SLinus Torvalds ** 35211da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log 35221da177e4SLinus Torvalds ** blocks for a given transaction on disk 35231da177e4SLinus Torvalds ** 35241da177e4SLinus Torvalds */ 3525bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn) 3526bd4c625cSLinus Torvalds { 35271da177e4SLinus Torvalds struct super_block *sb = cn->sb; 35281da177e4SLinus Torvalds b_blocknr_t blocknr = cn->blocknr; 35291da177e4SLinus Torvalds struct reiserfs_journal_cnode *cur = cn->hprev; 35301da177e4SLinus Torvalds int can_dirty = 1; 35311da177e4SLinus Torvalds 35321da177e4SLinus Torvalds /* first test hprev. These are all newer than cn, so any node here 35331da177e4SLinus Torvalds ** with the same block number and dev means this node can't be sent 35341da177e4SLinus Torvalds ** to disk right now. 35351da177e4SLinus Torvalds */ 35361da177e4SLinus Torvalds while (cur && can_dirty) { 35371da177e4SLinus Torvalds if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 35381da177e4SLinus Torvalds cur->blocknr == blocknr) { 35391da177e4SLinus Torvalds can_dirty = 0; 35401da177e4SLinus Torvalds } 35411da177e4SLinus Torvalds cur = cur->hprev; 35421da177e4SLinus Torvalds } 35431da177e4SLinus Torvalds /* then test hnext. These are all older than cn. 
As long as they 35441da177e4SLinus Torvalds ** are committed to the log, it is safe to write cn to disk 35451da177e4SLinus Torvalds */ 35461da177e4SLinus Torvalds cur = cn->hnext; 35471da177e4SLinus Torvalds while (cur && can_dirty) { 35481da177e4SLinus Torvalds if (cur->jlist && cur->jlist->j_len > 0 && 35491da177e4SLinus Torvalds atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 35501da177e4SLinus Torvalds cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 35511da177e4SLinus Torvalds can_dirty = 0; 35521da177e4SLinus Torvalds } 35531da177e4SLinus Torvalds cur = cur->hnext; 35541da177e4SLinus Torvalds } 35551da177e4SLinus Torvalds return can_dirty; 35561da177e4SLinus Torvalds } 35571da177e4SLinus Torvalds 35581da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk 35590779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning 35601da177e4SLinus Torvalds */ 3561bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th, 3562a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks) 3563bd4c625cSLinus Torvalds { 3564a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 35651da177e4SLinus Torvalds 35661da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 35671da177e4SLinus Torvalds /* you can sync while nested, very, very bad */ 356814a61442SEric Sesterhenn BUG_ON(th->t_refcount > 1); 35691da177e4SLinus Torvalds if (journal->j_len == 0) { 3570a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3571bd4c625cSLinus Torvalds 1); 3572a9dd3643SJeff Mahoney journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); 35731da177e4SLinus Torvalds } 3574a9dd3643SJeff Mahoney return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); 35751da177e4SLinus Torvalds } 35761da177e4SLinus Torvalds 35771da177e4SLinus Torvalds /* 35781da177e4SLinus Torvalds ** writeback the pending async commits to disk 35791da177e4SLinus 
Torvalds */ 3580c4028958SDavid Howells static void flush_async_commits(struct work_struct *work) 3581bd4c625cSLinus Torvalds { 3582c4028958SDavid Howells struct reiserfs_journal *journal = 3583c4028958SDavid Howells container_of(work, struct reiserfs_journal, j_work.work); 3584a9dd3643SJeff Mahoney struct super_block *sb = journal->j_work_sb; 35851da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 35861da177e4SLinus Torvalds struct list_head *entry; 35871da177e4SLinus Torvalds 35888ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 35891da177e4SLinus Torvalds if (!list_empty(&journal->j_journal_list)) { 35901da177e4SLinus Torvalds /* last entry is the youngest, commit it and you get everything */ 35911da177e4SLinus Torvalds entry = journal->j_journal_list.prev; 35921da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 3593a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 35941da177e4SLinus Torvalds } 35958ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 35961da177e4SLinus Torvalds } 35971da177e4SLinus Torvalds 35981da177e4SLinus Torvalds /* 35991da177e4SLinus Torvalds ** flushes any old transactions to disk 36001da177e4SLinus Torvalds ** ends the current transaction if it is too old 36011da177e4SLinus Torvalds */ 3602a9dd3643SJeff Mahoney int reiserfs_flush_old_commits(struct super_block *sb) 3603bd4c625cSLinus Torvalds { 36041da177e4SLinus Torvalds time_t now; 36051da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 3606a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 36071da177e4SLinus Torvalds 36081da177e4SLinus Torvalds now = get_seconds(); 36091da177e4SLinus Torvalds /* safety check so we don't flush while we are replaying the log during 36101da177e4SLinus Torvalds * mount 36111da177e4SLinus Torvalds */ 36121da177e4SLinus Torvalds if (list_empty(&journal->j_journal_list)) { 36131da177e4SLinus Torvalds return 0; 36141da177e4SLinus Torvalds } 36151da177e4SLinus Torvalds 36161da177e4SLinus Torvalds /* check the 
current transaction. If there are no writers, and it is 36171da177e4SLinus Torvalds * too old, finish it, and force the commit blocks to disk 36181da177e4SLinus Torvalds */ 36191da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) <= 0 && 36201da177e4SLinus Torvalds journal->j_trans_start_time > 0 && 36211da177e4SLinus Torvalds journal->j_len > 0 && 3622bd4c625cSLinus Torvalds (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3623a9dd3643SJeff Mahoney if (!journal_join(&th, sb, 1)) { 3624a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, 3625a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb), 3626bd4c625cSLinus Torvalds 1); 3627a9dd3643SJeff Mahoney journal_mark_dirty(&th, sb, 3628a9dd3643SJeff Mahoney SB_BUFFER_WITH_SB(sb)); 36291da177e4SLinus Torvalds 36301da177e4SLinus Torvalds /* we're only being called from kreiserfsd, it makes no sense to do 36311da177e4SLinus Torvalds ** an async commit so that kreiserfsd can do it later 36321da177e4SLinus Torvalds */ 3633a9dd3643SJeff Mahoney do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); 36341da177e4SLinus Torvalds } 36351da177e4SLinus Torvalds } 3636a9dd3643SJeff Mahoney return sb->s_dirt; 36371da177e4SLinus Torvalds } 36381da177e4SLinus Torvalds 36391da177e4SLinus Torvalds /* 36401da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 36411da177e4SLinus Torvalds ** 36421da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 36431da177e4SLinus Torvalds ** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just 36441da177e4SLinus Torvalds ** flushes the commit list and returns 0. 36451da177e4SLinus Torvalds ** 36461da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 
36471da177e4SLinus Torvalds ** 36481da177e4SLinus Torvalds ** Note, we can't allow the journal_end to proceed while there are still writers in the log. 36491da177e4SLinus Torvalds */ 3650bd4c625cSLinus Torvalds static int check_journal_end(struct reiserfs_transaction_handle *th, 3651a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks, 3652bd4c625cSLinus Torvalds int flags) 3653bd4c625cSLinus Torvalds { 36541da177e4SLinus Torvalds 36551da177e4SLinus Torvalds time_t now; 36561da177e4SLinus Torvalds int flush = flags & FLUSH_ALL; 36571da177e4SLinus Torvalds int commit_now = flags & COMMIT_NOW; 36581da177e4SLinus Torvalds int wait_on_commit = flags & WAIT; 36591da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 3660a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 36611da177e4SLinus Torvalds 36621da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 36631da177e4SLinus Torvalds 36641da177e4SLinus Torvalds if (th->t_trans_id != journal->j_trans_id) { 3665c3a9c210SJeff Mahoney reiserfs_panic(th->t_super, "journal-1577", 3666c3a9c210SJeff Mahoney "handle trans id %ld != current trans id %ld", 36671da177e4SLinus Torvalds th->t_trans_id, journal->j_trans_id); 36681da177e4SLinus Torvalds } 36691da177e4SLinus Torvalds 36701da177e4SLinus Torvalds journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); 36711da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. 
unmounting might not call begin */ 36721da177e4SLinus Torvalds atomic_dec(&(journal->j_wcount)); 36731da177e4SLinus Torvalds } 36741da177e4SLinus Torvalds 36751da177e4SLinus Torvalds /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released 36761da177e4SLinus Torvalds ** will be dealt with by next transaction that actually writes something, but should be taken 36771da177e4SLinus Torvalds ** care of in this trans 36781da177e4SLinus Torvalds */ 367914a61442SEric Sesterhenn BUG_ON(journal->j_len == 0); 368014a61442SEric Sesterhenn 36811da177e4SLinus Torvalds /* if wcount > 0, and we are called to with flush or commit_now, 36821da177e4SLinus Torvalds ** we wait on j_join_wait. We will wake up when the last writer has 36831da177e4SLinus Torvalds ** finished the transaction, and started it on its way to the disk. 36841da177e4SLinus Torvalds ** Then, we flush the commit or journal list, and just return 0 36851da177e4SLinus Torvalds ** because the rest of journal end was already done for this transaction. 
36861da177e4SLinus Torvalds */ 36871da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) > 0) { 36881da177e4SLinus Torvalds if (flush || commit_now) { 36891da177e4SLinus Torvalds unsigned trans_id; 36901da177e4SLinus Torvalds 36911da177e4SLinus Torvalds jl = journal->j_current_jl; 36921da177e4SLinus Torvalds trans_id = jl->j_trans_id; 36931da177e4SLinus Torvalds if (wait_on_commit) 36941da177e4SLinus Torvalds jl->j_state |= LIST_COMMIT_PENDING; 36951da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 1); 36961da177e4SLinus Torvalds if (flush) { 36971da177e4SLinus Torvalds journal->j_next_full_flush = 1; 36981da177e4SLinus Torvalds } 3699a9dd3643SJeff Mahoney unlock_journal(sb); 37001da177e4SLinus Torvalds 37011da177e4SLinus Torvalds /* sleep while the current transaction is still j_jlocked */ 37021da177e4SLinus Torvalds while (journal->j_trans_id == trans_id) { 37031da177e4SLinus Torvalds if (atomic_read(&journal->j_jlock)) { 3704a9dd3643SJeff Mahoney queue_log_writer(sb); 37051da177e4SLinus Torvalds } else { 3706a9dd3643SJeff Mahoney lock_journal(sb); 37071da177e4SLinus Torvalds if (journal->j_trans_id == trans_id) { 3708bd4c625cSLinus Torvalds atomic_set(&(journal->j_jlock), 3709bd4c625cSLinus Torvalds 1); 37101da177e4SLinus Torvalds } 3711a9dd3643SJeff Mahoney unlock_journal(sb); 37121da177e4SLinus Torvalds } 37131da177e4SLinus Torvalds } 371414a61442SEric Sesterhenn BUG_ON(journal->j_trans_id == trans_id); 371514a61442SEric Sesterhenn 3716bd4c625cSLinus Torvalds if (commit_now 3717a9dd3643SJeff Mahoney && journal_list_still_alive(sb, trans_id) 3718bd4c625cSLinus Torvalds && wait_on_commit) { 3719a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 37201da177e4SLinus Torvalds } 37211da177e4SLinus Torvalds return 0; 37221da177e4SLinus Torvalds } 3723a9dd3643SJeff Mahoney unlock_journal(sb); 37241da177e4SLinus Torvalds return 0; 37251da177e4SLinus Torvalds } 37261da177e4SLinus Torvalds 37271da177e4SLinus Torvalds /* deal with old transactions where we 
are the last writers */ 37281da177e4SLinus Torvalds now = get_seconds(); 37291da177e4SLinus Torvalds if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { 37301da177e4SLinus Torvalds commit_now = 1; 37311da177e4SLinus Torvalds journal->j_next_async_flush = 1; 37321da177e4SLinus Torvalds } 37331da177e4SLinus Torvalds /* don't batch when someone is waiting on j_join_wait */ 37341da177e4SLinus Torvalds /* don't batch when syncing the commit or flushing the whole trans */ 3735bd4c625cSLinus Torvalds if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) 3736bd4c625cSLinus Torvalds && !flush && !commit_now && (journal->j_len < journal->j_max_batch) 3737bd4c625cSLinus Torvalds && journal->j_len_alloc < journal->j_max_batch 3738bd4c625cSLinus Torvalds && journal->j_cnode_free > (journal->j_trans_max * 3)) { 37391da177e4SLinus Torvalds journal->j_bcount++; 3740a9dd3643SJeff Mahoney unlock_journal(sb); 37411da177e4SLinus Torvalds return 0; 37421da177e4SLinus Torvalds } 37431da177e4SLinus Torvalds 3744a9dd3643SJeff Mahoney if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) { 3745a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-003", 3746c3a9c210SJeff Mahoney "j_start (%ld) is too high", 3747bd4c625cSLinus Torvalds journal->j_start); 37481da177e4SLinus Torvalds } 37491da177e4SLinus Torvalds return 1; 37501da177e4SLinus Torvalds } 37511da177e4SLinus Torvalds 37521da177e4SLinus Torvalds /* 37531da177e4SLinus Torvalds ** Does all the work that makes deleting blocks safe. 37541da177e4SLinus Torvalds ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. 37551da177e4SLinus Torvalds ** 37561da177e4SLinus Torvalds ** otherwise: 37571da177e4SLinus Torvalds ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 37581da177e4SLinus Torvalds ** before this transaction has finished. 
37591da177e4SLinus Torvalds ** 37601da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with 37611da177e4SLinus Torvalds ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, 37621da177e4SLinus Torvalds ** the block can't be reallocated yet. 37631da177e4SLinus Torvalds ** 37641da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 37651da177e4SLinus Torvalds */ 3766bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th, 3767a9dd3643SJeff Mahoney struct super_block *sb, b_blocknr_t blocknr) 3768bd4c625cSLinus Torvalds { 3769a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 37701da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn = NULL; 37711da177e4SLinus Torvalds struct buffer_head *bh = NULL; 37721da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = NULL; 37731da177e4SLinus Torvalds int cleaned = 0; 37741da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 37751da177e4SLinus Torvalds 3776a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr); 37771da177e4SLinus Torvalds if (cn && cn->bh) { 37781da177e4SLinus Torvalds bh = cn->bh; 37791da177e4SLinus Torvalds get_bh(bh); 37801da177e4SLinus Torvalds } 37811da177e4SLinus Torvalds /* if it is journal new, we just remove it from this transaction */ 37821da177e4SLinus Torvalds if (bh && buffer_journal_new(bh)) { 37831da177e4SLinus Torvalds clear_buffer_journal_new(bh); 37841da177e4SLinus Torvalds clear_prepared_bits(bh); 37851da177e4SLinus Torvalds reiserfs_clean_and_file_buffer(bh); 3786a9dd3643SJeff Mahoney cleaned = remove_from_transaction(sb, blocknr, cleaned); 37871da177e4SLinus Torvalds } else { 37881da177e4SLinus Torvalds /* set the bit for this block in the journal bitmap for this transaction */ 
37891da177e4SLinus Torvalds jb = journal->j_current_jl->j_list_bitmap; 37901da177e4SLinus Torvalds if (!jb) { 3791a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-1702", 3792c3a9c210SJeff Mahoney "journal_list_bitmap is NULL"); 37931da177e4SLinus Torvalds } 3794a9dd3643SJeff Mahoney set_bit_in_list_bitmap(sb, blocknr, jb); 37951da177e4SLinus Torvalds 37961da177e4SLinus Torvalds /* Note, the entire while loop is not allowed to schedule. */ 37971da177e4SLinus Torvalds 37981da177e4SLinus Torvalds if (bh) { 37991da177e4SLinus Torvalds clear_prepared_bits(bh); 38001da177e4SLinus Torvalds reiserfs_clean_and_file_buffer(bh); 38011da177e4SLinus Torvalds } 3802a9dd3643SJeff Mahoney cleaned = remove_from_transaction(sb, blocknr, cleaned); 38031da177e4SLinus Torvalds 38041da177e4SLinus Torvalds /* find all older transactions with this block, make sure they don't try to write it out */ 3805a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, journal->j_list_hash_table, 3806bd4c625cSLinus Torvalds blocknr); 38071da177e4SLinus Torvalds while (cn) { 3808a9dd3643SJeff Mahoney if (sb == cn->sb && blocknr == cn->blocknr) { 38091da177e4SLinus Torvalds set_bit(BLOCK_FREED, &cn->state); 38101da177e4SLinus Torvalds if (cn->bh) { 38111da177e4SLinus Torvalds if (!cleaned) { 38121da177e4SLinus Torvalds /* remove_from_transaction will brelse the buffer if it was 38131da177e4SLinus Torvalds ** in the current trans 38141da177e4SLinus Torvalds */ 3815bd4c625cSLinus Torvalds clear_buffer_journal_dirty(cn-> 3816bd4c625cSLinus Torvalds bh); 38171da177e4SLinus Torvalds clear_buffer_dirty(cn->bh); 3818bd4c625cSLinus Torvalds clear_buffer_journal_test(cn-> 3819bd4c625cSLinus Torvalds bh); 38201da177e4SLinus Torvalds cleaned = 1; 38211da177e4SLinus Torvalds put_bh(cn->bh); 3822bd4c625cSLinus Torvalds if (atomic_read 3823bd4c625cSLinus Torvalds (&(cn->bh->b_count)) < 0) { 3824a9dd3643SJeff Mahoney reiserfs_warning(sb, 382545b03d5eSJeff Mahoney "journal-2138", 382645b03d5eSJeff Mahoney "cn->bh->b_count 
< 0"); 38271da177e4SLinus Torvalds } 38281da177e4SLinus Torvalds } 38291da177e4SLinus Torvalds if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3830bd4c625cSLinus Torvalds atomic_dec(& 3831bd4c625cSLinus Torvalds (cn->jlist-> 3832bd4c625cSLinus Torvalds j_nonzerolen)); 38331da177e4SLinus Torvalds } 38341da177e4SLinus Torvalds cn->bh = NULL; 38351da177e4SLinus Torvalds } 38361da177e4SLinus Torvalds } 38371da177e4SLinus Torvalds cn = cn->hnext; 38381da177e4SLinus Torvalds } 38391da177e4SLinus Torvalds } 38401da177e4SLinus Torvalds 3841398c95bdSChris Mason if (bh) 3842398c95bdSChris Mason release_buffer_page(bh); /* get_hash grabs the buffer */ 38431da177e4SLinus Torvalds return 0; 38441da177e4SLinus Torvalds } 38451da177e4SLinus Torvalds 3846bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode) 3847bd4c625cSLinus Torvalds { 38481da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); 38491da177e4SLinus Torvalds REISERFS_I(inode)->i_jl = journal->j_current_jl; 38501da177e4SLinus Torvalds REISERFS_I(inode)->i_trans_id = journal->j_trans_id; 38511da177e4SLinus Torvalds } 38521da177e4SLinus Torvalds 38531da177e4SLinus Torvalds /* 38541da177e4SLinus Torvalds * returns -1 on error, 0 if no commits/barriers were done and 1 38551da177e4SLinus Torvalds * if a transaction was actually committed and the barrier was done 38561da177e4SLinus Torvalds */ 38571da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id, 38581da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 38591da177e4SLinus Torvalds { 38601da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 38611da177e4SLinus Torvalds struct super_block *sb = inode->i_sb; 38621da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 38631da177e4SLinus Torvalds int ret = 0; 38641da177e4SLinus Torvalds 38651da177e4SLinus Torvalds /* is it from the current transaction, or from an unknown 
transaction? */ 38661da177e4SLinus Torvalds if (id == journal->j_trans_id) { 38671da177e4SLinus Torvalds jl = journal->j_current_jl; 38681da177e4SLinus Torvalds /* try to let other writers come in and grow this transaction */ 38691da177e4SLinus Torvalds let_transaction_grow(sb, id); 38701da177e4SLinus Torvalds if (journal->j_trans_id != id) { 38711da177e4SLinus Torvalds goto flush_commit_only; 38721da177e4SLinus Torvalds } 38731da177e4SLinus Torvalds 38741da177e4SLinus Torvalds ret = journal_begin(&th, sb, 1); 38751da177e4SLinus Torvalds if (ret) 38761da177e4SLinus Torvalds return ret; 38771da177e4SLinus Torvalds 38781da177e4SLinus Torvalds /* someone might have ended this transaction while we joined */ 38791da177e4SLinus Torvalds if (journal->j_trans_id != id) { 3880bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3881bd4c625cSLinus Torvalds 1); 38821da177e4SLinus Torvalds journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); 38831da177e4SLinus Torvalds ret = journal_end(&th, sb, 1); 38841da177e4SLinus Torvalds goto flush_commit_only; 38851da177e4SLinus Torvalds } 38861da177e4SLinus Torvalds 38871da177e4SLinus Torvalds ret = journal_end_sync(&th, sb, 1); 38881da177e4SLinus Torvalds if (!ret) 38891da177e4SLinus Torvalds ret = 1; 38901da177e4SLinus Torvalds 38911da177e4SLinus Torvalds } else { 38921da177e4SLinus Torvalds /* this gets tricky, we have to make sure the journal list in 38931da177e4SLinus Torvalds * the inode still exists. We know the list is still around 38941da177e4SLinus Torvalds * if we've got a larger transaction id than the oldest list 38951da177e4SLinus Torvalds */ 38961da177e4SLinus Torvalds flush_commit_only: 38971da177e4SLinus Torvalds if (journal_list_still_alive(inode->i_sb, id)) { 38981da177e4SLinus Torvalds /* 38991da177e4SLinus Torvalds * we only set ret to 1 when we know for sure 39001da177e4SLinus Torvalds * the barrier hasn't been started yet on the commit 39011da177e4SLinus Torvalds * block. 
39021da177e4SLinus Torvalds */ 39031da177e4SLinus Torvalds if (atomic_read(&jl->j_commit_left) > 1) 39041da177e4SLinus Torvalds ret = 1; 39051da177e4SLinus Torvalds flush_commit_list(sb, jl, 1); 39061da177e4SLinus Torvalds if (journal->j_errno) 39071da177e4SLinus Torvalds ret = journal->j_errno; 39081da177e4SLinus Torvalds } 39091da177e4SLinus Torvalds } 39101da177e4SLinus Torvalds /* otherwise the list is gone, and long since committed */ 39111da177e4SLinus Torvalds return ret; 39121da177e4SLinus Torvalds } 39131da177e4SLinus Torvalds 3914bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode) 3915bd4c625cSLinus Torvalds { 3916600ed416SJeff Mahoney unsigned int id = REISERFS_I(inode)->i_trans_id; 39171da177e4SLinus Torvalds struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 39181da177e4SLinus Torvalds 39191da177e4SLinus Torvalds /* for the whole inode, assume unset id means it was 39201da177e4SLinus Torvalds * changed in the current transaction. More conservative 39211da177e4SLinus Torvalds */ 39221da177e4SLinus Torvalds if (!id || !jl) { 39231da177e4SLinus Torvalds reiserfs_update_inode_transaction(inode); 39241da177e4SLinus Torvalds id = REISERFS_I(inode)->i_trans_id; 39251da177e4SLinus Torvalds /* jl will be updated in __commit_trans_jl */ 39261da177e4SLinus Torvalds } 39271da177e4SLinus Torvalds 39281da177e4SLinus Torvalds return __commit_trans_jl(inode, id, jl); 39291da177e4SLinus Torvalds } 39301da177e4SLinus Torvalds 3931a9dd3643SJeff Mahoney void reiserfs_restore_prepared_buffer(struct super_block *sb, 3932bd4c625cSLinus Torvalds struct buffer_head *bh) 3933bd4c625cSLinus Torvalds { 3934a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 3935a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.restore_prepared); 39361da177e4SLinus Torvalds if (!bh) { 39371da177e4SLinus Torvalds return; 39381da177e4SLinus Torvalds } 39391da177e4SLinus Torvalds if (test_clear_buffer_journal_restore_dirty(bh) && 39401da177e4SLinus 
Torvalds buffer_journal_dirty(bh)) { 39411da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 3942a9dd3643SJeff Mahoney cn = get_journal_hash_dev(sb, 39431da177e4SLinus Torvalds journal->j_list_hash_table, 39441da177e4SLinus Torvalds bh->b_blocknr); 39451da177e4SLinus Torvalds if (cn && can_dirty(cn)) { 39461da177e4SLinus Torvalds set_buffer_journal_test(bh); 39471da177e4SLinus Torvalds mark_buffer_dirty(bh); 39481da177e4SLinus Torvalds } 39491da177e4SLinus Torvalds } 39501da177e4SLinus Torvalds clear_buffer_journal_prepared(bh); 39511da177e4SLinus Torvalds } 39521da177e4SLinus Torvalds 39531da177e4SLinus Torvalds extern struct tree_balance *cur_tb; 39541da177e4SLinus Torvalds /* 39551da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't 39561da177e4SLinus Torvalds ** be written to disk while we are altering it. So, we must: 39571da177e4SLinus Torvalds ** clean it 39581da177e4SLinus Torvalds ** wait on it. 39591da177e4SLinus Torvalds ** 39601da177e4SLinus Torvalds */ 3961a9dd3643SJeff Mahoney int reiserfs_prepare_for_journal(struct super_block *sb, 3962bd4c625cSLinus Torvalds struct buffer_head *bh, int wait) 3963bd4c625cSLinus Torvalds { 3964a9dd3643SJeff Mahoney PROC_INFO_INC(sb, journal.prepare); 39651da177e4SLinus Torvalds 3966ca5de404SNick Piggin if (!trylock_buffer(bh)) { 39671da177e4SLinus Torvalds if (!wait) 39681da177e4SLinus Torvalds return 0; 39691da177e4SLinus Torvalds lock_buffer(bh); 39701da177e4SLinus Torvalds } 39711da177e4SLinus Torvalds set_buffer_journal_prepared(bh); 39721da177e4SLinus Torvalds if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { 39731da177e4SLinus Torvalds clear_buffer_journal_test(bh); 39741da177e4SLinus Torvalds set_buffer_journal_restore_dirty(bh); 39751da177e4SLinus Torvalds } 39761da177e4SLinus Torvalds unlock_buffer(bh); 39771da177e4SLinus Torvalds return 1; 39781da177e4SLinus Torvalds } 39791da177e4SLinus Torvalds 3980bd4c625cSLinus Torvalds static void 
flush_old_journal_lists(struct super_block *s) 3981bd4c625cSLinus Torvalds { 39821da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 39831da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 39841da177e4SLinus Torvalds struct list_head *entry; 39851da177e4SLinus Torvalds time_t now = get_seconds(); 39861da177e4SLinus Torvalds 39871da177e4SLinus Torvalds while (!list_empty(&journal->j_journal_list)) { 39881da177e4SLinus Torvalds entry = journal->j_journal_list.next; 39891da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 39901da177e4SLinus Torvalds /* this check should always be run, to send old lists to disk */ 3991a3172027SChris Mason if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && 3992a3172027SChris Mason atomic_read(&jl->j_commit_left) == 0 && 3993a3172027SChris Mason test_transaction(s, jl)) { 39941da177e4SLinus Torvalds flush_used_journal_lists(s, jl); 39951da177e4SLinus Torvalds } else { 39961da177e4SLinus Torvalds break; 39971da177e4SLinus Torvalds } 39981da177e4SLinus Torvalds } 39991da177e4SLinus Torvalds } 40001da177e4SLinus Torvalds 40011da177e4SLinus Torvalds /* 40021da177e4SLinus Torvalds ** long and ugly. If flush, will not return until all commit 40031da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk. 40041da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk. 40051da177e4SLinus Torvalds ** 40061da177e4SLinus Torvalds ** keep reading, there are comments as you go along 40071da177e4SLinus Torvalds ** 40081da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing 40091da177e4SLinus Torvalds ** journal lists, etc just won't happen. 
40101da177e4SLinus Torvalds */ 4011bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th, 4012a9dd3643SJeff Mahoney struct super_block *sb, unsigned long nblocks, 4013bd4c625cSLinus Torvalds int flags) 4014bd4c625cSLinus Torvalds { 4015a9dd3643SJeff Mahoney struct reiserfs_journal *journal = SB_JOURNAL(sb); 40161da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *next, *jl_cn; 40171da177e4SLinus Torvalds struct reiserfs_journal_cnode *last_cn = NULL; 40181da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 40191da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 40201da177e4SLinus Torvalds struct buffer_head *c_bh; /* commit bh */ 40211da177e4SLinus Torvalds struct buffer_head *d_bh; /* desc bh */ 40221da177e4SLinus Torvalds int cur_write_start = 0; /* start index of current log write */ 40231da177e4SLinus Torvalds int old_start; 40241da177e4SLinus Torvalds int i; 4025a44c94a7SAlexander Zarochentsev int flush; 4026a44c94a7SAlexander Zarochentsev int wait_on_commit; 40271da177e4SLinus Torvalds struct reiserfs_journal_list *jl, *temp_jl; 40281da177e4SLinus Torvalds struct list_head *entry, *safe; 40291da177e4SLinus Torvalds unsigned long jindex; 4030600ed416SJeff Mahoney unsigned int commit_trans_id; 40311da177e4SLinus Torvalds int trans_half; 40321da177e4SLinus Torvalds 40331da177e4SLinus Torvalds BUG_ON(th->t_refcount > 1); 40341da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 40351da177e4SLinus Torvalds 4036a44c94a7SAlexander Zarochentsev /* protect flush_older_commits from doing mistakes if the 4037a44c94a7SAlexander Zarochentsev transaction ID counter gets overflowed. 
*/ 4038600ed416SJeff Mahoney if (th->t_trans_id == ~0U) 4039a44c94a7SAlexander Zarochentsev flags |= FLUSH_ALL | COMMIT_NOW | WAIT; 4040a44c94a7SAlexander Zarochentsev flush = flags & FLUSH_ALL; 4041a44c94a7SAlexander Zarochentsev wait_on_commit = flags & WAIT; 4042a44c94a7SAlexander Zarochentsev 404322e2c507SJens Axboe put_fs_excl(); 40441da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 4045a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "journal end"); 40461da177e4SLinus Torvalds if (journal->j_len == 0) { 4047a9dd3643SJeff Mahoney reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 4048bd4c625cSLinus Torvalds 1); 4049a9dd3643SJeff Mahoney journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); 40501da177e4SLinus Torvalds } 40511da177e4SLinus Torvalds 4052a9dd3643SJeff Mahoney lock_journal(sb); 40531da177e4SLinus Torvalds if (journal->j_next_full_flush) { 40541da177e4SLinus Torvalds flags |= FLUSH_ALL; 40551da177e4SLinus Torvalds flush = 1; 40561da177e4SLinus Torvalds } 40571da177e4SLinus Torvalds if (journal->j_next_async_flush) { 40581da177e4SLinus Torvalds flags |= COMMIT_NOW | WAIT; 40591da177e4SLinus Torvalds wait_on_commit = 1; 40601da177e4SLinus Torvalds } 40611da177e4SLinus Torvalds 40621da177e4SLinus Torvalds /* check_journal_end locks the journal, and unlocks if it does not return 1 40631da177e4SLinus Torvalds ** it tells us if we should continue with the journal_end, or just return 40641da177e4SLinus Torvalds */ 4065a9dd3643SJeff Mahoney if (!check_journal_end(th, sb, nblocks, flags)) { 4066a9dd3643SJeff Mahoney sb->s_dirt = 1; 4067a9dd3643SJeff Mahoney wake_queued_writers(sb); 4068a9dd3643SJeff Mahoney reiserfs_async_progress_wait(sb); 40691da177e4SLinus Torvalds goto out; 40701da177e4SLinus Torvalds } 40711da177e4SLinus Torvalds 40721da177e4SLinus Torvalds /* check_journal_end might set these, check again */ 40731da177e4SLinus Torvalds if (journal->j_next_full_flush) { 40741da177e4SLinus Torvalds flush = 1; 40751da177e4SLinus 
Torvalds } 40761da177e4SLinus Torvalds 40771da177e4SLinus Torvalds /* 40781da177e4SLinus Torvalds ** j must wait means we have to flush the log blocks, and the real blocks for 40791da177e4SLinus Torvalds ** this transaction 40801da177e4SLinus Torvalds */ 40811da177e4SLinus Torvalds if (journal->j_must_wait > 0) { 40821da177e4SLinus Torvalds flush = 1; 40831da177e4SLinus Torvalds } 40841da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE 4085ef43bc4fSJan Kara /* quota ops might need to nest, setup the journal_info pointer for them 4086ef43bc4fSJan Kara * and raise the refcount so that it is > 0. */ 40871da177e4SLinus Torvalds current->journal_info = th; 4088ef43bc4fSJan Kara th->t_refcount++; 40891da177e4SLinus Torvalds reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 40901da177e4SLinus Torvalds * the transaction */ 4091ef43bc4fSJan Kara th->t_refcount--; 40921da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 40931da177e4SLinus Torvalds #endif 40941da177e4SLinus Torvalds 40951da177e4SLinus Torvalds /* setup description block */ 4096bd4c625cSLinus Torvalds d_bh = 4097a9dd3643SJeff Mahoney journal_getblk(sb, 4098a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 4099bd4c625cSLinus Torvalds journal->j_start); 41001da177e4SLinus Torvalds set_buffer_uptodate(d_bh); 41011da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; 41021da177e4SLinus Torvalds memset(d_bh->b_data, 0, d_bh->b_size); 41031da177e4SLinus Torvalds memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); 41041da177e4SLinus Torvalds set_desc_trans_id(desc, journal->j_trans_id); 41051da177e4SLinus Torvalds 41061da177e4SLinus Torvalds /* setup commit block. 
Don't write (keep it clean too) this one until after everyone else is written */ 4107a9dd3643SJeff Mahoney c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 4108bd4c625cSLinus Torvalds ((journal->j_start + journal->j_len + 4109a9dd3643SJeff Mahoney 1) % SB_ONDISK_JOURNAL_SIZE(sb))); 41101da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 41111da177e4SLinus Torvalds memset(c_bh->b_data, 0, c_bh->b_size); 41121da177e4SLinus Torvalds set_commit_trans_id(commit, journal->j_trans_id); 41131da177e4SLinus Torvalds set_buffer_uptodate(c_bh); 41141da177e4SLinus Torvalds 41151da177e4SLinus Torvalds /* init this journal list */ 41161da177e4SLinus Torvalds jl = journal->j_current_jl; 41171da177e4SLinus Torvalds 41181da177e4SLinus Torvalds /* we lock the commit before doing anything because 41191da177e4SLinus Torvalds * we want to make sure nobody tries to run flush_commit_list until 41201da177e4SLinus Torvalds * the new transaction is fully setup, and we've already flushed the 41211da177e4SLinus Torvalds * ordered bh list 41221da177e4SLinus Torvalds */ 41238ebc4232SFrederic Weisbecker reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb); 41241da177e4SLinus Torvalds 41251da177e4SLinus Torvalds /* save the transaction id in case we need to commit it later */ 41261da177e4SLinus Torvalds commit_trans_id = jl->j_trans_id; 41271da177e4SLinus Torvalds 41281da177e4SLinus Torvalds atomic_set(&jl->j_older_commits_done, 0); 41291da177e4SLinus Torvalds jl->j_trans_id = journal->j_trans_id; 41301da177e4SLinus Torvalds jl->j_timestamp = journal->j_trans_start_time; 41311da177e4SLinus Torvalds jl->j_commit_bh = c_bh; 41321da177e4SLinus Torvalds jl->j_start = journal->j_start; 41331da177e4SLinus Torvalds jl->j_len = journal->j_len; 41341da177e4SLinus Torvalds atomic_set(&jl->j_nonzerolen, journal->j_len); 41351da177e4SLinus Torvalds atomic_set(&jl->j_commit_left, journal->j_len + 2); 41361da177e4SLinus Torvalds jl->j_realblock = NULL; 41371da177e4SLinus 
Torvalds 41381da177e4SLinus Torvalds /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 41391da177e4SLinus Torvalds ** for each real block, add it to the journal list hash, 41401da177e4SLinus Torvalds ** copy into real block index array in the commit or desc block 41411da177e4SLinus Torvalds */ 4142a9dd3643SJeff Mahoney trans_half = journal_trans_half(sb->s_blocksize); 41431da177e4SLinus Torvalds for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { 41441da177e4SLinus Torvalds if (buffer_journaled(cn->bh)) { 4145a9dd3643SJeff Mahoney jl_cn = get_cnode(sb); 41461da177e4SLinus Torvalds if (!jl_cn) { 4147a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-1676", 4148c3a9c210SJeff Mahoney "get_cnode returned NULL"); 41491da177e4SLinus Torvalds } 41501da177e4SLinus Torvalds if (i == 0) { 41511da177e4SLinus Torvalds jl->j_realblock = jl_cn; 41521da177e4SLinus Torvalds } 41531da177e4SLinus Torvalds jl_cn->prev = last_cn; 41541da177e4SLinus Torvalds jl_cn->next = NULL; 41551da177e4SLinus Torvalds if (last_cn) { 41561da177e4SLinus Torvalds last_cn->next = jl_cn; 41571da177e4SLinus Torvalds } 41581da177e4SLinus Torvalds last_cn = jl_cn; 41591da177e4SLinus Torvalds /* make sure the block we are trying to log is not a block 41601da177e4SLinus Torvalds of journal or reserved area */ 41611da177e4SLinus Torvalds 4162bd4c625cSLinus Torvalds if (is_block_in_log_or_reserved_area 4163a9dd3643SJeff Mahoney (sb, cn->bh->b_blocknr)) { 4164a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-2332", 4165c3a9c210SJeff Mahoney "Trying to log block %lu, " 4166c3a9c210SJeff Mahoney "which is a log block", 4167bd4c625cSLinus Torvalds cn->bh->b_blocknr); 41681da177e4SLinus Torvalds } 41691da177e4SLinus Torvalds jl_cn->blocknr = cn->bh->b_blocknr; 41701da177e4SLinus Torvalds jl_cn->state = 0; 4171a9dd3643SJeff Mahoney jl_cn->sb = sb; 41721da177e4SLinus Torvalds jl_cn->bh = cn->bh; 41731da177e4SLinus Torvalds jl_cn->jlist = jl; 41741da177e4SLinus Torvalds 
insert_journal_hash(journal->j_list_hash_table, jl_cn); 41751da177e4SLinus Torvalds if (i < trans_half) { 4176bd4c625cSLinus Torvalds desc->j_realblock[i] = 4177bd4c625cSLinus Torvalds cpu_to_le32(cn->bh->b_blocknr); 41781da177e4SLinus Torvalds } else { 4179bd4c625cSLinus Torvalds commit->j_realblock[i - trans_half] = 4180bd4c625cSLinus Torvalds cpu_to_le32(cn->bh->b_blocknr); 41811da177e4SLinus Torvalds } 41821da177e4SLinus Torvalds } else { 41831da177e4SLinus Torvalds i--; 41841da177e4SLinus Torvalds } 41851da177e4SLinus Torvalds } 41861da177e4SLinus Torvalds set_desc_trans_len(desc, journal->j_len); 41871da177e4SLinus Torvalds set_desc_mount_id(desc, journal->j_mount_id); 41881da177e4SLinus Torvalds set_desc_trans_id(desc, journal->j_trans_id); 41891da177e4SLinus Torvalds set_commit_trans_len(commit, journal->j_len); 41901da177e4SLinus Torvalds 41911da177e4SLinus Torvalds /* special check in case all buffers in the journal were marked for not logging */ 419214a61442SEric Sesterhenn BUG_ON(journal->j_len == 0); 41931da177e4SLinus Torvalds 41941da177e4SLinus Torvalds /* we're about to dirty all the log blocks, mark the description block 41951da177e4SLinus Torvalds * dirty now too. Don't mark the commit block dirty until all the 41961da177e4SLinus Torvalds * others are on disk 41971da177e4SLinus Torvalds */ 41981da177e4SLinus Torvalds mark_buffer_dirty(d_bh); 41991da177e4SLinus Torvalds 42001da177e4SLinus Torvalds /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 42011da177e4SLinus Torvalds cur_write_start = journal->j_start; 42021da177e4SLinus Torvalds cn = journal->j_first; 42031da177e4SLinus Torvalds jindex = 1; /* start at one so we don't get the desc again */ 42041da177e4SLinus Torvalds while (cn) { 42051da177e4SLinus Torvalds clear_buffer_journal_new(cn->bh); 42061da177e4SLinus Torvalds /* copy all the real blocks into log area. 
dirty log blocks */ 42071da177e4SLinus Torvalds if (buffer_journaled(cn->bh)) { 42081da177e4SLinus Torvalds struct buffer_head *tmp_bh; 42091da177e4SLinus Torvalds char *addr; 42101da177e4SLinus Torvalds struct page *page; 4211bd4c625cSLinus Torvalds tmp_bh = 4212a9dd3643SJeff Mahoney journal_getblk(sb, 4213a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 4214bd4c625cSLinus Torvalds ((cur_write_start + 4215bd4c625cSLinus Torvalds jindex) % 4216a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb))); 42171da177e4SLinus Torvalds set_buffer_uptodate(tmp_bh); 42181da177e4SLinus Torvalds page = cn->bh->b_page; 42191da177e4SLinus Torvalds addr = kmap(page); 4220bd4c625cSLinus Torvalds memcpy(tmp_bh->b_data, 4221bd4c625cSLinus Torvalds addr + offset_in_page(cn->bh->b_data), 42221da177e4SLinus Torvalds cn->bh->b_size); 42231da177e4SLinus Torvalds kunmap(page); 42241da177e4SLinus Torvalds mark_buffer_dirty(tmp_bh); 42251da177e4SLinus Torvalds jindex++; 42261da177e4SLinus Torvalds set_buffer_journal_dirty(cn->bh); 42271da177e4SLinus Torvalds clear_buffer_journaled(cn->bh); 42281da177e4SLinus Torvalds } else { 42291da177e4SLinus Torvalds /* JDirty cleared sometime during transaction. 
don't log this one */ 4230a9dd3643SJeff Mahoney reiserfs_warning(sb, "journal-2048", 423145b03d5eSJeff Mahoney "BAD, buffer in journal hash, " 423245b03d5eSJeff Mahoney "but not JDirty!"); 42331da177e4SLinus Torvalds brelse(cn->bh); 42341da177e4SLinus Torvalds } 42351da177e4SLinus Torvalds next = cn->next; 4236a9dd3643SJeff Mahoney free_cnode(sb, cn); 42371da177e4SLinus Torvalds cn = next; 4238e6950a4dSFrederic Weisbecker reiserfs_write_unlock(sb); 42391da177e4SLinus Torvalds cond_resched(); 4240e6950a4dSFrederic Weisbecker reiserfs_write_lock(sb); 42411da177e4SLinus Torvalds } 42421da177e4SLinus Torvalds 42431da177e4SLinus Torvalds /* we are done with both the c_bh and d_bh, but 42441da177e4SLinus Torvalds ** c_bh must be written after all other commit blocks, 42451da177e4SLinus Torvalds ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 42461da177e4SLinus Torvalds */ 42471da177e4SLinus Torvalds 4248a9dd3643SJeff Mahoney journal->j_current_jl = alloc_journal_list(sb); 42491da177e4SLinus Torvalds 42501da177e4SLinus Torvalds /* now it is safe to insert this transaction on the main list */ 42511da177e4SLinus Torvalds list_add_tail(&jl->j_list, &journal->j_journal_list); 42521da177e4SLinus Torvalds list_add_tail(&jl->j_working_list, &journal->j_working_list); 42531da177e4SLinus Torvalds journal->j_num_work_lists++; 42541da177e4SLinus Torvalds 42551da177e4SLinus Torvalds /* reset journal values for the next transaction */ 42561da177e4SLinus Torvalds old_start = journal->j_start; 4257bd4c625cSLinus Torvalds journal->j_start = 4258bd4c625cSLinus Torvalds (journal->j_start + journal->j_len + 4259a9dd3643SJeff Mahoney 2) % SB_ONDISK_JOURNAL_SIZE(sb); 42601da177e4SLinus Torvalds atomic_set(&(journal->j_wcount), 0); 42611da177e4SLinus Torvalds journal->j_bcount = 0; 42621da177e4SLinus Torvalds journal->j_last = NULL; 42631da177e4SLinus Torvalds journal->j_first = NULL; 42641da177e4SLinus Torvalds journal->j_len = 0; 42651da177e4SLinus Torvalds 
journal->j_trans_start_time = 0; 4266a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 4267a44c94a7SAlexander Zarochentsev if (++journal->j_trans_id == 0) 4268a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 42691da177e4SLinus Torvalds journal->j_current_jl->j_trans_id = journal->j_trans_id; 42701da177e4SLinus Torvalds journal->j_must_wait = 0; 42711da177e4SLinus Torvalds journal->j_len_alloc = 0; 42721da177e4SLinus Torvalds journal->j_next_full_flush = 0; 42731da177e4SLinus Torvalds journal->j_next_async_flush = 0; 4274a9dd3643SJeff Mahoney init_journal_hash(sb); 42751da177e4SLinus Torvalds 42761da177e4SLinus Torvalds // make sure reiserfs_add_jh sees the new current_jl before we 42771da177e4SLinus Torvalds // write out the tails 42781da177e4SLinus Torvalds smp_mb(); 42791da177e4SLinus Torvalds 42801da177e4SLinus Torvalds /* tail conversion targets have to hit the disk before we end the 42811da177e4SLinus Torvalds * transaction. Otherwise a later transaction might repack the tail 42821da177e4SLinus Torvalds * before this transaction commits, leaving the data block unflushed and 42831da177e4SLinus Torvalds * clean, if we crash before the later transaction commits, the data block 42841da177e4SLinus Torvalds * is lost. 
42851da177e4SLinus Torvalds */ 42861da177e4SLinus Torvalds if (!list_empty(&jl->j_tail_bh_list)) { 42878ebc4232SFrederic Weisbecker reiserfs_write_unlock(sb); 42881da177e4SLinus Torvalds write_ordered_buffers(&journal->j_dirty_buffers_lock, 42891da177e4SLinus Torvalds journal, jl, &jl->j_tail_bh_list); 42908ebc4232SFrederic Weisbecker reiserfs_write_lock(sb); 42911da177e4SLinus Torvalds } 429214a61442SEric Sesterhenn BUG_ON(!list_empty(&jl->j_tail_bh_list)); 429390415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 42941da177e4SLinus Torvalds 42951da177e4SLinus Torvalds /* honor the flush wishes from the caller, simple commits can 42961da177e4SLinus Torvalds ** be done outside the journal lock, they are done below 42971da177e4SLinus Torvalds ** 42981da177e4SLinus Torvalds ** if we don't flush the commit list right now, we put it into 42991da177e4SLinus Torvalds ** the work queue so the people waiting on the async progress work 43001da177e4SLinus Torvalds ** queue don't wait for this proc to flush journal lists and such. 43011da177e4SLinus Torvalds */ 43021da177e4SLinus Torvalds if (flush) { 4303a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 4304a9dd3643SJeff Mahoney flush_journal_list(sb, jl, 1); 43051da177e4SLinus Torvalds } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 43061da177e4SLinus Torvalds queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); 43071da177e4SLinus Torvalds 43081da177e4SLinus Torvalds /* if the next transaction has any chance of wrapping, flush 43091da177e4SLinus Torvalds ** transactions that might get overwritten. If any journal lists are very 43101da177e4SLinus Torvalds ** old flush them as well. 
43111da177e4SLinus Torvalds */ 43121da177e4SLinus Torvalds first_jl: 43131da177e4SLinus Torvalds list_for_each_safe(entry, safe, &journal->j_journal_list) { 43141da177e4SLinus Torvalds temp_jl = JOURNAL_LIST_ENTRY(entry); 43151da177e4SLinus Torvalds if (journal->j_start <= temp_jl->j_start) { 43161da177e4SLinus Torvalds if ((journal->j_start + journal->j_trans_max + 1) >= 4317bd4c625cSLinus Torvalds temp_jl->j_start) { 4318a9dd3643SJeff Mahoney flush_used_journal_lists(sb, temp_jl); 43191da177e4SLinus Torvalds goto first_jl; 43201da177e4SLinus Torvalds } else if ((journal->j_start + 43211da177e4SLinus Torvalds journal->j_trans_max + 1) < 4322a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)) { 43231da177e4SLinus Torvalds /* if we don't cross into the next transaction and we don't 43241da177e4SLinus Torvalds * wrap, there is no way we can overlap any later transactions 43251da177e4SLinus Torvalds * break now 43261da177e4SLinus Torvalds */ 43271da177e4SLinus Torvalds break; 43281da177e4SLinus Torvalds } 43291da177e4SLinus Torvalds } else if ((journal->j_start + 43301da177e4SLinus Torvalds journal->j_trans_max + 1) > 4331a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)) { 43321da177e4SLinus Torvalds if (((journal->j_start + journal->j_trans_max + 1) % 4333a9dd3643SJeff Mahoney SB_ONDISK_JOURNAL_SIZE(sb)) >= 4334bd4c625cSLinus Torvalds temp_jl->j_start) { 4335a9dd3643SJeff Mahoney flush_used_journal_lists(sb, temp_jl); 43361da177e4SLinus Torvalds goto first_jl; 43371da177e4SLinus Torvalds } else { 43381da177e4SLinus Torvalds /* we don't overlap anything from out start to the end of the 43391da177e4SLinus Torvalds * log, and our wrapped portion doesn't overlap anything at 43401da177e4SLinus Torvalds * the start of the log. 
We can break 43411da177e4SLinus Torvalds */ 43421da177e4SLinus Torvalds break; 43431da177e4SLinus Torvalds } 43441da177e4SLinus Torvalds } 43451da177e4SLinus Torvalds } 4346a9dd3643SJeff Mahoney flush_old_journal_lists(sb); 43471da177e4SLinus Torvalds 4348bd4c625cSLinus Torvalds journal->j_current_jl->j_list_bitmap = 4349a9dd3643SJeff Mahoney get_list_bitmap(sb, journal->j_current_jl); 43501da177e4SLinus Torvalds 43511da177e4SLinus Torvalds if (!(journal->j_current_jl->j_list_bitmap)) { 4352a9dd3643SJeff Mahoney reiserfs_panic(sb, "journal-1996", 4353c3a9c210SJeff Mahoney "could not get a list bitmap"); 43541da177e4SLinus Torvalds } 43551da177e4SLinus Torvalds 43561da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 0); 4357a9dd3643SJeff Mahoney unlock_journal(sb); 43581da177e4SLinus Torvalds /* wake up any body waiting to join. */ 43591da177e4SLinus Torvalds clear_bit(J_WRITERS_QUEUED, &journal->j_state); 43601da177e4SLinus Torvalds wake_up(&(journal->j_join_wait)); 43611da177e4SLinus Torvalds 43621da177e4SLinus Torvalds if (!flush && wait_on_commit && 4363a9dd3643SJeff Mahoney journal_list_still_alive(sb, commit_trans_id)) { 4364a9dd3643SJeff Mahoney flush_commit_list(sb, jl, 1); 43651da177e4SLinus Torvalds } 43661da177e4SLinus Torvalds out: 4367a9dd3643SJeff Mahoney reiserfs_check_lock_depth(sb, "journal end2"); 43681da177e4SLinus Torvalds 43691da177e4SLinus Torvalds memset(th, 0, sizeof(*th)); 43701da177e4SLinus Torvalds /* Re-set th->t_super, so we can properly keep track of how many 43711da177e4SLinus Torvalds * persistent transactions there are. 
We need to do this so if this 43721da177e4SLinus Torvalds * call is part of a failed restart_transaction, we can free it later */ 4373a9dd3643SJeff Mahoney th->t_super = sb; 43741da177e4SLinus Torvalds 43751da177e4SLinus Torvalds return journal->j_errno; 43761da177e4SLinus Torvalds } 43771da177e4SLinus Torvalds
/*
 * Send the file system read only and flag the journal as aborted.
 *
 * sb:    super block whose journal is being aborted
 * errno: error code to remember in journal->j_errno; it is stored only
 *        when no error has been recorded there yet
 *
 * Does nothing when J_ABORTED is already set in the journal state.
 * NOTE(review): refusing new transactions presumably happens wherever
 * J_ABORTED is tested by the transaction-start paths -- not visible here.
 */
void reiserfs_abort_journal(struct super_block *sb, int errno)
{
	struct reiserfs_journal *jnl = SB_JOURNAL(sb);

	if (!test_bit(J_ABORTED, &jnl->j_state)) {
		/* keep the first recorded error, never overwrite it */
		if (jnl->j_errno == 0)
			jnl->j_errno = errno;

		sb->s_flags = sb->s_flags | MS_RDONLY;
		set_bit(J_ABORTED, &jnl->j_state);

#ifdef CONFIG_REISERFS_CHECK
		/* checking builds: print the call chain that led to the abort */
		dump_stack();
#endif
	}
}
43951da177e4SLinus Torvalds 4396