/*
** Write ahead logging implementation copyright Chris Mason 2000
**
** The background commits make this code very interrelated, and
** overly complex.  I need to rethink things a bit....The major players:
**
** journal_begin -- call with the number of blocks you expect to log.
**                  If the current transaction is too
**                  old, it will block until the current transaction is
**                  finished, and then start a new one.
**                  Usually, your transaction will get joined in with
**                  previous ones for speed.
**
** journal_join  -- same as journal_begin, but won't block on the current
**                  transaction regardless of age.  Don't ever call
**                  this.  Ever.  There are only two places it should be
**                  called from, and they are both inside this file.
**
** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
**                       that might make them get sent to disk
**                       and then marks them BH_JDirty.  Puts the buffer head
**                       into the current transaction hash.
**
** journal_end -- if the current transaction is batchable, it does nothing
**                   otherwise, it could do an async/synchronous commit, or
**                   a full flush of all log and real blocks in the
**                   transaction.
281da177e4SLinus Torvalds ** 291da177e4SLinus Torvalds ** flush_old_commits -- if the current transaction is too old, it is ended and 301da177e4SLinus Torvalds ** commit blocks are sent to disk. Forces commit blocks 311da177e4SLinus Torvalds ** to disk for all backgrounded commits that have been 321da177e4SLinus Torvalds ** around too long. 331da177e4SLinus Torvalds ** -- Note, if you call this as an immediate flush from 341da177e4SLinus Torvalds ** from within kupdate, it will ignore the immediate flag 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds #include <linux/time.h> 386188e10dSMatthew Wilcox #include <linux/semaphore.h> 391da177e4SLinus Torvalds #include <linux/vmalloc.h> 401da177e4SLinus Torvalds #include <linux/reiserfs_fs.h> 411da177e4SLinus Torvalds #include <linux/kernel.h> 421da177e4SLinus Torvalds #include <linux/errno.h> 431da177e4SLinus Torvalds #include <linux/fcntl.h> 441da177e4SLinus Torvalds #include <linux/stat.h> 451da177e4SLinus Torvalds #include <linux/string.h> 461da177e4SLinus Torvalds #include <linux/smp_lock.h> 471da177e4SLinus Torvalds #include <linux/buffer_head.h> 481da177e4SLinus Torvalds #include <linux/workqueue.h> 491da177e4SLinus Torvalds #include <linux/writeback.h> 501da177e4SLinus Torvalds #include <linux/blkdev.h> 513fcfab16SAndrew Morton #include <linux/backing-dev.h> 5290415deaSJeff Mahoney #include <linux/uaccess.h> 5390415deaSJeff Mahoney 5490415deaSJeff Mahoney #include <asm/system.h> 551da177e4SLinus Torvalds 561da177e4SLinus Torvalds /* gets a struct reiserfs_journal_list * from a list head */ 571da177e4SLinus Torvalds #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 581da177e4SLinus Torvalds j_list)) 591da177e4SLinus Torvalds #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 601da177e4SLinus Torvalds j_working_list)) 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds /* the number of mounted filesystems. 
This is used to decide when to 631da177e4SLinus Torvalds ** start and kill the commit workqueue 641da177e4SLinus Torvalds */ 651da177e4SLinus Torvalds static int reiserfs_mounted_fs_count; 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds static struct workqueue_struct *commit_wq; 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 701da177e4SLinus Torvalds structs at 4k */ 711da177e4SLinus Torvalds #define BUFNR 64 /*read ahead */ 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds /* cnode stat bits. Move these into reiserfs_fs.h */ 741da177e4SLinus Torvalds 751da177e4SLinus Torvalds #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 761da177e4SLinus Torvalds #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 791da177e4SLinus Torvalds #define BLOCK_DIRTIED 5 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds /* journal list state bits */ 821da177e4SLinus Torvalds #define LIST_TOUCHED 1 831da177e4SLinus Torvalds #define LIST_DIRTY 2 841da177e4SLinus Torvalds #define LIST_COMMIT_PENDING 4 /* someone will commit this list */ 851da177e4SLinus Torvalds 861da177e4SLinus Torvalds /* flags for do_journal_end */ 871da177e4SLinus Torvalds #define FLUSH_ALL 1 /* flush commit and real blocks */ 881da177e4SLinus Torvalds #define COMMIT_NOW 2 /* end and commit this transaction */ 891da177e4SLinus Torvalds #define WAIT 4 /* wait for the log blocks to hit the disk */ 901da177e4SLinus Torvalds 91bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *, 92bd4c625cSLinus Torvalds struct super_block *, unsigned long nblocks, 93bd4c625cSLinus Torvalds int flags); 94bd4c625cSLinus Torvalds static int flush_journal_list(struct super_block *s, 95bd4c625cSLinus Torvalds struct 
reiserfs_journal_list *jl, int flushall); 96bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s, 97bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall); 981da177e4SLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn); 99bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th, 100bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks); 1011da177e4SLinus Torvalds static int release_journal_dev(struct super_block *super, 1021da177e4SLinus Torvalds struct reiserfs_journal *journal); 1031da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s, 1041da177e4SLinus Torvalds struct reiserfs_journal_list *jl); 105c4028958SDavid Howells static void flush_async_commits(struct work_struct *work); 1061da177e4SLinus Torvalds static void queue_log_writer(struct super_block *s); 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds /* values for join in do_journal_begin_r */ 1091da177e4SLinus Torvalds enum { 1101da177e4SLinus Torvalds JBEGIN_REG = 0, /* regular journal begin */ 1111da177e4SLinus Torvalds JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 1121da177e4SLinus Torvalds JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 1131da177e4SLinus Torvalds }; 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 1161da177e4SLinus Torvalds struct super_block *p_s_sb, 1171da177e4SLinus Torvalds unsigned long nblocks, int join); 1181da177e4SLinus Torvalds 119bd4c625cSLinus Torvalds static void init_journal_hash(struct super_block *p_s_sb) 120bd4c625cSLinus Torvalds { 1211da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 122bd4c625cSLinus Torvalds memset(journal->j_hash_table, 0, 123bd4c625cSLinus Torvalds JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 1241da177e4SLinus Torvalds } 1251da177e4SLinus 
Torvalds 1261da177e4SLinus Torvalds /* 1271da177e4SLinus Torvalds ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to 1281da177e4SLinus Torvalds ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 1291da177e4SLinus Torvalds ** more details. 1301da177e4SLinus Torvalds */ 131bd4c625cSLinus Torvalds static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) 132bd4c625cSLinus Torvalds { 1331da177e4SLinus Torvalds if (bh) { 1341da177e4SLinus Torvalds clear_buffer_dirty(bh); 1351da177e4SLinus Torvalds clear_buffer_journal_test(bh); 1361da177e4SLinus Torvalds } 1371da177e4SLinus Torvalds return 0; 1381da177e4SLinus Torvalds } 1391da177e4SLinus Torvalds 1401da177e4SLinus Torvalds static void disable_barrier(struct super_block *s) 1411da177e4SLinus Torvalds { 1421da177e4SLinus Torvalds REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); 143bd4c625cSLinus Torvalds printk("reiserfs: disabling flush barriers on %s\n", 144bd4c625cSLinus Torvalds reiserfs_bdevname(s)); 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds 147bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block 148bd4c625cSLinus Torvalds *p_s_sb) 149bd4c625cSLinus Torvalds { 1501da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn; 1511da177e4SLinus Torvalds static int id; 1521da177e4SLinus Torvalds 153d739b42bSPekka Enberg bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS); 1541da177e4SLinus Torvalds if (!bn) { 1551da177e4SLinus Torvalds return NULL; 1561da177e4SLinus Torvalds } 157d739b42bSPekka Enberg bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS); 1581da177e4SLinus Torvalds if (!bn->data) { 159d739b42bSPekka Enberg kfree(bn); 1601da177e4SLinus Torvalds return NULL; 1611da177e4SLinus Torvalds } 1621da177e4SLinus Torvalds bn->id = id++; 1631da177e4SLinus Torvalds INIT_LIST_HEAD(&bn->list); 1641da177e4SLinus Torvalds return 
bn; 1651da177e4SLinus Torvalds } 1661da177e4SLinus Torvalds 167bd4c625cSLinus Torvalds static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb) 168bd4c625cSLinus Torvalds { 1691da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1701da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn = NULL; 1711da177e4SLinus Torvalds struct list_head *entry = journal->j_bitmap_nodes.next; 1721da177e4SLinus Torvalds 1731da177e4SLinus Torvalds journal->j_used_bitmap_nodes++; 1741da177e4SLinus Torvalds repeat: 1751da177e4SLinus Torvalds 1761da177e4SLinus Torvalds if (entry != &journal->j_bitmap_nodes) { 1771da177e4SLinus Torvalds bn = list_entry(entry, struct reiserfs_bitmap_node, list); 1781da177e4SLinus Torvalds list_del(entry); 1791da177e4SLinus Torvalds memset(bn->data, 0, p_s_sb->s_blocksize); 1801da177e4SLinus Torvalds journal->j_free_bitmap_nodes--; 1811da177e4SLinus Torvalds return bn; 1821da177e4SLinus Torvalds } 1831da177e4SLinus Torvalds bn = allocate_bitmap_node(p_s_sb); 1841da177e4SLinus Torvalds if (!bn) { 1851da177e4SLinus Torvalds yield(); 1861da177e4SLinus Torvalds goto repeat; 1871da177e4SLinus Torvalds } 1881da177e4SLinus Torvalds return bn; 1891da177e4SLinus Torvalds } 1901da177e4SLinus Torvalds static inline void free_bitmap_node(struct super_block *p_s_sb, 191bd4c625cSLinus Torvalds struct reiserfs_bitmap_node *bn) 192bd4c625cSLinus Torvalds { 1931da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1941da177e4SLinus Torvalds journal->j_used_bitmap_nodes--; 1951da177e4SLinus Torvalds if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 196d739b42bSPekka Enberg kfree(bn->data); 197d739b42bSPekka Enberg kfree(bn); 1981da177e4SLinus Torvalds } else { 1991da177e4SLinus Torvalds list_add(&bn->list, &journal->j_bitmap_nodes); 2001da177e4SLinus Torvalds journal->j_free_bitmap_nodes++; 2011da177e4SLinus Torvalds } 2021da177e4SLinus Torvalds } 2031da177e4SLinus Torvalds 
204bd4c625cSLinus Torvalds static void allocate_bitmap_nodes(struct super_block *p_s_sb) 205bd4c625cSLinus Torvalds { 2061da177e4SLinus Torvalds int i; 2071da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2081da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn = NULL; 2091da177e4SLinus Torvalds for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) { 2101da177e4SLinus Torvalds bn = allocate_bitmap_node(p_s_sb); 2111da177e4SLinus Torvalds if (bn) { 2121da177e4SLinus Torvalds list_add(&bn->list, &journal->j_bitmap_nodes); 2131da177e4SLinus Torvalds journal->j_free_bitmap_nodes++; 2141da177e4SLinus Torvalds } else { 2151da177e4SLinus Torvalds break; // this is ok, we'll try again when more are needed 2161da177e4SLinus Torvalds } 2171da177e4SLinus Torvalds } 2181da177e4SLinus Torvalds } 2191da177e4SLinus Torvalds 2203ee16670SJeff Mahoney static int set_bit_in_list_bitmap(struct super_block *p_s_sb, 2213ee16670SJeff Mahoney b_blocknr_t block, 222bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb) 223bd4c625cSLinus Torvalds { 2243ee16670SJeff Mahoney unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3); 2253ee16670SJeff Mahoney unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3); 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds if (!jb->bitmaps[bmap_nr]) { 2281da177e4SLinus Torvalds jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); 2291da177e4SLinus Torvalds } 2301da177e4SLinus Torvalds set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data); 2311da177e4SLinus Torvalds return 0; 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds static void cleanup_bitmap_list(struct super_block *p_s_sb, 235bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb) 236bd4c625cSLinus Torvalds { 2371da177e4SLinus Torvalds int i; 2381da177e4SLinus Torvalds if (jb->bitmaps == NULL) 2391da177e4SLinus Torvalds return; 2401da177e4SLinus Torvalds 241cb680c1bSJeff Mahoney for (i = 0; i < reiserfs_bmap_count(p_s_sb); 
i++) { 2421da177e4SLinus Torvalds if (jb->bitmaps[i]) { 2431da177e4SLinus Torvalds free_bitmap_node(p_s_sb, jb->bitmaps[i]); 2441da177e4SLinus Torvalds jb->bitmaps[i] = NULL; 2451da177e4SLinus Torvalds } 2461da177e4SLinus Torvalds } 2471da177e4SLinus Torvalds } 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds /* 2501da177e4SLinus Torvalds ** only call this on FS unmount. 2511da177e4SLinus Torvalds */ 2521da177e4SLinus Torvalds static int free_list_bitmaps(struct super_block *p_s_sb, 253bd4c625cSLinus Torvalds struct reiserfs_list_bitmap *jb_array) 254bd4c625cSLinus Torvalds { 2551da177e4SLinus Torvalds int i; 2561da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 2571da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 2581da177e4SLinus Torvalds jb = jb_array + i; 2591da177e4SLinus Torvalds jb->journal_list = NULL; 2601da177e4SLinus Torvalds cleanup_bitmap_list(p_s_sb, jb); 2611da177e4SLinus Torvalds vfree(jb->bitmaps); 2621da177e4SLinus Torvalds jb->bitmaps = NULL; 2631da177e4SLinus Torvalds } 2641da177e4SLinus Torvalds return 0; 2651da177e4SLinus Torvalds } 2661da177e4SLinus Torvalds 267bd4c625cSLinus Torvalds static int free_bitmap_nodes(struct super_block *p_s_sb) 268bd4c625cSLinus Torvalds { 2691da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2701da177e4SLinus Torvalds struct list_head *next = journal->j_bitmap_nodes.next; 2711da177e4SLinus Torvalds struct reiserfs_bitmap_node *bn; 2721da177e4SLinus Torvalds 2731da177e4SLinus Torvalds while (next != &journal->j_bitmap_nodes) { 2741da177e4SLinus Torvalds bn = list_entry(next, struct reiserfs_bitmap_node, list); 2751da177e4SLinus Torvalds list_del(next); 276d739b42bSPekka Enberg kfree(bn->data); 277d739b42bSPekka Enberg kfree(bn); 2781da177e4SLinus Torvalds next = journal->j_bitmap_nodes.next; 2791da177e4SLinus Torvalds journal->j_free_bitmap_nodes--; 2801da177e4SLinus Torvalds } 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds return 0; 
2831da177e4SLinus Torvalds } 2841da177e4SLinus Torvalds 2851da177e4SLinus Torvalds /* 2861da177e4SLinus Torvalds ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 2871da177e4SLinus Torvalds ** jb_array is the array to be filled in. 2881da177e4SLinus Torvalds */ 2891da177e4SLinus Torvalds int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 2901da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb_array, 2913ee16670SJeff Mahoney unsigned int bmap_nr) 292bd4c625cSLinus Torvalds { 2931da177e4SLinus Torvalds int i; 2941da177e4SLinus Torvalds int failed = 0; 2951da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 2961da177e4SLinus Torvalds int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *); 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 2991da177e4SLinus Torvalds jb = jb_array + i; 3001da177e4SLinus Torvalds jb->journal_list = NULL; 3011da177e4SLinus Torvalds jb->bitmaps = vmalloc(mem); 3021da177e4SLinus Torvalds if (!jb->bitmaps) { 30345b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "clm-2000", "unable to " 30445b03d5eSJeff Mahoney "allocate bitmaps for journal lists"); 3051da177e4SLinus Torvalds failed = 1; 3061da177e4SLinus Torvalds break; 3071da177e4SLinus Torvalds } 3081da177e4SLinus Torvalds memset(jb->bitmaps, 0, mem); 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds if (failed) { 3111da177e4SLinus Torvalds free_list_bitmaps(p_s_sb, jb_array); 3121da177e4SLinus Torvalds return -1; 3131da177e4SLinus Torvalds } 3141da177e4SLinus Torvalds return 0; 3151da177e4SLinus Torvalds } 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds /* 3181da177e4SLinus Torvalds ** find an available list bitmap. 
If you can't find one, flush a commit list 3191da177e4SLinus Torvalds ** and try again 3201da177e4SLinus Torvalds */ 321bd4c625cSLinus Torvalds static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, 322bd4c625cSLinus Torvalds struct reiserfs_journal_list 323bd4c625cSLinus Torvalds *jl) 324bd4c625cSLinus Torvalds { 3251da177e4SLinus Torvalds int i, j; 3261da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3271da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = NULL; 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { 3301da177e4SLinus Torvalds i = journal->j_list_bitmap_index; 3311da177e4SLinus Torvalds journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; 3321da177e4SLinus Torvalds jb = journal->j_list_bitmap + i; 3331da177e4SLinus Torvalds if (journal->j_list_bitmap[i].journal_list) { 334bd4c625cSLinus Torvalds flush_commit_list(p_s_sb, 335bd4c625cSLinus Torvalds journal->j_list_bitmap[i]. 336bd4c625cSLinus Torvalds journal_list, 1); 3371da177e4SLinus Torvalds if (!journal->j_list_bitmap[i].journal_list) { 3381da177e4SLinus Torvalds break; 3391da177e4SLinus Torvalds } 3401da177e4SLinus Torvalds } else { 3411da177e4SLinus Torvalds break; 3421da177e4SLinus Torvalds } 3431da177e4SLinus Torvalds } 3441da177e4SLinus Torvalds if (jb->journal_list) { /* double check to make sure if flushed correctly */ 3451da177e4SLinus Torvalds return NULL; 3461da177e4SLinus Torvalds } 3471da177e4SLinus Torvalds jb->journal_list = jl; 3481da177e4SLinus Torvalds return jb; 3491da177e4SLinus Torvalds } 3501da177e4SLinus Torvalds 3511da177e4SLinus Torvalds /* 3521da177e4SLinus Torvalds ** allocates a new chunk of X nodes, and links them all together as a list. 
3531da177e4SLinus Torvalds ** Uses the cnode->next and cnode->prev pointers 3541da177e4SLinus Torvalds ** returns NULL on failure 3551da177e4SLinus Torvalds */ 356bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) 357bd4c625cSLinus Torvalds { 3581da177e4SLinus Torvalds struct reiserfs_journal_cnode *head; 3591da177e4SLinus Torvalds int i; 3601da177e4SLinus Torvalds if (num_cnodes <= 0) { 3611da177e4SLinus Torvalds return NULL; 3621da177e4SLinus Torvalds } 3631da177e4SLinus Torvalds head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); 3641da177e4SLinus Torvalds if (!head) { 3651da177e4SLinus Torvalds return NULL; 3661da177e4SLinus Torvalds } 3671da177e4SLinus Torvalds memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); 3681da177e4SLinus Torvalds head[0].prev = NULL; 3691da177e4SLinus Torvalds head[0].next = head + 1; 3701da177e4SLinus Torvalds for (i = 1; i < num_cnodes; i++) { 3711da177e4SLinus Torvalds head[i].prev = head + (i - 1); 3721da177e4SLinus Torvalds head[i].next = head + (i + 1); /* if last one, overwrite it after the if */ 3731da177e4SLinus Torvalds } 3741da177e4SLinus Torvalds head[num_cnodes - 1].next = NULL; 3751da177e4SLinus Torvalds return head; 3761da177e4SLinus Torvalds } 3771da177e4SLinus Torvalds 3781da177e4SLinus Torvalds /* 3791da177e4SLinus Torvalds ** pulls a cnode off the free list, or returns NULL on failure 3801da177e4SLinus Torvalds */ 381bd4c625cSLinus Torvalds static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) 382bd4c625cSLinus Torvalds { 3831da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 3841da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3851da177e4SLinus Torvalds 3861da177e4SLinus Torvalds reiserfs_check_lock_depth(p_s_sb, "get_cnode"); 3871da177e4SLinus Torvalds 3881da177e4SLinus Torvalds if (journal->j_cnode_free <= 0) { 3891da177e4SLinus Torvalds return NULL; 3901da177e4SLinus Torvalds } 
3911da177e4SLinus Torvalds journal->j_cnode_used++; 3921da177e4SLinus Torvalds journal->j_cnode_free--; 3931da177e4SLinus Torvalds cn = journal->j_cnode_free_list; 3941da177e4SLinus Torvalds if (!cn) { 3951da177e4SLinus Torvalds return cn; 3961da177e4SLinus Torvalds } 3971da177e4SLinus Torvalds if (cn->next) { 3981da177e4SLinus Torvalds cn->next->prev = NULL; 3991da177e4SLinus Torvalds } 4001da177e4SLinus Torvalds journal->j_cnode_free_list = cn->next; 4011da177e4SLinus Torvalds memset(cn, 0, sizeof(struct reiserfs_journal_cnode)); 4021da177e4SLinus Torvalds return cn; 4031da177e4SLinus Torvalds } 4041da177e4SLinus Torvalds 4051da177e4SLinus Torvalds /* 4061da177e4SLinus Torvalds ** returns a cnode to the free list 4071da177e4SLinus Torvalds */ 408bd4c625cSLinus Torvalds static void free_cnode(struct super_block *p_s_sb, 409bd4c625cSLinus Torvalds struct reiserfs_journal_cnode *cn) 410bd4c625cSLinus Torvalds { 4111da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 4121da177e4SLinus Torvalds 4131da177e4SLinus Torvalds reiserfs_check_lock_depth(p_s_sb, "free_cnode"); 4141da177e4SLinus Torvalds 4151da177e4SLinus Torvalds journal->j_cnode_used--; 4161da177e4SLinus Torvalds journal->j_cnode_free++; 4171da177e4SLinus Torvalds /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ 4181da177e4SLinus Torvalds cn->next = journal->j_cnode_free_list; 4191da177e4SLinus Torvalds if (journal->j_cnode_free_list) { 4201da177e4SLinus Torvalds journal->j_cnode_free_list->prev = cn; 4211da177e4SLinus Torvalds } 4221da177e4SLinus Torvalds cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */ 4231da177e4SLinus Torvalds journal->j_cnode_free_list = cn; 4241da177e4SLinus Torvalds } 4251da177e4SLinus Torvalds 426bd4c625cSLinus Torvalds static void clear_prepared_bits(struct buffer_head *bh) 427bd4c625cSLinus Torvalds { 4281da177e4SLinus Torvalds clear_buffer_journal_prepared(bh); 4291da177e4SLinus Torvalds 
clear_buffer_journal_restore_dirty(bh); 4301da177e4SLinus Torvalds } 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds /* utility function to force a BUG if it is called without the big 4331da177e4SLinus Torvalds ** kernel lock held. caller is the string printed just before calling BUG() 4341da177e4SLinus Torvalds */ 435bd4c625cSLinus Torvalds void reiserfs_check_lock_depth(struct super_block *sb, char *caller) 436bd4c625cSLinus Torvalds { 4371da177e4SLinus Torvalds #ifdef CONFIG_SMP 4381da177e4SLinus Torvalds if (current->lock_depth < 0) { 439bd4c625cSLinus Torvalds reiserfs_panic(sb, "%s called without kernel lock held", 440bd4c625cSLinus Torvalds caller); 4411da177e4SLinus Torvalds } 4421da177e4SLinus Torvalds #else 4431da177e4SLinus Torvalds ; 4441da177e4SLinus Torvalds #endif 4451da177e4SLinus Torvalds } 4461da177e4SLinus Torvalds 4471da177e4SLinus Torvalds /* return a cnode with same dev, block number and size in table, or null if not found */ 448bd4c625cSLinus Torvalds static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 449bd4c625cSLinus Torvalds super_block 450bd4c625cSLinus Torvalds *sb, 451bd4c625cSLinus Torvalds struct 452bd4c625cSLinus Torvalds reiserfs_journal_cnode 453bd4c625cSLinus Torvalds **table, 4541da177e4SLinus Torvalds long bl) 4551da177e4SLinus Torvalds { 4561da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 4571da177e4SLinus Torvalds cn = journal_hash(table, sb, bl); 4581da177e4SLinus Torvalds while (cn) { 4591da177e4SLinus Torvalds if (cn->blocknr == bl && cn->sb == sb) 4601da177e4SLinus Torvalds return cn; 4611da177e4SLinus Torvalds cn = cn->hnext; 4621da177e4SLinus Torvalds } 4631da177e4SLinus Torvalds return (struct reiserfs_journal_cnode *)0; 4641da177e4SLinus Torvalds } 4651da177e4SLinus Torvalds 4661da177e4SLinus Torvalds /* 4671da177e4SLinus Torvalds ** this actually means 'can this block be reallocated yet?'. 
If you set search_all, a block can only be allocated 4681da177e4SLinus Torvalds ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever 4691da177e4SLinus Torvalds ** being overwritten by a replay after crashing. 4701da177e4SLinus Torvalds ** 4711da177e4SLinus Torvalds ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting 4721da177e4SLinus Torvalds ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make 4731da177e4SLinus Torvalds ** sure you never write the block without logging it. 4741da177e4SLinus Torvalds ** 4751da177e4SLinus Torvalds ** next_zero_bit is a suggestion about the next block to try for find_forward. 4761da177e4SLinus Torvalds ** when bl is rejected because it is set in a journal list bitmap, we search 4771da177e4SLinus Torvalds ** for the next zero bit in the bitmap that rejected bl. Then, we return that 4781da177e4SLinus Torvalds ** through next_zero_bit for find_forward to try. 4791da177e4SLinus Torvalds ** 4801da177e4SLinus Torvalds ** Just because we return something in next_zero_bit does not mean we won't 4811da177e4SLinus Torvalds ** reject it on the next call to reiserfs_in_journal 4821da177e4SLinus Torvalds ** 4831da177e4SLinus Torvalds */ 4841da177e4SLinus Torvalds int reiserfs_in_journal(struct super_block *p_s_sb, 4853ee16670SJeff Mahoney unsigned int bmap_nr, int bit_nr, int search_all, 486bd4c625cSLinus Torvalds b_blocknr_t * next_zero_bit) 487bd4c625cSLinus Torvalds { 4881da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 4891da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 4901da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb; 4911da177e4SLinus Torvalds int i; 4921da177e4SLinus Torvalds unsigned long bl; 4931da177e4SLinus Torvalds 4941da177e4SLinus Torvalds *next_zero_bit = 0; /* always start this at zero. 
*/ 4951da177e4SLinus Torvalds 4961da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.in_journal); 4971da177e4SLinus Torvalds /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 4981da177e4SLinus Torvalds ** if we crash before the transaction that freed it commits, this transaction won't 4991da177e4SLinus Torvalds ** have committed either, and the block will never be written 5001da177e4SLinus Torvalds */ 5011da177e4SLinus Torvalds if (search_all) { 5021da177e4SLinus Torvalds for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 5031da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap); 5041da177e4SLinus Torvalds jb = journal->j_list_bitmap + i; 5051da177e4SLinus Torvalds if (jb->journal_list && jb->bitmaps[bmap_nr] && 506bd4c625cSLinus Torvalds test_bit(bit_nr, 507bd4c625cSLinus Torvalds (unsigned long *)jb->bitmaps[bmap_nr]-> 508bd4c625cSLinus Torvalds data)) { 509bd4c625cSLinus Torvalds *next_zero_bit = 510bd4c625cSLinus Torvalds find_next_zero_bit((unsigned long *) 511bd4c625cSLinus Torvalds (jb->bitmaps[bmap_nr]-> 512bd4c625cSLinus Torvalds data), 513bd4c625cSLinus Torvalds p_s_sb->s_blocksize << 3, 514bd4c625cSLinus Torvalds bit_nr + 1); 5151da177e4SLinus Torvalds return 1; 5161da177e4SLinus Torvalds } 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds } 5191da177e4SLinus Torvalds 5201da177e4SLinus Torvalds bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; 5211da177e4SLinus Torvalds /* is it in any old transactions? */ 522bd4c625cSLinus Torvalds if (search_all 523bd4c625cSLinus Torvalds && (cn = 524bd4c625cSLinus Torvalds get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { 5251da177e4SLinus Torvalds return 1; 5261da177e4SLinus Torvalds } 5271da177e4SLinus Torvalds 5281da177e4SLinus Torvalds /* is it in the current transaction. 
This should never happen */ 5291da177e4SLinus Torvalds if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { 5301da177e4SLinus Torvalds BUG(); 5311da177e4SLinus Torvalds return 1; 5321da177e4SLinus Torvalds } 5331da177e4SLinus Torvalds 5341da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.in_journal_reusable); 5351da177e4SLinus Torvalds /* safe for reuse */ 5361da177e4SLinus Torvalds return 0; 5371da177e4SLinus Torvalds } 5381da177e4SLinus Torvalds 5391da177e4SLinus Torvalds /* insert cn into table 5401da177e4SLinus Torvalds */ 541bd4c625cSLinus Torvalds static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, 542bd4c625cSLinus Torvalds struct reiserfs_journal_cnode *cn) 543bd4c625cSLinus Torvalds { 5441da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn_orig; 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds cn_orig = journal_hash(table, cn->sb, cn->blocknr); 5471da177e4SLinus Torvalds cn->hnext = cn_orig; 5481da177e4SLinus Torvalds cn->hprev = NULL; 5491da177e4SLinus Torvalds if (cn_orig) { 5501da177e4SLinus Torvalds cn_orig->hprev = cn; 5511da177e4SLinus Torvalds } 5521da177e4SLinus Torvalds journal_hash(table, cn->sb, cn->blocknr) = cn; 5531da177e4SLinus Torvalds } 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds /* lock the current transaction */ 55677933d72SJesper Juhl static inline void lock_journal(struct super_block *p_s_sb) 557bd4c625cSLinus Torvalds { 5581da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.lock_journal); 559f68215c4SJeff Mahoney mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); 5601da177e4SLinus Torvalds } 5611da177e4SLinus Torvalds 5621da177e4SLinus Torvalds /* unlock the current transaction */ 56377933d72SJesper Juhl static inline void unlock_journal(struct super_block *p_s_sb) 564bd4c625cSLinus Torvalds { 565f68215c4SJeff Mahoney mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); 5661da177e4SLinus Torvalds } 5671da177e4SLinus Torvalds 5681da177e4SLinus Torvalds static inline void 
get_journal_list(struct reiserfs_journal_list *jl) 5691da177e4SLinus Torvalds { 5701da177e4SLinus Torvalds jl->j_refcount++; 5711da177e4SLinus Torvalds } 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds static inline void put_journal_list(struct super_block *s, 5741da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 5751da177e4SLinus Torvalds { 5761da177e4SLinus Torvalds if (jl->j_refcount < 1) { 577600ed416SJeff Mahoney reiserfs_panic(s, "trans id %u, refcount at %d", 578bd4c625cSLinus Torvalds jl->j_trans_id, jl->j_refcount); 5791da177e4SLinus Torvalds } 5801da177e4SLinus Torvalds if (--jl->j_refcount == 0) 581d739b42bSPekka Enberg kfree(jl); 5821da177e4SLinus Torvalds } 5831da177e4SLinus Torvalds 5841da177e4SLinus Torvalds /* 5851da177e4SLinus Torvalds ** this used to be much more involved, and I'm keeping it just in case things get ugly again. 5861da177e4SLinus Torvalds ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 5871da177e4SLinus Torvalds ** transaction. 
5881da177e4SLinus Torvalds */ 589bd4c625cSLinus Torvalds static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, 590bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 591bd4c625cSLinus Torvalds { 5921da177e4SLinus Torvalds 5931da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; 5941da177e4SLinus Torvalds if (jb) { 5951da177e4SLinus Torvalds cleanup_bitmap_list(p_s_sb, jb); 5961da177e4SLinus Torvalds } 5971da177e4SLinus Torvalds jl->j_list_bitmap->journal_list = NULL; 5981da177e4SLinus Torvalds jl->j_list_bitmap = NULL; 5991da177e4SLinus Torvalds } 6001da177e4SLinus Torvalds 6011da177e4SLinus Torvalds static int journal_list_still_alive(struct super_block *s, 602600ed416SJeff Mahoney unsigned int trans_id) 6031da177e4SLinus Torvalds { 6041da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 6051da177e4SLinus Torvalds struct list_head *entry = &journal->j_journal_list; 6061da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 6071da177e4SLinus Torvalds 6081da177e4SLinus Torvalds if (!list_empty(entry)) { 6091da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry->next); 6101da177e4SLinus Torvalds if (jl->j_trans_id <= trans_id) { 6111da177e4SLinus Torvalds return 1; 6121da177e4SLinus Torvalds } 6131da177e4SLinus Torvalds } 6141da177e4SLinus Torvalds return 0; 6151da177e4SLinus Torvalds } 6161da177e4SLinus Torvalds 617398c95bdSChris Mason /* 618398c95bdSChris Mason * If page->mapping was null, we failed to truncate this page for 619398c95bdSChris Mason * some reason. Most likely because it was truncated after being 620398c95bdSChris Mason * logged via data=journal. 621398c95bdSChris Mason * 622398c95bdSChris Mason * This does a check to see if the buffer belongs to one of these 623398c95bdSChris Mason * lost pages before doing the final put_bh. 
If page->mapping was 624398c95bdSChris Mason * null, it tries to free buffers on the page, which should make the 625398c95bdSChris Mason * final page_cache_release drop the page from the lru. 626398c95bdSChris Mason */ 627398c95bdSChris Mason static void release_buffer_page(struct buffer_head *bh) 628398c95bdSChris Mason { 629398c95bdSChris Mason struct page *page = bh->b_page; 630529ae9aaSNick Piggin if (!page->mapping && trylock_page(page)) { 631398c95bdSChris Mason page_cache_get(page); 632398c95bdSChris Mason put_bh(bh); 633398c95bdSChris Mason if (!page->mapping) 634398c95bdSChris Mason try_to_free_buffers(page); 635398c95bdSChris Mason unlock_page(page); 636398c95bdSChris Mason page_cache_release(page); 637398c95bdSChris Mason } else { 638398c95bdSChris Mason put_bh(bh); 639398c95bdSChris Mason } 640398c95bdSChris Mason } 641398c95bdSChris Mason 642bd4c625cSLinus Torvalds static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 643bd4c625cSLinus Torvalds { 6441da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 6451da177e4SLinus Torvalds 6461da177e4SLinus Torvalds if (buffer_journaled(bh)) { 64745b03d5eSJeff Mahoney reiserfs_warning(NULL, "clm-2084", 64845b03d5eSJeff Mahoney "pinned buffer %lu:%s sent to disk", 6491da177e4SLinus Torvalds bh->b_blocknr, bdevname(bh->b_bdev, b)); 6501da177e4SLinus Torvalds } 6511da177e4SLinus Torvalds if (uptodate) 6521da177e4SLinus Torvalds set_buffer_uptodate(bh); 6531da177e4SLinus Torvalds else 6541da177e4SLinus Torvalds clear_buffer_uptodate(bh); 655398c95bdSChris Mason 6561da177e4SLinus Torvalds unlock_buffer(bh); 657398c95bdSChris Mason release_buffer_page(bh); 6581da177e4SLinus Torvalds } 6591da177e4SLinus Torvalds 660bd4c625cSLinus Torvalds static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) 661bd4c625cSLinus Torvalds { 6621da177e4SLinus Torvalds if (uptodate) 6631da177e4SLinus Torvalds set_buffer_uptodate(bh); 6641da177e4SLinus Torvalds else 6651da177e4SLinus Torvalds 
clear_buffer_uptodate(bh); 6661da177e4SLinus Torvalds unlock_buffer(bh); 6671da177e4SLinus Torvalds put_bh(bh); 6681da177e4SLinus Torvalds } 6691da177e4SLinus Torvalds 670bd4c625cSLinus Torvalds static void submit_logged_buffer(struct buffer_head *bh) 671bd4c625cSLinus Torvalds { 6721da177e4SLinus Torvalds get_bh(bh); 6731da177e4SLinus Torvalds bh->b_end_io = reiserfs_end_buffer_io_sync; 6741da177e4SLinus Torvalds clear_buffer_journal_new(bh); 6751da177e4SLinus Torvalds clear_buffer_dirty(bh); 6761da177e4SLinus Torvalds if (!test_clear_buffer_journal_test(bh)) 6771da177e4SLinus Torvalds BUG(); 6781da177e4SLinus Torvalds if (!buffer_uptodate(bh)) 6791da177e4SLinus Torvalds BUG(); 6801da177e4SLinus Torvalds submit_bh(WRITE, bh); 6811da177e4SLinus Torvalds } 6821da177e4SLinus Torvalds 683bd4c625cSLinus Torvalds static void submit_ordered_buffer(struct buffer_head *bh) 684bd4c625cSLinus Torvalds { 6851da177e4SLinus Torvalds get_bh(bh); 6861da177e4SLinus Torvalds bh->b_end_io = reiserfs_end_ordered_io; 6871da177e4SLinus Torvalds clear_buffer_dirty(bh); 6881da177e4SLinus Torvalds if (!buffer_uptodate(bh)) 6891da177e4SLinus Torvalds BUG(); 6901da177e4SLinus Torvalds submit_bh(WRITE, bh); 6911da177e4SLinus Torvalds } 6921da177e4SLinus Torvalds 693bd4c625cSLinus Torvalds static int submit_barrier_buffer(struct buffer_head *bh) 694bd4c625cSLinus Torvalds { 6951da177e4SLinus Torvalds get_bh(bh); 6961da177e4SLinus Torvalds bh->b_end_io = reiserfs_end_ordered_io; 6971da177e4SLinus Torvalds clear_buffer_dirty(bh); 6981da177e4SLinus Torvalds if (!buffer_uptodate(bh)) 6991da177e4SLinus Torvalds BUG(); 7001da177e4SLinus Torvalds return submit_bh(WRITE_BARRIER, bh); 7011da177e4SLinus Torvalds } 7021da177e4SLinus Torvalds 7031da177e4SLinus Torvalds static void check_barrier_completion(struct super_block *s, 704bd4c625cSLinus Torvalds struct buffer_head *bh) 705bd4c625cSLinus Torvalds { 7061da177e4SLinus Torvalds if (buffer_eopnotsupp(bh)) { 7071da177e4SLinus Torvalds 
clear_buffer_eopnotsupp(bh); 7081da177e4SLinus Torvalds disable_barrier(s); 7091da177e4SLinus Torvalds set_buffer_uptodate(bh); 7101da177e4SLinus Torvalds set_buffer_dirty(bh); 7111da177e4SLinus Torvalds sync_dirty_buffer(bh); 7121da177e4SLinus Torvalds } 7131da177e4SLinus Torvalds } 7141da177e4SLinus Torvalds 7151da177e4SLinus Torvalds #define CHUNK_SIZE 32 7161da177e4SLinus Torvalds struct buffer_chunk { 7171da177e4SLinus Torvalds struct buffer_head *bh[CHUNK_SIZE]; 7181da177e4SLinus Torvalds int nr; 7191da177e4SLinus Torvalds }; 7201da177e4SLinus Torvalds 721bd4c625cSLinus Torvalds static void write_chunk(struct buffer_chunk *chunk) 722bd4c625cSLinus Torvalds { 7231da177e4SLinus Torvalds int i; 72422e2c507SJens Axboe get_fs_excl(); 7251da177e4SLinus Torvalds for (i = 0; i < chunk->nr; i++) { 7261da177e4SLinus Torvalds submit_logged_buffer(chunk->bh[i]); 7271da177e4SLinus Torvalds } 7281da177e4SLinus Torvalds chunk->nr = 0; 72922e2c507SJens Axboe put_fs_excl(); 7301da177e4SLinus Torvalds } 7311da177e4SLinus Torvalds 732bd4c625cSLinus Torvalds static void write_ordered_chunk(struct buffer_chunk *chunk) 733bd4c625cSLinus Torvalds { 7341da177e4SLinus Torvalds int i; 73522e2c507SJens Axboe get_fs_excl(); 7361da177e4SLinus Torvalds for (i = 0; i < chunk->nr; i++) { 7371da177e4SLinus Torvalds submit_ordered_buffer(chunk->bh[i]); 7381da177e4SLinus Torvalds } 7391da177e4SLinus Torvalds chunk->nr = 0; 74022e2c507SJens Axboe put_fs_excl(); 7411da177e4SLinus Torvalds } 7421da177e4SLinus Torvalds 7431da177e4SLinus Torvalds static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 744bd4c625cSLinus Torvalds spinlock_t * lock, void (fn) (struct buffer_chunk *)) 7451da177e4SLinus Torvalds { 7461da177e4SLinus Torvalds int ret = 0; 74714a61442SEric Sesterhenn BUG_ON(chunk->nr >= CHUNK_SIZE); 7481da177e4SLinus Torvalds chunk->bh[chunk->nr++] = bh; 7491da177e4SLinus Torvalds if (chunk->nr >= CHUNK_SIZE) { 7501da177e4SLinus Torvalds ret = 1; 7511da177e4SLinus 
Torvalds if (lock) 7521da177e4SLinus Torvalds spin_unlock(lock); 7531da177e4SLinus Torvalds fn(chunk); 7541da177e4SLinus Torvalds if (lock) 7551da177e4SLinus Torvalds spin_lock(lock); 7561da177e4SLinus Torvalds } 7571da177e4SLinus Torvalds return ret; 7581da177e4SLinus Torvalds } 7591da177e4SLinus Torvalds 7601da177e4SLinus Torvalds static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); 761bd4c625cSLinus Torvalds static struct reiserfs_jh *alloc_jh(void) 762bd4c625cSLinus Torvalds { 7631da177e4SLinus Torvalds struct reiserfs_jh *jh; 7641da177e4SLinus Torvalds while (1) { 7651da177e4SLinus Torvalds jh = kmalloc(sizeof(*jh), GFP_NOFS); 7661da177e4SLinus Torvalds if (jh) { 7671da177e4SLinus Torvalds atomic_inc(&nr_reiserfs_jh); 7681da177e4SLinus Torvalds return jh; 7691da177e4SLinus Torvalds } 7701da177e4SLinus Torvalds yield(); 7711da177e4SLinus Torvalds } 7721da177e4SLinus Torvalds } 7731da177e4SLinus Torvalds 7741da177e4SLinus Torvalds /* 7751da177e4SLinus Torvalds * we want to free the jh when the buffer has been written 7761da177e4SLinus Torvalds * and waited on 7771da177e4SLinus Torvalds */ 778bd4c625cSLinus Torvalds void reiserfs_free_jh(struct buffer_head *bh) 779bd4c625cSLinus Torvalds { 7801da177e4SLinus Torvalds struct reiserfs_jh *jh; 7811da177e4SLinus Torvalds 7821da177e4SLinus Torvalds jh = bh->b_private; 7831da177e4SLinus Torvalds if (jh) { 7841da177e4SLinus Torvalds bh->b_private = NULL; 7851da177e4SLinus Torvalds jh->bh = NULL; 7861da177e4SLinus Torvalds list_del_init(&jh->list); 7871da177e4SLinus Torvalds kfree(jh); 7881da177e4SLinus Torvalds if (atomic_read(&nr_reiserfs_jh) <= 0) 7891da177e4SLinus Torvalds BUG(); 7901da177e4SLinus Torvalds atomic_dec(&nr_reiserfs_jh); 7911da177e4SLinus Torvalds put_bh(bh); 7921da177e4SLinus Torvalds } 7931da177e4SLinus Torvalds } 7941da177e4SLinus Torvalds 7951da177e4SLinus Torvalds static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, 7961da177e4SLinus Torvalds int tail) 7971da177e4SLinus 
Torvalds { 7981da177e4SLinus Torvalds struct reiserfs_jh *jh; 7991da177e4SLinus Torvalds 8001da177e4SLinus Torvalds if (bh->b_private) { 8011da177e4SLinus Torvalds spin_lock(&j->j_dirty_buffers_lock); 8021da177e4SLinus Torvalds if (!bh->b_private) { 8031da177e4SLinus Torvalds spin_unlock(&j->j_dirty_buffers_lock); 8041da177e4SLinus Torvalds goto no_jh; 8051da177e4SLinus Torvalds } 8061da177e4SLinus Torvalds jh = bh->b_private; 8071da177e4SLinus Torvalds list_del_init(&jh->list); 8081da177e4SLinus Torvalds } else { 8091da177e4SLinus Torvalds no_jh: 8101da177e4SLinus Torvalds get_bh(bh); 8111da177e4SLinus Torvalds jh = alloc_jh(); 8121da177e4SLinus Torvalds spin_lock(&j->j_dirty_buffers_lock); 8131da177e4SLinus Torvalds /* buffer must be locked for __add_jh, should be able to have 8141da177e4SLinus Torvalds * two adds at the same time 8151da177e4SLinus Torvalds */ 81614a61442SEric Sesterhenn BUG_ON(bh->b_private); 8171da177e4SLinus Torvalds jh->bh = bh; 8181da177e4SLinus Torvalds bh->b_private = jh; 8191da177e4SLinus Torvalds } 8201da177e4SLinus Torvalds jh->jl = j->j_current_jl; 8211da177e4SLinus Torvalds if (tail) 8221da177e4SLinus Torvalds list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); 8231da177e4SLinus Torvalds else { 8241da177e4SLinus Torvalds list_add_tail(&jh->list, &jh->jl->j_bh_list); 8251da177e4SLinus Torvalds } 8261da177e4SLinus Torvalds spin_unlock(&j->j_dirty_buffers_lock); 8271da177e4SLinus Torvalds return 0; 8281da177e4SLinus Torvalds } 8291da177e4SLinus Torvalds 830bd4c625cSLinus Torvalds int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) 831bd4c625cSLinus Torvalds { 8321da177e4SLinus Torvalds return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); 8331da177e4SLinus Torvalds } 834bd4c625cSLinus Torvalds int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) 835bd4c625cSLinus Torvalds { 8361da177e4SLinus Torvalds return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); 8371da177e4SLinus Torvalds } 8381da177e4SLinus 
Torvalds 8391da177e4SLinus Torvalds #define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) 8401da177e4SLinus Torvalds static int write_ordered_buffers(spinlock_t * lock, 8411da177e4SLinus Torvalds struct reiserfs_journal *j, 8421da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 8431da177e4SLinus Torvalds struct list_head *list) 8441da177e4SLinus Torvalds { 8451da177e4SLinus Torvalds struct buffer_head *bh; 8461da177e4SLinus Torvalds struct reiserfs_jh *jh; 8471da177e4SLinus Torvalds int ret = j->j_errno; 8481da177e4SLinus Torvalds struct buffer_chunk chunk; 8491da177e4SLinus Torvalds struct list_head tmp; 8501da177e4SLinus Torvalds INIT_LIST_HEAD(&tmp); 8511da177e4SLinus Torvalds 8521da177e4SLinus Torvalds chunk.nr = 0; 8531da177e4SLinus Torvalds spin_lock(lock); 8541da177e4SLinus Torvalds while (!list_empty(list)) { 8551da177e4SLinus Torvalds jh = JH_ENTRY(list->next); 8561da177e4SLinus Torvalds bh = jh->bh; 8571da177e4SLinus Torvalds get_bh(bh); 858ca5de404SNick Piggin if (!trylock_buffer(bh)) { 8591da177e4SLinus Torvalds if (!buffer_dirty(bh)) { 860f116629dSAkinobu Mita list_move(&jh->list, &tmp); 8611da177e4SLinus Torvalds goto loop_next; 8621da177e4SLinus Torvalds } 8631da177e4SLinus Torvalds spin_unlock(lock); 8641da177e4SLinus Torvalds if (chunk.nr) 8651da177e4SLinus Torvalds write_ordered_chunk(&chunk); 8661da177e4SLinus Torvalds wait_on_buffer(bh); 8671da177e4SLinus Torvalds cond_resched(); 8681da177e4SLinus Torvalds spin_lock(lock); 8691da177e4SLinus Torvalds goto loop_next; 8701da177e4SLinus Torvalds } 8713d4492f8SChris Mason /* in theory, dirty non-uptodate buffers should never get here, 8723d4492f8SChris Mason * but the upper layer io error paths still have a few quirks. 
8733d4492f8SChris Mason * Handle them here as gracefully as we can 8743d4492f8SChris Mason */ 8753d4492f8SChris Mason if (!buffer_uptodate(bh) && buffer_dirty(bh)) { 8763d4492f8SChris Mason clear_buffer_dirty(bh); 8773d4492f8SChris Mason ret = -EIO; 8783d4492f8SChris Mason } 8791da177e4SLinus Torvalds if (buffer_dirty(bh)) { 880f116629dSAkinobu Mita list_move(&jh->list, &tmp); 8811da177e4SLinus Torvalds add_to_chunk(&chunk, bh, lock, write_ordered_chunk); 8821da177e4SLinus Torvalds } else { 8831da177e4SLinus Torvalds reiserfs_free_jh(bh); 8841da177e4SLinus Torvalds unlock_buffer(bh); 8851da177e4SLinus Torvalds } 8861da177e4SLinus Torvalds loop_next: 8871da177e4SLinus Torvalds put_bh(bh); 8881da177e4SLinus Torvalds cond_resched_lock(lock); 8891da177e4SLinus Torvalds } 8901da177e4SLinus Torvalds if (chunk.nr) { 8911da177e4SLinus Torvalds spin_unlock(lock); 8921da177e4SLinus Torvalds write_ordered_chunk(&chunk); 8931da177e4SLinus Torvalds spin_lock(lock); 8941da177e4SLinus Torvalds } 8951da177e4SLinus Torvalds while (!list_empty(&tmp)) { 8961da177e4SLinus Torvalds jh = JH_ENTRY(tmp.prev); 8971da177e4SLinus Torvalds bh = jh->bh; 8981da177e4SLinus Torvalds get_bh(bh); 8991da177e4SLinus Torvalds reiserfs_free_jh(bh); 9001da177e4SLinus Torvalds 9011da177e4SLinus Torvalds if (buffer_locked(bh)) { 9021da177e4SLinus Torvalds spin_unlock(lock); 9031da177e4SLinus Torvalds wait_on_buffer(bh); 9041da177e4SLinus Torvalds spin_lock(lock); 9051da177e4SLinus Torvalds } 9061da177e4SLinus Torvalds if (!buffer_uptodate(bh)) { 9071da177e4SLinus Torvalds ret = -EIO; 9081da177e4SLinus Torvalds } 909d62b1b87SChris Mason /* ugly interaction with invalidatepage here. 910d62b1b87SChris Mason * reiserfs_invalidate_page will pin any buffer that has a valid 911d62b1b87SChris Mason * journal head from an older transaction. 
If someone else sets 912d62b1b87SChris Mason * our buffer dirty after we write it in the first loop, and 913d62b1b87SChris Mason * then someone truncates the page away, nobody will ever write 914d62b1b87SChris Mason * the buffer. We're safe if we write the page one last time 915d62b1b87SChris Mason * after freeing the journal header. 916d62b1b87SChris Mason */ 917d62b1b87SChris Mason if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { 918d62b1b87SChris Mason spin_unlock(lock); 919d62b1b87SChris Mason ll_rw_block(WRITE, 1, &bh); 920d62b1b87SChris Mason spin_lock(lock); 921d62b1b87SChris Mason } 9221da177e4SLinus Torvalds put_bh(bh); 9231da177e4SLinus Torvalds cond_resched_lock(lock); 9241da177e4SLinus Torvalds } 9251da177e4SLinus Torvalds spin_unlock(lock); 9261da177e4SLinus Torvalds return ret; 9271da177e4SLinus Torvalds } 9281da177e4SLinus Torvalds 929bd4c625cSLinus Torvalds static int flush_older_commits(struct super_block *s, 930bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 931bd4c625cSLinus Torvalds { 9321da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 9331da177e4SLinus Torvalds struct reiserfs_journal_list *other_jl; 9341da177e4SLinus Torvalds struct reiserfs_journal_list *first_jl; 9351da177e4SLinus Torvalds struct list_head *entry; 936600ed416SJeff Mahoney unsigned int trans_id = jl->j_trans_id; 937600ed416SJeff Mahoney unsigned int other_trans_id; 938600ed416SJeff Mahoney unsigned int first_trans_id; 9391da177e4SLinus Torvalds 9401da177e4SLinus Torvalds find_first: 9411da177e4SLinus Torvalds /* 9421da177e4SLinus Torvalds * first we walk backwards to find the oldest uncommitted transation 9431da177e4SLinus Torvalds */ 9441da177e4SLinus Torvalds first_jl = jl; 9451da177e4SLinus Torvalds entry = jl->j_list.prev; 9461da177e4SLinus Torvalds while (1) { 9471da177e4SLinus Torvalds other_jl = JOURNAL_LIST_ENTRY(entry); 9481da177e4SLinus Torvalds if (entry == &journal->j_journal_list || 9491da177e4SLinus Torvalds 
atomic_read(&other_jl->j_older_commits_done)) 9501da177e4SLinus Torvalds break; 9511da177e4SLinus Torvalds 9521da177e4SLinus Torvalds first_jl = other_jl; 9531da177e4SLinus Torvalds entry = other_jl->j_list.prev; 9541da177e4SLinus Torvalds } 9551da177e4SLinus Torvalds 9561da177e4SLinus Torvalds /* if we didn't find any older uncommitted transactions, return now */ 9571da177e4SLinus Torvalds if (first_jl == jl) { 9581da177e4SLinus Torvalds return 0; 9591da177e4SLinus Torvalds } 9601da177e4SLinus Torvalds 9611da177e4SLinus Torvalds first_trans_id = first_jl->j_trans_id; 9621da177e4SLinus Torvalds 9631da177e4SLinus Torvalds entry = &first_jl->j_list; 9641da177e4SLinus Torvalds while (1) { 9651da177e4SLinus Torvalds other_jl = JOURNAL_LIST_ENTRY(entry); 9661da177e4SLinus Torvalds other_trans_id = other_jl->j_trans_id; 9671da177e4SLinus Torvalds 9681da177e4SLinus Torvalds if (other_trans_id < trans_id) { 9691da177e4SLinus Torvalds if (atomic_read(&other_jl->j_commit_left) != 0) { 9701da177e4SLinus Torvalds flush_commit_list(s, other_jl, 0); 9711da177e4SLinus Torvalds 9721da177e4SLinus Torvalds /* list we were called with is gone, return */ 9731da177e4SLinus Torvalds if (!journal_list_still_alive(s, trans_id)) 9741da177e4SLinus Torvalds return 1; 9751da177e4SLinus Torvalds 9761da177e4SLinus Torvalds /* the one we just flushed is gone, this means all 9771da177e4SLinus Torvalds * older lists are also gone, so first_jl is no longer 9781da177e4SLinus Torvalds * valid either. Go back to the beginning. 
9791da177e4SLinus Torvalds */ 980bd4c625cSLinus Torvalds if (!journal_list_still_alive 981bd4c625cSLinus Torvalds (s, other_trans_id)) { 9821da177e4SLinus Torvalds goto find_first; 9831da177e4SLinus Torvalds } 9841da177e4SLinus Torvalds } 9851da177e4SLinus Torvalds entry = entry->next; 9861da177e4SLinus Torvalds if (entry == &journal->j_journal_list) 9871da177e4SLinus Torvalds return 0; 9881da177e4SLinus Torvalds } else { 9891da177e4SLinus Torvalds return 0; 9901da177e4SLinus Torvalds } 9911da177e4SLinus Torvalds } 9921da177e4SLinus Torvalds return 0; 9931da177e4SLinus Torvalds } 994deba0f49SAdrian Bunk 995deba0f49SAdrian Bunk static int reiserfs_async_progress_wait(struct super_block *s) 996bd4c625cSLinus Torvalds { 9971da177e4SLinus Torvalds DEFINE_WAIT(wait); 9981da177e4SLinus Torvalds struct reiserfs_journal *j = SB_JOURNAL(s); 9991da177e4SLinus Torvalds if (atomic_read(&j->j_async_throttle)) 10003fcfab16SAndrew Morton congestion_wait(WRITE, HZ / 10); 10011da177e4SLinus Torvalds return 0; 10021da177e4SLinus Torvalds } 10031da177e4SLinus Torvalds 10041da177e4SLinus Torvalds /* 10051da177e4SLinus Torvalds ** if this journal list still has commit blocks unflushed, send them to disk. 
10061da177e4SLinus Torvalds ** 10071da177e4SLinus Torvalds ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) 10081da177e4SLinus Torvalds ** Before the commit block can by written, every other log block must be safely on disk 10091da177e4SLinus Torvalds ** 10101da177e4SLinus Torvalds */ 1011bd4c625cSLinus Torvalds static int flush_commit_list(struct super_block *s, 1012bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall) 1013bd4c625cSLinus Torvalds { 10141da177e4SLinus Torvalds int i; 10153ee16670SJeff Mahoney b_blocknr_t bn; 10161da177e4SLinus Torvalds struct buffer_head *tbh = NULL; 1017600ed416SJeff Mahoney unsigned int trans_id = jl->j_trans_id; 10181da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 10191da177e4SLinus Torvalds int barrier = 0; 10201da177e4SLinus Torvalds int retval = 0; 1021e0e851cfSChris Mason int write_len; 10221da177e4SLinus Torvalds 10231da177e4SLinus Torvalds reiserfs_check_lock_depth(s, "flush_commit_list"); 10241da177e4SLinus Torvalds 10251da177e4SLinus Torvalds if (atomic_read(&jl->j_older_commits_done)) { 10261da177e4SLinus Torvalds return 0; 10271da177e4SLinus Torvalds } 10281da177e4SLinus Torvalds 102922e2c507SJens Axboe get_fs_excl(); 103022e2c507SJens Axboe 10311da177e4SLinus Torvalds /* before we can put our commit blocks on disk, we have to make sure everyone older than 10321da177e4SLinus Torvalds ** us is on disk too 10331da177e4SLinus Torvalds */ 10341da177e4SLinus Torvalds BUG_ON(jl->j_len <= 0); 10351da177e4SLinus Torvalds BUG_ON(trans_id == journal->j_trans_id); 10361da177e4SLinus Torvalds 10371da177e4SLinus Torvalds get_journal_list(jl); 10381da177e4SLinus Torvalds if (flushall) { 10391da177e4SLinus Torvalds if (flush_older_commits(s, jl) == 1) { 10401da177e4SLinus Torvalds /* list disappeared during flush_older_commits. 
return */ 10411da177e4SLinus Torvalds goto put_jl; 10421da177e4SLinus Torvalds } 10431da177e4SLinus Torvalds } 10441da177e4SLinus Torvalds 10451da177e4SLinus Torvalds /* make sure nobody is trying to flush this one at the same time */ 104690415deaSJeff Mahoney mutex_lock(&jl->j_commit_mutex); 10471da177e4SLinus Torvalds if (!journal_list_still_alive(s, trans_id)) { 104890415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 10491da177e4SLinus Torvalds goto put_jl; 10501da177e4SLinus Torvalds } 10511da177e4SLinus Torvalds BUG_ON(jl->j_trans_id == 0); 10521da177e4SLinus Torvalds 10531da177e4SLinus Torvalds /* this commit is done, exit */ 10541da177e4SLinus Torvalds if (atomic_read(&(jl->j_commit_left)) <= 0) { 10551da177e4SLinus Torvalds if (flushall) { 10561da177e4SLinus Torvalds atomic_set(&(jl->j_older_commits_done), 1); 10571da177e4SLinus Torvalds } 105890415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 10591da177e4SLinus Torvalds goto put_jl; 10601da177e4SLinus Torvalds } 10611da177e4SLinus Torvalds 10621da177e4SLinus Torvalds if (!list_empty(&jl->j_bh_list)) { 10633d4492f8SChris Mason int ret; 10641da177e4SLinus Torvalds unlock_kernel(); 10653d4492f8SChris Mason ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, 10661da177e4SLinus Torvalds journal, jl, &jl->j_bh_list); 10673d4492f8SChris Mason if (ret < 0 && retval == 0) 10683d4492f8SChris Mason retval = ret; 10691da177e4SLinus Torvalds lock_kernel(); 10701da177e4SLinus Torvalds } 10711da177e4SLinus Torvalds BUG_ON(!list_empty(&jl->j_bh_list)); 10721da177e4SLinus Torvalds /* 10731da177e4SLinus Torvalds * for the description block and all the log blocks, submit any buffers 1074e0e851cfSChris Mason * that haven't already reached the disk. Try to write at least 256 1075e0e851cfSChris Mason * log blocks. 
later on, we will only wait on blocks that correspond 1076e0e851cfSChris Mason * to this transaction, but while we're unplugging we might as well 1077e0e851cfSChris Mason * get a chunk of data on there. 10781da177e4SLinus Torvalds */ 10791da177e4SLinus Torvalds atomic_inc(&journal->j_async_throttle); 1080e0e851cfSChris Mason write_len = jl->j_len + 1; 1081e0e851cfSChris Mason if (write_len < 256) 1082e0e851cfSChris Mason write_len = 256; 1083e0e851cfSChris Mason for (i = 0 ; i < write_len ; i++) { 10841da177e4SLinus Torvalds bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % 10851da177e4SLinus Torvalds SB_ONDISK_JOURNAL_SIZE(s); 10861da177e4SLinus Torvalds tbh = journal_find_get_block(s, bn); 1087e0e851cfSChris Mason if (tbh) { 1088e0e851cfSChris Mason if (buffer_dirty(tbh)) 1089e0e851cfSChris Mason ll_rw_block(WRITE, 1, &tbh) ; 10901da177e4SLinus Torvalds put_bh(tbh) ; 10911da177e4SLinus Torvalds } 1092e0e851cfSChris Mason } 10931da177e4SLinus Torvalds atomic_dec(&journal->j_async_throttle); 10941da177e4SLinus Torvalds 10955d5e8156SJeff Mahoney /* We're skipping the commit if there's an error */ 10965d5e8156SJeff Mahoney if (retval || reiserfs_is_journal_aborted(journal)) 10975d5e8156SJeff Mahoney barrier = 0; 10985d5e8156SJeff Mahoney 10991da177e4SLinus Torvalds /* wait on everything written so far before writing the commit 11001da177e4SLinus Torvalds * if we are in barrier mode, send the commit down now 11011da177e4SLinus Torvalds */ 11021da177e4SLinus Torvalds barrier = reiserfs_barrier_flush(s); 11031da177e4SLinus Torvalds if (barrier) { 11041da177e4SLinus Torvalds int ret; 11051da177e4SLinus Torvalds lock_buffer(jl->j_commit_bh); 11061da177e4SLinus Torvalds ret = submit_barrier_buffer(jl->j_commit_bh); 11071da177e4SLinus Torvalds if (ret == -EOPNOTSUPP) { 11081da177e4SLinus Torvalds set_buffer_uptodate(jl->j_commit_bh); 11091da177e4SLinus Torvalds disable_barrier(s); 11101da177e4SLinus Torvalds barrier = 0; 11111da177e4SLinus Torvalds } 
11121da177e4SLinus Torvalds } 11131da177e4SLinus Torvalds for (i = 0; i < (jl->j_len + 1); i++) { 11141da177e4SLinus Torvalds bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 11151da177e4SLinus Torvalds (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); 11161da177e4SLinus Torvalds tbh = journal_find_get_block(s, bn); 11171da177e4SLinus Torvalds wait_on_buffer(tbh); 11181da177e4SLinus Torvalds // since we're using ll_rw_blk above, it might have skipped over 11191da177e4SLinus Torvalds // a locked buffer. Double check here 11201da177e4SLinus Torvalds // 11211da177e4SLinus Torvalds if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 11221da177e4SLinus Torvalds sync_dirty_buffer(tbh); 11231da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(tbh))) { 11241da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 112545b03d5eSJeff Mahoney reiserfs_warning(s, "journal-601", 112645b03d5eSJeff Mahoney "buffer write failed"); 11271da177e4SLinus Torvalds #endif 11281da177e4SLinus Torvalds retval = -EIO; 11291da177e4SLinus Torvalds } 11301da177e4SLinus Torvalds put_bh(tbh); /* once for journal_find_get_block */ 11311da177e4SLinus Torvalds put_bh(tbh); /* once due to original getblk in do_journal_end */ 11321da177e4SLinus Torvalds atomic_dec(&(jl->j_commit_left)); 11331da177e4SLinus Torvalds } 11341da177e4SLinus Torvalds 11351da177e4SLinus Torvalds BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); 11361da177e4SLinus Torvalds 11371da177e4SLinus Torvalds if (!barrier) { 11385d5e8156SJeff Mahoney /* If there was a write error in the journal - we can't commit 11395d5e8156SJeff Mahoney * this transaction - it will be invalid and, if successful, 1140beb7dd86SRobert P. J. Day * will just end up propagating the write error out to 11415d5e8156SJeff Mahoney * the file system. 
*/ 11425d5e8156SJeff Mahoney if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { 11431da177e4SLinus Torvalds if (buffer_dirty(jl->j_commit_bh)) 11441da177e4SLinus Torvalds BUG(); 11451da177e4SLinus Torvalds mark_buffer_dirty(jl->j_commit_bh) ; 11461da177e4SLinus Torvalds sync_dirty_buffer(jl->j_commit_bh) ; 11475d5e8156SJeff Mahoney } 11481da177e4SLinus Torvalds } else 11491da177e4SLinus Torvalds wait_on_buffer(jl->j_commit_bh); 11501da177e4SLinus Torvalds 11511da177e4SLinus Torvalds check_barrier_completion(s, jl->j_commit_bh); 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds /* If there was a write error in the journal - we can't commit this 11541da177e4SLinus Torvalds * transaction - it will be invalid and, if successful, will just end 1155beb7dd86SRobert P. J. Day * up propagating the write error out to the filesystem. */ 11561da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 11571da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 115845b03d5eSJeff Mahoney reiserfs_warning(s, "journal-615", "buffer write failed"); 11591da177e4SLinus Torvalds #endif 11601da177e4SLinus Torvalds retval = -EIO; 11611da177e4SLinus Torvalds } 11621da177e4SLinus Torvalds bforget(jl->j_commit_bh); 11631da177e4SLinus Torvalds if (journal->j_last_commit_id != 0 && 11641da177e4SLinus Torvalds (jl->j_trans_id - journal->j_last_commit_id) != 1) { 116545b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu", 1166bd4c625cSLinus Torvalds journal->j_last_commit_id, jl->j_trans_id); 11671da177e4SLinus Torvalds } 11681da177e4SLinus Torvalds journal->j_last_commit_id = jl->j_trans_id; 11691da177e4SLinus Torvalds 11701da177e4SLinus Torvalds /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 11711da177e4SLinus Torvalds cleanup_freed_for_journal_list(s, jl); 11721da177e4SLinus Torvalds 11731da177e4SLinus Torvalds retval = retval ? 
retval : journal->j_errno; 11741da177e4SLinus Torvalds 11751da177e4SLinus Torvalds /* mark the metadata dirty */ 11761da177e4SLinus Torvalds if (!retval) 11771da177e4SLinus Torvalds dirty_one_transaction(s, jl); 11781da177e4SLinus Torvalds atomic_dec(&(jl->j_commit_left)); 11791da177e4SLinus Torvalds 11801da177e4SLinus Torvalds if (flushall) { 11811da177e4SLinus Torvalds atomic_set(&(jl->j_older_commits_done), 1); 11821da177e4SLinus Torvalds } 118390415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 11841da177e4SLinus Torvalds put_jl: 11851da177e4SLinus Torvalds put_journal_list(s, jl); 11861da177e4SLinus Torvalds 11871da177e4SLinus Torvalds if (retval) 1188bd4c625cSLinus Torvalds reiserfs_abort(s, retval, "Journal write error in %s", 1189fbe5498bSHarvey Harrison __func__); 119022e2c507SJens Axboe put_fs_excl(); 11911da177e4SLinus Torvalds return retval; 11921da177e4SLinus Torvalds } 11931da177e4SLinus Torvalds 11941da177e4SLinus Torvalds /* 11951da177e4SLinus Torvalds ** flush_journal_list frequently needs to find a newer transaction for a given block. 
This does that, or 11961da177e4SLinus Torvalds ** returns NULL if it can't find anything 11971da177e4SLinus Torvalds */ 1198bd4c625cSLinus Torvalds static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1199bd4c625cSLinus Torvalds reiserfs_journal_cnode 1200bd4c625cSLinus Torvalds *cn) 1201bd4c625cSLinus Torvalds { 12021da177e4SLinus Torvalds struct super_block *sb = cn->sb; 12031da177e4SLinus Torvalds b_blocknr_t blocknr = cn->blocknr; 12041da177e4SLinus Torvalds 12051da177e4SLinus Torvalds cn = cn->hprev; 12061da177e4SLinus Torvalds while (cn) { 12071da177e4SLinus Torvalds if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { 12081da177e4SLinus Torvalds return cn->jlist; 12091da177e4SLinus Torvalds } 12101da177e4SLinus Torvalds cn = cn->hprev; 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds return NULL; 12131da177e4SLinus Torvalds } 12141da177e4SLinus Torvalds 1215a3172027SChris Mason static int newer_jl_done(struct reiserfs_journal_cnode *cn) 1216a3172027SChris Mason { 1217a3172027SChris Mason struct super_block *sb = cn->sb; 1218a3172027SChris Mason b_blocknr_t blocknr = cn->blocknr; 1219a3172027SChris Mason 1220a3172027SChris Mason cn = cn->hprev; 1221a3172027SChris Mason while (cn) { 1222a3172027SChris Mason if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && 1223a3172027SChris Mason atomic_read(&cn->jlist->j_commit_left) != 0) 1224a3172027SChris Mason return 0; 1225a3172027SChris Mason cn = cn->hprev; 1226a3172027SChris Mason } 1227a3172027SChris Mason return 1; 1228a3172027SChris Mason } 1229a3172027SChris Mason 1230bd4c625cSLinus Torvalds static void remove_journal_hash(struct super_block *, 1231bd4c625cSLinus Torvalds struct reiserfs_journal_cnode **, 1232bd4c625cSLinus Torvalds struct reiserfs_journal_list *, unsigned long, 1233bd4c625cSLinus Torvalds int); 12341da177e4SLinus Torvalds 12351da177e4SLinus Torvalds /* 12361da177e4SLinus Torvalds ** once all the real blocks have been flushed, it is safe to remove them from the 
12371da177e4SLinus Torvalds ** journal list for this transaction. Aside from freeing the cnode, this also allows the 12381da177e4SLinus Torvalds ** block to be reallocated for data blocks if it had been deleted. 12391da177e4SLinus Torvalds */ 1240bd4c625cSLinus Torvalds static void remove_all_from_journal_list(struct super_block *p_s_sb, 1241bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, 1242bd4c625cSLinus Torvalds int debug) 1243bd4c625cSLinus Torvalds { 12441da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 12451da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *last; 12461da177e4SLinus Torvalds cn = jl->j_realblock; 12471da177e4SLinus Torvalds 12481da177e4SLinus Torvalds /* which is better, to lock once around the whole loop, or 12491da177e4SLinus Torvalds ** to lock for each call to remove_journal_hash? 12501da177e4SLinus Torvalds */ 12511da177e4SLinus Torvalds while (cn) { 12521da177e4SLinus Torvalds if (cn->blocknr != 0) { 12531da177e4SLinus Torvalds if (debug) { 125445b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "reiserfs-2201", 1255bd4c625cSLinus Torvalds "block %u, bh is %d, state %ld", 1256bd4c625cSLinus Torvalds cn->blocknr, cn->bh ? 1 : 0, 1257bd4c625cSLinus Torvalds cn->state); 12581da177e4SLinus Torvalds } 12591da177e4SLinus Torvalds cn->state = 0; 1260bd4c625cSLinus Torvalds remove_journal_hash(p_s_sb, journal->j_list_hash_table, 1261bd4c625cSLinus Torvalds jl, cn->blocknr, 1); 12621da177e4SLinus Torvalds } 12631da177e4SLinus Torvalds last = cn; 12641da177e4SLinus Torvalds cn = cn->next; 12651da177e4SLinus Torvalds free_cnode(p_s_sb, last); 12661da177e4SLinus Torvalds } 12671da177e4SLinus Torvalds jl->j_realblock = NULL; 12681da177e4SLinus Torvalds } 12691da177e4SLinus Torvalds 12701da177e4SLinus Torvalds /* 12711da177e4SLinus Torvalds ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 
12721da177e4SLinus Torvalds ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start 12731da177e4SLinus Torvalds ** releasing blocks in this transaction for reuse as data blocks. 12741da177e4SLinus Torvalds ** called by flush_journal_list, before it calls remove_all_from_journal_list 12751da177e4SLinus Torvalds ** 12761da177e4SLinus Torvalds */ 1277bd4c625cSLinus Torvalds static int _update_journal_header_block(struct super_block *p_s_sb, 1278bd4c625cSLinus Torvalds unsigned long offset, 1279600ed416SJeff Mahoney unsigned int trans_id) 1280bd4c625cSLinus Torvalds { 12811da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 12821da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 12831da177e4SLinus Torvalds 12841da177e4SLinus Torvalds if (reiserfs_is_journal_aborted(journal)) 12851da177e4SLinus Torvalds return -EIO; 12861da177e4SLinus Torvalds 12871da177e4SLinus Torvalds if (trans_id >= journal->j_last_flush_trans_id) { 12881da177e4SLinus Torvalds if (buffer_locked((journal->j_header_bh))) { 12891da177e4SLinus Torvalds wait_on_buffer((journal->j_header_bh)); 12901da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 12911da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 129245b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-699", 129345b03d5eSJeff Mahoney "buffer write failed"); 12941da177e4SLinus Torvalds #endif 12951da177e4SLinus Torvalds return -EIO; 12961da177e4SLinus Torvalds } 12971da177e4SLinus Torvalds } 12981da177e4SLinus Torvalds journal->j_last_flush_trans_id = trans_id; 12991da177e4SLinus Torvalds journal->j_first_unflushed_offset = offset; 1300bd4c625cSLinus Torvalds jh = (struct reiserfs_journal_header *)(journal->j_header_bh-> 1301bd4c625cSLinus Torvalds b_data); 13021da177e4SLinus Torvalds jh->j_last_flush_trans_id = cpu_to_le32(trans_id); 13031da177e4SLinus Torvalds jh->j_first_unflushed_offset = cpu_to_le32(offset); 
13041da177e4SLinus Torvalds jh->j_mount_id = cpu_to_le32(journal->j_mount_id); 13051da177e4SLinus Torvalds 13061da177e4SLinus Torvalds if (reiserfs_barrier_flush(p_s_sb)) { 13071da177e4SLinus Torvalds int ret; 13081da177e4SLinus Torvalds lock_buffer(journal->j_header_bh); 13091da177e4SLinus Torvalds ret = submit_barrier_buffer(journal->j_header_bh); 13101da177e4SLinus Torvalds if (ret == -EOPNOTSUPP) { 13111da177e4SLinus Torvalds set_buffer_uptodate(journal->j_header_bh); 13121da177e4SLinus Torvalds disable_barrier(p_s_sb); 13131da177e4SLinus Torvalds goto sync; 13141da177e4SLinus Torvalds } 13151da177e4SLinus Torvalds wait_on_buffer(journal->j_header_bh); 13161da177e4SLinus Torvalds check_barrier_completion(p_s_sb, journal->j_header_bh); 13171da177e4SLinus Torvalds } else { 13181da177e4SLinus Torvalds sync: 13191da177e4SLinus Torvalds set_buffer_dirty(journal->j_header_bh); 13201da177e4SLinus Torvalds sync_dirty_buffer(journal->j_header_bh); 13211da177e4SLinus Torvalds } 13221da177e4SLinus Torvalds if (!buffer_uptodate(journal->j_header_bh)) { 132345b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-837", 132445b03d5eSJeff Mahoney "IO error during journal replay"); 13251da177e4SLinus Torvalds return -EIO; 13261da177e4SLinus Torvalds } 13271da177e4SLinus Torvalds } 13281da177e4SLinus Torvalds return 0; 13291da177e4SLinus Torvalds } 13301da177e4SLinus Torvalds 13311da177e4SLinus Torvalds static int update_journal_header_block(struct super_block *p_s_sb, 13321da177e4SLinus Torvalds unsigned long offset, 1333600ed416SJeff Mahoney unsigned int trans_id) 1334bd4c625cSLinus Torvalds { 13351da177e4SLinus Torvalds return _update_journal_header_block(p_s_sb, offset, trans_id); 13361da177e4SLinus Torvalds } 1337bd4c625cSLinus Torvalds 13381da177e4SLinus Torvalds /* 13391da177e4SLinus Torvalds ** flush any and all journal lists older than you are 13401da177e4SLinus Torvalds ** can only be called from flush_journal_list 13411da177e4SLinus Torvalds */ 13421da177e4SLinus 
Torvalds static int flush_older_journal_lists(struct super_block *p_s_sb, 13431da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 13441da177e4SLinus Torvalds { 13451da177e4SLinus Torvalds struct list_head *entry; 13461da177e4SLinus Torvalds struct reiserfs_journal_list *other_jl; 13471da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1348600ed416SJeff Mahoney unsigned int trans_id = jl->j_trans_id; 13491da177e4SLinus Torvalds 13501da177e4SLinus Torvalds /* we know we are the only ones flushing things, no extra race 13511da177e4SLinus Torvalds * protection is required. 13521da177e4SLinus Torvalds */ 13531da177e4SLinus Torvalds restart: 13541da177e4SLinus Torvalds entry = journal->j_journal_list.next; 13551da177e4SLinus Torvalds /* Did we wrap? */ 13561da177e4SLinus Torvalds if (entry == &journal->j_journal_list) 13571da177e4SLinus Torvalds return 0; 13581da177e4SLinus Torvalds other_jl = JOURNAL_LIST_ENTRY(entry); 13591da177e4SLinus Torvalds if (other_jl->j_trans_id < trans_id) { 13601da177e4SLinus Torvalds BUG_ON(other_jl->j_refcount <= 0); 13611da177e4SLinus Torvalds /* do not flush all */ 13621da177e4SLinus Torvalds flush_journal_list(p_s_sb, other_jl, 0); 13631da177e4SLinus Torvalds 13641da177e4SLinus Torvalds /* other_jl is now deleted from the list */ 13651da177e4SLinus Torvalds goto restart; 13661da177e4SLinus Torvalds } 13671da177e4SLinus Torvalds return 0; 13681da177e4SLinus Torvalds } 13691da177e4SLinus Torvalds 13701da177e4SLinus Torvalds static void del_from_work_list(struct super_block *s, 1371bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 1372bd4c625cSLinus Torvalds { 13731da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 13741da177e4SLinus Torvalds if (!list_empty(&jl->j_working_list)) { 13751da177e4SLinus Torvalds list_del_init(&jl->j_working_list); 13761da177e4SLinus Torvalds journal->j_num_work_lists--; 13771da177e4SLinus Torvalds } 13781da177e4SLinus Torvalds } 13791da177e4SLinus 
Torvalds 13801da177e4SLinus Torvalds /* flush a journal list, both commit and real blocks 13811da177e4SLinus Torvalds ** 13821da177e4SLinus Torvalds ** always set flushall to 1, unless you are calling from inside 13831da177e4SLinus Torvalds ** flush_journal_list 13841da177e4SLinus Torvalds ** 13851da177e4SLinus Torvalds ** IMPORTANT. This can only be called while there are no journal writers, 13861da177e4SLinus Torvalds ** and the journal is locked. That means it can only be called from 13871da177e4SLinus Torvalds ** do_journal_end, or by journal_release 13881da177e4SLinus Torvalds */ 13891da177e4SLinus Torvalds static int flush_journal_list(struct super_block *s, 1390bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl, int flushall) 1391bd4c625cSLinus Torvalds { 13921da177e4SLinus Torvalds struct reiserfs_journal_list *pjl; 13931da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *last; 13941da177e4SLinus Torvalds int count; 13951da177e4SLinus Torvalds int was_jwait = 0; 13961da177e4SLinus Torvalds int was_dirty = 0; 13971da177e4SLinus Torvalds struct buffer_head *saved_bh; 13981da177e4SLinus Torvalds unsigned long j_len_saved = jl->j_len; 13991da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 14001da177e4SLinus Torvalds int err = 0; 14011da177e4SLinus Torvalds 14021da177e4SLinus Torvalds BUG_ON(j_len_saved <= 0); 14031da177e4SLinus Torvalds 14041da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) != 0) { 140545b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2048", "called with wcount %d", 14061da177e4SLinus Torvalds atomic_read(&journal->j_wcount)); 14071da177e4SLinus Torvalds } 14081da177e4SLinus Torvalds BUG_ON(jl->j_trans_id == 0); 14091da177e4SLinus Torvalds 14101da177e4SLinus Torvalds /* if flushall == 0, the lock is already held */ 14111da177e4SLinus Torvalds if (flushall) { 1412afe70259SJeff Mahoney mutex_lock(&journal->j_flush_mutex); 1413afe70259SJeff Mahoney } else if (mutex_trylock(&journal->j_flush_mutex)) { 
14141da177e4SLinus Torvalds BUG(); 14151da177e4SLinus Torvalds } 14161da177e4SLinus Torvalds 14171da177e4SLinus Torvalds count = 0; 14181da177e4SLinus Torvalds if (j_len_saved > journal->j_trans_max) { 1419bd4c625cSLinus Torvalds reiserfs_panic(s, 1420bd4c625cSLinus Torvalds "journal-715: flush_journal_list, length is %lu, trans id %lu\n", 1421bd4c625cSLinus Torvalds j_len_saved, jl->j_trans_id); 14221da177e4SLinus Torvalds return 0; 14231da177e4SLinus Torvalds } 14241da177e4SLinus Torvalds 142522e2c507SJens Axboe get_fs_excl(); 142622e2c507SJens Axboe 14271da177e4SLinus Torvalds /* if all the work is already done, get out of here */ 14281da177e4SLinus Torvalds if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 14291da177e4SLinus Torvalds atomic_read(&(jl->j_commit_left)) <= 0) { 14301da177e4SLinus Torvalds goto flush_older_and_return; 14311da177e4SLinus Torvalds } 14321da177e4SLinus Torvalds 14331da177e4SLinus Torvalds /* start by putting the commit list on disk. This will also flush 14341da177e4SLinus Torvalds ** the commit lists of any olders transactions 14351da177e4SLinus Torvalds */ 14361da177e4SLinus Torvalds flush_commit_list(s, jl, 1); 14371da177e4SLinus Torvalds 1438bd4c625cSLinus Torvalds if (!(jl->j_state & LIST_DIRTY) 1439bd4c625cSLinus Torvalds && !reiserfs_is_journal_aborted(journal)) 14401da177e4SLinus Torvalds BUG(); 14411da177e4SLinus Torvalds 14421da177e4SLinus Torvalds /* are we done now? 
*/ 14431da177e4SLinus Torvalds if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 14441da177e4SLinus Torvalds atomic_read(&(jl->j_commit_left)) <= 0) { 14451da177e4SLinus Torvalds goto flush_older_and_return; 14461da177e4SLinus Torvalds } 14471da177e4SLinus Torvalds 14481da177e4SLinus Torvalds /* loop through each cnode, see if we need to write it, 14491da177e4SLinus Torvalds ** or wait on a more recent transaction, or just ignore it 14501da177e4SLinus Torvalds */ 14511da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) != 0) { 1452bd4c625cSLinus Torvalds reiserfs_panic(s, 1453bd4c625cSLinus Torvalds "journal-844: panic journal list is flushing, wcount is not 0\n"); 14541da177e4SLinus Torvalds } 14551da177e4SLinus Torvalds cn = jl->j_realblock; 14561da177e4SLinus Torvalds while (cn) { 14571da177e4SLinus Torvalds was_jwait = 0; 14581da177e4SLinus Torvalds was_dirty = 0; 14591da177e4SLinus Torvalds saved_bh = NULL; 14601da177e4SLinus Torvalds /* blocknr of 0 is no longer in the hash, ignore it */ 14611da177e4SLinus Torvalds if (cn->blocknr == 0) { 14621da177e4SLinus Torvalds goto free_cnode; 14631da177e4SLinus Torvalds } 14641da177e4SLinus Torvalds 14651da177e4SLinus Torvalds /* This transaction failed commit. Don't write out to the disk */ 14661da177e4SLinus Torvalds if (!(jl->j_state & LIST_DIRTY)) 14671da177e4SLinus Torvalds goto free_cnode; 14681da177e4SLinus Torvalds 14691da177e4SLinus Torvalds pjl = find_newer_jl_for_cn(cn); 14701da177e4SLinus Torvalds /* the order is important here. 
We check pjl to make sure we 14711da177e4SLinus Torvalds ** don't clear BH_JDirty_wait if we aren't the one writing this 14721da177e4SLinus Torvalds ** block to disk 14731da177e4SLinus Torvalds */ 14741da177e4SLinus Torvalds if (!pjl && cn->bh) { 14751da177e4SLinus Torvalds saved_bh = cn->bh; 14761da177e4SLinus Torvalds 14771da177e4SLinus Torvalds /* we do this to make sure nobody releases the buffer while 14781da177e4SLinus Torvalds ** we are working with it 14791da177e4SLinus Torvalds */ 14801da177e4SLinus Torvalds get_bh(saved_bh); 14811da177e4SLinus Torvalds 14821da177e4SLinus Torvalds if (buffer_journal_dirty(saved_bh)) { 14831da177e4SLinus Torvalds BUG_ON(!can_dirty(cn)); 14841da177e4SLinus Torvalds was_jwait = 1; 14851da177e4SLinus Torvalds was_dirty = 1; 14861da177e4SLinus Torvalds } else if (can_dirty(cn)) { 14871da177e4SLinus Torvalds /* everything with !pjl && jwait should be writable */ 14881da177e4SLinus Torvalds BUG(); 14891da177e4SLinus Torvalds } 14901da177e4SLinus Torvalds } 14911da177e4SLinus Torvalds 14921da177e4SLinus Torvalds /* if someone has this block in a newer transaction, just make 14930779bf2dSMatt LaPlante ** sure they are committed, and don't try writing it to disk 14941da177e4SLinus Torvalds */ 14951da177e4SLinus Torvalds if (pjl) { 14961da177e4SLinus Torvalds if (atomic_read(&pjl->j_commit_left)) 14971da177e4SLinus Torvalds flush_commit_list(s, pjl, 1); 14981da177e4SLinus Torvalds goto free_cnode; 14991da177e4SLinus Torvalds } 15001da177e4SLinus Torvalds 15011da177e4SLinus Torvalds /* bh == NULL when the block got to disk on its own, OR, 15021da177e4SLinus Torvalds ** the block got freed in a future transaction 15031da177e4SLinus Torvalds */ 15041da177e4SLinus Torvalds if (saved_bh == NULL) { 15051da177e4SLinus Torvalds goto free_cnode; 15061da177e4SLinus Torvalds } 15071da177e4SLinus Torvalds 15081da177e4SLinus Torvalds /* this should never happen. 
kupdate_one_transaction has this list 15091da177e4SLinus Torvalds ** locked while it works, so we should never see a buffer here that 15101da177e4SLinus Torvalds ** is not marked JDirty_wait 15111da177e4SLinus Torvalds */ 15121da177e4SLinus Torvalds if ((!was_jwait) && !buffer_locked(saved_bh)) { 151345b03d5eSJeff Mahoney reiserfs_warning(s, "journal-813", 151445b03d5eSJeff Mahoney "BAD! buffer %llu %cdirty %cjwait, " 15151da177e4SLinus Torvalds "not in a newer tranasction", 1516bd4c625cSLinus Torvalds (unsigned long long)saved_bh-> 1517bd4c625cSLinus Torvalds b_blocknr, was_dirty ? ' ' : '!', 1518bd4c625cSLinus Torvalds was_jwait ? ' ' : '!'); 15191da177e4SLinus Torvalds } 15201da177e4SLinus Torvalds if (was_dirty) { 15211da177e4SLinus Torvalds /* we inc again because saved_bh gets decremented at free_cnode */ 15221da177e4SLinus Torvalds get_bh(saved_bh); 15231da177e4SLinus Torvalds set_bit(BLOCK_NEEDS_FLUSH, &cn->state); 15241da177e4SLinus Torvalds lock_buffer(saved_bh); 15251da177e4SLinus Torvalds BUG_ON(cn->blocknr != saved_bh->b_blocknr); 15261da177e4SLinus Torvalds if (buffer_dirty(saved_bh)) 15271da177e4SLinus Torvalds submit_logged_buffer(saved_bh); 15281da177e4SLinus Torvalds else 15291da177e4SLinus Torvalds unlock_buffer(saved_bh); 15301da177e4SLinus Torvalds count++; 15311da177e4SLinus Torvalds } else { 153245b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2082", 153345b03d5eSJeff Mahoney "Unable to flush buffer %llu in %s", 1534bd4c625cSLinus Torvalds (unsigned long long)saved_bh-> 1535fbe5498bSHarvey Harrison b_blocknr, __func__); 15361da177e4SLinus Torvalds } 15371da177e4SLinus Torvalds free_cnode: 15381da177e4SLinus Torvalds last = cn; 15391da177e4SLinus Torvalds cn = cn->next; 15401da177e4SLinus Torvalds if (saved_bh) { 15411da177e4SLinus Torvalds /* we incremented this to keep others from taking the buffer head away */ 15421da177e4SLinus Torvalds put_bh(saved_bh); 15431da177e4SLinus Torvalds if (atomic_read(&(saved_bh->b_count)) < 0) { 
154445b03d5eSJeff Mahoney reiserfs_warning(s, "journal-945", 154545b03d5eSJeff Mahoney "saved_bh->b_count < 0"); 15461da177e4SLinus Torvalds } 15471da177e4SLinus Torvalds } 15481da177e4SLinus Torvalds } 15491da177e4SLinus Torvalds if (count > 0) { 15501da177e4SLinus Torvalds cn = jl->j_realblock; 15511da177e4SLinus Torvalds while (cn) { 15521da177e4SLinus Torvalds if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 15531da177e4SLinus Torvalds if (!cn->bh) { 1554bd4c625cSLinus Torvalds reiserfs_panic(s, 1555bd4c625cSLinus Torvalds "journal-1011: cn->bh is NULL\n"); 15561da177e4SLinus Torvalds } 15571da177e4SLinus Torvalds wait_on_buffer(cn->bh); 15581da177e4SLinus Torvalds if (!cn->bh) { 1559bd4c625cSLinus Torvalds reiserfs_panic(s, 1560bd4c625cSLinus Torvalds "journal-1012: cn->bh is NULL\n"); 15611da177e4SLinus Torvalds } 15621da177e4SLinus Torvalds if (unlikely(!buffer_uptodate(cn->bh))) { 15631da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 156445b03d5eSJeff Mahoney reiserfs_warning(s, "journal-949", 156545b03d5eSJeff Mahoney "buffer write failed"); 15661da177e4SLinus Torvalds #endif 15671da177e4SLinus Torvalds err = -EIO; 15681da177e4SLinus Torvalds } 15691da177e4SLinus Torvalds /* note, we must clear the JDirty_wait bit after the up to date 15701da177e4SLinus Torvalds ** check, otherwise we race against our flushpage routine 15711da177e4SLinus Torvalds */ 1572bd4c625cSLinus Torvalds BUG_ON(!test_clear_buffer_journal_dirty 1573bd4c625cSLinus Torvalds (cn->bh)); 15741da177e4SLinus Torvalds 1575398c95bdSChris Mason /* drop one ref for us */ 15761da177e4SLinus Torvalds put_bh(cn->bh); 1577398c95bdSChris Mason /* drop one ref for journal_mark_dirty */ 1578398c95bdSChris Mason release_buffer_page(cn->bh); 15791da177e4SLinus Torvalds } 15801da177e4SLinus Torvalds cn = cn->next; 15811da177e4SLinus Torvalds } 15821da177e4SLinus Torvalds } 15831da177e4SLinus Torvalds 15841da177e4SLinus Torvalds if (err) 1585bd4c625cSLinus Torvalds reiserfs_abort(s, -EIO, 
1586bd4c625cSLinus Torvalds "Write error while pushing transaction to disk in %s", 1587fbe5498bSHarvey Harrison __func__); 15881da177e4SLinus Torvalds flush_older_and_return: 15891da177e4SLinus Torvalds 15901da177e4SLinus Torvalds /* before we can update the journal header block, we _must_ flush all 15911da177e4SLinus Torvalds ** real blocks from all older transactions to disk. This is because 15921da177e4SLinus Torvalds ** once the header block is updated, this transaction will not be 15931da177e4SLinus Torvalds ** replayed after a crash 15941da177e4SLinus Torvalds */ 15951da177e4SLinus Torvalds if (flushall) { 15961da177e4SLinus Torvalds flush_older_journal_lists(s, jl); 15971da177e4SLinus Torvalds } 15981da177e4SLinus Torvalds 15991da177e4SLinus Torvalds err = journal->j_errno; 16001da177e4SLinus Torvalds /* before we can remove everything from the hash tables for this 16011da177e4SLinus Torvalds ** transaction, we must make sure it can never be replayed 16021da177e4SLinus Torvalds ** 16031da177e4SLinus Torvalds ** since we are only called from do_journal_end, we know for sure there 16041da177e4SLinus Torvalds ** are no allocations going on while we are flushing journal lists. 
So, 16051da177e4SLinus Torvalds ** we only need to update the journal header block for the last list 16061da177e4SLinus Torvalds ** being flushed 16071da177e4SLinus Torvalds */ 16081da177e4SLinus Torvalds if (!err && flushall) { 1609bd4c625cSLinus Torvalds err = 1610bd4c625cSLinus Torvalds update_journal_header_block(s, 1611bd4c625cSLinus Torvalds (jl->j_start + jl->j_len + 1612bd4c625cSLinus Torvalds 2) % SB_ONDISK_JOURNAL_SIZE(s), 1613bd4c625cSLinus Torvalds jl->j_trans_id); 16141da177e4SLinus Torvalds if (err) 1615bd4c625cSLinus Torvalds reiserfs_abort(s, -EIO, 1616bd4c625cSLinus Torvalds "Write error while updating journal header in %s", 1617fbe5498bSHarvey Harrison __func__); 16181da177e4SLinus Torvalds } 16191da177e4SLinus Torvalds remove_all_from_journal_list(s, jl, 0); 16201da177e4SLinus Torvalds list_del_init(&jl->j_list); 16211da177e4SLinus Torvalds journal->j_num_lists--; 16221da177e4SLinus Torvalds del_from_work_list(s, jl); 16231da177e4SLinus Torvalds 16241da177e4SLinus Torvalds if (journal->j_last_flush_id != 0 && 16251da177e4SLinus Torvalds (jl->j_trans_id - journal->j_last_flush_id) != 1) { 162645b03d5eSJeff Mahoney reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu", 1627bd4c625cSLinus Torvalds journal->j_last_flush_id, jl->j_trans_id); 16281da177e4SLinus Torvalds } 16291da177e4SLinus Torvalds journal->j_last_flush_id = jl->j_trans_id; 16301da177e4SLinus Torvalds 16311da177e4SLinus Torvalds /* not strictly required since we are freeing the list, but it should 16321da177e4SLinus Torvalds * help find code using dead lists later on 16331da177e4SLinus Torvalds */ 16341da177e4SLinus Torvalds jl->j_len = 0; 16351da177e4SLinus Torvalds atomic_set(&(jl->j_nonzerolen), 0); 16361da177e4SLinus Torvalds jl->j_start = 0; 16371da177e4SLinus Torvalds jl->j_realblock = NULL; 16381da177e4SLinus Torvalds jl->j_commit_bh = NULL; 16391da177e4SLinus Torvalds jl->j_trans_id = 0; 16401da177e4SLinus Torvalds jl->j_state = 0; 16411da177e4SLinus Torvalds 
put_journal_list(s, jl); 16421da177e4SLinus Torvalds if (flushall) 1643afe70259SJeff Mahoney mutex_unlock(&journal->j_flush_mutex); 164422e2c507SJens Axboe put_fs_excl(); 16451da177e4SLinus Torvalds return err; 16461da177e4SLinus Torvalds } 16471da177e4SLinus Torvalds 1648a3172027SChris Mason static int test_transaction(struct super_block *s, 1649a3172027SChris Mason struct reiserfs_journal_list *jl) 1650a3172027SChris Mason { 1651a3172027SChris Mason struct reiserfs_journal_cnode *cn; 1652a3172027SChris Mason 1653a3172027SChris Mason if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) 1654a3172027SChris Mason return 1; 1655a3172027SChris Mason 1656a3172027SChris Mason cn = jl->j_realblock; 1657a3172027SChris Mason while (cn) { 1658a3172027SChris Mason /* if the blocknr == 0, this has been cleared from the hash, 1659a3172027SChris Mason ** skip it 1660a3172027SChris Mason */ 1661a3172027SChris Mason if (cn->blocknr == 0) { 1662a3172027SChris Mason goto next; 1663a3172027SChris Mason } 1664a3172027SChris Mason if (cn->bh && !newer_jl_done(cn)) 1665a3172027SChris Mason return 0; 1666a3172027SChris Mason next: 1667a3172027SChris Mason cn = cn->next; 1668a3172027SChris Mason cond_resched(); 1669a3172027SChris Mason } 1670a3172027SChris Mason return 0; 1671a3172027SChris Mason } 1672a3172027SChris Mason 16731da177e4SLinus Torvalds static int write_one_transaction(struct super_block *s, 16741da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 16751da177e4SLinus Torvalds struct buffer_chunk *chunk) 16761da177e4SLinus Torvalds { 16771da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 16781da177e4SLinus Torvalds int ret = 0; 16791da177e4SLinus Torvalds 16801da177e4SLinus Torvalds jl->j_state |= LIST_TOUCHED; 16811da177e4SLinus Torvalds del_from_work_list(s, jl); 16821da177e4SLinus Torvalds if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { 16831da177e4SLinus Torvalds return 0; 16841da177e4SLinus Torvalds } 16851da177e4SLinus Torvalds 
16861da177e4SLinus Torvalds cn = jl->j_realblock; 16871da177e4SLinus Torvalds while (cn) { 16881da177e4SLinus Torvalds /* if the blocknr == 0, this has been cleared from the hash, 16891da177e4SLinus Torvalds ** skip it 16901da177e4SLinus Torvalds */ 16911da177e4SLinus Torvalds if (cn->blocknr == 0) { 16921da177e4SLinus Torvalds goto next; 16931da177e4SLinus Torvalds } 16941da177e4SLinus Torvalds if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 16951da177e4SLinus Torvalds struct buffer_head *tmp_bh; 16961da177e4SLinus Torvalds /* we can race against journal_mark_freed when we try 16971da177e4SLinus Torvalds * to lock_buffer(cn->bh), so we have to inc the buffer 16981da177e4SLinus Torvalds * count, and recheck things after locking 16991da177e4SLinus Torvalds */ 17001da177e4SLinus Torvalds tmp_bh = cn->bh; 17011da177e4SLinus Torvalds get_bh(tmp_bh); 17021da177e4SLinus Torvalds lock_buffer(tmp_bh); 17031da177e4SLinus Torvalds if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { 17041da177e4SLinus Torvalds if (!buffer_journal_dirty(tmp_bh) || 17051da177e4SLinus Torvalds buffer_journal_prepared(tmp_bh)) 17061da177e4SLinus Torvalds BUG(); 17071da177e4SLinus Torvalds add_to_chunk(chunk, tmp_bh, NULL, write_chunk); 17081da177e4SLinus Torvalds ret++; 17091da177e4SLinus Torvalds } else { 17101da177e4SLinus Torvalds /* note, cn->bh might be null now */ 17111da177e4SLinus Torvalds unlock_buffer(tmp_bh); 17121da177e4SLinus Torvalds } 17131da177e4SLinus Torvalds put_bh(tmp_bh); 17141da177e4SLinus Torvalds } 17151da177e4SLinus Torvalds next: 17161da177e4SLinus Torvalds cn = cn->next; 17171da177e4SLinus Torvalds cond_resched(); 17181da177e4SLinus Torvalds } 17191da177e4SLinus Torvalds return ret; 17201da177e4SLinus Torvalds } 17211da177e4SLinus Torvalds 17221da177e4SLinus Torvalds /* used by flush_commit_list */ 17231da177e4SLinus Torvalds static int dirty_one_transaction(struct super_block *s, 17241da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 17251da177e4SLinus 
Torvalds { 17261da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 17271da177e4SLinus Torvalds struct reiserfs_journal_list *pjl; 17281da177e4SLinus Torvalds int ret = 0; 17291da177e4SLinus Torvalds 17301da177e4SLinus Torvalds jl->j_state |= LIST_DIRTY; 17311da177e4SLinus Torvalds cn = jl->j_realblock; 17321da177e4SLinus Torvalds while (cn) { 17331da177e4SLinus Torvalds /* look for a more recent transaction that logged this 17341da177e4SLinus Torvalds ** buffer. Only the most recent transaction with a buffer in 17351da177e4SLinus Torvalds ** it is allowed to send that buffer to disk 17361da177e4SLinus Torvalds */ 17371da177e4SLinus Torvalds pjl = find_newer_jl_for_cn(cn); 1738bd4c625cSLinus Torvalds if (!pjl && cn->blocknr && cn->bh 1739bd4c625cSLinus Torvalds && buffer_journal_dirty(cn->bh)) { 17401da177e4SLinus Torvalds BUG_ON(!can_dirty(cn)); 17411da177e4SLinus Torvalds /* if the buffer is prepared, it will either be logged 17421da177e4SLinus Torvalds * or restored. If restored, we need to make sure 17431da177e4SLinus Torvalds * it actually gets marked dirty 17441da177e4SLinus Torvalds */ 17451da177e4SLinus Torvalds clear_buffer_journal_new(cn->bh); 17461da177e4SLinus Torvalds if (buffer_journal_prepared(cn->bh)) { 17471da177e4SLinus Torvalds set_buffer_journal_restore_dirty(cn->bh); 17481da177e4SLinus Torvalds } else { 17491da177e4SLinus Torvalds set_buffer_journal_test(cn->bh); 17501da177e4SLinus Torvalds mark_buffer_dirty(cn->bh); 17511da177e4SLinus Torvalds } 17521da177e4SLinus Torvalds } 17531da177e4SLinus Torvalds cn = cn->next; 17541da177e4SLinus Torvalds } 17551da177e4SLinus Torvalds return ret; 17561da177e4SLinus Torvalds } 17571da177e4SLinus Torvalds 17581da177e4SLinus Torvalds static int kupdate_transactions(struct super_block *s, 17591da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 17601da177e4SLinus Torvalds struct reiserfs_journal_list **next_jl, 1761600ed416SJeff Mahoney unsigned int *next_trans_id, 1762bd4c625cSLinus Torvalds int 
num_blocks, int num_trans) 1763bd4c625cSLinus Torvalds { 17641da177e4SLinus Torvalds int ret = 0; 17651da177e4SLinus Torvalds int written = 0; 17661da177e4SLinus Torvalds int transactions_flushed = 0; 1767600ed416SJeff Mahoney unsigned int orig_trans_id = jl->j_trans_id; 17681da177e4SLinus Torvalds struct buffer_chunk chunk; 17691da177e4SLinus Torvalds struct list_head *entry; 17701da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 17711da177e4SLinus Torvalds chunk.nr = 0; 17721da177e4SLinus Torvalds 1773afe70259SJeff Mahoney mutex_lock(&journal->j_flush_mutex); 17741da177e4SLinus Torvalds if (!journal_list_still_alive(s, orig_trans_id)) { 17751da177e4SLinus Torvalds goto done; 17761da177e4SLinus Torvalds } 17771da177e4SLinus Torvalds 1778afe70259SJeff Mahoney /* we've got j_flush_mutex held, nobody is going to delete any 17791da177e4SLinus Torvalds * of these lists out from underneath us 17801da177e4SLinus Torvalds */ 17811da177e4SLinus Torvalds while ((num_trans && transactions_flushed < num_trans) || 17821da177e4SLinus Torvalds (!num_trans && written < num_blocks)) { 17831da177e4SLinus Torvalds 17841da177e4SLinus Torvalds if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || 1785bd4c625cSLinus Torvalds atomic_read(&jl->j_commit_left) 1786bd4c625cSLinus Torvalds || !(jl->j_state & LIST_DIRTY)) { 17871da177e4SLinus Torvalds del_from_work_list(s, jl); 17881da177e4SLinus Torvalds break; 17891da177e4SLinus Torvalds } 17901da177e4SLinus Torvalds ret = write_one_transaction(s, jl, &chunk); 17911da177e4SLinus Torvalds 17921da177e4SLinus Torvalds if (ret < 0) 17931da177e4SLinus Torvalds goto done; 17941da177e4SLinus Torvalds transactions_flushed++; 17951da177e4SLinus Torvalds written += ret; 17961da177e4SLinus Torvalds entry = jl->j_list.next; 17971da177e4SLinus Torvalds 17981da177e4SLinus Torvalds /* did we wrap? 
*/ 17991da177e4SLinus Torvalds if (entry == &journal->j_journal_list) { 18001da177e4SLinus Torvalds break; 18011da177e4SLinus Torvalds } 18021da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 18031da177e4SLinus Torvalds 18041da177e4SLinus Torvalds /* don't bother with older transactions */ 18051da177e4SLinus Torvalds if (jl->j_trans_id <= orig_trans_id) 18061da177e4SLinus Torvalds break; 18071da177e4SLinus Torvalds } 18081da177e4SLinus Torvalds if (chunk.nr) { 18091da177e4SLinus Torvalds write_chunk(&chunk); 18101da177e4SLinus Torvalds } 18111da177e4SLinus Torvalds 18121da177e4SLinus Torvalds done: 1813afe70259SJeff Mahoney mutex_unlock(&journal->j_flush_mutex); 18141da177e4SLinus Torvalds return ret; 18151da177e4SLinus Torvalds } 18161da177e4SLinus Torvalds 18171da177e4SLinus Torvalds /* for o_sync and fsync heavy applications, they tend to use 18181da177e4SLinus Torvalds ** all the journa list slots with tiny transactions. These 18191da177e4SLinus Torvalds ** trigger lots and lots of calls to update the header block, which 18201da177e4SLinus Torvalds ** adds seeks and slows things down. 
18211da177e4SLinus Torvalds ** 18221da177e4SLinus Torvalds ** This function tries to clear out a large chunk of the journal lists 18231da177e4SLinus Torvalds ** at once, which makes everything faster since only the newest journal 18241da177e4SLinus Torvalds ** list updates the header block 18251da177e4SLinus Torvalds */ 18261da177e4SLinus Torvalds static int flush_used_journal_lists(struct super_block *s, 1827bd4c625cSLinus Torvalds struct reiserfs_journal_list *jl) 1828bd4c625cSLinus Torvalds { 18291da177e4SLinus Torvalds unsigned long len = 0; 18301da177e4SLinus Torvalds unsigned long cur_len; 18311da177e4SLinus Torvalds int ret; 18321da177e4SLinus Torvalds int i; 18331da177e4SLinus Torvalds int limit = 256; 18341da177e4SLinus Torvalds struct reiserfs_journal_list *tjl; 18351da177e4SLinus Torvalds struct reiserfs_journal_list *flush_jl; 1836600ed416SJeff Mahoney unsigned int trans_id; 18371da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 18381da177e4SLinus Torvalds 18391da177e4SLinus Torvalds flush_jl = tjl = jl; 18401da177e4SLinus Torvalds 18411da177e4SLinus Torvalds /* in data logging mode, try harder to flush a lot of blocks */ 18421da177e4SLinus Torvalds if (reiserfs_data_log(s)) 18431da177e4SLinus Torvalds limit = 1024; 18441da177e4SLinus Torvalds /* flush for 256 transactions or limit blocks, whichever comes first */ 18451da177e4SLinus Torvalds for (i = 0; i < 256 && len < limit; i++) { 18461da177e4SLinus Torvalds if (atomic_read(&tjl->j_commit_left) || 18471da177e4SLinus Torvalds tjl->j_trans_id < jl->j_trans_id) { 18481da177e4SLinus Torvalds break; 18491da177e4SLinus Torvalds } 18501da177e4SLinus Torvalds cur_len = atomic_read(&tjl->j_nonzerolen); 18511da177e4SLinus Torvalds if (cur_len > 0) { 18521da177e4SLinus Torvalds tjl->j_state &= ~LIST_TOUCHED; 18531da177e4SLinus Torvalds } 18541da177e4SLinus Torvalds len += cur_len; 18551da177e4SLinus Torvalds flush_jl = tjl; 18561da177e4SLinus Torvalds if (tjl->j_list.next == 
&journal->j_journal_list) 18571da177e4SLinus Torvalds break; 18581da177e4SLinus Torvalds tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 18591da177e4SLinus Torvalds } 18601da177e4SLinus Torvalds /* try to find a group of blocks we can flush across all the 18611da177e4SLinus Torvalds ** transactions, but only bother if we've actually spanned 18621da177e4SLinus Torvalds ** across multiple lists 18631da177e4SLinus Torvalds */ 18641da177e4SLinus Torvalds if (flush_jl != jl) { 18651da177e4SLinus Torvalds ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 18661da177e4SLinus Torvalds } 18671da177e4SLinus Torvalds flush_journal_list(s, flush_jl, 1); 18681da177e4SLinus Torvalds return 0; 18691da177e4SLinus Torvalds } 18701da177e4SLinus Torvalds 18711da177e4SLinus Torvalds /* 18721da177e4SLinus Torvalds ** removes any nodes in table with name block and dev as bh. 18731da177e4SLinus Torvalds ** only touchs the hnext and hprev pointers. 18741da177e4SLinus Torvalds */ 18751da177e4SLinus Torvalds void remove_journal_hash(struct super_block *sb, 18761da177e4SLinus Torvalds struct reiserfs_journal_cnode **table, 18771da177e4SLinus Torvalds struct reiserfs_journal_list *jl, 18781da177e4SLinus Torvalds unsigned long block, int remove_freed) 18791da177e4SLinus Torvalds { 18801da177e4SLinus Torvalds struct reiserfs_journal_cnode *cur; 18811da177e4SLinus Torvalds struct reiserfs_journal_cnode **head; 18821da177e4SLinus Torvalds 18831da177e4SLinus Torvalds head = &(journal_hash(table, sb, block)); 18841da177e4SLinus Torvalds if (!head) { 18851da177e4SLinus Torvalds return; 18861da177e4SLinus Torvalds } 18871da177e4SLinus Torvalds cur = *head; 18881da177e4SLinus Torvalds while (cur) { 1889bd4c625cSLinus Torvalds if (cur->blocknr == block && cur->sb == sb 1890bd4c625cSLinus Torvalds && (jl == NULL || jl == cur->jlist) 1891bd4c625cSLinus Torvalds && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { 18921da177e4SLinus Torvalds if (cur->hnext) { 18931da177e4SLinus Torvalds 
cur->hnext->hprev = cur->hprev; 18941da177e4SLinus Torvalds } 18951da177e4SLinus Torvalds if (cur->hprev) { 18961da177e4SLinus Torvalds cur->hprev->hnext = cur->hnext; 18971da177e4SLinus Torvalds } else { 18981da177e4SLinus Torvalds *head = cur->hnext; 18991da177e4SLinus Torvalds } 19001da177e4SLinus Torvalds cur->blocknr = 0; 19011da177e4SLinus Torvalds cur->sb = NULL; 19021da177e4SLinus Torvalds cur->state = 0; 19031da177e4SLinus Torvalds if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 19041da177e4SLinus Torvalds atomic_dec(&(cur->jlist->j_nonzerolen)); 19051da177e4SLinus Torvalds cur->bh = NULL; 19061da177e4SLinus Torvalds cur->jlist = NULL; 19071da177e4SLinus Torvalds } 19081da177e4SLinus Torvalds cur = cur->hnext; 19091da177e4SLinus Torvalds } 19101da177e4SLinus Torvalds } 19111da177e4SLinus Torvalds 1912bd4c625cSLinus Torvalds static void free_journal_ram(struct super_block *p_s_sb) 1913bd4c625cSLinus Torvalds { 19141da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1915d739b42bSPekka Enberg kfree(journal->j_current_jl); 19161da177e4SLinus Torvalds journal->j_num_lists--; 19171da177e4SLinus Torvalds 19181da177e4SLinus Torvalds vfree(journal->j_cnode_free_orig); 19191da177e4SLinus Torvalds free_list_bitmaps(p_s_sb, journal->j_list_bitmap); 19201da177e4SLinus Torvalds free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ 19211da177e4SLinus Torvalds if (journal->j_header_bh) { 19221da177e4SLinus Torvalds brelse(journal->j_header_bh); 19231da177e4SLinus Torvalds } 19241da177e4SLinus Torvalds /* j_header_bh is on the journal dev, make sure not to release the journal 19251da177e4SLinus Torvalds * dev until we brelse j_header_bh 19261da177e4SLinus Torvalds */ 19271da177e4SLinus Torvalds release_journal_dev(p_s_sb, journal); 19281da177e4SLinus Torvalds vfree(journal); 19291da177e4SLinus Torvalds } 19301da177e4SLinus Torvalds 19311da177e4SLinus Torvalds /* 19321da177e4SLinus Torvalds 
** call on unmount. Only set error to 1 if you haven't made your way out 19331da177e4SLinus Torvalds ** of read_super() yet. Any other caller must keep error at 0. 19341da177e4SLinus Torvalds */ 1935bd4c625cSLinus Torvalds static int do_journal_release(struct reiserfs_transaction_handle *th, 1936bd4c625cSLinus Torvalds struct super_block *p_s_sb, int error) 1937bd4c625cSLinus Torvalds { 19381da177e4SLinus Torvalds struct reiserfs_transaction_handle myth; 19391da177e4SLinus Torvalds int flushed = 0; 19401da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 19411da177e4SLinus Torvalds 19421da177e4SLinus Torvalds /* we only want to flush out transactions if we were called with error == 0 19431da177e4SLinus Torvalds */ 19441da177e4SLinus Torvalds if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { 19451da177e4SLinus Torvalds /* end the current trans */ 19461da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 19471da177e4SLinus Torvalds do_journal_end(th, p_s_sb, 10, FLUSH_ALL); 19481da177e4SLinus Torvalds 19491da177e4SLinus Torvalds /* make sure something gets logged to force our way into the flush code */ 19501da177e4SLinus Torvalds if (!journal_join(&myth, p_s_sb, 1)) { 1951bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(p_s_sb, 1952bd4c625cSLinus Torvalds SB_BUFFER_WITH_SB(p_s_sb), 1953bd4c625cSLinus Torvalds 1); 1954bd4c625cSLinus Torvalds journal_mark_dirty(&myth, p_s_sb, 1955bd4c625cSLinus Torvalds SB_BUFFER_WITH_SB(p_s_sb)); 19561da177e4SLinus Torvalds do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 19571da177e4SLinus Torvalds flushed = 1; 19581da177e4SLinus Torvalds } 19591da177e4SLinus Torvalds } 19601da177e4SLinus Torvalds 19611da177e4SLinus Torvalds /* this also catches errors during the do_journal_end above */ 19621da177e4SLinus Torvalds if (!error && reiserfs_is_journal_aborted(journal)) { 19631da177e4SLinus Torvalds memset(&myth, 0, sizeof(myth)); 19641da177e4SLinus Torvalds if (!journal_join_abort(&myth, p_s_sb, 1)) { 
1965bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(p_s_sb, 1966bd4c625cSLinus Torvalds SB_BUFFER_WITH_SB(p_s_sb), 1967bd4c625cSLinus Torvalds 1); 1968bd4c625cSLinus Torvalds journal_mark_dirty(&myth, p_s_sb, 1969bd4c625cSLinus Torvalds SB_BUFFER_WITH_SB(p_s_sb)); 19701da177e4SLinus Torvalds do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 19711da177e4SLinus Torvalds } 19721da177e4SLinus Torvalds } 19731da177e4SLinus Torvalds 19741da177e4SLinus Torvalds reiserfs_mounted_fs_count--; 19751da177e4SLinus Torvalds /* wait for all commits to finish */ 19761da177e4SLinus Torvalds cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); 19771da177e4SLinus Torvalds flush_workqueue(commit_wq); 19781da177e4SLinus Torvalds if (!reiserfs_mounted_fs_count) { 19791da177e4SLinus Torvalds destroy_workqueue(commit_wq); 19801da177e4SLinus Torvalds commit_wq = NULL; 19811da177e4SLinus Torvalds } 19821da177e4SLinus Torvalds 19831da177e4SLinus Torvalds free_journal_ram(p_s_sb); 19841da177e4SLinus Torvalds 19851da177e4SLinus Torvalds return 0; 19861da177e4SLinus Torvalds } 19871da177e4SLinus Torvalds 19881da177e4SLinus Torvalds /* 19891da177e4SLinus Torvalds ** call on unmount. flush all journal trans, release all alloc'd ram 19901da177e4SLinus Torvalds */ 1991bd4c625cSLinus Torvalds int journal_release(struct reiserfs_transaction_handle *th, 1992bd4c625cSLinus Torvalds struct super_block *p_s_sb) 1993bd4c625cSLinus Torvalds { 19941da177e4SLinus Torvalds return do_journal_release(th, p_s_sb, 0); 19951da177e4SLinus Torvalds } 1996bd4c625cSLinus Torvalds 19971da177e4SLinus Torvalds /* 19981da177e4SLinus Torvalds ** only call from an error condition inside reiserfs_read_super! 
19991da177e4SLinus Torvalds */ 2000bd4c625cSLinus Torvalds int journal_release_error(struct reiserfs_transaction_handle *th, 2001bd4c625cSLinus Torvalds struct super_block *p_s_sb) 2002bd4c625cSLinus Torvalds { 20031da177e4SLinus Torvalds return do_journal_release(th, p_s_sb, 1); 20041da177e4SLinus Torvalds } 20051da177e4SLinus Torvalds 20061da177e4SLinus Torvalds /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 2007bd4c625cSLinus Torvalds static int journal_compare_desc_commit(struct super_block *p_s_sb, 2008bd4c625cSLinus Torvalds struct reiserfs_journal_desc *desc, 2009bd4c625cSLinus Torvalds struct reiserfs_journal_commit *commit) 2010bd4c625cSLinus Torvalds { 20111da177e4SLinus Torvalds if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || 20121da177e4SLinus Torvalds get_commit_trans_len(commit) != get_desc_trans_len(desc) || 20131da177e4SLinus Torvalds get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || 2014bd4c625cSLinus Torvalds get_commit_trans_len(commit) <= 0) { 20151da177e4SLinus Torvalds return 1; 20161da177e4SLinus Torvalds } 20171da177e4SLinus Torvalds return 0; 20181da177e4SLinus Torvalds } 2019bd4c625cSLinus Torvalds 20201da177e4SLinus Torvalds /* returns 0 if it did not find a description block 20211da177e4SLinus Torvalds ** returns -1 if it found a corrupt commit block 20221da177e4SLinus Torvalds ** returns 1 if both desc and commit were valid 20231da177e4SLinus Torvalds */ 2024bd4c625cSLinus Torvalds static int journal_transaction_is_valid(struct super_block *p_s_sb, 2025bd4c625cSLinus Torvalds struct buffer_head *d_bh, 2026600ed416SJeff Mahoney unsigned int *oldest_invalid_trans_id, 2027bd4c625cSLinus Torvalds unsigned long *newest_mount_id) 2028bd4c625cSLinus Torvalds { 20291da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 20301da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 20311da177e4SLinus Torvalds struct buffer_head *c_bh; 20321da177e4SLinus 
Torvalds unsigned long offset; 20331da177e4SLinus Torvalds 20341da177e4SLinus Torvalds if (!d_bh) 20351da177e4SLinus Torvalds return 0; 20361da177e4SLinus Torvalds 20371da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2038bd4c625cSLinus Torvalds if (get_desc_trans_len(desc) > 0 2039bd4c625cSLinus Torvalds && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { 2040bd4c625cSLinus Torvalds if (oldest_invalid_trans_id && *oldest_invalid_trans_id 2041bd4c625cSLinus Torvalds && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 2042bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2043bd4c625cSLinus Torvalds "journal-986: transaction " 20441da177e4SLinus Torvalds "is valid returning because trans_id %d is greater than " 2045bd4c625cSLinus Torvalds "oldest_invalid %lu", 2046bd4c625cSLinus Torvalds get_desc_trans_id(desc), 20471da177e4SLinus Torvalds *oldest_invalid_trans_id); 20481da177e4SLinus Torvalds return 0; 20491da177e4SLinus Torvalds } 2050bd4c625cSLinus Torvalds if (newest_mount_id 2051bd4c625cSLinus Torvalds && *newest_mount_id > get_desc_mount_id(desc)) { 2052bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2053bd4c625cSLinus Torvalds "journal-1087: transaction " 20541da177e4SLinus Torvalds "is valid returning because mount_id %d is less than " 2055bd4c625cSLinus Torvalds "newest_mount_id %lu", 2056bd4c625cSLinus Torvalds get_desc_mount_id(desc), 20571da177e4SLinus Torvalds *newest_mount_id); 20581da177e4SLinus Torvalds return -1; 20591da177e4SLinus Torvalds } 20601da177e4SLinus Torvalds if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { 206145b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-2018", 206245b03d5eSJeff Mahoney "Bad transaction length %d " 206345b03d5eSJeff Mahoney "encountered, ignoring transaction", 2064bd4c625cSLinus Torvalds get_desc_trans_len(desc)); 20651da177e4SLinus Torvalds return -1; 20661da177e4SLinus Torvalds } 20671da177e4SLinus Torvalds 
offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 20681da177e4SLinus Torvalds 20691da177e4SLinus Torvalds /* ok, we have a journal description block, lets see if the transaction was valid */ 2070bd4c625cSLinus Torvalds c_bh = 2071bd4c625cSLinus Torvalds journal_bread(p_s_sb, 2072bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2073bd4c625cSLinus Torvalds ((offset + get_desc_trans_len(desc) + 2074bd4c625cSLinus Torvalds 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 20751da177e4SLinus Torvalds if (!c_bh) 20761da177e4SLinus Torvalds return 0; 20771da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 20781da177e4SLinus Torvalds if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 20791da177e4SLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 20801da177e4SLinus Torvalds "journal_transaction_is_valid, commit offset %ld had bad " 20811da177e4SLinus Torvalds "time %d or length %d", 2082bd4c625cSLinus Torvalds c_bh->b_blocknr - 2083bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 20841da177e4SLinus Torvalds get_commit_trans_id(commit), 20851da177e4SLinus Torvalds get_commit_trans_len(commit)); 20861da177e4SLinus Torvalds brelse(c_bh); 20871da177e4SLinus Torvalds if (oldest_invalid_trans_id) { 2088bd4c625cSLinus Torvalds *oldest_invalid_trans_id = 2089bd4c625cSLinus Torvalds get_desc_trans_id(desc); 2090bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2091bd4c625cSLinus Torvalds "journal-1004: " 20921da177e4SLinus Torvalds "transaction_is_valid setting oldest invalid trans_id " 2093bd4c625cSLinus Torvalds "to %d", 2094bd4c625cSLinus Torvalds get_desc_trans_id(desc)); 20951da177e4SLinus Torvalds } 20961da177e4SLinus Torvalds return -1; 20971da177e4SLinus Torvalds } 20981da177e4SLinus Torvalds brelse(c_bh); 2099bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2100bd4c625cSLinus Torvalds "journal-1006: found valid " 21011da177e4SLinus Torvalds "transaction start offset 
%llu, len %d id %d", 2102bd4c625cSLinus Torvalds d_bh->b_blocknr - 2103bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2104bd4c625cSLinus Torvalds get_desc_trans_len(desc), 2105bd4c625cSLinus Torvalds get_desc_trans_id(desc)); 21061da177e4SLinus Torvalds return 1; 21071da177e4SLinus Torvalds } else { 21081da177e4SLinus Torvalds return 0; 21091da177e4SLinus Torvalds } 21101da177e4SLinus Torvalds } 21111da177e4SLinus Torvalds 2112bd4c625cSLinus Torvalds static void brelse_array(struct buffer_head **heads, int num) 2113bd4c625cSLinus Torvalds { 21141da177e4SLinus Torvalds int i; 21151da177e4SLinus Torvalds for (i = 0; i < num; i++) { 21161da177e4SLinus Torvalds brelse(heads[i]); 21171da177e4SLinus Torvalds } 21181da177e4SLinus Torvalds } 21191da177e4SLinus Torvalds 21201da177e4SLinus Torvalds /* 21211da177e4SLinus Torvalds ** given the start, and values for the oldest acceptable transactions, 21221da177e4SLinus Torvalds ** this either reads in a replays a transaction, or returns because the transaction 21231da177e4SLinus Torvalds ** is invalid, or too old. 
21241da177e4SLinus Torvalds */ 2125bd4c625cSLinus Torvalds static int journal_read_transaction(struct super_block *p_s_sb, 2126bd4c625cSLinus Torvalds unsigned long cur_dblock, 2127bd4c625cSLinus Torvalds unsigned long oldest_start, 2128600ed416SJeff Mahoney unsigned int oldest_trans_id, 2129bd4c625cSLinus Torvalds unsigned long newest_mount_id) 2130bd4c625cSLinus Torvalds { 21311da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 21321da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 21331da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 2134600ed416SJeff Mahoney unsigned int trans_id = 0; 21351da177e4SLinus Torvalds struct buffer_head *c_bh; 21361da177e4SLinus Torvalds struct buffer_head *d_bh; 21371da177e4SLinus Torvalds struct buffer_head **log_blocks = NULL; 21381da177e4SLinus Torvalds struct buffer_head **real_blocks = NULL; 2139600ed416SJeff Mahoney unsigned int trans_offset; 21401da177e4SLinus Torvalds int i; 21411da177e4SLinus Torvalds int trans_half; 21421da177e4SLinus Torvalds 21431da177e4SLinus Torvalds d_bh = journal_bread(p_s_sb, cur_dblock); 21441da177e4SLinus Torvalds if (!d_bh) 21451da177e4SLinus Torvalds return 1; 21461da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 21471da177e4SLinus Torvalds trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 21481da177e4SLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " 21491da177e4SLinus Torvalds "journal_read_transaction, offset %llu, len %d mount_id %d", 21501da177e4SLinus Torvalds d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 21511da177e4SLinus Torvalds get_desc_trans_len(desc), get_desc_mount_id(desc)); 21521da177e4SLinus Torvalds if (get_desc_trans_id(desc) < oldest_trans_id) { 21531da177e4SLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " 21541da177e4SLinus Torvalds "journal_read_trans skipping because %lu is too old", 2155bd4c625cSLinus Torvalds 
cur_dblock - 2156bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 21571da177e4SLinus Torvalds brelse(d_bh); 21581da177e4SLinus Torvalds return 1; 21591da177e4SLinus Torvalds } 21601da177e4SLinus Torvalds if (get_desc_mount_id(desc) != newest_mount_id) { 21611da177e4SLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " 21621da177e4SLinus Torvalds "journal_read_trans skipping because %d is != " 21631da177e4SLinus Torvalds "newest_mount_id %lu", get_desc_mount_id(desc), 21641da177e4SLinus Torvalds newest_mount_id); 21651da177e4SLinus Torvalds brelse(d_bh); 21661da177e4SLinus Torvalds return 1; 21671da177e4SLinus Torvalds } 21681da177e4SLinus Torvalds c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 21691da177e4SLinus Torvalds ((trans_offset + get_desc_trans_len(desc) + 1) % 21701da177e4SLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 21711da177e4SLinus Torvalds if (!c_bh) { 21721da177e4SLinus Torvalds brelse(d_bh); 21731da177e4SLinus Torvalds return 1; 21741da177e4SLinus Torvalds } 21751da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 21761da177e4SLinus Torvalds if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2177bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2178bd4c625cSLinus Torvalds "journal_read_transaction, " 21791da177e4SLinus Torvalds "commit offset %llu had bad time %d or length %d", 2180bd4c625cSLinus Torvalds c_bh->b_blocknr - 2181bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2182bd4c625cSLinus Torvalds get_commit_trans_id(commit), 2183bd4c625cSLinus Torvalds get_commit_trans_len(commit)); 21841da177e4SLinus Torvalds brelse(c_bh); 21851da177e4SLinus Torvalds brelse(d_bh); 21861da177e4SLinus Torvalds return 1; 21871da177e4SLinus Torvalds } 21881da177e4SLinus Torvalds trans_id = get_desc_trans_id(desc); 21891da177e4SLinus Torvalds /* now we know we've got a good transaction, and it was inside the valid time ranges */ 
2190d739b42bSPekka Enberg log_blocks = kmalloc(get_desc_trans_len(desc) * 2191d739b42bSPekka Enberg sizeof(struct buffer_head *), GFP_NOFS); 2192d739b42bSPekka Enberg real_blocks = kmalloc(get_desc_trans_len(desc) * 2193d739b42bSPekka Enberg sizeof(struct buffer_head *), GFP_NOFS); 21941da177e4SLinus Torvalds if (!log_blocks || !real_blocks) { 21951da177e4SLinus Torvalds brelse(c_bh); 21961da177e4SLinus Torvalds brelse(d_bh); 2197d739b42bSPekka Enberg kfree(log_blocks); 2198d739b42bSPekka Enberg kfree(real_blocks); 219945b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1169", 220045b03d5eSJeff Mahoney "kmalloc failed, unable to mount FS"); 22011da177e4SLinus Torvalds return -1; 22021da177e4SLinus Torvalds } 22031da177e4SLinus Torvalds /* get all the buffer heads */ 22041da177e4SLinus Torvalds trans_half = journal_trans_half(p_s_sb->s_blocksize); 22051da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 2206bd4c625cSLinus Torvalds log_blocks[i] = 2207bd4c625cSLinus Torvalds journal_getblk(p_s_sb, 2208bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2209bd4c625cSLinus Torvalds (trans_offset + 1 + 2210bd4c625cSLinus Torvalds i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 22111da177e4SLinus Torvalds if (i < trans_half) { 2212bd4c625cSLinus Torvalds real_blocks[i] = 2213bd4c625cSLinus Torvalds sb_getblk(p_s_sb, 2214bd4c625cSLinus Torvalds le32_to_cpu(desc->j_realblock[i])); 22151da177e4SLinus Torvalds } else { 2216bd4c625cSLinus Torvalds real_blocks[i] = 2217bd4c625cSLinus Torvalds sb_getblk(p_s_sb, 2218bd4c625cSLinus Torvalds le32_to_cpu(commit-> 2219bd4c625cSLinus Torvalds j_realblock[i - trans_half])); 22201da177e4SLinus Torvalds } 22211da177e4SLinus Torvalds if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { 222245b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1207", 222345b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! 
" 222445b03d5eSJeff Mahoney "Block to replay is outside of " 222545b03d5eSJeff Mahoney "filesystem"); 22261da177e4SLinus Torvalds goto abort_replay; 22271da177e4SLinus Torvalds } 22281da177e4SLinus Torvalds /* make sure we don't try to replay onto log or reserved area */ 2229bd4c625cSLinus Torvalds if (is_block_in_log_or_reserved_area 2230bd4c625cSLinus Torvalds (p_s_sb, real_blocks[i]->b_blocknr)) { 223145b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1204", 223245b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! " 223345b03d5eSJeff Mahoney "Trying to replay onto a log block"); 22341da177e4SLinus Torvalds abort_replay: 22351da177e4SLinus Torvalds brelse_array(log_blocks, i); 22361da177e4SLinus Torvalds brelse_array(real_blocks, i); 22371da177e4SLinus Torvalds brelse(c_bh); 22381da177e4SLinus Torvalds brelse(d_bh); 2239d739b42bSPekka Enberg kfree(log_blocks); 2240d739b42bSPekka Enberg kfree(real_blocks); 22411da177e4SLinus Torvalds return -1; 22421da177e4SLinus Torvalds } 22431da177e4SLinus Torvalds } 22441da177e4SLinus Torvalds /* read in the log blocks, memcpy to the corresponding real block */ 22451da177e4SLinus Torvalds ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 22461da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 22471da177e4SLinus Torvalds wait_on_buffer(log_blocks[i]); 22481da177e4SLinus Torvalds if (!buffer_uptodate(log_blocks[i])) { 224945b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1212", 225045b03d5eSJeff Mahoney "REPLAY FAILURE fsck required! 
" 225145b03d5eSJeff Mahoney "buffer write failed"); 2252bd4c625cSLinus Torvalds brelse_array(log_blocks + i, 2253bd4c625cSLinus Torvalds get_desc_trans_len(desc) - i); 22541da177e4SLinus Torvalds brelse_array(real_blocks, get_desc_trans_len(desc)); 22551da177e4SLinus Torvalds brelse(c_bh); 22561da177e4SLinus Torvalds brelse(d_bh); 2257d739b42bSPekka Enberg kfree(log_blocks); 2258d739b42bSPekka Enberg kfree(real_blocks); 22591da177e4SLinus Torvalds return -1; 22601da177e4SLinus Torvalds } 2261bd4c625cSLinus Torvalds memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, 2262bd4c625cSLinus Torvalds real_blocks[i]->b_size); 22631da177e4SLinus Torvalds set_buffer_uptodate(real_blocks[i]); 22641da177e4SLinus Torvalds brelse(log_blocks[i]); 22651da177e4SLinus Torvalds } 22661da177e4SLinus Torvalds /* flush out the real blocks */ 22671da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 22681da177e4SLinus Torvalds set_buffer_dirty(real_blocks[i]); 226953778ffdSJan Kara ll_rw_block(SWRITE, 1, real_blocks + i); 22701da177e4SLinus Torvalds } 22711da177e4SLinus Torvalds for (i = 0; i < get_desc_trans_len(desc); i++) { 22721da177e4SLinus Torvalds wait_on_buffer(real_blocks[i]); 22731da177e4SLinus Torvalds if (!buffer_uptodate(real_blocks[i])) { 227445b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1226", 227545b03d5eSJeff Mahoney "REPLAY FAILURE, fsck required! 
" 227645b03d5eSJeff Mahoney "buffer write failed"); 2277bd4c625cSLinus Torvalds brelse_array(real_blocks + i, 2278bd4c625cSLinus Torvalds get_desc_trans_len(desc) - i); 22791da177e4SLinus Torvalds brelse(c_bh); 22801da177e4SLinus Torvalds brelse(d_bh); 2281d739b42bSPekka Enberg kfree(log_blocks); 2282d739b42bSPekka Enberg kfree(real_blocks); 22831da177e4SLinus Torvalds return -1; 22841da177e4SLinus Torvalds } 22851da177e4SLinus Torvalds brelse(real_blocks[i]); 22861da177e4SLinus Torvalds } 2287bd4c625cSLinus Torvalds cur_dblock = 2288bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2289bd4c625cSLinus Torvalds ((trans_offset + get_desc_trans_len(desc) + 2290bd4c625cSLinus Torvalds 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2291bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2292bd4c625cSLinus Torvalds "journal-1095: setting journal " "start to offset %ld", 22931da177e4SLinus Torvalds cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 22941da177e4SLinus Torvalds 22951da177e4SLinus Torvalds /* init starting values for the first transaction, in case this is the last transaction to be replayed. 
*/ 22961da177e4SLinus Torvalds journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 22971da177e4SLinus Torvalds journal->j_last_flush_trans_id = trans_id; 22981da177e4SLinus Torvalds journal->j_trans_id = trans_id + 1; 2299a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 2300a44c94a7SAlexander Zarochentsev if (journal->j_trans_id == 0) 2301a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 23021da177e4SLinus Torvalds brelse(c_bh); 23031da177e4SLinus Torvalds brelse(d_bh); 2304d739b42bSPekka Enberg kfree(log_blocks); 2305d739b42bSPekka Enberg kfree(real_blocks); 23061da177e4SLinus Torvalds return 0; 23071da177e4SLinus Torvalds } 23081da177e4SLinus Torvalds 23091da177e4SLinus Torvalds /* This function reads blocks starting from block and to max_block of bufsize 23101da177e4SLinus Torvalds size (but no more than BUFNR blocks at a time). This proved to improve 23111da177e4SLinus Torvalds mounting speed on self-rebuilding raid5 arrays at least. 23121da177e4SLinus Torvalds Right now it is only used from journal code. But later we might use it 23131da177e4SLinus Torvalds from other places. 23141da177e4SLinus Torvalds Note: Do not use journal_getblk/sb_getblk functions here! 
*/ 23153ee16670SJeff Mahoney static struct buffer_head *reiserfs_breada(struct block_device *dev, 23163ee16670SJeff Mahoney b_blocknr_t block, int bufsize, 23173ee16670SJeff Mahoney b_blocknr_t max_block) 23181da177e4SLinus Torvalds { 23191da177e4SLinus Torvalds struct buffer_head *bhlist[BUFNR]; 23201da177e4SLinus Torvalds unsigned int blocks = BUFNR; 23211da177e4SLinus Torvalds struct buffer_head *bh; 23221da177e4SLinus Torvalds int i, j; 23231da177e4SLinus Torvalds 23241da177e4SLinus Torvalds bh = __getblk(dev, block, bufsize); 23251da177e4SLinus Torvalds if (buffer_uptodate(bh)) 23261da177e4SLinus Torvalds return (bh); 23271da177e4SLinus Torvalds 23281da177e4SLinus Torvalds if (block + BUFNR > max_block) { 23291da177e4SLinus Torvalds blocks = max_block - block; 23301da177e4SLinus Torvalds } 23311da177e4SLinus Torvalds bhlist[0] = bh; 23321da177e4SLinus Torvalds j = 1; 23331da177e4SLinus Torvalds for (i = 1; i < blocks; i++) { 23341da177e4SLinus Torvalds bh = __getblk(dev, block + i, bufsize); 23351da177e4SLinus Torvalds if (buffer_uptodate(bh)) { 23361da177e4SLinus Torvalds brelse(bh); 23371da177e4SLinus Torvalds break; 2338bd4c625cSLinus Torvalds } else 2339bd4c625cSLinus Torvalds bhlist[j++] = bh; 23401da177e4SLinus Torvalds } 23411da177e4SLinus Torvalds ll_rw_block(READ, j, bhlist); 23421da177e4SLinus Torvalds for (i = 1; i < j; i++) 23431da177e4SLinus Torvalds brelse(bhlist[i]); 23441da177e4SLinus Torvalds bh = bhlist[0]; 23451da177e4SLinus Torvalds wait_on_buffer(bh); 23461da177e4SLinus Torvalds if (buffer_uptodate(bh)) 23471da177e4SLinus Torvalds return bh; 23481da177e4SLinus Torvalds brelse(bh); 23491da177e4SLinus Torvalds return NULL; 23501da177e4SLinus Torvalds } 23511da177e4SLinus Torvalds 23521da177e4SLinus Torvalds /* 23531da177e4SLinus Torvalds ** read and replay the log 23541da177e4SLinus Torvalds ** on a clean unmount, the journal header's next unflushed pointer will be to an invalid 23551da177e4SLinus Torvalds ** transaction. 
This tests that before finding all the transactions in the log, which makes normal mount times fast. 23561da177e4SLinus Torvalds ** 23571da177e4SLinus Torvalds ** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. 23581da177e4SLinus Torvalds ** 23591da177e4SLinus Torvalds ** On exit, it sets things up so the first transaction will work correctly. 23601da177e4SLinus Torvalds */ 2361bd4c625cSLinus Torvalds static int journal_read(struct super_block *p_s_sb) 2362bd4c625cSLinus Torvalds { 23631da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 23641da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 2365600ed416SJeff Mahoney unsigned int oldest_trans_id = 0; 2366600ed416SJeff Mahoney unsigned int oldest_invalid_trans_id = 0; 23671da177e4SLinus Torvalds time_t start; 23681da177e4SLinus Torvalds unsigned long oldest_start = 0; 23691da177e4SLinus Torvalds unsigned long cur_dblock = 0; 23701da177e4SLinus Torvalds unsigned long newest_mount_id = 9; 23711da177e4SLinus Torvalds struct buffer_head *d_bh; 23721da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 23731da177e4SLinus Torvalds int valid_journal_header = 0; 23741da177e4SLinus Torvalds int replay_count = 0; 23751da177e4SLinus Torvalds int continue_replay = 1; 23761da177e4SLinus Torvalds int ret; 23771da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 23781da177e4SLinus Torvalds 23791da177e4SLinus Torvalds cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 23801da177e4SLinus Torvalds reiserfs_info(p_s_sb, "checking transaction log (%s)\n", 23811da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b)); 23821da177e4SLinus Torvalds start = get_seconds(); 23831da177e4SLinus Torvalds 23841da177e4SLinus Torvalds /* step 1, read in the journal header block. 
Check the transaction it says 23851da177e4SLinus Torvalds ** is the first unflushed, and if that transaction is not valid, 23861da177e4SLinus Torvalds ** replay is done 23871da177e4SLinus Torvalds */ 23881da177e4SLinus Torvalds journal->j_header_bh = journal_bread(p_s_sb, 2389bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) 2390bd4c625cSLinus Torvalds + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 23911da177e4SLinus Torvalds if (!journal->j_header_bh) { 23921da177e4SLinus Torvalds return 1; 23931da177e4SLinus Torvalds } 23941da177e4SLinus Torvalds jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); 2395c499ec24SVladimir V. Saveliev if (le32_to_cpu(jh->j_first_unflushed_offset) < 2396bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb) 2397bd4c625cSLinus Torvalds && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2398bd4c625cSLinus Torvalds oldest_start = 2399bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 24001da177e4SLinus Torvalds le32_to_cpu(jh->j_first_unflushed_offset); 24011da177e4SLinus Torvalds oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 24021da177e4SLinus Torvalds newest_mount_id = le32_to_cpu(jh->j_mount_id); 2403bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2404bd4c625cSLinus Torvalds "journal-1153: found in " 24051da177e4SLinus Torvalds "header: first_unflushed_offset %d, last_flushed_trans_id " 24061da177e4SLinus Torvalds "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 24071da177e4SLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id)); 24081da177e4SLinus Torvalds valid_journal_header = 1; 24091da177e4SLinus Torvalds 24101da177e4SLinus Torvalds /* now, we try to read the first unflushed offset. If it is not valid, 24111da177e4SLinus Torvalds ** there is nothing more we can do, and it makes no sense to read 24121da177e4SLinus Torvalds ** through the whole log. 
24131da177e4SLinus Torvalds */ 2414bd4c625cSLinus Torvalds d_bh = 2415bd4c625cSLinus Torvalds journal_bread(p_s_sb, 2416bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2417bd4c625cSLinus Torvalds le32_to_cpu(jh->j_first_unflushed_offset)); 24181da177e4SLinus Torvalds ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); 24191da177e4SLinus Torvalds if (!ret) { 24201da177e4SLinus Torvalds continue_replay = 0; 24211da177e4SLinus Torvalds } 24221da177e4SLinus Torvalds brelse(d_bh); 24231da177e4SLinus Torvalds goto start_log_replay; 24241da177e4SLinus Torvalds } 24251da177e4SLinus Torvalds 24261da177e4SLinus Torvalds if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { 242745b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "clm-2076", 242845b03d5eSJeff Mahoney "device is readonly, unable to replay log"); 24291da177e4SLinus Torvalds return -1; 24301da177e4SLinus Torvalds } 24311da177e4SLinus Torvalds 24321da177e4SLinus Torvalds /* ok, there are transactions that need to be replayed. start with the first log block, find 24331da177e4SLinus Torvalds ** all the valid transactions, and pick out the oldest. 
24341da177e4SLinus Torvalds */ 2435bd4c625cSLinus Torvalds while (continue_replay 2436bd4c625cSLinus Torvalds && cur_dblock < 2437bd4c625cSLinus Torvalds (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2438bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { 24391da177e4SLinus Torvalds /* Note that it is required for blocksize of primary fs device and journal 24401da177e4SLinus Torvalds device to be the same */ 2441bd4c625cSLinus Torvalds d_bh = 2442bd4c625cSLinus Torvalds reiserfs_breada(journal->j_dev_bd, cur_dblock, 2443bd4c625cSLinus Torvalds p_s_sb->s_blocksize, 2444bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2445bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2446bd4c625cSLinus Torvalds ret = 2447bd4c625cSLinus Torvalds journal_transaction_is_valid(p_s_sb, d_bh, 2448bd4c625cSLinus Torvalds &oldest_invalid_trans_id, 2449bd4c625cSLinus Torvalds &newest_mount_id); 24501da177e4SLinus Torvalds if (ret == 1) { 24511da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)d_bh->b_data; 24521da177e4SLinus Torvalds if (oldest_start == 0) { /* init all oldest_ values */ 24531da177e4SLinus Torvalds oldest_trans_id = get_desc_trans_id(desc); 24541da177e4SLinus Torvalds oldest_start = d_bh->b_blocknr; 24551da177e4SLinus Torvalds newest_mount_id = get_desc_mount_id(desc); 2456bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2457bd4c625cSLinus Torvalds "journal-1179: Setting " 24581da177e4SLinus Torvalds "oldest_start to offset %llu, trans_id %lu", 2459bd4c625cSLinus Torvalds oldest_start - 2460bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK 2461bd4c625cSLinus Torvalds (p_s_sb), oldest_trans_id); 24621da177e4SLinus Torvalds } else if (oldest_trans_id > get_desc_trans_id(desc)) { 24631da177e4SLinus Torvalds /* one we just read was older */ 24641da177e4SLinus Torvalds oldest_trans_id = get_desc_trans_id(desc); 24651da177e4SLinus Torvalds oldest_start = d_bh->b_blocknr; 2466bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, 
REISERFS_DEBUG_CODE, 2467bd4c625cSLinus Torvalds "journal-1180: Resetting " 24681da177e4SLinus Torvalds "oldest_start to offset %lu, trans_id %lu", 2469bd4c625cSLinus Torvalds oldest_start - 2470bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK 2471bd4c625cSLinus Torvalds (p_s_sb), oldest_trans_id); 24721da177e4SLinus Torvalds } 24731da177e4SLinus Torvalds if (newest_mount_id < get_desc_mount_id(desc)) { 24741da177e4SLinus Torvalds newest_mount_id = get_desc_mount_id(desc); 2475bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2476bd4c625cSLinus Torvalds "journal-1299: Setting " 2477bd4c625cSLinus Torvalds "newest_mount_id to %d", 2478bd4c625cSLinus Torvalds get_desc_mount_id(desc)); 24791da177e4SLinus Torvalds } 24801da177e4SLinus Torvalds cur_dblock += get_desc_trans_len(desc) + 2; 24811da177e4SLinus Torvalds } else { 24821da177e4SLinus Torvalds cur_dblock++; 24831da177e4SLinus Torvalds } 24841da177e4SLinus Torvalds brelse(d_bh); 24851da177e4SLinus Torvalds } 24861da177e4SLinus Torvalds 24871da177e4SLinus Torvalds start_log_replay: 24881da177e4SLinus Torvalds cur_dblock = oldest_start; 24891da177e4SLinus Torvalds if (oldest_trans_id) { 2490bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2491bd4c625cSLinus Torvalds "journal-1206: Starting replay " 24921da177e4SLinus Torvalds "from offset %llu, trans_id %lu", 24931da177e4SLinus Torvalds cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 24941da177e4SLinus Torvalds oldest_trans_id); 24951da177e4SLinus Torvalds 24961da177e4SLinus Torvalds } 24971da177e4SLinus Torvalds replay_count = 0; 24981da177e4SLinus Torvalds while (continue_replay && oldest_trans_id > 0) { 2499bd4c625cSLinus Torvalds ret = 2500bd4c625cSLinus Torvalds journal_read_transaction(p_s_sb, cur_dblock, oldest_start, 2501bd4c625cSLinus Torvalds oldest_trans_id, newest_mount_id); 25021da177e4SLinus Torvalds if (ret < 0) { 25031da177e4SLinus Torvalds return ret; 25041da177e4SLinus Torvalds } else if (ret != 0) { 
25051da177e4SLinus Torvalds break; 25061da177e4SLinus Torvalds } 2507bd4c625cSLinus Torvalds cur_dblock = 2508bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; 25091da177e4SLinus Torvalds replay_count++; 25101da177e4SLinus Torvalds if (cur_dblock == oldest_start) 25111da177e4SLinus Torvalds break; 25121da177e4SLinus Torvalds } 25131da177e4SLinus Torvalds 25141da177e4SLinus Torvalds if (oldest_trans_id == 0) { 2515bd4c625cSLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2516bd4c625cSLinus Torvalds "journal-1225: No valid " "transactions found"); 25171da177e4SLinus Torvalds } 25181da177e4SLinus Torvalds /* j_start does not get set correctly if we don't replay any transactions. 25191da177e4SLinus Torvalds ** if we had a valid journal_header, set j_start to the first unflushed transaction value, 25201da177e4SLinus Torvalds ** copy the trans_id from the header 25211da177e4SLinus Torvalds */ 25221da177e4SLinus Torvalds if (valid_journal_header && replay_count == 0) { 25231da177e4SLinus Torvalds journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); 2524bd4c625cSLinus Torvalds journal->j_trans_id = 2525bd4c625cSLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2526a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 2527a44c94a7SAlexander Zarochentsev if (journal->j_trans_id == 0) 2528a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 2529bd4c625cSLinus Torvalds journal->j_last_flush_trans_id = 2530bd4c625cSLinus Torvalds le32_to_cpu(jh->j_last_flush_trans_id); 25311da177e4SLinus Torvalds journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; 25321da177e4SLinus Torvalds } else { 25331da177e4SLinus Torvalds journal->j_mount_id = newest_mount_id + 1; 25341da177e4SLinus Torvalds } 25351da177e4SLinus Torvalds reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 25361da177e4SLinus Torvalds "newest_mount_id to %lu", journal->j_mount_id); 25371da177e4SLinus Torvalds 
journal->j_first_unflushed_offset = journal->j_start; 25381da177e4SLinus Torvalds if (replay_count > 0) { 2539bd4c625cSLinus Torvalds reiserfs_info(p_s_sb, 2540bd4c625cSLinus Torvalds "replayed %d transactions in %lu seconds\n", 25411da177e4SLinus Torvalds replay_count, get_seconds() - start); 25421da177e4SLinus Torvalds } 25431da177e4SLinus Torvalds if (!bdev_read_only(p_s_sb->s_bdev) && 25441da177e4SLinus Torvalds _update_journal_header_block(p_s_sb, journal->j_start, 2545bd4c625cSLinus Torvalds journal->j_last_flush_trans_id)) { 25461da177e4SLinus Torvalds /* replay failed, caller must call free_journal_ram and abort 25471da177e4SLinus Torvalds ** the mount 25481da177e4SLinus Torvalds */ 25491da177e4SLinus Torvalds return -1; 25501da177e4SLinus Torvalds } 25511da177e4SLinus Torvalds return 0; 25521da177e4SLinus Torvalds } 25531da177e4SLinus Torvalds

/*
 * Allocate a zeroed journal list for @s, initialise its list heads and
 * commit mutex, bump the journal's j_num_lists counter and take a
 * reference on the new list via get_journal_list().
 *
 * The allocation uses __GFP_NOFAIL, so the result is used without a
 * NULL check.
 */
static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
{
	struct reiserfs_journal_list *new_jl =
	    kzalloc(sizeof(struct reiserfs_journal_list),
		    GFP_NOFS | __GFP_NOFAIL);

	INIT_LIST_HEAD(&new_jl->j_list);
	INIT_LIST_HEAD(&new_jl->j_working_list);
	INIT_LIST_HEAD(&new_jl->j_tail_bh_list);
	INIT_LIST_HEAD(&new_jl->j_bh_list);
	mutex_init(&new_jl->j_commit_mutex);

	SB_JOURNAL(s)->j_num_lists++;
	get_journal_list(new_jl);

	return new_jl;
}

/*
 * Install a freshly allocated journal list as the journal's current list.
 */
static void journal_list_init(struct super_block *p_s_sb)
{
	struct reiserfs_journal_list *first_jl = alloc_journal_list(p_s_sb);

	SB_JOURNAL(p_s_sb)->j_current_jl = first_jl;
}

static int release_journal_dev(struct super_block *super, 25751da177e4SLinus Torvalds struct reiserfs_journal *journal) 25761da177e4SLinus Torvalds { 25771da177e4SLinus Torvalds int result; 25781da177e4SLinus Torvalds 25791da177e4SLinus Torvalds result = 0; 25801da177e4SLinus Torvalds 258186098fa0SChristoph Hellwig if (journal->j_dev_bd != NULL) { 258286098fa0SChristoph Hellwig if (journal->j_dev_bd->bd_dev != super->s_dev) 258386098fa0SChristoph Hellwig bd_release(journal->j_dev_bd); 2584e5eb8caaSAl Viro result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); 25851da177e4SLinus Torvalds journal->j_dev_bd = NULL; 25861da177e4SLinus Torvalds } 25871da177e4SLinus Torvalds 25881da177e4SLinus Torvalds if (result != 0) { 258945b03d5eSJeff Mahoney reiserfs_warning(super, "sh-457", 259045b03d5eSJeff Mahoney "Cannot release journal device: %i", result); 25911da177e4SLinus Torvalds } 25921da177e4SLinus Torvalds return result; 25931da177e4SLinus Torvalds } 25941da177e4SLinus Torvalds 25951da177e4SLinus Torvalds static int journal_init_dev(struct super_block *super, 25961da177e4SLinus Torvalds struct reiserfs_journal *journal, 25971da177e4SLinus Torvalds const char *jdev_name) 25981da177e4SLinus Torvalds { 25991da177e4SLinus Torvalds int result; 26001da177e4SLinus Torvalds dev_t jdev; 2601aeb5d727SAl Viro fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; 26021da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 26031da177e4SLinus Torvalds 26041da177e4SLinus Torvalds result = 0; 26051da177e4SLinus Torvalds 26061da177e4SLinus Torvalds journal->j_dev_bd = NULL; 26071da177e4SLinus Torvalds jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 
26081da177e4SLinus Torvalds new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 26091da177e4SLinus Torvalds 26101da177e4SLinus Torvalds if (bdev_read_only(super->s_bdev)) 26111da177e4SLinus Torvalds blkdev_mode = FMODE_READ; 26121da177e4SLinus Torvalds 26131da177e4SLinus Torvalds /* there is no "jdev" option and journal is on separate device */ 26141da177e4SLinus Torvalds if ((!jdev_name || !jdev_name[0])) { 26151da177e4SLinus Torvalds journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2616e5eb8caaSAl Viro journal->j_dev_mode = blkdev_mode; 26171da177e4SLinus Torvalds if (IS_ERR(journal->j_dev_bd)) { 26181da177e4SLinus Torvalds result = PTR_ERR(journal->j_dev_bd); 26191da177e4SLinus Torvalds journal->j_dev_bd = NULL; 262045b03d5eSJeff Mahoney reiserfs_warning(super, "sh-458", 26211da177e4SLinus Torvalds "cannot init journal device '%s': %i", 26221da177e4SLinus Torvalds __bdevname(jdev, b), result); 26231da177e4SLinus Torvalds return result; 262486098fa0SChristoph Hellwig } else if (jdev != super->s_dev) { 262586098fa0SChristoph Hellwig result = bd_claim(journal->j_dev_bd, journal); 262686098fa0SChristoph Hellwig if (result) { 26279a1c3542SAl Viro blkdev_put(journal->j_dev_bd, blkdev_mode); 262886098fa0SChristoph Hellwig return result; 262986098fa0SChristoph Hellwig } 263086098fa0SChristoph Hellwig 26311da177e4SLinus Torvalds set_blocksize(journal->j_dev_bd, super->s_blocksize); 263286098fa0SChristoph Hellwig } 263386098fa0SChristoph Hellwig 26341da177e4SLinus Torvalds return 0; 26351da177e4SLinus Torvalds } 26361da177e4SLinus Torvalds 2637e5eb8caaSAl Viro journal->j_dev_mode = blkdev_mode; 263830c40d2cSAl Viro journal->j_dev_bd = open_bdev_exclusive(jdev_name, 2639e5eb8caaSAl Viro blkdev_mode, journal); 264086098fa0SChristoph Hellwig if (IS_ERR(journal->j_dev_bd)) { 264186098fa0SChristoph Hellwig result = PTR_ERR(journal->j_dev_bd); 264286098fa0SChristoph Hellwig journal->j_dev_bd = NULL; 264386098fa0SChristoph Hellwig reiserfs_warning(super, 
264486098fa0SChristoph Hellwig "journal_init_dev: Cannot open '%s': %i", 264586098fa0SChristoph Hellwig jdev_name, result); 264686098fa0SChristoph Hellwig return result; 264786098fa0SChristoph Hellwig } 264886098fa0SChristoph Hellwig 26491da177e4SLinus Torvalds set_blocksize(journal->j_dev_bd, super->s_blocksize); 2650bd4c625cSLinus Torvalds reiserfs_info(super, 2651bd4c625cSLinus Torvalds "journal_init_dev: journal device: %s\n", 265274f9f974SEdward Shishkin bdevname(journal->j_dev_bd, b)); 265386098fa0SChristoph Hellwig return 0; 26541da177e4SLinus Torvalds } 26551da177e4SLinus Torvalds 2656cf3d0b81SEdward Shishkin /** 2657cf3d0b81SEdward Shishkin * When creating/tuning a file system user can assign some 2658cf3d0b81SEdward Shishkin * journal params within boundaries which depend on the ratio 2659cf3d0b81SEdward Shishkin * blocksize/standard_blocksize. 2660cf3d0b81SEdward Shishkin * 2661cf3d0b81SEdward Shishkin * For blocks >= standard_blocksize transaction size should 2662cf3d0b81SEdward Shishkin * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more 2663cf3d0b81SEdward Shishkin * then JOURNAL_TRANS_MAX_DEFAULT. 2664cf3d0b81SEdward Shishkin * 2665cf3d0b81SEdward Shishkin * For blocks < standard_blocksize these boundaries should be 2666cf3d0b81SEdward Shishkin * decreased proportionally. 2667cf3d0b81SEdward Shishkin */ 2668cf3d0b81SEdward Shishkin #define REISERFS_STANDARD_BLKSIZE (4096) 2669cf3d0b81SEdward Shishkin 2670cf3d0b81SEdward Shishkin static int check_advise_trans_params(struct super_block *p_s_sb, 2671cf3d0b81SEdward Shishkin struct reiserfs_journal *journal) 2672cf3d0b81SEdward Shishkin { 2673cf3d0b81SEdward Shishkin if (journal->j_trans_max) { 2674cf3d0b81SEdward Shishkin /* Non-default journal params. 2675cf3d0b81SEdward Shishkin Do sanity check for them. 
*/ 2676cf3d0b81SEdward Shishkin int ratio = 1; 2677cf3d0b81SEdward Shishkin if (p_s_sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2678cf3d0b81SEdward Shishkin ratio = REISERFS_STANDARD_BLKSIZE / p_s_sb->s_blocksize; 2679cf3d0b81SEdward Shishkin 2680cf3d0b81SEdward Shishkin if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || 2681cf3d0b81SEdward Shishkin journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || 2682cf3d0b81SEdward Shishkin SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < 2683cf3d0b81SEdward Shishkin JOURNAL_MIN_RATIO) { 268445b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "sh-462", 268545b03d5eSJeff Mahoney "bad transaction max size (%u). " 268645b03d5eSJeff Mahoney "FSCK?", journal->j_trans_max); 2687cf3d0b81SEdward Shishkin return 1; 2688cf3d0b81SEdward Shishkin } 2689cf3d0b81SEdward Shishkin if (journal->j_max_batch != (journal->j_trans_max) * 2690cf3d0b81SEdward Shishkin JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { 269145b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "sh-463", 269245b03d5eSJeff Mahoney "bad transaction max batch (%u). " 269345b03d5eSJeff Mahoney "FSCK?", journal->j_max_batch); 2694cf3d0b81SEdward Shishkin return 1; 2695cf3d0b81SEdward Shishkin } 2696cf3d0b81SEdward Shishkin } else { 2697cf3d0b81SEdward Shishkin /* Default journal params. 
2698cf3d0b81SEdward Shishkin The file system was created by old version 2699cf3d0b81SEdward Shishkin of mkreiserfs, so some fields contain zeros, 2700cf3d0b81SEdward Shishkin and we need to advise proper values for them */ 270145b03d5eSJeff Mahoney if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { 270245b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "sh-464", "bad blocksize (%u)", 2703cf3d0b81SEdward Shishkin p_s_sb->s_blocksize); 270445b03d5eSJeff Mahoney return 1; 270545b03d5eSJeff Mahoney } 2706cf3d0b81SEdward Shishkin journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; 2707cf3d0b81SEdward Shishkin journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; 2708cf3d0b81SEdward Shishkin journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; 2709cf3d0b81SEdward Shishkin } 2710cf3d0b81SEdward Shishkin return 0; 2711cf3d0b81SEdward Shishkin } 2712cf3d0b81SEdward Shishkin 27131da177e4SLinus Torvalds /* 27141da177e4SLinus Torvalds ** must be called once on fs mount. calls journal_read for you 27151da177e4SLinus Torvalds */ 2716bd4c625cSLinus Torvalds int journal_init(struct super_block *p_s_sb, const char *j_dev_name, 2717bd4c625cSLinus Torvalds int old_format, unsigned int commit_max_age) 2718bd4c625cSLinus Torvalds { 27191da177e4SLinus Torvalds int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; 27201da177e4SLinus Torvalds struct buffer_head *bhjh; 27211da177e4SLinus Torvalds struct reiserfs_super_block *rs; 27221da177e4SLinus Torvalds struct reiserfs_journal_header *jh; 27231da177e4SLinus Torvalds struct reiserfs_journal *journal; 27241da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 27251da177e4SLinus Torvalds char b[BDEVNAME_SIZE]; 27261da177e4SLinus Torvalds 27271da177e4SLinus Torvalds journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); 27281da177e4SLinus Torvalds if (!journal) { 272945b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1256", 273045b03d5eSJeff Mahoney "unable to get memory for journal structure"); 27311da177e4SLinus 
Torvalds return 1; 27321da177e4SLinus Torvalds } 27331da177e4SLinus Torvalds memset(journal, 0, sizeof(struct reiserfs_journal)); 27341da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_bitmap_nodes); 27351da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_prealloc_list); 27361da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_working_list); 27371da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_journal_list); 27381da177e4SLinus Torvalds journal->j_persistent_trans = 0; 27391da177e4SLinus Torvalds if (reiserfs_allocate_list_bitmaps(p_s_sb, 27401da177e4SLinus Torvalds journal->j_list_bitmap, 2741cb680c1bSJeff Mahoney reiserfs_bmap_count(p_s_sb))) 27421da177e4SLinus Torvalds goto free_and_return; 27431da177e4SLinus Torvalds allocate_bitmap_nodes(p_s_sb); 27441da177e4SLinus Torvalds 27451da177e4SLinus Torvalds /* reserved for journal area support */ 27461da177e4SLinus Torvalds SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2747bd4c625cSLinus Torvalds REISERFS_OLD_DISK_OFFSET_IN_BYTES 2748bd4c625cSLinus Torvalds / p_s_sb->s_blocksize + 2749cb680c1bSJeff Mahoney reiserfs_bmap_count(p_s_sb) + 2750bd4c625cSLinus Torvalds 1 : 2751bd4c625cSLinus Torvalds REISERFS_DISK_OFFSET_IN_BYTES / 2752bd4c625cSLinus Torvalds p_s_sb->s_blocksize + 2); 27531da177e4SLinus Torvalds 27541da177e4SLinus Torvalds /* Sanity check to see is the standard journal fitting withing first bitmap 27551da177e4SLinus Torvalds (actual for small blocksizes) */ 27561da177e4SLinus Torvalds if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && 2757bd4c625cSLinus Torvalds (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + 2758bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { 275945b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1393", 276045b03d5eSJeff Mahoney "journal does not fit for area addressed " 276145b03d5eSJeff Mahoney "by first of bitmap blocks. It starts at " 27621da177e4SLinus Torvalds "%u and its size is %u. 
Block size %ld", 27631da177e4SLinus Torvalds SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), 2764bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2765bd4c625cSLinus Torvalds p_s_sb->s_blocksize); 27661da177e4SLinus Torvalds goto free_and_return; 27671da177e4SLinus Torvalds } 27681da177e4SLinus Torvalds 27691da177e4SLinus Torvalds if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { 277045b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "sh-462", 277145b03d5eSJeff Mahoney "unable to initialize jornal device"); 27721da177e4SLinus Torvalds goto free_and_return; 27731da177e4SLinus Torvalds } 27741da177e4SLinus Torvalds 27751da177e4SLinus Torvalds rs = SB_DISK_SUPER_BLOCK(p_s_sb); 27761da177e4SLinus Torvalds 27771da177e4SLinus Torvalds /* read journal header */ 27781da177e4SLinus Torvalds bhjh = journal_bread(p_s_sb, 2779bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2780bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 27811da177e4SLinus Torvalds if (!bhjh) { 278245b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "sh-459", 278345b03d5eSJeff Mahoney "unable to read journal header"); 27841da177e4SLinus Torvalds goto free_and_return; 27851da177e4SLinus Torvalds } 27861da177e4SLinus Torvalds jh = (struct reiserfs_journal_header *)(bhjh->b_data); 27871da177e4SLinus Torvalds 27881da177e4SLinus Torvalds /* make sure that journal matches to the super block */ 2789bd4c625cSLinus Torvalds if (is_reiserfs_jr(rs) 2790bd4c625cSLinus Torvalds && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != 2791bd4c625cSLinus Torvalds sb_jp_journal_magic(rs))) { 279245b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "sh-460", 279345b03d5eSJeff Mahoney "journal header magic %x (device %s) does " 279445b03d5eSJeff Mahoney "not match to magic found in super block %x", 279545b03d5eSJeff Mahoney jh->jh_journal.jp_journal_magic, 27961da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b), 27971da177e4SLinus Torvalds sb_jp_journal_magic(rs)); 27981da177e4SLinus Torvalds brelse(bhjh); 
27991da177e4SLinus Torvalds goto free_and_return; 28001da177e4SLinus Torvalds } 28011da177e4SLinus Torvalds 28021da177e4SLinus Torvalds journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max); 28031da177e4SLinus Torvalds journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch); 2804bd4c625cSLinus Torvalds journal->j_max_commit_age = 2805bd4c625cSLinus Torvalds le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); 28061da177e4SLinus Torvalds journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 28071da177e4SLinus Torvalds 2808cf3d0b81SEdward Shishkin if (check_advise_trans_params(p_s_sb, journal) != 0) 2809cf3d0b81SEdward Shishkin goto free_and_return; 28101da177e4SLinus Torvalds journal->j_default_max_commit_age = journal->j_max_commit_age; 28111da177e4SLinus Torvalds 28121da177e4SLinus Torvalds if (commit_max_age != 0) { 28131da177e4SLinus Torvalds journal->j_max_commit_age = commit_max_age; 28141da177e4SLinus Torvalds journal->j_max_trans_age = commit_max_age; 28151da177e4SLinus Torvalds } 28161da177e4SLinus Torvalds 28171da177e4SLinus Torvalds reiserfs_info(p_s_sb, "journal params: device %s, size %u, " 28181da177e4SLinus Torvalds "journal first block %u, max trans len %u, max batch %u, " 28191da177e4SLinus Torvalds "max commit age %u, max trans age %u\n", 28201da177e4SLinus Torvalds bdevname(journal->j_dev_bd, b), 28211da177e4SLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb), 28221da177e4SLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 28231da177e4SLinus Torvalds journal->j_trans_max, 28241da177e4SLinus Torvalds journal->j_max_batch, 2825bd4c625cSLinus Torvalds journal->j_max_commit_age, journal->j_max_trans_age); 28261da177e4SLinus Torvalds 28271da177e4SLinus Torvalds brelse(bhjh); 28281da177e4SLinus Torvalds 28291da177e4SLinus Torvalds journal->j_list_bitmap_index = 0; 28301da177e4SLinus Torvalds journal_list_init(p_s_sb); 28311da177e4SLinus Torvalds 2832bd4c625cSLinus Torvalds memset(journal->j_list_hash_table, 0, 
2833bd4c625cSLinus Torvalds JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 28341da177e4SLinus Torvalds 28351da177e4SLinus Torvalds INIT_LIST_HEAD(&journal->j_dirty_buffers); 28361da177e4SLinus Torvalds spin_lock_init(&journal->j_dirty_buffers_lock); 28371da177e4SLinus Torvalds 28381da177e4SLinus Torvalds journal->j_start = 0; 28391da177e4SLinus Torvalds journal->j_len = 0; 28401da177e4SLinus Torvalds journal->j_len_alloc = 0; 28411da177e4SLinus Torvalds atomic_set(&(journal->j_wcount), 0); 28421da177e4SLinus Torvalds atomic_set(&(journal->j_async_throttle), 0); 28431da177e4SLinus Torvalds journal->j_bcount = 0; 28441da177e4SLinus Torvalds journal->j_trans_start_time = 0; 28451da177e4SLinus Torvalds journal->j_last = NULL; 28461da177e4SLinus Torvalds journal->j_first = NULL; 28471da177e4SLinus Torvalds init_waitqueue_head(&(journal->j_join_wait)); 2848f68215c4SJeff Mahoney mutex_init(&journal->j_mutex); 2849afe70259SJeff Mahoney mutex_init(&journal->j_flush_mutex); 28501da177e4SLinus Torvalds 28511da177e4SLinus Torvalds journal->j_trans_id = 10; 28521da177e4SLinus Torvalds journal->j_mount_id = 10; 28531da177e4SLinus Torvalds journal->j_state = 0; 28541da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 0); 28551da177e4SLinus Torvalds journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 28561da177e4SLinus Torvalds journal->j_cnode_free_orig = journal->j_cnode_free_list; 28571da177e4SLinus Torvalds journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 28581da177e4SLinus Torvalds journal->j_cnode_used = 0; 28591da177e4SLinus Torvalds journal->j_must_wait = 0; 28601da177e4SLinus Torvalds 2861576f6d79SJeff Mahoney if (journal->j_cnode_free == 0) { 286245b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-2004", "Journal cnode memory " 2863576f6d79SJeff Mahoney "allocation failed (%ld bytes). Journal is " 2864576f6d79SJeff Mahoney "too large for available memory. 
Usually " 2865576f6d79SJeff Mahoney "this is due to a journal that is too large.", 2866576f6d79SJeff Mahoney sizeof (struct reiserfs_journal_cnode) * num_cnodes); 2867576f6d79SJeff Mahoney goto free_and_return; 2868576f6d79SJeff Mahoney } 2869576f6d79SJeff Mahoney 28701da177e4SLinus Torvalds init_journal_hash(p_s_sb); 28711da177e4SLinus Torvalds jl = journal->j_current_jl; 28721da177e4SLinus Torvalds jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); 28731da177e4SLinus Torvalds if (!jl->j_list_bitmap) { 287445b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-2005", 287545b03d5eSJeff Mahoney "get_list_bitmap failed for journal list 0"); 28761da177e4SLinus Torvalds goto free_and_return; 28771da177e4SLinus Torvalds } 28781da177e4SLinus Torvalds if (journal_read(p_s_sb) < 0) { 287945b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "reiserfs-2006", 288045b03d5eSJeff Mahoney "Replay Failure, unable to mount"); 28811da177e4SLinus Torvalds goto free_and_return; 28821da177e4SLinus Torvalds } 28831da177e4SLinus Torvalds 28841da177e4SLinus Torvalds reiserfs_mounted_fs_count++; 28851da177e4SLinus Torvalds if (reiserfs_mounted_fs_count <= 1) 28861da177e4SLinus Torvalds commit_wq = create_workqueue("reiserfs"); 28871da177e4SLinus Torvalds 2888c4028958SDavid Howells INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2889c4028958SDavid Howells journal->j_work_sb = p_s_sb; 28901da177e4SLinus Torvalds return 0; 28911da177e4SLinus Torvalds free_and_return: 28921da177e4SLinus Torvalds free_journal_ram(p_s_sb); 28931da177e4SLinus Torvalds return 1; 28941da177e4SLinus Torvalds } 28951da177e4SLinus Torvalds 28961da177e4SLinus Torvalds /* 28971da177e4SLinus Torvalds ** test for a polite end of the current transaction. 
Used by file_write, and should 28981da177e4SLinus Torvalds ** be used by delete to make sure they don't write more than can fit inside a single 28991da177e4SLinus Torvalds ** transaction 29001da177e4SLinus Torvalds */ 2901bd4c625cSLinus Torvalds int journal_transaction_should_end(struct reiserfs_transaction_handle *th, 2902bd4c625cSLinus Torvalds int new_alloc) 2903bd4c625cSLinus Torvalds { 29041da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 29051da177e4SLinus Torvalds time_t now = get_seconds(); 29061da177e4SLinus Torvalds /* cannot restart while nested */ 29071da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 29081da177e4SLinus Torvalds if (th->t_refcount > 1) 29091da177e4SLinus Torvalds return 0; 29101da177e4SLinus Torvalds if (journal->j_must_wait > 0 || 29111da177e4SLinus Torvalds (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 29121da177e4SLinus Torvalds atomic_read(&(journal->j_jlock)) || 29131da177e4SLinus Torvalds (now - journal->j_trans_start_time) > journal->j_max_trans_age || 29141da177e4SLinus Torvalds journal->j_cnode_free < (journal->j_trans_max * 3)) { 29151da177e4SLinus Torvalds return 1; 29161da177e4SLinus Torvalds } 29176ae1ea44SChris Mason /* protected by the BKL here */ 29186ae1ea44SChris Mason journal->j_len_alloc += new_alloc; 29196ae1ea44SChris Mason th->t_blocks_allocated += new_alloc ; 29201da177e4SLinus Torvalds return 0; 29211da177e4SLinus Torvalds } 29221da177e4SLinus Torvalds 29231da177e4SLinus Torvalds /* this must be called inside a transaction, and requires the 29241da177e4SLinus Torvalds ** kernel_lock to be held 29251da177e4SLinus Torvalds */ 2926bd4c625cSLinus Torvalds void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2927bd4c625cSLinus Torvalds { 29281da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 29291da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 29301da177e4SLinus Torvalds journal->j_must_wait = 1; 29311da177e4SLinus Torvalds 
set_bit(J_WRITERS_BLOCKED, &journal->j_state); 29321da177e4SLinus Torvalds return; 29331da177e4SLinus Torvalds } 29341da177e4SLinus Torvalds 29351da177e4SLinus Torvalds /* this must be called without a transaction started, and does not 29361da177e4SLinus Torvalds ** require BKL 29371da177e4SLinus Torvalds */ 2938bd4c625cSLinus Torvalds void reiserfs_allow_writes(struct super_block *s) 2939bd4c625cSLinus Torvalds { 29401da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 29411da177e4SLinus Torvalds clear_bit(J_WRITERS_BLOCKED, &journal->j_state); 29421da177e4SLinus Torvalds wake_up(&journal->j_join_wait); 29431da177e4SLinus Torvalds } 29441da177e4SLinus Torvalds 29451da177e4SLinus Torvalds /* this must be called without a transaction started, and does not 29461da177e4SLinus Torvalds ** require BKL 29471da177e4SLinus Torvalds */ 2948bd4c625cSLinus Torvalds void reiserfs_wait_on_write_block(struct super_block *s) 2949bd4c625cSLinus Torvalds { 29501da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 29511da177e4SLinus Torvalds wait_event(journal->j_join_wait, 29521da177e4SLinus Torvalds !test_bit(J_WRITERS_BLOCKED, &journal->j_state)); 29531da177e4SLinus Torvalds } 29541da177e4SLinus Torvalds 2955bd4c625cSLinus Torvalds static void queue_log_writer(struct super_block *s) 2956bd4c625cSLinus Torvalds { 29571da177e4SLinus Torvalds wait_queue_t wait; 29581da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 29591da177e4SLinus Torvalds set_bit(J_WRITERS_QUEUED, &journal->j_state); 29601da177e4SLinus Torvalds 29611da177e4SLinus Torvalds /* 29621da177e4SLinus Torvalds * we don't want to use wait_event here because 29631da177e4SLinus Torvalds * we only want to wait once. 
29641da177e4SLinus Torvalds */ 29651da177e4SLinus Torvalds init_waitqueue_entry(&wait, current); 29661da177e4SLinus Torvalds add_wait_queue(&journal->j_join_wait, &wait); 29671da177e4SLinus Torvalds set_current_state(TASK_UNINTERRUPTIBLE); 29681da177e4SLinus Torvalds if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 29691da177e4SLinus Torvalds schedule(); 29705ab2f7e0SMilind Arun Choudhary __set_current_state(TASK_RUNNING); 29711da177e4SLinus Torvalds remove_wait_queue(&journal->j_join_wait, &wait); 29721da177e4SLinus Torvalds } 29731da177e4SLinus Torvalds 2974bd4c625cSLinus Torvalds static void wake_queued_writers(struct super_block *s) 2975bd4c625cSLinus Torvalds { 29761da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 29771da177e4SLinus Torvalds if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state)) 29781da177e4SLinus Torvalds wake_up(&journal->j_join_wait); 29791da177e4SLinus Torvalds } 29801da177e4SLinus Torvalds 2981600ed416SJeff Mahoney static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) 29821da177e4SLinus Torvalds { 29831da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 29841da177e4SLinus Torvalds unsigned long bcount = journal->j_bcount; 29851da177e4SLinus Torvalds while (1) { 2986041e0e3bSNishanth Aravamudan schedule_timeout_uninterruptible(1); 29871da177e4SLinus Torvalds journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 29881da177e4SLinus Torvalds while ((atomic_read(&journal->j_wcount) > 0 || 29891da177e4SLinus Torvalds atomic_read(&journal->j_jlock)) && 29901da177e4SLinus Torvalds journal->j_trans_id == trans_id) { 29911da177e4SLinus Torvalds queue_log_writer(sb); 29921da177e4SLinus Torvalds } 29931da177e4SLinus Torvalds if (journal->j_trans_id != trans_id) 29941da177e4SLinus Torvalds break; 29951da177e4SLinus Torvalds if (bcount == journal->j_bcount) 29961da177e4SLinus Torvalds break; 29971da177e4SLinus Torvalds bcount = journal->j_bcount; 29981da177e4SLinus 
Torvalds } 29991da177e4SLinus Torvalds } 30001da177e4SLinus Torvalds 30011da177e4SLinus Torvalds /* join == true if you must join an existing transaction. 30021da177e4SLinus Torvalds ** join == false if you can deal with waiting for others to finish 30031da177e4SLinus Torvalds ** 30041da177e4SLinus Torvalds ** this will block until the transaction is joinable. send the number of blocks you 30051da177e4SLinus Torvalds ** expect to use in nblocks. 30061da177e4SLinus Torvalds */ 3007bd4c625cSLinus Torvalds static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 3008bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks, 3009bd4c625cSLinus Torvalds int join) 3010bd4c625cSLinus Torvalds { 30111da177e4SLinus Torvalds time_t now = get_seconds(); 3012600ed416SJeff Mahoney unsigned int old_trans_id; 30131da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 30141da177e4SLinus Torvalds struct reiserfs_transaction_handle myth; 30151da177e4SLinus Torvalds int sched_count = 0; 30161da177e4SLinus Torvalds int retval; 30171da177e4SLinus Torvalds 30181da177e4SLinus Torvalds reiserfs_check_lock_depth(p_s_sb, "journal_begin"); 301914a61442SEric Sesterhenn BUG_ON(nblocks > journal->j_trans_max); 30201da177e4SLinus Torvalds 30211da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.journal_being); 30221da177e4SLinus Torvalds /* set here for journal_join */ 30231da177e4SLinus Torvalds th->t_refcount = 1; 30241da177e4SLinus Torvalds th->t_super = p_s_sb; 30251da177e4SLinus Torvalds 30261da177e4SLinus Torvalds relock: 30271da177e4SLinus Torvalds lock_journal(p_s_sb); 30281da177e4SLinus Torvalds if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 30291da177e4SLinus Torvalds unlock_journal(p_s_sb); 30301da177e4SLinus Torvalds retval = journal->j_errno; 30311da177e4SLinus Torvalds goto out_fail; 30321da177e4SLinus Torvalds } 30331da177e4SLinus Torvalds journal->j_bcount++; 30341da177e4SLinus Torvalds 30351da177e4SLinus 
Torvalds if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 30361da177e4SLinus Torvalds unlock_journal(p_s_sb); 30371da177e4SLinus Torvalds reiserfs_wait_on_write_block(p_s_sb); 30381da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.journal_relock_writers); 30391da177e4SLinus Torvalds goto relock; 30401da177e4SLinus Torvalds } 30411da177e4SLinus Torvalds now = get_seconds(); 30421da177e4SLinus Torvalds 30431da177e4SLinus Torvalds /* if there is no room in the journal OR 30441da177e4SLinus Torvalds ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 30451da177e4SLinus Torvalds ** we don't sleep if there aren't other writers 30461da177e4SLinus Torvalds */ 30471da177e4SLinus Torvalds 30481da177e4SLinus Torvalds if ((!join && journal->j_must_wait > 0) || 3049bd4c625cSLinus Torvalds (!join 3050bd4c625cSLinus Torvalds && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) 3051bd4c625cSLinus Torvalds || (!join && atomic_read(&journal->j_wcount) > 0 3052bd4c625cSLinus Torvalds && journal->j_trans_start_time > 0 3053bd4c625cSLinus Torvalds && (now - journal->j_trans_start_time) > 3054bd4c625cSLinus Torvalds journal->j_max_trans_age) || (!join 3055bd4c625cSLinus Torvalds && atomic_read(&journal->j_jlock)) 3056bd4c625cSLinus Torvalds || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 30571da177e4SLinus Torvalds 30581da177e4SLinus Torvalds old_trans_id = journal->j_trans_id; 30591da177e4SLinus Torvalds unlock_journal(p_s_sb); /* allow others to finish this transaction */ 30601da177e4SLinus Torvalds 30611da177e4SLinus Torvalds if (!join && (journal->j_len_alloc + nblocks + 2) >= 30621da177e4SLinus Torvalds journal->j_max_batch && 3063bd4c625cSLinus Torvalds ((journal->j_len + nblocks + 2) * 100) < 3064bd4c625cSLinus Torvalds (journal->j_len_alloc * 75)) { 30651da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) > 10) { 30661da177e4SLinus Torvalds sched_count++; 30671da177e4SLinus 
Torvalds queue_log_writer(p_s_sb); 30681da177e4SLinus Torvalds goto relock; 30691da177e4SLinus Torvalds } 30701da177e4SLinus Torvalds } 30711da177e4SLinus Torvalds /* don't mess with joining the transaction if all we have to do is 30721da177e4SLinus Torvalds * wait for someone else to do a commit 30731da177e4SLinus Torvalds */ 30741da177e4SLinus Torvalds if (atomic_read(&journal->j_jlock)) { 30751da177e4SLinus Torvalds while (journal->j_trans_id == old_trans_id && 30761da177e4SLinus Torvalds atomic_read(&journal->j_jlock)) { 30771da177e4SLinus Torvalds queue_log_writer(p_s_sb); 30781da177e4SLinus Torvalds } 30791da177e4SLinus Torvalds goto relock; 30801da177e4SLinus Torvalds } 30811da177e4SLinus Torvalds retval = journal_join(&myth, p_s_sb, 1); 30821da177e4SLinus Torvalds if (retval) 30831da177e4SLinus Torvalds goto out_fail; 30841da177e4SLinus Torvalds 30851da177e4SLinus Torvalds /* someone might have ended the transaction while we joined */ 30861da177e4SLinus Torvalds if (old_trans_id != journal->j_trans_id) { 30871da177e4SLinus Torvalds retval = do_journal_end(&myth, p_s_sb, 1, 0); 30881da177e4SLinus Torvalds } else { 30891da177e4SLinus Torvalds retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW); 30901da177e4SLinus Torvalds } 30911da177e4SLinus Torvalds 30921da177e4SLinus Torvalds if (retval) 30931da177e4SLinus Torvalds goto out_fail; 30941da177e4SLinus Torvalds 30951da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount); 30961da177e4SLinus Torvalds goto relock; 30971da177e4SLinus Torvalds } 30981da177e4SLinus Torvalds /* we are the first writer, set trans_id */ 30991da177e4SLinus Torvalds if (journal->j_trans_start_time == 0) { 31001da177e4SLinus Torvalds journal->j_trans_start_time = get_seconds(); 31011da177e4SLinus Torvalds } 31021da177e4SLinus Torvalds atomic_inc(&(journal->j_wcount)); 31031da177e4SLinus Torvalds journal->j_len_alloc += nblocks; 31041da177e4SLinus Torvalds th->t_blocks_logged = 0; 31051da177e4SLinus Torvalds 
th->t_blocks_allocated = nblocks; 31061da177e4SLinus Torvalds th->t_trans_id = journal->j_trans_id; 31071da177e4SLinus Torvalds unlock_journal(p_s_sb); 31081da177e4SLinus Torvalds INIT_LIST_HEAD(&th->t_list); 310922e2c507SJens Axboe get_fs_excl(); 31101da177e4SLinus Torvalds return 0; 31111da177e4SLinus Torvalds 31121da177e4SLinus Torvalds out_fail: 31131da177e4SLinus Torvalds memset(th, 0, sizeof(*th)); 31141da177e4SLinus Torvalds /* Re-set th->t_super, so we can properly keep track of how many 31151da177e4SLinus Torvalds * persistent transactions there are. We need to do this so if this 31161da177e4SLinus Torvalds * call is part of a failed restart_transaction, we can free it later */ 31171da177e4SLinus Torvalds th->t_super = p_s_sb; 31181da177e4SLinus Torvalds return retval; 31191da177e4SLinus Torvalds } 31201da177e4SLinus Torvalds 3121bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct 3122bd4c625cSLinus Torvalds super_block 3123bd4c625cSLinus Torvalds *s, 3124bd4c625cSLinus Torvalds int nblocks) 3125bd4c625cSLinus Torvalds { 31261da177e4SLinus Torvalds int ret; 31271da177e4SLinus Torvalds struct reiserfs_transaction_handle *th; 31281da177e4SLinus Torvalds 31291da177e4SLinus Torvalds /* if we're nesting into an existing transaction. 
It will be 31301da177e4SLinus Torvalds ** persistent on its own 31311da177e4SLinus Torvalds */ 31321da177e4SLinus Torvalds if (reiserfs_transaction_running(s)) { 31331da177e4SLinus Torvalds th = current->journal_info; 31341da177e4SLinus Torvalds th->t_refcount++; 313514a61442SEric Sesterhenn BUG_ON(th->t_refcount < 2); 313614a61442SEric Sesterhenn 31371da177e4SLinus Torvalds return th; 31381da177e4SLinus Torvalds } 3139d739b42bSPekka Enberg th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 31401da177e4SLinus Torvalds if (!th) 31411da177e4SLinus Torvalds return NULL; 31421da177e4SLinus Torvalds ret = journal_begin(th, s, nblocks); 31431da177e4SLinus Torvalds if (ret) { 3144d739b42bSPekka Enberg kfree(th); 31451da177e4SLinus Torvalds return NULL; 31461da177e4SLinus Torvalds } 31471da177e4SLinus Torvalds 31481da177e4SLinus Torvalds SB_JOURNAL(s)->j_persistent_trans++; 31491da177e4SLinus Torvalds return th; 31501da177e4SLinus Torvalds } 31511da177e4SLinus Torvalds 3152bd4c625cSLinus Torvalds int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) 3153bd4c625cSLinus Torvalds { 31541da177e4SLinus Torvalds struct super_block *s = th->t_super; 31551da177e4SLinus Torvalds int ret = 0; 31561da177e4SLinus Torvalds if (th->t_trans_id) 31571da177e4SLinus Torvalds ret = journal_end(th, th->t_super, th->t_blocks_allocated); 31581da177e4SLinus Torvalds else 31591da177e4SLinus Torvalds ret = -EIO; 31601da177e4SLinus Torvalds if (th->t_refcount == 0) { 31611da177e4SLinus Torvalds SB_JOURNAL(s)->j_persistent_trans--; 3162d739b42bSPekka Enberg kfree(th); 31631da177e4SLinus Torvalds } 31641da177e4SLinus Torvalds return ret; 31651da177e4SLinus Torvalds } 31661da177e4SLinus Torvalds 3167bd4c625cSLinus Torvalds static int journal_join(struct reiserfs_transaction_handle *th, 3168bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks) 3169bd4c625cSLinus Torvalds { 31701da177e4SLinus Torvalds struct reiserfs_transaction_handle 
*cur_th = current->journal_info; 31711da177e4SLinus Torvalds 31721da177e4SLinus Torvalds /* this keeps do_journal_end from NULLing out the current->journal_info 31731da177e4SLinus Torvalds ** pointer 31741da177e4SLinus Torvalds */ 31751da177e4SLinus Torvalds th->t_handle_save = cur_th; 317614a61442SEric Sesterhenn BUG_ON(cur_th && cur_th->t_refcount > 1); 31771da177e4SLinus Torvalds return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); 31781da177e4SLinus Torvalds } 31791da177e4SLinus Torvalds 3180bd4c625cSLinus Torvalds int journal_join_abort(struct reiserfs_transaction_handle *th, 3181bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks) 3182bd4c625cSLinus Torvalds { 31831da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = current->journal_info; 31841da177e4SLinus Torvalds 31851da177e4SLinus Torvalds /* this keeps do_journal_end from NULLing out the current->journal_info 31861da177e4SLinus Torvalds ** pointer 31871da177e4SLinus Torvalds */ 31881da177e4SLinus Torvalds th->t_handle_save = cur_th; 318914a61442SEric Sesterhenn BUG_ON(cur_th && cur_th->t_refcount > 1); 31901da177e4SLinus Torvalds return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); 31911da177e4SLinus Torvalds } 31921da177e4SLinus Torvalds 3193bd4c625cSLinus Torvalds int journal_begin(struct reiserfs_transaction_handle *th, 3194bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks) 3195bd4c625cSLinus Torvalds { 31961da177e4SLinus Torvalds struct reiserfs_transaction_handle *cur_th = current->journal_info; 31971da177e4SLinus Torvalds int ret; 31981da177e4SLinus Torvalds 31991da177e4SLinus Torvalds th->t_handle_save = NULL; 32001da177e4SLinus Torvalds if (cur_th) { 32011da177e4SLinus Torvalds /* we are nesting into the current transaction */ 32021da177e4SLinus Torvalds if (cur_th->t_super == p_s_sb) { 32031da177e4SLinus Torvalds BUG_ON(!cur_th->t_refcount); 32041da177e4SLinus Torvalds cur_th->t_refcount++; 32051da177e4SLinus Torvalds 
memcpy(th, cur_th, sizeof(*th)); 32061da177e4SLinus Torvalds if (th->t_refcount <= 1) 320745b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "reiserfs-2005", 320845b03d5eSJeff Mahoney "BAD: refcount <= 1, but " 320945b03d5eSJeff Mahoney "journal_info != 0"); 32101da177e4SLinus Torvalds return 0; 32111da177e4SLinus Torvalds } else { 32121da177e4SLinus Torvalds /* we've ended up with a handle from a different filesystem. 32131da177e4SLinus Torvalds ** save it and restore on journal_end. This should never 32141da177e4SLinus Torvalds ** really happen... 32151da177e4SLinus Torvalds */ 321645b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "clm-2100", 321745b03d5eSJeff Mahoney "nesting info a different FS"); 32181da177e4SLinus Torvalds th->t_handle_save = current->journal_info; 32191da177e4SLinus Torvalds current->journal_info = th; 32201da177e4SLinus Torvalds } 32211da177e4SLinus Torvalds } else { 32221da177e4SLinus Torvalds current->journal_info = th; 32231da177e4SLinus Torvalds } 32241da177e4SLinus Torvalds ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); 322514a61442SEric Sesterhenn BUG_ON(current->journal_info != th); 32261da177e4SLinus Torvalds 32271da177e4SLinus Torvalds /* I guess this boils down to being the reciprocal of clm-2100 above. 32281da177e4SLinus Torvalds * If do_journal_begin_r fails, we need to put it back, since journal_end 32291da177e4SLinus Torvalds * won't be called to do it. */ 32301da177e4SLinus Torvalds if (ret) 32311da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 32321da177e4SLinus Torvalds else 32331da177e4SLinus Torvalds BUG_ON(!th->t_refcount); 32341da177e4SLinus Torvalds 32351da177e4SLinus Torvalds return ret; 32361da177e4SLinus Torvalds } 32371da177e4SLinus Torvalds 32381da177e4SLinus Torvalds /* 32391da177e4SLinus Torvalds ** puts bh into the current transaction. 
If it was already there, reorders removes the 32401da177e4SLinus Torvalds ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). 32411da177e4SLinus Torvalds ** 32421da177e4SLinus Torvalds ** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 32431da177e4SLinus Torvalds ** transaction is committed. 32441da177e4SLinus Torvalds ** 32451da177e4SLinus Torvalds ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 32461da177e4SLinus Torvalds */ 3247bd4c625cSLinus Torvalds int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3248bd4c625cSLinus Torvalds struct super_block *p_s_sb, struct buffer_head *bh) 3249bd4c625cSLinus Torvalds { 32501da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 32511da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn = NULL; 32521da177e4SLinus Torvalds int count_already_incd = 0; 32531da177e4SLinus Torvalds int prepared = 0; 32541da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 32551da177e4SLinus Torvalds 32561da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.mark_dirty); 32571da177e4SLinus Torvalds if (th->t_trans_id != journal->j_trans_id) { 3258bd4c625cSLinus Torvalds reiserfs_panic(th->t_super, 3259bd4c625cSLinus Torvalds "journal-1577: handle trans id %ld != current trans id %ld\n", 32601da177e4SLinus Torvalds th->t_trans_id, journal->j_trans_id); 32611da177e4SLinus Torvalds } 32621da177e4SLinus Torvalds 32631da177e4SLinus Torvalds p_s_sb->s_dirt = 1; 32641da177e4SLinus Torvalds 32651da177e4SLinus Torvalds prepared = test_clear_buffer_journal_prepared(bh); 32661da177e4SLinus Torvalds clear_buffer_journal_restore_dirty(bh); 32671da177e4SLinus Torvalds /* already in this transaction, we are done */ 32681da177e4SLinus Torvalds if (buffer_journaled(bh)) { 32691da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.mark_dirty_already); 32701da177e4SLinus Torvalds return 0; 32711da177e4SLinus 
Torvalds } 32721da177e4SLinus Torvalds 32731da177e4SLinus Torvalds /* this must be turned into a panic instead of a warning. We can't allow 32741da177e4SLinus Torvalds ** a dirty or journal_dirty or locked buffer to be logged, as some changes 32751da177e4SLinus Torvalds ** could get to disk too early. NOT GOOD. 32761da177e4SLinus Torvalds */ 32771da177e4SLinus Torvalds if (!prepared || buffer_dirty(bh)) { 327845b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1777", 327945b03d5eSJeff Mahoney "buffer %llu bad state " 32801da177e4SLinus Torvalds "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3281bd4c625cSLinus Torvalds (unsigned long long)bh->b_blocknr, 3282bd4c625cSLinus Torvalds prepared ? ' ' : '!', 32831da177e4SLinus Torvalds buffer_locked(bh) ? ' ' : '!', 32841da177e4SLinus Torvalds buffer_dirty(bh) ? ' ' : '!', 32851da177e4SLinus Torvalds buffer_journal_dirty(bh) ? ' ' : '!'); 32861da177e4SLinus Torvalds } 32871da177e4SLinus Torvalds 32881da177e4SLinus Torvalds if (atomic_read(&(journal->j_wcount)) <= 0) { 328945b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1409", 329045b03d5eSJeff Mahoney "returning because j_wcount was %d", 3291bd4c625cSLinus Torvalds atomic_read(&(journal->j_wcount))); 32921da177e4SLinus Torvalds return 1; 32931da177e4SLinus Torvalds } 32941da177e4SLinus Torvalds /* this error means I've screwed up, and we've overflowed the transaction. 32951da177e4SLinus Torvalds ** Nothing can be done here, except make the FS readonly or panic. 
32961da177e4SLinus Torvalds */ 32971da177e4SLinus Torvalds if (journal->j_len >= journal->j_trans_max) { 3298bd4c625cSLinus Torvalds reiserfs_panic(th->t_super, 3299bd4c625cSLinus Torvalds "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", 3300bd4c625cSLinus Torvalds journal->j_len); 33011da177e4SLinus Torvalds } 33021da177e4SLinus Torvalds 33031da177e4SLinus Torvalds if (buffer_journal_dirty(bh)) { 33041da177e4SLinus Torvalds count_already_incd = 1; 33051da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal); 33061da177e4SLinus Torvalds clear_buffer_journal_dirty(bh); 33071da177e4SLinus Torvalds } 33081da177e4SLinus Torvalds 33091da177e4SLinus Torvalds if (journal->j_len > journal->j_len_alloc) { 33101da177e4SLinus Torvalds journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT; 33111da177e4SLinus Torvalds } 33121da177e4SLinus Torvalds 33131da177e4SLinus Torvalds set_buffer_journaled(bh); 33141da177e4SLinus Torvalds 33151da177e4SLinus Torvalds /* now put this guy on the end */ 33161da177e4SLinus Torvalds if (!cn) { 33171da177e4SLinus Torvalds cn = get_cnode(p_s_sb); 33181da177e4SLinus Torvalds if (!cn) { 33191da177e4SLinus Torvalds reiserfs_panic(p_s_sb, "get_cnode failed!\n"); 33201da177e4SLinus Torvalds } 33211da177e4SLinus Torvalds 33221da177e4SLinus Torvalds if (th->t_blocks_logged == th->t_blocks_allocated) { 33231da177e4SLinus Torvalds th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT; 33241da177e4SLinus Torvalds journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT; 33251da177e4SLinus Torvalds } 33261da177e4SLinus Torvalds th->t_blocks_logged++; 33271da177e4SLinus Torvalds journal->j_len++; 33281da177e4SLinus Torvalds 33291da177e4SLinus Torvalds cn->bh = bh; 33301da177e4SLinus Torvalds cn->blocknr = bh->b_blocknr; 33311da177e4SLinus Torvalds cn->sb = p_s_sb; 33321da177e4SLinus Torvalds cn->jlist = NULL; 33331da177e4SLinus Torvalds insert_journal_hash(journal->j_hash_table, cn); 33341da177e4SLinus Torvalds if 
(!count_already_incd) { 33351da177e4SLinus Torvalds get_bh(bh); 33361da177e4SLinus Torvalds } 33371da177e4SLinus Torvalds } 33381da177e4SLinus Torvalds cn->next = NULL; 33391da177e4SLinus Torvalds cn->prev = journal->j_last; 33401da177e4SLinus Torvalds cn->bh = bh; 33411da177e4SLinus Torvalds if (journal->j_last) { 33421da177e4SLinus Torvalds journal->j_last->next = cn; 33431da177e4SLinus Torvalds journal->j_last = cn; 33441da177e4SLinus Torvalds } else { 33451da177e4SLinus Torvalds journal->j_first = cn; 33461da177e4SLinus Torvalds journal->j_last = cn; 33471da177e4SLinus Torvalds } 33481da177e4SLinus Torvalds return 0; 33491da177e4SLinus Torvalds } 33501da177e4SLinus Torvalds 3351bd4c625cSLinus Torvalds int journal_end(struct reiserfs_transaction_handle *th, 3352bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks) 3353bd4c625cSLinus Torvalds { 33541da177e4SLinus Torvalds if (!current->journal_info && th->t_refcount > 1) 335545b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "REISER-NESTING", 335645b03d5eSJeff Mahoney "th NULL, refcount %d", th->t_refcount); 33571da177e4SLinus Torvalds 33581da177e4SLinus Torvalds if (!th->t_trans_id) { 33591da177e4SLinus Torvalds WARN_ON(1); 33601da177e4SLinus Torvalds return -EIO; 33611da177e4SLinus Torvalds } 33621da177e4SLinus Torvalds 33631da177e4SLinus Torvalds th->t_refcount--; 33641da177e4SLinus Torvalds if (th->t_refcount > 0) { 3365bd4c625cSLinus Torvalds struct reiserfs_transaction_handle *cur_th = 3366bd4c625cSLinus Torvalds current->journal_info; 33671da177e4SLinus Torvalds 33681da177e4SLinus Torvalds /* we aren't allowed to close a nested transaction on a different 33691da177e4SLinus Torvalds ** filesystem from the one in the task struct 33701da177e4SLinus Torvalds */ 337114a61442SEric Sesterhenn BUG_ON(cur_th->t_super != th->t_super); 33721da177e4SLinus Torvalds 33731da177e4SLinus Torvalds if (th != cur_th) { 33741da177e4SLinus Torvalds memcpy(current->journal_info, th, sizeof(*th)); 
33751da177e4SLinus Torvalds th->t_trans_id = 0; 33761da177e4SLinus Torvalds } 33771da177e4SLinus Torvalds return 0; 33781da177e4SLinus Torvalds } else { 33791da177e4SLinus Torvalds return do_journal_end(th, p_s_sb, nblocks, 0); 33801da177e4SLinus Torvalds } 33811da177e4SLinus Torvalds } 33821da177e4SLinus Torvalds 33831da177e4SLinus Torvalds /* removes from the current transaction, relsing and descrementing any counters. 33841da177e4SLinus Torvalds ** also files the removed buffer directly onto the clean list 33851da177e4SLinus Torvalds ** 33861da177e4SLinus Torvalds ** called by journal_mark_freed when a block has been deleted 33871da177e4SLinus Torvalds ** 33881da177e4SLinus Torvalds ** returns 1 if it cleaned and relsed the buffer. 0 otherwise 33891da177e4SLinus Torvalds */ 3390bd4c625cSLinus Torvalds static int remove_from_transaction(struct super_block *p_s_sb, 3391bd4c625cSLinus Torvalds b_blocknr_t blocknr, int already_cleaned) 3392bd4c625cSLinus Torvalds { 33931da177e4SLinus Torvalds struct buffer_head *bh; 33941da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 33951da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 33961da177e4SLinus Torvalds int ret = 0; 33971da177e4SLinus Torvalds 33981da177e4SLinus Torvalds cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 33991da177e4SLinus Torvalds if (!cn || !cn->bh) { 34001da177e4SLinus Torvalds return ret; 34011da177e4SLinus Torvalds } 34021da177e4SLinus Torvalds bh = cn->bh; 34031da177e4SLinus Torvalds if (cn->prev) { 34041da177e4SLinus Torvalds cn->prev->next = cn->next; 34051da177e4SLinus Torvalds } 34061da177e4SLinus Torvalds if (cn->next) { 34071da177e4SLinus Torvalds cn->next->prev = cn->prev; 34081da177e4SLinus Torvalds } 34091da177e4SLinus Torvalds if (cn == journal->j_first) { 34101da177e4SLinus Torvalds journal->j_first = cn->next; 34111da177e4SLinus Torvalds } 34121da177e4SLinus Torvalds if (cn == journal->j_last) { 34131da177e4SLinus Torvalds 
journal->j_last = cn->prev; 34141da177e4SLinus Torvalds } 34151da177e4SLinus Torvalds if (bh) 3416bd4c625cSLinus Torvalds remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, 3417bd4c625cSLinus Torvalds bh->b_blocknr, 0); 34181da177e4SLinus Torvalds clear_buffer_journaled(bh); /* don't log this one */ 34191da177e4SLinus Torvalds 34201da177e4SLinus Torvalds if (!already_cleaned) { 34211da177e4SLinus Torvalds clear_buffer_journal_dirty(bh); 34221da177e4SLinus Torvalds clear_buffer_dirty(bh); 34231da177e4SLinus Torvalds clear_buffer_journal_test(bh); 34241da177e4SLinus Torvalds put_bh(bh); 34251da177e4SLinus Torvalds if (atomic_read(&(bh->b_count)) < 0) { 342645b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-1752", 342745b03d5eSJeff Mahoney "b_count < 0"); 34281da177e4SLinus Torvalds } 34291da177e4SLinus Torvalds ret = 1; 34301da177e4SLinus Torvalds } 34311da177e4SLinus Torvalds journal->j_len--; 34321da177e4SLinus Torvalds journal->j_len_alloc--; 34331da177e4SLinus Torvalds free_cnode(p_s_sb, cn); 34341da177e4SLinus Torvalds return ret; 34351da177e4SLinus Torvalds } 34361da177e4SLinus Torvalds 34371da177e4SLinus Torvalds /* 34381da177e4SLinus Torvalds ** for any cnode in a journal list, it can only be dirtied of all the 34390779bf2dSMatt LaPlante ** transactions that include it are committed to disk. 
34401da177e4SLinus Torvalds ** this checks through each transaction, and returns 1 if you are allowed to dirty, 34411da177e4SLinus Torvalds ** and 0 if you aren't 34421da177e4SLinus Torvalds ** 34431da177e4SLinus Torvalds ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log 34441da177e4SLinus Torvalds ** blocks for a given transaction on disk 34451da177e4SLinus Torvalds ** 34461da177e4SLinus Torvalds */ 3447bd4c625cSLinus Torvalds static int can_dirty(struct reiserfs_journal_cnode *cn) 3448bd4c625cSLinus Torvalds { 34491da177e4SLinus Torvalds struct super_block *sb = cn->sb; 34501da177e4SLinus Torvalds b_blocknr_t blocknr = cn->blocknr; 34511da177e4SLinus Torvalds struct reiserfs_journal_cnode *cur = cn->hprev; 34521da177e4SLinus Torvalds int can_dirty = 1; 34531da177e4SLinus Torvalds 34541da177e4SLinus Torvalds /* first test hprev. These are all newer than cn, so any node here 34551da177e4SLinus Torvalds ** with the same block number and dev means this node can't be sent 34561da177e4SLinus Torvalds ** to disk right now. 34571da177e4SLinus Torvalds */ 34581da177e4SLinus Torvalds while (cur && can_dirty) { 34591da177e4SLinus Torvalds if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 34601da177e4SLinus Torvalds cur->blocknr == blocknr) { 34611da177e4SLinus Torvalds can_dirty = 0; 34621da177e4SLinus Torvalds } 34631da177e4SLinus Torvalds cur = cur->hprev; 34641da177e4SLinus Torvalds } 34651da177e4SLinus Torvalds /* then test hnext. These are all older than cn. 
As long as they 34661da177e4SLinus Torvalds ** are committed to the log, it is safe to write cn to disk 34671da177e4SLinus Torvalds */ 34681da177e4SLinus Torvalds cur = cn->hnext; 34691da177e4SLinus Torvalds while (cur && can_dirty) { 34701da177e4SLinus Torvalds if (cur->jlist && cur->jlist->j_len > 0 && 34711da177e4SLinus Torvalds atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 34721da177e4SLinus Torvalds cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 34731da177e4SLinus Torvalds can_dirty = 0; 34741da177e4SLinus Torvalds } 34751da177e4SLinus Torvalds cur = cur->hnext; 34761da177e4SLinus Torvalds } 34771da177e4SLinus Torvalds return can_dirty; 34781da177e4SLinus Torvalds } 34791da177e4SLinus Torvalds 34801da177e4SLinus Torvalds /* syncs the commit blocks, but does not force the real buffers to disk 34810779bf2dSMatt LaPlante ** will wait until the current transaction is done/committed before returning 34821da177e4SLinus Torvalds */ 3483bd4c625cSLinus Torvalds int journal_end_sync(struct reiserfs_transaction_handle *th, 3484bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks) 3485bd4c625cSLinus Torvalds { 34861da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 34871da177e4SLinus Torvalds 34881da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 34891da177e4SLinus Torvalds /* you can sync while nested, very, very bad */ 349014a61442SEric Sesterhenn BUG_ON(th->t_refcount > 1); 34911da177e4SLinus Torvalds if (journal->j_len == 0) { 3492bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3493bd4c625cSLinus Torvalds 1); 34941da177e4SLinus Torvalds journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); 34951da177e4SLinus Torvalds } 34961da177e4SLinus Torvalds return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); 34971da177e4SLinus Torvalds } 34981da177e4SLinus Torvalds 34991da177e4SLinus Torvalds /* 35001da177e4SLinus Torvalds ** writeback the pending 
async commits to disk 35011da177e4SLinus Torvalds */ 3502c4028958SDavid Howells static void flush_async_commits(struct work_struct *work) 3503bd4c625cSLinus Torvalds { 3504c4028958SDavid Howells struct reiserfs_journal *journal = 3505c4028958SDavid Howells container_of(work, struct reiserfs_journal, j_work.work); 3506c4028958SDavid Howells struct super_block *p_s_sb = journal->j_work_sb; 35071da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 35081da177e4SLinus Torvalds struct list_head *entry; 35091da177e4SLinus Torvalds 35101da177e4SLinus Torvalds lock_kernel(); 35111da177e4SLinus Torvalds if (!list_empty(&journal->j_journal_list)) { 35121da177e4SLinus Torvalds /* last entry is the youngest, commit it and you get everything */ 35131da177e4SLinus Torvalds entry = journal->j_journal_list.prev; 35141da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 35151da177e4SLinus Torvalds flush_commit_list(p_s_sb, jl, 1); 35161da177e4SLinus Torvalds } 35171da177e4SLinus Torvalds unlock_kernel(); 35181da177e4SLinus Torvalds } 35191da177e4SLinus Torvalds 35201da177e4SLinus Torvalds /* 35211da177e4SLinus Torvalds ** flushes any old transactions to disk 35221da177e4SLinus Torvalds ** ends the current transaction if it is too old 35231da177e4SLinus Torvalds */ 3524bd4c625cSLinus Torvalds int reiserfs_flush_old_commits(struct super_block *p_s_sb) 3525bd4c625cSLinus Torvalds { 35261da177e4SLinus Torvalds time_t now; 35271da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 35281da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 35291da177e4SLinus Torvalds 35301da177e4SLinus Torvalds now = get_seconds(); 35311da177e4SLinus Torvalds /* safety check so we don't flush while we are replaying the log during 35321da177e4SLinus Torvalds * mount 35331da177e4SLinus Torvalds */ 35341da177e4SLinus Torvalds if (list_empty(&journal->j_journal_list)) { 35351da177e4SLinus Torvalds return 0; 35361da177e4SLinus Torvalds } 35371da177e4SLinus Torvalds 
35381da177e4SLinus Torvalds /* check the current transaction. If there are no writers, and it is 35391da177e4SLinus Torvalds * too old, finish it, and force the commit blocks to disk 35401da177e4SLinus Torvalds */ 35411da177e4SLinus Torvalds if (atomic_read(&journal->j_wcount) <= 0 && 35421da177e4SLinus Torvalds journal->j_trans_start_time > 0 && 35431da177e4SLinus Torvalds journal->j_len > 0 && 3544bd4c625cSLinus Torvalds (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 35451da177e4SLinus Torvalds if (!journal_join(&th, p_s_sb, 1)) { 3546bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(p_s_sb, 3547bd4c625cSLinus Torvalds SB_BUFFER_WITH_SB(p_s_sb), 3548bd4c625cSLinus Torvalds 1); 3549bd4c625cSLinus Torvalds journal_mark_dirty(&th, p_s_sb, 3550bd4c625cSLinus Torvalds SB_BUFFER_WITH_SB(p_s_sb)); 35511da177e4SLinus Torvalds 35521da177e4SLinus Torvalds /* we're only being called from kreiserfsd, it makes no sense to do 35531da177e4SLinus Torvalds ** an async commit so that kreiserfsd can do it later 35541da177e4SLinus Torvalds */ 35551da177e4SLinus Torvalds do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT); 35561da177e4SLinus Torvalds } 35571da177e4SLinus Torvalds } 35581da177e4SLinus Torvalds return p_s_sb->s_dirt; 35591da177e4SLinus Torvalds } 35601da177e4SLinus Torvalds 35611da177e4SLinus Torvalds /* 35621da177e4SLinus Torvalds ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 35631da177e4SLinus Torvalds ** 35641da177e4SLinus Torvalds ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 35651da177e4SLinus Torvalds ** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just 35661da177e4SLinus Torvalds ** flushes the commit list and returns 0. 35671da177e4SLinus Torvalds ** 35681da177e4SLinus Torvalds ** Won't batch when flush or commit_now is set. 
Also won't batch when others are waiting on j_join_wait.
**
** Note, we can't allow the journal_end to proceed while there are still writers in the log.
**
** flags is a mask of FLUSH_ALL, COMMIT_NOW and WAIT.
**
** Locking as visible in this function: every path that returns 0 calls
** unlock_journal() before returning; the path that returns 1 does not.
** NOTE(review): this presumably means the caller enters with the journal
** locked and keeps the lock when 1 is returned -- confirm in do_journal_end.
*/
static int check_journal_end(struct reiserfs_transaction_handle *th,
			     struct super_block *p_s_sb, unsigned long nblocks,
			     int flags)
{

	time_t now;
	int flush = flags & FLUSH_ALL;
	int commit_now = flags & COMMIT_NOW;
	int wait_on_commit = flags & WAIT;
	struct reiserfs_journal_list *jl;
	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);

	BUG_ON(!th->t_trans_id);

	if (th->t_trans_id != journal->j_trans_id) {
		/* NOTE(review): %ld may not match the type of the trans ids -- confirm */
		reiserfs_panic(th->t_super,
			       "journal-1577: handle trans id %ld != current trans id %ld\n",
			       th->t_trans_id, journal->j_trans_id);
	}

	/* give back the part of this handle's reservation that was never logged */
	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
	if (atomic_read(&(journal->j_wcount)) > 0) {	/* <= 0 is allowed.  unmounting might not call begin */
		atomic_dec(&(journal->j_wcount));
	}

	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
	 ** will be dealt with by next transaction that actually writes something, but should be taken
	 ** care of in this trans
	 */
	BUG_ON(journal->j_len == 0);

	/* if wcount > 0, and we are called to with flush or commit_now,
	 ** we wait on j_join_wait.  We will wake up when the last writer has
	 ** finished the transaction, and started it on its way to the disk.
	 ** Then, we flush the commit or journal list, and just return 0
	 ** because the rest of journal end was already done for this transaction.
	 */
	if (atomic_read(&(journal->j_wcount)) > 0) {
		if (flush || commit_now) {
			unsigned trans_id;

			/* remember which transaction we are waiting for */
			jl = journal->j_current_jl;
			trans_id = jl->j_trans_id;
			if (wait_on_commit)
				jl->j_state |= LIST_COMMIT_PENDING;
			atomic_set(&(journal->j_jlock), 1);
			if (flush) {
				journal->j_next_full_flush = 1;
			}
			unlock_journal(p_s_sb);

			/* sleep while the current transaction is still j_jlocked */
			while (journal->j_trans_id == trans_id) {
				if (atomic_read(&journal->j_jlock)) {
					queue_log_writer(p_s_sb);
				} else {
					/*
					 * j_jlock was cleared but the
					 * transaction is still the same one:
					 * set it again under the journal lock
					 */
					lock_journal(p_s_sb);
					if (journal->j_trans_id == trans_id) {
						atomic_set(&(journal->j_jlock),
							   1);
					}
					unlock_journal(p_s_sb);
				}
			}
			BUG_ON(journal->j_trans_id == trans_id);

			/* only wait for the commit if the list has not been freed meanwhile */
			if (commit_now
			    && journal_list_still_alive(p_s_sb, trans_id)
			    && wait_on_commit) {
				flush_commit_list(p_s_sb, jl, 1);
			}
			return 0;
		}
		/* other writers remain and nobody asked for a commit: let them finish it */
		unlock_journal(p_s_sb);
		return 0;
	}

	/* deal with old transactions where we are the last writers */
	now = get_seconds();
	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
		commit_now = 1;
		journal->j_next_async_flush = 1;
	}
	/* don't batch when someone is waiting on j_join_wait */
	/* don't batch when syncing the commit or flushing the whole trans */
	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
	    && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
	    && journal->j_len_alloc < journal->j_max_batch
	    && journal->j_cnode_free > (journal->j_trans_max * 3)) {
		/* batch: leave the transaction open for later writers */
		journal->j_bcount++;
		unlock_journal(p_s_sb);
		return 0;
	}

	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
		reiserfs_panic(p_s_sb,
			       "journal-003: journal_end: j_start (%ld) is too high\n",
			       journal->j_start);
	}
	/* tell the caller to go ahead and finish the commit */
	return 1;
}

/*
** Does all the work that makes deleting blocks safe.
** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
**
** otherwise:
** set a bit for the block in the journal bitmap.
That will prevent it from being allocated for unformatted nodes 36801da177e4SLinus Torvalds ** before this transaction has finished. 36811da177e4SLinus Torvalds ** 36821da177e4SLinus Torvalds ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with 36831da177e4SLinus Torvalds ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, 36841da177e4SLinus Torvalds ** the block can't be reallocated yet. 36851da177e4SLinus Torvalds ** 36861da177e4SLinus Torvalds ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 36871da177e4SLinus Torvalds */ 3688bd4c625cSLinus Torvalds int journal_mark_freed(struct reiserfs_transaction_handle *th, 3689bd4c625cSLinus Torvalds struct super_block *p_s_sb, b_blocknr_t blocknr) 3690bd4c625cSLinus Torvalds { 36911da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 36921da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn = NULL; 36931da177e4SLinus Torvalds struct buffer_head *bh = NULL; 36941da177e4SLinus Torvalds struct reiserfs_list_bitmap *jb = NULL; 36951da177e4SLinus Torvalds int cleaned = 0; 36961da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 36971da177e4SLinus Torvalds 36981da177e4SLinus Torvalds cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 36991da177e4SLinus Torvalds if (cn && cn->bh) { 37001da177e4SLinus Torvalds bh = cn->bh; 37011da177e4SLinus Torvalds get_bh(bh); 37021da177e4SLinus Torvalds } 37031da177e4SLinus Torvalds /* if it is journal new, we just remove it from this transaction */ 37041da177e4SLinus Torvalds if (bh && buffer_journal_new(bh)) { 37051da177e4SLinus Torvalds clear_buffer_journal_new(bh); 37061da177e4SLinus Torvalds clear_prepared_bits(bh); 37071da177e4SLinus Torvalds reiserfs_clean_and_file_buffer(bh); 37081da177e4SLinus Torvalds cleaned = remove_from_transaction(p_s_sb, blocknr, 
cleaned); 37091da177e4SLinus Torvalds } else { 37101da177e4SLinus Torvalds /* set the bit for this block in the journal bitmap for this transaction */ 37111da177e4SLinus Torvalds jb = journal->j_current_jl->j_list_bitmap; 37121da177e4SLinus Torvalds if (!jb) { 3713bd4c625cSLinus Torvalds reiserfs_panic(p_s_sb, 3714bd4c625cSLinus Torvalds "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n"); 37151da177e4SLinus Torvalds } 37161da177e4SLinus Torvalds set_bit_in_list_bitmap(p_s_sb, blocknr, jb); 37171da177e4SLinus Torvalds 37181da177e4SLinus Torvalds /* Note, the entire while loop is not allowed to schedule. */ 37191da177e4SLinus Torvalds 37201da177e4SLinus Torvalds if (bh) { 37211da177e4SLinus Torvalds clear_prepared_bits(bh); 37221da177e4SLinus Torvalds reiserfs_clean_and_file_buffer(bh); 37231da177e4SLinus Torvalds } 37241da177e4SLinus Torvalds cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); 37251da177e4SLinus Torvalds 37261da177e4SLinus Torvalds /* find all older transactions with this block, make sure they don't try to write it out */ 3727bd4c625cSLinus Torvalds cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, 3728bd4c625cSLinus Torvalds blocknr); 37291da177e4SLinus Torvalds while (cn) { 37301da177e4SLinus Torvalds if (p_s_sb == cn->sb && blocknr == cn->blocknr) { 37311da177e4SLinus Torvalds set_bit(BLOCK_FREED, &cn->state); 37321da177e4SLinus Torvalds if (cn->bh) { 37331da177e4SLinus Torvalds if (!cleaned) { 37341da177e4SLinus Torvalds /* remove_from_transaction will brelse the buffer if it was 37351da177e4SLinus Torvalds ** in the current trans 37361da177e4SLinus Torvalds */ 3737bd4c625cSLinus Torvalds clear_buffer_journal_dirty(cn-> 3738bd4c625cSLinus Torvalds bh); 37391da177e4SLinus Torvalds clear_buffer_dirty(cn->bh); 3740bd4c625cSLinus Torvalds clear_buffer_journal_test(cn-> 3741bd4c625cSLinus Torvalds bh); 37421da177e4SLinus Torvalds cleaned = 1; 37431da177e4SLinus Torvalds put_bh(cn->bh); 3744bd4c625cSLinus Torvalds 
if (atomic_read 3745bd4c625cSLinus Torvalds (&(cn->bh->b_count)) < 0) { 3746bd4c625cSLinus Torvalds reiserfs_warning(p_s_sb, 374745b03d5eSJeff Mahoney "journal-2138", 374845b03d5eSJeff Mahoney "cn->bh->b_count < 0"); 37491da177e4SLinus Torvalds } 37501da177e4SLinus Torvalds } 37511da177e4SLinus Torvalds if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3752bd4c625cSLinus Torvalds atomic_dec(& 3753bd4c625cSLinus Torvalds (cn->jlist-> 3754bd4c625cSLinus Torvalds j_nonzerolen)); 37551da177e4SLinus Torvalds } 37561da177e4SLinus Torvalds cn->bh = NULL; 37571da177e4SLinus Torvalds } 37581da177e4SLinus Torvalds } 37591da177e4SLinus Torvalds cn = cn->hnext; 37601da177e4SLinus Torvalds } 37611da177e4SLinus Torvalds } 37621da177e4SLinus Torvalds 3763398c95bdSChris Mason if (bh) 3764398c95bdSChris Mason release_buffer_page(bh); /* get_hash grabs the buffer */ 37651da177e4SLinus Torvalds return 0; 37661da177e4SLinus Torvalds } 37671da177e4SLinus Torvalds 3768bd4c625cSLinus Torvalds void reiserfs_update_inode_transaction(struct inode *inode) 3769bd4c625cSLinus Torvalds { 37701da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb); 37711da177e4SLinus Torvalds REISERFS_I(inode)->i_jl = journal->j_current_jl; 37721da177e4SLinus Torvalds REISERFS_I(inode)->i_trans_id = journal->j_trans_id; 37731da177e4SLinus Torvalds } 37741da177e4SLinus Torvalds 37751da177e4SLinus Torvalds /* 37761da177e4SLinus Torvalds * returns -1 on error, 0 if no commits/barriers were done and 1 37771da177e4SLinus Torvalds * if a transaction was actually committed and the barrier was done 37781da177e4SLinus Torvalds */ 37791da177e4SLinus Torvalds static int __commit_trans_jl(struct inode *inode, unsigned long id, 37801da177e4SLinus Torvalds struct reiserfs_journal_list *jl) 37811da177e4SLinus Torvalds { 37821da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 37831da177e4SLinus Torvalds struct super_block *sb = inode->i_sb; 37841da177e4SLinus 
Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 37851da177e4SLinus Torvalds int ret = 0; 37861da177e4SLinus Torvalds 37871da177e4SLinus Torvalds /* is it from the current transaction, or from an unknown transaction? */ 37881da177e4SLinus Torvalds if (id == journal->j_trans_id) { 37891da177e4SLinus Torvalds jl = journal->j_current_jl; 37901da177e4SLinus Torvalds /* try to let other writers come in and grow this transaction */ 37911da177e4SLinus Torvalds let_transaction_grow(sb, id); 37921da177e4SLinus Torvalds if (journal->j_trans_id != id) { 37931da177e4SLinus Torvalds goto flush_commit_only; 37941da177e4SLinus Torvalds } 37951da177e4SLinus Torvalds 37961da177e4SLinus Torvalds ret = journal_begin(&th, sb, 1); 37971da177e4SLinus Torvalds if (ret) 37981da177e4SLinus Torvalds return ret; 37991da177e4SLinus Torvalds 38001da177e4SLinus Torvalds /* someone might have ended this transaction while we joined */ 38011da177e4SLinus Torvalds if (journal->j_trans_id != id) { 3802bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3803bd4c625cSLinus Torvalds 1); 38041da177e4SLinus Torvalds journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); 38051da177e4SLinus Torvalds ret = journal_end(&th, sb, 1); 38061da177e4SLinus Torvalds goto flush_commit_only; 38071da177e4SLinus Torvalds } 38081da177e4SLinus Torvalds 38091da177e4SLinus Torvalds ret = journal_end_sync(&th, sb, 1); 38101da177e4SLinus Torvalds if (!ret) 38111da177e4SLinus Torvalds ret = 1; 38121da177e4SLinus Torvalds 38131da177e4SLinus Torvalds } else { 38141da177e4SLinus Torvalds /* this gets tricky, we have to make sure the journal list in 38151da177e4SLinus Torvalds * the inode still exists. 
We know the list is still around 38161da177e4SLinus Torvalds * if we've got a larger transaction id than the oldest list 38171da177e4SLinus Torvalds */ 38181da177e4SLinus Torvalds flush_commit_only: 38191da177e4SLinus Torvalds if (journal_list_still_alive(inode->i_sb, id)) { 38201da177e4SLinus Torvalds /* 38211da177e4SLinus Torvalds * we only set ret to 1 when we know for sure 38221da177e4SLinus Torvalds * the barrier hasn't been started yet on the commit 38231da177e4SLinus Torvalds * block. 38241da177e4SLinus Torvalds */ 38251da177e4SLinus Torvalds if (atomic_read(&jl->j_commit_left) > 1) 38261da177e4SLinus Torvalds ret = 1; 38271da177e4SLinus Torvalds flush_commit_list(sb, jl, 1); 38281da177e4SLinus Torvalds if (journal->j_errno) 38291da177e4SLinus Torvalds ret = journal->j_errno; 38301da177e4SLinus Torvalds } 38311da177e4SLinus Torvalds } 38321da177e4SLinus Torvalds /* otherwise the list is gone, and long since committed */ 38331da177e4SLinus Torvalds return ret; 38341da177e4SLinus Torvalds } 38351da177e4SLinus Torvalds 3836bd4c625cSLinus Torvalds int reiserfs_commit_for_inode(struct inode *inode) 3837bd4c625cSLinus Torvalds { 3838600ed416SJeff Mahoney unsigned int id = REISERFS_I(inode)->i_trans_id; 38391da177e4SLinus Torvalds struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 38401da177e4SLinus Torvalds 38411da177e4SLinus Torvalds /* for the whole inode, assume unset id means it was 38421da177e4SLinus Torvalds * changed in the current transaction. 
More conservative 38431da177e4SLinus Torvalds */ 38441da177e4SLinus Torvalds if (!id || !jl) { 38451da177e4SLinus Torvalds reiserfs_update_inode_transaction(inode); 38461da177e4SLinus Torvalds id = REISERFS_I(inode)->i_trans_id; 38471da177e4SLinus Torvalds /* jl will be updated in __commit_trans_jl */ 38481da177e4SLinus Torvalds } 38491da177e4SLinus Torvalds 38501da177e4SLinus Torvalds return __commit_trans_jl(inode, id, jl); 38511da177e4SLinus Torvalds } 38521da177e4SLinus Torvalds 38531da177e4SLinus Torvalds void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, 3854bd4c625cSLinus Torvalds struct buffer_head *bh) 3855bd4c625cSLinus Torvalds { 38561da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 38571da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.restore_prepared); 38581da177e4SLinus Torvalds if (!bh) { 38591da177e4SLinus Torvalds return; 38601da177e4SLinus Torvalds } 38611da177e4SLinus Torvalds if (test_clear_buffer_journal_restore_dirty(bh) && 38621da177e4SLinus Torvalds buffer_journal_dirty(bh)) { 38631da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn; 38641da177e4SLinus Torvalds cn = get_journal_hash_dev(p_s_sb, 38651da177e4SLinus Torvalds journal->j_list_hash_table, 38661da177e4SLinus Torvalds bh->b_blocknr); 38671da177e4SLinus Torvalds if (cn && can_dirty(cn)) { 38681da177e4SLinus Torvalds set_buffer_journal_test(bh); 38691da177e4SLinus Torvalds mark_buffer_dirty(bh); 38701da177e4SLinus Torvalds } 38711da177e4SLinus Torvalds } 38721da177e4SLinus Torvalds clear_buffer_journal_prepared(bh); 38731da177e4SLinus Torvalds } 38741da177e4SLinus Torvalds 38751da177e4SLinus Torvalds extern struct tree_balance *cur_tb; 38761da177e4SLinus Torvalds /* 38771da177e4SLinus Torvalds ** before we can change a metadata block, we have to make sure it won't 38781da177e4SLinus Torvalds ** be written to disk while we are altering it. So, we must: 38791da177e4SLinus Torvalds ** clean it 38801da177e4SLinus Torvalds ** wait on it. 
38811da177e4SLinus Torvalds ** 38821da177e4SLinus Torvalds */ 38831da177e4SLinus Torvalds int reiserfs_prepare_for_journal(struct super_block *p_s_sb, 3884bd4c625cSLinus Torvalds struct buffer_head *bh, int wait) 3885bd4c625cSLinus Torvalds { 38861da177e4SLinus Torvalds PROC_INFO_INC(p_s_sb, journal.prepare); 38871da177e4SLinus Torvalds 3888ca5de404SNick Piggin if (!trylock_buffer(bh)) { 38891da177e4SLinus Torvalds if (!wait) 38901da177e4SLinus Torvalds return 0; 38911da177e4SLinus Torvalds lock_buffer(bh); 38921da177e4SLinus Torvalds } 38931da177e4SLinus Torvalds set_buffer_journal_prepared(bh); 38941da177e4SLinus Torvalds if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { 38951da177e4SLinus Torvalds clear_buffer_journal_test(bh); 38961da177e4SLinus Torvalds set_buffer_journal_restore_dirty(bh); 38971da177e4SLinus Torvalds } 38981da177e4SLinus Torvalds unlock_buffer(bh); 38991da177e4SLinus Torvalds return 1; 39001da177e4SLinus Torvalds } 39011da177e4SLinus Torvalds 3902bd4c625cSLinus Torvalds static void flush_old_journal_lists(struct super_block *s) 3903bd4c625cSLinus Torvalds { 39041da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(s); 39051da177e4SLinus Torvalds struct reiserfs_journal_list *jl; 39061da177e4SLinus Torvalds struct list_head *entry; 39071da177e4SLinus Torvalds time_t now = get_seconds(); 39081da177e4SLinus Torvalds 39091da177e4SLinus Torvalds while (!list_empty(&journal->j_journal_list)) { 39101da177e4SLinus Torvalds entry = journal->j_journal_list.next; 39111da177e4SLinus Torvalds jl = JOURNAL_LIST_ENTRY(entry); 39121da177e4SLinus Torvalds /* this check should always be run, to send old lists to disk */ 3913a3172027SChris Mason if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && 3914a3172027SChris Mason atomic_read(&jl->j_commit_left) == 0 && 3915a3172027SChris Mason test_transaction(s, jl)) { 39161da177e4SLinus Torvalds flush_used_journal_lists(s, jl); 39171da177e4SLinus Torvalds } else { 
39181da177e4SLinus Torvalds break; 39191da177e4SLinus Torvalds } 39201da177e4SLinus Torvalds } 39211da177e4SLinus Torvalds } 39221da177e4SLinus Torvalds 39231da177e4SLinus Torvalds /* 39241da177e4SLinus Torvalds ** long and ugly. If flush, will not return until all commit 39251da177e4SLinus Torvalds ** blocks and all real buffers in the trans are on disk. 39261da177e4SLinus Torvalds ** If no_async, won't return until all commit blocks are on disk. 39271da177e4SLinus Torvalds ** 39281da177e4SLinus Torvalds ** keep reading, there are comments as you go along 39291da177e4SLinus Torvalds ** 39301da177e4SLinus Torvalds ** If the journal is aborted, we just clean up. Things like flushing 39311da177e4SLinus Torvalds ** journal lists, etc just won't happen. 39321da177e4SLinus Torvalds */ 3933bd4c625cSLinus Torvalds static int do_journal_end(struct reiserfs_transaction_handle *th, 3934bd4c625cSLinus Torvalds struct super_block *p_s_sb, unsigned long nblocks, 3935bd4c625cSLinus Torvalds int flags) 3936bd4c625cSLinus Torvalds { 39371da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 39381da177e4SLinus Torvalds struct reiserfs_journal_cnode *cn, *next, *jl_cn; 39391da177e4SLinus Torvalds struct reiserfs_journal_cnode *last_cn = NULL; 39401da177e4SLinus Torvalds struct reiserfs_journal_desc *desc; 39411da177e4SLinus Torvalds struct reiserfs_journal_commit *commit; 39421da177e4SLinus Torvalds struct buffer_head *c_bh; /* commit bh */ 39431da177e4SLinus Torvalds struct buffer_head *d_bh; /* desc bh */ 39441da177e4SLinus Torvalds int cur_write_start = 0; /* start index of current log write */ 39451da177e4SLinus Torvalds int old_start; 39461da177e4SLinus Torvalds int i; 3947a44c94a7SAlexander Zarochentsev int flush; 3948a44c94a7SAlexander Zarochentsev int wait_on_commit; 39491da177e4SLinus Torvalds struct reiserfs_journal_list *jl, *temp_jl; 39501da177e4SLinus Torvalds struct list_head *entry, *safe; 39511da177e4SLinus Torvalds unsigned long jindex; 
3952600ed416SJeff Mahoney unsigned int commit_trans_id; 39531da177e4SLinus Torvalds int trans_half; 39541da177e4SLinus Torvalds 39551da177e4SLinus Torvalds BUG_ON(th->t_refcount > 1); 39561da177e4SLinus Torvalds BUG_ON(!th->t_trans_id); 39571da177e4SLinus Torvalds 3958a44c94a7SAlexander Zarochentsev /* protect flush_older_commits from doing mistakes if the 3959a44c94a7SAlexander Zarochentsev transaction ID counter gets overflowed. */ 3960600ed416SJeff Mahoney if (th->t_trans_id == ~0U) 3961a44c94a7SAlexander Zarochentsev flags |= FLUSH_ALL | COMMIT_NOW | WAIT; 3962a44c94a7SAlexander Zarochentsev flush = flags & FLUSH_ALL; 3963a44c94a7SAlexander Zarochentsev wait_on_commit = flags & WAIT; 3964a44c94a7SAlexander Zarochentsev 396522e2c507SJens Axboe put_fs_excl(); 39661da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 39671da177e4SLinus Torvalds reiserfs_check_lock_depth(p_s_sb, "journal end"); 39681da177e4SLinus Torvalds if (journal->j_len == 0) { 3969bd4c625cSLinus Torvalds reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3970bd4c625cSLinus Torvalds 1); 39711da177e4SLinus Torvalds journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); 39721da177e4SLinus Torvalds } 39731da177e4SLinus Torvalds 39741da177e4SLinus Torvalds lock_journal(p_s_sb); 39751da177e4SLinus Torvalds if (journal->j_next_full_flush) { 39761da177e4SLinus Torvalds flags |= FLUSH_ALL; 39771da177e4SLinus Torvalds flush = 1; 39781da177e4SLinus Torvalds } 39791da177e4SLinus Torvalds if (journal->j_next_async_flush) { 39801da177e4SLinus Torvalds flags |= COMMIT_NOW | WAIT; 39811da177e4SLinus Torvalds wait_on_commit = 1; 39821da177e4SLinus Torvalds } 39831da177e4SLinus Torvalds 39841da177e4SLinus Torvalds /* check_journal_end locks the journal, and unlocks if it does not return 1 39851da177e4SLinus Torvalds ** it tells us if we should continue with the journal_end, or just return 39861da177e4SLinus Torvalds */ 39871da177e4SLinus Torvalds if (!check_journal_end(th, p_s_sb, 
nblocks, flags)) { 39881da177e4SLinus Torvalds p_s_sb->s_dirt = 1; 39891da177e4SLinus Torvalds wake_queued_writers(p_s_sb); 39901da177e4SLinus Torvalds reiserfs_async_progress_wait(p_s_sb); 39911da177e4SLinus Torvalds goto out; 39921da177e4SLinus Torvalds } 39931da177e4SLinus Torvalds 39941da177e4SLinus Torvalds /* check_journal_end might set these, check again */ 39951da177e4SLinus Torvalds if (journal->j_next_full_flush) { 39961da177e4SLinus Torvalds flush = 1; 39971da177e4SLinus Torvalds } 39981da177e4SLinus Torvalds 39991da177e4SLinus Torvalds /* 40001da177e4SLinus Torvalds ** j must wait means we have to flush the log blocks, and the real blocks for 40011da177e4SLinus Torvalds ** this transaction 40021da177e4SLinus Torvalds */ 40031da177e4SLinus Torvalds if (journal->j_must_wait > 0) { 40041da177e4SLinus Torvalds flush = 1; 40051da177e4SLinus Torvalds } 40061da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE 4007ef43bc4fSJan Kara /* quota ops might need to nest, setup the journal_info pointer for them 4008ef43bc4fSJan Kara * and raise the refcount so that it is > 0. 
*/ 40091da177e4SLinus Torvalds current->journal_info = th; 4010ef43bc4fSJan Kara th->t_refcount++; 40111da177e4SLinus Torvalds reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 40121da177e4SLinus Torvalds * the transaction */ 4013ef43bc4fSJan Kara th->t_refcount--; 40141da177e4SLinus Torvalds current->journal_info = th->t_handle_save; 40151da177e4SLinus Torvalds #endif 40161da177e4SLinus Torvalds 40171da177e4SLinus Torvalds /* setup description block */ 4018bd4c625cSLinus Torvalds d_bh = 4019bd4c625cSLinus Torvalds journal_getblk(p_s_sb, 4020bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 4021bd4c625cSLinus Torvalds journal->j_start); 40221da177e4SLinus Torvalds set_buffer_uptodate(d_bh); 40231da177e4SLinus Torvalds desc = (struct reiserfs_journal_desc *)(d_bh)->b_data; 40241da177e4SLinus Torvalds memset(d_bh->b_data, 0, d_bh->b_size); 40251da177e4SLinus Torvalds memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); 40261da177e4SLinus Torvalds set_desc_trans_id(desc, journal->j_trans_id); 40271da177e4SLinus Torvalds 40281da177e4SLinus Torvalds /* setup commit block. 
Don't write (keep it clean too) this one until after everyone else is written */ 40291da177e4SLinus Torvalds c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 4030bd4c625cSLinus Torvalds ((journal->j_start + journal->j_len + 4031bd4c625cSLinus Torvalds 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 40321da177e4SLinus Torvalds commit = (struct reiserfs_journal_commit *)c_bh->b_data; 40331da177e4SLinus Torvalds memset(c_bh->b_data, 0, c_bh->b_size); 40341da177e4SLinus Torvalds set_commit_trans_id(commit, journal->j_trans_id); 40351da177e4SLinus Torvalds set_buffer_uptodate(c_bh); 40361da177e4SLinus Torvalds 40371da177e4SLinus Torvalds /* init this journal list */ 40381da177e4SLinus Torvalds jl = journal->j_current_jl; 40391da177e4SLinus Torvalds 40401da177e4SLinus Torvalds /* we lock the commit before doing anything because 40411da177e4SLinus Torvalds * we want to make sure nobody tries to run flush_commit_list until 40421da177e4SLinus Torvalds * the new transaction is fully setup, and we've already flushed the 40431da177e4SLinus Torvalds * ordered bh list 40441da177e4SLinus Torvalds */ 404590415deaSJeff Mahoney mutex_lock(&jl->j_commit_mutex); 40461da177e4SLinus Torvalds 40471da177e4SLinus Torvalds /* save the transaction id in case we need to commit it later */ 40481da177e4SLinus Torvalds commit_trans_id = jl->j_trans_id; 40491da177e4SLinus Torvalds 40501da177e4SLinus Torvalds atomic_set(&jl->j_older_commits_done, 0); 40511da177e4SLinus Torvalds jl->j_trans_id = journal->j_trans_id; 40521da177e4SLinus Torvalds jl->j_timestamp = journal->j_trans_start_time; 40531da177e4SLinus Torvalds jl->j_commit_bh = c_bh; 40541da177e4SLinus Torvalds jl->j_start = journal->j_start; 40551da177e4SLinus Torvalds jl->j_len = journal->j_len; 40561da177e4SLinus Torvalds atomic_set(&jl->j_nonzerolen, journal->j_len); 40571da177e4SLinus Torvalds atomic_set(&jl->j_commit_left, journal->j_len + 2); 40581da177e4SLinus Torvalds jl->j_realblock = NULL; 40591da177e4SLinus Torvalds 
40601da177e4SLinus Torvalds /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 40611da177e4SLinus Torvalds ** for each real block, add it to the journal list hash, 40621da177e4SLinus Torvalds ** copy into real block index array in the commit or desc block 40631da177e4SLinus Torvalds */ 40641da177e4SLinus Torvalds trans_half = journal_trans_half(p_s_sb->s_blocksize); 40651da177e4SLinus Torvalds for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { 40661da177e4SLinus Torvalds if (buffer_journaled(cn->bh)) { 40671da177e4SLinus Torvalds jl_cn = get_cnode(p_s_sb); 40681da177e4SLinus Torvalds if (!jl_cn) { 4069bd4c625cSLinus Torvalds reiserfs_panic(p_s_sb, 4070bd4c625cSLinus Torvalds "journal-1676, get_cnode returned NULL\n"); 40711da177e4SLinus Torvalds } 40721da177e4SLinus Torvalds if (i == 0) { 40731da177e4SLinus Torvalds jl->j_realblock = jl_cn; 40741da177e4SLinus Torvalds } 40751da177e4SLinus Torvalds jl_cn->prev = last_cn; 40761da177e4SLinus Torvalds jl_cn->next = NULL; 40771da177e4SLinus Torvalds if (last_cn) { 40781da177e4SLinus Torvalds last_cn->next = jl_cn; 40791da177e4SLinus Torvalds } 40801da177e4SLinus Torvalds last_cn = jl_cn; 40811da177e4SLinus Torvalds /* make sure the block we are trying to log is not a block 40821da177e4SLinus Torvalds of journal or reserved area */ 40831da177e4SLinus Torvalds 4084bd4c625cSLinus Torvalds if (is_block_in_log_or_reserved_area 4085bd4c625cSLinus Torvalds (p_s_sb, cn->bh->b_blocknr)) { 4086bd4c625cSLinus Torvalds reiserfs_panic(p_s_sb, 4087bd4c625cSLinus Torvalds "journal-2332: Trying to log block %lu, which is a log block\n", 4088bd4c625cSLinus Torvalds cn->bh->b_blocknr); 40891da177e4SLinus Torvalds } 40901da177e4SLinus Torvalds jl_cn->blocknr = cn->bh->b_blocknr; 40911da177e4SLinus Torvalds jl_cn->state = 0; 40921da177e4SLinus Torvalds jl_cn->sb = p_s_sb; 40931da177e4SLinus Torvalds jl_cn->bh = cn->bh; 40941da177e4SLinus Torvalds jl_cn->jlist = jl; 40951da177e4SLinus Torvalds 
insert_journal_hash(journal->j_list_hash_table, jl_cn); 40961da177e4SLinus Torvalds if (i < trans_half) { 4097bd4c625cSLinus Torvalds desc->j_realblock[i] = 4098bd4c625cSLinus Torvalds cpu_to_le32(cn->bh->b_blocknr); 40991da177e4SLinus Torvalds } else { 4100bd4c625cSLinus Torvalds commit->j_realblock[i - trans_half] = 4101bd4c625cSLinus Torvalds cpu_to_le32(cn->bh->b_blocknr); 41021da177e4SLinus Torvalds } 41031da177e4SLinus Torvalds } else { 41041da177e4SLinus Torvalds i--; 41051da177e4SLinus Torvalds } 41061da177e4SLinus Torvalds } 41071da177e4SLinus Torvalds set_desc_trans_len(desc, journal->j_len); 41081da177e4SLinus Torvalds set_desc_mount_id(desc, journal->j_mount_id); 41091da177e4SLinus Torvalds set_desc_trans_id(desc, journal->j_trans_id); 41101da177e4SLinus Torvalds set_commit_trans_len(commit, journal->j_len); 41111da177e4SLinus Torvalds 41121da177e4SLinus Torvalds /* special check in case all buffers in the journal were marked for not logging */ 411314a61442SEric Sesterhenn BUG_ON(journal->j_len == 0); 41141da177e4SLinus Torvalds 41151da177e4SLinus Torvalds /* we're about to dirty all the log blocks, mark the description block 41161da177e4SLinus Torvalds * dirty now too. Don't mark the commit block dirty until all the 41171da177e4SLinus Torvalds * others are on disk 41181da177e4SLinus Torvalds */ 41191da177e4SLinus Torvalds mark_buffer_dirty(d_bh); 41201da177e4SLinus Torvalds 41211da177e4SLinus Torvalds /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 41221da177e4SLinus Torvalds cur_write_start = journal->j_start; 41231da177e4SLinus Torvalds cn = journal->j_first; 41241da177e4SLinus Torvalds jindex = 1; /* start at one so we don't get the desc again */ 41251da177e4SLinus Torvalds while (cn) { 41261da177e4SLinus Torvalds clear_buffer_journal_new(cn->bh); 41271da177e4SLinus Torvalds /* copy all the real blocks into log area. 
dirty log blocks */ 41281da177e4SLinus Torvalds if (buffer_journaled(cn->bh)) { 41291da177e4SLinus Torvalds struct buffer_head *tmp_bh; 41301da177e4SLinus Torvalds char *addr; 41311da177e4SLinus Torvalds struct page *page; 4132bd4c625cSLinus Torvalds tmp_bh = 4133bd4c625cSLinus Torvalds journal_getblk(p_s_sb, 4134bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 4135bd4c625cSLinus Torvalds ((cur_write_start + 4136bd4c625cSLinus Torvalds jindex) % 4137bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 41381da177e4SLinus Torvalds set_buffer_uptodate(tmp_bh); 41391da177e4SLinus Torvalds page = cn->bh->b_page; 41401da177e4SLinus Torvalds addr = kmap(page); 4141bd4c625cSLinus Torvalds memcpy(tmp_bh->b_data, 4142bd4c625cSLinus Torvalds addr + offset_in_page(cn->bh->b_data), 41431da177e4SLinus Torvalds cn->bh->b_size); 41441da177e4SLinus Torvalds kunmap(page); 41451da177e4SLinus Torvalds mark_buffer_dirty(tmp_bh); 41461da177e4SLinus Torvalds jindex++; 41471da177e4SLinus Torvalds set_buffer_journal_dirty(cn->bh); 41481da177e4SLinus Torvalds clear_buffer_journaled(cn->bh); 41491da177e4SLinus Torvalds } else { 41501da177e4SLinus Torvalds /* JDirty cleared sometime during transaction. don't log this one */ 415145b03d5eSJeff Mahoney reiserfs_warning(p_s_sb, "journal-2048", 415245b03d5eSJeff Mahoney "BAD, buffer in journal hash, " 415345b03d5eSJeff Mahoney "but not JDirty!"); 41541da177e4SLinus Torvalds brelse(cn->bh); 41551da177e4SLinus Torvalds } 41561da177e4SLinus Torvalds next = cn->next; 41571da177e4SLinus Torvalds free_cnode(p_s_sb, cn); 41581da177e4SLinus Torvalds cn = next; 41591da177e4SLinus Torvalds cond_resched(); 41601da177e4SLinus Torvalds } 41611da177e4SLinus Torvalds 41621da177e4SLinus Torvalds /* we are done with both the c_bh and d_bh, but 41631da177e4SLinus Torvalds ** c_bh must be written after all other commit blocks, 41641da177e4SLinus Torvalds ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 
41651da177e4SLinus Torvalds */ 41661da177e4SLinus Torvalds 41671da177e4SLinus Torvalds journal->j_current_jl = alloc_journal_list(p_s_sb); 41681da177e4SLinus Torvalds 41691da177e4SLinus Torvalds /* now it is safe to insert this transaction on the main list */ 41701da177e4SLinus Torvalds list_add_tail(&jl->j_list, &journal->j_journal_list); 41711da177e4SLinus Torvalds list_add_tail(&jl->j_working_list, &journal->j_working_list); 41721da177e4SLinus Torvalds journal->j_num_work_lists++; 41731da177e4SLinus Torvalds 41741da177e4SLinus Torvalds /* reset journal values for the next transaction */ 41751da177e4SLinus Torvalds old_start = journal->j_start; 4176bd4c625cSLinus Torvalds journal->j_start = 4177bd4c625cSLinus Torvalds (journal->j_start + journal->j_len + 4178bd4c625cSLinus Torvalds 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); 41791da177e4SLinus Torvalds atomic_set(&(journal->j_wcount), 0); 41801da177e4SLinus Torvalds journal->j_bcount = 0; 41811da177e4SLinus Torvalds journal->j_last = NULL; 41821da177e4SLinus Torvalds journal->j_first = NULL; 41831da177e4SLinus Torvalds journal->j_len = 0; 41841da177e4SLinus Torvalds journal->j_trans_start_time = 0; 4185a44c94a7SAlexander Zarochentsev /* check for trans_id overflow */ 4186a44c94a7SAlexander Zarochentsev if (++journal->j_trans_id == 0) 4187a44c94a7SAlexander Zarochentsev journal->j_trans_id = 10; 41881da177e4SLinus Torvalds journal->j_current_jl->j_trans_id = journal->j_trans_id; 41891da177e4SLinus Torvalds journal->j_must_wait = 0; 41901da177e4SLinus Torvalds journal->j_len_alloc = 0; 41911da177e4SLinus Torvalds journal->j_next_full_flush = 0; 41921da177e4SLinus Torvalds journal->j_next_async_flush = 0; 41931da177e4SLinus Torvalds init_journal_hash(p_s_sb); 41941da177e4SLinus Torvalds 41951da177e4SLinus Torvalds // make sure reiserfs_add_jh sees the new current_jl before we 41961da177e4SLinus Torvalds // write out the tails 41971da177e4SLinus Torvalds smp_mb(); 41981da177e4SLinus Torvalds 41991da177e4SLinus Torvalds /* 
tail conversion targets have to hit the disk before we end the 42001da177e4SLinus Torvalds * transaction. Otherwise a later transaction might repack the tail 42011da177e4SLinus Torvalds * before this transaction commits, leaving the data block unflushed and 42021da177e4SLinus Torvalds * clean, if we crash before the later transaction commits, the data block 42031da177e4SLinus Torvalds * is lost. 42041da177e4SLinus Torvalds */ 42051da177e4SLinus Torvalds if (!list_empty(&jl->j_tail_bh_list)) { 42061da177e4SLinus Torvalds unlock_kernel(); 42071da177e4SLinus Torvalds write_ordered_buffers(&journal->j_dirty_buffers_lock, 42081da177e4SLinus Torvalds journal, jl, &jl->j_tail_bh_list); 42091da177e4SLinus Torvalds lock_kernel(); 42101da177e4SLinus Torvalds } 421114a61442SEric Sesterhenn BUG_ON(!list_empty(&jl->j_tail_bh_list)); 421290415deaSJeff Mahoney mutex_unlock(&jl->j_commit_mutex); 42131da177e4SLinus Torvalds 42141da177e4SLinus Torvalds /* honor the flush wishes from the caller, simple commits can 42151da177e4SLinus Torvalds ** be done outside the journal lock, they are done below 42161da177e4SLinus Torvalds ** 42171da177e4SLinus Torvalds ** if we don't flush the commit list right now, we put it into 42181da177e4SLinus Torvalds ** the work queue so the people waiting on the async progress work 42191da177e4SLinus Torvalds ** queue don't wait for this proc to flush journal lists and such. 42201da177e4SLinus Torvalds */ 42211da177e4SLinus Torvalds if (flush) { 42221da177e4SLinus Torvalds flush_commit_list(p_s_sb, jl, 1); 42231da177e4SLinus Torvalds flush_journal_list(p_s_sb, jl, 1); 42241da177e4SLinus Torvalds } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 42251da177e4SLinus Torvalds queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); 42261da177e4SLinus Torvalds 42271da177e4SLinus Torvalds /* if the next transaction has any chance of wrapping, flush 42281da177e4SLinus Torvalds ** transactions that might get overwritten. 
If any journal lists are very 42291da177e4SLinus Torvalds ** old flush them as well. 42301da177e4SLinus Torvalds */ 42311da177e4SLinus Torvalds first_jl: 42321da177e4SLinus Torvalds list_for_each_safe(entry, safe, &journal->j_journal_list) { 42331da177e4SLinus Torvalds temp_jl = JOURNAL_LIST_ENTRY(entry); 42341da177e4SLinus Torvalds if (journal->j_start <= temp_jl->j_start) { 42351da177e4SLinus Torvalds if ((journal->j_start + journal->j_trans_max + 1) >= 4236bd4c625cSLinus Torvalds temp_jl->j_start) { 42371da177e4SLinus Torvalds flush_used_journal_lists(p_s_sb, temp_jl); 42381da177e4SLinus Torvalds goto first_jl; 42391da177e4SLinus Torvalds } else if ((journal->j_start + 42401da177e4SLinus Torvalds journal->j_trans_max + 1) < 4241bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 42421da177e4SLinus Torvalds /* if we don't cross into the next transaction and we don't 42431da177e4SLinus Torvalds * wrap, there is no way we can overlap any later transactions 42441da177e4SLinus Torvalds * break now 42451da177e4SLinus Torvalds */ 42461da177e4SLinus Torvalds break; 42471da177e4SLinus Torvalds } 42481da177e4SLinus Torvalds } else if ((journal->j_start + 42491da177e4SLinus Torvalds journal->j_trans_max + 1) > 4250bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 42511da177e4SLinus Torvalds if (((journal->j_start + journal->j_trans_max + 1) % 4252bd4c625cSLinus Torvalds SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= 4253bd4c625cSLinus Torvalds temp_jl->j_start) { 42541da177e4SLinus Torvalds flush_used_journal_lists(p_s_sb, temp_jl); 42551da177e4SLinus Torvalds goto first_jl; 42561da177e4SLinus Torvalds } else { 42571da177e4SLinus Torvalds /* we don't overlap anything from out start to the end of the 42581da177e4SLinus Torvalds * log, and our wrapped portion doesn't overlap anything at 42591da177e4SLinus Torvalds * the start of the log. 
We can break 42601da177e4SLinus Torvalds */ 42611da177e4SLinus Torvalds break; 42621da177e4SLinus Torvalds } 42631da177e4SLinus Torvalds } 42641da177e4SLinus Torvalds } 42651da177e4SLinus Torvalds flush_old_journal_lists(p_s_sb); 42661da177e4SLinus Torvalds 4267bd4c625cSLinus Torvalds journal->j_current_jl->j_list_bitmap = 4268bd4c625cSLinus Torvalds get_list_bitmap(p_s_sb, journal->j_current_jl); 42691da177e4SLinus Torvalds 42701da177e4SLinus Torvalds if (!(journal->j_current_jl->j_list_bitmap)) { 4271bd4c625cSLinus Torvalds reiserfs_panic(p_s_sb, 4272bd4c625cSLinus Torvalds "journal-1996: do_journal_end, could not get a list bitmap\n"); 42731da177e4SLinus Torvalds } 42741da177e4SLinus Torvalds 42751da177e4SLinus Torvalds atomic_set(&(journal->j_jlock), 0); 42761da177e4SLinus Torvalds unlock_journal(p_s_sb); 42771da177e4SLinus Torvalds /* wake up any body waiting to join. */ 42781da177e4SLinus Torvalds clear_bit(J_WRITERS_QUEUED, &journal->j_state); 42791da177e4SLinus Torvalds wake_up(&(journal->j_join_wait)); 42801da177e4SLinus Torvalds 42811da177e4SLinus Torvalds if (!flush && wait_on_commit && 42821da177e4SLinus Torvalds journal_list_still_alive(p_s_sb, commit_trans_id)) { 42831da177e4SLinus Torvalds flush_commit_list(p_s_sb, jl, 1); 42841da177e4SLinus Torvalds } 42851da177e4SLinus Torvalds out: 42861da177e4SLinus Torvalds reiserfs_check_lock_depth(p_s_sb, "journal end2"); 42871da177e4SLinus Torvalds 42881da177e4SLinus Torvalds memset(th, 0, sizeof(*th)); 42891da177e4SLinus Torvalds /* Re-set th->t_super, so we can properly keep track of how many 42901da177e4SLinus Torvalds * persistent transactions there are. 
We need to do this so if this 42911da177e4SLinus Torvalds * call is part of a failed restart_transaction, we can free it later */ 42921da177e4SLinus Torvalds th->t_super = p_s_sb; 42931da177e4SLinus Torvalds 42941da177e4SLinus Torvalds return journal->j_errno; 42951da177e4SLinus Torvalds } 42961da177e4SLinus Torvalds 4297bd4c625cSLinus Torvalds static void __reiserfs_journal_abort_hard(struct super_block *sb) 42981da177e4SLinus Torvalds { 42991da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 43001da177e4SLinus Torvalds if (test_bit(J_ABORTED, &journal->j_state)) 43011da177e4SLinus Torvalds return; 43021da177e4SLinus Torvalds 43031da177e4SLinus Torvalds printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", 43041da177e4SLinus Torvalds reiserfs_bdevname(sb)); 43051da177e4SLinus Torvalds 43061da177e4SLinus Torvalds sb->s_flags |= MS_RDONLY; 43071da177e4SLinus Torvalds set_bit(J_ABORTED, &journal->j_state); 43081da177e4SLinus Torvalds 43091da177e4SLinus Torvalds #ifdef CONFIG_REISERFS_CHECK 43101da177e4SLinus Torvalds dump_stack(); 43111da177e4SLinus Torvalds #endif 43121da177e4SLinus Torvalds } 43131da177e4SLinus Torvalds 4314bd4c625cSLinus Torvalds static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno) 43151da177e4SLinus Torvalds { 43161da177e4SLinus Torvalds struct reiserfs_journal *journal = SB_JOURNAL(sb); 43171da177e4SLinus Torvalds if (test_bit(J_ABORTED, &journal->j_state)) 43181da177e4SLinus Torvalds return; 43191da177e4SLinus Torvalds 43201da177e4SLinus Torvalds if (!journal->j_errno) 43211da177e4SLinus Torvalds journal->j_errno = errno; 43221da177e4SLinus Torvalds 43231da177e4SLinus Torvalds __reiserfs_journal_abort_hard(sb); 43241da177e4SLinus Torvalds } 43251da177e4SLinus Torvalds 4326bd4c625cSLinus Torvalds void reiserfs_journal_abort(struct super_block *sb, int errno) 43271da177e4SLinus Torvalds { 4328e13601bcSHarvey Harrison __reiserfs_journal_abort_soft(sb, errno); 43291da177e4SLinus 
Torvalds } 4330